In [18]:
import torch
import xarray as xr
import numpy as np
import torch.nn as nn
import matplotlib.pyplot as plt
import time
from torch.utils.data import DataLoader, TensorDataset
import dask
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
from torch.optim.lr_scheduler import LambdaLR
import torch.optim as optim

In [6]:
# Open the January 2010 Chile Zarr store as an xarray Dataset.
# NOTE(review): the path is relative, so this assumes the notebook is run from
# the directory that contains "dataset/" - confirm before sharing.
ds = xr.open_zarr("dataset/chile_2010_january.zarr")
# Show the text summary (dimensions, coordinates, data variables) of the dataset.
print(ds)

<xarray.Dataset> Size: 47MB
Dimensions:              (time: 124, latitude: 157, longitude: 121)
Coordinates:
  * latitude             (latitude) float32 628B -17.0 -17.25 ... -55.75 -56.0
    level                int64 8B ...
  * longitude            (longitude) float32 484B 280.0 280.2 ... 309.8 310.0
  * time                 (time) datetime64[ns] 992B 2010-01-01 ... 2010-01-31...
Data variables:
    geopotential         (time, latitude, longitude) float32 9MB dask.array<chunksize=(10, 50, 50), meta=np.ndarray>
    specific_humidity    (time, latitude, longitude) float32 9MB dask.array<chunksize=(10, 50, 50), meta=np.ndarray>
    temperature          (time, latitude, longitude) float32 9MB dask.array<chunksize=(10, 50, 50), meta=np.ndarray>
    u_component_of_wind  (time, latitude, longitude) float32 9MB dask.array<chunksize=(10, 50, 50), meta=np.ndarray>
    v_component_of_wind  (time, latitude, longitude) float32 9MB dask.array<chunksize=(10, 50, 50), meta=np.ndarray>


In [9]:
def preprocess_dataset(ds, variables, train_time_scale, val_time_scale, test_time_scale):
    """Split a dataset along time, stack variables and standardize them.

    For every split, each name in ``variables`` is selected with
    ``ds[var].sel(time=<split>)`` and the resulting arrays are stacked on a
    new leading axis, so each returned tensor has the variable axis first.
    Normalization statistics are computed on the training split only and
    then applied to all three splits, so no validation/test information
    leaks into the scaling.

    Args:
        ds: Mapping-like dataset; ``ds[var].sel(time=...).values`` must yield
            an array. The code indexes ``shape[2]`` and ``shape[3]`` of the
            stacked result, so each selected variable must be 3-D.
        variables: Iterable of variable names to stack (in this order).
        train_time_scale: Time selector for the training split.
        val_time_scale: Time selector for the validation split.
        test_time_scale: Time selector for the test split.

    Returns:
        Tuple ``(train_data, val_data, test_data, lat, lon, means, stds)``:
        the three standardized float32 tensors, the sizes of axes 2 and 3 of
        the training tensor, and the per-variable mean/std tensors (shape
        ``(n_variables, 1, 1, 1)``) needed to undo the normalization.
    """
    def _stack_split(time_scale):
        # One array per variable, stacked on a new leading (variable) axis.
        per_variable = [ds[var].sel(time=time_scale).values for var in variables]
        return torch.tensor(np.array(per_variable), dtype=torch.float32)

    train_data = _stack_split(train_time_scale)
    val_data = _stack_split(val_time_scale)
    test_data = _stack_split(test_time_scale)

    lat, lon = train_data.shape[2], train_data.shape[3]

    # Per-variable statistics from the training split only.
    means = train_data.mean(dim=(1, 2, 3), keepdim=True)
    stds = train_data.std(dim=(1, 2, 3), keepdim=True)
    # A variable that is constant over the training split has std == 0 (and a
    # single-element split gives NaN); dividing by that would fill every split
    # with NaN/inf. Use a unit scale instead so such variables map to zeros.
    stds = torch.where(stds > 0, stds, torch.ones_like(stds))

    train_data = (train_data - means) / stds
    val_data = (val_data - means) / stds
    test_data = (test_data - means) / stds

    return train_data, val_data, test_data, lat, lon, means, stds

In [4]:
# Chronological train / validation / test split of January 2010, expressed as
# date-string slices that are later passed to `.sel(time=...)`.
train_time_scale, val_time_scale, test_time_scale = (
    slice("2010-01-01", "2010-01-20"),
    slice("2010-01-21", "2010-01-25"),
    slice("2010-01-26", "2010-01-31"),
)

In [7]:
# Dataset variables used as model channels, in channel order.
variables = [
    "geopotential",
    "specific_humidity",
    "temperature",
    "u_component_of_wind",
    "v_component_of_wind",
]

In [13]:
# Build the three standardized splits plus the grid size and the training
# statistics used for the normalization.
train_data, val_data, test_data, lat, lon, means, stds = preprocess_dataset(
    ds, variables, train_time_scale, val_time_scale, test_time_scale
)

# preprocess_dataset stacks variables on axis 0; swap the first two axes so the
# time axis leads and can serve as the sample axis for batching.
train_data, val_data, test_data = (
    split.permute(1, 0, 2, 3) for split in (train_data, val_data, test_data)
)

print("Train Data Shape:", train_data.shape)
print("Validation Data Shape:", val_data.shape)
print("Test Data Shape:", test_data.shape)

Train Data Shape: torch.Size([80, 5, 157, 121])
Validation Data Shape: torch.Size([20, 5, 157, 121])
Test Data Shape: torch.Size([24, 5, 157, 121])


In [14]:
# Wrap each split in a single-tensor dataset; every item is a 1-tuple, which is
# why downstream code reads `batch[0]`.
train_dataset, val_dataset, test_dataset = (
    TensorDataset(split) for split in (train_data, val_data, test_data)
)

batch_size = 8
# Only the training loader shuffles; validation and test keep their order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader, test_loader = (
    DataLoader(dataset, batch_size=batch_size, shuffle=False)
    for dataset in (val_dataset, test_dataset)
)

# Sanity check: look at the shape of the first training batch only.
for batch in train_loader:
    print("Train Batch Shape:", batch[0].shape)
    break

Train Batch Shape: torch.Size([8, 5, 157, 121])


In [15]:
class SimpleClimateModel(nn.Module):
    """Small fully convolutional network with three 3x3 convolutions.

    All convolutions use ``kernel_size=3`` with ``padding=1``, so the two
    trailing (spatial) dimensions of the input are preserved; only the channel
    count changes: ``input_channels -> 16 -> 32 -> output_channels``.
    """

    def __init__(self, input_channels, output_channels):
        """Create the three convolution layers and the shared ReLU.

        Args:
            input_channels: Number of channels of the input tensor.
            output_channels: Number of channels of the returned tensor.
        """
        super().__init__()
        first_width, second_width = 16, 32
        same_size = dict(kernel_size=3, padding=1)
        self.conv1 = nn.Conv2d(input_channels, first_width, **same_size)
        self.conv2 = nn.Conv2d(first_width, second_width, **same_size)
        self.conv3 = nn.Conv2d(second_width, output_channels, **same_size)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Apply conv1 and conv2 with ReLU, then conv3 without activation."""
        hidden = x
        for conv in (self.conv1, self.conv2):
            hidden = self.relu(conv(hidden))
        # The last layer is linear: no activation on the output.
        return self.conv3(hidden)


In [20]:
def train_model(model, train_loader, val_loader, optimizer, loss_fn, device, epochs=5):
    """Train ``model`` to reproduce its own input and report losses per epoch.

    Each batch's first element is used both as the model input and as the
    target of ``loss_fn``. After every epoch the mean of the per-batch
    training losses and the mean of the per-batch validation losses are
    printed. Nothing is returned; the model is updated in place and is left
    in eval mode after the last epoch.

    Args:
        model: Module to train; moved to ``device`` first.
        train_loader: Sized iterable of batches; ``batch[0]`` is the input tensor.
        val_loader: Sized iterable of validation batches, same layout.
        optimizer: Optimizer bound to ``model``'s parameters.
        loss_fn: Callable ``loss_fn(output, target)`` returning a scalar tensor.
        device: Device the model and batches are moved to.
        epochs: Number of passes over ``train_loader``.
    """
    model.to(device)

    for epoch_idx in range(epochs):
        # Training pass
        model.train()
        train_batch_losses = []
        for batch in train_loader:
            inputs = batch[0].to(device)
            optimizer.zero_grad()
            reconstruction = model(inputs)
            batch_loss = loss_fn(reconstruction, inputs)
            batch_loss.backward()
            optimizer.step()
            train_batch_losses.append(batch_loss.item())

        # Validation pass (no gradient tracking)
        model.eval()
        val_batch_losses = []
        with torch.no_grad():
            for batch in val_loader:
                inputs = batch[0].to(device)
                val_batch_losses.append(loss_fn(model(inputs), inputs).item())

        # Report the mean per-batch losses
        mean_train_loss = sum(train_batch_losses) / len(train_loader)
        mean_val_loss = sum(val_batch_losses) / len(val_loader)
        print(f"Epoch {epoch_idx + 1}/{epochs}, Training Loss: {mean_train_loss:.4f}, Validation Loss: {mean_val_loss:.4f}")


In [21]:
epochs = 5

# The model is trained to reproduce its own input (train_model uses the input
# batch as the loss target), so input and output channel counts are both the
# number of stacked variables. These names were previously undefined, which
# made this cell fail with NameError on a fresh kernel.
input_channels = output_channels = len(variables)

# Fix the RNG so weight initialization and batch shuffling are repeatable.
torch.manual_seed(42)

model = SimpleClimateModel(input_channels, output_channels)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.MSELoss()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
train_model(model, train_loader, val_loader, optimizer, loss_fn, device, epochs=epochs)

Epoch 1/5, Training Loss: 0.8327, Validation Loss: 0.5052
Epoch 2/5, Training Loss: 0.3452, Validation Loss: 0.2573
Epoch 3/5, Training Loss: 0.2020, Validation Loss: 0.1568
Epoch 4/5, Training Loss: 0.1196, Validation Loss: 0.0882
Epoch 5/5, Training Loss: 0.0801, Validation Loss: 0.0674


In [22]:
def evaluate_model(model, data_loader, loss_fn, device):
    """Return the mean per-batch loss of ``model`` reproducing its input.

    The model is switched to eval mode and run without gradient tracking.
    For every batch, ``batch[0]`` is both the model input and the target
    passed to ``loss_fn``.

    Args:
        model: Module to evaluate (expected to already live on ``device``).
        data_loader: Sized iterable of batches; ``batch[0]`` is the input tensor.
        loss_fn: Callable ``loss_fn(output, target)`` returning a scalar tensor.
        device: Device each batch is moved to.

    Returns:
        Sum of the per-batch loss values divided by ``len(data_loader)``.
    """
    model.eval()
    with torch.no_grad():
        batch_losses = [
            loss_fn(model(inputs), inputs).item()
            for inputs in (batch[0].to(device) for batch in data_loader)
        ]
    return sum(batch_losses) / len(data_loader)


In [23]:
# Mean per-batch loss of the trained model on the held-out test loader, using
# the same loss function as in training.
test_loss = evaluate_model(model, test_loader, loss_fn, device)
print(f"Test Loss: {test_loss:.4f}")


Test Loss: 0.0814


In [24]:
def mean_absolute_error(model, data_loader, device):
    """Return the mean absolute error between the model output and its input.

    The error is accumulated as a total absolute-error sum and a total
    element count over all batches, so the result is the exact element-wise
    MAE over the whole loader. (Averaging per-batch means instead would give
    a smaller final batch the same weight as a full one.)

    Args:
        model: Module to evaluate (expected to already live on ``device``).
        data_loader: Iterable of batches; ``batch[0]`` is the input tensor,
            which is also used as the target.
        device: Device each batch is moved to.

    Returns:
        The MAE as a Python float.

    Raises:
        ZeroDivisionError: If ``data_loader`` yields no elements.
    """
    model.eval()
    abs_error_sum = 0.0
    element_count = 0
    with torch.no_grad():
        for batch in data_loader:
            input_data = batch[0].to(device)
            output = model(input_data)
            abs_error = torch.abs(output - input_data)
            abs_error_sum += abs_error.sum().item()
            # Count elements of the difference itself so the weighting stays
            # correct even if output and input broadcast against each other.
            element_count += abs_error.numel()
    return abs_error_sum / element_count

# MAE of the trained model on the test loader. The data were standardized
# before batching, so this value is in normalized units, not physical ones.
test_mae = mean_absolute_error(model, test_loader, device)
print(f"Mean Absolute Error (MAE): {test_mae:.4f}")


Mean Absolute Error (MAE): 0.2078


In [25]:
def generate_spatial_embeddings(latitudes, longitudes):
    """Build trigonometric embeddings for a latitude/longitude grid.

    Args:
        latitudes: 1-D array-like of latitudes in degrees (length ``lat``).
        longitudes: 1-D array-like of longitudes in degrees (length ``lon``).

    Returns:
        float32 tensor of shape ``(4, lat, lon)`` whose channels are, in order,
        ``sin(lat)*cos(lon)``, ``sin(lat)*sin(lon)``, ``cos(lat)*cos(lon)``
        and ``cos(lat)*sin(lon)``.
    """
    # Go through NumPy first so array-likes other than ndarray are accepted,
    # then convert degrees to radians.
    lat_rad = torch.tensor(np.radians(np.asarray(latitudes)), dtype=torch.float32)
    lon_rad = torch.tensor(np.radians(np.asarray(longitudes)), dtype=torch.float32)

    # Column vectors for latitude and row vectors for longitude, so that the
    # products below broadcast to a full (lat, lon) grid.
    sin_lat = torch.sin(lat_rad).unsqueeze(1)  # Shape: (lat, 1)
    cos_lat = torch.cos(lat_rad).unsqueeze(1)
    sin_lon = torch.sin(lon_rad).unsqueeze(0)  # Shape: (1, lon)
    cos_lon = torch.cos(lon_rad).unsqueeze(0)

    # Stack (not concatenate) the four grids: concatenating along dim 0 would
    # glue them into a single (4 * lat, lon) grid instead of adding a channel
    # axis.
    spatial_embeddings = torch.stack([
        sin_lat * cos_lon, sin_lat * sin_lon,
        cos_lat * cos_lon, cos_lat * sin_lon
    ], dim=0)  # Shape: (4, lat, lon)

    return spatial_embeddings

In [26]:
def generate_temporal_embeddings(time_steps):
    """Build cyclic (sine/cosine) embeddings for day of year and hour of day.

    Args:
        time_steps: Object exposing ``dayofyear`` and ``hour`` attributes,
            either array-like (one entry per time step) or scalar (a single
            time step).

    Returns:
        float32 tensor of shape ``(time, 4)`` with columns
        ``[sin_day, cos_day, sin_hour, cos_hour]``. A single scalar time step
        yields shape ``(1, 4)``.
    """
    # Normalize the attributes to 1-D float arrays once: this makes scalar
    # input work (it used to fail on unsqueeze of a 0-d tensor) and avoids
    # handing arbitrary array-like objects straight to torch.tensor.
    day_values = np.atleast_1d(np.asarray(time_steps.dayofyear, dtype=np.float64))
    hour_values = np.atleast_1d(np.asarray(time_steps.hour, dtype=np.float64))

    # Map each quantity onto one turn of the unit circle. The year length is
    # fixed at 365 days, so day 366 of a leap year wraps slightly past a turn.
    day_of_year = torch.tensor((day_values / 365.0) * 2 * np.pi, dtype=torch.float32)
    hour_of_day = torch.tensor((hour_values / 24.0) * 2 * np.pi, dtype=torch.float32)

    # Sine and cosine embeddings, one column each.
    temporal_embeddings = torch.stack(
        [
            torch.sin(day_of_year),
            torch.cos(day_of_year),
            torch.sin(hour_of_day),
            torch.cos(hour_of_day),
        ],
        dim=1,
    )  # Shape: (time, 4)
    return temporal_embeddings