In [20]:
import numpy as np
# Load the pre-normalised array; columns are individual samples and rows are
# variables (indexed along axis 0 below).
samples = np.load('data/global_meteo_ex_norm.npy')
n_subsample = 800000
# Draw the subsample from a seeded generator so that the same columns are
# selected on every Restart & Run All. Without a seed, the downstream
# train/validation split (which is seeded) would still see different data.
rng = np.random.default_rng(42)
subsample_indices = rng.choice(samples.shape[1], size=n_subsample, replace=False)
# Extract features and target
# Rows 0 and 1 become the two feature columns (transposed to one sample per
# row); row 3 is the target. Row 2 is not used here.
# NOTE(review): presumably rows 0-1 are ssrd and vpd, in some order — confirm
# against the script that produced the .npy file.
X = samples[0:2, subsample_indices].T
y = samples[3, subsample_indices]

In [22]:
# Sanity check: feature matrix and target vector must agree on sample count.
np.shape(X), np.shape(y)

((800000, 2), (800000,))

In [21]:
# Count missing target values; anything non-zero would poison the MSE loss.
np.isnan(y).sum()

np.int64(0)

In [23]:
# Step 1: hold out 20% of the subsample for validation (fixed random_state).
# Step 2: wrap every split in a float32 PyTorch tensor.
from sklearn.model_selection import train_test_split
import torch

X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Feature tensors keep the (n_samples, n_features) layout of the arrays.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)

# Target tensors gain a trailing axis of size 1 (n_samples -> n_samples x 1).
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32).unsqueeze(1)

# Report the split sizes.
print(f"Training set: {X_train_tensor.size(0):,} samples")
print(f"Validation set: {X_val_tensor.size(0):,} samples")
print(f"Total subsampled: {X_train_tensor.size(0) + X_val_tensor.size(0):,} samples")

Training set: 640,000 samples
Validation set: 160,000 samples
Total subsampled: 800,000 samples


In [24]:
from model import ClimateDataset
from torch.utils.data import DataLoader

# Mini-batch size shared by both loaders; adjust to the available memory
# and GPU capacity.
batch_size = 64

# Training batches are drawn in shuffled order.
train_loader = DataLoader(
    dataset=ClimateDataset(X_train_tensor, y_train_tensor),
    batch_size=batch_size,
    shuffle=True,
)

# Validation batches are served in a fixed order.
val_loader = DataLoader(
    dataset=ClimateDataset(X_val_tensor, y_val_tensor),
    batch_size=batch_size,
    shuffle=False,
)

In [25]:
from model import NeuralNet
from torch import nn, optim

# Seed PyTorch's global RNG before the model is built so that repeated runs
# start from the same random state (the shuffling DataLoader also draws from
# this RNG when no explicit generator is passed).
# NOTE(review): assumes NeuralNet initialises its weights through PyTorch's
# RNG; some GPU kernels can still be non-deterministic.
torch.manual_seed(42)

# Initialize model, loss function, and optimizer
input_dim = 2  # Number of predictors (ssrd and vpd)
model = NeuralNet(input_dim)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
criterion = nn.MSELoss()  # default reduction: mean over the batch
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop: one optimisation pass over train_loader followed by one
# gradient-free evaluation pass over val_loader per epoch.
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    total_train_loss = 0.0
    n_train_seen = 0
    for batch_X, batch_y in train_loader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)

        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()

        # Weight each batch mean by its sample count so that a shorter final
        # batch does not get the same influence as a full one.
        n_batch = batch_X.size(0)
        total_train_loss += loss.item() * n_batch
        n_train_seen += n_batch

    # Validation Step
    model.eval()
    total_val_loss = 0.0
    n_val_seen = 0
    with torch.no_grad():
        for batch_X, batch_y in val_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            n_batch = batch_X.size(0)
            total_val_loss += loss.item() * n_batch
            n_val_seen += n_batch

    # Per-sample mean losses for the epoch.
    avg_train_loss = total_train_loss / n_train_seen
    avg_val_loss = total_val_loss / n_val_seen

    print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_train_loss:.10f}, "
            f"Val Loss: {avg_val_loss:.10f}")
            

Epoch [1/10], Train Loss: 0.0214960499, Val Loss: 0.0176034149
Epoch [2/10], Train Loss: 0.0182165210, Val Loss: 0.0177280528
Epoch [3/10], Train Loss: 0.0180230310, Val Loss: 0.0179886473
Epoch [4/10], Train Loss: 0.0179312543, Val Loss: 0.0178243490
Epoch [5/10], Train Loss: 0.0178140882, Val Loss: 0.0174690621
Epoch [6/10], Train Loss: 0.0177779048, Val Loss: 0.0177357737
Epoch [7/10], Train Loss: 0.0177382000, Val Loss: 0.0177105760
Epoch [8/10], Train Loss: 0.0176990333, Val Loss: 0.0186128591
Epoch [9/10], Train Loss: 0.0176718913, Val Loss: 0.0174579988
Epoch [10/10], Train Loss: 0.0176777286, Val Loss: 0.0173773733


In [26]:
# Persist only the learned parameters (the state_dict), not the full module.
# The output directory is created first so the save does not fail when
# ./outputs/ does not exist yet (e.g. on a fresh checkout).
import os

weights_path = "./outputs/model_weights_pre-training_meteo_ex.pth"
os.makedirs(os.path.dirname(weights_path), exist_ok=True)
torch.save(model.state_dict(), weights_path)