In [11]:
import os
import sys
maindir = os.getcwd()
sys.path.append(maindir+"/src")

In [12]:
import pickle
import torch
import numpy as np
import matplotlib.pyplot as plt

from preprocessing import data_processing, compute_anomalies, \
                            compute_forced_response, compute_variance, \
                            merge_runs, stack_runs, numpy_to_torch, standardize, build_training_and_test_sets,\
                            build_training_and_test_sets_stacked

from plot_tools import plot_gt_vs_pred, animation_gt_vs_pred
from leave_one_out import leave_one_out_single, leave_one_out_procedure
from cross_validation import cross_validation_procedure

In [13]:
############### Load climate model raw data for SST
with open('data/ssp585_time_series.pkl', 'rb') as f:
    data = pickle.load(f)

###################### Load longitude and latitude 
with open('data/lon.npy', 'rb') as f:
    lon = np.load(f)

with open('data/lat.npy', 'rb') as f:
    lat = np.load(f)

# define grid (+ croping for latitude > 60)
lat_grid, lon_grid = np.meshgrid(lat[lat<=60], lon, indexing='ij')

lat_size = lat_grid.shape[0]
lon_size = lon_grid.shape[1]

In [14]:
# define pytorch precision
dtype = torch.float32

data_processed, notnan_idx, nan_idx = data_processing(data, lon, lat,max_models=100)
x = compute_anomalies(data_processed, lon_size, lat_size, nan_idx, time_period=33)
y = compute_forced_response(data_processed, lon_size, lat_size, nan_idx, time_period=33)
vars = compute_variance(x, lon_size, lat_size, nan_idx, time_period=33)

# convert numpy arrays to pytorch 
x, y, vars = numpy_to_torch(x,y,vars)

# standardize data 
x, y = standardize(x,y,vars)

# stack runs for each model
x, y, vars = stack_runs(x,y,vars,time_period=33,lon_size=lon_size,lat_size=lat_size,dtype=dtype)

# stack runs for each model
x_merged, y_merged, vars_merged = merge_runs(x,y,vars)


  mean_ref_ensemble = np.nanmean(y_tmp,axis=1)
  mean_ref_ensemble = np.nanmean(mean_ref_ensemble,axis=0)
  mean_spatial_ensemble = np.nanmean(y_tmp,axis=0)
  data_forced_response[m][r] = mean_spatial_ensemble - np.nanmean(mean_spatial_ensemble,axis=0)


## Train model

In [15]:
m0 = 'IPSL-CM6A-LR'

training_models, x_train, y_train, x_test, y_test = build_training_and_test_sets_stacked(m0,x,y,vars,lon_size,lat_size,time_period=33,dtype=dtype)

In [16]:
import torch
import torch.nn as nn
import torch.optim as optim
import math

class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        return x + self.pe[:x.size(0), :]

class TransformerTimeSeries(nn.Module):
    def __init__(self, input_dim, d_model, nhead, num_encoder_layers, dim_feedforward, output_dim, max_len=5000):
        super(TransformerTimeSeries, self).__init__()
        self.input_dim = input_dim
        self.d_model = d_model
        self.embedding = nn.Linear(input_dim, d_model)
        self.pos_encoder = PositionalEncoding(d_model, max_len)
        self.transformer_encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward), num_encoder_layers)
        self.fc_out = nn.Linear(d_model, output_dim)

    def forward(self, src):
        # src shape: (batch_size, seq_len, input_dim)
        src = self.embedding(src) * math.sqrt(self.d_model)
        src = self.pos_encoder(src)
        src = src.permute(1, 0, 2)  # Transformer expects (seq_len, batch_size, d_model)
        output = self.transformer_encoder(src)
        output = output.permute(1, 0, 2)  # Back to (batch_size, seq_len, d_model)
        output = self.fc_out(output)
        return output

# Example usage
if __name__ == "__main__":
    input_dim = len(notnan_idx)  # Number of features in the time series
    d_model = 64
    nhead = 8
    num_encoder_layers = 3
    dim_feedforward = 128
    output_dim = 1  # Predicting a single value
    max_len = 5000

    model = TransformerTimeSeries(input_dim, d_model, nhead, num_encoder_layers, dim_feedforward, output_dim, max_len)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.MSELoss()

    # Dummy data
    # x = torch.randn(32, 50, input_dim)  # Batch size of 32, sequence length of 50
    # x_train
    # y = torch.randn(32, 50, output_dim)  # Corresponding targets

    model.train()
    for epoch in range(100):
        optimizer.zero_grad()
        outputs = model(x_train[:,:,notnan_idx])
        loss = criterion(outputs, y_train[:,:,notnan_idx])
        loss.backward()
        optimizer.step()
        print(f"Epoch {epoch + 1}, Loss: {loss.item()}")

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1, Loss: 0.9676014184951782
Epoch 2, Loss: 1.7977405786514282
Epoch 3, Loss: 0.6202101707458496
Epoch 4, Loss: 0.3407157361507416
Epoch 5, Loss: 0.4338347613811493
Epoch 6, Loss: 0.3447282016277313
Epoch 7, Loss: 0.2648790776729584
Epoch 8, Loss: 0.2481316775083542
Epoch 9, Loss: 0.26629456877708435
Epoch 10, Loss: 0.2708631753921509
Epoch 11, Loss: 0.25624531507492065
Epoch 12, Loss: 0.24102328717708588
Epoch 13, Loss: 0.23486213386058807
Epoch 14, Loss: 0.23666007816791534
Epoch 15, Loss: 0.241444393992424
Epoch 16, Loss: 0.24349677562713623
Epoch 17, Loss: 0.2405456155538559
Epoch 18, Loss: 0.23478278517723083
Epoch 19, Loss: 0.22868847846984863
Epoch 20, Loss: 0.22372759878635406
Epoch 21, Loss: 0.22212961316108704
Epoch 22, Loss: 0.22185437381267548
Epoch 23, Loss: 0.22203634679317474
Epoch 24, Loss: 0.22170612215995789
Epoch 25, Loss: 0.2198239117860794
Epoch 26, Loss: 0.21752014756202698
Epoch 27, Loss: 0.2143758237361908
Epoch 28, Loss: 0.21406589448451996
Epoch 29, Loss:

In [17]:
y_pred = model(x_test[:,:,notnan_idx])

In [18]:
from algorithms import ridge_regression, ridge_regression_low_rank, low_rank_projection, \
                        prediction, train_robust_weights_model, compute_weights

In [19]:
lambda_tmp = 10.0

# compute the big matrix X and Y
training_models, x_train_merged, y_train_merged, x_test_merged, y_test_merged = build_training_and_test_sets(m0,x,y,vars,lon_size,lat_size,time_period=33,dtype=dtype)

# compute ridge regressor
w_ridge = torch.zeros(lon_size*lat_size,lon_size*lat_size,dtype=dtype)
w_ridge[np.ix_(notnan_idx,notnan_idx)] = ridge_regression(x_train_merged[:,notnan_idx], y_train_merged[:,notnan_idx], lambda_=lambda_tmp, dtype=dtype)

x_test_tmp = x[m0]
y_test_tmp = y[m0]

# ridge
y_pred_ridge = prediction(x_test_tmp, w_ridge,notnan_idx, nan_idx)

In [20]:
# compare the tranformer and the ridge regression

#compute rmse
rmse_ridge = torch.sqrt(torch.nanmean((y_test_tmp - y_pred_ridge)**2))
rmse_transformer = torch.sqrt(torch.mean((y_test_tmp[:,:,notnan_idx] - y_pred)**2))


print(f"RMSE Ridge: {rmse_ridge}")
print(f"RMSE Transformer: {rmse_transformer}")

RMSE Ridge: 0.3049261271953583
RMSE Transformer: 0.3598424196243286
