In [1]:
import os
import sys
maindir = os.getcwd()
sys.path.append(maindir+"/src")

In [2]:
import pickle
import torch
import numpy as np
import matplotlib.pyplot as plt

from preprocessing import data_processing, compute_anomalies_and_scalers, \
                            compute_forced_response, \
                            numpy_to_torch, rescale_and_merge_training_and_test_sets, \
                            rescale_training_and_test_sets

from plot_tools import plot_gt_vs_pred, animation_gt_vs_pred
from leave_one_out import leave_one_out_single, leave_one_out_procedure
from cross_validation import cross_validation_procedure

In [3]:
############### Load climate model raw data for SST
with open('data/ssp585_time_series.pkl', 'rb') as f:
    data = pickle.load(f)

###################### Load longitude and latitude 
with open('data/lon.npy', 'rb') as f:
    lon = np.load(f)

with open('data/lat.npy', 'rb') as f:
    lat = np.load(f)

# define grid (+ croping for latitude > 60)
lat_grid, lon_grid = np.meshgrid(lat[lat<=60], lon, indexing='ij')

lat_size = lat_grid.shape[0]
lon_size = lon_grid.shape[1]

In [4]:
# define pytorch precision
dtype = torch.float32

data_processed, notnan_idx, nan_idx = data_processing(data, lon, lat,max_models=100)
x, means, vars = compute_anomalies_and_scalers(data_processed, lon_size, lat_size, nan_idx, time_period=34)
y = compute_forced_response(data_processed, lon_size, lat_size, nan_idx, time_period=34)

x,y, means, vars = numpy_to_torch(x,y,means,vars, dtype=dtype)


  means[m] = np.nanmean(data_reshaped[m],axis=0)
  vars[m] = np.nanvar(data_reshaped[m],axis=0)
  mean_spatial_ensemble = np.nanmean(y_tmp,axis=0)


## Train model

In [6]:
m0 = 'IPSL-CM6A-LR'

training_models, x_train, y_train, x_test, y_test = build_training_and_test_sets_stacked(m0,x,y,vars,lon_size,lat_size,time_period=34,dtype=dtype)

In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
import math

class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        return x + self.pe[:x.size(0), :]

class TransformerTimeSeries(nn.Module):
    def __init__(self, input_dim, d_model, nhead, num_encoder_layers, dim_feedforward, output_dim, max_len=5000):
        super(TransformerTimeSeries, self).__init__()
        self.input_dim = input_dim
        self.d_model = d_model
        self.embedding = nn.Linear(input_dim, d_model)
        self.pos_encoder = PositionalEncoding(d_model, max_len)
        self.transformer_encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward), num_encoder_layers)
        self.fc_out = nn.Linear(d_model, output_dim)

    def forward(self, src):
        # src shape: (batch_size, seq_len, input_dim)
        src = self.embedding(src) * math.sqrt(self.d_model)
        src = self.pos_encoder(src)
        src = src.permute(1, 0, 2)  # Transformer expects (seq_len, batch_size, d_model)
        output = self.transformer_encoder(src)
        output = output.permute(1, 0, 2)  # Back to (batch_size, seq_len, d_model)
        output = self.fc_out(output)
        return output

# Example usage
if __name__ == "__main__":
    input_dim = len(notnan_idx)  # Number of features in the time series
    d_model = 64
    nhead = 8
    num_encoder_layers = 3
    dim_feedforward = 128
    output_dim = 1  # Predicting a single value
    max_len = 5000

    model = TransformerTimeSeries(input_dim, d_model, nhead, num_encoder_layers, dim_feedforward, output_dim, max_len)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.MSELoss()

    model.train()
    for epoch in range(100):
        optimizer.zero_grad()
        outputs = model(x_train[:,:,notnan_idx])
        loss = criterion(outputs, y_train[:,:,notnan_idx])
        loss.backward()
        optimizer.step()
        print(f"Epoch {epoch + 1}, Loss: {loss.item()}")

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1, Loss: 0.4298463463783264
Epoch 2, Loss: 1.5747628211975098
Epoch 3, Loss: 0.5361706018447876
Epoch 4, Loss: 0.37088316679000854
Epoch 5, Loss: 0.1437569558620453
Epoch 6, Loss: 0.09530359506607056
Epoch 7, Loss: 0.13827402889728546
Epoch 8, Loss: 0.14594298601150513
Epoch 9, Loss: 0.1152665913105011
Epoch 10, Loss: 0.07917803525924683
Epoch 11, Loss: 0.05755682662129402
Epoch 12, Loss: 0.054055120795965195
Epoch 13, Loss: 0.06188967451453209
Epoch 14, Loss: 0.07051108032464981
Epoch 15, Loss: 0.07335128635168076
Epoch 16, Loss: 0.06733871251344681
Epoch 17, Loss: 0.056754451245069504
Epoch 18, Loss: 0.04507125914096832
Epoch 19, Loss: 0.03840958699584007
Epoch 20, Loss: 0.03795775771141052
Epoch 21, Loss: 0.04158945009112358
Epoch 22, Loss: 0.04560413584113121
Epoch 23, Loss: 0.04649369791150093
Epoch 24, Loss: 0.04354764148592949
Epoch 25, Loss: 0.038750018924474716
Epoch 26, Loss: 0.034097686409950256
Epoch 27, Loss: 0.03191237151622772
Epoch 28, Loss: 0.03199034556746483
Ep

In [8]:
y_pred = model(x_test[:,:,notnan_idx])

In [9]:
from algorithms import ridge_regression, ridge_regression_low_rank, low_rank_projection, \
                        prediction, train_robust_weights_model, compute_weights

In [10]:
lambda_tmp = 10.0

# compute the big matrix X and Y
training_models, x_train_merged, y_train_merged, x_test_merged, y_test_merged = build_training_and_test_sets(m0,x,y,vars,lon_size,lat_size,time_period=33,dtype=dtype)

# compute ridge regressor
w_ridge = torch.zeros(lon_size*lat_size,lon_size*lat_size,dtype=dtype)
w_ridge[np.ix_(notnan_idx,notnan_idx)] = ridge_regression(x_train_merged[:,notnan_idx], y_train_merged[:,notnan_idx], lambda_=lambda_tmp, dtype=dtype)

x_test_tmp = x[m0]
y_test_tmp = y[m0]

# ridge
y_pred_ridge = prediction(x_test_tmp, w_ridge,notnan_idx, nan_idx)

In [11]:
# compare the tranformer and the ridge regression

#compute rmse
rmse_ridge = torch.sqrt(torch.nanmean((y_test_tmp - y_pred_ridge)**2))
rmse_transformer = torch.sqrt(torch.mean((y_test_tmp[:,:,notnan_idx] - y_pred)**2))


print(f"RMSE Ridge: {rmse_ridge}")
print(f"RMSE Transformer: {rmse_transformer}")

RMSE Ridge: 0.1064210906624794
RMSE Transformer: 0.13596758246421814


In [25]:
import torch
import torch.nn as nn
import torch.optim as optim
import math

class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        return x + self.pe[:x.size(0), :]

class TransformerTimeSeries(nn.Module):
    def __init__(self, input_dim, d_model, nhead, num_encoder_layers, num_decoder_layers, dim_feedforward, output_dim, max_len=5000):
        super(TransformerTimeSeries, self).__init__()
        self.input_dim = input_dim
        self.d_model = d_model
        self.embedding = nn.Linear(input_dim, d_model)
        self.pos_encoder = PositionalEncoding(d_model, max_len)
        self.transformer = nn.Transformer(d_model, nhead, num_encoder_layers, num_decoder_layers, dim_feedforward)
        self.fc_out = nn.Linear(d_model, output_dim)

    def forward(self, src, tgt):
        # src and tgt shape: (batch_size, seq_len, input_dim)
        src = self.embedding(src) * math.sqrt(self.d_model)
        tgt = self.embedding(tgt) * math.sqrt(self.d_model)
        src = self.pos_encoder(src)
        tgt = self.pos_encoder(tgt)
        src = src.permute(1, 0, 2)  # Transformer expects (seq_len, batch_size, d_model)
        tgt = tgt.permute(1, 0, 2)  # Transformer expects (seq_len, batch_size, d_model)
        output = self.transformer(src, tgt)
        output = output.permute(1, 0, 2)  # Back to (batch_size, seq_len, d_model)
        output = self.fc_out(output)
        return output

# Example usage
if __name__ == "__main__":
    input_dim = len(notnan_idx)  # Number of input features
    d_model = 64
    nhead = 8
    num_encoder_layers = 3
    num_decoder_layers = 3
    dim_feedforward = 128
    output_dim = len(notnan_idx)  # Number of output features
    max_len = 5000

    model = TransformerTimeSeries(input_dim, d_model, nhead, num_encoder_layers, num_decoder_layers, dim_feedforward, output_dim, max_len)
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    criterion = nn.MSELoss()

    # Dummy data
    x = torch.randn(4, 8, input_dim)  # Batch size of 4, sequence length of 8, 5 features
    y = torch.randn(4, 8, output_dim)  # Corresponding target time series

    model.train()
    for epoch in range(10):
        optimizer.zero_grad()
        outputs = model(x_train[:,:,notnan_idx],y_train[:,:,notnan_idx])
        loss = criterion(outputs, y_train[:,:,notnan_idx])
        # outputs = model(x_train, y_train])
        # loss = criterion(outputs, y)
        loss.backward()
        optimizer.step()
        print(f"Epoch {epoch + 1}, Loss: {loss.item()}")



Epoch 1, Loss: 0.38799184560775757
Epoch 2, Loss: 0.3775981068611145
Epoch 3, Loss: 0.3678566515445709
Epoch 4, Loss: 0.3593819737434387
Epoch 5, Loss: 0.35217490792274475
Epoch 6, Loss: 0.3456117510795593
Epoch 7, Loss: 0.3397618532180786
Epoch 8, Loss: 0.3341117799282074
Epoch 9, Loss: 0.32860395312309265
Epoch 10, Loss: 0.32319095730781555
