In [15]:
import math
from matplotlib import pyplot as plt
import numpy as np
from funcs import *

In [16]:
# Load Data
simulations = ['ssp126', 'ssp370', 'ssp585', 'hist-GHG', 'hist-aer']
data_path = "data/processed_data/"

X_trains, y_trains = getData(data_path, simulations)



In [17]:
# Compute mean/std of each variable for the whole dataset
meanstd_inputs = {}
len_historical = 165
ssp126Slice = ['ssp126', 'hist-GHG', 'hist-aer']
sspRestSlice = ['ssp370', 'ssp585']

for var in ['CO2', 'CH4', 'SO2', 'BC']:
    # To not take the historical data into account several time we have to slice the scenario datasets
    # and only keep the historical data once (in the first ssp index 0 in the simus list)
    array = np.concatenate([X_trains[sim][var].data for sim in ssp126Slice] + 
                           [X_trains[sim][var].sel(time=slice(len_historical, None)).data for sim in sspRestSlice])
    print((array.mean(), array.std()))
    meanstd_inputs[var] = (array.mean(), array.std())

(1074.172303244536, 1755.690699230666)
(0.1927369743762821, 0.18457590641432994)
(2.5623359997066755e-12, 2.250114566783271e-11)
(1.4947905009818064e-13, 1.0313342554838387e-12)


In [18]:
X_train_norm = {}
for sim, train_xr in X_trains.items(): 
    for var in ['CO2', 'CH4', 'SO2', 'BC']: 
        var_dims = train_xr[var].dims
        train_xr=train_xr.assign({var: (var_dims, normalize(train_xr[var].data, var, meanstd_inputs))}) 
    X_train_norm[sim] = train_xr

In [19]:
X_train_norm['ssp126']

In [20]:
X_train_norm = [X_train_norm[sim] for sim in simulations]
Y_train = [y_trains[sim] for sim in simulations]

In [21]:
var_to_predict =  'tas'
len_historical = 165


# skip_historical set to (i < 2) because of the order of the scenario and historical runs in the X_train and Y_train lists.
# In details: ssp126 0, ssp370 1 = skip historical part of the data, ssp585 2, hist-GHG 3 and hist-aer 4 = keep the whole sequence
X_train_all = np.concatenate([input_for_training(X_train_norm[i], skip_historical=(i<2), len_historical=len_historical) for i in range(len(simulations))], axis = 0)
Y_train_all = np.concatenate([output_for_training(Y_train[i], var_to_predict, skip_historical=(i<2), len_historical=len_historical) for i in range(len(simulations))], axis=0)
print(X_train_all.shape)
print(Y_train_all.shape)

(726, 10, 96, 144, 4)
(726, 1, 96, 144)


## Model

In [22]:
import torch
import torch.nn as nn
import gpytorch
from TD_classes import *
import tqdm
from gpytorch.models import ExactGP
from gpytorch.means import ConstantMean
from gpytorch.kernels import ScaleKernel, RBFKernel
from gpytorch.kernels import MultitaskKernel
from gpytorch.distributions import MultitaskMultivariateNormal


In [23]:
print(torch.cuda.is_available())

True


In [24]:
X_train_all_tensor = torch.tensor(X_train_all, dtype=torch.float32)
Y_train_all_tensor = torch.tensor(Y_train_all, dtype=torch.float32)

X_train_all_tensor = X_train_all_tensor.cuda()
Y_train_all_tensor = Y_train_all_tensor.cuda()

In [25]:
data_dim = X_train_all.shape[1] # Same as slider

class LargeFeatureExtractor(nn.Sequential):
    # TODO: Change Network Architecture to fit the high dimensional data of the Climate data
    # Look at the CNN implementation for ideas
    def __init__(self):
        super(LargeFeatureExtractor, self).__init__()
        self.add_module('TimeDistributed_CONV2D_1', TimeDistributedConv2D(in_channels=96, out_channels=20, kernel_size=3, padding='same'))
        self.add_module('RELU_1', nn.ReLU())
        self.add_module('TimeDistributedAvgPool2D', TimeDistributedAvgPool2D(pool_size=2))
        self.add_module('TimeDistributedGlobalAvgPool2D', TimeDistributedGlobalAvgPool2D())
        self.add_module('LSTM', nn.LSTM(20, 20))
        self.add_module('RELU_2', nn.ReLU())
        self.add_module('Dense_Layer', nn.Linear(in_features=20, out_features=96*144))
        
    def forward(self, x):

        x = self.TimeDistributed_CONV2D_1(x)
        x = self.RELU_1(x)
        x = self.TimeDistributedAvgPool2D(x)
        x = self.TimeDistributedGlobalAvgPool2D(x)
        x, _ = self.LSTM(x)
        x = self.RELU_2(x[:, -1:, :])
        x = self.Dense_Layer(x)
        x = x.view(-1, 96*144)
        return x

feature_extractor = LargeFeatureExtractor()

In [68]:
class BatchIndependentMultitaskGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood, num_tasks=13824):
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean(batch_shape=torch.Size([num_tasks]))
        self.covar_module = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.RBFKernel(batch_shape=torch.Size([num_tasks])),
            batch_shape=torch.Size([num_tasks])
        )
        self.feature_extractor = feature_extractor
        self.scale_to_bounds = gpytorch.utils.grid.ScaleToBounds(-1, 1)

    def forward(self, x):
        projected_x = self.feature_extractor(x)
        projected_x = self.scale_to_bounds(projected_x)
        
        mean_x = self.mean_module(projected_x)
        covar_x = self.covar_module(projected_x)
        return gpytorch.distributions.MultitaskMultivariateNormal.from_batch_mvn(
            gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
        )

In [69]:
class MultitaskGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood, num_tasks):
        super(MultitaskGPModel, self).__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.RBFKernel()
        )
        self.task_covar_module = gpytorch.kernels.IndexKernel(
            num_tasks=num_tasks, batch_shape=torch.Size([])
        )
        self.feature_extractor = feature_extractor
        self.scale_to_bounds = gpytorch.utils.grid.ScaleToBounds(-1., 1.)

    def forward(self, x):
        projected_x = self.feature_extractor(x)
        projected_x = self.scale_to_bounds(projected_x)
        mean_x = self.mean_module(projected_x)
        covar_x = self.covar_module(projected_x) * self.task_covar_module(projected_x)
        return gpytorch.distributions.MultitaskMultivariateNormal(mean_x, covar_x)

In [70]:
class MultiOutputGPRegressionModel(ExactGP):
    def __init__(self, train_x, train_y, likelihood, num_outputs=13824):
        super(MultiOutputGPRegressionModel, self).__init__(train_x, train_y, likelihood)
        self.mean_module = ConstantMean(batch_shape=torch.Size([num_outputs]))
        self.base_covar_module = ScaleKernel(RBFKernel(ard_num_dims=train_x.shape[-1]))
        self.covar_module = MultitaskKernel(self.base_covar_module, num_tasks=num_outputs, rank=1)
        self.feature_extractor = feature_extractor
        self.scale_to_bounds = gpytorch.utils.grid.ScaleToBounds(-1., 1.)

    def forward(self, x):
        projected_x = self.feature_extractor(x)
        projected_x = self.scale_to_bounds(projected_x)
        mean_x = self.mean_module(projected_x)
        covar_x = self.covar_module(projected_x)
        return MultitaskMultivariateNormal(mean_x, covar_x)


In [71]:
# Define new Gp model that implements a NN feature extractor i.e DKL (Does by mostly changing the foward function)
class GPRegressionModel(gpytorch.models.ExactGP):
        def __init__(self, train_x, train_y, likelihood):
            super(GPRegressionModel, self).__init__(train_x, train_y, likelihood)
            self.mean_module = gpytorch.means.ConstantMean()
            self.covar_module = gpytorch.kernels.GridInterpolationKernel(
                gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel(ard_num_dims=2)),
                num_dims=2, grid_size=100
            )
            self.feature_extractor = feature_extractor

            # This module will scale the NN features so that they're nice values
            self.scale_to_bounds = gpytorch.utils.grid.ScaleToBounds(-1., 1.)

        def forward(self, x):
            # We're first putting our data through a deep net (feature extractor)
            projected_x = self.feature_extractor(x)
            projected_x = self.scale_to_bounds(projected_x)  # Make the NN values "nice"

            mean_x = self.mean_module(projected_x)
            covar_x = self.covar_module(projected_x)
            return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

In [72]:
likelihood = gpytorch.likelihoods.GaussianLikelihood(num_tasks=13824)
model = BatchIndependentMultitaskGPModel(X_train_all_tensor, Y_train_all_tensor, likelihood)

if torch.cuda.is_available():
    model = model.cuda()
    likelihood = likelihood.cuda()

In [75]:
# TODO: Update train function currently is just from example (boilerplate)
training_iterations = 2 

# Find optimal model hyperparameters
model.train()
likelihood.train()

# Use the adam optimizer
optimizer = torch.optim.Adam([
    {'params': model.feature_extractor.parameters()},
    {'params': model.covar_module.parameters()},
    {'params': model.mean_module.parameters()},
    {'params': model.likelihood.parameters()},
], lr=0.01)

# "Loss" for GPs - the marginal log likelihood
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model) # loss might not workfor the climate data

epochs = 10
batch_size = 64

def train():
    iterator = tqdm.tqdm_notebook(range(training_iterations), desc="Epoch")
    for i in iterator:
        # Zero backprop gradients
        optimizer.zero_grad()
        # Get output from model
        output = model(X_train_all_tensor)
        # Calc loss and backprop derivatives
        print(output)
        print(Y_train_all_tensor.shape)
        loss = -mll(output, Y_train_all_tensor)
        loss.backward()
        iterator.set_postfix(loss=loss.item())
        optimizer.step()        
        
        
        
        
        
def train_batch():
    for epoch in range(epochs):
        model.train()
        likelihood.train()
        
        # Shuffle the data
        permutation = torch.randperm(X_train_all_tensor.size()[0])
        X_train_all_tensor_shuffled = X_train_all_tensor[permutation]
        Y_train_all_tensor_shuffled = Y_train_all_tensor[permutation]
        
        # Perform batch training
        for i in range(0, X_train_all_tensor.size()[0], batch_size):
            optimizer.zero_grad()
            
            # Get batch data
            batch_x = X_train_all_tensor_shuffled[i:i+batch_size]
            batch_y = Y_train_all_tensor_shuffled[i:i+batch_size]
            
            # Get output from model
            output = model(batch_x)
            
            # Calculate loss and backpropagate
            loss = -mll(output, batch_y)
            loss.backward()
            optimizer.step()
            
        print(f"Epoch {epoch+1}/{epochs} completed")


In [76]:
%time
train()

CPU times: total: 0 ns
Wall time: 0 ns


RuntimeError: You must train on the training inputs!

In [None]:
Y_train_all_tensor.squeeze().shape

torch.Size([726, 96, 144])

In [None]:
# TODO: deal with the climate data predictions differences
model.eval()
likelihood.eval()
with torch.no_grad(), gpytorch.settings.use_toeplitz(False), gpytorch.settings.fast_pred_var():
    preds = model(test_xaa)


NameError: name 'test_x' is not defined