In [1]:
from gp_helpers_from_reproduction import *
import numpy as np
import xarray as xr

In [2]:
files = ["ssp126", "ssp585", "historical", "hist-GHG"]
# Create training and testing arrays
X_train, pca_solvers = makeXTrain_PCASolvers(files)
y_train_tas = makeYTrain(files)['tas'].values.reshape(-1, 96 * 144)

X_test = makeXTest('ssp245', pca_solvers)
Y_test = xr.open_dataset(data_path + 'outputs_ssp245.nc').compute()
tas_truth = Y_test["tas"].mean('member')

In [3]:
# Removing NaNs 
nan_train_mask, nan_test_mask = X_train.isna().any(axis=1).values, X_test.isna().any(axis=1).values
X_train, X_test = X_train.dropna(axis=0, how='any'), X_test.dropna(axis=0, how='any')
y_train_tas, tas_truth = y_train_tas[~nan_train_mask], tas_truth[~nan_test_mask]

In [4]:
# Standardizing data for data that is not EOFs/PCA
X_train_CO2_mean, X_test_CO2_mean = X_train['CO2'].mean(), X_test['CO2'].mean()
X_train_CO2_std, X_test_CO2_std = X_train['CO2'].std(), X_test['CO2'].std()

X_train_CH4_mean, X_test_CH4_mean = X_train['CH4'].mean(), X_test['CH4'].mean()
X_train_CH4_std, X_test_CH4_std = X_train['CH4'].std(), X_test['CH4'].std()

X_train['CO2'] = (X_train['CO2'] - X_train_CO2_mean) / X_train_CO2_std
X_test['CO2'] = (X_test['CO2'] - X_test_CO2_mean) / X_test_CO2_std

X_train['CH4'] = (X_train['CH4'] - X_train_CH4_mean) / X_train_CH4_std
X_test['CH4'] = (X_test['CH4'] - X_test_CH4_mean) / X_test_CH4_std

# Standardizing y_train
y_train_tas_mean, y_train_tas_std = y_train_tas.mean(), y_train_tas.std()
y_train_tas = (y_train_tas - y_train_tas_mean) / y_train_tas_std

In [5]:
X_train.shape, y_train_tas.shape

((502, 12), (502, 13824))

In [6]:
y_train_tas.astype(np.float32).shape

(502, 13824)

## Model

In [7]:
import torch
import torch.nn as nn
import gpytorch
from TD_classes import *
import tqdm
from gpytorch.models import ExactGP
from gpytorch.means import ConstantMean
from gpytorch.kernels import ScaleKernel, RBFKernel
from gpytorch.kernels import MultitaskKernel
from gpytorch.distributions import MultitaskMultivariateNormal

print(torch.cuda.is_available())

True


In [8]:
# Make X_train and y_train into tensors
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train_tas, dtype=torch.float32)

# Add to cuda
X_train_tensor, y_train_tensor = X_train_tensor.cuda(), y_train_tensor.cuda()

In [9]:
data_dim = X_train_tensor.shape[1]

class LargeFeatureExtractor(nn.Sequential):
    def __init__(self):
        super(LargeFeatureExtractor, self).__init__()
        self.add_module('linear1', torch.nn.Linear(data_dim, 1000))
        self.add_module('relu1', torch.nn.ReLU())
        self.add_module('linear2', torch.nn.Linear(1000, 500))
        self.add_module('relu2', torch.nn.ReLU())
        self.add_module('linear3', torch.nn.Linear(500, 50))
        self.add_module('relu3', torch.nn.ReLU())
        self.add_module('linear4', torch.nn.Linear(50, 2))
        self.add_module('relu7', torch.nn.ReLU())

feature_extractor = LargeFeatureExtractor()

In [10]:
class BatchIndependentMultitaskGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood, num_tasks=13824):
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean(batch_shape=torch.Size([num_tasks]))
        self.covar_module = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.RBFKernel(batch_shape=torch.Size([num_tasks])),
            batch_shape=torch.Size([num_tasks])
        )
        self.feature_extractor = feature_extractor
        self.scale_to_bounds = gpytorch.utils.grid.ScaleToBounds(-1, 1)

    def forward(self, x):
        projected_x = self.feature_extractor(x)
        projected_x = self.scale_to_bounds(projected_x)
        
        mean_x = self.mean_module(projected_x)
        covar_x = self.covar_module(projected_x)
        return gpytorch.distributions.MultitaskMultivariateNormal.from_batch_mvn(
            gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
        )

In [11]:
# Define new Gp model that implements a NN feature extractor i.e DKL (Does by mostly changing the foward function)
class GPRegressionModel(gpytorch.models.ExactGP):
        def __init__(self, train_x, train_y, likelihood):
            super(GPRegressionModel, self).__init__(train_x, train_y, likelihood)
            self.mean_module = gpytorch.means.ConstantMean()
            self.covar_module = gpytorch.kernels.GridInterpolationKernel(
                gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel(ard_num_dims=2)),
                num_dims=2, grid_size=100
            )
            self.feature_extractor = feature_extractor

            # This module will scale the NN features so that they're nice values
            self.scale_to_bounds = gpytorch.utils.grid.ScaleToBounds(-1., 1.)

        def forward(self, x):
            # We're first putting our data through a deep net (feature extractor)
            projected_x = self.feature_extractor(x)
            projected_x = self.scale_to_bounds(projected_x)  # Make the NN values "nice"
            mean_x = self.mean_module(projected_x)
            covar_x = self.covar_module(projected_x)
            return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

In [13]:
num_tasks = y_train_tensor.shape[1]
likelihood = gpytorch.likelihoods.GaussianLikelihood(num_tasks=num_tasks)

model = BatchIndependentMultitaskGPModel(X_train_tensor, y_train_tensor, likelihood, num_tasks)

if torch.cuda.is_available():
    model = model.cuda()
    likelihood = likelihood.cuda()

In [14]:
# TODO: Update train function currently is just from example (boilerplate)
training_iterations = 2 

# Find optimal model hyperparameters
model.train()
likelihood.train()

# Use the adam optimizer
optimizer = torch.optim.Adam([
    {'params': model.feature_extractor.parameters()},
    {'params': model.covar_module.parameters()},
    {'params': model.mean_module.parameters()},
    {'params': model.likelihood.parameters()},
], lr=0.01)

# "Loss" for GPs - the marginal log likelihood
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model) # loss might not workfor the climate data

def train():
    iterator = tqdm.tqdm_notebook(range(training_iterations), desc="Epoch")
    for i in iterator:
        # Zero backprop gradients
        optimizer.zero_grad()
        # Get output from model
        output = model(X_train_tensor)
        # Calc loss and backprop derivatives
        print(output)
        print(y_train_tensor.shape)
        # output need to be (502, 13824) reshaped to (502, 13824)
        loss = -mll(output, y_train_tensor)
        loss.backward()
        iterator.set_postfix(loss=loss.item())
        optimizer.step()

In [16]:
%time
train()

CPU times: total: 0 ns
Wall time: 0 ns


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  iterator = tqdm.tqdm_notebook(range(training_iterations), desc="Epoch")


Epoch:   0%|          | 0/2 [00:00<?, ?it/s]

MultitaskMultivariateNormal(mean shape: torch.Size([502, 13824]))
torch.Size([502, 13824])


RuntimeError: Shape mismatch: objects cannot be broadcast to a single shape

In [None]:
# make test data into tensor
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
X_test_tensor = X_test_tensor.cuda()

In [None]:
# TODO: deal with the climate data predictions differences
model.eval()
likelihood.eval()
with torch.no_grad(), gpytorch.settings.use_toeplitz(False), gpytorch.settings.fast_pred_var():
    preds = model(X_test_tensor)


In [None]:
# get y_train dims
y_train_tensor.shape[-1]

13824