In [7]:
from gp_helpers_from_reproduction import *
import numpy as np
import xarray as xr

In [8]:
files = ["ssp126", "ssp585", "historical", "hist-GHG"]
# Create training and testing arrays
X_train, pca_solvers = makeXTrain_PCASolvers(files)
y_train_tas = makeYTrain(files)['tas'].values.reshape(-1, 96 * 144)

X_test = makeXTest('ssp245', pca_solvers)
Y_test = xr.open_dataset(data_path + 'outputs_ssp245.nc').compute()
tas_truth = Y_test["tas"].mean('member')

In [9]:
# Removing NaNs 
nan_train_mask, nan_test_mask = X_train.isna().any(axis=1).values, X_test.isna().any(axis=1).values
X_train, X_test = X_train.dropna(axis=0, how='any'), X_test.dropna(axis=0, how='any')
y_train_tas, tas_truth = y_train_tas[~nan_train_mask], tas_truth[~nan_test_mask]

In [10]:
# Standardizing data for data that is not EOFs/PCA
X_train_CO2_mean, X_test_CO2_mean = X_train['CO2'].mean(), X_test['CO2'].mean()
X_train_CO2_std, X_test_CO2_std = X_train['CO2'].std(), X_test['CO2'].std()

X_train_CH4_mean, X_test_CH4_mean = X_train['CH4'].mean(), X_test['CH4'].mean()
X_train_CH4_std, X_test_CH4_std = X_train['CH4'].std(), X_test['CH4'].std()

X_train['CO2'] = (X_train['CO2'] - X_train_CO2_mean) / X_train_CO2_std
X_test['CO2'] = (X_test['CO2'] - X_test_CO2_mean) / X_test_CO2_std

X_train['CH4'] = (X_train['CH4'] - X_train_CH4_mean) / X_train_CH4_std
X_test['CH4'] = (X_test['CH4'] - X_test_CH4_mean) / X_test_CH4_std

# Standardizing y_train
y_train_tas_mean, y_train_tas_std = y_train_tas.mean(), y_train_tas.std()
y_train_tas = (y_train_tas - y_train_tas_mean) / y_train_tas_std

## Model

In [24]:
import torch
import torch.nn as nn
import gpytorch
from TD_classes import *
import tqdm
from gpytorch.models import ApproximateGP
from gpytorch.variational import VariationalStrategy, CholeskyVariationalDistribution
from gpytorch.distributions import MultivariateNormal

print(torch.cuda.is_available())

True


In [25]:
# Make X_train and y_train into tensors
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train_tas, dtype=torch.float32)

# Add to cuda
X_train_tensor, y_train_tensor = X_train_tensor.cuda(), y_train_tensor.cuda()

In [26]:
data_dim = X_train_tensor.shape[1]

class LargeFeatureExtractor(nn.Sequential):
    def __init__(self):
        super(LargeFeatureExtractor, self).__init__()
        self.add_module('linear1', torch.nn.Linear(data_dim, 1000))
        self.add_module('relu1', torch.nn.ReLU())
        self.add_module('linear2', torch.nn.Linear(1000, 500))
        self.add_module('relu2', torch.nn.ReLU())
        self.add_module('linear3', torch.nn.Linear(500, 50))
        self.add_module('relu3', torch.nn.ReLU())
        self.add_module('linear4', torch.nn.Linear(50, 12))
        self.add_module('relu7', torch.nn.ReLU())

feature_extractor = LargeFeatureExtractor()

In [27]:
# Sparse Gaussian Process Model
class SparseDKLGPModel(ApproximateGP):
    def __init__(self, feature_extractor, num_inducing_points, num_tasks):
        inducing_points = torch.randn(num_inducing_points, 12)  # feature_dims is the output dimension of your NN
        variational_distribution = CholeskyVariationalDistribution(num_inducing_points)
        variational_strategy = VariationalStrategy(self, inducing_points, variational_distribution, learn_inducing_locations=True)
        
        super(SparseDKLGPModel, self).__init__(variational_strategy)
        self.feature_extractor = feature_extractor
        self.mean_module = gpytorch.means.ConstantMean(batch_shape=torch.Size([num_tasks]))
        self.covar_module = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.RBFKernel(batch_shape=torch.Size([num_tasks])),
            batch_shape=torch.Size([num_tasks])
        )

    def forward(self, x):
        projected_x = self.feature_extractor(x)
        mean_x = self.mean_module(projected_x)
        covar_x = self.covar_module(projected_x)
        return MultivariateNormal(mean_x, covar_x)


In [28]:
model = SparseDKLGPModel(feature_extractor, num_inducing_points=50, num_tasks=13824)
likelihood = gpytorch.likelihoods.GaussianLikelihood()

if torch.cuda.is_available():
    model = model.cuda()
    likelihood = likelihood.cuda()

In [29]:
# TODO: Update train function currently is just from example (boilerplate)
training_iterations = 2 

# Find optimal model hyperparameters
model.train()
likelihood.train()

# Use the adam optimizer
optimizer = torch.optim.Adam([
    {'params': model.feature_extractor.parameters()},
    {'params': model.covar_module.parameters()},
    {'params': model.mean_module.parameters()},
], lr=0.01)

# "Loss" for GPs - the marginal log likelihood
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model) # loss might not workfor the climate data

def train():
    iterator = tqdm.tqdm_notebook(range(training_iterations), desc="Epoch")
    for i in iterator:
        # Zero backprop gradients
        optimizer.zero_grad()
        # Get output from model
        output = model(X_train_tensor)
        # Calc loss and backprop derivatives
        print(output)
        print(y_train_tensor.shape)
        loss = -mll(output, y_train_tensor)
        loss.backward()
        iterator.set_postfix(loss=loss.item())
        optimizer.step()

In [30]:
train()

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  iterator = tqdm.tqdm_notebook(range(training_iterations), desc="Epoch")


Epoch:   0%|          | 0/2 [00:00<?, ?it/s]

RuntimeError: CUDA error: out of memory
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
