In [3]:
import numpy as np
import xarray as xr
import pandas as pd
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
from utils import *
import numpy as np

import warnings
warnings.filterwarnings("ignore")

## Load the data

In [7]:
datapath = '/Users/jack/Library/CloudStorage/GoogleDrive-limjackailjk@gmail.com/My Drive/UCSD/DSC/DSC180/ClimateBench - Plus/ClimateBench-Plus/DKL Gaussian Process/data/processed_data/'
# datapath = 'G://My Drive//UCSD//DSC//DSC180//ClimateBench - Plus//ClimateBench-Plus//DKL Gaussian Process//data//processed_data//'
simulations = ['ssp126', 'ssp370', 'ssp585', 'hist-GHG', 'hist-aer']

In [8]:
X_train = []
Y_train = []

for i, simu in enumerate(simulations):
    input_name = 'inputs_' + simu + '.nc'
    output_name = 'outputs_' + simu + '.nc'
    # Just load hist data in these cases 'hist-GHG' and 'hist-aer'
    if 'hist' in simu:
        # load inputs 
        input_xr = xr.open_dataset(datapath + input_name)
            
        # load outputs                                                             
        output_xr = xr.open_dataset(datapath + output_name).mean(dim='member')
        output_xr = output_xr.assign({"pr": output_xr.pr * 86400, "pr90": output_xr.pr90 * 86400})\
                             .rename({'lon':'longitude', 'lat': 'latitude'})\
                             .transpose('time','latitude', 'longitude').drop(['quantile'])
    
    # Concatenate with historical data in the case of scenario 'ssp126', 'ssp370' and 'ssp585'
    else:
        # load inputs 
        input_xr = xr.open_mfdataset([datapath + 'inputs_historical.nc', datapath + input_name]).compute()
            
        # load outputs                                                             
        output_xr = xr.concat([xr.open_dataset(datapath + 'outputs_historical.nc').mean(dim='member'),
                               xr.open_dataset(datapath + output_name).mean(dim='member')],
                               dim='time').compute()
        output_xr = output_xr.assign({"pr": output_xr.pr * 86400,"pr90": output_xr.pr90 * 86400})\
                             .rename({'lon':'longitude', 'lat': 'latitude'})\
                             .transpose('time','latitude', 'longitude').drop(['quantile'])

    print(input_xr.dims, simu)

    # Append to list 
    X_train.append(input_xr)
    Y_train.append(output_xr)



## Normalize the data


In [9]:
# Compute mean/std of each variable for the whole dataset
meanstd_inputs = {}
len_historical = 165

for var in ['CO2', 'CH4', 'SO2', 'BC']:
    # To not take the historical data into account several time we have to slice the scenario datasets
    # and only keep the historical data once (in the first ssp index 0 in the simus list)
    array = np.concatenate([X_train[i][var].data for i in [0, 3, 4]] + 
                           [X_train[i][var].sel(time=slice(len_historical, None)).data for i in range(1, 3)])
    print((array.mean(), array.std()))
    meanstd_inputs[var] = (array.mean(), array.std())

(1074.172303244536, 1755.690699230666)
(0.1927369743762821, 0.18457590641432994)
(2.5623359997066755e-12, 2.250114566783271e-11)
(1.4947905009818064e-13, 1.0313342554838387e-12)


In [10]:
# normalize input data 
X_train_norm = [] 
for i, train_xr in enumerate(X_train): 
    for var in ['CO2', 'CH4', 'SO2', 'BC']: 
        var_dims = train_xr[var].dims
        train_xr=train_xr.assign({var: (var_dims, normalize(train_xr[var].data, var, meanstd_inputs))}) 
    X_train_norm.append(train_xr)

In [18]:
var_to_predict =  'tas'
# skip_historical set to (i < 2) because of the order of the scenario and historical runs in the X_train and Y_train lists.
# In details: ssp126 0, ssp370 1 = skip historical part of the data, ssp585 2, hist-GHG 3 and hist-aer 4 = keep the whole sequence
X_train_all = np.concatenate([input_for_training(X_train_norm[i], skip_historical=(i<2), len_historical=len_historical) for i in range(len(simulations))], axis = 0)
Y_train_all = np.concatenate([output_for_training(Y_train[i], var_to_predict, skip_historical=(i<2), len_historical=len_historical) for i in range(len(simulations))], axis=0)
# add a dimension to the output data
Y_train_all = Y_train_all[..., np.newaxis]


X_train_all = X_train_all.reshape(726, 4, 96, 144)
Y_train_all = Y_train_all.reshape(726, 1, 96, 144)
print(X_train_all.shape)
print(Y_train_all.shape)

(726, 4, 96, 144)
(726, 1, 96, 144)


In [23]:
def context_target_split(x, y, num_context, num_extra_target):
    """Given inputs x and their value y, return random subsets of points for
    context and target. Note that following conventions from "Empirical
    Evaluation of Neural Process Objectives" the context points are chosen as a
    subset of the target points.

    Parameters
    ----------
    x : torch.Tensor
        Shape (batch_size, num_points, x_dim)

    y : torch.Tensor
        Shape (batch_size, num_points, y_dim)

    num_context : int
        Number of context points.

    num_extra_target : int
        Number of additional target points.
    """
    batch_size, _, lat, lng = x.shape
    x = x.reshape(batch_size, 4, lat*lng)
    y = y.reshape(batch_size, 1, lat*lng)
    num_points = x.shape[2]
    # Sample locations of context and target points
    locations = np.random.choice(num_points,
                                 size=num_context + num_extra_target,
                                 replace=False)
    x_context = x[:, :, locations[:num_context]]
    y_context = y[:, :, locations[:num_context]]
    x_target = x[:, :, locations]
    y_target = y[:, :, locations]

    x_context = x_context.reshape(batch_size, 4, lat, lng)
    y_context = y_context.reshape(batch_size, 1, lat, lng)
    x_target = x_target.reshape(batch_size, 4, lat, lng)
    y_target = y_target.reshape(batch_size, 1, lat, lng)
    return x_context, y_context, x_target, y_target

In [24]:
# Context/Target set split
X_context, Y_context, X_target, Y_target = context_target_split(X_train_all, Y_train_all, num_context=3, num_extra_target=3)

ValueError: cannot reshape array of size 8712 into shape (726,4,96,144)

## Model

Neural Process Arch
- Encoder --> Takes context points x_i, y_i to r_i (deterministic path) and s_i (latent path)
- **r_c, s_c** from context points representations aggregated by mean
- Decoder --> Takes r_c, s_c, target points x_i to predict y_i

Pytorch implementation --> https://github.com/EmilienDupont/neural-processes/blob/master/neural_process.py

In [12]:
import torch
from torch import nn
from torch.nn import functional as F
from model import TimeDistributed
from np import Encoder
from torch.utils.data import DataLoader

### Encoder
- Input: x_i, y_i (context points)
    - x_i: (726, 10, 96, 144, 4)
    - y_i: (726, 1, 96, 144)
- Output: r_i, s_i

Plan to use LSTM/CNN network to encode the context points into the representations

#### Changes after meeting
- Reduce shape to (726, 96, 144, 4) and (726, 96, 144, 1) respectively since time covariance should not matter too much in this case,
- Remove LSTM layer, since time slider shouldn't matter and would make the problem easier. 
- If doesn't work, we can flatten the latlng layer too

In [31]:
class Encoder(nn.Module):
    """Maps an (x_i, y_i) pair to a representation r_i.

    Parameters
    ----------
    r_dim : int
        Dimension of output representation r.
    """
    def __init__(self, r_dim):
        super(Encoder, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=5, out_channels=16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.global_pool = nn.AdaptiveAvgPool2d(1)
        self.fc1 = nn.Linear(64, r_dim)

    def forward(self, context_points):
        """
        context_points : torch.Tensor
            Shape (batch_size, 4 (aerosols), 96 (lat), 144 (lon))
        """
        # context_points shape: [batch_size, channels, height, width] -> [batch_size, 5, 96, 144]
        context_points = torch.relu(self.conv1(context_points))
        context_points = self.pool(context_points)
        context_points = torch.relu(self.conv2(context_points))
        context_points = self.pool(context_points)
        context_points = torch.relu(self.conv3(context_points))
        context_points = self.global_pool(context_points)  # Reduce to [batch_size, 64, 1, 1]
        context_points = torch.flatten(context_points, 1)  # Flatten to [batch_size, 64]
        context_points = self.fc1(context_points)  # Map to [batch_size, r_dim]
        return context_points

In [38]:
def concat(x, y):
        """Concatenate x and y along channel dimension.

        Parameters
        ----------
        x : torch.Tensor
            Shape (batch_size, c, m, n).
        y : torch.Tensor
            Shape (batch_size, d, m, n).

        Returns
        -------
        torch.Tensor
            Shape (batch_size, c + d, m, n).
        """
        return torch.cat([x, y], dim=1)

In [28]:
X_train_all = torch.tensor(X_train_all).float()
Y_train_all = torch.tensor(Y_train_all).float()

x_context_points = X_train_all.reshape(726, 4, 96, 144)
y_context_points = Y_train_all.reshape(726, 1, 96, 144)
print(x_context_points.shape, y_context_points.shape)

encoder = Encoder(r_dim=128)

torch.Size([726, 4, 96, 144]) torch.Size([726, 1, 96, 144])


## MuSigmaEncoder

- Input: r_i, s_i
    - r_i: Representation of context points (726, r_dim)
    - s_i: Latent representation of context points (726, s_dim)

- Output: mu, sigma
    - mu: Mean of the distribution (726, s_i)
    - sigma: Standard deviation of the distribution (726, s_i)

In [29]:
class MuSigmaEncoder(nn.Module):
    """
    Maps a representation r to mu and sigma which will define the normal
    distribution from which we sample the latent variable z.

    Parameters
    ----------
    r_dim : int
        Dimension of output representation r.

    z_dim : int
        Dimension of latent variable z.
    """
    def __init__(self, r_dim, z_dim):
        super(MuSigmaEncoder, self).__init__()

        self.r_dim = r_dim
        self.z_dim = z_dim

        self.r_to_hidden = nn.Linear(r_dim, r_dim)
        self.hidden_to_mu = nn.Linear(r_dim, z_dim)
        self.hidden_to_sigma = nn.Linear(r_dim, z_dim)

    def forward(self, r):
        """
        r : torch.Tensor
            Shape (batch_size, r_dim)
        """
        hidden = torch.relu(self.r_to_hidden(r))
        mu = self.hidden_to_mu(hidden)
        # Define sigma following convention in "Empirical Evaluation of Neural
        # Process Objectives" and "Attentive Neural Processes"
        sigma = 0.1 + 0.9 * torch.sigmoid(self.hidden_to_sigma(hidden))
        return mu, sigma
    

mu_sigma_encoder = MuSigmaEncoder(r_dim=128, z_dim=64)

In [40]:
# Define the data loader
data = DataLoader(list(zip(x_context_points, y_context_points)), batch_size=32, shuffle=True) 
# does batches make sense here? Cause it is training by time, so technically the order/sequence of the data matters, which would mean I add a dummy batch dimension to the data, to fit into the model for now?
for x, y in data:
    print(x.shape, y.shape)
    context = torch.cat([x, y], dim=1) # [batch_size, 5, 96, 144] -> [batch_size, 4, 96, 144] + [batch_size, 1, 96, 144], concatenating along the input channels with the y output channel, everything else can stay the same as it is just the lat lng and number of year in batch
    print(context.shape)
    r_i = encoder(context)
    print(r_i.shape)
    r_i = torch.reshape(r_i, (32, 1, 128)) # [batch_size, num_points, r_dim] -> [batch_size, 1, r_dim] -> No use in choosing channel as not an image (may change to choose a specific lat lng maybe? Doesn't really make sense in my head but maybe it does in the model's)
    r =  torch.mean(r_i, dim=1)
    print(r.shape)
    mu, sigma = mu_sigma_encoder(r) # If the before part makes sense, this should be fine.
    print(mu.shape, sigma.shape)
    break


torch.Size([32, 4, 96, 144]) torch.Size([32, 1, 96, 144])
torch.Size([32, 5, 96, 144])
torch.Size([32, 128])
torch.Size([32, 128])
torch.Size([32, 64]) torch.Size([32, 64])


In [41]:
x.shape

torch.Size([32, 4, 96, 144])

In [42]:
mu, sigma = mu_sigma_encoder(r_i)

In [43]:
mu.shape

torch.Size([32, 1, 64])

## Decoder



In [44]:
class Decoder(nn.Module):
    """
    Maps target input x_target and samples z (encoding information about the
    context points) to predictions y_target.

    Parameters
    ----------
    x_dim : int
        Dimension of x values.

    z_dim : int
        Dimension of latent variable z.

    h_dim : int
        Dimension of hidden layer.

    y_dim : int
        Dimension of y values.
    """
    def __init__(self, x_dim, z_dim, h_dim, y_dim):
        super(Decoder, self).__init__()

        self.x_dim = x_dim
        self.z_dim = z_dim
        self.h_dim = h_dim
        self.y_dim = y_dim

        layers = [nn.Linear(x_dim + z_dim, h_dim),
                  nn.ReLU(inplace=True),
                  nn.Linear(h_dim, h_dim),
                  nn.ReLU(inplace=True),
                  nn.Linear(h_dim, h_dim),
                  nn.ReLU(inplace=True)]

        self.xz_to_hidden = nn.Sequential(*layers)
        self.hidden_to_mu = nn.Linear(h_dim, y_dim)
        self.hidden_to_sigma = nn.Linear(h_dim, y_dim)

    def forward(self, x, z):
        """
        x : torch.Tensor
            Shape (batch_size, num_points, x_dim)

        z : torch.Tensor
            Shape (batch_size, z_dim)

        Returns
        -------
        Returns mu and sigma for output distribution. Both have shape
        (batch_size, num_points, y_dim).
        """
        batch_size, num_points, _ = x.size()
        # Repeat z, so it can be concatenated with every x. This changes shape
        # from (batch_size, z_dim) to (batch_size, num_points, z_dim)
        z = z.unsqueeze(1).repeat(1, num_points, 1)
        # Flatten x and z to fit with linear layer
        x_flat = x.view(batch_size * num_points, self.x_dim)
        z_flat = z.view(batch_size * num_points, self.z_dim)
        # Input is concatenation of z with every row of x
        input_pairs = torch.cat((x_flat, z_flat), dim=1)
        hidden = self.xz_to_hidden(input_pairs)
        mu = self.hidden_to_mu(hidden)
        pre_sigma = self.hidden_to_sigma(hidden)
        # Reshape output into expected shape
        mu = mu.view(batch_size, num_points, self.y_dim)
        pre_sigma = pre_sigma.view(batch_size, num_points, self.y_dim)
        # Define sigma following convention in "Empirical Evaluation of Neural
        # Process Objectives" and "Attentive Neural Processes"
        sigma = 0.1 + 0.9 * F.softplus(pre_sigma)
        return mu, sigma

In [48]:
decoder = Decoder(x_dim=1, z_dim=64, h_dim=128, y_dim=1)

In [49]:
from torch.distributions import Normal

q_target = Normal(mu, sigma)
z_sample = q_target.rsample()
y_pred_mu, y_pred_sigma = decoder(x, z_sample)
p_y_pred = Normal(y_pred_mu, y_pred_sigma)

ValueError: too many values to unpack (expected 3)