# MILES-GUESS Regression Example Notebook (PyTorch)

John Schreck, David John Gagne, Charlie Becker, Gabrielle Gantos, Dhamma Kimpara, Thomas Martin

In [1]:
import os
import tqdm 
import yaml
import numpy as np
import pandas as pd
#import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import GroupShuffleSplit
from sklearn.preprocessing import MinMaxScaler, RobustScaler

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import GroupShuffleSplit
from sklearn.preprocessing import RobustScaler, MinMaxScaler

from mlguess.torch.models import DNN
from collections import defaultdict
from torch.utils.data import TensorDataset, DataLoader

## Config File

#### Load the config file

In [2]:
# Path of the YAML file that holds the data, model and loss settings used below.
config = "../config/evidential_regression_torch.yml"

# Decode explicitly as UTF-8 so parsing does not depend on the platform locale.
# NOTE(review): FullLoader is acceptable for this repo-owned file; switch to
# yaml.safe_load if the path can ever come from an untrusted source.
with open(config, encoding="utf-8") as cf:
    conf = yaml.load(cf, Loader=yaml.FullLoader)

## Data

#### Load Surface Layer data from the repo

In [3]:
# Read the sample CSV and derive two helper columns from the "Time" text:
#   day  -> everything before the first space
#   year -> everything before the first dash
data = pd.read_csv("../data/sample_cabauw_surface_layer.csv")
data["day"] = data["Time"].map(lambda stamp: str(stamp).partition(" ")[0])
data["year"] = data["Time"].map(lambda stamp: str(stamp).partition("-")[0])

#### Train-Valid-Test Splits

This is a two-step process:
1. Split all of the data, grouped by the `year` column (as the code below does), between train (90%) and test (10%). The test data will be consistent across all trained models and all data and model ensembles.
2. Split the 90% training data from Step 1 into training and validation.

In [4]:
# TODO: Why do we need two seeds?
# flat_seed -> random_state passed to both GroupShuffleSplit calls below
# data_seed -> index of the train/valid split picked from the generated list
data_seed, flat_seed = 0, 1000

In [5]:
# Need the same test_data for all trained models (data and model ensembles).
# Rows are grouped by the "year" column, so all rows of one year end up in the
# same partition; train_size is the share of groups kept for training.
gsp = GroupShuffleSplit(n_splits=1, random_state=flat_seed, train_size=0.9)
splits = list(gsp.split(data, groups=data["year"]))
train_index, test_index = splits[0]
train_data, test_data = data.iloc[train_index].copy(), data.iloc[test_index].copy()

# Make the train-valid splits of the remaining data, again grouped by year.
# data_seed + 1 candidates are generated so that splits[data_seed] always
# exists; with n_splits fixed at 1, any data_seed other than 0 raised an
# IndexError. For data_seed = 0 this is the same single split as before.
gsp = GroupShuffleSplit(n_splits=data_seed + 1, random_state=flat_seed, train_size=0.885)
splits = list(gsp.split(train_data, groups=train_data["year"]))
train_index, valid_index = splits[data_seed]
train_data, valid_data = train_data.iloc[train_index].copy(), train_data.iloc[valid_index].copy()

#### Data Scaling

In [6]:
# Prediction target: a single column.
#TODO: Should we include the other two output variables as potential options?
output_cols = ["friction_velocity:surface:m_s-1"]
# Predictor columns are taken from the `data.input_cols` entry of the config.
input_cols = conf["data"]["input_cols"]

In [7]:
# Inputs: RobustScaler fitted on the training rows only, then applied to the
# validation and test rows.
x_scaler = RobustScaler()
x_train = x_scaler.fit_transform(train_data[input_cols])
x_valid, x_test = (
    x_scaler.transform(frame[input_cols]) for frame in (valid_data, test_data)
)

# Targets: MinMaxScaler onto [0, 1], likewise fitted on the training rows only.
y_scaler = MinMaxScaler((0, 1))
y_train = y_scaler.fit_transform(train_data[output_cols])
y_valid, y_test = (
    y_scaler.transform(frame[output_cols]) for frame in (valid_data, test_data)
)

In [8]:
# Mini-batch size used by the training loops.
batch_size = 128

# Convert the scaled training arrays to float tensors.
X, y = (torch.FloatTensor(array) for array in (x_train, y_train))

# Pair inputs with targets and serve them in shuffled mini-batches.
dataset = TensorDataset(X, y)
dataloader = DataLoader(dataset, shuffle=True, batch_size=batch_size)

### 1. Deterministic multi-layer perceptron (MLP) to predict some quantity

#### Train the model

In [9]:
# Switch the `lng` option off: the DNN built from this config then ends in a
# plain Linear layer with one output (see the model printout below).
conf["model"]["lng"] = False
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [10]:
# Build the network from the `model` section of the config and move its
# parameters to the selected device.
model = DNN(**conf["model"]).to(device)

In [11]:
# Echo the model so the notebook displays its layer structure.
model

DNN(
  (fcn): Sequential(
    (0): Linear(in_features=4, out_features=1057, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
    (2): Dropout(p=0.263, inplace=False)
    (3): Linear(in_features=1057, out_features=1057, bias=True)
    (4): LeakyReLU(negative_slope=0.01)
    (5): Dropout(p=0.263, inplace=False)
    (6): Linear(in_features=1057, out_features=1057, bias=True)
    (7): LeakyReLU(negative_slope=0.01)
    (8): Dropout(p=0.263, inplace=False)
    (9): Linear(in_features=1057, out_features=1057, bias=True)
    (10): LeakyReLU(negative_slope=0.01)
    (11): Dropout(p=0.263, inplace=False)
    (12): Linear(in_features=1057, out_features=1057, bias=True)
    (13): LeakyReLU(negative_slope=0.01)
    (14): Dropout(p=0.263, inplace=False)
    (15): Linear(in_features=1057, out_features=1, bias=True)
  )
)

In [12]:
results_dict = defaultdict(list)

# Mean absolute error between the prediction and the scaled target.
criterion = nn.L1Loss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# One pass over the training batches. Batch-specific names are used so the
# loop does not overwrite the full-dataset tensor `y` created earlier.
model.train()
for x_batch, y_batch in dataloader:
    x_batch = x_batch.to(device)
    # Move the target to the same device and dtype as the input (done once).
    y_batch = y_batch.to(device=device, dtype=x_batch.dtype)

    y_pred = model(x_batch)
    loss = criterion(y_pred, y_batch).mean()

    # Backward pass and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# Loss of the last batch only.
print(loss)

tensor(0.0258, device='cuda:0', grad_fn=<MeanBackward0>)


### Now let's use the evidential regressor

In [13]:
# Switch the `lng` option on: the DNN built from this config then ends in a
# LinearNormalGamma layer with four outputs (see the model printout below).
conf["model"]["lng"] = True

In [14]:
# Rebuild the network with the updated config and move it to the selected
# device; this replaces the deterministic model created earlier.
model = DNN(**conf["model"]).to(device)

In [15]:
# Echo the model so the notebook displays its layer structure.
model

DNN(
  (fcn): Sequential(
    (0): Linear(in_features=4, out_features=1057, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
    (2): Dropout(p=0.263, inplace=False)
    (3): Linear(in_features=1057, out_features=1057, bias=True)
    (4): LeakyReLU(negative_slope=0.01)
    (5): Dropout(p=0.263, inplace=False)
    (6): Linear(in_features=1057, out_features=1057, bias=True)
    (7): LeakyReLU(negative_slope=0.01)
    (8): Dropout(p=0.263, inplace=False)
    (9): Linear(in_features=1057, out_features=1057, bias=True)
    (10): LeakyReLU(negative_slope=0.01)
    (11): Dropout(p=0.263, inplace=False)
    (12): Linear(in_features=1057, out_features=1057, bias=True)
    (13): LeakyReLU(negative_slope=0.01)
    (14): Dropout(p=0.263, inplace=False)
    (15): LinearNormalGamma(
      (linear): Linear(in_features=1057, out_features=4, bias=True)
    )
  )
)

### Add the training dataset variance to the model class to enable uncertainty calculations after training

In [16]:
# One variance per output column of the scaled training targets, so the list
# has one entry per task (length 1 here, as before). The previous pooled
# np.var(y_train) stayed length 1 regardless of how many columns were used.
# NOTE(review): assumes y_train is 2-D (rows x tasks), as returned by the
# scaler above -- confirm if the targets are ever reshaped.
model.training_var = list(np.var(y_train, axis=0))

In [17]:
from mlguess.torch.regression_losses import LipschitzMSELoss

In [18]:
results_dict = defaultdict(list)

# Loss settings come from the `train_loss` section of the config.
criterion = LipschitzMSELoss(**conf["train_loss"])
optimizer = optim.Adam(model.parameters(), lr=0.001)

# One pass over the training batches. Batch-specific names are used so the
# loop does not overwrite the full-dataset tensor `y` created earlier.
model.train()
for x_batch, y_batch in dataloader:
    x_batch = x_batch.to(device)
    # Move the target to the same device and dtype as the input (done once).
    y_batch = y_batch.to(device=device, dtype=x_batch.dtype)

    # The model returns four outputs, which are passed to the loss separately.
    y_pred = model(x_batch)
    gamma, nu, alpha, beta = y_pred
    loss = criterion(gamma, nu, alpha, beta, y_batch).mean()

    # Backward pass and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# Predict uncertainties once, from the outputs of the last batch. Previously
# this ran (with a device-to-CPU copy) on every batch although only the final
# result was kept. A tuple is passed instead of a one-shot generator so the
# detached outputs remain available afterwards.
y_pred = tuple(out.detach().cpu() for out in y_pred)
mu, ale, epi, total = model.predict_uncertainty(y_pred, y_scaler=y_scaler)

# Loss of the last batch only.
print(loss)

tensor(-1.8424, device='cuda:0', grad_fn=<MeanBackward0>)


### Questions? Email John Schreck (schreck@ucar.edu)