In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay, accuracy_score, make_scorer
from torch.utils.data import DataLoader, TensorDataset
import scipy.stats as stats
from torchsummary import summary
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.base import BaseEstimator
from torchmetrics import MeanSquaredError, R2Score

## Load Data

In [2]:
df = pd.read_csv('./leap-atmospheric-physics-ai-climsim/train.csv', sep=',', nrows=20000)

## Feature Selection

In [3]:
input_df = df.iloc[:, 1:557]
output_df = df.iloc[:, 557:]

In [4]:
# drop low variance with no correlation columns
no_corr_or_var = [135, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 
                  548, 549, 550, 551, 552, 553, 554, 555, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 
                  481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495]
input_df_clean = input_df.drop(input_df.columns[no_corr_or_var], axis=1)

## Data Sets

In [5]:
# split into train, validation, and test sets
X_train, X_val_test, y_train, y_val_test = train_test_split(input_df_clean, output_df, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_val_test, y_val_test, test_size=0.5, random_state=42)

# convert to numpy arrays
X_train_numpy = X_train.to_numpy()
X_val_numpy = X_val.to_numpy()
X_test_numpy = X_test.to_numpy()
y_train_numpy = y_train.to_numpy()
y_val_numpy = y_val.to_numpy()
y_test_numpy = y_test.to_numpy()

# MinMaxScaler due to normal distribution test failing earlier
scaler = MinMaxScaler()
X_train_numpy = scaler.fit_transform(X_train_numpy)
X_val_numpy = scaler.transform(X_val_numpy)
X_test_numpy = scaler.transform(X_test_numpy)

# convert to tensors
X_train_tensor = torch.tensor(X_train_numpy, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val_numpy, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_numpy, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train_numpy, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val_numpy, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test_numpy, dtype=torch.float32)

## Hyperparameters

In [6]:
input_size = X_train_tensor.shape[1]
hidden_size = 512
output_size = y_train_tensor.shape[1]
batch_size = 128
num_epochs = 20
learning_rate = 0.001
regularization_param = 0.0001
momentum_param = 0.9
dropout_p = 0.5

scheduler_factor = 0.005
scheduler_patience = 5

## Hyperparameters Optimization

In [7]:
# reworked model to work with GridSearchCV
class NeuralNetwork(BaseEstimator):
    def __init__(self, hidden_size, batch_size, learning_rate, regularization_param, momentum_param, dropout_p):
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.regularization_param = regularization_param
        self.momentum_param = momentum_param
        self.dropout_p = dropout_p

    def fit(self, X, y):
        input_size = X.shape[1]
        output_size = y.shape[1]

        self.model = nn.Sequential(
            nn.Linear(input_size, self.hidden_size),
            nn.BatchNorm1d(self.hidden_size),
            nn.LeakyReLU(),
            nn.Dropout(self.dropout_p),
            nn.Linear(self.hidden_size, self.hidden_size),
            nn.BatchNorm1d(self.hidden_size),
            nn.LeakyReLU(),
            nn.Dropout(self.dropout_p),
            nn.Linear(self.hidden_size, self.hidden_size),
            nn.BatchNorm1d(self.hidden_size),
            nn.LeakyReLU(),
            nn.Dropout(self.dropout_p),
            nn.Linear(self.hidden_size, self.hidden_size),
            nn.BatchNorm1d(self.hidden_size),
            nn.LeakyReLU(),
            nn.Dropout(self.dropout_p),
            nn.Linear(self.hidden_size, output_size)
        )

        self.criterion = nn.MSELoss()
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate, weight_decay=self.regularization_param)

        self.train_dl = DataLoader(TensorDataset(X, y), self.batch_size, shuffle=True)

        for _ in range(10):
            self.model.train()
            for x_batch, y_batch in self.train_dl:
                predictions = self.model(x_batch)
                loss = self.criterion(predictions, y_batch)
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

    def predict(self, X):
        self.model.eval()
        with torch.no_grad():
            predictions = self.model(X)
        return predictions.detach().numpy()

def pytorch_r2_score(y_true, y_pred):
    metric = R2Score(num_outputs=y_true.shape[1])
    for i in range(y_true.shape[1]):
        metric.update(y_pred[:, i], y_true[:, i])
    return metric.compute().item()

r2_scorer = make_scorer(pytorch_r2_score, greater_is_better=True)

search_grid = {
    'hidden_size': [256, 694],
    'batch_size': [64, 128, 256],
    'learning_rate': [0.0001, 0.001, 0.01],
    'regularization_param': [0.0001, 0.001, 0.01],
}

gs = GridSearchCV(NeuralNetwork(hidden_size, batch_size, learning_rate, regularization_param, momentum_param, dropout_p), 
                  param_grid=search_grid, 
                  n_jobs=4, 
                  cv=5, 
                  scoring=r2_scorer, 
                  verbose=3)

gs.fit(X_train_tensor, y_train_tensor)
print(gs.best_score_)
print(gs.best_params_)

Fitting 5 folds for each of 54 candidates, totalling 270 fits
[CV 1/5] END batch_size=64, hidden_size=256, learning_rate=0.0001, regularization_param=0.0001;, score=0.264 total time=   5.1s
[CV 4/5] END batch_size=64, hidden_size=256, learning_rate=0.0001, regularization_param=0.0001;, score=0.268 total time=   5.0s
[CV 2/5] END batch_size=64, hidden_size=256, learning_rate=0.0001, regularization_param=0.0001;, score=0.301 total time=   5.4s
[CV 3/5] END batch_size=64, hidden_size=256, learning_rate=0.0001, regularization_param=0.0001;, score=0.282 total time=   5.4s
[CV 2/5] END batch_size=64, hidden_size=256, learning_rate=0.0001, regularization_param=0.001;, score=0.253 total time=   4.0s
[CV 3/5] END batch_size=64, hidden_size=256, learning_rate=0.0001, regularization_param=0.001;, score=0.250 total time=   4.0s
[CV 5/5] END batch_size=64, hidden_size=256, learning_rate=0.0001, regularization_param=0.0001;, score=0.287 total time=   4.9s
[CV 1/5] END batch_size=64, hidden_size=256,



[CV 4/5] END batch_size=64, hidden_size=694, learning_rate=0.001, regularization_param=0.0001;, score=0.981 total time=  18.8s
[CV 5/5] END batch_size=64, hidden_size=694, learning_rate=0.001, regularization_param=0.0001;, score=0.982 total time=  19.0s
[CV 1/5] END batch_size=64, hidden_size=694, learning_rate=0.001, regularization_param=0.001;, score=0.983 total time=  18.8s
[CV 2/5] END batch_size=64, hidden_size=694, learning_rate=0.001, regularization_param=0.001;, score=0.978 total time=  19.1s
[CV 3/5] END batch_size=64, hidden_size=694, learning_rate=0.001, regularization_param=0.001;, score=0.982 total time=  19.3s
[CV 4/5] END batch_size=64, hidden_size=694, learning_rate=0.001, regularization_param=0.001;, score=0.985 total time=  19.2s
[CV 5/5] END batch_size=64, hidden_size=694, learning_rate=0.001, regularization_param=0.001;, score=0.983 total time=  19.0s
[CV 1/5] END batch_size=64, hidden_size=694, learning_rate=0.001, regularization_param=0.01;, score=0.981 total time