In [25]:
#import
import math
import wandb
import torch
import torchmetrics

import numpy as np
import pandas as pd
import torch.nn as nn
import torch.optim as optim
import lightning.pytorch as pl
import torch.nn.functional as F

from torchmetrics import MeanAbsoluteError
from sklearn.metrics import mean_squared_error
from lightning.pytorch.loggers import CSVLogger
from torch.utils.data import Dataset, DataLoader, random_split
from lightning.pytorch.loggers import WandbLogger
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.callbacks import LearningRateMonitor, ModelCheckpoint

In [43]:
# Log in to Weights & Biases up front; the training cell later creates a WandbLogger.
wandb.login()

In [20]:
# Make the notebook use the full screen width by injecting CSS overrides into the page.
# `display` and `HTML` come from the public `IPython.display` module: importing `display`
# from `IPython.core.display` is deprecated and no longer works on newer IPython releases.
from IPython.display import display, HTML
from IPython import get_ipython as get_ipython
display(HTML(
    '<style>'
        '#notebook { padding-top:0px !important;margin-left:-30px !important; } '
        '.container { width:100% !important; } '
        '.end_space { min-height:0px !important; } '
    '</style>'
))

In [None]:
# TODO: hyperparameters are displayed incorrectly in the wandb UI
# TODO: set up a dynamic learning rate (LR scheduler)
# TODO: find out how to resume the same wandb experiment from a checkpoint
# TODO: is the validation split static? Decide where the data should be split (is there a single split for validation?)
# TODO: write logs after each epoch - probably just one more self.log(...)

# FCNN

In [49]:
# Experiment configuration: every tunable value lives here and is mirrored in `hyperparams_dict`.
# NOTE(review): in this view `RMSELoss` is only defined in a later cell, so on a fresh kernel
# this cell raises NameError unless that class has been defined first.

# --- locations / experiment tracking ---
logger_path = './wandb_local_logs'
data_path = '../data/clasdb_pi_plus_n.txt'
project_name = "MSU_interpol"

# --- data handling ---
scale_data = False
test_size = 0.1
batch_size = 32

# --- network ---
# Layer widths: consecutive pairs become the (in, out) sizes of the Linear layers.
net_architecture = [
    5, 60, 80, 100, 120, 140, 240, 340, 440, 640,
    2000,
    1040, 640, 340, 240, 140, 100, 80, 60, 20, 1,
]
activation_function = nn.ReLU()
loss_func = RMSELoss()

# --- optimisation / stopping ---
lr = 1e-5
optim_func = torch.optim.Adam
max_epochs = 2000
min_delta = 0.0001
patience = 20

# Single dict handed to the model (and therefore to the experiment logger).
hyperparams_dict = dict(
    scale_data=scale_data,
    test_size=test_size,
    batch_size=batch_size,
    lr=lr,
    net_architecture=net_architecture,
    activation_function=activation_function,
    loss_func=loss_func,
    optim_func=optim_func,
    max_epochs=max_epochs,
    min_delta=min_delta,
    patience=patience,
)

In [50]:
class InterpolDataSet(Dataset):
    """Map-style dataset pairing each feature row with its label.

    Args:
        features: indexable container of feature rows.
        labels: indexable container of labels; its length defines the dataset size.
    """

    def __init__(self, features, labels):
        self.features, self.labels = features, labels
        # The size is captured once, at construction time.
        self.len = len(self.labels)

    def __len__(self):
        return self.len

    def __getitem__(self, index):
        # Returns the (feature, label) pair stored at `index`.
        return self.features[index], self.labels[index]

class InterpolDataModule(pl.LightningDataModule):
    """Reads the tab-separated data file and serves train/validation DataLoaders.

    Uses the notebook-level settings `data_path`, `scale_data`, `test_size` and `batch_size`.

    Attributes populated by `setup`:
        df: the cleaned (unscaled) DataFrame.
        train_dataset, val_dataset: `InterpolDataSet` objects holding float32 tensors.
        scaler_feature, scaler_target: the fitted `StandardScaler` objects when `scale_data`
            is true (kept so predictions can be inverse-transformed), otherwise None.
    """

    FEATURE_COLUMNS = ['Ebeam', 'W', 'Q2', 'cos_theta', 'phi']
    TARGET_COLUMN = 'dsigma_dOmega'

    def __init__(self):
        super().__init__()
        self.df = None
        self.scaler_feature = None
        self.scaler_target = None

    def setup(self, stage):
        """Load, clean, split and (optionally) standardise the data.

        Args:
            stage: stage name passed in by Lightning; not used here.
        """
        # data reading and preprocessing
        df = pd.read_csv(data_path, delimiter='\t', header=None)
        df.columns = ['Ebeam', 'W', 'Q2', 'cos_theta', 'phi', 'dsigma_dOmega', 'error', 'id']

        # Rows with index labels 8314..65671 (inclusive, as `.loc` slices are) get a fixed Ebeam.
        df.loc[8314:65671, 'Ebeam'] = 5.754 # peculiarity of this dataset.
        df['phi'] = df.phi.apply(math.radians)  # degrees -> radians
        df = df.drop('id', axis=1)
        # Keep the first row for every distinct feature combination.
        df = df.drop_duplicates(subset=self.FEATURE_COLUMNS)
        self.df = df

        # train test split (done on plain arrays so both branches below share one code path)
        feature_data = df[self.FEATURE_COLUMNS].to_numpy()
        label_data = df[self.TARGET_COLUMN].to_numpy()

        train_feature_data, val_feature_data, train_label_data, val_label_data = train_test_split(
            feature_data, label_data, test_size=test_size, random_state=1438)

        self.scaler_feature = None
        self.scaler_target = None
        if scale_data:
            # Fit the scalers on the training part only, so that validation statistics do not
            # leak into the preprocessing, then apply the same transform to the validation part.
            self.scaler_feature = StandardScaler()
            self.scaler_target = StandardScaler()
            train_feature_data = self.scaler_feature.fit_transform(train_feature_data)
            val_feature_data = self.scaler_feature.transform(val_feature_data)
            # StandardScaler needs 2-D input; flatten back so labels stay 1-D like the unscaled case.
            train_label_data = self.scaler_target.fit_transform(train_label_data.reshape(-1, 1)).ravel()
            val_label_data = self.scaler_target.transform(val_label_data.reshape(-1, 1)).ravel()

        self.train_dataset = InterpolDataSet(torch.tensor(train_feature_data, dtype=torch.float32),
                                             torch.tensor(train_label_data, dtype=torch.float32))

        self.val_dataset = InterpolDataSet(torch.tensor(val_feature_data, dtype=torch.float32),
                                           torch.tensor(val_label_data, dtype=torch.float32))

    def train_dataloader(self):
        """Return the DataLoader over the training dataset."""
        # NOTE(review): shuffle=False means batches have the same composition every epoch;
        # consider shuffle=True for training.
        return DataLoader(dataset = self.train_dataset, batch_size = batch_size, shuffle = False, num_workers=0)

    def val_dataloader(self):
        """Return the DataLoader over the validation dataset."""
        return DataLoader(dataset = self.val_dataset, batch_size = batch_size, shuffle = False, num_workers=0)
    
class RMSELoss(torch.nn.Module):
    """Root-mean-square error loss: the square root of the mean squared error of `x` vs `y`."""

    def __init__(self):
        super().__init__()

    def forward(self, x, y):
        """Return sqrt(MSE(x, y)) as a scalar tensor (mean reduction over all elements)."""
        mean_squared = nn.functional.mse_loss(x, y)
        return mean_squared.sqrt()

In [51]:
class InterpolRegressor(pl.LightningModule):
    """Fully connected regression network assembled from a hyperparameter dict.

    Args:
        hyperparams: dict providing
            'net_architecture'    - list of layer widths; consecutive pairs define the Linear layers,
            'activation_function' - module inserted after every Linear layer except the last,
            'loss_func'           - callable `(prediction, target) -> loss`,
            'optim_func'          - optimizer class called as `optim_func(params, lr=...)`,
            'lr'                  - learning rate.
            The whole dict is also passed to `save_hyperparameters`.
    """

    def __init__(self, hyperparams):
        super().__init__()

        self.hyperparams = hyperparams
        self.save_hyperparameters(self.hyperparams)

        # One metric object per stage: a torchmetrics metric accumulates internal state, and
        # sharing one instance between training and validation would mix the two.
        self.mae = MeanAbsoluteError()             # training
        self.mae_val_metric = MeanAbsoluteError()  # validation
        self.loss_func = self.hyperparams.get('loss_func')

        self.optim = self.hyperparams.get('optim_func')

        self.net_architecture = self.hyperparams.get('net_architecture')
        self.activation_function = self.hyperparams.get('activation_function')

        # Linear -> activation -> ... -> Linear; no activation after the output layer.
        # The same activation module instance is reused between all layers.
        self.net = nn.Sequential()
        last_layer = len(self.net_architecture) - 1
        for i in range(1, len(self.net_architecture)):
            self.net.append(nn.Linear(self.net_architecture[i-1], self.net_architecture[i]))
            if i != last_layer:
                self.net.append(self.activation_function)

    def forward(self, x):
        """Run `x` through the network and return its raw output."""
        return self.net(x)

    def _shared_step(self, batch, metric, stage):
        """Compute and log loss and MAE for one batch.

        Args:
            batch: `(features, targets)` pair.
            metric: the MAE metric object belonging to this stage.
            stage: prefix of the logged names ('train' or 'val').

        Returns:
            Tuple `(loss, mae)` for this batch.
        """
        x, y = batch
        # Flatten predictions so they line up element-wise with the targets.
        y_hat = self(x).reshape(-1)

        loss = self.loss_func(y_hat, y)
        mae = metric(y_hat, y)

        # Weight the epoch aggregate by the real batch size: the last batch of an epoch can be
        # smaller than the configured batch size.
        current_batch_size = y.size(0)
        self.log(f'{stage}_loss', loss, batch_size=current_batch_size,
                 on_step=False, on_epoch=True, prog_bar=True, sync_dist=True, logger=True)
        self.log(f'{stage}_mae', mae, batch_size=current_batch_size,
                 on_step=False, on_epoch=True, prog_bar=True, sync_dist=True, logger=True)
        return loss, mae

    def training_step(self, batch, batch_idx):
        """Log 'train_loss' / 'train_mae' and return the training loss for backpropagation."""
        self.train_loss, self.mae_train = self._shared_step(batch, self.mae, 'train')
        return self.train_loss

    def validation_step(self, batch, batch_idx):
        """Log 'val_loss' / 'val_mae' and return the validation loss."""
        self.val_loss, self.mae_val = self._shared_step(batch, self.mae_val_metric, 'val')
        return self.val_loss

    def configure_optimizers(self):
        """Instantiate the configured optimizer class over all parameters with the configured lr."""
        optimizer = self.optim(self.parameters(), lr=self.hyperparams.get('lr'))
        return optimizer

In [52]:
# Build the data module, logger, model and callbacks, then train.
data_module = InterpolDataModule()

wandb_logger = WandbLogger(project=project_name,
                           save_dir=logger_path,)
# Name of the wandb run; used below to keep checkpoints of different runs apart.
exp_name = wandb_logger.experiment.name

model = InterpolRegressor(hyperparams=hyperparams_dict)

# Created for convenience, but currently NOT passed to the Trainer (see the commented-out
# `callbacks` line below), so training always runs for `max_epochs` epochs.
early_stop_callback = EarlyStopping(monitor="val_loss", mode="min", min_delta=min_delta, patience=patience, verbose=True)

checkpoint_l1 = ModelCheckpoint(
    save_top_k=3,
    monitor="val_loss",
    mode="min",
    dirpath=f"{logger_path}/{project_name}/{exp_name}/checkpoints",
    # The run name is interpolated here, while the doubled braces leave {val_loss}/{epoch}
    # for Lightning to fill in. A plain "{exp_name}" would be treated as a metric name
    # and rendered as "exp_name=0".
    filename=f"{exp_name}-{{val_loss:.5f}}-{{epoch:02d}}",
)

trainer = pl.Trainer(max_epochs=max_epochs,
                     accelerator='cpu',
#                      callbacks=[early_stop_callback, checkpoint_l1],
                     callbacks=[checkpoint_l1],
                     logger=wandb_logger,
                     enable_progress_bar=False)
trainer.fit(model, data_module)

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011161336433401125, max=1.0…

/Users/andrey.golda/.pyenv/versions/3.9.0/lib/python3.9/site-packages/lightning/pytorch/utilities/parsing.py:198: Attribute 'activation_function' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['activation_function'])`.
/Users/andrey.golda/.pyenv/versions/3.9.0/lib/python3.9/site-packages/lightning/pytorch/utilities/parsing.py:198: Attribute 'loss_func' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss_func'])`.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/Users/andrey.golda/.pyenv/versions/3.9.0/lib/python3.9/site-packages/lightning/pytorch/utilities/parsing.py:43: attribute 'loss_func' removed from hparams because it cannot be pickled

  | Name                | Type              | Par

In [53]:
# Finish the current wandb run once training is done.
wandb.finish()

VBox(children=(Label(value='0.012 MB of 0.024 MB uploaded (0.008 MB deduped)\r'), FloatProgress(value=0.498611…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_loss,█▇▇▆▆▆▅▄▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_mae,█▇▇▆▆▅▅▄▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trainer/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
val_loss,█▇▇▆▆▅▅▄▃▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_mae,█▇▇▆▅▅▄▄▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,327.0
train_loss,0.33249
train_mae,0.16894
trainer/global_step,861983.0
val_loss,0.34324
val_mae,0.17369


In [54]:
# Mean squared error of the trained model on the training set.
# Note: this is plain MSE from scikit-learn, not the RMSE used as the training loss.
# Inference only, so skip autograd bookkeeping and call the module itself rather than `.forward`.
with torch.no_grad():
    preds = model(data_module.train_dataset.features)
print('', mean_squared_error(data_module.train_dataset.labels, preds.numpy()))

 0.16783376


In [56]:
# Mean squared error of the trained model on the validation set.
# Note: this is plain MSE from scikit-learn, not the RMSE used as the training loss.
# Inference only, so skip autograd bookkeeping and call the module itself rather than `.forward`.
with torch.no_grad():
    preds = model(data_module.val_dataset.features)
mean_squared_error(data_module.val_dataset.labels, preds.numpy())

0.16841991

In [34]:
# Open a saved checkpoint on the CPU and show the hyperparameters stored inside it.
checkpoint_path = './wandb_local_logs/MSU_interpol/still-frost-4/val_loss=0.37239-epoch=15.ckpt'
checkpoint = torch.load(checkpoint_path, map_location="cpu")
print(checkpoint["hyper_parameters"])

{'scale_data': False, 'test_size': 0.1, 'batch_size': 16, 'lr': 5e-05, 'net_architecture': [5, 60, 80, 100, 120, 140, 140, 140, 240, 340, 440, 1040, 640, 340, 240, 140, 100, 80, 60, 20, 1], 'activation_function': ReLU(), 'optim_func': 'ADAM', 'max_epochs': 2000, 'min_delta': 0.0001, 'patience': 20}


In [42]:
# Restore the regressor defined in this notebook from a checkpoint file.
# `MyLightningModule` does not exist here; `InterpolRegressor` is the LightningModule to load.
# NOTE: "/path/to/checkpoint.ckpt" is a placeholder - point it at a real .ckpt file before running.
InterpolRegressor.load_from_checkpoint("/path/to/checkpoint.ckpt")