In [16]:
import os
import sys
import math
import wandb
import torch
import logging

import numpy as np
import pandas as pd
import torch.nn as nn
import lightning.pytorch as pl

from torchmetrics import MeanAbsoluteError
from sklearn.metrics import mean_squared_error
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from lightning.pytorch.loggers import WandbLogger
from sklearn.model_selection import train_test_split
from torch.optim.lr_scheduler import ReduceLROnPlateau
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.callbacks import Callback, LearningRateMonitor, ModelCheckpoint

In [17]:
wandb.login()

True

# FCNN

In [18]:
class RMSELoss(torch.nn.Module):
    def __init__(self):
        super(RMSELoss,self).__init__()

    def forward(self,x,y):
        criterion = nn.MSELoss()
        loss = torch.sqrt(criterion(x, y))
        return loss

In [19]:
#params
project_name = "MSU_interpol"

logger_path = './wandb_local_logs'
data_path = '../data/clasdb_pi_plus_n.txt'

hyperparams_dict = {
    'scale_data': False,
    'test_size': 0.1,
    'batch_size': 32,
    'net_architecture': [5,10,10,10,10,10,10,1],
    'activation_function': nn.ReLU(),
    'loss_func': RMSELoss(),
    'optim_func': torch.optim.Adam,
    'max_epochs': 2000,
    'es_min_delta': 0.0001,
    'es_patience': 20,
    'lr': 0.00001,
    'lr_factor':0.5,
    'lr_patience': 3,
    'lr_cooldown': 1,
}

In [20]:
wandb_logger = WandbLogger(project=project_name,
                           save_dir=logger_path)
exp_name = wandb_logger.experiment.name

logger_full_path = os.path.join(logger_path, project_name, exp_name)

os.makedirs(logger_full_path, exist_ok=True)
logging.basicConfig(encoding='utf-8',
                    level=logging.DEBUG,
                    format='%(asctime)s : %(levelname)s : %(message)s',
                    handlers=[logging.FileHandler(os.path.join(logger_full_path, 'logs.log'), mode='w'),
                              logging.StreamHandler(sys.stdout)],
                    force=True)

2024-01-20 17:14:31,552 : DEBUG : Popen(['git', 'cat-file', '--batch-check'], cwd=/Users/andrey.golda/Documents/Study/MSU_interpol, stdin=<valid stream>, shell=False, universal_newlines=False)


In [22]:
class InterpolDataSet(Dataset):
    def __init__(self, features, labels):
        self.features = features
        self.labels = labels
        self.len = len(labels)

    def __getitem__(self, index):
        feature = self.features[index]
        label = self.labels[index]
        return feature, label

    def __len__(self):
        return self.len

class InterpolDataModule(pl.LightningDataModule):
    def __init__(self, hyperparams):
        super().__init__()
        self.df = None
        self.hyperparams = hyperparams
    
    def setup(self, stage):
        # data reading and preprocessing
        df = pd.read_csv(data_path, delimiter='\t', header=None)
        df.columns = ['Ebeam', 'W', 'Q2', 'cos_theta', 'phi', 'dsigma_dOmega', 'error', 'id']

        df.loc[8314:65671, 'Ebeam'] = 5.754 # peculiarity of this dataset.
        df['phi'] = df.phi.apply(lambda x: math.radians(x))
        df = df.drop('id', axis=1)
        df = df.iloc[df[['Ebeam', 'W', 'Q2', 'cos_theta', 'phi']].drop_duplicates().index]
        self.df = df
        
        #train test split
        feature_data = df[['Ebeam', 'W', 'Q2', 'cos_theta', 'phi']]
        label_data = df['dsigma_dOmega']
        
        if self.hyperparams.get('scale_data'):
            scaler_feature = StandardScaler()
            scaler_target = StandardScaler()
            feature_data = scaler_feature.fit_transform(feature_data)
            label_data = scaler_target.fit_transform(label_data.values.reshape(-1,1))
        else:
            pass
        
        train_feature_data, val_feature_data, train_label_data, val_label_data = train_test_split(feature_data,
                                                                                                  label_data,
                                                                                                  test_size=self.hyperparams.get('test_size'),
                                                                                                  random_state=1438)
        
        
        self.train_dataset = InterpolDataSet(torch.tensor(train_feature_data.values, dtype=torch.float32), 
                                             torch.tensor(train_label_data.values, dtype=torch.float32))
        
        self.val_dataset = InterpolDataSet(torch.tensor(val_feature_data.values, dtype=torch.float32), 
                                            torch.tensor(val_label_data.values, dtype=torch.float32))
    def train_dataloader(self):
        return DataLoader(dataset = self.train_dataset, batch_size = self.hyperparams.get('batch_size'), shuffle = False, num_workers=0)

    def val_dataloader(self):
        return DataLoader(dataset = self.val_dataset, batch_size = self.hyperparams.get('batch_size'), shuffle = False, num_workers=0)

class PrintCallbacks(Callback):
    def on_train_start(self, trainer, pl_module):
        logging.info("Training is starting")

    def on_train_end(self, trainer, pl_module):
        logging.info("Training is ending")

    def on_train_epoch_end(self, trainer, pl_module):
        logging.info(f"epoch: {pl_module.current_epoch}; train_loss: {pl_module.train_loss}; train_mae: {pl_module.train_mae}")

    def on_validation_epoch_end(self, trainer, pl_module):
        logging.info(f"epoch: {pl_module.current_epoch}; val_loss: {pl_module.val_loss}; val_mae: {pl_module.val_mae}")

class InterpolRegressor(pl.LightningModule):
    def __init__(self, hyperparams):
        super(InterpolRegressor, self).__init__()

        self.train_loss, self.train_mae, self.val_loss, self.val_mae = 0,0,0,0
        self.hyperparams = hyperparams
        self.save_hyperparameters(self.hyperparams)

        self.mae = MeanAbsoluteError()
        self.loss_func = self.hyperparams.get('loss_func')

        self.optim = self.hyperparams.get('optim_func')

        self.net_architecture = self.hyperparams.get('net_architecture')
        self.activation_function = self.hyperparams.get('activation_function')

        self.net = nn.Sequential()
        for i in range(1,len(self.net_architecture)):
            self.net.append(nn.Linear(self.net_architecture[i-1], self.net_architecture[i]))
            if i!=len(self.net_architecture)-1:
                self.net.append(self.activation_function)
            else:
                pass

    def forward(self, x):
        return self.net(x)

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self.forward(x)

        loss = self.loss_func
        self.train_loss = loss(y_hat.reshape(-1), y)
        self.train_mae = self.mae(y_hat.reshape(-1), y)

        self.log('train_loss', self.train_loss, batch_size=self.hyperparams['batch_size'],
                 on_step=False, on_epoch=True, prog_bar=True, sync_dist=True, logger=True)
        self.log('train_mae', self.train_mae, batch_size=self.hyperparams['batch_size'],
                 on_step=False, on_epoch=True, prog_bar=True, sync_dist=True, logger=True)

        return self.train_loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self.forward(x)

        loss = self.loss_func
        self.val_loss = loss(y_hat.reshape(-1), y)
        self.val_mae = self.mae(y_hat.reshape(-1), y)

        self.log('val_loss', self.val_loss, batch_size=self.hyperparams['batch_size'],
                 on_step=False, on_epoch=True, prog_bar=True, sync_dist=True, logger=True)
        self.log('val_mae', self.val_mae, batch_size=self.hyperparams['batch_size'],
                 on_step=False, on_epoch=True, prog_bar=True, sync_dist=True, logger=True)

        return self.val_loss

    def on_validation_epoch_end(self):
        sch = self.lr_schedulers()
        if isinstance(sch, torch.optim.lr_scheduler.ReduceLROnPlateau) and self.trainer.current_epoch!=0:
                sch.step(self.trainer.callback_metrics["val_loss"])

    def configure_callbacks(self):
        early_stop_callback = EarlyStopping(monitor="val_loss", mode="min",
                                            min_delta=self.hyperparams.get('es_min_delta'),
                                            patience=self.hyperparams.get('es_patience'),
                                            verbose=True)

        checkpoint_callback = ModelCheckpoint(save_top_k=3,
                                              monitor="val_loss",
                                              mode="min",
                                              dirpath=f"{logger_full_path}/checkpoints",
                                              filename="{exp_name}{val_loss:.5f}-{epoch:02d}")

        lr_monitor = LearningRateMonitor(logging_interval='epoch')

        print_callback = PrintCallbacks()

        return [early_stop_callback, checkpoint_callback, print_callback, lr_monitor]

    def configure_optimizers(self):
        optimizer = self.optim(self.parameters(), lr=self.hyperparams.get('lr'))
        lr_optim = ReduceLROnPlateau(optimizer = optimizer,
                                     mode = 'min',
                                     factor = self.hyperparams.get('lr_factor'),
                                     patience = self.hyperparams.get('lr_patience'),
                                     cooldown=self.hyperparams.get('lr_cooldown'),
                                     threshold=0.01,
                                     verbose= True)
        return {"optimizer": optimizer,
                "lr_scheduler": {
                    "scheduler": lr_optim,
                    "interval": "epoch",
                    "monitor": "val_loss",
                    "frequency": 2,
                    "name": 'lr_scheduler_monitoring'}
                }


In [23]:
data_module = InterpolDataModule(hyperparams=hyperparams_dict)
model = InterpolRegressor(hyperparams=hyperparams_dict)

trainer = pl.Trainer(max_epochs=hyperparams_dict.get('max_epochs'),
                     accelerator='cpu',
                     logger=wandb_logger,
                     enable_progress_bar=False)
trainer.fit(model, data_module)

/Users/andrey.golda/Library/Caches/pypoetry/virtualenvs/msu-interpol--lw2ADYE-py3.11/lib/python3.11/site-packages/lightning/pytorch/utilities/parsing.py:198: Attribute 'activation_function' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['activation_function'])`.
/Users/andrey.golda/Library/Caches/pypoetry/virtualenvs/msu-interpol--lw2ADYE-py3.11/lib/python3.11/site-packages/lightning/pytorch/utilities/parsing.py:198: Attribute 'loss_func' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss_func'])`.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
The following callbacks returned in `LightningModule.configure_callbacks` will override existing callbacks passed to Trainer: ModelCheckpoint

  | 

2024-01-20 17:14:40,102 : INFO : epoch: 0; val_loss: 1.0276641845703125; val_mae: 0.9381320476531982
2024-01-20 17:14:40,105 : INFO : Training is starting


/Users/andrey.golda/Library/Caches/pypoetry/virtualenvs/msu-interpol--lw2ADYE-py3.11/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
/Users/andrey.golda/Library/Caches/pypoetry/virtualenvs/msu-interpol--lw2ADYE-py3.11/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


2024-01-20 17:14:51,039 : INFO : epoch: 0; val_loss: 1.5381301641464233; val_mae: 1.070046305656433
2024-01-20 17:14:51,041 : INFO : epoch: 0; train_loss: 2.8323638439178467; train_mae: 1.604421854019165


Metric val_loss improved. New best score: 2.325


2024-01-20 17:14:51,050 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/leafy-silence-29/checkpoints/exp_name=0val_loss=2.32471-epoch=00.ckpt
2024-01-20 17:15:01,423 : INFO : epoch: 1; val_loss: 1.3994128704071045; val_mae: 0.8468100428581238
2024-01-20 17:15:01,426 : INFO : epoch: 1; train_loss: 2.724168062210083; train_mae: 1.3916921615600586


Metric val_loss improved by 0.122 >= min_delta = 0.0001. New best score: 2.202


2024-01-20 17:15:01,434 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/leafy-silence-29/checkpoints/exp_name=0val_loss=2.20231-epoch=01.ckpt
2024-01-20 17:15:12,054 : INFO : epoch: 2; val_loss: 1.2062790393829346; val_mae: 0.5968577861785889
2024-01-20 17:15:12,057 : INFO : epoch: 2; train_loss: 2.548346996307373; train_mae: 1.1335967779159546


Metric val_loss improved by 0.178 >= min_delta = 0.0001. New best score: 2.024


2024-01-20 17:15:12,065 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/leafy-silence-29/checkpoints/exp_name=0val_loss=2.02442-epoch=02.ckpt
2024-01-20 17:15:22,470 : INFO : epoch: 3; val_loss: 1.1601380109786987; val_mae: 0.676896333694458
2024-01-20 17:15:22,472 : INFO : epoch: 3; train_loss: 2.453669786453247; train_mae: 1.166300654411316


Metric val_loss improved by 0.061 >= min_delta = 0.0001. New best score: 1.963


2024-01-20 17:15:22,480 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/leafy-silence-29/checkpoints/exp_name=0val_loss=1.96319-epoch=03.ckpt
2024-01-20 17:15:32,839 : INFO : epoch: 4; val_loss: 1.150251865386963; val_mae: 0.6962525844573975
2024-01-20 17:15:32,841 : INFO : epoch: 4; train_loss: 2.426471710205078; train_mae: 1.1695412397384644


Metric val_loss improved by 0.016 >= min_delta = 0.0001. New best score: 1.947


2024-01-20 17:15:32,848 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/leafy-silence-29/checkpoints/exp_name=0val_loss=1.94743-epoch=04.ckpt
2024-01-20 17:15:43,424 : INFO : epoch: 5; val_loss: 1.1224920749664307; val_mae: 0.6752683520317078
2024-01-20 17:15:43,426 : INFO : epoch: 5; train_loss: 2.394882917404175; train_mae: 1.148201584815979


Metric val_loss improved by 0.026 >= min_delta = 0.0001. New best score: 1.921


2024-01-20 17:15:43,434 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/leafy-silence-29/checkpoints/exp_name=0val_loss=1.92131-epoch=05.ckpt
2024-01-20 17:15:53,811 : INFO : epoch: 6; val_loss: 1.092488169670105; val_mae: 0.6648592948913574
2024-01-20 17:15:53,813 : INFO : epoch: 6; train_loss: 2.349783420562744; train_mae: 1.130745768547058


Metric val_loss improved by 0.030 >= min_delta = 0.0001. New best score: 1.891


2024-01-20 17:15:53,826 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/leafy-silence-29/checkpoints/exp_name=0val_loss=1.89120-epoch=06.ckpt
2024-01-20 17:16:04,263 : INFO : epoch: 7; val_loss: 1.0437260866165161; val_mae: 0.6397899985313416
2024-01-20 17:16:04,265 : INFO : epoch: 7; train_loss: 2.280886650085449; train_mae: 1.099838376045227


Metric val_loss improved by 0.045 >= min_delta = 0.0001. New best score: 1.846


2024-01-20 17:16:04,276 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/leafy-silence-29/checkpoints/exp_name=0val_loss=1.84608-epoch=07.ckpt
2024-01-20 17:16:14,867 : INFO : epoch: 8; val_loss: 0.9625066518783569; val_mae: 0.590488612651825
2024-01-20 17:16:14,870 : INFO : epoch: 8; train_loss: 2.1710727214813232; train_mae: 1.0491924285888672


Metric val_loss improved by 0.071 >= min_delta = 0.0001. New best score: 1.775


2024-01-20 17:16:14,878 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/leafy-silence-29/checkpoints/exp_name=0val_loss=1.77527-epoch=08.ckpt
2024-01-20 17:16:29,429 : INFO : epoch: 9; val_loss: 0.8477510809898376; val_mae: 0.533038318157196
2024-01-20 17:16:29,436 : INFO : epoch: 9; train_loss: 2.0095202922821045; train_mae: 0.9782376289367676


Metric val_loss improved by 0.098 >= min_delta = 0.0001. New best score: 1.677


2024-01-20 17:16:29,444 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/leafy-silence-29/checkpoints/exp_name=0val_loss=1.67724-epoch=09.ckpt
2024-01-20 17:16:40,157 : INFO : epoch: 10; val_loss: 0.6897841691970825; val_mae: 0.45513463020324707
2024-01-20 17:16:40,159 : INFO : epoch: 10; train_loss: 1.7822613716125488; train_mae: 0.8657105565071106


Metric val_loss improved by 0.125 >= min_delta = 0.0001. New best score: 1.552


2024-01-20 17:16:40,166 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/leafy-silence-29/checkpoints/exp_name=0val_loss=1.55198-epoch=10.ckpt
2024-01-20 17:16:49,828 : INFO : epoch: 11; val_loss: 0.5181042551994324; val_mae: 0.367775559425354
2024-01-20 17:16:49,830 : INFO : epoch: 11; train_loss: 1.5205411911010742; train_mae: 0.7395905256271362


Metric val_loss improved by 0.123 >= min_delta = 0.0001. New best score: 1.429


2024-01-20 17:16:49,838 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/leafy-silence-29/checkpoints/exp_name=0val_loss=1.42938-epoch=11.ckpt
2024-01-20 17:16:59,376 : INFO : epoch: 12; val_loss: 0.39444679021835327; val_mae: 0.3033277094364166
2024-01-20 17:16:59,379 : INFO : epoch: 12; train_loss: 1.2889070510864258; train_mae: 0.6449753642082214


Metric val_loss improved by 0.086 >= min_delta = 0.0001. New best score: 1.344


2024-01-20 17:16:59,388 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/leafy-silence-29/checkpoints/exp_name=0val_loss=1.34354-epoch=12.ckpt
2024-01-20 17:17:09,132 : INFO : epoch: 13; val_loss: 0.34733739495277405; val_mae: 0.26875007152557373
2024-01-20 17:17:09,134 : INFO : epoch: 13; train_loss: 1.1351332664489746; train_mae: 0.6030600666999817


Metric val_loss improved by 0.046 >= min_delta = 0.0001. New best score: 1.298


2024-01-20 17:17:09,144 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/leafy-silence-29/checkpoints/exp_name=0val_loss=1.29764-epoch=13.ckpt
2024-01-20 17:17:18,771 : INFO : epoch: 14; val_loss: 0.34206581115722656; val_mae: 0.27142879366874695
2024-01-20 17:17:18,774 : INFO : epoch: 14; train_loss: 1.054915189743042; train_mae: 0.582827091217041


Metric val_loss improved by 0.023 >= min_delta = 0.0001. New best score: 1.274


2024-01-20 17:17:18,781 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/leafy-silence-29/checkpoints/exp_name=0val_loss=1.27428-epoch=14.ckpt
2024-01-20 17:17:28,337 : INFO : epoch: 15; val_loss: 0.345528781414032; val_mae: 0.2727820575237274
2024-01-20 17:17:28,341 : INFO : epoch: 15; train_loss: 1.0194789171218872; train_mae: 0.5727779865264893


Metric val_loss improved by 0.014 >= min_delta = 0.0001. New best score: 1.260


2024-01-20 17:17:28,351 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/leafy-silence-29/checkpoints/exp_name=0val_loss=1.25988-epoch=15.ckpt
2024-01-20 17:17:37,935 : INFO : epoch: 16; val_loss: 0.3485380709171295; val_mae: 0.27589717507362366
2024-01-20 17:17:37,939 : INFO : epoch: 16; train_loss: 1.0040379762649536; train_mae: 0.5651742219924927


Metric val_loss improved by 0.011 >= min_delta = 0.0001. New best score: 1.249


2024-01-20 17:17:37,946 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/leafy-silence-29/checkpoints/exp_name=0val_loss=1.24869-epoch=16.ckpt
2024-01-20 17:17:47,439 : INFO : epoch: 17; val_loss: 0.3515513837337494; val_mae: 0.2785610854625702
2024-01-20 17:17:47,441 : INFO : epoch: 17; train_loss: 0.9987986087799072; train_mae: 0.5559245944023132


Metric val_loss improved by 0.010 >= min_delta = 0.0001. New best score: 1.239


2024-01-20 17:17:47,449 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/leafy-silence-29/checkpoints/exp_name=0val_loss=1.23882-epoch=17.ckpt
2024-01-20 17:17:57,392 : INFO : epoch: 18; val_loss: 0.3529449999332428; val_mae: 0.2781612277030945
2024-01-20 17:17:57,394 : INFO : epoch: 18; train_loss: 1.0001111030578613; train_mae: 0.5458493828773499


Metric val_loss improved by 0.009 >= min_delta = 0.0001. New best score: 1.229


2024-01-20 17:17:57,401 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/leafy-silence-29/checkpoints/exp_name=0val_loss=1.22944-epoch=18.ckpt
2024-01-20 17:18:07,034 : INFO : epoch: 19; val_loss: 0.3557144105434418; val_mae: 0.27803537249565125
2024-01-20 17:18:07,036 : INFO : epoch: 19; train_loss: 1.0054157972335815; train_mae: 0.5358638167381287


Metric val_loss improved by 0.009 >= min_delta = 0.0001. New best score: 1.220


2024-01-20 17:18:07,044 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/leafy-silence-29/checkpoints/exp_name=0val_loss=1.22036-epoch=19.ckpt
2024-01-20 17:18:16,668 : INFO : epoch: 20; val_loss: 0.3577132225036621; val_mae: 0.2785890996456146
2024-01-20 17:18:16,670 : INFO : epoch: 20; train_loss: 1.012942910194397; train_mae: 0.5281680822372437


Metric val_loss improved by 0.008 >= min_delta = 0.0001. New best score: 1.212


2024-01-20 17:18:16,678 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/leafy-silence-29/checkpoints/exp_name=0val_loss=1.21196-epoch=20.ckpt
2024-01-20 17:18:26,205 : INFO : epoch: 21; val_loss: 0.3573082685470581; val_mae: 0.2762609124183655
2024-01-20 17:18:26,208 : INFO : epoch: 21; train_loss: 1.022463083267212; train_mae: 0.5227200984954834


Metric val_loss improved by 0.008 >= min_delta = 0.0001. New best score: 1.204


2024-01-20 17:18:26,214 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/leafy-silence-29/checkpoints/exp_name=0val_loss=1.20433-epoch=21.ckpt
2024-01-20 17:18:35,778 : INFO : epoch: 22; val_loss: 0.35736486315727234; val_mae: 0.27298539876937866
2024-01-20 17:18:35,781 : INFO : epoch: 22; train_loss: 1.03319251537323; train_mae: 0.5188355445861816


Metric val_loss improved by 0.007 >= min_delta = 0.0001. New best score: 1.197


2024-01-20 17:18:35,790 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/leafy-silence-29/checkpoints/exp_name=0val_loss=1.19735-epoch=22.ckpt
2024-01-20 17:18:45,392 : INFO : epoch: 23; val_loss: 0.35709908604621887; val_mae: 0.26912832260131836
2024-01-20 17:18:45,394 : INFO : epoch: 23; train_loss: 1.0446699857711792; train_mae: 0.515264093875885


Metric val_loss improved by 0.006 >= min_delta = 0.0001. New best score: 1.191


2024-01-20 17:18:45,402 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/leafy-silence-29/checkpoints/exp_name=0val_loss=1.19095-epoch=23.ckpt
2024-01-20 17:18:54,876 : INFO : epoch: 24; val_loss: 0.3564703166484833; val_mae: 0.26501819491386414
2024-01-20 17:18:54,878 : INFO : epoch: 24; train_loss: 1.0568374395370483; train_mae: 0.5120647549629211


Metric val_loss improved by 0.006 >= min_delta = 0.0001. New best score: 1.185


2024-01-20 17:18:54,885 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/leafy-silence-29/checkpoints/exp_name=0val_loss=1.18506-epoch=24.ckpt
2024-01-20 17:19:04,549 : INFO : epoch: 25; val_loss: 0.35558003187179565; val_mae: 0.26080238819122314
2024-01-20 17:19:04,552 : INFO : epoch: 25; train_loss: 1.0694001913070679; train_mae: 0.5093611478805542


Metric val_loss improved by 0.005 >= min_delta = 0.0001. New best score: 1.180


2024-01-20 17:19:04,561 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/leafy-silence-29/checkpoints/exp_name=0val_loss=1.17959-epoch=25.ckpt
Epoch 00038: reducing learning rate of group 0 to 5.0000e-06.
2024-01-20 17:19:14,226 : INFO : epoch: 26; val_loss: 0.35634905099868774; val_mae: 0.2585987150669098
2024-01-20 17:19:14,228 : INFO : epoch: 26; train_loss: 1.0804847478866577; train_mae: 0.5083147287368774


Metric val_loss improved by 0.003 >= min_delta = 0.0001. New best score: 1.177


2024-01-20 17:19:14,235 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/leafy-silence-29/checkpoints/exp_name=0val_loss=1.17660-epoch=26.ckpt
2024-01-20 17:19:23,734 : INFO : epoch: 27; val_loss: 0.35642141103744507; val_mae: 0.2573113739490509
2024-01-20 17:19:23,736 : INFO : epoch: 27; train_loss: 1.0874470472335815; train_mae: 0.5088363289833069


Metric val_loss improved by 0.003 >= min_delta = 0.0001. New best score: 1.174


2024-01-20 17:19:23,744 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/leafy-silence-29/checkpoints/exp_name=0val_loss=1.17407-epoch=27.ckpt
2024-01-20 17:19:33,633 : INFO : epoch: 28; val_loss: 0.35650506615638733; val_mae: 0.2572937607765198
2024-01-20 17:19:33,635 : INFO : epoch: 28; train_loss: 1.094012975692749; train_mae: 0.5098598003387451


Metric val_loss improved by 0.002 >= min_delta = 0.0001. New best score: 1.172


2024-01-20 17:19:33,643 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/leafy-silence-29/checkpoints/exp_name=0val_loss=1.17165-epoch=28.ckpt
2024-01-20 17:19:43,130 : INFO : epoch: 29; val_loss: 0.35663941502571106; val_mae: 0.25716981291770935
Epoch 00043: reducing learning rate of group 0 to 2.5000e-06.
2024-01-20 17:19:43,133 : INFO : epoch: 29; train_loss: 1.100136160850525; train_mae: 0.5106692314147949


Metric val_loss improved by 0.002 >= min_delta = 0.0001. New best score: 1.169


2024-01-20 17:19:43,139 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/leafy-silence-29/checkpoints/exp_name=0val_loss=1.16938-epoch=29.ckpt
2024-01-20 17:19:52,662 : INFO : epoch: 30; val_loss: 0.357309490442276; val_mae: 0.2578487694263458
2024-01-20 17:19:52,664 : INFO : epoch: 30; train_loss: 1.1050198078155518; train_mae: 0.511613130569458


Metric val_loss improved by 0.001 >= min_delta = 0.0001. New best score: 1.168


2024-01-20 17:19:52,671 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/leafy-silence-29/checkpoints/exp_name=0val_loss=1.16815-epoch=30.ckpt


/Users/andrey.golda/Library/Caches/pypoetry/virtualenvs/msu-interpol--lw2ADYE-py3.11/lib/python3.11/site-packages/lightning/pytorch/trainer/call.py:54: Detected KeyboardInterrupt, attempting graceful shutdown...


In [24]:
wandb.finish()

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████
lr_scheduler_monitoring,██████████████████████████▃▃▃▃▁▁
train_loss,█▇▇▆▆▆▅▅▅▄▄▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_mae,█▇▅▄▄▄▄▄▄▃▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trainer/global_step,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
val_loss,█▇▆▆▆▆▅▅▅▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_mae,█▆▄▄▄▄▄▄▄▃▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,30.0
lr_scheduler_monitoring,0.0
train_loss,1.19745
train_mae,0.58877
trainer/global_step,81468.0
val_loss,1.16815
val_mae,0.57397


2024-01-20 17:20:17,792 : DEBUG : Starting new HTTPS connection (17): o151352.ingest.sentry.io:443
2024-01-20 17:20:17,893 : DEBUG : Incremented Retry for (url='/api/4504800232407040/envelope/'): Retry(total=2, connect=None, read=None, redirect=None, status=None)
2024-01-20 17:20:17,895 : DEBUG : Starting new HTTPS connection (18): o151352.ingest.sentry.io:443
2024-01-20 17:20:17,963 : DEBUG : Incremented Retry for (url='/api/4504800232407040/envelope/'): Retry(total=1, connect=None, read=None, redirect=None, status=None)
2024-01-20 17:20:17,966 : DEBUG : Starting new HTTPS connection (19): o151352.ingest.sentry.io:443
2024-01-20 17:20:18,056 : DEBUG : Incremented Retry for (url='/api/4504800232407040/envelope/'): Retry(total=0, connect=None, read=None, redirect=None, status=None)
2024-01-20 17:20:18,058 : DEBUG : Starting new HTTPS connection (20): o151352.ingest.sentry.io:443
2024-01-20 17:20:18,127 : DEBUG : Starting new HTTPS connection (21): o151352.ingest.sentry.io:443
2024-01-20