In [86]:
import os
import sys
import math
import tqdm
import wandb
import torch
import logging

import numpy as np
import pandas as pd
import torch.nn as nn
import lightning.pytorch as pl

from torchmetrics import MeanAbsoluteError
from sklearn.metrics import mean_squared_error
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from lightning.pytorch.loggers import WandbLogger
from sklearn.model_selection import train_test_split
from torch.optim.lr_scheduler import ReduceLROnPlateau
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.callbacks import Callback, LearningRateMonitor, ModelCheckpoint

In [87]:
wandb.login()

True

# FCNN

In [88]:
class RMSELoss(torch.nn.Module):
    def __init__(self):
        super(RMSELoss,self).__init__()

    def forward(self,x,y,w):
        criterion = nn.MSELoss()
        loss = torch.sqrt(criterion(x, y))*w
        return loss.mean()

In [89]:
#params
project_name = "MSU_interpol_by_energy"

logger_path = './wandb_local_logs'
data_path = '../data/clasdb_pi_plus_n.txt'

hyperparams_dict = {
    'scale_data': False,
    'augment': True,
    'augment_factor': 25,
    'test_size': 0.1,
    'batch_size': 256,
    'net_architecture': [5,60,80,100,120,140,240,340,440,640,2000,1040,640,340,240,140,100,80,60,20,1],
    'activation_function': nn.ReLU(),
    'loss_func': RMSELoss(),
    'optim_func': torch.optim.Adam,
    'max_epochs': 2000,
    'es_min_delta': 0.00001,
    'es_patience': 20,
    'lr': 0.001,
    'lr_factor':0.5,
    'lr_patience': 5,
    'lr_cooldown': 20,
}

In [90]:
wandb_logger = WandbLogger(project=project_name,
                           save_dir=logger_path)
exp_name = wandb_logger.experiment.name

logger_full_path = os.path.join(logger_path, project_name, exp_name)

os.makedirs(logger_full_path, exist_ok=True)
logging.basicConfig(encoding='utf-8',
                    level=logging.DEBUG,
                    format='%(asctime)s : %(levelname)s : %(message)s',
                    handlers=[logging.FileHandler(os.path.join(logger_full_path, 'logs.log'), mode='w'),
                              logging.StreamHandler(sys.stdout)],
                    force=True)

2024-03-30 11:09:05,583 : DEBUG : Popen(['git', 'cat-file', '--batch-check'], cwd=/Users/andrey.golda/Documents/Study/MSU_interpol, stdin=<valid stream>, shell=False, universal_newlines=False)


In [91]:
class InterpolDataSet(Dataset):
    def __init__(self, features, labels, weights):
        self.features = features
        self.labels = labels
        self.weights = weights
        self.len = len(labels)

    def __getitem__(self, index):
        feature = self.features[index]
        label = self.labels[index]
        weights = self.weights[index]
        return feature, label, weights

    def __len__(self):
        return self.len

class InterpolDataModule(pl.LightningDataModule):
    def __init__(self, hyperparams):
        super().__init__()
        self.df = None
        self.hyperparams = hyperparams
        self.train_dataset = None
        self.val_dataset = None

    def augment(self, new_augm):
        augm = pd.Series({'Ebeam': np.random.normal(loc=new_augm.Ebeam, scale=new_augm.Ebeam/30),
                           'W': np.random.normal(loc=new_augm.W, scale=new_augm.W/30),
                           'Q2': np.random.normal(loc=new_augm.Q2, scale=new_augm.Q2/30),
                           'cos_theta': np.clip(np.random.normal(loc=new_augm.cos_theta, scale=abs(new_augm.cos_theta/30)), -1, 1),
                           'phi': np.clip(np.random.normal(loc=new_augm.phi, scale=new_augm.phi/30), 0, 2*np.pi),
                           'dsigma_dOmega': np.random.normal(loc=new_augm.dsigma_dOmega, scale=new_augm.error/3),
                           'error': new_augm.error,
                           'weight': new_augm.weight,
                          })
        return augm
    
    def setup(self, stage):
        # data reading and preprocessing
        df = pd.read_csv(data_path, delimiter='\t', header=None)
        df.columns = ['Ebeam', 'W', 'Q2', 'cos_theta', 'phi', 'dsigma_dOmega', 'error', 'id']
        df.loc[8314:65671, 'Ebeam'] = 5.754 # peculiarity of this dataset.
        df['phi'] = df.phi.apply(lambda x: math.radians(x))
        df['weight'] = df['error'].apply(lambda x: x and 1 / x or 100) # x and 1 / x or 100  is just a reversed error but with validation 1/0 error in this case it will return 100
        df = df.drop('id', axis=1)
        df = df.iloc[df[['Ebeam', 'W', 'Q2', 'cos_theta', 'phi']].drop_duplicates().index]

        #critical
        # Ebeam = [5.754]
        # Q2 = [1.72, 2.05, 2.44, 2.91, 3.48, 4.155]
        # df = df[(df.Q2.isin(Q2))&(df.Ebeam.isin(Ebeam))]

        # Ebeam = [5.499]
        # W = [1.830, 1.890, 1.780, 1.950, 2.010, 1.620, 1.660, 1.700, 1.740]
        # df = df[df.Ebeam.isin(Ebeam)&(df.W.isin(W))]

        Ebeam = [1.515]
        df = df[df.Ebeam.isin(Ebeam)]

        #train test split
        feature_columns = ['Ebeam', 'W', 'Q2', 'cos_theta', 'phi']
        feature_columns_with_weights = ['Ebeam', 'W', 'Q2', 'cos_theta', 'phi', 'weight']

        feature_data = df[feature_columns_with_weights]
        label_data = df['dsigma_dOmega']
        
        if self.hyperparams.get('scale_data'):
            scaler_feature = StandardScaler()
            scaler_target = StandardScaler()
            feature_data = scaler_feature.fit_transform(feature_data)
            label_data = scaler_target.fit_transform(label_data.values.reshape(-1,1))
        else:
            pass

        if self.hyperparams.get('augment'):
            aug_series_list = []
            for i in tqdm.tqdm(df.itertuples()):
                for _ in range(self.hyperparams.get('augment_factor')):
                    aug_series_list.append(self.augment(i))

            aug_df = pd.DataFrame(aug_series_list)
            df = pd.concat([df, aug_df])
        else:
            pass

        self.df = df
        
        train_feature_data, val_feature_data, train_label_data, val_label_data = train_test_split(feature_data,
                                                                                                  label_data,
                                                                                                  test_size=self.hyperparams.get('test_size'),
                                                                                                  random_state=1438)
        
        
        self.train_dataset = InterpolDataSet(torch.tensor(train_feature_data[feature_columns].values, dtype=torch.float32),
                                             torch.tensor(train_label_data.values, dtype=torch.float32),
                                             torch.tensor(train_feature_data['weight'].values, dtype=torch.float32))
        
        self.val_dataset = InterpolDataSet(torch.tensor(val_feature_data[feature_columns].values, dtype=torch.float32),
                                           torch.tensor(val_label_data.values, dtype=torch.float32),
                                           torch.tensor(val_feature_data['weight'].values, dtype=torch.float32))
    def train_dataloader(self):
        return DataLoader(dataset = self.train_dataset, batch_size = self.hyperparams.get('batch_size'), shuffle = False, num_workers=0)

    def val_dataloader(self):
        return DataLoader(dataset = self.val_dataset, batch_size = self.hyperparams.get('batch_size'), shuffle = False, num_workers=0)

class PrintCallbacks(Callback):
    def on_train_start(self, trainer, pl_module):
        logging.info("Training is starting")

    def on_train_end(self, trainer, pl_module):
        logging.info("Training is ending")

    def on_train_epoch_end(self, trainer, pl_module):
        epoch_mean = torch.stack(pl_module.training_step_outputs).mean()
        logging.info(f"epoch: {pl_module.current_epoch}; train_loss: {epoch_mean}")
        pl_module.training_step_outputs.clear()

    def on_validation_epoch_end(self, trainer, pl_module):
        epoch_mean = torch.stack(pl_module.validation_step_outputs).mean()
        logging.info(f"epoch: {pl_module.current_epoch}; val_loss: {epoch_mean}")
        pl_module.validation_step_outputs.clear()

class InterpolRegressor(pl.LightningModule):
    def __init__(self, hyperparams):
        super(InterpolRegressor, self).__init__()

        self.train_loss, self.train_mae, self.val_loss, self.val_mae = 0,0,0,0
        self.hyperparams = hyperparams
        self.save_hyperparameters(self.hyperparams)

        self.mae = MeanAbsoluteError()
        self.loss_func = self.hyperparams.get('loss_func')

        self.optim = self.hyperparams.get('optim_func')

        self.net_architecture = self.hyperparams.get('net_architecture')
        self.activation_function = self.hyperparams.get('activation_function')

        self.training_step_outputs = []
        self.validation_step_outputs = []

        self.net = nn.Sequential()
        for i in range(1,len(self.net_architecture)):
            self.net.append(nn.Linear(self.net_architecture[i-1], self.net_architecture[i]))
            if i!=len(self.net_architecture)-1:
                self.net.append(self.activation_function)
            else:
                pass

    def forward(self, x):
        return self.net(x)

    def training_step(self, batch, batch_idx):
        x, y, w = batch
        y_hat = self.forward(x)

        loss = self.loss_func
        self.train_loss = loss.forward(y_hat.reshape(-1), y, w)
        self.train_mae = self.mae(y_hat.reshape(-1), y)

        self.log('train_loss', self.train_loss, batch_size=self.hyperparams['batch_size'],
                 on_step=False, on_epoch=True, prog_bar=True, sync_dist=True, logger=True)
        self.log('train_mae', self.train_mae, batch_size=self.hyperparams['batch_size'],
                 on_step=False, on_epoch=True, prog_bar=True, sync_dist=True, logger=True)

        self.training_step_outputs.append(self.train_loss)
        return self.train_loss

    def validation_step(self, batch, batch_idx):
        x, y, w = batch
        y_hat = self.forward(x)

        loss = self.loss_func
        self.val_loss = loss.forward(y_hat.reshape(-1), y, w)
        self.val_mae = self.mae(y_hat.reshape(-1), y)

        self.log('val_loss', self.val_loss, batch_size=self.hyperparams['batch_size'],
                 on_step=False, on_epoch=True, prog_bar=True, sync_dist=True, logger=True)
        self.log('val_mae', self.val_mae, batch_size=self.hyperparams['batch_size'],
                 on_step=False, on_epoch=True, prog_bar=True, sync_dist=True, logger=True)

        self.validation_step_outputs.append(self.val_loss)
        return self.val_loss

    def on_validation_epoch_end(self):
        sch = self.lr_schedulers()
        if isinstance(sch, torch.optim.lr_scheduler.ReduceLROnPlateau) and self.trainer.current_epoch!=0:
                sch.step(self.trainer.callback_metrics["val_loss"])

    def configure_callbacks(self):
        early_stop_callback = EarlyStopping(monitor="val_loss", mode="min",
                                            min_delta=self.hyperparams.get('es_min_delta'),
                                            patience=self.hyperparams.get('es_patience'),
                                            verbose=True)

        checkpoint_callback = ModelCheckpoint(save_top_k=3,
                                              monitor="val_loss",
                                              mode="min",
                                              dirpath=f"{logger_full_path}/checkpoints",
                                              filename="{exp_name}{val_loss:.5f}-{epoch:02d}")

        lr_monitor = LearningRateMonitor(logging_interval='epoch')

        print_callback = PrintCallbacks()

        return [early_stop_callback, checkpoint_callback, print_callback, lr_monitor]

    def configure_optimizers(self):
        optimizer = self.optim(self.parameters(), lr=self.hyperparams.get('lr'))
        lr_optim = ReduceLROnPlateau(optimizer = optimizer,
                                     mode = 'min',
                                     factor = self.hyperparams.get('lr_factor'),
                                     patience = self.hyperparams.get('lr_patience'),
                                     cooldown=self.hyperparams.get('lr_cooldown'),
                                     threshold=0.01,
                                     verbose= True)
        return {"optimizer": optimizer,
                "lr_scheduler": {
                    "scheduler": lr_optim,
                    "interval": "epoch",
                    "monitor": "val_loss",
                    "frequency": 2,
                    "name": 'lr_scheduler_monitoring'}
                }


In [92]:
data_module = InterpolDataModule(hyperparams=hyperparams_dict)
model = InterpolRegressor(hyperparams=hyperparams_dict)
# model = InterpolRegressor.load_from_checkpoint(f'./wandb_local_logs/MSU_interpol/blooming-plasma-40/checkpoints/exp_name=0val_loss=6.43574-epoch=14.ckpt', hyperparams=hyperparams_dict)

trainer = pl.Trainer(max_epochs=hyperparams_dict.get('max_epochs'),
                     accelerator='cpu',
                     logger=wandb_logger,
                     enable_progress_bar=False)
trainer.fit(model, data_module)

/Users/andrey.golda/Library/Caches/pypoetry/virtualenvs/msu-interpol--lw2ADYE-py3.11/lib/python3.11/site-packages/lightning/pytorch/utilities/parsing.py:198: Attribute 'activation_function' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['activation_function'])`.
/Users/andrey.golda/Library/Caches/pypoetry/virtualenvs/msu-interpol--lw2ADYE-py3.11/lib/python3.11/site-packages/lightning/pytorch/utilities/parsing.py:198: Attribute 'loss_func' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss_func'])`.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
The following callbacks returned in `LightningModule.configure_callbacks` will override existing callbacks passed to Trainer: ModelCheckpoint
8314i

2024-03-30 11:10:04,740 : INFO : epoch: 0; val_loss: 14.265295028686523
2024-03-30 11:10:04,744 : INFO : Training is starting


/Users/andrey.golda/Library/Caches/pypoetry/virtualenvs/msu-interpol--lw2ADYE-py3.11/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
/Users/andrey.golda/Library/Caches/pypoetry/virtualenvs/msu-interpol--lw2ADYE-py3.11/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
/Users/andrey.golda/Library/Caches/pypoetry/virtualenvs/msu-interpol--lw2ADYE-py3.11/lib/python3.11/site-packages/lightning/pytorch/loops/fit_loop.py:293: The number of training batches (30) is smaller than the logging interval Trainer(log_every_n_st

2024-03-30 11:10:09,160 : INFO : epoch: 0; val_loss: 8.682622909545898
2024-03-30 11:10:09,163 : INFO : epoch: 0; train_loss: 10.988037109375


Metric val_loss improved. New best score: 8.683


2024-03-30 11:10:09,234 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol_by_energy/eager-violet-5/checkpoints/exp_name=0val_loss=8.68262-epoch=00.ckpt
2024-03-30 11:10:13,672 : INFO : epoch: 1; val_loss: 7.706509590148926
2024-03-30 11:10:13,674 : INFO : epoch: 1; train_loss: 7.918800354003906


Metric val_loss improved by 0.976 >= min_delta = 1e-05. New best score: 7.707


2024-03-30 11:10:13,740 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol_by_energy/eager-violet-5/checkpoints/exp_name=0val_loss=7.70651-epoch=01.ckpt
2024-03-30 11:10:18,068 : INFO : epoch: 2; val_loss: 7.581689834594727
2024-03-30 11:10:18,071 : INFO : epoch: 2; train_loss: 7.356789588928223


Metric val_loss improved by 0.125 >= min_delta = 1e-05. New best score: 7.582


2024-03-30 11:10:18,151 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol_by_energy/eager-violet-5/checkpoints/exp_name=0val_loss=7.58169-epoch=02.ckpt
2024-03-30 11:10:22,751 : INFO : epoch: 3; val_loss: 7.611301422119141
2024-03-30 11:10:22,754 : INFO : epoch: 3; train_loss: 7.1851301193237305
2024-03-30 11:10:22,824 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol_by_energy/eager-violet-5/checkpoints/exp_name=0val_loss=7.61130-epoch=03.ckpt
2024-03-30 11:10:27,126 : INFO : epoch: 4; val_loss: 7.023857593536377
2024-03-30 11:10:27,129 : INFO : epoch: 4; train_loss: 6.826742172241211


Metric val_loss improved by 0.558 >= min_delta = 1e-05. New best score: 7.024


2024-03-30 11:10:27,195 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol_by_energy/eager-violet-5/checkpoints/exp_name=0val_loss=7.02386-epoch=04.ckpt
2024-03-30 11:10:31,521 : INFO : epoch: 5; val_loss: 8.27336597442627
2024-03-30 11:10:31,523 : INFO : epoch: 5; train_loss: 6.56088924407959
2024-03-30 11:10:35,858 : INFO : epoch: 6; val_loss: 6.6450886726379395
2024-03-30 11:10:35,860 : INFO : epoch: 6; train_loss: 6.709530353546143


Metric val_loss improved by 0.379 >= min_delta = 1e-05. New best score: 6.645


2024-03-30 11:10:35,933 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol_by_energy/eager-violet-5/checkpoints/exp_name=0val_loss=6.64509-epoch=06.ckpt
2024-03-30 11:10:40,204 : INFO : epoch: 7; val_loss: 6.799571514129639
2024-03-30 11:10:40,206 : INFO : epoch: 7; train_loss: 6.149912357330322
2024-03-30 11:10:40,268 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol_by_energy/eager-violet-5/checkpoints/exp_name=0val_loss=6.79957-epoch=07.ckpt
2024-03-30 11:10:44,760 : INFO : epoch: 8; val_loss: 6.902472019195557
2024-03-30 11:10:44,763 : INFO : epoch: 8; train_loss: 6.055094242095947
2024-03-30 11:10:44,834 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol_by_energy/eager-violet-5/checkpoints/exp_name=0val_loss=6.90247-epoch=08.ckpt
2024-03-30 11:10:49,103 : INFO : epoch: 9; val_loss: 6.456174850463867
2024-03-

Metric val_loss improved by 0.189 >= min_delta = 1e-05. New best score: 6.456


2024-03-30 11:10:49,176 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol_by_energy/eager-violet-5/checkpoints/exp_name=0val_loss=6.45617-epoch=09.ckpt
2024-03-30 11:10:53,481 : INFO : epoch: 10; val_loss: 6.16740608215332
2024-03-30 11:10:53,483 : INFO : epoch: 10; train_loss: 5.864798069000244


Metric val_loss improved by 0.289 >= min_delta = 1e-05. New best score: 6.167


2024-03-30 11:10:53,551 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol_by_energy/eager-violet-5/checkpoints/exp_name=0val_loss=6.16741-epoch=10.ckpt
2024-03-30 11:10:57,900 : INFO : epoch: 11; val_loss: 5.405523777008057
2024-03-30 11:10:57,902 : INFO : epoch: 11; train_loss: 5.636384963989258


Metric val_loss improved by 0.762 >= min_delta = 1e-05. New best score: 5.406


2024-03-30 11:10:57,978 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol_by_energy/eager-violet-5/checkpoints/exp_name=0val_loss=5.40552-epoch=11.ckpt
2024-03-30 11:11:02,389 : INFO : epoch: 12; val_loss: 6.513152122497559
2024-03-30 11:11:02,392 : INFO : epoch: 12; train_loss: 5.020949840545654
2024-03-30 11:11:06,659 : INFO : epoch: 13; val_loss: 4.074276924133301
2024-03-30 11:11:06,661 : INFO : epoch: 13; train_loss: 4.825209140777588


Metric val_loss improved by 1.331 >= min_delta = 1e-05. New best score: 4.074


2024-03-30 11:11:06,792 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol_by_energy/eager-violet-5/checkpoints/exp_name=0val_loss=4.07428-epoch=13.ckpt
2024-03-30 11:11:11,140 : INFO : epoch: 14; val_loss: 4.042821407318115
2024-03-30 11:11:11,142 : INFO : epoch: 14; train_loss: 4.537109375


Metric val_loss improved by 0.031 >= min_delta = 1e-05. New best score: 4.043


2024-03-30 11:11:11,211 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol_by_energy/eager-violet-5/checkpoints/exp_name=0val_loss=4.04282-epoch=14.ckpt
2024-03-30 11:11:15,475 : INFO : epoch: 15; val_loss: 4.752250671386719
2024-03-30 11:11:15,477 : INFO : epoch: 15; train_loss: 3.880427360534668
2024-03-30 11:11:15,550 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol_by_energy/eager-violet-5/checkpoints/exp_name=0val_loss=4.75225-epoch=15.ckpt
2024-03-30 11:11:19,964 : INFO : epoch: 16; val_loss: 4.393248558044434
2024-03-30 11:11:19,968 : INFO : epoch: 16; train_loss: 3.7454030513763428
2024-03-30 11:11:20,042 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol_by_energy/eager-violet-5/checkpoints/exp_name=0val_loss=4.39325-epoch=16.ckpt
2024-03-30 11:11:24,347 : INFO : epoch: 17; val_loss: 3.7149369716644287
2

Metric val_loss improved by 0.328 >= min_delta = 1e-05. New best score: 3.715


2024-03-30 11:11:24,425 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol_by_energy/eager-violet-5/checkpoints/exp_name=0val_loss=3.71494-epoch=17.ckpt
2024-03-30 11:11:28,675 : INFO : epoch: 18; val_loss: 3.5878007411956787
2024-03-30 11:11:28,677 : INFO : epoch: 18; train_loss: 3.3559534549713135


Metric val_loss improved by 0.127 >= min_delta = 1e-05. New best score: 3.588


2024-03-30 11:11:28,744 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol_by_energy/eager-violet-5/checkpoints/exp_name=0val_loss=3.58780-epoch=18.ckpt
2024-03-30 11:11:33,205 : INFO : epoch: 19; val_loss: 4.45113468170166
2024-03-30 11:11:33,207 : INFO : epoch: 19; train_loss: 3.802239418029785
2024-03-30 11:11:37,440 : INFO : epoch: 20; val_loss: 2.9823687076568604
2024-03-30 11:11:37,443 : INFO : epoch: 20; train_loss: 3.420135974884033


Metric val_loss improved by 0.605 >= min_delta = 1e-05. New best score: 2.982


2024-03-30 11:11:37,518 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol_by_energy/eager-violet-5/checkpoints/exp_name=0val_loss=2.98237-epoch=20.ckpt
2024-03-30 11:11:41,948 : INFO : epoch: 21; val_loss: 2.926678419113159
2024-03-30 11:11:41,950 : INFO : epoch: 21; train_loss: 3.1683592796325684


Metric val_loss improved by 0.056 >= min_delta = 1e-05. New best score: 2.927


2024-03-30 11:11:42,023 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol_by_energy/eager-violet-5/checkpoints/exp_name=0val_loss=2.92668-epoch=21.ckpt
2024-03-30 11:11:46,493 : INFO : epoch: 22; val_loss: 4.068388938903809
2024-03-30 11:11:46,506 : INFO : epoch: 22; train_loss: 3.1457536220550537
2024-03-30 11:11:50,776 : INFO : epoch: 23; val_loss: 2.940657377243042
2024-03-30 11:11:50,778 : INFO : epoch: 23; train_loss: 2.873232126235962
2024-03-30 11:11:50,842 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol_by_energy/eager-violet-5/checkpoints/exp_name=0val_loss=2.94066-epoch=23.ckpt
2024-03-30 11:11:55,344 : INFO : epoch: 24; val_loss: 3.5547945499420166
2024-03-30 11:11:55,347 : INFO : epoch: 24; train_loss: 3.2903237342834473
2024-03-30 11:11:59,652 : INFO : epoch: 25; val_loss: 3.024230480194092
2024-03-30 11:11:59,654 : INFO : epoch: 25; train_loss: 3.4921345

Metric val_loss improved by 0.219 >= min_delta = 1e-05. New best score: 2.708


2024-03-30 11:12:03,952 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol_by_energy/eager-violet-5/checkpoints/exp_name=0val_loss=2.70792-epoch=26.ckpt
2024-03-30 11:12:08,350 : INFO : epoch: 27; val_loss: 2.7805943489074707
2024-03-30 11:12:08,352 : INFO : epoch: 27; train_loss: 2.4414148330688477
2024-03-30 11:12:08,415 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol_by_energy/eager-violet-5/checkpoints/exp_name=0val_loss=2.78059-epoch=27.ckpt
2024-03-30 11:12:12,699 : INFO : epoch: 28; val_loss: 2.824042558670044
2024-03-30 11:12:12,701 : INFO : epoch: 28; train_loss: 2.5285470485687256
2024-03-30 11:12:12,765 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol_by_energy/eager-violet-5/checkpoints/exp_name=0val_loss=2.82404-epoch=28.ckpt
2024-03-30 11:12:17,171 : INFO : epoch: 29; val_loss: 2.8157103061676025

Metric val_loss improved by 0.097 >= min_delta = 1e-05. New best score: 2.611


2024-03-30 11:13:17,862 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol_by_energy/eager-violet-5/checkpoints/exp_name=0val_loss=2.61128-epoch=43.ckpt
2024-03-30 11:13:22,231 : INFO : epoch: 44; val_loss: 2.588564872741699
2024-03-30 11:13:22,233 : INFO : epoch: 44; train_loss: 2.237534761428833


Metric val_loss improved by 0.023 >= min_delta = 1e-05. New best score: 2.589


2024-03-30 11:13:22,299 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol_by_energy/eager-violet-5/checkpoints/exp_name=0val_loss=2.58856-epoch=44.ckpt
2024-03-30 11:13:26,573 : INFO : epoch: 45; val_loss: 2.578803539276123
2024-03-30 11:13:26,575 : INFO : epoch: 45; train_loss: 2.2337098121643066


Metric val_loss improved by 0.010 >= min_delta = 1e-05. New best score: 2.579


2024-03-30 11:13:26,654 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol_by_energy/eager-violet-5/checkpoints/exp_name=0val_loss=2.57880-epoch=45.ckpt
2024-03-30 11:13:31,090 : INFO : epoch: 46; val_loss: 2.583540678024292
2024-03-30 11:13:31,093 : INFO : epoch: 46; train_loss: 2.22794771194458
2024-03-30 11:13:31,167 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol_by_energy/eager-violet-5/checkpoints/exp_name=0val_loss=2.58354-epoch=46.ckpt
2024-03-30 11:13:35,380 : INFO : epoch: 47; val_loss: 2.589299440383911
2024-03-30 11:13:35,382 : INFO : epoch: 47; train_loss: 2.2245941162109375
2024-03-30 11:13:39,778 : INFO : epoch: 48; val_loss: 2.5844430923461914
2024-03-30 11:13:39,781 : INFO : epoch: 48; train_loss: 2.2204360961914062
2024-03-30 11:13:39,849 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol_by_en

Monitored metric val_loss did not improve in the last 20 records. Best score: 2.579. Signaling Trainer to stop.


2024-03-30 11:14:54,096 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol_by_energy/eager-violet-5/checkpoints/exp_name=0val_loss=2.58143-epoch=65.ckpt
2024-03-30 11:14:54,143 : INFO : Training is ending


In [93]:
wandb.finish()

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
lr_scheduler_monitoring,████████████████▄▄▄▄▄▄▄▄▄▄▂▂▂▂▂▂▂▂▂▂▁▁▁▁
train_loss,█▆▅▄▅▄▄▄▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_mae,█▆▅▄▄▄▄▄▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
val_loss,█▇▇█▆▆▅▄▃▃▃▂▁▁▁▂▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_mae,█▆▇█▅▆▅▄▃▃▃▂▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,65.0
lr_scheduler_monitoring,0.00013
train_loss,2.14632
train_mae,0.76364
trainer/global_step,1979.0
val_loss,2.58143
val_mae,0.90808
