In [123]:
import os
import sys
import math
import wandb
import torch
import logging

import numpy as np
import pandas as pd
import torch.nn as nn
import lightning.pytorch as pl

from torchmetrics import MeanAbsoluteError
from sklearn.metrics import mean_squared_error
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from lightning.pytorch.loggers import WandbLogger
from sklearn.model_selection import train_test_split
from torch.optim.lr_scheduler import ReduceLROnPlateau
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.callbacks import Callback, LearningRateMonitor, ModelCheckpoint

In [124]:
wandb.login()

True

# FCNN

In [125]:
class RMSELoss(torch.nn.Module):
    def __init__(self):
        super(RMSELoss,self).__init__()

    def forward(self,x,y,w):
        criterion = nn.MSELoss()
        loss = torch.sqrt(criterion(x, y))*w
        return loss.mean()

In [126]:
#params
project_name = "MSU_interpol"

logger_path = './wandb_local_logs'
data_path = '../data/clasdb_pi_plus_n.txt'

hyperparams_dict = {
    'scale_data': False,
    'test_size': 0.1,
    'batch_size': 32,
    'net_architecture': [5,60,80,100,120,140,240,340,440,640,2000,1040,640,340,240,140,100,80,60,20,1],
    'activation_function': nn.ReLU(),
    'loss_func': RMSELoss(),
    'optim_func': torch.optim.Adam,
    'max_epochs': 2000,
    'es_min_delta': 0.00001,
    'es_patience': 20,
    'lr': 0.001,
    'lr_factor':0.5,
    'lr_patience': 5,
    'lr_cooldown': 20,
}

In [127]:
wandb_logger = WandbLogger(project=project_name,
                           save_dir=logger_path)
exp_name = wandb_logger.experiment.name

logger_full_path = os.path.join(logger_path, project_name, exp_name)

os.makedirs(logger_full_path, exist_ok=True)
logging.basicConfig(encoding='utf-8',
                    level=logging.DEBUG,
                    format='%(asctime)s : %(levelname)s : %(message)s',
                    handlers=[logging.FileHandler(os.path.join(logger_full_path, 'logs.log'), mode='w'),
                              logging.StreamHandler(sys.stdout)],
                    force=True)

2024-02-02 16:14:29,478 : DEBUG : Popen(['git', 'cat-file', '--batch-check'], cwd=/Users/andrey.golda/Documents/Study/MSU_interpol, stdin=<valid stream>, shell=False, universal_newlines=False)


In [128]:
class InterpolDataSet(Dataset):
    def __init__(self, features, labels, weights):
        self.features = features
        self.labels = labels
        self.weights = weights
        self.len = len(labels)

    def __getitem__(self, index):
        feature = self.features[index]
        label = self.labels[index]
        weights = self.weights[index]
        return feature, label, weights

    def __len__(self):
        return self.len

class InterpolDataModule(pl.LightningDataModule):
    def __init__(self, hyperparams):
        super().__init__()
        self.df = None
        self.hyperparams = hyperparams
        self.train_dataset = None
        self.val_dataset = None
    
    def setup(self, stage):
        # data reading and preprocessing
        df = pd.read_csv(data_path, delimiter='\t', header=None)
        df.columns = ['Ebeam', 'W', 'Q2', 'cos_theta', 'phi', 'dsigma_dOmega', 'error', 'id']
        df.loc[8314:65671, 'Ebeam'] = 5.754 # peculiarity of this dataset.
        df['phi'] = df.phi.apply(lambda x: math.radians(x))
        df['weight'] = df['error'].apply(lambda x: x and 1 / x or 100) # x and 1 / x or 100  is just a reversed error but with validation 1/0 error in this case it will return 100
        df = df.drop('id', axis=1)
        df = df.iloc[df[['Ebeam', 'W', 'Q2', 'cos_theta', 'phi']].drop_duplicates().index]
        self.df = df


        #train test split
        feature_columns = ['Ebeam', 'W', 'Q2', 'cos_theta', 'phi']
        feature_columns_with_weights = ['Ebeam', 'W', 'Q2', 'cos_theta', 'phi', 'weight']

        feature_data = df[feature_columns_with_weights]
        label_data = df['dsigma_dOmega']
        
        if self.hyperparams.get('scale_data'):
            scaler_feature = StandardScaler()
            scaler_target = StandardScaler()
            feature_data = scaler_feature.fit_transform(feature_data)
            label_data = scaler_target.fit_transform(label_data.values.reshape(-1,1))
        else:
            pass
        
        train_feature_data, val_feature_data, train_label_data, val_label_data = train_test_split(feature_data,
                                                                                                  label_data,
                                                                                                  test_size=self.hyperparams.get('test_size'),
                                                                                                  random_state=1438)
        
        
        self.train_dataset = InterpolDataSet(torch.tensor(train_feature_data[feature_columns].values, dtype=torch.float32),
                                             torch.tensor(train_label_data.values, dtype=torch.float32),
                                             torch.tensor(train_feature_data['weight'].values, dtype=torch.float32))
        
        self.val_dataset = InterpolDataSet(torch.tensor(val_feature_data[feature_columns].values, dtype=torch.float32),
                                           torch.tensor(val_label_data.values, dtype=torch.float32),
                                           torch.tensor(val_feature_data['weight'].values, dtype=torch.float32))
    def train_dataloader(self):
        return DataLoader(dataset = self.train_dataset, batch_size = self.hyperparams.get('batch_size'), shuffle = False, num_workers=0)

    def val_dataloader(self):
        return DataLoader(dataset = self.val_dataset, batch_size = self.hyperparams.get('batch_size'), shuffle = False, num_workers=0)

class PrintCallbacks(Callback):
    def on_train_start(self, trainer, pl_module):
        logging.info("Training is starting")

    def on_train_end(self, trainer, pl_module):
        logging.info("Training is ending")

    def on_train_epoch_end(self, trainer, pl_module):
        epoch_mean = torch.stack(pl_module.training_step_outputs).mean()
        logging.info(f"epoch: {pl_module.current_epoch}; train_loss: {epoch_mean}")
        pl_module.training_step_outputs.clear()

    def on_validation_epoch_end(self, trainer, pl_module):
        epoch_mean = torch.stack(pl_module.validation_step_outputs).mean()
        logging.info(f"epoch: {pl_module.current_epoch}; val_loss: {epoch_mean}")
        pl_module.validation_step_outputs.clear()

class InterpolRegressor(pl.LightningModule):
    def __init__(self, hyperparams):
        super(InterpolRegressor, self).__init__()

        self.train_loss, self.train_mae, self.val_loss, self.val_mae = 0,0,0,0
        self.hyperparams = hyperparams
        self.save_hyperparameters(self.hyperparams)

        self.mae = MeanAbsoluteError()
        self.loss_func = self.hyperparams.get('loss_func')

        self.optim = self.hyperparams.get('optim_func')

        self.net_architecture = self.hyperparams.get('net_architecture')
        self.activation_function = self.hyperparams.get('activation_function')

        self.training_step_outputs = []
        self.validation_step_outputs = []

        self.net = nn.Sequential()
        for i in range(1,len(self.net_architecture)):
            self.net.append(nn.Linear(self.net_architecture[i-1], self.net_architecture[i]))
            if i!=len(self.net_architecture)-1:
                self.net.append(self.activation_function)
            else:
                pass

    def forward(self, x):
        return self.net(x)

    def training_step(self, batch, batch_idx):
        x, y, w = batch
        y_hat = self.forward(x)

        loss = self.loss_func
        self.train_loss = loss.forward(y_hat.reshape(-1), y, w)
        self.train_mae = self.mae(y_hat.reshape(-1), y)

        self.log('train_loss', self.train_loss, batch_size=self.hyperparams['batch_size'],
                 on_step=False, on_epoch=True, prog_bar=True, sync_dist=True, logger=True)
        self.log('train_mae', self.train_mae, batch_size=self.hyperparams['batch_size'],
                 on_step=False, on_epoch=True, prog_bar=True, sync_dist=True, logger=True)

        self.training_step_outputs.append(self.train_loss)
        return self.train_loss

    def validation_step(self, batch, batch_idx):
        x, y, w = batch
        y_hat = self.forward(x)

        loss = self.loss_func
        self.val_loss = loss.forward(y_hat.reshape(-1), y, w)
        self.val_mae = self.mae(y_hat.reshape(-1), y)

        self.log('val_loss', self.val_loss, batch_size=self.hyperparams['batch_size'],
                 on_step=False, on_epoch=True, prog_bar=True, sync_dist=True, logger=True)
        self.log('val_mae', self.val_mae, batch_size=self.hyperparams['batch_size'],
                 on_step=False, on_epoch=True, prog_bar=True, sync_dist=True, logger=True)

        self.validation_step_outputs.append(self.val_loss)
        return self.val_loss

    def on_validation_epoch_end(self):
        sch = self.lr_schedulers()
        if isinstance(sch, torch.optim.lr_scheduler.ReduceLROnPlateau) and self.trainer.current_epoch!=0:
                sch.step(self.trainer.callback_metrics["val_loss"])

    def configure_callbacks(self):
        early_stop_callback = EarlyStopping(monitor="val_loss", mode="min",
                                            min_delta=self.hyperparams.get('es_min_delta'),
                                            patience=self.hyperparams.get('es_patience'),
                                            verbose=True)

        checkpoint_callback = ModelCheckpoint(save_top_k=3,
                                              monitor="val_loss",
                                              mode="min",
                                              dirpath=f"{logger_full_path}/checkpoints",
                                              filename="{exp_name}{val_loss:.5f}-{epoch:02d}")

        lr_monitor = LearningRateMonitor(logging_interval='epoch')

        print_callback = PrintCallbacks()

        return [early_stop_callback, checkpoint_callback, print_callback, lr_monitor]

    def configure_optimizers(self):
        optimizer = self.optim(self.parameters(), lr=self.hyperparams.get('lr'))
        lr_optim = ReduceLROnPlateau(optimizer = optimizer,
                                     mode = 'min',
                                     factor = self.hyperparams.get('lr_factor'),
                                     patience = self.hyperparams.get('lr_patience'),
                                     cooldown=self.hyperparams.get('lr_cooldown'),
                                     threshold=0.01,
                                     verbose= True)
        return {"optimizer": optimizer,
                "lr_scheduler": {
                    "scheduler": lr_optim,
                    "interval": "epoch",
                    "monitor": "val_loss",
                    "frequency": 2,
                    "name": 'lr_scheduler_monitoring'}
                }


In [None]:
data_module = InterpolDataModule(hyperparams=hyperparams_dict)
model = InterpolRegressor(hyperparams=hyperparams_dict)
# model = InterpolRegressor.load_from_checkpoint(f'./wandb_local_logs/MSU_interpol/fresh-surf-31/checkpoints/exp_name=0val_loss=0.32796-epoch=57.ckpt', hyperparams=hyperparams_dict)

trainer = pl.Trainer(max_epochs=hyperparams_dict.get('max_epochs'),
                     accelerator='cpu',
                     logger=wandb_logger,
                     enable_progress_bar=False)
trainer.fit(model, data_module)

/Users/andrey.golda/Library/Caches/pypoetry/virtualenvs/msu-interpol--lw2ADYE-py3.11/lib/python3.11/site-packages/lightning/pytorch/utilities/parsing.py:198: Attribute 'activation_function' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['activation_function'])`.
/Users/andrey.golda/Library/Caches/pypoetry/virtualenvs/msu-interpol--lw2ADYE-py3.11/lib/python3.11/site-packages/lightning/pytorch/utilities/parsing.py:198: Attribute 'loss_func' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss_func'])`.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
The following callbacks returned in `LightningModule.configure_callbacks` will override existing callbacks passed to Trainer: ModelCheckpoint

  | 

2024-02-02 16:14:35,276 : INFO : epoch: 0; val_loss: 18.674592971801758
2024-02-02 16:14:35,281 : INFO : Training is starting


/Users/andrey.golda/Library/Caches/pypoetry/virtualenvs/msu-interpol--lw2ADYE-py3.11/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
/Users/andrey.golda/Library/Caches/pypoetry/virtualenvs/msu-interpol--lw2ADYE-py3.11/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


2024-02-02 16:17:05,922 : INFO : epoch: 0; val_loss: 13.89778995513916
2024-02-02 16:17:05,927 : INFO : epoch: 0; train_loss: 17.809070587158203


Metric val_loss improved. New best score: 13.898


2024-02-02 16:17:06,091 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/visionary-dew-39/checkpoints/exp_name=0val_loss=13.89779-epoch=00.ckpt
2024-02-02 16:19:42,266 : INFO : epoch: 1; val_loss: 12.569906234741211
2024-02-02 16:19:42,273 : INFO : epoch: 1; train_loss: 14.843350410461426


Metric val_loss improved by 1.328 >= min_delta = 1e-05. New best score: 12.570


2024-02-02 16:19:42,477 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/visionary-dew-39/checkpoints/exp_name=0val_loss=12.56991-epoch=01.ckpt
2024-02-02 16:22:22,631 : INFO : epoch: 2; val_loss: 10.14493465423584
2024-02-02 16:22:22,639 : INFO : epoch: 2; train_loss: 12.701802253723145


Metric val_loss improved by 2.425 >= min_delta = 1e-05. New best score: 10.145


2024-02-02 16:22:22,805 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/visionary-dew-39/checkpoints/exp_name=0val_loss=10.14494-epoch=02.ckpt
2024-02-02 16:25:10,384 : INFO : epoch: 3; val_loss: 9.686511993408203
2024-02-02 16:25:10,393 : INFO : epoch: 3; train_loss: 11.01168441772461


Metric val_loss improved by 0.458 >= min_delta = 1e-05. New best score: 9.687


2024-02-02 16:25:10,623 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/visionary-dew-39/checkpoints/exp_name=0val_loss=9.68652-epoch=03.ckpt
2024-02-02 16:28:24,689 : INFO : epoch: 4; val_loss: 9.099124908447266
2024-02-02 16:28:24,699 : INFO : epoch: 4; train_loss: 9.462930679321289


Metric val_loss improved by 0.587 >= min_delta = 1e-05. New best score: 9.099


2024-02-02 16:28:24,928 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/visionary-dew-39/checkpoints/exp_name=0val_loss=9.09912-epoch=04.ckpt
2024-02-02 16:31:22,601 : INFO : epoch: 5; val_loss: 8.970876693725586
2024-02-02 16:31:22,607 : INFO : epoch: 5; train_loss: 9.115650177001953


Metric val_loss improved by 0.128 >= min_delta = 1e-05. New best score: 8.971


2024-02-02 16:31:22,769 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/visionary-dew-39/checkpoints/exp_name=0val_loss=8.97088-epoch=05.ckpt
2024-02-02 16:33:52,883 : INFO : epoch: 6; val_loss: 6.396556377410889
2024-02-02 16:33:52,889 : INFO : epoch: 6; train_loss: 8.365659713745117


Metric val_loss improved by 2.574 >= min_delta = 1e-05. New best score: 6.397


2024-02-02 16:33:53,065 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/visionary-dew-39/checkpoints/exp_name=0val_loss=6.39655-epoch=06.ckpt
2024-02-02 16:36:34,911 : INFO : epoch: 7; val_loss: 7.189112186431885
2024-02-02 16:36:34,916 : INFO : epoch: 7; train_loss: 8.177916526794434
2024-02-02 16:36:35,074 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/visionary-dew-39/checkpoints/exp_name=0val_loss=7.18911-epoch=07.ckpt
2024-02-02 16:39:03,539 : INFO : epoch: 8; val_loss: 6.5800323486328125
2024-02-02 16:39:03,544 : INFO : epoch: 8; train_loss: 8.224462509155273
2024-02-02 16:39:03,701 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/visionary-dew-39/checkpoints/exp_name=0val_loss=6.58003-epoch=08.ckpt
2024-02-02 16:41:33,635 : INFO : epoch: 9; val_loss: 6.198523998260498
2024-02-02 16:41:33,641 : INFO 

Metric val_loss improved by 0.198 >= min_delta = 1e-05. New best score: 6.199


2024-02-02 16:41:33,802 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/visionary-dew-39/checkpoints/exp_name=0val_loss=6.19853-epoch=09.ckpt
2024-02-02 16:44:03,228 : INFO : epoch: 10; val_loss: 7.3740034103393555
2024-02-02 16:44:03,234 : INFO : epoch: 10; train_loss: 7.732532501220703
2024-02-02 16:46:43,481 : INFO : epoch: 11; val_loss: 6.5831451416015625
2024-02-02 16:46:43,488 : INFO : epoch: 11; train_loss: 7.386973857879639
2024-02-02 16:49:14,426 : INFO : epoch: 12; val_loss: 6.824430465698242
2024-02-02 16:49:14,432 : INFO : epoch: 12; train_loss: 7.430947780609131
2024-02-02 16:51:45,974 : INFO : epoch: 13; val_loss: 8.354735374450684
Epoch 00019: reducing learning rate of group 0 to 5.0000e-04.
2024-02-02 16:51:45,980 : INFO : epoch: 13; train_loss: 7.342649459838867
2024-02-02 16:54:26,916 : INFO : epoch: 14; val_loss: 5.907252311706543
2024-02-02 16:54:26,923 : INFO : epoch: 14; train_loss: 6.244832515716553


Metric val_loss improved by 0.291 >= min_delta = 1e-05. New best score: 5.907


2024-02-02 16:54:27,108 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/visionary-dew-39/checkpoints/exp_name=0val_loss=5.90725-epoch=14.ckpt
2024-02-02 16:57:06,921 : INFO : epoch: 15; val_loss: 6.51002836227417
2024-02-02 16:57:06,926 : INFO : epoch: 15; train_loss: 6.199605941772461
2024-02-02 16:59:47,338 : INFO : epoch: 16; val_loss: 5.671895980834961
2024-02-02 16:59:47,346 : INFO : epoch: 16; train_loss: 6.10825252532959


Metric val_loss improved by 0.235 >= min_delta = 1e-05. New best score: 5.672


2024-02-02 16:59:47,535 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/visionary-dew-39/checkpoints/exp_name=0val_loss=5.67190-epoch=16.ckpt
2024-02-02 17:02:19,334 : INFO : epoch: 17; val_loss: 5.549639701843262
2024-02-02 17:02:19,340 : INFO : epoch: 17; train_loss: 6.08781099319458


Metric val_loss improved by 0.122 >= min_delta = 1e-05. New best score: 5.550


2024-02-02 17:02:19,530 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/visionary-dew-39/checkpoints/exp_name=0val_loss=5.54964-epoch=17.ckpt
2024-02-02 17:04:49,890 : INFO : epoch: 18; val_loss: 5.970808982849121
2024-02-02 17:04:49,895 : INFO : epoch: 18; train_loss: 6.019715785980225
2024-02-02 17:07:34,799 : INFO : epoch: 19; val_loss: 5.949369430541992
2024-02-02 17:07:34,805 : INFO : epoch: 19; train_loss: 6.043981552124023
2024-02-02 17:10:08,359 : INFO : epoch: 20; val_loss: 5.603267192840576
2024-02-02 17:10:08,365 : INFO : epoch: 20; train_loss: 5.937986850738525
2024-02-02 17:10:08,541 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/visionary-dew-39/checkpoints/exp_name=0val_loss=5.60327-epoch=20.ckpt
2024-02-02 17:12:44,723 : INFO : epoch: 21; val_loss: 5.751530170440674
2024-02-02 17:12:44,729 : INFO : epoch: 21; train_loss: 5.919928550720215
2024-02-02

Metric val_loss improved by 0.461 >= min_delta = 1e-05. New best score: 5.089


2024-02-02 17:42:58,836 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/visionary-dew-39/checkpoints/exp_name=0val_loss=5.08856-epoch=31.ckpt
2024-02-02 17:45:47,043 : INFO : epoch: 32; val_loss: 5.48366117477417
2024-02-02 17:45:47,049 : INFO : epoch: 32; train_loss: 5.120527267456055
2024-02-02 17:45:47,229 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/visionary-dew-39/checkpoints/exp_name=0val_loss=5.48366-epoch=32.ckpt
2024-02-02 17:48:34,829 : INFO : epoch: 33; val_loss: 5.044585227966309
2024-02-02 17:48:34,834 : INFO : epoch: 33; train_loss: 5.097151756286621


Metric val_loss improved by 0.044 >= min_delta = 1e-05. New best score: 5.045


2024-02-02 17:48:35,017 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/visionary-dew-39/checkpoints/exp_name=0val_loss=5.04459-epoch=33.ckpt
2024-02-02 17:51:22,616 : INFO : epoch: 34; val_loss: 4.995327949523926
2024-02-02 17:51:22,622 : INFO : epoch: 34; train_loss: 5.052473545074463


Metric val_loss improved by 0.049 >= min_delta = 1e-05. New best score: 4.995


2024-02-02 17:51:22,806 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/visionary-dew-39/checkpoints/exp_name=0val_loss=4.99533-epoch=34.ckpt
2024-02-02 17:54:10,627 : INFO : epoch: 35; val_loss: 5.01591682434082
2024-02-02 17:54:10,633 : INFO : epoch: 35; train_loss: 5.034736156463623
2024-02-02 17:54:10,808 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/visionary-dew-39/checkpoints/exp_name=0val_loss=5.01592-epoch=35.ckpt
2024-02-02 17:56:58,517 : INFO : epoch: 36; val_loss: 5.056362628936768
2024-02-02 17:56:58,524 : INFO : epoch: 36; train_loss: 5.014590263366699
2024-02-02 18:00:03,326 : INFO : epoch: 37; val_loss: 5.123875617980957
2024-02-02 18:00:03,332 : INFO : epoch: 37; train_loss: 4.989715099334717
2024-02-02 18:02:51,350 : INFO : epoch: 38; val_loss: 5.058656692504883
2024-02-02 18:02:51,355 : INFO : epoch: 38; train_loss: 4.985382556915283
2024-02-02 

Metric val_loss improved by 0.049 >= min_delta = 1e-05. New best score: 4.946


2024-02-02 18:14:50,024 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/visionary-dew-39/checkpoints/exp_name=0val_loss=4.94641-epoch=42.ckpt
2024-02-02 18:19:20,882 : INFO : epoch: 43; val_loss: 5.201143741607666
2024-02-02 18:19:20,890 : INFO : epoch: 43; train_loss: 4.886948108673096
2024-02-02 20:29:42,575 : INFO : epoch: 44; val_loss: 5.359710693359375
2024-02-02 20:29:42,660 : INFO : epoch: 44; train_loss: 4.879541397094727
2024-02-02 20:33:45,786 : INFO : epoch: 45; val_loss: 5.297131061553955
2024-02-02 20:33:45,793 : INFO : epoch: 45; train_loss: 4.870675563812256
2024-02-02 20:36:47,059 : INFO : epoch: 46; val_loss: 5.258267402648926
2024-02-02 20:36:47,065 : INFO : epoch: 46; train_loss: 4.841917037963867
2024-02-02 20:39:37,758 : INFO : epoch: 47; val_loss: 5.315807342529297
2024-02-02 20:39:37,764 : INFO : epoch: 47; train_loss: 4.850797653198242
Epoch 00071: reducing learning rate of group 0 to 1.2500e-04.
2024-

Metric val_loss improved by 0.288 >= min_delta = 1e-05. New best score: 4.658


2024-02-02 20:42:42,155 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/visionary-dew-39/checkpoints/exp_name=0val_loss=4.65815-epoch=48.ckpt
2024-02-02 20:45:44,887 : INFO : epoch: 49; val_loss: 4.654748916625977
2024-02-02 20:45:44,894 : INFO : epoch: 49; train_loss: 4.612788677215576


Metric val_loss improved by 0.003 >= min_delta = 1e-05. New best score: 4.655


2024-02-02 20:45:45,089 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/visionary-dew-39/checkpoints/exp_name=0val_loss=4.65475-epoch=49.ckpt
2024-02-02 20:48:49,149 : INFO : epoch: 50; val_loss: 4.646838665008545
2024-02-02 20:48:49,155 : INFO : epoch: 50; train_loss: 4.599214553833008


Metric val_loss improved by 0.008 >= min_delta = 1e-05. New best score: 4.647


2024-02-02 20:48:49,344 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/visionary-dew-39/checkpoints/exp_name=0val_loss=4.64684-epoch=50.ckpt
2024-02-02 20:51:50,414 : INFO : epoch: 51; val_loss: 4.635341644287109
2024-02-02 20:51:50,419 : INFO : epoch: 51; train_loss: 4.592110633850098


Metric val_loss improved by 0.011 >= min_delta = 1e-05. New best score: 4.635


2024-02-02 20:51:50,615 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/visionary-dew-39/checkpoints/exp_name=0val_loss=4.63534-epoch=51.ckpt
2024-02-02 20:54:52,785 : INFO : epoch: 52; val_loss: 4.6446919441223145
2024-02-02 20:54:52,792 : INFO : epoch: 52; train_loss: 4.58595609664917
2024-02-02 20:54:52,964 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/visionary-dew-39/checkpoints/exp_name=0val_loss=4.64469-epoch=52.ckpt
2024-02-02 20:57:42,938 : INFO : epoch: 53; val_loss: 4.6612324714660645
2024-02-02 20:57:42,945 : INFO : epoch: 53; train_loss: 4.566359519958496
2024-02-02 21:00:43,906 : INFO : epoch: 54; val_loss: 4.654221057891846
2024-02-02 21:00:43,912 : INFO : epoch: 54; train_loss: 4.56739616394043
2024-02-02 21:03:31,634 : INFO : epoch: 55; val_loss: 4.654208183288574
2024-02-02 21:03:31,640 : INFO : epoch: 55; train_loss: 4.560777187347412
2024-02-02

Metric val_loss improved by 0.110 >= min_delta = 1e-05. New best score: 4.525


2024-02-02 21:36:49,720 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/visionary-dew-39/checkpoints/exp_name=0val_loss=4.52546-epoch=66.ckpt
2024-02-02 21:39:53,573 : INFO : epoch: 67; val_loss: 4.52344274520874
2024-02-02 21:39:53,579 : INFO : epoch: 67; train_loss: 4.393800258636475


Metric val_loss improved by 0.002 >= min_delta = 1e-05. New best score: 4.523


2024-02-02 21:39:53,779 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/visionary-dew-39/checkpoints/exp_name=0val_loss=4.52344-epoch=67.ckpt
2024-02-02 21:42:57,667 : INFO : epoch: 68; val_loss: 4.513409614562988
2024-02-02 21:42:57,673 : INFO : epoch: 68; train_loss: 4.38801908493042


Metric val_loss improved by 0.010 >= min_delta = 1e-05. New best score: 4.513


2024-02-02 21:42:57,888 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/visionary-dew-39/checkpoints/exp_name=0val_loss=4.51341-epoch=68.ckpt
2024-02-02 21:47:33,596 : INFO : epoch: 69; val_loss: 4.519779682159424
2024-02-02 21:47:33,603 : INFO : epoch: 69; train_loss: 4.381381511688232
2024-02-02 21:47:33,772 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/visionary-dew-39/checkpoints/exp_name=0val_loss=4.51978-epoch=69.ckpt
2024-02-02 21:53:06,765 : INFO : epoch: 70; val_loss: 4.514081001281738
2024-02-02 21:53:06,772 : INFO : epoch: 70; train_loss: 4.377162456512451
2024-02-02 21:53:06,960 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/visionary-dew-39/checkpoints/exp_name=0val_loss=4.51408-epoch=70.ckpt
2024-02-02 21:58:40,427 : INFO : epoch: 71; val_loss: 4.517697811126709
2024-02-02 21:58:40,433 : I

Metric val_loss improved by 0.001 >= min_delta = 1e-05. New best score: 4.513


2024-02-02 22:04:14,453 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/visionary-dew-39/checkpoints/exp_name=0val_loss=4.51277-epoch=72.ckpt
2024-02-02 22:10:19,791 : INFO : epoch: 73; val_loss: 4.507742881774902
2024-02-02 22:10:19,796 : INFO : epoch: 73; train_loss: 4.3655242919921875


Metric val_loss improved by 0.005 >= min_delta = 1e-05. New best score: 4.508


2024-02-02 22:10:19,972 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/visionary-dew-39/checkpoints/exp_name=0val_loss=4.50774-epoch=73.ckpt
2024-02-02 22:15:53,051 : INFO : epoch: 74; val_loss: 4.510360240936279
2024-02-02 22:15:53,058 : INFO : epoch: 74; train_loss: 4.362704277038574
2024-02-02 22:15:53,236 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/visionary-dew-39/checkpoints/exp_name=0val_loss=4.51036-epoch=74.ckpt
2024-02-02 22:21:30,414 : INFO : epoch: 75; val_loss: 4.50709342956543
2024-02-02 22:21:30,420 : INFO : epoch: 75; train_loss: 4.357779502868652


Metric val_loss improved by 0.001 >= min_delta = 1e-05. New best score: 4.507


2024-02-02 22:21:30,601 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/visionary-dew-39/checkpoints/exp_name=0val_loss=4.50709-epoch=75.ckpt
2024-02-02 22:27:05,526 : INFO : epoch: 76; val_loss: 4.507728099822998
2024-02-02 22:27:05,532 : INFO : epoch: 76; train_loss: 4.354898452758789
2024-02-02 22:27:05,704 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/visionary-dew-39/checkpoints/exp_name=0val_loss=4.50773-epoch=76.ckpt
2024-02-02 22:32:42,235 : INFO : epoch: 77; val_loss: 4.507563591003418
2024-02-02 22:32:42,240 : INFO : epoch: 77; train_loss: 4.350592613220215
2024-02-02 22:32:42,414 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/visionary-dew-39/checkpoints/exp_name=0val_loss=4.50756-epoch=77.ckpt
2024-02-02 22:38:17,423 : INFO : epoch: 78; val_loss: 4.5100579261779785
2024-02-02 22:38:17,429 : 

Metric val_loss improved by 0.000 >= min_delta = 1e-05. New best score: 4.507


2024-02-02 22:50:25,051 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/visionary-dew-39/checkpoints/exp_name=0val_loss=4.50669-epoch=80.ckpt
2024-02-02 22:56:02,661 : INFO : epoch: 81; val_loss: 4.506224632263184
2024-02-02 22:56:02,666 : INFO : epoch: 81; train_loss: 4.338267803192139


Metric val_loss improved by 0.000 >= min_delta = 1e-05. New best score: 4.506


2024-02-02 22:56:02,843 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/visionary-dew-39/checkpoints/exp_name=0val_loss=4.50622-epoch=81.ckpt
2024-02-02 23:01:44,234 : INFO : epoch: 82; val_loss: 4.495099067687988
Epoch 00123: reducing learning rate of group 0 to 3.1250e-05.
2024-02-02 23:01:44,240 : INFO : epoch: 82; train_loss: 4.335699081420898


Metric val_loss improved by 0.011 >= min_delta = 1e-05. New best score: 4.495


2024-02-02 23:01:44,436 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/visionary-dew-39/checkpoints/exp_name=0val_loss=4.49510-epoch=82.ckpt
2024-02-02 23:07:50,394 : INFO : epoch: 83; val_loss: 4.48472261428833
2024-02-02 23:07:50,399 : INFO : epoch: 83; train_loss: 4.297994613647461


Metric val_loss improved by 0.010 >= min_delta = 1e-05. New best score: 4.485


2024-02-02 23:07:50,558 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/visionary-dew-39/checkpoints/exp_name=0val_loss=4.48472-epoch=83.ckpt
2024-02-02 23:13:56,841 : INFO : epoch: 84; val_loss: 4.4887237548828125
2024-02-02 23:13:56,847 : INFO : epoch: 84; train_loss: 4.289949893951416
2024-02-02 23:13:57,037 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/visionary-dew-39/checkpoints/exp_name=0val_loss=4.48872-epoch=84.ckpt
2024-02-02 23:19:34,473 : INFO : epoch: 85; val_loss: 4.482151985168457
2024-02-02 23:19:34,479 : INFO : epoch: 85; train_loss: 4.2870192527771


Metric val_loss improved by 0.003 >= min_delta = 1e-05. New best score: 4.482


2024-02-02 23:19:34,657 : DEBUG : open file: /Users/andrey.golda/Documents/Study/MSU_interpol/training/wandb_local_logs/MSU_interpol/visionary-dew-39/checkpoints/exp_name=0val_loss=4.48215-epoch=85.ckpt


In [120]:
wandb.finish()

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▂▂▁▂▁▂▁▁▂▁▂▂▃▃▃▄▄▅▅▆▆▆▇▇█▁▂▂▃▁▂▂▃▃▁▂▁▁
lr_scheduler_monitoring,████████████████▄▄▄▄▄▂▂▂▂▂▁███▄███▄█████
train_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█
train_mae,▃▃▃▃██▄▃▃▆▅▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▂▂▂▂▁▁▁▁▁▂▂▂▂
trainer/global_step,▁▂▂▂▁▂▁▂▂▂▂▂▂▂▂▃▃▄▄▅▅▆▆▇▇▇█▁▂▂▃▁▂▂▃▃▁▂▁▂
val_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█
val_mae,▃▃▃▃██▄▃▃▅▅█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▂▂▂▂▁▁▁▁▁▂▂▂▂

0,1
epoch,1.0
lr_scheduler_monitoring,0.0
train_loss,31.04678
train_mae,1.04107
trainer/global_step,5256.0
val_loss,29.64243
val_mae,1.00007


2024-02-02 16:12:17,798 : DEBUG : Starting new HTTPS connection (1): o151352.ingest.sentry.io:443
2024-02-02 16:12:18,195 : DEBUG : Incremented Retry for (url='/api/4504800232407040/envelope/'): Retry(total=2, connect=None, read=None, redirect=None, status=None)
2024-02-02 16:12:18,198 : DEBUG : Starting new HTTPS connection (2): o151352.ingest.sentry.io:443
2024-02-02 16:12:18,572 : DEBUG : Incremented Retry for (url='/api/4504800232407040/envelope/'): Retry(total=1, connect=None, read=None, redirect=None, status=None)
2024-02-02 16:12:18,574 : DEBUG : Starting new HTTPS connection (3): o151352.ingest.sentry.io:443
2024-02-02 16:12:18,942 : DEBUG : Incremented Retry for (url='/api/4504800232407040/envelope/'): Retry(total=0, connect=None, read=None, redirect=None, status=None)
2024-02-02 16:12:18,943 : DEBUG : Starting new HTTPS connection (4): o151352.ingest.sentry.io:443
2024-02-02 16:12:19,441 : DEBUG : Starting new HTTPS connection (5): o151352.ingest.sentry.io:443
