# Setup


In [2]:
import math
import os

import numpy as np
import torch

ROOT_DIR=os.path.dirname("/kaggle/working")
DATA_DIR=os.path.join("/kaggle/input/go-daddy/godaddy-microbusiness-density-forecasting") ##Directory of dataset

EXPERIMENTS_DIR=os.path.join(ROOT_DIR, "logs/experiments")
use_cuda = torch .cuda.is_available()
DEVICE = torch.device("cuda" if use_cuda else "cpu")


N_CENSUS_FEATURES= 5 #pct_bb,pct_college,pct_foreign_born,pct_it_workers,median_hh_inc
#cfips is not considered as a feature we use a one-hot encoding for it


USE_CENSUS= False #Without census features

AE_LATENT_DIM= 32

LSTM_HIDDEN_DIM = 8

SEQ_LEN=6
SEQ_STRIDE= 1

N_COUNTY=3142
N_DIMS_COUNTY_ENCODING=  math.ceil(math.log(N_COUNTY,2))

FEATURES_AE_CENSUS_DIR=os.path.join(EXPERIMENTS_DIR, "features_ae_2_dims")
FEATURES_AE_LATENT_DIM= 2

TRAIN_FILE= os.path.join(DATA_DIR, "train.csv")
TEST_FILE= os.path.join(DATA_DIR, "test.csv")

CENSUS_FILE =os.path.join(DATA_DIR, "census_interpolated.csv")

NB_FUTURES= 10 #Number of days to predict


#Scaling factors for microbusiness density
MEAN_MB= 3.817671
STD_MB= 4.991087

MAX_MB= 300
MIN_MB= 0.0

## Utils

In [3]:
import json
import os
from enum import Enum
from itertools import islice
import numpy as np
import pandas as pd
import torch
import logging
import os
import sys
from time import strftime
def setup_logger(args):
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    a_logger = logging.getLogger()
    a_logger.setLevel(args.log_level)
    log_dir=os.path.join(ROOT_DIR,"logs","output_logs")
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    output_file_handler = logging.FileHandler(os.path.join(log_dir,strftime("log_%d_%m_%Y_%H_%M.log")))
    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setFormatter(formatter)
    a_logger.propagate=False
    a_logger.addHandler(output_file_handler)
    a_logger.addHandler(stdout_handler)

def read_json(path_json):
    with open(path_json, encoding='utf8') as json_file:
        return json.load(json_file)
def softmax(x):
    return np.exp(x) / np.sum(np.exp(x))
def chunks(data, SIZE):
    """Split a dictionnary into parts of max_size =SIZE"""
    it = iter(data)
    for _ in range(0, len(data), SIZE):
        yield {k: data[k] for k in islice(it, SIZE)}

def sorted_dict(x, ascending=True):
    """
    Sort dict according to value.
    x must be a primitive type: int,float, str...
    @param x:
    @return:
    """
    return dict(sorted(x.items(), key=lambda item: (1 if ascending else -1) * item[1]))
def reverse_dict(input_dict):
    """
    Reverse a dictonary
    Args:
        input_dict:

    Returns:

    """
    inv_dict = {}
    for k, v in input_dict.items():
        inv_dict[v] = inv_dict.get(v, []) + [k]

    return inv_dict

def save_matrix(matrix,filename):
    with open(filename,'wb') as output:
        np.save(output,matrix)
def load_matrix(filename,auto_delete=False):
    with open(filename,'rb') as input:
        matrix=np.load(input)

    if auto_delete:
        os.remove(filename)
    return matrix



class Averager:
    def __init__(self):
        self.current_total = 0.0
        self.iterations = 0.0

    def send(self, value):
        self.current_total += value
        self.iterations += 1

    @property
    def value(self):
        if self.iterations == 0:
            return 0
        else:
            return 1.0 * self.current_total / self.iterations

    def reset(self):
        self.current_total = 0.0
        self.iterations = 0.0



from enum import Enum
class DatasetType(Enum):
    TRAIN="train"
    VALID="valid"
    TEST="test"



def extract_census_features(row,cfips_index,single_row=True):
    """

    @param row: Row of the dataframe
    @param cfips_index: index of the cfips for one-hot encoding
    @return:
    """
    ##If series :


    if single_row:
        features_tensor = torch.tensor( [row['pct_bb'],
                                        row['pct_college'],
                                        row['pct_foreign_born'],
                                        row['pct_it_workers'],
                                        row['median_hh_inc']
                                        ], dtype=torch.float32)
        cfips_one_hot = get_cfips_encoding(row['cfips'], cfips_index)
        # Min-max normalization
        features_tensor[ 0] = (features_tensor[ 0] - 24.5) / (97.6 - 24.5)
        features_tensor[ 1] = (features_tensor[ 1] / 48)
        features_tensor[ 2] = (features_tensor[ 2] / 54)
        features_tensor[ 3] = (features_tensor[ 3] / 17.4)
        features_tensor[ 4] = (features_tensor[ 4] - 17109) / (1586821 - 17109)

    else :
        features_tensor= torch.from_numpy(row[['pct_bb', 'pct_college', 'pct_foreign_born', 'pct_it_workers', 'median_hh_inc']].values)
        row_one_hots= [get_cfips_encoding(cfips,cfips_index) for cfips in row['cfips']]
        cfips_one_hot = torch.stack(row_one_hots)
        #Min-max normalization
        features_tensor[:,0] = (features_tensor[:,0]- 24.5)/ (97.6-24.5)
        features_tensor[:,1] = (features_tensor[:,1] /48)
        features_tensor[:,2] = (features_tensor[:,2]/ 54)
        features_tensor[:,3] = (features_tensor[:,3] / 17.4)
        features_tensor[:,4] = (features_tensor[:,4]- 17109)/(1586821-17109)


    ##Add one-hot encoding of cfips
    if single_row:
        features_tensor = torch.cat((cfips_one_hot, features_tensor))
    else:
        features_tensor = torch.cat((cfips_one_hot,features_tensor), 1)

    return features_tensor.float()






def get_cfips_index():
    """
    Return a dictionary with key=cfips and value=index for using a one-hot encoding
    """
    df= pd.read_csv(os.path.join(DATA_DIR, "census_interpolated.csv"))
    cfips = df['cfips'].unique()
    cfips.sort()
    #Sort cfips
    return {cfips[i]: i for i in range(len(cfips))}


def get_cfips_encoding(cfips,cfips_index):
    """
     return the base 2 encoding of cfips
    """

    #n_dims is the number of bits needed to represent the cfips

    bin_index=np.binary_repr(cfips_index[cfips],width=N_DIMS_COUNTY_ENCODING)
    enc = torch.tensor([int(x) for x in bin_index],dtype=torch.float32)
    return enc





# Network

In [4]:
import json
import logging
import os

import numpy as np
import torch
from torch import nn
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        #Sin/Cos positional encoding
        super().__init__()
        self.d_model = d_model
        self.max_len = max_len
        self.pe = torch.zeros(self.max_len, self.d_model)
        position = torch.arange(0, self.max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, self.d_model, 2).float() * (-np.log(10000.0) / self.d_model))
        self.pe[:, 0::2] = torch.sin(position * div_term)
        self.pe[:, 1::2] = torch.cos(position * div_term)
        self.pe = self.pe.unsqueeze(0).transpose(0, 1).squeeze(1)
        #COnvert to nn.Parameter
        self.pe = nn.Parameter(self.pe, requires_grad=False)



    def forward(self, x):
        #Add positional encoding to the input (Pay attention to the dimensions (the pe does not have the batch dimension))
        x = x + self.pe[:x.size(0), :]
        return x



class TransformerPredictor(nn.Module):

    def __init__(self,
                 emb_dim=32,
                 n_layers=3,
                 n_head=8,
                 max_seq_len=100,
                 dim_feedforward=128,
                 use_derivative=True,
                 use_census=USE_CENSUS,
                 n_dims_census_emb=2,
                 experiment_dir="my_model", reset=False, load_best=True):
        """
        @param features_encoder :
        @param input_dim:
        @param hidden_dim:
        @param ues_encoder:²
        @param experiment_dir:
        @param reset:
        @param load_best:
        """

        super(TransformerPredictor, self).__init__()
        self.variante_num=4
        self.emb_dim = emb_dim
        self.n_layers = n_layers
        self.n_head = n_head
        self.dim_feedforward = dim_feedforward
        self.use_census = use_census
        self.max_seq_len = max_seq_len
        self.census_features_encoder = None
        self.n_dims_census_emb = n_dims_census_emb
        self.input_dim =1
        self.use_derivative = use_derivative
        if self.use_derivative:
            self.input_dim += 2 # 2 for derivative

        if self.use_census:
            self.input_dim = self.input_dim  + self.n_dims_census_emb



        self.experiment_dir = experiment_dir
        self.model_name = os.path.basename(self.experiment_dir)
        self.reset = reset
        self.load_best = load_best
        self.setup_dirs()
        self.setup_network()


        if not reset: self.load_state()

    ##1. Defining network architecture
    def setup_network(self):
        """
        Initialize the network  architecture here
        @return:
        """
        #Input encoder from self.input_dim to self.emb_dim along with positional encoding
        if self.use_census:
            self.query_encoder = nn.Sequential(nn.Linear(N_DIMS_COUNTY_ENCODING + N_CENSUS_FEATURES, self.emb_dim))
            self.census_features_encoder= nn.Sequential(
                nn.Linear(N_CENSUS_FEATURES,self.n_dims_census_emb),
            )

        self.input_embedding = nn.Sequential(
            nn.Linear(self.input_dim, self.emb_dim),
        )

        ##Positional encoding
        self.positional_encoding = PositionalEncoding(self.emb_dim, max_len=self.max_seq_len)
        self.dropout = nn.Dropout(p=0.1)
        self.transformer_encoder  = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=self.emb_dim, nhead=self.n_head, dim_feedforward=self.dim_feedforward,
                                       dropout=0,
                                       batch_first=True),
            num_layers=self.n_layers
        )
        self.transformer_decoder = nn.TransformerDecoder(
            nn.TransformerDecoderLayer(d_model=self.emb_dim, nhead=self.n_head, dim_feedforward=self.dim_feedforward,
                                       dropout=0,
                                       batch_first=True),
            num_layers=self.n_layers,
            
        )

        if self.use_census:
            self.regressor = nn.Sequential(
                nn.Linear(2*self.emb_dim, 1024),
                nn.ReLU(),
                nn.Linear(1024,1)
            )

        else:
            self.regressor = nn.Sequential(
                nn.Linear(self.emb_dim, 1)
            )


    ##2. Model Saving/Loading
    def load_state(self, best=False):
        """
        Load model
        :param self:
        :return:
        """
        if best and os.path.exists(self.save_best_file):
            logging.info(f"Loading best model state : {self.save_file}")
            self.load_state_dict(torch.load(self.save_file, map_location=DEVICE))
            return

        if os.path.exists(self.save_file):
            logging.info(f"Loading model state : {self.save_file}")
            self.load_state_dict(torch.load(self.save_file, map_location=DEVICE))

    def save_state(self, best=False):
        if best:
            logging.info("Saving best model")
            torch.save(self.state_dict(), self.save_best_file)
        torch.save(self.state_dict(), self.save_file)

    ##3. Setupping directories for weights /logs ... etc
    def setup_dirs(self):
        """
        Checking and creating directories for weights storage
        @return:
        """
        self.save_file = os.path.join(self.experiment_dir, f"{self.model_name}.pt")
        self.save_best_file = os.path.join(self.experiment_dir, f"{self.model_name}_best.pt")
        if not os.path.exists(self.experiment_dir):
            os.makedirs(self.experiment_dir)

    #4. Forward call
    def forward(self, X_input):
        """
        +Forward call here.
        It a time series, so we need the full sequence output (strided by 1)
        @param X:
        @return:
        """
        #0. Preparing the input (Removing the target from the input)
        X = X_input[:, :-1, :] # Removing the target from the input (Only required when using census features)


        if self.use_derivative:
            d_left= torch.zeros((X.shape[0], X.shape[1], 1), device=DEVICE)
            d_left[:,1:, -1] = X[:, 1:, -1] - X[:, :-1, -1]

            d_right= torch.zeros((X.shape[0], X.shape[1], 1), device=DEVICE)
            d_right[:,:-1, -1] = X[:, 1:, -1] - X[:, :-1, -1]

            X = torch.cat((d_left, X, d_right), dim=-1) ## Adding the derivative to the input as a new feature


        if self.use_census:
            target = X[:, -1, :]  # Last element of the sequence is the target .
            query = self.query_encoder(target[:, :N_DIMS_COUNTY_ENCODING+N_CENSUS_FEATURES])

            enc_census = self.census_features_encoder(X[:, :, N_DIMS_COUNTY_ENCODING:N_CENSUS_FEATURES + N_DIMS_COUNTY_ENCODING])
            X = torch.cat((X[:, :, N_CENSUS_FEATURES + N_DIMS_COUNTY_ENCODING:], enc_census), dim=-1)


        #2. Apply the input encoder to the input
        X = self.input_embedding(X)

        #3. Add the positional encoding
        X = self.positional_encoding(X)


        #4. Add a query token to the input. Encoding of the cfips. (It is the same for all the sequence)
        if self.use_census:
            X = torch.cat((query.unsqueeze(1), X), dim=1)


        #4. Apply the transformer encoder to get the memory
        X = self.transformer_encoder(X)

        if self.use_census:
            query_enc= X[:, 0, :]
            X = X[:, 1:, :]#Removing the query token



        #.5 Apply the transformer decoder to get the next item in the sequence
        tgt_sequence = torch.zeros(X.shape[0], 1, X.shape[-1]).to(DEVICE)
        tgt_mask = torch.ones(1,1).to(DEVICE)

        #6. Then apply the transformer to get the next item in the sequence
        output = self.transformer_decoder(tgt_sequence, memory=X, tgt_mask= tgt_mask)#We want to predict the next item in the sequence

        #7.We only want the last output of the sequence
        output= output[:, -1, :]
        if self.use_census:
            output= torch.cat((output, query_enc), dim=-1)

        #3. Finally apply the regressor to get the predictions.
        output = self.regressor(output)

        return output







# Dataset

In [5]:
import os

import pandas as pd
import torch
from torch.utils.data import Dataset
class LstmDataset(Dataset):
    def __init__(self, type, seq_len, stride=1):
        self.type = type
        self.seq_len = seq_len
        self.stride = stride

        self.file = os.path.join(DATA_DIR, f"train_with_census_{'train' if type==DatasetType.TRAIN else 'val' if type==DatasetType.VALID  else 'test'}.csv")
        self.load_data()

    def init_transforms(self):
        """
        Initialize transforms.Might be different for each dataset type
        """

    def load_data(self):
        """
        Load data from the data items if necessary
        """
        self.data = pd.read_csv(self.file)
        self.data['first_day_of_month'] = pd.to_datetime(self.data['first_day_of_month'])

    def __len__(self):
        return len(self.data) // self.stride

    def __getitem__(self, item):
        """
        Retrieving seq_len data
        1. The county (CFIPS) should be the same
        2. And the difference between the date(first_day_of_month) should be at most 3 months
        """
        i = item * self.stride
        county = self.data.iloc[i]['cfips']

        rows_data=self.data.iloc[i:i+self.seq_len]

        #Check if the county is the same
        is_valid = len(rows_data)==self.seq_len and (rows_data['cfips'].unique()[0]==county) and (rows_data['first_day_of_month'].diff().max()<pd.Timedelta(days=90))

        if not is_valid:
            ##Find a random item that is valid
            return self.__getitem__(torch.randint(0, len(self), (1,)).item())

        #Taking seq_len rows and considering the following features
        #pct_bb,pct_college,pct_foreign_born,pct_it_workers,median_hh_inc, active,microbusiness_density
        features_tensor = torch.tensor(
            rows_data[['pct_bb', 'pct_college', 'pct_foreign_born', 'pct_it_workers', 'median_hh_inc','year', 'active',
                        'microbusiness_density']].values, dtype=torch.float32)

        #return the iterator
        return features_tensor

import json
import os
from unicodedata import category
import numpy as np
import pandas as pd
import torch
from PIL import Image
from matplotlib import pyplot as plt
from torch.utils.data import Dataset
from torchvision.transforms import transforms
from tqdm import tqdm
from enum import Enum
class CensusDataset(Dataset):
    def __init__(self, type):
        self.type=type
        self.load_data()
        pass

    def load_data(self):
        """
        Load data from the data items if necessary
        Returns:

        """
        self.data_file=os.path.join(DATA_DIR,f"train_with_census_ae_{'train' if self.type == DatasetType.TRAIN else 'test'}.csv")
        self.data = pd.read_csv(self.data_file)




    def __len__(self):
        return len(self.data)


    def __getitem__(self, idx):
        """
        pct_bb,pct_college,pct_foreign_born,pct_it_workers,median_hh_inc,year .
        Retrieve the following features from the dataset and return the corresponding tensor

        Returns:
        """
        row=self.data.iloc[idx]
        features_tensor=torch.tensor([row['pct_bb'],row['pct_college'],row['pct_foreign_born'],\
                                      row['pct_it_workers'],row['median_hh_inc'],row['year']],dtype=torch.float32)
        return features_tensor







# Loss and metrics

In [6]:
import torch
from torch import nn


class SmapeCriterion(nn.Module):
    """
    Class to compute the SMAPE loss.
    """
    def __init__(self):
        super(SmapeCriterion, self).__init__()

    def forward(self, y_pred, y_true):
        """
        @param y_pred: Predicted values
        @param y_true: True values
        @return: SMAPE loss
        """
        eps = 1e-8
        return 100*torch.mean(2 * torch.abs(y_pred - y_true) / (torch.abs(y_pred) + torch.abs(y_true) + eps))

    def __str__(self):
        return "SMAPE"

    def __repr__(self):
        return str(self)



# Dataset

In [7]:
import logging
import os
import random

import pandas as pd
import torch
from torch.utils.data import Dataset
from tqdm import tqdm

EVAL_START_DATE = "2022-05-01"
TEST_START_DATE =  "2022-11-01"

SEED=42
class MicroDensityDataset(Dataset):
    def __init__(self, type, seq_len, stride=1,use_census=USE_CENSUS):
        self.type = type
        self.seq_len = seq_len
        self.stride = stride if type == DatasetType.TRAIN else 1
        self.use_census = use_census
        self.load_data()
        self.prepare_sequences()


    def init_transforms(self):
        """
        Initialize transforms.Might be different for each dataset type
        """

    def load_data(self):
        """
        Load data from the data items if necessary
        """

        self.main_file = os.path.join(DATA_DIR, "train.csv")
        self.main_df = pd.read_csv(self.main_file)

        if self.type == DatasetType.TEST:
            self.test_df = pd.read_csv(TEST_FILE)
            self.test_df["microbusiness_density"] = [0 for _ in range(len(self.test_df))]
            self.test_df["county"] =["NAN" for _ in range(len(self.test_df))]
            self.test_df["state"] =["NAN" for _ in range(len(self.test_df))]

            self.main_df = pd.concat([self.main_df, self.test_df], ignore_index=True)

            self.test_df =self.test_df.sort_values(by=["cfips","first_day_of_month"])
            self.test_df = self.test_df.reset_index(drop=True)

        if self.use_census:
            #Merge the census features
            self.cfips_index=get_cfips_index()
            self.census_df = pd.read_csv(CENSUS_FILE)

            self.main_df=pd.merge(self.main_df,self.census_df,on=["cfips","first_day_of_month"],how="left")


        ##Group by cfips and sort by date
        self.main_df=self.main_df.sort_values(by=["cfips","first_day_of_month"])
        self.main_df["id"] =list(range(len(self.main_df)))



    def prepare_sequences(self):
        """
        Prepare the sequences for the LSTM:
        Build a list of (id(offset), id(seq_len+offset)) tuples
        """
        self.sequences=[]

        if self.type == DatasetType.TRAIN:
            ##Train data are dates before EVAL_START_DATE
            df=self.main_df[self.main_df['first_day_of_month']<EVAL_START_DATE]

            for i in tqdm(range(0, len(df)-self.seq_len, self.stride), desc="Preparing sequences of dataset of type train"):

                ##The cfips should be the same for the whole sequence(just check the first and last rows)
                if df.iloc[i]["cfips"] != df.iloc[i + self.seq_len - 1]["cfips"]:
                    continue

                if i + self.seq_len > len(df) :
                    break

                #Get the corresponding ids
                self.sequences.append((df.iloc[i]["id"], df.iloc[i]["id"]+ self.seq_len))



        else :


            if self.type == DatasetType.VALID:
                df = self.main_df[self.main_df['first_day_of_month'] >= EVAL_START_DATE]

            else:
                df = self.main_df[self.main_df['first_day_of_month'] >= TEST_START_DATE]


            for i in tqdm(range(0, len(df),self.stride), desc="Preparing sequences of dataset of type {}".format("eval" if self.type == DatasetType.VALID else "test")):
                ## In eval and test sequences, the step to predict should always be the last one of the sequence

                ##Find the offest of the start in the main df


                offset=df.iloc[i]["id"]

                offset = offset - self.seq_len  # The step to predict is the last one of the sequence


                ##check if the cfips is the same
                if self.main_df.iloc[offset]["cfips"] != self.main_df.iloc[offset + self.seq_len - 1]["cfips"]:
                    #Warning
                    print("Warning: cfips is not the same for the whole sequence . Offsets :",offset,offset + self.seq_len - 1)

                self.sequences.append((offset, offset + self.seq_len))






    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, item):
        """
        Retrieving seq_len data
        1. The county (CFIPS) should be the same
        2. And the difference between the date(first_day_of_month) should be at most 3 months
        """
        start,end=self.sequences[item]
        rows_data=self.main_df.iloc[start:end]


        #ensure unique cfips
        # assert len(rows_data["cfips"].unique())==1

        tensor = torch.tensor(rows_data[['microbusiness_density']].values,
                                       dtype=torch.float32)  # Not considering the census features

        #FEatures scaling


        if self.use_census:
            censur_features_tensor = extract_census_features(rows_data, cfips_index=self.cfips_index,single_row=False)
            tensor = torch.cat((censur_features_tensor,tensor), dim=1)



        return tensor



    def mix_with(self, other_dataset, size=0.8):
        """
        Combine two datasets exemple a train dataset and test dataset
        @param other_dataset:
        @param size:
        @return:
        """

        all_sequences= self.sequences + other_dataset.sequences
        random.shuffle(all_sequences)
        self.sequences=all_sequences[:int(len(all_sequences)*size)]
        other_dataset.sequences=all_sequences[int(len(all_sequences)*size):]
        logging.info("Combined dataset: {} sequences for train and {} sequences for test".format(len(self.sequences),len(other_dataset.sequences)))




# Trainer

In [8]:
import csv
import json
import logging
import os
import shutil
import warnings

import numpy as np
import pandas as pd
import torch
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm
class TrainerTransformerPredictor:
    """
    Class to manage the full training pipeline
    """
    def __init__(self, network: TransformerPredictor,
                 criterion,
                 optimizer,
                 scheduler=None,
                 nb_epochs=10, batch_size=128, reset=False):
        """
        @param network:
        @param dataset_name:
        @param images_dirs:
        @param loss:
        @param optimizer:
        @param nb_epochs:
        @param nb_workers: Number of worker for the dataloader
        """
        self.network = network
        self.batch_size = batch_size
        self.loss_fn=criterion

        self.optimizer = optimizer
        self.scheduler =scheduler if scheduler else\
            torch.optim.lr_scheduler.ReduceLROnPlateau(self.optimizer, mode='min', factor=0.2, patience=10,min_lr=1e-5)

        self.nb_epochs = nb_epochs
        self.experiment_dir = self.network.experiment_dir
        self.model_info_file = os.path.join(self.experiment_dir, "model.json")
        self.model_info_best_file = os.path.join(self.experiment_dir, "model_best.json")

        if reset:
            if os.path.exists(self.experiment_dir):
                shutil.rmtree(self.experiment_dir)
        if not os.path.exists(self.experiment_dir):
            os.makedirs(self.experiment_dir)

        self.start_epoch = 0
        if not reset and os.path.exists(self.model_info_file):
            with open(self.model_info_file, "r") as f:
                self.start_epoch = json.load(f)["epoch"] + 1
                self.nb_epochs += self.start_epoch
                logging.info("Resuming from epoch {}".format(self.start_epoch))


    def save_model_info(self, infos, best=False):
        json.dump(infos, open(self.model_info_file, 'w'),indent=4)
        if best: json.dump(infos, open(self.model_info_best_file, 'w'),indent=4)

    def fit(self,train_dataloader,val_dataloader):
        logging.info("Launch training on {}".format(DEVICE))
        if self.network.use_census:
            logging.info("Using encoder census data")

        self.summary_writer = SummaryWriter(log_dir=self.experiment_dir)
        itr = self.start_epoch * len(train_dataloader) * self.batch_size  ##Global counter for steps

        #Save model graph
        # self.summary_writer.add_graph(self.network, next(iter(train_dataloader)).to(DEVICE)[:,:-1,:])

        self.best_val_loss = 1e20  # infinity
        if os.path.exists(self.model_info_file):
            with open(self.model_info_file, "r") as f:
                model_info = json.load(f)
                lr=model_info["lr"]
                logging.info(f"Setting lr to {lr}")
                for g in self.optimizer.param_groups:
                    g['lr'] = lr

        if os.path.exists(self.model_info_best_file):
            with open(self.model_info_best_file, "r") as f:
                best_model_info = json.load(f)
                self.best_val_loss = best_model_info["val_loss"]


        for epoch in range(self.start_epoch, self.nb_epochs):  # Training loop
            self.network.train()
            """"
            0. Initialize loss and other metrics
            """
            running_loss=Averager()
            pbar = tqdm(train_dataloader, desc=f"Epoch {epoch + 1}/{self.nb_epochs}")
            for _, batch in enumerate(pbar):
                """
                Training lopp
                """
                self.optimizer.zero_grad()
                itr += self.batch_size
                """
                1.Forward pass
                """
                batch = batch.to(DEVICE)

                y_pred = self.network(batch)
                ## The output is the values of the density for each time step

                """
                2.Loss computation and other metrics
                """
                # The density is the last item of the batch
                y_true = batch[:,:,-1].to(DEVICE)
                loss = self.loss_fn(y_pred, y_true[:, -1:])

                """
                3.Optimizing
                """
                loss.backward()
                self.optimizer.step()
                running_loss.send(loss.cpu().item())
                pbar.set_postfix(current_loss=loss.cpu().item(), current_mean_loss=running_loss.value)

                """
                4.Writing logs and tensorboard data, loss and other metrics
                """
                self.summary_writer.add_scalar("Train/loss", loss.item(), itr)



            #If step lr scheduler
            if isinstance(self.scheduler,torch.optim.lr_scheduler.StepLR):
                self.scheduler.step()
            else:
                self.scheduler.step(epoch_val_loss.value)
            epoch_val_loss =self.eval(val_dataloader,epoch)

            infos = {
                "epoch": epoch,
                "train_loss":running_loss.value,
                "val_loss":epoch_val_loss.value,
                "lr": self.optimizer.param_groups[0]['lr'],
                "input_dim": self.network.input_dim,
                "emb_dim": self.network.emb_dim,
                "dim_feedforward": self.network.dim_feedforward,
                "n_head": self.network.n_head,
                "n_layers": self.network.n_layers,
                "seq_len": train_dataloader.dataset.seq_len,
                "batch_size": train_dataloader.batch_size,
                "stride": train_dataloader.dataset.stride,
                "use_census": self.network.use_census,
                "variante": self.network.variante_num,

            }

            logging.info("Epoch {} - Train loss: {:.4f} - Val loss: {:.4f}".format(epoch, running_loss.value, epoch_val_loss.value))

            if epoch_val_loss.value < self.best_val_loss:
                self.best_val_loss = epoch_val_loss.value
                best = True
            else:
                best = False

            self.network.save_state(best=best)
            self.save_model_info(infos, best=best)


            self.summary_writer.add_scalar("Epoch_train/loss", running_loss.value, epoch)
            self.summary_writer.add_scalar("Epoch_val/loss", epoch_val_loss.value, epoch)



    def eval(self, val_dataloader,epoch):
        """
        Compute loss and metrics on a validation dataloader
        @return:
        """
        with torch.no_grad():
            self.network.eval()
            running_loss=Averager()
            pbar = tqdm(val_dataloader, desc=f"Validation Epoch {epoch + 1}/{self.nb_epochs}")
            for _, batch in enumerate(pbar):

                """
                Training lopp
                """
                """
                1.Forward pass
                """
                batch=batch.to(DEVICE)
                y_pred = self.network(batch)
                """
                2.Loss computation and other metrics
                """
                y_true = batch[:,:,-1]


                loss = self.loss_fn(y_pred, y_true[:, -1:])

                running_loss.send(loss.item())

                pbar.set_postfix(current_loss=loss.item(), current_mean_loss=running_loss.value)


        return running_loss



    def run_test(self, test_dataloader):
        """
        Compute loss and metrics on a validation dataloader
        @return:
        """
        assert test_dataloader.batch_size == 1, "Batch size must be 1 for test"
        predictions = []
        row_ids = []
        with torch.no_grad():
            self.network.eval()
            for i, input in enumerate(tqdm(test_dataloader," Running tests for submission")):
                input = input.to(DEVICE)
                y_pred = self.network(input.to(DEVICE)).cpu().squeeze().item()

                # Denormalize. MEAN_MB, STD_MB (if noramlized)
                # y_pred = y_pred * STD_MB + MEAN_MB
                """
                2.Loss computation and other metrics
                """
                predictions.append(y_pred)

                ##Update all microbusiness_den isty column
                row_id=test_dataloader.dataset.test_df.loc[i,"row_id"]
                row_ids.append(row_id)

                test_dataloader.dataset.main_df.loc[test_dataloader.dataset.main_df["row_id"]==row_id,"microbusiness_density"]=y_pred


        #Merge predictions
        predictions=np.array(predictions)


        #Update all microbusiness_denisty column

        pred_test_df = pd.DataFrame(
            {
                "row_id":row_ids,
                 "microbusiness_density":predictions}

                                )
        pred_test_df.to_csv(os.path.join(self.experiment_dir,"submission.csv"),index=False)

        return pred_test_df



# Runner

In [None]:
from dataclasses import dataclass
import argparse
import logging
import os
import pickle
import torch.utils.data

@dataclass
class Arguments:
    reset: bool = False
    learning_rate: float = 0.001
    nb_epochs: int = 1000
    model_name: str = None
    num_workers: int = 2
    batch_size: int = 256
    log_level: str = "INFO"
    autorun_tb: bool = True
    use_census: bool = True
    use_derivative: bool = True
    seq_len: int = 20
    seq_stride: int = 1
    emb_dim: int = 64
    n_layers: int =8
    n_head: int = 8
    dim_feedforward: int = 256

def cli():
    return Arguments()

def main(args):

    #Format the model name
    if args.model_name is None:
        model_name=f"trf_{'ae_' if args.use_census else ''}{'dv_' if args.use_derivative else ''}ed.{args.emb_dim}_nl.{args.n_layers}_nh.{args.n_head}_df.{args.dim_feedforward}_sl.{args.seq_len}_ss.{args.seq_stride}_lr.{args.learning_rate}_bs.{args.batch_size}"
    else :
        model_name=args.model_name



    experiment_dir = os.path.join(EXPERIMENTS_DIR, model_name)

    network =TransformerPredictor(
                                    experiment_dir=experiment_dir,
                                    emb_dim=args.emb_dim,
                                      n_layers=args.n_layers,
                                      n_head=args.n_head,
                                      dim_feedforward=args.dim_feedforward,
                                      use_census=args.use_census,
                                    max_seq_len=args.seq_len-1,
                                    reset=args.reset
                ).to(DEVICE)
    
    #Using multiple gpus
    
    #Adam optimizer
    optimizer = torch.optim.Adam(network.parameters(), lr=args.learning_rate)

    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=150, gamma=0.5)#Reduce the learning rate by half every 150 epochs

    criterion= SmapeCriterion().to(DEVICE)


    logging.info("Training : "+model_name)
    trainer = TrainerTransformerPredictor(network,
                      criterion,
                      optimizer=optimizer,
                      scheduler=scheduler,
                      nb_epochs= args.nb_epochs,
                      batch_size=args.batch_size,
                      reset=args.reset,
                      )

    # Save  the dataset according to type, seq_len_stride and use_census: using pickle

    if not os.path.exists(os.path.join(ROOT_DIR,"dataset","pickle")):
        os.makedirs(os.path.join(ROOT_DIR,"dataset","pickle"))

    datasets_pickle_path = os.path.join(ROOT_DIR,"dataset","pickle",f"all_dataset_{args.seq_len}_{args.seq_stride}_{args.use_census}.pickle")


    if not os.path.exists(datasets_pickle_path):
        train_dataset = MicroDensityDataset(type=DatasetType.TRAIN, seq_len=args.seq_len, stride=args.seq_stride,
                                            use_census=args.use_census)
        val_dataset = MicroDensityDataset(type=DatasetType.VALID, seq_len=args.seq_len, stride=args.seq_stride,
                                          use_census=args.use_census)

        train_dataset.mix_with(val_dataset,size=0.8) #Mix train and val dataset to avoid disparity between the two in terms of dates distribution

        test_dataset = MicroDensityDataset(type=DatasetType.TEST, seq_len=args.seq_len, stride=args.seq_stride,
                                           use_census=args.use_census)

        with open(datasets_pickle_path,"wb") as f:
            logging.info(f"Saving datasets to {datasets_pickle_path}")
            pickle.dump((train_dataset,val_dataset,test_dataset),f)
    else:
        with open(datasets_pickle_path,"rb") as f:
            logging.info(f"Loading datasets  from {datasets_pickle_path}")
            train_dataset,val_dataset,test_dataset = pickle.load(f)



    logging.info(f"Nb sequences : Train {len(train_dataset)} - Val {len(val_dataset)} - Test {len(test_dataset)}")

    train_dataloader=torch.utils.data.DataLoader(train_dataset,batch_size=args.batch_size,num_workers=args.num_workers,shuffle=True,drop_last=False,persistent_workers=True)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=args.batch_size,num_workers=0,drop_last=False)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=1,num_workers=0,drop_last=False,shuffle=False)

    ##Train
    trainer.fit(train_dataloader,val_dataloader)

    ##Load best model
    trainer.network.load_state(best=True)
    trainer.run_test(test_dataloader=test_dataloader)



args = cli()
setup_logger(args)
main(args)


2023-02-12 15:45:12,272 - root - INFO - Training : trf_ae_dv_ed.64_nl.8_nh.8_df.256_sl.20_ss.1_lr.0.001_bs.256


Preparing sequences of dataset of type train: 100%|██████████| 103435/103435 [00:36<00:00, 2819.21it/s]
Preparing sequences of dataset of type eval: 100%|██████████| 18810/18810 [00:07<00:00, 2441.25it/s]

2023-02-12 15:45:58,403 - root - INFO - Combined dataset: 50159 sequences for train and 12540 sequences for test



Preparing sequences of dataset of type test: 100%|██████████| 25080/25080 [00:09<00:00, 2636.44it/s]

2023-02-12 15:46:08,581 - root - INFO - Saving datasets to /kaggle/dataset/pickle/all_dataset_20_1_True.pickle





2023-02-12 15:46:09,391 - root - INFO - Nb sequences : Train 50159 - Val 12540 - Test 25080
2023-02-12 15:46:09,392 - root - INFO - Launch training on cuda
2023-02-12 15:46:09,393 - root - INFO - Using encoder census data


Epoch 1/1000: 100%|██████████| 196/196 [01:08<00:00,  2.84it/s, current_loss=5.71, current_mean_loss=16.9]
Validation Epoch 1/1000: 100%|██████████| 49/49 [00:19<00:00,  2.48it/s, current_loss=8.46, current_mean_loss=9.26]

2023-02-12 15:47:42,247 - root - INFO - Epoch 0 - Train loss: 16.8900 - Val loss: 9.2609
2023-02-12 15:47:42,251 - root - INFO - Saving best model



Epoch 2/1000: 100%|██████████| 196/196 [01:06<00:00,  2.96it/s, current_loss=4.25, current_mean_loss=6.41]
Validation Epoch 2/1000: 100%|██████████| 49/49 [00:19<00:00,  2.51it/s, current_loss=4.78, current_mean_loss=5.41]

2023-02-12 15:49:08,007 - root - INFO - Epoch 1 - Train loss: 6.4054 - Val loss: 5.4110
2023-02-12 15:49:08,008 - root - INFO - Saving best model



Epoch 3/1000: 100%|██████████| 196/196 [01:06<00:00,  2.97it/s, current_loss=3.3, current_mean_loss=4.98] 
Validation Epoch 3/1000: 100%|██████████| 49/49 [00:19<00:00,  2.52it/s, current_loss=3.81, current_mean_loss=4.21]

2023-02-12 15:50:33,562 - root - INFO - Epoch 2 - Train loss: 4.9770 - Val loss: 4.2076
2023-02-12 15:50:33,564 - root - INFO - Saving best model



Epoch 4/1000: 100%|██████████| 196/196 [01:06<00:00,  2.95it/s, current_loss=7.36, current_mean_loss=4.04]
Validation Epoch 4/1000: 100%|██████████| 49/49 [00:19<00:00,  2.50it/s, current_loss=4.32, current_mean_loss=4.23]

2023-02-12 15:51:59,690 - root - INFO - Epoch 3 - Train loss: 4.0414 - Val loss: 4.2336



Epoch 5/1000: 100%|██████████| 196/196 [01:06<00:00,  2.97it/s, current_loss=4.59, current_mean_loss=3.97]
Validation Epoch 5/1000: 100%|██████████| 49/49 [00:19<00:00,  2.50it/s, current_loss=3.57, current_mean_loss=3.81]

2023-02-12 15:53:25,435 - root - INFO - Epoch 4 - Train loss: 3.9659 - Val loss: 3.8146
2023-02-12 15:53:25,436 - root - INFO - Saving best model



Epoch 6/1000: 100%|██████████| 196/196 [01:06<00:00,  2.97it/s, current_loss=4.25, current_mean_loss=3.49]
Validation Epoch 6/1000: 100%|██████████| 49/49 [00:19<00:00,  2.52it/s, current_loss=2.62, current_mean_loss=3]   

2023-02-12 15:54:50,994 - root - INFO - Epoch 5 - Train loss: 3.4893 - Val loss: 2.9974
2023-02-12 15:54:50,997 - root - INFO - Saving best model



Epoch 7/1000: 100%|██████████| 196/196 [01:05<00:00,  2.99it/s, current_loss=2.49, current_mean_loss=3.42]
Validation Epoch 7/1000: 100%|██████████| 49/49 [00:19<00:00,  2.54it/s, current_loss=3.89, current_mean_loss=3.87]

2023-02-12 15:56:16,031 - root - INFO - Epoch 6 - Train loss: 3.4230 - Val loss: 3.8698



Epoch 8/1000: 100%|██████████| 196/196 [01:05<00:00,  2.98it/s, current_loss=3.47, current_mean_loss=3.57]
Validation Epoch 8/1000: 100%|██████████| 49/49 [00:19<00:00,  2.48it/s, current_loss=3.02, current_mean_loss=3.44]

2023-02-12 15:57:41,669 - root - INFO - Epoch 7 - Train loss: 3.5735 - Val loss: 3.4387



Epoch 9/1000: 100%|██████████| 196/196 [01:06<00:00,  2.96it/s, current_loss=4.61, current_mean_loss=2.99]
Validation Epoch 9/1000: 100%|██████████| 49/49 [00:19<00:00,  2.52it/s, current_loss=3.23, current_mean_loss=4.36]

2023-02-12 15:59:07,311 - root - INFO - Epoch 8 - Train loss: 2.9936 - Val loss: 4.3565



Epoch 10/1000: 100%|██████████| 196/196 [01:05<00:00,  2.98it/s, current_loss=3.12, current_mean_loss=3.72]
Validation Epoch 10/1000: 100%|██████████| 49/49 [00:18<00:00,  2.60it/s, current_loss=1.82, current_mean_loss=1.83]

2023-02-12 16:00:32,089 - root - INFO - Epoch 9 - Train loss: 3.7212 - Val loss: 1.8338
2023-02-12 16:00:32,090 - root - INFO - Saving best model



Epoch 11/1000: 100%|██████████| 196/196 [01:06<00:00,  2.95it/s, current_loss=1.62, current_mean_loss=2.92]
Validation Epoch 11/1000: 100%|██████████| 49/49 [00:19<00:00,  2.52it/s, current_loss=1.59, current_mean_loss=1.73]

2023-02-12 16:01:58,149 - root - INFO - Epoch 10 - Train loss: 2.9166 - Val loss: 1.7316
2023-02-12 16:01:58,151 - root - INFO - Saving best model



Epoch 12/1000: 100%|██████████| 196/196 [01:05<00:00,  2.97it/s, current_loss=2.69, current_mean_loss=2.93]
Validation Epoch 12/1000: 100%|██████████| 49/49 [00:19<00:00,  2.50it/s, current_loss=3.31, current_mean_loss=3.74]

2023-02-12 16:03:23,837 - root - INFO - Epoch 11 - Train loss: 2.9264 - Val loss: 3.7361



Epoch 13/1000: 100%|██████████| 196/196 [01:05<00:00,  2.99it/s, current_loss=2, current_mean_loss=2.73]   
Validation Epoch 13/1000: 100%|██████████| 49/49 [00:19<00:00,  2.51it/s, current_loss=1.78, current_mean_loss=1.82]

2023-02-12 16:04:48,961 - root - INFO - Epoch 12 - Train loss: 2.7328 - Val loss: 1.8189



Epoch 14/1000: 100%|██████████| 196/196 [01:05<00:00,  3.00it/s, current_loss=4.4, current_mean_loss=2.8]  
Validation Epoch 14/1000: 100%|██████████| 49/49 [00:19<00:00,  2.51it/s, current_loss=4.64, current_mean_loss=4.55]

2023-02-12 16:06:13,903 - root - INFO - Epoch 13 - Train loss: 2.7973 - Val loss: 4.5478



Epoch 15/1000: 100%|██████████| 196/196 [01:06<00:00,  2.96it/s, current_loss=2.36, current_mean_loss=3.08]
Validation Epoch 15/1000: 100%|██████████| 49/49 [00:19<00:00,  2.52it/s, current_loss=4.13, current_mean_loss=4.26]

2023-02-12 16:07:39,719 - root - INFO - Epoch 14 - Train loss: 3.0818 - Val loss: 4.2640



Epoch 16/1000: 100%|██████████| 196/196 [01:05<00:00,  2.99it/s, current_loss=3.33, current_mean_loss=2.53]
Validation Epoch 16/1000: 100%|██████████| 49/49 [00:19<00:00,  2.51it/s, current_loss=5.11, current_mean_loss=4.88]

2023-02-12 16:09:04,781 - root - INFO - Epoch 15 - Train loss: 2.5343 - Val loss: 4.8847



Epoch 17/1000: 100%|██████████| 196/196 [01:05<00:00,  2.98it/s, current_loss=2.08, current_mean_loss=2.75]
Validation Epoch 17/1000: 100%|██████████| 49/49 [00:19<00:00,  2.49it/s, current_loss=2.41, current_mean_loss=2.56]

2023-02-12 16:10:30,290 - root - INFO - Epoch 16 - Train loss: 2.7470 - Val loss: 2.5624



Epoch 18/1000: 100%|██████████| 196/196 [01:05<00:00,  2.99it/s, current_loss=1.88, current_mean_loss=2.52]
Validation Epoch 18/1000: 100%|██████████| 49/49 [00:19<00:00,  2.50it/s, current_loss=1.91, current_mean_loss=2.02]

2023-02-12 16:11:55,551 - root - INFO - Epoch 17 - Train loss: 2.5242 - Val loss: 2.0226



Epoch 19/1000: 100%|██████████| 196/196 [01:05<00:00,  2.99it/s, current_loss=1.71, current_mean_loss=2.21]
Validation Epoch 19/1000: 100%|██████████| 49/49 [00:19<00:00,  2.56it/s, current_loss=2.06, current_mean_loss=2.08]

2023-02-12 16:13:20,259 - root - INFO - Epoch 18 - Train loss: 2.2087 - Val loss: 2.0816



Epoch 20/1000: 100%|██████████| 196/196 [01:05<00:00,  3.01it/s, current_loss=2.14, current_mean_loss=2.38]
Validation Epoch 20/1000: 100%|██████████| 49/49 [00:19<00:00,  2.55it/s, current_loss=2.24, current_mean_loss=2.37]

2023-02-12 16:14:44,550 - root - INFO - Epoch 19 - Train loss: 2.3808 - Val loss: 2.3725



Epoch 21/1000: 100%|██████████| 196/196 [01:04<00:00,  3.03it/s, current_loss=2.21, current_mean_loss=2.57]
Validation Epoch 21/1000: 100%|██████████| 49/49 [00:19<00:00,  2.51it/s, current_loss=1.84, current_mean_loss=1.99]

2023-02-12 16:16:08,777 - root - INFO - Epoch 20 - Train loss: 2.5687 - Val loss: 1.9851



Epoch 22/1000: 100%|██████████| 196/196 [01:03<00:00,  3.06it/s, current_loss=1.55, current_mean_loss=2.07]
Validation Epoch 22/1000: 100%|██████████| 49/49 [00:18<00:00,  2.62it/s, current_loss=1.82, current_mean_loss=1.93]

2023-02-12 16:17:31,501 - root - INFO - Epoch 21 - Train loss: 2.0726 - Val loss: 1.9321



Epoch 23/1000: 100%|██████████| 196/196 [01:04<00:00,  3.05it/s, current_loss=5.45, current_mean_loss=2.25]
Validation Epoch 23/1000: 100%|██████████| 49/49 [00:19<00:00,  2.54it/s, current_loss=2.96, current_mean_loss=3.01]

2023-02-12 16:18:55,074 - root - INFO - Epoch 22 - Train loss: 2.2474 - Val loss: 3.0097



Epoch 24/1000: 100%|██████████| 196/196 [01:03<00:00,  3.09it/s, current_loss=2.07, current_mean_loss=2.28]
Validation Epoch 24/1000: 100%|██████████| 49/49 [00:19<00:00,  2.54it/s, current_loss=2.06, current_mean_loss=2.18]

2023-02-12 16:20:17,945 - root - INFO - Epoch 23 - Train loss: 2.2799 - Val loss: 2.1846



Epoch 25/1000: 100%|██████████| 196/196 [01:04<00:00,  3.04it/s, current_loss=1.63, current_mean_loss=2.24]
Validation Epoch 25/1000: 100%|██████████| 49/49 [00:18<00:00,  2.61it/s, current_loss=1.79, current_mean_loss=1.87]

2023-02-12 16:21:41,199 - root - INFO - Epoch 24 - Train loss: 2.2447 - Val loss: 1.8703



Epoch 26/1000: 100%|██████████| 196/196 [01:04<00:00,  3.06it/s, current_loss=2.26, current_mean_loss=2.09]
Validation Epoch 26/1000: 100%|██████████| 49/49 [00:19<00:00,  2.52it/s, current_loss=2.19, current_mean_loss=2.22]

2023-02-12 16:23:04,725 - root - INFO - Epoch 25 - Train loss: 2.0871 - Val loss: 2.2194



Epoch 27/1000: 100%|██████████| 196/196 [01:04<00:00,  3.04it/s, current_loss=1.99, current_mean_loss=2.37]
Validation Epoch 27/1000: 100%|██████████| 49/49 [00:19<00:00,  2.56it/s, current_loss=1.84, current_mean_loss=2.05]

2023-02-12 16:24:28,357 - root - INFO - Epoch 26 - Train loss: 2.3735 - Val loss: 2.0483



Epoch 28/1000: 100%|██████████| 196/196 [01:05<00:00,  3.00it/s, current_loss=2.16, current_mean_loss=2.13]
Validation Epoch 28/1000: 100%|██████████| 49/49 [00:18<00:00,  2.60it/s, current_loss=1.91, current_mean_loss=1.86]

2023-02-12 16:25:52,577 - root - INFO - Epoch 27 - Train loss: 2.1327 - Val loss: 1.8642



Epoch 29/1000: 100%|██████████| 196/196 [01:04<00:00,  3.04it/s, current_loss=2.87, current_mean_loss=2.09]
Validation Epoch 29/1000: 100%|██████████| 49/49 [00:19<00:00,  2.55it/s, current_loss=2.27, current_mean_loss=2.41]

2023-02-12 16:27:16,437 - root - INFO - Epoch 28 - Train loss: 2.0888 - Val loss: 2.4128



Epoch 30/1000: 100%|██████████| 196/196 [01:04<00:00,  3.04it/s, current_loss=2.62, current_mean_loss=2.15]
Validation Epoch 30/1000: 100%|██████████| 49/49 [00:18<00:00,  2.62it/s, current_loss=3.45, current_mean_loss=3.28]

2023-02-12 16:28:39,664 - root - INFO - Epoch 29 - Train loss: 2.1488 - Val loss: 3.2847



Epoch 31/1000: 100%|██████████| 196/196 [01:04<00:00,  3.05it/s, current_loss=1.79, current_mean_loss=2]   
Validation Epoch 31/1000: 100%|██████████| 49/49 [00:19<00:00,  2.50it/s, current_loss=2.04, current_mean_loss=2.07]

2023-02-12 16:30:03,643 - root - INFO - Epoch 30 - Train loss: 1.9973 - Val loss: 2.0734



Epoch 32/1000: 100%|██████████| 196/196 [01:04<00:00,  3.03it/s, current_loss=2.21, current_mean_loss=2.16]
Validation Epoch 32/1000: 100%|██████████| 49/49 [00:19<00:00,  2.56it/s, current_loss=2.07, current_mean_loss=2.25]

2023-02-12 16:31:27,599 - root - INFO - Epoch 31 - Train loss: 2.1626 - Val loss: 2.2509



Epoch 33/1000: 100%|██████████| 196/196 [01:04<00:00,  3.04it/s, current_loss=2.51, current_mean_loss=2.41]
Validation Epoch 33/1000: 100%|██████████| 49/49 [00:18<00:00,  2.64it/s, current_loss=2.49, current_mean_loss=2.41]

2023-02-12 16:32:50,725 - root - INFO - Epoch 32 - Train loss: 2.4067 - Val loss: 2.4108



Epoch 34/1000: 100%|██████████| 196/196 [01:04<00:00,  3.04it/s, current_loss=2.25, current_mean_loss=2.1] 
Validation Epoch 34/1000: 100%|██████████| 49/49 [00:19<00:00,  2.54it/s, current_loss=2.64, current_mean_loss=2.51]

2023-02-12 16:34:14,620 - root - INFO - Epoch 33 - Train loss: 2.0979 - Val loss: 2.5148



Epoch 35/1000: 100%|██████████| 196/196 [01:05<00:00,  3.01it/s, current_loss=1.82, current_mean_loss=1.99]
Validation Epoch 35/1000: 100%|██████████| 49/49 [00:19<00:00,  2.55it/s, current_loss=1.84, current_mean_loss=1.9] 

2023-02-12 16:35:38,938 - root - INFO - Epoch 34 - Train loss: 1.9867 - Val loss: 1.8962



Epoch 36/1000: 100%|██████████| 196/196 [01:04<00:00,  3.02it/s, current_loss=1.74, current_mean_loss=2.11]
Validation Epoch 36/1000: 100%|██████████| 49/49 [00:18<00:00,  2.61it/s, current_loss=1.65, current_mean_loss=1.73]

2023-02-12 16:37:02,731 - root - INFO - Epoch 35 - Train loss: 2.1106 - Val loss: 1.7284
2023-02-12 16:37:02,732 - root - INFO - Saving best model



Epoch 37/1000: 100%|██████████| 196/196 [01:04<00:00,  3.02it/s, current_loss=1.84, current_mean_loss=1.98]
Validation Epoch 37/1000: 100%|██████████| 49/49 [00:19<00:00,  2.56it/s, current_loss=1.96, current_mean_loss=1.98]

2023-02-12 16:38:26,912 - root - INFO - Epoch 36 - Train loss: 1.9753 - Val loss: 1.9794



Epoch 38/1000: 100%|██████████| 196/196 [01:05<00:00,  2.99it/s, current_loss=1.67, current_mean_loss=2.22]
Validation Epoch 38/1000: 100%|██████████| 49/49 [00:19<00:00,  2.49it/s, current_loss=2.1, current_mean_loss=2.26] 

2023-02-12 16:39:52,299 - root - INFO - Epoch 37 - Train loss: 2.2171 - Val loss: 2.2589



Epoch 39/1000: 100%|██████████| 196/196 [01:05<00:00,  3.01it/s, current_loss=1.56, current_mean_loss=2.36]
Validation Epoch 39/1000: 100%|██████████| 49/49 [00:18<00:00,  2.59it/s, current_loss=1.63, current_mean_loss=1.69]

2023-02-12 16:41:16,351 - root - INFO - Epoch 38 - Train loss: 2.3648 - Val loss: 1.6864
2023-02-12 16:41:16,352 - root - INFO - Saving best model



Epoch 40/1000: 100%|██████████| 196/196 [01:04<00:00,  3.04it/s, current_loss=1.74, current_mean_loss=1.87]
Validation Epoch 40/1000: 100%|██████████| 49/49 [00:19<00:00,  2.53it/s, current_loss=2.31, current_mean_loss=2.32]

2023-02-12 16:42:40,357 - root - INFO - Epoch 39 - Train loss: 1.8742 - Val loss: 2.3229



Epoch 41/1000: 100%|██████████| 196/196 [01:04<00:00,  3.03it/s, current_loss=1.77, current_mean_loss=2.04]
Validation Epoch 41/1000: 100%|██████████| 49/49 [00:19<00:00,  2.48it/s, current_loss=1.78, current_mean_loss=1.92]

2023-02-12 16:44:04,809 - root - INFO - Epoch 40 - Train loss: 2.0400 - Val loss: 1.9194



Epoch 42/1000: 100%|██████████| 196/196 [01:04<00:00,  3.02it/s, current_loss=2.31, current_mean_loss=1.96]
Validation Epoch 42/1000: 100%|██████████| 49/49 [00:18<00:00,  2.63it/s, current_loss=1.97, current_mean_loss=2.13]

2023-02-12 16:45:28,515 - root - INFO - Epoch 41 - Train loss: 1.9591 - Val loss: 2.1341



Epoch 43/1000: 100%|██████████| 196/196 [01:04<00:00,  3.03it/s, current_loss=1.6, current_mean_loss=2.01] 
Validation Epoch 43/1000: 100%|██████████| 49/49 [00:19<00:00,  2.51it/s, current_loss=2.21, current_mean_loss=2.2] 

2023-02-12 16:46:52,761 - root - INFO - Epoch 42 - Train loss: 2.0104 - Val loss: 2.2005



Epoch 44/1000: 100%|██████████| 196/196 [01:04<00:00,  3.03it/s, current_loss=1.83, current_mean_loss=1.97]
Validation Epoch 44/1000: 100%|██████████| 49/49 [00:18<00:00,  2.58it/s, current_loss=2.62, current_mean_loss=2.66]

2023-02-12 16:48:16,388 - root - INFO - Epoch 43 - Train loss: 1.9664 - Val loss: 2.6590



Epoch 45/1000: 100%|██████████| 196/196 [01:04<00:00,  3.02it/s, current_loss=1.88, current_mean_loss=1.96]
Validation Epoch 45/1000: 100%|██████████| 49/49 [00:19<00:00,  2.52it/s, current_loss=1.97, current_mean_loss=1.97]

2023-02-12 16:49:40,752 - root - INFO - Epoch 44 - Train loss: 1.9571 - Val loss: 1.9682



Epoch 46/1000: 100%|██████████| 196/196 [01:04<00:00,  3.05it/s, current_loss=1.38, current_mean_loss=1.99]
Validation Epoch 46/1000: 100%|██████████| 49/49 [00:19<00:00,  2.54it/s, current_loss=2.2, current_mean_loss=2.22] 

2023-02-12 16:51:04,324 - root - INFO - Epoch 45 - Train loss: 1.9917 - Val loss: 2.2180



Epoch 47/1000: 100%|██████████| 196/196 [01:04<00:00,  3.03it/s, current_loss=2.87, current_mean_loss=1.9] 
Validation Epoch 47/1000: 100%|██████████| 49/49 [00:19<00:00,  2.55it/s, current_loss=2.15, current_mean_loss=2.19]

2023-02-12 16:52:28,320 - root - INFO - Epoch 46 - Train loss: 1.9045 - Val loss: 2.1945



Epoch 48/1000: 100%|██████████| 196/196 [01:08<00:00,  2.87it/s, current_loss=1.45, current_mean_loss=1.89]
Validation Epoch 48/1000: 100%|██████████| 49/49 [00:18<00:00,  2.61it/s, current_loss=1.65, current_mean_loss=1.68]

2023-02-12 16:53:55,394 - root - INFO - Epoch 47 - Train loss: 1.8899 - Val loss: 1.6791
2023-02-12 16:53:55,396 - root - INFO - Saving best model



Epoch 49/1000: 100%|██████████| 196/196 [01:05<00:00,  2.99it/s, current_loss=1.45, current_mean_loss=1.88]
Validation Epoch 49/1000: 100%|██████████| 49/49 [00:19<00:00,  2.50it/s, current_loss=2.09, current_mean_loss=2.24]

2023-02-12 16:55:20,532 - root - INFO - Epoch 48 - Train loss: 1.8825 - Val loss: 2.2421



Epoch 50/1000: 100%|██████████| 196/196 [01:04<00:00,  3.02it/s, current_loss=1.48, current_mean_loss=1.79]
Validation Epoch 50/1000: 100%|██████████| 49/49 [00:18<00:00,  2.63it/s, current_loss=1.58, current_mean_loss=1.65]

2023-02-12 16:56:44,165 - root - INFO - Epoch 49 - Train loss: 1.7937 - Val loss: 1.6508
2023-02-12 16:56:44,166 - root - INFO - Saving best model



Epoch 51/1000: 100%|██████████| 196/196 [01:04<00:00,  3.04it/s, current_loss=2.64, current_mean_loss=1.86]
Validation Epoch 51/1000: 100%|██████████| 49/49 [00:19<00:00,  2.58it/s, current_loss=2.66, current_mean_loss=2.69]

2023-02-12 16:58:07,772 - root - INFO - Epoch 50 - Train loss: 1.8560 - Val loss: 2.6933



Epoch 52/1000: 100%|██████████| 196/196 [01:04<00:00,  3.04it/s, current_loss=1.89, current_mean_loss=2.13]
Validation Epoch 52/1000: 100%|██████████| 49/49 [00:19<00:00,  2.56it/s, current_loss=1.74, current_mean_loss=1.97]

2023-02-12 16:59:31,370 - root - INFO - Epoch 51 - Train loss: 2.1336 - Val loss: 1.9689



Epoch 53/1000: 100%|██████████| 196/196 [01:05<00:00,  3.01it/s, current_loss=1.58, current_mean_loss=1.88]
Validation Epoch 53/1000: 100%|██████████| 49/49 [00:19<00:00,  2.57it/s, current_loss=1.8, current_mean_loss=1.94] 

2023-02-12 17:00:55,706 - root - INFO - Epoch 52 - Train loss: 1.8794 - Val loss: 1.9415



Epoch 54/1000: 100%|██████████| 196/196 [01:04<00:00,  3.02it/s, current_loss=1.34, current_mean_loss=1.93]
Validation Epoch 54/1000: 100%|██████████| 49/49 [00:19<00:00,  2.53it/s, current_loss=1.92, current_mean_loss=1.96]

2023-02-12 17:02:19,949 - root - INFO - Epoch 53 - Train loss: 1.9252 - Val loss: 1.9598



Epoch 55/1000: 100%|██████████| 196/196 [01:05<00:00,  3.00it/s, current_loss=1.77, current_mean_loss=1.75]
Validation Epoch 55/1000: 100%|██████████| 49/49 [00:19<00:00,  2.54it/s, current_loss=1.81, current_mean_loss=1.86]

2023-02-12 17:03:44,748 - root - INFO - Epoch 54 - Train loss: 1.7461 - Val loss: 1.8555



Epoch 56/1000: 100%|██████████| 196/196 [01:04<00:00,  3.02it/s, current_loss=1.71, current_mean_loss=1.85]
Validation Epoch 56/1000: 100%|██████████| 49/49 [00:18<00:00,  2.59it/s, current_loss=1.87, current_mean_loss=2.03]

2023-02-12 17:05:08,654 - root - INFO - Epoch 55 - Train loss: 1.8454 - Val loss: 2.0319



Epoch 57/1000: 100%|██████████| 196/196 [01:04<00:00,  3.02it/s, current_loss=2.21, current_mean_loss=1.83]
Validation Epoch 57/1000: 100%|██████████| 49/49 [00:19<00:00,  2.53it/s, current_loss=2.04, current_mean_loss=2.07]

2023-02-12 17:06:33,072 - root - INFO - Epoch 56 - Train loss: 1.8254 - Val loss: 2.0697



Epoch 58/1000: 100%|██████████| 196/196 [01:05<00:00,  3.01it/s, current_loss=1.82, current_mean_loss=1.9] 
Validation Epoch 58/1000: 100%|██████████| 49/49 [00:19<00:00,  2.51it/s, current_loss=2.05, current_mean_loss=2.13]

2023-02-12 17:07:57,858 - root - INFO - Epoch 57 - Train loss: 1.8986 - Val loss: 2.1340



Epoch 59/1000: 100%|██████████| 196/196 [01:05<00:00,  3.01it/s, current_loss=1.76, current_mean_loss=1.87]
Validation Epoch 59/1000: 100%|██████████| 49/49 [00:19<00:00,  2.54it/s, current_loss=1.65, current_mean_loss=1.76]

2023-02-12 17:09:22,341 - root - INFO - Epoch 58 - Train loss: 1.8695 - Val loss: 1.7607



Epoch 60/1000: 100%|██████████| 196/196 [01:05<00:00,  3.00it/s, current_loss=1.83, current_mean_loss=1.81]
Validation Epoch 60/1000: 100%|██████████| 49/49 [00:19<00:00,  2.54it/s, current_loss=1.61, current_mean_loss=1.67]

2023-02-12 17:10:47,035 - root - INFO - Epoch 59 - Train loss: 1.8079 - Val loss: 1.6732



Epoch 61/1000: 100%|██████████| 196/196 [01:05<00:00,  3.00it/s, current_loss=1.54, current_mean_loss=1.91]
Validation Epoch 61/1000: 100%|██████████| 49/49 [00:19<00:00,  2.53it/s, current_loss=1.65, current_mean_loss=1.74]

2023-02-12 17:12:11,877 - root - INFO - Epoch 60 - Train loss: 1.9093 - Val loss: 1.7412



Epoch 62/1000: 100%|██████████| 196/196 [01:04<00:00,  3.02it/s, current_loss=1.53, current_mean_loss=1.78]
Validation Epoch 62/1000: 100%|██████████| 49/49 [00:18<00:00,  2.60it/s, current_loss=1.58, current_mean_loss=1.62]

2023-02-12 17:13:35,742 - root - INFO - Epoch 61 - Train loss: 1.7841 - Val loss: 1.6202
2023-02-12 17:13:35,743 - root - INFO - Saving best model



Epoch 63/1000: 100%|██████████| 196/196 [01:05<00:00,  3.00it/s, current_loss=2.73, current_mean_loss=1.92]
Validation Epoch 63/1000: 100%|██████████| 49/49 [00:19<00:00,  2.51it/s, current_loss=2.36, current_mean_loss=2.36]

2023-02-12 17:15:00,627 - root - INFO - Epoch 62 - Train loss: 1.9195 - Val loss: 2.3573



Epoch 64/1000: 100%|██████████| 196/196 [01:05<00:00,  2.98it/s, current_loss=1.68, current_mean_loss=1.94]
Validation Epoch 64/1000: 100%|██████████| 49/49 [00:19<00:00,  2.49it/s, current_loss=1.91, current_mean_loss=1.98]

2023-02-12 17:16:26,221 - root - INFO - Epoch 63 - Train loss: 1.9406 - Val loss: 1.9822



Epoch 65/1000: 100%|██████████| 196/196 [01:04<00:00,  3.03it/s, current_loss=2.39, current_mean_loss=2]   
Validation Epoch 65/1000: 100%|██████████| 49/49 [00:18<00:00,  2.59it/s, current_loss=1.59, current_mean_loss=1.72]

2023-02-12 17:17:49,858 - root - INFO - Epoch 64 - Train loss: 2.0028 - Val loss: 1.7216



Epoch 66/1000: 100%|██████████| 196/196 [01:05<00:00,  3.01it/s, current_loss=1.87, current_mean_loss=1.85]
Validation Epoch 66/1000: 100%|██████████| 49/49 [00:19<00:00,  2.51it/s, current_loss=1.59, current_mean_loss=1.69]

2023-02-12 17:19:14,611 - root - INFO - Epoch 65 - Train loss: 1.8457 - Val loss: 1.6851



Epoch 67/1000:  69%|██████▉   | 136/196 [00:45<00:16,  3.68it/s, current_loss=1.94, current_mean_loss=1.91]

Epoch 13071: reducing learning rate of group 0 to 5.0000e-04.


Epoch 67/1000: 100%|██████████| 196/196 [01:05<00:00,  3.00it/s, current_loss=1.66, current_mean_loss=1.81]
Validation Epoch 67/1000: 100%|██████████| 49/49 [00:19<00:00,  2.51it/s, current_loss=1.56, current_mean_loss=1.64]

2023-02-12 17:20:39,484 - root - INFO - Epoch 66 - Train loss: 1.8107 - Val loss: 1.6436



Epoch 68/1000: 100%|██████████| 196/196 [01:05<00:00,  2.98it/s, current_loss=1.42, current_mean_loss=1.55]
Validation Epoch 68/1000: 100%|██████████| 49/49 [00:19<00:00,  2.55it/s, current_loss=1.66, current_mean_loss=1.69]

2023-02-12 17:22:04,568 - root - INFO - Epoch 67 - Train loss: 1.5488 - Val loss: 1.6941



Epoch 69/1000: 100%|██████████| 196/196 [01:05<00:00,  3.01it/s, current_loss=1.43, current_mean_loss=1.54]
Validation Epoch 69/1000: 100%|██████████| 49/49 [00:19<00:00,  2.50it/s, current_loss=1.67, current_mean_loss=1.68]

2023-02-12 17:23:29,411 - root - INFO - Epoch 68 - Train loss: 1.5399 - Val loss: 1.6806



Epoch 70/1000: 100%|██████████| 196/196 [01:05<00:00,  3.01it/s, current_loss=1.34, current_mean_loss=1.55]
Validation Epoch 70/1000: 100%|██████████| 49/49 [00:20<00:00,  2.44it/s, current_loss=1.53, current_mean_loss=1.61]

2023-02-12 17:24:54,792 - root - INFO - Epoch 69 - Train loss: 1.5508 - Val loss: 1.6094
2023-02-12 17:24:54,793 - root - INFO - Saving best model



Epoch 71/1000: 100%|██████████| 196/196 [01:06<00:00,  2.97it/s, current_loss=1.88, current_mean_loss=1.6] 
Validation Epoch 71/1000: 100%|██████████| 49/49 [00:19<00:00,  2.58it/s, current_loss=1.59, current_mean_loss=1.66]

2023-02-12 17:26:19,953 - root - INFO - Epoch 70 - Train loss: 1.5954 - Val loss: 1.6621



Epoch 72/1000: 100%|██████████| 196/196 [01:06<00:00,  2.96it/s, current_loss=1.38, current_mean_loss=1.53]
Validation Epoch 72/1000: 100%|██████████| 49/49 [00:20<00:00,  2.44it/s, current_loss=1.67, current_mean_loss=1.67]

2023-02-12 17:27:46,282 - root - INFO - Epoch 71 - Train loss: 1.5264 - Val loss: 1.6668



Epoch 73/1000: 100%|██████████| 196/196 [01:04<00:00,  3.05it/s, current_loss=1.59, current_mean_loss=1.55]
Validation Epoch 73/1000: 100%|██████████| 49/49 [00:19<00:00,  2.52it/s, current_loss=1.67, current_mean_loss=1.69]

2023-02-12 17:29:10,062 - root - INFO - Epoch 72 - Train loss: 1.5483 - Val loss: 1.6855



Epoch 74/1000: 100%|██████████| 196/196 [01:05<00:00,  3.01it/s, current_loss=1.86, current_mean_loss=1.6] 
Validation Epoch 74/1000: 100%|██████████| 49/49 [00:18<00:00,  2.60it/s, current_loss=1.66, current_mean_loss=1.71]

2023-02-12 17:30:34,152 - root - INFO - Epoch 73 - Train loss: 1.6028 - Val loss: 1.7129



Epoch 75/1000: 100%|██████████| 196/196 [01:04<00:00,  3.03it/s, current_loss=1.48, current_mean_loss=1.54]
Validation Epoch 75/1000: 100%|██████████| 49/49 [00:18<00:00,  2.59it/s, current_loss=1.56, current_mean_loss=1.57]

2023-02-12 17:31:57,874 - root - INFO - Epoch 74 - Train loss: 1.5422 - Val loss: 1.5713
2023-02-12 17:31:57,875 - root - INFO - Saving best model



Epoch 76/1000: 100%|██████████| 196/196 [01:04<00:00,  3.06it/s, current_loss=1.72, current_mean_loss=1.58]
Validation Epoch 76/1000: 100%|██████████| 49/49 [00:19<00:00,  2.56it/s, current_loss=1.56, current_mean_loss=1.61]

2023-02-12 17:33:21,125 - root - INFO - Epoch 75 - Train loss: 1.5762 - Val loss: 1.6091



Epoch 77/1000: 100%|██████████| 196/196 [01:04<00:00,  3.03it/s, current_loss=1.4, current_mean_loss=1.55] 
Validation Epoch 77/1000: 100%|██████████| 49/49 [00:18<00:00,  2.59it/s, current_loss=1.65, current_mean_loss=1.77]

2023-02-12 17:34:44,687 - root - INFO - Epoch 76 - Train loss: 1.5491 - Val loss: 1.7686



Epoch 78/1000: 100%|██████████| 196/196 [01:04<00:00,  3.03it/s, current_loss=1.31, current_mean_loss=1.58]
Validation Epoch 78/1000: 100%|██████████| 49/49 [00:19<00:00,  2.55it/s, current_loss=1.57, current_mean_loss=1.67]

2023-02-12 17:36:08,702 - root - INFO - Epoch 77 - Train loss: 1.5844 - Val loss: 1.6732



Epoch 79/1000: 100%|██████████| 196/196 [01:04<00:00,  3.04it/s, current_loss=1.46, current_mean_loss=1.6] 
Validation Epoch 79/1000: 100%|██████████| 49/49 [00:19<00:00,  2.55it/s, current_loss=1.51, current_mean_loss=1.6] 

2023-02-12 17:37:32,564 - root - INFO - Epoch 78 - Train loss: 1.5997 - Val loss: 1.6035



Epoch 80/1000: 100%|██████████| 196/196 [01:03<00:00,  3.07it/s, current_loss=1.61, current_mean_loss=1.57]
Validation Epoch 80/1000: 100%|██████████| 49/49 [00:18<00:00,  2.62it/s, current_loss=1.67, current_mean_loss=1.73]

2023-02-12 17:38:55,289 - root - INFO - Epoch 79 - Train loss: 1.5709 - Val loss: 1.7289



Epoch 81/1000: 100%|██████████| 196/196 [01:05<00:00,  2.98it/s, current_loss=1.89, current_mean_loss=1.59]
Validation Epoch 81/1000: 100%|██████████| 49/49 [00:19<00:00,  2.54it/s, current_loss=1.68, current_mean_loss=1.73]

2023-02-12 17:40:20,395 - root - INFO - Epoch 80 - Train loss: 1.5875 - Val loss: 1.7323



Epoch 82/1000: 100%|██████████| 196/196 [01:04<00:00,  3.04it/s, current_loss=1.82, current_mean_loss=1.57] 
Validation Epoch 82/1000: 100%|██████████| 49/49 [00:19<00:00,  2.55it/s, current_loss=1.48, current_mean_loss=1.57]

2023-02-12 17:41:44,167 - root - INFO - Epoch 81 - Train loss: 1.5700 - Val loss: 1.5657
2023-02-12 17:41:44,170 - root - INFO - Saving best model



Epoch 83/1000: 100%|██████████| 196/196 [01:03<00:00,  3.06it/s, current_loss=1.52, current_mean_loss=1.6] 
Validation Epoch 83/1000: 100%|██████████| 49/49 [00:18<00:00,  2.63it/s, current_loss=1.66, current_mean_loss=1.65]

2023-02-12 17:43:06,914 - root - INFO - Epoch 82 - Train loss: 1.6000 - Val loss: 1.6542



Epoch 84/1000: 100%|██████████| 196/196 [01:04<00:00,  3.06it/s, current_loss=1.29, current_mean_loss=1.57]
Validation Epoch 84/1000: 100%|██████████| 49/49 [00:18<00:00,  2.58it/s, current_loss=1.56, current_mean_loss=1.6] 

2023-02-12 17:44:30,086 - root - INFO - Epoch 83 - Train loss: 1.5705 - Val loss: 1.6041



Epoch 85/1000: 100%|██████████| 196/196 [01:04<00:00,  3.05it/s, current_loss=1.65, current_mean_loss=1.57]
Validation Epoch 85/1000: 100%|██████████| 49/49 [00:18<00:00,  2.62it/s, current_loss=1.62, current_mean_loss=1.69]

2023-02-12 17:45:53,188 - root - INFO - Epoch 84 - Train loss: 1.5700 - Val loss: 1.6855



Epoch 86/1000: 100%|██████████| 196/196 [01:04<00:00,  3.05it/s, current_loss=1.72, current_mean_loss=1.57]
Validation Epoch 86/1000: 100%|██████████| 49/49 [00:19<00:00,  2.53it/s, current_loss=1.54, current_mean_loss=1.66]

2023-02-12 17:47:16,904 - root - INFO - Epoch 85 - Train loss: 1.5735 - Val loss: 1.6584



Epoch 87/1000: 100%|██████████| 196/196 [01:05<00:00,  2.99it/s, current_loss=1.28, current_mean_loss=1.57]
Validation Epoch 87/1000: 100%|██████████| 49/49 [00:19<00:00,  2.50it/s, current_loss=1.55, current_mean_loss=1.59]

2023-02-12 17:48:42,014 - root - INFO - Epoch 86 - Train loss: 1.5651 - Val loss: 1.5887



Epoch 88/1000: 100%|██████████| 196/196 [01:05<00:00,  3.00it/s, current_loss=1.79, current_mean_loss=1.55]
Validation Epoch 88/1000: 100%|██████████| 49/49 [00:18<00:00,  2.59it/s, current_loss=1.63, current_mean_loss=1.67]

2023-02-12 17:50:06,278 - root - INFO - Epoch 87 - Train loss: 1.5508 - Val loss: 1.6718



Epoch 89/1000: 100%|██████████| 196/196 [01:04<00:00,  3.05it/s, current_loss=1.78, current_mean_loss=1.54]
Validation Epoch 89/1000: 100%|██████████| 49/49 [00:19<00:00,  2.54it/s, current_loss=1.49, current_mean_loss=1.56]

2023-02-12 17:51:29,927 - root - INFO - Epoch 88 - Train loss: 1.5401 - Val loss: 1.5623
2023-02-12 17:51:29,928 - root - INFO - Saving best model



Epoch 90/1000: 100%|██████████| 196/196 [01:04<00:00,  3.03it/s, current_loss=1.16, current_mean_loss=1.6] 
Validation Epoch 90/1000: 100%|██████████| 49/49 [00:19<00:00,  2.52it/s, current_loss=1.7, current_mean_loss=1.73] 

2023-02-12 17:52:54,193 - root - INFO - Epoch 89 - Train loss: 1.5972 - Val loss: 1.7279



Epoch 91/1000: 100%|██████████| 196/196 [01:05<00:00,  2.99it/s, current_loss=1.99, current_mean_loss=1.54]
Validation Epoch 91/1000: 100%|██████████| 49/49 [00:19<00:00,  2.56it/s, current_loss=1.71, current_mean_loss=1.74]

2023-02-12 17:54:18,930 - root - INFO - Epoch 90 - Train loss: 1.5397 - Val loss: 1.7431



Epoch 92/1000: 100%|██████████| 196/196 [01:04<00:00,  3.05it/s, current_loss=1.31, current_mean_loss=1.58]
Validation Epoch 92/1000: 100%|██████████| 49/49 [00:19<00:00,  2.55it/s, current_loss=1.6, current_mean_loss=1.62] 

2023-02-12 17:55:42,503 - root - INFO - Epoch 91 - Train loss: 1.5758 - Val loss: 1.6197



Epoch 93/1000: 100%|██████████| 196/196 [01:04<00:00,  3.05it/s, current_loss=1.49, current_mean_loss=1.54]
Validation Epoch 93/1000: 100%|██████████| 49/49 [00:19<00:00,  2.53it/s, current_loss=1.56, current_mean_loss=1.56]

2023-02-12 17:57:06,072 - root - INFO - Epoch 92 - Train loss: 1.5391 - Val loss: 1.5586
2023-02-12 17:57:06,073 - root - INFO - Saving best model



Epoch 94/1000: 100%|██████████| 196/196 [01:04<00:00,  3.05it/s, current_loss=1.41, current_mean_loss=1.54]
Validation Epoch 94/1000: 100%|██████████| 49/49 [00:18<00:00,  2.59it/s, current_loss=1.54, current_mean_loss=1.58]

2023-02-12 17:58:29,374 - root - INFO - Epoch 93 - Train loss: 1.5364 - Val loss: 1.5804



Epoch 95/1000: 100%|██████████| 196/196 [01:03<00:00,  3.07it/s, current_loss=1.32, current_mean_loss=1.53]
Validation Epoch 95/1000: 100%|██████████| 49/49 [00:19<00:00,  2.53it/s, current_loss=1.64, current_mean_loss=1.72]

2023-02-12 17:59:52,667 - root - INFO - Epoch 94 - Train loss: 1.5340 - Val loss: 1.7178



Epoch 96/1000: 100%|██████████| 196/196 [01:04<00:00,  3.03it/s, current_loss=1.57, current_mean_loss=1.55]
Validation Epoch 96/1000: 100%|██████████| 49/49 [00:18<00:00,  2.62it/s, current_loss=1.56, current_mean_loss=1.6] 

2023-02-12 18:01:16,148 - root - INFO - Epoch 95 - Train loss: 1.5541 - Val loss: 1.5999



Epoch 97/1000: 100%|██████████| 196/196 [01:04<00:00,  3.04it/s, current_loss=1.41, current_mean_loss=1.59]
Validation Epoch 97/1000: 100%|██████████| 49/49 [00:19<00:00,  2.51it/s, current_loss=1.62, current_mean_loss=1.71]

2023-02-12 18:02:40,262 - root - INFO - Epoch 96 - Train loss: 1.5916 - Val loss: 1.7149



Epoch 98/1000:   5%|▍         | 9/196 [00:03<00:58,  3.22it/s, current_loss=1.4, current_mean_loss=1.45] 

Epoch 19021: reducing learning rate of group 0 to 2.5000e-04.


Epoch 98/1000: 100%|██████████| 196/196 [01:07<00:00,  2.89it/s, current_loss=1.38, current_mean_loss=1.45]
Validation Epoch 98/1000: 100%|██████████| 49/49 [00:19<00:00,  2.49it/s, current_loss=1.53, current_mean_loss=1.54]

2023-02-12 18:04:07,754 - root - INFO - Epoch 97 - Train loss: 1.4464 - Val loss: 1.5394
2023-02-12 18:04:07,755 - root - INFO - Saving best model



Epoch 99/1000: 100%|██████████| 196/196 [01:04<00:00,  3.02it/s, current_loss=1.44, current_mean_loss=1.45]
Validation Epoch 99/1000: 100%|██████████| 49/49 [00:18<00:00,  2.62it/s, current_loss=1.5, current_mean_loss=1.51] 

2023-02-12 18:05:31,536 - root - INFO - Epoch 98 - Train loss: 1.4503 - Val loss: 1.5137
2023-02-12 18:05:31,537 - root - INFO - Saving best model



Epoch 100/1000: 100%|██████████| 196/196 [01:04<00:00,  3.05it/s, current_loss=1.1, current_mean_loss=1.43] 
Validation Epoch 100/1000: 100%|██████████| 49/49 [00:19<00:00,  2.56it/s, current_loss=1.48, current_mean_loss=1.52]

2023-02-12 18:06:55,096 - root - INFO - Epoch 99 - Train loss: 1.4305 - Val loss: 1.5243



Epoch 101/1000: 100%|██████████| 196/196 [01:05<00:00,  3.01it/s, current_loss=1.55, current_mean_loss=1.44] 
Validation Epoch 101/1000: 100%|██████████| 49/49 [00:19<00:00,  2.57it/s, current_loss=1.46, current_mean_loss=1.51]

2023-02-12 18:08:19,291 - root - INFO - Epoch 100 - Train loss: 1.4428 - Val loss: 1.5060
2023-02-12 18:08:19,293 - root - INFO - Saving best model



Epoch 102/1000: 100%|██████████| 196/196 [01:03<00:00,  3.07it/s, current_loss=1.37, current_mean_loss=1.44]
Validation Epoch 102/1000: 100%|██████████| 49/49 [00:18<00:00,  2.60it/s, current_loss=1.49, current_mean_loss=1.51]

2023-02-12 18:09:42,045 - root - INFO - Epoch 101 - Train loss: 1.4391 - Val loss: 1.5079



Epoch 103/1000: 100%|██████████| 196/196 [01:04<00:00,  3.05it/s, current_loss=2.1, current_mean_loss=1.45] 
Validation Epoch 103/1000: 100%|██████████| 49/49 [00:19<00:00,  2.55it/s, current_loss=1.54, current_mean_loss=1.57]

2023-02-12 18:11:05,624 - root - INFO - Epoch 102 - Train loss: 1.4524 - Val loss: 1.5667



Epoch 104/1000: 100%|██████████| 196/196 [01:03<00:00,  3.07it/s, current_loss=1.23, current_mean_loss=1.45]
Validation Epoch 104/1000: 100%|██████████| 49/49 [00:18<00:00,  2.58it/s, current_loss=1.66, current_mean_loss=1.61]

2023-02-12 18:12:28,513 - root - INFO - Epoch 103 - Train loss: 1.4473 - Val loss: 1.6109



Epoch 105/1000: 100%|██████████| 196/196 [01:04<00:00,  3.06it/s, current_loss=1.19, current_mean_loss=1.44]
Validation Epoch 105/1000: 100%|██████████| 49/49 [00:18<00:00,  2.60it/s, current_loss=1.49, current_mean_loss=1.53]

2023-02-12 18:13:51,443 - root - INFO - Epoch 104 - Train loss: 1.4380 - Val loss: 1.5252



Epoch 106/1000: 100%|██████████| 196/196 [01:04<00:00,  3.06it/s, current_loss=1.32, current_mean_loss=1.44]
Validation Epoch 106/1000: 100%|██████████| 49/49 [00:19<00:00,  2.56it/s, current_loss=1.49, current_mean_loss=1.51]

2023-02-12 18:15:14,746 - root - INFO - Epoch 105 - Train loss: 1.4409 - Val loss: 1.5067



Epoch 107/1000: 100%|██████████| 196/196 [01:04<00:00,  3.06it/s, current_loss=1.32, current_mean_loss=1.46]
Validation Epoch 107/1000: 100%|██████████| 49/49 [00:18<00:00,  2.61it/s, current_loss=1.51, current_mean_loss=1.52]

2023-02-12 18:16:37,731 - root - INFO - Epoch 106 - Train loss: 1.4616 - Val loss: 1.5240



Epoch 108/1000: 100%|██████████| 196/196 [01:04<00:00,  3.03it/s, current_loss=1.16, current_mean_loss=1.43] 
Validation Epoch 108/1000: 100%|██████████| 49/49 [00:19<00:00,  2.53it/s, current_loss=1.49, current_mean_loss=1.57]

2023-02-12 18:18:01,811 - root - INFO - Epoch 107 - Train loss: 1.4348 - Val loss: 1.5692



Epoch 109/1000: 100%|██████████| 196/196 [01:04<00:00,  3.05it/s, current_loss=1.24, current_mean_loss=1.45]
Validation Epoch 109/1000: 100%|██████████| 49/49 [00:18<00:00,  2.59it/s, current_loss=1.55, current_mean_loss=1.56]

2023-02-12 18:19:25,061 - root - INFO - Epoch 108 - Train loss: 1.4456 - Val loss: 1.5604



Epoch 110/1000: 100%|██████████| 196/196 [01:04<00:00,  3.03it/s, current_loss=1.32, current_mean_loss=1.44]
Validation Epoch 110/1000: 100%|██████████| 49/49 [00:18<00:00,  2.59it/s, current_loss=1.52, current_mean_loss=1.57]

2023-02-12 18:20:48,664 - root - INFO - Epoch 109 - Train loss: 1.4442 - Val loss: 1.5661



Epoch 111/1000: 100%|██████████| 196/196 [01:03<00:00,  3.07it/s, current_loss=1.29, current_mean_loss=1.46] 
Validation Epoch 111/1000: 100%|██████████| 49/49 [00:18<00:00,  2.60it/s, current_loss=1.57, current_mean_loss=1.63]

2023-02-12 18:22:11,464 - root - INFO - Epoch 110 - Train loss: 1.4559 - Val loss: 1.6262



Epoch 112/1000: 100%|██████████| 196/196 [01:04<00:00,  3.05it/s, current_loss=1.4, current_mean_loss=1.46] 
Validation Epoch 112/1000: 100%|██████████| 49/49 [00:19<00:00,  2.56it/s, current_loss=1.55, current_mean_loss=1.56]

2023-02-12 18:23:34,837 - root - INFO - Epoch 111 - Train loss: 1.4585 - Val loss: 1.5578



Epoch 113/1000: 100%|██████████| 196/196 [01:03<00:00,  3.10it/s, current_loss=1.09, current_mean_loss=1.47]
Validation Epoch 113/1000: 100%|██████████| 49/49 [00:18<00:00,  2.59it/s, current_loss=1.48, current_mean_loss=1.52]

2023-02-12 18:24:57,075 - root - INFO - Epoch 112 - Train loss: 1.4697 - Val loss: 1.5200



Epoch 114/1000: 100%|██████████| 196/196 [01:05<00:00,  3.01it/s, current_loss=1.41, current_mean_loss=1.44]
Validation Epoch 114/1000: 100%|██████████| 49/49 [00:18<00:00,  2.58it/s, current_loss=1.48, current_mean_loss=1.53]

2023-02-12 18:26:21,140 - root - INFO - Epoch 113 - Train loss: 1.4427 - Val loss: 1.5320



Epoch 115/1000: 100%|██████████| 196/196 [01:04<00:00,  3.03it/s, current_loss=1.32, current_mean_loss=1.43]
Validation Epoch 115/1000: 100%|██████████| 49/49 [00:18<00:00,  2.58it/s, current_loss=1.47, current_mean_loss=1.53]

2023-02-12 18:27:44,817 - root - INFO - Epoch 114 - Train loss: 1.4262 - Val loss: 1.5271



Epoch 116/1000: 100%|██████████| 196/196 [01:05<00:00,  2.99it/s, current_loss=1.57, current_mean_loss=1.44]
Validation Epoch 116/1000: 100%|██████████| 49/49 [00:18<00:00,  2.59it/s, current_loss=1.48, current_mean_loss=1.55]

2023-02-12 18:29:09,404 - root - INFO - Epoch 115 - Train loss: 1.4371 - Val loss: 1.5513



Epoch 117/1000: 100%|██████████| 196/196 [01:05<00:00,  3.00it/s, current_loss=1.62, current_mean_loss=1.43]
Validation Epoch 117/1000: 100%|██████████| 49/49 [00:19<00:00,  2.53it/s, current_loss=1.58, current_mean_loss=1.59]

2023-02-12 18:30:34,204 - root - INFO - Epoch 116 - Train loss: 1.4267 - Val loss: 1.5941



Epoch 118/1000: 100%|██████████| 196/196 [01:04<00:00,  3.02it/s, current_loss=1.31, current_mean_loss=1.43]
Validation Epoch 118/1000: 100%|██████████| 49/49 [00:18<00:00,  2.63it/s, current_loss=1.52, current_mean_loss=1.52]

2023-02-12 18:31:57,844 - root - INFO - Epoch 117 - Train loss: 1.4307 - Val loss: 1.5194



Epoch 119/1000: 100%|██████████| 196/196 [01:04<00:00,  3.06it/s, current_loss=1.2, current_mean_loss=1.44] 
Validation Epoch 119/1000: 100%|██████████| 49/49 [00:19<00:00,  2.52it/s, current_loss=1.49, current_mean_loss=1.52]

2023-02-12 18:33:21,437 - root - INFO - Epoch 118 - Train loss: 1.4384 - Val loss: 1.5154



Epoch 120/1000: 100%|██████████| 196/196 [01:03<00:00,  3.07it/s, current_loss=1.62, current_mean_loss=1.44] 
Validation Epoch 120/1000: 100%|██████████| 49/49 [00:19<00:00,  2.54it/s, current_loss=1.51, current_mean_loss=1.56]

2023-02-12 18:34:44,545 - root - INFO - Epoch 119 - Train loss: 1.4355 - Val loss: 1.5563



Epoch 121/1000: 100%|██████████| 196/196 [01:03<00:00,  3.07it/s, current_loss=1.36, current_mean_loss=1.44]
Validation Epoch 121/1000: 100%|██████████| 49/49 [00:18<00:00,  2.61it/s, current_loss=1.55, current_mean_loss=1.56]

2023-02-12 18:36:07,231 - root - INFO - Epoch 120 - Train loss: 1.4402 - Val loss: 1.5622



Epoch 122/1000: 100%|██████████| 196/196 [01:04<00:00,  3.05it/s, current_loss=1.32, current_mean_loss=1.43]
Validation Epoch 122/1000: 100%|██████████| 49/49 [00:19<00:00,  2.52it/s, current_loss=1.53, current_mean_loss=1.52]

2023-02-12 18:37:31,042 - root - INFO - Epoch 121 - Train loss: 1.4255 - Val loss: 1.5228



Epoch 123/1000: 100%|██████████| 196/196 [01:04<00:00,  3.04it/s, current_loss=1.52, current_mean_loss=1.43]
Validation Epoch 123/1000: 100%|██████████| 49/49 [00:19<00:00,  2.54it/s, current_loss=1.52, current_mean_loss=1.55]

2023-02-12 18:38:54,809 - root - INFO - Epoch 122 - Train loss: 1.4321 - Val loss: 1.5478



Epoch 124/1000: 100%|██████████| 196/196 [01:04<00:00,  3.04it/s, current_loss=1.43, current_mean_loss=1.42]
Validation Epoch 124/1000: 100%|██████████| 49/49 [00:19<00:00,  2.57it/s, current_loss=1.52, current_mean_loss=1.55]

2023-02-12 18:40:18,513 - root - INFO - Epoch 123 - Train loss: 1.4222 - Val loss: 1.5549



Epoch 125/1000: 100%|██████████| 196/196 [01:04<00:00,  3.02it/s, current_loss=1.13, current_mean_loss=1.43]
Validation Epoch 125/1000: 100%|██████████| 49/49 [00:19<00:00,  2.47it/s, current_loss=1.6, current_mean_loss=1.61] 

2023-02-12 18:41:43,282 - root - INFO - Epoch 124 - Train loss: 1.4254 - Val loss: 1.6125



Epoch 126/1000: 100%|██████████| 196/196 [01:05<00:00,  2.98it/s, current_loss=1.92, current_mean_loss=1.43]
Validation Epoch 126/1000: 100%|██████████| 49/49 [00:19<00:00,  2.50it/s, current_loss=1.48, current_mean_loss=1.55]

2023-02-12 18:43:08,606 - root - INFO - Epoch 125 - Train loss: 1.4332 - Val loss: 1.5517



Epoch 127/1000: 100%|██████████| 196/196 [01:05<00:00,  2.99it/s, current_loss=1.2, current_mean_loss=1.43] 
Validation Epoch 127/1000: 100%|██████████| 49/49 [00:18<00:00,  2.59it/s, current_loss=1.53, current_mean_loss=1.58]

2023-02-12 18:44:32,995 - root - INFO - Epoch 126 - Train loss: 1.4284 - Val loss: 1.5841



Epoch 128/1000: 100%|██████████| 196/196 [01:04<00:00,  3.05it/s, current_loss=1.22, current_mean_loss=1.42]
Validation Epoch 128/1000: 100%|██████████| 49/49 [00:19<00:00,  2.51it/s, current_loss=1.53, current_mean_loss=1.57]

2023-02-12 18:45:56,894 - root - INFO - Epoch 127 - Train loss: 1.4155 - Val loss: 1.5673



Epoch 129/1000: 100%|██████████| 196/196 [01:03<00:00,  3.08it/s, current_loss=1.27, current_mean_loss=1.45]
Validation Epoch 129/1000: 100%|██████████| 49/49 [00:18<00:00,  2.62it/s, current_loss=1.56, current_mean_loss=1.62]

2023-02-12 18:47:19,420 - root - INFO - Epoch 128 - Train loss: 1.4478 - Val loss: 1.6241



Epoch 130/1000: 100%|██████████| 196/196 [01:05<00:00,  2.98it/s, current_loss=1.48, current_mean_loss=1.43]
Validation Epoch 130/1000:  41%|████      | 20/49 [00:08<00:10,  2.66it/s, current_loss=1.35, current_mean_loss=1.65]