In [224]:
import math
from typing import List
import sys
import os
from pathlib import Path
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm
from torch.utils.data import DataLoader, Dataset
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.discriminant_analysis import StandardScaler
from sklearn.model_selection import train_test_split
from copy import deepcopy
import torch.optim as optim
from transformers import AutoTokenizer, AutoModel
import optuna
from optuna.trial import TrialState
from sklearn.model_selection import KFold
import statistics

In [225]:
sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), "..")))

In [226]:
from utils import calculate_metric

In [338]:
INPUT = "../data/data_removing_na.xlsx"

MODEL_PATH = 'best_mlp_emb_model.pth'
BATCH_SIZE = 1

DEVICE = torch.device('cuda:2' if torch.cuda.is_available() else 'cpu')
DEVICE_LIST = [2, 1, 3, 0]

MODEL = 'ProsusAI/finbert' #'AdaptLLM/finance-LLM'

In [228]:
DEVICE

device(type='cuda', index=2)

In [229]:
df = pd.read_excel(INPUT)

In [230]:
df.head()

Unnamed: 0,rr1_30,currency,seniorioty_adj,coupon rate,domicile_country,exchange_country,Industry_sector,Industry_group,Industry_subgroup,event_type,...,PD_55_pd,PD_56_pd,PD_57_pd,PD_58_pd,PD_59_pd,PD_60_pd,DTD,NI_Over_TA,Size,defaulted_in_last_6_months
0,0.259908,USD,Senior Subordinated Unsecured,9.0,United States,United States,Consumer Discretionary,Retail & Whsle - Discretionary,E-Commerce Discretionary,Bankruptcy Filing,...,0.396731,0.397453,0.398148,0.398819,0.399467,0.400092,-0.732815,-0.007137,-0.852484,False
1,0.032729,USD,Senior Subordinated Unsecured,5.75,United States,United States,Health Care,Health Care,Health Care Facilities & Svcs,Default Corp Action,...,0.957454,0.957467,0.95748,0.957492,0.957503,0.957514,-1.666262,-0.000286,-1.186347,False
2,0.9724,USD,Unsecured,5.675,South Korea,South Korea,Consumer Discretionary,Retail & Whsle - Discretionary,Wholesale - Discretionary,Default Corp Action,...,0.568169,0.568693,0.569197,0.569682,0.57015,0.5706,-1.853366,0.000191,1.053677,False
3,1.047416,CHF,Unsecured,0.125,South Korea,South Korea,Consumer Discretionary,Retail & Whsle - Discretionary,Wholesale - Discretionary,Default Corp Action,...,0.568169,0.568693,0.569197,0.569682,0.57015,0.5706,-1.853366,0.000191,1.053677,False
4,0.848872,JPY,Unsecured,1.75,Japan,Japan,Industrials,Industrial Products,Electrical Equipment,Bankruptcy Filing,...,0.130285,0.130688,0.131081,0.131465,0.13184,0.132206,-0.768857,-0.028058,-1.946507,False


In [231]:
df.shape

(1725, 165)

In [232]:
feature_list = df.columns
category_feature_key = ['currency', 'seniorioty', 'domicile_country', 'exchange_country', 'Industry_sector', 'Industry_group', 'Industry_subgroup', 'event_type',
                        'event_type_subcategory_sum', 'defaulted_in_last_6_months', 'defaulted_in_last_5_years']
category_features = [i for i in feature_list if any(sub in i for sub in category_feature_key)]
non_category_features = [i for i in feature_list if i not in category_features]

print(len(category_features))
print(len(non_category_features))

11
154


In [233]:
non_category_features.remove('rr1_30')

In [234]:
non_category_features

['coupon rate',
 'SP500 MD',
 'Average daily 1-year SP500 return',
 'Ratio to MA',
 'US Corporate Bond Yield Spread',
 'US Corporate Bond Yield Spread(3-5 year)',
 'US Corporate Bond Yield Spread(5-7 year)',
 'US Corporate Bond Yield Spread(7-10 year)',
 'US Corporate Bond Yield Spread(10+ year)',
 'US Generic Govt 3 Month Yield',
 'US Generic Govt 6 Month Yield',
 'US Generic Govt 12 Month Yield',
 'US Generic Govt 2 Year Yield',
 'US Generic Govt 3 Year Yield',
 'US Generic Govt 5 Year Yield',
 'US Generic Govt 7 Year Yield',
 'US Generic Govt 10 Year Yield',
 'marketcap',
 'sector_domicile_dtd',
 'sector_exchange_dtd',
 'sector_dtd',
 'PD_1_domicile_sector',
 'PD_3_domicile_sector',
 'PD_12_domicile_sector',
 'PD_1_domicile_subsec',
 'PD_3_domicile_subsec',
 'PD_12_domicile_subsec',
 'PD_1_exch_sector',
 'PD_3_exch_sector',
 'PD_12_exch_sector',
 'PD_1_exch_subsector',
 'PD_3_exch_subsector',
 'PD_12_exch_subsector',
 'PD_1_global_sector',
 'PD_3_global_sector',
 'PD_12_global_secto

In [235]:
category_features

['currency',
 'seniorioty_adj',
 'domicile_country',
 'exchange_country',
 'Industry_sector',
 'Industry_group',
 'Industry_subgroup',
 'event_type',
 'event_type_subcategory_sum',
 'defaulted_in_last_5_years',
 'defaulted_in_last_6_months']

In [236]:
for i in category_features:
    if i == "seniorioty_adj":
        df[i] = df[i] + " Bond"
    elif i == "domicile_country":
        df[i] = "domicile country: " + df[i]
    elif i == "exchange_country":
        df[i] = "exchange_country: " + df[i]
    elif i == "event_type":
        df[i] = "default reason: " + df[i]
    elif i == "defaulted_in_last_6_months":
        df[i] = df[i].apply(lambda x: "The bond has defaulted in the last 6 months" if x == True else "The bond has not defaulted in the last 6 months")
    elif i == "defaulted_in_last_5_years":
        df[i] = df[i].apply(lambda x: "The bond has defaulted in the last 5 years" if x == True else "The bond has not defaulted in the last 5 years")

In [237]:
df[category_features]

Unnamed: 0,currency,seniorioty_adj,domicile_country,exchange_country,Industry_sector,Industry_group,Industry_subgroup,event_type,event_type_subcategory_sum,defaulted_in_last_5_years,defaulted_in_last_6_months
0,USD,Senior Subordinated Unsecured Bond,domicile country: United States,exchange_country: United States,Consumer Discretionary,Retail & Whsle - Discretionary,E-Commerce Discretionary,default reason: Bankruptcy Filing,Debt Restructuring,The bond has not defaulted in the last 5 years,The bond has not defaulted in the last 6 months
1,USD,Senior Subordinated Unsecured Bond,domicile country: United States,exchange_country: United States,Health Care,Health Care,Health Care Facilities & Svcs,default reason: Default Corp Action,Missing Coupon payment only,The bond has not defaulted in the last 5 years,The bond has not defaulted in the last 6 months
2,USD,Unsecured Bond,domicile country: South Korea,exchange_country: South Korea,Consumer Discretionary,Retail & Whsle - Discretionary,Wholesale - Discretionary,default reason: Default Corp Action,Missing Interest payment,The bond has not defaulted in the last 5 years,The bond has not defaulted in the last 6 months
3,CHF,Unsecured Bond,domicile country: South Korea,exchange_country: South Korea,Consumer Discretionary,Retail & Whsle - Discretionary,Wholesale - Discretionary,default reason: Default Corp Action,Missing Interest payment,The bond has not defaulted in the last 5 years,The bond has not defaulted in the last 6 months
4,JPY,Unsecured Bond,domicile country: Japan,exchange_country: Japan,Industrials,Industrial Products,Electrical Equipment,default reason: Bankruptcy Filing,Rehabilitation,The bond has not defaulted in the last 5 years,The bond has not defaulted in the last 6 months
...,...,...,...,...,...,...,...,...,...,...,...
1720,MYR,Senior Secured Bond,domicile country: Malaysia,exchange_country: Malaysia,Consumer Discretionary,Consumer Discretionary Products,Automotive,default reason: Bankruptcy Filing,Others,The bond has not defaulted in the last 5 years,The bond has not defaulted in the last 6 months
1721,MYR,Senior Secured Bond,domicile country: Malaysia,exchange_country: Malaysia,Consumer Discretionary,Consumer Discretionary Products,Automotive,default reason: Bankruptcy Filing,Others,The bond has not defaulted in the last 5 years,The bond has not defaulted in the last 6 months
1722,MYR,Senior Secured Bond,domicile country: Malaysia,exchange_country: Malaysia,Consumer Discretionary,Consumer Discretionary Products,Automotive,default reason: Bankruptcy Filing,Others,The bond has not defaulted in the last 5 years,The bond has not defaulted in the last 6 months
1723,MYR,Senior Secured Bond,domicile country: Malaysia,exchange_country: Malaysia,Consumer Discretionary,Consumer Discretionary Products,Automotive,default reason: Bankruptcy Filing,Others,The bond has not defaulted in the last 5 years,The bond has not defaulted in the last 6 months


In [293]:
labels = df["rr1_30"]
features = df.drop(columns=["rr1_30"])

In [239]:
features.shape

(1725, 164)

In [240]:
# split data into training and val set
test_size = 0.25
train_features, test_features, train_labels, test_labels = train_test_split(features, labels, test_size=test_size, random_state=42)

print('Training Features Shape:', train_features.shape)
print('Training Labels Shape:', train_labels.shape)
print('Val Features Shape:', test_features.shape)
print('Val Labels Shape:', test_labels.shape)

Training Features Shape: (1293, 164)
Training Labels Shape: (1293,)
Val Features Shape: (432, 164)
Val Labels Shape: (432,)


In [241]:
# Normalize the data
# Prepare the ColumnTransformer
# scaler = ColumnTransformer(
#     transformers=[
#         ('num', StandardScaler(), non_category_features)   # StandardScaler()
#     ],
#     remainder='passthrough'  # Leave categorical features untouched
# )

# Don't cheat - fit only on training data
# scaler.fit(features)
# train_features = scaler.transform(train_features)
# test_features = scaler.transform(test_features)

In [242]:
# feature_list = non_category_features + category_features

In [243]:
# train_features = pd.DataFrame(train_features, columns=feature_list)
# test_features = pd.DataFrame(test_features, columns=feature_list)

In [244]:
train_features.head()

Unnamed: 0,currency,seniorioty_adj,coupon rate,domicile_country,exchange_country,Industry_sector,Industry_group,Industry_subgroup,event_type,event_type_subcategory_sum,...,PD_55_pd,PD_56_pd,PD_57_pd,PD_58_pd,PD_59_pd,PD_60_pd,DTD,NI_Over_TA,Size,defaulted_in_last_6_months
755,USD,Senior Unsecured Bond,7.5,domicile country: United States,exchange_country: United States,Energy,Oil & Gas,Oil & Gas Producers,default reason: Default Corp Action,Missing Coupon payment only,...,0.691011,0.691382,0.691738,0.69208,0.692408,0.692725,-1.084433,-0.052027,-2.074964,The bond has not defaulted in the last 6 months
588,USD,Unsecured Bond,6.0,domicile country: Indonesia,exchange_country: Indonesia,Materials,Materials,Metals & Mining,default reason: Default Corp Action,Debt Restructuring,...,0.183801,0.184996,0.186167,0.187313,0.188437,0.189539,-0.540409,0.017209,0.864692,The bond has not defaulted in the last 6 months
585,USD,Senior Secured Bond,11.0,domicile country: United States,exchange_country: United States,Materials,Materials,Metals & Mining,default reason: Default Corp Action,Debt Restructuring,...,0.278774,0.280216,0.281615,0.282972,0.28429,0.285571,0.754647,-0.010395,-0.342209,The bond has not defaulted in the last 6 months
1329,USD,Senior Secured Bond,9.125,domicile country: United States,exchange_country: United States,Energy,Oil & Gas,Oil & Gas Services & Equip,default reason: Default Corp Action,Missing Coupon & principal payment,...,0.190375,0.191471,0.192544,0.193594,0.194622,0.195628,-0.24208,-0.022618,-2.808528,The bond has not defaulted in the last 6 months
973,USD,Senior Secured Bond,9.25,domicile country: United States,exchange_country: United States,Energy,Oil & Gas,Oil & Gas Producers,default reason: Default Corp Action,Others,...,0.149501,0.15093,0.152327,0.153692,0.155027,0.156332,1.138686,3.3e-05,-0.085154,The bond has not defaulted in the last 6 months


In [246]:
# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModel.from_pretrained(MODEL)
model.eval()

BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(30522, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0-11): 12 x BertLayer(
        (attention): BertAttention(
          (self): BertSdpaSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False

In [247]:
# empty cache first
torch.cuda.empty_cache()

In [248]:
model = nn.DataParallel(model, device_ids = DEVICE_LIST)
model.to(DEVICE)

DataParallel(
  (module): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwis

In [249]:
# Function to create batches
def create_batches(inputs, batch_size):
    input_ids = inputs["input_ids"]
    attention_mask = inputs["attention_mask"]
    
    for i in range(0, len(input_ids), batch_size):
        input_batch = {
            "input_ids": input_ids[i:i + batch_size].to(DEVICE),
            "attention_mask": attention_mask[i:i + batch_size].to(DEVICE),
        }
        yield input_batch

In [252]:
# Convert the non-categorical columns to tensors
train_features_np = train_features[non_category_features].values.astype('float32')
train_features_tensors = torch.tensor(train_features_np, dtype=torch.float32)

test_features_np = test_features[non_category_features].values.astype('float32')
test_features_tensors = torch.tensor(test_features_np, dtype=torch.float32)

In [255]:
train_features_tensors.shape

torch.Size([1293, 153])

In [256]:
batch_size = 2  # Adjust batch size based on your GPU memory

for i in category_features:
    # empty cache first
    torch.cuda.empty_cache()
    
    uniqueCats = list(set(df[i]))
    
    inputs = tokenizer(uniqueCats, return_tensors="pt", padding=True, truncation=True)
    inputs = {key: value.to(DEVICE) for key, value in inputs.items()}
    
    # create input batches
    batches = list(create_batches(inputs, batch_size))
    
    # Get the embeddings
    all_embeddings = []

    for batch in batches:
        with torch.no_grad():
            outputs = model(**batch)
            embeddings = outputs.last_hidden_state.mean(dim=1)

            # The embeddings are in the shape [batch_size, sequence_length, hidden_size]
            all_embeddings.append(embeddings)
            
    
    cpu_embeddings = torch.cat(all_embeddings, dim=0).to("cpu")
    catDict = {key: value for key, value in zip(uniqueCats, cpu_embeddings)}
    
    #### map to train_features
    train_features[i] = train_features[i].map(catDict)

    # Convert the 'category_tensor' column to a list of tensors and stack them
    train_category_tensors = torch.stack(train_features[i].tolist())

    # Concatenate the tensors along the second dimension (columns)
    train_features_tensors = torch.cat((train_features_tensors, train_category_tensors), dim=1)
    
    print(f"Shape of train_category_tensors: {train_category_tensors.shape} for feature {i}")
    print(f"Shape of train_feature_tensors: {train_features_tensors.shape}")
    
    #### map to test_features
    test_features[i] = test_features[i].map(catDict)

    # Convert the 'category_tensor' column to a list of tensors and stack them
    test_category_tensors = torch.stack(test_features[i].tolist())

    # Concatenate the tensors along the second dimension (columns)
    test_features_tensors = torch.cat((test_features_tensors, test_category_tensors), dim=1)
    
    print(f"Shape of test_category_tensors: {test_category_tensors.shape} for feature {i}")
    print(f"Shape of test_feature_tensors: {test_features_tensors.shape}")

Shape of train_category_tensors: torch.Size([1293, 768]) for feature currency
Shape of train_feature_tensors: torch.Size([1293, 921])
Shape of test_category_tensors: torch.Size([432, 768]) for feature currency
Shape of test_feature_tensors: torch.Size([432, 921])
Shape of train_category_tensors: torch.Size([1293, 768]) for feature seniorioty_adj
Shape of train_feature_tensors: torch.Size([1293, 1689])
Shape of test_category_tensors: torch.Size([432, 768]) for feature seniorioty_adj
Shape of test_feature_tensors: torch.Size([432, 1689])
Shape of train_category_tensors: torch.Size([1293, 768]) for feature domicile_country
Shape of train_feature_tensors: torch.Size([1293, 2457])
Shape of test_category_tensors: torch.Size([432, 768]) for feature domicile_country
Shape of test_feature_tensors: torch.Size([432, 2457])
Shape of train_category_tensors: torch.Size([1293, 768]) for feature exchange_country
Shape of train_feature_tensors: torch.Size([1293, 3225])
Shape of test_category_tensors: t

In [257]:
train_features_tensors.shape

torch.Size([1293, 8601])

In [258]:
test_features_tensors.shape

torch.Size([432, 8601])

In [259]:
# split data into training and val set
# test_size = 0.25
# train_features_tensors2, val_features_tensors, train_labels2, val_labels = train_test_split(train_features_tensors, train_labels, test_size=test_size, random_state=42)

# print('Training Features Shape:', train_features_tensors2.shape)
# print('Training Labels Shape:', train_labels2.shape)
# print('Val Features Shape:', val_features_tensors.shape)
# print('Val Labels Shape:', val_labels.shape)

In [260]:
# similarity = F.cosine_similarity(embeddings[0], embeddings[1], dim=0)
# print(f"Cosine Similarity: {similarity.item()}")

In [261]:
class CustomDataset(Dataset):
    def __init__(self, features, labels):
        self.features = features
        self.labels = labels
    
    def __len__(self):
        return len(self.features)
    
    def __getitem__(self, idx):
        feature = self.features[idx]
        label = self.labels[idx]
        return torch.tensor(feature, dtype=torch.float32), torch.tensor(label, dtype=torch.float32)

In [262]:
# Create dataset instances
# train_dataset = CustomDataset(train_features_tensors2, train_labels2.to_numpy())
# eval_dataset = CustomDataset(val_features_tensors, val_labels.to_numpy())
# test_dataset = CustomDataset(test_features_tensors, test_labels.to_numpy())

In [263]:
# train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
# eval_loader = DataLoader(eval_dataset, batch_size=BATCH_SIZE, shuffle=False)
# test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [264]:
# for i, (inputs, labels) in enumerate(train_loader):
#     print(i, inputs.shape, labels.shape)
#     # print(inputs.size(0), inputs.view(inputs.size(0), -1).shape)

TUNING PARAMETERS

In [265]:
# !pip install optuna

In [266]:
class MLP(nn.Module):
    def __init__(
        self,
        *,
        d_in: int,
        d_layers: List[int],
        dropout: float,
        d_out: int,
        activation_name='ReLU',
        negative_slope=0.01
    ) -> None:
        super().__init__()

        self.linear_layers = nn.ModuleList([
            nn.Linear(d_layers[i - 1] if i else d_in, x)
            for i, x in enumerate(d_layers)
            ])
        
        # self.dropout_layer = nn.ModuleList([nn.Dropout(p) for p in dropout])
        self.dropout_layer = nn.Dropout(dropout)
        self.head = nn.Linear(d_layers[-1] if d_layers else d_in, d_out)
        self.activation_layer = getattr(nn, activation_name)()
        
        if self.activation_layer == nn.LeakyReLU:
            self.activation_layer = self.activation_layer(negative_slope)
     

    def forward(self, inputs):

        x = inputs
        for linear_layer in self.linear_layers:
            x = linear_layer(x)
            x = self.activation_layer(x)
            x = self.dropout_layer(x)
        x = self.head(x)
        x = x.squeeze(-1)
        return x

In [267]:
# define loss function
criterion = nn.MSELoss()

In [269]:
EPOCHS = 100

In [270]:
train_features_np = train_features_tensors.numpy()
test_features_np = test_features_tensors.numpy()

In [271]:
scaler = StandardScaler()

In [272]:
def objective(trial):
    # Define hyperparameters
    num_blocks = trial.suggest_int('num_blocks', 1, 5)
    
    out_features_list = []
    for i in range(num_blocks):
        out_features = trial.suggest_int(f'out_features_{i}', 10, 128)
        out_features_list.append(out_features)

    p = trial.suggest_float("dropout_l{}".format(i), 0, 0.5)
    activation_name = trial.suggest_categorical(f'activation_{i}', ['ReLU', 'Tanh', 'LeakyReLU'])
    negative_slope = 0.01
    if activation_name == 'LeakyReLU':
        negative_slope = trial.suggest_float(f"negative_slope_{i}", 0.01, 1)

    # Generate the optimizers.
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"])
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    weight_decay = trial.suggest_float("weight_decay", 1e-9, 1e-1, log=True)

    # Define cross-validation setup
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    val_losses = []

    for train_idx, val_idx in kf.split(train_features_np):
        # Create training and validation datasets for the current fold
        X_train_fold, X_val_fold = train_features_np[train_idx], train_features_np[val_idx]
        y_train_fold, y_val_fold = train_labels.iloc[train_idx], train_labels.iloc[val_idx]
        
        # scaling features
        X_train_fold[:, :len(non_category_features)] = scaler.fit_transform(X_train_fold[:, :len(non_category_features)])
        X_val_fold[:, :len(non_category_features)] = scaler.transform(X_val_fold[:, :len(non_category_features)])
            
        # Initialize the model for this fold
        model = MLP(d_in=X_train_fold.shape[1], d_layers=out_features_list, dropout=p, d_out=1, activation_name=activation_name, negative_slope=negative_slope)
        model = nn.DataParallel(model, device_ids = DEVICE_LIST)
        model.to(DEVICE)
        
        # define optimizer
        if optimizer_name == "Adam":
         optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr, weight_decay=weight_decay)
        else:
            momentum = trial.suggest_float("momentum", 1e-9, 0.95, log=True)
            optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr, weight_decay=weight_decay, momentum=momentum)
        
        # Define the loss function and optimizer
        criterion = nn.MSELoss()
        
        # Prepare DataLoader for training
        train_dataset = CustomDataset(X_train_fold, y_train_fold.to_numpy())
        val_dataset = CustomDataset(X_val_fold, y_val_fold.to_numpy())
        train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=True)
            
        # Training of the model.
        model.train()
        for epoch in range(EPOCHS):
            for batch_idx, (data, target) in enumerate(train_loader):
                data, target = data.to(DEVICE), target.to(DEVICE)

                optimizer.zero_grad()
                output = model(data)
                # print("shape", output.shape, target.shape)
                loss = criterion(output, target)
                loss.backward()
                optimizer.step()

        # Validation of the model.
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for batch_idx, (data, target) in enumerate(val_loader):
                data, target = data.to(DEVICE), target.to(DEVICE)
                output = model(data)
                val_loss = criterion(output, target).item()
                val_losses.append(val_loss**0.5) #rmse

        trial.report(val_loss, epoch)

    # Return the average validation loss across all folds
    # return torch.stack(val_losses).mean()
    return np.mean(val_losses)


In [274]:
# 3. Create a study object and optimize the objective function.
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=100, timeout=600)

pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))


[I 2024-08-28 07:49:22,856] A new study created in memory with name: no-name-211e8e25-35c8-4cfd-b1fb-e2e31a751665
[I 2024-08-28 08:07:06,566] Trial 0 finished with value: 0.20382025131928802 and parameters: {'num_blocks': 5, 'out_features_0': 21, 'out_features_1': 111, 'out_features_2': 103, 'out_features_3': 113, 'out_features_4': 25, 'dropout_l4': 0.2291005622500366, 'activation_4': 'Tanh', 'optimizer': 'RMSprop', 'lr': 1.6553025287544413e-05, 'weight_decay': 1.5600765342073137e-05, 'momentum': 4.508246992366513e-06}. Best is trial 0 with value: 0.20382025131928802.


Study statistics: 
  Number of finished trials:  1
  Number of pruned trials:  0
  Number of complete trials:  1
Best trial:
  Value:  0.20382025131928802
  Params: 
    num_blocks: 5
    out_features_0: 21
    out_features_1: 111
    out_features_2: 103
    out_features_3: 113
    out_features_4: 25
    dropout_l4: 0.2291005622500366
    activation_4: Tanh
    optimizer: RMSprop
    lr: 1.6553025287544413e-05
    weight_decay: 1.5600765342073137e-05
    momentum: 4.508246992366513e-06


In [275]:
trial.params

{'num_blocks': 5,
 'out_features_0': 21,
 'out_features_1': 111,
 'out_features_2': 103,
 'out_features_3': 113,
 'out_features_4': 25,
 'dropout_l4': 0.2291005622500366,
 'activation_4': 'Tanh',
 'optimizer': 'RMSprop',
 'lr': 1.6553025287544413e-05,
 'weight_decay': 1.5600765342073137e-05,
 'momentum': 4.508246992366513e-06}

In [276]:
MODEL_CONFIG = {"linear": [], "dropout": 0, "optimizer": {}, "negative_slope": 0.01, "activation_name": ""}

for key, value in trial.params.items():
    if "out_features" in key:
        MODEL_CONFIG["linear"].append(value)
    elif "dropout" in key:
        MODEL_CONFIG["dropout"] = value
    elif "negative_slope" in key:
        MODEL_CONFIG["negative_slope"] = value
    elif "activation" in key:
        MODEL_CONFIG["activation_name"] = value
    elif "num_blocks" in key:
        continue
    else:
        MODEL_CONFIG["optimizer"][key] = value

In [277]:
MODEL_CONFIG

{'linear': [21, 111, 103, 113, 25],
 'dropout': 0.2291005622500366,
 'optimizer': {'optimizer': 'RMSprop',
  'lr': 1.6553025287544413e-05,
  'weight_decay': 1.5600765342073137e-05,
  'momentum': 4.508246992366513e-06},
 'negative_slope': 0.01,
 'activation_name': 'Tanh'}

RUNNING THE MODEL

In [278]:
# empty cache first
torch.cuda.empty_cache()

In [279]:
# scaling features
train_features_np[:,:len(non_category_features)] = scaler.fit_transform(train_features_np[:,:len(non_category_features)])
test_features_np[:,:len(non_category_features)] = scaler.transform(test_features_np[:,:len(non_category_features)])
            
# Create dataset instances
train_dataset = CustomDataset(train_features_np, train_labels.to_numpy())
test_dataset = CustomDataset(test_features_np, test_labels.to_numpy())

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=test_features_np.shape[0], shuffle=True)

In [280]:
model = MLP(d_in=train_features_np.shape[1], d_layers=MODEL_CONFIG["linear"], dropout=MODEL_CONFIG["dropout"], d_out=1, activation_name=MODEL_CONFIG["activation_name"], negative_slope=MODEL_CONFIG['negative_slope'])
model

MLP(
  (linear_layers): ModuleList(
    (0): Linear(in_features=8601, out_features=21, bias=True)
    (1): Linear(in_features=21, out_features=111, bias=True)
    (2): Linear(in_features=111, out_features=103, bias=True)
    (3): Linear(in_features=103, out_features=113, bias=True)
    (4): Linear(in_features=113, out_features=25, bias=True)
  )
  (dropout_layer): Dropout(p=0.2291005622500366, inplace=False)
  (head): Linear(in_features=25, out_features=1, bias=True)
  (activation_layer): Tanh()
)

In [281]:
model = nn.DataParallel(model, device_ids = DEVICE_LIST)
model.to(DEVICE)

DataParallel(
  (module): MLP(
    (linear_layers): ModuleList(
      (0): Linear(in_features=8601, out_features=21, bias=True)
      (1): Linear(in_features=21, out_features=111, bias=True)
      (2): Linear(in_features=111, out_features=103, bias=True)
      (3): Linear(in_features=103, out_features=113, bias=True)
      (4): Linear(in_features=113, out_features=25, bias=True)
    )
    (dropout_layer): Dropout(p=0.2291005622500366, inplace=False)
    (head): Linear(in_features=25, out_features=1, bias=True)
    (activation_layer): Tanh()
  )
)

In [282]:
# define optimizer
optim_config = deepcopy(MODEL_CONFIG["optimizer"])
del optim_config["optimizer"]

optimizer = getattr(optim, MODEL_CONFIG["optimizer"]["optimizer"])(model.parameters(), **optim_config)
optimizer

RMSprop (
Parameter Group 0
    alpha: 0.99
    centered: False
    differentiable: False
    eps: 1e-08
    foreach: None
    lr: 1.6553025287544413e-05
    maximize: False
    momentum: 4.508246992366513e-06
    weight_decay: 1.5600765342073137e-05
)

In [283]:
EPOCH = 200
criterion = nn.MSELoss()

for ep in tqdm(range(EPOCH)):

    model.train()
    running_loss = 0.0
    for i, (inputs, targets) in enumerate(train_loader):
        inputs, targets = inputs.to(DEVICE), targets.to(DEVICE)
        
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)

        loss = criterion(outputs, targets)
        
        loss.backward()

        optimizer.step()

        # print statistics
        running_loss += loss.item() * inputs.size(0)
        if i % 100 == 99:    # print every 100 mini-batches
            print(f'[{ep + 1}, {i + 1:5d}] loss: {running_loss / 100:.3f}')


    train_loss = running_loss  / len(train_loader.dataset)
    print(f'Epoch [{ep+1}], Train Loss: {train_loss:.4f}')

# save the best model
torch.save(model.state_dict(), MODEL_PATH)

  0%|          | 0/200 [00:00<?, ?it/s]

[1,   100] loss: 0.178
[1,   200] loss: 0.327
[1,   300] loss: 0.455
[1,   400] loss: 0.594
[1,   500] loss: 0.727
[1,   600] loss: 0.860
[1,   700] loss: 0.994
[1,   800] loss: 1.139
[1,   900] loss: 1.236
[1,  1000] loss: 1.355
[1,  1100] loss: 1.479


  0%|          | 1/200 [00:02<09:06,  2.75s/it]

[1,  1200] loss: 1.585
Epoch [1], Train Loss: 0.1292
[2,   100] loss: 0.124
[2,   200] loss: 0.235
[2,   300] loss: 0.337
[2,   400] loss: 0.427
[2,   500] loss: 0.528
[2,   600] loss: 0.627
[2,   700] loss: 0.728
[2,   800] loss: 0.836
[2,   900] loss: 0.948
[2,  1000] loss: 1.039
[2,  1100] loss: 1.156


  1%|          | 2/200 [00:05<09:04,  2.75s/it]

[2,  1200] loss: 1.276
Epoch [2], Train Loss: 0.1067
[3,   100] loss: 0.099
[3,   200] loss: 0.198
[3,   300] loss: 0.301
[3,   400] loss: 0.406
[3,   500] loss: 0.509
[3,   600] loss: 0.625
[3,   700] loss: 0.725
[3,   800] loss: 0.832
[3,   900] loss: 0.912
[3,  1000] loss: 0.985
[3,  1100] loss: 1.105


  2%|▏         | 3/200 [00:08<09:01,  2.75s/it]

[3,  1200] loss: 1.200
Epoch [3], Train Loss: 0.0993
[4,   100] loss: 0.107
[4,   200] loss: 0.209
[4,   300] loss: 0.322
[4,   400] loss: 0.413
[4,   500] loss: 0.519
[4,   600] loss: 0.613
[4,   700] loss: 0.718
[4,   800] loss: 0.828
[4,   900] loss: 0.942
[4,  1000] loss: 1.046
[4,  1100] loss: 1.122


  2%|▏         | 4/200 [00:11<09:29,  2.91s/it]

[4,  1200] loss: 1.198
Epoch [4], Train Loss: 0.0995
[5,   100] loss: 0.091
[5,   200] loss: 0.215
[5,   300] loss: 0.303
[5,   400] loss: 0.376
[5,   500] loss: 0.475
[5,   600] loss: 0.567
[5,   700] loss: 0.657
[5,   800] loss: 0.745
[5,   900] loss: 0.847
[5,  1000] loss: 0.936
[5,  1100] loss: 1.035


  2%|▎         | 5/200 [00:14<09:14,  2.84s/it]

[5,  1200] loss: 1.126
Epoch [5], Train Loss: 0.0940
[6,   100] loss: 0.103
[6,   200] loss: 0.204
[6,   300] loss: 0.284
[6,   400] loss: 0.399
[6,   500] loss: 0.508
[6,   600] loss: 0.610
[6,   700] loss: 0.691
[6,   800] loss: 0.766
[6,   900] loss: 0.863
[6,  1000] loss: 0.950
[6,  1100] loss: 1.039


  3%|▎         | 6/200 [00:16<09:04,  2.81s/it]

[6,  1200] loss: 1.122
Epoch [6], Train Loss: 0.0940
[7,   100] loss: 0.105
[7,   200] loss: 0.187
[7,   300] loss: 0.279
[7,   400] loss: 0.368
[7,   500] loss: 0.475
[7,   600] loss: 0.558
[7,   700] loss: 0.656
[7,   800] loss: 0.752
[7,   900] loss: 0.858
[7,  1000] loss: 0.946
[7,  1100] loss: 1.027


  4%|▎         | 7/200 [00:19<09:02,  2.81s/it]

[7,  1200] loss: 1.117
Epoch [7], Train Loss: 0.0938
[8,   100] loss: 0.106
[8,   200] loss: 0.178
[8,   300] loss: 0.287
[8,   400] loss: 0.374
[8,   500] loss: 0.461
[8,   600] loss: 0.548
[8,   700] loss: 0.640
[8,   800] loss: 0.738
[8,   900] loss: 0.848
[8,  1000] loss: 0.936
[8,  1100] loss: 1.025


  4%|▍         | 8/200 [00:22<08:56,  2.80s/it]

[8,  1200] loss: 1.128
Epoch [8], Train Loss: 0.0930
[9,   100] loss: 0.111
[9,   200] loss: 0.190
[9,   300] loss: 0.287
[9,   400] loss: 0.364
[9,   500] loss: 0.465
[9,   600] loss: 0.550
[9,   700] loss: 0.634
[9,   800] loss: 0.729
[9,   900] loss: 0.828
[9,  1000] loss: 0.895
[9,  1100] loss: 0.989


  4%|▍         | 9/200 [00:25<08:49,  2.77s/it]

[9,  1200] loss: 1.079
Epoch [9], Train Loss: 0.0884
[10,   100] loss: 0.068
[10,   200] loss: 0.165
[10,   300] loss: 0.234
[10,   400] loss: 0.326
[10,   500] loss: 0.398
[10,   600] loss: 0.479
[10,   700] loss: 0.559
[10,   800] loss: 0.640
[10,   900] loss: 0.727
[10,  1000] loss: 0.817
[10,  1100] loss: 0.923


  5%|▌         | 10/200 [00:27<08:45,  2.77s/it]

[10,  1200] loss: 1.025
Epoch [10], Train Loss: 0.0865
[11,   100] loss: 0.096
[11,   200] loss: 0.173
[11,   300] loss: 0.271
[11,   400] loss: 0.342
[11,   500] loss: 0.428
[11,   600] loss: 0.511
[11,   700] loss: 0.619
[11,   800] loss: 0.700
[11,   900] loss: 0.785
[11,  1000] loss: 0.874
[11,  1100] loss: 0.962
[11,  1200] loss: 1.037


  6%|▌         | 11/200 [00:30<08:42,  2.77s/it]

Epoch [11], Train Loss: 0.0852
[12,   100] loss: 0.078
[12,   200] loss: 0.168
[12,   300] loss: 0.249
[12,   400] loss: 0.332
[12,   500] loss: 0.419
[12,   600] loss: 0.509
[12,   700] loss: 0.589
[12,   800] loss: 0.664
[12,   900] loss: 0.758
[12,  1000] loss: 0.833
[12,  1100] loss: 0.920


  6%|▌         | 12/200 [00:33<08:38,  2.76s/it]

[12,  1200] loss: 1.009
Epoch [12], Train Loss: 0.0834
[13,   100] loss: 0.095
[13,   200] loss: 0.159
[13,   300] loss: 0.237
[13,   400] loss: 0.326
[13,   500] loss: 0.390
[13,   600] loss: 0.482
[13,   700] loss: 0.562
[13,   800] loss: 0.645
[13,   900] loss: 0.760
[13,  1000] loss: 0.862
[13,  1100] loss: 0.941


  6%|▋         | 13/200 [00:36<08:35,  2.75s/it]

[13,  1200] loss: 1.048
Epoch [13], Train Loss: 0.0874
[14,   100] loss: 0.074
[14,   200] loss: 0.156
[14,   300] loss: 0.244
[14,   400] loss: 0.313
[14,   500] loss: 0.391
[14,   600] loss: 0.484
[14,   700] loss: 0.591
[14,   800] loss: 0.651
[14,   900] loss: 0.729
[14,  1000] loss: 0.826
[14,  1100] loss: 0.925


  7%|▋         | 14/200 [00:38<08:31,  2.75s/it]

[14,  1200] loss: 1.009
Epoch [14], Train Loss: 0.0856
[15,   100] loss: 0.090
[15,   200] loss: 0.167
[15,   300] loss: 0.232
[15,   400] loss: 0.324
[15,   500] loss: 0.397
[15,   600] loss: 0.485
[15,   700] loss: 0.560
[15,   800] loss: 0.661
[15,   900] loss: 0.749
[15,  1000] loss: 0.822
[15,  1100] loss: 0.924
[15,  1200] loss: 0.990


  8%|▊         | 15/200 [00:41<08:28,  2.75s/it]

Epoch [15], Train Loss: 0.0832
[16,   100] loss: 0.076
[16,   200] loss: 0.161
[16,   300] loss: 0.240
[16,   400] loss: 0.326
[16,   500] loss: 0.413
[16,   600] loss: 0.491
[16,   700] loss: 0.574
[16,   800] loss: 0.651
[16,   900] loss: 0.743
[16,  1000] loss: 0.825
[16,  1100] loss: 0.920


  8%|▊         | 16/200 [00:44<08:26,  2.75s/it]

[16,  1200] loss: 1.013
Epoch [16], Train Loss: 0.0838
[17,   100] loss: 0.083
[17,   200] loss: 0.178
[17,   300] loss: 0.255
[17,   400] loss: 0.342
[17,   500] loss: 0.419
[17,   600] loss: 0.525
[17,   700] loss: 0.599
[17,   800] loss: 0.669
[17,   900] loss: 0.770
[17,  1000] loss: 0.837
[17,  1100] loss: 0.934


  8%|▊         | 17/200 [00:47<08:23,  2.75s/it]

[17,  1200] loss: 1.014
Epoch [17], Train Loss: 0.0835
[18,   100] loss: 0.086
[18,   200] loss: 0.162
[18,   300] loss: 0.235
[18,   400] loss: 0.329
[18,   500] loss: 0.414
[18,   600] loss: 0.472
[18,   700] loss: 0.557
[18,   800] loss: 0.647
[18,   900] loss: 0.734
[18,  1000] loss: 0.836
[18,  1100] loss: 0.918
[18,  1200] loss: 1.004


  9%|▉         | 18/200 [00:49<08:21,  2.76s/it]

Epoch [18], Train Loss: 0.0832
[19,   100] loss: 0.075
[19,   200] loss: 0.140
[19,   300] loss: 0.230
[19,   400] loss: 0.318
[19,   500] loss: 0.389
[19,   600] loss: 0.475
[19,   700] loss: 0.554
[19,   800] loss: 0.642
[19,   900] loss: 0.727
[19,  1000] loss: 0.818
[19,  1100] loss: 0.918
[19,  1200] loss: 1.007


 10%|▉         | 19/200 [00:52<08:21,  2.77s/it]

Epoch [19], Train Loss: 0.0842
[20,   100] loss: 0.087
[20,   200] loss: 0.164
[20,   300] loss: 0.234
[20,   400] loss: 0.325
[20,   500] loss: 0.400
[20,   600] loss: 0.477
[20,   700] loss: 0.557
[20,   800] loss: 0.628
[20,   900] loss: 0.727
[20,  1000] loss: 0.815
[20,  1100] loss: 0.887


 10%|█         | 20/200 [00:55<08:18,  2.77s/it]

[20,  1200] loss: 0.962
Epoch [20], Train Loss: 0.0802
[21,   100] loss: 0.073
[21,   200] loss: 0.146
[21,   300] loss: 0.216
[21,   400] loss: 0.300
[21,   500] loss: 0.364
[21,   600] loss: 0.455
[21,   700] loss: 0.546
[21,   800] loss: 0.611
[21,   900] loss: 0.683
[21,  1000] loss: 0.746
[21,  1100] loss: 0.840


 10%|█         | 21/200 [00:58<08:14,  2.76s/it]

[21,  1200] loss: 0.921
Epoch [21], Train Loss: 0.0779
[22,   100] loss: 0.102
[22,   200] loss: 0.197
[22,   300] loss: 0.255
[22,   400] loss: 0.321
[22,   500] loss: 0.412
[22,   600] loss: 0.493
[22,   700] loss: 0.590
[22,   800] loss: 0.662
[22,   900] loss: 0.752
[22,  1000] loss: 0.825
[22,  1100] loss: 0.914


 11%|█         | 22/200 [01:01<08:12,  2.77s/it]

[22,  1200] loss: 0.972
Epoch [22], Train Loss: 0.0806
[23,   100] loss: 0.082
[23,   200] loss: 0.147
[23,   300] loss: 0.219
[23,   400] loss: 0.297
[23,   500] loss: 0.369
[23,   600] loss: 0.443
[23,   700] loss: 0.518
[23,   800] loss: 0.596
[23,   900] loss: 0.672
[23,  1000] loss: 0.740
[23,  1100] loss: 0.833


 12%|█▏        | 23/200 [01:03<08:08,  2.76s/it]

[23,  1200] loss: 0.915
Epoch [23], Train Loss: 0.0769
[24,   100] loss: 0.097
[24,   200] loss: 0.168
[24,   300] loss: 0.252
[24,   400] loss: 0.329
[24,   500] loss: 0.418
[24,   600] loss: 0.505
[24,   700] loss: 0.576
[24,   800] loss: 0.637
[24,   900] loss: 0.718
[24,  1000] loss: 0.783
[24,  1100] loss: 0.871


 12%|█▏        | 24/200 [01:06<08:06,  2.76s/it]

[24,  1200] loss: 0.947
Epoch [24], Train Loss: 0.0783
[25,   100] loss: 0.084
[25,   200] loss: 0.148
[25,   300] loss: 0.254
[25,   400] loss: 0.336
[25,   500] loss: 0.418
[25,   600] loss: 0.480
[25,   700] loss: 0.556
[25,   800] loss: 0.652
[25,   900] loss: 0.734
[25,  1000] loss: 0.794
[25,  1100] loss: 0.862


 12%|█▎        | 25/200 [01:09<08:02,  2.76s/it]

[25,  1200] loss: 0.946
Epoch [25], Train Loss: 0.0793
[26,   100] loss: 0.097
[26,   200] loss: 0.183
[26,   300] loss: 0.268
[26,   400] loss: 0.339
[26,   500] loss: 0.409
[26,   600] loss: 0.488
[26,   700] loss: 0.551
[26,   800] loss: 0.611
[26,   900] loss: 0.703
[26,  1000] loss: 0.773
[26,  1100] loss: 0.851


 13%|█▎        | 26/200 [01:12<07:59,  2.76s/it]

[26,  1200] loss: 0.938
Epoch [26], Train Loss: 0.0780
[27,   100] loss: 0.085
[27,   200] loss: 0.160
[27,   300] loss: 0.248
[27,   400] loss: 0.336
[27,   500] loss: 0.418
[27,   600] loss: 0.479
[27,   700] loss: 0.569
[27,   800] loss: 0.640
[27,   900] loss: 0.700
[27,  1000] loss: 0.796
[27,  1100] loss: 0.867


 14%|█▎        | 27/200 [01:14<07:56,  2.76s/it]

[27,  1200] loss: 0.931
Epoch [27], Train Loss: 0.0774
[28,   100] loss: 0.067
[28,   200] loss: 0.139
[28,   300] loss: 0.210
[28,   400] loss: 0.298
[28,   500] loss: 0.358
[28,   600] loss: 0.427
[28,   700] loss: 0.507
[28,   800] loss: 0.585
[28,   900] loss: 0.678
[28,  1000] loss: 0.757
[28,  1100] loss: 0.832


 14%|█▍        | 28/200 [01:17<07:54,  2.76s/it]

[28,  1200] loss: 0.896
Epoch [28], Train Loss: 0.0759
[29,   100] loss: 0.079
[29,   200] loss: 0.151
[29,   300] loss: 0.231
[29,   400] loss: 0.307
[29,   500] loss: 0.374
[29,   600] loss: 0.444
[29,   700] loss: 0.526
[29,   800] loss: 0.581
[29,   900] loss: 0.669
[29,  1000] loss: 0.730
[29,  1100] loss: 0.815


 14%|█▍        | 29/200 [01:20<07:51,  2.76s/it]

[29,  1200] loss: 0.893
Epoch [29], Train Loss: 0.0746
[30,   100] loss: 0.063
[30,   200] loss: 0.154
[30,   300] loss: 0.224
[30,   400] loss: 0.321
[30,   500] loss: 0.390
[30,   600] loss: 0.450
[30,   700] loss: 0.524
[30,   800] loss: 0.611
[30,   900] loss: 0.677
[30,  1000] loss: 0.760
[30,  1100] loss: 0.834
[30,  1200] loss: 0.912


 15%|█▌        | 30/200 [01:23<07:48,  2.76s/it]

Epoch [30], Train Loss: 0.0768
[31,   100] loss: 0.088
[31,   200] loss: 0.179
[31,   300] loss: 0.248
[31,   400] loss: 0.341
[31,   500] loss: 0.422
[31,   600] loss: 0.492
[31,   700] loss: 0.566
[31,   800] loss: 0.629
[31,   900] loss: 0.706
[31,  1000] loss: 0.776
[31,  1100] loss: 0.847


 16%|█▌        | 31/200 [01:25<07:48,  2.77s/it]

[31,  1200] loss: 0.932
Epoch [31], Train Loss: 0.0776
[32,   100] loss: 0.074
[32,   200] loss: 0.152
[32,   300] loss: 0.212
[32,   400] loss: 0.279
[32,   500] loss: 0.362
[32,   600] loss: 0.449
[32,   700] loss: 0.527
[32,   800] loss: 0.607
[32,   900] loss: 0.711
[32,  1000] loss: 0.779
[32,  1100] loss: 0.846
[32,  1200] loss: 0.930


 16%|█▌        | 32/200 [01:29<08:07,  2.90s/it]

Epoch [32], Train Loss: 0.0771
[33,   100] loss: 0.062
[33,   200] loss: 0.156
[33,   300] loss: 0.217
[33,   400] loss: 0.284
[33,   500] loss: 0.380
[33,   600] loss: 0.466
[33,   700] loss: 0.540
[33,   800] loss: 0.607
[33,   900] loss: 0.680
[33,  1000] loss: 0.764
[33,  1100] loss: 0.824


 16%|█▋        | 33/200 [01:31<07:57,  2.86s/it]

[33,  1200] loss: 0.889
Epoch [33], Train Loss: 0.0736
[34,   100] loss: 0.080
[34,   200] loss: 0.161
[34,   300] loss: 0.249
[34,   400] loss: 0.324
[34,   500] loss: 0.406
[34,   600] loss: 0.483
[34,   700] loss: 0.551
[34,   800] loss: 0.604
[34,   900] loss: 0.678
[34,  1000] loss: 0.769
[34,  1100] loss: 0.847


 17%|█▋        | 34/200 [01:34<07:49,  2.83s/it]

[34,  1200] loss: 0.903
Epoch [34], Train Loss: 0.0744
[35,   100] loss: 0.067
[35,   200] loss: 0.134
[35,   300] loss: 0.220
[35,   400] loss: 0.286
[35,   500] loss: 0.367
[35,   600] loss: 0.435
[35,   700] loss: 0.525
[35,   800] loss: 0.597
[35,   900] loss: 0.658
[35,  1000] loss: 0.741
[35,  1100] loss: 0.840


 18%|█▊        | 35/200 [01:37<07:42,  2.80s/it]

[35,  1200] loss: 0.910
Epoch [35], Train Loss: 0.0752
[36,   100] loss: 0.070
[36,   200] loss: 0.151
[36,   300] loss: 0.230
[36,   400] loss: 0.316
[36,   500] loss: 0.396
[36,   600] loss: 0.475
[36,   700] loss: 0.559
[36,   800] loss: 0.622
[36,   900] loss: 0.692
[36,  1000] loss: 0.773
[36,  1100] loss: 0.864


 18%|█▊        | 36/200 [01:40<07:37,  2.79s/it]

[36,  1200] loss: 0.931
Epoch [36], Train Loss: 0.0771
[37,   100] loss: 0.072
[37,   200] loss: 0.140
[37,   300] loss: 0.210
[37,   400] loss: 0.292
[37,   500] loss: 0.373
[37,   600] loss: 0.449
[37,   700] loss: 0.524
[37,   800] loss: 0.594
[37,   900] loss: 0.667
[37,  1000] loss: 0.749
[37,  1100] loss: 0.817
[37,  1200] loss: 0.900
Epoch [37], Train Loss: 0.0739


 18%|█▊        | 37/200 [01:42<07:34,  2.79s/it]

[38,   100] loss: 0.082
[38,   200] loss: 0.152
[38,   300] loss: 0.228
[38,   400] loss: 0.305
[38,   500] loss: 0.364
[38,   600] loss: 0.436
[38,   700] loss: 0.511
[38,   800] loss: 0.595
[38,   900] loss: 0.663
[38,  1000] loss: 0.736
[38,  1100] loss: 0.831


 19%|█▉        | 38/200 [01:45<07:32,  2.79s/it]

[38,  1200] loss: 0.915
Epoch [38], Train Loss: 0.0758
[39,   100] loss: 0.066
[39,   200] loss: 0.128
[39,   300] loss: 0.209
[39,   400] loss: 0.272
[39,   500] loss: 0.337
[39,   600] loss: 0.437
[39,   700] loss: 0.510
[39,   800] loss: 0.580
[39,   900] loss: 0.661
[39,  1000] loss: 0.730
[39,  1100] loss: 0.811


 20%|█▉        | 39/200 [01:48<07:27,  2.78s/it]

[39,  1200] loss: 0.877
Epoch [39], Train Loss: 0.0731
[40,   100] loss: 0.076
[40,   200] loss: 0.146
[40,   300] loss: 0.205
[40,   400] loss: 0.264
[40,   500] loss: 0.333
[40,   600] loss: 0.406
[40,   700] loss: 0.495
[40,   800] loss: 0.581
[40,   900] loss: 0.649
[40,  1000] loss: 0.707
[40,  1100] loss: 0.788


 20%|██        | 40/200 [01:51<07:23,  2.77s/it]

[40,  1200] loss: 0.872
Epoch [40], Train Loss: 0.0729
[41,   100] loss: 0.072
[41,   200] loss: 0.162
[41,   300] loss: 0.241
[41,   400] loss: 0.311
[41,   500] loss: 0.397
[41,   600] loss: 0.450
[41,   700] loss: 0.516
[41,   800] loss: 0.605
[41,   900] loss: 0.676
[41,  1000] loss: 0.752
[41,  1100] loss: 0.829


 20%|██        | 41/200 [01:53<07:20,  2.77s/it]

[41,  1200] loss: 0.897
Epoch [41], Train Loss: 0.0744
[42,   100] loss: 0.064
[42,   200] loss: 0.143
[42,   300] loss: 0.214
[42,   400] loss: 0.300
[42,   500] loss: 0.372
[42,   600] loss: 0.436
[42,   700] loss: 0.505
[42,   800] loss: 0.574
[42,   900] loss: 0.646
[42,  1000] loss: 0.712
[42,  1100] loss: 0.803


 21%|██        | 42/200 [01:56<07:17,  2.77s/it]

[42,  1200] loss: 0.865
Epoch [42], Train Loss: 0.0718
[43,   100] loss: 0.078
[43,   200] loss: 0.144
[43,   300] loss: 0.223
[43,   400] loss: 0.306
[43,   500] loss: 0.375
[43,   600] loss: 0.446
[43,   700] loss: 0.523
[43,   800] loss: 0.587
[43,   900] loss: 0.645
[43,  1000] loss: 0.720
[43,  1100] loss: 0.788
[43,  1200] loss: 0.859


 22%|██▏       | 43/200 [01:59<07:15,  2.77s/it]

Epoch [43], Train Loss: 0.0720
[44,   100] loss: 0.072
[44,   200] loss: 0.137
[44,   300] loss: 0.206
[44,   400] loss: 0.289
[44,   500] loss: 0.363
[44,   600] loss: 0.459
[44,   700] loss: 0.520
[44,   800] loss: 0.579
[44,   900] loss: 0.639
[44,  1000] loss: 0.710
[44,  1100] loss: 0.786


 22%|██▏       | 44/200 [02:02<07:25,  2.85s/it]

[44,  1200] loss: 0.865
Epoch [44], Train Loss: 0.0725
[45,   100] loss: 0.081
[45,   200] loss: 0.158
[45,   300] loss: 0.248
[45,   400] loss: 0.304
[45,   500] loss: 0.379
[45,   600] loss: 0.438
[45,   700] loss: 0.493
[45,   800] loss: 0.567
[45,   900] loss: 0.644
[45,  1000] loss: 0.719
[45,  1100] loss: 0.780


 22%|██▎       | 45/200 [02:05<07:18,  2.83s/it]

[45,  1200] loss: 0.857
Epoch [45], Train Loss: 0.0719
[46,   100] loss: 0.066
[46,   200] loss: 0.153
[46,   300] loss: 0.230
[46,   400] loss: 0.307
[46,   500] loss: 0.368
[46,   600] loss: 0.446
[46,   700] loss: 0.522
[46,   800] loss: 0.600
[46,   900] loss: 0.663
[46,  1000] loss: 0.724
[46,  1100] loss: 0.797


 23%|██▎       | 46/200 [02:08<07:18,  2.85s/it]

[46,  1200] loss: 0.858
Epoch [46], Train Loss: 0.0720
[47,   100] loss: 0.060
[47,   200] loss: 0.134
[47,   300] loss: 0.216
[47,   400] loss: 0.295
[47,   500] loss: 0.357
[47,   600] loss: 0.437
[47,   700] loss: 0.493
[47,   800] loss: 0.555
[47,   900] loss: 0.625
[47,  1000] loss: 0.701
[47,  1100] loss: 0.781


 24%|██▎       | 47/200 [02:11<07:16,  2.86s/it]

[47,  1200] loss: 0.850
Epoch [47], Train Loss: 0.0708
[48,   100] loss: 0.072
[48,   200] loss: 0.148
[48,   300] loss: 0.231
[48,   400] loss: 0.299
[48,   500] loss: 0.365
[48,   600] loss: 0.456
[48,   700] loss: 0.527
[48,   800] loss: 0.599
[48,   900] loss: 0.649
[48,  1000] loss: 0.729
[48,  1100] loss: 0.798


 24%|██▍       | 48/200 [02:13<07:10,  2.83s/it]

[48,  1200] loss: 0.861
Epoch [48], Train Loss: 0.0698
[49,   100] loss: 0.073
[49,   200] loss: 0.138
[49,   300] loss: 0.196
[49,   400] loss: 0.270
[49,   500] loss: 0.356
[49,   600] loss: 0.426
[49,   700] loss: 0.480
[49,   800] loss: 0.558
[49,   900] loss: 0.621
[49,  1000] loss: 0.698
[49,  1100] loss: 0.755


 24%|██▍       | 49/200 [02:16<07:04,  2.81s/it]

[49,  1200] loss: 0.828
Epoch [49], Train Loss: 0.0685
[50,   100] loss: 0.074
[50,   200] loss: 0.150
[50,   300] loss: 0.225
[50,   400] loss: 0.277
[50,   500] loss: 0.337
[50,   600] loss: 0.421
[50,   700] loss: 0.507
[50,   800] loss: 0.569
[50,   900] loss: 0.625
[50,  1000] loss: 0.682
[50,  1100] loss: 0.735


 25%|██▌       | 50/200 [02:19<06:59,  2.80s/it]

[50,  1200] loss: 0.817
Epoch [50], Train Loss: 0.0691
[51,   100] loss: 0.059
[51,   200] loss: 0.143
[51,   300] loss: 0.207
[51,   400] loss: 0.270
[51,   500] loss: 0.349
[51,   600] loss: 0.422
[51,   700] loss: 0.484
[51,   800] loss: 0.549
[51,   900] loss: 0.635
[51,  1000] loss: 0.711
[51,  1100] loss: 0.765


 26%|██▌       | 51/200 [02:22<06:55,  2.79s/it]

[51,  1200] loss: 0.844
Epoch [51], Train Loss: 0.0700
[52,   100] loss: 0.095
[52,   200] loss: 0.159
[52,   300] loss: 0.224
[52,   400] loss: 0.296
[52,   500] loss: 0.361
[52,   600] loss: 0.426
[52,   700] loss: 0.482
[52,   800] loss: 0.565
[52,   900] loss: 0.627
[52,  1000] loss: 0.704
[52,  1100] loss: 0.793


 26%|██▌       | 52/200 [02:24<06:51,  2.78s/it]

[52,  1200] loss: 0.859
Epoch [52], Train Loss: 0.0717
[53,   100] loss: 0.063
[53,   200] loss: 0.144
[53,   300] loss: 0.223
[53,   400] loss: 0.278
[53,   500] loss: 0.337
[53,   600] loss: 0.399
[53,   700] loss: 0.464
[53,   800] loss: 0.532
[53,   900] loss: 0.599
[53,  1000] loss: 0.659
[53,  1100] loss: 0.725


 26%|██▋       | 53/200 [02:27<06:48,  2.78s/it]

[53,  1200] loss: 0.804
Epoch [53], Train Loss: 0.0673
[54,   100] loss: 0.069
[54,   200] loss: 0.132
[54,   300] loss: 0.181
[54,   400] loss: 0.274
[54,   500] loss: 0.337
[54,   600] loss: 0.415
[54,   700] loss: 0.479
[54,   800] loss: 0.546
[54,   900] loss: 0.617
[54,  1000] loss: 0.687
[54,  1100] loss: 0.755


 27%|██▋       | 54/200 [02:30<06:45,  2.78s/it]

[54,  1200] loss: 0.833
Epoch [54], Train Loss: 0.0696
[55,   100] loss: 0.059
[55,   200] loss: 0.123
[55,   300] loss: 0.189
[55,   400] loss: 0.264
[55,   500] loss: 0.330
[55,   600] loss: 0.400
[55,   700] loss: 0.476
[55,   800] loss: 0.553
[55,   900] loss: 0.611
[55,  1000] loss: 0.688
[55,  1100] loss: 0.754
[55,  1200] loss: 0.824


 28%|██▊       | 55/200 [02:33<06:46,  2.80s/it]

Epoch [55], Train Loss: 0.0689
[56,   100] loss: 0.064
[56,   200] loss: 0.134
[56,   300] loss: 0.201
[56,   400] loss: 0.258
[56,   500] loss: 0.340
[56,   600] loss: 0.406
[56,   700] loss: 0.468
[56,   800] loss: 0.545
[56,   900] loss: 0.605
[56,  1000] loss: 0.676
[56,  1100] loss: 0.751


 28%|██▊       | 56/200 [02:36<06:49,  2.84s/it]

[56,  1200] loss: 0.824
Epoch [56], Train Loss: 0.0689
[57,   100] loss: 0.068
[57,   200] loss: 0.135
[57,   300] loss: 0.202
[57,   400] loss: 0.276
[57,   500] loss: 0.346
[57,   600] loss: 0.410
[57,   700] loss: 0.453
[57,   800] loss: 0.533
[57,   900] loss: 0.596
[57,  1000] loss: 0.668
[57,  1100] loss: 0.730


 28%|██▊       | 57/200 [02:39<06:43,  2.82s/it]

[57,  1200] loss: 0.794
Epoch [57], Train Loss: 0.0661
[58,   100] loss: 0.076
[58,   200] loss: 0.149
[58,   300] loss: 0.218
[58,   400] loss: 0.283
[58,   500] loss: 0.339
[58,   600] loss: 0.413
[58,   700] loss: 0.478
[58,   800] loss: 0.559
[58,   900] loss: 0.622
[58,  1000] loss: 0.692
[58,  1100] loss: 0.750


 29%|██▉       | 58/200 [02:41<06:38,  2.81s/it]

[58,  1200] loss: 0.806
Epoch [58], Train Loss: 0.0678
[59,   100] loss: 0.063
[59,   200] loss: 0.128
[59,   300] loss: 0.211
[59,   400] loss: 0.289
[59,   500] loss: 0.348
[59,   600] loss: 0.405
[59,   700] loss: 0.477
[59,   800] loss: 0.539
[59,   900] loss: 0.622
[59,  1000] loss: 0.691
[59,  1100] loss: 0.768
[59,  1200] loss: 0.829


 30%|██▉       | 59/200 [02:44<06:33,  2.79s/it]

Epoch [59], Train Loss: 0.0687
[60,   100] loss: 0.059
[60,   200] loss: 0.125
[60,   300] loss: 0.177
[60,   400] loss: 0.220
[60,   500] loss: 0.276
[60,   600] loss: 0.334
[60,   700] loss: 0.386
[60,   800] loss: 0.447
[60,   900] loss: 0.528
[60,  1000] loss: 0.596
[60,  1100] loss: 0.676


 30%|███       | 60/200 [02:47<06:29,  2.78s/it]

[60,  1200] loss: 0.747
Epoch [60], Train Loss: 0.0643
[61,   100] loss: 0.053
[61,   200] loss: 0.143
[61,   300] loss: 0.216
[61,   400] loss: 0.296
[61,   500] loss: 0.354
[61,   600] loss: 0.418
[61,   700] loss: 0.484
[61,   800] loss: 0.550
[61,   900] loss: 0.600
[61,  1000] loss: 0.661
[61,  1100] loss: 0.727


 30%|███       | 61/200 [02:50<06:25,  2.77s/it]

[61,  1200] loss: 0.783
Epoch [61], Train Loss: 0.0664
[62,   100] loss: 0.068
[62,   200] loss: 0.138
[62,   300] loss: 0.223
[62,   400] loss: 0.288
[62,   500] loss: 0.338
[62,   600] loss: 0.404
[62,   700] loss: 0.469
[62,   800] loss: 0.521
[62,   900] loss: 0.585
[62,  1000] loss: 0.645
[62,  1100] loss: 0.716


 31%|███       | 62/200 [02:52<06:22,  2.77s/it]

[62,  1200] loss: 0.776
Epoch [62], Train Loss: 0.0654
[63,   100] loss: 0.063
[63,   200] loss: 0.121
[63,   300] loss: 0.185
[63,   400] loss: 0.243
[63,   500] loss: 0.330
[63,   600] loss: 0.398
[63,   700] loss: 0.463
[63,   800] loss: 0.541
[63,   900] loss: 0.612
[63,  1000] loss: 0.681
[63,  1100] loss: 0.739


 32%|███▏      | 63/200 [02:55<06:18,  2.77s/it]

[63,  1200] loss: 0.806
Epoch [63], Train Loss: 0.0671
[64,   100] loss: 0.066
[64,   200] loss: 0.133
[64,   300] loss: 0.210
[64,   400] loss: 0.266
[64,   500] loss: 0.350
[64,   600] loss: 0.420
[64,   700] loss: 0.496
[64,   800] loss: 0.551
[64,   900] loss: 0.610
[64,  1000] loss: 0.668
[64,  1100] loss: 0.733


 32%|███▏      | 64/200 [02:58<06:15,  2.76s/it]

[64,  1200] loss: 0.788
Epoch [64], Train Loss: 0.0654
[65,   100] loss: 0.058
[65,   200] loss: 0.122
[65,   300] loss: 0.200
[65,   400] loss: 0.268
[65,   500] loss: 0.341
[65,   600] loss: 0.412
[65,   700] loss: 0.480
[65,   800] loss: 0.540
[65,   900] loss: 0.597
[65,  1000] loss: 0.649
[65,  1100] loss: 0.694


 32%|███▎      | 65/200 [03:01<06:11,  2.75s/it]

[65,  1200] loss: 0.765
Epoch [65], Train Loss: 0.0644
[66,   100] loss: 0.080
[66,   200] loss: 0.129
[66,   300] loss: 0.181
[66,   400] loss: 0.248
[66,   500] loss: 0.328
[66,   600] loss: 0.404
[66,   700] loss: 0.470
[66,   800] loss: 0.529
[66,   900] loss: 0.589
[66,  1000] loss: 0.659
[66,  1100] loss: 0.725


 33%|███▎      | 66/200 [03:03<06:08,  2.75s/it]

[66,  1200] loss: 0.800
Epoch [66], Train Loss: 0.0663
[67,   100] loss: 0.068
[67,   200] loss: 0.124
[67,   300] loss: 0.206
[67,   400] loss: 0.268
[67,   500] loss: 0.319
[67,   600] loss: 0.391
[67,   700] loss: 0.446
[67,   800] loss: 0.513
[67,   900] loss: 0.584
[67,  1000] loss: 0.651
[67,  1100] loss: 0.704


 34%|███▎      | 67/200 [03:06<06:05,  2.75s/it]

[67,  1200] loss: 0.786
Epoch [67], Train Loss: 0.0659
[68,   100] loss: 0.060
[68,   200] loss: 0.106
[68,   300] loss: 0.166
[68,   400] loss: 0.225
[68,   500] loss: 0.299
[68,   600] loss: 0.370
[68,   700] loss: 0.426
[68,   800] loss: 0.509
[68,   900] loss: 0.574
[68,  1000] loss: 0.653
[68,  1100] loss: 0.722


 34%|███▍      | 68/200 [03:09<06:03,  2.75s/it]

[68,  1200] loss: 0.779
Epoch [68], Train Loss: 0.0643
[69,   100] loss: 0.067
[69,   200] loss: 0.120
[69,   300] loss: 0.166
[69,   400] loss: 0.235
[69,   500] loss: 0.297
[69,   600] loss: 0.367
[69,   700] loss: 0.430
[69,   800] loss: 0.500
[69,   900] loss: 0.555
[69,  1000] loss: 0.622
[69,  1100] loss: 0.706


 34%|███▍      | 69/200 [03:12<06:01,  2.76s/it]

[69,  1200] loss: 0.780
Epoch [69], Train Loss: 0.0649
[70,   100] loss: 0.068
[70,   200] loss: 0.133
[70,   300] loss: 0.181
[70,   400] loss: 0.253
[70,   500] loss: 0.328
[70,   600] loss: 0.401
[70,   700] loss: 0.467
[70,   800] loss: 0.533
[70,   900] loss: 0.600
[70,  1000] loss: 0.675
[70,  1100] loss: 0.718
[70,  1200] loss: 0.787


 35%|███▌      | 70/200 [03:15<06:03,  2.80s/it]

Epoch [70], Train Loss: 0.0656
[71,   100] loss: 0.071
[71,   200] loss: 0.142
[71,   300] loss: 0.210
[71,   400] loss: 0.263
[71,   500] loss: 0.341
[71,   600] loss: 0.399
[71,   700] loss: 0.458
[71,   800] loss: 0.522
[71,   900] loss: 0.574
[71,  1000] loss: 0.640
[71,  1100] loss: 0.708


 36%|███▌      | 71/200 [03:17<06:02,  2.81s/it]

[71,  1200] loss: 0.777
Epoch [71], Train Loss: 0.0650
[72,   100] loss: 0.072
[72,   200] loss: 0.132
[72,   300] loss: 0.192
[72,   400] loss: 0.256
[72,   500] loss: 0.309
[72,   600] loss: 0.370
[72,   700] loss: 0.433
[72,   800] loss: 0.505
[72,   900] loss: 0.585
[72,  1000] loss: 0.653
[72,  1100] loss: 0.719
[72,  1200] loss: 0.786


 36%|███▌      | 72/200 [03:20<05:59,  2.81s/it]

Epoch [72], Train Loss: 0.0650
[73,   100] loss: 0.066
[73,   200] loss: 0.129
[73,   300] loss: 0.192
[73,   400] loss: 0.259
[73,   500] loss: 0.312
[73,   600] loss: 0.385
[73,   700] loss: 0.446
[73,   800] loss: 0.510
[73,   900] loss: 0.576
[73,  1000] loss: 0.649
[73,  1100] loss: 0.721


 36%|███▋      | 73/200 [03:23<05:54,  2.79s/it]

[73,  1200] loss: 0.778
Epoch [73], Train Loss: 0.0644
[74,   100] loss: 0.071
[74,   200] loss: 0.139
[74,   300] loss: 0.184
[74,   400] loss: 0.251
[74,   500] loss: 0.311
[74,   600] loss: 0.395
[74,   700] loss: 0.461
[74,   800] loss: 0.519
[74,   900] loss: 0.583
[74,  1000] loss: 0.654
[74,  1100] loss: 0.727


 37%|███▋      | 74/200 [03:26<05:51,  2.79s/it]

[74,  1200] loss: 0.781
Epoch [74], Train Loss: 0.0663
[75,   100] loss: 0.066
[75,   200] loss: 0.132
[75,   300] loss: 0.194
[75,   400] loss: 0.249
[75,   500] loss: 0.314
[75,   600] loss: 0.382
[75,   700] loss: 0.460
[75,   800] loss: 0.514
[75,   900] loss: 0.559
[75,  1000] loss: 0.634
[75,  1100] loss: 0.696


 38%|███▊      | 75/200 [03:28<05:48,  2.79s/it]

[75,  1200] loss: 0.762
Epoch [75], Train Loss: 0.0635
[76,   100] loss: 0.060
[76,   200] loss: 0.126
[76,   300] loss: 0.191
[76,   400] loss: 0.250
[76,   500] loss: 0.297
[76,   600] loss: 0.362
[76,   700] loss: 0.428
[76,   800] loss: 0.497
[76,   900] loss: 0.566
[76,  1000] loss: 0.628
[76,  1100] loss: 0.686
[76,  1200] loss: 0.735


 38%|███▊      | 76/200 [03:32<06:06,  2.95s/it]

Epoch [76], Train Loss: 0.0623
[77,   100] loss: 0.050
[77,   200] loss: 0.131
[77,   300] loss: 0.204
[77,   400] loss: 0.270
[77,   500] loss: 0.328
[77,   600] loss: 0.381
[77,   700] loss: 0.444
[77,   800] loss: 0.510
[77,   900] loss: 0.577
[77,  1000] loss: 0.635
[77,  1100] loss: 0.691


 38%|███▊      | 77/200 [03:35<05:56,  2.90s/it]

[77,  1200] loss: 0.746
Epoch [77], Train Loss: 0.0633
[78,   100] loss: 0.066
[78,   200] loss: 0.121
[78,   300] loss: 0.170
[78,   400] loss: 0.231
[78,   500] loss: 0.285
[78,   600] loss: 0.370
[78,   700] loss: 0.433
[78,   800] loss: 0.509
[78,   900] loss: 0.560
[78,  1000] loss: 0.630
[78,  1100] loss: 0.697


 39%|███▉      | 78/200 [03:37<05:48,  2.86s/it]

[78,  1200] loss: 0.758
Epoch [78], Train Loss: 0.0622
[79,   100] loss: 0.053
[79,   200] loss: 0.131
[79,   300] loss: 0.178
[79,   400] loss: 0.242
[79,   500] loss: 0.290
[79,   600] loss: 0.337
[79,   700] loss: 0.394
[79,   800] loss: 0.461
[79,   900] loss: 0.521
[79,  1000] loss: 0.593
[79,  1100] loss: 0.652


 40%|███▉      | 79/200 [03:40<05:41,  2.83s/it]

[79,  1200] loss: 0.726
Epoch [79], Train Loss: 0.0617
[80,   100] loss: 0.069
[80,   200] loss: 0.129
[80,   300] loss: 0.194
[80,   400] loss: 0.252
[80,   500] loss: 0.312
[80,   600] loss: 0.390
[80,   700] loss: 0.454
[80,   800] loss: 0.536
[80,   900] loss: 0.597
[80,  1000] loss: 0.647
[80,  1100] loss: 0.724


 40%|████      | 80/200 [03:43<05:36,  2.80s/it]

[80,  1200] loss: 0.789
Epoch [80], Train Loss: 0.0654
[81,   100] loss: 0.075
[81,   200] loss: 0.149
[81,   300] loss: 0.223
[81,   400] loss: 0.277
[81,   500] loss: 0.339
[81,   600] loss: 0.391
[81,   700] loss: 0.464
[81,   800] loss: 0.522
[81,   900] loss: 0.575
[81,  1000] loss: 0.631
[81,  1100] loss: 0.688


 40%|████      | 81/200 [03:46<05:31,  2.79s/it]

[81,  1200] loss: 0.743
Epoch [81], Train Loss: 0.0618
[82,   100] loss: 0.066
[82,   200] loss: 0.121
[82,   300] loss: 0.188
[82,   400] loss: 0.249
[82,   500] loss: 0.319
[82,   600] loss: 0.375
[82,   700] loss: 0.442
[82,   800] loss: 0.522
[82,   900] loss: 0.577
[82,  1000] loss: 0.645
[82,  1100] loss: 0.695


 41%|████      | 82/200 [03:48<05:28,  2.78s/it]

[82,  1200] loss: 0.748
Epoch [82], Train Loss: 0.0639
[83,   100] loss: 0.058
[83,   200] loss: 0.121
[83,   300] loss: 0.186
[83,   400] loss: 0.245
[83,   500] loss: 0.317
[83,   600] loss: 0.372
[83,   700] loss: 0.441
[83,   800] loss: 0.508
[83,   900] loss: 0.565
[83,  1000] loss: 0.629
[83,  1100] loss: 0.688


 42%|████▏     | 83/200 [03:51<05:26,  2.79s/it]

[83,  1200] loss: 0.741
Epoch [83], Train Loss: 0.0618
[84,   100] loss: 0.060
[84,   200] loss: 0.109
[84,   300] loss: 0.165
[84,   400] loss: 0.227
[84,   500] loss: 0.290
[84,   600] loss: 0.356
[84,   700] loss: 0.411
[84,   800] loss: 0.466
[84,   900] loss: 0.533
[84,  1000] loss: 0.603
[84,  1100] loss: 0.690


 42%|████▏     | 84/200 [03:54<05:23,  2.79s/it]

[84,  1200] loss: 0.749
Epoch [84], Train Loss: 0.0632
[85,   100] loss: 0.061
[85,   200] loss: 0.122
[85,   300] loss: 0.179
[85,   400] loss: 0.270
[85,   500] loss: 0.331
[85,   600] loss: 0.388
[85,   700] loss: 0.451
[85,   800] loss: 0.512
[85,   900] loss: 0.577
[85,  1000] loss: 0.651
[85,  1100] loss: 0.704


 42%|████▎     | 85/200 [03:57<05:19,  2.78s/it]

[85,  1200] loss: 0.754
Epoch [85], Train Loss: 0.0625
[86,   100] loss: 0.078
[86,   200] loss: 0.154
[86,   300] loss: 0.209
[86,   400] loss: 0.276
[86,   500] loss: 0.337
[86,   600] loss: 0.395
[86,   700] loss: 0.451
[86,   800] loss: 0.523
[86,   900] loss: 0.581
[86,  1000] loss: 0.627
[86,  1100] loss: 0.708


 43%|████▎     | 86/200 [03:59<05:16,  2.78s/it]

[86,  1200] loss: 0.764
Epoch [86], Train Loss: 0.0635
[87,   100] loss: 0.051
[87,   200] loss: 0.101
[87,   300] loss: 0.168
[87,   400] loss: 0.224
[87,   500] loss: 0.280
[87,   600] loss: 0.344
[87,   700] loss: 0.426
[87,   800] loss: 0.492
[87,   900] loss: 0.566
[87,  1000] loss: 0.627
[87,  1100] loss: 0.690


 44%|████▎     | 87/200 [04:02<05:13,  2.77s/it]

[87,  1200] loss: 0.751
Epoch [87], Train Loss: 0.0622
[88,   100] loss: 0.044
[88,   200] loss: 0.130
[88,   300] loss: 0.175
[88,   400] loss: 0.230
[88,   500] loss: 0.292
[88,   600] loss: 0.352
[88,   700] loss: 0.419
[88,   800] loss: 0.492
[88,   900] loss: 0.559
[88,  1000] loss: 0.621
[88,  1100] loss: 0.675


 44%|████▍     | 88/200 [04:05<05:11,  2.78s/it]

[88,  1200] loss: 0.726
Epoch [88], Train Loss: 0.0598
[89,   100] loss: 0.062
[89,   200] loss: 0.127
[89,   300] loss: 0.187
[89,   400] loss: 0.244
[89,   500] loss: 0.313
[89,   600] loss: 0.363
[89,   700] loss: 0.411
[89,   800] loss: 0.462
[89,   900] loss: 0.524
[89,  1000] loss: 0.584
[89,  1100] loss: 0.647


 44%|████▍     | 89/200 [04:08<05:09,  2.79s/it]

[89,  1200] loss: 0.715
Epoch [89], Train Loss: 0.0594
[90,   100] loss: 0.059
[90,   200] loss: 0.111
[90,   300] loss: 0.175
[90,   400] loss: 0.225
[90,   500] loss: 0.295
[90,   600] loss: 0.377
[90,   700] loss: 0.434
[90,   800] loss: 0.496
[90,   900] loss: 0.553
[90,  1000] loss: 0.625
[90,  1100] loss: 0.675


 45%|████▌     | 90/200 [04:11<05:05,  2.78s/it]

[90,  1200] loss: 0.741
Epoch [90], Train Loss: 0.0626
[91,   100] loss: 0.057
[91,   200] loss: 0.127
[91,   300] loss: 0.186
[91,   400] loss: 0.249
[91,   500] loss: 0.314
[91,   600] loss: 0.368
[91,   700] loss: 0.419
[91,   800] loss: 0.494
[91,   900] loss: 0.560
[91,  1000] loss: 0.612
[91,  1100] loss: 0.663


 46%|████▌     | 91/200 [04:13<05:02,  2.78s/it]

[91,  1200] loss: 0.714
Epoch [91], Train Loss: 0.0595
[92,   100] loss: 0.057
[92,   200] loss: 0.114
[92,   300] loss: 0.187
[92,   400] loss: 0.250
[92,   500] loss: 0.305
[92,   600] loss: 0.366
[92,   700] loss: 0.419
[92,   800] loss: 0.481
[92,   900] loss: 0.536
[92,  1000] loss: 0.606
[92,  1100] loss: 0.667


 46%|████▌     | 92/200 [04:16<04:59,  2.77s/it]

[92,  1200] loss: 0.718
Epoch [92], Train Loss: 0.0618
[93,   100] loss: 0.061
[93,   200] loss: 0.105
[93,   300] loss: 0.155
[93,   400] loss: 0.225
[93,   500] loss: 0.289
[93,   600] loss: 0.342
[93,   700] loss: 0.419
[93,   800] loss: 0.488
[93,   900] loss: 0.555
[93,  1000] loss: 0.620
[93,  1100] loss: 0.682


 46%|████▋     | 93/200 [04:19<04:55,  2.76s/it]

[93,  1200] loss: 0.746
Epoch [93], Train Loss: 0.0628
[94,   100] loss: 0.055
[94,   200] loss: 0.121
[94,   300] loss: 0.176
[94,   400] loss: 0.243
[94,   500] loss: 0.288
[94,   600] loss: 0.338
[94,   700] loss: 0.393
[94,   800] loss: 0.452
[94,   900] loss: 0.524
[94,  1000] loss: 0.575
[94,  1100] loss: 0.629


 47%|████▋     | 94/200 [04:22<04:53,  2.76s/it]

[94,  1200] loss: 0.692
Epoch [94], Train Loss: 0.0585
[95,   100] loss: 0.061
[95,   200] loss: 0.109
[95,   300] loss: 0.170
[95,   400] loss: 0.233
[95,   500] loss: 0.274
[95,   600] loss: 0.339
[95,   700] loss: 0.395
[95,   800] loss: 0.474
[95,   900] loss: 0.549
[95,  1000] loss: 0.607
[95,  1100] loss: 0.669


 48%|████▊     | 95/200 [04:24<04:50,  2.76s/it]

[95,  1200] loss: 0.743
Epoch [95], Train Loss: 0.0613
[96,   100] loss: 0.056
[96,   200] loss: 0.114
[96,   300] loss: 0.177
[96,   400] loss: 0.231
[96,   500] loss: 0.296
[96,   600] loss: 0.348
[96,   700] loss: 0.413
[96,   800] loss: 0.481
[96,   900] loss: 0.532
[96,  1000] loss: 0.611
[96,  1100] loss: 0.666


 48%|████▊     | 96/200 [04:27<04:48,  2.77s/it]

[96,  1200] loss: 0.723
Epoch [96], Train Loss: 0.0606
[97,   100] loss: 0.070
[97,   200] loss: 0.124
[97,   300] loss: 0.188
[97,   400] loss: 0.239
[97,   500] loss: 0.320
[97,   600] loss: 0.392
[97,   700] loss: 0.447
[97,   800] loss: 0.497
[97,   900] loss: 0.561
[97,  1000] loss: 0.626
[97,  1100] loss: 0.691


 48%|████▊     | 97/200 [04:30<04:45,  2.77s/it]

[97,  1200] loss: 0.741
Epoch [97], Train Loss: 0.0600
[98,   100] loss: 0.056
[98,   200] loss: 0.113
[98,   300] loss: 0.182
[98,   400] loss: 0.247
[98,   500] loss: 0.316
[98,   600] loss: 0.362
[98,   700] loss: 0.422
[98,   800] loss: 0.475
[98,   900] loss: 0.532
[98,  1000] loss: 0.608
[98,  1100] loss: 0.657


 49%|████▉     | 98/200 [04:33<04:41,  2.76s/it]

[98,  1200] loss: 0.704
Epoch [98], Train Loss: 0.0590
[99,   100] loss: 0.058
[99,   200] loss: 0.118
[99,   300] loss: 0.180
[99,   400] loss: 0.232
[99,   500] loss: 0.290
[99,   600] loss: 0.341
[99,   700] loss: 0.395
[99,   800] loss: 0.472
[99,   900] loss: 0.549
[99,  1000] loss: 0.631
[99,  1100] loss: 0.701


 50%|████▉     | 99/200 [04:36<04:39,  2.77s/it]

[99,  1200] loss: 0.753
Epoch [99], Train Loss: 0.0619
[100,   100] loss: 0.072
[100,   200] loss: 0.124
[100,   300] loss: 0.187
[100,   400] loss: 0.235
[100,   500] loss: 0.297
[100,   600] loss: 0.356
[100,   700] loss: 0.422
[100,   800] loss: 0.467
[100,   900] loss: 0.541
[100,  1000] loss: 0.606
[100,  1100] loss: 0.649


 50%|█████     | 100/200 [04:38<04:36,  2.76s/it]

[100,  1200] loss: 0.707
Epoch [100], Train Loss: 0.0588
[101,   100] loss: 0.050
[101,   200] loss: 0.114
[101,   300] loss: 0.157
[101,   400] loss: 0.235
[101,   500] loss: 0.299
[101,   600] loss: 0.352
[101,   700] loss: 0.409
[101,   800] loss: 0.466
[101,   900] loss: 0.527
[101,  1000] loss: 0.584
[101,  1100] loss: 0.641


 50%|█████     | 101/200 [04:41<04:33,  2.76s/it]

[101,  1200] loss: 0.698
Epoch [101], Train Loss: 0.0571
[102,   100] loss: 0.061
[102,   200] loss: 0.117
[102,   300] loss: 0.179
[102,   400] loss: 0.248
[102,   500] loss: 0.307
[102,   600] loss: 0.365
[102,   700] loss: 0.431
[102,   800] loss: 0.485
[102,   900] loss: 0.540
[102,  1000] loss: 0.596
[102,  1100] loss: 0.642


 51%|█████     | 102/200 [04:44<04:30,  2.76s/it]

[102,  1200] loss: 0.704
Epoch [102], Train Loss: 0.0583
[103,   100] loss: 0.056
[103,   200] loss: 0.118
[103,   300] loss: 0.162
[103,   400] loss: 0.228
[103,   500] loss: 0.305
[103,   600] loss: 0.360
[103,   700] loss: 0.427
[103,   800] loss: 0.481
[103,   900] loss: 0.536
[103,  1000] loss: 0.597
[103,  1100] loss: 0.655


 52%|█████▏    | 103/200 [04:47<04:27,  2.76s/it]

[103,  1200] loss: 0.719
Epoch [103], Train Loss: 0.0596
[104,   100] loss: 0.064
[104,   200] loss: 0.137
[104,   300] loss: 0.182
[104,   400] loss: 0.243
[104,   500] loss: 0.306
[104,   600] loss: 0.375
[104,   700] loss: 0.442
[104,   800] loss: 0.507
[104,   900] loss: 0.568
[104,  1000] loss: 0.617
[104,  1100] loss: 0.664


 52%|█████▏    | 104/200 [04:49<04:24,  2.76s/it]

[104,  1200] loss: 0.728
Epoch [104], Train Loss: 0.0600
[105,   100] loss: 0.056
[105,   200] loss: 0.104
[105,   300] loss: 0.164
[105,   400] loss: 0.211
[105,   500] loss: 0.268
[105,   600] loss: 0.345
[105,   700] loss: 0.413
[105,   800] loss: 0.481
[105,   900] loss: 0.548
[105,  1000] loss: 0.598
[105,  1100] loss: 0.670
[105,  1200] loss: 0.718
Epoch [105], Train Loss: 0.0589


 52%|█████▎    | 105/200 [04:52<04:22,  2.76s/it]

[106,   100] loss: 0.040
[106,   200] loss: 0.098
[106,   300] loss: 0.150
[106,   400] loss: 0.208
[106,   500] loss: 0.274
[106,   600] loss: 0.322
[106,   700] loss: 0.380
[106,   800] loss: 0.433
[106,   900] loss: 0.488
[106,  1000] loss: 0.557
[106,  1100] loss: 0.614


 53%|█████▎    | 106/200 [04:55<04:19,  2.76s/it]

[106,  1200] loss: 0.674
Epoch [106], Train Loss: 0.0572
[107,   100] loss: 0.061
[107,   200] loss: 0.118
[107,   300] loss: 0.160
[107,   400] loss: 0.226
[107,   500] loss: 0.282
[107,   600] loss: 0.353
[107,   700] loss: 0.422
[107,   800] loss: 0.472
[107,   900] loss: 0.528
[107,  1000] loss: 0.584
[107,  1100] loss: 0.648


 54%|█████▎    | 107/200 [04:58<04:16,  2.76s/it]

[107,  1200] loss: 0.692
Epoch [107], Train Loss: 0.0571
[108,   100] loss: 0.070
[108,   200] loss: 0.129
[108,   300] loss: 0.174
[108,   400] loss: 0.230
[108,   500] loss: 0.290
[108,   600] loss: 0.336
[108,   700] loss: 0.382
[108,   800] loss: 0.440
[108,   900] loss: 0.492
[108,  1000] loss: 0.550
[108,  1100] loss: 0.618


 54%|█████▍    | 108/200 [05:00<04:14,  2.76s/it]

[108,  1200] loss: 0.672
Epoch [108], Train Loss: 0.0564
[109,   100] loss: 0.056
[109,   200] loss: 0.103
[109,   300] loss: 0.166
[109,   400] loss: 0.232
[109,   500] loss: 0.282
[109,   600] loss: 0.326
[109,   700] loss: 0.387
[109,   800] loss: 0.451
[109,   900] loss: 0.507
[109,  1000] loss: 0.561
[109,  1100] loss: 0.633
[109,  1200] loss: 0.690


 55%|█████▍    | 109/200 [05:03<04:13,  2.78s/it]

Epoch [109], Train Loss: 0.0577
[110,   100] loss: 0.055
[110,   200] loss: 0.116
[110,   300] loss: 0.186
[110,   400] loss: 0.234
[110,   500] loss: 0.289
[110,   600] loss: 0.355
[110,   700] loss: 0.410
[110,   800] loss: 0.473
[110,   900] loss: 0.537
[110,  1000] loss: 0.591
[110,  1100] loss: 0.646
[110,  1200] loss: 0.706


 55%|█████▌    | 110/200 [05:06<04:15,  2.84s/it]

Epoch [110], Train Loss: 0.0585
[111,   100] loss: 0.044
[111,   200] loss: 0.099
[111,   300] loss: 0.143
[111,   400] loss: 0.199
[111,   500] loss: 0.259
[111,   600] loss: 0.310
[111,   700] loss: 0.362
[111,   800] loss: 0.421
[111,   900] loss: 0.497
[111,  1000] loss: 0.553
[111,  1100] loss: 0.619


 56%|█████▌    | 111/200 [05:09<04:14,  2.87s/it]

[111,  1200] loss: 0.673
Epoch [111], Train Loss: 0.0569
[112,   100] loss: 0.058
[112,   200] loss: 0.127
[112,   300] loss: 0.177
[112,   400] loss: 0.223
[112,   500] loss: 0.271
[112,   600] loss: 0.318
[112,   700] loss: 0.386
[112,   800] loss: 0.445
[112,   900] loss: 0.508
[112,  1000] loss: 0.554
[112,  1100] loss: 0.615


 56%|█████▌    | 112/200 [05:12<04:09,  2.83s/it]

[112,  1200] loss: 0.665
Epoch [112], Train Loss: 0.0574
[113,   100] loss: 0.088
[113,   200] loss: 0.147
[113,   300] loss: 0.197
[113,   400] loss: 0.253
[113,   500] loss: 0.299
[113,   600] loss: 0.352
[113,   700] loss: 0.409
[113,   800] loss: 0.482
[113,   900] loss: 0.537
[113,  1000] loss: 0.594
[113,  1100] loss: 0.635


 56%|█████▋    | 113/200 [05:15<04:04,  2.81s/it]

[113,  1200] loss: 0.701
Epoch [113], Train Loss: 0.0578
[114,   100] loss: 0.056
[114,   200] loss: 0.105
[114,   300] loss: 0.155
[114,   400] loss: 0.217
[114,   500] loss: 0.269
[114,   600] loss: 0.330
[114,   700] loss: 0.380
[114,   800] loss: 0.446
[114,   900] loss: 0.527
[114,  1000] loss: 0.567
[114,  1100] loss: 0.617


 57%|█████▋    | 114/200 [05:17<04:00,  2.79s/it]

[114,  1200] loss: 0.681
Epoch [114], Train Loss: 0.0568
[115,   100] loss: 0.049
[115,   200] loss: 0.115
[115,   300] loss: 0.173
[115,   400] loss: 0.227
[115,   500] loss: 0.282
[115,   600] loss: 0.339
[115,   700] loss: 0.399
[115,   800] loss: 0.450
[115,   900] loss: 0.505
[115,  1000] loss: 0.560
[115,  1100] loss: 0.629


 57%|█████▊    | 115/200 [05:20<03:56,  2.78s/it]

[115,  1200] loss: 0.685
Epoch [115], Train Loss: 0.0571
[116,   100] loss: 0.060
[116,   200] loss: 0.119
[116,   300] loss: 0.170
[116,   400] loss: 0.233
[116,   500] loss: 0.280
[116,   600] loss: 0.331
[116,   700] loss: 0.393
[116,   800] loss: 0.461
[116,   900] loss: 0.516
[116,  1000] loss: 0.573
[116,  1100] loss: 0.645


 58%|█████▊    | 116/200 [05:23<03:53,  2.77s/it]

[116,  1200] loss: 0.690
Epoch [116], Train Loss: 0.0567
[117,   100] loss: 0.039
[117,   200] loss: 0.090
[117,   300] loss: 0.142
[117,   400] loss: 0.202
[117,   500] loss: 0.268
[117,   600] loss: 0.323
[117,   700] loss: 0.379
[117,   800] loss: 0.442
[117,   900] loss: 0.513
[117,  1000] loss: 0.581
[117,  1100] loss: 0.626
[117,  1200] loss: 0.685


 58%|█████▊    | 117/200 [05:26<03:49,  2.77s/it]

Epoch [117], Train Loss: 0.0571
[118,   100] loss: 0.058
[118,   200] loss: 0.121
[118,   300] loss: 0.191
[118,   400] loss: 0.235
[118,   500] loss: 0.293
[118,   600] loss: 0.355
[118,   700] loss: 0.404
[118,   800] loss: 0.445
[118,   900] loss: 0.508
[118,  1000] loss: 0.586
[118,  1100] loss: 0.623


 59%|█████▉    | 118/200 [05:28<03:46,  2.77s/it]

[118,  1200] loss: 0.689
Epoch [118], Train Loss: 0.0577
[119,   100] loss: 0.065
[119,   200] loss: 0.126
[119,   300] loss: 0.189
[119,   400] loss: 0.247
[119,   500] loss: 0.313
[119,   600] loss: 0.372
[119,   700] loss: 0.425
[119,   800] loss: 0.484
[119,   900] loss: 0.528
[119,  1000] loss: 0.581
[119,  1100] loss: 0.643


 60%|█████▉    | 119/200 [05:31<03:43,  2.76s/it]

[119,  1200] loss: 0.687
Epoch [119], Train Loss: 0.0580
[120,   100] loss: 0.049
[120,   200] loss: 0.108
[120,   300] loss: 0.173
[120,   400] loss: 0.226
[120,   500] loss: 0.300
[120,   600] loss: 0.364
[120,   700] loss: 0.404
[120,   800] loss: 0.461
[120,   900] loss: 0.517
[120,  1000] loss: 0.579
[120,  1100] loss: 0.638


 60%|██████    | 120/200 [05:34<03:41,  2.77s/it]

[120,  1200] loss: 0.689
Epoch [120], Train Loss: 0.0582
[121,   100] loss: 0.049
[121,   200] loss: 0.105
[121,   300] loss: 0.157
[121,   400] loss: 0.209
[121,   500] loss: 0.268
[121,   600] loss: 0.314
[121,   700] loss: 0.386
[121,   800] loss: 0.444
[121,   900] loss: 0.481
[121,  1000] loss: 0.548
[121,  1100] loss: 0.607


 60%|██████    | 121/200 [05:37<03:38,  2.77s/it]

[121,  1200] loss: 0.677
Epoch [121], Train Loss: 0.0560
[122,   100] loss: 0.048
[122,   200] loss: 0.102
[122,   300] loss: 0.166
[122,   400] loss: 0.224
[122,   500] loss: 0.290
[122,   600] loss: 0.344
[122,   700] loss: 0.394
[122,   800] loss: 0.449
[122,   900] loss: 0.517
[122,  1000] loss: 0.583
[122,  1100] loss: 0.643


 61%|██████    | 122/200 [05:39<03:35,  2.77s/it]

[122,  1200] loss: 0.696
Epoch [122], Train Loss: 0.0578
[123,   100] loss: 0.055
[123,   200] loss: 0.110
[123,   300] loss: 0.165
[123,   400] loss: 0.222
[123,   500] loss: 0.272
[123,   600] loss: 0.335
[123,   700] loss: 0.381
[123,   800] loss: 0.438
[123,   900] loss: 0.504
[123,  1000] loss: 0.552
[123,  1100] loss: 0.608


 62%|██████▏   | 123/200 [05:42<03:32,  2.76s/it]

[123,  1200] loss: 0.653
Epoch [123], Train Loss: 0.0547
[124,   100] loss: 0.049
[124,   200] loss: 0.109
[124,   300] loss: 0.165
[124,   400] loss: 0.209
[124,   500] loss: 0.275
[124,   600] loss: 0.326
[124,   700] loss: 0.377
[124,   800] loss: 0.444
[124,   900] loss: 0.508
[124,  1000] loss: 0.569
[124,  1100] loss: 0.609


 62%|██████▏   | 124/200 [05:45<03:29,  2.76s/it]

[124,  1200] loss: 0.668
Epoch [124], Train Loss: 0.0556
[125,   100] loss: 0.056
[125,   200] loss: 0.094
[125,   300] loss: 0.163
[125,   400] loss: 0.212
[125,   500] loss: 0.272
[125,   600] loss: 0.339
[125,   700] loss: 0.388
[125,   800] loss: 0.462
[125,   900] loss: 0.519
[125,  1000] loss: 0.579
[125,  1100] loss: 0.637


 62%|██████▎   | 125/200 [05:48<03:26,  2.76s/it]

[125,  1200] loss: 0.693
Epoch [125], Train Loss: 0.0569
[126,   100] loss: 0.046
[126,   200] loss: 0.107
[126,   300] loss: 0.153
[126,   400] loss: 0.218
[126,   500] loss: 0.263
[126,   600] loss: 0.324
[126,   700] loss: 0.389
[126,   800] loss: 0.450
[126,   900] loss: 0.495
[126,  1000] loss: 0.556
[126,  1100] loss: 0.612


 63%|██████▎   | 126/200 [05:50<03:24,  2.76s/it]

[126,  1200] loss: 0.674
Epoch [126], Train Loss: 0.0559
[127,   100] loss: 0.038
[127,   200] loss: 0.098
[127,   300] loss: 0.164
[127,   400] loss: 0.213
[127,   500] loss: 0.282
[127,   600] loss: 0.351
[127,   700] loss: 0.395
[127,   800] loss: 0.465
[127,   900] loss: 0.510
[127,  1000] loss: 0.561
[127,  1100] loss: 0.607
[127,  1200] loss: 0.656


 64%|██████▎   | 127/200 [05:53<03:22,  2.77s/it]

Epoch [127], Train Loss: 0.0539
[128,   100] loss: 0.048
[128,   200] loss: 0.112
[128,   300] loss: 0.189
[128,   400] loss: 0.250
[128,   500] loss: 0.301
[128,   600] loss: 0.347
[128,   700] loss: 0.402
[128,   800] loss: 0.445
[128,   900] loss: 0.502
[128,  1000] loss: 0.549
[128,  1100] loss: 0.621
[128,  1200] loss: 0.677


 64%|██████▍   | 128/200 [05:56<03:25,  2.85s/it]

Epoch [128], Train Loss: 0.0574
[129,   100] loss: 0.058
[129,   200] loss: 0.105
[129,   300] loss: 0.157
[129,   400] loss: 0.206
[129,   500] loss: 0.255
[129,   600] loss: 0.324
[129,   700] loss: 0.392
[129,   800] loss: 0.461
[129,   900] loss: 0.516
[129,  1000] loss: 0.575
[129,  1100] loss: 0.623


 64%|██████▍   | 129/200 [05:59<03:20,  2.82s/it]

[129,  1200] loss: 0.675
Epoch [129], Train Loss: 0.0572
[130,   100] loss: 0.052
[130,   200] loss: 0.109
[130,   300] loss: 0.169
[130,   400] loss: 0.228
[130,   500] loss: 0.287
[130,   600] loss: 0.344
[130,   700] loss: 0.414
[130,   800] loss: 0.473
[130,   900] loss: 0.531
[130,  1000] loss: 0.583
[130,  1100] loss: 0.620


 65%|██████▌   | 130/200 [06:02<03:16,  2.80s/it]

[130,  1200] loss: 0.674
Epoch [130], Train Loss: 0.0558
[131,   100] loss: 0.066
[131,   200] loss: 0.110
[131,   300] loss: 0.160
[131,   400] loss: 0.229
[131,   500] loss: 0.284
[131,   600] loss: 0.338
[131,   700] loss: 0.379
[131,   800] loss: 0.421
[131,   900] loss: 0.495
[131,  1000] loss: 0.569
[131,  1100] loss: 0.627


 66%|██████▌   | 131/200 [06:05<03:12,  2.79s/it]

[131,  1200] loss: 0.687
Epoch [131], Train Loss: 0.0565
[132,   100] loss: 0.048
[132,   200] loss: 0.094
[132,   300] loss: 0.156
[132,   400] loss: 0.217
[132,   500] loss: 0.281
[132,   600] loss: 0.364
[132,   700] loss: 0.424
[132,   800] loss: 0.483
[132,   900] loss: 0.533
[132,  1000] loss: 0.595
[132,  1100] loss: 0.653


 66%|██████▌   | 132/200 [06:07<03:08,  2.78s/it]

[132,  1200] loss: 0.691
Epoch [132], Train Loss: 0.0577
[133,   100] loss: 0.052
[133,   200] loss: 0.123
[133,   300] loss: 0.178
[133,   400] loss: 0.243
[133,   500] loss: 0.294
[133,   600] loss: 0.346
[133,   700] loss: 0.423
[133,   800] loss: 0.476
[133,   900] loss: 0.525
[133,  1000] loss: 0.575
[133,  1100] loss: 0.616


 66%|██████▋   | 133/200 [06:10<03:06,  2.78s/it]

[133,  1200] loss: 0.658
Epoch [133], Train Loss: 0.0553
[134,   100] loss: 0.058
[134,   200] loss: 0.111
[134,   300] loss: 0.175
[134,   400] loss: 0.244
[134,   500] loss: 0.303
[134,   600] loss: 0.353
[134,   700] loss: 0.403
[134,   800] loss: 0.446
[134,   900] loss: 0.491
[134,  1000] loss: 0.547
[134,  1100] loss: 0.612


 67%|██████▋   | 134/200 [06:13<03:02,  2.77s/it]

[134,  1200] loss: 0.677
Epoch [134], Train Loss: 0.0564
[135,   100] loss: 0.046
[135,   200] loss: 0.097
[135,   300] loss: 0.154
[135,   400] loss: 0.215
[135,   500] loss: 0.277
[135,   600] loss: 0.340
[135,   700] loss: 0.382
[135,   800] loss: 0.447
[135,   900] loss: 0.512
[135,  1000] loss: 0.541
[135,  1100] loss: 0.601


 68%|██████▊   | 135/200 [06:16<02:59,  2.76s/it]

[135,  1200] loss: 0.649
Epoch [135], Train Loss: 0.0551
[136,   100] loss: 0.083
[136,   200] loss: 0.130
[136,   300] loss: 0.189
[136,   400] loss: 0.233
[136,   500] loss: 0.293
[136,   600] loss: 0.359
[136,   700] loss: 0.412
[136,   800] loss: 0.460
[136,   900] loss: 0.507
[136,  1000] loss: 0.563
[136,  1100] loss: 0.623
[136,  1200] loss: 0.677


 68%|██████▊   | 136/200 [06:18<02:56,  2.75s/it]

Epoch [136], Train Loss: 0.0568
[137,   100] loss: 0.045
[137,   200] loss: 0.096
[137,   300] loss: 0.149
[137,   400] loss: 0.210
[137,   500] loss: 0.258
[137,   600] loss: 0.318
[137,   700] loss: 0.387
[137,   800] loss: 0.436
[137,   900] loss: 0.500
[137,  1000] loss: 0.555
[137,  1100] loss: 0.605


 68%|██████▊   | 137/200 [06:21<02:53,  2.76s/it]

[137,  1200] loss: 0.655
Epoch [137], Train Loss: 0.0549
[138,   100] loss: 0.044
[138,   200] loss: 0.090
[138,   300] loss: 0.139
[138,   400] loss: 0.208
[138,   500] loss: 0.267
[138,   600] loss: 0.329
[138,   700] loss: 0.378
[138,   800] loss: 0.430
[138,   900] loss: 0.489
[138,  1000] loss: 0.529
[138,  1100] loss: 0.588


 69%|██████▉   | 138/200 [06:24<02:50,  2.76s/it]

[138,  1200] loss: 0.635
Epoch [138], Train Loss: 0.0537
[139,   100] loss: 0.052
[139,   200] loss: 0.118
[139,   300] loss: 0.163
[139,   400] loss: 0.218
[139,   500] loss: 0.267
[139,   600] loss: 0.331
[139,   700] loss: 0.379
[139,   800] loss: 0.445
[139,   900] loss: 0.501
[139,  1000] loss: 0.562
[139,  1100] loss: 0.611


 70%|██████▉   | 139/200 [06:27<02:48,  2.76s/it]

[139,  1200] loss: 0.673
Epoch [139], Train Loss: 0.0560
[140,   100] loss: 0.047
[140,   200] loss: 0.120
[140,   300] loss: 0.173
[140,   400] loss: 0.230
[140,   500] loss: 0.289
[140,   600] loss: 0.337
[140,   700] loss: 0.394
[140,   800] loss: 0.449
[140,   900] loss: 0.509
[140,  1000] loss: 0.580
[140,  1100] loss: 0.624


 70%|███████   | 140/200 [06:29<02:45,  2.75s/it]

[140,  1200] loss: 0.673
Epoch [140], Train Loss: 0.0553
[141,   100] loss: 0.049
[141,   200] loss: 0.091
[141,   300] loss: 0.148
[141,   400] loss: 0.215
[141,   500] loss: 0.280
[141,   600] loss: 0.355
[141,   700] loss: 0.399
[141,   800] loss: 0.440
[141,   900] loss: 0.501
[141,  1000] loss: 0.550
[141,  1100] loss: 0.601


 70%|███████   | 141/200 [06:32<02:42,  2.75s/it]

[141,  1200] loss: 0.656
Epoch [141], Train Loss: 0.0546
[142,   100] loss: 0.041
[142,   200] loss: 0.087
[142,   300] loss: 0.133
[142,   400] loss: 0.201
[142,   500] loss: 0.272
[142,   600] loss: 0.317
[142,   700] loss: 0.374
[142,   800] loss: 0.419
[142,   900] loss: 0.473
[142,  1000] loss: 0.526
[142,  1100] loss: 0.585


 71%|███████   | 142/200 [06:35<02:39,  2.75s/it]

[142,  1200] loss: 0.643
Epoch [142], Train Loss: 0.0536
[143,   100] loss: 0.044
[143,   200] loss: 0.091
[143,   300] loss: 0.139
[143,   400] loss: 0.196
[143,   500] loss: 0.246
[143,   600] loss: 0.289
[143,   700] loss: 0.351
[143,   800] loss: 0.415
[143,   900] loss: 0.466
[143,  1000] loss: 0.532
[143,  1100] loss: 0.573


 72%|███████▏  | 143/200 [06:38<02:36,  2.75s/it]

[143,  1200] loss: 0.641
Epoch [143], Train Loss: 0.0533
[144,   100] loss: 0.045
[144,   200] loss: 0.089
[144,   300] loss: 0.159
[144,   400] loss: 0.206
[144,   500] loss: 0.257
[144,   600] loss: 0.316
[144,   700] loss: 0.385
[144,   800] loss: 0.428
[144,   900] loss: 0.489
[144,  1000] loss: 0.542
[144,  1100] loss: 0.597


 72%|███████▏  | 144/200 [06:40<02:33,  2.75s/it]

[144,  1200] loss: 0.650
Epoch [144], Train Loss: 0.0547
[145,   100] loss: 0.053
[145,   200] loss: 0.106
[145,   300] loss: 0.168
[145,   400] loss: 0.219
[145,   500] loss: 0.275
[145,   600] loss: 0.331
[145,   700] loss: 0.383
[145,   800] loss: 0.444
[145,   900] loss: 0.496
[145,  1000] loss: 0.539
[145,  1100] loss: 0.585


 72%|███████▎  | 145/200 [06:43<02:31,  2.75s/it]

[145,  1200] loss: 0.618
Epoch [145], Train Loss: 0.0513
[146,   100] loss: 0.045
[146,   200] loss: 0.096
[146,   300] loss: 0.153
[146,   400] loss: 0.203
[146,   500] loss: 0.255
[146,   600] loss: 0.317
[146,   700] loss: 0.369
[146,   800] loss: 0.422
[146,   900] loss: 0.474
[146,  1000] loss: 0.533
[146,  1100] loss: 0.578


 73%|███████▎  | 146/200 [06:46<02:28,  2.75s/it]

[146,  1200] loss: 0.628
Epoch [146], Train Loss: 0.0530
[147,   100] loss: 0.048
[147,   200] loss: 0.102
[147,   300] loss: 0.158
[147,   400] loss: 0.204
[147,   500] loss: 0.254
[147,   600] loss: 0.303
[147,   700] loss: 0.357
[147,   800] loss: 0.416
[147,   900] loss: 0.476
[147,  1000] loss: 0.515
[147,  1100] loss: 0.572


 74%|███████▎  | 147/200 [06:49<02:25,  2.75s/it]

[147,  1200] loss: 0.631
Epoch [147], Train Loss: 0.0534
[148,   100] loss: 0.050
[148,   200] loss: 0.091
[148,   300] loss: 0.143
[148,   400] loss: 0.187
[148,   500] loss: 0.236
[148,   600] loss: 0.295
[148,   700] loss: 0.354
[148,   800] loss: 0.406
[148,   900] loss: 0.454
[148,  1000] loss: 0.518
[148,  1100] loss: 0.585


 74%|███████▍  | 148/200 [06:51<02:22,  2.75s/it]

[148,  1200] loss: 0.634
Epoch [148], Train Loss: 0.0523
[149,   100] loss: 0.056
[149,   200] loss: 0.101
[149,   300] loss: 0.159
[149,   400] loss: 0.222
[149,   500] loss: 0.278
[149,   600] loss: 0.327
[149,   700] loss: 0.388
[149,   800] loss: 0.422
[149,   900] loss: 0.463
[149,  1000] loss: 0.515
[149,  1100] loss: 0.569


 74%|███████▍  | 149/200 [06:54<02:20,  2.75s/it]

[149,  1200] loss: 0.633
Epoch [149], Train Loss: 0.0531
[150,   100] loss: 0.041
[150,   200] loss: 0.092
[150,   300] loss: 0.148
[150,   400] loss: 0.198
[150,   500] loss: 0.258
[150,   600] loss: 0.310
[150,   700] loss: 0.366
[150,   800] loss: 0.408
[150,   900] loss: 0.464
[150,  1000] loss: 0.510
[150,  1100] loss: 0.566
[150,  1200] loss: 0.620


 75%|███████▌  | 150/200 [06:57<02:17,  2.75s/it]

Epoch [150], Train Loss: 0.0520
[151,   100] loss: 0.061
[151,   200] loss: 0.114
[151,   300] loss: 0.154
[151,   400] loss: 0.212
[151,   500] loss: 0.260
[151,   600] loss: 0.315
[151,   700] loss: 0.362
[151,   800] loss: 0.423
[151,   900] loss: 0.477
[151,  1000] loss: 0.532
[151,  1100] loss: 0.579
[151,  1200] loss: 0.626


 76%|███████▌  | 151/200 [07:00<02:15,  2.77s/it]

Epoch [151], Train Loss: 0.0525
[152,   100] loss: 0.052
[152,   200] loss: 0.102
[152,   300] loss: 0.158
[152,   400] loss: 0.206
[152,   500] loss: 0.263
[152,   600] loss: 0.324
[152,   700] loss: 0.382
[152,   800] loss: 0.449
[152,   900] loss: 0.497
[152,  1000] loss: 0.539
[152,  1100] loss: 0.587
[152,  1200] loss: 0.633


 76%|███████▌  | 152/200 [07:03<02:14,  2.80s/it]

Epoch [152], Train Loss: 0.0524
[153,   100] loss: 0.050
[153,   200] loss: 0.097
[153,   300] loss: 0.159
[153,   400] loss: 0.218
[153,   500] loss: 0.269
[153,   600] loss: 0.323
[153,   700] loss: 0.391
[153,   800] loss: 0.458
[153,   900] loss: 0.508
[153,  1000] loss: 0.565
[153,  1100] loss: 0.602


 76%|███████▋  | 153/200 [07:05<02:10,  2.79s/it]

[153,  1200] loss: 0.650
Epoch [153], Train Loss: 0.0559
[154,   100] loss: 0.057
[154,   200] loss: 0.108
[154,   300] loss: 0.153
[154,   400] loss: 0.197
[154,   500] loss: 0.248
[154,   600] loss: 0.295
[154,   700] loss: 0.357
[154,   800] loss: 0.408
[154,   900] loss: 0.464
[154,  1000] loss: 0.522
[154,  1100] loss: 0.558


 77%|███████▋  | 154/200 [07:08<02:07,  2.77s/it]

[154,  1200] loss: 0.608
Epoch [154], Train Loss: 0.0520
[155,   100] loss: 0.051
[155,   200] loss: 0.102
[155,   300] loss: 0.146
[155,   400] loss: 0.199
[155,   500] loss: 0.248
[155,   600] loss: 0.297
[155,   700] loss: 0.352
[155,   800] loss: 0.398
[155,   900] loss: 0.463
[155,  1000] loss: 0.509
[155,  1100] loss: 0.578


 78%|███████▊  | 155/200 [07:11<02:04,  2.77s/it]

[155,  1200] loss: 0.626
Epoch [155], Train Loss: 0.0524
[156,   100] loss: 0.053
[156,   200] loss: 0.114
[156,   300] loss: 0.172
[156,   400] loss: 0.211
[156,   500] loss: 0.276
[156,   600] loss: 0.343
[156,   700] loss: 0.389
[156,   800] loss: 0.439
[156,   900] loss: 0.485
[156,  1000] loss: 0.532
[156,  1100] loss: 0.579
[156,  1200] loss: 0.637


 78%|███████▊  | 156/200 [07:14<02:06,  2.86s/it]

Epoch [156], Train Loss: 0.0533
[157,   100] loss: 0.049
[157,   200] loss: 0.099
[157,   300] loss: 0.154
[157,   400] loss: 0.210
[157,   500] loss: 0.254
[157,   600] loss: 0.294
[157,   700] loss: 0.357
[157,   800] loss: 0.400
[157,   900] loss: 0.450
[157,  1000] loss: 0.500
[157,  1100] loss: 0.558


 78%|███████▊  | 157/200 [07:17<02:05,  2.91s/it]

[157,  1200] loss: 0.611
Epoch [157], Train Loss: 0.0517
[158,   100] loss: 0.047
[158,   200] loss: 0.122
[158,   300] loss: 0.175
[158,   400] loss: 0.228
[158,   500] loss: 0.262
[158,   600] loss: 0.321
[158,   700] loss: 0.381
[158,   800] loss: 0.432
[158,   900] loss: 0.472
[158,  1000] loss: 0.528
[158,  1100] loss: 0.577


 79%|███████▉  | 158/200 [07:20<02:00,  2.87s/it]

[158,  1200] loss: 0.630
Epoch [158], Train Loss: 0.0518
[159,   100] loss: 0.048
[159,   200] loss: 0.093
[159,   300] loss: 0.155
[159,   400] loss: 0.208
[159,   500] loss: 0.261
[159,   600] loss: 0.317
[159,   700] loss: 0.373
[159,   800] loss: 0.425
[159,   900] loss: 0.462
[159,  1000] loss: 0.514
[159,  1100] loss: 0.572


 80%|███████▉  | 159/200 [07:22<01:56,  2.85s/it]

[159,  1200] loss: 0.632
Epoch [159], Train Loss: 0.0533
[160,   100] loss: 0.045
[160,   200] loss: 0.094
[160,   300] loss: 0.145
[160,   400] loss: 0.204
[160,   500] loss: 0.258
[160,   600] loss: 0.312
[160,   700] loss: 0.351
[160,   800] loss: 0.390
[160,   900] loss: 0.447
[160,  1000] loss: 0.496
[160,  1100] loss: 0.572


 80%|████████  | 160/200 [07:25<01:53,  2.83s/it]

[160,  1200] loss: 0.614
Epoch [160], Train Loss: 0.0503
[161,   100] loss: 0.059
[161,   200] loss: 0.111
[161,   300] loss: 0.147
[161,   400] loss: 0.202
[161,   500] loss: 0.245
[161,   600] loss: 0.295
[161,   700] loss: 0.352
[161,   800] loss: 0.401
[161,   900] loss: 0.468
[161,  1000] loss: 0.528
[161,  1100] loss: 0.577


 80%|████████  | 161/200 [07:28<01:49,  2.81s/it]

[161,  1200] loss: 0.637
Epoch [161], Train Loss: 0.0526
[162,   100] loss: 0.044
[162,   200] loss: 0.100
[162,   300] loss: 0.146
[162,   400] loss: 0.189
[162,   500] loss: 0.232
[162,   600] loss: 0.289
[162,   700] loss: 0.346
[162,   800] loss: 0.412
[162,   900] loss: 0.459
[162,  1000] loss: 0.511
[162,  1100] loss: 0.566


 81%|████████  | 162/200 [07:31<01:46,  2.79s/it]

[162,  1200] loss: 0.606
Epoch [162], Train Loss: 0.0515
[163,   100] loss: 0.054
[163,   200] loss: 0.092
[163,   300] loss: 0.157
[163,   400] loss: 0.205
[163,   500] loss: 0.257
[163,   600] loss: 0.304
[163,   700] loss: 0.351
[163,   800] loss: 0.408
[163,   900] loss: 0.454
[163,  1000] loss: 0.504
[163,  1100] loss: 0.550
[163,  1200] loss: 0.592


 82%|████████▏ | 163/200 [07:34<01:44,  2.82s/it]

Epoch [163], Train Loss: 0.0498
[164,   100] loss: 0.052
[164,   200] loss: 0.098
[164,   300] loss: 0.142
[164,   400] loss: 0.188
[164,   500] loss: 0.237
[164,   600] loss: 0.278
[164,   700] loss: 0.317
[164,   800] loss: 0.379
[164,   900] loss: 0.437
[164,  1000] loss: 0.492
[164,  1100] loss: 0.536
[164,  1200] loss: 0.601


 82%|████████▏ | 164/200 [07:36<01:41,  2.82s/it]

Epoch [164], Train Loss: 0.0497
[165,   100] loss: 0.045
[165,   200] loss: 0.088
[165,   300] loss: 0.136
[165,   400] loss: 0.196
[165,   500] loss: 0.246
[165,   600] loss: 0.282
[165,   700] loss: 0.342
[165,   800] loss: 0.393
[165,   900] loss: 0.438
[165,  1000] loss: 0.474
[165,  1100] loss: 0.520


 82%|████████▎ | 165/200 [07:39<01:38,  2.81s/it]

[165,  1200] loss: 0.558
Epoch [165], Train Loss: 0.0480
[166,   100] loss: 0.034
[166,   200] loss: 0.102
[166,   300] loss: 0.154
[166,   400] loss: 0.222
[166,   500] loss: 0.277
[166,   600] loss: 0.327
[166,   700] loss: 0.380
[166,   800] loss: 0.427
[166,   900] loss: 0.493
[166,  1000] loss: 0.529
[166,  1100] loss: 0.583


 83%|████████▎ | 166/200 [07:42<01:34,  2.79s/it]

[166,  1200] loss: 0.619
Epoch [166], Train Loss: 0.0521
[167,   100] loss: 0.044
[167,   200] loss: 0.093
[167,   300] loss: 0.145
[167,   400] loss: 0.188
[167,   500] loss: 0.250
[167,   600] loss: 0.310
[167,   700] loss: 0.368
[167,   800] loss: 0.428
[167,   900] loss: 0.499
[167,  1000] loss: 0.551
[167,  1100] loss: 0.586
[167,  1200] loss: 0.628


 84%|████████▎ | 167/200 [07:45<01:31,  2.78s/it]

Epoch [167], Train Loss: 0.0535
[168,   100] loss: 0.044
[168,   200] loss: 0.092
[168,   300] loss: 0.137
[168,   400] loss: 0.186
[168,   500] loss: 0.244
[168,   600] loss: 0.289
[168,   700] loss: 0.334
[168,   800] loss: 0.392
[168,   900] loss: 0.434
[168,  1000] loss: 0.482
[168,  1100] loss: 0.555


 84%|████████▍ | 168/200 [07:48<01:28,  2.78s/it]

[168,  1200] loss: 0.610
Epoch [168], Train Loss: 0.0512
[169,   100] loss: 0.037
[169,   200] loss: 0.081
[169,   300] loss: 0.133
[169,   400] loss: 0.183
[169,   500] loss: 0.230
[169,   600] loss: 0.280
[169,   700] loss: 0.343
[169,   800] loss: 0.410
[169,   900] loss: 0.457
[169,  1000] loss: 0.510
[169,  1100] loss: 0.563


 84%|████████▍ | 169/200 [07:50<01:25,  2.77s/it]

[169,  1200] loss: 0.624
Epoch [169], Train Loss: 0.0513
[170,   100] loss: 0.058
[170,   200] loss: 0.113
[170,   300] loss: 0.148
[170,   400] loss: 0.192
[170,   500] loss: 0.237
[170,   600] loss: 0.304
[170,   700] loss: 0.354
[170,   800] loss: 0.398
[170,   900] loss: 0.450
[170,  1000] loss: 0.510
[170,  1100] loss: 0.541


 85%|████████▌ | 170/200 [07:53<01:23,  2.77s/it]

[170,  1200] loss: 0.590
Epoch [170], Train Loss: 0.0503
[171,   100] loss: 0.048
[171,   200] loss: 0.104
[171,   300] loss: 0.158
[171,   400] loss: 0.223
[171,   500] loss: 0.272
[171,   600] loss: 0.327
[171,   700] loss: 0.381
[171,   800] loss: 0.423
[171,   900] loss: 0.470
[171,  1000] loss: 0.515
[171,  1100] loss: 0.572
[171,  1200] loss: 0.619


 86%|████████▌ | 171/200 [07:56<01:21,  2.80s/it]

Epoch [171], Train Loss: 0.0520
[172,   100] loss: 0.056
[172,   200] loss: 0.111
[172,   300] loss: 0.170
[172,   400] loss: 0.222
[172,   500] loss: 0.268
[172,   600] loss: 0.303
[172,   700] loss: 0.359
[172,   800] loss: 0.397
[172,   900] loss: 0.452
[172,  1000] loss: 0.498
[172,  1100] loss: 0.565
[172,  1200] loss: 0.616


 86%|████████▌ | 172/200 [07:59<01:17,  2.79s/it]

Epoch [172], Train Loss: 0.0517
[173,   100] loss: 0.037
[173,   200] loss: 0.087
[173,   300] loss: 0.156
[173,   400] loss: 0.220
[173,   500] loss: 0.259
[173,   600] loss: 0.300
[173,   700] loss: 0.333
[173,   800] loss: 0.390
[173,   900] loss: 0.437
[173,  1000] loss: 0.491
[173,  1100] loss: 0.541
[173,  1200] loss: 0.597


 86%|████████▋ | 173/200 [08:01<01:15,  2.78s/it]

Epoch [173], Train Loss: 0.0502
[174,   100] loss: 0.039
[174,   200] loss: 0.086
[174,   300] loss: 0.124
[174,   400] loss: 0.178
[174,   500] loss: 0.227
[174,   600] loss: 0.281
[174,   700] loss: 0.331
[174,   800] loss: 0.383
[174,   900] loss: 0.446
[174,  1000] loss: 0.496
[174,  1100] loss: 0.544
[174,  1200] loss: 0.602


 87%|████████▋ | 174/200 [08:04<01:12,  2.78s/it]

Epoch [174], Train Loss: 0.0517
[175,   100] loss: 0.052
[175,   200] loss: 0.088
[175,   300] loss: 0.126
[175,   400] loss: 0.179
[175,   500] loss: 0.237
[175,   600] loss: 0.284
[175,   700] loss: 0.332
[175,   800] loss: 0.378
[175,   900] loss: 0.449
[175,  1000] loss: 0.501
[175,  1100] loss: 0.549


 88%|████████▊ | 175/200 [08:07<01:09,  2.78s/it]

[175,  1200] loss: 0.595
Epoch [175], Train Loss: 0.0496
[176,   100] loss: 0.048
[176,   200] loss: 0.095
[176,   300] loss: 0.152
[176,   400] loss: 0.200
[176,   500] loss: 0.246
[176,   600] loss: 0.295
[176,   700] loss: 0.337
[176,   800] loss: 0.401
[176,   900] loss: 0.458
[176,  1000] loss: 0.494
[176,  1100] loss: 0.550
[176,  1200] loss: 0.611


 88%|████████▊ | 176/200 [08:10<01:06,  2.78s/it]

Epoch [176], Train Loss: 0.0511
[177,   100] loss: 0.051
[177,   200] loss: 0.095
[177,   300] loss: 0.140
[177,   400] loss: 0.190
[177,   500] loss: 0.239
[177,   600] loss: 0.294
[177,   700] loss: 0.353
[177,   800] loss: 0.400
[177,   900] loss: 0.458
[177,  1000] loss: 0.511
[177,  1100] loss: 0.558


 88%|████████▊ | 177/200 [08:13<01:04,  2.81s/it]

[177,  1200] loss: 0.599
Epoch [177], Train Loss: 0.0499
[178,   100] loss: 0.055
[178,   200] loss: 0.110
[178,   300] loss: 0.161
[178,   400] loss: 0.210
[178,   500] loss: 0.266
[178,   600] loss: 0.322
[178,   700] loss: 0.370
[178,   800] loss: 0.414
[178,   900] loss: 0.465
[178,  1000] loss: 0.508
[178,  1100] loss: 0.557


 89%|████████▉ | 178/200 [08:15<01:01,  2.80s/it]

[178,  1200] loss: 0.611
Epoch [178], Train Loss: 0.0502
[179,   100] loss: 0.053
[179,   200] loss: 0.099
[179,   300] loss: 0.147
[179,   400] loss: 0.179
[179,   500] loss: 0.238
[179,   600] loss: 0.286
[179,   700] loss: 0.331
[179,   800] loss: 0.376
[179,   900] loss: 0.423
[179,  1000] loss: 0.496
[179,  1100] loss: 0.536


 90%|████████▉ | 179/200 [08:18<00:58,  2.78s/it]

[179,  1200] loss: 0.588
Epoch [179], Train Loss: 0.0496
[180,   100] loss: 0.044
[180,   200] loss: 0.085
[180,   300] loss: 0.132
[180,   400] loss: 0.169
[180,   500] loss: 0.219
[180,   600] loss: 0.277
[180,   700] loss: 0.326
[180,   800] loss: 0.390
[180,   900] loss: 0.441
[180,  1000] loss: 0.486
[180,  1100] loss: 0.530


 90%|█████████ | 180/200 [08:21<00:55,  2.78s/it]

[180,  1200] loss: 0.587
Epoch [180], Train Loss: 0.0506
[181,   100] loss: 0.061
[181,   200] loss: 0.113
[181,   300] loss: 0.183
[181,   400] loss: 0.234
[181,   500] loss: 0.277
[181,   600] loss: 0.333
[181,   700] loss: 0.367
[181,   800] loss: 0.425
[181,   900] loss: 0.490
[181,  1000] loss: 0.529
[181,  1100] loss: 0.578


 90%|█████████ | 181/200 [08:24<00:52,  2.78s/it]

[181,  1200] loss: 0.625
Epoch [181], Train Loss: 0.0511
[182,   100] loss: 0.045
[182,   200] loss: 0.110
[182,   300] loss: 0.157
[182,   400] loss: 0.217
[182,   500] loss: 0.273
[182,   600] loss: 0.323
[182,   700] loss: 0.364
[182,   800] loss: 0.406
[182,   900] loss: 0.453
[182,  1000] loss: 0.506
[182,  1100] loss: 0.555
[182,  1200] loss: 0.599


 91%|█████████ | 182/200 [08:27<00:50,  2.79s/it]

Epoch [182], Train Loss: 0.0495
[183,   100] loss: 0.037
[183,   200] loss: 0.091
[183,   300] loss: 0.141
[183,   400] loss: 0.191
[183,   500] loss: 0.235
[183,   600] loss: 0.279
[183,   700] loss: 0.329
[183,   800] loss: 0.377
[183,   900] loss: 0.431
[183,  1000] loss: 0.472
[183,  1100] loss: 0.530


 92%|█████████▏| 183/200 [08:29<00:47,  2.78s/it]

[183,  1200] loss: 0.593
Epoch [183], Train Loss: 0.0495
[184,   100] loss: 0.057
[184,   200] loss: 0.107
[184,   300] loss: 0.158
[184,   400] loss: 0.203
[184,   500] loss: 0.243
[184,   600] loss: 0.291
[184,   700] loss: 0.353
[184,   800] loss: 0.401
[184,   900] loss: 0.453
[184,  1000] loss: 0.501
[184,  1100] loss: 0.553


 92%|█████████▏| 184/200 [08:32<00:44,  2.79s/it]

[184,  1200] loss: 0.594
Epoch [184], Train Loss: 0.0501
[185,   100] loss: 0.050
[185,   200] loss: 0.116
[185,   300] loss: 0.163
[185,   400] loss: 0.217
[185,   500] loss: 0.267
[185,   600] loss: 0.316
[185,   700] loss: 0.351
[185,   800] loss: 0.401
[185,   900] loss: 0.442
[185,  1000] loss: 0.502
[185,  1100] loss: 0.551
[185,  1200] loss: 0.608


 92%|█████████▎| 185/200 [08:35<00:41,  2.79s/it]

Epoch [185], Train Loss: 0.0505
[186,   100] loss: 0.055
[186,   200] loss: 0.101
[186,   300] loss: 0.145
[186,   400] loss: 0.205
[186,   500] loss: 0.256
[186,   600] loss: 0.314
[186,   700] loss: 0.351
[186,   800] loss: 0.409
[186,   900] loss: 0.455
[186,  1000] loss: 0.512
[186,  1100] loss: 0.553


 93%|█████████▎| 186/200 [08:38<00:38,  2.78s/it]

[186,  1200] loss: 0.601
Epoch [186], Train Loss: 0.0500
[187,   100] loss: 0.033
[187,   200] loss: 0.080
[187,   300] loss: 0.130
[187,   400] loss: 0.173
[187,   500] loss: 0.214
[187,   600] loss: 0.276
[187,   700] loss: 0.329
[187,   800] loss: 0.370
[187,   900] loss: 0.422
[187,  1000] loss: 0.501
[187,  1100] loss: 0.543


 94%|█████████▎| 187/200 [08:40<00:36,  2.77s/it]

[187,  1200] loss: 0.586
Epoch [187], Train Loss: 0.0498
[188,   100] loss: 0.052
[188,   200] loss: 0.098
[188,   300] loss: 0.155
[188,   400] loss: 0.190
[188,   500] loss: 0.235
[188,   600] loss: 0.287
[188,   700] loss: 0.348
[188,   800] loss: 0.392
[188,   900] loss: 0.449
[188,  1000] loss: 0.509
[188,  1100] loss: 0.556


 94%|█████████▍| 188/200 [08:43<00:33,  2.78s/it]

[188,  1200] loss: 0.608
Epoch [188], Train Loss: 0.0508
[189,   100] loss: 0.051
[189,   200] loss: 0.099
[189,   300] loss: 0.160
[189,   400] loss: 0.203
[189,   500] loss: 0.259
[189,   600] loss: 0.328
[189,   700] loss: 0.371
[189,   800] loss: 0.409
[189,   900] loss: 0.461
[189,  1000] loss: 0.507
[189,  1100] loss: 0.557


 94%|█████████▍| 189/200 [08:46<00:30,  2.78s/it]

[189,  1200] loss: 0.602
Epoch [189], Train Loss: 0.0504
[190,   100] loss: 0.073
[190,   200] loss: 0.124
[190,   300] loss: 0.175
[190,   400] loss: 0.216
[190,   500] loss: 0.259
[190,   600] loss: 0.307
[190,   700] loss: 0.347
[190,   800] loss: 0.406
[190,   900] loss: 0.452
[190,  1000] loss: 0.491
[190,  1100] loss: 0.537


 95%|█████████▌| 190/200 [08:49<00:27,  2.77s/it]

[190,  1200] loss: 0.598
Epoch [190], Train Loss: 0.0504
[191,   100] loss: 0.047
[191,   200] loss: 0.109
[191,   300] loss: 0.145
[191,   400] loss: 0.197
[191,   500] loss: 0.237
[191,   600] loss: 0.279
[191,   700] loss: 0.328
[191,   800] loss: 0.374
[191,   900] loss: 0.444
[191,  1000] loss: 0.486
[191,  1100] loss: 0.546


 96%|█████████▌| 191/200 [08:51<00:24,  2.76s/it]

[191,  1200] loss: 0.599
Epoch [191], Train Loss: 0.0498
[192,   100] loss: 0.050
[192,   200] loss: 0.103
[192,   300] loss: 0.146
[192,   400] loss: 0.206
[192,   500] loss: 0.259
[192,   600] loss: 0.301
[192,   700] loss: 0.360
[192,   800] loss: 0.412
[192,   900] loss: 0.456
[192,  1000] loss: 0.515
[192,  1100] loss: 0.570


 96%|█████████▌| 192/200 [08:54<00:22,  2.76s/it]

[192,  1200] loss: 0.621
Epoch [192], Train Loss: 0.0507
[193,   100] loss: 0.066
[193,   200] loss: 0.130
[193,   300] loss: 0.183
[193,   400] loss: 0.222
[193,   500] loss: 0.273
[193,   600] loss: 0.317
[193,   700] loss: 0.363
[193,   800] loss: 0.404
[193,   900] loss: 0.453
[193,  1000] loss: 0.500
[193,  1100] loss: 0.547


 96%|█████████▋| 193/200 [08:57<00:19,  2.75s/it]

[193,  1200] loss: 0.594
Epoch [193], Train Loss: 0.0490
[194,   100] loss: 0.051
[194,   200] loss: 0.098
[194,   300] loss: 0.154
[194,   400] loss: 0.204
[194,   500] loss: 0.249
[194,   600] loss: 0.283
[194,   700] loss: 0.323
[194,   800] loss: 0.391
[194,   900] loss: 0.449
[194,  1000] loss: 0.496
[194,  1100] loss: 0.539


 97%|█████████▋| 194/200 [09:00<00:16,  2.75s/it]

[194,  1200] loss: 0.606
Epoch [194], Train Loss: 0.0505
[195,   100] loss: 0.054
[195,   200] loss: 0.093
[195,   300] loss: 0.125
[195,   400] loss: 0.189
[195,   500] loss: 0.241
[195,   600] loss: 0.270
[195,   700] loss: 0.327
[195,   800] loss: 0.375
[195,   900] loss: 0.428
[195,  1000] loss: 0.483
[195,  1100] loss: 0.519


 98%|█████████▊| 195/200 [09:02<00:13,  2.75s/it]

[195,  1200] loss: 0.569
Epoch [195], Train Loss: 0.0484
[196,   100] loss: 0.043
[196,   200] loss: 0.088
[196,   300] loss: 0.137
[196,   400] loss: 0.178
[196,   500] loss: 0.230
[196,   600] loss: 0.282
[196,   700] loss: 0.340
[196,   800] loss: 0.396
[196,   900] loss: 0.440
[196,  1000] loss: 0.482
[196,  1100] loss: 0.526
[196,  1200] loss: 0.570


 98%|█████████▊| 196/200 [09:05<00:11,  2.77s/it]

Epoch [196], Train Loss: 0.0478
[197,   100] loss: 0.057
[197,   200] loss: 0.109
[197,   300] loss: 0.144
[197,   400] loss: 0.199
[197,   500] loss: 0.252
[197,   600] loss: 0.316
[197,   700] loss: 0.365
[197,   800] loss: 0.418
[197,   900] loss: 0.459
[197,  1000] loss: 0.501
[197,  1100] loss: 0.545
[197,  1200] loss: 0.590


 98%|█████████▊| 197/200 [09:08<00:08,  2.80s/it]

Epoch [197], Train Loss: 0.0501
[198,   100] loss: 0.066
[198,   200] loss: 0.113
[198,   300] loss: 0.155
[198,   400] loss: 0.215
[198,   500] loss: 0.259
[198,   600] loss: 0.295
[198,   700] loss: 0.343
[198,   800] loss: 0.404
[198,   900] loss: 0.452
[198,  1000] loss: 0.510
[198,  1100] loss: 0.550


 99%|█████████▉| 198/200 [09:11<00:05,  2.79s/it]

[198,  1200] loss: 0.584
Epoch [198], Train Loss: 0.0485
[199,   100] loss: 0.054
[199,   200] loss: 0.100
[199,   300] loss: 0.157
[199,   400] loss: 0.213
[199,   500] loss: 0.267
[199,   600] loss: 0.314
[199,   700] loss: 0.373
[199,   800] loss: 0.421
[199,   900] loss: 0.459
[199,  1000] loss: 0.513
[199,  1100] loss: 0.557


100%|█████████▉| 199/200 [09:14<00:02,  2.79s/it]

[199,  1200] loss: 0.601
Epoch [199], Train Loss: 0.0514
[200,   100] loss: 0.045
[200,   200] loss: 0.089
[200,   300] loss: 0.124
[200,   400] loss: 0.170
[200,   500] loss: 0.209
[200,   600] loss: 0.264
[200,   700] loss: 0.310
[200,   800] loss: 0.357
[200,   900] loss: 0.405
[200,  1000] loss: 0.449
[200,  1100] loss: 0.496


100%|██████████| 200/200 [09:16<00:00,  2.78s/it]

[200,  1200] loss: 0.541
Epoch [200], Train Loss: 0.0451





In [285]:
torch.cuda.empty_cache()

In [287]:
# Testing phase
model.eval()
test_loss = 0.0

with torch.no_grad():
    for inputs, targets in test_loader:
        # inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
        
        if isinstance(model, nn.DataParallel):
            model = model.module  # Unwrap from DataParallel
        model = model.to('cpu')
        
        outputs = model(inputs)

        # save metrics
        mae, mape, rmse, rsqr = calculate_metric(outputs.numpy(), targets.numpy())
        print(f"Training average mean absolute error: {mae}")
        print(f"Training average mean absolute percentage error: {mape}")
        print(f"Training average root mean squared error: {rmse}")
        print(f"Training average R2: {rsqr}")

Training average mean absolute error: 0.18328002095222473
Training average mean absolute percentage error: 400.52638053894043
Training average root mean squared error: 0.24551226020856826
Training average R2: 0.473771870136261


In [288]:
# let’s load back in our saved model
# model = MLP()
# model.load_state_dict(torch.load(MODEL_PATH))

RUN 5-fold

In [305]:
torch.cuda.empty_cache()

In [294]:
features = np.concatenate([train_features_np, test_features_np], axis=0)
features

array([[ 0.0765418 , -0.6170648 , -0.54855525, ..., -0.25331536,
        -0.20173293,  0.32425982],
       [-0.42837855,  0.5031553 ,  0.8686651 , ..., -0.25331536,
        -0.20173293,  0.32425982],
       [ 1.2546893 ,  0.31953788,  0.73418504, ..., -0.25331536,
        -0.20173293,  0.32425982],
       ...,
       [ 0.05971107,  1.0929172 ,  0.8371482 , ..., -0.25331536,
        -0.20173293,  0.32425982],
       [-2.2797532 , -0.02818421, -0.5013583 , ..., -0.25331536,
        -0.20173293,  0.32425982],
       [ 1.591303  , -0.22871129, -0.3548853 , ..., -0.25331536,
        -0.20173293,  0.32425982]], dtype=float32)

In [340]:
labels = pd.concat([train_labels, test_labels], axis=0, ignore_index=True)
labels

0       0.082481
1       0.378845
2       0.836149
3       0.987208
4       1.021458
          ...   
1720    0.471411
1721    0.823750
1722    0.241612
1723    0.762054
1724    0.199981
Name: rr1_30, Length: 1725, dtype: float64

In [295]:
features.shape

(1725, 8601)

In [342]:
labels.shape

(1725,)

In [344]:
DEVICE = "cpu"

In [347]:
# Define cross-validation setup
kf = KFold(n_splits=5, shuffle=True, random_state=42)
EPOCHS = 300
val_mae = []
val_mape = []
val_rmse = []
val_rsqr = []


for train_idx, val_idx in kf.split(features):
    # Create training and validation datasets for the current fold
    X_train_fold, X_val_fold = features[train_idx], features[val_idx]
    y_train_fold, y_val_fold = labels.iloc[train_idx], labels.iloc[val_idx]
    
    # scaling features
    X_train_fold[:,:len(non_category_features)] = scaler.fit_transform(X_train_fold[:,:len(non_category_features)])
    X_val_fold[:,:len(non_category_features)] = scaler.transform(X_val_fold[:,:len(non_category_features)])
        
    # Initialize the model for this fold
    model = MLP(d_in=X_train_fold.shape[1], d_layers=MODEL_CONFIG["linear"], dropout=MODEL_CONFIG["dropout"], d_out=1, activation_name=MODEL_CONFIG["activation_name"], negative_slope=MODEL_CONFIG['negative_slope'])
    # model = nn.DataParallel(model, device_ids = DEVICE_LIST)
    model.to(DEVICE)
    
    # define optimizer
    optimizer = getattr(optim, MODEL_CONFIG["optimizer"]["optimizer"])(model.parameters(), **optim_config)
    
    # Define the loss function and optimizer
    criterion = nn.MSELoss()
    
    # Prepare DataLoader for training
    train_dataset = CustomDataset(X_train_fold, y_train_fold.to_numpy())
    val_dataset = CustomDataset(X_val_fold, y_val_fold.to_numpy())
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=val_dataset.features.shape[0], shuffle=True)
        
    # Training of the model.
    model.train()
    running_loss = 0
    for epoch in range(EPOCHS):
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(DEVICE), target.to(DEVICE)

            optimizer.zero_grad()
            output = model(data)
            
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            
            # print statistics
            running_loss += loss.item() * data.size(0)
            if i % 100 == 99:    # print every 100 mini-batches
                print(f'[{ep + 1}, {i + 1:5d}] loss: {running_loss / 100:.3f}')


    train_loss = running_loss  / len(train_loader.dataset)
    print(f'Epoch [{ep+1}], Train Loss: {train_loss:.4f}')
            # print(f'Epoch [{ep+1}], Train Loss: {train_loss:.4f}')

    # Validation of the model.
    model.eval()
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(val_loader):
            # data, target = data.to(DEVICE), target.to(DEVICE)
            if isinstance(model, nn.DataParallel):
                model = model.module  # Unwrap from DataParallel
            model = model.to('cpu')
            outputs = model(data)
            
            # save metrics
            mae, mape, rmse, rsqr = calculate_metric(outputs.numpy(), target.numpy())
            val_mae.append(mae)
            val_mape.append(mape)
            val_rmse.append(rmse)
            val_rsqr.append(rsqr)
            

Epoch [200], Train Loss: 7334.2948
Epoch [200], Train Loss: 7288.8388
Epoch [200], Train Loss: 7247.8600
Epoch [200], Train Loss: 7344.0239
Epoch [200], Train Loss: 7367.6819


In [348]:
print(f"Test average mean absolute error: {statistics.mean(val_mae)}")
print(f"Test average mean absolute percentage error: {statistics.mean(val_mape)}")
print(f"Test average root mean squared error: {statistics.mean(val_rmse)}")
print(f"Test average R2: {statistics.mean(val_rsqr)}")

Test average mean absolute error: 0.16954633593559265
Test average mean absolute percentage error: 3544.4460558891296
Test average root mean squared error: 0.23661047059014853
Test average R2: 0.48723597526550294


In [349]:
val_rmse

[0.23308723643692,
 0.24839541960561307,
 0.2511300830867783,
 0.22129581841764218,
 0.22914379540378904]