In [71]:
import math
from typing import List
import sys
import os
from pathlib import Path
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm
from torch.utils.data import DataLoader, Dataset
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.discriminant_analysis import StandardScaler
from sklearn.model_selection import train_test_split
from copy import deepcopy
import torch.optim as optim
from transformers import AutoTokenizer, AutoModel
import optuna
from optuna.trial import TrialState
from sklearn.model_selection import KFold
import statistics
import time

In [72]:
sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), "..")))

In [73]:
from utils import calculate_metric

In [74]:
INPUT = "../data/data_removing_na.xlsx"

MODEL_PATH = 'best_resnet_5_fold_emb_model.pth'
BATCH_SIZE = 64
BATCH_NORM = True

DEVICE = torch.device('cuda:2' if torch.cuda.is_available() else 'cpu')
DEVICE_LIST = [2, 1, 3]

MODEL = 'ProsusAI/finbert' #'AdaptLLM/finance-LLM'

In [75]:
DEVICE

device(type='cuda', index=2)

In [215]:
df = pd.read_excel(INPUT)

In [216]:
df.head()

Unnamed: 0,rr1_30,currency,seniorioty_adj,coupon rate,domicile_country,exchange_country,Industry_sector,Industry_group,Industry_subgroup,event_type,...,PD_55_pd,PD_56_pd,PD_57_pd,PD_58_pd,PD_59_pd,PD_60_pd,DTD,NI_Over_TA,Size,defaulted_in_last_6_months
0,0.259908,USD,Senior Subordinated Unsecured,9.0,United States,United States,Consumer Discretionary,Retail & Whsle - Discretionary,E-Commerce Discretionary,Bankruptcy Filing,...,0.396731,0.397453,0.398148,0.398819,0.399467,0.400092,-0.732815,-0.007137,-0.852484,False
1,0.032729,USD,Senior Subordinated Unsecured,5.75,United States,United States,Health Care,Health Care,Health Care Facilities & Svcs,Default Corp Action,...,0.957454,0.957467,0.95748,0.957492,0.957503,0.957514,-1.666262,-0.000286,-1.186347,False
2,0.9724,USD,Unsecured,5.675,South Korea,South Korea,Consumer Discretionary,Retail & Whsle - Discretionary,Wholesale - Discretionary,Default Corp Action,...,0.568169,0.568693,0.569197,0.569682,0.57015,0.5706,-1.853366,0.000191,1.053677,False
3,1.047416,CHF,Unsecured,0.125,South Korea,South Korea,Consumer Discretionary,Retail & Whsle - Discretionary,Wholesale - Discretionary,Default Corp Action,...,0.568169,0.568693,0.569197,0.569682,0.57015,0.5706,-1.853366,0.000191,1.053677,False
4,0.848872,JPY,Unsecured,1.75,Japan,Japan,Industrials,Industrial Products,Electrical Equipment,Bankruptcy Filing,...,0.130285,0.130688,0.131081,0.131465,0.13184,0.132206,-0.768857,-0.028058,-1.946507,False


In [217]:
df.shape

(1725, 165)

In [218]:
feature_list = df.columns
category_feature_key = ['currency', 'seniorioty', 'domicile_country', 'exchange_country', 'Industry_sector', 'Industry_group', 'Industry_subgroup', 'event_type',
                        'event_type_subcategory_sum', 'defaulted_in_last_6_months', 'defaulted_in_last_5_years']
category_features = [i for i in feature_list if any(sub in i for sub in category_feature_key)]
non_category_features = [i for i in feature_list if i not in category_features]

print(len(category_features))
print(len(non_category_features))

11
154


In [219]:
non_category_features.remove('rr1_30')

In [220]:
non_category_features

['coupon rate',
 'SP500 MD',
 'Average daily 1-year SP500 return',
 'Ratio to MA',
 'US Corporate Bond Yield Spread',
 'US Corporate Bond Yield Spread(3-5 year)',
 'US Corporate Bond Yield Spread(5-7 year)',
 'US Corporate Bond Yield Spread(7-10 year)',
 'US Corporate Bond Yield Spread(10+ year)',
 'US Generic Govt 3 Month Yield',
 'US Generic Govt 6 Month Yield',
 'US Generic Govt 12 Month Yield',
 'US Generic Govt 2 Year Yield',
 'US Generic Govt 3 Year Yield',
 'US Generic Govt 5 Year Yield',
 'US Generic Govt 7 Year Yield',
 'US Generic Govt 10 Year Yield',
 'marketcap',
 'sector_domicile_dtd',
 'sector_exchange_dtd',
 'sector_dtd',
 'PD_1_domicile_sector',
 'PD_3_domicile_sector',
 'PD_12_domicile_sector',
 'PD_1_domicile_subsec',
 'PD_3_domicile_subsec',
 'PD_12_domicile_subsec',
 'PD_1_exch_sector',
 'PD_3_exch_sector',
 'PD_12_exch_sector',
 'PD_1_exch_subsector',
 'PD_3_exch_subsector',
 'PD_12_exch_subsector',
 'PD_1_global_sector',
 'PD_3_global_sector',
 'PD_12_global_secto

In [221]:
category_features

['currency',
 'seniorioty_adj',
 'domicile_country',
 'exchange_country',
 'Industry_sector',
 'Industry_group',
 'Industry_subgroup',
 'event_type',
 'event_type_subcategory_sum',
 'defaulted_in_last_5_years',
 'defaulted_in_last_6_months']

In [222]:
for i in category_features:
    if i == "seniorioty_adj":
        df[i] = df[i] + " Bond"
    elif i == "domicile_country":
        df[i] = "domicile country: " + df[i]
    elif i == "exchange_country":
        df[i] = "exchange_country: " + df[i]
    elif i == "event_type":
        df[i] = "default reason: " + df[i]
    elif i == "defaulted_in_last_6_months":
        df[i] = df[i].apply(lambda x: "The bond has defaulted in the last 6 months" if x == True else "The bond has not defaulted in the last 6 months")
    elif i == "defaulted_in_last_5_years":
        df[i] = df[i].apply(lambda x: "The bond has defaulted in the last 5 years" if x == True else "The bond has not defaulted in the last 5 years")

In [223]:
df[category_features]

Unnamed: 0,currency,seniorioty_adj,domicile_country,exchange_country,Industry_sector,Industry_group,Industry_subgroup,event_type,event_type_subcategory_sum,defaulted_in_last_5_years,defaulted_in_last_6_months
0,USD,Senior Subordinated Unsecured Bond,domicile country: United States,exchange_country: United States,Consumer Discretionary,Retail & Whsle - Discretionary,E-Commerce Discretionary,default reason: Bankruptcy Filing,Debt Restructuring,The bond has not defaulted in the last 5 years,The bond has not defaulted in the last 6 months
1,USD,Senior Subordinated Unsecured Bond,domicile country: United States,exchange_country: United States,Health Care,Health Care,Health Care Facilities & Svcs,default reason: Default Corp Action,Missing Coupon payment only,The bond has not defaulted in the last 5 years,The bond has not defaulted in the last 6 months
2,USD,Unsecured Bond,domicile country: South Korea,exchange_country: South Korea,Consumer Discretionary,Retail & Whsle - Discretionary,Wholesale - Discretionary,default reason: Default Corp Action,Missing Interest payment,The bond has not defaulted in the last 5 years,The bond has not defaulted in the last 6 months
3,CHF,Unsecured Bond,domicile country: South Korea,exchange_country: South Korea,Consumer Discretionary,Retail & Whsle - Discretionary,Wholesale - Discretionary,default reason: Default Corp Action,Missing Interest payment,The bond has not defaulted in the last 5 years,The bond has not defaulted in the last 6 months
4,JPY,Unsecured Bond,domicile country: Japan,exchange_country: Japan,Industrials,Industrial Products,Electrical Equipment,default reason: Bankruptcy Filing,Rehabilitation,The bond has not defaulted in the last 5 years,The bond has not defaulted in the last 6 months
...,...,...,...,...,...,...,...,...,...,...,...
1720,MYR,Senior Secured Bond,domicile country: Malaysia,exchange_country: Malaysia,Consumer Discretionary,Consumer Discretionary Products,Automotive,default reason: Bankruptcy Filing,Others,The bond has not defaulted in the last 5 years,The bond has not defaulted in the last 6 months
1721,MYR,Senior Secured Bond,domicile country: Malaysia,exchange_country: Malaysia,Consumer Discretionary,Consumer Discretionary Products,Automotive,default reason: Bankruptcy Filing,Others,The bond has not defaulted in the last 5 years,The bond has not defaulted in the last 6 months
1722,MYR,Senior Secured Bond,domicile country: Malaysia,exchange_country: Malaysia,Consumer Discretionary,Consumer Discretionary Products,Automotive,default reason: Bankruptcy Filing,Others,The bond has not defaulted in the last 5 years,The bond has not defaulted in the last 6 months
1723,MYR,Senior Secured Bond,domicile country: Malaysia,exchange_country: Malaysia,Consumer Discretionary,Consumer Discretionary Products,Automotive,default reason: Bankruptcy Filing,Others,The bond has not defaulted in the last 5 years,The bond has not defaulted in the last 6 months


In [224]:
labels = df["rr1_30"]
features = df.drop(columns=["rr1_30"])

In [225]:
features.shape

(1725, 164)

In [226]:
# split data into training and val set
test_size = 0.25
train_features, test_features, train_labels, test_labels = train_test_split(features, labels, test_size=test_size, random_state=42)

print('Training Features Shape:', train_features.shape)
print('Training Labels Shape:', train_labels.shape)
print('Val Features Shape:', test_features.shape)
print('Val Labels Shape:', test_labels.shape)

Training Features Shape: (1293, 164)
Training Labels Shape: (1293,)
Val Features Shape: (432, 164)
Val Labels Shape: (432,)


In [228]:
train_features.head()

Unnamed: 0,currency,seniorioty_adj,coupon rate,domicile_country,exchange_country,Industry_sector,Industry_group,Industry_subgroup,event_type,event_type_subcategory_sum,...,PD_55_pd,PD_56_pd,PD_57_pd,PD_58_pd,PD_59_pd,PD_60_pd,DTD,NI_Over_TA,Size,defaulted_in_last_6_months
755,USD,Senior Unsecured Bond,7.5,domicile country: United States,exchange_country: United States,Energy,Oil & Gas,Oil & Gas Producers,default reason: Default Corp Action,Missing Coupon payment only,...,0.691011,0.691382,0.691738,0.69208,0.692408,0.692725,-1.084433,-0.052027,-2.074964,The bond has not defaulted in the last 6 months
588,USD,Unsecured Bond,6.0,domicile country: Indonesia,exchange_country: Indonesia,Materials,Materials,Metals & Mining,default reason: Default Corp Action,Debt Restructuring,...,0.183801,0.184996,0.186167,0.187313,0.188437,0.189539,-0.540409,0.017209,0.864692,The bond has not defaulted in the last 6 months
585,USD,Senior Secured Bond,11.0,domicile country: United States,exchange_country: United States,Materials,Materials,Metals & Mining,default reason: Default Corp Action,Debt Restructuring,...,0.278774,0.280216,0.281615,0.282972,0.28429,0.285571,0.754647,-0.010395,-0.342209,The bond has not defaulted in the last 6 months
1329,USD,Senior Secured Bond,9.125,domicile country: United States,exchange_country: United States,Energy,Oil & Gas,Oil & Gas Services & Equip,default reason: Default Corp Action,Missing Coupon & principal payment,...,0.190375,0.191471,0.192544,0.193594,0.194622,0.195628,-0.24208,-0.022618,-2.808528,The bond has not defaulted in the last 6 months
973,USD,Senior Secured Bond,9.25,domicile country: United States,exchange_country: United States,Energy,Oil & Gas,Oil & Gas Producers,default reason: Default Corp Action,Others,...,0.149501,0.15093,0.152327,0.153692,0.155027,0.156332,1.138686,3.3e-05,-0.085154,The bond has not defaulted in the last 6 months


In [229]:
# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModel.from_pretrained(MODEL)
model.eval()

BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(30522, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0-11): 12 x BertLayer(
        (attention): BertAttention(
          (self): BertSdpaSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False

In [230]:
# empty cache first
torch.cuda.empty_cache()

In [231]:
model = nn.DataParallel(model, device_ids = DEVICE_LIST)
model.to(DEVICE)

DataParallel(
  (module): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwis

In [232]:
# Function to create batches
def create_batches(inputs, batch_size):
    input_ids = inputs["input_ids"]
    attention_mask = inputs["attention_mask"]
    
    for i in range(0, len(input_ids), batch_size):
        input_batch = {
            "input_ids": input_ids[i:i + batch_size].to(DEVICE),
            "attention_mask": attention_mask[i:i + batch_size].to(DEVICE),
        }
        yield input_batch

In [233]:
# Convert the non-categorical columns to tensors
train_features_np = train_features[non_category_features].values.astype('float32')
train_features_tensors = torch.tensor(train_features_np, dtype=torch.float32)

test_features_np = test_features[non_category_features].values.astype('float32')
test_features_tensors = torch.tensor(test_features_np, dtype=torch.float32)

In [234]:
train_features_tensors.shape

torch.Size([1293, 153])

In [235]:
batch_size = 2  # Adjust batch size based on your GPU memory

for i in category_features:
    # empty cache first
    torch.cuda.empty_cache()
    
    uniqueCats = list(set(df[i]))
    
    inputs = tokenizer(uniqueCats, return_tensors="pt", padding=True, truncation=True)
    inputs = {key: value.to(DEVICE) for key, value in inputs.items()}
    
    # create input batches
    batches = list(create_batches(inputs, batch_size))
    
    # Get the embeddings
    all_embeddings = []

    for batch in batches:
        with torch.no_grad():
            outputs = model(**batch)
            embeddings = outputs.last_hidden_state.mean(dim=1)

            # The embeddings are in the shape [batch_size, sequence_length, hidden_size]
            all_embeddings.append(embeddings)
            
    
    cpu_embeddings = torch.cat(all_embeddings, dim=0).to("cpu")
    catDict = {key: value for key, value in zip(uniqueCats, cpu_embeddings)}
    
    #### map to train_features
    train_features[i] = train_features[i].map(catDict)

    # Convert the 'category_tensor' column to a list of tensors and stack them
    train_category_tensors = torch.stack(train_features[i].tolist())

    # Concatenate the tensors along the second dimension (columns)
    train_features_tensors = torch.cat((train_features_tensors, train_category_tensors), dim=1)
    
    print(f"Shape of train_category_tensors: {train_category_tensors.shape} for feature {i}")
    print(f"Shape of train_feature_tensors: {train_features_tensors.shape}")
    
    #### map to test_features
    test_features[i] = test_features[i].map(catDict)

    # Convert the 'category_tensor' column to a list of tensors and stack them
    test_category_tensors = torch.stack(test_features[i].tolist())

    # Concatenate the tensors along the second dimension (columns)
    test_features_tensors = torch.cat((test_features_tensors, test_category_tensors), dim=1)
    
    print(f"Shape of test_category_tensors: {test_category_tensors.shape} for feature {i}")
    print(f"Shape of test_feature_tensors: {test_features_tensors.shape}")

Shape of train_category_tensors: torch.Size([1293, 768]) for feature currency
Shape of train_feature_tensors: torch.Size([1293, 921])
Shape of test_category_tensors: torch.Size([432, 768]) for feature currency
Shape of test_feature_tensors: torch.Size([432, 921])
Shape of train_category_tensors: torch.Size([1293, 768]) for feature seniorioty_adj
Shape of train_feature_tensors: torch.Size([1293, 1689])
Shape of test_category_tensors: torch.Size([432, 768]) for feature seniorioty_adj
Shape of test_feature_tensors: torch.Size([432, 1689])
Shape of train_category_tensors: torch.Size([1293, 768]) for feature domicile_country
Shape of train_feature_tensors: torch.Size([1293, 2457])
Shape of test_category_tensors: torch.Size([432, 768]) for feature domicile_country
Shape of test_feature_tensors: torch.Size([432, 2457])
Shape of train_category_tensors: torch.Size([1293, 768]) for feature exchange_country
Shape of train_feature_tensors: torch.Size([1293, 3225])
Shape of test_category_tensors: t

In [103]:
class CustomDataset(Dataset):
    def __init__(self, features, labels):
        self.features = features
        self.labels = labels
    
    def __len__(self):
        return len(self.features)
    
    def __getitem__(self, idx):
        feature = self.features[idx]
        label = self.labels[idx]
        return torch.tensor(feature, dtype=torch.float32), torch.tensor(label, dtype=torch.float32)

TUNING PARAMETERS

In [121]:
# Define the ResNetBlock
class ResNetBlock(nn.Module):
    def __init__(self, in_features: int, out_features: int, dropout=0.5, batch_norm = True, activation_name='ReLU', negative_slope=0.01):
        super(ResNetBlock, self).__init__()
        if batch_norm:
            self.batch_norm = nn.BatchNorm1d(in_features)
        else:
            self.batch_norm = nn.Identity()

        self.linear = nn.Linear(in_features, out_features)
        self.relu = getattr(nn, activation_name)()
        
        if self.relu == nn.LeakyReLU():
            self.relu = self.relu(negative_slope)
            
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(out_features, in_features)

    def forward(self, x):
        resisual = x
        out = self.batch_norm(resisual)
        out = self.linear(out)
        out = self.relu(out)
        out = self.dropout(out)
        out = self.linear2(out)
        out = self.dropout(out)
        out += resisual # Add the input tensor to the output
        return out

# Define the Prediction layer
class Prediction(nn.Module):
    def __init__(self, in_features: int, out_features: int, batchNorm = True):
        super(Prediction, self).__init__()
        if batchNorm:
            self.batch_norm = nn.BatchNorm1d(in_features)
        else:
            self.batch_norm = nn.Identity()

        self.relu = nn.ReLU()
        self.linear = nn.Linear(in_features, out_features)

    def forward(self, x):
        x = self.batch_norm(x)
        x = self.relu(x)
        x = self.linear(x)
        return x

# Define the ResNet model
class ResNet(nn.Module):
    def __init__(self, input_dim: int, dropout: float, num_blocks: int, out_features_list: List[int], batch_norm = True, activation_name='ReLU', negative_slope=0.01):
        super(ResNet, self).__init__()
        self.layers = nn.ModuleList()
        
        # First linear layer to convert input_dim to the first out_features
        self.layers.append(nn.Linear(input_dim, out_features_list[0]))
        
        # Add the ResNet blocks
        for i in range(num_blocks):
            if i > 0:
                self.layers.append(ResNetBlock(out_features_list[0], out_features_list[i], dropout, batch_norm = batch_norm, activation_name=activation_name, negative_slope=negative_slope))
        
        # Add the prediction layer
        self.prediction = Prediction(out_features_list[0], 1, batchNorm = batch_norm)

    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
        x = self.prediction(x)
        return x

In [118]:
# define loss function
criterion = nn.MSELoss()

In [110]:
EPOCHS = 300

In [111]:
# set random seed
torch.manual_seed(42)
torch.cuda.manual_seed(42)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [238]:
train_features_np = train_features_tensors.numpy()
test_features_np = test_features_tensors.numpy()

In [114]:
scaler = StandardScaler()

In [147]:
def objective(trial):    
    # Generate the model.
    num_blocks = trial.suggest_int('num_blocks', 1, 5)
    
    out_features_list = []
    for i in range(num_blocks):
        out_features = trial.suggest_int(f'out_features_{i}', 10, 128)
        out_features_list.append(out_features)

    p = trial.suggest_float("dropout", 0, 0.5)
    # activation_name = trial.suggest_categorical(f'activation_{i}', ['ReLU', 'Tanh', 'LeakyReLU'])
    # negative_slope = 0.01
    # if activation_name == 'LeakyReLU':
    #     negative_slope = trial.suggest_float(f"negative_slope_{i}", 0.01, 1)
        

    # model = ResNet(input_dim=train_features.shape[1], num_blocks=num_blocks, dropout=p, out_features_list=out_features_list, batch_norm=BATCH_NORM).to(DEVICE)

    # Generate the optimizers.
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"])
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    weight_decay = trial.suggest_float("weight_decay", 1e-9, 1e-1, log=True)


    # Define cross-validation setup
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    val_losses = []

    for train_idx, val_idx in kf.split(train_features_np):
        # Create training and validation datasets for the current fold
        X_train_fold, X_val_fold = train_features_np[train_idx], train_features_np[val_idx]
        y_train_fold, y_val_fold = train_labels.values.reshape(-1, 1)[train_idx], train_labels.values.reshape(-1, 1)[val_idx]
        
        # scaling features
        X_train_fold[:, :len(non_category_features)] = scaler.fit_transform(X_train_fold[:, :len(non_category_features)])
        X_val_fold[:, :len(non_category_features)] = scaler.transform(X_val_fold[:, :len(non_category_features)])
            
        # Initialize the model for this fold
        model = ResNet(input_dim=X_train_fold.shape[1], num_blocks=num_blocks, dropout=p, out_features_list=out_features_list, batch_norm=BATCH_NORM) #activation_name=activation_name, negative_slope=negative_slope)
        model = nn.DataParallel(model, device_ids = DEVICE_LIST)
        model.to(DEVICE)
        
        # define optimizer
        if optimizer_name == "Adam":
         optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr, weight_decay=weight_decay)
        else:
            momentum = trial.suggest_float("momentum", 1e-9, 0.95, log=True)
            optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr, weight_decay=weight_decay, momentum=momentum)
        
        # Define the loss function and optimizer
        criterion = nn.MSELoss()
        
        # Prepare DataLoader for training
        train_dataset = CustomDataset(X_train_fold, y_train_fold)
        val_dataset = CustomDataset(X_val_fold, y_val_fold)
        train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=True)
        
        # print(y_val_fold.shape)
            
        # Training of the model.
        model.train()
        for epoch in range(EPOCHS):
            for batch_idx, (data, target) in enumerate(train_loader):
                data, target = data.to(DEVICE), target.to(DEVICE)

                optimizer.zero_grad()
                output = model(data)
                # print('shape', output.shape, target.shape)
                loss = criterion(output, target)
                loss.backward()
                optimizer.step()

        # Validation of the model.
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for batch_idx, (data, target) in enumerate(val_loader):
                data, target = data.to(DEVICE), target.to(DEVICE)
                output = model(data)
                val_loss = criterion(output, target).item()
                val_losses.append(val_loss**0.5) #rmse

        trial.report(val_loss, epoch)

    # Return the average validation loss across all folds
    return np.mean(val_losses)

In [148]:
# 3. Create a study object and optimize the objective function.
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=100, timeout=600)

pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))


[I 2024-08-30 03:38:02,172] A new study created in memory with name: no-name-6a6083d6-91c4-4ad8-87e8-8949a2260409
[I 2024-08-30 03:43:31,739] Trial 0 finished with value: 0.2506820472087633 and parameters: {'num_blocks': 4, 'out_features_0': 58, 'out_features_1': 11, 'out_features_2': 123, 'out_features_3': 86, 'dropout': 0.38721048119932927, 'optimizer': 'Adam', 'lr': 8.69746101373016e-05, 'weight_decay': 0.0008572312583192992}. Best is trial 0 with value: 0.2506820472087633.
[I 2024-08-30 03:48:38,122] Trial 1 finished with value: 0.31909633573849966 and parameters: {'num_blocks': 3, 'out_features_0': 32, 'out_features_1': 36, 'out_features_2': 111, 'dropout': 0.18793793564346756, 'optimizer': 'Adam', 'lr': 0.011698051307734764, 'weight_decay': 0.03508778767544126}. Best is trial 0 with value: 0.2506820472087633.


Study statistics: 
  Number of finished trials:  2
  Number of pruned trials:  0
  Number of complete trials:  2
Best trial:
  Value:  0.2506820472087633
  Params: 
    num_blocks: 4
    out_features_0: 58
    out_features_1: 11
    out_features_2: 123
    out_features_3: 86
    dropout: 0.38721048119932927
    optimizer: Adam
    lr: 8.69746101373016e-05
    weight_decay: 0.0008572312583192992


In [149]:
trial.params

{'num_blocks': 4,
 'out_features_0': 58,
 'out_features_1': 11,
 'out_features_2': 123,
 'out_features_3': 86,
 'dropout': 0.38721048119932927,
 'optimizer': 'Adam',
 'lr': 8.69746101373016e-05,
 'weight_decay': 0.0008572312583192992}

In [150]:
MODEL_CONFIG = {"out_features_list": [], "dropout": 0, "optimizer": {}, "num_blocks": 0}

for key, value in trial.params.items():
    if "out_features" in key:
        MODEL_CONFIG["out_features_list"].append(value)
    elif "dropout" in key:
        MODEL_CONFIG["dropout"] = value
    elif "negative_slope" in key:
        MODEL_CONFIG["negative_slope"] = value
    elif "activation" in key:
        MODEL_CONFIG["activation_name"] = value
    elif "num_blocks" in key:
        MODEL_CONFIG["num_blocks"] = value
    elif "batch_size" in key:
        BATCH_SIZE = int(value)
    else:
        MODEL_CONFIG["optimizer"][key] = value

In [151]:
MODEL_CONFIG

{'out_features_list': [58, 11, 123, 86],
 'dropout': 0.38721048119932927,
 'optimizer': {'optimizer': 'Adam',
  'lr': 8.69746101373016e-05,
  'weight_decay': 0.0008572312583192992},
 'num_blocks': 4}

RUNNING THE MODEL

In [None]:
# empty cache first
torch.cuda.empty_cache()

In [241]:
scaler = StandardScaler()

In [242]:
# scaling features
train_features_np[:,:len(non_category_features)] = scaler.fit_transform(train_features_np[:,:len(non_category_features)])
test_features_np[:,:len(non_category_features)] = scaler.transform(test_features_np[:,:len(non_category_features)])
            
# Create dataset instances
train_dataset = CustomDataset(train_features_np, train_labels.values.reshape(-1, 1))
test_dataset = CustomDataset(test_features_np, test_labels.values.reshape(-1, 1))

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=test_features_np.shape[0], shuffle=True)

In [245]:
m_config = deepcopy(MODEL_CONFIG)
del m_config["optimizer"]

In [246]:
m_config

{'out_features_list': [58, 11, 123, 86],
 'dropout': 0.38721048119932927,
 'num_blocks': 4}

In [263]:
model = ResNet(input_dim=train_features_np.shape[1], batch_norm=BATCH_NORM, **m_config)
model

ResNet(
  (layers): ModuleList(
    (0): Linear(in_features=8601, out_features=58, bias=True)
    (1): ResNetBlock(
      (batch_norm): BatchNorm1d(58, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (linear): Linear(in_features=58, out_features=11, bias=True)
      (relu): ReLU()
      (dropout): Dropout(p=0.38721048119932927, inplace=False)
      (linear2): Linear(in_features=11, out_features=58, bias=True)
    )
    (2): ResNetBlock(
      (batch_norm): BatchNorm1d(58, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (linear): Linear(in_features=58, out_features=123, bias=True)
      (relu): ReLU()
      (dropout): Dropout(p=0.38721048119932927, inplace=False)
      (linear2): Linear(in_features=123, out_features=58, bias=True)
    )
    (3): ResNetBlock(
      (batch_norm): BatchNorm1d(58, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (linear): Linear(in_features=58, out_features=86, bias=True)
      (relu): ReLU()


In [264]:
model = nn.DataParallel(model, device_ids = DEVICE_LIST)
model.to(DEVICE)

DataParallel(
  (module): ResNet(
    (layers): ModuleList(
      (0): Linear(in_features=8601, out_features=58, bias=True)
      (1): ResNetBlock(
        (batch_norm): BatchNorm1d(58, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (linear): Linear(in_features=58, out_features=11, bias=True)
        (relu): ReLU()
        (dropout): Dropout(p=0.38721048119932927, inplace=False)
        (linear2): Linear(in_features=11, out_features=58, bias=True)
      )
      (2): ResNetBlock(
        (batch_norm): BatchNorm1d(58, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (linear): Linear(in_features=58, out_features=123, bias=True)
        (relu): ReLU()
        (dropout): Dropout(p=0.38721048119932927, inplace=False)
        (linear2): Linear(in_features=123, out_features=58, bias=True)
      )
      (3): ResNetBlock(
        (batch_norm): BatchNorm1d(58, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (linear): Linear(i

In [265]:
# define optimizer
optim_config = deepcopy(MODEL_CONFIG["optimizer"])
del optim_config["optimizer"]

optimizer = getattr(optim, MODEL_CONFIG["optimizer"]["optimizer"])(model.parameters(), **optim_config)
optimizer

Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 8.69746101373016e-05
    maximize: False
    weight_decay: 0.0008572312583192992
)

In [266]:

EPOCH = 500
criterion = nn.MSELoss()
start_time = time.time()

for ep in tqdm(range(EPOCH)):

    model.train()
    running_loss = 0.0
    for i, (inputs, targets) in enumerate(train_loader):
        inputs, targets = inputs.to(DEVICE), targets.to(DEVICE)
        
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)

        loss = criterion(outputs, targets)
        
        loss.backward()

        optimizer.step()

        # print statistics
        running_loss += loss.item() * inputs.size(0)
        if i % 100 == 99:    # print every 100 mini-batches
            print(f'[{ep + 1}, {i + 1:5d}] loss: {running_loss / 100:.3f}')


    train_loss = running_loss  / len(train_loader.dataset)
    print(f'Epoch [{ep+1}], Train Loss: {train_loss:.4f}')
    
# print out training time
elapsed_time = time.time() - start_time
print(f"Training time: {elapsed_time:.3f} seconds")

# save the best model
torch.save(model.state_dict(), MODEL_PATH)

  0%|          | 1/500 [00:00<02:10,  3.82it/s]

Epoch [1], Train Loss: 0.8174


  0%|          | 2/500 [00:00<02:27,  3.38it/s]

Epoch [2], Train Loss: 0.7034


  1%|          | 3/500 [00:00<02:26,  3.39it/s]

Epoch [3], Train Loss: 0.6229


  1%|          | 4/500 [00:01<02:22,  3.49it/s]

Epoch [4], Train Loss: 0.5434


  1%|          | 5/500 [00:01<02:20,  3.51it/s]

Epoch [5], Train Loss: 0.4746


  1%|          | 6/500 [00:01<02:20,  3.52it/s]

Epoch [6], Train Loss: 0.4233


  1%|▏         | 7/500 [00:01<02:17,  3.59it/s]

Epoch [7], Train Loss: 0.3785


  2%|▏         | 8/500 [00:02<02:18,  3.55it/s]

Epoch [8], Train Loss: 0.3369


  2%|▏         | 9/500 [00:02<02:24,  3.40it/s]

Epoch [9], Train Loss: 0.2978


  2%|▏         | 10/500 [00:02<02:26,  3.35it/s]

Epoch [10], Train Loss: 0.2650


  2%|▏         | 11/500 [00:03<02:20,  3.48it/s]

Epoch [11], Train Loss: 0.2361


  2%|▏         | 12/500 [00:03<02:31,  3.23it/s]

Epoch [12], Train Loss: 0.2134


  3%|▎         | 13/500 [00:03<02:35,  3.13it/s]

Epoch [13], Train Loss: 0.1926


  3%|▎         | 14/500 [00:04<02:23,  3.39it/s]

Epoch [14], Train Loss: 0.1732


  3%|▎         | 15/500 [00:04<02:30,  3.23it/s]

Epoch [15], Train Loss: 0.1618


  3%|▎         | 16/500 [00:04<02:24,  3.35it/s]

Epoch [16], Train Loss: 0.1450


  3%|▎         | 17/500 [00:04<02:18,  3.48it/s]

Epoch [17], Train Loss: 0.1487


  4%|▎         | 18/500 [00:05<02:17,  3.51it/s]

Epoch [18], Train Loss: 0.1295


  4%|▍         | 19/500 [00:05<02:15,  3.54it/s]

Epoch [19], Train Loss: 0.1339


  4%|▍         | 20/500 [00:05<02:20,  3.42it/s]

Epoch [20], Train Loss: 0.1224


  4%|▍         | 21/500 [00:06<02:13,  3.60it/s]

Epoch [21], Train Loss: 0.1129


  4%|▍         | 22/500 [00:06<02:27,  3.25it/s]

Epoch [22], Train Loss: 0.1152


  5%|▍         | 23/500 [00:06<02:22,  3.35it/s]

Epoch [23], Train Loss: 0.1111


  5%|▍         | 24/500 [00:07<02:16,  3.48it/s]

Epoch [24], Train Loss: 0.1003


  5%|▌         | 25/500 [00:07<02:11,  3.62it/s]

Epoch [25], Train Loss: 0.1019


  5%|▌         | 26/500 [00:07<02:15,  3.51it/s]

Epoch [26], Train Loss: 0.0990


  5%|▌         | 27/500 [00:07<02:14,  3.52it/s]

Epoch [27], Train Loss: 0.0991


  6%|▌         | 28/500 [00:08<02:07,  3.71it/s]

Epoch [28], Train Loss: 0.0936


  6%|▌         | 29/500 [00:08<02:09,  3.65it/s]

Epoch [29], Train Loss: 0.0884


  6%|▌         | 30/500 [00:08<02:14,  3.49it/s]

Epoch [30], Train Loss: 0.0925


  6%|▌         | 31/500 [00:08<02:09,  3.63it/s]

Epoch [31], Train Loss: 0.0866


  6%|▋         | 32/500 [00:09<02:13,  3.51it/s]

Epoch [32], Train Loss: 0.0916


  7%|▋         | 33/500 [00:09<02:14,  3.46it/s]

Epoch [33], Train Loss: 0.0855


  7%|▋         | 34/500 [00:09<02:14,  3.46it/s]

Epoch [34], Train Loss: 0.0811


  7%|▋         | 35/500 [00:10<02:09,  3.58it/s]

Epoch [35], Train Loss: 0.0855


  7%|▋         | 36/500 [00:10<02:05,  3.71it/s]

Epoch [36], Train Loss: 0.0814


  7%|▋         | 37/500 [00:10<02:13,  3.46it/s]

Epoch [37], Train Loss: 0.0828


  8%|▊         | 38/500 [00:10<02:07,  3.62it/s]

Epoch [38], Train Loss: 0.0758


  8%|▊         | 39/500 [00:11<02:19,  3.31it/s]

Epoch [39], Train Loss: 0.0777


  8%|▊         | 40/500 [00:11<02:21,  3.25it/s]

Epoch [40], Train Loss: 0.0802


  8%|▊         | 41/500 [00:11<02:19,  3.29it/s]

Epoch [41], Train Loss: 0.0750


  8%|▊         | 42/500 [00:12<02:21,  3.24it/s]

Epoch [42], Train Loss: 0.0761


  9%|▊         | 43/500 [00:12<02:20,  3.26it/s]

Epoch [43], Train Loss: 0.0768


  9%|▉         | 44/500 [00:12<02:14,  3.39it/s]

Epoch [44], Train Loss: 0.0738


  9%|▉         | 45/500 [00:13<02:08,  3.54it/s]

Epoch [45], Train Loss: 0.0740


  9%|▉         | 46/500 [00:13<02:12,  3.42it/s]

Epoch [46], Train Loss: 0.0700


  9%|▉         | 47/500 [00:13<02:11,  3.44it/s]

Epoch [47], Train Loss: 0.0765


 10%|▉         | 48/500 [00:13<02:08,  3.52it/s]

Epoch [48], Train Loss: 0.0750


 10%|▉         | 49/500 [00:14<02:08,  3.50it/s]

Epoch [49], Train Loss: 0.0766


 10%|█         | 50/500 [00:14<02:04,  3.60it/s]

Epoch [50], Train Loss: 0.0746


 10%|█         | 51/500 [00:14<02:05,  3.57it/s]

Epoch [51], Train Loss: 0.0694


 10%|█         | 52/500 [00:15<02:05,  3.57it/s]

Epoch [52], Train Loss: 0.0682


 11%|█         | 53/500 [00:15<02:05,  3.56it/s]

Epoch [53], Train Loss: 0.0712


 11%|█         | 54/500 [00:15<02:01,  3.67it/s]

Epoch [54], Train Loss: 0.0698


 11%|█         | 55/500 [00:15<01:59,  3.71it/s]

Epoch [55], Train Loss: 0.0711


 11%|█         | 56/500 [00:16<01:57,  3.77it/s]

Epoch [56], Train Loss: 0.0681


 11%|█▏        | 57/500 [00:16<02:00,  3.66it/s]

Epoch [57], Train Loss: 0.0690


 12%|█▏        | 58/500 [00:16<01:57,  3.75it/s]

Epoch [58], Train Loss: 0.0675


 12%|█▏        | 59/500 [00:16<02:00,  3.66it/s]

Epoch [59], Train Loss: 0.0678


 12%|█▏        | 60/500 [00:17<02:02,  3.60it/s]

Epoch [60], Train Loss: 0.0664


 12%|█▏        | 61/500 [00:17<02:00,  3.64it/s]

Epoch [61], Train Loss: 0.0684


 12%|█▏        | 62/500 [00:17<01:59,  3.67it/s]

Epoch [62], Train Loss: 0.0647


 13%|█▎        | 63/500 [00:17<01:58,  3.68it/s]

Epoch [63], Train Loss: 0.0640


 13%|█▎        | 64/500 [00:18<02:05,  3.47it/s]

Epoch [64], Train Loss: 0.0646


 13%|█▎        | 65/500 [00:18<02:04,  3.49it/s]

Epoch [65], Train Loss: 0.0625


 13%|█▎        | 66/500 [00:18<02:03,  3.51it/s]

Epoch [66], Train Loss: 0.0642


 13%|█▎        | 67/500 [00:19<02:03,  3.50it/s]

Epoch [67], Train Loss: 0.0649


 14%|█▎        | 68/500 [00:19<02:03,  3.50it/s]

Epoch [68], Train Loss: 0.0615


 14%|█▍        | 69/500 [00:19<02:02,  3.53it/s]

Epoch [69], Train Loss: 0.0639


 14%|█▍        | 70/500 [00:20<01:59,  3.59it/s]

Epoch [70], Train Loss: 0.0620


 14%|█▍        | 71/500 [00:20<01:58,  3.63it/s]

Epoch [71], Train Loss: 0.0600


 14%|█▍        | 72/500 [00:20<02:00,  3.56it/s]

Epoch [72], Train Loss: 0.0612


 15%|█▍        | 73/500 [00:20<02:01,  3.51it/s]

Epoch [73], Train Loss: 0.0614


 15%|█▍        | 74/500 [00:21<02:02,  3.48it/s]

Epoch [74], Train Loss: 0.0603


 15%|█▌        | 75/500 [00:21<01:57,  3.63it/s]

Epoch [75], Train Loss: 0.0599


 15%|█▌        | 76/500 [00:21<01:57,  3.62it/s]

Epoch [76], Train Loss: 0.0575


 15%|█▌        | 77/500 [00:21<02:00,  3.51it/s]

Epoch [77], Train Loss: 0.0588


 16%|█▌        | 78/500 [00:22<01:57,  3.59it/s]

Epoch [78], Train Loss: 0.0608


 16%|█▌        | 79/500 [00:22<01:57,  3.59it/s]

Epoch [79], Train Loss: 0.0594


 16%|█▌        | 80/500 [00:22<01:56,  3.59it/s]

Epoch [80], Train Loss: 0.0576


 16%|█▌        | 81/500 [00:23<01:53,  3.69it/s]

Epoch [81], Train Loss: 0.0589


 16%|█▋        | 82/500 [00:23<01:59,  3.50it/s]

Epoch [82], Train Loss: 0.0578


 17%|█▋        | 83/500 [00:23<01:56,  3.58it/s]

Epoch [83], Train Loss: 0.0578


 17%|█▋        | 84/500 [00:23<02:00,  3.46it/s]

Epoch [84], Train Loss: 0.0562


 17%|█▋        | 85/500 [00:24<02:05,  3.30it/s]

Epoch [85], Train Loss: 0.0565


 17%|█▋        | 86/500 [00:24<02:05,  3.30it/s]

Epoch [86], Train Loss: 0.0559


 17%|█▋        | 87/500 [00:24<02:06,  3.27it/s]

Epoch [87], Train Loss: 0.0546


 18%|█▊        | 88/500 [00:25<02:07,  3.24it/s]

Epoch [88], Train Loss: 0.0566


 18%|█▊        | 89/500 [00:25<02:02,  3.36it/s]

Epoch [89], Train Loss: 0.0558


 18%|█▊        | 90/500 [00:25<01:57,  3.48it/s]

Epoch [90], Train Loss: 0.0551


 18%|█▊        | 91/500 [00:26<01:55,  3.54it/s]

Epoch [91], Train Loss: 0.0548


 18%|█▊        | 92/500 [00:26<01:58,  3.43it/s]

Epoch [92], Train Loss: 0.0527


 19%|█▊        | 93/500 [00:26<01:53,  3.58it/s]

Epoch [93], Train Loss: 0.0535


 19%|█▉        | 94/500 [00:26<01:49,  3.70it/s]

Epoch [94], Train Loss: 0.0555


 19%|█▉        | 95/500 [00:27<02:03,  3.28it/s]

Epoch [95], Train Loss: 0.0539


 19%|█▉        | 96/500 [00:27<01:59,  3.38it/s]

Epoch [96], Train Loss: 0.0546


 19%|█▉        | 97/500 [00:27<01:56,  3.45it/s]

Epoch [97], Train Loss: 0.0542


 20%|█▉        | 98/500 [00:28<01:56,  3.45it/s]

Epoch [98], Train Loss: 0.0502


 20%|█▉        | 99/500 [00:28<01:54,  3.50it/s]

Epoch [99], Train Loss: 0.0529


 20%|██        | 100/500 [00:28<01:53,  3.52it/s]

Epoch [100], Train Loss: 0.0525


 20%|██        | 101/500 [00:28<01:52,  3.54it/s]

Epoch [101], Train Loss: 0.0516


 20%|██        | 102/500 [00:29<01:50,  3.60it/s]

Epoch [102], Train Loss: 0.0495


 21%|██        | 103/500 [00:29<02:01,  3.28it/s]

Epoch [103], Train Loss: 0.0541


 21%|██        | 104/500 [00:29<01:57,  3.37it/s]

Epoch [104], Train Loss: 0.0557


 21%|██        | 105/500 [00:30<01:55,  3.42it/s]

Epoch [105], Train Loss: 0.0490


 21%|██        | 106/500 [00:30<02:11,  2.99it/s]

Epoch [106], Train Loss: 0.0528


 21%|██▏       | 107/500 [00:30<02:05,  3.14it/s]

Epoch [107], Train Loss: 0.0525


 22%|██▏       | 108/500 [00:31<02:08,  3.06it/s]

Epoch [108], Train Loss: 0.0491


 22%|██▏       | 109/500 [00:31<02:02,  3.19it/s]

Epoch [109], Train Loss: 0.0493


 22%|██▏       | 110/500 [00:31<01:59,  3.26it/s]

Epoch [110], Train Loss: 0.0511


 22%|██▏       | 111/500 [00:31<01:54,  3.41it/s]

Epoch [111], Train Loss: 0.0503


 22%|██▏       | 112/500 [00:32<01:52,  3.46it/s]

Epoch [112], Train Loss: 0.0495


 23%|██▎       | 113/500 [00:32<01:50,  3.49it/s]

Epoch [113], Train Loss: 0.0472


 23%|██▎       | 114/500 [00:32<01:48,  3.56it/s]

Epoch [114], Train Loss: 0.0506


 23%|██▎       | 115/500 [00:33<01:49,  3.50it/s]

Epoch [115], Train Loss: 0.0493


 23%|██▎       | 116/500 [00:33<02:01,  3.16it/s]

Epoch [116], Train Loss: 0.0514


 23%|██▎       | 117/500 [00:33<02:09,  2.95it/s]

Epoch [117], Train Loss: 0.0466


 24%|██▎       | 118/500 [00:34<02:12,  2.88it/s]

Epoch [118], Train Loss: 0.0463


 24%|██▍       | 119/500 [00:34<02:08,  2.96it/s]

Epoch [119], Train Loss: 0.0470


 24%|██▍       | 120/500 [00:34<02:00,  3.15it/s]

Epoch [120], Train Loss: 0.0532


 24%|██▍       | 121/500 [00:35<02:00,  3.16it/s]

Epoch [121], Train Loss: 0.0478


 24%|██▍       | 122/500 [00:35<01:54,  3.29it/s]

Epoch [122], Train Loss: 0.0479


 25%|██▍       | 123/500 [00:35<01:52,  3.35it/s]

Epoch [123], Train Loss: 0.0463


 25%|██▍       | 124/500 [00:36<01:57,  3.20it/s]

Epoch [124], Train Loss: 0.0463


 25%|██▌       | 125/500 [00:36<01:54,  3.27it/s]

Epoch [125], Train Loss: 0.0476


 25%|██▌       | 126/500 [00:36<01:55,  3.23it/s]

Epoch [126], Train Loss: 0.0481


 25%|██▌       | 127/500 [00:37<01:57,  3.17it/s]

Epoch [127], Train Loss: 0.0478


 26%|██▌       | 128/500 [00:37<01:51,  3.34it/s]

Epoch [128], Train Loss: 0.0476


 26%|██▌       | 129/500 [00:37<01:51,  3.33it/s]

Epoch [129], Train Loss: 0.0456


 26%|██▌       | 130/500 [00:37<01:48,  3.42it/s]

Epoch [130], Train Loss: 0.0463


 26%|██▌       | 131/500 [00:38<01:54,  3.22it/s]

Epoch [131], Train Loss: 0.0465


 26%|██▋       | 132/500 [00:38<01:48,  3.39it/s]

Epoch [132], Train Loss: 0.0437


 27%|██▋       | 133/500 [00:38<01:50,  3.32it/s]

Epoch [133], Train Loss: 0.0449


 27%|██▋       | 134/500 [00:39<01:50,  3.32it/s]

Epoch [134], Train Loss: 0.0457


 27%|██▋       | 135/500 [00:39<01:56,  3.14it/s]

Epoch [135], Train Loss: 0.0475


 27%|██▋       | 136/500 [00:39<01:58,  3.08it/s]

Epoch [136], Train Loss: 0.0470


 27%|██▋       | 137/500 [00:40<02:01,  2.99it/s]

Epoch [137], Train Loss: 0.0443


 28%|██▊       | 138/500 [00:40<01:55,  3.15it/s]

Epoch [138], Train Loss: 0.0459


 28%|██▊       | 139/500 [00:40<01:51,  3.23it/s]

Epoch [139], Train Loss: 0.0464


 28%|██▊       | 140/500 [00:41<01:52,  3.21it/s]

Epoch [140], Train Loss: 0.0452


 28%|██▊       | 141/500 [00:41<01:46,  3.37it/s]

Epoch [141], Train Loss: 0.0459


 28%|██▊       | 142/500 [00:41<01:45,  3.39it/s]

Epoch [142], Train Loss: 0.0438


 29%|██▊       | 143/500 [00:41<01:44,  3.42it/s]

Epoch [143], Train Loss: 0.0432


 29%|██▉       | 144/500 [00:42<01:45,  3.38it/s]

Epoch [144], Train Loss: 0.0453


 29%|██▉       | 145/500 [00:42<01:45,  3.36it/s]

Epoch [145], Train Loss: 0.0432


 29%|██▉       | 146/500 [00:42<01:44,  3.37it/s]

Epoch [146], Train Loss: 0.0460


 29%|██▉       | 147/500 [00:43<01:40,  3.51it/s]

Epoch [147], Train Loss: 0.0441


 30%|██▉       | 148/500 [00:43<01:40,  3.50it/s]

Epoch [148], Train Loss: 0.0410


 30%|██▉       | 149/500 [00:43<01:36,  3.63it/s]

Epoch [149], Train Loss: 0.0439


 30%|███       | 150/500 [00:43<01:38,  3.56it/s]

Epoch [150], Train Loss: 0.0424


 30%|███       | 151/500 [00:44<01:35,  3.64it/s]

Epoch [151], Train Loss: 0.0442


 30%|███       | 152/500 [00:44<01:31,  3.78it/s]

Epoch [152], Train Loss: 0.0430


 31%|███       | 153/500 [00:44<01:32,  3.77it/s]

Epoch [153], Train Loss: 0.0432


 31%|███       | 154/500 [00:44<01:33,  3.69it/s]

Epoch [154], Train Loss: 0.0416


 31%|███       | 155/500 [00:45<01:31,  3.76it/s]

Epoch [155], Train Loss: 0.0446


 31%|███       | 156/500 [00:45<01:31,  3.75it/s]

Epoch [156], Train Loss: 0.0420


 31%|███▏      | 157/500 [00:45<01:29,  3.85it/s]

Epoch [157], Train Loss: 0.0447


 32%|███▏      | 158/500 [00:45<01:36,  3.56it/s]

Epoch [158], Train Loss: 0.0409


 32%|███▏      | 159/500 [00:46<01:37,  3.51it/s]

Epoch [159], Train Loss: 0.0444


 32%|███▏      | 160/500 [00:46<01:36,  3.51it/s]

Epoch [160], Train Loss: 0.0401


 32%|███▏      | 161/500 [00:46<01:37,  3.49it/s]

Epoch [161], Train Loss: 0.0426


 32%|███▏      | 162/500 [00:47<01:37,  3.45it/s]

Epoch [162], Train Loss: 0.0428


 33%|███▎      | 163/500 [00:47<01:39,  3.38it/s]

Epoch [163], Train Loss: 0.0439


 33%|███▎      | 164/500 [00:47<01:34,  3.54it/s]

Epoch [164], Train Loss: 0.0409


 33%|███▎      | 165/500 [00:48<01:39,  3.36it/s]

Epoch [165], Train Loss: 0.0426


 33%|███▎      | 166/500 [00:48<01:36,  3.46it/s]

Epoch [166], Train Loss: 0.0412


 33%|███▎      | 167/500 [00:48<01:35,  3.47it/s]

Epoch [167], Train Loss: 0.0414


 34%|███▎      | 168/500 [00:48<01:33,  3.53it/s]

Epoch [168], Train Loss: 0.0424


 34%|███▍      | 169/500 [00:49<01:33,  3.53it/s]

Epoch [169], Train Loss: 0.0392


 34%|███▍      | 170/500 [00:49<01:33,  3.51it/s]

Epoch [170], Train Loss: 0.0428


 34%|███▍      | 171/500 [00:49<01:31,  3.59it/s]

Epoch [171], Train Loss: 0.0411


 34%|███▍      | 172/500 [00:49<01:30,  3.62it/s]

Epoch [172], Train Loss: 0.0402


 35%|███▍      | 173/500 [00:50<01:30,  3.62it/s]

Epoch [173], Train Loss: 0.0402


 35%|███▍      | 174/500 [00:50<01:28,  3.68it/s]

Epoch [174], Train Loss: 0.0398


 35%|███▌      | 175/500 [00:50<01:29,  3.63it/s]

Epoch [175], Train Loss: 0.0410


 35%|███▌      | 176/500 [00:51<01:29,  3.60it/s]

Epoch [176], Train Loss: 0.0420


 35%|███▌      | 177/500 [00:51<01:26,  3.73it/s]

Epoch [177], Train Loss: 0.0401


 36%|███▌      | 178/500 [00:51<01:27,  3.69it/s]

Epoch [178], Train Loss: 0.0402


 36%|███▌      | 179/500 [00:51<01:26,  3.69it/s]

Epoch [179], Train Loss: 0.0408


 36%|███▌      | 180/500 [00:52<01:31,  3.51it/s]

Epoch [180], Train Loss: 0.0396


 36%|███▌      | 181/500 [00:52<01:29,  3.55it/s]

Epoch [181], Train Loss: 0.0385


 36%|███▋      | 182/500 [00:52<01:34,  3.37it/s]

Epoch [182], Train Loss: 0.0396


 37%|███▋      | 183/500 [00:53<01:32,  3.41it/s]

Epoch [183], Train Loss: 0.0410


 37%|███▋      | 184/500 [00:53<01:30,  3.50it/s]

Epoch [184], Train Loss: 0.0399


 37%|███▋      | 185/500 [00:53<01:30,  3.47it/s]

Epoch [185], Train Loss: 0.0389


 37%|███▋      | 186/500 [00:53<01:34,  3.33it/s]

Epoch [186], Train Loss: 0.0413


 37%|███▋      | 187/500 [00:54<01:33,  3.36it/s]

Epoch [187], Train Loss: 0.0390


 38%|███▊      | 188/500 [00:54<01:36,  3.25it/s]

Epoch [188], Train Loss: 0.0399


 38%|███▊      | 189/500 [00:54<01:33,  3.32it/s]

Epoch [189], Train Loss: 0.0381


 38%|███▊      | 190/500 [00:55<01:32,  3.34it/s]

Epoch [190], Train Loss: 0.0405


 38%|███▊      | 191/500 [00:55<01:29,  3.45it/s]

Epoch [191], Train Loss: 0.0395


 38%|███▊      | 192/500 [00:55<01:29,  3.44it/s]

Epoch [192], Train Loss: 0.0393


 39%|███▊      | 193/500 [00:56<01:27,  3.51it/s]

Epoch [193], Train Loss: 0.0404


 39%|███▉      | 194/500 [00:56<01:26,  3.56it/s]

Epoch [194], Train Loss: 0.0410


 39%|███▉      | 195/500 [00:56<01:25,  3.56it/s]

Epoch [195], Train Loss: 0.0385


 39%|███▉      | 196/500 [00:56<01:21,  3.71it/s]

Epoch [196], Train Loss: 0.0390


 39%|███▉      | 197/500 [00:57<01:20,  3.76it/s]

Epoch [197], Train Loss: 0.0371


 40%|███▉      | 198/500 [00:57<01:22,  3.65it/s]

Epoch [198], Train Loss: 0.0376


 40%|███▉      | 199/500 [00:57<01:22,  3.66it/s]

Epoch [199], Train Loss: 0.0392


 40%|████      | 200/500 [00:57<01:22,  3.62it/s]

Epoch [200], Train Loss: 0.0406


 40%|████      | 201/500 [00:58<01:23,  3.57it/s]

Epoch [201], Train Loss: 0.0380


 40%|████      | 202/500 [00:58<01:21,  3.66it/s]

Epoch [202], Train Loss: 0.0382


 41%|████      | 203/500 [00:58<01:24,  3.51it/s]

Epoch [203], Train Loss: 0.0377


 41%|████      | 204/500 [00:59<01:24,  3.50it/s]

Epoch [204], Train Loss: 0.0391


 41%|████      | 205/500 [00:59<01:25,  3.44it/s]

Epoch [205], Train Loss: 0.0379


 41%|████      | 206/500 [00:59<01:24,  3.48it/s]

Epoch [206], Train Loss: 0.0385


 41%|████▏     | 207/500 [00:59<01:24,  3.45it/s]

Epoch [207], Train Loss: 0.0363


 42%|████▏     | 208/500 [01:00<01:24,  3.46it/s]

Epoch [208], Train Loss: 0.0363


 42%|████▏     | 209/500 [01:00<01:22,  3.51it/s]

Epoch [209], Train Loss: 0.0385


 42%|████▏     | 210/500 [01:00<01:24,  3.41it/s]

Epoch [210], Train Loss: 0.0382


 42%|████▏     | 211/500 [01:01<01:23,  3.46it/s]

Epoch [211], Train Loss: 0.0379


 42%|████▏     | 212/500 [01:01<01:21,  3.54it/s]

Epoch [212], Train Loss: 0.0373


 43%|████▎     | 213/500 [01:01<01:21,  3.50it/s]

Epoch [213], Train Loss: 0.0388


 43%|████▎     | 214/500 [01:01<01:20,  3.57it/s]

Epoch [214], Train Loss: 0.0373


 43%|████▎     | 215/500 [01:02<01:21,  3.50it/s]

Epoch [215], Train Loss: 0.0360


 43%|████▎     | 216/500 [01:02<01:18,  3.61it/s]

Epoch [216], Train Loss: 0.0365


 43%|████▎     | 217/500 [01:02<01:16,  3.70it/s]

Epoch [217], Train Loss: 0.0387


 44%|████▎     | 218/500 [01:03<01:17,  3.63it/s]

Epoch [218], Train Loss: 0.0371


 44%|████▍     | 219/500 [01:03<01:20,  3.48it/s]

Epoch [219], Train Loss: 0.0361


 44%|████▍     | 220/500 [01:03<01:18,  3.55it/s]

Epoch [220], Train Loss: 0.0375


 44%|████▍     | 221/500 [01:03<01:17,  3.58it/s]

Epoch [221], Train Loss: 0.0368


 44%|████▍     | 222/500 [01:04<01:22,  3.36it/s]

Epoch [222], Train Loss: 0.0374


 45%|████▍     | 223/500 [01:04<01:20,  3.45it/s]

Epoch [223], Train Loss: 0.0366


 45%|████▍     | 224/500 [01:04<01:19,  3.46it/s]

Epoch [224], Train Loss: 0.0364


 45%|████▌     | 225/500 [01:05<01:18,  3.51it/s]

Epoch [225], Train Loss: 0.0359


 45%|████▌     | 226/500 [01:05<01:27,  3.13it/s]

Epoch [226], Train Loss: 0.0383


 45%|████▌     | 227/500 [01:05<01:27,  3.11it/s]

Epoch [227], Train Loss: 0.0354


 46%|████▌     | 228/500 [01:06<01:23,  3.26it/s]

Epoch [228], Train Loss: 0.0355


 46%|████▌     | 229/500 [01:06<01:27,  3.09it/s]

Epoch [229], Train Loss: 0.0361


 46%|████▌     | 230/500 [01:06<01:24,  3.19it/s]

Epoch [230], Train Loss: 0.0369


 46%|████▌     | 231/500 [01:07<01:23,  3.20it/s]

Epoch [231], Train Loss: 0.0358


 46%|████▋     | 232/500 [01:07<01:21,  3.29it/s]

Epoch [232], Train Loss: 0.0346


 47%|████▋     | 233/500 [01:07<01:23,  3.21it/s]

Epoch [233], Train Loss: 0.0352


 47%|████▋     | 234/500 [01:07<01:26,  3.09it/s]

Epoch [234], Train Loss: 0.0358


 47%|████▋     | 235/500 [01:08<01:24,  3.13it/s]

Epoch [235], Train Loss: 0.0344


 47%|████▋     | 236/500 [01:08<01:29,  2.97it/s]

Epoch [236], Train Loss: 0.0365


 47%|████▋     | 237/500 [01:09<01:28,  2.98it/s]

Epoch [237], Train Loss: 0.0354


 48%|████▊     | 238/500 [01:09<01:23,  3.12it/s]

Epoch [238], Train Loss: 0.0340


 48%|████▊     | 239/500 [01:09<01:21,  3.22it/s]

Epoch [239], Train Loss: 0.0360


 48%|████▊     | 240/500 [01:09<01:17,  3.37it/s]

Epoch [240], Train Loss: 0.0354


 48%|████▊     | 241/500 [01:10<01:14,  3.48it/s]

Epoch [241], Train Loss: 0.0350


 48%|████▊     | 242/500 [01:10<01:15,  3.41it/s]

Epoch [242], Train Loss: 0.0368


 49%|████▊     | 243/500 [01:10<01:14,  3.45it/s]

Epoch [243], Train Loss: 0.0344


 49%|████▉     | 244/500 [01:11<01:17,  3.32it/s]

Epoch [244], Train Loss: 0.0337


 49%|████▉     | 245/500 [01:11<01:19,  3.19it/s]

Epoch [245], Train Loss: 0.0362


 49%|████▉     | 246/500 [01:11<01:20,  3.17it/s]

Epoch [246], Train Loss: 0.0373


 49%|████▉     | 247/500 [01:12<01:20,  3.15it/s]

Epoch [247], Train Loss: 0.0365


 50%|████▉     | 248/500 [01:12<01:18,  3.21it/s]

Epoch [248], Train Loss: 0.0359


 50%|████▉     | 249/500 [01:12<01:17,  3.26it/s]

Epoch [249], Train Loss: 0.0355


 50%|█████     | 250/500 [01:12<01:17,  3.22it/s]

Epoch [250], Train Loss: 0.0350


 50%|█████     | 251/500 [01:13<01:14,  3.34it/s]

Epoch [251], Train Loss: 0.0338


 50%|█████     | 252/500 [01:13<01:14,  3.35it/s]

Epoch [252], Train Loss: 0.0375


 51%|█████     | 253/500 [01:13<01:16,  3.21it/s]

Epoch [253], Train Loss: 0.0374


 51%|█████     | 254/500 [01:14<01:14,  3.30it/s]

Epoch [254], Train Loss: 0.0348


 51%|█████     | 255/500 [01:14<01:17,  3.17it/s]

Epoch [255], Train Loss: 0.0342


 51%|█████     | 256/500 [01:14<01:17,  3.14it/s]

Epoch [256], Train Loss: 0.0367


 51%|█████▏    | 257/500 [01:15<01:17,  3.15it/s]

Epoch [257], Train Loss: 0.0333


 52%|█████▏    | 258/500 [01:15<01:16,  3.18it/s]

Epoch [258], Train Loss: 0.0336


 52%|█████▏    | 259/500 [01:15<01:13,  3.27it/s]

Epoch [259], Train Loss: 0.0352


 52%|█████▏    | 260/500 [01:16<01:14,  3.22it/s]

Epoch [260], Train Loss: 0.0335


 52%|█████▏    | 261/500 [01:16<01:10,  3.38it/s]

Epoch [261], Train Loss: 0.0342


 52%|█████▏    | 262/500 [01:16<01:07,  3.51it/s]

Epoch [262], Train Loss: 0.0329


 53%|█████▎    | 263/500 [01:16<01:14,  3.19it/s]

Epoch [263], Train Loss: 0.0326


 53%|█████▎    | 264/500 [01:17<01:09,  3.40it/s]

Epoch [264], Train Loss: 0.0334


 53%|█████▎    | 265/500 [01:17<01:06,  3.55it/s]

Epoch [265], Train Loss: 0.0330


 53%|█████▎    | 266/500 [01:17<01:06,  3.53it/s]

Epoch [266], Train Loss: 0.0321


 53%|█████▎    | 267/500 [01:18<01:07,  3.45it/s]

Epoch [267], Train Loss: 0.0343


 54%|█████▎    | 268/500 [01:18<01:07,  3.45it/s]

Epoch [268], Train Loss: 0.0343


 54%|█████▍    | 269/500 [01:18<01:06,  3.46it/s]

Epoch [269], Train Loss: 0.0342


 54%|█████▍    | 270/500 [01:18<01:07,  3.42it/s]

Epoch [270], Train Loss: 0.0349


 54%|█████▍    | 271/500 [01:19<01:06,  3.44it/s]

Epoch [271], Train Loss: 0.0324


 54%|█████▍    | 272/500 [01:19<01:06,  3.43it/s]

Epoch [272], Train Loss: 0.0317


 55%|█████▍    | 273/500 [01:19<01:12,  3.14it/s]

Epoch [273], Train Loss: 0.0332


 55%|█████▍    | 274/500 [01:20<01:09,  3.23it/s]

Epoch [274], Train Loss: 0.0328


 55%|█████▌    | 275/500 [01:20<01:08,  3.28it/s]

Epoch [275], Train Loss: 0.0325


 55%|█████▌    | 276/500 [01:20<01:07,  3.31it/s]

Epoch [276], Train Loss: 0.0334


 55%|█████▌    | 277/500 [01:21<01:06,  3.33it/s]

Epoch [277], Train Loss: 0.0320


 56%|█████▌    | 278/500 [01:21<01:05,  3.38it/s]

Epoch [278], Train Loss: 0.0306


 56%|█████▌    | 279/500 [01:21<01:05,  3.38it/s]

Epoch [279], Train Loss: 0.0324


 56%|█████▌    | 280/500 [01:21<01:06,  3.32it/s]

Epoch [280], Train Loss: 0.0341


 56%|█████▌    | 281/500 [01:22<01:05,  3.36it/s]

Epoch [281], Train Loss: 0.0334


 56%|█████▋    | 282/500 [01:22<01:04,  3.38it/s]

Epoch [282], Train Loss: 0.0327


 57%|█████▋    | 283/500 [01:22<01:05,  3.30it/s]

Epoch [283], Train Loss: 0.0318


 57%|█████▋    | 284/500 [01:23<01:04,  3.34it/s]

Epoch [284], Train Loss: 0.0342


 57%|█████▋    | 285/500 [01:23<01:04,  3.35it/s]

Epoch [285], Train Loss: 0.0323


 57%|█████▋    | 286/500 [01:23<01:03,  3.38it/s]

Epoch [286], Train Loss: 0.0306


 57%|█████▋    | 287/500 [01:23<01:02,  3.41it/s]

Epoch [287], Train Loss: 0.0327


 58%|█████▊    | 288/500 [01:24<01:02,  3.41it/s]

Epoch [288], Train Loss: 0.0329


 58%|█████▊    | 289/500 [01:24<01:01,  3.42it/s]

Epoch [289], Train Loss: 0.0334


 58%|█████▊    | 290/500 [01:24<00:59,  3.51it/s]

Epoch [290], Train Loss: 0.0334


 58%|█████▊    | 291/500 [01:25<01:00,  3.48it/s]

Epoch [291], Train Loss: 0.0308


 58%|█████▊    | 292/500 [01:25<01:00,  3.46it/s]

Epoch [292], Train Loss: 0.0324


 59%|█████▊    | 293/500 [01:25<01:02,  3.32it/s]

Epoch [293], Train Loss: 0.0332


 59%|█████▉    | 294/500 [01:26<01:02,  3.29it/s]

Epoch [294], Train Loss: 0.0318


 59%|█████▉    | 295/500 [01:26<01:01,  3.32it/s]

Epoch [295], Train Loss: 0.0320


 59%|█████▉    | 296/500 [01:26<01:01,  3.32it/s]

Epoch [296], Train Loss: 0.0319


 59%|█████▉    | 297/500 [01:26<01:01,  3.31it/s]

Epoch [297], Train Loss: 0.0326


 60%|█████▉    | 298/500 [01:27<01:00,  3.33it/s]

Epoch [298], Train Loss: 0.0315


 60%|█████▉    | 299/500 [01:27<01:00,  3.34it/s]

Epoch [299], Train Loss: 0.0324


 60%|██████    | 300/500 [01:27<00:59,  3.35it/s]

Epoch [300], Train Loss: 0.0318


 60%|██████    | 301/500 [01:28<00:59,  3.36it/s]

Epoch [301], Train Loss: 0.0323


 60%|██████    | 302/500 [01:28<00:58,  3.37it/s]

Epoch [302], Train Loss: 0.0318


 61%|██████    | 303/500 [01:28<00:57,  3.40it/s]

Epoch [303], Train Loss: 0.0324


 61%|██████    | 304/500 [01:29<00:57,  3.39it/s]

Epoch [304], Train Loss: 0.0336


 61%|██████    | 305/500 [01:29<01:02,  3.12it/s]

Epoch [305], Train Loss: 0.0308


 61%|██████    | 306/500 [01:29<01:01,  3.17it/s]

Epoch [306], Train Loss: 0.0337


 61%|██████▏   | 307/500 [01:30<01:03,  3.05it/s]

Epoch [307], Train Loss: 0.0319


 62%|██████▏   | 308/500 [01:30<01:04,  2.95it/s]

Epoch [308], Train Loss: 0.0317


 62%|██████▏   | 309/500 [01:30<01:09,  2.73it/s]

Epoch [309], Train Loss: 0.0309


 62%|██████▏   | 310/500 [01:31<01:09,  2.72it/s]

Epoch [310], Train Loss: 0.0334


 62%|██████▏   | 311/500 [01:31<01:10,  2.68it/s]

Epoch [311], Train Loss: 0.0305


 62%|██████▏   | 312/500 [01:31<01:05,  2.87it/s]

Epoch [312], Train Loss: 0.0294


 63%|██████▎   | 313/500 [01:32<01:06,  2.79it/s]

Epoch [313], Train Loss: 0.0314


 63%|██████▎   | 314/500 [01:32<01:06,  2.81it/s]

Epoch [314], Train Loss: 0.0325


 63%|██████▎   | 315/500 [01:32<01:04,  2.86it/s]

Epoch [315], Train Loss: 0.0303


 63%|██████▎   | 316/500 [01:33<01:03,  2.89it/s]

Epoch [316], Train Loss: 0.0292


 63%|██████▎   | 317/500 [01:33<01:03,  2.89it/s]

Epoch [317], Train Loss: 0.0308


 64%|██████▎   | 318/500 [01:34<01:04,  2.80it/s]

Epoch [318], Train Loss: 0.0307


 64%|██████▍   | 319/500 [01:34<01:01,  2.93it/s]

Epoch [319], Train Loss: 0.0300


 64%|██████▍   | 320/500 [01:34<01:00,  2.97it/s]

Epoch [320], Train Loss: 0.0310


 64%|██████▍   | 321/500 [01:34<00:58,  3.08it/s]

Epoch [321], Train Loss: 0.0279


 64%|██████▍   | 322/500 [01:35<00:54,  3.24it/s]

Epoch [322], Train Loss: 0.0308


 65%|██████▍   | 323/500 [01:35<00:58,  3.05it/s]

Epoch [323], Train Loss: 0.0298


 65%|██████▍   | 324/500 [01:35<00:56,  3.13it/s]

Epoch [324], Train Loss: 0.0299


 65%|██████▌   | 325/500 [01:36<00:54,  3.22it/s]

Epoch [325], Train Loss: 0.0297


 65%|██████▌   | 326/500 [01:36<00:53,  3.27it/s]

Epoch [326], Train Loss: 0.0288


 65%|██████▌   | 327/500 [01:36<00:58,  2.96it/s]

Epoch [327], Train Loss: 0.0299


 66%|██████▌   | 328/500 [01:37<00:56,  3.04it/s]

Epoch [328], Train Loss: 0.0291


 66%|██████▌   | 329/500 [01:37<00:56,  3.02it/s]

Epoch [329], Train Loss: 0.0309


 66%|██████▌   | 330/500 [01:37<00:57,  2.97it/s]

Epoch [330], Train Loss: 0.0292


 66%|██████▌   | 331/500 [01:38<00:54,  3.12it/s]

Epoch [331], Train Loss: 0.0308


 66%|██████▋   | 332/500 [01:38<00:52,  3.20it/s]

Epoch [332], Train Loss: 0.0300


 67%|██████▋   | 333/500 [01:38<00:57,  2.92it/s]

Epoch [333], Train Loss: 0.0303


 67%|██████▋   | 334/500 [01:39<00:53,  3.11it/s]

Epoch [334], Train Loss: 0.0313


 67%|██████▋   | 335/500 [01:39<00:51,  3.23it/s]

Epoch [335], Train Loss: 0.0296


 67%|██████▋   | 336/500 [01:39<00:53,  3.09it/s]

Epoch [336], Train Loss: 0.0282


 67%|██████▋   | 337/500 [01:40<00:53,  3.02it/s]

Epoch [337], Train Loss: 0.0299


 68%|██████▊   | 338/500 [01:40<00:50,  3.18it/s]

Epoch [338], Train Loss: 0.0300


 68%|██████▊   | 339/500 [01:40<00:56,  2.84it/s]

Epoch [339], Train Loss: 0.0277


 68%|██████▊   | 340/500 [01:41<00:58,  2.73it/s]

Epoch [340], Train Loss: 0.0300


 68%|██████▊   | 341/500 [01:41<00:54,  2.91it/s]

Epoch [341], Train Loss: 0.0317


 68%|██████▊   | 342/500 [01:41<00:58,  2.69it/s]

Epoch [342], Train Loss: 0.0290


 69%|██████▊   | 343/500 [01:42<01:00,  2.59it/s]

Epoch [343], Train Loss: 0.0299


 69%|██████▉   | 344/500 [01:42<01:01,  2.55it/s]

Epoch [344], Train Loss: 0.0294


 69%|██████▉   | 345/500 [01:43<00:55,  2.79it/s]

Epoch [345], Train Loss: 0.0277


 69%|██████▉   | 346/500 [01:43<00:56,  2.72it/s]

Epoch [346], Train Loss: 0.0290


 69%|██████▉   | 347/500 [01:43<00:52,  2.91it/s]

Epoch [347], Train Loss: 0.0301


 70%|██████▉   | 348/500 [01:44<00:49,  3.04it/s]

Epoch [348], Train Loss: 0.0290


 70%|██████▉   | 349/500 [01:44<00:51,  2.93it/s]

Epoch [349], Train Loss: 0.0300


 70%|███████   | 350/500 [01:44<00:51,  2.91it/s]

Epoch [350], Train Loss: 0.0299


 70%|███████   | 351/500 [01:45<00:48,  3.06it/s]

Epoch [351], Train Loss: 0.0279


 70%|███████   | 352/500 [01:45<00:48,  3.02it/s]

Epoch [352], Train Loss: 0.0289


 71%|███████   | 353/500 [01:45<00:50,  2.90it/s]

Epoch [353], Train Loss: 0.0311


 71%|███████   | 354/500 [01:46<00:47,  3.04it/s]

Epoch [354], Train Loss: 0.0271


 71%|███████   | 355/500 [01:46<00:46,  3.15it/s]

Epoch [355], Train Loss: 0.0299


 71%|███████   | 356/500 [01:46<00:47,  3.00it/s]

Epoch [356], Train Loss: 0.0294


 71%|███████▏  | 357/500 [01:47<00:46,  3.07it/s]

Epoch [357], Train Loss: 0.0282


 72%|███████▏  | 358/500 [01:47<00:45,  3.14it/s]

Epoch [358], Train Loss: 0.0291


 72%|███████▏  | 359/500 [01:47<00:43,  3.21it/s]

Epoch [359], Train Loss: 0.0278


 72%|███████▏  | 360/500 [01:48<00:45,  3.07it/s]

Epoch [360], Train Loss: 0.0291


 72%|███████▏  | 361/500 [01:48<00:43,  3.18it/s]

Epoch [361], Train Loss: 0.0280


 72%|███████▏  | 362/500 [01:48<00:42,  3.26it/s]

Epoch [362], Train Loss: 0.0287


 73%|███████▎  | 363/500 [01:48<00:45,  3.01it/s]

Epoch [363], Train Loss: 0.0291


 73%|███████▎  | 364/500 [01:49<00:42,  3.24it/s]

Epoch [364], Train Loss: 0.0288


 73%|███████▎  | 365/500 [01:49<00:40,  3.35it/s]

Epoch [365], Train Loss: 0.0280


 73%|███████▎  | 366/500 [01:49<00:40,  3.29it/s]

Epoch [366], Train Loss: 0.0286


 73%|███████▎  | 367/500 [01:50<00:40,  3.28it/s]

Epoch [367], Train Loss: 0.0288


 74%|███████▎  | 368/500 [01:50<00:39,  3.33it/s]

Epoch [368], Train Loss: 0.0280


 74%|███████▍  | 369/500 [01:50<00:38,  3.36it/s]

Epoch [369], Train Loss: 0.0273


 74%|███████▍  | 370/500 [01:51<00:42,  3.09it/s]

Epoch [370], Train Loss: 0.0301


 74%|███████▍  | 371/500 [01:51<00:40,  3.19it/s]

Epoch [371], Train Loss: 0.0301


 74%|███████▍  | 372/500 [01:51<00:39,  3.26it/s]

Epoch [372], Train Loss: 0.0304


 75%|███████▍  | 373/500 [01:51<00:38,  3.26it/s]

Epoch [373], Train Loss: 0.0294


 75%|███████▍  | 374/500 [01:52<00:38,  3.25it/s]

Epoch [374], Train Loss: 0.0279


 75%|███████▌  | 375/500 [01:52<00:37,  3.30it/s]

Epoch [375], Train Loss: 0.0264


 75%|███████▌  | 376/500 [01:52<00:38,  3.25it/s]

Epoch [376], Train Loss: 0.0266


 75%|███████▌  | 377/500 [01:53<00:38,  3.20it/s]

Epoch [377], Train Loss: 0.0280


 76%|███████▌  | 378/500 [01:53<00:37,  3.21it/s]

Epoch [378], Train Loss: 0.0292


 76%|███████▌  | 379/500 [01:53<00:37,  3.26it/s]

Epoch [379], Train Loss: 0.0271


 76%|███████▌  | 380/500 [01:54<00:35,  3.40it/s]

Epoch [380], Train Loss: 0.0276


 76%|███████▌  | 381/500 [01:54<00:33,  3.53it/s]

Epoch [381], Train Loss: 0.0274


 76%|███████▋  | 382/500 [01:54<00:34,  3.44it/s]

Epoch [382], Train Loss: 0.0284


 77%|███████▋  | 383/500 [01:54<00:33,  3.54it/s]

Epoch [383], Train Loss: 0.0275


 77%|███████▋  | 384/500 [01:55<00:32,  3.57it/s]

Epoch [384], Train Loss: 0.0279


 77%|███████▋  | 385/500 [01:55<00:35,  3.25it/s]

Epoch [385], Train Loss: 0.0273


 77%|███████▋  | 386/500 [01:55<00:34,  3.28it/s]

Epoch [386], Train Loss: 0.0280


 77%|███████▋  | 387/500 [01:56<00:38,  2.95it/s]

Epoch [387], Train Loss: 0.0271


 78%|███████▊  | 388/500 [01:56<00:38,  2.93it/s]

Epoch [388], Train Loss: 0.0283


 78%|███████▊  | 389/500 [01:56<00:35,  3.16it/s]

Epoch [389], Train Loss: 0.0275


 78%|███████▊  | 390/500 [01:57<00:36,  2.99it/s]

Epoch [390], Train Loss: 0.0286


 78%|███████▊  | 391/500 [01:57<00:34,  3.14it/s]

Epoch [391], Train Loss: 0.0292


 78%|███████▊  | 392/500 [01:57<00:33,  3.24it/s]

Epoch [392], Train Loss: 0.0258


 79%|███████▊  | 393/500 [01:58<00:33,  3.19it/s]

Epoch [393], Train Loss: 0.0266


 79%|███████▉  | 394/500 [01:58<00:34,  3.09it/s]

Epoch [394], Train Loss: 0.0271


 79%|███████▉  | 395/500 [01:58<00:33,  3.12it/s]

Epoch [395], Train Loss: 0.0286


 79%|███████▉  | 396/500 [01:59<00:32,  3.22it/s]

Epoch [396], Train Loss: 0.0282


 79%|███████▉  | 397/500 [01:59<00:35,  2.92it/s]

Epoch [397], Train Loss: 0.0281


 80%|███████▉  | 398/500 [01:59<00:34,  2.92it/s]

Epoch [398], Train Loss: 0.0274


 80%|███████▉  | 399/500 [02:00<00:32,  3.07it/s]

Epoch [399], Train Loss: 0.0281


 80%|████████  | 400/500 [02:00<00:32,  3.11it/s]

Epoch [400], Train Loss: 0.0274


 80%|████████  | 401/500 [02:00<00:29,  3.31it/s]

Epoch [401], Train Loss: 0.0263


 80%|████████  | 402/500 [02:01<00:29,  3.37it/s]

Epoch [402], Train Loss: 0.0280


 81%|████████  | 403/500 [02:01<00:28,  3.39it/s]

Epoch [403], Train Loss: 0.0277


 81%|████████  | 404/500 [02:01<00:27,  3.49it/s]

Epoch [404], Train Loss: 0.0276


 81%|████████  | 405/500 [02:01<00:26,  3.58it/s]

Epoch [405], Train Loss: 0.0260


 81%|████████  | 406/500 [02:02<00:25,  3.65it/s]

Epoch [406], Train Loss: 0.0289


 81%|████████▏ | 407/500 [02:02<00:27,  3.35it/s]

Epoch [407], Train Loss: 0.0284


 82%|████████▏ | 408/500 [02:02<00:27,  3.32it/s]

Epoch [408], Train Loss: 0.0281


 82%|████████▏ | 409/500 [02:03<00:26,  3.39it/s]

Epoch [409], Train Loss: 0.0281


 82%|████████▏ | 410/500 [02:03<00:25,  3.47it/s]

Epoch [410], Train Loss: 0.0264


 82%|████████▏ | 411/500 [02:03<00:24,  3.56it/s]

Epoch [411], Train Loss: 0.0291


 82%|████████▏ | 412/500 [02:03<00:24,  3.62it/s]

Epoch [412], Train Loss: 0.0286


 83%|████████▎ | 413/500 [02:04<00:23,  3.65it/s]

Epoch [413], Train Loss: 0.0271


 83%|████████▎ | 414/500 [02:04<00:24,  3.50it/s]

Epoch [414], Train Loss: 0.0270


 83%|████████▎ | 415/500 [02:04<00:23,  3.63it/s]

Epoch [415], Train Loss: 0.0280


 83%|████████▎ | 416/500 [02:04<00:22,  3.73it/s]

Epoch [416], Train Loss: 0.0285


 83%|████████▎ | 417/500 [02:05<00:24,  3.34it/s]

Epoch [417], Train Loss: 0.0274


 84%|████████▎ | 418/500 [02:05<00:24,  3.39it/s]

Epoch [418], Train Loss: 0.0274


 84%|████████▍ | 419/500 [02:05<00:23,  3.38it/s]

Epoch [419], Train Loss: 0.0266


 84%|████████▍ | 420/500 [02:06<00:23,  3.35it/s]

Epoch [420], Train Loss: 0.0262


 84%|████████▍ | 421/500 [02:06<00:22,  3.46it/s]

Epoch [421], Train Loss: 0.0271


 84%|████████▍ | 422/500 [02:06<00:22,  3.52it/s]

Epoch [422], Train Loss: 0.0276


 85%|████████▍ | 423/500 [02:06<00:21,  3.62it/s]

Epoch [423], Train Loss: 0.0279


 85%|████████▍ | 424/500 [02:07<00:21,  3.57it/s]

Epoch [424], Train Loss: 0.0287


 85%|████████▌ | 425/500 [02:07<00:20,  3.61it/s]

Epoch [425], Train Loss: 0.0268


 85%|████████▌ | 426/500 [02:07<00:20,  3.62it/s]

Epoch [426], Train Loss: 0.0249


 85%|████████▌ | 427/500 [02:08<00:20,  3.64it/s]

Epoch [427], Train Loss: 0.0275


 86%|████████▌ | 428/500 [02:08<00:19,  3.65it/s]

Epoch [428], Train Loss: 0.0296


 86%|████████▌ | 429/500 [02:08<00:19,  3.66it/s]

Epoch [429], Train Loss: 0.0282


 86%|████████▌ | 430/500 [02:08<00:19,  3.65it/s]

Epoch [430], Train Loss: 0.0286


 86%|████████▌ | 431/500 [02:09<00:19,  3.59it/s]

Epoch [431], Train Loss: 0.0265


 86%|████████▋ | 432/500 [02:09<00:19,  3.54it/s]

Epoch [432], Train Loss: 0.0251


 87%|████████▋ | 433/500 [02:09<00:18,  3.64it/s]

Epoch [433], Train Loss: 0.0278


 87%|████████▋ | 434/500 [02:10<00:18,  3.65it/s]

Epoch [434], Train Loss: 0.0254


 87%|████████▋ | 435/500 [02:10<00:18,  3.43it/s]

Epoch [435], Train Loss: 0.0261


 87%|████████▋ | 436/500 [02:10<00:18,  3.44it/s]

Epoch [436], Train Loss: 0.0265


 87%|████████▋ | 437/500 [02:10<00:18,  3.32it/s]

Epoch [437], Train Loss: 0.0254


 88%|████████▊ | 438/500 [02:11<00:18,  3.34it/s]

Epoch [438], Train Loss: 0.0271


 88%|████████▊ | 439/500 [02:11<00:17,  3.45it/s]

Epoch [439], Train Loss: 0.0247


 88%|████████▊ | 440/500 [02:11<00:17,  3.46it/s]

Epoch [440], Train Loss: 0.0268


 88%|████████▊ | 441/500 [02:12<00:16,  3.56it/s]

Epoch [441], Train Loss: 0.0250


 88%|████████▊ | 442/500 [02:12<00:16,  3.50it/s]

Epoch [442], Train Loss: 0.0272


 89%|████████▊ | 443/500 [02:12<00:15,  3.57it/s]

Epoch [443], Train Loss: 0.0290


 89%|████████▉ | 444/500 [02:12<00:15,  3.60it/s]

Epoch [444], Train Loss: 0.0260


 89%|████████▉ | 445/500 [02:13<00:15,  3.61it/s]

Epoch [445], Train Loss: 0.0275


 89%|████████▉ | 446/500 [02:13<00:14,  3.64it/s]

Epoch [446], Train Loss: 0.0272


 89%|████████▉ | 447/500 [02:13<00:14,  3.66it/s]

Epoch [447], Train Loss: 0.0266


 90%|████████▉ | 448/500 [02:14<00:14,  3.65it/s]

Epoch [448], Train Loss: 0.0255


 90%|████████▉ | 449/500 [02:14<00:14,  3.54it/s]

Epoch [449], Train Loss: 0.0275


 90%|█████████ | 450/500 [02:14<00:13,  3.58it/s]

Epoch [450], Train Loss: 0.0263


 90%|█████████ | 451/500 [02:14<00:13,  3.61it/s]

Epoch [451], Train Loss: 0.0288


 90%|█████████ | 452/500 [02:15<00:13,  3.65it/s]

Epoch [452], Train Loss: 0.0285


 91%|█████████ | 453/500 [02:15<00:12,  3.66it/s]

Epoch [453], Train Loss: 0.0273


 91%|█████████ | 454/500 [02:15<00:12,  3.71it/s]

Epoch [454], Train Loss: 0.0275


 91%|█████████ | 455/500 [02:15<00:11,  3.76it/s]

Epoch [455], Train Loss: 0.0267


 91%|█████████ | 456/500 [02:16<00:11,  3.74it/s]

Epoch [456], Train Loss: 0.0257


 91%|█████████▏| 457/500 [02:16<00:11,  3.78it/s]

Epoch [457], Train Loss: 0.0258


 92%|█████████▏| 458/500 [02:16<00:10,  3.83it/s]

Epoch [458], Train Loss: 0.0272


 92%|█████████▏| 459/500 [02:16<00:11,  3.63it/s]

Epoch [459], Train Loss: 0.0269


 92%|█████████▏| 460/500 [02:17<00:10,  3.71it/s]

Epoch [460], Train Loss: 0.0257


 92%|█████████▏| 461/500 [02:17<00:11,  3.37it/s]

Epoch [461], Train Loss: 0.0278


 92%|█████████▏| 462/500 [02:17<00:11,  3.29it/s]

Epoch [462], Train Loss: 0.0249


 93%|█████████▎| 463/500 [02:18<00:10,  3.39it/s]

Epoch [463], Train Loss: 0.0246


 93%|█████████▎| 464/500 [02:18<00:11,  3.22it/s]

Epoch [464], Train Loss: 0.0262


 93%|█████████▎| 465/500 [02:18<00:10,  3.29it/s]

Epoch [465], Train Loss: 0.0258


 93%|█████████▎| 466/500 [02:19<00:10,  3.38it/s]

Epoch [466], Train Loss: 0.0262


 93%|█████████▎| 467/500 [02:19<00:09,  3.57it/s]

Epoch [467], Train Loss: 0.0253


 94%|█████████▎| 468/500 [02:19<00:09,  3.52it/s]

Epoch [468], Train Loss: 0.0248


 94%|█████████▍| 469/500 [02:19<00:09,  3.40it/s]

Epoch [469], Train Loss: 0.0250


 94%|█████████▍| 470/500 [02:20<00:08,  3.45it/s]

Epoch [470], Train Loss: 0.0260


 94%|█████████▍| 471/500 [02:20<00:08,  3.53it/s]

Epoch [471], Train Loss: 0.0257


 94%|█████████▍| 472/500 [02:20<00:08,  3.30it/s]

Epoch [472], Train Loss: 0.0282


 95%|█████████▍| 473/500 [02:21<00:08,  3.28it/s]

Epoch [473], Train Loss: 0.0253


 95%|█████████▍| 474/500 [02:21<00:07,  3.26it/s]

Epoch [474], Train Loss: 0.0253


 95%|█████████▌| 475/500 [02:21<00:07,  3.27it/s]

Epoch [475], Train Loss: 0.0261


 95%|█████████▌| 476/500 [02:22<00:07,  3.39it/s]

Epoch [476], Train Loss: 0.0256


 95%|█████████▌| 477/500 [02:22<00:06,  3.51it/s]

Epoch [477], Train Loss: 0.0263


 96%|█████████▌| 478/500 [02:22<00:06,  3.38it/s]

Epoch [478], Train Loss: 0.0259


 96%|█████████▌| 479/500 [02:22<00:06,  3.46it/s]

Epoch [479], Train Loss: 0.0270


 96%|█████████▌| 480/500 [02:23<00:05,  3.59it/s]

Epoch [480], Train Loss: 0.0272


 96%|█████████▌| 481/500 [02:23<00:05,  3.48it/s]

Epoch [481], Train Loss: 0.0250


 96%|█████████▋| 482/500 [02:23<00:05,  3.38it/s]

Epoch [482], Train Loss: 0.0254


 97%|█████████▋| 483/500 [02:24<00:05,  3.40it/s]

Epoch [483], Train Loss: 0.0265


 97%|█████████▋| 484/500 [02:24<00:04,  3.52it/s]

Epoch [484], Train Loss: 0.0246


 97%|█████████▋| 485/500 [02:24<00:04,  3.22it/s]

Epoch [485], Train Loss: 0.0266


 97%|█████████▋| 486/500 [02:25<00:04,  3.23it/s]

Epoch [486], Train Loss: 0.0249


 97%|█████████▋| 487/500 [02:25<00:04,  3.14it/s]

Epoch [487], Train Loss: 0.0252


 98%|█████████▊| 488/500 [02:25<00:03,  3.13it/s]

Epoch [488], Train Loss: 0.0265


 98%|█████████▊| 489/500 [02:25<00:03,  3.23it/s]

Epoch [489], Train Loss: 0.0251


 98%|█████████▊| 490/500 [02:26<00:03,  3.32it/s]

Epoch [490], Train Loss: 0.0250


 98%|█████████▊| 491/500 [02:26<00:02,  3.49it/s]

Epoch [491], Train Loss: 0.0250


 98%|█████████▊| 492/500 [02:26<00:02,  3.35it/s]

Epoch [492], Train Loss: 0.0251


 99%|█████████▊| 493/500 [02:27<00:02,  3.45it/s]

Epoch [493], Train Loss: 0.0239


 99%|█████████▉| 494/500 [02:27<00:01,  3.46it/s]

Epoch [494], Train Loss: 0.0246


 99%|█████████▉| 495/500 [02:27<00:01,  3.46it/s]

Epoch [495], Train Loss: 0.0256


 99%|█████████▉| 496/500 [02:28<00:01,  3.20it/s]

Epoch [496], Train Loss: 0.0252


 99%|█████████▉| 497/500 [02:28<00:00,  3.35it/s]

Epoch [497], Train Loss: 0.0247


100%|█████████▉| 498/500 [02:28<00:00,  3.29it/s]

Epoch [498], Train Loss: 0.0243


100%|█████████▉| 499/500 [02:28<00:00,  3.40it/s]

Epoch [499], Train Loss: 0.0258


100%|██████████| 500/500 [02:29<00:00,  3.35it/s]

Epoch [500], Train Loss: 0.0267
Training time: 149.175 seconds





In [267]:
torch.cuda.empty_cache()

In [268]:
# Testing phase
model.eval()
test_loss = 0.0

with torch.no_grad():
    for inputs, targets in test_loader:
        # inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
        
        if isinstance(model, nn.DataParallel):
            model = model.module  # Unwrap from DataParallel
        model = model.to('cpu')
        
        outputs = model(inputs)

        # save metrics
        mae, mape, rmse, rsqr = calculate_metric(outputs.numpy(), targets.numpy())
        print(f"Training average mean absolute error: {mae}")
        print(f"Training average mean absolute percentage error: {mape}")
        print(f"Training average root mean squared error: {rmse}")
        print(f"Training average R2: {rsqr}")

Training average mean absolute error: 0.1486234962940216
Training average mean absolute percentage error: 278.23894023895264
Training average root mean squared error: 0.22259254548104257
Training average R2: 0.5674374103546143


In [165]:
# let’s load back in our saved model
# model = MLP()
# model.load_state_dict(torch.load(MODEL_PATH))

RUN 5-fold

In [269]:
torch.cuda.empty_cache()

In [270]:
features = np.concatenate([train_features_np, test_features_np], axis=0)
features

array([[ 0.0765418 , -0.6170648 , -0.54855525, ..., -0.25331536,
        -0.20173293,  0.32425982],
       [-0.42837855,  0.5031553 ,  0.8686651 , ..., -0.25331536,
        -0.20173293,  0.32425982],
       [ 1.2546893 ,  0.31953788,  0.73418504, ..., -0.25331536,
        -0.20173293,  0.32425982],
       ...,
       [ 0.05971107,  1.0929172 ,  0.8371482 , ..., -0.25331536,
        -0.20173293,  0.32425982],
       [-2.2797532 , -0.02818421, -0.5013583 , ..., -0.25331536,
        -0.20173293,  0.32425982],
       [ 1.591303  , -0.22871129, -0.3548853 , ..., -0.25331536,
        -0.20173293,  0.32425982]], dtype=float32)

In [271]:
labels = pd.concat([train_labels, test_labels], axis=0, ignore_index=True)
labels

0       0.082481
1       0.378845
2       0.836149
3       0.987208
4       1.021458
          ...   
1720    0.471411
1721    0.823750
1722    0.241612
1723    0.762054
1724    0.199981
Name: rr1_30, Length: 1725, dtype: float64

In [274]:
# Define cross-validation setup
kf = KFold(n_splits=5, shuffle=True, random_state=42)
EPOCHS = 500
val_mae = []
val_mape = []
val_rmse = []
val_rsqr = []


for train_idx, val_idx in kf.split(features):
    # Create training and validation datasets for the current fold
    X_train_fold, X_val_fold = features[train_idx], features[val_idx]
    y_train_fold, y_val_fold = labels.values.reshape(-1, 1)[train_idx], labels.values.reshape(-1, 1)[val_idx]
    
    # scaling features
    X_train_fold[:,:len(non_category_features)] = scaler.fit_transform(X_train_fold[:,:len(non_category_features)])
    X_val_fold[:,:len(non_category_features)] = scaler.transform(X_val_fold[:,:len(non_category_features)])
        
    # Initialize the model for this fold
    odel = ResNet(input_dim=X_train_fold.shape[1], batch_norm=BATCH_NORM, **m_config)
    model = nn.DataParallel(model, device_ids = DEVICE_LIST)
    model.to(DEVICE)
    
    # define optimizer
    optimizer = getattr(optim, MODEL_CONFIG["optimizer"]["optimizer"])(model.parameters(), **optim_config)
    
    # Define the loss function and optimizer
    criterion = nn.MSELoss()
    
    # Prepare DataLoader for training
    train_dataset = CustomDataset(X_train_fold, y_train_fold)
    val_dataset = CustomDataset(X_val_fold, y_val_fold)
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=val_dataset.features.shape[0], shuffle=True)
        
    # Training of the model.
    model.train()
    running_loss = 0
    for epoch in range(EPOCHS):
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(DEVICE), target.to(DEVICE)

            optimizer.zero_grad()
            output = model(data)
            
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            
            # print statistics
            running_loss += loss.item() * data.size(0)
            if i % 100 == 99:    # print every 100 mini-batches
                print(f'[{ep + 1}, {i + 1:5d}] loss: {running_loss / 100:.3f}')


    train_loss = running_loss  / len(train_loader.dataset)
    print(f'Epoch [{ep+1}], Train Loss: {train_loss:.4f}')
            # print(f'Epoch [{ep+1}], Train Loss: {train_loss:.4f}')

    # Validation of the model.
    model.eval()
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(val_loader):
            # data, target = data.to(DEVICE), target.to(DEVICE)
            if isinstance(model, nn.DataParallel):
                model = model.module  # Unwrap from DataParallel
            model = model.to('cpu')
            outputs = model(data)
            
            # save metrics
            mae, mape, rmse, rsqr = calculate_metric(outputs.numpy(), target.numpy())
            val_mae.append(mae)
            val_mape.append(mape)
            val_rmse.append(rmse)
            val_rsqr.append(rsqr)
            

Epoch [500], Train Loss: 11.4113
Epoch [500], Train Loss: 10.1372
Epoch [500], Train Loss: 9.9146
Epoch [500], Train Loss: 9.8181
Epoch [500], Train Loss: 9.3126


In [275]:
print(f"Test average mean absolute error: {statistics.mean(val_mae)}")
print(f"Test average mean absolute percentage error: {statistics.mean(val_mape)}")
print(f"Test average root mean squared error: {statistics.mean(val_rmse)}")
print(f"Test average R2: {statistics.mean(val_rsqr)}")

Test average mean absolute error: 0.14244692027568817
Test average mean absolute percentage error: 2524.3922352790833
Test average root mean squared error: 0.21147047337581887
Test average R2: 0.5876680970191955


In [276]:
val_rmse

[0.2191690960236959,
 0.21912177033663768,
 0.21159403671562974,
 0.21641166934998218,
 0.1910557944531488]