In [3]:
import math
from typing import List
import sys
import os
from pathlib import Path
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm
from torch.utils.data import DataLoader, Dataset
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.discriminant_analysis import StandardScaler
from sklearn.model_selection import train_test_split
from copy import deepcopy
import torch.optim as optim
import optuna
from sklearn.model_selection import KFold
from optuna.trial import TrialState
import statistics
import time
from utils import calculate_metric

In [2]:
sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), "..")))

In [None]:
DATA_FOLDER = "../data"
TRAIN_FEATURES = DATA_FOLDER + '/train_features2.xlsx'
TRAIN_LABELS = DATA_FOLDER + "/train_labels2.xlsx"
TEST_FEATURES = DATA_FOLDER + "/test_features2.xlsx"
TEST_LABELS = DATA_FOLDER + "/test_labels2.xlsx"

OUTPUT_PATH = '../output/resnet'
TRAINING_OUTPUT_FILE = '../output/train_predictions.xlsx'
TEST_OUTPUT_FILE = '../output/test_predictions.xlsx'
OOF_PREDICTIONS_FILE = '../output/oof_predictions.xlsx'
SHEET_NAME = "resnet"

MODEL_PATH = OUTPUT_PATH + '/best_resnet_5fold__model.pth'
BATCH_SIZE = 64
BATCH_NORM = True

DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
DEVICE_LIST = [0, 1, 3]

EPOCH = 500

Path(OUTPUT_PATH).mkdir(parents=True, exist_ok=True)

In [5]:
train_feature_df = pd.read_excel(TRAIN_FEATURES)
train_label_df = pd.read_excel(TRAIN_LABELS)
test_feature_df = pd.read_excel(TEST_FEATURES)
test_label_df = pd.read_excel(TEST_LABELS)

In [6]:
train_feature_df.shape

(1293, 317)

In [7]:
feature_list = train_feature_df.columns
category_feature_key = ['currency', 'seniorioty_adj', 'domicile_country',	'exchange_country',	'Industry_sector',	'Industry_group',	'Industry_subgroup', 'event_type',
                        'event_type_subcategory_sum']
category_features = [i for i in feature_list if any(sub in i for sub in category_feature_key)]
non_category_features = [i for i in feature_list if i not in category_features]

print(len(category_features))
print(len(non_category_features))

162
155


In [8]:
# Normalize the data
# Prepare the ColumnTransformer
scaler = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), non_category_features)   # StandardScaler()
    ],
    remainder='passthrough'  # Leave categorical features untouched
)

In [9]:
class CustomDataset(Dataset):
    def __init__(self, features, labels):
        self.features = features
        self.labels = labels
    
    def __len__(self):
        return len(self.features)
    
    def __getitem__(self, idx):
        feature = self.features[idx]
        label = self.labels[idx]
        return torch.tensor(feature, dtype=torch.float32), torch.tensor(label, dtype=torch.float32)

TUNING PARAMETERS

In [10]:
# Define the ResNetBlock
class ResNetBlock(nn.Module):
    def __init__(self, in_features: int, out_features: int, dropout=0.5, batch_norm = True, activation_name='ReLU', negative_slope=0.01):
        super(ResNetBlock, self).__init__()
        if batch_norm:
            self.batch_norm = nn.BatchNorm1d(in_features)
        else:
            self.batch_norm = nn.Identity()

        self.linear = nn.Linear(in_features, out_features)
        self.relu = getattr(nn, activation_name)()
        
        if self.relu == nn.LeakyReLU():
            self.relu = self.relu(negative_slope)
            
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(out_features, in_features)

    def forward(self, x):
        resisual = x
        out = self.batch_norm(resisual)
        out = self.linear(out)
        out = self.relu(out)
        out = self.dropout(out)
        out = self.linear2(out)
        out = self.dropout(out)
        out += resisual # Add the input tensor to the output
        return out

# Define the Prediction layer
class Prediction(nn.Module):
    def __init__(self, in_features: int, out_features: int, batchNorm = True):
        super(Prediction, self).__init__()
        if batchNorm:
            self.batch_norm = nn.BatchNorm1d(in_features)
        else:
            self.batch_norm = nn.Identity()

        self.relu = nn.ReLU()
        self.linear = nn.Linear(in_features, out_features)

    def forward(self, x):
        x = self.batch_norm(x)
        x = self.relu(x)
        x = self.linear(x)
        return x

# Define the ResNet model
class ResNet(nn.Module):
    def __init__(self, input_dim: int, dropout: float, num_blocks: int, out_features_list: List[int], batch_norm = True, activation_name='ReLU', negative_slope=0.01):
        super(ResNet, self).__init__()
        self.layers = nn.ModuleList()
        
        # First linear layer to convert input_dim to the first out_features
        self.layers.append(nn.Linear(input_dim, out_features_list[0]))
        
        # Add the ResNet blocks
        for i in range(num_blocks):
            if i > 0:
                self.layers.append(ResNetBlock(out_features_list[0], out_features_list[i], dropout, batch_norm = batch_norm, activation_name=activation_name, negative_slope=negative_slope))
        
        # Add the prediction layer
        self.prediction = Prediction(out_features_list[0], 1, batchNorm = batch_norm)

    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
        x = self.prediction(x)
        return x

In [11]:
# set random seed
torch.manual_seed(42)
torch.cuda.manual_seed(42)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [12]:
def objective(trial):    
    # Generate the model.
    num_blocks = trial.suggest_int('num_blocks', 1, 5)
    
    out_features_list = []
    for i in range(num_blocks):
        out_features = trial.suggest_int(f'out_features_{i}', 10, 128)
        out_features_list.append(out_features)

    p = trial.suggest_float("dropout", 0, 0.5)
    # activation_name = trial.suggest_categorical(f'activation_{i}', ['ReLU', 'Tanh', 'LeakyReLU'])
    # negative_slope = 0.01
    # if activation_name == 'LeakyReLU':
    #     negative_slope = trial.suggest_float(f"negative_slope_{i}", 0.01, 1)
        

    # model = ResNet(input_dim=train_features.shape[1], num_blocks=num_blocks, dropout=p, out_features_list=out_features_list, batch_norm=BATCH_NORM).to(DEVICE)

    # Generate the optimizers.
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"])
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    weight_decay = trial.suggest_float("weight_decay", 1e-9, 1e-1, log=True)


    # Define cross-validation setup
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    val_losses = []

    for train_idx, val_idx in kf.split(train_feature_df):
        # Create training and validation datasets for the current fold
        X_train_fold, X_val_fold = train_feature_df.iloc[train_idx], train_feature_df.iloc[val_idx]
        y_train_fold, y_val_fold = train_label_df.iloc[train_idx], train_label_df.iloc[val_idx]
        
        # scaling features
        X_train_fold = scaler.fit_transform(X_train_fold)
        X_val_fold = scaler.transform(X_val_fold)
            
        # Initialize the model for this fold
        model = ResNet(input_dim=X_train_fold.shape[1], num_blocks=num_blocks, dropout=p, out_features_list=out_features_list, batch_norm=BATCH_NORM) #activation_name=activation_name, negative_slope=negative_slope)
        model = nn.DataParallel(model, device_ids = DEVICE_LIST)
        model.to(DEVICE)
        
        # define optimizer
        if optimizer_name == "Adam":
         optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr, weight_decay=weight_decay)
        else:
            momentum = trial.suggest_float("momentum", 1e-9, 0.95, log=True)
            optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr, weight_decay=weight_decay, momentum=momentum)
        
        # Define the loss function and optimizer
        criterion = nn.MSELoss()
        
        # Prepare DataLoader for training
        train_dataset = CustomDataset(X_train_fold, y_train_fold.to_numpy())
        val_dataset = CustomDataset(X_val_fold, y_val_fold.to_numpy())
        train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=True)
            
        # Training of the model.
        model.train()
        for epoch in range(EPOCH):
            for batch_idx, (data, target) in enumerate(train_loader):
                data, target = data.to(DEVICE), target.to(DEVICE)

                optimizer.zero_grad()
                output = model(data)
                # print("shape", output.shape, target.shape)
                loss = criterion(output, target)
                loss.backward()
                optimizer.step()

        # Validation of the model.
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for batch_idx, (data, target) in enumerate(val_loader):
                data, target = data.to(DEVICE), target.to(DEVICE)
                output = model(data)
                val_loss = criterion(output, target).item()
                val_losses.append(val_loss**0.5) #rmse

        trial.report(val_loss, epoch)

    # Return the average validation loss across all folds
    return np.mean(val_losses)

In [15]:
# empty cache first
torch.cuda.empty_cache()

In [23]:
# 3. Create a study object and optimize the objective function.
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50, timeout=600)

pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))


[I 2024-08-25 08:00:53,526] A new study created in memory with name: no-name-72a640f4-9bcd-4308-b3f6-dd0a6d3280bf
[I 2024-08-25 08:01:14,292] Trial 0 finished with value: 0.9134922794366809 and parameters: {'num_blocks': 2, 'out_features_0': 108, 'out_features_1': 87, 'dropout': 0.34993481535154414, 'optimizer': 'Adam', 'lr': 2.4568773803093256e-05, 'weight_decay': 1.203515062303189e-09}. Best is trial 0 with value: 0.9134922794366809.
[I 2024-08-25 08:01:49,767] Trial 1 finished with value: 0.27579441390843945 and parameters: {'num_blocks': 5, 'out_features_0': 57, 'out_features_1': 123, 'out_features_2': 77, 'out_features_3': 69, 'out_features_4': 47, 'dropout': 0.3932207461407539, 'optimizer': 'RMSprop', 'lr': 0.011406191167181808, 'weight_decay': 2.838472060753255e-07, 'momentum': 8.22073384149122e-09}. Best is trial 1 with value: 0.27579441390843945.
[I 2024-08-25 08:02:10,267] Trial 2 finished with value: 0.5380747205440599 and parameters: {'num_blocks': 2, 'out_features_0': 28, 

Study statistics: 
  Number of finished trials:  22
  Number of pruned trials:  0
  Number of complete trials:  22
Best trial:
  Value:  0.24069473629096774
  Params: 
    num_blocks: 3
    out_features_0: 24
    out_features_1: 91
    out_features_2: 54
    dropout: 0.4049073766569241
    optimizer: RMSprop
    lr: 0.0002714543459380886
    weight_decay: 7.006491903330438e-08
    momentum: 8.011034123049581e-05


In [24]:
trial.params

{'num_blocks': 3,
 'out_features_0': 24,
 'out_features_1': 91,
 'out_features_2': 54,
 'dropout': 0.4049073766569241,
 'optimizer': 'RMSprop',
 'lr': 0.0002714543459380886,
 'weight_decay': 7.006491903330438e-08,
 'momentum': 8.011034123049581e-05}

In [None]:
best_params = trial.params

In [14]:
MODEL_CONFIG = {"out_features_list": [], "dropout": 0, "optimizer": {}, "num_blocks": 0}

for key, value in best_params.items():
    if "out_features" in key:
        MODEL_CONFIG["out_features_list"].append(value)
    elif "dropout" in key:
        MODEL_CONFIG["dropout"] = value
    elif "negative_slope" in key:
        MODEL_CONFIG["negative_slope"] = value
    elif "activation" in key:
        MODEL_CONFIG["activation_name"] = value
    elif "num_blocks" in key:
        MODEL_CONFIG["num_blocks"] = value
    elif "batch_size" in key:
        BATCH_SIZE = int(value)
    else:
        MODEL_CONFIG["optimizer"][key] = value

In [15]:
MODEL_CONFIG

{'out_features_list': [24, 91, 54],
 'dropout': 0.4049073766569241,
 'optimizer': {'optimizer': 'RMSprop',
  'lr': 0.0002714543459380886,
  'weight_decay': 7.006491903330438e-08,
  'momentum': 8.011034123049581e-05},
 'num_blocks': 3}

In [16]:
BATCH_SIZE

64

RUNNING THE MODEL

In [17]:
# Transform training data
train_features = scaler.fit_transform(train_feature_df)
test_features = scaler.transform(test_feature_df)

new_feature_list = non_category_features + category_features

In [18]:
# Create dataset instances
train_dataset = CustomDataset(train_features, train_label_df.to_numpy())
test_dataset = CustomDataset(test_features, test_label_df.to_numpy())

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=test_feature_df.shape[0], shuffle=True)

In [19]:
m_config = deepcopy(MODEL_CONFIG)
del m_config["optimizer"]
model = ResNet(input_dim=train_features.shape[1], batch_norm=BATCH_NORM, **m_config)
model

ResNet(
  (layers): ModuleList(
    (0): Linear(in_features=317, out_features=24, bias=True)
    (1): ResNetBlock(
      (batch_norm): BatchNorm1d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (linear): Linear(in_features=24, out_features=91, bias=True)
      (relu): ReLU()
      (dropout): Dropout(p=0.4049073766569241, inplace=False)
      (linear2): Linear(in_features=91, out_features=24, bias=True)
    )
    (2): ResNetBlock(
      (batch_norm): BatchNorm1d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (linear): Linear(in_features=24, out_features=54, bias=True)
      (relu): ReLU()
      (dropout): Dropout(p=0.4049073766569241, inplace=False)
      (linear2): Linear(in_features=54, out_features=24, bias=True)
    )
  )
  (prediction): Prediction(
    (batch_norm): BatchNorm1d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
    (linear): Linear(in_features=24, out_features=1, bias=True)
  

In [20]:
# empty cache first
torch.cuda.empty_cache()

In [25]:
model = nn.DataParallel(model, device_ids = DEVICE_LIST)
model.to(DEVICE)

DataParallel(
  (module): ResNet(
    (layers): ModuleList(
      (0): Linear(in_features=317, out_features=24, bias=True)
      (1): ResNetBlock(
        (batch_norm): BatchNorm1d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (linear): Linear(in_features=24, out_features=91, bias=True)
        (relu): ReLU()
        (dropout): Dropout(p=0.4049073766569241, inplace=False)
        (linear2): Linear(in_features=91, out_features=24, bias=True)
      )
      (2): ResNetBlock(
        (batch_norm): BatchNorm1d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (linear): Linear(in_features=24, out_features=54, bias=True)
        (relu): ReLU()
        (dropout): Dropout(p=0.4049073766569241, inplace=False)
        (linear2): Linear(in_features=54, out_features=24, bias=True)
      )
    )
    (prediction): Prediction(
      (batch_norm): BatchNorm1d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU()


In [21]:
# define optimizer
optim_config = deepcopy(MODEL_CONFIG["optimizer"])
del optim_config["optimizer"]

optimizer = getattr(optim, MODEL_CONFIG["optimizer"]["optimizer"])(model.parameters(), **optim_config)
optimizer

RMSprop (
Parameter Group 0
    alpha: 0.99
    centered: False
    differentiable: False
    eps: 1e-08
    foreach: None
    lr: 0.0002714543459380886
    maximize: False
    momentum: 8.011034123049581e-05
    weight_decay: 7.006491903330438e-08
)

In [27]:
EPOCH = 500
criterion = nn.MSELoss()
start_time = time.time()

for ep in tqdm(range(EPOCH)):

    model.train()
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
        # print(inputs.shape, labels.shape)
        
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)

        loss = criterion(outputs, labels)
        
        loss.backward()

        optimizer.step()

        # print statistics
        running_loss += loss.item() * inputs.size(0)
        if i % 100 == 99:    # print every 100 mini-batches
            print(f'[{ep + 1}, {i + 1:5d}] loss: {running_loss / 100:.3f}')

train_loss = running_loss  / len(train_loader.dataset)
print(f'Epoch [{ep+1}], Train Loss: {train_loss:.4f}')

# print out training time
elapsed_time = time.time() - start_time
print(f"Training time: {elapsed_time:.3f} seconds")

100%|██████████| 500/500 [01:57<00:00,  4.24it/s]

Epoch [500], Train Loss: 0.0180
Training time: 118.053 seconds





In [39]:
torch.save(model.state_dict(), MODEL_PATH)

In [30]:
# Testing phase
model.eval()
# Lists to store predictions and targets
all_predictions = []
all_targets = []

with torch.no_grad():
    for inputs, labels in train_loader:
        # inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
        
        if isinstance(model, nn.DataParallel):
            model = model.module  # Unwrap from DataParallel
        model = model.to('cpu')
        
        outputs = model(inputs)
        # Collect predictions and targets
        all_predictions.extend(outputs.cpu().numpy())
        all_targets.extend(labels.cpu().numpy())

# Convert to numpy arrays
y_pred = np.array(all_predictions)
y_true = np.array(all_targets)

# save metrics
mae, mape, rmse, rsqr = calculate_metric(y_pred, y_true)
print(f"Training average mean absolute error: {mae}")
print(f"Training average mean absolute percentage error: {mape}")
print(f"Training average root mean squared error: {rmse}")
print(f"Training average R2: {rsqr}")

Training average mean absolute error: 0.07803716510534286
Training average mean absolute percentage error: 298.17864894866943
Training average root mean squared error: 0.12046847110869388
Training average R2: 0.8662272691726685


In [34]:
# Testing phase
model.eval()
all_predictions = []
all_targets = []

with torch.no_grad():
    for inputs, labels in test_loader:
        # inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
        
        if isinstance(model, nn.DataParallel):
            model = model.module  # Unwrap from DataParallel
        model = model.to('cpu')
        
        outputs = model(inputs)
        # Collect predictions and targets
        all_predictions.extend(outputs.cpu().numpy())
        all_targets.extend(labels.cpu().numpy())

# Convert to numpy arrays
y_test_pred = np.array(all_predictions)
y_test_true = np.array(all_targets)

# save metrics
mae, mape, rmse, rsqr = calculate_metric(y_test_pred, y_test_true)
print(f"Test average mean absolute error: {mae}")
print(f"Test average mean absolute percentage error: {mape}")
print(f"Test average root mean squared error: {rmse}")
print(f"Test average R2: {rsqr}")

Test average mean absolute error: 0.16242802143096924
Test average mean absolute percentage error: 259.7667455673218
Test average root mean squared error: 0.24180740863491207
Test average R2: 0.4895339012145996


In [36]:
# save train predictions
train_predictions_df = pd.DataFrame({"predictions": y_pred.ravel()})
with pd.ExcelWriter(TRAINING_OUTPUT_FILE, mode='a', engine='openpyxl', if_sheet_exists='replace') as writer:
    # Write the new DataFrame to a new sheet
    train_predictions_df.to_excel(writer, sheet_name=SHEET_NAME, index=False)
    
# save test predictions
test_predictions_df = pd.DataFrame({"predictions": y_test_pred.ravel()})
with pd.ExcelWriter(TEST_OUTPUT_FILE, mode='a', engine='openpyxl', if_sheet_exists='replace') as writer:
    # Write the new DataFrame to a new sheet
    test_predictions_df.to_excel(writer, sheet_name=SHEET_NAME, index=False)


5 fold CV

In [22]:
features = pd.concat([train_feature_df, test_feature_df], axis=0, ignore_index=True)
features

Unnamed: 0,coupon rate,SP500 MD,Average daily 1-year SP500 return,Ratio to MA,US Corporate Bond Yield Spread,US Corporate Bond Yield Spread(3-5 year),US Corporate Bond Yield Spread(5-7 year),US Corporate Bond Yield Spread(7-10 year),US Corporate Bond Yield Spread(10+ year),US Generic Govt 3 Month Yield,...,event_type_subcategory_sum_Missing Coupon payment only,event_type_subcategory_sum_Missing Interest payment,event_type_subcategory_sum_Missing Loan payment,event_type_subcategory_sum_Missing Principal payment,event_type_subcategory_sum_Others,event_type_subcategory_sum_Pre-Negotiated Chapter 11,event_type_subcategory_sum_Protection,event_type_subcategory_sum_Receivership,event_type_subcategory_sum_Rehabilitation,event_type_subcategory_sum_Restructuring
0,7.500,-117.46020,-0.000189,125.407139,177.213028,134.012054,198.8153,191.364395,223.346344,0.1983,...,True,False,False,False,False,False,False,False,False,False
1,6.000,166.38276,0.000768,-4.603446,101.613617,77.032829,123.3998,105.932022,139.111115,0.0355,...,False,False,False,False,False,False,False,False,False,False
2,11.000,119.85752,0.000678,-11.950380,104.545959,77.416649,129.4317,111.818001,139.717407,0.0101,...,False,False,False,False,False,False,False,False,False,False
3,9.125,653.51208,0.001638,-2.494861,90.736633,64.654129,95.3731,92.141212,121.666237,0.0152,...,False,False,False,False,False,False,False,False,False,False
4,9.250,231.89472,0.000664,4.823413,98.533821,68.759308,93.4174,107.424469,139.741165,1.2865,...,False,False,False,False,True,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1720,3.250,147.33344,0.000556,3.538252,128.976395,95.360374,138.8445,138.946106,172.733887,0.0000,...,False,False,False,False,False,False,False,False,False,False
1721,10.000,175.31656,0.000554,-4.067337,121.170998,92.879501,123.2500,131.104904,162.916901,0.7516,...,False,True,False,False,False,False,False,False,False,False
1722,7.450,315.81748,0.000747,8.604100,126.786606,89.018188,114.9728,131.522430,171.701096,0.0864,...,False,False,False,False,False,False,False,False,False,False
1723,0.500,31.75120,-0.000157,-4.373852,126.595230,105.460007,121.6212,147.382416,163.772141,4.8375,...,False,False,False,False,False,False,False,False,False,False


In [23]:
labels = pd.concat([train_label_df, test_label_df], axis=0, ignore_index=True)
labels

Unnamed: 0,rr1_30
0,0.082481
1,0.378845
2,0.836149
3,0.987208
4,1.021458
...,...
1720,0.471411
1721,0.823750
1722,0.241612
1723,0.762054


In [None]:
# Define cross-validation setup
kf = KFold(n_splits=5, shuffle=True, random_state=42)
EPOCHS = 500
val_mae = []
val_mape = []
val_rmse = []
val_rsqr = []
oof_predictions = np.zeros(labels.shape[0])


for train_idx, val_idx in kf.split(features):
    all_predictions = []
    all_targets = []
    # Create training and validation datasets for the current fold
    X_train_fold, X_val_fold = features.iloc[train_idx], features.iloc[val_idx]
    y_train_fold, y_val_fold = labels.iloc[train_idx], labels.iloc[val_idx]
    
    # scaling features
    X_train_fold = scaler.fit_transform(X_train_fold)
    X_val_fold = scaler.transform(X_val_fold)
        
    # Initialize the model for this fold
    model = ResNet(input_dim=X_train_fold.shape[1], batch_norm=BATCH_NORM, **m_config)
    model = nn.DataParallel(model, device_ids = DEVICE_LIST)
    model.to(DEVICE)
    
    # define optimizer
    optimizer = getattr(optim, MODEL_CONFIG["optimizer"]["optimizer"])(model.parameters(), **optim_config)
    
    # Define the loss function and optimizer
    criterion = nn.MSELoss()
    
    # Prepare DataLoader for training
    train_dataset = CustomDataset(X_train_fold, y_train_fold.to_numpy())
    val_dataset = CustomDataset(X_val_fold, y_val_fold.to_numpy())
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=val_dataset.features.shape[0], shuffle=True)
        
    # Training of the model.
    model.train()
    for epoch in range(EPOCHS):
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(DEVICE), target.to(DEVICE)

            optimizer.zero_grad()
            output = model(data)
            
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            # print(f'Epoch [{ep+1}], Train Loss: {train_loss:.4f}')

    # Validation of the model.
    model.eval()
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(val_loader):
            # data, target = data.to(DEVICE), target.to(DEVICE)
            if isinstance(model, nn.DataParallel):
                model = model.module  # Unwrap from DataParallel
            model = model.to('cpu')
            outputs = model(data)
            
            # Collect predictions and targets
        all_predictions.extend(outputs.cpu().numpy())
        all_targets.extend(target.cpu().numpy())

    # Convert to numpy arrays
    y_test_pred = np.array(all_predictions).ravel()
    y_test_true = np.array(all_targets).ravel()
    oof_predictions[val_idx] = y_test_pred

    # save metrics
    mae, mape, rmse, rsqr = calculate_metric(y_test_pred, y_test_true)
    val_mae.append(mae)
    val_mape.append(mape)
    val_rmse.append(rmse)
    val_rsqr.append(rsqr)

In [None]:
print(f"Test average mean absolute error: {statistics.mean(val_mae)}")
print(f"Test average mean absolute percentage error: {statistics.mean(val_mape)}")
print(f"Test average root mean squared error: {statistics.mean(val_rmse)}")
print(f"Test average R2: {statistics.mean(val_rsqr)}")

Test average mean absolute error: 0.15643084049224854
Test average mean absolute percentage error: 4577.370375394821
Test average root mean squared error: 0.24134598316855008
Test average R2: 0.4652834296226501


In [None]:
oof_predictions_df = pd.DataFrame({"predictions": oof_predictions.ravel()})
with pd.ExcelWriter(OOF_PREDICTIONS_FILE, mode='a', engine='openpyxl', if_sheet_exists='replace') as writer:
    # Write the new DataFrame to a new sheet
    oof_predictions_df.to_excel(writer, sheet_name=SHEET_NAME, index=False)