# 1. Introduction

In [None]:
import os
import time
from datetime import datetime
import numpy as np
import pandas as pd

# pytorch related
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import Dataset, DataLoader

# utils libraries
from tqdm import tqdm
import yaml # for config file


In [None]:
# set global state
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_cpu = os.cpu_count()
print(f'This machine run on {device} with {num_cpu} cpus')

# fundtion load config
def load_config(config_path):
    with open(config_path, 'r') as file:
        config = yaml.safe_load(file)
    return config

# load config
config = load_config('/mnt/d/torch-playground/scripts/config.yaml')

# 2. Dataset and Dataloader

In [None]:
# create custom dataset class
class CustomDataset(Dataset):
    '''
    Custom dataset class for loading
    input: file path
    output: tensor
    '''
    def __init__(self, file_path, target_status, target_score):
        self.file_path = file_path
        self.target_status = target_status
        self.target_score = target_score
        self.data = pd.read_csv(file_path)

        self.X = self.data.drop([target_status, target_score], axis=1)
        self.y_status = self.data[target_status]
        self.y_score = self.data[target_score]

    def __len__(self):
        '''
        return the number of samples
        '''
        return len(self.data)
    
    def __getitem__(self, idx):
        '''
        return the sample at index idx
        if use .reshape(-1, 1) to add dimension --> every sample will be shape (1,1)
        when batched, it will be shape (batch_size, 1, 1)
        if use .unsqueeze(-1) to add dimension --> every sample will be shape (1,)
        when batched, it will be shape (batch_size, 1); use .unsqueeze(-1) recommended
        '''
        
        X = torch.tensor(self.X.iloc[idx].values, dtype=torch.float32)
        target_status = torch.tensor(self.y_status.iloc[idx], dtype=torch.float32).unsqueeze(-1) # add dimension
        target_score = torch.tensor(self.y_score.iloc[idx], dtype=torch.float32).unsqueeze(-1) # add dimension
        return X, target_score, target_status
                

# 3. Project Hyperparameters

In [None]:
# set hyperparameters
# data 
train_path = config['data']['train_path']
val_path = config['data']['val_path']
test_path = config['data']['test_path']
target_status = config['data']['target_status']
target_score = config['data']['target_score']
batch_size = config['training']['batch_size']

# model
input_size = config['model']['input_size']
hidden_size_1 = config['model']['hidden_size_1']
hidden_size_2 = config['model']['hidden_size_2']
hidden_size_3 = config['model']['hidden_size_3']
output_size_status = config['model']['output_size_status']
output_size_score = config['model']['output_size_score']

# training
learning_rate = config['training']['learning_rate']
epochs = config['training']['epochs']
patience = config['training']['patience']
min_delta = config['training']['min_delta']

# logging
timestamp = datetime.now().strftime('%Y_%m_%d_%H_%M_%S')

# checkpoint directory
checkpoint_dir = f"{config['logging']['checkpoint_dir']}/{timestamp}"

# tensorboard log directory
tensorboard_log_dir = f'{config['logging']['tensorboard_log_dir']}/{timestamp}'
writer = SummaryWriter(log_dir=tensorboard_log_dir) # initialise tensorboard writer

In [None]:
# train dataset and train loader
train_dataset = CustomDataset(file_path=train_path, target_status=target_status, target_score=target_score)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_cpu)

# validation dataset and validation loader
val_dataset = CustomDataset(file_path=val_path, target_status=target_status, target_score=target_score)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_cpu)

# test dataset and test loader
test_dataset = CustomDataset(test_path, target_status=target_status, target_score=target_score)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_cpu)

In [None]:
for i, (X, y_status, y_score) in enumerate(train_loader):
    print(f'this is shape of X: {X.shape}')
    print(f'this is shape of y_status: {y_status.shape}')
    print(f'this is shape of y_score: {y_score.shape}')
    break

# 4. Model Architecture

In [None]:
# model architecture
class LoanModel(nn.Module):
    '''
    Docstring for LoanModel
    '''
    def __init__(self, input_size, hidden_size_1, hidden_size_2, hidden_size_3, output_size_score, output_size_status):
        '''
        Docstring for __init__
        '''
        super().__init__() # no need to pass class and self

        # shared layers
        self.fc1 = nn.Linear(input_size, hidden_size_1)
        self.fc2 = nn.Linear(hidden_size_1, hidden_size_2)
        self.fc3 = nn.Linear(hidden_size_2, hidden_size_3)
        
        # output layer for regression
        self.regression_head = nn.Linear(hidden_size_3, output_size_score) # output size for regression

        # output layer for binary classification
        self.classification_head = nn.Linear(hidden_size_3, output_size_status) # output size for classification

    def forward(self, x):
        '''
        Docstring for forward
        '''
        # shared layers
        # relu activation function for hidden layers
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))

        # regression head
        score_output = self.regression_head(x) # no activation function for regression

        # classification head
        # sigmoid for binary classification
        status_output = torch.sigmoid(self.classification_head(x))

        return score_output, status_output


# 5. Loss Function 

In [None]:
# loss for credit score
score_loss_fn = nn.MSELoss(reduction='mean')
status_loss_fn = nn.BCELoss(reduction='mean')

# 6. Optimiser

In [None]:
#  instantiate model
model = LoanModel(input_size, hidden_size_1, hidden_size_2, hidden_size_3, output_size_score, output_size_status).to(device)

# score optimizer
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# 7. Train and Evaluate Functions

In [None]:
def train_one_epoch(model, train_loader, score_loss_fn, status_loss_fn, optimiser, device):
    '''
    Docstring for train_one_epoch
    :type device: '''

    # move model to device
    model.to(device)

    # set model to training mode
    model.train()

    # initialise metrics
    epoch_loss = 0.0 # total loss for each epoch
    total_samples = 0.0 # total samples for each epoch

    # loop through the training data
    for X, target_score, target_status in train_loader:
        X, target_score, target_status = X.to(device), target_score.to(device), target_status.to(device) # move data to device

        # forward pass
        score_output, status_output = model(X)

        # calculate training loss for score
        score_loss_training = score_loss_fn(score_output, target_score)

        # calculate training loss for status
        status_loss_training = status_loss_fn(status_output, target_status)

        # total training loss
        total_loss_training = score_loss_training + status_loss_training

        # backward pass
        optimiser.zero_grad()
        total_loss_training.backward()
        optimiser.step()

        # metrics computation
        batch_size = X.size(0)
        epoch_loss += total_loss_training.item() * batch_size
        total_samples += batch_size
    
    # average training loss
    avg_training_loss = epoch_loss / total_samples
    print(f'Average Training Loss: {avg_training_loss:.4f}')

    return avg_training_loss

def evaluate_model(model, val_loader, score_loss_fn, status_loss_fn, device):
    '''
    Docstring for evaluate_model
    :type device: '''
    # set model to evaluation mode
    model.eval()
    epoch_loss = 0.0
    total_samples = 0.0

    # no gradient computation
    with torch.no_grad():
        # loop through the validation data
        for X, target_score, target_status in val_loader:
            X, target_score, target_status = X.to(device), target_status.to(device), target_score.to(device)

            # forward pass
            score_output, status_output = model(X)

            # calculate score loss
            score_loss_val = score_loss_fn(score_output, target_score)

            # calculate status loss
            status_loss_val = status_loss_fn(status_output, target_status)

            # total loss
            total_loss = score_loss_val + status_loss_val

            # metrics computation
            batch_size = X.size(0)
            total_samples += batch_size
            epoch_loss += total_loss.item() * batch_size
    
    # average validation loss
    avg_val_loss = epoch_loss / total_samples
    print(f'Average Validation Loss: {avg_val_loss:.4f}')

    return avg_val_loss


# 8. Early Stopping and Model Checkpoint

In [None]:
# === Early Stopping logic ===
class EarlyStoping:
    def __init__(self, patience=5, min_delta=0.0):
        self.patience = patience
        self.min_delta = min_delta
        self.best_val_loss = float('inf')
        self.wait = 0
        self.stopped_train = False

    def __call__(self, val_loss):
        if self.best_val_loss - val_loss > self.min_delta:
            self.best_val_loss = val_loss
            self.wait = 0
        else:
            self.wait += 1
            if self.wait >= self.patience:
                self.stopped_train = True
        return self.stopped_train
    
# === Save the best model ===
def save_checkpoint(model, file_name):
    checkpoint_path = checkpoint_dir
    os.makedirs(checkpoint_path, exist_ok=True)
    model_save_path = f'{checkpoint_path}/{file_name}.pth'
    torch.save(model.state_dict(), model_save_path)
    print(f'Model saved to {model_save_path}')

# 9. Main Train Loop

In [14]:
# === Main Train Loop ===

early_stoping = EarlyStoping(patience=patience, min_delta=min_delta)

for epoch in range(epochs):
    # train for one epoch
    train_loss = train_one_epoch(model, train_loader, score_loss_fn, status_loss_fn, optimizer, device)

    # evaluate on validation set
    val_loss = evaluate_model(model, val_loader, score_loss_fn, status_loss_fn, device)

    print(f'Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')

    # tensorboard logging
    writer.add_scalar('Loss/train', train_loss, global_step=epoch)
    writer.add_scalar('Loss/val', val_loss, global_step=epoch)
    writer.flush()

    # # early stoping
    # if early_stoping(val_loss):
    #     print(f'Early stopping triggered at {epoch+1} epoch')
    #     break
    # # save the best model
    # # Ensure the directory exists before saving the model
    # # os.makedirs(f'{checkpoint_dir}/{timestamp}', exist_ok=True)
    # save_checkpoint(model, file_name='loan_best_model')

Average Training Loss: 0.2287
Average Validation Loss: 3.5134
Epoch 1/100, Train Loss: 0.2287, Val Loss: 3.5134
Average Training Loss: 0.2277
Average Validation Loss: 3.4943
Epoch 2/100, Train Loss: 0.2277, Val Loss: 3.4943
Average Training Loss: 0.2271
Average Validation Loss: 3.5910
Epoch 3/100, Train Loss: 0.2271, Val Loss: 3.5910
Average Training Loss: 0.2266
Average Validation Loss: 3.6130
Epoch 4/100, Train Loss: 0.2266, Val Loss: 3.6130
Average Training Loss: 0.2262
Average Validation Loss: 3.6552
Epoch 5/100, Train Loss: 0.2262, Val Loss: 3.6552
Average Training Loss: 0.2251
Average Validation Loss: 3.6375
Epoch 6/100, Train Loss: 0.2251, Val Loss: 3.6375
Average Training Loss: 0.2247
Average Validation Loss: 3.6884
Epoch 7/100, Train Loss: 0.2247, Val Loss: 3.6884
Average Training Loss: 0.2241
Average Validation Loss: 3.6700
Epoch 8/100, Train Loss: 0.2241, Val Loss: 3.6700
Average Training Loss: 0.2236
Average Validation Loss: 3.6616
Epoch 9/100, Train Loss: 0.2236, Val Loss:

In [None]:
# # Load the best model checkpoint
# # Automatically find the latest checkpoint based on the timestamp in the filename
# import glob
# import os

# checkpoint_dir = config['logging']['checkpoint_dir']
# latest_checkpoint = max(glob.glob(os.path.join(checkpoint_dir, 'loan_score_best_model_*.pth')), key=os.path.getctime)

# # Load the latest model checkpoint
# model.load_state_dict(torch.load(latest_checkpoint))
# model.to(device)
# model.eval()

# # Evaluate the model on the test dataset
# test_score_loss, test_status_loss, test_avg_loss = evaluate_model(
#     model=model,
#     val_loader=test_loader,
#     score_loss_fn=score_loss_fn,
#     status_loss_fn=status_loss_fn,
#     device=device
# )

# # # log test loss
# # writer.add_scalar('score_loss/Test', test_score_loss)
# # writer.add_scalar('status_loss/Test', test_status_loss)
# # writer.add_scalar('Average_Loss/Test', test_avg_loss)
# writer.add_scalar('Average_Loss/Training', test_avg_loss, global_step=final_epoch)
# writer.add_scalar('Average_Loss/Validation', test_avg_loss, global_step=final_epoch)

# # Flush  and close the writer
writer.flush()
writer.close()