# 1. Introduction

In [1]:
import os
import time
from datetime import datetime
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import yaml
# set global state
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_cpu = os.cpu_count()
print(f'This machine run on {device} with {num_cpu} cpus')

This machine run on cuda with 8 cpus


In [2]:
def load_config(config_path):
    with open(config_path, 'r') as file:
        config = yaml.safe_load(file)
    return config

# load config
config = load_config('/mnt/d/torch-playground/scripts/config.yaml')

# 2. Dataset and Dataloader

In [3]:
# create custom dataset class
class CustomDataset(Dataset):
    '''
    Custom dataset class for loading
    input: file path
    output: tensor
    '''
    def __init__(self, file_path, target_status, target_score):
        self.file_path = file_path
        self.target_status = target_status
        self.target_score = target_score
        self.data = pd.read_csv(file_path)

        self.X = self.data.drop([target_status, target_score], axis=1)
        self.y_status = self.data[target_status]
        self.y_score = self.data[target_score]

    def __len__(self):
        '''
        return the number of samples
        '''
        return len(self.data)
    
    def __getitem__(self, idx):
        '''
        return the sample at index idx
        if use .reshape(-1, 1) to add dimension --> every sample will be shape (1,1)
        when batched, it will be shape (batch_size, 1, 1)
        if use .unsqueeze(-1) to add dimension --> every sample will be shape (1,)
        when batched, it will be shape (batch_size, 1); use .unsqueeze(-1) recommended
        '''
        
        X = torch.tensor(self.X.iloc[idx].values, dtype=torch.float32)
        target_status = torch.tensor(self.y_status.iloc[idx], dtype=torch.float32).unsqueeze(-1) # add dimension
        target_score = torch.tensor(self.y_score.iloc[idx], dtype=torch.float32).unsqueeze(-1) # add dimension
        return X, target_score, target_status
                

In [4]:
# set hyperparameters
# data 
train_path = config['data']['train_path']
val_path = config['data']['val_path']
test_path = config['data']['test_path']
target_status = config['data']['target_status']
target_score = config['data']['target_score']
batch_size = config['training']['batch_size']

# model
input_size = config['model']['input_size']
hidden_size_1 = config['model']['hidden_size_1']
hidden_size_2 = config['model']['hidden_size_2']
hidden_size_3 = config['model']['hidden_size_3']
output_size_status = config['model']['output_size_status']
output_size_score = config['model']['output_size_score']

# training
learning_rate = config['training']['learning_rate']
num_epochs = config['training']['num_epochs']

# logging
timestamp = datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
checkpoint_dir = config['logging']['checkpoint_dir']

tensorboard_log_dir = f'{config['logging']['tensorboard_log_dir']}/{timestamp}'
writer = SummaryWriter(log_dir=tensorboard_log_dir)

In [5]:
# train dataset and train loader
train_dataset = CustomDataset(file_path=train_path, target_status=target_status, target_score=target_score)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_cpu)

# validation dataset and validation loader
val_dataset = CustomDataset(file_path=val_path, target_status=target_status, target_score=target_score)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_cpu)

# test dataset and test loader
test_dataset = CustomDataset(test_path, target_status=target_status, target_score=target_score)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_cpu)

In [6]:
for i, (X, y_status, y_score) in enumerate(train_loader):
    print(f'this is shape of X: {X.shape}')
    print(f'this is shape of y_status: {y_status.shape}')
    print(f'this is shape of y_score: {y_score.shape}')
    break

this is shape of X: torch.Size([64, 18])
this is shape of y_status: torch.Size([64, 1])
this is shape of y_score: torch.Size([64, 1])


# 3. Model Architecture

In [7]:
# model architecture
class LoanModel(nn.Module):
    '''
    Docstring for LoanModel
    '''
    def __init__(self, input_size, hidden_size_1, hidden_size_2, hidden_size_3, output_size_score, output_size_status):
        '''
        Docstring for __init__
        '''
        super().__init__() # no need to pass class and self

        # shared layers
        self.fc1 = nn.Linear(input_size, hidden_size_1)
        self.fc2 = nn.Linear(hidden_size_1, hidden_size_2)
        self.fc3 = nn.Linear(hidden_size_2, hidden_size_3)
        
        # output layer for regression
        self.regression_head = nn.Linear(hidden_size_3, output_size_score) # output size for regression

        # output layer for binary classification
        self.classification_head = nn.Linear(hidden_size_3, output_size_status) # output size for classification

    def forward(self, x):
        '''
        Docstring for forward
        '''
        # shared layers
        # relu activation function for hidden layers
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))

        # regression head
        score_output = self.regression_head(x) # no activation function for regression

        # classification head
        # sigmoid for binary classification
        status_output = torch.sigmoid(self.classification_head(x))

        return score_output, status_output


# 4. Loss Function 

In [8]:
# loss for credit score
score_loss_fn = nn.MSELoss()
status_loss_fn = nn.BCELoss()

# 5. Optimiser

In [9]:
#  instantiate model
model = LoanModel(input_size, hidden_size_1, hidden_size_2, hidden_size_3, output_size_score, output_size_status).to(device)
# score optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [10]:
model

LoanModel(
  (fc1): Linear(in_features=18, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=32, bias=True)
  (regression_head): Linear(in_features=32, out_features=1, bias=True)
  (classification_head): Linear(in_features=32, out_features=1, bias=True)
)

# 6. Training Loop

In [11]:
def evaluate_model(model, val_loader, score_loss_fn, status_loss_fn, device):
    '''
    Docstring for evaluate_model
    :param model: Description
    :type model: 
    :param val_loader: Description
    :type val_loader: 
    :param criterion: Description
    :type criterion: 
    :param device: Description
    :type device: '''
    # set model to evaluation mode
    model.eval()
    running_val_loss = 0.0

    with torch.no_grad():
        for X, target_score, target_status in val_loader:
            X, target_score, target_status = X.to(device), target_status.to(device), target_score.to(device)

            # forward pass
            score_output, status_output = model(X)

            # calculate score loss
            score_loss_val = score_loss_fn(score_output, target_score)

            # calculate status loss
            status_loss_val = status_loss_fn(status_output, target_status)

            # total loss
            total_loss = score_loss_val + status_loss_val

            # accumulate loss
            running_val_loss += total_loss.item()

    avg_running_val_loss = running_val_loss / len(val_loader)

    # return average validation loss
    return score_loss_val, status_loss_val, avg_running_val_loss



# training loop
def train_model(model, train_loader, val_loader, score_loss_fn, status_loss_fn, optimiser, device, num_epochs):
    '''
    docstring for _train_model
    '''
    # move model to device
    model.to(device)

    # Ensure checkpoint directory exists
    os.makedirs(checkpoint_dir, exist_ok=True)

    # set best validation loss to infinity
    best_val_loss = np.inf
    epoch_no_improve = 0  # counting epochs with no improvement for early stopping
    patience = 5  # number of epochs to wait before stopping training
    final_epoch = 0

    # loop through num_epochs
    for epoch in range(num_epochs):
        # set model to training mode
        model.train()
        running_train_loss = 0.0

        # loop through the training data
        for X, target_score, target_status in train_loader:
            X, target_score, target_status = X.to(device), target_score.to(device), target_status.to(device)

            # zero the gradients
            optimiser.zero_grad()

            # forward pass
            score_output, status_output = model(X)

            # calculate trainng loss for score
            score_loss_training = score_loss_fn(score_output, target_score)

            # calculate training loss for status
            status_loss_training = status_loss_fn(status_output, target_status)

            # total training loss
            total_loss_training = score_loss_training + status_loss_training

            # accumulate training loss
            running_train_loss += total_loss_training.item()

        # average training loss
        avg_running_train_loss = running_train_loss / len(train_loader)

        # evaluate model
        score_loss_val, status_loss_val, avg_running_val_loss = evaluate_model(
            model=model,
            val_loader=val_loader, 
            score_loss_fn=score_loss_fn, 
            status_loss_fn=status_loss_fn, 
            device=device
        )

        # log to training loss for score and status for each epoch
        writer.add_scalar('score_loss/Training', score_loss_training, epoch)
        writer.add_scalar('status_loss/Training', status_loss_training, epoch)

        # log to validation loss for score and status for each epoch
        writer.add_scalar('score_loss/Validation', score_loss_val, epoch)
        writer.add_scalar('status_loss/Validation', status_loss_val, epoch)

        # log average training and validation loss for each epoch
        writer.add_scalar('Average_Loss/Training', avg_running_train_loss, epoch)
        writer.add_scalar('Average_Loss/Validation', avg_running_val_loss, epoch)

        # print training and validation loss for each epoch
        print(f'Epoch: {epoch+1}/{num_epochs} | Average Training Loss: {avg_running_train_loss:.4f} | Average Validation Loss: {avg_running_val_loss:.4f}')

        # backward pass
        total_loss_training.backward()

        # update weights
        optimiser.step()

        # save model checkpoint if validation loss improves
        if avg_running_val_loss < best_val_loss:
            best_val_loss = avg_running_val_loss
            torch.save(model.state_dict(), f'{checkpoint_dir}/loan_score_best_model_{timestamp}.pth')
            epoch_no_improve = 0
            print(f'Model improved, saving at epoch: {epoch+1} with validation loss: {best_val_loss:.4f}. Checkpoint saved.')
        else:
            epoch_no_improve += 1
            print(f'No improvement, patience: {epoch_no_improve}/{patience}')

        if epoch_no_improve >= patience:
            print('Early stopping triggered.')
            print(f'stop at epoch: {epoch+1}')
            final_epoch += epoch + 1
            break

    print('Training complete.')
    return final_epoch


In [12]:
for i, (X, y_1, y_2) in enumerate(train_loader):
    print(f'the shape of X is {X.shape}')
    print(f'the shape of y_1 is {y_1.shape}')
    print(f'the shape of y_2 is {y_2.shape}')
    print('-------------------')
    if i == 2:
        break

the shape of X is torch.Size([64, 18])
the shape of y_1 is torch.Size([64, 1])
the shape of y_2 is torch.Size([64, 1])
-------------------
the shape of X is torch.Size([64, 18])
the shape of y_1 is torch.Size([64, 1])
the shape of y_2 is torch.Size([64, 1])
-------------------
the shape of X is torch.Size([64, 18])
the shape of y_1 is torch.Size([64, 1])
the shape of y_2 is torch.Size([64, 1])
-------------------


In [13]:
start = time.time()
final_epoch = train_model(model=model, train_loader=train_loader, val_loader=val_loader, num_epochs=num_epochs, score_loss_fn=score_loss_fn, status_loss_fn=status_loss_fn, optimiser=optimizer, device=device)
end = time.time()
print(f'training time: {end - start}')

Epoch: 1/100 | Average Training Loss: 1.1469 | Average Validation Loss: 0.9907
Model improved, saving at epoch: 1 with validation loss: 0.9907. Checkpoint saved.
Epoch: 2/100 | Average Training Loss: 1.1059 | Average Validation Loss: 0.9729
Model improved, saving at epoch: 2 with validation loss: 0.9729. Checkpoint saved.
Epoch: 3/100 | Average Training Loss: 1.0691 | Average Validation Loss: 0.9576
Model improved, saving at epoch: 3 with validation loss: 0.9576. Checkpoint saved.
Epoch: 4/100 | Average Training Loss: 1.0352 | Average Validation Loss: 0.9444
Model improved, saving at epoch: 4 with validation loss: 0.9444. Checkpoint saved.
Epoch: 5/100 | Average Training Loss: 1.0036 | Average Validation Loss: 0.9329
Model improved, saving at epoch: 5 with validation loss: 0.9329. Checkpoint saved.
Epoch: 6/100 | Average Training Loss: 0.9744 | Average Validation Loss: 0.9229
Model improved, saving at epoch: 6 with validation loss: 0.9229. Checkpoint saved.
Epoch: 7/100 | Average Train

In [14]:
# Load the best model checkpoint
# Automatically find the latest checkpoint based on the timestamp in the filename
import glob
import os

checkpoint_dir = config['logging']['checkpoint_dir']
latest_checkpoint = max(glob.glob(os.path.join(checkpoint_dir, 'loan_score_best_model_*.pth')), key=os.path.getctime)

# Load the latest model checkpoint
model.load_state_dict(torch.load(latest_checkpoint))
model.to(device)
model.eval()

# Evaluate the model on the test dataset
test_score_loss, test_status_loss, test_avg_loss = evaluate_model(
    model=model,
    val_loader=test_loader,
    score_loss_fn=score_loss_fn,
    status_loss_fn=status_loss_fn,
    device=device
)

# # log test loss
# writer.add_scalar('score_loss/Test', test_score_loss)
# writer.add_scalar('status_loss/Test', test_status_loss)
# writer.add_scalar('Average_Loss/Test', test_avg_loss)
writer.add_scalar('Average_Loss/Training', test_avg_loss, global_step=final_epoch)
writer.add_scalar('Average_Loss/Validation', test_avg_loss, global_step=final_epoch)

# Flush  and close the writer
writer.flush()
writer.close()