In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from models.mlp import MLP
from models.tab_transformer import TabTransformer

In [None]:
def find_best_model_state(model_name, X, y, input_dim, num_classes, stratified_fold, batch_size=64, num_epochs=200, n_folds=5, device='cuda', metric='f1_score'):
    """Get best model out of Cross-Validation, using the specified metric."""
    best_model_state = None
    best_metric = -np.inf
    fold_results_list = []
    for fold, (train_ids, val_ids) in enumerate(stratified_fold.split(X, y)):
        print(f'Fold {fold + 1}/{n_folds}')
        # Prepare data loaders
        train_loader = torch.utils.data.DataLoader(
            torch.utils.data.TensorDataset(X[train_ids], y[train_ids]),
            batch_size=batch_size, num_workers=8, shuffle=True
        )
        val_loader = torch.utils.data.DataLoader(
            torch.utils.data.TensorDataset(X[val_ids], y[val_ids]),
            batch_size=batch_size, num_workers=8, 
        )
        # Initialize model, criterion, and optimizer
        if model_name == 'MLP':
            model = MLP(input_dim=input_dim, num_classes=num_classes).to(device)
        else:
            model = TabTransformer(input_dim=input_dim, num_classes=num_classes).to(device)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=0.001)

        # Train model
        model_state = train_model(model, train_loader, val_loader, criterion, optimizer, device, num_epochs=num_epochs)
        model.load_state_dict(model_state)
        # Evaluate model
        result_df = evaluate_model(model, val_loader, device)
        metric_result = result_df[metric].values[0]

        fold_result = {
            'Model Name': model_name,
            'Fold': fold + 1,
            'Best Model': '',
            'Best f1_score': ''
        }
        fold_result.update(result_df.to_dict(orient='records')[0])
        fold_results_list.append(fold_result)

        if metric_result > best_metric:
            best_metric = metric_result
            best_model_state = model.state_dict()
        print(f'Best {metric} for fold {fold + 1}: {best_metric:.4f}')
        
    return best_model_state, fold_results_list

def train_model(model, train_loader, val_loader, criterion, optimizer, device, num_epochs=500, patience=10):
    best_val_loss = float('inf')
    no_improve = 0
    best_model_state = None

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        train_loss = 0.0
        for batch_x, batch_y in train_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            optimizer.zero_grad()
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        # Validation phase
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for batch_x, batch_y in val_loader:
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                outputs = model(batch_x)
                loss = criterion(outputs, batch_y)
                val_loss += loss.item()

        # Early stopping check
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model_state = model.state_dict()
            no_improve = 0
        else:
            no_improve += 1
            if no_improve >= patience:
                print(f'Early stopping at epoch {epoch}')
                break

        if (epoch + 1) % 10 == 0 or epoch == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss/len(train_loader):.4f}, Val Loss: {val_loss/len(val_loader):.4f}')

    return best_model_state

def evaluate_model(model, test_loader, device):
    model.eval()
    all_preds = []
    all_labels = []
    all_probs = []
    all_attentions = []
    
    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            batch_x = batch_x.to(device)
            outputs = model(batch_x)
            probs = torch.nn.functional.softmax(outputs, dim=1)
            _, predicted = torch.max(outputs.data, 1)
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(batch_y.numpy())
            all_probs.extend(probs.cpu().numpy())

            # Get attention weights
            if hasattr(model, 'get_attention_weights') and callable(model.get_attention_weights):
                try:
                    attention_weights = model.get_attention_weights(batch_x)
                    if attention_weights is not None:  # Make sure it's not None
                        all_attentions.append(attention_weights.cpu().numpy())
                except Exception as e:
                    print(f"Error getting attention weights: {e}")
    
    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average='macro')
    recall = recall_score(all_labels, all_preds, average='macro')
    f1 = f1_score(all_labels, all_preds, average='macro')
    roc_auc = roc_auc_score(all_labels, all_probs, multi_class='ovr')
    
    results = {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1_score': f1,
        'roc_auc': roc_auc,
        'attention_weights': np.concatenate(all_attentions, axis=0) if len(all_attentions) > 0 else None
    }
    
    return pd.DataFrame([results])

def save_model_state(model_state, output_path):
    torch.save(model_state, output_path)
    print(f'Model state saved to {output_path}')

In [3]:
# System settings
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load data
train_df = pd.read_csv('./data/3_train_processed.csv')
train_ros_df = pd.read_csv('./data/3_train_ros_processed.csv')
train_smote_df = pd.read_csv('./data/3_train_smote_processed.csv')
test_df = pd.read_csv('./data/3_test_processed.csv')

# Preprocess data
train_features = train_df.drop(['credit_score'], axis=1)
train_ros_features = train_ros_df.drop(['credit_score'], axis=1)
train_smote_features = train_smote_df.drop(['credit_score'], axis=1)
test_features = test_df.drop(['credit_score'], axis=1)

train_labels = train_df['credit_score']
train_ros_labels = train_ros_df['credit_score']
train_smote_labels = train_smote_df['credit_score']
test_labels = test_df['credit_score']

X = torch.FloatTensor(train_features.values)
X_ros = torch.FloatTensor(train_ros_features.values)
X_smote = torch.FloatTensor(train_smote_features.values)
X_test = torch.FloatTensor(test_features.values)
y = torch.LongTensor(train_labels.values)
y_ros = torch.LongTensor(train_ros_labels.values)
y_smote = torch.LongTensor(train_smote_labels.values)
y_test = torch.LongTensor(test_labels.values)

In [4]:
# fold
n_folds = 5
batch_size = 128
num_epochs = 500
# MLP
stratified_fold = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)
stratified_fold_ros = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)
stratified_fold_smote = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)
print('MLP - original data')
mlp_best_state, mlp_fold_results_list = find_best_model_state('MLP', X, y, input_dim=X.shape[1], num_classes=3, stratified_fold=stratified_fold, batch_size=batch_size, num_epochs=num_epochs, device=device)
print('MLP - ROS data')
mlp_best_ros_state, mlp_fold_results_list_ros = find_best_model_state('MLP', X_ros, y_ros, input_dim=X_ros.shape[1], num_classes=3, stratified_fold=stratified_fold_ros, batch_size=batch_size, num_epochs=num_epochs, device=device)
print('MLP - SMOTE data')
mlp_best_smote_state, mlp_fold_results_list_smote = find_best_model_state('MLP', X_smote, y_smote, input_dim=X_smote.shape[1], num_classes=3, stratified_fold=stratified_fold_smote, batch_size=batch_size, num_epochs=num_epochs, device=device)
save_model_state(mlp_best_state, './models/mlp_best_state.pth')
save_model_state(mlp_best_ros_state, './models/mlp_best_ros_state.pth')
save_model_state(mlp_best_smote_state, './models/mlp_best_smote_state.pth')

MLP - original data
Fold 1/5
Epoch [1/500], Train Loss: 0.7486, Val Loss: 0.6931
Epoch [10/500], Train Loss: 0.6187, Val Loss: 0.6359
Epoch [20/500], Train Loss: 0.5792, Val Loss: 0.6234
Epoch [30/500], Train Loss: 0.5467, Val Loss: 0.6114
Epoch [40/500], Train Loss: 0.5260, Val Loss: 0.5994
Epoch [50/500], Train Loss: 0.5088, Val Loss: 0.5872
Epoch [60/500], Train Loss: 0.4939, Val Loss: 0.5787
Epoch [70/500], Train Loss: 0.4857, Val Loss: 0.5756
Epoch [80/500], Train Loss: 0.4764, Val Loss: 0.5713
Epoch [90/500], Train Loss: 0.4667, Val Loss: 0.5677
Early stopping at epoch 96
Best f1_score for fold 1: 0.7439
Fold 2/5
Epoch [1/500], Train Loss: 0.7481, Val Loss: 0.7000
Epoch [10/500], Train Loss: 0.6193, Val Loss: 0.6425
Epoch [20/500], Train Loss: 0.5811, Val Loss: 0.6342
Epoch [30/500], Train Loss: 0.5517, Val Loss: 0.6103
Epoch [40/500], Train Loss: 0.5264, Val Loss: 0.6015
Epoch [50/500], Train Loss: 0.5108, Val Loss: 0.5991
Epoch [60/500], Train Loss: 0.4953, Val Loss: 0.5859
Ear

In [5]:
# fold
n_folds = 5
batch_size = 128
num_epochs = 500
# TabTransformer
stratified_fold = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)
stratified_fold_ros = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)
stratified_fold_smote = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)
print('TabTransformer - original data')
tab_transformer_best_state, tab_transformer_fold_results = find_best_model_state('TabTransformer', X, y, input_dim=X.shape[1], num_classes=3, stratified_fold=stratified_fold, batch_size=batch_size, num_epochs=num_epochs, device=device)
print('TabTransformer - ROS data')
tab_transformer_best_ros_state, tab_transformer_fold_results_ros = find_best_model_state('TabTransformer', X_ros, y_ros, input_dim=X_ros.shape[1], num_classes=3, stratified_fold=stratified_fold_ros, batch_size=batch_size, num_epochs=num_epochs, device=device)
print('TabTransformer - SMOTE data')
tab_transformer_best_smote_state, tab_transformer_fold_results_smote = find_best_model_state('TabTransformer', X_smote, y_smote, input_dim=X_smote.shape[1], num_classes=3, stratified_fold=stratified_fold_smote, batch_size=batch_size, num_epochs=num_epochs, device=device)
save_model_state(tab_transformer_best_state, './models/tab_transformer_best_state.pth')
save_model_state(tab_transformer_best_ros_state, './models/tab_transformer_best_ros_state.pth')
save_model_state(tab_transformer_best_smote_state, './models/tab_transformer_best_smote_state.pth')

TabTransformer - original data
Fold 1/5
Epoch [1/500], Train Loss: 0.7095, Val Loss: 0.6770
Epoch [10/500], Train Loss: 0.6271, Val Loss: 0.6383
Epoch [20/500], Train Loss: 0.5801, Val Loss: 0.6056
Epoch [30/500], Train Loss: 0.5305, Val Loss: 0.5772
Epoch [40/500], Train Loss: 0.4928, Val Loss: 0.5675
Epoch [50/500], Train Loss: 0.4620, Val Loss: 0.5409
Epoch [60/500], Train Loss: 0.4427, Val Loss: 0.5393
Epoch [70/500], Train Loss: 0.4264, Val Loss: 0.5372
Epoch [80/500], Train Loss: 0.4117, Val Loss: 0.5233
Epoch [90/500], Train Loss: 0.4035, Val Loss: 0.5186
Early stopping at epoch 98
Best f1_score for fold 1: 0.7829
Fold 2/5
Epoch [1/500], Train Loss: 0.7089, Val Loss: 0.6743
Epoch [10/500], Train Loss: 0.6283, Val Loss: 0.6436
Epoch [20/500], Train Loss: 0.5839, Val Loss: 0.6217
Epoch [30/500], Train Loss: 0.5299, Val Loss: 0.5897
Epoch [40/500], Train Loss: 0.4897, Val Loss: 0.5734
Epoch [50/500], Train Loss: 0.4590, Val Loss: 0.5841
Epoch [60/500], Train Loss: 0.4368, Val Loss:

In [7]:
pd.concat([pd.DataFrame(mlp_fold_results_list), pd.DataFrame(mlp_fold_results_list_ros), pd.DataFrame(mlp_fold_results_list_smote), pd.DataFrame(tab_transformer_fold_results), pd.DataFrame(tab_transformer_fold_results_ros), pd.DataFrame(tab_transformer_fold_results_smote)], axis=0).to_csv('./data/5_final_fold_results.csv', index=False)

In [8]:
# Load best model
mlp_best_model_state = torch.load('./models/mlp_best_state.pth')
mlp_best_model = MLP(input_dim=X.shape[1], num_classes=3).to(device)
mlp_best_model.load_state_dict(mlp_best_model_state)
mlp_best_roc_model_state = torch.load('./models/mlp_best_ros_state.pth')
mlp_best_roc_model = MLP(input_dim=X.shape[1], num_classes=3).to(device)
mlp_best_roc_model.load_state_dict(mlp_best_roc_model_state)
mlp_best_smote_model_state = torch.load('./models/mlp_best_smote_state.pth')
mlp_best_smote_model = MLP(input_dim=X.shape[1], num_classes=3).to(device)
mlp_best_smote_model.load_state_dict(mlp_best_smote_model_state)
tab_transformer_best_model_state = torch.load('./models/tab_transformer_best_state.pth')
tab_transformer_best_model = TabTransformer(input_dim=X.shape[1], num_classes=3).to(device)
tab_transformer_best_model.load_state_dict(tab_transformer_best_model_state)
tab_transformer_best_roc_model_state = torch.load('./models/tab_transformer_best_ros_state.pth')
tab_transformer_best_roc_model = TabTransformer(input_dim=X.shape[1], num_classes=3).to(device)
tab_transformer_best_roc_model.load_state_dict(tab_transformer_best_roc_model_state)
tab_transformer_best_smote_model_state = torch.load('./models/tab_transformer_best_smote_state.pth')
tab_transformer_best_smote_model = TabTransformer(input_dim=X.shape[1], num_classes=3).to(device)
tab_transformer_best_smote_model.load_state_dict(tab_transformer_best_smote_model_state)

def evaluate_model(model, X_test, y_test, device, name='MLP'):
    """Evaluate the model on the test set."""
    test_loader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(X_test, y_test),
        batch_size=128, num_workers=8
    )
    model.eval()
    all_preds = []
    all_labels = []
    all_probs = []
    
    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            batch_x = batch_x.to(device)
            outputs = model(batch_x)
            probs = torch.nn.functional.softmax(outputs, dim=1)
            _, predicted = torch.max(outputs.data, 1)
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(batch_y.numpy())
            all_probs.extend(probs.cpu().numpy())

    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average='macro')
    recall = recall_score(all_labels, all_preds, average='macro')
    f1 = f1_score(all_labels, all_preds, average='macro')
    roc_auc = roc_auc_score(all_labels, all_probs, multi_class='ovr')

    results = {
        'name': name,
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1_score': f1,
        'roc_auc': roc_auc
    }

    return pd.DataFrame([results])

# Evaluate best model
result_mlp_df = evaluate_model(mlp_best_model, X_test, y_test, device, name='MLP')
result_mlp_roc_df = evaluate_model(mlp_best_roc_model, X_test, y_test, device, name='MLP_ROS')
result_mlp_smote_df = evaluate_model(mlp_best_smote_model, X_test, y_test, device, name='MLP_SMOTE')
result_tab_transformer_df = evaluate_model(tab_transformer_best_model, X_test, y_test, device, name='TabTransformer')
result_tab_transformer_roc_df = evaluate_model(tab_transformer_best_roc_model, X_test, y_test, device, name='TabTransformer_ROS')
result_tab_transformer_smote_df = evaluate_model(tab_transformer_best_smote_model, X_test, y_test, device, name='TabTransformer_SMOTE')

# Combine results
result_df = pd.concat([result_mlp_df, result_mlp_roc_df, result_mlp_smote_df, result_tab_transformer_df, result_tab_transformer_roc_df, result_tab_transformer_smote_df], axis=0)
result_df.to_csv('./data/5_test_result.csv', index=False)

In [9]:
result_df

Unnamed: 0,name,accuracy,precision,recall,f1_score,roc_auc
0,MLP,0.755274,0.743117,0.742785,0.741985,0.896892
0,MLP_ROS,0.749198,0.733657,0.797016,0.748563,0.902189
0,MLP_SMOTE,0.750886,0.727633,0.769699,0.743005,0.892295
0,TabTransformer,0.785485,0.771186,0.79595,0.781334,0.909323
0,TabTransformer_ROS,0.781828,0.764055,0.820988,0.780982,0.912706
0,TabTransformer_SMOTE,0.776653,0.760519,0.79016,0.771449,0.907742


In [10]:
print(result_df)

                   name  accuracy  precision    recall  f1_score   roc_auc
0                   MLP  0.755274   0.743117  0.742785  0.741985  0.896892
0               MLP_ROS  0.749198   0.733657  0.797016  0.748563  0.902189
0             MLP_SMOTE  0.750886   0.727633  0.769699  0.743005  0.892295
0        TabTransformer  0.785485   0.771186  0.795950  0.781334  0.909323
0    TabTransformer_ROS  0.781828   0.764055  0.820988  0.780982  0.912706
0  TabTransformer_SMOTE  0.776653   0.760519  0.790160  0.771449  0.907742
