In [None]:
# Import Python libraries
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Importing the models
from models.mlp import MLP
from models.transformer import Transformer
from models.ft_transformer_wrapper import FTTransformerWrapper
from models.tab_transformer_wrapper import TabTransformerWrapper

In [8]:
def find_best_model_state(model_name, X, y, input_dim, num_classes, stratified_fold, batch_size=64, num_epochs=200, n_folds=10, device='cuda', metric='f1_score', cat_dims=[0, 12, 15, 29, 30], cat_idxs=[12, 3, 3, 3, 3], num_idxs=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45]):
    """Get best model out of Cross-Validation, using the specified metric."""
    best_model_state = None
    best_metric = -np.inf
    fold_results_list = []
    for fold, (train_ids, val_ids) in enumerate(stratified_fold.split(X, y)):
        print(f'Fold {fold + 1}/{n_folds}')
        # Prepare data loaders
        train_loader = torch.utils.data.DataLoader(
            torch.utils.data.TensorDataset(X[train_ids], y[train_ids]),
            batch_size=batch_size, num_workers=0, shuffle=True
        )
        val_loader = torch.utils.data.DataLoader(
            torch.utils.data.TensorDataset(X[val_ids], y[val_ids]),
            batch_size=batch_size, num_workers=0, 
        )
        # Initialize model, criterion, and optimizer
        if model_name == 'MLP':
            model = MLP(input_dim=input_dim, num_classes=num_classes).to(device)
        elif model_name == 'TabTransformer':
            model = TabTransformerWrapper(cat_dims=cat_dims,cat_idxs=cat_idxs,num_idxs=num_idxs,num_classes=num_classes,num_heads=4,num_layers=2,dim_model=64,dropout=0.1).to(device)
        elif model_name == 'FTTransformer':
            model = FTTransformerWrapper(cat_dims=cat_dims,cat_idxs=cat_idxs,num_idxs=num_idxs,num_classes=num_classes,num_heads=4,num_layers=2,dim_model=64,dim_ff=128,dropout=0.1).to(device)
        elif model_name == 'Transformer':
            model = Transformer(input_dim=input_dim, num_classes=num_classes, num_heads=4, num_layers=2, dim_model=64, dim_ff=128, dropout=0.1).to(device)
        else:
            raise ValueError(f"Unknown model name: {model_name}")

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=0.001)

        # Train model
        model_state = train_model(model, train_loader, val_loader, criterion, optimizer, device, num_epochs=num_epochs)
        model.load_state_dict(model_state)
        # Evaluate model
        result_df = evaluate_model(model, val_loader, device)
        metric_result = result_df[metric].values[0]

        fold_result = {
            'Fold': fold + 1,
            'Model Name': model_name,
        }
        fold_result.update(result_df.to_dict(orient='records')[0])
        fold_results_list.append(fold_result)

        if metric_result > best_metric:
            best_metric = metric_result
            best_model_state = model.state_dict()
        print(f'Best {metric} for fold {fold + 1}: {best_metric:.4f}')
        
    return best_model_state, fold_results_list

def train_model(model, train_loader, val_loader, criterion, optimizer, device, num_epochs=500, patience=10):
    best_val_loss = float('inf')
    no_improve = 0
    best_model_state = None

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        train_loss = 0.0
        for batch_x, batch_y in train_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            optimizer.zero_grad()
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        # Validation phase
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for batch_x, batch_y in val_loader:
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                outputs = model(batch_x)
                loss = criterion(outputs, batch_y)
                val_loss += loss.item()

        # Early stopping check
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model_state = model.state_dict()
            no_improve = 0
        else:
            no_improve += 1
            if no_improve >= patience:
                print(f'Early stopping at epoch {epoch}')
                break

        if (epoch + 1) % 10 == 0 or epoch == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss/len(train_loader):.4f}, Val Loss: {val_loss/len(val_loader):.4f}')

    return best_model_state

def evaluate_model(model, test_loader, device):
    model.eval()
    all_preds = []
    all_labels = []
    all_probs = []
    all_attentions = []
    
    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            batch_x = batch_x.to(device)
            outputs = model(batch_x)
            probs = torch.nn.functional.softmax(outputs, dim=1)
            _, predicted = torch.max(outputs.data, 1)
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(batch_y.numpy())
            all_probs.extend(probs.cpu().numpy())

            # Get attention weights
            if hasattr(model, 'get_attention_weights') and callable(model.get_attention_weights):
                try:
                    attention_weights = model.get_attention_weights(batch_x)
                    if attention_weights is not None:  # Make sure it's not None
                        all_attentions.append(attention_weights.cpu().numpy())
                except Exception as e:
                    print(f"Error getting attention weights: {e}")
    
    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average='macro')
    recall = recall_score(all_labels, all_preds, average='macro')
    f1 = f1_score(all_labels, all_preds, average='macro')
    roc_auc = roc_auc_score(all_labels, all_probs, multi_class='ovr')
    
    results = {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1_score': f1,
        'roc_auc': roc_auc,
        'attention_weights': None # np.concatenate(all_attentions, axis=0) if len(all_attentions) > 0 else None
    }
    
    return pd.DataFrame([results])

def save_model_state(model_state, output_path):
    torch.save(model_state, output_path)
    print(f'Model state saved to {output_path}')

In [None]:
# System settings
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# TabTransformer Model Settings

# Load data
train_df = pd.read_csv('./data/3_train_processed.csv')
train_ros_df = pd.read_csv('./data/3_train_ros_processed.csv')
train_smote_df = pd.read_csv('./data/3_train_smote_processed.csv')
test_df = pd.read_csv('./data/3_test_processed.csv')

# Preprocess data
train_features = train_df.drop(['credit_score'], axis=1)
train_ros_features = train_ros_df.drop(['credit_score'], axis=1)
train_smote_features = train_smote_df.drop(['credit_score'], axis=1)
test_features = test_df.drop(['credit_score'], axis=1)

train_labels = train_df['credit_score']
train_ros_labels = train_ros_df['credit_score']
train_smote_labels = train_smote_df['credit_score']
test_labels = test_df['credit_score']

X = torch.FloatTensor(train_features.values)
X_ros = torch.FloatTensor(train_ros_features.values)
X_smote = torch.FloatTensor(train_smote_features.values)
X_test = torch.FloatTensor(test_features.values)
y = torch.LongTensor(train_labels.values)
y_ros = torch.LongTensor(train_ros_labels.values)
y_smote = torch.LongTensor(train_smote_labels.values)
y_test = torch.LongTensor(test_labels.values)

# Transformer Model Settings
# 1. Categorical features
cat_columns = ['month', 'credit_mix', 'payment_of_min_amount', 'spending_level', 'payment_size']
cat_idxs = [0, 12, 15, 29, 30]
# 2. Continuous features - Month could have 12 unique values, so we treat it as a categorical feature
cat_dims = [12, train_df['credit_mix'].nunique(), train_df['payment_of_min_amount'].nunique(), 
            train_df['spending_level'].nunique() + 1, train_df['payment_size'].nunique()]
# 3. Other columns
all_columns = list(train_df.columns)
num_idxs = [i for i in range(len(all_columns)) if i not in cat_idxs and i != all_columns.index('credit_score')]

In [10]:
# fold
n_folds = 10
batch_size = 128
num_epochs = 500
# MLP
stratified_fold = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)
stratified_fold_ros = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)
stratified_fold_smote = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)

print('MLP - original data')
mlp_best_state, mlp_fold_results_list = find_best_model_state('MLP', X, y, input_dim=X.shape[1], num_classes=3, stratified_fold=stratified_fold, batch_size=batch_size, num_epochs=num_epochs, n_folds=n_folds, device=device)
print('MLP - ROS data')
mlp_best_ros_state, mlp_fold_results_list_ros = find_best_model_state('MLP', X_ros, y_ros, input_dim=X_ros.shape[1], num_classes=3, stratified_fold=stratified_fold_ros, batch_size=batch_size, num_epochs=num_epochs, n_folds=n_folds, device=device)
print('MLP - SMOTE data')
mlp_best_smote_state, mlp_fold_results_list_smote = find_best_model_state('MLP', X_smote, y_smote, input_dim=X_smote.shape[1], num_classes=3, stratified_fold=stratified_fold_smote, batch_size=batch_size, num_epochs=num_epochs, n_folds=n_folds, device=device)
save_model_state(mlp_best_state, './models/mlp_best_state.pth')
save_model_state(mlp_best_ros_state, './models/mlp_best_ros_state.pth')
save_model_state(mlp_best_smote_state, './models/mlp_best_smote_state.pth')
# Save csv
mlp_fold_results_df = pd.DataFrame(mlp_fold_results_list).to_csv('./models/mlp_fold_results.csv', index=False)
mlp_fold_results_df_ros = pd.DataFrame(mlp_fold_results_list_ros).to_csv('./models/mlp_fold_results_ros.csv', index=False)
mlp_fold_results_df_smote = pd.DataFrame(mlp_fold_results_list_smote).to_csv('./models/mlp_fold_results_smote.csv', index=False)


MLP - original data
Fold 1/10
Epoch [1/500], Train Loss: 0.7424, Val Loss: 0.6865
Epoch [10/500], Train Loss: 0.6165, Val Loss: 0.6282
Epoch [20/500], Train Loss: 0.5765, Val Loss: 0.6123
Epoch [30/500], Train Loss: 0.5449, Val Loss: 0.5923
Epoch [40/500], Train Loss: 0.5255, Val Loss: 0.5779
Epoch [50/500], Train Loss: 0.5070, Val Loss: 0.5667
Epoch [60/500], Train Loss: 0.4945, Val Loss: 0.5555
Epoch [70/500], Train Loss: 0.4827, Val Loss: 0.5584
Epoch [80/500], Train Loss: 0.4723, Val Loss: 0.5476
Epoch [90/500], Train Loss: 0.4695, Val Loss: 0.5394
Epoch [100/500], Train Loss: 0.4628, Val Loss: 0.5429
Epoch [110/500], Train Loss: 0.4570, Val Loss: 0.5402
Early stopping at epoch 111
Best f1_score for fold 1: 0.7578
Fold 2/10
Epoch [1/500], Train Loss: 0.7440, Val Loss: 0.6893
Epoch [10/500], Train Loss: 0.6168, Val Loss: 0.6314
Epoch [20/500], Train Loss: 0.5735, Val Loss: 0.6141
Epoch [30/500], Train Loss: 0.5424, Val Loss: 0.5998
Epoch [40/500], Train Loss: 0.5198, Val Loss: 0.584

In [11]:
# fold
n_folds = 10
batch_size = 128
num_epochs = 500
# Transformer
stratified_fold = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)
stratified_fold_ros = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)
stratified_fold_smote = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)
    
print('Transformer - original data')
transformer_best_state, transformer_fold_results_list = find_best_model_state('Transformer', X, y, input_dim=X.shape[1], num_classes=3, stratified_fold=stratified_fold, batch_size=batch_size, num_epochs=num_epochs, n_folds=n_folds, device=device)
print('Transformer - ROS data')
transformer_best_ros_state, transformer_fold_results_list_ros = find_best_model_state('Transformer', X_ros, y_ros, input_dim=X_ros.shape[1], num_classes=3, stratified_fold=stratified_fold_ros, batch_size=batch_size, num_epochs=num_epochs, n_folds=n_folds, device=device)
print('Transformer - SMOTE data')
transformer_best_smote_state, transformer_fold_results_list_smote = find_best_model_state('Transformer', X_smote, y_smote, input_dim=X_smote.shape[1], num_classes=3, stratified_fold=stratified_fold_smote, batch_size=batch_size, num_epochs=num_epochs, n_folds=n_folds, device=device)
save_model_state(transformer_best_state, './models/transformer_best_state.pth')
save_model_state(transformer_best_ros_state, './models/transformer_best_ros_state.pth')
save_model_state(transformer_best_smote_state, './models/transformer_best_smote_state.pth')
# Save csv
transformer_fold_results_df = pd.DataFrame(transformer_fold_results_list).to_csv('./models/transformer_fold_results.csv', index=False)
transformer_fold_results_df_ros = pd.DataFrame(transformer_fold_results_list_ros).to_csv('./models/transformer_fold_results_ros.csv', index=False)
transformer_fold_results_df_smote = pd.DataFrame(transformer_fold_results_list_smote).to_csv('./models/transformer_fold_results_smote.csv', index=False)

Transformer - original data
Fold 1/10
Epoch [1/500], Train Loss: 0.7123, Val Loss: 0.6636
Epoch [10/500], Train Loss: 0.6245, Val Loss: 0.6305
Epoch [20/500], Train Loss: 0.5775, Val Loss: 0.6051
Epoch [30/500], Train Loss: 0.5038, Val Loss: 0.5553
Epoch [40/500], Train Loss: 0.4440, Val Loss: 0.5200
Epoch [50/500], Train Loss: 0.3999, Val Loss: 0.5144
Epoch [60/500], Train Loss: 0.3643, Val Loss: 0.5098
Early stopping at epoch 61
Best f1_score for fold 1: 0.7963
Fold 2/10
Epoch [1/500], Train Loss: 0.7163, Val Loss: 0.6601
Epoch [10/500], Train Loss: 0.6266, Val Loss: 0.6287
Epoch [20/500], Train Loss: 0.5873, Val Loss: 0.6064
Epoch [30/500], Train Loss: 0.5327, Val Loss: 0.5768
Epoch [40/500], Train Loss: 0.4707, Val Loss: 0.5597
Epoch [50/500], Train Loss: 0.4259, Val Loss: 0.5209
Epoch [60/500], Train Loss: 0.3924, Val Loss: 0.5167
Early stopping at epoch 63
Best f1_score for fold 2: 0.7963
Fold 3/10
Epoch [1/500], Train Loss: 0.7145, Val Loss: 0.6797
Epoch [10/500], Train Loss: 0.

In [12]:
# fold
n_folds = 10
batch_size = 128
num_epochs = 500
# TabTransformer
stratified_fold = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)
stratified_fold_ros = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)
stratified_fold_smote = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)

print('TabTransformer - original data')
tab_transformer_best_state, tab_transformer_fold_results_list = find_best_model_state('TabTransformer', X, y, input_dim=X.shape[1], num_classes=3, stratified_fold=stratified_fold, batch_size=batch_size, num_epochs=num_epochs, n_folds=n_folds, device=device, cat_dims=cat_dims, cat_idxs=cat_idxs, num_idxs=num_idxs)
tab_transformer_best_ros_state, tab_transformer_fold_results_list_ros = find_best_model_state('TabTransformer', X_ros, y_ros, input_dim=X_ros.shape[1], num_classes=3, stratified_fold=stratified_fold_ros, batch_size=batch_size, num_epochs=num_epochs, n_folds=n_folds, device=device, cat_dims=cat_dims, cat_idxs=cat_idxs, num_idxs=num_idxs)
tab_transformer_best_smote_state, tab_transformer_fold_results_list_smote = find_best_model_state('TabTransformer', X_smote, y_smote, input_dim=X_smote.shape[1], num_classes=3, stratified_fold=stratified_fold_smote, batch_size=batch_size, num_epochs=num_epochs, n_folds=n_folds, device=device, cat_dims=cat_dims, cat_idxs=cat_idxs, num_idxs=num_idxs)
save_model_state(tab_transformer_best_state, './models/tab_transformer_best_state.pth')
save_model_state(tab_transformer_best_ros_state, './models/tab_transformer_best_ros_state.pth')
save_model_state(tab_transformer_best_smote_state, './models/tab_transformer_best_smote_state.pth')
# Save csv
tab_transformer_fold_results_df = pd.DataFrame(tab_transformer_fold_results_list).to_csv('./models/tab_transformer_fold_results.csv', index=False)
tab_transformer_fold_results_df_ros = pd.DataFrame(tab_transformer_fold_results_list_ros).to_csv('./models/tab_transformer_fold_results_ros.csv', index=False)
tab_transformer_fold_results_df_smote = pd.DataFrame(tab_transformer_fold_results_list_smote).to_csv('./models/tab_transformer_fold_results_smote.csv', index=False)

TabTransformer - original data
Fold 1/10
Epoch [1/500], Train Loss: 0.7177, Val Loss: 0.6738
Epoch [10/500], Train Loss: 0.5999, Val Loss: 0.6273
Epoch [20/500], Train Loss: 0.2390, Val Loss: 0.7052
Early stopping at epoch 24
Best f1_score for fold 1: 0.7346
Fold 2/10
Epoch [1/500], Train Loss: 0.7190, Val Loss: 0.6679
Epoch [10/500], Train Loss: 0.5941, Val Loss: 0.6275
Epoch [20/500], Train Loss: 0.2279, Val Loss: 0.7234
Early stopping at epoch 24
Best f1_score for fold 2: 0.7492
Fold 3/10
Epoch [1/500], Train Loss: 0.7197, Val Loss: 0.6839
Epoch [10/500], Train Loss: 0.6041, Val Loss: 0.6322
Epoch [20/500], Train Loss: 0.2793, Val Loss: 0.7147
Early stopping at epoch 24
Best f1_score for fold 3: 0.7492
Fold 4/10
Epoch [1/500], Train Loss: 0.7250, Val Loss: 0.6813
Epoch [10/500], Train Loss: 0.5999, Val Loss: 0.6242
Epoch [20/500], Train Loss: 0.2517, Val Loss: 0.7397
Early stopping at epoch 24
Best f1_score for fold 4: 0.7492
Fold 5/10
Epoch [1/500], Train Loss: 0.7248, Val Loss: 0.

In [13]:
# fold
n_folds = 10
batch_size = 128
num_epochs = 500
# FTTransformer
stratified_fold = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)
stratified_fold_ros = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)
stratified_fold_smote = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)

print('FTTransformer - original data')
ft_transformer_best_state, ft_transformer_fold_results_list = find_best_model_state('FTTransformer', X, y, input_dim=X.shape[1], num_classes=3, stratified_fold=stratified_fold, batch_size=batch_size, num_epochs=num_epochs, n_folds=n_folds, device=device, cat_dims=cat_dims, cat_idxs=cat_idxs, num_idxs=num_idxs)
print('FTTransformer - ROS data')
ft_transformer_best_ros_state, ft_transformer_fold_results_list_ros = find_best_model_state('FTTransformer', X_ros, y_ros, input_dim=X_ros.shape[1], num_classes=3, stratified_fold=stratified_fold_ros, batch_size=batch_size, num_epochs=num_epochs, n_folds=n_folds, device=device, cat_dims=cat_dims, cat_idxs=cat_idxs, num_idxs=num_idxs)
print('FTTransformer - SMOTE data')
ft_transformer_best_smote_state, ft_transformer_fold_results_list_smote = find_best_model_state('FTTransformer', X_smote, y_smote, input_dim=X_smote.shape[1], num_classes=3, stratified_fold=stratified_fold_smote, batch_size=batch_size, num_epochs=num_epochs, n_folds=n_folds, device=device, cat_dims=cat_dims, cat_idxs=cat_idxs, num_idxs=num_idxs)
save_model_state(ft_transformer_best_state, './models/ft_transformer_best_state.pth')
save_model_state(ft_transformer_best_ros_state, './models/ft_transformer_best_ros_state.pth')
save_model_state(ft_transformer_best_smote_state, './models/ft_transformer_best_smote_state.pth')
# Save csv
ft_transformer_fold_results_df = pd.DataFrame(ft_transformer_fold_results_list).to_csv('./models/ft_transformer_fold_results.csv', index=False)
ft_transformer_fold_results_df_ros = pd.DataFrame(ft_transformer_fold_results_list_ros).to_csv('./models/ft_transformer_fold_results_ros.csv', index=False)
ft_transformer_fold_results_df_smote = pd.DataFrame(ft_transformer_fold_results_list_smote).to_csv('./models/ft_transformer_fold_results_smote.csv', index=False)

FTTransformer - original data
Fold 1/10
Epoch [1/500], Train Loss: 0.7190, Val Loss: 0.6755
Epoch [10/500], Train Loss: 0.6264, Val Loss: 0.6249
Epoch [20/500], Train Loss: 0.5990, Val Loss: 0.6119
Epoch [30/500], Train Loss: 0.5581, Val Loss: 0.5866
Epoch [40/500], Train Loss: 0.5139, Val Loss: 0.5537
Epoch [50/500], Train Loss: 0.4755, Val Loss: 0.5247
Epoch [60/500], Train Loss: 0.4429, Val Loss: 0.5116
Epoch [70/500], Train Loss: 0.4179, Val Loss: 0.4994
Epoch [80/500], Train Loss: 0.4009, Val Loss: 0.4984
Epoch [90/500], Train Loss: 0.3848, Val Loss: 0.4794
Early stopping at epoch 99
Best f1_score for fold 1: 0.8098
Fold 2/10
Epoch [1/500], Train Loss: 0.7207, Val Loss: 0.6832
Epoch [10/500], Train Loss: 0.6259, Val Loss: 0.6269
Epoch [20/500], Train Loss: 0.5914, Val Loss: 0.5998
Epoch [30/500], Train Loss: 0.5406, Val Loss: 0.5575
Epoch [40/500], Train Loss: 0.4913, Val Loss: 0.5257
Epoch [50/500], Train Loss: 0.4552, Val Loss: 0.5118
Epoch [60/500], Train Loss: 0.4232, Val Loss

In [14]:
final_fold_results_df = pd.concat([pd.DataFrame(mlp_fold_results_list), pd.DataFrame(mlp_fold_results_list_ros), pd.DataFrame(mlp_fold_results_list_smote), pd.DataFrame(transformer_fold_results_list), pd.DataFrame(transformer_fold_results_list_ros), pd.DataFrame(transformer_fold_results_list_smote), pd.DataFrame(tab_transformer_fold_results_list), pd.DataFrame(tab_transformer_fold_results_list_ros), pd.DataFrame(tab_transformer_fold_results_list_smote), pd.DataFrame(ft_transformer_fold_results_list), pd.DataFrame(ft_transformer_fold_results_list_ros), pd.DataFrame(ft_transformer_fold_results_list_smote)], ignore_index=True)
final_fold_results_df.to_csv('./data/5_final_fold_results.csv', index=False)
final_fold_results_df

Unnamed: 0,Fold,Model Name,accuracy,precision,recall,f1_score,roc_auc,attention_weights
0,1,MLP,0.772293,0.756238,0.760196,0.757818,0.905977,
1,2,MLP,0.763010,0.746337,0.755540,0.750704,0.899468,
2,3,MLP,0.763010,0.743496,0.765572,0.753224,0.902575,
3,4,MLP,0.774824,0.759398,0.771537,0.764981,0.906906,
4,5,MLP,0.762447,0.745330,0.757961,0.751172,0.898570,
...,...,...,...,...,...,...,...,...
115,6,FTTransformer,0.873851,0.874368,0.873848,0.872224,0.960346,
116,7,FTTransformer,0.874565,0.875111,0.874562,0.872525,0.958017,
117,8,FTTransformer,0.877777,0.877824,0.877774,0.876277,0.961730,
118,9,FTTransformer,0.880364,0.880113,0.880361,0.879117,0.959837,


In [3]:
import pandas as pd
# Get average training results
final_fold_results_df = pd.read_csv('./data/5_final_fold_results.csv')
num_folds = 10
model_names = ['MLP'] * num_folds + ['MLP with ROS'] * num_folds + ['MLP with SMOTE'] * num_folds + ['Transformer'] * num_folds + ['Transformer with ROS'] * num_folds + ['Transformer with SMOTE'] * num_folds + ['TabTransformer'] * num_folds + ['TabTransformer with ROS'] * num_folds + ['TabTransformer with SMOTE'] * num_folds + ['FTTransformer'] * num_folds + ['FTTransformer with ROS'] * num_folds + ['FTTransformer with SMOTE'] * num_folds
final_fold_results_df['Model Name'] = model_names
final_fold_results_df = final_fold_results_df.groupby(['Model Name']).mean().reset_index()
print(final_fold_results_df)

                   Model Name  Fold  accuracy  precision    recall  f1_score  \
0               FTTransformer   5.5  0.802129   0.787394  0.806742  0.795623   
1      FTTransformer with ROS   5.5  0.891762   0.895721  0.891762  0.888227   
2    FTTransformer with SMOTE   5.5  0.876435   0.876573  0.876435  0.874892   
3                         MLP   5.5  0.766854   0.749328  0.761156  0.754689   
4                MLP with ROS   5.5  0.847835   0.850938  0.847834  0.841922   
5              MLP with SMOTE   5.5  0.839431   0.838955  0.839431  0.837012   
6              TabTransformer   5.5  0.749595   0.735625  0.730490  0.732660   
7     TabTransformer with ROS   5.5  0.883162   0.882723  0.883162  0.881073   
8   TabTransformer with SMOTE   5.5  0.847326   0.846277  0.847326  0.846413   
9                 Transformer   5.5  0.798107   0.783437  0.796345  0.789124   
10       Transformer with ROS   5.5  0.897141   0.899735  0.897141  0.894328   
11     Transformer with SMOTE   5.5  0.8

In [15]:
# Load best model
mlp_best_model_state = torch.load('./models/mlp_best_state.pth')
mlp_best_model = MLP(input_dim=X.shape[1], num_classes=3).to(device)
mlp_best_model.load_state_dict(mlp_best_model_state)
mlp_best_roc_model_state = torch.load('./models/mlp_best_ros_state.pth')
mlp_best_roc_model = MLP(input_dim=X.shape[1], num_classes=3).to(device)
mlp_best_roc_model.load_state_dict(mlp_best_roc_model_state)
mlp_best_smote_model_state = torch.load('./models/mlp_best_smote_state.pth')
mlp_best_smote_model = MLP(input_dim=X.shape[1], num_classes=3).to(device)
mlp_best_smote_model.load_state_dict(mlp_best_smote_model_state)
transformer_best_model_state = torch.load('./models/transformer_best_state.pth')
transformer_best_model = Transformer(input_dim=X.shape[1], num_classes=3, num_heads=4, num_layers=2, dim_model=64, dim_ff=128, dropout=0.1).to(device)
transformer_best_model.load_state_dict(transformer_best_model_state)
transformer_best_roc_model_state = torch.load('./models/transformer_best_ros_state.pth')
transformer_best_roc_model = Transformer(input_dim=X.shape[1], num_classes=3, num_heads=4, num_layers=2, dim_model=64, dim_ff=128, dropout=0.1).to(device)
transformer_best_roc_model.load_state_dict(transformer_best_roc_model_state)
transformer_best_smote_model_state = torch.load('./models/transformer_best_smote_state.pth')
transformer_best_smote_model = Transformer(input_dim=X.shape[1], num_classes=3, num_heads=4, num_layers=2, dim_model=64, dim_ff=128, dropout=0.1).to(device)
transformer_best_smote_model.load_state_dict(transformer_best_smote_model_state)
ft_transformer_best_model_state = torch.load('./models/ft_transformer_best_state.pth')
ft_transformer_best_model = FTTransformerWrapper(cat_dims=cat_dims, cat_idxs=cat_idxs, num_idxs=num_idxs, num_classes=3, num_heads=4, num_layers=2, dim_model=64, dim_ff=128, dropout=0.1).to(device)
ft_transformer_best_model.load_state_dict(ft_transformer_best_model_state)
ft_transformer_best_roc_model_state = torch.load('./models/ft_transformer_best_ros_state.pth')
ft_transformer_best_roc_model = FTTransformerWrapper(cat_dims=cat_dims, cat_idxs=cat_idxs, num_idxs=num_idxs, num_classes=3, num_heads=4, num_layers=2, dim_model=64, dim_ff=128, dropout=0.1).to(device)
ft_transformer_best_roc_model.load_state_dict(ft_transformer_best_roc_model_state)
ft_transformer_best_smote_model_state = torch.load('./models/ft_transformer_best_smote_state.pth')
ft_transformer_best_smote_model = FTTransformerWrapper(cat_dims=cat_dims, cat_idxs=cat_idxs, num_idxs=num_idxs, num_classes=3, num_heads=4, num_layers=2, dim_model=64, dim_ff=128, dropout=0.1).to(device)
ft_transformer_best_smote_model.load_state_dict(ft_transformer_best_smote_model_state)
tab_transformer_best_model_state = torch.load('./models/tab_transformer_best_state.pth')
tab_transformer_best_model = TabTransformerWrapper(cat_dims=cat_dims,cat_idxs=cat_idxs,num_idxs=num_idxs,num_classes=3,num_heads=4,num_layers=2, dim_model=64, dropout=0.1).to(device)
tab_transformer_best_model.load_state_dict(tab_transformer_best_model_state)
tab_transformer_best_roc_model_state = torch.load('./models/tab_transformer_best_ros_state.pth')
tab_transformer_best_roc_model = TabTransformerWrapper(cat_dims=cat_dims, cat_idxs=cat_idxs, num_idxs=num_idxs, num_classes=3, num_heads=4, num_layers=2, dim_model=64, dropout=0.1).to(device)
tab_transformer_best_roc_model.load_state_dict(tab_transformer_best_roc_model_state)
tab_transformer_best_smote_model_state = torch.load('./models/tab_transformer_best_smote_state.pth')
tab_transformer_best_smote_model = TabTransformerWrapper(cat_dims=cat_dims, cat_idxs=cat_idxs, num_idxs=num_idxs, num_classes=3, num_heads=4, num_layers=2, dim_model=64, dropout=0.1).to(device)
tab_transformer_best_smote_model.load_state_dict(tab_transformer_best_smote_model_state)

def evaluate_model(model, X_test, y_test, device, name='MLP'):
    """Evaluate the model on the test set."""
    test_loader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(X_test, y_test),
        batch_size=128, num_workers=0
    )
    model.eval()
    all_preds = []
    all_labels = []
    all_probs = []
    
    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            batch_x = batch_x.to(device)
            outputs = model(batch_x)
            probs = torch.nn.functional.softmax(outputs, dim=1)
            _, predicted = torch.max(outputs.data, 1)
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(batch_y.numpy())
            all_probs.extend(probs.cpu().numpy())

    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average='macro')
    recall = recall_score(all_labels, all_preds, average='macro')
    f1 = f1_score(all_labels, all_preds, average='macro')
    roc_auc = roc_auc_score(all_labels, all_probs, multi_class='ovr')

    results = {
        'name': name,
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1_score': f1,
        'roc_auc': roc_auc
    }

    return pd.DataFrame([results])

# Evaluate best model
result_mlp_df = evaluate_model(mlp_best_model, X_test, y_test, device, name='MLP')
result_mlp_roc_df = evaluate_model(mlp_best_roc_model, X_test, y_test, device, name='MLP_ROS')
result_mlp_smote_df = evaluate_model(mlp_best_smote_model, X_test, y_test, device, name='MLP_SMOTE')
result_transformer_df = evaluate_model(transformer_best_model, X_test, y_test, device, name='Transformer')
result_transformer_roc_df = evaluate_model(transformer_best_roc_model, X_test, y_test, device, name='Transformer_ROS')
result_transformer_smote_df = evaluate_model(transformer_best_smote_model, X_test, y_test, device, name='Transformer_SMOTE')
result_ft_transformer_df = evaluate_model(ft_transformer_best_model, X_test, y_test, device, name='FTTransformer')
result_ft_transformer_roc_df = evaluate_model(ft_transformer_best_roc_model, X_test, y_test, device, name='FTTransformer_ROS')
result_ft_transformer_smote_df = evaluate_model(ft_transformer_best_smote_model, X_test, y_test, device, name='FTTransformer_SMOTE')
result_tab_transformer_df = evaluate_model(tab_transformer_best_model, X_test, y_test, device, name='TabTransformer')
result_tab_transformer_roc_df = evaluate_model(tab_transformer_best_roc_model, X_test, y_test, device, name='TabTransformer_ROS')
result_tab_transformer_smote_df = evaluate_model(tab_transformer_best_smote_model, X_test, y_test, device, name='TabTransformer_SMOTE')

# Combine results
result_df = pd.concat([
    result_mlp_df, 
    result_mlp_roc_df, 
    result_mlp_smote_df, 
    result_transformer_df, 
    result_transformer_roc_df, 
    result_transformer_smote_df,
    result_tab_transformer_df,
    result_tab_transformer_roc_df,
    result_tab_transformer_smote_df,
    result_ft_transformer_df,
    result_ft_transformer_roc_df,
    result_ft_transformer_smote_df
], ignore_index=True)
result_df.to_csv('./data/5_test_result.csv', index=False)

In [16]:
result_df

Unnamed: 0,name,accuracy,precision,recall,f1_score,roc_auc
0,MLP,0.76827,0.75104,0.769231,0.75892,0.903942
1,MLP_ROS,0.761744,0.744606,0.803901,0.760298,0.906396
2,MLP_SMOTE,0.7591,0.738338,0.768856,0.750945,0.897471
3,Transformer,0.799437,0.783634,0.801266,0.791659,0.920219
4,Transformer_ROS,0.808551,0.790537,0.829693,0.805809,0.922623
5,Transformer_SMOTE,0.809564,0.796752,0.816514,0.805312,0.924563
6,TabTransformer,0.753812,0.741802,0.73443,0.73793,0.898271
7,TabTransformer_ROS,0.751505,0.736487,0.738947,0.737669,0.895666
8,TabTransformer_SMOTE,0.753193,0.735038,0.752972,0.74314,0.895229
9,FTTransformer,0.806639,0.791545,0.816989,0.801723,0.920135


In [4]:
import pandas as pd
result_df = pd.read_csv('./data/5_test_result.csv')
print(result_df)

                    name  accuracy  precision    recall  f1_score   roc_auc
0                    MLP  0.768270   0.751040  0.769231  0.758920  0.903942
1                MLP_ROS  0.761744   0.744606  0.803901  0.760298  0.906396
2              MLP_SMOTE  0.759100   0.738338  0.768856  0.750945  0.897471
3            Transformer  0.799437   0.783634  0.801266  0.791659  0.920219
4        Transformer_ROS  0.808551   0.790537  0.829693  0.805809  0.922623
5      Transformer_SMOTE  0.809564   0.796752  0.816514  0.805312  0.924563
6         TabTransformer  0.753812   0.741802  0.734430  0.737930  0.898271
7     TabTransformer_ROS  0.751505   0.736487  0.738947  0.737669  0.895666
8   TabTransformer_SMOTE  0.753193   0.735038  0.752972  0.743140  0.895229
9          FTTransformer  0.806639   0.791545  0.816989  0.801723  0.920135
10     FTTransformer_ROS  0.795668   0.780664  0.825457  0.795312  0.918643
11   FTTransformer_SMOTE  0.803713   0.790833  0.808548  0.798253  0.919698
