In [2]:
from data_preprocessing import load_data, preprocess_data
from visualization import plot_roc_curve
import torch
from sklearn.model_selection import train_test_split
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score, classification_report
from model import ImprovedBankMarketingModel, train_model, weighted_soft_voting
import torch.utils.data as td
import warnings
warnings.filterwarnings('ignore')
import torch.nn as nn
from sklearn.preprocessing import StandardScaler, LabelEncoder
from ComputeMetrics import calculate_recall, calculate_accuracy, calculate_precision
from sklearn.metrics import confusion_matrix
from CostFunction import cost_sensitive_metric
from model import FocalLoss
 

In [3]:
data = load_data('bank_customers_train.csv')
processed_data = preprocess_data(data)    
# Split the data into features and target
scaler = StandardScaler()
x = scaler.fit_transform(processed_data.drop(columns=['y']))  # Drop target column
y = LabelEncoder().fit_transform(processed_data['y'])  # Encode target labels



In [4]:

# Train-test split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Normalize the features
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# Convert to PyTorch tensors
train_x = torch.tensor(x_train, dtype=torch.float32)
train_y = torch.tensor(y_train, dtype=torch.long)
test_x = torch.tensor(x_test, dtype=torch.float32)
test_y = torch.tensor(y_test, dtype=torch.long)

# Create DataLoader for batching
train_data = td.TensorDataset(train_x, train_y)
test_data = td.TensorDataset(test_x, test_y)
train_loader = td.DataLoader(train_data, batch_size=64, shuffle=True)
test_loader = td.DataLoader(test_data, batch_size=64, shuffle=False)


In [None]:
criterion = FocalLoss(alpha=1, gamma=2)

# Define hyperparameters
input_dim = x_train.shape[1]
hidden_dim = 128
output_dim = 2
num_models = 3
epochs = 10
threshold = 0.3

# Prepare data loaders (replace x_train, y_train with actual datasets)
train_dataset = TensorDataset(torch.tensor(x_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.long))
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Train ensemble models
models = []
weights = []

for i in range(num_models):
    model = ImprovedBankMarketingModel(input_dim, hidden_dim, output_dim)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = FocalLoss(alpha=1, gamma=2)  # Replace with the actual implementation of FocalLoss
    train_model(model, train_loader, criterion, optimizer, epochs)
    models.append(model)
    weights.append(1.0)

# Test ensemble model (replace test_x and y_test with actual datasets)
test_x_tensor = torch.tensor(test_x, dtype=torch.float32)
ensemble_probs = weighted_soft_voting(models, weights, test_x_tensor)
final_preds = (ensemble_probs >= threshold).astype(int)



Epoch 1/10, Loss: 79.0463
Epoch 2/10, Loss: 75.8364
Epoch 3/10, Loss: 75.1800
Epoch 4/10, Loss: 74.6301
Epoch 5/10, Loss: 74.3703
Epoch 6/10, Loss: 74.1651
Epoch 7/10, Loss: 74.0251
Epoch 8/10, Loss: 73.7724
Epoch 9/10, Loss: 73.6367
Epoch 10/10, Loss: 73.4987
Epoch 1/10, Loss: 78.5072
Epoch 2/10, Loss: 75.3573
Epoch 3/10, Loss: 74.8264
Epoch 4/10, Loss: 74.5709
Epoch 5/10, Loss: 74.2711
Epoch 6/10, Loss: 74.1718
Epoch 7/10, Loss: 73.7254
Epoch 8/10, Loss: 73.6976
Epoch 9/10, Loss: 73.6054
Epoch 10/10, Loss: 73.3357
Epoch 1/10, Loss: 79.2294
Epoch 2/10, Loss: 75.7026
Epoch 3/10, Loss: 75.0070
Epoch 4/10, Loss: 74.6900
Epoch 5/10, Loss: 74.3641
Epoch 6/10, Loss: 74.2346
Epoch 7/10, Loss: 73.8128
Epoch 8/10, Loss: 73.7700
Epoch 9/10, Loss: 73.6221
Epoch 10/10, Loss: 73.6411


'print("Final Accuracy:", accuracy_score(y_test, final_preds))\nprint("Classification Report:")\nprint(classification_report(y_test, final_preds))'

In [7]:
# Evaluate the ensemble model
accuracy = calculate_accuracy(y_test, final_preds)
recall = calculate_recall(y_test, final_preds)
precision = calculate_precision(y_test, final_preds)
conf_matrix = confusion_matrix(y_test, final_preds)
class_report = classification_report(y_test, final_preds)
total_cost = cost_sensitive_metric(y_test, final_preds)

# Print evaluation metrics
print(f"Accuracy of the model: {accuracy*100:.2f}% \n")
print(f"Recall of the model: {recall*100:.2f}%\n")
print(f"Precision of the model: {precision*100:.2f}%\n")
print("Confusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(class_report)
print(f"\nTotal Misclassification Cost: {total_cost}")


Accuracy of the model: 80.30% 

Recall of the model: 71.36%

Precision of the model: 32.71%

Confusion Matrix:
[[5666 1292]
 [ 252  628]]

Classification Report:
              precision    recall  f1-score   support

           0       0.96      0.81      0.88      6958
           1       0.33      0.71      0.45       880

    accuracy                           0.80      7838
   macro avg       0.64      0.76      0.66      7838
weighted avg       0.89      0.80      0.83      7838


Total Misclassification Cost: 19060
