## Imports

In [40]:
import os

import joblib
import lightgbm as lgb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    auc,
    confusion_matrix,
    log_loss,
    precision_recall_curve,
    roc_auc_score,
    roc_curve,
)
from sklearn.model_selection import train_test_split

from src.data_prep import prepare_data
# from src.models import logistic_regression_model, decision_tree_model, random_forest_model, lightgbm_model
# from src.train import train_sklearn_model
from src.evaluation import evaluate_model

# Apply seaborn's "whitegrid" style to the plots drawn after this cell.
sns.set(style="whitegrid")

## Load and Prepare Data

In [6]:
# Load the train/test split for the chosen dataset through the project helper.
dataset_name = 'creditcard'
X_train, X_test, y_train, y_test = prepare_data(dataset_name)

# Fail fast if features and labels are misaligned: every model below pairs
# row i of X_* with label i of y_*.
assert len(X_train) == len(y_train), "X_train / y_train length mismatch"
assert len(X_test) == len(y_test), "X_test / y_test length mismatch"

## Train and Evaluate Models

### Logistic Regression

In [7]:
# Baseline model: logistic regression with scikit-learn's default settings.
logistic_regression = LogisticRegression()
logistic_regression.fit(X=X_train, y=y_train)


In [8]:
# Predicted class labels for the held-out split.
y_test_pred = logistic_regression.predict(X=X_test)


In [11]:
# Share of test rows whose predicted label equals the true label.
# NOTE(review): the LightGBM log in this notebook reports 300 positives vs
# 170583 negatives in its training data, so accuracy alone is probably not a
# sufficient metric here -- confirm against ROC AUC / precision-recall.
accuracy_score(y_true=y_test, y_pred=y_test_pred)

0.9992802219023208

In [12]:
# Make sure the output folders exist first: neither DataFrame.to_csv nor
# joblib.dump creates missing parent directories, so on a fresh checkout this
# cell would otherwise fail. The later model cells write into the same folders.
os.makedirs("artifacts/predictions", exist_ok=True)
os.makedirs("artifacts/models", exist_ok=True)

# Persist the true and predicted labels side by side for later comparison.
predictions = pd.DataFrame({"y_true": y_test, "y_pred": y_test_pred})
predictions.to_csv("artifacts/predictions/logistic_regression_predictions.csv", index=False)

# Serialise the fitted estimator next to its predictions.
joblib.dump(logistic_regression, "artifacts/models/logistic_regression_model.pkl")
print("Logistic Regression model and predictions saved successfully!")


Logistic Regression model and predictions saved successfully!


### Decision Tree

### Random Forest

In [13]:
# Random forest with 100 trees; the fixed random_state keeps the fit repeatable.
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
y_test_pred = rf.predict(X_test)

# Store the true labels next to the forest's predicted labels.
predictions = pd.DataFrame({"y_true": y_test, "y_pred": y_test_pred})
predictions.to_csv(
    "artifacts/predictions/random_forest_predictions.csv",
    index=False,
)

# Serialise the fitted forest to the models folder.
joblib.dump(rf, "artifacts/models/random_forest_model.pkl")
print("Random Forest model and predictions saved successfully!")

Random Forest model and predictions saved successfully!


In [14]:
# Share of test rows the random forest labels correctly.
accuracy_score(y_true=y_test, y_pred=y_test_pred)

0.9995084442259752

### LightGBM

[LightGBM] [Info] Number of positive: 300, number of negative: 170583
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.012829 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7650
[LightGBM] [Info] Number of data points in the train set: 170883, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.001756 -> initscore=-6.343195
[LightGBM] [Info] Start training from score -6.343195
Training until validation scores don't improve for 20 rounds
[100]	fit's binary_logloss: 0.00182857	val's binary_logloss: 0.00391336
[200]	fit's binary_logloss: 0.000773641	val's binary_logloss: 0.00333556
[300]	fit's binary_logloss: 0.000361373	val's binary_logloss: 0.00321147
Early stopping, best iteration is:
[329]	fit's binary_logloss: 0.000293073	val's binary_logloss: 0.00320633

Test's ROC AUC: 0.97419
Test's logloss: 0.00300


<lightgbm.basic.Booster at 0x2b1d033fe80>

### Neural Network

#### Light

In [25]:
class LightNN(nn.Module):
    """Small feed-forward binary classifier: input_dim -> 16 -> 8 -> 1.

    Both hidden layers are followed by ReLU; the single output unit is passed
    through a sigmoid, so ``forward`` returns one value between 0 and 1 per row.
    """

    def __init__(self, input_dim):
        """Build the three linear layers and their activations.

        Args:
            input_dim: Number of features in each input row.
        """
        super().__init__()
        # Attribute names determine the state_dict keys, so they are part of
        # the saved-model format.
        self.fc1 = nn.Linear(in_features=input_dim, out_features=16)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(in_features=16, out_features=8)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(in_features=8, out_features=1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Run a batch through the network and return the sigmoid outputs."""
        hidden_a = self.relu1(self.fc1(x))
        hidden_b = self.relu2(self.fc2(hidden_a))
        return self.sigmoid(self.fc3(hidden_b))

# Seed PyTorch so the weight initialisation -- and therefore the saved model and
# predictions -- are identical on every Restart & Run All. 42 matches the
# random_state used for the random forest and SMOTE in this notebook.
torch.manual_seed(42)

input_dim = X_train.shape[1]
model = LightNN(input_dim)

# LightNN already ends in a sigmoid, so plain BCELoss (not the logits variant)
# is the matching criterion.
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Float32 tensors for the whole split; labels are reshaped to a column so they
# line up with the (N, 1) model output.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.to_numpy(), dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.to_numpy(), dtype=torch.float32).view(-1, 1)

# NOTE(review): every epoch is a single full-batch step, i.e. 10 optimizer
# updates in total. The recorded run only went from 0.7862 to 0.7565, so the
# network looks far from converged -- consider mini-batches or more epochs.
model.train()  # set once; nothing inside the loop leaves training mode
for epoch in range(10):
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

# Inference on the test split: threshold the sigmoid output at 0.5.
model.eval()
with torch.no_grad():
    y_test_pred = model(X_test_tensor).numpy()
    y_test_pred = (y_test_pred > 0.5).astype(int)

# y_test_pred is (N, 1); flatten it so it forms a single column next to y_true.
predictions = pd.DataFrame({"y_true": y_test, "y_pred": y_test_pred.ravel()})
predictions.to_csv("artifacts/predictions/light_nn_predictions.csv", index=False)

# Only the weights are saved; loading them requires the LightNN class.
torch.save(model.state_dict(), "artifacts/models/light_nn_model.pth")
print("Light Neural Network model and predictions saved successfully!")


Epoch 1, Loss: 0.7862
Epoch 2, Loss: 0.7829
Epoch 3, Loss: 0.7796
Epoch 4, Loss: 0.7762
Epoch 5, Loss: 0.7729
Epoch 6, Loss: 0.7696
Epoch 7, Loss: 0.7663
Epoch 8, Loss: 0.7631
Epoch 9, Loss: 0.7598
Epoch 10, Loss: 0.7565
Light Neural Network model and predictions saved successfully!


#### Dense

In [26]:
class DenseNN(nn.Module):
    """Deeper feed-forward binary classifier: input_dim -> 64 -> 32 -> 16 -> 1.

    Each of the three hidden layers is followed by ReLU; the single output
    unit is passed through a sigmoid, so ``forward`` returns one value between
    0 and 1 per row.
    """

    def __init__(self, input_dim):
        """Build the four linear layers and their activations.

        Args:
            input_dim: Number of features in each input row.
        """
        super().__init__()
        # Attribute names determine the state_dict keys, so they are part of
        # the saved-model format.
        self.fc1 = nn.Linear(in_features=input_dim, out_features=64)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(in_features=64, out_features=32)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(in_features=32, out_features=16)
        self.relu3 = nn.ReLU()
        self.fc4 = nn.Linear(in_features=16, out_features=1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Run a batch through the network and return the sigmoid outputs."""
        hidden_a = self.relu1(self.fc1(x))
        hidden_b = self.relu2(self.fc2(hidden_a))
        hidden_c = self.relu3(self.fc3(hidden_b))
        return self.sigmoid(self.fc4(hidden_c))

# Seed PyTorch so the weight initialisation -- and therefore the saved model and
# predictions -- are identical on every Restart & Run All. 42 matches the
# random_state used for the random forest and SMOTE in this notebook.
torch.manual_seed(42)

# Reuses input_dim and the train/test tensors built in the LightNN cell.
model = DenseNN(input_dim)

# DenseNN already ends in a sigmoid, so plain BCELoss is the matching criterion.
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# NOTE(review): every epoch is a single full-batch step, i.e. 20 optimizer
# updates in total. The recorded run ended at a loss of 0.5012, so the network
# is probably not converged -- consider mini-batches or more epochs.
model.train()  # set once; nothing inside the loop leaves training mode
for epoch in range(20):  # Number of epochs
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

# Inference on the test split: threshold the sigmoid output at 0.5.
model.eval()
with torch.no_grad():
    y_test_pred = model(X_test_tensor).numpy()
    y_test_pred = (y_test_pred > 0.5).astype(int)

# y_test_pred is (N, 1); flatten it so it forms a single column next to y_true.
predictions = pd.DataFrame({"y_true": y_test, "y_pred": y_test_pred.ravel()})
predictions.to_csv("artifacts/predictions/dense_nn_predictions.csv", index=False)

# Only the weights are saved; loading them requires the DenseNN class.
torch.save(model.state_dict(), "artifacts/models/dense_nn_model.pth")
print("Dense Neural Network model and predictions saved successfully!")


Epoch 1, Loss: 0.6651
Epoch 2, Loss: 0.6584
Epoch 3, Loss: 0.6518
Epoch 4, Loss: 0.6453
Epoch 5, Loss: 0.6388
Epoch 6, Loss: 0.6323
Epoch 7, Loss: 0.6256
Epoch 8, Loss: 0.6188
Epoch 9, Loss: 0.6117
Epoch 10, Loss: 0.6042
Epoch 11, Loss: 0.5964
Epoch 12, Loss: 0.5881
Epoch 13, Loss: 0.5792
Epoch 14, Loss: 0.5699
Epoch 15, Loss: 0.5599
Epoch 16, Loss: 0.5493
Epoch 17, Loss: 0.5382
Epoch 18, Loss: 0.5264
Epoch 19, Loss: 0.5141
Epoch 20, Loss: 0.5012
Dense Neural Network model and predictions saved successfully!


#### SMOTE

In [31]:
# Oversample the training split only; predictions below are still made on the
# original, untouched test tensor.
smote = SMOTE(random_state=42)
X_train_balanced, y_train_balanced = smote.fit_resample(X_train, y_train)

# Convert the labels to a NumPy array explicitly before building the tensor,
# matching how y_train is converted for the non-SMOTE models, rather than
# relying on torch.tensor to interpret the resampled container directly.
X_train_balanced_tensor = torch.tensor(X_train_balanced, dtype=torch.float32)
y_train_balanced_tensor = torch.tensor(np.asarray(y_train_balanced), dtype=torch.float32).view(-1, 1)

# Seed PyTorch so the weight initialisation -- and therefore the saved model and
# predictions -- are identical on every Restart & Run All.
torch.manual_seed(42)
model = LightNN(input_dim)

# LightNN already ends in a sigmoid, so plain BCELoss is the matching criterion.
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# NOTE(review): every epoch is a single full-batch step, i.e. 10 optimizer
# updates in total. The recorded run ended at a loss of 0.7388, so the network
# looks far from converged -- consider mini-batches or more epochs.
model.train()  # set once; nothing inside the loop leaves training mode
for epoch in range(10):
    optimizer.zero_grad()
    outputs = model(X_train_balanced_tensor)
    loss = criterion(outputs, y_train_balanced_tensor)
    loss.backward()
    optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

# Inference on the test split: threshold the sigmoid output at 0.5.
model.eval()
with torch.no_grad():
    y_test_pred = model(X_test_tensor).numpy()
    y_test_pred = (y_test_pred > 0.5).astype(int)

# y_test_pred is (N, 1); flatten it so it forms a single column next to y_true.
predictions = pd.DataFrame({"y_true": y_test, "y_pred": y_test_pred.ravel()})
predictions.to_csv("artifacts/predictions/light_nn_SMOTE_predictions.csv", index=False)

# Only the weights are saved; loading them requires the LightNN class.
torch.save(model.state_dict(), "artifacts/models/light_nn_SMOTE_model.pth")
print("Light Neural Network with SMOTE model and predictions saved successfully!")

Epoch 1, Loss: 0.8642
Epoch 2, Loss: 0.8481
Epoch 3, Loss: 0.8325
Epoch 4, Loss: 0.8175
Epoch 5, Loss: 0.8030
Epoch 6, Loss: 0.7890
Epoch 7, Loss: 0.7756
Epoch 8, Loss: 0.7628
Epoch 9, Loss: 0.7505
Epoch 10, Loss: 0.7388
Light Neural Network with SMOTE model and predictions saved successfully!


In [33]:
# Oversample the training split only; predictions below are still made on the
# original, untouched test tensor. The resampling is recomputed here so this
# cell does not depend on the balanced arrays from the LightNN SMOTE cell.
smote = SMOTE(random_state=42)
X_train_balanced, y_train_balanced = smote.fit_resample(X_train, y_train)

# Convert the labels to a NumPy array explicitly before building the tensor,
# matching how y_train is converted for the non-SMOTE models, rather than
# relying on torch.tensor to interpret the resampled container directly.
X_train_balanced_tensor = torch.tensor(X_train_balanced, dtype=torch.float32)
y_train_balanced_tensor = torch.tensor(np.asarray(y_train_balanced), dtype=torch.float32).view(-1, 1)

# Seed PyTorch so the weight initialisation -- and therefore the saved model and
# predictions -- are identical on every Restart & Run All.
torch.manual_seed(42)
model = DenseNN(input_dim)

# DenseNN already ends in a sigmoid, so plain BCELoss is the matching criterion.
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# NOTE(review): every epoch is a single full-batch step, i.e. 10 optimizer
# updates in total. The recorded run ended at a loss of 0.6587, so the network
# is probably not converged -- consider mini-batches or more epochs.
model.train()  # set once; nothing inside the loop leaves training mode
for epoch in range(10):
    optimizer.zero_grad()
    outputs = model(X_train_balanced_tensor)
    loss = criterion(outputs, y_train_balanced_tensor)
    loss.backward()
    optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

# Inference on the test split: threshold the sigmoid output at 0.5.
model.eval()
with torch.no_grad():
    y_test_pred = model(X_test_tensor).numpy()
    y_test_pred = (y_test_pred > 0.5).astype(int)

# y_test_pred is (N, 1); flatten it so it forms a single column next to y_true.
predictions = pd.DataFrame({"y_true": y_test, "y_pred": y_test_pred.ravel()})
predictions.to_csv("artifacts/predictions/dense_nn_SMOTE_predictions.csv", index=False)

# Only the weights are saved; loading them requires the DenseNN class.
torch.save(model.state_dict(), "artifacts/models/dense_nn_SMOTE_model.pth")
print("Dense Neural Network with SMOTE model and predictions saved successfully!")

Epoch 1, Loss: 0.7143
Epoch 2, Loss: 0.7058
Epoch 3, Loss: 0.6980
Epoch 4, Loss: 0.6915
Epoch 5, Loss: 0.6859
Epoch 6, Loss: 0.6808
Epoch 7, Loss: 0.6755
Epoch 8, Loss: 0.6701
Epoch 9, Loss: 0.6645
Epoch 10, Loss: 0.6587
Dense Neural Network with SMOTE model and predictions saved successfully!


## Comparing Losses

### Binary Cross Entropy

### Focal Loss

### Weighted Binary Cross Entropy

### ROC-Star