## Imports

In [33]:
import os

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, roc_curve, auc, precision_recall_curve, accuracy_score

from src.data_prep import prepare_data
# from src.models import logistic_regression_model, decision_tree_model, random_forest_model, lightgbm_model
# from src.train import train_sklearn_model
from src.evaluation import evaluate_model

# Apply seaborn's "whitegrid" style globally so later figures share one look.
sns.set(style="whitegrid")

2024-11-21 13:37:35.927537: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Load and Prepare Data

In [18]:
# Dataset identifier handed to the project's data-preparation helper.
dataset_name = 'creditcard'
# prepare_data (src.data_prep) returns four objects, unpacked here as the
# train/test features and train/test labels used by every model below.
# NOTE(review): split ratio, scaling and stratification are decided inside
# prepare_data and are not visible in this notebook — confirm there.
X_train, X_test, y_train, y_test = prepare_data(dataset_name)

## Train and Evaluate Models

### Logistic Regression

In [21]:
# Baseline model: logistic regression with scikit-learn's default settings.
logistic_regression = LogisticRegression()
# Kept as the last expression of the cell so the fitted estimator is displayed.
logistic_regression.fit(X_train, y_train)


LogisticRegression()

In [23]:
# Class predictions of the fitted logistic regression on the test features.
# Note: the name y_test_pred is reused (overwritten) by every later model cell.
y_test_pred = logistic_regression.predict(X_test)


In [29]:
# Fraction of test labels the logistic regression predicted correctly.
# NOTE(review): accuracy alone can look very high when labels are imbalanced
# (this notebook later applies SMOTE, which suggests they are) — consider also
# reporting precision/recall or PR-AUC.
accuracy_score(y_test, y_test_pred)

0.9992275552122467

In [30]:
# Persist the logistic-regression test predictions and the fitted estimator.

# Neither DataFrame.to_csv nor joblib.dump creates missing parent folders, so
# make sure the output directories exist (matters on a fresh checkout).
os.makedirs("artifacts/predictions", exist_ok=True)
os.makedirs("artifacts/models", exist_ok=True)

# True labels next to predicted labels, one row per test sample.
predictions = pd.DataFrame({"y_true": y_test, "y_pred": y_test_pred})
predictions.to_csv("artifacts/predictions/logistic_regression_predictions.csv", index=False)

joblib.dump(logistic_regression, "artifacts/models/logistic_regression_model.pkl")
print("Logistic Regression model and predictions saved successfully!")


Logistic Regression model and predictions saved successfully!


### Decision Tree

### Random Forest

In [31]:
# Train a 100-tree random forest (fixed random_state for repeatable results),
# predict on the test features, then persist predictions and the fitted model.
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

y_test_pred = rf.predict(X_test)

# Neither DataFrame.to_csv nor joblib.dump creates missing parent folders, so
# make sure the output directories exist (matters on a fresh checkout).
os.makedirs("artifacts/predictions", exist_ok=True)
os.makedirs("artifacts/models", exist_ok=True)

# True labels next to predicted labels, one row per test sample.
predictions = pd.DataFrame({"y_true": y_test, "y_pred": y_test_pred})
predictions.to_csv("artifacts/predictions/random_forest_predictions.csv", index=False)

joblib.dump(rf, "artifacts/models/random_forest_model.pkl")
print("Random Forest model and predictions saved successfully!")

Random Forest model and predictions saved successfully!


In [32]:
# Fraction of test labels the random forest predicted correctly
# (y_test_pred was overwritten by the random-forest cell above).
# NOTE(review): accuracy alone can look very high when labels are imbalanced —
# consider also reporting precision/recall or PR-AUC.
accuracy_score(y_test, y_test_pred)

0.9995962220427653

### LightGBM

### Neural Network

#### Light

In [None]:
class LightNN(nn.Module):
    """Small feed-forward binary classifier: input_dim -> 16 -> 8 -> 1.

    The two hidden layers are followed by ReLU activations and the single
    output unit is passed through a sigmoid, so ``forward`` yields one value
    between 0 and 1 per input row.
    """

    def __init__(self, input_dim):
        """Build the layers for inputs carrying ``input_dim`` features."""
        super().__init__()
        # Layers are created in the same order as they are applied in forward.
        self.fc1 = nn.Linear(input_dim, 16)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(16, 8)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(8, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Run ``x`` through both hidden layers and the sigmoid output."""
        first_hidden = self.relu1(self.fc1(x))
        second_hidden = self.relu2(self.fc2(first_hidden))
        logit = self.fc3(second_hidden)
        return self.sigmoid(logit)

# Train LightNN on the original training split, score the test split, and
# persist the predictions and the learned weights.
# Uses X_train / X_test / y_train / y_test from the data-preparation cell.

# Seed PyTorch before the model is built so weight initialisation — and with it
# the saved predictions and weights — is repeatable, in line with the
# random_state=42 used for the random forest and SMOTE in this notebook.
torch.manual_seed(42)

input_dim = X_train.shape[1]
model = LightNN(input_dim)

criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# `.values` implies the splits are pandas objects. Labels are reshaped to a
# single column so they line up with the model's one-unit output.
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)

# Training mode only needs to be set once; nothing inside the loop changes it.
model.train()
# Each epoch is a single full-batch gradient step over the whole training set.
for epoch in range(10):
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

model.eval()
with torch.no_grad():
    y_test_pred = model(X_test_tensor).numpy()
    # Threshold the sigmoid outputs at 0.5 to obtain 0/1 class labels.
    y_test_pred = (y_test_pred > 0.5).astype(int)

# DataFrame.to_csv and torch.save do not create missing parent folders, so
# make sure the output directories exist (matters on a fresh checkout).
os.makedirs("artifacts/predictions", exist_ok=True)
os.makedirs("artifacts/models", exist_ok=True)

# ravel() flattens the (N, 1) prediction array into one column of labels.
predictions = pd.DataFrame({"y_true": y_test, "y_pred": y_test_pred.ravel()})
predictions.to_csv("artifacts/predictions/light_nn_predictions.csv", index=False)

torch.save(model.state_dict(), "artifacts/models/light_nn_model.pth")
print("Light Neural Network model and predictions saved successfully!")


#### Dense

In [34]:
class DenseNN(nn.Module):
    """Deeper feed-forward binary classifier: input_dim -> 64 -> 32 -> 16 -> 1.

    Each of the three hidden layers is followed by a ReLU activation and the
    single output unit is passed through a sigmoid, so ``forward`` yields one
    value between 0 and 1 per input row.
    """

    def __init__(self, input_dim):
        """Build the layers for inputs carrying ``input_dim`` features."""
        super().__init__()
        # Layers are created in the same order as they are applied in forward.
        self.fc1 = nn.Linear(input_dim, 64)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(64, 32)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(32, 16)
        self.relu3 = nn.ReLU()
        self.fc4 = nn.Linear(16, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Run ``x`` through the three hidden layers and the sigmoid output."""
        first_hidden = self.relu1(self.fc1(x))
        second_hidden = self.relu2(self.fc2(first_hidden))
        third_hidden = self.relu3(self.fc3(second_hidden))
        logit = self.fc4(third_hidden)
        return self.sigmoid(logit)

# Train DenseNN on the original training split, score the test split, and
# persist the predictions and the learned weights.
# Relies on input_dim, X_train_tensor, y_train_tensor and X_test_tensor created
# in the Light neural-network cell — run that cell first.

# Seed PyTorch before the model is built so weight initialisation — and with it
# the saved predictions and weights — is repeatable, in line with the
# random_state=42 used for the random forest and SMOTE in this notebook.
torch.manual_seed(42)

model = DenseNN(input_dim)

criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training mode only needs to be set once; nothing inside the loop changes it.
model.train()
# Each epoch is a single full-batch gradient step over the whole training set.
for epoch in range(20):  # Number of epochs
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

model.eval()
with torch.no_grad():
    y_test_pred = model(X_test_tensor).numpy()
    # Threshold the sigmoid outputs at 0.5 to obtain 0/1 class labels.
    y_test_pred = (y_test_pred > 0.5).astype(int)

# DataFrame.to_csv and torch.save do not create missing parent folders, so
# make sure the output directories exist (matters on a fresh checkout).
os.makedirs("artifacts/predictions", exist_ok=True)
os.makedirs("artifacts/models", exist_ok=True)

# ravel() flattens the (N, 1) prediction array into one column of labels.
predictions = pd.DataFrame({"y_true": y_test, "y_pred": y_test_pred.ravel()})
predictions.to_csv("artifacts/predictions/dense_nn_predictions.csv", index=False)

torch.save(model.state_dict(), "artifacts/models/dense_nn_model.pth")
print("Dense Neural Network model and predictions saved successfully!")


NameError: name 'nn' is not defined

#### SMOTE

In [None]:
# Train LightNN on a SMOTE-resampled training split, score the untouched test
# split, and persist the predictions and the learned weights.
# Relies on input_dim and X_test_tensor created in the Light neural-network
# cell — run that cell first.

# Only the training data is resampled; the test tensor is used as-is.
smote = SMOTE(random_state=42)
X_train_balanced, y_train_balanced = smote.fit_resample(X_train, y_train)

# `.values` implies fit_resample handed back pandas objects. Labels are
# reshaped to a single column so they line up with the model's one-unit output.
X_train_balanced_tensor = torch.tensor(X_train_balanced.values, dtype=torch.float32)
y_train_balanced_tensor = torch.tensor(y_train_balanced.values, dtype=torch.float32).view(-1, 1)

# Seed PyTorch before the model is built so weight initialisation — and with it
# the saved predictions and weights — is repeatable, in line with the
# random_state=42 passed to SMOTE above.
torch.manual_seed(42)

model = LightNN(input_dim)

criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training mode only needs to be set once; nothing inside the loop changes it.
model.train()
# Each epoch is a single full-batch gradient step over the resampled set.
for epoch in range(10):
    optimizer.zero_grad()
    outputs = model(X_train_balanced_tensor)
    loss = criterion(outputs, y_train_balanced_tensor)
    loss.backward()
    optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

model.eval()
with torch.no_grad():
    y_test_pred = model(X_test_tensor).numpy()
    # Threshold the sigmoid outputs at 0.5 to obtain 0/1 class labels.
    y_test_pred = (y_test_pred > 0.5).astype(int)

# DataFrame.to_csv and torch.save do not create missing parent folders, so
# make sure the output directories exist (matters on a fresh checkout).
os.makedirs("artifacts/predictions", exist_ok=True)
os.makedirs("artifacts/models", exist_ok=True)

# ravel() flattens the (N, 1) prediction array into one column of labels.
predictions = pd.DataFrame({"y_true": y_test, "y_pred": y_test_pred.ravel()})
predictions.to_csv("artifacts/predictions/light_nn_SMOTE_predictions.csv", index=False)

torch.save(model.state_dict(), "artifacts/models/light_nn_SMOTE_model.pth")
print("Light Neural Network with SMOTE model and predictions saved successfully!")

In [None]:
# Train DenseNN on a SMOTE-resampled training split, score the untouched test
# split, and persist the predictions and the learned weights.
# Relies on input_dim and X_test_tensor created in the Light neural-network
# cell — run that cell first.

# Only the training data is resampled; the test tensor is used as-is.
# The resampling is repeated here (same random_state) so this cell does not
# need the Light + SMOTE cell to have run.
smote = SMOTE(random_state=42)
X_train_balanced, y_train_balanced = smote.fit_resample(X_train, y_train)

# `.values` implies fit_resample handed back pandas objects. Labels are
# reshaped to a single column so they line up with the model's one-unit output.
X_train_balanced_tensor = torch.tensor(X_train_balanced.values, dtype=torch.float32)
y_train_balanced_tensor = torch.tensor(y_train_balanced.values, dtype=torch.float32).view(-1, 1)

# Seed PyTorch before the model is built so weight initialisation — and with it
# the saved predictions and weights — is repeatable, in line with the
# random_state=42 passed to SMOTE above.
torch.manual_seed(42)

model = DenseNN(input_dim)

criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training mode only needs to be set once; nothing inside the loop changes it.
model.train()
# Each epoch is a single full-batch gradient step over the resampled set.
for epoch in range(10):
    optimizer.zero_grad()
    outputs = model(X_train_balanced_tensor)
    loss = criterion(outputs, y_train_balanced_tensor)
    loss.backward()
    optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

model.eval()
with torch.no_grad():
    y_test_pred = model(X_test_tensor).numpy()
    # Threshold the sigmoid outputs at 0.5 to obtain 0/1 class labels.
    y_test_pred = (y_test_pred > 0.5).astype(int)

# DataFrame.to_csv and torch.save do not create missing parent folders, so
# make sure the output directories exist (matters on a fresh checkout).
os.makedirs("artifacts/predictions", exist_ok=True)
os.makedirs("artifacts/models", exist_ok=True)

# ravel() flattens the (N, 1) prediction array into one column of labels.
predictions = pd.DataFrame({"y_true": y_test, "y_pred": y_test_pred.ravel()})
predictions.to_csv("artifacts/predictions/dense_nn_SMOTE_predictions.csv", index=False)

torch.save(model.state_dict(), "artifacts/models/dense_nn_SMOTE_model.pth")
print("Dense Neural Network with SMOTE model and predictions saved successfully!")

## Comparing Losses

### Binary Cross Entropy

### Focal Loss

### Weighted Binary Cross Entropy

### ROC-Star