In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix
from imblearn.over_sampling import SMOTE
import pandas as pd

# Read the cleaned dataset, then separate the 'Class' column (the target)
# from all remaining columns (the features).
df = pd.read_csv("../data/creditcard_cleaned.csv")
y = df['Class']
X = df.drop(columns='Class')

In [5]:
# ====== 1. Prepare data ======
# Hold out 20% of the rows as a test partition. Stratifying on y keeps the
# class proportions of both partitions in line with the full dataset.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, test_size=0.2, random_state=42
)

# SMOTE is fitted on the training partition only; X_test / y_test are never
# resampled, so every evaluation below uses the original class distribution.
smote = SMOTE(random_state=42)
X_train_sm, y_train_sm = smote.fit_resample(X_train, y_train)

def print_metrics(name, y_true, y_pred):
    """Print accuracy, precision, recall and the confusion matrix for one model.

    Parameters
    ----------
    name : str
        Label shown in the ``--- name ---`` header line.
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels, aligned with ``y_true``.

    Returns
    -------
    None
        Everything is written to stdout.

    Notes
    -----
    ``zero_division=0`` is passed explicitly to precision and recall. When a
    model predicts no positive samples at all (or ``y_true`` has none), the
    score is undefined; scikit-learn's default already reports 0 in that case
    but also emits ``UndefinedMetricWarning``. Being explicit keeps the
    printed value the same and the cell output free of warnings.
    """
    print(f"--- {name} ---")
    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("Precision:", precision_score(y_true, y_pred, zero_division=0))
    print("Recall:", recall_score(y_true, y_pred, zero_division=0))
    # Rows are true classes, columns are predicted classes.
    print("Confusion Matrix:\n", confusion_matrix(y_true, y_pred))
    print()

In [6]:
# Logistic regression baseline: trained on the original (non-resampled)
# training partition with no class weighting.
lr = LogisticRegression(random_state=42, max_iter=500).fit(X_train, y_train)
y_pred = lr.predict(X_test)
print_metrics("Logistic Regression | No SMOTE | No Weights", y_test, y_pred)

--- Logistic Regression | No SMOTE | No Weights ---
Accuracy: 0.9991365030134283
Precision: 0.8484848484848485
Recall: 0.5894736842105263
Confusion Matrix:
 [[56641    10]
 [   39    56]]



In [7]:
# Logistic regression on the original training partition, with
# class_weight='balanced' as the only imbalance correction (no resampling).
lr_w = LogisticRegression(
    class_weight='balanced', random_state=42, max_iter=500
).fit(X_train, y_train)
y_pred = lr_w.predict(X_test)
print_metrics("Logistic Regression | No SMOTE | Class Weights", y_test, y_pred)

--- Logistic Regression | No SMOTE | Class Weights ---
Accuracy: 0.9751876784266732
Precision: 0.05611899932386748
Recall: 0.8736842105263158
Confusion Matrix:
 [[55255  1396]
 [   12    83]]



In [8]:
# Logistic regression trained on the SMOTE-resampled training data, without
# class weights. Evaluation still uses the untouched test partition.
lr_sm = LogisticRegression(random_state=42, max_iter=500).fit(X_train_sm, y_train_sm)
y_pred = lr_sm.predict(X_test)
print_metrics("Logistic Regression | SMOTE | No Weights", y_test, y_pred)

--- Logistic Regression | SMOTE | No Weights ---
Accuracy: 0.9737250202657456
Precision: 0.05313700384122919
Recall: 0.8736842105263158
Confusion Matrix:
 [[55172  1479]
 [   12    83]]



In [9]:
# Logistic regression trained on the SMOTE-resampled training data AND with
# class_weight='balanced'.
# NOTE(review): the resampled training set presumably has equal class counts,
# in which case 'balanced' weights are 1 for both classes and add nothing --
# the metrics printed for this run are identical to the SMOTE-only run.
lr_sm_w = LogisticRegression(
    class_weight='balanced', random_state=42, max_iter=500
).fit(X_train_sm, y_train_sm)
y_pred = lr_sm_w.predict(X_test)
print_metrics("Logistic Regression | SMOTE | Class Weights", y_test, y_pred)

--- Logistic Regression | SMOTE | Class Weights ---
Accuracy: 0.9737250202657456
Precision: 0.05313700384122919
Recall: 0.8736842105263158
Confusion Matrix:
 [[55172  1479]
 [   12    83]]



In [10]:
# Random forest baseline: 100 trees, original (non-resampled) training
# partition, no class weighting.
rf = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)
y_pred = rf.predict(X_test)
print_metrics("Random Forest | No SMOTE | No Weights", y_test, y_pred)

--- Random Forest | No SMOTE | No Weights ---
Accuracy: 0.9995241955380115
Precision: 0.9722222222222222
Recall: 0.7368421052631579
Confusion Matrix:
 [[56649     2]
 [   25    70]]



In [11]:
# Random forest on the original training partition, with
# class_weight='balanced' as the only imbalance correction (no resampling).
rf_w = RandomForestClassifier(
    class_weight='balanced', random_state=42, n_estimators=100
).fit(X_train, y_train)
y_pred = rf_w.predict(X_test)
print_metrics("Random Forest | No SMOTE | Class Weights", y_test, y_pred)

--- Random Forest | No SMOTE | Class Weights ---
Accuracy: 0.9995241955380115
Precision: 0.9857142857142858
Recall: 0.7263157894736842
Confusion Matrix:
 [[56650     1]
 [   26    69]]



In [12]:
# Random forest trained on the SMOTE-resampled training data, without class
# weights. Evaluation still uses the untouched test partition.
rf_sm = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train_sm, y_train_sm)
y_pred = rf_sm.predict(X_test)
print_metrics("Random Forest | SMOTE | No Weights", y_test, y_pred)

--- Random Forest | SMOTE | No Weights ---
Accuracy: 0.9994889507630493
Precision: 0.9230769230769231
Recall: 0.7578947368421053
Confusion Matrix:
 [[56645     6]
 [   23    72]]



In [13]:
# Random forest trained on the SMOTE-resampled training data AND with
# class_weight='balanced'.
# NOTE(review): the resampled training set presumably has equal class counts,
# in which case 'balanced' weights are 1 for both classes and add nothing --
# the metrics printed for this run are identical to the SMOTE-only run.
rf_sm_w = RandomForestClassifier(
    class_weight='balanced', random_state=42, n_estimators=100
).fit(X_train_sm, y_train_sm)
y_pred = rf_sm_w.predict(X_test)
print_metrics("Random Forest | SMOTE | Class Weights", y_test, y_pred)

--- Random Forest | SMOTE | Class Weights ---
Accuracy: 0.9994889507630493
Precision: 0.9230769230769231
Recall: 0.7578947368421053
Confusion Matrix:
 [[56645     6]
 [   23    72]]

