# In [2]:
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.base import clone
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix
from sklearn.model_selection import train_test_split

# ====== 1. Prepare data ======
# The CSV is expected to be cleaned already (no leakage); 'Class' is the target.
df = pd.read_csv("../data/creditcard_cleaned.csv")
y = df['Class']
X = df.drop(columns=['Class'])

# Hold out 20% of the rows for testing, stratified on y, with a fixed seed
# so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Helper: evaluate & store results
def evaluate_model(name, model, X_test, y_test, results):
    """Score a fitted classifier on held-out data and record the metrics.

    Args:
        name: Label stored under the 'Model' key to identify this run.
        model: Fitted estimator; only its ``predict`` method is used.
        X_test: Feature matrix passed to ``model.predict``.
        y_test: True labels that the predictions are compared against.
        results: List that receives one dict per call (mutated in place).

    Returns:
        None. The metrics dict is appended to ``results``.
    """
    y_pred = model.predict(X_test)
    results.append({
        'Model': name,
        'Accuracy': accuracy_score(y_test, y_pred),
        # zero_division=0 keeps the score at 0.0 without emitting
        # UndefinedMetricWarning when the model predicts no positives
        # (precision) or the true labels contain no positives (recall).
        'Precision': precision_score(y_test, y_pred, zero_division=0),
        'Recall': recall_score(y_test, y_pred, zero_division=0),
        'Confusion Matrix': confusion_matrix(y_test, y_pred),
    })

# ====== 2. Run experiments ======
results = []
smote = SMOTE(random_state=42)

# Oversample the training split once, up front. The resampling does not
# depend on the classifier, and with a fixed integer random_state it yields
# the same data on every call, so repeating it per classifier is wasted work.
# Only the training split is resampled; X_test / y_test are left untouched.
X_train_sm, y_train_sm = smote.fit_resample(X_train, y_train)

# (label, features, target) for each sampling strategy, in reporting order.
training_sets = [
    ("No SMOTE", X_train, y_train),
    ("SMOTE", X_train_sm, y_train_sm),
]
# (label, class_weight) for each weighting strategy. None is what the
# estimators use when class_weight is not passed.
weightings = [
    ("No Weights", None),
    ("Class Weights", 'balanced'),
]

# NOTE(review): after SMOTE the training classes are presumably balanced, in
# which case class_weight='balanced' should reduce to unit weights - confirm
# whether the "SMOTE | Class Weights" runs add information.
for clf_name, clf in [
    ("Logistic Regression", LogisticRegression(max_iter=500, random_state=42)),
    ("Random Forest", RandomForestClassifier(n_estimators=100, random_state=42)),
]:
    # `clf` is only a prototype carrying the hyperparameters. Every run fits a
    # fresh clone, so no fitted state is shared between experiments and the
    # hyperparameters are spelled out in exactly one place.
    for sampling_label, X_fit, y_fit in training_sets:
        for weight_label, class_weight in weightings:
            model = clone(clf).set_params(class_weight=class_weight)
            model.fit(X_fit, y_fit)
            evaluate_model(
                f"{clf_name} | {sampling_label} | {weight_label}",
                model,
                X_test,
                y_test,
                results,
            )

# ====== 3. Display results ======
# Lift pandas' column-width cap so long cell values are not truncated.
pd.set_option('display.max_colwidth', None)

report_columns = ['Model', 'Accuracy', 'Precision', 'Recall', 'Confusion Matrix']
df_results = pd.DataFrame(results)
print(df_results[report_columns])


# Cell output: KeyboardInterrupt (execution was interrupted)