In [2]:
# =========================================================
# 1. Imports
# =========================================================
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score
import joblib

# =========================================================
# 2. Load dataset
# =========================================================
# Announce the load, then read the CSV through a relative path (per the
# notebook's note, the file sits in the root folder next to fraud.ipynb).
print("📂 Loading dataset...")
df = pd.read_csv(filepath_or_buffer="creditcard.csv")
print(f"✅ Dataset loaded: {len(df)} rows, {len(df.columns)} columns")

# =========================================================
# 3. Prepare features + target
# =========================================================
# "Class" is the label column; every other column is used as a predictor.
y = df["Class"]               # Target (fraud = 1, legit = 0)
X = df.drop(columns="Class")  # Features

# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# ratio the same in both partitions; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# =========================================================
# 4. Scale features
# =========================================================
# Learn the per-column mean and standard deviation from the training rows
# only, then apply that same transform to both partitions so that no
# test-set statistics influence the scaling.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# =========================================================
# 5. Train model
# =========================================================
print("🚀 Training model...")
# class_weight="balanced" reweights samples inversely to class frequency,
# max_iter caps the solver at 1000 iterations, and fit() returns the
# estimator itself, so the fitted model can be bound in one statement.
model = LogisticRegression(
    random_state=42,
    class_weight="balanced",
    max_iter=1000,
).fit(X_train_scaled, y_train)
print("✅ Model training complete.")

# =========================================================
# 6. Evaluation
# =========================================================
# Hard class predictions on the held-out rows feed the per-class report.
y_pred = model.predict(X_test_scaled)

# Print the heading and the report as separate calls: passing both to a
# single print() puts the default " " separator in front of the report,
# which shifts its header row one column to the right of the data rows.
print("\n📊 Classification Report:")
print(classification_report(y_test, y_pred))

# ROC AUC is computed from scores rather than labels: column 1 of
# predict_proba is the probability of the second entry in model.classes_.
print("ROC AUC:", roc_auc_score(y_test, model.predict_proba(X_test_scaled)[:, 1]))

# =========================================================
# 7. Save model + scaler
# =========================================================
save_path = "./"  # root folder

# Persist the fitted estimator and the scaler it was trained with as two
# separate joblib files inside save_path.
joblib.dump(value=model, filename=os.path.join(save_path, "fraud_model.joblib"))
joblib.dump(value=scaler, filename=os.path.join(save_path, "fraud_scaler.joblib"))

print(f"✅ Model and scaler saved in: {save_path}")


📂 Loading dataset...
✅ Dataset loaded: 284807 rows, 31 columns
🚀 Training model...
✅ Model training complete.

📊 Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.98      0.99     56864
           1       0.06      0.92      0.11        98

    accuracy                           0.98     56962
   macro avg       0.53      0.95      0.55     56962
weighted avg       1.00      0.98      0.99     56962

ROC AUC: 0.9720834996210077
✅ Model and scaler saved in: ./
