In [1]:
import pandas as pd 
import numpy as np 
from sklearn.model_selection import train_test_split, GridSearchCV 
from sklearn.preprocessing import StandardScaler 
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score, roc_curve, precision_recall_curve 
import matplotlib.pyplot as plt 
import seaborn as sns 
from xgboost import XGBClassifier

In [2]:
df = pd.read_csv("/Users/nsmz/creditcard_data/creditcard.csv") 
df.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [3]:
#preprocessing
scaler = StandardScaler() 
df['scaled_time'] = scaler.fit_transform(df['Time'].values.reshape(-1, 1)) 
df['scaled_amount'] = scaler.fit_transform(df['Amount'].values.reshape(-1, 1))

# Drop original Time and Amount 
df = df.drop(['Time', 'Amount'], axis=1)

# Move scaled columns next to the others 
scaled_cols = ['scaled_time', 'scaled_amount'] 
df = pd.concat([df.drop(scaled_cols, axis=1), df[scaled_cols]], axis=1) 
df.head()

Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,...,V22,V23,V24,V25,V26,V27,V28,Class,scaled_time,scaled_amount
0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,0.090794,...,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,0,-1.996583,0.244964
1,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,-0.166974,...,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,0,-1.996583,-0.342475
2,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,0.207643,...,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,0,-1.996562,1.160686
3,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,-0.054952,...,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,0,-1.996562,0.140534
4,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,0.753074,...,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,0,-1.996541,-0.073403


In [4]:
#Train-Test Split 
X = df.drop('Class', axis=1) 
y = df['Class'] 
X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.2, random_state=42, stratify=y ) 

print("Training set shape:", X_train.shape) 
print("Testing set shape:", X_test.shape) 
print("Fraud cases in training:", sum(y_train)) 
print("Fraud cases in testing:", sum(y_test))

Training set shape: (227845, 30)
Testing set shape: (56962, 30)
Fraud cases in training: 394
Fraud cases in testing: 98


In [5]:
#Handling Imbalance

from collections import Counter

# Check imbalance ratio in training set
counter = Counter(y_train)
majority = counter[0]
minority = counter[1]
imbalance_ratio = majority / minority

print("Class distribution:", counter)
print("Imbalance ratio (majority/minority):", imbalance_ratio)

# This ratio is useful for scale_pos_weight in XGBoost
scale_pos_weight = imbalance_ratio


Class distribution: Counter({0: 227451, 1: 394})
Imbalance ratio (majority/minority): 577.2868020304569


In [6]:
#Logistic Regression Baseline

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

# Define model with class_weight='balanced' to handle imbalance
log_reg = LogisticRegression(max_iter=1000, class_weight='balanced', random_state=42)

# Train
log_reg.fit(X_train, y_train)

# Predict
y_pred_lr = log_reg.predict(X_test)

# Evaluation
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_lr))

print("\nClassification Report:")
print(classification_report(y_test, y_pred_lr))

print("\nROC AUC Score:")
print(roc_auc_score(y_test, y_pred_lr))


Confusion Matrix:
[[55475  1389]
 [    8    90]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.98      0.99     56864
           1       0.06      0.92      0.11        98

    accuracy                           0.98     56962
   macro avg       0.53      0.95      0.55     56962
weighted avg       1.00      0.98      0.99     56962


ROC AUC Score:
0.9469703223157581


In [7]:
# Step 6: Random Forest Classifier

from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
    class_weight='balanced_subsample',  # handles class imbalance
    n_jobs=-1
)

rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_rf))

print("\nClassification Report:")
print(classification_report(y_test, y_pred_rf))

print("\nROC AUC Score:")
print(roc_auc_score(y_test, y_pred_rf))


Confusion Matrix:
[[56861     3]
 [   24    74]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.96      0.76      0.85        98

    accuracy                           1.00     56962
   macro avg       0.98      0.88      0.92     56962
weighted avg       1.00      1.00      1.00     56962


ROC AUC Score:
0.8775246416799696


In [8]:
# Step 7: Random Forest Hyperparameter Tuning

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

rf = RandomForestClassifier(random_state=42, n_jobs=-1, class_weight='balanced')

param_grid_rf = {
    'n_estimators': [100, 200],
    'max_depth': [10, 20, None],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2]
}

grid_rf = GridSearchCV(
    rf,
    param_grid_rf,
    scoring='f1',
    cv=3,
    n_jobs=-1,
    verbose=1
)

grid_rf.fit(X_train, y_train)

print("Best RF Params:", grid_rf.best_params_)
print("Best RF F1 Score:", grid_rf.best_score_)


Fitting 3 folds for each of 24 candidates, totalling 72 fits


Exception ignored in: <function ResourceTracker.__del__ at 0x104981bc0>
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.13/multiprocessing/resource_tracker.py", line 82, in __del__
  File "/opt/anaconda3/lib/python3.13/multiprocessing/resource_tracker.py", line 91, in _stop
  File "/opt/anaconda3/lib/python3.13/multiprocessing/resource_tracker.py", line 116, in _stop_locked
ChildProcessError: [Errno 10] No child processes
Exception ignored in: <function ResourceTracker.__del__ at 0x105ba9bc0>
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.13/multiprocessing/resource_tracker.py", line 82, in __del__
  File "/opt/anaconda3/lib/python3.13/multiprocessing/resource_tracker.py", line 91, in _stop
  File "/opt/anaconda3/lib/python3.13/multiprocessing/resource_tracker.py", line 116, in _stop_locked
ChildProcessError: [Errno 10] No child processes
Exception ignored in: <function ResourceTracker.__del__ at 0x10584dbc0>
Traceback (most recent call last

Best RF Params: {'max_depth': 20, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 100}
Best RF F1 Score: 0.8404799090903957


In [9]:
best_rf = grid_rf.best_estimator_

y_pred_rf = best_rf.predict(X_test)

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_rf))

print("\nClassification Report:")
print(classification_report(y_test, y_pred_rf))

print("\nROC AUC Score:")
print(roc_auc_score(y_test, best_rf.predict_proba(X_test)[:, 1]))


Confusion Matrix:
[[56859     5]
 [   21    77]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.94      0.79      0.86        98

    accuracy                           1.00     56962
   macro avg       0.97      0.89      0.93     56962
weighted avg       1.00      1.00      1.00     56962


ROC AUC Score:
0.9468468626899269


In [10]:
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

# Recalculate scale_pos_weight (ratio of negatives to positives)
from collections import Counter
counts = Counter(y_train)
scale_pos_weight = counts[0] / counts[1]
print("scale_pos_weight:", scale_pos_weight)

# Initialize XGB with imbalance handling
xgb_model = XGBClassifier(
    n_estimators=200,
    max_depth=6,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    scale_pos_weight=scale_pos_weight,
    eval_metric='auc',
    random_state=42,
    n_jobs=-1
)

# Train
xgb_model.fit(X_train, y_train)

# Predict
y_pred_xgb = xgb_model.predict(X_test)
y_prob_xgb = xgb_model.predict_proba(X_test)[:, 1]

# Evaluate
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred_xgb))
print("\nClassification Report:")
print(classification_report(y_test, y_pred_xgb))
print("ROC-AUC Score:", roc_auc_score(y_test, y_prob_xgb))


scale_pos_weight: 577.2868020304569

Confusion Matrix:
[[56852    12]
 [   16    82]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.87      0.84      0.85        98

    accuracy                           1.00     56962
   macro avg       0.94      0.92      0.93     56962
weighted avg       1.00      1.00      1.00     56962

ROC-AUC Score: 0.9822178660434349


In [13]:
import xgboost as xgb

# Assign your tuned XGBoost model to best_xgb if not already saved
best_xgb = xgb.XGBClassifier(
    scale_pos_weight=577.2868020304569,
    eval_metric='logloss',
    use_label_encoder=False,
    random_state=42
)

best_xgb.fit(X_train, y_train)


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


In [15]:
# Final Ensemble: Random Forest + XGBoost (Voting Classifier)

# ==========================
# Final Ensemble: RF + XGB
# ==========================

import xgboost as xgb
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from imblearn.over_sampling import SMOTE
import numpy as np

# Apply SMOTE to balance the training data
smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X_train, y_train)

# Define your tuned models
best_rf = RandomForestClassifier(
    n_estimators=200,
    max_depth=10,
    random_state=42
)

best_xgb = xgb.XGBClassifier(
    scale_pos_weight=577.2868020304569,
    eval_metric='logloss',
    random_state=42
)

# Ensemble
ensemble_model = VotingClassifier(
    estimators=[
        ('rf', best_rf),
        ('xgb', best_xgb)
    ],
    voting='soft'
)

ensemble_model.fit(X_res, y_res)

# Predict with adjustable threshold
y_proba = ensemble_model.predict_proba(X_test)[:,1]
threshold = 0.3  # adjust to boost fraud recall
y_pred_thresh = (y_proba >= threshold).astype(int)

# Evaluation
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_thresh))
print("\nClassification Report:")
print(classification_report(y_test, y_pred_thresh))
print("ROC-AUC:", roc_auc_score(y_test, y_proba))


Confusion Matrix:
[[56690   174]
 [   13    85]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.33      0.87      0.48        98

    accuracy                           1.00     56962
   macro avg       0.66      0.93      0.74     56962
weighted avg       1.00      1.00      1.00     56962

ROC-AUC: 0.9826137264134691


In [16]:
from sklearn.metrics import f1_score

y_proba = ensemble_model.predict_proba(X_test)[:,1]
thresholds = np.arange(0.1, 0.6, 0.01)
f1_scores = []

for t in thresholds:
    y_pred = (y_proba >= t).astype(int)
    f1_scores.append(f1_score(y_test, y_pred, pos_label=1))

best_idx = np.argmax(f1_scores)
best_threshold = thresholds[best_idx]
print("Best Threshold:", best_threshold)
print("F1-score at best threshold:", f1_scores[best_idx])

# Apply best threshold
y_pred_best = (y_proba >= best_threshold).astype(int)
print("\nConfusion Matrix at Best Threshold:")
print(confusion_matrix(y_test, y_pred_best))
print("\nClassification Report at Best Threshold:")
print(classification_report(y_test, y_pred_best))


Best Threshold: 0.5899999999999997
F1-score at best threshold: 0.7248908296943232

Confusion Matrix at Best Threshold:
[[56816    48]
 [   15    83]]

Classification Report at Best Threshold:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.63      0.85      0.72        98

    accuracy                           1.00     56962
   macro avg       0.82      0.92      0.86     56962
weighted avg       1.00      1.00      1.00     56962



In [17]:
import numpy as np

best_threshold = 0.589  # from our previous optimization

def predict_fraud(model, X_new):
    """
    Predict fraud for new transactions.

    Args:
        model : trained ensemble model
        X_new : pandas DataFrame or numpy array of new transaction features

    Returns:
        predictions : 0 (non-fraud) or 1 (fraud)
        probabilities : predicted probability of fraud
    """
    y_proba = model.predict_proba(X_new)[:,1]
    y_pred = (y_proba >= best_threshold).astype(int)
    return y_pred, y_proba

# Example usage:
# y_pred, y_proba = predict_fraud(ensemble_model, X_test)
# print(y_pred[:10], y_proba[:10])


In [24]:
import joblib

# Save each model individually
joblib.dump(best_rf, "best_rf.pkl")
joblib.dump(best_xgb, "best_xgb.pkl")
joblib.dump(best_threshold, "fraud_threshold.pkl")


['fraud_threshold.pkl']

In [None]:
from sklearn.ensemble import VotingClassifier
import joblib

# Load models and threshold
best_rf = joblib.load("best_rf.pkl")
best_xgb = joblib.load("best_xgb.pkl")
best_threshold = joblib.load("fraud_threshold.pkl")

# Recreate the ensemble
ensemble_model = VotingClassifier(
    estimators=[('rf', best_rf), ('xgb', best_xgb)],
    voting='soft'
)

# Optional: fit on SMOTE-resampled data if needed
ensemble_model.fit(X_res, y_res)  # X_res, y_res used during training

