## INSTALL

In [2]:
!pip install kaggle -q
!pip install imbalanced-learn -q

## IMPORTS

In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from imblearn.over_sampling import SMOTE
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
import joblib
import os

## DOWNLOAD CREDIT CARD FRAUD DATASET (KAGGLE API)

In [5]:
# Point the Kaggle CLI at /content/ as the directory holding its API
# credentials file (kaggle.json).
os.environ['KAGGLE_CONFIG_DIR'] = "/content/"
# Restrict the credentials file to owner read/write.
!chmod 600 /content/kaggle.json

# --force downloads the archive even when a local copy already exists;
# unzip -o overwrites previously extracted files without prompting, -q keeps
# the extraction quiet.
!kaggle datasets download -d mlg-ulb/creditcardfraud --force
!unzip -oq creditcardfraud.zip

Dataset URL: https://www.kaggle.com/datasets/mlg-ulb/creditcardfraud
License(s): DbCL-1.0
Downloading creditcardfraud.zip to /content
  0% 0.00/66.0M [00:00<?, ?B/s]
100% 66.0M/66.0M [00:00<00:00, 1.43GB/s]


## LOAD DATA

In [6]:
# Read the extracted CSV, then report its dimensions and how many rows fall
# into each value of the `Class` target.
csv_path = "creditcard.csv"
df = pd.read_csv(csv_path)

class_counts = df['Class'].value_counts()
print("Dataset Loaded:", df.shape)
print(class_counts)

Dataset Loaded: (284807, 31)
Class
0    284315
1       492
Name: count, dtype: int64


## PREPROCESSING

In [7]:
# Columns that get standardised; every other feature column is passed
# through untouched.
scale_cols = ['Amount', 'Time']

# Learn the scaling statistics from training rows only, so nothing about the
# held-out transactions leaks into preprocessing. The arguments below mirror
# the train/test split cell (test_size=0.2, stratified on Class,
# random_state=42); with the same labels and seed the split selects the same
# row positions, so these are exactly the rows that end up in X_train.
# Keep the two calls in sync.
train_pos, _ = train_test_split(
    np.arange(len(df)), test_size=0.2, stratify=df['Class'], random_state=42
)

# A single scaler fitted on both columns: StandardScaler standardises each
# column independently, and the fitted object stays usable for transforming
# new transactions. (Re-fitting one scaler column by column would leave it
# holding only the statistics of the last column.)
scaler = StandardScaler()
scaler.fit(df[scale_cols].iloc[train_pos])

scaled = scaler.transform(df[scale_cols])
df['Amount_scaled'] = scaled[:, 0]
df['Time_scaled'] = scaled[:, 1]

# Features: everything except the target and the raw (unscaled) columns.
X = df.drop(columns=['Class', 'Amount', 'Time'])
y = df['Class']

## TRAIN-TEST SPLIT

In [8]:
# Hold out 20% of the rows for evaluation. Stratifying on the target keeps
# the class ratio the same in both partitions; the fixed seed makes the
# partition reproducible.
split_options = dict(test_size=0.2, stratify=y, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

## HANDLE IMBALANCE WITH SMOTE

In [9]:
# Oversample the training partition only; the test partition is left as-is
# so evaluation reflects the original class distribution.
smote = SMOTE(random_state=42)
X_train_res, y_train_res = smote.fit_resample(X_train, y_train)

# Show the class counts before and after resampling.
for caption, labels in (
    ("Original class distribution:", y_train),
    ("After SMOTE:", y_train_res),
):
    print(caption, labels.value_counts())

Original class distribution: Class
0    227451
1       394
Name: count, dtype: int64
After SMOTE: Class
0    227451
1    227451
Name: count, dtype: int64


## TRAIN RANDOM FOREST

In [10]:
# Random forest trained on the SMOTE-resampled training set.
# - n_jobs=-1 builds the trees on all available cores; with random_state
#   fixed the fitted forest is the same as a single-threaded fit, only faster.
# - class_weight='balanced' is kept as a safeguard; on the already balanced
#   resampled data it should yield equal class weights.
rf = RandomForestClassifier(
    n_estimators=200,
    max_depth=10,
    class_weight='balanced',
    random_state=42,
    n_jobs=-1,
)
rf.fit(X_train_res, y_train_res)

## EVALUATE MODEL

In [11]:
# Hard class predictions and the predicted probability of class 1 for the
# untouched test partition.
y_pred = rf.predict(X_test)
y_prob = rf.predict_proba(X_test)[:, 1]

# Label-based reports, each printed under its own banner.
for banner, report in (
    ("===== Classification Report =====", classification_report(y_test, y_pred)),
    ("===== Confusion Matrix =====", confusion_matrix(y_test, y_pred)),
):
    print(banner)
    print(report)

# Threshold-independent ranking quality, computed from the probabilities.
roc = roc_auc_score(y_test, y_prob)
print("ROC-AUC Score:", roc)

===== Classification Report =====
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.42      0.86      0.57        98

    accuracy                           1.00     56962
   macro avg       0.71      0.93      0.78     56962
weighted avg       1.00      1.00      1.00     56962

===== Confusion Matrix =====
[[56750   114]
 [   14    84]]
ROC-AUC Score: 0.978381286391878


## SAVE MODEL

In [12]:
# Persist the fitted forest to the working directory.
model_path = "random_forest.pkl"
joblib.dump(rf, model_path)
print("Random Forest model saved as random_forest.pkl")

Random Forest model saved as random_forest.pkl
