In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from collections import Counter

In [2]:
# 1. Load Dataset
df = pd.read_csv("creditcard.csv")

# 2. Preprocessing
X = df.drop(columns=['Class'])
y = df['Class']

# Scale 'Time' and 'Amount' columns
scaler = StandardScaler()
X[['Time', 'Amount']] = scaler.fit_transform(X[['Time', 'Amount']])

In [3]:
# Split data
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# 3. Handle Class Imbalance: Undersample majority class (Class 0)
fraud = df[df['Class'] == 1]
non_fraud = df[df['Class'] == 0].sample(n=len(fraud), random_state=42)

df_balanced = pd.concat([fraud, non_fraud])
X_bal = df_balanced.drop(columns=['Class'])
y_bal = df_balanced['Class']
X_bal[['Time', 'Amount']] = scaler.transform(X_bal[['Time', 'Amount']])

In [7]:
# 4. Train Model
model = RandomForestClassifier(random_state=42)
model.fit(X_bal, y_bal)

# 5. Evaluation on real test set
y_pred = model.predict(X_test)
print("\n=== Classification Report ===")
print(classification_report(y_test, y_pred, digits=4))


=== Classification Report ===
              precision    recall  f1-score   support

           0     1.0000    0.9749    0.9873     85295
           1     0.0646    1.0000    0.1214       148

    accuracy                         0.9749     85443
   macro avg     0.5323    0.9874    0.5543     85443
weighted avg     0.9984    0.9749    0.9858     85443



In [8]:
# Simulate one transaction from the test data
sample_transaction = X_test.iloc[0].values.reshape(1, -1)  # Taking the first transaction
predicted_class = model.predict(sample_transaction)[0]

print("\n--- Simulated Test Transaction ---")
print("Predicted Class:", "🔴 FRAUD" if predicted_class == 1 else "🟢 LEGIT")


--- Simulated Test Transaction ---
Predicted Class: 🟢 LEGIT


