In [1]:
import os
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report

In [3]:
# Read the raw transactions table; the path is relative to the notebook's
# working directory.
transactions_csv = "data/transactions.csv"
df = pd.read_csv(transactions_csv)

In [5]:
# Columns fed to the model; only the transaction amount is used here.
features = ["amount"]

# Keep X two-dimensional (a DataFrame, not a Series) by selecting with a list.
X = df.loc[:, features]

In [7]:
# Standardize the selected feature(s) to zero mean / unit variance.
# NOTE(review): Isolation Forest splits on per-feature thresholds, so scaling
# is not expected to change which rows get isolated; it is kept so the
# pipeline's behavior stays unchanged — confirm before removing.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Fit the Isolation Forest and label every row in a single pass.
# contamination=0.04 is the expected share of outliers and sets the decision
# threshold accordingly; random_state pins the tree construction so reruns
# give the same labels.
model = IsolationForest(n_estimators=100, contamination=0.04, random_state=42)

# NOTE: despite the column name, fit_predict returns hard labels
# (-1 = outlier, 1 = inlier), not a continuous anomaly score. The column name
# is kept because it is written to the output CSV.
df['anomaly_score'] = model.fit_predict(X_scaled)

# Map the -1/1 labels to a 1/0 fraud flag with a vectorized comparison rather
# than a per-row Python lambda; int64 matches the dtype the lambda produced.
df['fraud_pred'] = df['anomaly_score'].eq(-1).astype('int64')

# Persist the scored frame (all original columns plus the two new ones).
os.makedirs("data", exist_ok=True)
df.to_csv("data/fraud_scored.csv", index=False)
print("✅ fraud_scored.csv saved.")


✅ fraud_scored.csv saved.


In [9]:
# Compare the model's fraud flag against the ground-truth label column.
y_true, y_pred = df['is_fraud'], df['fraud_pred']

# Confusion matrix: rows are true classes, columns are predicted classes.
print("\n📊 Confusion Matrix:", confusion_matrix(y_true, y_pred), sep="\n")

# Per-class precision / recall / F1, reported to three decimals.
print(
    "\n📈 Classification Report:",
    classification_report(y_true, y_pred, digits=3),
    sep="\n",
)


📊 Confusion Matrix:
[[4624  186]
 [ 177   13]]

📈 Classification Report:
              precision    recall  f1-score   support

           0      0.963     0.961     0.962      4810
           1      0.065     0.068     0.067       190

    accuracy                          0.927      5000
   macro avg      0.514     0.515     0.515      5000
weighted avg      0.929     0.927     0.928      5000

