In [6]:
import os
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

In [8]:
# Make sure the output folder exists before any CSV is written into it;
# exist_ok=True keeps the cell safe to re-run.
os.makedirs(name="data", exist_ok=True)

In [10]:
def create_transaction_data(n=5000, seed=42, output_path="data/transactions.csv"):
    """Generate a synthetic transaction table and write it to a CSV file.

    Columns written: transaction_id (1..n), timestamp (one row per minute,
    counting backwards from the moment of generation), user_id (1000-1099),
    amount (exponential, scale 200, rounded to 2 decimals), location,
    device_type and is_fraud (1 with probability 0.04).

    Note that is_fraud is sampled independently of every other column, so
    no feature in this table carries any signal about the label.

    Parameters
    ----------
    n : int
        Number of transactions to generate.
    seed : int or None
        Seed for the random columns. The default of 42 reproduces the same
        values as calling ``np.random.seed(42)`` before the legacy
        ``np.random.*`` functions.
    output_path : str
        Destination CSV path; missing parent directories are created.

    Returns
    -------
    None
        The data is only written to disk.
    """
    # A private legacy generator yields the same stream as np.random.seed(seed)
    # followed by np.random.* calls, without reseeding the global RNG that
    # other cells may rely on.
    rng = np.random.RandomState(seed)

    # Read the clock once so consecutive rows are exactly one minute apart;
    # calling datetime.now() per row lets the timestamps drift.
    generated_at = datetime.now()

    # Draw order below (user_id, amount, location, device_type, is_fraud)
    # determines the generated values, so keep it stable.
    data = {
        'transaction_id': np.arange(1, n + 1),
        'timestamp': [generated_at - timedelta(minutes=i) for i in range(n)],
        'user_id': rng.randint(1000, 1100, n),
        'amount': np.round(rng.exponential(scale=200, size=n), 2),
        'location': rng.choice(['Chennai', 'Mumbai', 'Delhi', 'Bangalore'], n),
        'device_type': rng.choice(['Mobile', 'Desktop', 'Tablet'], n),
        'is_fraud': rng.choice([0, 1], n, p=[0.96, 0.04])
    }
    df = pd.DataFrame(data)

    # Do not depend on another cell having created the target directory.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    df.to_csv(output_path, index=False)
    print(f"✅ {os.path.basename(output_path)} generated.")

# Build the 5000-row synthetic dataset and write data/transactions.csv.
create_transaction_data(n=5000)

✅ transactions.csv generated.


In [14]:
import os
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report

In [16]:
# Load the generated transactions back from disk; this frame is extended
# with model outputs in the cells below.
df = pd.read_csv(filepath_or_buffer="data/transactions.csv")

In [18]:
# Guarantee the folder used for the scored CSV exists. The read from data/
# just above means it normally already does, so this is a no-op safeguard.
os.makedirs(name="data", exist_ok=True)

In [21]:
# Columns fed to the model; only the transaction amount is used.
features = ["amount"]
# Keep X two-dimensional (a one-column DataFrame) by selecting with a list.
X = df.loc[:, features]

In [23]:
# Step 1: Normalize the amount column
# Fit the scaler on X, then apply it to the same data.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

In [25]:
# Step 2: Apply Isolation Forest
model = IsolationForest(
    n_estimators=100,
    contamination=0.04,  # same rate at which is_fraud=1 is generated
    random_state=42,     # fixed so re-runs flag the same rows
)
# NOTE: despite the column name, this stores what fit_predict returns; the
# next step treats -1 as the anomaly marker.
df['anomaly_score'] = model.fit_predict(X_scaled)

In [27]:
# Step 3: Convert model labels to binary fraud predictions.
# 'anomaly_score' holds the fit_predict output, where -1 marks an anomaly;
# flag exactly those rows as 1 and everything else as 0. The comparison is
# vectorized instead of applying a Python lambda row by row.
df['fraud_pred'] = df['anomaly_score'].eq(-1).astype('int64')

In [29]:
# Step 4: Save results
# Writes every column of df, including the two model output columns added
# above, without the index.
df.to_csv(path_or_buf="data/fraud_scored.csv", index=False)
print("✅ fraud_scored.csv saved.")

✅ fraud_scored.csv saved.


In [31]:
# Step 5: Evaluation
# Ground-truth labels and the model's binary predictions, side by side.
y_true, y_pred = df['is_fraud'], df['fraud_pred']

In [33]:
# Show confusion matrix (rows: actual class, columns: predicted class)
# NOTE(review): is_fraud is generated independently of amount, so
# precision/recall for class 1 near the 4% base rate is expected here.
cm = confusion_matrix(y_true, y_pred)
print("\n📊 Confusion Matrix:", cm, sep="\n")

# Classification Report (Precision, Recall, F1)
print(
    "\n📈 Classification Report:",
    classification_report(y_true, y_pred, digits=3),
    sep="\n",
)


📊 Confusion Matrix:
[[4624  186]
 [ 177   13]]

📈 Classification Report:
              precision    recall  f1-score   support

           0      0.963     0.961     0.962      4810
           1      0.065     0.068     0.067       190

    accuracy                          0.927      5000
   macro avg      0.514     0.515     0.515      5000
weighted avg      0.929     0.927     0.928      5000

