In [14]:
import pandas as pd
import numpy as np
import os
from sklearn.ensemble import IsolationForest
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score, average_precision_score


In [15]:
# The training CSV is located relative to the parent of the current working
# directory: <parent>/data/training/final_anomaly_training.csv
project_root = os.path.dirname(os.getcwd())
train_csv_path = os.path.join(
    project_root, 'data', 'training', 'final_anomaly_training.csv'
)

train_data = pd.read_csv(train_csv_path)

# Last expression of the cell: renders per-column summary statistics.
train_data.describe()

Unnamed: 0,distance_from_home,distance_from_last_transaction,ratio_to_median_purchase_price,repeat_retailer,used_chip,used_pin_number,online_order,fraud
count,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0
mean,0.7620937,1.32714,0.5115073,-0.118658,0.350732,0.10073,-0.349274,0.087404
std,2.952638,9.519952,1.749233,0.323386,0.4772,0.300971,0.476741,0.282426
min,-0.4522426,-0.326715,-0.6123311,-1.0,0.0,0.0,-1.0,0.0
25%,-0.2770901,-0.2293651,-0.3214841,0.0,0.0,0.0,-1.0,0.0
50%,4.03482e-17,-1.8200200000000003e-17,3.426058e-17,0.0,0.0,0.0,0.0,0.0
75%,0.7229099,0.7706349,0.6785159,0.0,1.0,0.0,0.0,0.0
max,262.9377,3885.243,164.6674,0.0,1.0,1.0,0.0,1.0


In [16]:
# Count the rows labelled as fraud (label value 1) via a boolean mask.
is_fraud = train_data['fraud'] == 1
fraud_count = int(is_fraud.sum())

print(f"Total number of transactions: {len(train_data)}")
print(f"Number of fraud cases: {fraud_count}")

Total number of transactions: 500000
Number of fraud cases: 43702


In [17]:
# Unsupervised setup: the 'fraud' label is removed so the detector only
# ever sees the feature columns.
X = train_data.drop('fraud', axis=1)

# contamination=0.01 makes predict() flag 1% of the training rows as
# anomalies; random_state pins the tree construction for reproducibility.
model = IsolationForest(contamination=0.01, random_state=42).fit(X)

# decision_function: continuous score per row (negative = flagged as anomaly).
# predict: hard label per row (-1 = anomaly, 1 = normal).
scores = model.decision_function(X)
anomalies = model.predict(X)

flagged_mask = anomalies == -1
print("Anomaly scores:", scores)
print("Anomalies detected:", flagged_mask.sum())

Anomaly scores: [ 0.27383664  0.29810147  0.29025661 ...  0.29881162  0.19377354
 -0.00958613]
Anomalies detected: 5000


In [18]:
# The full testing CSV is located relative to the parent of the current
# working directory: <parent>/data/testing/final_full_testing.csv
testing_csv_path = os.path.join(
    os.path.dirname(os.getcwd()), 'data', 'testing', 'final_full_testing.csv'
)

test_data = pd.read_csv(testing_csv_path)

# Last expression of the cell: renders per-column summary statistics.
test_data.describe()

Unnamed: 0,distance_from_home,distance_from_last_transaction,ratio_to_median_purchase_price,repeat_retailer,used_chip,used_pin_number,online_order,fraud
count,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0
mean,0.7614665,1.312348,0.5084815,-0.11827,0.350066,0.100486,-0.349622,0.087402
std,3.027468,7.231631,1.705449,0.322928,0.476991,0.300647,0.476851,0.282424
min,-0.4580047,-0.3259626,-0.6126256,-1.0,0.0,0.0,-1.0,0.0
25%,-0.2798222,-0.2295467,-0.3226632,0.0,0.0,0.0,-1.0,0.0
50%,-4.087564e-17,3.6172940000000005e-17,3.424517e-17,0.0,0.0,0.0,0.0,0.0
75%,0.7201778,0.7704533,0.6773368,0.0,1.0,0.0,0.0,0.0
max,488.8798,887.2887,103.11,0.0,1.0,1.0,0.0,1.0


In [19]:
# Split the test frame into features (model input) and the true label,
# which is kept aside purely for evaluation.
y_test = test_data['fraud']
X_test = test_data.drop('fraud', axis=1)

# Apply the forest fitted on the training data; nothing is refitted here.
test_anomalies = model.predict(X_test)
test_scores = model.decision_function(X_test)

# IsolationForest emits -1 for anomalies and 1 for normal rows; map that to
# the label convention of y_test (1 = fraud, 0 = not fraud).
y_pred = np.equal(test_anomalies, -1).astype(int)

# Ranking metrics expect "higher = more likely positive", whereas the
# decision function is lower for more anomalous rows, so negate it.
fraud_ranking = -test_scores

print("Anomaly Scores:", test_scores)
print("Anomalies Detected in Test Set:", y_pred.sum())
print(classification_report(y_test, y_pred, digits=4))
print("ROC AUC:", roc_auc_score(y_test, fraud_ranking))
print("PR AUC:", average_precision_score(y_test, fraud_ranking))

Anomaly Scores: [0.28438209 0.24164638 0.26492327 ... 0.26033694 0.25320323 0.26353297]
Anomalies Detected in Test Set: 4943
              precision    recall  f1-score   support

         0.0     0.9141    0.9918    0.9514    456299
         1.0     0.2399    0.0271    0.0488     43701

    accuracy                         0.9075    500000
   macro avg     0.5770    0.5095    0.5001    500000
weighted avg     0.8552    0.9075    0.8725    500000

ROC AUC: 0.7535106823437537
PR AUC: 0.17918038168560402


In [20]:
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import precision_recall_curve, auc

In [21]:
# Both evaluation splits are read from <parent of cwd>/data/testing/.
testing_dir = os.path.join(os.path.dirname(os.getcwd()), 'data', 'testing')

# Held-out split, loaded as the validation set.
validation_data = pd.read_csv(
    os.path.join(testing_dir, 'final_heldout_testing.csv')
)
# A notebook only renders the LAST expression of a cell, so a bare
# `validation_data.describe()` here would be computed and thrown away.
# `display` is the IPython built-in available in notebook kernels.
display(validation_data.describe())

# Second split of the testing data.
new_test_data = pd.read_csv(
    os.path.join(testing_dir, 'final_split_testing.csv')
)
new_test_data.describe()

Unnamed: 0,distance_from_home,distance_from_last_transaction,ratio_to_median_purchase_price,repeat_retailer,used_chip,used_pin_number,online_order,fraud
count,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0
mean,0.760834,1.311892,0.50825,-0.118504,0.350292,0.100288,-0.349556,0.0874
std,2.969346,7.293331,1.709334,0.323205,0.477062,0.300384,0.476831,0.282421
min,-0.456953,-0.325963,-0.612626,-1.0,0.0,0.0,-1.0,0.0
25%,-0.279957,-0.229598,-0.323073,0.0,0.0,0.0,-1.0,0.0
50%,-0.000382,-0.000813,-0.000256,0.0,0.0,0.0,0.0,0.0
75%,0.721089,0.766483,0.678434,0.0,1.0,0.0,0.0,0.0
max,403.482162,887.288666,89.584904,0.0,1.0,1.0,0.0,1.0


In [22]:
# Separate the validation labels from the feature columns.
y_val = validation_data['fraud']
X_val = validation_data.drop('fraud', axis=1)

# Continuous anomaly scores from the already-fitted forest
# (lower = more anomalous).
val_scores = model.decision_function(X_val)

In [23]:
# IsolationForest scores are lower for more anomalous rows, while
# precision_recall_curve expects "higher = more likely positive", so the
# scores are negated before building the curve.
precision, recall, thresholds = precision_recall_curve(y_val, -val_scores)
pr_auc = auc(recall, precision)

# precision/recall carry one extra trailing point (precision=1, recall=0)
# that has no matching entry in `thresholds`; drop it so every F1 value
# lines up with a real threshold and best_idx can never index past the end.
prec_at_thresh = precision[:-1]
rec_at_thresh = recall[:-1]

# F1 = 2PR / (P + R). Where P + R == 0 the plain division yields NaN, and
# argmax would then return the NaN position instead of the true maximum;
# define F1 as 0 at those points instead.
pr_sum = prec_at_thresh + rec_at_thresh
f1_scores = np.divide(
    2 * prec_at_thresh * rec_at_thresh,
    pr_sum,
    out=np.zeros_like(pr_sum, dtype=float),
    where=pr_sum > 0,
)

# Pick best threshold (maximize F1; swap the criterion to tune for recall).
# Note the threshold lives on the NEGATED score scale.
best_idx = int(f1_scores.argmax())
best_thresh = thresholds[best_idx]

print(f"Best threshold: {best_thresh:.4f}, PR AUC: {pr_auc:.4f}")

Best threshold: -0.2257, PR AUC: 0.1766


In [24]:
# Evaluate the tuned threshold on the test set.
# NOTE(review): the threshold was tuned on final_heldout_testing.csv and is
# evaluated here on final_full_testing.csv. If the full file contains the
# held-out rows, these metrics are optimistic. `new_test_data`
# (final_split_testing.csv) is loaded earlier but unused in the cells shown
# — TODO confirm which split is intended for this evaluation.
X_test = test_data.drop(columns=['fraud'])
y_test = test_data['fraud']

test_scores = model.decision_function(X_test)

# best_thresh is expressed on the negated-score scale, and
# precision_recall_curve counts "score >= threshold" as positive. Use the
# same inclusive comparison so rows scoring exactly at the threshold are
# classified the way they were when the threshold was selected.
y_pred_test = (-test_scores >= best_thresh).astype(int)

print("Anomaly Scores:", test_scores)
print("Anomalies Detected in Test Set:", np.sum(y_pred_test))
print(classification_report(y_test, y_pred_test, digits=4))

Anomaly Scores: [0.28438209 0.24164638 0.26492327 ... 0.26033694 0.25320323 0.26353297]
Anomalies Detected in Test Set: 211775
              precision    recall  f1-score   support

         0.0     0.9728    0.6145    0.7532    456299
         1.0     0.1693    0.8204    0.2807     43701

    accuracy                         0.6325    500000
   macro avg     0.5710    0.7174    0.5169    500000
weighted avg     0.9025    0.6325    0.7119    500000

