# 🔍 Test New Transactions using Pre-trained Isolation Forest Model

This notebook loads the previously trained Isolation Forest model and scaler (saved as `.pkl` files),
applies the same preprocessing steps to a new dataset, and predicts whether transactions are **Normal** or **Suspicious**.

We also evaluate performance using **Accuracy, Precision, Recall, F1-score, and AUC** (if labels are available in the new dataset).

In [37]:
import pandas as pd
import numpy as np
import joblib
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, classification_report

# ----------------------------
# 1. Load model and scaler
# ----------------------------
model = joblib.load("anomaly_detection_model.pkl")
scaler = joblib.load("scaler.pkl")
print("✅ Model and Scaler Loaded Successfully")

✅ Model and Scaler Loaded Successfully


In [38]:
# ----------------------------
# 2. Load new dataset
# ----------------------------
df_new = pd.read_csv("mock_data.csv")
df_new['txn_date'] = pd.to_datetime(df_new['txn_date'])
print("Dataset Shape:", df_new.shape)
df_new.head()

Dataset Shape: (5000, 7)


Unnamed: 0,userId,transactionId,amount,txn_date,txn_latitude,txn_longitude,is_anomaly
0,user_1305,a9a0e79e-4b17-492e-89b9-5e48473c56b2,7193.38,2024-02-11 05:39:00,13.134193,77.078754,False
1,user_2782,fb95f1e9-c74b-4b19-af62-d072c384650a,11729.41,2024-07-17 16:37:00,12.569863,77.494159,False
2,user_2615,0d510510-8445-48f4-852e-bfb6e3601175,12619.87,2024-01-10 01:52:00,12.818951,77.270831,False
3,user_380,6084847e-62c1-4ee5-8416-2b30ca634c54,4611.27,2025-06-13 00:39:00,13.564693,77.184185,False
4,user_2174,90a503d8-cf25-4f0b-b14e-b6d6e690047f,1626.21,2024-09-08 05:46:00,13.479164,77.699319,True


In [39]:
# ----------------------------
# 3. Feature Engineering
# ----------------------------

# Time-based features
df_new['hour_of_day'] = df_new['txn_date'].dt.hour
df_new['day_of_week'] = df_new['txn_date'].dt.dayofweek

# Per-user averages
user_avg_features = df_new.groupby('userId').agg(
    avg_amount=('amount', 'mean'),
    avg_lat=('txn_latitude', 'mean'),
    avg_lon=('txn_longitude', 'mean')
).reset_index()

user_avg_features.columns = ['userId', 'user_avg_amount', 'user_avg_lat', 'user_avg_lon']
df_new = pd.merge(df_new, user_avg_features, on='userId', how='left')

# Amount deviation
df_new['amount_deviation'] = df_new['amount'] / (df_new['user_avg_amount'] + 1e-6)

# Haversine distance function
def haversine_distance(lat1, lon1, lat2, lon2):
    R = 6371
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = np.sin(dlat/2)**2 + np.cos(lat1)*np.cos(lat2)*np.sin(dlon/2)**2
    return R * 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))

df_new['distance_from_home'] = haversine_distance(
    df_new['txn_latitude'], df_new['txn_longitude'],
    df_new['user_avg_lat'], df_new['user_avg_lon']
)

df_new.head()

Unnamed: 0,userId,transactionId,amount,txn_date,txn_latitude,txn_longitude,is_anomaly,hour_of_day,day_of_week,user_avg_amount,user_avg_lat,user_avg_lon,amount_deviation,distance_from_home
0,user_1305,a9a0e79e-4b17-492e-89b9-5e48473c56b2,7193.38,2024-02-11 05:39:00,13.134193,77.078754,False,5,6,6220.946667,12.718506,77.411743,1.156316,58.641831
1,user_2782,fb95f1e9-c74b-4b19-af62-d072c384650a,11729.41,2024-07-17 16:37:00,12.569863,77.494159,False,16,2,11729.41,12.569863,77.494159,1.0,0.0
2,user_2615,0d510510-8445-48f4-852e-bfb6e3601175,12619.87,2024-01-10 01:52:00,12.818951,77.270831,False,1,2,10683.636667,12.496063,77.442277,1.181234,40.435643
3,user_380,6084847e-62c1-4ee5-8416-2b30ca634c54,4611.27,2025-06-13 00:39:00,13.564693,77.184185,False,0,4,7278.865,13.435776,77.234687,0.633515,15.339661
4,user_2174,90a503d8-cf25-4f0b-b14e-b6d6e690047f,1626.21,2024-09-08 05:46:00,13.479164,77.699319,True,5,6,8154.473333,13.208312,77.647849,0.199426,30.627869


In [40]:
# ----------------------------
# 4. Select Features
# ----------------------------
features = ['amount', 'hour_of_day', 'day_of_week',
            'amount_deviation', 'distance_from_home']
X_new = df_new[features]
print("Feature Matrix Shape:", X_new.shape)

Feature Matrix Shape: (5000, 5)


In [41]:
# ----------------------------
# 5. Scale + Predict
# ----------------------------
X_new_scaled = scaler.transform(X_new)
df_new['prediction'] = model.predict(X_new_scaled)

# Map -1 = anomaly, 1 = normal
df_new['prediction_label'] = df_new['prediction'].map({-1: "Suspicious", 1: "Normal"})

df_new[['userId', 'amount', 'prediction_label']].head()

Unnamed: 0,userId,amount,prediction_label
0,user_1305,7193.38,
1,user_2782,11729.41,
2,user_2615,12619.87,
3,user_380,4611.27,
4,user_2174,1626.21,


In [42]:
# ----------------------------
# 6. Evaluate (with labels if present)
# ----------------------------
if "is_anomaly" in df_new.columns:
    # Map ground-truth to match model output: 1 = Normal, -1 = Suspicious
    y_true = df_new["is_anomaly"].map({False: 1, True: -1}).astype(int)
    y_pred = df_new["prediction"]

    # Compute metrics
    acc = accuracy_score(y_true, y_pred)
    prec = precision_score(y_true, y_pred, pos_label=-1)
    rec = recall_score(y_true, y_pred, pos_label=-1)
    f1 = f1_score(y_true, y_pred, pos_label=-1)

    try:
        auc = roc_auc_score((y_true == -1).astype(int), (y_pred == -1).astype(int))
    except Exception as e:
        auc = None

    # Print results
    print("\n📊 Evaluation Metrics:")
    print(f"Accuracy : {acc:.4f}")
    print(f"Precision: {prec:.4f}")
    print(f"Recall   : {rec:.4f}")
    print(f"F1-Score : {f1:.4f}")
    if auc is not None:
        print(f"ROC-AUC  : {auc:.4f}")

    print("\nDetailed Report:\n", classification_report(y_true, y_pred))

else:
    # Fallback unsupervised stats
    print("⚠️ No ground-truth labels found in dataset. Skipping evaluation.")
    anomaly_rate = (df_new["prediction"] == -1).mean()
    print(f"🔍 Detected Anomaly Rate: {anomaly_rate*100:.2f}%")


ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted'].

In [44]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, classification_report

# Compute metrics
acc = accuracy_score(y_true, y_pred)
prec = precision_score(y_true, y_pred, average='weighted')
rec = recall_score(y_true, y_pred, average='weighted')
f1 = f1_score(y_true, y_pred, average='weighted')

try:
    auc = roc_auc_score((y_true == -1).astype(int), (y_pred == -1).astype(int))
except Exception:
    auc = None

print("\n📊 Evaluation Metrics:")
print(f"Accuracy  : {acc:.4f}")
print(f"Precision : {prec:.4f}")
print(f"Recall    : {rec:.4f}")
print(f"F1-Score  : {f1:.4f}")
if auc is not None:
    print(f"ROC-AUC   : {auc:.4f}")

print("\nDetailed Report:\n", classification_report(y_true, y_pred))



📊 Evaluation Metrics:
Accuracy  : 0.0016
Precision : 0.9468
Recall    : 0.0016
F1-Score  : 0.0032
ROC-AUC   : 0.5000

Detailed Report:
               precision    recall  f1-score   support

          -1       0.00      0.00      0.00       266
           0       0.00      0.00      0.00         0
           1       1.00      0.00      0.00      4734

    accuracy                           0.00      5000
   macro avg       0.33      0.00      0.00      5000
weighted avg       0.95      0.00      0.00      5000



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [25]:
# ----------------------------
# 7. Save Results
# ----------------------------
df_new.to_csv("new_transactions_with_predictions.csv", index=False)
print("✅ Predictions saved to new_transactions_with_predictions.csv")

✅ Predictions saved to new_transactions_with_predictions.csv
