In [7]:
import pandas as pd
import joblib
import os
import json

# Locations of the input artifacts, relative to the working directory.
# Switch to the alternatives noted on each line if the notebook runs elsewhere.
DATA_PATH = "dataset.csv"              # alt: "src/data/dataset.csv"
MODEL_PATH = "model_v1.pkl"            # alt: "src/models/model_v1.pkl"

# Columns handed to the model, in this order.
FEATURES = [
    "amount", "device_trust_score", "is_new_recipient",
    "daily_txn_count_user", "daily_txn_amount_user",
]

# How many of the highest-scoring rows to keep.
TOP_K = 20

# Read the transactions and the fitted model.
# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code — only point MODEL_PATH at a file from a trusted source.
df = pd.read_csv(DATA_PATH)
model = joblib.load(MODEL_PATH)

print("✅ Loaded dataset:", df.shape)

# Score every row. The decision_function output is negated so that larger
# values mean riskier; this presumes the model scores anomalies lower (the
# scikit-learn outlier-detector convention) — TODO confirm for this model.
X = df.loc[:, FEATURES]
df["anomaly_score"] = -1 * model.decision_function(X)

# Rank by score, highest first, and keep the first TOP_K rows.
top_risky = (
    df.sort_values(by="anomaly_score", ascending=False)
      .head(TOP_K)
)
top_risky


✅ Loaded dataset: (60000, 18)


Unnamed: 0,txn_id,user_id,timestamp,amount,currency,direction,recipient_id,recipient_type,merchant_category,origin_geo,dest_geo,channel,device_trust_score,is_new_recipient,daily_txn_count_user,daily_txn_amount_user,label_is_anomaly,anomaly_type,anomaly_score
48714,283111fa-bb43-4b74-92c5-ae51aba0b9ef,user_000473,2025-10-26T05:43:13.689455Z,210.66,USD,send,r_3565,user,gift,US,US,mobile,1.0,1,14,2949.24,1,velocity,0.179969
8363,3500e0f4-54d8-4b24-ad83-2a809656cf6c,user_002363,2025-10-25T22:46:38.741976Z,314.89,USD,send,r_4460,merchant,streaming,US,US,mobile,0.689,0,18,5668.02,1,velocity,0.174335
20374,bd2518f0-7cb5-430d-bff6-d6a22374a3e0,user_000061,2025-11-02T13:09:22.297146Z,87.04,USD,send,r_3499,user,travel,US,IN,api,0.627,1,18,1566.72,1,velocity,0.167667
48374,deb6fbe2-6d38-4463-b999-e6aa5e96df7f,user_002875,2025-10-21T02:45:07.153398Z,6888.54,USD,send,r_3966,user,utilities,US,US,api,0.623,0,3,1077.87,1,high_amount,0.163968
38893,730d5a52-539f-45bd-9b32-d034561d0903,user_004423,2025-10-26T00:24:09.273578Z,91.98,USD,receive,r_1515,user,streaming,IN,US,mobile,0.701,1,19,1747.62,1,velocity,0.163605
30079,924784b0-2254-47fb-8e2c-6bb54e2ce878,user_000165,2025-10-25T15:25:35.290587Z,95.84,USD,send,r_483,merchant,crypto,US,GB,mobile,0.834,1,19,1820.96,1,velocity,0.160158
31425,51ea6a5a-8acd-41c7-a413-53e3c9e38901,user_004285,2025-10-28T13:55:45.599265Z,194.99,USD,receive,r_3793,user,groceries,US,US,mobile,0.528,0,17,3314.83,1,velocity,0.159733
58775,3ba3cded-ff4d-4954-8a12-0f0d27aea033,user_000485,2025-10-24T13:04:07.543642Z,92.31,USD,receive,r_2551,merchant,utilities,US,US,web,0.633,1,15,1384.65,1,velocity,0.158813
482,2022ff86-522c-4b68-b397-e86000c2f8ea,user_000738,2025-10-23T00:28:41.443265Z,86.38,USD,send,r_2183,merchant,gift,US,US,mobile,0.778,1,18,1554.84,1,velocity,0.157822
54470,16ea730c-aea5-470c-806b-efbad41dfe6f,user_002683,2025-11-01T00:35:39.795975Z,59.75,USD,send,r_779,user,streaming,NG,GB,mobile,1.0,1,18,1075.5,1,velocity,0.155908


In [8]:
# Columns exposed to the LLM: the same five columns listed in FEATURES,
# followed by merchant/geo context and the computed score.
record_columns = [
    "amount", "device_trust_score", "is_new_recipient",
    "daily_txn_count_user", "daily_txn_amount_user",
    "merchant_category", "origin_geo", "dest_geo",
    "anomaly_score",
]

# One plain dict per transaction, in the same row order as top_risky.
records = top_risky.loc[:, record_columns].to_dict(orient="records")

records[:2]   # preview first 2 records


[{'amount': 210.66,
  'device_trust_score': 1.0,
  'is_new_recipient': 1,
  'daily_txn_count_user': 14,
  'daily_txn_amount_user': 2949.24,
  'merchant_category': 'gift',
  'origin_geo': 'US',
  'dest_geo': 'US',
  'anomaly_score': 0.17996866983141524},
 {'amount': 314.89,
  'device_trust_score': 0.689,
  'is_new_recipient': 0,
  'daily_txn_count_user': 18,
  'daily_txn_amount_user': 5668.02,
  'merchant_category': 'streaming',
  'origin_geo': 'US',
  'dest_geo': 'US',
  'anomaly_score': 0.1743352720469643}]

In [9]:
# Export the LLM-ready records to a JSON file.
# The destination is defined once so that the directory that gets created, the
# file that gets written and the message that gets printed cannot drift apart.
OUTPUT_PATH = "examples/top_risky_transactions.json"
os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)

# Missing values typically reach this point as float NaN, and json.dump would
# write those as a bare `NaN` token, which is not valid JSON. Map them to null.
# (`value != value` is true only for NaN.) Records without NaN are unchanged.
json_safe_records = [
    {
        key: None if isinstance(value, float) and value != value else value
        for key, value in record.items()
    }
    for record in records
]

# Explicit encoding so the written file does not depend on the platform's
# default text encoding.
with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
    json.dump(json_safe_records, f, indent=2)

print(f"✅ Exported → {OUTPUT_PATH}")


✅ Exported → examples/top_risky_transactions.json
