In [2]:
!pip install google-cloud-bigquery --quiet
from google.colab import auth
auth.authenticate_user()


In [4]:
from google.colab import auth
auth.authenticate_user()

PROJECT_ID = "ai-risk-monitor-477122"   # <-- replace this
REGION = "US"

from google.cloud import bigquery
client = bigquery.Client(project=PROJECT_ID, location=REGION)


In [7]:
query = """
SELECT *
FROM `ai-risk-monitor-477122.ai_risk_monitor.transactions`
"""
df = client.query(query).to_dataframe()
df.to_csv("dataset.csv", index=False)

print("✅ Export completed. Rows:", len(df))


✅ Export completed. Rows: 120000


In [9]:
FEATURES = [
    "amount",
    "device_trust_score",
    "is_new_recipient",
    "daily_txn_count_user",
    "daily_txn_amount_user"
]


In [10]:
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import train_test_split
import joblib

X = df[FEATURES]
y_true = df["label_is_anomaly"]

model = IsolationForest(
    n_estimators=200,
    contamination=y_true.mean(),  # use true anomaly rate
    random_state=42
)
model.fit(X)

joblib.dump(model, "model_v1.pkl")


['model_v1.pkl']

In [11]:
from sklearn.metrics import roc_auc_score, precision_score

scores = model.decision_function(X) * -1  # convert: higher = more anomalous
df["score"] = scores

roc = roc_auc_score(y_true, scores)
print("ROC-AUC:", roc)


ROC-AUC: 0.8848185299559455


In [12]:
def precision_at_k(y_true, scores, k=0.05):
    cutoff = int(len(scores) * k)
    top_k = df.sort_values("score", ascending=False).head(cutoff)
    return top_k.label_is_anomaly.mean()

print("Precision @ 5%:", precision_at_k(y_true, scores, k=0.05))
print("Precision @ 1%:", precision_at_k(y_true, scores, k=0.01))


Precision @ 5%: 0.281
Precision @ 1%: 0.5683333333333334


In [13]:
import numpy as np

threshold = np.percentile(scores, 95)  # top 5% flagged
y_pred = (scores >= threshold).astype(int)

false_pos_rate = ((y_pred == 1) & (y_true == 0)).mean()
false_pos_rate


np.float64(0.03595)

In [16]:
from sklearn.inspection import permutation_importance
from sklearn.metrics import roc_auc_score, make_scorer

def anomaly_scorer(model, X, y):
    scores = model.decision_function(X) * -1   # invert (higher = more anomaly)
    return roc_auc_score(y, scores)

scoring_fn = make_scorer(anomaly_scorer, needs_proba=False)

result = permutation_importance(
    model,
    X,
    y_true,
    scoring=scoring_fn,
    n_repeats=5,
    random_state=42
)

importances = pd.Series(result.importances_mean, index=FEATURES)
importances.sort_values().plot.barh()


TypeError: anomaly_scorer() got an unexpected keyword argument 'needs_proba'