# Weapons Detection Content Moderation — Analysis

Compare three approaches: Baseline BERT, Calibrated (Platt), and Theory-Constrained (Isotonic + 3-tier + age-dependent thresholds). Includes calibration metrics (ECE, MCE, Brier), cost analysis, and drift detection.

In [None]:
import sys
from pathlib import Path
sys.path.insert(0, str(Path().resolve()))
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score

from data.synthetic_ads import generate_synthetic_ads
from data.cost_matrices import get_cost_matrix, total_cost
from approaches.baseline_model import BaselineWeaponsClassifier
from approaches.calibrated_model import CalibratedWeaponsClassifier
from approaches.theory_constrained_model import TheoryConstrainedWeaponsClassifier
from evaluation.calibration_metrics import ece, mce, brier_score, reliability_diagram_data
from evaluation.cost_analysis import human_review_queue_size
from evaluation.drift_detection import kl_divergence_bins, detect_drift
import config

## 1. Load or generate data

In [None]:
# Reuse the synthetic dataset stored at config.SYNTHETIC_ADS_PATH when that
# file exists; otherwise generate a fresh one.
if config.SYNTHETIC_ADS_PATH.exists():
    df = pd.read_csv(config.SYNTHETIC_ADS_PATH)
else:
    df = generate_synthetic_ads()
# Build one text field per ad. Missing keywords become an empty string so a
# NaN in that column does not turn the whole concatenated text into NaN.
texts = (df["title"] + " " + df["description"] + " " + df["keywords"].fillna("")).tolist()
y = df["label"].values
# Stratified 80/20 split: class proportions are kept approximately equal in
# the train and validation partitions.
X_train, X_val, y_train, y_val = train_test_split(
    texts, y, test_size=0.2, random_state=config.RANDOM_STATE, stratify=y
)
print("Train:", len(X_train), "Val:", len(X_val), "Positive rate:", y.mean())

## 2. Train / fit three approaches

In [None]:
# A single baseline classifier instance is shared by both wrapper models.
base = BaselineWeaponsClassifier()
cal = CalibratedWeaponsClassifier(base_classifier=base)
theory = TheoryConstrainedWeaponsClassifier(base_classifier=base)
# Fit each wrapper's calibration on the validation split (calibrated first,
# then theory-constrained).
# NOTE(review): the cells below also score and evaluate on X_val, so the
# calibration metrics are measured on the data the calibrators were fit on —
# confirm this is intended.
for _wrapper in (cal, theory):
    _wrapper.fit_calibration(X_val, y_val)
print("Models ready.")

## 3. Predictions and metrics

In [None]:
# Validation-set scores from each approach, flattened with .ravel().
p1, p2, p3 = (clf.predict_proba(X_val).ravel() for clf in (base, cal, theory))

# Hard 0/1 predictions, one cut-off per approach. The baseline and calibrated
# cut-offs are strict (">"); the theory-constrained one is inclusive (">=").
pred1 = (p1 > 0.5).astype(int)
pred2 = (p2 > config.THRESHOLD_CALIBRATED).astype(int)
pred3 = (p3 >= config.THRESHOLD_HUMAN_REVIEW_LOW).astype(int)

def safe(fn, y, p):
    """Call metric ``fn(y, p, zero_division=0)`` and round the result to 4 decimals."""
    score = fn(y, p, zero_division=0)
    return round(score, 4)

# One summary row per approach: ECE and Brier are computed from the scores,
# precision/recall/F1 from the thresholded predictions.
rows = [
    {
        "Approach": label,
        "ECE": ece(y_val, scores),
        "Precision": safe(precision_score, y_val, hard),
        "Recall": safe(recall_score, y_val, hard),
        "F1": safe(f1_score, y_val, hard),
        "Brier": brier_score(y_val, scores),
    }
    for label, scores, hard in (
        ("Baseline", p1, pred1),
        ("Calibrated", p2, pred2),
        ("Theory-Constrained", p3, pred3),
    )
]
pd.DataFrame(rows)

## 4. Calibration curves (reliability diagrams)

In [None]:
from visualization.calibration_plots import plot_reliability_diagram

# One reliability diagram per approach, laid out side by side in a single row.
panels = [(p1, "Baseline"), (p2, "Calibrated"), (p3, "Theory-Constrained")]
fig, axes = plt.subplots(1, len(panels), figsize=(12, 4))
for ax, (probs, name) in zip(axes, panels):
    plot_reliability_diagram(y_val, probs, n_bins=10, title=name, ax=ax, label=name)
plt.tight_layout()
plt.show()

## 5. Cost matrix and total cost

In [None]:
print("Cost matrix (general):")
print(get_cost_matrix("general"))
# Evaluate total_cost under the "general" matrix for each approach's
# thresholded predictions.
c1, c2, c3 = (total_cost(y_val, hard, "general") for hard in (pred1, pred2, pred3))
print("Total cost (validation): Baseline", c1, "Calibrated", c2, "Theory", c3)
# NOTE(review): 0.40 / 0.90 are hard-coded here, while the thresholding cell
# reads config.THRESHOLD_HUMAN_REVIEW_LOW — confirm the two are meant to match.
queue_size = human_review_queue_size(p3, 0.40, 0.90)  # p3 = scores
print("Human review queue size (Theory):", queue_size)

## 6. Drift detection (KL divergence)

In [None]:
# Score the training split with the theory-constrained model, then compare
# those scores against the validation scores (p3) via detect_drift.
p_train = theory.predict_proba(X_train).ravel()
is_drift, kl = detect_drift(p_train, p3, config.DRIFT_KL_THRESHOLD)
verdict = "-> Drift" if is_drift else "-> No drift"
print(f"KL(prod || train) = {kl:.4f}", verdict)