# 05_pipeline_analysis.ipynb

## 概要
02_main.py の出力を分析するノートブック。
Stage1 (XGBoost) → Stage2 (LR Gate) → Stage3 (AI Agent) の各段階の性能を評価する。

## 分析内容
1. **Stage1 分析**: XGBoostモデルの性能、閾値分析
2. **Stage2 分析**: LR Gateの性能、Handoff選択分析
3. **Stage3 分析**: AI Agentのゲート発火分析（POST_LLM_FLIP_GATE, P1-P3, B1-B4）
4. **FP/FN パターン分析**: 各段階での誤分類パターン
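5. **特徴量分析**: 識別力のある特徴量の特定
6. **ゲートシミュレーション**: ゲート設定を変えた場合の性能比較と Extra FP の深掘り分析
7. **コスト分析**: FN/FP コストに基づく Stage1 と AI Agent の比較
8. **サマリーレポート**: 分析結果のまとめ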

---
## Cell 0: Setup and Configuration

In [None]:
import os
import json
import pickle
from pathlib import Path
from datetime import datetime

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (
    confusion_matrix, classification_report, 
    precision_recall_curve, roc_curve, auc,
    f1_score, precision_score, recall_score
)

# Japanese font support.
# Matplotlib resolves 'font.family' in list order, and releases without
# per-glyph font fallback (before 3.6) draw all text with the first family
# they can find. DejaVu Sans ships with Matplotlib and has no CJK glyphs, so
# it must come AFTER the Japanese fonts; listed first it would always win and
# Japanese text would render as empty boxes.
plt.rcParams['font.family'] = ['Hiragino Sans', 'Yu Gothic', 'Meiryo', 'DejaVu Sans', 'sans-serif']
# Use the ASCII hyphen for negative numbers so the minus sign does not depend
# on the selected font containing U+2212.
plt.rcParams['axes.unicode_minus'] = False

# Display settings: widen pandas output so wide result tables are not truncated
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 100)
pd.set_option('display.width', 200)

print("Libraries loaded successfully")

In [None]:
# Pipeline run selection.
# The RUN_ID environment variable wins; otherwise the pinned default run is analysed.
RUN_ID = os.environ.get('RUN_ID', '2026-01-17_132657')

# All artifacts of a run are read from artifacts/<RUN_ID>/ and its sub-directories
ARTIFACTS_DIR = Path(f'artifacts/{RUN_ID}')
RESULTS_DIR, HANDOFF_DIR, MODELS_DIR = (
    ARTIFACTS_DIR / subdir for subdir in ('results', 'handoff', 'models')
)

# Echo the resolved configuration so a wrong or missing run directory is obvious
print(
    f"RUN_ID: {RUN_ID}",
    f"ARTIFACTS_DIR: {ARTIFACTS_DIR}",
    f"Directory exists: {ARTIFACTS_DIR.exists()}",
    sep="\n",
)

---
## Cell 1: Load Data

In [None]:
# Stage1 decisions table (read first; its summary is printed before Stage2 is touched)
stage1_path = RESULTS_DIR / 'stage1_decisions_latest.csv'
stage1_df = pd.read_csv(stage1_path)
print(
    f"Stage1 decisions: {len(stage1_df)} rows",
    f"Columns: {list(stage1_df.columns)}",
    sep="\n",
)

# Stage2 decisions table
stage2_path = RESULTS_DIR / 'stage2_decisions_latest.csv'
stage2_df = pd.read_csv(stage2_path)
print(
    f"\nStage2 decisions: {len(stage2_df)} rows",
    f"Columns: {list(stage2_df.columns)}",
    sep="\n",
)

In [None]:
# Both validation exports are looked up in the same directory
validation_dir = RESULTS_DIR / 'stage2_validation'

# Load evaluation data (if available).
# glob() yields matches in arbitrary, filesystem-dependent order, so the
# candidates are sorted to make the choice reproducible. The last name in sort
# order is used -- presumably the newest if the file suffix is a timestamp
# (TODO confirm against the exporter).
eval_files = sorted(validation_dir.glob('eval_df__*.csv'))
if eval_files:
    if len(eval_files) > 1:
        # Make an ambiguous match visible instead of silently picking one
        print(f"Found {len(eval_files)} eval_df files; using {eval_files[-1].name}")
    eval_df = pd.read_csv(eval_files[-1])
    print(f"Evaluation data: {len(eval_df)} rows")
    print(f"Columns: {list(eval_df.columns)}")
else:
    # Downstream cells test `eval_df is not None` before using it
    eval_df = None
    print("No evaluation data found")

# Load all_test_merged (full test set with agent predictions), same selection rule
merged_files = sorted(validation_dir.glob('all_test_merged__*.csv'))
if merged_files:
    if len(merged_files) > 1:
        print(f"Found {len(merged_files)} all_test_merged files; using {merged_files[-1].name}")
    all_test_df = pd.read_csv(merged_files[-1])
    print(f"\nAll test merged: {len(all_test_df)} rows")
    print(f"Columns: {list(all_test_df.columns)}")
else:
    all_test_df = None
    print("No all_test_merged data found")

In [None]:
# Load handoff data with features.
# Defaults are bound up front so every name exists whether or not the pickle
# is present (previously only fn_features_df was defined on the missing-file path).
fn_features_df = None
cert_full_info_map = {}
brand_keywords = []
DANGEROUS_TLDS = set()
LEGITIMATE_TLDS = set()

handoff_pkl = HANDOFF_DIR / '04-1_config_and_data_preparation.pkl'
if handoff_pkl.exists():
    # NOTE(security): pickle.load can execute arbitrary code; only open
    # artifacts produced by a trusted run of the pipeline.
    with open(handoff_pkl, 'rb') as f:
        handoff_data = pickle.load(f)

    # `or` also covers keys that are present but stored as None
    fn_features_df = handoff_data.get('fn_features_df')
    cert_full_info_map = handoff_data.get('cert_full_info_map') or {}
    brand_keywords = handoff_data.get('brand_keywords') or []
    DANGEROUS_TLDS = set(handoff_data.get('DANGEROUS_TLDS') or [])
    LEGITIMATE_TLDS = set(handoff_data.get('LEGITIMATE_TLDS') or [])

    # fn_features_df may legitimately be absent from the pickle; report 0 rows
    # instead of failing on len(None)
    n_feature_rows = len(fn_features_df) if fn_features_df is not None else 0
    print(f"Handoff features: {n_feature_rows} rows")
    print(f"Cert info map: {len(cert_full_info_map)} entries")
    print(f"Brand keywords: {len(brand_keywords)} keywords")
    print(f"Dangerous TLDs: {len(DANGEROUS_TLDS)}")
else:
    print("Handoff data not found")

In [None]:
# Read the Stage1 routing thresholds for this run
thresholds_path = RESULTS_DIR / 'route1_thresholds.json'
with open(thresholds_path, 'r') as f:
    route1_thresholds = json.load(f)

print("Route1 Thresholds:")
# The two cut-offs are echoed verbatim; coverage is formatted as a percentage
for threshold_key in ('t_low', 't_high'):
    print(f"  {threshold_key}: {route1_thresholds[threshold_key]}")
print(f"  coverage: {route1_thresholds['coverage']:.2%}")

---
## Cell 2: Stage1 Analysis (XGBoost)

In [None]:
# Show how many samples Stage1 assigned to each decision value
rule = "=" * 60
print(rule, "Stage1 Decision Distribution", rule, sep="\n")
decision_counts = stage1_df['stage1_decision'].value_counts()
print(decision_counts)

# Numeric encoding of the decision: 1 = phishing, 0 = benign, -1 = deferred
# to the agent. Decision strings not listed here map to NaN.
decision_codes = {'phishing': 1, 'benign': 0, 'handoff_to_agent': -1}
stage1_df['stage1_pred'] = stage1_df['stage1_decision'].map(decision_codes)

In [None]:
# Stage1 performance (excluding handoff).
# The >= 0 filter drops the -1 (handoff) code; NaN predictions also fail the
# comparison and are dropped.
stage1_decided = stage1_df[stage1_df['stage1_pred'] >= 0].copy()

if len(stage1_decided) > 0:
    y_true_s1 = stage1_decided['y_true']
    # Safe to cast: everything left after the filter is 0 or 1
    y_pred_s1 = stage1_decided['stage1_pred'].astype(int)

    # Pin the label set explicitly. Without it scikit-learn infers the classes
    # from the data, so a subset containing a single class makes
    # classification_report raise (class count != len(target_names)) and
    # shrinks the confusion matrix to 1x1, mismatching the tick labels.
    class_labels = [0, 1]
    class_names = ['Benign', 'Phishing']

    print("\n" + "=" * 60)
    print("Stage1 Performance (Auto-decided samples only)")
    print("=" * 60)
    print(f"Total auto-decided: {len(stage1_decided)}")
    print(f"\nClassification Report:")
    print(classification_report(
        y_true_s1, y_pred_s1,
        labels=class_labels, target_names=class_names,
        zero_division=0,  # report 0 instead of warning when a class has no samples
    ))

    # Confusion matrix (rows = actual, columns = predicted, both in class_labels order)
    cm = confusion_matrix(y_true_s1, y_pred_s1, labels=class_labels)
    fig, ax = plt.subplots(figsize=(6, 5))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax,
                xticklabels=class_names,
                yticklabels=class_names)
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    ax.set_title('Stage1 Confusion Matrix (Auto-decided)')
    plt.tight_layout()
    plt.show()

In [None]:
# ML probability distribution by class: linear counts (left) and log-scaled counts (right)
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
ax_linear, ax_log = axes

t_low = route1_thresholds['t_low']
t_high = route1_thresholds['t_high']
class_specs = ((0, 'Benign'), (1, 'Phishing'))

# Left panel: legend carries the class sizes and the numeric threshold values
for label, name in class_specs:
    subset = stage1_df.loc[stage1_df['y_true'] == label, 'ml_probability']
    ax_linear.hist(subset, bins=50, alpha=0.6, label=f'{name} (n={len(subset)})')
ax_linear.axvline(t_low, color='green', linestyle='--', label=f"t_low={t_low:.4f}")
ax_linear.axvline(t_high, color='red', linestyle='--', label=f"t_high={t_high:.4f}")
ax_linear.set_xlabel('ML Probability')
ax_linear.set_ylabel('Count')
ax_linear.set_title('ML Probability Distribution by Class')
ax_linear.legend()

# Right panel: same histograms on a log count axis so sparse bins stay visible
for label, name in class_specs:
    subset = stage1_df.loc[stage1_df['y_true'] == label, 'ml_probability']
    ax_log.hist(subset, bins=50, alpha=0.6, label=f'{name}')
ax_log.axvline(t_low, color='green', linestyle='--', label=f"t_low")
ax_log.axvline(t_high, color='red', linestyle='--', label=f"t_high")
ax_log.set_xlabel('ML Probability')
ax_log.set_ylabel('Count (log scale)')
ax_log.set_yscale('log')
ax_log.set_title('ML Probability Distribution (Log Scale)')
ax_log.legend()

plt.tight_layout()
plt.show()

In [None]:
# Threshold sweep: treat ml_probability >= th as "phishing" for th = 0.00, 0.05, ..., 1.00
thresholds = np.arange(0.0, 1.01, 0.05)
y_true = stage1_df['y_true']
actual_phish = y_true == 1
actual_benign = y_true == 0

results = []
for th in thresholds:
    flagged = stage1_df['ml_probability'] >= th

    tp = (flagged & actual_phish).sum()
    fp = (flagged & actual_benign).sum()
    tn = (~flagged & actual_benign).sum()
    fn = (~flagged & actual_phish).sum()

    # Each ratio falls back to 0 when its denominator is empty
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0

    results.append(dict(
        threshold=th,
        precision=precision,
        recall=recall,
        f1=f1,
        tp=tp, fp=fp, tn=tn, fn=fn,
    ))

sweep_df = pd.DataFrame(results)

# Plot the three score curves against the threshold
fig, ax = plt.subplots(figsize=(10, 6))
curve_specs = (('precision', 'Precision', 'o'), ('recall', 'Recall', 's'), ('f1', 'F1', '^'))
for column, curve_label, marker in curve_specs:
    ax.plot(sweep_df['threshold'], sweep_df[column], label=curve_label, marker=marker)
ax.axvline(0.5, color='gray', linestyle='--', alpha=0.5, label='Default (0.5)')
ax.set_xlabel('Threshold')
ax.set_ylabel('Score')
ax.set_title('Threshold Sweep Analysis')
ax.legend()
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Report the sweep row with the highest F1 (first one on ties, per idxmax)
best_idx = sweep_df['f1'].idxmax()
best_f1 = sweep_df.at[best_idx, 'f1']
best_threshold = sweep_df.at[best_idx, 'threshold']
print(f"\nBest F1: {best_f1:.4f} at threshold {best_threshold:.2f}")

---
## Cell 3: Stage2 Analysis (LR Gate / Handoff)

In [None]:
# Stage2 handoff analysis
rule = "=" * 60
print(rule, "Stage2 Handoff Analysis", rule, sep="\n")

# The handoff region is every sample Stage1 deferred instead of auto-deciding
is_handoff = stage1_df['stage1_decision'] == 'handoff_to_agent'
handoff_region = stage1_df[is_handoff]
handoff_labels = handoff_region['y_true']
print(f"\nHandoff region size: {len(handoff_region)}")
print(f"  Phishing (y_true=1): {(handoff_labels == 1).sum()}")
print(f"  Benign (y_true=0): {(handoff_labels == 0).sum()}")

# Summary statistics of the Stage1 score inside the handoff region
print(f"\nML probability in handoff region:")
print(handoff_region['ml_probability'].describe())

In [None]:
# Histogram of the Stage1 score inside the handoff region, one series per true class
fig, ax = plt.subplots(figsize=(10, 5))

hist_specs = (
    (0, 'Benign (FP Risk)', 'blue'),
    (1, 'Phishing (FN)', 'red'),
)
for label, name, color in hist_specs:
    in_class = handoff_region['y_true'] == label
    subset = handoff_region.loc[in_class, 'ml_probability']
    ax.hist(subset, bins=50, alpha=0.6, color=color, label=f'{name} (n={len(subset)})')

ax.set_xlabel('ML Probability')
ax.set_ylabel('Count')
ax.set_title('ML Probability Distribution in Handoff Region')
ax.legend()
plt.tight_layout()
plt.show()

In [None]:
# Certificate features in handoff region (skipped when the handoff pickle was not loaded)
if fn_features_df is not None:
    print("\n" + "=" * 60)
    print("Certificate Features in Handoff Region")
    print("=" * 60)

    # Split the handoff rows by ground truth: phishing ("FN") vs benign ("FP risk")
    true_label = fn_features_df['y_true']
    fn_group = fn_features_df[true_label == 1]
    fp_group = fn_features_df[true_label == 0]

    print(f"\nFN (Phishing in handoff): {len(fn_group)}")
    print(f"FP Risk (Benign in handoff): {len(fp_group)}")

    # Candidate features; columns missing from the frame are skipped silently
    features_to_compare = ['ml_probability', 'cert_validity_days', 'cert_age_days',
                           'san_count', 'is_free_ca', 'has_organization']
    present_features = [feat for feat in features_to_compare if feat in fn_features_df.columns]

    comparison = []
    for feat in present_features:
        fn_mean = fn_group[feat].mean()
        fp_mean = fp_group[feat].mean()
        row = {'Feature': feat, 'FN Mean': fn_mean, 'FP Mean': fp_mean}
        row['Diff'] = fn_mean - fp_mean  # positive = higher on the phishing side
        comparison.append(row)

    comparison_df = pd.DataFrame(comparison)
    print("\nFeature Comparison (FN vs FP in Handoff):")
    print(comparison_df.to_string(index=False))

---
## Cell 4: Stage3 Analysis (AI Agent)

In [None]:
# Stage3 AI Agent performance on the evaluation set
if eval_df is not None:
    print("=" * 60)
    print("Stage3 AI Agent Performance")
    print("=" * 60)

    print(f"\nEvaluation samples: {len(eval_df)}")
    print(f"  Phishing: {(eval_df['y_true']==1).sum()}")
    print(f"  Benign: {(eval_df['y_true']==0).sum()}")

    # Agent predictions, cast to 0/1 so they are comparable with y_true
    y_true = eval_df['y_true']
    y_agent = eval_df['agent_pred'].astype(int)

    # Pin the label set explicitly. Without it scikit-learn infers the classes
    # from the data, so an evaluation set containing a single class makes
    # classification_report raise (class count != len(target_names)) and
    # shrinks the confusion matrix to 1x1, mismatching the tick labels.
    class_labels = [0, 1]
    class_names = ['Benign', 'Phishing']

    print("\nAI Agent Classification Report:")
    print(classification_report(
        y_true, y_agent,
        labels=class_labels, target_names=class_names,
        zero_division=0,  # report 0 instead of warning when a class has no samples
    ))

    # Confusion matrix (rows = actual, columns = predicted, both in class_labels order)
    cm = confusion_matrix(y_true, y_agent, labels=class_labels)
    fig, ax = plt.subplots(figsize=(6, 5))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Oranges', ax=ax,
                xticklabels=class_names,
                yticklabels=class_names)
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    ax.set_title('AI Agent Confusion Matrix')
    plt.tight_layout()
    plt.show()

In [None]:
# Stage1 vs AI Agent: side-by-side confusion counts and scores on the evaluation set
if eval_df is not None:
    print("\n" + "=" * 60)
    print("Stage1 vs AI Agent Comparison")
    print("=" * 60)

    y_true = eval_df['y_true']
    y_stage1 = eval_df['stage1_pred'].astype(int)
    y_agent = eval_df['agent_pred'].astype(int)

    actual_phish = y_true == 1
    actual_benign = y_true == 0

    metrics = []
    for name, y_pred in (('Stage1', y_stage1), ('AI Agent', y_agent)):
        said_phish = y_pred == 1
        said_benign = y_pred == 0

        tp = (said_phish & actual_phish).sum()
        fp = (said_phish & actual_benign).sum()
        tn = (said_benign & actual_benign).sum()
        fn = (said_benign & actual_phish).sum()

        # Each ratio falls back to 0 when its denominator is empty
        precision = tp / (tp + fp) if (tp + fp) > 0 else 0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0
        f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0

        # Scores are stored pre-formatted as 3-decimal strings for display
        metrics.append({
            'Model': name,
            'TP': tp, 'FP': fp, 'TN': tn, 'FN': fn,
            'Precision': f'{precision:.3f}',
            'Recall': f'{recall:.3f}',
            'F1': f'{f1:.3f}',
        })

    metrics_df = pd.DataFrame(metrics)
    print(metrics_df.to_string(index=False))

---
## Cell 5: FP/FN Pattern Analysis

In [None]:
# TLD danger tiers. Entries are lower-case and carry no leading dot.
HIGH_DANGER_TLDS = frozenset({
    # Free TLDs
    'tk', 'ml', 'ga', 'cf', 'gq',
    # Phishing-specific
    'icu', 'cfd', 'sbs', 'rest', 'cyou',
    # High phishing rate
    'pw', 'buzz', 'lat',
})

MEDIUM_DANGER_TLDS = frozenset({
    'top', 'shop', 'xyz', 'cc', 'online', 'site', 'website',
    'club', 'vip', 'asia', 'one', 'link', 'click', 'live',
    'cn', 'tokyo', 'dev', 'me', 'pe', 'ar', 'cl', 'mw', 'ci',
})

def extract_tld(domain):
    """Return the last dot-separated label of *domain*, or '' if there is none.

    Surrounding whitespace and a trailing root dot ("example.com.") are
    ignored so they cannot hide or pollute the TLD. Non-string input (for
    example None or a pandas NaN) is converted with str() first, which yields
    '' because the resulting text contains no dot. Letter case is preserved.
    """
    host = str(domain).strip().rstrip('.')
    # rpartition splits on the LAST dot; `dot` is '' when the host has no dot at all
    _, dot, tld = host.rpartition('.')
    return tld if dot else ''

def classify_tld(tld):
    """Map a TLD string to 'high_danger', 'medium_danger' or 'non_danger'.

    The lookup is case-insensitive; anything found in neither tier set is
    reported as 'non_danger'.
    """
    key = tld.lower()
    tiers = (
        ('high_danger', HIGH_DANGER_TLDS),
        ('medium_danger', MEDIUM_DANGER_TLDS),
    )
    # Tiers are checked from most to least dangerous; the first hit wins
    for level, members in tiers:
        if key in members:
            return level
    return 'non_danger'

In [None]:
# FP Analysis: benign samples the agent labelled as phishing
if eval_df is not None:
    rule = "=" * 60
    print(rule, "False Positive (FP) Analysis", rule, sep="\n")

    agent_says_phish = eval_df['agent_pred'] == True
    truly_benign = eval_df['y_true'] == 0
    fp_cases = eval_df[agent_says_phish & truly_benign].copy()
    print(f"\nTotal FP: {len(fp_cases)}")

    # Attach the TLD and its danger tier to every FP row
    fp_cases['tld'] = fp_cases['domain'].apply(extract_tld)
    fp_cases['tld_class'] = fp_cases['tld'].apply(classify_tld)

    print("\nFP by TLD class:")
    print(fp_cases['tld_class'].value_counts())

    # Bucket the Stage1 score; ml_bins is read again by the FN analysis cell
    print("\nFP ML distribution:")
    ml_bins = [0, 0.15, 0.20, 0.30, 0.50, 1.0]
    fp_cases['ml_bin'] = pd.cut(fp_cases['ml_probability'], bins=ml_bins)
    print(fp_cases['ml_bin'].value_counts().sort_index())

    # Split by certificate validity at the 90-day mark
    print("\nFP Certificate analysis:")
    short_cert = fp_cases['cert_validity_days'] <= 90
    long_cert = ~short_cert
    print(f"  Short cert (<=90 days): {short_cert.sum()} ({short_cert.mean()*100:.1f}%)")
    print(f"  Long cert (>90 days): {long_cert.sum()} ({long_cert.mean()*100:.1f}%)")

In [None]:
# FP samples that match the POST_LLM_FLIP_GATE blocking condition.
# Reads fp_cases and short_cert produced by the FP analysis cell.
if eval_df is not None:
    print("\n" + "=" * 60)
    print("POST_LLM_FLIP_GATE Analysis")
    print("=" * 60)

    # Blocking condition used here: non-danger TLD together with ML < 0.30
    safe_tld = fp_cases['tld_class'] == 'non_danger'
    low_ml = fp_cases['ml_probability'] < 0.30
    should_be_blocked = safe_tld & low_ml

    print(f"\nFP that should have been blocked (non-danger TLD + ML<0.30): {should_be_blocked.sum()}")
    print(f"  → These were likely overridden by P1 gate (low_signal_phishing_gate)")

    # Narrow further to the rows that also carry a short certificate
    blocked_with_short_cert = should_be_blocked & short_cert
    print(f"\n  + Short cert (<=90 days): {blocked_with_short_cert.sum()}")
    print(f"  → P1 gate condition: brand_detected + short_cert + ML<0.30")

In [None]:
# FN Analysis: phishing samples the agent labelled as benign
if eval_df is not None:
    print("\n" + "=" * 60)
    print("False Negative (FN) Analysis")
    print("=" * 60)

    agent_says_benign = eval_df['agent_pred'] == False
    truly_phish = eval_df['y_true'] == 1
    fn_cases = eval_df[agent_says_benign & truly_phish].copy()
    print(f"\nTotal FN: {len(fn_cases)}")

    # Attach the TLD and its danger tier to every FN row
    fn_cases['tld'] = fn_cases['domain'].apply(extract_tld)
    fn_cases['tld_class'] = fn_cases['tld'].apply(classify_tld)

    print("\nFN by TLD class:")
    print(fn_cases['tld_class'].value_counts())

    # Uses the ml_bins edges defined by the FP analysis cell
    print("\nFN ML distribution:")
    fn_cases['ml_bin'] = pd.cut(fn_cases['ml_probability'], bins=ml_bins)
    fn_bin_counts = fn_cases['ml_bin'].value_counts().sort_index()
    print(fn_bin_counts)

In [None]:
# Show up to 15 FP rows with a non-danger TLD and a low Stage1 score
if eval_df is not None:
    print("\n" + "=" * 60)
    print("Sample FP Cases (non-danger TLD, ML < 0.30)")
    print("=" * 60)

    display_cols = ['domain', 'ml_probability', 'cert_validity_days', 'cert_san_count']
    low_signal = (
        (fp_cases['tld_class'] == 'non_danger') &
        (fp_cases['ml_probability'] < 0.30)
    )
    fp_low_ml = fp_cases.loc[low_signal, display_cols].head(15)

    print(fp_low_ml.to_string(index=False))

---
## Cell 6: Gate Simulation

In [None]:
# Simulate different gate configurations on top of the agent's predictions
if eval_df is not None:
    print("=" * 60)
    print("Gate Configuration Simulation")
    print("=" * 60)

    # Working copy enriched with TLD tier and a short-certificate flag
    sim_df = eval_df.copy()
    sim_df['tld'] = sim_df['domain'].apply(extract_tld)
    sim_df['tld_class'] = sim_df['tld'].apply(classify_tld)
    sim_df['short_cert'] = sim_df['cert_validity_days'] <= 90

    y_true = sim_df['y_true']

    def calc_metrics(y_pred):
        """Return confusion counts and precision/recall/F1 of y_pred against y_true."""
        said_phish, said_benign = (y_pred == 1), (y_pred == 0)
        is_phish, is_benign = (y_true == 1), (y_true == 0)
        tp = (said_phish & is_phish).sum()
        fp = (said_phish & is_benign).sum()
        tn = (said_benign & is_benign).sum()
        fn = (said_benign & is_phish).sum()
        # Each ratio falls back to 0 when its denominator is empty
        precision = tp / (tp + fp) if (tp + fp) > 0 else 0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0
        f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0
        return {'TP': tp, 'FP': fp, 'TN': tn, 'FN': fn, 'Precision': precision, 'Recall': recall, 'F1': f1}

    agent_as_int = sim_df['agent_pred'].astype(int)

    # Current (baseline): the agent's predictions as they are
    current = calc_metrics(agent_as_int)
    current['Config'] = 'Current (Agent)'

    # Stage1 only
    stage1_only = calc_metrics(sim_df['stage1_pred'].astype(int))
    stage1_only['Config'] = 'Stage1 Only'

    # Shared parts of both gate scenarios: agent said PHISH on a non-danger TLD
    agent_says_phish = sim_df['agent_pred'] == True
    safe_tld = sim_df['tld_class'] == 'non_danger'

    # Scenario: Disable P1 gate for non-danger TLD
    # = Block LLM PHISH when: non-danger TLD + ML < 0.30
    block_mask = agent_says_phish & safe_tld & (sim_df['ml_probability'] < 0.30)
    y_sim1 = agent_as_int.copy()
    y_sim1[block_mask] = 0
    scenario1 = calc_metrics(y_sim1)
    scenario1['Config'] = 'P1 disabled (non-danger TLD)'

    # Scenario: More aggressive gate (ML < 0.35)
    block_mask2 = agent_says_phish & safe_tld & (sim_df['ml_probability'] < 0.35)
    y_sim2 = agent_as_int.copy()
    y_sim2[block_mask2] = 0
    scenario2 = calc_metrics(y_sim2)
    scenario2['Config'] = 'Gate ML < 0.35 (non-danger)'

    # Summary table: fixed column order, scores rendered with 3 decimals
    results = pd.DataFrame([stage1_only, current, scenario1, scenario2])
    results = results[['Config', 'TP', 'FP', 'TN', 'FN', 'Precision', 'Recall', 'F1']]
    for score_col in ('Precision', 'Recall', 'F1'):
        results[score_col] = results[score_col].apply(lambda x: f'{x:.3f}')

    print("\nGate Configuration Comparison:")
    print(results.to_string(index=False))

In [None]:
# Sample Extra TP domains that would be LOST by Gate B (non_danger + ML < 0.35)
if eval_df is not None and len(extra_tp) > 0:
    print("=" * 80)
    print("Extra TP at Risk: Phishing that would be MISSED by Gate B (non_danger + ML < 0.35)")
    print("=" * 80)
    
    gate_b_mask = (extra_tp['tld_class'] == 'non_danger') & (extra_tp['ml_probability'] < 0.35)
    at_risk = extra_tp[gate_b_mask]
    
    print(f"\nTotal Extra TP: {len(extra_tp)}")
    print(f"At risk with Gate B: {len(at_risk)}")
    
    if len(at_risk) > 0:
        print(f"\nDomains that would be MISSED (shown as BENIGN when actually PHISHING):")
        cols = ['domain', 'ml_probability', 'tld_class', 'cert_validity_days', 'cert_san_count']
        for _, row in at_risk[cols].iterrows():
            print(f"  {row['domain']:<45} ML={row['ml_probability']:.3f}  cert={row['cert_validity_days']:.0f}d  SAN={row['cert_san_count']:.0f}")
        
        # Check if these are brand impersonation
        print(f"\n  → Review these domains: are they clear brand impersonation?")
        print(f"  → If yes, consider improving brand detection instead of loosening gate")

In [None]:
# Sample Extra FP domains (to understand why Agent is wrong)
if eval_df is not None and len(extra_fp) > 0:
    print("=" * 80)
    print("Sample Extra FP Domains (Agent wrong, Stage1 correct)")
    print("=" * 80)
    
    cols_to_show = ['domain', 'ml_probability', 'tld_class', 'cert_validity_days', 'cert_san_count']
    
    # By TLD class
    for tld_class in ['non_danger', 'medium_danger', 'high_danger']:
        subset = extra_fp[extra_fp['tld_class'] == tld_class]
        if len(subset) > 0:
            print(f"\n--- {tld_class.upper()} TLD ({len(subset)} cases) ---")
            sample = subset.nsmallest(10, 'ml_probability')[cols_to_show]
            for _, row in sample.iterrows():
                print(f"  {row['domain']:<40} ML={row['ml_probability']:.3f}  cert={row['cert_validity_days']:.0f}d  SAN={row['cert_san_count']:.0f}")

In [None]:
# Gate Candidate Impact Analysis: FP saved vs TP lost
if eval_df is not None and len(extra_fp) > 0:
    print("=" * 80)
    print("Gate Candidate Impact Analysis: FP saved vs TP lost")
    print("=" * 80)
    
    # Extra TP: Stage1 said BENIGN, Agent said PHISH (correctly)
    extra_tp = analysis_df[
        (analysis_df['stage1_pred'] == 0) &  # Stage1 = BENIGN
        (analysis_df['agent_pred'] == True) &  # Agent = PHISH
        (analysis_df['y_true'] == 1)  # Actually PHISHING
    ].copy()
    extra_tp['tld'] = extra_tp['domain'].apply(extract_tld)
    extra_tp['tld_class'] = extra_tp['tld'].apply(classify_tld)
    
    print(f"\nExtra TP (Agent correctly identifies phishing that Stage1 missed): {len(extra_tp)}")
    
    # Analyze each gate candidate
    gate_candidates = [
        ('Gate A: non_danger + ML < 0.30', 
         lambda df: (df['tld_class'] == 'non_danger') & (df['ml_probability'] < 0.30)),
        ('Gate B: non_danger + ML < 0.35', 
         lambda df: (df['tld_class'] == 'non_danger') & (df['ml_probability'] < 0.35)),
        ('Gate C: non_danger + ML < 0.40', 
         lambda df: (df['tld_class'] == 'non_danger') & (df['ml_probability'] < 0.40)),
        ('Gate D: (non_danger + ML < 0.35) OR (SAN > 10)', 
         lambda df: ((df['tld_class'] == 'non_danger') & (df['ml_probability'] < 0.35)) | (df['cert_san_count'] > 10)),
        ('Gate E: non_danger + ML < 0.35 + short_cert', 
         lambda df: (df['tld_class'] == 'non_danger') & (df['ml_probability'] < 0.35) & (df['cert_validity_days'] <= 90)),
    ]
    
    print("\n" + "-" * 80)
    print(f"{'Gate':<45} {'FP Saved':>10} {'TP Lost':>10} {'Net':>10} {'F1 Impact':>12}")
    print("-" * 80)
    
    # Current metrics
    y_true = analysis_df['y_true']
    y_agent = analysis_df['agent_pred'].astype(int)
    
    base_tp = ((y_agent == 1) & (y_true == 1)).sum()
    base_fp = ((y_agent == 1) & (y_true == 0)).sum()
    base_fn = ((y_agent == 0) & (y_true == 1)).sum()
    base_prec = base_tp / (base_tp + base_fp)
    base_rec = base_tp / (base_tp + base_fn)
    base_f1 = 2 * base_prec * base_rec / (base_prec + base_rec)
    
    for name, condition_fn in gate_candidates:
        fp_blocked = condition_fn(extra_fp).sum()
        tp_blocked = condition_fn(extra_tp).sum()
        
        # Calculate new F1
        new_tp = base_tp - tp_blocked
        new_fp = base_fp - fp_blocked
        new_fn = base_fn + tp_blocked
        new_prec = new_tp / (new_tp + new_fp) if (new_tp + new_fp) > 0 else 0
        new_rec = new_tp / (new_tp + new_fn) if (new_tp + new_fn) > 0 else 0
        new_f1 = 2 * new_prec * new_rec / (new_prec + new_rec) if (new_prec + new_rec) > 0 else 0
        
        f1_change = new_f1 - base_f1
        net = fp_blocked - tp_blocked
        
        print(f"{name:<45} {fp_blocked:>10} {tp_blocked:>10} {net:>+10} {f1_change:>+11.4f}")
    
    print("-" * 80)
    print(f"Current: TP={base_tp}, FP={base_fp}, FN={base_fn}, F1={base_f1:.4f}")

In [None]:
# Extra FP: Pattern Discovery
if eval_df is not None and len(extra_fp) > 0:
    print("=" * 80)
    print("Extra FP: Pattern Discovery")
    print("=" * 80)
    
    # Define patterns to check
    patterns = []
    
    # Pattern 1: Non-danger TLD + Low ML + Short Cert
    p1 = (extra_fp['tld_class'] == 'non_danger') & \
         (extra_fp['ml_probability'] < 0.30) & \
         (extra_fp['cert_validity_days'] <= 90)
    patterns.append(('P1: non_danger + ML<0.30 + short_cert', p1))
    
    # Pattern 2: Non-danger TLD + Very Low ML
    p2 = (extra_fp['tld_class'] == 'non_danger') & \
         (extra_fp['ml_probability'] < 0.20)
    patterns.append(('P2: non_danger + ML<0.20', p2))
    
    # Pattern 3: High SAN count (legitimate indicator)
    p3 = extra_fp['cert_san_count'] > 10
    patterns.append(('P3: SAN count > 10', p3))
    
    # Pattern 4: Medium danger TLD + Low ML
    p4 = (extra_fp['tld_class'] == 'medium_danger') & \
         (extra_fp['ml_probability'] < 0.35)
    patterns.append(('P4: medium_danger + ML<0.35', p4))
    
    # Pattern 5: Non-danger + ML < 0.35 (broader)
    p5 = (extra_fp['tld_class'] == 'non_danger') & \
         (extra_fp['ml_probability'] < 0.35)
    patterns.append(('P5: non_danger + ML<0.35', p5))
    
    # Pattern 6: Long certificate (>365 days)
    p6 = extra_fp['cert_validity_days'] > 365
    patterns.append(('P6: long_cert (>365 days)', p6))
    
    print("\nPattern Analysis:")
    print("-" * 60)
    print(f"{'Pattern':<45} {'Count':>8} {'%':>8}")
    print("-" * 60)
    
    for name, mask in patterns:
        count = mask.sum()
        pct = count / len(extra_fp) * 100 if len(extra_fp) > 0 else 0
        print(f"{name:<45} {count:>8} {pct:>7.1f}%")
    
    print("-" * 60)
    print(f"{'Total Extra FP':<45} {len(extra_fp):>8}")
    
    # Combined pattern analysis
    print("\n" + "=" * 60)
    print("Combined Pattern: Potential Gate Candidates")
    print("=" * 60)
    
    # Candidate 1: non_danger + ML < 0.35
    c1 = (extra_fp['tld_class'] == 'non_danger') & (extra_fp['ml_probability'] < 0.35)
    print(f"\n[Candidate 1] non_danger TLD + ML < 0.35")
    print(f"  Would block: {c1.sum()} Extra FP ({c1.sum()/len(extra_fp)*100:.1f}%)")
    
    # Candidate 2: non_danger + ML < 0.40
    c2 = (extra_fp['tld_class'] == 'non_danger') & (extra_fp['ml_probability'] < 0.40)
    print(f"\n[Candidate 2] non_danger TLD + ML < 0.40")
    print(f"  Would block: {c2.sum()} Extra FP ({c2.sum()/len(extra_fp)*100:.1f}%)")
    
    # Candidate 3: high SAN (>10) + non_danger
    c3 = (extra_fp['cert_san_count'] > 10) & (extra_fp['tld_class'] == 'non_danger')
    print(f"\n[Candidate 3] SAN > 10 + non_danger TLD")
    print(f"  Would block: {c3.sum()} Extra FP ({c3.sum()/len(extra_fp)*100:.1f}%)")

In [None]:
# Extra FP: ML probability and certificate analysis
if eval_df is not None and len(extra_fp) > 0:
    print("=" * 60)
    print("Extra FP: ML Probability Distribution")
    print("=" * 60)
    
    # ML distribution
    ml_bins = [0, 0.10, 0.15, 0.20, 0.25, 0.30, 0.35, 0.40, 0.50, 1.0]
    extra_fp['ml_bin'] = pd.cut(extra_fp['ml_probability'], bins=ml_bins)
    
    print("\nML probability distribution:")
    ml_dist = extra_fp['ml_bin'].value_counts().sort_index()
    for bin_range, count in ml_dist.items():
        pct = count / len(extra_fp) * 100
        bar = '█' * int(pct / 2)
        print(f"  {bin_range}: {count:3d} ({pct:5.1f}%) {bar}")
    
    print(f"\nML statistics:")
    print(f"  Mean: {extra_fp['ml_probability'].mean():.4f}")
    print(f"  Median: {extra_fp['ml_probability'].median():.4f}")
    print(f"  Std: {extra_fp['ml_probability'].std():.4f}")
    print(f"  Min: {extra_fp['ml_probability'].min():.4f}")
    print(f"  Max: {extra_fp['ml_probability'].max():.4f}")
    
    # Certificate analysis
    print("\n" + "=" * 60)
    print("Extra FP: Certificate Analysis")
    print("=" * 60)
    
    short_cert = extra_fp['cert_validity_days'] <= 90
    has_cert = extra_fp['cert_validity_days'] > 0
    
    print(f"\nCertificate validity:")
    print(f"  Has certificate: {has_cert.sum()} ({has_cert.mean()*100:.1f}%)")
    print(f"  Short cert (<=90 days): {short_cert.sum()} ({short_cert.mean()*100:.1f}%)")
    print(f"  Long cert (>90 days): {(~short_cert & has_cert).sum()}")
    
    # SAN count
    print(f"\nSAN count statistics:")
    print(f"  Mean: {extra_fp['cert_san_count'].mean():.1f}")
    print(f"  Median: {extra_fp['cert_san_count'].median():.1f}")
    print(f"  Low SAN (<=3): {(extra_fp['cert_san_count'] <= 3).sum()}")
    print(f"  High SAN (>10): {(extra_fp['cert_san_count'] > 10).sum()}")

In [None]:
# Extra FP Analysis: Stage1=BENIGN, Agent=PHISH, y_true=0
if eval_df is not None:
    print("=" * 80)
    print("EXTRA FP ANALYSIS: Cases where AI Agent incorrectly overrides Stage1")
    print("=" * 80)
    
    # Create analysis dataframe
    analysis_df = eval_df.copy()
    analysis_df['tld'] = analysis_df['domain'].apply(extract_tld)
    analysis_df['tld_class'] = analysis_df['tld'].apply(classify_tld)
    
    # Extra FP: Stage1 said BENIGN (correct), Agent said PHISH (wrong)
    extra_fp = analysis_df[
        (analysis_df['stage1_pred'] == 0) &  # Stage1 = BENIGN
        (analysis_df['agent_pred'] == True) &  # Agent = PHISH
        (analysis_df['y_true'] == 0)  # Actually BENIGN
    ].copy()
    
    # Stage1 FP (for comparison)
    stage1_fp = analysis_df[
        (analysis_df['stage1_pred'] == 1) &
        (analysis_df['y_true'] == 0)
    ]
    
    print(f"\nStage1 FP count: {len(stage1_fp)}")
    print(f"AI Agent total FP: {len(fp_cases)}")
    print(f"Extra FP (Agent adds): {len(extra_fp)}")
    print(f"  → AI Agent is adding {len(extra_fp)} incorrect PHISH predictions")
    
    # TLD distribution of Extra FP
    print("\n" + "-" * 60)
    print("Extra FP by TLD Class:")
    print("-" * 60)
    print(extra_fp['tld_class'].value_counts())
    print(f"\nPercentage breakdown:")
    for tld_class in ['non_danger', 'medium_danger', 'high_danger']:
        count = (extra_fp['tld_class'] == tld_class).sum()
        pct = count / len(extra_fp) * 100 if len(extra_fp) > 0 else 0
        print(f"  {tld_class}: {count} ({pct:.1f}%)")

---
## Cell 6b: AI Agent Extra FP Analysis (Deep Dive)

**目的**: Stage1がBENIGNと判定したのに、AI AgentがPHISHと誤判定したケース（Extra FP）を深掘り分析

**問題**: AI Agent は Stage1 より 95件も多くFPを出している。この原因を特定し、改善策を検討する。

---
## Cell 7: Cost Analysis

In [None]:
# Cost analysis: weighted misclassification cost of Stage1 vs the AI Agent.
# agent_tp / agent_fp / agent_fn computed here are read again by the summary report.
if eval_df is not None:
    # A missed phishing site (FN) is weighted three times a false alarm (FP)
    FN_COST = 3.0
    FP_COST = 1.0

    print("=" * 60)
    # Banner is derived from the constants so it cannot drift from the weights used
    print(f"Cost Analysis (FN={FN_COST}, FP={FP_COST})")
    print("=" * 60)

    def calc_cost(tp, fp, tn, fn):
        """Return fn * FN_COST + fp * FP_COST; tp and tn are accepted but carry no cost."""
        return fn * FN_COST + fp * FP_COST

    # Current predictions of both stages on the evaluation set, as 0/1
    y_true = eval_df['y_true']
    y_agent = eval_df['agent_pred'].astype(int)
    y_stage1 = eval_df['stage1_pred'].astype(int)

    agent_tp = ((y_agent == 1) & (y_true == 1)).sum()
    agent_fp = ((y_agent == 1) & (y_true == 0)).sum()
    agent_fn = ((y_agent == 0) & (y_true == 1)).sum()
    agent_cost = calc_cost(agent_tp, agent_fp, 0, agent_fn)

    stage1_tp = ((y_stage1 == 1) & (y_true == 1)).sum()
    stage1_fp = ((y_stage1 == 1) & (y_true == 0)).sum()
    stage1_fn = ((y_stage1 == 0) & (y_true == 1)).sum()
    stage1_cost = calc_cost(stage1_tp, stage1_fp, 0, stage1_fn)

    print(f"\nStage1 Only:")
    print(f"  FP: {stage1_fp}, FN: {stage1_fn}")
    print(f"  Cost: {stage1_cost:.0f}")

    print(f"\nAI Agent:")
    print(f"  FP: {agent_fp}, FN: {agent_fn}")
    print(f"  Cost: {agent_cost:.0f}")

    cost_diff = agent_cost - stage1_cost
    print(f"\nDifference: {cost_diff:+.0f}")
    # Three-way verdict: a tie is reported as unchanged, not as an increase
    if cost_diff < 0:
        print("  → AI Agent reduces total cost")
    elif cost_diff > 0:
        print("  → AI Agent increases total cost")
    else:
        print("  → AI Agent leaves total cost unchanged")

---
## Cell 8: Summary Report

In [None]:
# Generate summary report.
# Reads results of earlier cells: stage1_decided, handoff_region,
# route1_thresholds, fp_cases and the agent_* counts from the cost analysis.
print("=" * 80)
print("PIPELINE ANALYSIS SUMMARY REPORT")
print(f"RUN_ID: {RUN_ID}")
print(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print("=" * 80)

print("\n### Stage1 (XGBoost)")
print(f"- Total samples: {len(stage1_df)}")
print(f"- Auto-decided: {len(stage1_decided)}")
print(f"- Handoff to agent: {len(handoff_region)}")
print(f"- Thresholds: t_low={route1_thresholds['t_low']:.4f}, t_high={route1_thresholds['t_high']:.4f}")

if eval_df is not None:
    # F1 = 2TP / (2TP + FP + FN); report 0 when the agent produced no TP, FP or FN
    f1_denominator = 2 * agent_tp + agent_fp + agent_fn
    agent_f1 = 2 * agent_tp / f1_denominator if f1_denominator > 0 else 0.0

    print("\n### Stage3 (AI Agent)")
    print(f"- Evaluated samples: {len(eval_df)}")
    print(f"- Agent FP: {agent_fp}")
    print(f"- Agent FN: {agent_fn}")
    print(f"- F1 Score: {agent_f1:.3f}")

print("\n### Key Findings")
if eval_df is not None:
    fp_non_danger_low_ml = len(fp_cases[(fp_cases['tld_class'] == 'non_danger') & (fp_cases['ml_probability'] < 0.30)])
    # Recompute the short-certificate mask from fp_cases right here: the global
    # `short_cert` is rebound by the Extra FP certificate cell to a mask over
    # extra_fp, which would make this line report the wrong population.
    fp_short_cert = fp_cases['cert_validity_days'] <= 90
    print(f"- FP with non-danger TLD + ML<0.30: {fp_non_danger_low_ml}")
    print(f"  → These should be blocked by POST_LLM_FLIP_GATE but are overridden by P1 gate")
    print(f"- FP with short certificate (<=90 days): {fp_short_cert.sum()} ({fp_short_cert.mean()*100:.1f}%)")
    print(f"  → Let's Encrypt (90-day certs) is common on legitimate sites")

print("\n### Recommendations")
print("1. Consider disabling P1 gate for non-danger TLDs with ML < 0.30")
print("2. Review brand detection logic to reduce false brand matches")
print("3. Consider adding certificate age (cert_age_days) as additional signal")