In [7]:
# ============================================
# Cell 1: 環境設定と初期化
# ============================================

import os
import sys
import json
import pickle
import pandas as pd
import numpy as np
import time
from pathlib import Path
from typing import Dict, Any, List, Optional

# Path setup: make the project root importable.
BASE_DIR = Path("/home/asomura/nextstep")
sys.path.insert(0, str(BASE_DIR))

# Run id used when neither the config file nor the environment provides one.
DEFAULT_RUN_ID = "20251030_004055"

# Resolve RUN_ID. The optional `_compat` helper supplies a config dict; the
# RUN_ID environment variable is honored on both paths so that an explicit
# override is not silently ignored when `_compat` is unavailable.
try:
    from _compat import paths
    cfg = paths.load_config(os.getenv("CONFIG_JSON"), strict=False)
    RUN_ID = cfg.get("run_id") or os.getenv("RUN_ID") or DEFAULT_RUN_ID
except ImportError:
    RUN_ID = os.getenv("RUN_ID") or DEFAULT_RUN_ID
    if RUN_ID == DEFAULT_RUN_ID:
        print("[INFO] _compat not found, using default RUN_ID")
    else:
        print("[INFO] _compat not found, using RUN_ID from environment")

# Per-run artifact directories derived from RUN_ID.
ARTIFACTS_DIR = BASE_DIR / "artifacts" / RUN_ID
HANDOFF_DIR = ARTIFACTS_DIR / "handoff"
LOGS_DIR = ARTIFACTS_DIR / "logs"

print(f"[INFO] BASE_DIR = {BASE_DIR}")
print(f"[INFO] RUN_ID = {RUN_ID}")
print(f"[INFO] ARTIFACTS_DIR = {ARTIFACTS_DIR}")

# Timestamp used to name output files.
# NOTE: a later cell runs `from datetime import datetime`, which rebinds the
# name `datetime` from the module to the class; re-running this cell restores
# the module binding.
import datetime
timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S")
print(f"[INFO] Timestamp: {timestamp}")

[INFO] BASE_DIR = /home/asomura/nextstep
[INFO] RUN_ID = 2025-11-18_053624
[INFO] ARTIFACTS_DIR = /home/asomura/nextstep/artifacts/2025-11-18_053624
[INFO] Timestamp: 2025-11-20_185414


In [8]:
# ============================================
# Cell 2: Load external data and top up brand keywords
# ============================================

# Load external_data from the handoff pickle of the current run.
handoff_path = HANDOFF_DIR / "04-3_llm_tools_setup_with_tools.pkl"

if not handoff_path.exists():
    print(f"[ERROR] Handoff file not found: {handoff_path}")
    print("[INFO] Trying alternative path...")
    handoff_path = Path("/home/asomura/nextstep/artifacts/20251030_004055/handoff/04-3_llm_tools_setup_with_tools.pkl")
    if not handoff_path.exists():
        # Fail with an explicit message instead of an unexplained open() error.
        raise FileNotFoundError(
            f"Handoff file not found at the alternative path either: {handoff_path}"
        )

# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# only load handoff files produced by trusted pipeline runs.
with open(handoff_path, 'rb') as f:
    external_data = pickle.load(f)

print(f"[INFO] external_data loaded from: {handoff_path}")
print(f"[INFO] external_data keys: {list(external_data.keys())[:8]} ...")

# Make sure a brand_keywords list exists before topping it up.
if 'brand_keywords' not in external_data:
    external_data['brand_keywords'] = []
    print("[WARNING] brand_keywords not found, creating new list")

# Lower-cased view of the brands already present (non-string entries are ignored).
brands_lower = [b.lower() for b in external_data['brand_keywords'] if isinstance(b, str)]
known_brands = set(brands_lower)
print(f"[INFO] Current brand_keywords count: {len(external_data['brand_keywords'])}")

# Brands that must be present (focus on Japanese financial institutions).
# NOTE(review): very short tokens such as "x", "au", "bit" and "line" may match
# unrelated domains if the brand tool does substring matching -- confirm
# against the detection results before relying on them.
essential_brands = {
    # Japanese financial institutions
    "mufg", "smbc", "mizuho", "ufj", "mitsubishi", "sumitomo", "mitsui",
    "resona", "jcb", "visa", "mastercard", "amex",
    "japanpost", "jppost", "yucho", "jabank", "shinkin",
    # Japanese services
    "rakuten", "mercari", "yahoo", "line", "docomo", "au", "softbank",
    "ntt", "kddi", "uniqlo", "muji", "nitori",
    # Global brands
    "amazon", "google", "apple", "microsoft", "paypal", "netflix",
    "facebook", "meta", "instagram", "twitter", "x",
    "metamask", "binance", "coinbase", "ethereum", "bitcoin",
    # URL shorteners / CDNs
    "bit", "tinyurl", "shorturl", "cdn", "cloudflare", "akamai"
}

# Append the missing brands. Iterating in sorted order keeps the resulting
# list order identical between runs (set iteration order is not stable
# across interpreter processes).
added_brands = []
for brand in sorted(essential_brands):
    if brand not in known_brands:
        external_data['brand_keywords'].append(brand)
        added_brands.append(brand)

print(f"[INFO] Added {len(added_brands)} new brands")
print(f"[INFO] Total brand_keywords: {len(external_data['brand_keywords'])}")

# Spot-check a few key brands. The same isinstance guard as above is applied
# so that a non-string entry cannot raise AttributeError here.
check_brands = ["mufg", "smbc", "amazon", "mercari", "rakuten", "metamask"]
current_brands_lower = {b.lower() for b in external_data['brand_keywords'] if isinstance(b, str)}
print("\n[Brand Check]")
for brand in check_brands:
    exists = brand in current_brands_lower
    status = "✅" if exists else "❌"
    print(f"  {brand:15} {status}")

[ERROR] Handoff file not found: /home/asomura/nextstep/artifacts/2025-11-18_053624/handoff/04-3_llm_tools_setup_with_tools.pkl
[INFO] Trying alternative path...
[INFO] external_data loaded from: /home/asomura/nextstep/artifacts/20251030_004055/handoff/04-3_llm_tools_setup_with_tools.pkl
[INFO] external_data keys: ['cfg', 'brand_keywords', 'cert_full_info_map', 'tools', 'llm', 'async_client', 'tools_code', 'present_top'] ...
[INFO] Current brand_keywords count: 100
[INFO] Added 39 new brands
[INFO] Total brand_keywords: 139

[Brand Check]
  mufg            ✅
  smbc            ✅
  amazon          ✅
  mercari         ✅
  rakuten         ✅
  metamask        ✅


In [9]:
# ============================================
# Cell 3: Random100データの準備（元のノートブック準拠）
# ============================================

from urllib.parse import urlparse

# データ探索用ヘルパー関数
def _search_eval_df(obj):
    '''dict/list 再帰で DataFrame を探す'''
    if obj is None:
        return None
    if isinstance(obj, pd.DataFrame):
        return obj
    if isinstance(obj, dict):
        # よくあるキー直参照
        for k in ("false_negatives_df", "fn_df", "eval_df", "random_eval_df"):
            v = obj.get(k)
            if isinstance(v, pd.DataFrame):
                return v
        for v in obj.values():
            hit = _search_eval_df(v)
            if hit is not None:
                return hit
    if isinstance(obj, (list, tuple)):
        for v in obj:
            hit = _search_eval_df(v)
            if hit is not None:
                return hit
    return None

def _normalize_eval_df(df):
    '''列名の揺れに対応して domain, ml_probability の2列に正規化'''
    print("[DEBUG] columns:", list(df.columns))
    
    # lower → 元名のマップ
    lower2orig = {c.lower(): c for c in df.columns}
    
    # 1) domain候補
    domain_candidates = ["domain", "fqdn", "domain_name", "hostname", "host", "requested_host"]
    domain_key = None
    for key in domain_candidates:
        if key in lower2orig:
            domain_key = lower2orig[key]
            break
    
    if domain_key is None:
        # 部分一致
        for c in df.columns:
            if any(kw in c.lower() for kw in ["domain", "fqdn", "host", "url"]):
                domain_key = c
                print(f"[DEBUG] domain fallback: {c}")
                break
    
    # 2) ml_probability候補
    ml_candidates = ["ml_probability", "ml_prob", "probability", "prediction_proba", 
                     "score", "pred_proba", "proba", "confidence"]
    mlp_key = None
    for key in ml_candidates:
        if key in lower2orig:
            mlp_key = lower2orig[key]
            break
    
    if mlp_key is None:
        # floatカラムから推測
        float_cols = [c for c in df.columns if pd.api.types.is_float_dtype(df[c])]
        for c in float_cols:
            if any(kw in c.lower() for kw in ["prob", "score", "pred"]):
                mlp_key = c
                print(f"[DEBUG] ml_probability fallback: {c}")
                break
    
    if domain_key is None or mlp_key is None:
        print("[ERROR] could not infer domain/ml_probability columns")
        return None
    
    print(f"[DEBUG] chosen columns: domain={domain_key}, ml_prob={mlp_key}")
    
    tmp = df[[domain_key, mlp_key]].copy()
    tmp.columns = ["domain", "ml_probability"]
    
    # url→domainの正規化
    if "url" in domain_key.lower():
        def _to_domain(x):
            if not isinstance(x, str):
                return ""
            if "://" in x:
                netloc = urlparse(x).netloc
            else:
                netloc = x
            netloc = netloc.split("@")[-1].split(":")[0]
            return netloc.lower()
        tmp["domain"] = tmp["domain"].map(_to_domain)
    
    # クリーンアップ
    tmp["domain"] = tmp["domain"].astype(str).str.strip().str.lower()
    tmp = tmp[tmp["domain"].str.len() > 0]
    tmp["ml_probability"] = pd.to_numeric(tmp["ml_probability"], errors="coerce")
    tmp = tmp[(tmp["ml_probability"] >= 0.0) & (tmp["ml_probability"] <= 1.0)]
    tmp = tmp.dropna(subset=["ml_probability"]).reset_index(drop=True)
    
    return tmp

# Random100データを取得
def get_random100_domains(sample_n=100, random_state=None):
    """Return the evaluation sample as a DataFrame with ``domain`` and ``ml_probability``.

    Resolution order:
      1. An existing ``random_eval_domains_latest.csv`` (current run first,
         then a hard-coded earlier run). A CSV lacking the required columns
         is skipped instead of being returned as-is.
      2. Otherwise, the handoff pickles are searched for a DataFrame, which is
         normalized, sampled and written to the current run's logs directory.
         Candidates are tried in order until one normalizes to a non-empty frame.

    Args:
        sample_n: maximum number of rows to sample (default 100).
        random_state: forwarded to ``DataFrame.sample``; the default ``None``
            keeps the original unseeded behavior, pass an int for a
            reproducible sample.

    Returns:
        pandas.DataFrame containing at least ``domain`` and ``ml_probability``.

    Raises:
        RuntimeError: if no pickle yields a DataFrame, or none can be normalized.
    """
    required_cols = {"domain", "ml_probability"}

    # Method 1: reuse an existing CSV. dict.fromkeys drops duplicate
    # candidates while keeping their order.
    csv_paths = list(dict.fromkeys([
        LOGS_DIR / "random_eval_domains_latest.csv",
        ARTIFACTS_DIR / "logs" / "random_eval_domains_latest.csv",
        Path("/home/asomura/nextstep/artifacts/2025-11-16_184237/logs/random_eval_domains_latest.csv")
    ]))

    for csv_path in csv_paths:
        if not csv_path.exists():
            continue
        print(f"[INFO] Loading existing CSV from: {csv_path}")
        df = pd.read_csv(csv_path)
        missing = required_cols - set(df.columns)
        if missing:
            # Downstream cells index these columns directly; skip stale files.
            print(f"[WARN] Skipping {csv_path}: missing columns {sorted(missing)}")
            continue
        return df

    # Method 2: build the sample from a handoff pickle.
    print("[INFO] CSV not found, generating from pickle...")

    pickle_paths = [
        HANDOFF_DIR / "04-2_statistical_analysis.pkl",
        HANDOFF_DIR / "04-3_llm_tools_setup_with_tools.pkl",
        Path("/home/asomura/nextstep/artifacts/20251030_004055/handoff/04-2_statistical_analysis.pkl"),
        Path("/home/asomura/nextstep/artifacts/20251030_004055/handoff/04-3_llm_tools_setup_with_tools.pkl")
    ]

    norm_df = None
    found_dataframe = False
    for pickle_path in pickle_paths:
        if not pickle_path.exists():
            continue

        print(f"[INFO] Trying: {pickle_path}")
        try:
            # NOTE(security): pickle.load can execute arbitrary code; only
            # trusted pipeline outputs should be placed at these paths.
            with open(pickle_path, 'rb') as f:
                obj = pickle.load(f)
            raw_df = _search_eval_df(obj)
        except Exception as e:
            print(f"[WARN] Failed to load {pickle_path}: {e}")
            continue

        if raw_df is None or len(raw_df) == 0:
            continue

        print(f"[INFO] Found DataFrame with {len(raw_df)} rows")
        found_dataframe = True

        # Keep looking when this frame cannot be normalized, rather than
        # giving up on the first DataFrame that happens to be found.
        candidate = _normalize_eval_df(raw_df)
        if candidate is not None and len(candidate) > 0:
            norm_df = candidate
            break
        print(f"[WARN] DataFrame from {pickle_path} could not be normalized, trying next candidate")

    if norm_df is None:
        if not found_dataframe:
            raise RuntimeError(
                "評価用DataFrameが見つかりません。\n"
                "04-2 または 04-3 のpickleファイルを確認してください。"
            )
        raise RuntimeError("DataFrameの正規化に失敗しました")

    print(f"[INFO] Source rows (normalized): {len(norm_df)}")

    # Random sampling (unseeded unless random_state is given).
    n_rows = min(sample_n, len(norm_df))
    random100 = norm_df.sample(n=n_rows, random_state=random_state).reset_index(drop=True)

    # Persist the sample so later runs reuse exactly the same domains.
    out_csv = LOGS_DIR / "random_eval_domains_latest.csv"
    LOGS_DIR.mkdir(parents=True, exist_ok=True)
    random100.to_csv(out_csv, index=False, encoding="utf-8")
    print(f"[INFO] Random100 saved to: {out_csv}")

    return random100

# Load the Random100 sample and show its first rows.
random100_df = get_random100_domains()
print(f"[INFO] Random100 loaded: {len(random100_df)} domains")
print(random100_df.head())

# Summarize the ML probability distribution.
_ml_prob = random100_df['ml_probability']
print(f"\n[ML Probability Distribution]")
for _prefix, _value in (
    ("  Min:    ", _ml_prob.min()),
    ("  Max:    ", _ml_prob.max()),
    ("  Mean:   ", _ml_prob.mean()),
    ("  Median: ", _ml_prob.median()),
):
    print(f"{_prefix}{_value:.3f}")

# Count high-risk domains (ML probability above 0.4).
high_risk = random100_df.loc[random100_df['ml_probability'] > 0.4]
print(f"  High risk (>0.4): {len(high_risk)} domains")

[INFO] Loading existing CSV from: /home/asomura/nextstep/artifacts/2025-11-16_184237/logs/random_eval_domains_latest.csv
[INFO] Random100 loaded: 100 domains
            domain  ml_probability
0          jmbf.cn        0.453648
1  baidu-xiamen.cn        0.244429
2      tsjianye.cn        0.236587
3    internetku.id        0.129556
4  teslafarmer.com        0.094156

[ML Probability Distribution]
  Min:    0.004
  Max:    0.497
  Mean:   0.201
  Median: 0.179
  High risk (>0.4): 10 domains


In [10]:
# ============================================
# Cell 4: LangGraphエージェントの初期化（Random100用完全版）
# ============================================

import sys
import os
import types

# Environment setup: work from the project root and make the
# phishing_agent package directory importable.
os.chdir(BASE_DIR)
phishing_agent_path = BASE_DIR / "phishing_agent"
if str(phishing_agent_path) not in sys.path:
    sys.path.insert(0, str(phishing_agent_path))

print("=" * 80)
print("AGENT INITIALIZATION WITH LLM")
print("=" * 80)

# 1. Set a placeholder API key for the vLLM endpoint (not a real secret).
os.environ['OPENAI_API_KEY'] = 'dummy-key-for-vllm'
base_url = "http://192.168.100.71:30000"

# 2. Force-enable the LLM section of the configuration.
print("\n[1] Fixing LLM Configuration")
print("-" * 40)

if 'external_data' not in globals():
    # Stop here: every later step of this cell needs external_data, and
    # continuing would only surface as a misleading error further down.
    print("❌ external_data not found! Run Cell 2 first.")
    raise RuntimeError("external_data is not defined; run Cell 2 before this cell.")

if 'cfg' not in external_data:
    external_data['cfg'] = {}

# Both the generic and the vllm_-prefixed keys are written with the same values.
external_data['cfg']['llm'] = {
    'enabled': True,
    'provider': 'vllm',
    'base_url': base_url,
    'vllm_base_url': base_url,
    'model': 'Qwen/Qwen3-14B-FP8',
    'vllm_model': 'Qwen/Qwen3-14B-FP8'
}
print("✅ LLM configuration enabled")

# 3. Drop every cached module whose name starts with 'phishpkg' (stale ones, if any).
# NOTE(review): the recorded output of this cell ends with
# KeyError: 'phishpkg.agent_foundations' during agent construction; that may
# be related to the entries removed here -- confirm against wire_phase6.
for key in list(sys.modules.keys()):
    if key.startswith('phishpkg'):
        del sys.modules[key]

# 4. Create the LLM client.
# NOTE(review): OpenAI-compatible servers are commonly served under a "/v1"
# prefix -- confirm that base_url is the URL this client expects.
print("\n[2] Creating LLM Client")
print("-" * 40)

try:
    from langchain_community.llms import VLLMOpenAI
    
    llm_client = VLLMOpenAI(
        openai_api_base=base_url,
        openai_api_key="dummy",
        model_name="Qwen/Qwen3-14B-FP8",
        temperature=0.1,
        max_tokens=1024,
    )
    
    # Hand the client to the agent through external_data.
    external_data['llm'] = llm_client
    print("✅ LLM client created and set")
    
except Exception as e:
    # Best effort: on failure the cell continues and external_data['llm']
    # keeps whatever value it had before this cell.
    print(f"❌ LLM client creation failed: {e}")

# 5. Phase6 wiring (LLM enabled).
print("\n[3] Wiring Phase6")
print("-" * 40)

try:
    from phishing_agent.phase6_wiring import wire_phase6
    wire_phase6(prefer_compat=True, fake_llm=False)  # fake_llm=False is essential!
    print("✅ Phase6 wired with real LLM")
except Exception as e:
    # A wiring failure is only reported; execution continues.
    print(f"⚠️ Phase6 wiring: {e}")

# 6. Import the LangGraph agent class.
print("\n[4] Importing LangGraphPhishingAgent")
print("-" * 40)

from phishing_agent.langgraph_module import LangGraphPhishingAgent
print("✅ LangGraphPhishingAgent imported")

# ========================================
# [COMMENTED OUT] Brand Detection Patching
# ========================================
# Replacement with the enhanced brand_impersonation_check is commented out.
# Reason: Cell 3 (the enhanced-version definition) was removed, so this
# avoids an undefined-name error.
#
# print("\n[5] Patching Brand Detection")
# print("-" * 40)
#
# if 'phishpkg.tools_module' in sys.modules:
#     tools_module = sys.modules['phishpkg.tools_module']
#     
#     # Keep a reference to the original function
#     if hasattr(tools_module, 'brand_impersonation_check'):
#         tools_module._original_brand_check = tools_module.brand_impersonation_check
#     
#     # Replace with the enhanced version
#     tools_module.brand_impersonation_check = brand_impersonation_check_enhanced
#     print("✅ Brand detection patched with enhanced version")
# else:
#     print("⚠️ phishpkg.tools_module not found, brand detection might not be enhanced")

# 8. Agent initialization.
print("\n[6] Initializing Agent")
print("-" * 40)

agent = LangGraphPhishingAgent(
    strict_mode=False,
    external_data=external_data
)
print("✅ Agent initialized")

# Check the agent's LLM setting and force it on when it is disabled.
if hasattr(agent, 'llm_config'):
    if not agent.llm_config.enabled:
        agent.llm_config.enabled = True
        print("✅ Forced LLM enabled=True")
    print(f"   LLM Status: enabled={agent.llm_config.enabled}")

# 9. Quick verification test: evaluate one domain and report timing and brands.
print("\n[7] Quick Verification Test")
print("-" * 40)

import time
test_domain = "test-amazon.com"
verification_ok = False  # set once agent.evaluate returns without raising
start = time.time()

try:
    result = agent.evaluate(test_domain, 0.35)
    elapsed = time.time() - start
    verification_ok = True
    
    print(f"Test domain: {test_domain}")
    print(f"  Time: {elapsed:.2f}s")
    print(f"  is_phishing: {result.get('ai_is_phishing')}")
    
    # Heuristic only: response time is used as a proxy for "an LLM call happened".
    if elapsed > 2.0:
        print("  ✅ LLM is working (slow response)")
    elif elapsed > 0.5:
        print("  ⚠️ LLM might be working")
    else:
        print("  ❌ LLM not working (too fast)")
        print("  ⚠️ Check VLLM server status")
    
    # Brand detection check. Every level is read with a default so that a
    # missing 'data' or 'details' key cannot raise and be misreported below
    # as a failed test.
    brand_result = (result.get('tool_results') or {}).get('brand') or {}
    brand_data = brand_result.get('data') or {}
    if brand_data.get('detected_issues'):
        brands = (brand_data.get('details') or {}).get('detected_brands', [])
        print(f"  Brands detected: {brands}")
        print("  ✅ Brand detection is working")
    
except Exception as e:
    print(f"  ❌ Test failed: {e}")

# Final banner: report what actually happened instead of unconditional success.
print("\n" + "=" * 80)
if verification_ok:
    llm_enabled = bool(getattr(getattr(agent, 'llm_config', None), 'enabled', False))
    print("AGENT READY FOR EVALUATION")
    print("=" * 80)
    print("✅ LLM: Enabled" if llm_enabled else "⚠️ LLM: not reported as enabled")
    print("✅ Brand Detection: Using phishing_agent module version")
    print("✅ Agent: Initialized")
    print("\nProceed to Cell 5 for Random100 evaluation")
    print("Expected time: ~8-10 minutes for 100 domains")
else:
    print("AGENT VERIFICATION FAILED")
    print("=" * 80)
    print("❌ Quick test raised an error; fix it before running Cell 5")
print("=" * 80)


AGENT INITIALIZATION WITH LLM

[1] Fixing LLM Configuration
----------------------------------------
✅ LLM configuration enabled

[2] Creating LLM Client
----------------------------------------
✅ LLM client created and set

[3] Wiring Phase6
----------------------------------------
✅ Phase6 wired with real LLM

[4] Importing LangGraphPhishingAgent
----------------------------------------
✅ LangGraphPhishingAgent imported

[6] Initializing Agent
----------------------------------------


KeyError: 'phishpkg.agent_foundations'

In [None]:
# ============================================
# Cell 5: Random100評価実行（修正版）
# ============================================

import time
import pandas as pd
from datetime import datetime

# Refuse to start without an agent. Otherwise the per-domain `except` in the
# evaluation loop would swallow the NameError and record one error row per
# domain, and that meaningless result would be saved to disk.
if 'agent' not in globals():
    raise RuntimeError("agent is not defined; run Cell 4 successfully before this cell.")

print(f"[INFO] Starting evaluation of {len(random100_df)} domains...")
print("=" * 80)

# Report the LLM configuration the agent will use.
if hasattr(agent, 'llm_config'):
    llm_config = agent.llm_config
    if llm_config.enabled:
        print(f"[INFO] LLM initialized successfully: {llm_config.model} at {llm_config.base_url}")
    else:
        print("[WARNING] LLM is disabled - results may be limited")
else:
    print("[WARNING] Agent not properly initialized")

# Accumulator for per-domain result rows, and wall-clock start of the run.
results = []
start_time = time.time()

def _extract_detected_brands(result):
    """Return the brands reported in an agent result as a list (possibly empty).

    Locations are tried in order: top-level 'detected_brands', then
    'ai_assessment', then a brand-related entry in a 'risk_factors' list
    (reported as ['unknown_brand']), then the brand tool's details under
    'tool_results'. A None value at any location counts as "nothing found".
    """
    brands = result.get('detected_brands') or []

    if not brands:
        assessment = result.get('ai_assessment')
        if isinstance(assessment, dict):
            brands = assessment.get('detected_brands') or []

    if not brands:
        risk_factors = result.get('risk_factors')
        if isinstance(risk_factors, list) and any(
            'brand' in str(factor).lower() for factor in risk_factors
        ):
            # A brand-related risk factor exists but does not name the brand.
            brands = ['unknown_brand']

    if not brands:
        tool_results = result.get('tool_results') or {}
        brand_result = tool_results.get('brand') if isinstance(tool_results, dict) else None
        brand_data = brand_result.get('data') if isinstance(brand_result, dict) else None
        details = brand_data.get('details') if isinstance(brand_data, dict) else None
        if isinstance(details, dict):
            brands = details.get('detected_brands') or []

    # A bare string would otherwise be split into characters by list().
    if isinstance(brands, str):
        return [brands]
    return list(brands)


# Evaluate every domain. Each domain contributes exactly ONE row to `results`:
# the row and its progress line are built inside the try block and only
# appended/printed afterwards, so a failure while formatting the progress
# line cannot leave a success row and an error row for the same domain.
n_domains = len(random100_df)
for position, (_, row) in enumerate(random100_df.iterrows(), start=1):
    # `position` is the 1-based loop counter; the frame's index labels are
    # not assumed to be 0..n-1.
    domain = row['domain']
    ml_prob = row['ml_probability']
    eval_start = time.time()

    try:
        # Run the agent on this domain.
        result = agent.evaluate(domain, ml_prob)
        elapsed = time.time() - eval_start

        detected_brands = _extract_detected_brands(result)
        brand_detected = len(detected_brands) > 0

        record = {
            'domain': domain,
            'ml_probability': ml_prob,
            'ai_is_phishing': result.get('ai_is_phishing', False),
            'ai_confidence': result.get('ai_confidence', 0.0),
            'ai_risk_level': result.get('ai_risk_level', 'unknown'),
            'tools_used': result.get('tools_used', []),
            'processing_time': elapsed,
            'brand_detected': brand_detected,
            'detected_brands': detected_brands if detected_brands else '',  # list, or empty string
            'error': None
        }

        # Progress line; None values are replaced so the format specs cannot fail.
        phish_symbol = "✓" if result.get('ai_is_phishing') else "✗"
        brand_str = str(detected_brands[0]) if detected_brands else "-"
        confidence = result.get('ai_confidence') or 0
        risk_level = str(result.get('ai_risk_level') or 'unknown')

        progress_line = (f"[{position:3}/{n_domains}] {domain:40} "
                         f"ml={ml_prob:.3f} {phish_symbol} "
                         f"conf={confidence:.3f} "
                         f"risk={risk_level:12} "
                         f"brand={brand_str:15} "
                         f"({elapsed:.1f}s)")

    except Exception as e:
        # Any failure for this domain is recorded as a single error row.
        elapsed = time.time() - eval_start
        error_msg = str(e)

        record = {
            'domain': domain,
            'ml_probability': ml_prob,
            'ai_is_phishing': False,
            'ai_confidence': 0.0,
            'ai_risk_level': 'error',
            'tools_used': [],
            'processing_time': elapsed,
            'brand_detected': False,
            'detected_brands': '',
            'error': error_msg
        }

        progress_line = (f"[{position:3}/{n_domains}] {domain:40} "
                         f"ml={ml_prob:.3f} ✗ ERROR: {error_msg[:30]}... ({elapsed:.1f}s)")

    results.append(record)
    print(progress_line)

    # Running statistics every 20 domains.
    if position % 20 == 0:
        phishing_count = sum(1 for r in results if r['ai_is_phishing'])
        brand_count = sum(1 for r in results if r['brand_detected'])
        avg_time = sum(r['processing_time'] for r in results) / len(results)
        print(f"  --- Progress: {position}/{n_domains} | "
              f"Phishing: {phishing_count} | "
              f"Brands: {brand_count} | "
              f"Avg time: {avg_time:.2f}s ---")

# Final statistics: total wall-clock time for the run.
total_time = time.time() - start_time
print("=" * 80)
print(f"[INFO] Evaluation complete! Total time: {total_time:.2f}s")

# Collect the per-domain rows into a DataFrame.
results_df = pd.DataFrame(results)

# Save the raw results under a timestamped file name.
timestamp = datetime.now().strftime("%Y-%m-%d_%H%M%S")
result_file = BASE_DIR / f"random100_eval_{timestamp}.csv"
results_df.to_csv(result_file, index=False)
print(f"[INFO] Results saved to: {result_file}")

# Aggregate statistics over all evaluated domains.
_phishing_flags = results_df['ai_is_phishing']
_brand_flags = results_df['brand_detected']
summary = dict(
    total_domains=len(results_df),
    phishing_detected=_phishing_flags.sum(),
    phishing_rate=_phishing_flags.mean(),
    brand_detected=_brand_flags.sum(),
    brand_detection_rate=_brand_flags.mean(),
    avg_confidence=results_df['ai_confidence'].mean(),
    avg_processing_time=results_df['processing_time'].mean(),
    errors=results_df['error'].notna().sum(),
)

# Distribution of risk levels.
risk_dist = results_df['ai_risk_level'].value_counts().to_dict()
summary['risk_distribution'] = risk_dist

# Count how often each brand was detected. Entries are either a list of
# brands or a single value; falsy entries are skipped.
from collections import Counter
brand_domains = results_df[results_df['brand_detected'] == True]
brand_counts = Counter()
for _entry in brand_domains['detected_brands']:
    if not _entry:
        continue
    brand_counts.update(_entry if isinstance(_entry, list) else [str(_entry)])
summary['detected_brand_counts'] = dict(brand_counts)

In [6]:
# NumPy/Pandas型をPython標準型に変換
import numpy as np

def convert_to_json_serializable(obj):
    """Recursively convert NumPy/pandas values into plain Python types for ``json``.

    Handles every NumPy boolean, integer (signed and unsigned) and floating
    scalar, NumPy arrays, pandas Series, and nested dicts, lists, tuples and
    sets. NumPy scalars used as dict keys are converted as well. Tuples and
    sets become lists, which is how JSON represents them. Any other object is
    returned unchanged.

    Args:
        obj: arbitrary (possibly nested) value.

    Returns:
        An equivalent structure built from Python built-in types.
    """
    # np.bool_ is not a subclass of np.integer, so it is handled separately.
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, (np.ndarray, pd.Series)):
        # tolist() already yields Python scalars for numeric dtypes; recurse
        # to cover object-dtype containers holding NumPy values.
        return convert_to_json_serializable(obj.tolist())
    if isinstance(obj, dict):
        return {
            (convert_to_json_serializable(key) if isinstance(key, np.generic) else key):
                convert_to_json_serializable(value)
            for key, value in obj.items()
        }
    if isinstance(obj, (list, tuple, set, frozenset)):
        return [convert_to_json_serializable(item) for item in obj]
    return obj

# Convert the summary to plain Python types before saving.
summary_converted = convert_to_json_serializable(summary)

# Save the summary next to the results CSV.
import json
summary_file = BASE_DIR / f"random100_summary_{timestamp}.json"
with open(summary_file, 'w', encoding='utf-8') as f:
    json.dump(summary_converted, f, ensure_ascii=False, indent=2)
print(f"[INFO] Summary saved to: {summary_file}")

# Report
print("\n" + "=" * 80)
print("EVALUATION SUMMARY")
print("=" * 80)
print(f"\n[Overall Statistics]")
print(f"  Total domains evaluated: {summary['total_domains']}")
print(f"  Phishing detected:       {summary['phishing_detected']} ({summary['phishing_rate']*100:.1f}%)")
print(f"  Brand impersonation:     {summary['brand_detected']} ({summary['brand_detection_rate']*100:.1f}%)")
print(f"  Average confidence:      {summary['avg_confidence']:.3f}")
print(f"  Average processing time: {summary['avg_processing_time']:.2f}s")
print(f"  Errors:                  {summary['errors']}")

print(f"\n[Risk Level Distribution]")
known_levels = ['critical', 'high', 'medium-high', 'medium', 'low', 'unknown', 'error']
# Also list any level the agent returned that is not in the fixed list, so
# the printed counts always add up to the total.
extra_levels = sorted((lvl for lvl in risk_dist if lvl not in known_levels), key=str)
n_total = summary['total_domains']
for level in known_levels + extra_levels:
    count = risk_dist.get(level, 0)
    # Guard against an empty result set.
    pct = (count / n_total) * 100 if n_total else 0.0
    print(f"  {str(level):12}  {count:3} ({pct:5.1f}%)")

print(f"\n[Brand Detection Analysis]")
if summary['detected_brand_counts']:
    # Ten most frequently detected brands.
    for brand, count in sorted(summary['detected_brand_counts'].items(), 
                              key=lambda x: x[1], reverse=True)[:10]:
        print(f"  {brand:20} {count:3} detections")
else:
    print("  No brands detected")

# Phishing verdicts with high confidence.
print(f"\n[High Confidence Phishing (conf >= 0.6)]")
high_conf = results_df[(results_df['ai_is_phishing'] == True) & 
                       (results_df['ai_confidence'] >= 0.6)]
print(f"  Count: {len(high_conf)}")
if not high_conf.empty:
    for _, row in high_conf.head(10).iterrows():
        brands_str = row['detected_brands'] if row['detected_brands'] else '-'
        print(f"  - {row['domain']:40} conf={row['ai_confidence']:.3f} brands={brands_str}")

# Detection status for the brands of particular interest.
print(f"\n[Target Brand Detection]")
target_brands = ['mufg', 'smbc', 'amazon', 'metamask', 'paypal', 'visa', 'mastercard']
for brand in target_brands:
    # Domains whose name contains the brand string.
    brand_in_domain = results_df[results_df['domain'].str.contains(brand, case=False, na=False)]
    # Of those, how many had this brand among the detected brands. A separate
    # counter name avoids overwriting the `brand_detected` variable set by
    # the evaluation loop.
    detected_count = 0
    for _, row in brand_in_domain.iterrows():
        if row['brand_detected'] and row['detected_brands']:
            brands_list = row['detected_brands'] if isinstance(row['detected_brands'], list) else [row['detected_brands']]
            if any(brand in str(b).lower() for b in brands_list):
                detected_count += 1
    
    if len(brand_in_domain) > 0:
        print(f"  {brand:10} {detected_count:2}/{len(brand_in_domain):2} ({detected_count/len(brand_in_domain)*100:5.1f}%)")

print("\n" + "=" * 80)
print("EVALUATION COMPLETE")
print("=" * 80)
print(f"Phishing Detection Rate: {summary['phishing_rate']*100:.1f}%")
print(f"Brand Detection Rate:    {summary['brand_detection_rate']*100:.1f}%")
print(f"Files saved:")
print(f"  - {result_file}")
print(f"  - {summary_file}")

[INFO] Summary saved to: /home/asomura/nextstep/random100_summary_2025-11-18_054804.json

EVALUATION SUMMARY

[Overall Statistics]
  Total domains evaluated: 100
  Phishing detected:       49 (49.0%)
  Brand impersonation:     17 (17.0%)
  Average confidence:      0.371
  Average processing time: 6.93s
  Errors:                  0

[Risk Level Distribution]
  critical        0 (  0.0%)
  high           27 ( 27.0%)
  medium-high    22 ( 22.0%)
  medium          8 (  8.0%)
  low            43 ( 43.0%)
  unknown         0 (  0.0%)
  error           0 (  0.0%)

[Brand Detection Analysis]
  x                      5 detections
  x(compound)            4 detections
  line(compound)         2 detections
  line                   1 detections
  ntt(fuzzy)             1 detections
  visa                   1 detections
  au(substring)          1 detections
  mufg                   1 detections
  aeoncard               1 detections

[High Confidence Phishing (conf >= 0.6)]
  Count: 26
  - baidu-xia