# 多模态检测一致性实验 - 实验分析

本notebook用于深入分析实验结果，包括性能评估、对比分析和可视化展示。

## 目录
1. [环境设置](#环境设置)
2. [数据加载](#数据加载)
3. [基准测试](#基准测试)
4. [性能评估](#性能评估)
5. [对比分析](#对比分析)
6. [错误分析](#错误分析)
7. [参数敏感性分析](#参数敏感性分析)
8. [可视化报告](#可视化报告)
9. [总结](#总结)

## 环境设置

In [None]:
import sys
import os
import warnings
# Silence all Python warnings so the notebook output stays readable.
warnings.filterwarnings('ignore')

# Make the project importable. The project imports in this cell use the
# `src.` package prefix (e.g. `from src.pipeline import ...`), which resolves
# only when the directory that CONTAINS `src` is on sys.path. '../src' is
# kept so that modules imported without the prefix keep working.
for project_path in ('..', '../src'):
    if project_path not in sys.path:  # no duplicates when the cell is re-run
        sys.path.append(project_path)

# 导入必要的库
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import cross_val_score
import yaml
import pickle
from pathlib import Path

# 导入项目模块
from src.evaluation.experiment_evaluator import ExperimentEvaluator, ExperimentConfig
from src.utils.metrics import RetrievalEvaluator, DetectionEvaluator
from src.utils.visualizer import ResultVisualizer, VisualizationConfig
from src.pipeline import DefensePipeline, PipelineConfig

# Plot styling. The seaborn style sheets are called 'seaborn-v0_8*' from
# matplotlib 3.6 on; older installations only know the legacy name, and an
# unknown style name raises OSError.
try:
    plt.style.use('seaborn-v0_8')
except OSError:
    plt.style.use('seaborn')
sns.set_palette('husl')

# The figures in this notebook carry Chinese titles and labels. Put common CJK
# fonts in front of the sans-serif list (after the style sheet, which sets its
# own list) so those glyphs are drawn instead of empty boxes; fonts that are
# not installed are simply skipped in favour of the next entry.
plt.rcParams['font.sans-serif'] = [
    'SimHei',
    'Microsoft YaHei',
    'PingFang SC',
    'Noto Sans CJK SC',
    'WenQuanYi Micro Hei',
    *plt.rcParams['font.sans-serif'],
]
# Many CJK fonts lack U+2212; use the ASCII hyphen for negative tick labels.
plt.rcParams['axes.unicode_minus'] = False

print("实验分析环境设置完成！")


## 数据加载

In [None]:
# Load the experiment configuration.
with open('../configs/default.yaml', 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f)

# Experiment locations, relative to the current working directory.
experiment_dir = Path('../experiments')
results_dir = Path('../results')

# Load previously saved experiment results, if there are any.
experiment_results = []

if results_dir.exists():
    # sorted(): glob yields files in an unspecified order; sorting keeps the
    # order of `experiment_results` reproducible between runs and machines.
    for result_file in sorted(results_dir.glob('*.pkl')):
        try:
            # SECURITY NOTE: unpickling can execute arbitrary code. Only load
            # result files that were produced by this project.
            with open(result_file, 'rb') as f:
                result = pickle.load(f)
        except Exception as e:
            # Best effort: report the unreadable file and carry on.
            print(f"加载 {result_file.name} 失败: {e}")
        else:
            experiment_results.append(result)
            print(f"加载实验结果: {result_file.name}")

print(f"\n总共加载了 {len(experiment_results)} 个实验结果")

# Without stored results, fall back to simulated data so the remaining cells
# can still be demonstrated.
if not experiment_results:
    print("\n没有找到现有实验结果，将创建模拟数据进行演示...")

    np.random.seed(42)  # reproducible demo numbers

    # Methods and metrics covered by the simulated performance table.
    methods = ['Baseline', 'Text-Variant', 'SD-Reference', 'Full-Pipeline']
    metrics = ['accuracy', 'precision', 'recall', 'f1_score', 'auc_roc']

    simulation_data = {}
    for method in methods:
        # One base level per method in [0.7, 0.9), then Gaussian jitter
        # (sd 0.05) per metric, clipped to the valid [0, 1] range.
        base_performance = 0.7 + np.random.random() * 0.2
        simulation_data[method] = {
            metric: np.clip(base_performance + np.random.normal(0, 0.05), 0, 1)
            for metric in metrics
        }

    print("模拟数据创建完成！")

## 基准测试

In [None]:
# Build the evaluator used for the benchmark analysis.
experiment_config = ExperimentConfig(
    experiment_name="benchmark_analysis",
    metrics=['accuracy', 'precision', 'recall', 'f1_score', 'auc_roc'],
    enable_cross_validation=True,
    cv_folds=5,
    enable_statistical_tests=True,
    enable_visualization=True
)
evaluator = ExperimentEvaluator(experiment_config)

# Fixed reference figures printed as the benchmark.
baseline_metrics = dict(
    accuracy=0.75,
    precision=0.73,
    recall=0.78,
    f1_score=0.75,
    auc_roc=0.82,
)

print("基准性能指标:")
for name in baseline_metrics:
    print(f"- {name}: {baseline_metrics[name]:.3f}")

# When simulated data is available, show the per-method comparison.
if 'simulation_data' in locals():
    print("\n各方法性能对比:")

    # One row per method, one column per metric.
    performance_df = pd.DataFrame(simulation_data).T
    print(performance_df.round(3))

    # Relative change of every non-baseline method against the simulated
    # 'Baseline' entry, in percent.
    print("\n相对于基准的改进:")
    reference = simulation_data['Baseline']
    for method in methods[1:]:  # the first entry is the baseline itself
        scores = simulation_data[method]
        improvements = {
            metric: (scores[metric] - reference[metric]) / reference[metric] * 100
            for metric in metrics
        }

        print(f"\n{method}:")
        for metric, improvement in improvements.items():
            print(f"  {metric}: {improvement:+.1f}%")

## 性能评估

In [None]:
# Performance-evaluation figure: radar chart, grouped bars, heat map and
# improvement-over-baseline bars arranged in one 2x2 grid.
if 'simulation_data' in locals():
    fig, axes = plt.subplots(2, 2, figsize=(15, 12))

    # Scores as a (method x metric) matrix, shared by all four panels.
    performance_matrix = np.array(
        [[simulation_data[method][metric] for metric in metrics] for method in methods]
    )

    # 1. Radar chart of the overall profile of every method. A radar chart
    #    needs polar axes, so the Cartesian axes that plt.subplots created in
    #    this slot is replaced by a polar one.
    axes[0, 0].remove()
    ax = fig.add_subplot(2, 2, 1, projection='polar')
    spoke_angles = np.linspace(0, 2 * np.pi, len(metrics), endpoint=False)
    angles = np.concatenate((spoke_angles, [spoke_angles[0]]))  # closed outline

    for method, row in zip(methods, performance_matrix):
        values = np.concatenate((row, [row[0]]))  # repeat first point to close
        ax.plot(angles, values, 'o-', linewidth=2, label=method)
        ax.fill(angles, values, alpha=0.1)

    ax.set_xticks(spoke_angles)
    ax.set_xticklabels(metrics)
    ax.set_ylim(0, 1)
    ax.set_title('各方法综合性能雷达图')
    ax.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))  # keep clear of the plot
    ax.grid(True)

    # 2. Grouped bar chart comparing the methods metric by metric.
    ax = axes[0, 1]
    x = np.arange(len(metrics))
    width = 0.8 / len(methods)  # 0.2 for four methods; groups never overlap

    for i, (method, row) in enumerate(zip(methods, performance_matrix)):
        ax.bar(x + i * width, row, width, label=method)

    ax.set_xlabel('指标')
    ax.set_ylabel('性能值')
    ax.set_title('各指标性能对比')
    ax.set_xticks(x + width * (len(methods) - 1) / 2)  # centre of each group
    ax.set_xticklabels(metrics, rotation=45)
    ax.legend()
    ax.grid(True, alpha=0.3)

    # 3. Heat map of the performance matrix with the values written in.
    ax = axes[1, 0]
    im = ax.imshow(performance_matrix, cmap='YlOrRd', aspect='auto')

    ax.set_xticks(range(len(metrics)))
    ax.set_yticks(range(len(methods)))
    ax.set_xticklabels(metrics, rotation=45)
    ax.set_yticklabels(methods)
    ax.set_title('性能热力图')

    for (i, j), score in np.ndenumerate(performance_matrix):
        ax.text(j, i, f'{score:.3f}',
                ha='center', va='center', color='black', fontsize=8)

    fig.colorbar(im, ax=ax)

    # 4. Largest relative gain over the baseline per metric, in percent.
    ax = axes[1, 1]
    baseline_values = performance_matrix[methods.index('Baseline')]
    best_values = performance_matrix.max(axis=0)
    improvements = (best_values - baseline_values) / baseline_values * 100

    bars = ax.bar(metrics, improvements, color='skyblue', alpha=0.7)
    ax.set_ylabel('改进幅度 (%)')
    ax.set_title('相对基准的最大改进幅度')
    ax.grid(True, alpha=0.3)
    # Rotate the labels of THIS panel explicitly rather than relying on
    # pyplot's implicit "current axes".
    ax.tick_params(axis='x', rotation=45)

    # Write each percentage just above its bar.
    for bar, improvement in zip(bars, improvements):
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width() / 2., height + 0.5,
                f'{improvement:.1f}%', ha='center', va='bottom')

    plt.tight_layout()
    plt.show()

## 对比分析

In [None]:
# Statistical significance of each method against the baseline.
from scipy import stats


def compute_cohens_d(sample_a, sample_b):
    """Return Cohen's d, (mean(a) - mean(b)) / pooled SD, for two samples.

    The pooled standard deviation is the root of the average of the two
    unbiased sample variances (ddof=1), which is the usual form for samples
    of equal size. When neither sample has any spread the result is 0.0 for
    equal means and +/-inf otherwise, instead of a division by zero.
    """
    a = np.asarray(sample_a, dtype=float)
    b = np.asarray(sample_b, dtype=float)
    mean_diff = a.mean() - b.mean()
    pooled_std = np.sqrt((a.var(ddof=1) + b.var(ddof=1)) / 2)
    if pooled_std == 0:
        return 0.0 if mean_diff == 0 else float(np.copysign(np.inf, mean_diff))
    return float(mean_diff / pooled_std)


def significance_stars(p_value):
    """Map a p-value to '***' (<0.001), '**' (<0.01), '*' (<0.05) or 'ns'."""
    for threshold, marker in ((0.001, "***"), (0.01, "**"), (0.05, "*")):
        if p_value < threshold:
            return marker
    return "ns"


if 'simulation_data' in locals():
    print("统计显著性分析:")
    print("=" * 50)

    # Simulate repeated runs: for every method/metric draw n_experiments
    # scores around the single simulated value (sd 0.02).
    n_experiments = 30
    std_val = 0.02
    experiment_data = {
        method: {
            metric: np.random.normal(simulation_data[method][metric], std_val, n_experiments)
            for metric in metrics
        }
        for method in methods
    }

    baseline_method = 'Baseline'

    for method in methods:
        if method == baseline_method:
            continue
        print(f"\n{method} vs {baseline_method}:")
        for metric in metrics:
            baseline_scores = experiment_data[baseline_method][metric]
            method_scores = experiment_data[method][metric]

            # The runs of two methods are drawn independently, so there is
            # no pairing between them: use Welch's independent-samples
            # t-test, not a paired test.
            t_stat, p_value = stats.ttest_ind(method_scores, baseline_scores, equal_var=False)

            # Effect size, consistent with the independent-samples test.
            cohens_d = compute_cohens_d(method_scores, baseline_scores)
            significance = significance_stars(p_value)

            print(f"  {metric}: t={t_stat:.3f}, p={p_value:.4f} {significance}, d={cohens_d:.3f}")

    # Box plots of the simulated score distributions, one panel per metric.
    n_cols = 3
    n_rows = -(-len(metrics) // n_cols)  # ceiling division
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(18, 5 * n_rows))
    axes = np.atleast_1d(axes).flatten()

    colors = ['lightblue', 'lightgreen', 'lightcoral', 'lightyellow']
    for ax, metric in zip(axes, metrics):
        data_to_plot = [experiment_data[method][metric] for method in methods]
        # Tick labels are set on the axes afterwards: the `labels=` keyword
        # of boxplot is deprecated in recent matplotlib releases.
        box_plot = ax.boxplot(data_to_plot, patch_artist=True)
        ax.set_xticklabels(methods, rotation=45)

        for patch, color in zip(box_plot['boxes'], colors):
            patch.set_facecolor(color)

        ax.set_title(f'{metric.upper()} 分布')
        ax.set_ylabel('性能值')
        ax.grid(True, alpha=0.3)

    # Hide every panel of the grid that has no metric to show.
    for unused_ax in axes[len(metrics):]:
        unused_ax.set_visible(False)

    plt.tight_layout()
    plt.show()

## 错误分析

In [None]:
# Error analysis on simulated predictions.


def simulate_predictions(y_true, accuracy, rng):
    """Simulate a binary classifier whose expected accuracy is `accuracy`.

    Each label in `y_true` (0/1) is kept with probability `accuracy` and
    flipped otherwise, so the predictions actually depend on the labels.
    The score of a sample lies on the side of the 0.5 threshold that matches
    its predicted class, at a Beta(2, 2)-distributed distance from it.

    `rng` must provide `random(n)` and `beta(a, b, n)`, e.g. a
    `numpy.random.Generator`.

    Returns a dict with 'y_pred' (0/1 array) and 'y_proba' (scores in [0, 1]).
    """
    y_true = np.asarray(y_true)
    n = y_true.shape[0]
    is_correct = rng.random(n) < accuracy
    y_pred = np.where(is_correct, y_true, 1 - y_true)
    margin = 0.5 * rng.beta(2, 2, n)  # distance from the decision threshold
    y_proba = np.where(y_pred == 1, 0.5 + margin, 0.5 - margin)
    return {'y_pred': y_pred, 'y_proba': y_proba}


def error_rates(cm):
    """Return (FPR, FNR) for a 2x2 confusion matrix [[tn, fp], [fn, tp]].

    A rate whose denominator is zero (no negatives, or no positives) is
    reported as NaN instead of raising or emitting a division warning.
    """
    tn, fp, fn, tp = (int(v) for v in np.asarray(cm).ravel())
    fpr = fp / (fp + tn) if (fp + tn) else float('nan')
    fnr = fn / (fn + tp) if (fn + tp) else float('nan')
    return fpr, fnr


# Like the other analysis cells, run only when the simulated data exists;
# `methods` and `simulation_data` are undefined when real results were loaded.
if 'simulation_data' in locals():
    from sklearn.metrics import roc_curve, auc

    rng = np.random.default_rng(42)  # local generator: reproducible draws

    # Simulated ground truth with roughly 30% positive samples.
    n_samples = 1000
    y_true = rng.binomial(1, 0.3, n_samples)

    # Predictions of every method, driven by its simulated accuracy.
    predictions = {
        method: simulate_predictions(y_true, simulation_data[method]['accuracy'], rng)
        for method in methods
    }

    # labels=[0, 1] pins the matrix to 2x2 even if one class never occurs.
    confusion = {
        method: confusion_matrix(y_true, predictions[method]['y_pred'], labels=[0, 1])
        for method in methods
    }

    print("错误分析报告:")
    print("=" * 50)

    for method in methods:
        cm = confusion[method]
        tn, fp, fn, tp = cm.ravel()
        false_positive_rate, false_negative_rate = error_rates(cm)

        print(f"\n{method}:")
        print(f"  假阳性率 (FPR): {false_positive_rate:.3f}")
        print(f"  假阴性率 (FNR): {false_negative_rate:.3f}")
        print(f"  假阳性数量: {fp}")
        print(f"  假阴性数量: {fn}")

    # Confusion-matrix heat maps, one panel per method.
    fig, axes = plt.subplots(2, 2, figsize=(12, 10))
    axes = axes.flatten()
    class_names = ['负类', '正类']

    for ax, method in zip(axes, methods):
        sns.heatmap(confusion[method], annot=True, fmt='d', cmap='Blues', ax=ax,
                    xticklabels=class_names, yticklabels=class_names)
        ax.set_title(f'{method} - 混淆矩阵')
        ax.set_xlabel('预测标签')
        ax.set_ylabel('真实标签')

    plt.tight_layout()
    plt.show()

    # ROC curves of all methods in one figure.
    plt.figure(figsize=(10, 8))

    for method in methods:
        y_proba = predictions[method]['y_proba']
        fpr, tpr, _ = roc_curve(y_true, y_proba)
        roc_auc = auc(fpr, tpr)

        plt.plot(fpr, tpr, linewidth=2, label=f'{method} (AUC = {roc_auc:.3f})')

    plt.plot([0, 1], [0, 1], 'k--', linewidth=1, label='随机分类器')  # chance line
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('假阳性率 (FPR)')
    plt.ylabel('真阳性率 (TPR)')
    plt.title('ROC曲线对比')
    plt.legend(loc='lower right')
    plt.grid(True, alpha=0.3)
    plt.show()
else:
    print("未找到模拟数据，跳过错误分析。")

## 参数敏感性分析

In [None]:
# Parameter sensitivity analysis on simulated response curves.
print("参数敏感性分析:")
print("=" * 50)

# Candidate settings per parameter. The float grids are built with
# np.linspace and rounded: np.arange with a fractional step may gain or lose
# its end point and carries rounding error into the values, which would then
# show up in the printed and annotated "best" settings.
parameters = {
    'similarity_threshold': np.round(np.linspace(0.70, 0.90, 5), 2),
    'num_variants': range(5, 21, 5),
    'detection_threshold': np.round(np.linspace(0.3, 0.7, 5), 1),
    'ensemble_weights': [(0.2, 0.3, 0.5), (0.3, 0.3, 0.4), (0.4, 0.3, 0.3), (0.5, 0.25, 0.25)]
}

# Simulated noise-free response of the score to each scalar parameter.
# 'ensemble_weights' has no model here and is therefore not analysed.
response_models = {
    # Similarity threshold: best at 0.85, worse on either side.
    'similarity_threshold': lambda v: 0.8 - 0.3 * abs(v - 0.85),
    # Number of variants: more helps, with diminishing returns.
    'num_variants': lambda v: 0.7 + 0.02 * v - 0.0005 * v ** 2,
    # Detection threshold: best at 0.5 (precision/recall balance).
    'detection_threshold': lambda v: 0.75 - 0.2 * abs(v - 0.5),
}

sensitivity_results = {}

for param_name, model in response_models.items():
    # Plain Python numbers, so they print as e.g. 0.85 or 10.
    values = np.asarray(parameters[param_name]).tolist()
    # Model value plus Gaussian noise (sd 0.02), clipped to [0, 1].
    performance_values = [
        np.clip(model(value) + np.random.normal(0, 0.02), 0, 1) for value in values
    ]
    sensitivity_results[param_name] = {
        'values': values,
        'performance': performance_values
    }


def best_setting(param_name):
    """Return (value, score) of the best-scoring setting of `param_name`."""
    result = sensitivity_results[param_name]
    best_idx = int(np.argmax(result['performance']))
    return result['values'][best_idx], result['performance'][best_idx]


param_names = list(sensitivity_results.keys())
param_labels = {
    'similarity_threshold': '相似度阈值',
    'num_variants': '变体数量',
    'detection_threshold': '检测阈值'
}

# One panel per analysed parameter.
fig, axes = plt.subplots(1, len(param_names), figsize=(6 * len(param_names), 5))
axes = np.atleast_1d(axes)

for ax, param_name in zip(axes, param_names):
    values = sensitivity_results[param_name]['values']
    performance = sensitivity_results[param_name]['performance']

    ax.plot(values, performance, 'o-', linewidth=2, markersize=6)
    ax.set_xlabel(param_labels[param_name])
    ax.set_ylabel('F1分数')
    ax.set_title(f'{param_labels[param_name]}敏感性分析')
    ax.grid(True, alpha=0.3)

    # Highlight and annotate the best setting.
    best_val, best_perf = best_setting(param_name)

    ax.scatter([best_val], [best_perf], color='red', s=100, zorder=5)
    ax.annotate(f'最优: {best_val}',
                xy=(best_val, best_perf),
                xytext=(10, 10),
                textcoords='offset points',
                bbox=dict(boxstyle='round,pad=0.3', facecolor='yellow', alpha=0.7),
                arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0'))

plt.tight_layout()
plt.show()

# Print the recommended setting of every analysed parameter.
print("\n推荐的最优参数设置:")
for param_name in param_names:
    best_val, best_perf = best_setting(param_name)

    print(f"- {param_labels[param_name]}: {best_val} (F1分数: {best_perf:.3f})")

## 可视化报告

In [None]:
# Consolidated report: console summary first, then a text file on disk.
print("实验分析综合报告")
print("=" * 60)

if 'simulation_data' in locals():
    # 1. Best method, judged by the mean over all metrics.
    method_scores = {
        method: np.mean([simulation_data[method][metric] for metric in metrics])
        for method in methods
    }
    best_method = max(method_scores, key=lambda name: method_scores[name])
    best_score = method_scores[best_method]

    print(f"1. 最佳方法: {best_method}")
    print(f"   综合得分: {best_score:.3f}")

    # 2. Ranking of all methods, best first.
    print("\n2. 方法排名:")
    sorted_methods = sorted(method_scores.items(), key=lambda item: item[1], reverse=True)
    rank = 0
    for method, score in sorted_methods:
        rank += 1
        print(f"   {rank}. {method}: {score:.3f}")

    # 3. Key findings.
    print("\n3. 关键发现:")

    # Metric with the largest relative gain of the best method over the
    # simulated baseline, in percent.
    reference_scores = simulation_data['Baseline']
    max_improvements = {}
    for metric in metrics:
        reference = reference_scores[metric]
        top_score = max(simulation_data[method][metric] for method in methods)
        max_improvements[metric] = (top_score - reference) / reference * 100

    best_improved_metric = max(max_improvements, key=lambda name: max_improvements[name])
    best_improvement = max_improvements[best_improved_metric]

    print(f"   - 最大改进指标: {best_improved_metric} (+{best_improvement:.1f}%)")

    # Most stable method: smallest variance of its scores across metrics.
    method_variances = {
        method: np.var([simulation_data[method][metric] for metric in metrics])
        for method in methods
    }
    most_stable = min(method_variances, key=lambda name: method_variances[name])
    print(f"   - 最稳定方法: {most_stable} (方差: {method_variances[most_stable]:.4f})")

    # 4. Recommendations derived from the findings above.
    print("\n4. 建议:")
    for advice in (
        f"   - 推荐使用 {best_method} 方法以获得最佳整体性能",
        f"   - 重点关注 {best_improved_metric} 指标的进一步优化",
        f"   - 考虑 {most_stable} 方法的稳定性优势",
    ):
        print(advice)

    # 5. Best setting found for every analysed parameter.
    print("\n5. 参数建议:")
    for param_name in param_names:
        entry = sensitivity_results[param_name]
        top_value = entry['values'][np.argmax(entry['performance'])]
        print(f"   - {param_labels[param_name]}: {top_value}")

# Save the report, creating the output directory when it is missing.
report_path = '../results/experiment_analysis_report.txt'
report_dir = os.path.dirname(report_path)
os.makedirs(report_dir, exist_ok=True)

with open(report_path, 'w', encoding='utf-8') as f:
    f.write("多模态检测一致性实验分析报告\n" + "=" * 60 + "\n\n")

    if 'simulation_data' in locals():
        f.write(f"最佳方法: {best_method}\n" + f"综合得分: {best_score:.3f}\n\n")

        # Full score table, one section per method.
        f.write("详细性能数据:\n")
        for method in methods:
            f.write(f"\n{method}:\n")
            f.writelines(
                f"  {metric}: {simulation_data[method][metric]:.3f}\n" for metric in metrics
            )

print(f"\n分析报告已保存到: {report_path}")

## 总结

本notebook提供了全面的实验分析框架，包括：

1. **性能评估**: 多维度评估各种方法的性能
2. **统计分析**: 使用统计检验验证结果的显著性
3. **错误分析**: 深入分析不同类型的错误
4. **参数优化**: 分析关键参数对性能的影响
5. **可视化报告**: 生成直观的图表和综合报告

### 使用建议

1. 用真实的实验数据替换模拟数据
2. 根据具体需求调整评估指标
3. 扩展参数敏感性分析的范围
4. 添加更多的可视化类型
5. 定期更新分析以跟踪改进进展