### 误差情况分析

In [3]:
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict
import json
from pathlib import Path
import seaborn as sns
from scipy.spatial import KDTree
import os
import matplotlib.pyplot as plt
import platform
# Pick a CJK-capable font for the current operating system so that Chinese
# text in figures can be rendered.
system = platform.system()

if system == 'Windows':
    # Matplotlib tries the listed families in order, so SimSun is used only
    # when Microsoft YaHei is not installed. Assigning to rcParams does not
    # check that a font exists, so a try/except around the assignment could
    # never trigger a fallback.
    plt.rcParams['font.family'] = ['Microsoft YaHei', 'SimSun']
elif system == 'Linux':
    plt.rcParams['font.family'] = ['WenQuanYi Micro Hei']
elif system == 'Darwin':
    plt.rcParams['font.family'] = ['Arial Unicode MS']

# Draw negative numbers with an ASCII hyphen instead of the Unicode minus
# sign, which fixes minus-sign display problems with these fonts.
plt.rcParams['axes.unicode_minus'] = False

def load_inference_results(inference_dir):
    """Load every ``*.json`` inference result in a directory into flat lists.

    Files are visited in sorted path order. Each file is expected to hold the
    parallel lists ``'bboxes'``, ``'scores'`` and ``'labels'``. The image id
    recorded for every box is the file stem with a trailing ``_result``
    removed.

    Args:
        inference_dir: Directory containing the JSON result files.

    Returns:
        dict with the keys ``'bboxes'``, ``'scores'``, ``'labels'`` and
        ``'image_ids'``; each list has one entry per predicted box, in file
        order and then box order.

    Raises:
        KeyError: If a file has no ``'bboxes'`` key, or has boxes but no
            ``'scores'`` / ``'labels'`` key.
        IndexError: If ``'scores'`` or ``'labels'`` is shorter than
            ``'bboxes'`` in some file.
    """
    result_suffix = '_result'
    all_results = {
        'bboxes': [],
        'scores': [],
        'labels': [],
        'image_ids': [],
    }

    print(f"开始加载推理结果，目录: {inference_dir}")

    for json_file in sorted(Path(inference_dir).glob('*.json')):
        # JSON text is UTF-8; do not depend on the platform default encoding
        # (which is not UTF-8 on many Windows installations).
        with open(json_file, 'r', encoding='utf-8') as f:
            result = json.load(f)

        # Recover the original image name by stripping the "_result" suffix.
        image_name = json_file.stem
        if image_name.endswith(result_suffix):
            image_name = image_name[:-len(result_suffix)]

        # Flatten the per-file lists, tagging every box with its image name.
        for i, bbox in enumerate(result['bboxes']):
            all_results['bboxes'].append(bbox)
            all_results['scores'].append(result['scores'][i])
            all_results['labels'].append(result['labels'][i])
            all_results['image_ids'].append(image_name)

    return all_results

def analyze_bbox_errors(inference_dir, gt_file, iou_threshold=0.5):
    """Relate per-box detection error to local layout features of the labels.

    Predictions are loaded with ``load_inference_results`` and grouped by
    image name; ground-truth annotations are read from ``gt_file`` and grouped
    by the stem of the image's ``file_name``. For every ground-truth box of
    every image that has at least one prediction, the best IoU against all
    predictions of that image is computed (labels are not compared) together
    with the local features from ``calculate_local_features``.

    NOTE(review): ``calculate_iou`` interprets every 4-element box as
    ``[x, y, w, h]``. That matches how ``calculate_local_features`` reads the
    ground-truth boxes, but the format of the predicted boxes is not visible
    here — confirm they are not ``[x1, y1, x2, y2]``.

    Args:
        inference_dir: Directory with the per-image prediction JSON files.
        gt_file: Path to the annotation JSON containing ``'images'`` and
            ``'annotations'`` lists.
        iou_threshold: A ground-truth box whose best IoU is below this value
            is counted as an error.

    Returns:
        list of dicts, one per analysed ground-truth box, with the keys
        ``'error'`` (0 or 1), ``'error_intensity'`` (``(1 - iou) / iou``, or
        ``inf`` when the best IoU is 0), ``'local_density'``, ``'avg_size'``
        and ``'size_variance'``. Images without predictions are skipped, so
        their ground-truth boxes do not appear in the result.
    """
    print("开始加载数据...")
    pred_data = load_inference_results(inference_dir)

    print(f"加载真实标签文件: {gt_file}")
    # JSON text is UTF-8; do not depend on the platform default encoding.
    with open(gt_file, 'r', encoding='utf-8') as f:
        gt_data = json.load(f)

    # Map annotation image ids to file stems so they can be joined with the
    # prediction files, which are keyed by image name.
    image_id_to_filename = {
        img['id']: Path(img['file_name']).stem
        for img in gt_data['images']
    }

    print(f"真实标签中的标注数量: {len(gt_data['annotations'])}")

    # Group predictions by image name.
    pred_by_image = defaultdict(list)
    print("组织预测数据...")
    for bbox, score, label, img_id in zip(
        pred_data['bboxes'],
        pred_data['scores'],
        pred_data['labels'],
        pred_data['image_ids'],
    ):
        pred_by_image[img_id].append({
            'bbox': bbox,
            'score': score,
            'label': label,
        })

    # Group ground-truth boxes by image name; annotations whose image id is
    # unknown (or maps to an empty stem) are ignored.
    gt_by_image = defaultdict(list)
    print("组织真实标签数据...")
    for ann in gt_data['annotations']:
        filename = image_id_to_filename.get(ann['image_id'])
        if filename:
            gt_by_image[filename].append(ann['bbox'])

    # Summary statistics plus a small sample of the name correspondence.
    print(f"\n预测数据中的图片数量: {len(pred_by_image)}")
    print(f"真实标签中的图片数量: {len(gt_by_image)}")
    print("\n部分ID对应关系:")
    for img_name, pred_boxes in list(pred_by_image.items())[:5]:
        print(f"图片名称: {img_name}")
        print(f"预测框数量: {len(pred_boxes)}")
        # .get() avoids inserting empty entries into the defaultdict.
        print(f"真实框数量: {len(gt_by_image.get(img_name, []))}")

    results = []
    processed_images = 0
    for img_name, gt_boxes in gt_by_image.items():
        pred_boxes = [p['bbox'] for p in pred_by_image.get(img_name, [])]

        if len(gt_boxes) == 0 or len(pred_boxes) == 0:
            print(f"跳过图片 {img_name}，真实框数量: {len(gt_boxes)}, 预测框数量: {len(pred_boxes)}")
            continue

        processed_images += 1
        for i, gt_box in enumerate(gt_boxes):
            local_density, avg_size, size_variance = calculate_local_features(gt_boxes, i)

            # Best overlap of this ground-truth box with any prediction;
            # the leading 0 is the floor used when nothing overlaps.
            max_iou = max([0] + [calculate_iou(gt_box, pred_box) for pred_box in pred_boxes])

            # Error intensity grows as the best IoU shrinks; no overlap at
            # all is reported as infinity.
            error_intensity = (1 - max_iou) / max_iou if max_iou > 0 else float('inf')

            results.append({
                'error': 1 if max_iou < iou_threshold else 0,
                'error_intensity': error_intensity,
                'local_density': local_density,
                'avg_size': avg_size,
                'size_variance': size_variance,
            })

    print(f"成功处理的图片数量: {processed_images}")
    print(f"分析完成，结果数量: {len(results)}")
    return results

def calculate_iou(box1, box2):
    """Return the intersection-over-union of two boxes.

    A box with exactly four elements is read as ``[x, y, w, h]`` and turned
    into corner form ``[x1, y1, x2, y2]``; a box of any other length is used
    as-is and its first four elements are read as corners.

    Returns:
        ``intersection / union``, or ``0`` when the union is not positive.
    """
    def _as_corners(box):
        # Only 4-element boxes are converted from [x, y, w, h].
        if len(box) != 4:
            return box
        left, top, width, height = box
        return [left, top, left + width, top + height]

    a = _as_corners(box1)
    b = _as_corners(box2)

    # Overlap rectangle, clamped to zero extent when the boxes are disjoint.
    overlap_w = max(0, min(a[2], b[2]) - max(a[0], b[0]))
    overlap_h = max(0, min(a[3], b[3]) - max(a[1], b[1]))
    intersection = overlap_w * overlap_h

    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    union = area_a + area_b - intersection

    if union > 0:
        return intersection / union
    return 0

def calculate_local_features(boxes, target_idx, k=5):
    """Describe the neighbourhood of one box: density, mean area, area variance.

    The neighbourhood consists of the (up to) ``k`` boxes whose centres are
    closest to the centre of ``boxes[target_idx]``, excluding the target
    itself.

    Args:
        boxes: Sequence of boxes in ``[x, y, w, h]`` form.
        target_idx: Index into ``boxes`` of the box to describe.
        k: Maximum number of neighbours to use (expected to be >= 1).

    Returns:
        Tuple ``(local_density, avg_size, size_variance)``:
        ``local_density`` is the reciprocal of the mean centre distance to the
        neighbours, ``avg_size`` the mean neighbour area and ``size_variance``
        the variance of the neighbour areas (0 with fewer than two
        neighbours). ``(0, 0, 0)`` is returned when fewer than two boxes are
        given. If every neighbour shares the target's centre, the mean
        distance is 0 and the density evaluates to ``inf``.
    """
    if len(boxes) < 2:
        return 0, 0, 0

    # Convert [x, y, w, h] -> [x1, y1, x2, y2].
    corners = np.array([[x, y, x + w, y + h] for x, y, w, h in boxes])

    # Box centres: midpoint of the two corners.
    centers = (corners[:, :2] + corners[:, 2:]) / 2

    # Nearest neighbours by centre distance; one extra result is requested
    # because the target itself is part of the tree.
    tree = KDTree(centers)
    distances, indices = tree.query(centers[target_idx], k=min(k + 1, len(boxes)))

    # Remove the target by index rather than by position: when another box has
    # exactly the same centre, both are at distance 0 and the target is not
    # guaranteed to be the first result. Fall back to dropping the first entry
    # if the target is not among the results at all.
    found = indices.tolist()
    self_pos = found.index(target_idx) if target_idx in found else 0
    keep = [pos for pos in range(len(found)) if pos != self_pos]
    indices = indices[keep]
    distances = distances[keep]

    # Local density: reciprocal of the mean distance to the neighbours.
    local_density = 1 / np.mean(distances) if len(distances) > 0 else 0

    # Areas of the neighbouring boxes.
    sizes = [
        (corners[i][2] - corners[i][0]) * (corners[i][3] - corners[i][1])
        for i in indices
    ]

    avg_size = np.mean(sizes) if sizes else 0
    size_variance = np.var(sizes) if len(sizes) > 1 else 0

    return local_density, avg_size, size_variance



# def plot_analysis(results):
#     """绘制分析图表"""
#     # 过滤掉无限值
#     filtered_results = [r for r in results if r['error_intensity'] != float('inf')]
    
#     if not filtered_results:
#         print("No available results for plotting.")
#         return
    
#     # 转换为numpy数组
#     data = np.array([[
#         r['error'],
#         r['error_intensity'],
#         r['local_density'],
#         r['avg_size'],
#         r['size_variance']
#     ] for r in filtered_results])
    
#     # 定义标题和标签（英文）
#     titles_en = [
#         'Relationship between Bounding Box Error Count\nand Local Density',
#         'Relationship between Bounding Box Error Count\nand Average Size',
#         'Relationship between Bounding Box Error Count\nand Size Variance',
#         'Relationship between Bounding Box Error Intensity\nand Local Density',
#         'Relationship between Bounding Box Error Intensity\nand Average Size',
#         'Relationship between Bounding Box Error Intensity\nand Size Variance'
#     ]
    
#     x_labels_en = [
#         'Local Density',
#         'Average Size',
#         'Size Variance',
#         'Local Density',
#         'Average Size',
#         'Size Variance'
#     ]
    
#     y_labels_en = [
#         'Error Count',
#         'Error Count',
#         'Error Count',
#         'Error Intensity',
#         'Error Intensity',
#         'Error Intensity'
#     ]
    
#     # 设置全局样式
#     plt.rcParams.update({
#         'figure.figsize': (10, 8),
#         'figure.dpi': 100,
#         'axes.grid': True,
#         'grid.alpha': 0.3,
#         'grid.linestyle': '--',
#         'font.size': 12,
#         'axes.labelsize': 12,
#         'axes.titlesize': 14,
#         'xtick.labelsize': 10,
#         'ytick.labelsize': 10
#     })
    
#     # 创建6个独立的图表
#     for i in range(6):
#         # 创建图形和轴对象
#         fig, ax = plt.subplots()
        
#         # 确定x和y数据
#         x_data = data[:, 2 + (i % 3)]  # 2, 3, 4 循环
#         y_data = data[:, i // 3]  # 0 for first three, 1 for last three
        
#         # 绘制散点图
#         scatter = ax.scatter(x_data, y_data, 
#                            alpha=0.5, 
#                            c='#2878B5',  # 专业的蓝色
#                            s=50,  # 点的大小
#                            edgecolors='white',  # 点的边缘颜色
#                            linewidth=0.5)  # 点的边缘宽度
        
#         # 添加趋势线
#         z = np.polyfit(x_data, y_data, 1)
#         p = np.poly1d(z)
#         x_range = np.linspace(min(x_data), max(x_data), 100)
#         ax.plot(x_range, p(x_range), 
#                 color='#C82423',  # 专业的红色
#                 linestyle='--', 
#                 alpha=0.8, 
#                 linewidth=2)
        
#         # 设置标题和标签
#         ax.set_title(titles_en[i], 
#                     pad=20,  # 标题与图表的间距
#                     fontweight='bold')
#         ax.set_xlabel(x_labels_en[i])
#         ax.set_ylabel(y_labels_en[i])
        
#         # 设置背景网格
#         ax.grid(True, linestyle='--', alpha=0.3)
        
#         # 设置图表边框
#         for spine in ax.spines.values():
#             spine.set_linewidth(1.5)
        
#         # 调整布局
#         plt.tight_layout()
        
#         # 保存图表
#         plt.savefig(f'bbox_error_analysis_{i+3}.png', 
#                    dpi=300, 
#                    bbox_inches='tight',
#                    facecolor='white',
#                    edgecolor='none')
#         plt.close()
    
#     print("Analysis plots have been saved as separate files.")


import numpy as np
import matplotlib.pyplot as plt

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Apply the seaborn "whitegrid" theme, which provides nicer default styling.
# NOTE(review): sns.set() applies its theme through matplotlib rcParams and its
# `font` argument defaults to 'sans-serif', so a font.family configured before
# this call is presumably replaced here — confirm against the seaborn docs.
sns.set(style="whitegrid", font_scale=1.2)

def _cjk_font_families():
    """Return font families that can render the Chinese labels, then 'serif'.

    The per-OS font names are the ones this file already uses for Chinese
    text; 'serif' stays last as the fallback on other systems.
    """
    per_system = {
        'Windows': ['Microsoft YaHei', 'SimSun'],
        'Linux': ['WenQuanYi Micro Hei'],
        'Darwin': ['Arial Unicode MS'],
    }
    return per_system.get(platform.system(), []) + ['serif']


def plot_analysis(results):
    """Save six scatter plots relating bbox error to local layout features.

    One figure is produced for each combination of target (``'error'``,
    ``'error_intensity'``) and feature (``'local_density'``, ``'avg_size'``,
    ``'size_variance'``), each with a first-degree least-squares trend line.
    The figures are written to ``bbox_error_analysis_3.png`` ...
    ``bbox_error_analysis_8.png`` in the current working directory.

    Rows with a non-finite value in any of the five columns are dropped
    before plotting, because ``np.polyfit`` cannot fit them:
    ``error_intensity`` is ``inf`` for boxes without any overlap and a
    feature value may be non-finite as well.

    Side effect: ``plt.rcParams`` is updated globally and not restored.

    Args:
        results: Iterable of dicts with the keys ``'error'``,
            ``'error_intensity'``, ``'local_density'``, ``'avg_size'`` and
            ``'size_variance'``.
    """
    # One row per result; reshape keeps a 2-D shape even for empty input.
    data = np.array([[
        r['error'],
        r['error_intensity'],
        r['local_density'],
        r['avg_size'],
        r['size_variance'],
    ] for r in results], dtype=float).reshape(-1, 5)

    # Keep only rows where every plotted value is finite.
    data = data[np.isfinite(data).all(axis=1)]

    if len(data) == 0:
        print("No available results for plotting.")
        return

    # Column 0/1 are the targets (y axis), columns 2..4 the features (x axis).
    target_labels_cn = ['边界框标注误差数量', '边界框标注误差强度']
    feature_labels_cn = ['边界框局部密度', '边界框局部平均尺寸', '边界框局部尺寸方差']

    plt.rcParams.update({
        'figure.figsize': (10, 8),
        'figure.dpi': 300,
        'axes.edgecolor': 'k',
        'axes.linewidth': 1.2,
        'xtick.color': 'k',
        'ytick.color': 'k',
        # A CJK-capable family must come first: with a plain 'serif' family
        # the Chinese titles and labels are drawn as missing glyphs.
        'font.family': _cjk_font_families(),
        'legend.frameon': True,
        'legend.framealpha': 1,
        'legend.edgecolor': 'k',
        'legend.fontsize': 10,
    })

    for i in range(6):
        target_idx = i // 3   # 0 for the first three plots, 1 for the rest
        feature_idx = i % 3   # cycles through the three features

        x_data = data[:, 2 + feature_idx]
        y_data = data[:, target_idx]

        fig, ax = plt.subplots()

        ax.scatter(x_data, y_data,
                   alpha=0.5,
                   c='#2878B5',
                   s=50,
                   edgecolors='white',
                   linewidth=0.5,
                   label='Data Points')

        # A linear trend needs at least two distinct x values.
        if x_data.min() < x_data.max():
            trend = np.poly1d(np.polyfit(x_data, y_data, 1))
            x_range = np.linspace(min(x_data), max(x_data), 100)
            ax.plot(x_range, trend(x_range),
                    color='#C82423',
                    linestyle='--',
                    alpha=0.8,
                    linewidth=2,
                    label='Trend Line')

        x_label = feature_labels_cn[feature_idx]
        y_label = target_labels_cn[target_idx]
        ax.set_title(f'{y_label}与{x_label}间的关系',
                     pad=20,  # gap between title and axes
                     fontweight='bold')
        ax.set_xlabel(x_label)
        ax.set_ylabel(y_label)

        ax.grid(True, linestyle='--', alpha=0.3)

        for spine in ax.spines.values():
            spine.set_linewidth(1.5)

        ax.legend()
        fig.tight_layout()

        # The numbering of the output files starts at 3.
        fig.savefig(f'bbox_error_analysis_{i + 3}.png',
                    dpi=300,
                    bbox_inches='tight',
                    facecolor='white',
                    edgecolor='none')
        plt.close(fig)


def main():
    """Run the bbox error analysis on the fixed input paths and save the plots."""
    # Folder with the per-image inference results.
    predictions_folder = '217/20250217_124341/results'
    # Ground-truth annotation file.
    annotations_path = 'sampled_dataset/annotations.json'

    # Analyse first, then plot what the analysis returned.
    analysis = analyze_bbox_errors(predictions_folder, annotations_path)
    plot_analysis(analysis)


if __name__ == '__main__':
    main()

开始加载数据...
开始加载推理结果，目录: 217/20250217_124341/results
加载真实标签文件: sampled_dataset/annotations.json
真实标签中的标注数量: 8303
组织预测数据...
组织真实标签数据...

预测数据中的图片数量: 100
真实标签中的图片数量: 99

部分ID对应关系:
图片名称: 0349720a-7d75-4dd5-95e6-b96d551d88ed
预测框数量: 300
真实框数量: 123
图片名称: 037b8242-bba6-4ae4-a103-f5c3ee3afa41
预测框数量: 300
真实框数量: 68
图片名称: 04cc6e9d-646d-4720-83c5-7b330c0bf604
预测框数量: 300
真实框数量: 147
图片名称: 06e1245b-9bac-4aef-9b74-eb8ade83e0ce
预测框数量: 300
真实框数量: 126
图片名称: 08fc21fd-6b29-4092-839e-4899d147682a
预测框数量: 300
真实框数量: 120
成功处理的图片数量: 99
分析完成，结果数量: 8303


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(f'bbox_error_analysis_{i + 3}.png',
  plt.savefig(f'bbox_error_analysis_{i + 3}.png',
  plt.savefig(f'bbox_error_analysis_{i + 3}.png',
  plt.savefig(f'bbox_error_analysis_{i + 3}.png',
  plt.savefig(f'bbox_error_analysis_{i + 3}.png',
  plt.savefig(f'bbox_error_analysis_{i + 3}.png',
  plt.savefig(f'bbox_error_analysis_{i + 3}.png',
  plt.savefig(f'bbox_error_analysis_{i + 3}.png',
  plt.savefig(f'bbox_error_analysis_{i + 3}.png',
  plt.savefig(f'bbox_error_analysis_{i + 3}.png',
  plt.savefig(f'bbox_error_analysis_{i + 3}.png',
  plt.savefig(f'bbox_error_analysis_{i + 3}.png',
  plt.savefig(f'bbox_e

In [None]:
    # Scratch fragment: English titles and axis labels for the six plots.
    # NOTE(review): the cell is indented as if pasted from inside a function
    # body and has no enclosing definition here.
    titles_en = [
        'Relationship between Bounding Box Error Count\nand Local Density',
        'Relationship between Bounding Box Error Count\nand Average Size',
        'Relationship between Bounding Box Error Count\nand Size Variance',
        'Relationship between Bounding Box Error Intensity\nand Local Density',
        'Relationship between Bounding Box Error Intensity\nand Average Size',
        'Relationship between Bounding Box Error Intensity\nand Size Variance'
    ]
    
    # Chinese titles (all entries are commented out, so the list is empty)
    titles_cn = [
        # '边界框标注误差数量与局部密度关系',
        # '边界框标注误差数量与局部平均尺寸关系',
        # '边界框标注误差数量与局部尺寸方差关系',
        # '边界框标注误差强度与局部密度关系',
        # '边界框标注误差强度与局部平均尺寸关系',
        # '边界框标注误差强度与局部尺寸方差关系'
    ]
    
    x_labels_en = [
        'Local Density',
        'Average Size',
        'Size Variance',
        'Local Density',
        'Average Size',
        'Size Variance'
    ]
    
    # Chinese x-axis labels (all entries are commented out, so the list is empty)
    x_labels_cn = [
        # '局部密度',
        # '局部平均尺寸',
        # '局部尺寸方差',
        # '局部密度',
        # '局部平均尺寸',
        # '局部尺寸方差'
    ]
    
    y_labels_en = [
        'Error Count',
        'Error Count',
        'Error Count',
        'Error Intensity',
        'Error Intensity',
        'Error Intensity'
    ]
    
    # Chinese y-axis labels (all entries are commented out, so the list is empty)
    y_labels_cn = [
        # '误差数量',
        # '误差数量',
        # '误差数量',
        # '误差强度',
        # '误差强度',
        # '误差强度'
    ]

### 绘制主动学习各轮teacher模型置信度分布

In [2]:
import json
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import seaborn as sns

def analyze_confidence_distribution(base_path, rounds=16, top_k=100,
                                    output_file='al_entropy_confidence_distribution.png'):
    """Plot the teacher-model confidence histogram of each active-learning round.

    For every round ``i`` in ``1..rounds`` the function looks in
    ``<base_path>/round_<i>/teacher_outputs`` for sub-directories whose name
    starts with ``2`` and uses the one that sorts last. From that directory it
    reads ``uncertainty/uncertainty_ranking.json``, takes the first ``top_k``
    paths of its ``'ranking'`` list and collects the ``'scores'`` of the
    matching ``results/<image stem>_result.json`` files. Rounds with missing
    directories, files or scores are reported on stdout and left as empty
    axes.

    The histograms (20 bins of width 0.05 on [0, 1], with mean and median
    markers) are drawn on a grid with four columns and as many rows as
    ``rounds`` requires, then saved to ``output_file``.

    Side effect: ``plt.rcParams`` is updated globally and not restored.

    Args:
        base_path: Root directory of one active-learning run.
        rounds: Number of rounds to plot, numbered from 1.
        top_k: Number of leading entries of the uncertainty ranking to use.
        output_file: Path of the image file to write.
    """
    plt.rcParams.update({
        'figure.figsize': (20, 20),
        'font.size': 12,
        'axes.labelsize': 14,
        'axes.titlesize': 16
    })

    # Size the grid from `rounds` so that every round has its own axes
    # (4 x 4 and 20 x 20 inches for the default of 16 rounds).
    n_cols = 4
    n_rows = max(1, -(-rounds // n_cols))  # ceiling division
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(20, 5 * n_rows), squeeze=False)
    axes = axes.flatten()

    # Hide grid cells that do not correspond to any round.
    for surplus_ax in axes[max(rounds, 0):]:
        surplus_ax.set_visible(False)

    for round_idx in range(1, rounds + 1):
        print(f"Processing round {round_idx}...")

        round_path = Path(base_path) / f'round_{round_idx}' / 'teacher_outputs'

        # Timestamp-named directories sort chronologically by name.
        timestamp_dirs = sorted([d for d in round_path.glob('2*') if d.is_dir()])
        if not timestamp_dirs:
            print(f"No timestamp directory found for round {round_idx}")
            continue

        latest_dir = timestamp_dirs[-1]
        print(f"Using timestamp directory: {latest_dir}")

        uncertainty_file = latest_dir / 'uncertainty' / 'uncertainty_ranking.json'
        if not uncertainty_file.exists():
            print(f"Uncertainty ranking file not found: {uncertainty_file}")
            continue

        with open(uncertainty_file, 'r', encoding='utf-8') as f:
            uncertainty_data = json.load(f)

        if 'ranking' not in uncertainty_data:
            print(f"No ranking data found in round {round_idx}")
            continue

        # Image names (without extension) of the first `top_k` ranked paths.
        image_ids = [Path(path).stem for path in uncertainty_data['ranking'][:top_k]]

        # Gather the confidence scores of all boxes in those images.
        all_scores = []
        results_dir = latest_dir / 'results'
        for img_id in image_ids:
            result_file = results_dir / f'{img_id}_result.json'
            if result_file.exists():
                with open(result_file, 'r', encoding='utf-8') as f:
                    result_data = json.load(f)
                all_scores.extend(result_data['scores'])
            else:
                print(f"Result file not found: {result_file}")

        if not all_scores:
            print(f"No confidence scores found for round {round_idx}")
            continue

        ax = axes[round_idx - 1]
        bins = np.arange(0, 1.05, 0.05)
        ax.hist(all_scores, bins=bins, alpha=0.7,
                color='#2878B5', edgecolor='black')

        ax.set_title(f'Round {round_idx}')
        ax.set_xlabel('Confidence Score')
        ax.set_ylabel('Frequency')
        ax.grid(True, alpha=0.3)

        # Vertical markers for the mean and the median score.
        mean_score = np.mean(all_scores)
        median_score = np.median(all_scores)
        ax.axvline(mean_score, color='#C82423', linestyle='--', alpha=0.8,
                   label=f'Mean: {mean_score:.3f}')
        ax.axvline(median_score, color='#28A428', linestyle='--', alpha=0.8,
                   label=f'Median: {median_score:.3f}')
        ax.legend()

        ax.set_xlim(-0.05, 1.05)
        ax.set_xticks(np.arange(0, 1.1, 0.1))

    fig.suptitle(
        f'Confidence Score Distribution in Active Learning Rounds\n(Top {top_k} Uncertain Samples)',
        fontsize=20, y=1.02)
    fig.tight_layout()

    fig.savefig(output_file, dpi=300, bbox_inches='tight')
    plt.close(fig)

    print("Confidence distribution plot has been saved.")

# Example usage: plot the confidence distributions of the al_entropy run.
base_path = 'work_dirs/faster-rcnn/al_entropy'
analyze_confidence_distribution(base_path)

Processing round 1...
Using timestamp directory: work_dirs\faster-rcnn\al_entropy\round_1\teacher_outputs\20250216_204903
Processing round 2...
Using timestamp directory: work_dirs\faster-rcnn\al_entropy\round_2\teacher_outputs\20250216_211806
Processing round 3...
Using timestamp directory: work_dirs\faster-rcnn\al_entropy\round_3\teacher_outputs\20250216_214519
Processing round 4...
Using timestamp directory: work_dirs\faster-rcnn\al_entropy\round_4\teacher_outputs\20250216_221128
Processing round 5...
Using timestamp directory: work_dirs\faster-rcnn\al_entropy\round_5\teacher_outputs\20250216_223547
Processing round 6...
Using timestamp directory: work_dirs\faster-rcnn\al_entropy\round_6\teacher_outputs\20250216_225823
Processing round 7...
Using timestamp directory: work_dirs\faster-rcnn\al_entropy\round_7\teacher_outputs\20250216_231918
Processing round 8...
Using timestamp directory: work_dirs\faster-rcnn\al_entropy\round_8\teacher_outputs\20250216_233843
Processing round 9...
Us