In [2]:
import json
import matplotlib.pyplot as plt
import numpy as np


In [3]:
def load_coco_data(json_file):
    """Load a COCO-style annotation file and return the parsed JSON.

    Parameters:
    - json_file (str): path to the JSON annotation file.

    Returns:
    The object produced by ``json.load`` for the file contents.
    """
    # JSON text is UTF-8 by specification; naming the encoding keeps the
    # result independent of the platform's locale default.
    with open(json_file, 'r', encoding='utf-8') as file:
        return json.load(file)

def analyze_image_resolutions(data, scatter_size_factor=5, font_size=16, tick_size=14, color='blue', save_path=None, dpi=300):
    """Scatter-plot the distinct image resolutions listed in ``data['images']``.

    Each distinct (width, height) pair is drawn as one marker whose size is the
    number of images with that resolution multiplied by ``scatter_size_factor``.

    Parameters:
    - data (dict): must provide ``data['images']``, a sequence of dicts with
      the keys 'id', 'width' and 'height'.
    - scatter_size_factor (int): multiplier applied to the per-resolution count
      to obtain the marker size.
    - font_size (int): font size of the title and the axis labels.
    - tick_size (int): font size of the tick labels.
    - color (str): marker colour.
    - save_path (str): file path the figure is saved to; nothing is saved when
      it is None (or otherwise falsy).
    - dpi (int): resolution used when saving, 300 by default.
    """
    # Keyed by image id, so an id that occurs twice is only counted once.
    size_by_image = {}
    for img in data['images']:
        resolution = (img['width'], img['height'])
        size_by_image[img['id']] = resolution

    # How many images share each resolution.
    images_per_resolution = {}
    for resolution in size_by_image.values():
        if resolution in images_per_resolution:
            images_per_resolution[resolution] += 1
        else:
            images_per_resolution[resolution] = 1

    n_images = sum(images_per_resolution.values())

    fig, ax = plt.subplots(figsize=(12, 9))
    for (width, height), n_same in images_per_resolution.items():
        ax.scatter(width, height, s=n_same * scatter_size_factor, color=color)

    ax.set_xlabel('宽度', fontsize=font_size)
    ax.set_ylabel('高度', fontsize=font_size)
    ax.set_title('真实图像分辨率分析', fontsize=font_size, fontweight='bold')
    # Total image count in the bottom-right corner of the figure.
    plt.figtext(0.99, 0.01, f'总图像数: {n_images}', horizontalalignment='right', fontsize=12, fontweight='bold')

    ax.tick_params(axis='both', which='major', labelsize=tick_size)
    for hidden_side in ('top', 'right'):
        ax.spines[hidden_side].set_visible(False)
    for thin_side in ('left', 'bottom'):
        ax.spines[thin_side].set_linewidth(0.7)

    if save_path:
        plt.savefig(save_path, dpi=dpi)

    plt.show()

# Usage example: palette keyed by category id (0, 1, 2).
# Append further hex colours to the tuple to cover more category ids.
color_map = dict(enumerate((
    '#e9e99d',  # category id 0
    '#dda03c',  # category id 1
    '#93b9a7',  # category id 2
)))

def analyze_object_sizes(data, max_samples=1000, scatter_size_factor=4, font_size=16, tick_size=14, color_map=None, save_path=None, dpi=400, x_range=(0, 300), y_range=(0, 200), x_ticks=[0,50,100,150,200,250,300], y_ticks=[0,50,100,150,200]):
    """
    Scatter-plot bounding-box width against height, coloured by category.

    Only the first ``max_samples`` entries of ``data['annotations']`` are
    plotted, in the order they appear (no random sampling).

    Parameters:
    - data (dict): must provide ``data['categories']`` (dicts with 'id' and
      'name') and ``data['annotations']`` (dicts with 'bbox' and
      'category_id'); bbox items 2 and 3 are used as width and height.
    - max_samples (int): maximum number of annotations to plot.
    - scatter_size_factor (int): the marker size is ``scatter_size_factor * 10``.
    - font_size (int): font size of the title and the axis labels.
    - tick_size (int): font size of the tick labels.
    - color_map (dict): maps category id to colour; ids that are missing (or
      all ids when None) are drawn in grey.
    - save_path (str): file path the figure is saved to; nothing is saved when
      it is None (or otherwise falsy).
    - dpi (int): resolution used when saving, 400 by default.
    - x_range (tuple): x-axis limits as (xmin, xmax).
    - y_range (tuple): y-axis limits as (ymin, ymax).
    - x_ticks (list): x-axis tick positions; None keeps matplotlib's ticks.
    - y_ticks (list): y-axis tick positions; None keeps matplotlib's ticks.
    """
    category_names = {cat['id']: cat['name'] for cat in data['categories']}

    # Without a palette every category falls back to grey in the lookup below.
    if color_map is None:
        color_map = {}

    widths, heights, point_colors = [], [], []
    # label -> colour of its first point, in order of first appearance; this
    # replaces re-scanning the axes' legend labels for every single point.
    legend_entries = {}
    for index, annotation in enumerate(data['annotations']):
        if index >= max_samples:
            break
        bbox = annotation['bbox']
        category_id = annotation['category_id']
        color = color_map.get(category_id, 'grey')
        # Annotations whose category id is not declared are still plotted
        # instead of aborting the whole figure with a KeyError.
        label = category_names.get(category_id, 'Unknown')

        widths.append(bbox[2])
        heights.append(bbox[3])
        point_colors.append(color)
        legend_entries.setdefault(label, color)

    fig, ax = plt.subplots(figsize=(12, 8))
    marker_size = scatter_size_factor * 10

    # One artist for all points: same drawing order as point-by-point
    # plotting, without creating thousands of separate collections.
    if widths:
        ax.scatter(widths, heights, s=marker_size, c=point_colors)

    # Empty, labelled collections provide exactly one legend entry per label.
    for label, color in legend_entries.items():
        ax.scatter([], [], s=marker_size, color=color, label=label)

    # Axis limits
    ax.set_xlim(*x_range)
    ax.set_ylim(*y_range)

    # Axis ticks
    if x_ticks is not None:
        ax.set_xticks(x_ticks)
    if y_ticks is not None:
        ax.set_yticks(y_ticks)

    ax.set_xlabel('Width', fontsize=font_size)
    ax.set_ylabel('Height', fontsize=font_size)
    ax.set_title('Size distribution of sampled objects in annotations', fontsize=font_size, fontweight='bold')

    ax.tick_params(axis='both', which='major', labelsize=tick_size)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['left'].set_linewidth(0.7)
    ax.spines['bottom'].set_linewidth(0.7)

    # A legend without labelled artists only produces a warning, so skip it.
    if legend_entries:
        ax.legend(loc='upper right', fontsize=12)

    if save_path:
        fig.savefig(save_path, dpi=dpi)

    plt.show()



    

def analyze_category_distribution(data, font_size=16, tick_size=12, cmap='tab20', save_path=None, dpi=300):
    """Draw a donut chart of the number of annotations per category.

    Each wedge is labelled with the category name, its percentage and the
    absolute annotation count.

    Parameters:
    - data (dict): must provide ``data['annotations']`` (dicts with
      'category_id') and ``data['categories']`` (dicts with 'id' and 'name').
    - font_size (int): font size of the title and the wedge texts.
    - tick_size (int): font size of tick labels (a pie chart normally shows
      none, so this rarely has a visible effect).
    - cmap (str): name of the Matplotlib colormap the wedge colours come from.
    - save_path (str): file path the figure is saved to; nothing is saved when
      it is None (or otherwise falsy).
    - dpi (int): resolution used when saving, 300 by default.
    """
    category_counts = {}
    for annotation in data['annotations']:
        category_id = annotation['category_id']
        category_counts[category_id] = category_counts.get(category_id, 0) + 1

    category_names = {cat['id']: cat['name'] for cat in data['categories']}

    # Ids that are not declared under 'categories' are labelled 'Unknown'
    # instead of raising a KeyError.
    labels = [category_names.get(cat_id, 'Unknown') for cat_id in category_counts]
    sizes = list(category_counts.values())
    total = sum(sizes)

    colormap = plt.get_cmap(cmap)
    # Only listed colormaps (e.g. 'tab20') expose a discrete ``colors`` list;
    # for continuous ones sample one evenly spaced colour per wedge.
    wedge_colors = getattr(colormap, 'colors', None)
    if wedge_colors is None:
        steps = max(len(sizes) - 1, 1)
        wedge_colors = [colormap(i / steps) for i in range(len(sizes))]

    def _wedge_text(pct):
        # The percentage is a float; round (not truncate) when converting it
        # back to a count, otherwise round-off can report one object too few.
        return f'{pct:.1f}%\n({int(round(pct * total / 100))})'

    fig, ax = plt.subplots(figsize=(10, 12))
    wedges, texts, autotexts = ax.pie(sizes, labels=labels, autopct=_wedge_text,
                                      startangle=90, pctdistance=0.85, textprops={'fontsize': font_size}, colors=wedge_colors)

    # A white disc over the centre turns the pie into a donut.
    centre_circle = plt.Circle((0, 0), 0.70, fc='white')
    ax.add_artist(centre_circle)

    ax.axis('equal')
    ax.set_title('class distribution', fontsize=font_size, fontweight='bold')

    ax.tick_params(axis='both', which='major', labelsize=tick_size)

    if save_path:
        fig.savefig(save_path, dpi=dpi)

    plt.show()

In [None]:
# Load the data
data = load_coco_data('/opt/data/private/fcf/mmdetection/data/HazyDet-365k/Real_Haze/train/train_coco.json')
# data = load_coco_data('/opt/data/private/fcf/mmdetection/data/HazyDet-365k/train/train_coco.json')

# Run the analyses
# analyze_image_resolutions(data)


analyze_object_sizes(data, scatter_size_factor=0.3, color_map = { 0: '#87a4ff',  # color for category id 0
    1: '#ff6678',  # color for category id 1
    2: '#ffc25e',  # color for category id 2
    },max_samples= 2500,save_path='/opt/data/private/fcf/mmdetection/tools/plot/output/Fig5(a).png')  # Plot only the first 2500 annotations for performance reasons


# analyze_category_distribution(data)  # New function to analyze category distribution

In [None]:
### Collect the bounding-box area of the objects of each category and show it as histograms
import json
import os
import matplotlib.pyplot as plt
from collections import defaultdict
import numpy as np

# Colour cycle (matplotlib single-letter codes) so that different categories
# get different colours.
colors = list('brgcmyk')

def plot_histograms_for_categories(json_file_path, num_bins=30, output_dir='output'):
    """Plot and save one histogram of log(bbox area) per category.

    The area of an annotation is bbox[2] * bbox[3]. Areas that are not
    positive are skipped because their logarithm is undefined. All histograms
    share one bin range, spanning the smallest to the largest log-area found
    in the whole file, so they can be compared directly. Each figure is saved
    as ``<output_dir>/<category>_size_distribution.png`` and then shown.

    Parameters:
    - json_file_path (str): path to the COCO-style JSON annotation file.
    - num_bins (int): number of histogram bins.
    - output_dir (str): directory the PNG files are written to; it is created
      when missing.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)

    with open(json_file_path, 'r', encoding='utf-8') as f:
        coco_data = json.load(f)

    # Category id -> category name
    categories = {category['id']: category['name'] for category in coco_data.get('categories', [])}

    # Category name -> list of positive areas
    category_sizes = defaultdict(list)
    for annotation in coco_data.get('annotations', []):
        category_name = categories.get(annotation['category_id'], 'Unknown')
        bbox = annotation['bbox']
        area = bbox[2] * bbox[3]  # width * height
        # Looking the list up first registers the category even when all of
        # its boxes are degenerate, so it still gets an (empty) histogram.
        areas = category_sizes[category_name]
        if area > 0:
            areas.append(area)

    positive_areas = [area for areas in category_sizes.values() for area in areas]
    if not positive_areas:
        # Without a single positive area there is no common bin range.
        print('No annotations with a positive area found; nothing to plot.')
        return

    log_all_areas = np.log(positive_areas)
    log_range = (log_all_areas.min(), log_all_areas.max())

    for idx, (category, areas) in enumerate(category_sizes.items()):
        log_sizes = np.log(areas)

        fig, ax = plt.subplots()
        ax.hist(log_sizes, bins=num_bins, range=log_range, edgecolor='black', color=colors[idx % len(colors)])
        ax.set_title(f'Distribution of {category} sizes (Log scale)')
        ax.set_xlabel('Log(Area)')
        ax.set_ylabel('Count')
        ax.grid(True)
        output_path = os.path.join(output_dir, f'{category}_size_distribution.png')
        fig.savefig(output_path)
        plt.show()
        # Release the figure; otherwise every category keeps one alive.
        plt.close(fig)

# Example file path
json_file_path = '/opt/data/private/fcf/mmdetection/data/HazyDet-365k/test/test_coco.json'

# Call the function to generate the histograms
plot_histograms_for_categories(json_file_path)

In [None]:
## Count the number of objects of each category

import json
from collections import defaultdict

def count_categories_in_coco(json_file_path):
    """Count the annotations of each category in a COCO-style JSON file.

    Parameters:
    - json_file_path (str): path to the JSON annotation file.

    Returns:
    A ``defaultdict(int)`` mapping category name to the number of annotations
    of that category. Annotations whose 'category_id' is not listed under
    'categories' are counted under 'Unknown'.
    """
    # JSON text is UTF-8 by specification; naming the encoding keeps category
    # names intact regardless of the platform's locale default.
    with open(json_file_path, 'r', encoding='utf-8') as f:
        coco_data = json.load(f)

    # Category id -> category name
    categories = {category['id']: category['name'] for category in coco_data.get('categories', [])}

    category_count = defaultdict(int)
    for annotation in coco_data.get('annotations', []):
        category_name = categories.get(annotation['category_id'], 'Unknown')
        category_count[category_name] += 1

    return category_count

# Example file path
json_file_path = '/opt/data/private/fcf/mmdetection/data/HazyDet-365k/Real_Haze/train/train_coco.json'

# Call the function and print the result
category_count = count_categories_in_coco(json_file_path)
for category, count in category_count.items():
    print(f"Category: {category}, Count: {count}")

In [19]:
import json

def read_annotations(json_file):
    """Read a JSON annotation file and return the parsed content.

    Parameters:
    - json_file (str): path to the JSON file.

    Returns:
    The object produced by ``json.load`` for the file contents.
    """
    # JSON text is UTF-8 by specification; naming the encoding keeps the
    # result independent of the platform's locale default.
    with open(json_file, 'r', encoding='utf-8') as f:
        return json.load(f)

def calculate_area(bbox):
    """Return the area of a bounding box.

    ``bbox`` must unpack into exactly four items; the last two are taken as
    width and height, the first two are ignored.
    """
    (_first, _second, width, height) = bbox
    area = width * height
    return area

def categorize_target_size(target_area, image_area):
    """Classify an object by the share of the image area it covers.

    The share is expressed in percent: up to 0.1 % is 'small', up to 1 % is
    'medium', anything above is 'large'.
    """
    share_percent = (target_area / image_area) * 100
    # Upper bounds (inclusive, in percent) checked in ascending order.
    for upper_bound, size_label in ((0.1, 'small'), (1, 'medium')):
        if share_percent <= upper_bound:
            return size_label
    return 'large'

def count_targets_by_category_and_size(annotations):
    """Count objects per category id and per relative size class.

    Each annotation's bbox area is compared with the area of the image named
    by its 'image_id' (via ``categorize_target_size``).

    Returns a dict mapping category id to
    ``{'small': n, 'medium': n, 'large': n}``.
    """
    # Image id -> image record, for looking up width and height.
    image_by_id = {}
    for img in annotations['images']:
        image_by_id[img['id']] = img

    tallies = {}
    for ann in annotations['annotations']:
        frame = image_by_id[ann['image_id']]
        frame_area = frame['width'] * frame['height']
        box_area = calculate_area(ann['bbox'])
        size_label = categorize_target_size(box_area, frame_area)

        # First sighting of a category creates its zeroed counters.
        per_size = tallies.setdefault(ann['category_id'], {'small': 0, 'medium': 0, 'large': 0})
        per_size[size_label] += 1

    return tallies

def get_category_names(annotations):
    """Return a dict mapping category id to category name.

    Built from ``annotations['categories']``; if an id occurs more than once
    the last entry wins.
    """
    return dict((entry['id'], entry['name']) for entry in annotations['categories'])

def main(annotation_file):
    """Print, for each category, how many objects fall into each size class.

    Category ids without a name entry are printed as 'Unknown'.
    """
    annotations = read_annotations(annotation_file)
    counts_by_category = count_targets_by_category_and_size(annotations)
    names = get_category_names(annotations)

    for category_id in counts_by_category:
        print(f"Category: {names.get(category_id, 'Unknown')}")
        per_size = counts_by_category[category_id]
        for size_label in per_size:
            print(f"  {size_label.capitalize()} targets: {per_size[size_label]}")
        print()

# Entry point: print the size statistics for the Real_Haze training annotations.
if __name__ == "__main__":
    annotation_file = "/opt/data/private/fcf/mmdetection/data/HazyDet-365k/Real_Haze/train/train_coco.json"
    main(annotation_file)

Category: car
  Small targets: 8167
  Medium targets: 8993
  Large targets: 1060

Category: bus
  Small targets: 69
  Medium targets: 363
  Large targets: 155

Category: truck
  Small targets: 112
  Medium targets: 290
  Large targets: 87



In [12]:
import json
########### COCO standard (absolute-area size thresholds)


def read_annotations(json_file):
    """Read a JSON annotation file and return the parsed content.

    Parameters:
    - json_file (str): path to the JSON file.

    Returns:
    The object produced by ``json.load`` for the file contents.
    """
    # JSON text is UTF-8 by specification; naming the encoding keeps the
    # result independent of the platform's locale default.
    with open(json_file, 'r', encoding='utf-8') as f:
        return json.load(f)

def calculate_area(bbox):
    """Return the product of the last two items of a four-item bbox.

    The third and fourth items are treated as width and height; the first two
    are unpacked but unused.
    """
    _x, _y, box_width, box_height = bbox
    return box_width * box_height

def categorize_target_size(target_area):
    """Classify an object by its absolute area.

    Below 32*32 is 'small', above 96*96 is 'large', everything in between
    (both bounds included) is 'medium'.
    """
    small_limit = 32 * 32
    large_limit = 96 * 96

    if target_area < small_limit:
        return 'small'
    if target_area > large_limit:
        return 'large'
    return 'medium'

def count_targets_by_category_and_size(annotations):
    """Count objects per category id and per absolute size class.

    The size class of every entry in ``annotations['annotations']`` comes from
    ``categorize_target_size`` applied to its bbox area.

    Returns a dict mapping category id to
    ``{'small': n, 'medium': n, 'large': n}``.
    """
    tallies = {}

    for ann in annotations['annotations']:
        size_label = categorize_target_size(calculate_area(ann['bbox']))

        # First sighting of a category creates its zeroed counters.
        per_size = tallies.setdefault(ann['category_id'], {'small': 0, 'medium': 0, 'large': 0})
        per_size[size_label] += 1

    return tallies

def get_category_names(annotations):
    """Build the category id -> category name lookup.

    Reads the 'id' and 'name' of every entry in ``annotations['categories']``.
    """
    id_name_pairs = ((entry['id'], entry['name']) for entry in annotations['categories'])
    return dict(id_name_pairs)

def main(annotation_file):
    """Print the small/medium/large object counts of every category.

    Category ids without a name entry are printed as 'Unknown'.
    """
    annotations = read_annotations(annotation_file)
    counts_by_category = count_targets_by_category_and_size(annotations)
    names = get_category_names(annotations)

    for category_id in counts_by_category:
        print(f"Category: {names.get(category_id, 'Unknown')}")
        per_size = counts_by_category[category_id]
        for size_label in per_size:
            print(f"  {size_label.capitalize()} targets: {per_size[size_label]}")
        print()

# Entry point: print the size statistics for the HazyDet-365k training annotations.
if __name__ == "__main__":
    annotation_file = "/opt/data/private/fcf/mmdetection/data/HazyDet-365k/train/train_coco.json"
    main(annotation_file)

Category: car
  Small targets: 69971
  Medium targets: 147799
  Large targets: 24425

Category: truck
  Small targets: 3229
  Medium targets: 5884
  Large targets: 2513

Category: bus
  Small targets: 1568
  Medium targets: 3587
  Large targets: 5575

