# YOLOv3 图像识别完整教程

本教程涵盖了 YOLOv3 的完整使用流程，从安装到执行目标检测。

## 1. 环境准备与安装

In [1]:
# Check whether a GPU is available (optional)
!nvidia-smi

In [2]:
# 安装必要的依赖（假设 OpenCV 已安装，若未安装则需要安装）
!pip install opencv-python numpy matplotlib

## 2. 导入必要的库

In [3]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from IPython.display import display, Image as IPImage
from matplotlib import font_manager

# 设置 matplotlib 显示参数
%matplotlib inline
plt.rcParams['figure.figsize'] = [12, 8]

# 检查系统中可用字体并设置支持中文的字体
available_fonts = [f.name for f in font_manager.FontManager().ttflist]
preferred_fonts = ['PingFang SC', 'Heiti TC', 'Songti SC', 'Arial Unicode MS']  # macOS 常见中文字体
selected_font = None
for font in preferred_fonts:
    if font in available_fonts:
        selected_font = font
        break

if selected_font:
    plt.rcParams['font.sans-serif'] = [selected_font]
    print(f'使用字体: {selected_font}')
else:
    print('未找到支持中文的字体，请检查系统字体配置')
    plt.rcParams['font.sans-serif'] = ['sans-serif']  # 回退到默认

plt.rcParams['axes.unicode_minus'] = False  # 解决负号显示问题

## 3. 加载 YOLOv3 预训练模型

我们将使用 YOLOv3 的预训练权重（yolov3.weights）、配置文件（yolov3.cfg）以及类别名称文件（coco.names）。权重文件可以从 YOLO 官方网站下载，配置文件和类别名称文件位于 darknet 的 GitHub 仓库中:
- yolov3.weights: https://pjreddie.com/media/files/yolov3.weights
- yolov3.cfg: https://github.com/pjreddie/darknet/blob/master/cfg/yolov3.cfg
- coco.names: https://github.com/pjreddie/darknet/blob/master/data/coco.names


In [4]:
# 下载 YOLOv3 模型文件（如果尚未下载）
if not os.path.exists('yolov3.weights'):
    !wget https://pjreddie.com/media/files/yolov3.weights -q
if not os.path.exists('yolov3.cfg'):
    !wget https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov3.cfg -q
if not os.path.exists('coco.names'):
    !wget https://raw.githubusercontent.com/pjreddie/darknet/blob/master/data/coco.names -q

# 加载 YOLOv3 模型
net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg')

# 加载类别名称
with open('coco.names', 'r') as f:
    classes = [line.strip() for line in f.readlines()]

# 获取输出层名称
layer_names = net.getLayerNames()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]

print(f'模型加载完成: {classes}')

## 4. 定义 YOLOv3 检测函数

In [5]:
def detect_objects(image, net, output_layers, conf_threshold=0.5, nms_threshold=0.4,
                   class_names=None):
    """Run the network on an image and draw the detections that survive NMS.

    Args:
        image: Image array (as returned by ``cv2.imread``); its first two
            shape entries are read as height and width.
        net: Network object providing ``setInput`` and ``forward``.
        output_layers: Layer names passed to ``net.forward``.
        conf_threshold: Minimum class score for a candidate to be kept; also
            passed to ``cv2.dnn.NMSBoxes`` as its score threshold.
        nms_threshold: Overlap threshold passed to ``cv2.dnn.NMSBoxes``.
        class_names: Optional sequence mapping a class index to its label.
            When omitted, the notebook-level ``classes`` list is used.

    Returns:
        A tuple ``(result_image, detections)``. ``result_image`` is a copy of
        ``image`` with boxes and labels drawn on it. ``detections`` is a list
        of dicts with the keys ``'class'``, ``'confidence'`` and ``'box'``
        (``[x1, y1, x2, y2]`` in pixels of the input image).
    """
    names = classes if class_names is None else class_names
    height, width = image.shape[:2]

    # Pre-processing: scale pixel values by 1/255, resize to 416x416 without
    # cropping, and swap the R and B channels.
    blob = cv2.dnn.blobFromImage(image, 1/255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)

    # Forward pass through the requested output layers
    outputs = net.forward(output_layers)

    # Collect every candidate whose best class score exceeds the threshold
    boxes, confidences, class_ids = [], [], []
    for output in outputs:
        for detection in output:
            # Entries from index 5 onward are read as per-class scores
            scores = detection[5:]
            class_id = int(np.argmax(scores))
            confidence = scores[class_id]
            if confidence > conf_threshold:
                # Entries 0-3 are scaled by the image size, i.e. treated as
                # normalized centre x, centre y, width and height.
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                # Convert the centre point to the top-left corner
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    # Non-maximum suppression (NMS)
    indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)
    # The result may be an empty tuple, a flat array or an N x 1 array
    # (this varies with the OpenCV release); normalize to plain ints.
    kept = [int(i) for i in np.array(indices).flatten()]

    # Draw the surviving detections on a copy so the input stays untouched
    result_image = image.copy()
    detections = []
    white_color = (255, 255, 255)  # label text is always white
    for i in kept:
        x, y, w, h = boxes[i]
        class_name = names[class_ids[i]]
        label = f'{class_name}: {confidences[i]:.2f}'
        color = [int(c) for c in np.random.randint(0, 255, size=3)]  # random box colour
        cv2.rectangle(result_image, (x, y), (x + w, y + h), color, 2)
        # Keep the label inside the image when the box touches the top or left edge
        label_origin = (max(x, 0), max(y - 10, 15))
        cv2.putText(result_image, label, label_origin, cv2.FONT_HERSHEY_SIMPLEX, 0.5, white_color, 2)
        detections.append({
            'class': class_name,
            'confidence': confidences[i],
            'box': [x, y, x + w, y + h]
        })

    return result_image, detections

## 5. 准备测试图像

这里我们提供四张测试图像，并以 2x2 格式显示。

In [None]:
# Paths of the four test images
image_paths = ['../yolo-images/giraffe.jpg', '../yolo-images/eagle.jpg', '../yolo-images/bus.jpg', '../yolo-images/dog.jpg']

# Read the images and show the originals in a 2x2 grid.
# Iterating over `axs.flat` pairs each path with one subplot in row-major order.
fig, axs = plt.subplots(2, 2, figsize=(12, 8))
for ax, img_path in zip(axs.flat, image_paths):
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None; show a placeholder in
        # the subplot instead of leaving an empty framed axes.
        print(f'无法加载图像: {img_path}')
        ax.text(0.5, 0.5, '图像加载失败', ha='center', va='center')
    else:
        # OpenCV loads images as BGR; matplotlib expects RGB
        ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        ax.set_title(f'原始图像: {os.path.basename(img_path)}')
    ax.axis('off')
plt.tight_layout()
plt.show()

## 6. 获取并处理检测结果数据 (2x2 显示)

In [7]:
# Run detection on every image; an image that failed to load yields (None, [])
images = list(map(cv2.imread, image_paths))
results = [
    detect_objects(frame, net, output_layers) if frame is not None else (None, [])
    for frame in images
]

# Print a per-image report of the detections
separator = '-' * 50
for idx, (img_path, (_, detections)) in enumerate(zip(image_paths, results)):
    print(f'图像 {idx + 1}: {os.path.basename(img_path)}')
    print('边界框数量:', len(detections))
    for det in detections:
        print(f"类别: {det['class']}, 置信度: {det['confidence']:.2f}, 边界框: {det['box']}")
    print(separator)

# Show the annotated images in a 2x2 grid
fig, axs = plt.subplots(2, 2, figsize=(12, 8))
for idx, (result_image, _) in enumerate(results):
    ax = axs[idx // 2, idx % 2]
    if result_image is None:
        # Placeholder for an image that could not be loaded
        ax.text(0.5, 0.5, '图像加载失败', ha='center', va='center')
    else:
        # Convert BGR (OpenCV) to RGB (matplotlib) before displaying
        rgb_view = cv2.cvtColor(result_image, cv2.COLOR_BGR2RGB)
        ax.imshow(rgb_view)
        ax.set_title(f'检测结果: {os.path.basename(image_paths[idx])}')
    ax.axis('off')
plt.tight_layout()
plt.show()

## 7. 保存检测结果

In [8]:
# Create the directory that receives the results
os.makedirs('detection_results', exist_ok=True)

# Save the annotated image and a text report for every image that was processed
for img_path, (result_image, detections) in zip(image_paths, results):
    if result_image is None:
        # Nothing to save for images that failed to load
        continue

    file_name = os.path.basename(img_path)

    # Save the annotated image; cv2.imwrite reports failure via its return value
    output_path = f'detection_results/result_{file_name}'
    if cv2.imwrite(output_path, result_image):
        print(f'检测结果保存到: {output_path}')
    else:
        print(f'无法保存图像: {output_path}')

    # Save the detection details to a TXT file. os.path.splitext strips only
    # the final extension, so names containing several dots stay intact.
    stem = os.path.splitext(file_name)[0]
    txt_path = f'detection_results/result_{stem}.txt'
    # The report contains Chinese text, so the encoding is fixed to UTF-8
    # instead of depending on the platform default.
    with open(txt_path, 'w', encoding='utf-8') as f:
        f.write(f'检测结果 - {img_path}\n')
        f.write('-' * 50 + '\n')
        for detection in detections:
            f.write(f"类别: {detection['class']}, 置信度: {detection['confidence']:.4f}\n")
            f.write(f"边界框: {detection['box']}\n")
            f.write('-' * 30 + '\n')
    print(f'检测信息保存到: {txt_path}')

## 8. 总结

在本教程中，我们完成了以下任务:
1. 安装了必要的依赖
2. 加载了预训练的 YOLOv3 模型
3. 对图像进行了目标检测
4. 以 2x2 格式可视化了原始图像和检测结果
5. 解析并打印了检测结果数据（类别、置信度、边界框）
6. 保存了检测结果和信息

YOLOv3 是经典的目标检测模型，更多信息可参考官方文档: https://pjreddie.com/darknet/yolo/
