# 第7课：目标检测入门

## 学习目标
- 理解目标检测的基本概念
- 了解经典目标检测算法的演进
- 掌握 YOLO 的基本使用
- 学习目标检测的评估指标

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image
import torch
import torchvision
from torchvision import transforms
import warnings
warnings.filterwarnings('ignore')

## 1. 目标检测概述

**目标检测** = 图像分类 + 目标定位

任务：
- 识别图像中有哪些物体（分类）
- 确定物体的位置（边界框）

### 应用场景
- 自动驾驶（行人、车辆检测）
- 安防监控（人脸检测）
- 医学影像（病灶检测）
- 工业质检（缺陷检测）

## 2. 目标检测算法发展

### Two-Stage 检测器（两阶段）
1. **R-CNN** (2014): 选择性搜索 + CNN
2. **Fast R-CNN** (2015): ROI Pooling 加速
3. **Faster R-CNN** (2015): RPN 网络生成候选区域

### One-Stage 检测器（单阶段）
1. **YOLO** (2016): You Only Look Once
2. **SSD** (2016): Single Shot MultiBox Detector
3. **YOLOv2/v3/v4/v5/v8**: YOLO 系列持续演进

### 对比
- Two-Stage: 精度高，速度慢
- One-Stage: 速度快，适合实时检测

## 3. 边界框表示

常见的边界框表示方式：
- **(x, y, w, h)**: 中心点坐标 + 宽高（YOLO 采用此格式；注意 COCO 标注中的 (x, y) 表示左上角坐标）
- **(x1, y1, x2, y2)**: 左上角和右下角坐标

In [None]:
def draw_bbox(ax, bbox, label=None, color='red'):
    """Draw one bounding box, optionally captioned, on a Matplotlib axes.

    Args:
        ax: Axes the rectangle (and caption) is added to.
        bbox: Four values ``(x1, y1, x2, y2)`` giving two opposite corners.
        label: Caption text; nothing is written when it is falsy.
        color: Colour of the box outline and of the caption text.
    """
    left, top, right, bottom = bbox
    outline = patches.Rectangle(
        (left, top),
        right - left,
        bottom - top,
        linewidth=2,
        edgecolor=color,
        facecolor='none',
    )
    ax.add_patch(outline)

    if not label:
        return

    # The caption is anchored at (x1, y1 - 5) on a semi-transparent white pad.
    caption_box = dict(boxstyle='round', facecolor='white', alpha=0.8)
    ax.text(left, top - 5, label, color=color, fontsize=10, bbox=caption_box)

# Demo: two labelled boxes on a 100 x 100 canvas.
fig, ax = plt.subplots(figsize=(8, 6))

# Image coordinate system: the y limits are reversed so y grows downwards.
ax.set_ylim(100, 0)
ax.set_xlim(0, 100)

# Axes decoration.
ax.set_title('Bounding Box Example')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.grid(True, alpha=0.3)

# Draw the boxes.
draw_bbox(ax, [10, 10, 40, 50], label='Object A', color='red')
draw_bbox(ax, [50, 30, 90, 80], label='Object B', color='blue')

plt.show()

## 4. IoU (交并比)

IoU (Intersection over Union) 用于衡量预测框和真实框的重叠程度。

$$IoU = \frac{Area\ of\ Intersection}{Area\ of\ Union}$$

In [None]:
def calculate_iou(box1, box2):
    """
    Compute the Intersection over Union (IoU) of two boxes.

    Args:
        box1: Box given as ``[x1, y1, x2, y2]``.
        box2: Box given as ``[x1, y1, x2, y2]``.

    Returns:
        Intersection area divided by union area, or 0 when the union
        area is not positive.
    """
    # Overlap rectangle: the innermost edge on each side.
    left, top = max(box1[0], box2[0]), max(box1[1], box2[1])
    right, bottom = min(box1[2], box2[2]), min(box1[3], box2[3])

    # A negative extent means the boxes do not overlap on that axis.
    overlap_w = max(0, right - left)
    overlap_h = max(0, bottom - top)
    overlap = overlap_w * overlap_h

    # Union = sum of both areas minus the part counted twice.
    first_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    second_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
    total = first_area + second_area - overlap

    if total > 0:
        return overlap / total
    return 0

# Quick check: a ground-truth box and a prediction offset by 5 on both axes.
box_gt, box_pred = [20, 20, 60, 60], [25, 25, 65, 65]

iou = calculate_iou(box1=box_gt, box2=box_pred)
print(f"IoU: {iou:.4f}")

In [None]:
# Visualise the IoU example: both boxes on one canvas, IoU value in the title.
fig, ax = plt.subplots(figsize=(8, 6))
ax.set_ylim(100, 0)
ax.set_xlim(0, 100)

draw_bbox(ax, box_gt, label='Ground Truth', color='green')
draw_bbox(ax, box_pred, label='Prediction', color='red')

ax.grid(True, alpha=0.3)
ax.set_title(f'IoU = {iou:.4f}')
plt.show()

## 5. NMS (非极大值抑制)

检测器通常会输出多个重叠的边界框，NMS 用于去除冗余框。

In [None]:
def nms(boxes, scores, iou_threshold=0.5):
    """Greedy non-maximum suppression.

    Repeatedly keeps the highest-scoring remaining box and drops every other
    remaining box whose IoU with it is not below ``iou_threshold``.

    Args:
        boxes: Sequence of boxes, each ``[x1, y1, x2, y2]``.
        scores: One score per box, in the same order as ``boxes``.
        iou_threshold: A box survives a round only if its IoU with the box
            kept in that round is strictly below this value.

    Returns:
        list[int]: Indices into ``boxes`` of the kept boxes, in descending
        score order. Empty when ``boxes`` is empty.
    """
    if len(boxes) == 0:
        return []

    # Candidate indices, highest score first.
    order = np.argsort(scores)[::-1]
    keep = []

    while len(order) > 0:
        # Store a plain Python int: NumPy integer scalars would make the
        # returned list print as [np.int64(0), ...] under NumPy >= 2.
        best = int(order[0])
        keep.append(best)

        rest = order[1:]
        if len(rest) == 0:
            break

        # IoU of the kept box against every remaining candidate.
        ious = np.array([calculate_iou(boxes[best], boxes[j]) for j in rest])

        # Only candidates that overlap the kept box less than the threshold
        # stay in the running for the next round.
        order = rest[ious < iou_threshold]

    return keep

# NMS demo: three mutually overlapping boxes plus one separate box.
scores = [0.9, 0.8, 0.7, 0.85]
boxes = [
    [10, 10, 50, 50],
    [12, 12, 52, 52],
    [15, 15, 55, 55],
    [60, 60, 100, 100],
]

keep_indices = nms(boxes=boxes, scores=scores, iou_threshold=0.5)
print(f"保留的框索引: {keep_indices}")

In [None]:
# Side-by-side view of the boxes before and after NMS.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

colors = ['red', 'blue', 'green', 'orange']

# Left panel: every candidate box, captioned with its score.
for i, (box, score) in enumerate(zip(boxes, scores)):
    draw_bbox(axes[0], box, f's={score}', colors[i])

# Right panel: only the boxes NMS kept, in their original colours.
for i in keep_indices:
    draw_bbox(axes[1], boxes[i], f's={scores[i]}', colors[i])

# Both panels share the same canvas setup (reversed y limits, light grid).
for panel, panel_title in zip(axes, ('Before NMS', 'After NMS')):
    panel.set_xlim(0, 120)
    panel.set_ylim(120, 0)
    panel.set_title(panel_title)
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 6. 使用预训练的 Faster R-CNN

In [None]:
# Load a pretrained Faster R-CNN.
from torchvision.models.detection import (
    FasterRCNN_ResNet50_FPN_Weights,
    fasterrcnn_resnet50_fpn,
)

weights = FasterRCNN_ResNet50_FPN_Weights.DEFAULT

# Build the network from those weights and switch it to inference mode.
model = fasterrcnn_resnet50_fpn(weights=weights)
model.eval()

# COCO category names, read from the metadata attached to the weights.
COCO_CLASSES = weights.meta['categories']
print(f"类别数: {len(COCO_CLASSES)}")
print(f"部分类别: {COCO_CLASSES[:10]}...")

In [None]:
def detect_objects(model, image_path, threshold=0.5):
    """Run a detection model on one image file and keep confident detections.

    Args:
        model: Detection model. It is called with a batch tensor of shape
            ``(1, C, H, W)`` and the first element of its output must be a
            mapping with ``'boxes'``, ``'labels'`` and ``'scores'`` tensors.
            NOTE(review): torchvision detection models only return this
            format in eval mode; the model's mode is not changed here.
        image_path: Path of the image file to load.
        threshold: Detections whose score is not strictly greater than this
            value are discarded.

    Returns:
        dict with keys ``'image'`` (the loaded image converted to RGB) and
        ``'boxes'``, ``'labels'``, ``'scores'`` (NumPy arrays holding the
        detections that passed the threshold).
    """
    # Close the file handle deterministically; convert() returns a new
    # in-memory image that stays valid after the file is closed.
    with Image.open(image_path) as source:
        image = source.convert('RGB')

    # PIL image -> float tensor, plus a leading batch dimension.
    image_tensor = transforms.ToTensor()(image).unsqueeze(0)

    # Run on whatever device the model lives on. Models without parameters
    # (or plain callables) keep the tensor where it is.
    parameters = getattr(model, 'parameters', None)
    first_param = next(parameters(), None) if callable(parameters) else None
    if first_param is not None:
        image_tensor = image_tensor.to(first_param.device)

    with torch.no_grad():
        predictions = model(image_tensor)[0]

    # Boolean mask over the detections with sufficient confidence.
    keep = predictions['scores'] > threshold

    # .cpu() is a no-op for CPU tensors and is required before .numpy()
    # when the model ran on an accelerator.
    return {
        'image': image,
        'boxes': predictions['boxes'][keep].cpu().numpy(),
        'labels': predictions['labels'][keep].cpu().numpy(),
        'scores': predictions['scores'][keep].cpu().numpy()
    }

def visualize_detection(result, class_names):
    """Display an image with its detections drawn on top.

    Args:
        result: Mapping with ``'image'``, ``'boxes'``, ``'labels'`` and
            ``'scores'`` entries; boxes are ``(x1, y1, x2, y2)``.
        class_names: Sequence indexed by label that gives the display name.
    """
    fig, ax = plt.subplots(figsize=(12, 8))
    ax.imshow(result['image'])

    # One colour per class, sampled evenly from the HSV colormap.
    palette = plt.cm.hsv(np.linspace(0, 1, len(class_names)))

    detections = zip(result['boxes'], result['labels'], result['scores'])
    for (x1, y1, x2, y2), label, score in detections:
        color = palette[label % len(palette)]

        # Box outline.
        outline = patches.Rectangle(
            (x1, y1),
            x2 - x1,
            y2 - y1,
            linewidth=2,
            edgecolor=color,
            facecolor='none',
        )
        ax.add_patch(outline)

        # Caption "<class>: <score>" on a pad filled with the class colour.
        caption = f'{class_names[label]}: {score:.2f}'
        caption_box = dict(boxstyle='round', facecolor=color, alpha=0.8)
        ax.text(x1, y1 - 5, caption, color='white', fontsize=10, bbox=caption_box)

    ax.axis('off')
    plt.title('Object Detection Result')
    plt.show()

In [None]:
# Create a placeholder test image (nothing is downloaded here).
import os
import tempfile
import urllib.request

# NOTE: save_image and urllib.request are not used in this cell; the imports
# are kept in case other cells rely on them.
from torchvision.utils import save_image

# Use the platform's temp directory instead of a hard-coded '/tmp', which
# typically does not exist on Windows.
test_image_path = os.path.join(tempfile.gettempdir(), 'test_detection.jpg')

# Render a simple figure that only contains an instruction message.
fig, ax = plt.subplots(figsize=(8, 6))
ax.text(0.5, 0.5, '请替换为你自己的图像\n运行检测代码',
        ha='center', va='center', fontsize=16)
ax.set_xlim(0, 1)
ax.set_ylim(0, 1)
ax.axis('off')
plt.savefig(test_image_path)
plt.close()

print(f"测试图像已保存到: {test_image_path}")
print("你可以替换为自己的图像路径来测试检测效果")

In [None]:
# Usage example (replace the placeholder path with a real image):
# result = detect_objects(model, 'your_image.jpg', threshold=0.5)
# visualize_detection(result, COCO_CLASSES)

# One print call, one line of output per argument.
print(
    "使用方法:",
    "result = detect_objects(model, 'your_image.jpg', threshold=0.5)",
    "visualize_detection(result, COCO_CLASSES)",
    sep="\n",
)

## 7. YOLOv5/YOLOv8 使用

YOLO (You Only Look Once) 是最流行的实时目标检测算法之一。本节使用 ultralytics 库提供的 YOLOv8。

In [None]:
# ultralytics provides YOLOv8; install it first if the import below fails:
# pip install ultralytics

try:
    from ultralytics import YOLO
except ImportError:
    # Package missing: tell the reader how to install it.
    print("请先安装: pip install ultralytics")
else:
    print("ultralytics 已安装")

In [None]:
from ultralytics import YOLO

# Load a pretrained YOLOv8 model from the 'yolov8n.pt' checkpoint.
yolo_model = YOLO('yolov8n.pt')  # nano variant: the smallest and fastest

# Report success and the number of entries in the model's class-name table.
print("YOLOv8 模型加载成功")
print(f"模型类别数: {len(yolo_model.names)}")

In [None]:
# YOLOv8 detection helper
def yolo_detect(model, image_path):
    """Run a YOLOv8 model on one image and return its first result.

    Args:
        model: Callable model, invoked as ``model(image_path)``.
        image_path: Image source passed to the model unchanged.

    Returns:
        Element 0 of whatever ``model(image_path)`` returns.
    """
    return model(image_path)[0]

# Usage example
# result = yolo_detect(yolo_model, 'your_image.jpg')
# result.show()  # display the result
# result.save()  # save the result

# One print call, one line of output per argument.
print(
    "YOLOv8 使用方法:",
    "result = yolo_model('your_image.jpg')",
    "result[0].show()  # 显示结果",
    "result[0].boxes  # 获取边界框",
    sep="\n",
)

## 8. 评估指标

### Precision (精确率)
$$Precision = \frac{TP}{TP + FP}$$

### Recall (召回率)
$$Recall = \frac{TP}{TP + FN}$$

### AP (Average Precision)
通过改变置信度阈值可以得到一条 Precision-Recall 曲线，AP 即该曲线下的面积

### mAP (mean Average Precision)
所有类别 AP 的平均值

In [None]:
def calculate_ap(precision, recall):
    """Compute Average Precision (AP) from precision/recall points.

    Sentinel points are added at recall 0 and recall 1, every precision
    value is raised to the maximum of the values after it, and the area is
    summed as rectangles over the positions where recall changes.

    Args:
        precision: Precision values, one per operating point.
        recall: Recall values matching ``precision`` element by element.

    Returns:
        The summed area, as returned by ``np.sum``.
    """
    # Pad both curves with end points.
    rec = np.concatenate([[0], recall, [1]])
    prec = np.concatenate([[0], precision, [0]])

    # Sweep right to left so that precision never increases with recall.
    for pos in reversed(range(len(prec) - 1)):
        if prec[pos + 1] > prec[pos]:
            prec[pos] = prec[pos + 1]

    # Only positions where recall actually changes contribute area.
    steps = np.where(rec[1:] != rec[:-1])[0] + 1
    widths = rec[steps] - rec[steps - 1]
    return np.sum(widths * prec[steps])

# Example operating points: precision falls as recall rises.
precision = np.array([1.0, 0.95, 0.9, 0.85, 0.8, 0.75, 0.7, 0.6, 0.5])
recall = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])

ap = calculate_ap(precision=precision, recall=recall)
print(f"AP: {ap:.4f}")

In [None]:
# Plot the precision-recall curve, shading the area under it.
plt.figure(figsize=(8, 6))
plt.plot(recall, precision, 'b-', linewidth=2)
plt.fill_between(recall, precision, alpha=0.3)

# Fixed axis ranges; the y range leaves a little headroom above 1.
plt.ylim(0, 1.05)
plt.xlim(0, 1)

plt.title(f'Precision-Recall Curve (AP = {ap:.4f})')
plt.ylabel('Precision')
plt.xlabel('Recall')
plt.grid(True, alpha=0.3)
plt.show()

## 9. 练习题

### 练习1：实现 IoU 计算
编写函数计算批量边界框的 IoU

In [None]:
# 在这里编写代码


### 练习2：使用 YOLOv8 检测自己的图像
选择几张图像进行目标检测

In [None]:
# 在这里编写代码


## 10. 本课小结

### 目标检测算法对比

| 算法 | 类型 | 速度 | 精度 | 适用场景 |
|------|------|------|------|----------|
| Faster R-CNN | Two-Stage | 慢 | 高 | 高精度要求 |
| SSD | One-Stage | 快 | 中 | 实时检测 |
| YOLOv8 | One-Stage | 很快 | 高 | 通用场景 |

### 关键概念

1. **边界框 (Bounding Box)**: 用矩形框定位物体
2. **IoU**: 衡量预测框与真实框的重叠程度
3. **NMS**: 去除冗余的检测框
4. **mAP**: 目标检测的主要评估指标

### 实践建议

1. 对于快速原型开发，使用预训练的 YOLOv8
2. 需要高精度时考虑 Faster R-CNN
3. 自定义数据集需要进行微调训练