In [None]:
"""
下载 PASCAL VOC 2007 数据集并转换为 YOLO 格式,生成 voc.yaml 文件
PASCAL VOC 2007 数据集是计算机视觉领域中一个著名的标准数据集，主要用于目标检测、图像分类和语义分割等任务。
该数据集包含 9963 张图片，分为训练集（5011 张）和测试集（4952 张），
涵盖 20 个类别，如飞机、自行车、鸟、船、瓶子、公共汽车、汽车、猫、椅子、牛、餐桌、狗、马、摩托车、人、盆栽、羊、沙发、火车和电视显示器。
其标注信息以 XML 格式存储，包含目标的边界框、类别标签等。该数据集是许多经典计算机视觉模型的训练和评估基准。
"""
import os
import requests
import tarfile
from lxml import etree
import shutil

# ----------------------
# 配置参数
# ----------------------
VOC_URL = "http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar"
VOC_TEST_URL = "http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar"
DATA_DIR = "./dataset/"

# ----------------------
# 下载并解压数据集
# ----------------------
def download_and_extract(url, dest_dir):
    if not os.path.exists(dest_dir):
        os.makedirs(dest_dir, exist_ok=True)

    filename = os.path.join(dest_dir, url.split("/")[-1])

    # 下载文件
    if not os.path.exists(filename):
        print(f"Downloading {url}...")
        response = requests.get(url, stream=True)
        with open(filename, "wb") as f:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)

    # 解压文件
    print(f"Extracting {filename}...")
    with tarfile.open(filename) as tar:
        tar.extractall(path=dest_dir)



# 转换VOC格式到YOLO格式
# ----------------------
def convert_voc_to_yolo(voc_dir, output_dir):
    print(f"转换VOC格式到YOLO格式: {output_dir}")

    classes = [
        'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
        'bus', 'car', 'cat', 'chair', 'cow',
        'diningtable', 'dog', 'horse', 'motorbike', 'person',
        'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'
    ]

    # 创建训练和测试子目录
    for split in ["train", "test"]:
        os.makedirs(os.path.join(output_dir, f"images/{split}"), exist_ok=True)
        os.makedirs(os.path.join(output_dir, f"labels/{split}"), exist_ok=True)

    # 对 trainval 和 test 分别处理
    for split in ["trainval", "test"]:
        split_type = "train" if split == "trainval" else "test"
        list_path = os.path.join(voc_dir, f"VOCdevkit/VOC2007/ImageSets/Main/{split}.txt")
        if not os.path.exists(list_path):
            print(f"划分文件不存在: {list_path}")
            continue

        with open(list_path) as f:
            ids = [line.strip() for line in f.readlines()]
        print(f"开始处理 {split_type} 集，共 {len(ids)} 张图片")

        for img_id in ids:
            ann_path = os.path.join(voc_dir, f"VOCdevkit/VOC2007/Annotations/{img_id}.xml")
            if not os.path.exists(ann_path):
                print(f"标注文件不存在: {ann_path}")
                continue

            tree = etree.parse(ann_path)
            root = tree.getroot()

            size = root.find("size")
            width = int(size.find("width").text)
            height = int(size.find("height").text)

            yolo_ann = []
            for obj in root.iter("object"):
                cls = obj.find("name").text
                if cls not in classes:
                    continue
                cls_id = classes.index(cls)

                bbox = obj.find("bndbox")
                xmin = float(bbox.find("xmin").text)
                ymin = float(bbox.find("ymin").text)
                xmax = float(bbox.find("xmax").text)
                ymax = float(bbox.find("ymax").text)

                x_center = (xmin + xmax) / 2 / width
                y_center = (ymin + ymax) / 2 / height
                w = (xmax - xmin) / width
                h = (ymax - ymin) / height

                yolo_ann.append(f"{cls_id} {x_center:.6f} {y_center:.6f} {w:.6f} {h:.6f}")

            # 保存 YOLO 标签
            label_out_path = os.path.join(output_dir, f"labels/{split_type}/{img_id}.txt")
            with open(label_out_path, "w") as f:
                f.write("\n".join(yolo_ann))

            # 拷贝图像
            src = os.path.join(voc_dir, f"VOCdevkit/VOC2007/JPEGImages/{img_id}.jpg")
            dst = os.path.join(output_dir, f"images/{split_type}/{img_id}.jpg")
            if not os.path.exists(dst):
                try:
                    shutil.copy2(src, dst)
                    # os.symlink(src, dst)  # 尝试创建符号链接
                except:
                    print(f"复制图像失败: {src}")

    # 创建 voc.yaml 文件
    with open("voc.yaml", "w") as f:
        names_str = '\n'.join([f"  {i}: {name}" for i, name in enumerate(classes)])
        f.write(
f"""path: {os.path.abspath(output_dir)}
train: images/train
val: images/train
test: images/test
names:
{names_str}
"""
        )
    print("✅ VOC 转 YOLO 完成，生成 voc.yaml")


# # 下载并解压训练集和测试集
download_and_extract(VOC_URL, DATA_DIR)
download_and_extract(VOC_TEST_URL, DATA_DIR)
# # 执行格式转换 voc->yolo
convert_voc_to_yolo(DATA_DIR, DATA_DIR+"/output")

Downloading http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar...
Extracting ./dataset/VOCtrainval_06-Nov-2007.tar...
Downloading http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar...
Extracting ./dataset/VOCtest_06-Nov-2007.tar...
转换VOC格式到YOLO格式: ./dataset//output
开始处理 train 集，共 5011 张图片
开始处理 test 集，共 4952 张图片
✅ VOC 转 YOLO 完成，生成 voc.yaml


In [None]:
"""
基于YOLO V8的PASCAL VOC 2007的目标检测任务
"""
!pip install ultralytics
# 使用预训练模型
from ultralytics import YOLO
from PIL import Image, ImageDraw, ImageFont
device = 'cuda'  # 使用GPU训练,可选cuda或cpu


Collecting ultralytics
  Downloading ultralytics-8.3.108-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8.0->ultralytics)
  Downloading n

In [None]:
!nvidia-smi
# 加载预训练模型
model = YOLO("baseModel/yolov8n.pt")  # 使用预训练模型
print("模型加载完成") if model else print("模型加载失败")

Tue Apr 15 12:28:22 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   43C    P8              9W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

100%|██████████| 6.25M/6.25M [00:00<00:00, 103MB/s]

模型加载完成





In [3]:
# 训练模型
# 训练参数配置
# model.train(
#     data='voc.yaml',
#     epochs=30,  # 训练轮数
#     batch=64,   # 批处理大小
#     imgsz=640,  # 输入图像大小
#     device=0 if device == "cuda" else "cpu",
#     optimizer='AdamW',  # 优化器
#     lr0=0.0001, # 初始学习率,可选0.001、0.01、0.1等
#     augment=True,
#     # resume=True,  # 中断继续训练
#     # multi_scale=True, 随机改变输入的大小
#     save=True, # 是否保存模型,可选True或False
#     exist_ok=True # 是否覆盖已有模型,可选True或False

# )
model.train(
    data='voc.yaml',
    epochs=40,              # 增加训练轮数
    batch=32,
    imgsz=640,
    device=0,
    optimizer='SGD',        # 改用更适合目标检测的SGD优化器
    lr0=0.001,               # 适当提高初始学习率
    lrf=0.1,                # 添加余弦退火最终学习率
    cos_lr=True,            # 启用余弦学习率调度
    momentum=0.937,           # 调整动量参数
    weight_decay=0.0005,
    warmup_epochs=3.0,      # 适当延长预热
    augment=True,
    hsv_h=0.015,            # 增强颜色空间变换
    hsv_s=0.7,
    hsv_v=0.4,
    degrees=5.0,           # 增加旋转角度范围
    translate=0.2,          # 增加平移幅度
    scale=0.5,              # 增大缩放幅度
    shear=0.5,              # 增加剪切幅度
    perspective=0.001,      # 添加透视变换
    flipud=0.2,             # 增加上下翻转概率
    fliplr=0.5,
    mosaic=0.8,             # 保持mosaic增强
    mixup=0.1,              # 添加mixup增强
    copy_paste=0.1,         # 添加copy-paste增强
    close_mosaic=15,        # 延迟关闭mosaic
    multi_scale=(0.67, 1.33),       # 启用多尺度训练
    erasing=0.5,            # 增加随机擦除概率
    auto_augment='randaugment', # 使用更强数据增强
    nbs=128,                # 增加nominal batch size
    box=5.0,                # 调整损失函数权重
    cls=1.5,
    dfl=1.0,
    save=True,
    exist_ok=True,
    amp=True,               # 保持混合精度训练
    pretrained=True,        # 确保使用预训练权重
    resume=False,
    save=True,          # 保存训练结果
)



Ultralytics 8.3.111 🚀 Python-3.11.12 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=baseModel/yolov8n.pt, data=voc.yaml, epochs=40, time=None, patience=100, batch=32, imgsz=640, save=True, save_period=-1, cache=False, device=0, workers=8, project=None, name=train, exist_ok=True, pretrained=True, optimizer=SGD, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=True, close_mosaic=15, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=True, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=True, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, show_boxes=True, line_width=Non

[34m[1mtrain: [0mScanning /content/dataset/output/labels/train.cache... 5011 images, 0 backgrounds, 0 corrupt: 100%|██████████| 5011/5011 [00:00<?, ?it/s]


[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 904.2±579.9 MB/s, size: 73.4 KB)


[34m[1mval: [0mScanning /content/dataset/output/labels/train.cache... 5011 images, 0 backgrounds, 0 corrupt: 100%|██████████| 5011/5011 [00:00<?, ?it/s]


Plotting labels to runs/detect/train/labels.jpg... 
[34m[1moptimizer:[0m SGD(lr=0.001, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 2 dataloader workers
Logging results to [1mruns/detect/train[0m
Starting training for 40 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/40      11.8G      1.124       4.06      1.114        185        416: 100%|██████████| 157/157 [02:02<00:00,  1.28it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:48<00:00,  1.63it/s]


                   all       5011      15662      0.636      0.037     0.0554     0.0313

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/40      9.65G      1.086      3.523      1.072        133        928: 100%|██████████| 157/157 [01:58<00:00,  1.33it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:45<00:00,  1.74it/s]


                   all       5011      15662      0.783      0.108      0.172     0.0954

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/40      8.79G      1.073      3.163      1.074        168        544: 100%|██████████| 157/157 [01:56<00:00,  1.35it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:43<00:00,  1.83it/s]


                   all       5011      15662      0.594      0.342      0.399      0.219

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/40      9.26G      1.076      2.914       1.07        156        320: 100%|██████████| 157/157 [01:56<00:00,  1.35it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:49<00:00,  1.61it/s]


                   all       5011      15662      0.644      0.493      0.546        0.3

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/40      11.2G      1.092      2.771      1.072        207        608: 100%|██████████| 157/157 [02:02<00:00,  1.28it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:46<00:00,  1.71it/s]


                   all       5011      15662      0.661      0.541        0.6      0.321

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/40      11.8G      1.079       2.65      1.074        121        672: 100%|██████████| 157/157 [01:55<00:00,  1.36it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:42<00:00,  1.88it/s]


                   all       5011      15662      0.706      0.588      0.658      0.356

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/40      9.18G      1.073      2.584      1.066        152        448: 100%|██████████| 157/157 [01:56<00:00,  1.35it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:42<00:00,  1.86it/s]


                   all       5011      15662      0.724      0.618      0.681      0.378

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/40      11.2G      1.072      2.565      1.067        183        704: 100%|██████████| 157/157 [01:55<00:00,  1.36it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:42<00:00,  1.88it/s]


                   all       5011      15662      0.721      0.626      0.695      0.393

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/40        11G      1.053      2.507       1.06        115        576: 100%|██████████| 157/157 [01:55<00:00,  1.36it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:45<00:00,  1.72it/s]


                   all       5011      15662      0.739      0.637      0.706      0.406

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/40      14.2G      1.064      2.507      1.053        153        800: 100%|██████████| 157/157 [01:51<00:00,  1.40it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:41<00:00,  1.91it/s]


                   all       5011      15662      0.739      0.648      0.713      0.417

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      11/40      11.5G       1.05      2.455      1.047        167        576: 100%|██████████| 157/157 [01:52<00:00,  1.39it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:41<00:00,  1.89it/s]


                   all       5011      15662      0.749      0.649       0.72      0.411

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      12/40      11.1G      1.049      2.425      1.055        170        768: 100%|██████████| 157/157 [01:52<00:00,  1.40it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:42<00:00,  1.87it/s]


                   all       5011      15662      0.751      0.646      0.722      0.405

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      13/40      12.8G      1.053      2.399      1.046        181        704: 100%|██████████| 157/157 [01:50<00:00,  1.42it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:41<00:00,  1.90it/s]


                   all       5011      15662      0.751       0.66      0.729      0.416

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      14/40      10.6G      1.045       2.37      1.046        153        704: 100%|██████████| 157/157 [01:51<00:00,  1.40it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:42<00:00,  1.88it/s]


                   all       5011      15662      0.763      0.663      0.735      0.429

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      15/40      12.6G      1.048      2.381      1.051        168        608: 100%|██████████| 157/157 [01:51<00:00,  1.41it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:43<00:00,  1.84it/s]


                   all       5011      15662      0.768      0.671      0.745      0.426

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      16/40      12.8G      1.035      2.331      1.037        158        928: 100%|██████████| 157/157 [01:50<00:00,  1.42it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:40<00:00,  1.93it/s]


                   all       5011      15662      0.788      0.669      0.751      0.438

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      17/40      11.4G      1.043      2.354      1.049        158        480: 100%|██████████| 157/157 [01:53<00:00,  1.38it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:41<00:00,  1.90it/s]


                   all       5011      15662       0.78      0.663      0.752      0.435

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      18/40      9.43G      1.036      2.323      1.037        133        672: 100%|██████████| 157/157 [01:51<00:00,  1.41it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:43<00:00,  1.83it/s]


                   all       5011      15662      0.779      0.676      0.751      0.441

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      19/40      10.1G      1.033      2.309      1.046        162        608: 100%|██████████| 157/157 [01:51<00:00,  1.41it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:41<00:00,  1.91it/s]


                   all       5011      15662      0.788      0.675      0.756       0.44

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      20/40      10.8G      1.035      2.315       1.05        129        448: 100%|██████████| 157/157 [01:53<00:00,  1.38it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:40<00:00,  1.93it/s]


                   all       5011      15662      0.783      0.679      0.757      0.437

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      21/40        13G      1.035      2.305       1.04        186        448: 100%|██████████| 157/157 [01:52<00:00,  1.40it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:43<00:00,  1.80it/s]


                   all       5011      15662      0.777      0.688      0.761      0.441

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      22/40      10.4G      1.031      2.275       1.04        197        608: 100%|██████████| 157/157 [01:50<00:00,  1.42it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:41<00:00,  1.90it/s]


                   all       5011      15662      0.784      0.685      0.766      0.454

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      23/40      9.44G      1.032      2.301      1.051        185        704: 100%|██████████| 157/157 [01:53<00:00,  1.39it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:41<00:00,  1.91it/s]


                   all       5011      15662      0.785      0.692      0.768      0.446

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      24/40      12.6G      1.029      2.263      1.029        147        352: 100%|██████████| 157/157 [01:49<00:00,  1.43it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:41<00:00,  1.90it/s]


                   all       5011      15662      0.794      0.686       0.77      0.447

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      25/40      10.6G      1.019      2.263      1.032        134        640: 100%|██████████| 157/157 [01:52<00:00,  1.40it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:41<00:00,  1.89it/s]


                   all       5011      15662      0.787      0.694      0.769      0.443
Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      26/40      11.6G     0.9022      2.325     0.9935         45        480: 100%|██████████| 157/157 [01:37<00:00,  1.61it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:42<00:00,  1.88it/s]


                   all       5011      15662      0.777      0.683      0.761      0.447

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      27/40         9G     0.9088      2.328      1.001         61        736: 100%|██████████| 157/157 [01:34<00:00,  1.67it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:40<00:00,  1.94it/s]


                   all       5011      15662      0.784      0.691      0.771      0.458

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      28/40      11.3G      0.895      2.229     0.9825         92        608: 100%|██████████| 157/157 [01:35<00:00,  1.64it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:40<00:00,  1.96it/s]


                   all       5011      15662      0.784      0.698      0.778      0.465

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      29/40        11G     0.8994      2.255     0.9827        103        416: 100%|██████████| 157/157 [01:35<00:00,  1.65it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:41<00:00,  1.89it/s]


                   all       5011      15662      0.795      0.698      0.782      0.471

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      30/40      10.1G     0.8922      2.218     0.9876         67        832: 100%|██████████| 157/157 [01:34<00:00,  1.66it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:40<00:00,  1.93it/s]


                   all       5011      15662      0.791      0.704       0.78      0.468

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      31/40      9.52G     0.9024       2.24     0.9849         69        704: 100%|██████████| 157/157 [01:35<00:00,  1.64it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:40<00:00,  1.93it/s]


                   all       5011      15662      0.798      0.707      0.788      0.471

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      32/40      10.6G     0.8933      2.219     0.9811         63        352: 100%|██████████| 157/157 [01:34<00:00,  1.66it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:40<00:00,  1.95it/s]


                   all       5011      15662      0.814      0.705      0.794       0.48

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      33/40      12.2G     0.8881      2.173     0.9737         49        320: 100%|██████████| 157/157 [01:34<00:00,  1.67it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:40<00:00,  1.95it/s]


                   all       5011      15662      0.817      0.703      0.793      0.474

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      34/40      10.5G     0.8853      2.198     0.9883         68        416: 100%|██████████| 157/157 [01:34<00:00,  1.66it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:40<00:00,  1.97it/s]


                   all       5011      15662       0.81       0.71      0.796      0.488

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      35/40      10.9G     0.8874       2.17     0.9741         53        736: 100%|██████████| 157/157 [01:35<00:00,  1.65it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:40<00:00,  1.93it/s]


                   all       5011      15662      0.817      0.708      0.797      0.484

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      36/40      9.44G     0.8865       2.15     0.9708         54        448: 100%|██████████| 157/157 [01:32<00:00,  1.69it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:41<00:00,  1.91it/s]


                   all       5011      15662      0.811      0.711      0.796      0.481

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      37/40      10.6G     0.8923      2.161     0.9774         61        352: 100%|██████████| 157/157 [01:34<00:00,  1.66it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:40<00:00,  1.97it/s]


                   all       5011      15662      0.818      0.714        0.8      0.476

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      38/40      11.8G     0.8859      2.151     0.9786         45        448: 100%|██████████| 157/157 [01:35<00:00,  1.65it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:40<00:00,  1.94it/s]


                   all       5011      15662       0.82      0.711      0.799      0.485

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      39/40      9.75G      0.881      2.159     0.9725         99        736:  61%|██████    | 96/157 [00:56<00:36,  1.68it/s]


KeyboardInterrupt: 

In [None]:
model.train(
    data='voc.yaml',
    epochs=70,           # 原来训练了40轮，这里设置为总轮数50（=40+10）
    resume=True          # 启用从上次训练中断处继续训练（自动加载runs/train/exp/weights/last.pt）
)


In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# 预测输出
import os
import cv2
import torch
from PIL import Image, ImageDraw, ImageFont
from ultralytics import YOLO
import matplotlib.pyplot as plt

# ------------ 全局配置 ------------
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
MODEL_PATH = "runs/detect/train/weights/best.pt"
model = YOLO(MODEL_PATH)
INPUT_PATH = "dataset/output/images/test/"  # 输入路径,可以为图片,视频,文件夹,摄像头编号
# INPUT_PATH = "dataset/output/video/test.mp4"  # 输入路径,可以为图片,视频,文件夹,摄像头编号
# INPUT_PATH=0

SAVE = True  # 是否保存预测结果
OUTPUT_PATH = "predict/"  # 预测结果保存路径

# ------------ 工具函数 ------------
def draw_boxes_pil(image, results):
    draw = ImageDraw.Draw(image)
    try:
        font = ImageFont.truetype("arial.ttf", 20)
    except:
        font = ImageFont.load_default()

    for box in results[0].boxes:
        x1, y1, x2, y2 = box.xyxy[0].tolist()
        cls_id = int(box.cls)
        conf = float(box.conf)
        label = f"{model.names[cls_id]} {conf:.2f}"

        text_bbox = font.getbbox(label)
        text_w, text_h = text_bbox[2] - text_bbox[0], text_bbox[3] - text_bbox[1]
        draw.rectangle([x1, y1, x2, y2], outline="red", width=2)
        draw.rectangle([x1, y1 - text_h, x1 + text_w, y1], fill="red")
        draw.text((x1, y1 - text_h), label, fill="white", font=font)

    return image

def save_image(image, save_path, origin_path=None):
    if os.path.isdir(save_path):
        filename = os.path.basename(origin_path)
        save_path = os.path.join(save_path, filename)
    else:
        os.makedirs(os.path.dirname(save_path), exist_ok=True)
    image.save(save_path)
    print(f"✅ 已保存图片: {save_path}")

# ------------ 单图预测 ------------
def predict_image(image_path, save=False, save_path=None):
    image = Image.open(image_path).convert("RGB")
    results = model.predict(image_path, imgsz=640, device=DEVICE)
    image = draw_boxes_pil(image, results)

    plt.imshow(image)
    plt.axis("off")
    plt.title("预测结果")
    plt.show()

    if save and save_path:
        save_image(image, save_path, origin_path=image_path)

# ------------ 视频预测 ------------
def predict_video(video_path, save=False, save_path=None):
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print("❌ 视频文件无法打开")
        return

    if save:
        if os.path.isdir(save_path):
            filename = os.path.basename(video_path)
            save_path = os.path.join(save_path, f"{os.path.splitext(filename)[0]}.mp4")
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        fps, w, h = cap.get(5), int(cap.get(3)), int(cap.get(4))
        out = cv2.VideoWriter(save_path, fourcc, fps, (w, h))

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        results = model.predict(frame, imgsz=640, device=DEVICE)
        for box in results[0].boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
            cls_id = int(box.cls)
            conf = float(box.conf)
            label = f"{model.names[cls_id]} {conf:.2f}"
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)
            cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

        cv2.imshow("预测中 - 按 Q 退出", frame)
        if save:
            out.write(frame)

        if cv2.waitKey(1) & 0xFF == ord("q"):
            break

    cap.release()
    if save:
        out.release()
        print(f"✅ 已保存视频: {save_path}")
    cv2.destroyAllWindows()

# ------------ 文件夹批量图片 ------------
def predict_folder(folder_path, save=False, output_dir=None):
    for root, _, files in os.walk(folder_path):
        for file in files:
            if file.lower().endswith((".jpg", ".jpeg", ".png", ".bmp", ".tiff")):
                img_path = os.path.join(root, file)
                image = Image.open(img_path).convert("RGB")
                results = model.predict(img_path, imgsz=640, device=DEVICE)
                image = draw_boxes_pil(image, results)

                if save and output_dir:
                    rel_path = os.path.relpath(img_path, folder_path)
                    save_path = os.path.join(output_dir, rel_path)
                    os.makedirs(os.path.dirname(save_path), exist_ok=True)
                    image.save(save_path)

    if save:
        print(f"✅ 文件夹预测完成，结果已保存至: {output_dir}")

# ------------ 摄像头实时预测 ------------
def predict_camera(index=0):
    cap = cv2.VideoCapture(index)
    if not cap.isOpened():
        print(f"❌ 无法打开摄像头 {index}")
        return

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        results = model.predict(frame, imgsz=640, device=DEVICE)
        for box in results[0].boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
            cls_id = int(box.cls)
            conf = float(box.conf)
            label = f"{model.names[cls_id]} {conf:.2f}"
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

        cv2.imshow("摄像头预测 - 按 Q 退出", frame)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break

    cap.release()
    cv2.destroyAllWindows()

# ------------ 总入口函数 ------------
def run_predict(path, save=False, save_path=None):
    if isinstance(path, int):
        predict_camera(index=path)
    elif os.path.isfile(path):
        ext = os.path.splitext(path)[1].lower()
        if ext in [".jpg", ".jpeg", ".png", ".bmp", ".tiff"]:
            predict_image(path, save, save_path)
        elif ext in [".mp4", ".avi", ".mov", ".mkv"]:
            predict_video(path, save, save_path)
    elif os.path.isdir(path):
        predict_folder(path, save, save_path)
    else:
        print("❌ 无效路径，请确认输入正确的图片/视频/文件夹/摄像头编号")

# ------------ 示例调用 ------------
run_predict(INPUT_PATH, SAVE, OUTPUT_PATH)      # 预测输出
