验证环境

In [None]:
from ultralytics import YOLO
# Smoke test for the environment: paths of a sample image and of the
# pretrained YOLOv11n weights.
source = '/home/szh/work/Weed_Detection/cat.jpg'  # change to your own image path
model_path = '/home/szh/work/Weed_Detection/ultralytics-main/yolo11n.pt'

# Load the pretrained weights, then run inference with the extra save=True option.
model = YOLO(model_path)
model.predict(source=source, save=True)

数据集格式转yolo格式

In [None]:
import os
import json
import math

# Class IDs of the target labels
LABELS = {
    "weed": 0,  # every label other than "mq" is collapsed into "weed", class_id 0
    "mq": 1     # "mq" maps to class_id 1
}

def convert_to_yolo_format(points, img_width, img_height):
    """Convert a two-point circle annotation into a normalized YOLO box.

    ``points[0]`` is used as the circle centre and ``points[1]`` as a point on
    its circumference (this looks like the labelme "circle" shape layout --
    confirm against the annotation tool). The box is the axis-aligned square
    enclosing that circle, clipped to the image so that no part of it lies
    outside ``[0, img_width] x [0, img_height]``.

    Args:
        points: Sequence whose first two items are ``(x, y)`` pixel pairs.
        img_width: Image width in pixels.
        img_height: Image height in pixels.

    Returns:
        Tuple ``(center_x, center_y, width, height)``, each expressed as a
        fraction of the corresponding image dimension (YOLO label order
        without the leading class id).
    """
    x_1, y_1 = points[0]
    x_2, y_2 = points[1]

    # Radius = distance between the centre and the circumference point.
    r = math.sqrt((x_2 - x_1) ** 2 + (y_2 - y_1) ** 2)

    # Corners of the enclosing square, clipped to the image: a circle drawn
    # close to the border would otherwise produce a box reaching outside it.
    x1 = min(max(x_1 - r, 0.0), img_width)
    y1 = min(max(y_1 - r, 0.0), img_height)
    x2 = min(max(x_1 + r, 0.0), img_width)
    y2 = min(max(y_1 + r, 0.0), img_height)

    # Normalized centre and size of the (possibly clipped) box.
    center_x = (x1 + x2) / 2.0 / img_width
    center_y = (y1 + y2) / 2.0 / img_height

    width = (x2 - x1) / img_width
    height = (y2 - y1) / img_height

    return center_x, center_y, width, height

def process_json_label(json_file, output_dir):
    """Convert one JSON annotation file into a YOLO-format ``.txt`` label file.

    The JSON is expected to provide ``imageWidth``, ``imageHeight``,
    ``imagePath`` and a ``shapes`` list whose items carry ``label`` and
    ``points``. The output file is named after the image (without extension)
    and written to ``output_dir``; each shape becomes one line
    ``class_id center_x center_y width height``.

    Args:
        json_file: Path of the JSON annotation file to read.
        output_dir: Existing directory that receives the ``.txt`` file.
    """
    with open(json_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    img_width = data['imageWidth']
    img_height = data['imageHeight']

    # Image file name without extension. imagePath may have been written on
    # Windows (e.g. "..\\images\\a.jpg"); normalise the separators so that
    # basename() also strips the directory part on POSIX systems.
    image_path = data['imagePath'].replace('\\', '/')
    image_filename = os.path.splitext(os.path.basename(image_path))[0]

    # Path of the YOLO-format label file to produce.
    yolo_txt_path = os.path.join(output_dir, f"{image_filename}.txt")

    # Convert every shape first so that a malformed shape cannot leave a
    # half-written label file behind.
    lines = []
    for shape in data['shapes']:
        # Only "mq" keeps its own class; every other label becomes "weed".
        class_id = LABELS['mq'] if shape['label'] == 'mq' else LABELS['weed']

        center_x, center_y, width, height = convert_to_yolo_format(
            shape['points'], img_width, img_height
        )

        # Line format: class_id center_x center_y width height
        lines.append(f"{class_id} {center_x} {center_y} {width} {height}\n")

    with open(yolo_txt_path, 'w', encoding='utf-8') as yolo_file:
        yolo_file.writelines(lines)

def convert_dataset(json_dir, output_dir):
    """Convert every ``.json`` annotation in ``json_dir`` to a YOLO label file.

    ``output_dir`` is created if it does not exist yet. Entries of
    ``json_dir`` whose name does not end with ``.json`` are ignored.
    """
    # Make sure the destination directory exists.
    os.makedirs(output_dir, exist_ok=True)

    # Walk over the directory entries and convert the JSON ones.
    json_names = (entry for entry in os.listdir(json_dir) if entry.endswith('.json'))
    for entry in json_names:
        process_json_label(os.path.join(json_dir, entry), output_dir)

if __name__ == "__main__":
    # Directory containing the input JSON annotation files
    json_dir = "/home/szh/work/Weed_Detection/train/labels"
    # Directory that receives the YOLO-format label files
    output_dir = "/home/szh/work/Weed_Detection/train/labels_process"
    
    convert_dataset(json_dir, output_dir)

模型训练--5折交叉验证

In [None]:
import argparse
import datetime
from itertools import chain
import os
from pathlib import Path
import shutil
import yaml
import pandas as pd
from collections import Counter
from sklearn.model_selection import KFold
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor
from ultralytics import YOLO

# Worker-thread count: all cores but one, clamped to [1, 8].
# os.cpu_count() may return None when the count cannot be determined, so fall
# back to 1 instead of failing on `None - 1`.
NUM_THREADS = min(8, max(1, (os.cpu_count() or 1) - 1))

def parse_opt():
    """Parse the command-line options of the K-fold split script.

    Returns:
        argparse.Namespace with:
            data: dataset root directory.
            ksplit: number of folds (int).
            im_suffixes: list of image file suffixes (without the dot).
    """
    parser = argparse.ArgumentParser()

    parser.add_argument('--data', default='/home/szh/work/Weed_Detection/train')  # dataset root
    parser.add_argument('--ksplit', default=5, type=int)  # number of K-fold splits
    # nargs='+' keeps the value a list when given on the command line
    # (e.g. `--im_suffixes jpg bmp`); without it a single string would be
    # stored and later iterated character by character.
    parser.add_argument('--im_suffixes', nargs='+', default=['jpg', 'png', 'jpeg'],
                        help='images suffix')  # image file suffixes

    return parser.parse_args()

def run(func, this_iter, desc="Processing"):
    """Apply ``func`` to every item of ``this_iter`` on a thread pool.

    Progress is reported through a tqdm bar labelled ``desc``. ``this_iter``
    must support ``len()`` because its length is used as the bar total.

    Returns:
        List with the results of ``func``, as yielded by ``executor.map``.
    """
    pool = ThreadPoolExecutor(max_workers=NUM_THREADS, thread_name_prefix='MyThread')
    with pool as executor:
        mapped = executor.map(func, this_iter)
        progress = tqdm(mapped, total=len(this_iter), desc=desc)
        # Materialise inside the `with` so all tasks finish before shutdown.
        return list(progress)

def main(opt):
    """Build the on-disk datasets for K-fold cross-validation.

    Steps:
      1. Collect the images under ``<data>/images`` and the YOLO ``.txt``
         labels under ``<data>/labels``.
      2. Count the labels per class for every label file.
      3. Split the label files into ``ksplit`` folds with a seeded, shuffled
         ``KFold`` and record a train/val assignment per fold.
      4. Create ``<data>/<date>_<k>-Fold_Cross-Valid/split_<n>/{train,val}/
         {images,labels}`` and one dataset YAML per fold.
      5. Write the YAML paths to ``<data>/yaml_paths.txt`` and copy every
         image/label pair into its fold directories.

    Args:
        opt: Namespace providing ``data``, ``ksplit`` and ``im_suffixes``.
    """
    dataset_path, ksplit, im_suffixes = Path(opt.data), opt.ksplit, opt.im_suffixes

    save_path = dataset_path / f'{datetime.date.today().isoformat()}_{ksplit}-Fold_Cross-Valid'
    save_path.mkdir(parents=True, exist_ok=True)

    # Lists of all image and label files.
    images = sorted(chain(*((dataset_path / "images").rglob(f'*.{ext}') for ext in im_suffixes)))
    labels = sorted((dataset_path / "labels").rglob("*.txt"))

    root_directory = Path.cwd()
    print("当前文件运行根目录:", root_directory)
    if len(images) != len(labels):
        print('*' * 20)
        print('当前数据集和标签数量不一致！！！')
        print('*' * 20)

    # Load the class names from the YAML file.
    yaml_file = '/home/szh/work/Weed_Detection/ultralytics-main/classes.yaml'
    with open(yaml_file, 'r', encoding="utf8") as y:
        classes = yaml.safe_load(y)['names']
    # `names` may be given as a plain list; turn it into an {index: name}
    # mapping so the code below can rely on dict semantics.
    if isinstance(classes, (list, tuple)):
        classes = dict(enumerate(classes))
    cls_idx = sorted(classes.keys())

    # DataFrame holding the per-class label count of every label file,
    # indexed by the file's base name (no extension).
    indx = [l.stem for l in labels]
    labels_df = pd.DataFrame([], columns=cls_idx, index=indx)

    for label in labels:
        lbl_counter = Counter()
        with open(label, 'r') as lf:
            for line in lf:
                fields = line.split()
                if not fields:
                    continue  # tolerate blank lines instead of crashing in int()
                # The first field of a YOLO label line is the integer class id.
                lbl_counter[int(fields[0])] += 1
        labels_df.loc[label.stem] = lbl_counter

    # Classes that never occur in a file are NaN at this point; make them 0.
    labels_df = labels_df.fillna(0.0)

    kf = KFold(n_splits=ksplit, shuffle=True, random_state=20)  # fixed seed for reproducible splits
    kfolds = list(kf.split(labels_df))
    folds = [f'split_{n}' for n in range(1, ksplit + 1)]
    folds_df = pd.DataFrame(index=indx, columns=folds)

    # Mark every file as 'train' or 'val' for each fold. A single .loc call
    # with (rows, column) is used on purpose: the chained form
    # `folds_df[col].loc[rows] = ...` writes into a temporary object and does
    # not update folds_df when pandas Copy-on-Write is active.
    for idx, (train, val) in enumerate(kfolds, start=1):
        folds_df.loc[labels_df.iloc[train].index, f'split_{idx}'] = 'train'
        folds_df.loc[labels_df.iloc[val].index, f'split_{idx}'] = 'val'

    # Per-fold ratio of validation to training label counts for each class.
    fold_lbl_distrb = pd.DataFrame(index=folds, columns=cls_idx)
    for n, (train_indices, val_indices) in enumerate(kfolds, start=1):
        train_totals = labels_df.iloc[train_indices].sum()
        val_totals = labels_df.iloc[val_indices].sum()

        # A small value (1E-7) in the denominator avoids division by zero.
        ratio = val_totals / (train_totals + 1E-7)
        fold_lbl_distrb.loc[f'split_{n}'] = ratio
    # Show the ratios so that badly unbalanced folds are noticed early.
    print("Validation/train label ratio per fold:")
    print(fold_lbl_distrb)

    ds_yamls = []

    for split in folds_df.columns:
        split_dir = save_path / split
        for subset in ('train', 'val'):
            for kind in ('images', 'labels'):
                (split_dir / subset / kind).mkdir(parents=True, exist_ok=True)

        dataset_yaml = split_dir / f'{split}_dataset.yaml'
        ds_yamls.append(dataset_yaml.as_posix())
        # Anchor the fold directory at the current working directory (a no-op
        # when the dataset path is already absolute).
        split_root = (root_directory / split_dir).as_posix()

        with open(dataset_yaml, 'w', encoding='utf8') as ds_y:
            yaml.safe_dump({
                'train': split_root + '/train/images',
                'val': split_root + '/val/images',
                'names': classes
            }, ds_y)

    # Record the dataset YAML paths, one per line, for the training step.
    with open(dataset_path / 'yaml_paths.txt', 'w') as f:
        f.writelines(path + '\n' for path in ds_yamls)

    args_list = [(image, save_path, folds_df) for image in images]

    run(split_images_labels, args_list, desc=f"Creating dataset")

def split_images_labels(args):
    """Copy one image and its label file into every fold directory.

    Args:
        args: Tuple ``(image, save_path, folds_df)`` where ``image`` is the
            image path, ``save_path`` the root of the fold directories and
            ``folds_df`` a DataFrame indexed by file stem whose columns are
            fold names and whose values are the subset directory names.

    The label is looked up as ``<image dir>/../labels/<stem>.txt``; images
    without such a label file are skipped.
    """
    image, save_path, folds_df = args
    label = image.parents[1] / 'labels' / f'{image.stem}.txt'
    if not label.exists():
        return

    assignments = folds_df.loc[image.stem]
    for split, k_split in assignments.items():
        # Destination directory of this fold/subset.
        target_root = save_path / split / k_split
        shutil.copy(image, target_root / 'images' / image.name)
        shutil.copy(label, target_root / 'labels' / label.name)


if __name__ == "__main__":
    # Build the K-fold datasets, then train one model per fold.
    opt = parse_opt()
    main(opt)

    model_path = '/home/szh/work/Weed_Detection/ultralytics-main/yolo11s.pt'

    # Read back the dataset YAML paths written by main(), one per line,
    # dropping the trailing newline and any blank line.
    with open(Path(opt.data) / 'yaml_paths.txt', 'r') as f:
        ds_yamls = [line.strip() for line in f if line.strip()]

    # Show the loaded list of dataset YAML paths.
    print(ds_yamls)

    for dataset_yaml in ds_yamls[:opt.ksplit]:
        # Start every fold from the same pretrained weights. Reusing a single
        # model object across folds would continue training from the previous
        # fold, whose training set contains this fold's validation images.
        model = YOLO('ultralytics/cfg/models/11/yolo11s.yaml')  # build a new model from YAML
        model.load(model_path)

        name = Path(dataset_yaml).stem
        model.train(data=dataset_yaml, name=name, epochs=100, patience=15, imgsz=640,
                    device=[0, 1], optimizer='SGD', workers=8, batch=64, amp=False,
                    iou=0.5, cos_lr=True)

    print("*"*40)
    print("K-Fold Cross Validation Completed.")
    print("*"*40)



模型训练结果绘图

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Load the per-epoch training log written by the training run.
# Point this at the results.csv of the run you want to inspect.
file_path = '/home/szh/work/Weed_Detection/ultralytics-main/runs/detect/split_1_dataset/results.csv'

df = pd.read_csv(file_path)
# Some Ultralytics releases pad the header names with spaces; strip them so
# the column lookups below work for both padded and unpadded files.
df.columns = [column.strip() for column in df.columns]

plt.figure(figsize=(12, 8))

# Panels 1-3: training vs. validation curve for each loss term.
loss_panels = [
    (1, 'box_loss', 'Box Loss'),
    (2, 'cls_loss', 'Class Loss'),
    (3, 'dfl_loss', 'DFL Loss'),
]
for position, loss_key, panel_title in loss_panels:
    plt.subplot(2, 2, position)
    plt.plot(df['epoch'], df[f'train/{loss_key}'], label=f'train/{loss_key}', color='blue')
    plt.plot(df['epoch'], df[f'val/{loss_key}'], label=f'val/{loss_key}', color='red')
    plt.xlabel('Epoch')
    plt.ylabel(panel_title)
    plt.title(panel_title)
    plt.legend()

# Panel 4: detection metrics (precision, recall, mAP50, mAP50-95).
metric_curves = [
    ('metrics/precision(B)', 'Precision', 'green'),
    ('metrics/recall(B)', 'Recall', 'orange'),
    ('metrics/mAP50(B)', 'mAP50', 'purple'),
    ('metrics/mAP50-95(B)', 'mAP50-95', 'brown'),
]
plt.subplot(2, 2, 4)
for column, curve_label, curve_color in metric_curves:
    plt.plot(df['epoch'], df[column], label=curve_label, color=curve_color)
plt.xlabel('Epoch')
plt.ylabel('Metric Value')
plt.title('Precision, Recall, mAP')
plt.legend()

# Avoid overlapping labels between the panels.
plt.tight_layout()

# Display the figure.
plt.show()


测试集测试

In [None]:
import pandas as pd
import os
from ultralytics import YOLO
import pdb
import torch

# Optional: uncomment to pin inference to a specific GPU.
# os.environ["CUDA_VISIBLE_DEVICES"] = "3"
# torch.cuda.set_device(3)

# Load the trained model.
best_model_path = '/home/szh/work/Weed_Detection/ultralytics-main/runs/detect/split_4_dataset/weights/best.pt'
model = YOLO(best_model_path)

# Directory with the test images (change to your own path).
source_dir = '/home/szh/work/Weed_Detection/test/images'
# Destination of the prediction table.
output_csv = '/home/szh/work/Weed_Detection/test/output_predictions_split_4.csv'

# Collect the image files. The extension check ignores case so that files such
# as "IMG_1.JPG" are not skipped, and the list is sorted because os.listdir()
# returns entries in arbitrary order, which would make the row IDs differ
# between runs.
image_files = sorted(
    f for f in os.listdir(source_dir) if f.lower().endswith(('.jpg', '.jpeg', '.png'))
)
print(len(image_files))

# Rows of the output table.
output_data = []

# Run inference image by image; j is the running 1-based row ID.
j = 0
for image_file in image_files:
    # The file name without extension serves as image_id.
    image_id = os.path.splitext(image_file)[0]
    print("image_id = ",image_id)

    image_path = os.path.join(source_dir, image_file)

    results = model.predict(image_path)

    for result in results:
        # One entry per predicted box.
        for box in result.boxes:
            class_id = int(box.cls.item())  # class: 0 is 'weed', 1 is 'mq'

            # Corner coordinates (x_min, y_min, x_max, y_max), moved to the CPU
            # and truncated to integers.
            x_min, y_min, x_max, y_max = (int(v) for v in box.xyxy[0].cpu().numpy())

            # Box size derived from the integer corners.
            width = x_max - x_min
            height = y_max - y_min

            j += 1
            output_data.append([j, image_id, class_id, x_min, y_min, width, height])

# Save the predictions as CSV.
df = pd.DataFrame(output_data, columns=['ID', 'image_id', 'class_id', 'x_min', 'y_min', 'width', 'height'])
df.to_csv(output_csv, index=False)

print(f"预测结果已保存到 '{output_csv}'")