In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import glob
import shutil
import cv2
import os
from tqdm import tqdm

In [2]:
%pip install ultralytics
import ultralytics
ultralytics.checks()

Ultralytics YOLOv8.0.208 🚀 Python-3.10.12 torch-2.0.0 CUDA:0 (Tesla P100-PCIE-16GB, 16281MiB)
Setup complete ✅ (4 CPUs, 31.4 GB RAM, 5122.2/8062.4 GB disk)


In [3]:
from ultralytics import YOLO

In [4]:
import os
import os.path as pt
import random
import shutil

import pandas as pd
import yaml
from tqdm import tqdm


def exists(path):
    """Ensure that the directory ``path`` exists, creating it if needed.

    Despite its name, this function does not report whether the path exists:
    it creates the directory (including missing parents) and returns ``None``.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` already exists but is not a directory.
    """
    # exist_ok=True makes the call idempotent and removes the
    # check-then-create race of `if not os.path.exists(...): os.makedirs(...)`.
    os.makedirs(path, exist_ok=True)


def move_image(src_folder, dest_folder, image_name):
    """Copy ``image_name`` from ``src_folder`` into ``dest_folder``.

    Note: despite the function's name, the file is copied with
    ``shutil.copy`` and the source file is left in place.

    Args:
        src_folder: Directory that currently contains the image.
        dest_folder: Directory that receives the copy.
        image_name: File name of the image (same name in both folders).
    """
    shutil.copy(
        os.path.join(src_folder, image_name),   # path of the source file
        os.path.join(dest_folder, image_name),  # path of the destination file
    )


def progress(list_, img_target, label_target, labels, data_path, allowed_labels=None):
    """Convert per-image CSV annotations into label text files and copy images.

    For every CSV file name in ``list_`` the CSV is read from ``data_path``.
    Each row is unpacked as ``[file_name, w, h, label, x1, y1, x2, y2]``; rows
    whose label is in ``allowed_labels`` are written as one line
    ``"<class_id> <x_center> <y_center> <width> <height>"`` where the four
    box values are divided by the ``w`` / ``h`` columns of that row.
    The image ``<csv stem>.jpg`` is copied to ``img_target`` and the label file
    ``<csv stem>.txt`` is written to ``label_target`` only when at least one
    row was kept.

    Args:
        list_: Iterable of CSV file names (relative to ``data_path``).
        img_target: Directory receiving the copied ``.jpg`` images.
        label_target: Directory receiving the generated ``.txt`` label files.
        labels: List of class names; mutated in place. A label seen for the
            first time is appended, and its index in this list is the class id.
        data_path: Directory containing both the CSV files and the images.
        allowed_labels: Optional collection of class names to keep. Defaults
            to the nine aircraft classes this notebook was written for.
    """
    if allowed_labels is None:
        allowed_labels = ("JAS39", "B52", "F14", "Tornado", "E2",
                          "Mirage2000", "B2", "J20", "F4")

    for csv_filename in tqdm(list_):
        df = pd.read_csv(os.path.join(data_path, csv_filename))
        # splitext removes only the final extension, so a stem that itself
        # contains dots is no longer truncated at the first dot.
        csv_name = os.path.splitext(csv_filename)[0]
        txt_file_name = csv_name + '.txt'
        img_file_name = csv_name + '.jpg'

        yolo_lines = []
        for row in df.values:
            # row --> [file_name, w, h, label, x1, y1, x2, y2]
            _, w, h, label, x1, y1, x2, y2 = row
            if label not in allowed_labels:
                continue

            if label not in labels:
                labels.append(label)
            class_id = labels.index(label)

            # Box centre and size, normalised by the image width / height.
            x_ = (x1 + x2) / (2 * w)
            y_ = (y1 + y2) / (2 * h)
            w_ = (x2 - x1) / w
            h_ = (y2 - y1) / h
            yolo_lines.append(f"{class_id} {x_} {y_} {w_} {h_}\n")

        if not yolo_lines:
            # No row of an allowed class: neither image nor label file is produced.
            continue

        # Copy the image once per CSV instead of once per matching row.
        move_image(data_path, img_target, img_file_name)
        # Write (not append) so re-running the cell does not duplicate labels.
        with open(pt.join(label_target, txt_file_name), 'w') as f:
            f.writelines(yolo_lines)


def generate_yaml(train_path, val_path, names, nc, base):
    """Write the dataset description file ``mydata.yaml`` into ``base``.

    The file holds four top-level keys: ``train``, ``val``, ``names`` and
    ``nc``, dumped in block (non-flow) YAML style.

    Args:
        train_path: Value stored under the ``train`` key.
        val_path: Value stored under the ``val`` key.
        names: Value stored under the ``names`` key.
        nc: Value stored under the ``nc`` key.
        base: Directory in which ``mydata.yaml`` is created.
    """
    yaml_path = pt.join(base, 'mydata.yaml')
    config = dict(train=train_path, val=val_path, names=names, nc=nc)

    with open(yaml_path, 'w') as stream:
        yaml.dump(config, stream, default_flow_style=False)


def main(save_path, source_path, scale, seed=None):
    """Build a train/val dataset folder (images, labels, yaml) from CSV annotations.

    Steps:
      1. Create ``images/{train,val}`` and ``labels/{train,val}`` under
         ``save_path``.
      2. Group every ``*.csv`` file in ``source_path`` by the label found in
         its first data row.
      3. Shuffle each group and send the first ``int(len(group) * scale)``
         files to the training split and the rest to the validation split.
      4. Convert both splits with ``progress`` and write ``mydata.yaml`` with
         ``generate_yaml``.

    Args:
        save_path: Output root directory.
        source_path: Directory containing the CSV annotations and the images.
        scale: Fraction of each label group assigned to the training split.
        seed: Optional seed for the shuffle. ``None`` (default) keeps using the
            global ``random`` state, as before; an integer makes the split
            reproducible across runs.
    """
    # Create the output directories for images and label text files.
    base = save_path
    img_path_train = pt.join(base, 'images', 'train')
    img_path_val = pt.join(base, 'images', 'val')
    label_path_train = pt.join(base, 'labels', 'train')
    label_path_val = pt.join(base, 'labels', 'val')
    for directory in (img_path_train, img_path_val, label_path_train, label_path_val):
        exists(directory)

    data_path = source_path
    # os.listdir returns entries in arbitrary order; sorting keeps the grouping
    # (and therefore a seeded shuffle) deterministic.
    csv_filenames = sorted(
        filename for filename in os.listdir(data_path) if filename.endswith('.csv')
    )

    # Group CSV files by the label of their first row so that each label group
    # is split into train and val by the same proportion.
    files_by_label = {}
    for csv_name in csv_filenames:
        # Only the first data row is needed, so do not parse the whole file.
        first_row = pd.read_csv(os.path.join(data_path, csv_name), nrows=1)
        if first_row.empty:
            continue  # a CSV without data rows belongs to no group
        # Column layout: [file_name, w, h, label, x1, y1, x2, y2]
        label = first_row.values[0][3]
        files_by_label.setdefault(label, []).append(csv_name)

    rng = random.Random(seed) if seed is not None else random
    train_files = []
    valid_files = []
    for group in files_by_label.values():
        rng.shuffle(group)
        num_train = int(len(group) * scale)

        train_files.extend(group[:num_train])
        valid_files.extend(group[num_train:])

    Alabels = []
    # Report the train/val ratio; guard against an empty validation split
    # (e.g. scale=1.0 or no CSV files), which used to raise ZeroDivisionError.
    if valid_files:
        print(len(train_files) / len(valid_files))
    else:
        print(f"{len(train_files)} training files, 0 validation files")

    progress(train_files, img_path_train, label_path_train, Alabels, data_path)
    progress(valid_files, img_path_val, label_path_val, Alabels, data_path)

    # Class index -> class name, in the order the labels were first encountered.
    names = {i: name for i, name in enumerate(Alabels)}
    nc = len(Alabels)
    generate_yaml(img_path_train, img_path_val, names, nc, base)


if __name__ == "__main__":
    # Build the dataset under /kaggle/working/data from the Kaggle input
    # directory, sending 90% of each label group to the training split.
    main(
        save_path='/kaggle/working/data',
        source_path='/kaggle/input/militaryaircraftdetectiondataset/dataset',
        scale=0.9,
    )

8.855325914149443


100%|██████████| 11140/11140 [01:15<00:00, 148.38it/s]
100%|██████████| 1258/1258 [00:07<00:00, 161.40it/s]


In [5]:
# Load the 'yolov8n.pt' checkpoint (per this cell's output it is downloaded
# from the Ultralytics assets release when not present locally).
model = YOLO('yolov8n.pt')

# Train on the dataset description written by main() in the previous cell.
# NOTE(review): lrf is presumably the final learning-rate fraction - confirm
# against the Ultralytics training settings documentation.
results = model.train(
    data='/kaggle/working/data/mydata.yaml',
    epochs=15,
    imgsz=640,
    lrf=0.1,
)

Downloading https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt to 'yolov8n.pt'...
100%|██████████| 6.23M/6.23M [00:00<00:00, 175MB/s]
Ultralytics YOLOv8.0.208 🚀 Python-3.10.12 torch-2.0.0 CUDA:0 (Tesla P100-PCIE-16GB, 16281MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=/kaggle/working/data/mydata.yaml, epochs=15, patience=50, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, vid_s