In [1]:
from ultralytics import YOLO
import torch
import numpy as np
import pandas as pd
import cv2
from matplotlib import pyplot as plt
from sklearn.model_selection import KFold

In [2]:
# Use the GPU when CUDA is usable, otherwise stay on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cuda')

In [35]:
def draw_bounding_box(image_path, coordinates):
    """Load an image from disk and draw a green rectangle on it.

    Args:
        image_path: Path of the image file, passed to ``cv2.imread``.
        coordinates: Sequence ``(x, y, w, h)``: top-left corner, width and
            height of the rectangle in pixels.

    Returns:
        The loaded image array with the rectangle drawn on it.

    Raises:
        FileNotFoundError: If the image cannot be read from ``image_path``.
    """
    # Load the image. cv2.imread reports failure by returning None instead of
    # raising, so check explicitly rather than failing later in cv2.rectangle.
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    # Unpack the bounding-box coordinates
    x, y, w, h = coordinates

    # Draw the bounding box on the image
    color = (0, 255, 0)  # green (BGR)
    thickness = 2  # line thickness
    cv2.rectangle(image, (x, y), (x + w, y + h), color, thickness)
    print(image.shape)
    # Hand back the annotated image
    return image

In [40]:
# Sanity check: draw the box x=45, y=0, w=80, h=200 on one training image.
img = draw_bounding_box('train/5.png', [45, 0, 80, 200])

(201, 406, 3)


In [41]:
# Save the annotated image to disk for visual inspection.
cv2.imwrite("5.jpg", img)

True

x y w h

In [56]:
def get_coords(file_name, path, box_w=80, box_h=200, box_y=0):
    """Convert an annotation file into normalized centre-format boxes.

    Every non-blank line of ``file_name`` is comma-separated: field 0 is the
    image name (without extension) and field 2 is the left x coordinate of
    the box in pixels. The image ``{path}/{name}.png`` is read only to obtain
    its width and height for normalization.

    Args:
        file_name: Path of the comma-separated annotation file.
        path: Directory containing the ``.png`` images.
        box_w: Box width in pixels (default 80).
        box_h: Box height in pixels (default 200).
        box_y: Top y coordinate of the box in pixels (default 0).

    Returns:
        A list of ``[x_center, y_center, width, height, image_name]`` entries,
        the four numbers being divided by the image width/height.

    Raises:
        FileNotFoundError: If an image referenced by the file cannot be read.
    """
    output = []
    with open(file_name) as file:
        for line in file:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            labels = line.split(',')
            image_name = labels[0]
            image_path = f"{path}/{image_name}.png"
            image = cv2.imread(image_path)
            # cv2.imread returns None instead of raising on failure.
            if image is None:
                raise FileNotFoundError(f"Could not read image: {image_path}")
            ih, iw = image.shape[:2]
            x = int(labels[2])
            output.append([
                (x + box_w / 2) / iw,      # normalized box centre, x
                (box_y + box_h / 2) / ih,  # normalized box centre, y
                box_w / iw,                # normalized width
                box_h / ih,                # normalized height
                image_name,
            ])
    return output

In [46]:
# Build the normalized boxes for the training annotations.
labels = get_coords("train.txt", "data/images/train")

In [57]:
# Build the normalized boxes for the test annotations.
test_labels = get_coords("test.txt", "data/images/test")

In [58]:
def save_label(labels, path):
    """Write one single-box label file per entry.

    Each file is named ``<name>.txt`` and holds one line
    ``0 <v0> <v1> <v2> <v3>`` where ``0`` is the (only) class id.

    Args:
        labels: Iterable of sequences whose first four items are the box
            values and whose last item is the file name without extension.
        path: Output directory; it is created when it does not exist yet.
    """
    import os  # local import: this cell runs before the later import cell

    # Opening a file inside a missing directory raises FileNotFoundError,
    # so make sure the target directory is there first.
    os.makedirs(path, exist_ok=True)

    for label in labels:
        file_name = f"{path}/{label[-1]}.txt"
        with open(file_name, 'w') as f:
            f.write(f"0 {label[0]} {label[1]} {label[2]} {label[3]}")

In [53]:
# Write one label file per training image.
save_label(labels, 'data/labels/train')

In [59]:
# Write one label file per test image.
# NOTE(review): the boxes were built from data/images/test but the labels are
# written to data/labels/val — presumably config.yaml maps the validation
# split accordingly; verify the image/label directories line up.
save_label(test_labels, 'data/labels/val')

In [61]:
# Start from the pretrained yolov8n.pt checkpoint, move it to the selected
# device and fine-tune it on the dataset described by config.yaml.
# NOTE(review): imgsz is given as a (400, 200) tuple — confirm in the training
# log how Ultralytics interprets a non-integer imgsz for training.
model = YOLO("yolov8n.pt")
model.to(device)
model.train(
    data="config.yaml",
    epochs=100,
    batch=4,
    imgsz=(400, 200),
    amp=False,
    close_mosaic=0,
)

Ultralytics YOLOv8.0.171  Python-3.11.3 torch-2.0.1+cu118 CUDA:0 (NVIDIA GeForce GTX 1650 Ti, 4096MiB)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=config.yaml, epochs=100, patience=50, batch=4, imgsz=(400, 200), save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=None, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=0, resume=False, amp=False, fraction=1.0, profile=False, freeze=None, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, vid_stride=1, stream_buffer=False, line_width=None, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, boxes=True, format=torchscript

ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x000002316EAC0190>
fitness: 0.7859152177352313
keys: ['metrics/precision(B)', 'metrics/recall(B)', 'metrics/mAP50(B)', 'metrics/mAP50-95(B)']
maps: array([    0.76581])
names: {0: '0'}
plot: False
results_dict: {'metrics/precision(B)': 0.9257593548476772, 'metrics/recall(B)': 0.9619565217391305, 'metrics/mAP50(B)': 0.9668928963917184, 'metrics/mAP50-95(B)': 0.7658065867733995, 'fitness': 0.7859152177352313}
save_dir: WindowsPath('runs/detect/train2')
speed: {'preprocess': 0.11855548190087387, 'inference': 4.501130163055105, 'loss': 0.0, 'postprocess': 0.9175919994865497}

In [4]:
# Reload the best checkpoint saved by the training run in runs/detect/train2.
model = YOLO("runs/detect/train2/weights/best.pt")

In [5]:
# Validate the reloaded weights with conf=0.5 and iou=0.5.
metrics = model.val(conf=0.5, iou=0.5)

Ultralytics YOLOv8.0.171  Python-3.11.3 torch-2.0.1+cu118 CUDA:0 (NVIDIA GeForce GTX 1650 Ti, 4096MiB)
Model summary (fused): 168 layers, 3005843 parameters, 0 gradients
[34m[1mval: [0mScanning C:\Users\da4nik\Computer Vision\data\labels\val.cache... 184 images, 10 backgrounds, 0 corrupt: 100%|████[0m
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:09
                   all        194        184      0.932      0.962      0.956      0.781
Speed: 0.3ms preprocess, 4.6ms inference, 0.0ms loss, 1.1ms postprocess per image
Results saved to [1mruns\detect\val5[0m


In [13]:
# Report the headline numbers from the model's latest metrics dictionary.
val_scores = model.metrics.results_dict
print(f"Precision our yolov8 = {val_scores['metrics/precision(B)']}")
print(f"Recall our yolov8 = {val_scores['metrics/recall(B)']}")
print(f"Mean AP at IoU threshold of 0.5 = {val_scores['metrics/mAP50(B)']}")

Precision our yolov8 = 0.9315789473684211
Recall our yolov8 = 0.9619565217391305
Mean AP at IoU threshold of 0.5 = 0.9555007641647859


K-Fold cross-validation

In [4]:
import datetime
import shutil
from pathlib import Path
from collections import Counter
import yaml

In [5]:
# Dataset root; collect every training label file found below it, in sorted order.
dataset_path = Path('./data')
labels = list(dataset_path.rglob("labels/train/*.txt"))
labels.sort()

In [6]:
# Read the class mapping stored under 'names' in the dataset config and keep
# its keys (the class ids) in sorted order.
yaml_file = 'config.yaml'
with open(yaml_file, 'r', encoding="utf8") as config_stream:
    classes = yaml.safe_load(config_stream)['names']
cls_idx = sorted(classes.keys())

In [7]:
# Show the sorted class ids read from the config.
cls_idx

[0]

In [8]:
# One row per label file (indexed by file stem), one column per class id.
indx = [label_path.stem for label_path in labels]
labels_df = pd.DataFrame([], index=indx, columns=cls_idx)

In [9]:
# Count, for every label file, how many boxes of each class it contains.
for label in labels:
    with open(label, 'r') as lf:
        # The class id is the first space-separated token of each label line.
        lbl_counter = Counter(int(row.split(' ')[0]) for row in lf)

    labels_df.loc[label.stem] = lbl_counter

# Classes that never occur in a file are left as NaN above; turn them into 0.0.
labels_df = labels_df.fillna(0.0)

In [10]:
# Five shuffled folds; the fixed random_state keeps the split repeatable.
ksplit = 5
kf = KFold(n_splits=ksplit, random_state=20, shuffle=True)
kfolds = [fold for fold in kf.split(labels_df)]

In [11]:
# Sizes of the two index arrays (train part, held-out part) of the first fold.
print(f"train -> {len(kfolds[0][0])}")
print(f"test -> {len(kfolds[0][1])}")

train -> 200
test -> 50


In [12]:
# Table recording, for every label file and every fold, whether the file
# belongs to the 'train' or the 'val' part of that fold.
folds = [f'split_{n}' for n in range(1, ksplit + 1)]
folds_df = pd.DataFrame(index=indx, columns=folds)

for idx, (train, val) in enumerate(kfolds, start=1):
    split_name = f'split_{idx}'
    # Assign through a single .loc[rows, column] call. The chained form
    # folds_df[column].loc[rows] = ... may write to a temporary object and
    # leaves folds_df untouched when pandas Copy-on-Write is active.
    folds_df.loc[labels_df.iloc[train].index, split_name] = 'train'
    folds_df.loc[labels_df.iloc[val].index, split_name] = 'val'

In [13]:
# Show the train/val assignment of every label file for each split.
folds_df

Unnamed: 0,split_1,split_2,split_3,split_4,split_5
1,val,train,train,train,train
10,val,train,train,train,train
100,train,train,val,train,train
101,train,train,train,train,val
102,train,train,train,val,train
...,...,...,...,...,...
95,val,train,train,train,train
96,train,train,train,val,train
97,val,train,train,train,train
98,train,train,train,train,val


Distribution of class labels for each fold

In [14]:
# For each fold, the per-class ratio of validation counts to training counts.
fold_lbl_distrb = pd.DataFrame(columns=cls_idx, index=folds)

for n, (train_indices, val_indices) in enumerate(kfolds, start=1):
    val_totals = labels_df.iloc[val_indices].sum()
    train_totals = labels_df.iloc[train_indices].sum()

    # The small constant 1E-7 keeps the denominator away from zero.
    ratio = val_totals / (train_totals + 1E-7)
    fold_lbl_distrb.loc[f'split_{n}'] = ratio

In [15]:
supported_extensions = ['.jpg', '.jpeg', '.png']

# Gather the training images: one sorted batch per supported extension.
images = []
for ext in supported_extensions:
    images += sorted((dataset_path / 'images/train').rglob(f"*{ext}"))

# Output root, named after today's date and the number of folds.
save_path = dataset_path / f'{datetime.date.today().isoformat()}_{ksplit}-Fold_Cross-val'
save_path.mkdir(parents=True, exist_ok=True)
ds_yamls = []

for split in folds_df.columns:
    # Every split gets train/val folders, each with images and labels inside.
    split_dir = save_path / split
    split_dir.mkdir(parents=True, exist_ok=True)
    for subset in ('train', 'val'):
        for kind in ('images', 'labels'):
            (split_dir / subset / kind).mkdir(parents=True, exist_ok=True)

    # Dataset description file for this split; its path is remembered in ds_yamls.
    dataset_yaml = split_dir / f'{split}_dataset.yaml'
    ds_yamls.append(dataset_yaml)

    split_config = {
        'path': split_dir.as_posix(),
        'train': 'train',
        'val': 'val',
        'names': classes
    }
    with open(dataset_yaml, 'w') as ds_y:
        yaml.safe_dump(split_config, ds_y)

In [16]:
# Pair every image with the label file that shares its stem. Pairing by
# position (zip) silently mismatches files as soon as an image has no label
# file or the images use more than one extension, because `images` is sorted
# per extension while `labels` is sorted as a whole.
labels_by_stem = {label_file.stem: label_file for label_file in labels}

for image in images:
    label = labels_by_stem.get(image.stem)
    if label is None:
        # No annotation file means no row in folds_df, so no fold assignment.
        print(f"Skipping {image.name}: no matching label file")
        continue

    for split, k_split in folds_df.loc[image.stem].items():
        # Destination directories: <save_path>/<split>/<train|val>/{images,labels}
        img_to_path = save_path / split / k_split / 'images'
        lbl_to_path = save_path / split / k_split / 'labels'

        # Copy image and label into the fold folder; shutil.copy overwrites
        # a destination file that already exists.
        shutil.copy(image, img_to_path / image.name)
        shutil.copy(label, lbl_to_path / label.name)

In [17]:
# Pretrained checkpoint used as the starting point for the cross-validation runs.
weights_path = 'yolov8n.pt'
model2 = YOLO(weights_path, task='detect')

In [18]:
# List the per-split dataset YAML files that were generated.
print(ds_yamls)

[WindowsPath('data/2023-11-11_5-Fold_Cross-val/split_1/split_1_dataset.yaml'), WindowsPath('data/2023-11-11_5-Fold_Cross-val/split_2/split_2_dataset.yaml'), WindowsPath('data/2023-11-11_5-Fold_Cross-val/split_3/split_3_dataset.yaml'), WindowsPath('data/2023-11-11_5-Fold_Cross-val/split_4/split_4_dataset.yaml'), WindowsPath('data/2023-11-11_5-Fold_Cross-val/split_5/split_5_dataset.yaml')]


In [None]:
results = {}

# Training arguments shared by every fold
batch = 4
project = 'kfold_demo'
epochs = 100

for k, dataset_yaml in enumerate(ds_yamls):
    print(dataset_yaml)
    # Reload the pretrained checkpoint for every fold. Reusing one model
    # object would continue training from the previous fold's weights, which
    # were already fitted on images that belong to this fold's validation
    # part, so the per-fold metrics would be optimistically biased.
    model2 = YOLO(weights_path, task='detect')
    model2.train(data=dataset_yaml, epochs=epochs, batch=batch, project=project,
                 imgsz=(400,200), amp=False, close_mosaic=0)  # include any train arguments
    results[k] = model2.metrics  # save output metrics for further analysis


Ultralytics YOLOv8.0.171  Python-3.11.3 torch-2.0.1+cu118 CUDA:0 (NVIDIA GeForce GTX 1650 Ti, 4096MiB)


data\2023-11-11_5-Fold_Cross-val\split_1\split_1_dataset.yaml


[34m[1mengine\trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=data\2023-11-11_5-Fold_Cross-val\split_1\split_1_dataset.yaml, epochs=100, patience=50, batch=4, imgsz=(400, 200), save=True, save_period=-1, cache=False, device=None, workers=8, project=kfold_demo, name=None, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=0, resume=False, amp=False, fraction=1.0, profile=False, freeze=None, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, vid_stride=1, stream_buffer=False, line_width=None, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, boxes=True, format=torchscript, keras=False, optimize=False, int8=False, dyna