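### Step 1: Install Dependencies

This notebook needs `ultralytics`, `albumentations`, `opencv-python`, `scikit-learn`, `numpy` and `pyyaml` in the kernel's environment (for example `%pip install ultralytics albumentations opencv-python scikit-learn numpy pyyaml`).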

### Step 2: Setup and Imports

In [None]:
import os
import shutil
import numpy as np
import cv2
from pathlib import Path
from sklearn.model_selection import train_test_split
import albumentations as A
from ultralytics import YOLO
import yaml

# Root folder of the lab; every dataset location below is derived from it.
BASE_DIR = Path("/Users/arsknz/osnov/code/sbor_data/lab_7")
ORIGINAL_DATA = BASE_DIR.joinpath("2.v1i.yolov11")
SPLIT_DATA = BASE_DIR.joinpath("dataset_split")
AUGMENTED_DATA = BASE_DIR.joinpath("dataset_augmented")

# Echo the resolved locations (one per line) so they can be checked before
# any later cell creates or deletes files there.
print(
    f"Base directory: {BASE_DIR}",
    f"Original data: {ORIGINAL_DATA}",
    f"Split data will be saved to: {SPLIT_DATA}",
    f"Augmented data will be saved to: {AUGMENTED_DATA}",
    sep="\n",
)

View Ultralytics Settings with 'yolo settings' or at '/Users/arsknz/Library/Application Support/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Base directory: /Users/arsknz/osnov/code/sbor_data/lab_7
Original data: /Users/arsknz/osnov/code/sbor_data/lab_7/2.v1i.yolov11
Split data will be saved to: /Users/arsknz/osnov/code/sbor_data/lab_7/dataset_split
Augmented data will be saved to: /Users/arsknz/osnov/code/sbor_data/lab_7/dataset_augmented
Base directory: /Users/arsknz/osnov/code/sbor_data/lab_7
Original data: /Users/arsknz/osnov/code/sbor_data/lab_7/2.v1i.yolov11
Split data will be saved to: /Users/arsknz/osnov/code/sbor_data/lab_7/dataset_split
Augmented data will be saved to: /Users/arsknz/osnov/code/sbor_data/lab_7/dataset_augmented


### Step 3: Split Dataset (Train/Val/Test)

In [None]:
def split_dataset(
    source_dir, output_dir, train_ratio=0.7, val_ratio=0.15, test_ratio=0.15
):
    """Copy the images under ``source_dir/train`` into train/val/test folders.

    Images are listed from ``source_dir / "train" / "images"`` and their label
    files (same base name, ``.txt``) are looked up in
    ``source_dir / "train" / "labels"``. Each image is copied to
    ``output_dir / <split> / "images"``; its label file is copied to
    ``output_dir / <split> / "labels"`` when it exists. An image without a
    label file is still copied.

    The split is done with two ``train_test_split`` calls using
    ``random_state=42``: the test part is taken first, then the remainder is
    divided between train and val.

    Args:
        source_dir: Path object (must support ``/``) of the source dataset root.
        output_dir: Path object (must support ``/``) of the destination root;
            the split folders are created if missing.
        train_ratio: Fraction of images for the train split.
        val_ratio: Fraction of images for the val split.
        test_ratio: Fraction of images for the test split.

    Returns:
        dict mapping ``"train"``, ``"val"`` and ``"test"`` to the list of image
        file names assigned to that split.

    Raises:
        ValueError: If any ratio is not positive or the ratios do not sum to 1.
    """
    # Validate before touching the filesystem: with ratios that do not sum to 1
    # the two-stage split below would silently produce different proportions
    # than the ones requested.
    ratios = (train_ratio, val_ratio, test_ratio)
    if min(ratios) <= 0 or abs(sum(ratios) - 1.0) > 1e-6:
        raise ValueError(
            "train_ratio, val_ratio and test_ratio must all be positive and "
            f"sum to 1, got {ratios}"
        )

    for split in ["train", "val", "test"]:
        (output_dir / split / "images").mkdir(parents=True, exist_ok=True)
        (output_dir / split / "labels").mkdir(parents=True, exist_ok=True)

    images_dir = source_dir / "train" / "images"
    labels_dir = source_dir / "train" / "labels"

    # Compare extensions case-insensitively so files such as "IMG_1.JPG" are
    # not dropped; sorting keeps the seeded split independent of listing order.
    image_suffixes = (".jpg", ".jpeg", ".png")
    image_files = sorted(
        name for name in os.listdir(images_dir) if name.lower().endswith(image_suffixes)
    )
    print(f"Total images: {len(image_files)}")

    train_val, test = train_test_split(
        image_files, test_size=test_ratio, random_state=42
    )

    # The second split works on the remainder, so the train share has to be
    # rescaled relative to train + val.
    train, val = train_test_split(
        train_val, train_size=train_ratio / (train_ratio + val_ratio), random_state=42
    )

    splits = {"train": train, "val": val, "test": test}

    for split, files in splits.items():
        for img_file in files:
            label_file = os.path.splitext(img_file)[0] + ".txt"

            src_img = images_dir / img_file
            dst_img = output_dir / split / "images" / img_file
            shutil.copy2(src_img, dst_img)

            src_label = labels_dir / label_file
            if src_label.exists():
                dst_label = output_dir / split / "labels" / label_file
                shutil.copy2(src_label, dst_label)

        print(f"{split.upper()}: {len(files)} images")

    return splits


# Remove the output of any earlier run first; split_dataset only adds files,
# so leftovers would otherwise stay mixed in with the new split.
if SPLIT_DATA.exists():
    shutil.rmtree(SPLIT_DATA)

splits = split_dataset(source_dir=ORIGINAL_DATA, output_dir=SPLIT_DATA)
print("‚úì Dataset split completed")

Total images: 500
TRAIN: 350 images
VAL: 75 images
TEST: 75 images
‚úì Dataset split completed
TRAIN: 350 images
VAL: 75 images
TEST: 75 images
‚úì Dataset split completed


### Step 4: Create data.yaml for YOLOv11

In [None]:
# Dataset description passed to training/validation: dataset root, image
# folder of each split (relative to the root), class count and class names.
data_yaml = dict(
    path=str(SPLIT_DATA),
    train="train/images",
    val="val/images",
    test="test/images",
    nc=1,
    names=["fruit"],
)

yaml_path = SPLIT_DATA / "data.yaml"
with yaml_path.open("w") as f:
    yaml.dump(data_yaml, f, default_flow_style=False)

# Report where the file went and show the configuration that was written.
print(
    f"‚úì Created data.yaml at {yaml_path}",
    "\nData configuration:",
    yaml.dump(data_yaml),
    sep="\n",
)

‚úì Created data.yaml at /Users/arsknz/osnov/code/sbor_data/lab_7/dataset_split/data.yaml

Data configuration:
names:
- fruit
nc: 1
path: /Users/arsknz/osnov/code/sbor_data/lab_7/dataset_split
test: test/images
train: train/images
val: val/images



### Step 5: Train YOLOv11 on Original Dataset

In [None]:
# Baseline run: load the `yolo11n.pt` checkpoint and train it on the
# un-augmented split described by `yaml_path`.
model = YOLO("yolo11n.pt")

print("Training YOLOv11 on original dataset...")
results = model.train(
    # Data and output location: the run is stored under BASE_DIR/runs.
    data=str(yaml_path),
    project=str(BASE_DIR / "runs"),
    name="yolov11_original",
    # Schedule.
    epochs=50,
    patience=10,
    # Input size and batching.
    imgsz=640,
    batch=16,
    # Runtime options.
    device=0,
    save=True,
    verbose=True,
)

print("‚úì Training on original dataset completed")

Training YOLOv11 on original dataset...
Ultralytics 8.3.235 üöÄ Python-3.10.18 torch-2.8.0 CPU (Apple M3)
Ultralytics 8.3.235 üöÄ Python-3.10.18 torch-2.8.0 CPU (Apple M3)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/Users/arsknz/osnov/code/sbor_data/lab_7/dataset_split/data.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=50, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolo11n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name

KeyboardInterrupt: 

### Step 6: Evaluate on Test Set

In [None]:
# Training was started with project=BASE_DIR / "runs" and
# name="yolov11_original", so the run lives in <project>/<name>. Ultralytics
# only inserts the task folder ("detect") when no project is given, so the
# weights are NOT under runs/detect/.
best_weights_original = BASE_DIR / "runs" / "yolov11_original" / "weights" / "best.pt"
if not best_weights_original.exists():
    raise FileNotFoundError(
        f"{best_weights_original} not found. Run the training cell first; if the "
        "run folder already existed, Ultralytics appends a number to the run "
        "name (e.g. 'yolov11_original2')."
    )

best_model = YOLO(best_weights_original)

print("Testing on test set...")
test_results = best_model.val(
    data=str(yaml_path), split="test", imgsz=640, batch=16, device=0
)

print("\n‚úì Test results:")
# Read mAP@0.5 from the box metrics object: the keys of `results_dict` carry a
# "(B)" suffix ("metrics/mAP50(B)"), so 'metrics/mAP50' raises KeyError.
print(f"mAP50: {test_results.box.map50:.4f}")

### Step 7: Data Augmentation using Albumentations

In [None]:
def _read_yolo_labels(label_path):
    """Parse a YOLO label file into Albumentations-ready boxes and class ids.

    Each usable line is ``class x_center y_center width height``. Lines with
    fewer than five fields are ignored and only the first five fields are used.

    Returns:
        tuple ``(bboxes, class_labels)`` where ``bboxes`` is a list of
        ``[x_center, y_center, width, height]`` floats and ``class_labels`` is
        the parallel list of integer class ids. Both are empty when the file
        does not exist.
    """
    bboxes, class_labels = [], []
    if not label_path.exists():
        return bboxes, class_labels

    with open(label_path, "r") as f:
        for line in f:
            parts = line.strip().split()
            if len(parts) >= 5:
                class_labels.append(int(float(parts[0])))
                bboxes.append([float(p) for p in parts[1:5]])
    return bboxes, class_labels


def augment_dataset(source_dir, output_dir, augmentation_factor=2):
    """Build an augmented copy of a YOLO-format dataset.

    The ``val`` and ``test`` splits are copied unchanged. Every training image
    is copied as-is and is additionally written ``augmentation_factor`` times
    with random Albumentations transforms applied; bounding boxes are
    transformed together with the image and saved as
    ``<stem>_aug_<i>.txt`` next to ``<stem>_aug_<i><suffix>``.

    Args:
        source_dir: Path object of the split dataset root containing
            ``train``/``val``/``test`` folders, each with ``images`` and
            ``labels`` sub-folders.
        output_dir: Path object of the destination root (folders are created).
        augmentation_factor: Number of augmented variants per training image.

    Returns:
        int: number of training images written (originals plus augmented).
    """
    image_suffixes = {".jpg", ".jpeg", ".png"}

    for split in ["train", "val", "test"]:
        (output_dir / split / "images").mkdir(parents=True, exist_ok=True)
        (output_dir / split / "labels").mkdir(parents=True, exist_ok=True)

    # Evaluation splits must stay untouched so both models are scored on the
    # same data.
    for split in ["val", "test"]:
        src_images = source_dir / split / "images"
        src_labels = source_dir / split / "labels"
        dst_images = output_dir / split / "images"
        dst_labels = output_dir / split / "labels"

        for file in src_images.glob("*"):
            shutil.copy2(file, dst_images / file.name)
        for file in src_labels.glob("*"):
            shutil.copy2(file, dst_labels / file.name)

        print(f"Copied {split} set: {len(list(src_images.glob('*')))} images")

    # Class ids travel in a separate label field: with format="yolo" the box
    # itself must be [x_center, y_center, width, height]. Passing the raw label
    # row ([class, x, y, w, h]) would make Albumentations read the class id as
    # x_center and shift every coordinate by one position.
    transform = A.Compose(
        [
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.3),
            A.Rotate(limit=30, p=0.7),
            A.RandomBrightnessContrast(p=0.5),
            A.GaussNoise(p=0.3),
            A.Blur(blur_limit=3, p=0.3),
            A.Affine(shear=(-10, 10), p=0.3),
            A.RandomRain(p=0.1),
            A.RandomFog(p=0.1),
        ],
        bbox_params=A.BboxParams(
            format="yolo", min_visibility=0.3, label_fields=["class_labels"]
        ),
    )

    # Pipeline for images without boxes; built once instead of per iteration.
    simple_transform = A.Compose(
        [
            A.HorizontalFlip(p=0.5),
            A.Rotate(limit=20, p=0.7),
            A.RandomBrightnessContrast(p=0.5),
        ]
    )

    src_train_images = source_dir / "train" / "images"
    src_train_labels = source_dir / "train" / "labels"
    dst_train_images = output_dir / "train" / "images"
    dst_train_labels = output_dir / "train" / "labels"

    # Only real image files: stray entries (e.g. ".DS_Store") would make
    # cv2.imread return None and crash the colour conversion below.
    image_files = sorted(
        p for p in src_train_images.glob("*") if p.suffix.lower() in image_suffixes
    )
    print(f"\nAugmenting {len(image_files)} training images...")

    counter = 0
    for img_idx, img_path in enumerate(image_files, 1):
        image = cv2.imread(str(img_path))
        if image is None:
            print(f"  Skipping unreadable image: {img_path.name}")
            continue
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        label_path = src_train_labels / (img_path.stem + ".txt")
        bboxes, class_labels = _read_yolo_labels(label_path)

        # Keep the original sample byte-for-byte (no lossy re-encode); the
        # label file is optional, matching how the split step treats it.
        shutil.copy2(img_path, dst_train_images / img_path.name)
        if label_path.exists():
            shutil.copy2(label_path, dst_train_labels / label_path.name)
        counter += 1

        for aug_idx in range(augmentation_factor):
            if bboxes:
                transformed = transform(
                    image=image, bboxes=bboxes, class_labels=class_labels
                )
                aug_image = transformed["image"]
                aug_bboxes = transformed["bboxes"]
                aug_labels = transformed["class_labels"]
            else:
                aug_image = simple_transform(image=image)["image"]
                aug_bboxes, aug_labels = [], []

            aug_img_name = f"{img_path.stem}_aug_{aug_idx}{img_path.suffix}"
            cv2.imwrite(
                str(dst_train_images / aug_img_name),
                cv2.cvtColor(aug_image, cv2.COLOR_RGB2BGR),
            )

            # Write back in YOLO order: integer class id first, then the box.
            # The file is written even when empty (all boxes dropped by
            # min_visibility, or none to begin with).
            aug_label_name = f"{img_path.stem}_aug_{aug_idx}.txt"
            with open(dst_train_labels / aug_label_name, "w") as f:
                for cls_id, bbox in zip(aug_labels, aug_bboxes):
                    coords = " ".join(f"{float(v):.6f}" for v in bbox[:4])
                    f.write(f"{int(cls_id)} {coords}\n")

            counter += 1

        if img_idx % 50 == 0:
            print(f"  Processed {img_idx}/{len(image_files)} images")

    return counter


# Drop the output of any previous run so old augmented files do not pile up
# next to the newly generated ones.
if AUGMENTED_DATA.exists():
    shutil.rmtree(AUGMENTED_DATA)

aug_count = augment_dataset(
    source_dir=SPLIT_DATA,
    output_dir=AUGMENTED_DATA,
    augmentation_factor=2,
)

### Step 8: Create data.yaml for Augmented Dataset

In [None]:
# Same layout as the configuration of the original split, but rooted at the
# augmented dataset folder.
data_yaml_aug = dict(
    path=str(AUGMENTED_DATA),
    train="train/images",
    val="val/images",
    test="test/images",
    nc=1,
    names=["fruit"],
)

yaml_path_aug = AUGMENTED_DATA / "data.yaml"
with yaml_path_aug.open("w") as f:
    yaml.dump(data_yaml_aug, f, default_flow_style=False)

### Step 9: Train YOLOv11 on Augmented Dataset

In [None]:
# Start from the same pretrained checkpoint as the baseline run so the only
# difference between the two models is the training data. The weight file is
# named "yolo11n.pt" (no "v"); "yolov11n.pt" cannot be resolved by Ultralytics.
model_aug = YOLO("yolo11n.pt")

print("Training YOLOv11 on augmented dataset...")
# Hyper-parameters are kept identical to the baseline run for a fair comparison.
results_aug = model_aug.train(
    data=str(yaml_path_aug),
    epochs=50,
    imgsz=640,
    batch=16,
    patience=10,
    device=0,
    project=str(BASE_DIR / "runs"),
    name="yolov11_augmented",
    save=True,
    verbose=True,
)

print("‚úì Training on augmented dataset completed")

### Step 10: Compare Results

In [None]:
# Both runs were trained with project=BASE_DIR / "runs", so their folders are
# <project>/<name>; there is no "detect" level (Ultralytics adds the task
# folder only when no project is given).
best_model_original = YOLO(
    BASE_DIR / "runs" / "yolov11_original" / "weights" / "best.pt"
)
best_model_augmented = YOLO(
    BASE_DIR / "runs" / "yolov11_augmented" / "weights" / "best.pt"
)

print("Testing models on test set...")

print("\n--- Original Model ---")
test_results_original = best_model_original.val(
    data=str(yaml_path), split="test", imgsz=640, batch=16, device=0
)

print("\n--- Augmented Model ---")
test_results_augmented = best_model_augmented.val(
    data=str(yaml_path_aug), split="test", imgsz=640, batch=16, device=0
)

print("\n" + "=" * 50)
print("COMPARISON: Original vs Augmented")
print("=" * 50)

# Use the box metrics object: `results_dict` keys carry a "(B)" suffix, so
# `.get("metrics/mAP50", 0)` always fell back to 0 and the percentage below
# divided by zero.
original_map = float(test_results_original.box.map50)
augmented_map = float(test_results_augmented.box.map50)

print(f"\nOriginal Model mAP50:   {original_map:.4f}")
print(f"Augmented Model mAP50:  {augmented_map:.4f}")

improvement = augmented_map - original_map
# A relative change is undefined when the baseline scored exactly 0.
if original_map > 0:
    relative_improvement = f"{improvement / original_map * 100:.2f}%"
else:
    relative_improvement = "n/a"
print(f"\nImprovement: {improvement:.4f} ({relative_improvement})")

if augmented_map > original_map:
    print("\n‚úì Augmentation improved model performance!")
else:
    print("\n‚ö† Augmented model performed similarly or slightly lower")

### Step 11: Summary and Model Export

In [None]:
print("\n" + "=" * 60)
print("TRAINING PIPELINE COMPLETED")
print("=" * 60)

print("\nDataset Structure:")
print(f"  Original: {ORIGINAL_DATA}")
print(f"  Split: {SPLIT_DATA}")
print(f"  Augmented: {AUGMENTED_DATA}")

# Runs were trained with project=BASE_DIR / "runs", so each run folder is
# BASE_DIR/runs/<name>; the previously printed "runs/detect/..." locations do
# not exist.
print("\nTraining Results:")
print(f"  Model 1 (Original): {BASE_DIR / 'runs' / 'yolov11_original'}")
print(f"  Model 2 (Augmented): {BASE_DIR / 'runs' / 'yolov11_augmented'}")

print("\nBest Models:")
print(
    f"  Original: {BASE_DIR / 'runs' / 'yolov11_original' / 'weights' / 'best.pt'}"
)
print(
    f"  Augmented: {BASE_DIR / 'runs' / 'yolov11_augmented' / 'weights' / 'best.pt'}"
)

# Export the augmented model (loaded in the comparison cell) for deployment.
print("\nExporting best augmented model...")
best_model_augmented.export(format="onnx")
best_model_augmented.export(format="torchscript")
print("‚úì Models exported to ONNX and TorchScript formats")