In [None]:
import os
import yaml
import json
import torch_directml
import shutil
import numpy as np
import pandas as pd
import ultralytics
from ultralytics import YOLO
from sklearn.model_selection import train_test_split

# Report whether the DirectML backend is usable and which adapter sits at index 0.
dml_available = torch_directml.is_available()
print("Torch DirectML:", dml_available)

gpu_name = torch_directml.device_name(0)
print("GPU name:", gpu_name)

In [None]:
# Root of the source dataset and root of the split copy produced by this notebook
input_dir = 'annotated_outdoor_yolo'
output_dir = 'dataset_yolo'

# Source sub-folders holding the images and their label files
images_dir, labels_dir = (os.path.join(input_dir, sub) for sub in ('images', 'labels'))

# Make sure the destination root exists (no error if it is already there)
os.makedirs(output_dir, exist_ok=True)

## Setup

In [None]:
# Collect the image file names in sorted order (only ".jpg" is considered; adjust the suffix if needed)
image_files = [name for name in os.listdir(images_dir) if name.endswith(".jpg")]
image_files.sort()

# One integer index per image, following the sorted file-name order
dataset_size = len(image_files)
indices = list(range(dataset_size))

# Two-stage split -> train 70% / val 10% / test 20%:
#   1) hold out 20% of everything for test
#   2) take 12.5% of the remaining 80% (= 10% of the whole) for validation
# The same seed is used for both stages so the split is repeatable.
split_seed = 199742069
train_val_indices, test_indices = train_test_split(indices, test_size=0.2, random_state=split_seed)
train_indices, val_indices = train_test_split(train_val_indices, test_size=0.125, random_state=split_seed)

# Split name -> list of indices into `image_files`
split_data = dict(train=train_indices, val=val_indices, test=test_indices)

# Report absolute and relative sizes of each subset
train_pct = len(train_indices) / len(indices) * 100
val_pct = len(val_indices) / len(indices) * 100
test_pct = len(test_indices) / len(indices) * 100
print(f"Train set size: {len(train_indices)} - {train_pct:.2f}%")
print(f"Validation set size: {len(val_indices)} - {val_pct:.2f}%")
print(f"Test set size: {len(test_indices)} - {test_pct:.2f}%\n")

# Persist the split indices as JSON next to the generated dataset
json_split_path = os.path.join(output_dir, "dataset_split.json")
with open(json_split_path, "w") as split_file:
    split_file.write(json.dumps(split_data, indent=4))

print(f"Dataset split saved to {json_split_path}")

In [None]:
# Copy every image and its label file into <output_dir>/<split>/{images,labels},
# following the index lists stored in `split_data`.

# List and sort both image and label files
image_files = sorted(f for f in os.listdir(images_dir) if f.endswith('.jpg'))
label_files = sorted(f for f in os.listdir(labels_dir) if f.endswith('.txt'))

# Sanity checks: equal counts alone do not prove the files pair up (one unlabelled
# image plus one orphan label would shift every pair between them), so also require
# that each image has a label file with the same stem.
assert len(image_files) == len(label_files), "Image and label counts do not match!"
available_labels = set(label_files)
unlabelled = [
    name for name in image_files
    if os.path.splitext(name)[0] + '.txt' not in available_labels
]
assert not unlabelled, f"Images without a matching label file: {unlabelled[:10]}"

# Define dataset splits
splits = ["train", "val", "test"]

# Create output directories
for split in splits:
    for category in ["images", "labels"]:
        os.makedirs(os.path.join(output_dir, split, category), exist_ok=True)

# NOTE(review): files copied by an earlier run with a different split are not removed
# here; if the split changes, clear the split folders first to avoid leakage.

# Loop over the splits. The loop variable is deliberately not called `indices`,
# so the full index list built by the split cell is not overwritten.
for split, split_indices in split_data.items():
    for idx in split_indices:
        image_name = image_files[idx]
        # Pair by file stem rather than by position in the sorted label list
        label_name = os.path.splitext(image_name)[0] + '.txt'

        image_src = os.path.join(images_dir, image_name)
        label_src = os.path.join(labels_dir, label_name)

        image_dest = os.path.join(output_dir, split, "images", image_name)
        label_dest = os.path.join(output_dir, split, "labels", label_name)

        shutil.copy(image_src, image_dest)
        shutil.copy(label_src, label_dest)

# Print final counts for verification
print("\nCopy complete! Summary:\n")
for split in split_data.keys():
    image_folder = os.path.join(output_dir, split, "images")
    label_folder = os.path.join(output_dir, split, "labels")

    num_images = len([f for f in os.listdir(image_folder) if f.endswith('.jpg')]) if os.path.exists(image_folder) else 0
    num_labels = len([f for f in os.listdir(label_folder) if f.endswith('.txt')]) if os.path.exists(label_folder) else 0

    print(f"Split '{split}': {num_images} images, {num_labels} labels.")

In [None]:
# Create a .yaml file for the YOLO trainer configuration
dataset_yaml_path = os.path.join(output_dir, 'dataset.yaml')
classes_path = os.path.join(input_dir, 'classes.txt')

# Read one class name per non-blank line. The encoding is explicit so the result does
# not depend on the platform locale; "utf-8-sig" also strips a leading BOM if present.
with open(classes_path, 'r', encoding='utf-8-sig') as f:
    names = [line.strip() for line in f if line.strip()]

# Fail early instead of writing a config with zero classes
if not names:
    raise ValueError(f"No class names found in {classes_path}")

print(names)

# Construct .yaml file to include dataset paths (absolute, so the config does not
# depend on the working directory it is later read from)
data_yaml = {
    "names": names,
    "nc": len(names),
    "train": os.path.abspath(os.path.join(output_dir, "train", "images")),
    "val": os.path.abspath(os.path.join(output_dir, "val", "images")),
    "test": os.path.abspath(os.path.join(output_dir, "test", "images"))
}

# Save to .yaml file
with open(dataset_yaml_path, "w", encoding='utf-8') as f:
    yaml.dump(data_yaml, f, default_flow_style=False)

print(f"YAML file saved at {dataset_yaml_path}")


## Model Training

In [None]:
# Starting checkpoint handed to the YOLO constructor
pretrained_weights = 'yolov8n.pt'
model = YOLO(pretrained_weights)

In [None]:
# Set up the GPU device. This machine uses an Intel Arc GPU, so torch_directml
# (already imported in the first cell) is used instead of torch's CUDA backend,
# which only targets Nvidia GPUs.

# Create DirectML device
dml_device = torch_directml.device()

# Move this model object to the DirectML device.
# NOTE(review): this only relocates `model`; the training cell passes its own
# `device` argument, so confirm which device the trainer actually ends up using.
model.to(dml_device)

In [None]:
# Hyperparameters for the YOLO training run
yolo_params = dict(image_size=640, batch_size=16, epochs=100)

# Dataset config written by the YAML cell
dataset_yaml_path = os.path.join(output_dir, 'dataset.yaml')

# Keyword arguments forwarded to model.train().
# NOTE(review): device=0 looks like a CUDA-style device index, while the cell above
# sets up a DirectML device — confirm this is the intended device selection.
# NOTE(review): patience=0 presumably disables early stopping — verify against the
# Ultralytics training documentation.
train_kwargs = {
    'data': dataset_yaml_path,
    'imgsz': yolo_params['image_size'],
    'epochs': yolo_params['epochs'],
    'batch': yolo_params['batch_size'],
    'name': 'yolov8n_outdoor_train',
    'project': os.path.join(output_dir, 'runs'),
    'device': 0,
    'patience': 0,
}

# Train the model
results = model.train(**train_kwargs)

## Test

In [None]:
# Locate the best checkpoint of the training run and load it for evaluation.
# The hard-coded run folder is only correct when the trainer actually wrote to
# 'yolov8n_outdoor_train'; if that folder already existed the run may have been saved
# elsewhere, so prefer the path recorded on the trainer when training ran in this session.
default_best_path = os.path.join(output_dir, 'runs', 'yolov8n_outdoor_train', 'weights', 'best.pt')

# `model` may be undefined or untrained (e.g. training skipped after a kernel restart);
# in that case fall back to the default location.
session_trainer = getattr(globals().get('model'), 'trainer', None)
session_best = getattr(session_trainer, 'best', None)

if session_best is not None and os.path.exists(session_best):
    best_model_path = str(session_best)
else:
    best_model_path = default_best_path

best_model = YOLO(best_model_path)

In [None]:
# Run validation of the best checkpoint against the 'test' split of the dataset config
test_results = best_model.val(data=dataset_yaml_path, split='test', name='test')

# Report the mAP@50 box metric
test_map50 = test_results.box.map50
print(f"mAP@50 test results: {test_map50}")

## Export as ONNX

In [None]:
# Export the trained model to .onnx to later convert to .blob using BlobConverter.
# Export `best_model` (the checkpoint evaluated in the test cell) rather than `model`:
# if training was skipped in this session, `model` is still the pretrained checkpoint.
# NOTE(review): dynamic=True requests dynamic input shapes — confirm the .blob
# conversion accepts them; otherwise export with a fixed image size.
best_model.export(format='onnx', dynamic=True, simplify=True)