In [None]:
from IPython.display import clear_output
!pip install ultralytics
clear_output()

In [None]:
import os
import shutil
import random
from glob import glob
from ultralytics import YOLO
import yaml

In [None]:
# === Input Dataset Path ===
# Root of the source dataset; it contains 'images/' and 'labels/'.
base_dataset_path = "/kaggle/input/yolo-dataset/dataset"

# === Output Path ===
# Root of the split dataset and the names of the splits created beneath it.
output_path = "yolo-split-dataset"
splits = ["train", "val", "test"]

# === Create Folder Structure ===
# Builds <output_path>/images/<split> and <output_path>/labels/<split>;
# exist_ok=True makes re-running this cell harmless.
for split in splits:
    for subdir in ("images", "labels"):
        os.makedirs(os.path.join(output_path, subdir, split), exist_ok=True)

In [None]:
# Split configuration: fixed seed for a reproducible split, plus the train/val
# fractions (whatever is left over after train and val becomes the test set).
split_seed = 42
train_ratio = 0.7
val_ratio = 0.15

# Get all image files in 'images' folder. glob() returns entries in an arbitrary,
# filesystem-dependent order, so sort them to get a stable order before shuffling.
image_paths = sorted(glob(os.path.join(base_dataset_path, "images", "*.*")))


def _has_label(img_path):
    """Return True when labels/<image stem>.txt exists for the given image path."""
    file_stem = os.path.splitext(os.path.basename(img_path))[0]
    label_path = os.path.join(base_dataset_path, "labels", f"{file_stem}.txt")
    return os.path.exists(label_path)


# Filter images that have corresponding label files
filtered_image_paths = [img_path for img_path in image_paths if _has_label(img_path)]
image_paths = filtered_image_paths

# Fail early with a clear message instead of silently producing empty splits
if not image_paths:
    raise FileNotFoundError(
        f"No labelled images found under {base_dataset_path} "
        "(expected 'images/<name>.<ext>' with a matching 'labels/<name>.txt')"
    )

# Shuffle with a dedicated, seeded RNG: the split is the same on every
# Restart & Run All and the global `random` state is left untouched.
random.Random(split_seed).shuffle(image_paths)

# Compute split sizes (int() truncates, so any remainder falls into the test set)
total = len(image_paths)
train_size = int(total * train_ratio)
val_size = int(total * val_ratio)

train_files = image_paths[:train_size]
val_files = image_paths[train_size : train_size + val_size]
test_files = image_paths[train_size + val_size :]

print(f"Total images: {total}")
print(f"Train: {len(train_files)}, Val: {len(val_files)}, Test: {len(test_files)}")

In [None]:
def copy_files(file_list, split_name):
    """Copy every image in ``file_list`` and its label file into one split.

    Args:
        file_list: Iterable of source image paths.
        split_name: Name of the split sub-folder under ``output_path``
            ("images/<split_name>" and "labels/<split_name>") to copy into.

    The label for each image is read from ``<base_dataset_path>/labels/<stem>.txt``,
    where ``<stem>`` is the image file name without its extension. Files are
    copied with ``shutil.copy2``; the image is copied before its label.
    """
    for src_img in file_list:
        img_name = os.path.basename(src_img)
        stem = os.path.splitext(img_name)[0]
        lbl_name = f"{stem}.txt"
        src_lbl = os.path.join(base_dataset_path, "labels", lbl_name)

        # (source path, destination sub-folder, destination file name)
        copy_jobs = (
            (src_img, "images", img_name),
            (src_lbl, "labels", lbl_name),
        )
        for src, subdir, dst_name in copy_jobs:
            shutil.copy2(src, os.path.join(output_path, subdir, split_name, dst_name))


# Copy each split's images and labels into its own sub-folder, in train/val/test order.
for split_name, split_files in (
    ("train", train_files),
    ("val", val_files),
    ("test", test_files),
):
    copy_files(split_files, split_name)

print("Files copied into train, val, and test folders successfully.")

In [None]:
# Dataset description written to data.yaml: one absolute image directory per
# split, followed by the class metadata.
data_yaml = {
    split_key: os.path.abspath(os.path.join(output_path, "images", split_key))
    for split_key in ("train", "val", "test")
}
data_yaml["nc"] = 1  # number of classes; change if more
data_yaml["names"] = ["sidewalk"]  # list of class names

yaml_path = os.path.join(output_path, "data.yaml")

# Serialize in block (non-flow) style.
with open(yaml_path, "w") as yaml_file:
    yaml.dump(data_yaml, yaml_file, default_flow_style=False)

print(f"Created data.yaml at: {yaml_path}")

In [None]:
# Change these paths as needed
data_yaml_path = os.path.join(output_path, "data.yaml")

# Check if data.yaml exists so training fails early with a clear message
if not os.path.exists(data_yaml_path):
    raise FileNotFoundError(f"data.yaml not found at {data_yaml_path}")

# Load YOLOv11 segmentation model (pretrained)
model = YOLO(
    "yolo11s-seg.pt"
)  # Change to yolov8n-seg.pt or yolov8m-seg.pt if preferred

# Number of consecutive training runs ("segmentation experiment 1" .. "10")
num_experiments = 10

# NOTE(review): the same `model` object is reused for every run, so each run
# after the first presumably starts from the previous run's weights rather than
# from the pretrained checkpoint — confirm this is intended.
for i in range(1, num_experiments + 1):
    try:
        # Train
        model.train(
            data=data_yaml_path,
            epochs=125,
            imgsz=640,
            batch=16,
            device="0",  # Use 'cpu' if no GPU available
            name=f"segmentation experiment {i}",
            save=True,  # ensure saving
            save_period=-1,  # -1 disables the periodic per-epoch checkpoints
        )
    except KeyboardInterrupt:
        print("Training interrupted. Saving current model...")
        # The trainer may not exist yet if the interrupt arrived before
        # training actually started; guard against that instead of crashing.
        trainer = getattr(model, "trainer", None)
        if trainer is None:
            print("No trainer state available; nothing to save.")
        else:
            # Save current model state
            model_path = os.path.join(trainer.save_dir, "interrupted.pt")
            model.save(model_path)
            print(f"Model saved to {model_path}")
        # Stop the whole loop: without this the interrupted experiment would
        # simply be restarted and the user could never abort training.
        break