In [3]:
import os
import shutil
import json
from sklearn.model_selection import train_test_split

# Source dataset location and the root folder that will receive the split
dataset_folder = './coin-dataset'
output_folder = './dataset_splitted_2'

# Destination folders: images and labels for each of the train/val splits
train_images_dir, train_labels_dir, val_images_dir, val_labels_dir = (
    os.path.join(output_folder, sub_path)
    for sub_path in ('train/images', 'train/labels', 'val/images', 'val/labels')
)

# Create every destination folder up front; already-existing folders are fine
for _split_dir in (train_images_dir, train_labels_dir, val_images_dir, val_labels_dir):
    os.makedirs(_split_dir, exist_ok=True)

# Path to the annotations JSON file inside the dataset folder
annotations_path = os.path.join(dataset_folder, '_annotations.json')

# Load annotations. JSON is UTF-8 by specification, so decode it explicitly
# instead of relying on the platform's locale encoding (which would garble or
# reject non-ASCII file names on systems whose default is not UTF-8).
with open(annotations_path, 'r', encoding='utf-8') as f:
    annotations = json.load(f)

# Split the image records 80/20 into train and validation sets.
# The fixed random_state keeps the split reproducible between runs.
image_annotations = list(annotations['images'])
train_data, val_data = train_test_split(image_annotations, test_size=0.2, random_state=42)

# Helper function to save labels
def save_label(image_info, annotations, output_labels_dir):
    """Write the YOLO-format label file for a single image.

    Each annotation whose ``image_id`` matches the image produces one line:
    ``<class> <x_center> <y_center> <width> <height>``, with the box
    coordinates normalised by the image width and height. An image without
    any matching annotation gets an empty label file.

    Args:
        image_info: Image record with the keys ``id``, ``file_name``,
            ``width`` and ``height``.
        annotations: Mapping whose ``'annotations'`` entry is a list of
            records with the keys ``image_id``, ``category_id`` and ``bbox``
            (``bbox`` is read as ``[x_min, y_min, box_width, box_height]``).
        output_labels_dir: Existing directory that receives the ``.txt`` file.
    """
    image_id = image_info['id']
    img_width = image_info['width']
    img_height = image_info['height']

    # Name the label after the image's base name only. If file_name carries a
    # directory component, keeping it would point into a sub-folder of
    # output_labels_dir that does not exist, and the label would no longer sit
    # beside an image stored under its base name.
    stem = os.path.splitext(os.path.basename(image_info['file_name']))[0]
    label_path = os.path.join(output_labels_dir, stem + '.txt')

    label_lines = []
    for ann in annotations['annotations']:
        if ann['image_id'] != image_id:
            continue
        category_id = ann['category_id'] - 1  # YOLOv8 uses zero-based indexing
        x_min, y_min, box_width, box_height = ann['bbox'][:4]
        # Convert the top-left corner + size box to a normalised centre + size box
        x_center = (x_min + box_width / 2) / img_width
        y_center = (y_min + box_height / 2) / img_height
        width = box_width / img_width
        height = box_height / img_height
        label_lines.append(f"{category_id} {x_center} {y_center} {width} {height}\n")

    with open(label_path, 'w') as label_file:
        label_file.writelines(label_lines)

# Copy every image into its split's image folder and write the matching
# label file into the split's label folder (source files are left in place).
split_targets = (
    (train_data, train_images_dir, train_labels_dir),
    (val_data, val_images_dir, val_labels_dir),
)
for split_images, images_dir, labels_dir in split_targets:
    for image_entry in split_images:
        file_name = image_entry['file_name']
        # The copied image is stored under its base name only
        shutil.copy(
            os.path.join(dataset_folder, file_name),
            os.path.join(images_dir, os.path.basename(file_name)),
        )
        save_label(image_entry, annotations, labels_dir)

print(f"Dataset split into {output_folder} with train and val folders in the required structure.")


Dataset split into ./dataset_splitted_2 with train and val folders in the required structure.
