In [None]:
import os
import shutil
import random

# Paths (raw strings, so the Windows backslashes are never parsed as escapes)
dataset_path = r"\Occupant Detection\Input\data"
images_dir = os.path.join(dataset_path, 'images')
labels_dir = os.path.join(dataset_path, 'labels')

# Output directories: one images/ and one labels/ folder per split
output_path = r"\Occupant Detection\split_dataset"
splits = ["train", "val", "test"]
for split in splits:
  os.makedirs(os.path.join(output_path, split, "images"), exist_ok=True)
  os.makedirs(os.path.join(output_path, split, "labels"), exist_ok=True)

# List all images. os.listdir() returns names in arbitrary order, so sort them
# and shuffle with a fixed seed: re-running the cell then reproduces the same
# split instead of copying an image into a second split.
split_seed = 42
images = sorted(f for f in os.listdir(images_dir) if f.endswith((".jpg", ".png")))
random.Random(split_seed).shuffle(images)

# Split dataset
train_ratio, val_ratio = 0.7, 0.2 # 70% train, 20% val, 10% test
n_images = len(images)
# Round away float noise before truncating: 0.7 + 0.2 == 0.8999999999999999,
# so 10 images would otherwise give int(8.999...) == 8 instead of 9.
train_split = int(round(n_images * train_ratio, 6))
val_split = int(round(n_images * (train_ratio + val_ratio), 6))

train_files = images[:train_split]
val_files = images[train_split:val_split]
test_files = images[val_split:]

# Function to move files
def move_files(file_list, split, images_src=None, labels_src=None, dest_root=None):
  """Copy images and their matching label files into one split folder.

  Despite the name, files are copied (``shutil.copy``), not moved, so the
  source dataset is left untouched.

  Args:
    file_list: Image file names (not full paths) to copy.
    split: Name of the split sub-folder, e.g. "train"; it is also shown,
      upper-cased, in the summary line.
    images_src: Folder holding the images. Defaults to the notebook-level
      ``images_dir``.
    labels_src: Folder holding the ``.txt`` annotation files. Defaults to the
      notebook-level ``labels_dir``.
    dest_root: Root folder of the split dataset. Defaults to the
      notebook-level ``output_path``.

  Returns:
    None. One summary line with the image and annotation counts is printed.
  """
  # Fall back to the notebook globals so existing two-argument calls keep working.
  if images_src is None:
    images_src = images_dir
  if labels_src is None:
    labels_src = labels_dir
  if dest_root is None:
    dest_root = output_path

  img_out_dir = os.path.join(dest_root, split, "images")
  label_out_dir = os.path.join(dest_root, split, "labels")
  # Create the targets here too, so the function does not depend on an
  # earlier cell having prepared them.
  os.makedirs(img_out_dir, exist_ok=True)
  os.makedirs(label_out_dir, exist_ok=True)

  img_count, label_count = 0, 0
  for file in file_list:
    label_name = os.path.splitext(file)[0] + ".txt"
    label_src = os.path.join(labels_src, label_name)

    shutil.copy(os.path.join(images_src, file), os.path.join(img_out_dir, file))
    img_count += 1
    # An image without an annotation file is still copied; only the label is skipped.
    if os.path.exists(label_src):
      shutil.copy(label_src, os.path.join(label_out_dir, label_name))
      label_count += 1

  # Report once per split rather than once per copied file.
  print(f"{split.upper()} - Images: {img_count}, Annotations: {label_count}")

# Copy files into their respective split directories. The test split is
# included so the 10% hold-out computed above is actually populated.
move_files(train_files, "train")
move_files(val_files, "val")
move_files(test_files, "test")

In [None]:
from sys import path
import yaml
import os

def create_data_yaml(path_to_classes_txt, path_to_data_yaml):
  """Build a dataset config YAML from a classes.txt label map.

  Args:
    path_to_classes_txt: Path to a text file with one class name per line;
      blank lines are ignored.
    path_to_data_yaml: Path of the YAML file to write.

  Returns:
    None. If the label map is missing, a message is printed and nothing
    is written.
  """
  # isfile() rather than exists(): a directory path would pass exists() and
  # then make open() raise instead of showing the help message below.
  if not os.path.isfile(path_to_classes_txt):
    print(f'classes.txt file not found! Please create a classes.txt labelmap and move it to {path_to_classes_txt}')
    return

  # Read classes.txt to get class names, skipping blank lines
  with open(path_to_classes_txt, 'r') as f:
    classes = [line.strip() for line in f if line.strip()]
  number_of_classes = len(classes)

  # Create data dict
  data = {
      'path' : r"\Occupant Detection\Input\data",
      'train' : r"\Occupant Detection\split_dataset\train",
      'val' : r"\Occupant Detection\split_dataset\val",
      'nc' : number_of_classes,
      'names' : classes
  }

  # Write data to YAML file (sort_keys=False keeps the key order shown above)
  with open(path_to_data_yaml, 'w') as f:
    yaml.dump(data, f, sort_keys=False)
  print(f'Created config file at {path_to_data_yaml}')

# Define paths to classes.txt and the output data.yaml, then run function.
# Both must be file paths: create_data_yaml() opens them directly, so a bare
# directory would make open() fail.
# NOTE(review): these are relative to the working directory, unlike the rooted
# "\Occupant Detection\..." paths used elsewhere in the notebook - confirm.
path_to_classes_txt = os.path.join(r'Occupant Detection\Input\data', 'classes.txt')
path_to_data_yaml = os.path.join('Occupant Detection', 'data.yaml')

create_data_yaml(path_to_classes_txt, path_to_data_yaml)

# Show the file that was actually written; nothing exists to show when
# classes.txt was missing and create_data_yaml() returned early.
print('\nFile contents:\n')
if os.path.isfile(path_to_data_yaml):
  with open(path_to_data_yaml, 'r') as f:
    print(f.read())