In [1]:
import numpy as np
import os
import shutil
from sklearn.model_selection import train_test_split

In [2]:
# Path to images and labels
images_path = 'YOLO_Dataset/YOLO_Images/'
labels_path = 'YOLO_Dataset/YOLO_Labels/'

In [3]:
# List of images and labels
list_images = os.listdir(images_path)
list_labels = os.listdir(labels_path)

In [4]:
# Check images
def chek_list(images, labels):
    
    images_names = []
    labels_names = []
    
    for image_name in images:
        if image_name == '':
            continue
        images_names.append(image_name.split('.')[0])
        
    for label_name in labels:
        if label_name == '' or label_name == 'classes':
            continue
        labels_names.append(label_name.split('.')[0])
    
    # Check if are the same: 
    if (set(images_names) == set(labels_names)):
        print(f'images and labels are the same')
    elif (len(images_names) > len(labels_names)):
        print(f"Images folder has {set(images_names) - set(labels_names)} that doesn't have label")
    elif (len(labels_names) > len(images_names)):
        print(f"Labels folder has {set(labels_names) - set(images_names)} that doesn't have image")
    else:
        print(f"Images folder has {set(images_names) - set(labels_names)} that doesn't have label")
        print(f"Labels folder has {set(labels_names) - set(images_names)} that doesn't have image")
    
    return images_names, labels_names

In [5]:
def split_list(filenames, test_size = 0.30, random_state=1):
    filenames_train, filenames_test = train_test_split(filenames, test_size=test_size, random_state=random_state)
    print(f'The train size is: {len(filenames_train)}')
    print(f'The test size is: {len(filenames_test)}')
    
    return filenames_train, filenames_test

In [6]:
def split_dataset(out_dir, list_files, images_path, labels_path):
    
    images_out_path = os.path.join(out_dir, 'images')
    
    labels_out_path = os.path.join(out_dir, 'labels')
    
    # Create images dir if not exist
    if not os.path.exists(images_out_path):
        os.makedirs(os.path.join(images_out_path, 'train'))
        os.makedirs(os.path.join(images_out_path, 'test'))
    
    # Create labels_dir if not exist
    if not os.path.exists(labels_out_path):
        os.makedirs(os.path.join(labels_out_path, 'train'))
        os.makedirs(os.path.join(labels_out_path, 'test'))
    
    # Split list
    list_train, list_test = split_list(list_files)
    
    # Move items to train dir
    for file in list_train:
        # Get path to files
        image_path = os.path.join(images_path, file + '.png')
        label_path = os.path.join(labels_path, file + '.txt')
        # Check image and label
        if os.path.isfile(image_path) and os.path.isfile(label_path):
            shutil.copy(image_path, os.path.join(images_out_path, 'train'))
            shutil.copy(label_path, os.path.join(labels_out_path, 'train'))
        else:
            print('file does not exist', image_path)
            
    # Move items to train dir
    for file in list_test:
        # Get path to files
        image_path = os.path.join(images_path, file + '.png')
        label_path = os.path.join(labels_path, file + '.txt')
        # Check image and label
        if os.path.isfile(image_path) and os.path.isfile(label_path):
            shutil.copy(image_path, os.path.join(images_out_path, 'test'))
            shutil.copy(label_path, os.path.join(labels_out_path, 'test'))
        else:
            print('file does not exist', image_path)

In [7]:
list_images, list_labels = chek_list(list_images, list_labels)

Labels folder has {'', 'classes'} that doesn't have image


In [8]:
out_dir = 'YOLO_FInal_data'

split_dataset(out_dir, list_images, images_path, labels_path)

The train size is: 163
The test size is: 70


# Create yaml file

### Get classes

In [24]:

classes = []

with open(classes_path, 'r') as file:
    
    for class_name in file.readlines():
          classes.append(class_name.rstrip('\n').strip())

print(classes)

['dog', 'person', 'cat', 'tv', 'car', 'meatballs', 'marinara sauce', 'tomato soup', 'chicken noodle soup', 'french onion soup', 'chicken breast', 'ribs', 'pulled pork', 'hamburger', 'cavity', 'left', 'right']


### Create yaml

In [85]:
import yaml

def create_yaml(yaml_dir, dataset_path, classes):

    
    yaml_file = dict(
        # dataset root dir
        path = '../' + dataset_path,
        # Train and validation from root
        train = '../' + dataset_path + '/images/train/',
        val = '../' + dataset_path + '/images/test/',        
        # test images (optional)
        test =  '',

        # Classes
        # Number of classes
        nc = len(classes),
        # List of Classes
        names = classes
    )
    
    print('replace the list of classes with the one printed below: ')
    print(classes)
    # Create yaml file
    with open(yaml_dir, 'w') as outfile:
        yaml.dump(yaml_file, outfile, default_flow_style=None)
        print('')
        print('yaml created!')
        
    print('Now move the file to yolov5/data')

In [86]:
yaml_dir = 'YOLO_Dataset/dataset.yaml'
out_dir = 'YOLO_FInal_data'

create_yaml(yaml_dir, out_dir, classes)

replace the list of classes with the one printed below: 
['dog', 'person', 'cat', 'tv', 'car', 'meatballs', 'marinara sauce', 'tomato soup', 'chicken noodle soup', 'french onion soup', 'chicken breast', 'ribs', 'pulled pork', 'hamburger', 'cavity', 'left', 'right']

yaml created!
Now move the file to yolov5/data
