# Yolov8 format
YOLOv8 expects the following layout: a `train`, a `val` and a `test` folder, each containing an `images` folder and a `labels` folder. For every image in an `images` folder there is a `.txt` file with the same name that holds the corresponding labels. To generate labels for segmentation, see the JSON2YOLO repository on GitHub: <https://github.com/ultralytics/JSON2YOLO>

Adjust the paths in the cells below to match your own directory layout.

In [None]:
# Get all file names

import os
image_directory = 'data/images'

# Collect the base names (without extension) of all images.
# Only entries with a '.jpg' extension (any letter case) are kept: the copy
# step rebuilds every path as <name> + '.jpg', so other directory entries
# (hidden files, other formats, sub-folders) would only produce mangled names.
# Sorting makes the listing order independent of the file system.
filenames = sorted(
    os.path.splitext(entry)[0]
    for entry in os.listdir(image_directory)
    if entry.lower().endswith('.jpg')
)

In [None]:
# The gauge-needle and gauge-face annotations were originally stored separately; this cell merges their labels.
# Class 0 corresponds to the gauge face, class 1 to the gauge needle.

def merge_labels(gauge_face_path, gauge_needle_path, dst_path, names=None):
    """Merge per-image gauge-face and gauge-needle YOLO label files.

    For every name, writes ``<dst_path>/<name>.txt`` containing the gauge-face
    labels as they are, followed by the gauge-needle labels with their class
    id rewritten to ``1``.

    Args:
        gauge_face_path: Directory holding the gauge-face ``.txt`` label files.
        gauge_needle_path: Directory holding the gauge-needle ``.txt`` label
            files.
        dst_path: Directory the merged files are written to; it is created
            when it does not exist yet.
        names: Iterable of file names without extension. Defaults to the
            module-level ``filenames`` list.

    Raises:
        FileNotFoundError: If the face or needle label file of a name is
            missing.
    """
    if names is None:
        names = filenames
    if dst_path:
        # open(..., 'w') does not create missing parent directories.
        os.makedirs(dst_path, exist_ok=True)

    for name in names:
        label_file = name + '.txt'
        with open(os.path.join(dst_path, label_file), 'w') as outfile:
            with open(os.path.join(gauge_face_path, label_file)) as infile:
                face_labels = infile.read()
            outfile.write(face_labels)
            # Without a trailing newline the first needle label would be
            # glued onto the last face label.
            if face_labels and not face_labels.endswith('\n'):
                outfile.write('\n')

            with open(os.path.join(gauge_needle_path, label_file)) as infile:
                for line in infile:
                    # Split off the class id as a whole token rather than
                    # assuming it is exactly one character wide.
                    fields = line.split(None, 1)
                    if not fields:
                        continue  # blank line: nothing to relabel
                    rest = fields[1].rstrip() if len(fields) > 1 else ''
                    outfile.write(('1 ' + rest).rstrip() + '\n')

In [None]:
# Merge the bounding-box labels of gauge face and gauge needle.
gauge_face_path = 'data/gauge_face/annotations/bbox_labels_yolo/'
gauge_needle_path = 'data/gauge_needle/annotations/bbox_labels_yolo/'
path_bbox_labels = 'data/bbox_labels/'
# Writing a file does not create missing folders, so make sure the output
# folder exists before merging.
os.makedirs(path_bbox_labels, exist_ok=True)
merge_labels(gauge_face_path, gauge_needle_path, path_bbox_labels)

# Merge the segmentation labels of gauge face and gauge needle.
gauge_face_path = 'data/gauge_face/annotations/segmentation_labels_yolo/'
gauge_needle_path = 'data/gauge_needle/annotations/segmentation_labels_yolo/'
path_segmentation_labels = 'data/segmentation_labels/'
os.makedirs(path_segmentation_labels, exist_ok=True)
merge_labels(gauge_face_path, gauge_needle_path, path_segmentation_labels)


In [None]:
# split images into train, val and test set

import random

def split_dataset(filenames, seed=None):
    """Randomly split file names into train, validation and test sets.

    80% of the names go to the training set, 10% to the validation set and
    the remainder to the test set. The input is not modified: a shuffled copy
    is split instead. The size of each set is printed.

    Args:
        filenames: Sequence of file names to split.
        seed: Optional seed for a reproducible shuffle. When ``None`` the
            module-level ``random`` state is used.

    Returns:
        Tuple ``(train_filenames, val_filenames, test_filenames)`` of lists.
    """
    # Work on a copy so the caller's list keeps its order.
    shuffled = list(filenames)
    if seed is None:
        random.shuffle(shuffled)
    else:
        random.Random(seed).shuffle(shuffled)

    # Calculate the size of each set
    num_files = len(shuffled)
    num_train = int(0.8 * num_files)  # 80% for training
    num_val = int(0.1 * num_files)    # 10% for validation
    val_end = num_train + num_val     # everything after this index is test

    # Split the list into three sets
    train_filenames = shuffled[:num_train]
    val_filenames = shuffled[num_train:val_end]
    test_filenames = shuffled[val_end:]

    # Print the sizes of each set
    print(f"Number of files in train set: {len(train_filenames)}")
    print(f"Number of files in validation set: {len(val_filenames)}")
    print(f"Number of files in test set: {len(test_filenames)}")

    return train_filenames, val_filenames, test_filenames


# Split once; the three resulting lists are read by the copy step.
dataset_splits = split_dataset(filenames)
train_filenames, val_filenames, test_filenames = dataset_splits

In [None]:
# Create folder structure
import os
# Build data/<task>/<split>/images and data/<task>/<split>/labels
# for every task and every split.
dir_base = ['segmentation','detection']
modes = ['train', 'val', 'test']
for base in dir_base:
    for mode in modes:
        for subfolder in ('images', 'labels'):
            path = f'data/{base}/{mode}/{subfolder}'
            # exist_ok lets the cell be re-run without failing
            os.makedirs(path, exist_ok=True)

In [None]:
import shutil

# Copy the image and label file of the given file name into their corresponding folders in the new folder structure
def copy_pair(src_dir, target_dir, file_name, mode):
    """Copy one image and its label file into the ``mode`` split of ``target_dir``.

    The image is read from the module-level ``image_directory`` and the label
    file from ``src_dir``. They are copied to ``<target_dir>/<mode>/images``
    and ``<target_dir>/<mode>/labels`` respectively.
    """
    image_name = file_name + ".jpg"
    label_name = file_name + ".txt"

    # image first, then its label file
    shutil.copy2(
        '/'.join((image_directory, image_name)),
        '/'.join((target_dir, mode, 'images', image_name)),
    )
    shutil.copy2(
        '/'.join((src_dir, label_name)),
        '/'.join((target_dir, mode, 'labels', label_name)),
    )

# For each set, copy all of its images and labels into the corresponding split folder
def copy_split(src_dir, target_dir):
    """Copy every image/label pair into its train, val or test folder.

    Reads the module-level ``train_filenames``, ``val_filenames`` and
    ``test_filenames`` lists and copies each name into the matching split
    under ``target_dir``, taking the label files from ``src_dir``.
    """
    splits = (
        ('train', train_filenames),
        ('val', val_filenames),
        ('test', test_filenames),
    )
    for mode, names in splits:
        for name in names:
            copy_pair(src_dir, target_dir, name, mode)

# Populate both dataset trees: segmentation labels first, then bounding-box labels.
for labels_dir, dataset_root in (
    (path_segmentation_labels, 'data/segmentation'),
    (path_bbox_labels, 'data/detection'),
):
    copy_split(labels_dir, dataset_root)
