In [1]:
# https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data
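# Converts bounding-box label CSVs (xmin/ymin/xmax/ymax in pixels) into the label
# format YOLOv5 requires: one .txt file per image, with one
# "class x_center y_center width height" line per box, all normalized to [0, 1].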

import pandas as pd
import os

# Dataset root directories (each with a trailing slash), indexed by the `data`
# argument of prepare_for_dataset: 0 = eye, 1 = iris, 2 = pupil.
PATHS = [name + "/" for name in ("eye", "iris", "pupil")]


In [2]:
def read_data(dataset="train", path=None):
    """Load ``<dataset>_label.csv`` and add normalized YOLO box columns.

    Parameters
    ----------
    dataset : str
        Prefix of the CSV file to read, e.g. ``"train"`` reads
        ``train_label.csv``.
    path : str, optional
        Directory prefix of the CSV. It is concatenated as-is, so it must
        include the trailing slash. When omitted, the module-level ``PATH``
        global is used, which must have been assigned beforehand.

    Returns
    -------
    pandas.DataFrame
        The CSV contents plus four added columns, each expressed as a
        fraction of the image size: ``x_center``, ``x_width``, ``y_center``
        and ``y_height``.

    Notes
    -----
    The CSV must provide the columns ``xmin``, ``xmax``, ``ymin``, ``ymax``,
    ``width`` and ``height``.
    """
    # Fall back to the notebook-wide PATH only when no explicit directory is
    # given, so existing callers keep working unchanged.
    root = PATH if path is None else path

    data = pd.read_csv(root + dataset + "_label.csv")

    # Box centre and extent, divided by the image size.
    data["x_center"] = ((data.xmin + data.xmax) / 2) / data.width
    data["x_width"] = (data.xmax - data.xmin) / data.width
    data["y_center"] = ((data.ymin + data.ymax) / 2) / data.height
    data["y_height"] = (data.ymax - data.ymin) / data.height

    return data

In [3]:
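# NOTE: create_output() reads the global `label_map` (class name -> class id),
# which is assigned in a later cell. Example of a three-class mapping:
#label_map = {"eye"  : 0, "iris" : 1, "pupil": 2}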


def create_output(row):
    """Format one annotation row as a YOLO label line.

    ``row`` must support key lookup for ``'class'``, ``'x_center'``,
    ``'y_center'``, ``'x_width'`` and ``'y_height'``. The class name is
    translated to its id through the global ``label_map``.

    Returns the string ``"<class_id> <x_center> <y_center> <x_width> <y_height>"``.
    """
    class_id = label_map[row['class']]
    box = (row['x_center'], row['y_center'], row['x_width'], row['y_height'])
    # format(value) with no spec renders exactly like an f-string placeholder.
    return " ".join(format(value) for value in (class_id, *box))

def data_loop(df, dataset="train", path=None):
    """Write one YOLO label file per image listed in ``df``.

    Every distinct value of ``df.filename`` produces
    ``<path>labels/<dataset>/<filename without extension>.txt`` containing one
    ``create_output(row)`` line per row of that image, joined by newlines
    with no trailing newline.

    Parameters
    ----------
    df : pandas.DataFrame
        Annotation rows; needs a ``filename`` column plus whatever columns
        ``create_output`` reads.
    dataset : str
        Name of the sub-directory under ``labels/`` to write into.
    path : str, optional
        Dataset root directory. When omitted, the module-level ``PATH``
        global is used, which must have been assigned beforehand.

    Notes
    -----
    Differences from the previous hand-rolled loop:

    * the label file of the last image is now written (it used to be dropped
      because the buffer was only flushed when the filename changed);
    * rows are grouped by filename, so the result no longer depends on the
      frame having a 0-based index or on rows of one image being adjacent;
    * the extension is removed with ``os.path.splitext`` instead of assuming
      it is three characters long;
    * the output directory is created when missing, and an empty frame is
      accepted.
    """
    root = PATH if path is None else path
    out_dir = os.path.join(root, "labels", dataset)
    os.makedirs(out_dir, exist_ok=True)

    # sort=False keeps images in order of first appearance; row order inside
    # each image is preserved by groupby.
    for filename, rows in df.groupby("filename", sort=False):
        lines = [create_output(row) for _, row in rows.iterrows()]
        label_file = os.path.join(out_dir, os.path.splitext(filename)[0] + ".txt")
        with open(label_file, "w") as out:
            out.write("\n".join(lines))


In [28]:
def prepare_for_dataset(data=0):  # 0 eye, 1 iris, 2 pupil
    """Generate the YOLO label files for one dataset directory.

    Parameters
    ----------
    data : int
        Index into ``PATHS`` selecting the dataset (0 eye, 1 iris, 2 pupil).

    Side effects
    ------------
    Assigns the module-level ``PATH`` to ``PATHS[data]``, reads
    ``train_label.csv`` and ``test_label.csv`` through ``read_data`` and
    writes the label files through ``data_loop``. The ``test`` CSV is written
    to the ``val`` label directory. Prints ``"<path> done."`` when finished.
    """
    # read_data() and data_loop() locate their files through the global PATH.
    # Previously the selection was stored only in a local variable, so the
    # `data` argument had no influence on which dataset was processed.
    global PATH
    PATH = PATHS[data]

    # (CSV prefix, label sub-directory)
    for csv_split, label_split in (("train", "train"), ("test", "val")):
        df = read_data(csv_split)
        data_loop(df, label_split)

    print(f"{PATH} done.")

    
# All three class names map to id 0 here; each dataset directory is exported
# separately (0 = eye, 1 = iris, 2 = pupil).
label_map = dict.fromkeys(("eye", "iris", "pupil"), 0)
for dataset_index in (0, 1, 2):
    prepare_for_dataset(dataset_index)

eye/ done.
iris/ done.
pupil/ done.


In [31]:
def merge_iris_and_pupil(base_dir=""):
    """Combine iris and pupil label files into the ``iris_pupil`` dataset.

    For every label file in ``iris/labels/<split>/`` (``split`` being
    ``train`` and ``val``) that also exists in ``pupil/labels/train/``, writes
    ``iris_pupil/labels/<split>/<file>`` containing the iris file's text, a
    newline, then the pupil file's text. Iris files without a pupil
    counterpart are skipped.

    Parameters
    ----------
    base_dir : str, optional
        Directory containing the ``iris``, ``pupil`` and ``iris_pupil``
        folders. Defaults to the current working directory.

    Notes
    -----
    The pupil *train* labels are used for both splits. Per the original
    author's note: the iris dataset is a subset of the pupil dataset, so the
    iris validation files have to be matched against the pupil train data.
    The note also states that this causes no collision between train and
    validation, because the iris train files are taken in the first pass.
    """
    pupil_dir = os.path.join(base_dir, "pupil", "labels", "train")
    # List the pupil directory once; set membership replaces a fresh
    # os.listdir() scan for every iris file.
    pupil_files = set(os.listdir(pupil_dir))

    for split in ("train", "val"):
        iris_dir = os.path.join(base_dir, "iris", "labels", split)
        merged_dir = os.path.join(base_dir, "iris_pupil", "labels", split)
        os.makedirs(merged_dir, exist_ok=True)

        for file in os.listdir(iris_dir):
            if file not in pupil_files:
                continue
            # Read both inputs before opening the output so a failed read
            # does not leave an empty merged file behind.
            with open(os.path.join(iris_dir, file), "r") as iris, \
                    open(os.path.join(pupil_dir, file), "r") as pupil:
                merged = iris.read() + "\n" + pupil.read()
            with open(os.path.join(merged_dir, file), "w") as out:
                out.write(merged)
                    

# Re-export the pupil labels with class id 1 so that, in the merged files,
# pupil boxes (1) stay distinguishable from iris boxes (0).
label_map = dict(eye=0, iris=0, pupil=1)
prepare_for_dataset(2)
merge_iris_and_pupil()

# reset pupil labels TODO: Delete
# Re-exports the pupil labels with class id 0 again, overwriting the files
# written above.
label_map = dict(eye=0, iris=0, pupil=0)
prepare_for_dataset(2)

pupil/ done.
pupil/ done.
