# Stage: **Preprocessing**
## Function: JSON Coordinates to YOLO Coordinates

### Import libraries

In [7]:
import os 
import json
import glob
import cv2

from tqdm.notebook import trange, tqdm

### Utility Functions

In [8]:
def extract_json_paths(root_path):
    """
    Description: Generates a list of lists that contains the paths to all '*_label.json' files
    found in the 'ColorImage' folder of each subfolder of the dataset path.

    Arguments
        root_path: dataset root. Every entry that has a 'ColorImage' directory inside it is scanned;
                   plain files and subfolders without 'ColorImage' are skipped.

    Return
        json_files_list: List of lists with paths, one inner list per non-empty 'ColorImage' folder.
                         Subfolders and files are listed in sorted order so runs are reproducible.
    """

    json_files_list = []

    # os.listdir() returns entries in arbitrary order; sort for a deterministic traversal.
    for subfolder in sorted(os.listdir(root_path)):
        rgb_folder = os.path.join(root_path, subfolder, 'ColorImage')

        # Stray files in the root or subfolders without 'ColorImage' would make
        # os.listdir(rgb_folder) raise, so skip them explicitly.
        if not os.path.isdir(rgb_folder):
            continue

        if len(os.listdir(rgb_folder)) != 0:
            # List of json files for this 'ColorImage' folder
            json_files = sorted(glob.glob(os.path.join(rgb_folder, '*_label.json')))
            json_files_list.append(json_files)

    return json_files_list

In [9]:
def json_txt_bboxes(json_label_file):
    """
    Description: Converts bounding boxes from [xmin, ymin, xmax, ymax] to normalized YOLO
    [class, xcenter, ycenter, widthbox, heightbox] and writes them to a .txt file.

    Arguments
        json_label_file: path to a '<name>_label.json' file whose 'boxes' entry is a list of
                         [xmin, ymin, xmax, ymax] boxes. The image '<name>.PNG' is read from
                         the same folder to obtain the width and height used for normalization.

    Return
        None. '<name>.txt' is written next to the json file with one line per box. The file is
        overwritten on every call (an empty file is written when there are no boxes), so
        converting the same label file twice does not duplicate lines.

    Raises
        FileNotFoundError: if the matching image is missing or cannot be decoded.
    """
    # Extract path and filename from json_label_file
    dirname, filename = os.path.split(json_label_file)

    # Derive the shared stem from the file name only: splitting the full path on '.'
    # breaks as soon as any directory name contains a dot.
    stem = (filename.split('.')[0]).split("_label")[0]

    # New filename in .txt for YOLO, and the image the boxes refer to
    label_txt_path = os.path.join(dirname, stem + ".txt")
    image_filename = os.path.join(dirname, stem + ".PNG")

    # Read JSON file
    with open(json_label_file) as json_file:
        data = json.load(json_file)

    # Extract data coordinate from each box present in each label file
    boxes_list = data['boxes']

    # Load image; cv2.imread returns None (no exception) when the file cannot be read
    img = cv2.imread(image_filename)
    if img is None:
        raise FileNotFoundError(
            "Image not found or unreadable for label file {}: {}".format(json_label_file, image_filename)
        )

    # Extract image size (rows, columns)
    H_image, W_image = img.shape[:2]

    # Every box in this dataset is written with the same class id
    class_object = "0"

    yolo_lines = []
    for box in boxes_list:
        x_min = int(box[0])
        y_min = int(box[1])
        x_max = int(box[2])
        y_max = int(box[3])

        # YOLO format: box center and size, normalized by the image dimensions
        x_center = round(((x_max + x_min) / 2) / W_image, 4)
        y_center = round(((y_max + y_min) / 2) / H_image, 4)
        wb = round((x_max - x_min) / W_image, 4)
        hb = round((y_max - y_min) / H_image, 4)

        new_coordinates = [str(class_object), str(x_center), str(y_center), str(wb), str(hb)]
        yolo_lines.append(" ".join(new_coordinates) + '\n')

    # Open once in write mode: appending per box made re-runs duplicate every label line
    with open(label_txt_path, 'w') as f:
        f.writelines(yolo_lines)

    return None

## Invoke converter

### Paths for Datasets

In [10]:
# Dataset location, assembled from its individual path components.
base_path = "/home/egcarren/datasets"
datasets_main_folder = "datasets_1_2"
dataset = "dataset_3"
root_path = os.path.join(base_path, datasets_main_folder, dataset)

# Report the resolved root and whether it is an existing directory before converting.
root_exists = os.path.isdir(root_path)
print("Root dataset: \n\t{}".format(root_path))
print("Root exist? : {}".format(root_exists))

Root dataset: 
	/home/egcarren/datasets/datasets_1_2/dataset_3
Root exist? : True


### Run conversion

In [11]:
# Collect the label files, grouped into one list per sub-folder of the dataset root.
files_per_folder = extract_json_paths(root_path)

print("Converting...")
for folder_label_files in files_per_folder:
    # One progress bar per sub-folder; each label file is converted to a YOLO .txt file.
    for label_path in tqdm(folder_label_files):
        json_txt_bboxes(label_path)

print("End of conversion!")

Converting...


  0%|          | 0/940 [00:00<?, ?it/s]

End of conversion!
