# Create Dataset for YOLO Training

#### Convert COCO JSON format to YOLO format

In [10]:
import json
import os
import numpy as np
import pandas as pd
import cv2
from PIL import Image
import glob

import shutil
from pathlib import Path

import numpy as np
from PIL import ExifTags
from tqdm import tqdm

def make_dirs(dir='labels/'):
    """Create an empty directory at *dir* and return it as a ``Path``.

    Anything already present at that location is removed first, so the
    caller always starts from a clean folder. Missing parent folders are
    created as needed.
    """
    target = Path(dir)
    # Wipe a previous run's output before recreating the folder.
    if target.exists():
        shutil.rmtree(target)
    target.mkdir(parents=True, exist_ok=True)
    return target

def yolo_class():
    """Return the YOLO class indices ``[0, 1]`` as a new list on every call."""
    return list(range(2))


def convert_coco_json(json_dir='./annotations/', use_segments=False):
    """Convert every COCO ``*.json`` file in *json_dir* into YOLO label files.

    The output directory is (re)created by ``make_dirs()``. Each JSON file
    gets its own sub-folder named after the file stem with ``instances_``
    removed, and each annotated image gets one ``.txt`` file with one line
    per object: ``class x_center y_center width height`` (divided by the
    image width/height), or ``class x1 y1 x2 y2 ...`` polygon points when
    *use_segments* is true. Crowd annotations and zero-area boxes are skipped.

    Args:
        json_dir: Directory holding the COCO JSON files; a trailing path
            separator is optional.
        use_segments: Write normalized segmentation polygons instead of boxes.
    """
    save_dir = make_dirs()  # output directory (wiped and recreated)
    # os.path.join keeps the pattern valid with or without a trailing separator.
    jsons = glob.glob(os.path.join(json_dir, '*.json'))
    print(jsons)
    coco2 = yolo_class()

    for json_file in sorted(jsons):
        fn = Path(save_dir) / Path(json_file).stem.replace('instances_', '')  # folder name
        fn.mkdir(exist_ok=True)
        with open(json_file, encoding='utf-8') as f:
            data = json.load(f)

        # Key on the raw id: formatting it with '%g' keeps only six significant
        # digits, which makes ids >= 1,000,000 collide.
        images = {img['id']: img for img in data['images']}

        # Collect lines per label file so each file is opened once, not once
        # per annotation.
        label_lines = {}
        for x in tqdm(data['annotations'], desc='Annotations %s' % json_file):
            if x.get('iscrowd'):
                continue

            img = images[x['image_id']]
            h, w, file_name = img['height'], img['width'], img['file_name']

            # The COCO box format is [top left x, top left y, width, height]
            box = np.array(x['bbox'], dtype=np.float64)
            box[:2] += box[2:] / 2  # xy top-left corner to center
            box[[0, 2]] /= w  # normalize x
            box[[1, 3]] /= h  # normalize y

            if not (box[2] > 0 and box[3] > 0):  # skip boxes without area
                continue

            if use_segments:
                # Only parse polygons when they are written, so box-only runs
                # do not fail on missing or RLE-encoded segmentation.
                segments = [j for i in x['segmentation'] for j in i]  # all segments concatenated
                values = (np.array(segments).reshape(-1, 2) / np.array([w, h])).reshape(-1).tolist()
            else:
                values = box

            # NOTE(review): assumes category ids are 1-based and no larger than
            # len(coco2); an id of 0 would silently map to the last class.
            line = coco2[x['category_id'] - 1], *values  # cls, box or segments
            label_path = (fn / file_name).with_suffix('.txt')
            label_lines.setdefault(label_path, []).append(
                ('%g ' * len(line)).rstrip() % line + '\n')

        for label_path, lines in label_lines.items():
            with open(label_path, 'a') as file:
                file.writelines(lines)


if __name__ == '__main__':
    convert_coco_json('./annotations/')



Annotations ./annotations\labels.json:   0%|                                                                                                     | 0/16772 [00:00<?, ?it/s]

['./annotations\\labels.json']


Annotations ./annotations\labels.json: 100%|█████████████████████████████████████████████████████████████████████████████████████████| 16772/16772 [02:51<00:00, 97.63it/s]


#### Generate Train, Validation and Test Files

In [None]:
# os.listdir returns entries in arbitrary order; sorting makes the seeded
# shuffle below produce the same split on every machine.
img_list = sorted(os.listdir('./images/'))

In [None]:
# Single-column frame (column label 0) with one image file name per row.
df = pd.DataFrame(data=img_list)

In [None]:
# Shuffle once with a fixed seed, then cut 80% / 10% / 10% by position.
# Positional slicing replaces np.split on a DataFrame, which depends on the
# deprecated DataFrame.swapaxes.
shuffled = df.sample(frac=1, random_state=42)
train_end, val_end = int(.8 * len(df)), int(.9 * len(df))
train = shuffled.iloc[:train_end]
validate = shuffled.iloc[train_end:val_end]
test = shuffled.iloc[val_end:]

In [None]:
# Write one '/data/images/<file name>' line per training image; the context
# manager closes the file even if a write fails.
with open('train.txt', 'w') as file_train:
    file_train.writelines('/data/images/' + f[0] + '\n' for f in train.values)


In [None]:
# Write one '/data/images/<file name>' line per test image; the context
# manager closes the file even if a write fails.
with open('test.txt', 'w') as file_test:
    file_test.writelines('/data/images/' + f[0] + '\n' for f in test.values)

In [None]:
# Write one '/data/images/<file name>' line per validation image; the context
# manager closes the file even if a write fails.
with open('val.txt', 'w') as file_val:
    file_val.writelines('/data/images/' + f[0] + '\n' for f in validate.values)


In [None]:
def write_data_file(fname='eagleview.data', nc=2):
    """Write a darknet ``*.data`` configuration file and return its path.

    Args:
        fname: Path of the file to create; an existing file is overwritten.
        nc: Number of classes recorded on the ``classes`` line.

    Returns:
        *fname*, unchanged, so the caller can keep track of the written file.
    """
    lines = ['classes = %g\n' % nc,
             'train =./train.txt\n',
             'valid =./val.txt\n',
             'test  = ./test.txt\n',
             'names =./eagleview.names\n',
             'backup = ./backup\n',
             'results = ./results\n']

    # Truncate instead of append: re-running must not duplicate every entry.
    with open(fname, 'w') as f:
        f.writelines(lines)
    return fname
# Generate the darknet .data file with the default file name and class count.
data_file = write_data_file(fname='eagleview.data', nc=2)

#### Visualize Bounding Boxes

In [None]:
import cv2
import os
import glob
import numpy
def drawGT(frame, classId, left, top, right, bottom):
    """Draw one labelled ground-truth box on *frame* and return the image.

    *frame* is not copied, so the drawing is applied to the caller's image.
    The label text is simply *classId* formatted as a string; swap in a
    class-name lookup here if a list of names is available.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    annotated = frame

    # Box outline.
    cv2.rectangle(annotated, (left, top), (right, bottom), (255, 178, 50), 3)

    label = '%s' % classId

    # Text is measured at scale 0.5 but drawn at 0.75, hence the 1.5 factor
    # applied to the measured size for the white background patch.
    (text_w, text_h), baseline = cv2.getTextSize(label, font, 0.5, 1)
    top = max(top, text_h)  # keep the label inside the image at the top edge
    patch_top_left = (left, int(top - round(1.5 * text_h)))
    patch_bottom_right = (left + int(round(1.5 * text_w)), top + baseline)
    annotated = cv2.rectangle(annotated, patch_top_left, patch_bottom_right,
                              (255, 255, 255), cv2.FILLED)
    annotated = cv2.putText(annotated, label, (left, top), font, 0.75, (0, 0, 0), 1)
    return annotated

def convert_yolo_coordinates_to_bbox(x_c_n, y_c_n, width_n, height_n, img_width, img_height):
    """Turn a normalized YOLO box into pixel ``(left, top, right, bottom)``.

    The four normalized values may be numbers or numeric strings. Each
    corner coordinate is truncated to an int and then shifted by one.
    """
    # Undo the normalization using the image size.
    center_x, center_y = float(x_c_n) * img_width, float(y_c_n) * img_height
    box_w, box_h = float(width_n) * img_width, float(height_n) * img_height

    # Distance from the center to each edge.
    dx, dy = box_w / 2, box_h / 2

    corners = (center_x - dx, center_y - dy, center_x + dx, center_y + dy)
    return tuple(int(value) + 1 for value in corners)

In [None]:
def main():
    """Render ground-truth boxes onto every image and save the results.

    For each entry in ``./images/`` the matching YOLO label file
    ``./new_dir/labels/labels/<stem>.txt`` is read, every box is drawn with
    ``drawGT`` and the annotated image is written to
    ``./ground-truth-BB/<stem>.jpg``. Entries that cannot be read as images
    or that have no label file are reported and skipped.
    """
    img_base = "./images/"
    gt_base = "./new_dir/labels/labels/"
    out_base = "./ground-truth-BB/"

    # cv2.imwrite does not create missing folders, so make sure the target exists.
    os.makedirs(out_base, exist_ok=True)

    imgs = sorted(os.listdir(img_base))
    total = len(imgs)
    for count, img in enumerate(imgs, start=1):
        # splitext keeps dots inside the file name and handles any extension length.
        name = os.path.splitext(img)[0]

        frame_orig = cv2.imread(img_base + img)
        if frame_orig is None:  # not an image, or unreadable
            print("skipping " + img + ": not a readable image")
            continue
        img_height, img_width = frame_orig.shape[:2]

        gt_path = gt_base + name + ".txt"
        try:
            with open(gt_path, "r") as f_gt:
                content_gt = [x.strip() for x in f_gt]
        except FileNotFoundError:
            print("skipping " + img + ": no label file at " + gt_path)
            continue

        # Start from the plain image so an empty label file never reuses the
        # previous iteration's result.
        image_gt = frame_orig
        for line in content_gt:
            if not line:  # tolerate blank lines
                continue
            obj_id, x_c_n, y_c_n, width_n, height_n = line.split()
            left, top, right, bottom = convert_yolo_coordinates_to_bbox(x_c_n, y_c_n, width_n, height_n, img_width, img_height) #Comment out if co-ordinates not in YOLO format
            #obj_id, left, top, right, bottom = line.split() #Uncomment if absolute co-ordinates/VOC
            image_gt = drawGT(image_gt, obj_id, left, top, right, bottom)

        gt_res_path = out_base + name + ".jpg"
        cv2.imwrite(gt_res_path, image_gt)
        print(str(count) + "/" + str(total) + " done!")
    print("completed")

if __name__ == "__main__":
    main()