In [1]:
import json
import cv2
import os

In [3]:
with open("raw_data_from_labelbox.json") as f:
    data = json.load(f)

In [4]:
data

[{'ID': 'cka355pt5jxqq0757n4vx0gf6',
  'DataRow ID': 'cka354lsdnu3j0hjsabrehhym',
  'Labeled Data': 'https://storage.labelbox.com/cka0dkh52ffft08543vsyxap5%2F376aeb42-f1a4-6a58-ed5a-81de05eecbb0-pos_1292.jpg?Expires=1590547859975&KeyName=labelbox-assets-key-1&Signature=UpcvKMTOZovME2Efoldm2vaCOpw',
  'Label': {'objects': [{'featureId': 'cka355ef808kq0y6jzucp2djd',
     'schemaId': 'cka0f0vs4001l0zaa7evkrbvt',
     'title': 'Person',
     'value': 'person',
     'color': '#FFFF00',
     'bbox': {'top': 73, 'left': 197, 'height': 190, 'width': 104},
     'instanceURI': 'https://api.labelbox.com/masks/feature/cka355ef808kq0y6jzucp2djd?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJja2EwZGtoNjJ2ZDBrMDc4NThrNTRyNDE0Iiwib3JnYW5pemF0aW9uSWQiOiJja2EwZGtoNTJmZmZ0MDg1NDN2c3l4YXA1IiwiaWF0IjoxNTg5MzM4MjU5LCJleHAiOjE1OTE5MzAyNTl9.OOCW4xpbDvvYLFTm8NoZgB5P5jfJs_yIE8C6q5lVATU'},
    {'featureId': 'cka355j8r0s130y8isn07adpb',
     'schemaId': 'cka0f0vs2001h0zaa1r1f9l4p',
     'title': 'Hard 

In [5]:
# each index is an image
len(data)

395

In [6]:
data[0].keys()

dict_keys(['ID', 'DataRow ID', 'Labeled Data', 'Label', 'Created By', 'Project Name', 'Created At', 'Updated At', 'Seconds to Label', 'External ID', 'Agreement', 'Benchmark Agreement', 'Benchmark ID', 'Dataset Name', 'Reviews', 'View Label'])

In [27]:
# Print original file names and pixel dimensions
def display_image_dims():
    for img in data:
        path = "ppe_data/images/"
        filename = img['External ID']
        image = cv2.imread(path + filename)
        height, width = image.shape[0:2]
        print(filename, width, height)

In [13]:
'''
    Converts a string category to an integer class number to process in YOLO network
'''
def category_to_class_num(obj_title):
    categories = {"hardHat": 0,
                  "safetyVest": 1,
                  "person": 2}
    return categories[obj_title]

In [14]:
'''
    Convert object bounding box coordinates from top-left corner to xy-center
'''
def normalize_with_image_dims(bbox, filepath):
    image = cv2.imread(filepath)
    
    img_h, img_w = image.shape[0:2]
    
    xmin = (bbox['left'])
    xmax = (bbox['left'] + bbox['width'])
    ymin = (bbox['top'] - bbox['height'])
    ymax = (bbox['top'])
    
    return [xmin, xmax, ymin, ymax]

In [24]:
# normalize object positions and create label txt data
def normalize_dataset(image_path="ppe_data/images/",
                      label_path="ppe_data/labels/",
                      verbose=False):
    image_extension = '.jpg'
    label_extension = '.txt'
        
    for i in range(len(data)):
        filename = data[i]['External ID'].replace(image_extension, '')
        filepath = image_path + filename + image_extension
        label_file = label_path + filename + label_extension
        if verbose == True:
            print(filepath)
            
        try:
            _objects = data[i]['Label']['objects']
            for obj in _objects:
                obj_category = obj['title']
                obj_class = category_to_class_num(obj_category)
                obj_bbox = obj['bbox']
                norm_bbox = normalize_with_image_dims(obj_bbox, filepath)
                print(norm_bbox)
                # Example label: "0 0.7073 0.63636 0.29545 0.71969"
                label = str(obj_class)
                for coord in norm_bbox:
                    label += " "
                    label += str(coord)
                
                # create/overwrite file with label data
                with open(label_file, "a") as f:
                    f.write(label)
                    f.write('\n')
                    if verbose == True:
                        print(f)
        except KeyError as e:
            continue

In [25]:
def preprocess_dataset():
    normalize_dataset(verbose=True)

In [26]:
preprocess_dataset()

ppe_data/images/pos_1292.jpg
ppe_data/images/pos_556.jpg
ppe_data/images/neg_852.jpg
ppe_data/images/neg_114.jpg
ppe_data/images/pos_1743.jpg
ppe_data/images/pos_1324.jpg
ppe_data/images/pos_588.jpg
ppe_data/images/neg_884.jpg
ppe_data/images/neg_146.jpg
ppe_data/images/pos_1775.jpg
ppe_data/images/pos_1356.jpg
ppe_data/images/pos_620.jpg
ppe_data/images/neg_916.jpg
ppe_data/images/neg_178.jpg
ppe_data/images/pos_1807.jpg
ppe_data/images/pos_1388.jpg
ppe_data/images/pos_652.jpg
ppe_data/images/neg_948.jpg
ppe_data/images/neg_210.jpg
ppe_data/images/pos_1839.jpg
ppe_data/images/pos_1420.jpg
ppe_data/images/pos_684.jpg
ppe_data/images/neg_980.jpg
ppe_data/images/neg_242.jpg
ppe_data/images/pos_1871.jpg
ppe_data/images/pos_1452.jpg
ppe_data/images/pos_716.jpg
ppe_data/images/neg_1012.jpg
ppe_data/images/neg_274.jpg
ppe_data/images/pos_1903.jpg
ppe_data/images/pos_1484.jpg
ppe_data/images/pos_748.jpg
ppe_data/images/neg_1044.jpg
ppe_data/images/neg_306.jpg
ppe_data/images/pos_1935.jpg
ppe_

In [12]:
from os import listdir
from os.path import isfile, join

def move_images(FROM, TO):
    # get image names
    
    label_path = 'ppe_data/labels'
    labels = [f.replace('.txt', '') for f in listdir(label_path) if isfile(join(label_path, f))]

    # move images with processed labels from one dir to another
    for image in labels:
        ext = '.jpg'
        os.rename(FROM + image + ext, TO + image + ext)


In [13]:
# move files to processed directory
move_images(FROM='original_data/images/', TO='ppe_data/images/')

FileNotFoundError: [Errno 2] No such file or directory: 'ppe_data/labels'

In [14]:
# reset images to original directory
move_images(FROM='ppe_data/images/', TO='original_data/images/')

FileNotFoundError: [Errno 2] No such file or directory: 'ppe_data/labels'