In [3]:
import os
import random
from random import shuffle
from shutil import copyfile

import cv2 as cv2
import pandas as pd

# --- Configuration -----------------------------------------------------
# Root folder of the generated dataset; the same string is also written as
# the first field of every label line.
dataset_name = "box"
# Value looked up in the 'LabelName' column to select the boxes to export.
class_id = '/m/025dyy'
# Fraction of the exported images that goes to the training split; the
# remainder becomes the validation split.
tv_split = 0.75

# --- Load every bounding-box annotation from the CSV --------------------
df_bbox = pd.read_csv(filepath_or_buffer='train-annotations-bbox.csv')


In [4]:
# Filter by class
# Keep only the boxes whose label is exactly `class_id`. An equality test is
# used rather than `.str.contains`, which performs a regex *substring* search:
# it would also keep any label that merely contains `class_id`, and it yields
# NaN (an invalid boolean mask entry) for rows with a missing label.
bboxes = df_bbox[df_bbox['LabelName'] == class_id]

# One group per distinct image that has at least one matching box.
unique_images = bboxes.groupby(['ImageID'])

print("Images %d" % len(unique_images))
print("Total BBoxes: %d" % len(bboxes))

Images 2212
Total BBoxes: 5364


In [5]:
# Create Labels
def create_label(row, max_size=1024):
    """Build the label line for a single bounding-box annotation.

    Parameters
    ----------
    row : mapping
        One annotation record. Reads 'ImageID', 'IsOccluded' and the box
        corners 'XMin', 'YMin', 'XMax', 'YMax', which are multiplied by the
        image width/height to obtain pixel coordinates.
    max_size : int, optional
        Largest accepted image height/width in pixels. Defaults to 1024,
        the limit that was previously hard-coded.

    Returns
    -------
    str or None
        Fifteen space-separated fields: the dataset name, '0.0', the
        occlusion flag, '0.0', left, top, right, bottom (two decimals each)
        and seven trailing '0.0' placeholders. Returns None when the
        annotation is skipped: occlusion flag equal to -1, image file that
        cannot be read, or image larger than ``max_size`` on either side.

    Notes
    -----
    NOTE(review): the field layout looks like the KITTI object-label format
    (type, truncated, occluded, alpha, bbox, ...) - confirm with the consumer
    of these files.
    """
    # -1 is treated as "do not export"; presumably it marks an unknown
    # occlusion state in the source annotations - TODO confirm.
    if row['IsOccluded'] == -1:
        return None

    image = cv2.imread('train/%s.jpg' % row['ImageID'])

    # cv2.imread reports a missing or undecodable file by returning None
    # instead of raising, so guard before touching `.shape`; otherwise a
    # single absent image aborts the whole export with an AttributeError.
    if image is None:
        return None

    height, width = image.shape[:2]

    # Reject oversized images before doing any coordinate maths.
    if height > max_size or width > max_size:
        return None

    left = row['XMin'] * width
    top = row['YMin'] * height
    right = row['XMax'] * width
    bot = row['YMax'] * height

    fields = [
        dataset_name,
        '0.0',
        str(row['IsOccluded']),
        '0.0',
        "%.2f" % left,
        "%.2f" % top,
        "%.2f" % right,
        "%.2f" % bot,
    ]
    # Seven remaining fields are not populated and are written as zeros.
    fields.extend(['0.0'] * 7)

    return " ".join(fields)

# Maps each ImageID to the list of label lines generated for that image.
image_dict = {}

for _row_index, bbox_row in bboxes.iterrows():
    label_line = create_label(bbox_row)

    # create_label returns None for annotations that must not be exported.
    if label_line is not None:
        image_dict.setdefault(bbox_row['ImageID'], []).append(label_line)






In [6]:
# Copy files to "all" folder
all_images_dir = '%s/all/images' % dataset_name
all_labels_dir = '%s/all/labels' % dataset_name

# Create the output folders up front so this cell also works on a fresh
# run, instead of failing on the first copy when they do not exist yet.
os.makedirs(all_images_dir, exist_ok=True)
os.makedirs(all_labels_dir, exist_ok=True)

for image_id in image_dict:
    all_image_path = '%s/%s.jpg' % (all_images_dir, image_id)
    copyfile('train/%s.jpg' % image_id, all_image_path)

    all_label_path = '%s/%s.txt' % (all_labels_dir, image_id)
    # One label line per bounding box of this image.
    all_label_data = "\n".join(image_dict[image_id])

    # `with` guarantees the file is flushed and closed; the previous
    # `open(...).write(...)` left that to the garbage collector.
    with open(all_label_path, 'w') as label_file:
        label_file.write(all_label_data)

In [7]:
# Create train/validation split and copy files there
# Each entry is [image id, path of the image in "all", path of its label file].
images = [[img,
           "%s/all/images/%s.jpg" % (dataset_name, img),
           "%s/all/labels/%s.txt" % (dataset_name, img)] for img in image_dict]

# Shuffle with a fixed seed so the split is reproducible. With an unseeded
# shuffle every re-run of this cell produced a different split while the
# copies of the previous run stayed on disk, so the same image could end up
# in both train/ and val/.
# NOTE(review): folders populated by earlier, unseeded runs should be
# emptied once by hand; nothing is deleted here.
split_seed = 42
random.Random(split_seed).shuffle(images)

# The first `tv_split` fraction goes to training, the rest to validation.
split_at = int(len(images) * tv_split)
train = images[:split_at]
val = images[split_at:]

for split_name, split_items in (('train', train), ('val', val)):
    split_images_dir = '%s/%s/images' % (dataset_name, split_name)
    split_labels_dir = '%s/%s/labels' % (dataset_name, split_name)

    # Make sure the destination folders exist before copying into them.
    os.makedirs(split_images_dir, exist_ok=True)
    os.makedirs(split_labels_dir, exist_ok=True)

    for iid, image, label in split_items:
        copyfile(image, '%s/%s.jpg' % (split_images_dir, iid))
        copyfile(label, '%s/%s.txt' % (split_labels_dir, iid))



