In [62]:
import pandas as pd
import os
import numpy as np
import cv2

In [63]:
# Dataset folder and the metadata CSV path that app.py uses
DATASET = 'train_data/'
METADATA_FILE = f'{DATASET}labels.csv'
# Column orderings shared by this notebook and app.py:
#   YOLO_ORDER - columns as parsed from the label files (box centre and size)
#   APP_ORDER  - columns written to the CSV (box corners)
YOLO_ORDER = [
    'label',
    'x', 'y', 'w', 'h',
    'image',
]
APP_ORDER = [
    'label',
    'xmin', 'ymin', 'xmax', 'ymax',
    'image',
]

In [65]:
# Sorted names of the image (.jpg) and label (.txt) files in the obj/ folder
obj_entries = os.listdir(DATASET + 'obj/')
image_files = sorted(name for name in obj_entries if name.endswith('.jpg'))
label_files = sorted(name for name in obj_entries if name.endswith('.txt'))

# Print the two sorted lists side by side (zip stops at the shorter list)
for pair in zip(image_files, label_files):
    print(pair)

('frame-001.jpg', 'frame-001.txt')


In [55]:
# Rename files to match all image/label name formats.
# Images are numbered in sorted order and every label file follows the image
# that shares its stem, so an image/label pair always ends up with the same
# 'frame-NNN' name -- even when some image has no label file.
obj_dir = DATASET + 'obj/'
existing = set(os.listdir(obj_dir))
image_stems = sorted(x[:-4] for x in existing if x.endswith('.jpg'))
label_stems = {x[:-4] for x in existing if x.endswith('.txt')}

# Labels with no matching image cannot be paired; report them and leave them alone.
orphan_stems = sorted(label_stems.difference(image_stems))
if orphan_stems:
    print('label files without an image (not renamed):', orphan_stems)

# At least 3 digits (as before); wider only when there are more than 999 images
# so that the new names still sort in numeric order.
width = max(3, len(str(len(image_stems))))

# Plan every rename up front as (old name, new name).
rename_plan = []
claimed = []  # names that must not belong to a file that stays in place
for i, stem in enumerate(image_stems):
    new_stem = 'frame-' + str(i + 1).zfill(width)
    if new_stem == stem:
        continue  # already has its final name
    rename_plan.append((stem + '.jpg', new_stem + '.jpg'))
    if stem in label_stems:
        rename_plan.append((stem + '.txt', new_stem + '.txt'))
    # The .txt name is claimed even without a label: a leftover label with
    # that name would otherwise become attached to the wrong image.
    claimed += [new_stem + '.jpg', new_stem + '.txt']

# Refuse to run if a rename would overwrite (or mis-pair with) a file that is
# not itself being moved; os.rename can silently replace the target.
planned_sources = {old for old, _ in rename_plan}
staging_names = [new + '.renaming' for _, new in rename_plan]
blocked = sorted(
    name for name in claimed + staging_names
    if name in existing and name not in planned_sources
)
if blocked:
    raise FileExistsError('rename would clash with existing files: ' + ', '.join(blocked))

# Two-phase rename: move everything to a temporary name first so that a target
# which is also a pending source (e.g. frame-001 -> frame-002) is never clobbered.
for old, new in rename_plan:
    os.rename(obj_dir + old, obj_dir + new + '.renaming')
for _, new in rename_plan:
    os.rename(obj_dir + new + '.renaming', obj_dir + new)

# update local variable to keep up with file names
image_files = sorted([x for x in os.listdir(obj_dir) if x.endswith('.jpg')])
label_files = sorted([x for x in os.listdir(obj_dir) if x.endswith('.txt')])
known_labels = set(label_files)
for image_file in image_files:
    label_file = image_file[:-4] + '.txt'
    # Show each image next to its own label (None when the image has no label)
    print((image_file, label_file if label_file in known_labels else None))

('frame-001.jpg', 'frame-001.txt')


In [56]:
# Add one row for each label
# For each image
# Every non-blank line of a label file becomes one row: the whitespace-separated
# fields of the line followed by the name of the matching .jpg file.
temp = []
for file in label_files:
    image_file = file[:-4] + '.jpg'
    try:
        with open(DATASET + 'obj/' + file, encoding='utf-8') as f:
            lines = f.readlines()
    except (OSError, UnicodeDecodeError) as err:
        # Best effort: an unreadable file is skipped, but visibly so
        print('skipping ' + file + ': ' + str(err))
        continue
    for line_number, line in enumerate(lines, start=1):
        # split() with no argument also drops '\r', tabs and repeated spaces
        fields = line.split()
        if not fields:
            continue  # blank line (e.g. trailing newline at end of file)
        if len(fields) != len(YOLO_ORDER) - 1:
            # A short row would be padded by pandas and shift the image name
            # into a coordinate column, so fail loudly here instead.
            raise ValueError(
                file + ' line ' + str(line_number) + ': expected '
                + str(len(YOLO_ORDER) - 1) + ' fields, got ' + str(len(fields))
            )
        temp.append(fields + [image_file])
metadata = pd.DataFrame(temp, columns=YOLO_ORDER)
print('metadata created')

metadata created


In [61]:
# Display the metadata table for a visual check.
# NOTE(review): the saved output of this cell already shows class names and
# pixel-sized coordinates, and its execution count is higher than that of the
# cells that follow, so it appears to come from an out-of-order run -- re-run
# the notebook top to bottom to refresh it.
metadata

Unnamed: 0,label,x,y,w,h,image
0,Safety Vest,106.186667,107.8,207.68,213.84,frame-001.jpg
1,Safety Vest,238.773333,154.88,224.106667,212.96,frame-001.jpg


In [57]:
# Replace the class id read from the label files with its object name
class_names = {
    '0': 'Hard Hat',
    '1': 'Safety Vest',
}
for class_id, class_name in class_names.items():
    is_class = metadata['label'] == class_id
    metadata.loc[is_class, 'label'] = class_name

In [58]:
# The box values were read as text; turn each of them into a float column
for column in ('x', 'y', 'w', 'h'):
    metadata[column] = metadata[column].astype(float)

In [59]:
# Scale the box values from fractions of the image size to pixels, using the
# actual width/height of each image.
coord_cols = ['x', 'y', 'w', 'h']

# This cell multiplies in place, so running it twice would scale the boxes
# twice. Fractions of the image size stay around 0..1; anything clearly larger
# means the scaling has already been applied.
if (metadata[coord_cols] > 1.5).any().any():
    raise RuntimeError(
        'coordinates already look like pixel values; '
        're-create metadata before scaling again'
    )

for image_file in image_files:
    image = cv2.imread(DATASET + 'obj/' + image_file)
    if image is None:
        # cv2.imread signals an unreadable/missing file by returning None
        raise OSError('could not read image: ' + DATASET + 'obj/' + image_file)
    img_h, img_w = image.shape[:2]
    rows = metadata['image'] == image_file  # computed once, used for both axes
    metadata.loc[rows, ['x', 'w']] *= img_w
    metadata.loc[rows, ['y', 'h']] *= img_h

In [60]:
# Display the table after the scaling cell: x, y, w and h have been
# multiplied by the width/height of their image.
metadata

Unnamed: 0,label,x,y,w,h,image
0,Safety Vest,106.186667,107.8,207.68,213.84,frame-001.jpg
1,Safety Vest,238.773333,154.88,224.106667,212.96,frame-001.jpg


In [39]:
# Turn (centre, size) boxes into corner coordinates:
# each corner is the centre shifted by half the box size.
half_w = metadata['w'] / 2
half_h = metadata['h'] / 2
metadata['xmin'] = metadata['x'] - half_w
metadata['ymin'] = metadata['y'] - half_h
metadata['xmax'] = metadata['x'] + half_w
metadata['ymax'] = metadata['y'] + half_h

In [40]:
# Cast every corner column from float to int
for corner in ('xmin', 'ymin', 'xmax', 'ymax'):
    metadata[corner] = metadata[corner].astype(int)

In [41]:
# Keep only the columns listed in APP_ORDER, in that order
metadata = metadata.loc[:, APP_ORDER]

In [44]:
# Show the rows of one frame so its coordinates can be compared by eye
# with the image size
metadata.loc[metadata['image'] == 'frame-123.jpg']

Unnamed: 0,label,xmin,ymin,xmax,ymax,image


In [43]:
# Stop "Run All" execution before saving file.
# The assertion fails on purpose, so the save cell that follows only runs
# when it is executed by hand.
assert False

AssertionError: 

In [29]:
# Write the table to METADATA_FILE as CSV, leaving out the index column
metadata.to_csv(path_or_buf=METADATA_FILE, index=False)