In [47]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import os, time
from tqdm import tqdm
import numpy as np, pandas as pd
import cv2
from tqdm import tqdm_notebook, tqdm # Iteration visualization
from PIL import Image
import matplotlib.pyplot as plt
%matplotlib inline
import random
from sklearn.model_selection import train_test_split
from utility import *

# Preparing Images and Files for YOLO Training

## Reading bounding box information for each image

In [62]:
# Reading bounding box information for each image.
# Builds img_bbox_cat: image path -> list of the integer fields that follow
# the path on each annotation line (the class id is appended further down).
img_bbox_cat = {}
bbox_path = 'data/Anno/list_bbox.txt'
try:
    bbox_file = open(bbox_path, 'r')
except OSError as err:
    # Same exception type and message as before; chaining keeps the real
    # cause (missing file, permissions, ...) visible in the traceback.
    raise ValueError('There is no such a file in the directory') from err
# The context manager closes the handle even if reading fails.
with bbox_file:
    bbox_data = bbox_file.readlines()
# The first two lines are skipped (presumably an entry count and a column
# header -- confirm against the annotation file).
for img_info in bbox_data[2:]:
    temp_list = img_info.split()
    if not temp_list:
        continue  # tolerate blank lines, e.g. a trailing newline at EOF
    # Drop the first four characters of the path (presumably a leading
    # 'img/' -- confirm); the category file is sliced the same way so both
    # files produce matching keys.
    img_path = temp_list[0][4:]
    img_bbox_cat[img_path] = [int(loc) for loc in temp_list[1:]]
print(len(img_bbox_cat))

# Reading category information for each image and appending a coarse class
# id (0, 1 or 2) to the matching entry of img_bbox_cat.
category_path = 'data/Anno/list_category_img.txt'
try:
    cat_file = open(category_path, 'r')
except OSError as err:
    # Previously this only printed and fell through, which ended in a
    # NameError on cat_file (or silently reused a stale handle on re-run).
    # Fail loudly, in the same style as the bounding-box file above.
    raise ValueError('Category file does not exist!') from err
# The context manager closes the handle even if reading fails.
with cat_file:
    cat_data = cat_file.readlines()
# The first two lines are skipped (presumably an entry count and a column
# header -- confirm against the annotation file).
for cat_info in cat_data[2:]:
    temp_list = cat_info.split()
    if not temp_list:
        continue  # tolerate blank lines, e.g. a trailing newline at EOF
    # Same four-character slice as for the bounding-box file, so keys match.
    img_path = temp_list[0][4:]
    category_id = int(temp_list[1])
    # 1~20 is upper clothes (0),
    # 21~36 is lower clothes (1),
    # 37~50 is full-body clothes (2)
    if category_id < 21:
        coarse_class = 0
    elif category_id > 36:
        coarse_class = 2
    else:
        coarse_class = 1
    # A path that is absent from the bounding-box file raises KeyError here.
    img_bbox_cat[img_path].append(coarse_class)
print(len(img_bbox_cat))
# Distinct class ids (last element of every entry) and how often each occurs.
print(np.unique(np.array(
    [lst[-1] for lst in img_bbox_cat.values()]), return_counts=True))


# Writing the results as a txt file and a csv file
Convert the dictionary to a data frame and save it as a CSV file.
# Columns stored for every image, in the order the values sit in each list.
bbox_cat_columns = ['x_1', 'y_1', 'x_2', 'y_2', 'class']

# Every entry must hold exactly one value per column. A shorter or longer
# entry (e.g. an image present in only one annotation file) is rejected with
# ValueError, as the original construction did, instead of being padded.
ragged_paths = [path for path, values in img_bbox_cat.items()
                if len(values) != len(bbox_cat_columns)]
if ragged_paths:
    raise ValueError(
        '%d entries do not have %d values, e.g. %r'
        % (len(ragged_paths), len(bbox_cat_columns), ragged_paths[0]))

# One row per dictionary key; building the frame row-wise avoids creating a
# frame with one column per image and transposing it afterwards.
df_img_bbox_cat = (
    pd.DataFrame.from_dict(img_bbox_cat, orient='index',
                           columns=bbox_cat_columns)
    .rename_axis('path')
    .reset_index()
)

# This is the first write into data_compiled/, so make sure it exists.
os.makedirs('data_compiled', exist_ok=True)
df_img_bbox_cat.to_csv('data_compiled/df_img_bbox_cat.csv', index = False)

# Writing the results into a txt file: one line per image, holding the path
# followed by its stored values, all separated by single spaces.
# `with` guarantees the file is flushed and closed even if a write fails.
with open('data_compiled/img_bbox_cat.txt', 'w') as f:
    for key, value in img_bbox_cat.items():
        f.write(" ".join([str(key)] + [str(v) for v in value]) + '\n')

# Preparing data for training YOLO_v3.
df_img_info = pd.read_csv('data_compiled/df_img_bbox_cat.csv')
print(df_img_info.shape)
df_img_info.head()

# Call convert_labels once per row with the image path and its four corner
# coordinates; each call is expected to yield four values.
converted_rows = df_img_info.apply(
    lambda rec: convert_labels(
        rec['path'], rec['x_1'], rec['y_1'], rec['x_2'], rec['y_2']),
    axis=1)
# Regroup the per-row results into one sequence per output column.
x_values, y_values, width_values, height_values = zip(*converted_rows)
df_img_info['x'] = x_values
df_img_info['y'] = y_values
df_img_info['width'] = width_values
df_img_info['height'] = height_values
df_img_info.to_csv('data_compiled/df_img_info.csv', index=False)
df_img_info.head()

## Cropping images

In [64]:
df_img_info = pd.read_csv('data_compiled/df_img_info.csv')

# Removing class 2 from dataset.
# .copy() makes this an independent frame, so adding columns below neither
# triggers pandas' SettingWithCopyWarning nor depends on view/copy semantics.
df_img_info_wo_class2 = df_img_info.loc[df_img_info['class'] != 2].copy()

# progress_apply only exists once tqdm has been registered with pandas. No
# earlier cell in view does that explicitly, and repeating the call is harmless.
tqdm.pandas()

# Cropping images: cropping_images is called once per row with the image path
# and the x / y / width / height columns; each call is expected to yield four
# values, which are stored in the *_crop columns.
crop_rows = df_img_info_wo_class2.progress_apply(
    lambda row: cropping_images(row['path'], row['x'], row['y'],
                                row['width'], row['height']), axis=1)
(df_img_info_wo_class2['x_crop'],
 df_img_info_wo_class2['y_crop'],
 df_img_info_wo_class2['width_crop'],
 df_img_info_wo_class2['height_crop']) = zip(*crop_rows)

# Writing the data for the cropped images into a csv file.
# (The 'corpped' spelling in the file name is kept on purpose so anything
# that already reads this path keeps working.)
df_img_info_wo_class2.to_csv(
    'data_compiled/df_img_info_wo_class2_corpped.csv', index=False)

# Generating txt files for yolo model
def write_yolo_label_files(frame, box_columns, suffix=''):
    """Write one single-line label file per row of ``frame``.

    Each file contains the row's class id followed by the values of
    ``box_columns``, separated by single spaces. The file name is the row's
    'path' with its extension replaced by ``suffix + '.txt'``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain 'path', 'class' and every name in ``box_columns``.
    box_columns : sequence of str
        Columns written after the class id, in this order.
    suffix : str, optional
        Text inserted between the path stem and '.txt' (default: none).
    """
    fields = ['class'] + list(box_columns)
    # Casting the class id to str makes each row an object array, so the id
    # is written as stored (e.g. "0") rather than being upcast together with
    # float box columns and written as "0.0".
    label_rows = frame[fields].astype({'class': str}).values
    for img_path, label_row in zip(frame['path'].values, label_rows):
        # splitext handles extensions of any length, unlike a fixed slice.
        stem, _ext = os.path.splitext(img_path)
        label_row.tofile(stem + suffix + '.txt', sep=" ", format="%s")


# The class column of both frames still ends up as str, as before, so later
# cells see the same dtype. astype returns a new frame, which avoids
# assigning into a slice of another frame.
df_img_info = df_img_info.astype({'class': str})
write_yolo_label_files(df_img_info, ['x', 'y', 'width', 'height'])

df_img_info_wo_class2 = df_img_info_wo_class2.astype({'class': str})
write_yolo_label_files(
    df_img_info_wo_class2,
    ['x_crop', 'y_crop', 'width_crop', 'height_crop'],
    suffix='_crop')

# dividing the data into train and test datasets

# The split lists are written from a 'new_path' column, but no cell in view
# creates it, so a fresh Restart & Run All would fail with KeyError.
if 'new_path' not in df_img_info_wo_class2.columns:
    # NOTE(review): assumed naming -- '<stem>_crop<ext>', i.e. the same stem
    # as the '<stem>_crop.txt' label files written above. Confirm that this
    # matches where cropping_images saves the cropped images (including any
    # directory prefix) before training.
    def _cropped_image_path(img_path):
        stem, ext = os.path.splitext(img_path)
        return stem + '_crop' + ext

    df_img_info_wo_class2 = df_img_info_wo_class2.assign(
        new_path=df_img_info_wo_class2['path'].map(_cropped_image_path))

# 90/10 split, stratified on the class column. A fixed random_state makes
# the split reproducible across runs.
df_train, df_test = train_test_split(
    df_img_info_wo_class2, train_size=0.9,
    stratify=df_img_info_wo_class2['class'], random_state=42)

# One image path per line in each list file.
np.savetxt('data_compiled/train_cloth_crop.txt',
           df_train['new_path'].values, fmt='%s')
np.savetxt('data_compiled/test_cloth_crop.txt',
           df_test['new_path'].values, fmt='%s')