In [None]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import os, time
from tqdm import tqdm
import numpy as np, pandas as pd
import cv2
from tqdm import tqdm_notebook, tqdm # Iteration visualization
tqdm.pandas(desc="Loading") # to do progress_apply for pandas
from PIL import Image
import matplotlib.pyplot as plt
%matplotlib inline
import random
from sklearn.model_selection import train_test_split

# Category and Attribute

## Reading boundary box information for each image

In [None]:
wd =  'C:/darknet-master/darknet-master/build/darknet/x64/data/cloth/'
img_bbox_cat = {}
bbox_path = wd + 'Anno/list_bbox.txt'
try:
    bbox_file = open(bbox_path, 'r')
except Exception:
    raise ValueError('There is no such a file in the directory')
bbox_data = bbox_file.readlines()
for img_info, idx in zip(bbox_data[2:], range(len(bbox_data)-2)):
    temp_list = img_info.split()
    img_path = wd + temp_list[0][4:]
    img_bbox_cat[img_path] = [int(loc) for loc in temp_list[1:]]
print(len(img_bbox_cat))

## Reading category information for each image

In [None]:
category_path = wd + 'Anno/list_category_img.txt'
try:
    cat_file = open(category_path, 'r')
except Exception:
    print('Category file does not exist!')
cat_data = cat_file.readlines()
for cat_info, idx in zip(cat_data[2:], range(len(cat_data)-2)):
    temp_list = cat_info.split()
    img_path = wd + temp_list[0][4:]
    # 1~20 is upper clothes (0), 
    # 21~36 is lower clothes(1)
    # 37~50 is full-body clothes)(2)
    if int(temp_list[1]) < 21:
        img_bbox_cat[img_path].append(0)
    elif int(temp_list[1]) > 36:
        img_bbox_cat[img_path].append(2)
    else:
        img_bbox_cat[img_path].append(1)
print(len(img_bbox_cat))
print(np.unique(np.array([lst[-1] for lst in list(img_bbox_cat.values())]), return_counts = True))

## Writting the results as a txt file and csv file

In [None]:
# Convert the dictionary to a data frame
df_img_bbox_cat = pd.DataFrame(img_bbox_cat).T.reset_index()
df_img_bbox_cat.columns = ['path', 'x_1', 'y_1', 'x_2', 'y_2', 'class']
df_img_bbox_cat.to_csv('data_compiled/df_img_bbox_cat.csv', index = False)

In [None]:
#writting the results into a txt file
f = open('data_compiled/img_bbox_cat.txt', 'w')
for key,value in img_bbox_cat.items():
    line = str(key)
    for v in value:
        line += " " + str(v) 
    line += '\n'
    f.write(line)
f.close()

# Preparing data for training YOLO_v3.

In [None]:
df_img_info = pd.read_csv('data_compiled\df_img_bbox_cat.csv')
print(df_img_info.shape)
df_img_info.head()

In [None]:
def convert_labels(path, x1, y1, x2, y2):
    """
    Definition: Parses label files to extract label and bounding box
        coordinates.  Converts (x1, y1, x1, y2) KITTI format to
        (x, y, width, height) normalized YOLO format.
    """
    def sorting(l1, l2):
        if l1 > l2:
            lmax, lmin = l1, l2
            return lmax, lmin
        else:
            lmax, lmin = l2, l1
            return lmax, lmin
    size = get_img_shape(path)
    height, width = size[:2]
    max_height = 300
    max_width = 300
    scaling_factor = 1
    if max_height < height or max_width < width:
        # get scaling factor
        scaling_factor = max_height / float(height)
        if max_width/float(width) < scaling_factor:
            scaling_factor = max_width / float(width)
    x1 = int(x1/scaling_factor)
    y1 = int(y1/scaling_factor)  
    x2 = int(x2/scaling_factor)  
    y2 = int(y2/scaling_factor)  
    xmax, xmin = sorting(x1, x2)
    ymax, ymin = sorting(y1, y2)
    dw = 1./size[1]
    dh = 1./size[0]
    x = (xmin + xmax)/2.0
    y = (ymin + ymax)/2.0
    w = xmax - xmin
    h = ymax - ymin
    x = x*dw
    w = w*dw
    y = y*dh
    h = h*dh
    return (x,y,w,h)
def get_img_shape(path):
    img = cv2.imread(path)
    try:
        return img.shape
    except Exception:
        raise ValueError(f'There is no {path}')

In [None]:
df_img_info['x'], df_img_info['y'], df_img_info['width'], df_img_info['height'] =\
zip(*df_img_info.progress_apply(
    lambda row: convert_labels(row['path'], row['x_1'], row['y_1'],
                               row['x_2'], row['y_2']), axis=1))
df_img_info.to_csv('data_compiled/df_img_info.csv', index=False)
df_img_info.head()

In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import os, time
from tqdm import tqdm
import numpy as np, pandas as pd
import cv2
from tqdm import tqdm_notebook, tqdm # Iteration visualization
tqdm.pandas(desc="Loading") # to do progress_apply for pandas
from PIL import Image
import matplotlib.pyplot as plt
%matplotlib inline
import random
from sklearn.model_selection import train_test_split
def from_yolo_to_cor(box, img_h, img_w): 
    x1, y1 = int((box[0] - box[2]/2)*img_w), int((box[1] - box[3]/2)*img_h)
    x2, y2 = int((box[0] + box[2]/2)*img_w), int((box[1] + box[3]/2)*img_h)
    return x1, y1, x2, y2
    
def draw_boxes(path, boxes, original = False):
    img = cv2.imread(path)
    print(img.shape)
    print(boxes)
    plt.figure(figsize = (12, 6))
    if original == False:
        x1, y1, x2, y2 = from_yolo_to_cor(boxes, img.shape[0], img.shape[1])
    else:
        x1, y1, x2, y2 = boxes
    cv2.rectangle(img, (x1, y1), (x2, y2), (0,0,255), 3)
    plt.subplot(1,1, 1), plt.imshow(img)

In [2]:
def cropping_images(path,x_0,y_0,width_0,heigh_0):
    bbox = [x_0,y_0,width_0,heigh_0]
    # import image
    f_image = Image.open(path)
    
    #Bounding box cordinates
    x_1, y_1, x_2, y_2 = from_yolo_to_cor(bbox, f_image.size[1], f_image.size[0])
#     plt.figure(figsize = (12, 8))    
#     img = cv2.imread(path)
#     cv2.rectangle(img, (x_1, y_1), (x_2, y_2), (255,0,0), 3)
#     plt.subplot(1, 2,1); plt.imshow(img);
    
    box_height = y_2 - y_1
    box_width = x_2 - x_1

    # get width and height of image
    width, height = f_image.size

    # crop image randomly around bouding box within a 0.3 * bbox extra range
    rnd_n = random.random() * 0.15 + 0.05
    left = max(0, x_1 - round(rnd_n * box_width))
    
    rnd_n = (random.random() * 0.15 + 0.05)
    right = min(x_2 + round(rnd_n * box_width), width)

    
    rnd_n = random.random() * 0.15 + 0.05
    top = max(0, y_1 - round(rnd_n * box_height))
    
    rnd_n = random.random() * 0.15 + 0.05
    bottom = min(y_2 + round(rnd_n * box_height), height)
    
    # Crop the image
    f_image = f_image.crop((left, top, right, bottom))
    
    _width, _height = width, height
    width, height = f_image.size
    
    #f_image.show()
    new_path = path[:-4] + '_crop' + '.jpg'
    #f_image.save(new_path, 'jpeg')
    try:
        f_image.save(new_path, 'jpeg')
    except:
        #print(f'Used convertor for {new_path}')
        f_image = f_image.convert('RGB')
        f_image.save(new_path, 'jpeg')

    # List of normalized left x coordinates in bounding box (1 per box)
    xmins = (x_1 - left) / width
    # List of normalized right x coordinates in bounding box (1 per box)
    xmaxs = (x_2 - left) / width
    # List of normalized top y coordinates in bounding box (1 per box)
    ymins = (y_1 - top) / height
    # List of normalized bottom y coordinates in bounding box (1 per box)
    ymaxs = (y_2 - top) / height
    
    x_middle = (xmins + xmaxs) / 2
    y_middle = (ymins + ymaxs) / 2
    bbox_width = (xmaxs - xmins)
    bbox_height = (ymaxs - ymins)
    
    bbox_new = (x_middle, y_middle, bbox_width, bbox_height)

#    assert (xmins >= 0.) and (xmaxs < 1.01) and (ymins >= 0.) and (ymaxs < 1.01), print(path)
#     img = cv2.imread(new_path)   
#     x_1, y_1, x_2, y_2 = from_yolo_to_cor(bbox_new, img.shape[0], img.shape[1])
#     cv2.rectangle(img, (x_1, y_1), (x_2, y_2), (255,0,0), 3)
#     plt.subplot(1, 2,2); plt.imshow(img);
    
    return bbox_new

In [None]:
# idx = 2204
# cropping_images(df_img_info.loc[idx, 'path'],df_img_info.loc[idx, 'x'],
#                 df_img_info.loc[idx, 'y'],
#                 df_img_info.loc[idx, 'width'],
#                 df_img_info.loc[idx, 'height'])

In [3]:
df_img_info = pd.read_csv('data_compiled/df_img_info.csv')
df_img_info_wo_class2 = df_img_info.loc[df_img_info['class'] != 2]

In [5]:
df_img_info_wo_class2['x_crop'], df_img_info_wo_class2['y_crop'],\
df_img_info_wo_class2['width_crop'], df_img_info_wo_class2['height_crop'] =\
zip(*df_img_info_wo_class2.progress_apply(
    lambda row: cropping_images(row['path'], row['x'], row['y'],
                               row['width'], row['height']), axis=1))

Loading: 100%|██████████| 198672/198672 [24:44<00:00, 133.82it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """


In [6]:
df_img_info_wo_class2.to_csv('data_compiled/df_img_info_wo_class2_corpped.csv', index=False)
df_img_info_wo_class2.head()

Unnamed: 0,path,x_1,y_1,x_2,y_2,class,x,y,width,height,x_crop,y_crop,width_crop,height_crop
0,C:/darknet-master/darknet-master/build/darknet...,72,79,232,273,0,0.506667,0.586667,0.533333,0.646667,0.55102,0.518443,0.816327,0.79918
1,C:/darknet-master/darknet-master/build/darknet...,67,59,155,161,0,0.542799,0.366204,0.430707,0.339815,0.469952,0.514412,0.762019,0.815965
2,C:/darknet-master/darknet-master/build/darknet...,65,65,156,200,0,0.535714,0.441509,0.441392,0.450314,0.508446,0.510571,0.814189,0.758985
3,C:/darknet-master/darknet-master/build/darknet...,51,62,167,182,0,0.545,0.488,0.58,0.48,0.507576,0.51875,0.878788,0.75
4,C:/darknet-master/darknet-master/build/darknet...,46,88,166,262,0,0.530059,0.583008,0.599707,0.580078,0.508539,0.520356,0.777989,0.755725


In [None]:
df_img_info['class'] = df_img_info['class'].astype(str)
for path, row in zip(df_img_info['path'].values , df_img_info[['class', 'x', 'y', 'width', 'height']].values):
    file_name = path[:-3] + 'txt'
    row.tofile(file_name, sep=" ", format="%s")

In [8]:
df_img_info_wo_class2['class'] = df_img_info_wo_class2['class'].astype(str)
for path, row in zip(df_img_info_wo_class2['path'].values , df_img_info_wo_class2[['class', 'x_crop', 'y_crop', 'width_crop', 'height_crop']].values):
    file_name = path[:-4] + '_crop.txt'
    row.tofile(file_name, sep=" ", format="%s")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [None]:
idx = 5
draw_boxes(df_img_info.iloc[idx]['path'], list(df_img_info.iloc[idx][['x','y','width','height']]))
#draw_boxes(df_img_info.iloc[idx]['path'], list(df_img_info.iloc[idx][['x_1','y_1','x_2','y_2']]), True)
print("Class is", df_img_info.iloc[idx]['class'])


df_img_info.iloc[idx]['path']

# Removing Class 2 (full-body)

In [None]:
df_img_info_wo_class2 = df_img_info.loc[df_img_info['class'] != 2]

In [None]:
df_img_info_wo_class2['class'].value_counts()

In [16]:
df_img_info_wo_class2 = df_img_info_wo_class2.loc[df_img_info_wo_class2['height_crop'] < 1]
df_img_info_wo_class2 = df_img_info_wo_class2.loc[df_img_info_wo_class2['width_crop'] < 1]

In [20]:
df_img_info_wo_class2.loc[df_img_info_wo_class2['height_crop'] > 1]

Unnamed: 0,path,x_1,y_1,x_2,y_2,class,x,y,width,height,x_crop,y_crop,width_crop,height_crop


In [28]:
df_train, df_test = train_test_split(df_img_info_wo_class2, train_size = 0.9, stratify = df_img_info_wo_class2['class'])

In [30]:
np.savetxt('data_compiled/train_cloth_crop.txt', df_train['new_path'].values, fmt='%s')
np.savetxt('data_compiled/test_cloth_crop.txt', df_test['new_path'].values, fmt='%s')

In [24]:
df_img_info_wo_class2.iloc[0]['path']

'C:/darknet-master/darknet-master/build/darknet/x64/data/cloth/Sheer_Pleated-Front_Blouse/img_00000001.jpg'

In [25]:
df_img_info_wo_class2['new_path'] = df_img_info_wo_class2['path'].apply(lambda x:x.replace('.jpg', '_crop.jpg'))

In [27]:
df_img_info_wo_class2.iloc[0]['new_path']

'C:/darknet-master/darknet-master/build/darknet/x64/data/cloth/Sheer_Pleated-Front_Blouse/img_00000001_crop.jpg'