In [100]:
import os
import pandas as pd
import random
import shutil

In [101]:
# All paths in this notebook are resolved against the directory the kernel
# was started from.
images_dirname = 'combined-images-direction-not-a-thumbnail'
exec_path = os.getcwd()
# Folder holding the per-vehicle, per-direction image list files.
root = os.path.join(exec_path, images_dirname)

In [102]:
# Build `vehicles`: {vehicle name: {direction: [entries read from the list file]}}.
#
# Each list file in `root` is expected to be named
# '<direction>---<vehicle name>.txt' and to hold one entry per line (the
# entries are used as image paths by the cells below).

# Drop the macOS Finder metadata file if it is present in the dataset folder.
ds_store = os.path.join(root, '.DS_Store')
if os.path.exists(ds_store):
    os.remove(ds_store)

LIST_SUFFIX = '.txt'
vehicles = {}
skipped_entries = []
# Sort the listing so the vehicle order (and everything derived from it) does
# not depend on the arbitrary order returned by the file system.
for text_file in sorted(os.listdir(root)):
    list_path = os.path.join(root, text_file)
    # Strip only the trailing extension; a '.txt' elsewhere in the name is kept.
    stem = text_file[:-len(LIST_SUFFIX)] if text_file.endswith(LIST_SUFFIX) else ''
    direction, separator, name = stem.partition('---')
    if not (separator and direction and name and os.path.isfile(list_path)):
        # Anything that is not a well-formed list file would previously have
        # crashed the unpacking; skip it and report it instead.
        skipped_entries.append(text_file)
        continue
    with open(list_path) as f:
        # Ignore blank lines: an empty string is not a usable image path and
        # would otherwise be counted as an image.
        vehicles.setdefault(name, {})[direction] = [line.strip() for line in f if line.strip()]

if skipped_entries:
    print(f'Skipped {len(skipped_entries)} unexpected entries in {root}: {skipped_entries}')


In [103]:
# Extend every vehicle's 'side' list with the augmented variants that exist on
# disk, then shuffle each direction's list in place.
augmentations = ['hflip', 'saltpepper', 'gaussian', 'smartcrop', 'translatex0y25', 'translatex0ym25', 'translatex25y0', 'translatexm25y0']
directions = ['front', 'back', 'side', 'mix']
for name in vehicles:
    per_direction = vehicles[name]
    # A vehicle may have no list file for some direction; give it an empty
    # list so the lookups here and in later cells do not raise KeyError.
    for direction in directions:
        per_direction.setdefault(direction, [])

    side_images = per_direction['side']
    # Track what is already listed so re-running this cell (or a list file
    # that already names an augmented image) does not add duplicates.
    known_paths = set(side_images)
    to_add = []
    for side_image in side_images:
        for augmentation in augmentations:
            # NOTE(review): str.replace rewrites every 'side' in the path, not
            # only the one in the file name - confirm no directory component
            # contains 'side'.
            path_aug = side_image.replace('side', f'side_{augmentation}')
            if path_aug not in known_paths and os.path.exists(path_aug):
                known_paths.add(path_aug)
                to_add.append(path_aug)
    side_images.extend(to_add)

    # NOTE(review): augmented variants are shuffled together with their
    # originals, so the slice-based train/test split further down can put an
    # image in train and its augmentation in test - confirm this is acceptable.
    for direction in directions:
        random.shuffle(per_direction[direction])
        
        
        

In [104]:
# One row per vehicle: [name, front, back, side, mix, total], where each
# number is the length of that direction's image list.
list_data = []
for name, per_direction in vehicles.items():
    counts = [len(per_direction[key]) for key in ('front', 'back', 'side', 'mix')]
    list_data.append([name, *counts, sum(counts)])

In [105]:
# Tabulate the per-vehicle counts, keep only vehicles that have at least 300
# images in every direction, report the smallest count per direction and save
# the table to 'image_counts.csv'.
headers = ['name', 'front', 'back', 'side', 'mix', 'total']
count_columns = ['front', 'back', 'side', 'mix']

df = pd.DataFrame(list_data, columns=headers)
has_enough_images = df[count_columns].ge(300).all(axis=1)
df = df[has_enough_images]

front_min, front_max = df['front'].min(), df['front'].max()
back_min, back_max = df['back'].min(), df['back'].max()
side_min, side_max = df['side'].min(), df['side'].max()
mix_min, mix_max = df['mix'].min(), df['mix'].max()

for column_name, smallest in (
    ('front', front_min),
    ('back', back_min),
    ('side', side_min),
    ('mix', mix_min),
):
    print(f'{column_name} min: {smallest}')

df.to_csv('image_counts.csv', sep=',', index=False)


front min: 327
back min: 300
side min: 315
mix min: 327


In [106]:
# Show the filtered per-vehicle image-count table as this cell's output.
df

Unnamed: 0,name,front,back,side,mix,total
0,BMW_3시리즈 _3시리즈 (F30) (12~18년),404,490,927,776,2597
3,기아_K5_K5 2세대(15~18년),434,416,468,659,1977
4,기아_K5_더 뉴 K5(13~15년),988,652,990,884,3514
6,기아_K7_더 뉴 K7(12~16년),890,579,585,841,2895
8,기아_K7_올 뉴 K7(16년~현재),1610,739,585,984,3918
10,기아_K9_더 뉴 K9(14~18년),1003,381,315,421,2120
11,기아_레이_레이(11~17년),1379,966,1116,766,4227
12,기아_모닝_뉴모닝(08~11년),425,359,630,484,1898
13,기아_모닝_더 뉴 모닝(15~17년),989,649,711,644,2993
15,기아_모닝_올 뉴 모닝(11~15년),825,746,828,928,3327


In [107]:
# Create the output dataset root with its 'train' and 'test' sub-folders.
dst = os.path.join(exec_path, 'cars_refined_20190508')
for label in ['train', 'test']:
    label_dir = os.path.join(dst, label)
    # makedirs also creates `dst` itself when it is missing (plain os.mkdir
    # raises FileNotFoundError in that case) and is a no-op on re-runs.
    os.makedirs(label_dir, exist_ok=True)

In [108]:
# Copy a fixed number of images per vehicle and direction into
# <dst>/train/<vehicle>/ and <dst>/test/<vehicle>/ under random file names.
#
# NOTE(review): re-running this cell copies the images again under new random
# names, so the output folders accumulate duplicates - clear `dst` before a
# re-run.

TRAIN_PER_DIRECTION = 250  # first 250 entries of each shuffled list -> train
TEST_PER_DIRECTION = 50    # next 50 entries (indices 250..299) -> test


def _copy_with_random_name(src, dest_dir):
    """Copy `src` into `dest_dir` under a random numeric '.jpg' name.

    The name is re-drawn until it is unused in `dest_dir`, so an unlucky
    random collision cannot silently overwrite an already copied image.
    Returns the path of the created file.
    """
    while True:
        target = os.path.join(dest_dir, f'{random.randint(10000000000, 900000000000)}.jpg')
        if not os.path.exists(target):
            break
    shutil.copy2(src, target)
    return target


for name in df['name'].tolist():
    train_path = os.path.join(dst, 'train', name)
    test_path = os.path.join(dst, 'test', name)
    # makedirs creates missing parent folders too and tolerates re-runs.
    os.makedirs(train_path, exist_ok=True)
    os.makedirs(test_path, exist_ok=True)
    for direction in directions:
        images = vehicles[name][direction]
        split_end = TRAIN_PER_DIRECTION + TEST_PER_DIRECTION
        for img in images[:TRAIN_PER_DIRECTION]:
            _copy_with_random_name(img, train_path)
        for img in images[TRAIN_PER_DIRECTION:split_end]:
            _copy_with_random_name(img, test_path)