In [11]:
# import all dependencies here
import os
import pandas as pd
import shutil
import random
import sys
from pprint import pprint
import typing as T
import cv2
import shutil
import string


In [12]:
# Dataset root; each entry inside it is joined with a direction name further down.
ROOT = '/Volumes/TriveStorage/code/trive-image-recognition/complete_manual/directional_cleaned'

# Direction sub-folders looked up inside every entry of ROOT:
# the four source views first, then their '_hflip' counterparts.
directions = [
    'front', 'back', 'side', 'mix',
    'front_hflip', 'back_hflip', 'side_hflip', 'mix_hflip',
]

# Names found directly under ROOT, skipping anything containing '.DS_Store'.
processed_folders = list(filter(lambda name: '.DS_Store' not in name, os.listdir(ROOT)))


In [13]:
# Build one row per class folder:
#   [name, <one count per entry of `directions`>, total, totalWithAug]
# `full_total` accumulates the non-augmented image count over all classes.
data = []
full_total = 0
for processed_folder in processed_folders:
    # WARNING: destructive. Any folder under ROOT whose name contains '_' is
    # deleted from disk here and left out of the statistics.
    if '_' in processed_folder:
        shutil.rmtree(os.path.join(ROOT, processed_folder), ignore_errors=True)
        continue

    direction_counts = []
    for direction in directions:
        direction_path = os.path.join(ROOT, processed_folder, direction)
        # Missing direction folders are created so later cells can list them.
        if not os.path.exists(direction_path):
            os.mkdir(direction_path)
        # Skip '.DS_Store' so the counts agree with the listings used when the
        # train/test split is exported (those also ignore '.DS_Store').
        image_names = [s for s in os.listdir(direction_path) if s != '.DS_Store']
        direction_counts.append(len(image_names))

    # The first four directions (front, back, side, mix) are the source views;
    # the remaining entries are their '_hflip' augmentations.
    total = sum(direction_counts[:4])
    full_total += total
    data.append([processed_folder, *direction_counts, total, sum(direction_counts)])

In [14]:
# Column names matching the row layout produced by the counting loop.
headers = [
    'name',
    'front', 'back', 'side', 'mix',
    'front_hflip', 'back_hflip', 'side_hflip', 'mix_hflip',
    'total', 'totalWithAug',
]

# Fold the 'mix' counts into 'side', then drop the now-redundant 'mix' column.
df = (
    pd.DataFrame(data, columns=headers)
    .assign(side=lambda frame: frame['side'] + frame['mix'])
    .drop(columns='mix')
)


In [15]:
# Work out how many augmented images each vehicle class still needs per view.
#   * <view>Aug == 0   -> the view already has enough images
#   * <view>Aug == 100 -> 100 additional images are required for that view

training_image_req = 334 # 1000 / 3
test_image_req = 67 # 200 / 3

# Per-view target: training share plus test share.
image_requirement = training_image_req + test_image_req

# Shortfall against the target, floored at zero (a surplus needs no augmentation).
df['frontAug'] = (image_requirement - df['front']).clip(lower=0)
df['backAug'] = (image_requirement - df['back']).clip(lower=0)
df['sideAug'] = (image_requirement - df['side']).clip(lower=0)


In [16]:
# Disabled: combined augmentation shortfall across the three views.
# df['augTotal'] = df['frontAug'] + df['backAug'] + df['sideAug']
# Order the rows from the largest to the smallest 'total' so that head()
# in the next cell returns the classes with the most source images first.
df = df.sort_values(by='total', ascending=False)

In [17]:
# Persist the full statistics table and the list of class names.
df.to_csv('/Users/jo/Desktop/trive-image-recognition-logic/image_counts.csv', sep=',', index=False)
df['name'].to_csv('/Users/jo/Desktop/trive-image-recognition-logic/class_names.csv', sep=',', index=False)

# Selection of classes handed to the export loop. head(10000) keeps up to
# 10000 rows of the sorted frame; the variable name is historical.
top_100 = df.head(10000)
# Alternative filters, currently disabled:
# top_100 = top_100[(top_100['total'] < 1200) & (top_100['totalWithAug'] < 1200) & (top_100['total'] >= 600)]
# top_100 = top_100[(top_100['total'] >= 1200) | (top_100['totalWithAug'] >= 1200)]

names = list(df['name'].values)
# The class names shown in this notebook's output are Korean, so the encoding
# is set explicitly instead of relying on the platform's locale default.
with open('current_vehicle_names.txt', 'w', encoding='utf-8') as f:
    f.writelines(f'{item}\n' for item in names)

top_100.to_csv(r'/Users/jo/Desktop/trive-image-recognition-logic/top_200.csv', index = None, header=True)
print('number of classes:', len(top_100.index))
print('full total: ', full_total)
# Temporarily lift pandas' row/column display limits to show the whole selection.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(top_100)
    
    

number of classes: 619
full total:  197169


Unnamed: 0,name,front,back,side,front_hflip,back_hflip,side_hflip,mix_hflip,total,totalWithAug,frontAug,backAug,sideAug
58,기아레이레이(11~17년),1187,1474,2420,0,0,0,0,5081,5081,0,0,0
82,기아스포티지스포티지R(10~13년),913,917,2445,0,0,0,0,4275,4275,0,0,0
68,기아모닝올뉴모닝(11~15년),757,1073,2093,0,0,0,0,3923,3923,0,0,0
105,기아카니발올뉴카니발(14~18년),1179,842,1646,0,0,0,0,3667,3667,0,0,0
572,현대아반떼더뉴아반떼AD(18년~현재),460,692,2331,0,60,0,0,3483,3543,0,0,0
80,기아스포티지더뉴스포티지R(13~15년),849,865,1410,0,0,0,0,3124,3124,0,0,0
102,기아카니발그랜드카니발(05~10년),635,926,1509,0,0,0,0,3070,3070,0,0,0
91,기아쏘렌토올뉴쏘렌토(14~17년),579,794,1533,0,0,0,0,2906,2906,0,0,0
201,르노삼성SM6SM6(16년~현재),872,456,1463,0,0,0,0,2791,2791,0,0,0
39,기아K5K52세대(15~18년),929,631,1047,0,0,0,0,2607,2607,0,0,0


In [82]:
def save_to_gray(src: str, dst: str):
    """Convert the image at ``src`` to grayscale and write it to ``dst``.

    This is a best-effort helper: every failure is reported with ``print``
    and nothing is raised to the caller.

    Args:
        src: Path of the image to read.
        dst: Path the grayscale image is written to.

    Returns:
        None.
    """
    try:
        image = cv2.imread(src)
        if image is None:
            # cv2.imread reports an unreadable or missing file by returning
            # None instead of raising, so say so explicitly.
            print(f'could not read image: {src}')
            return
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        # cv2.imwrite returns False when the file could not be written.
        if not cv2.imwrite(dst, gray):
            print(f'could not write image: {dst}')
    except Exception as e:
        print(e)

# Output root of the refined dataset and its two split folders.
DEST = '/Volumes/TriveStorage/code/trive-image-recognition/complete_manual/cars_refined'
TEST_PATH = os.path.join(DEST, 'test')
TRAIN_PATH = os.path.join(DEST, 'train')

# Create whichever of the three folders is missing, parent first.
for required_dir in (DEST, TEST_PATH, TRAIN_PATH):
    if os.path.exists(required_dir):
        continue
    os.mkdir(required_dir)

# Augmentation suffixes currently in use. The commented entries are other
# suffixes that are switched off at the moment.
augmentations = [
    'hflip',
#     'smartcrop',
#     'saltpepper0d01',
#     'saltpepper0d02',
#     'saltpepper0d03',
#     'saltpepper0d04',
#     'saltpepper0d05',
#     'saltpepper0d06',
#     'saltpepper0d07',
#     'saltpepper0d08',
#     'saltpepper0d09',
#     'saltpepper0d10',
#     'translatex25y0',
#     'translatex0y25',
#     'translatex0ym25',
#     'translatexm25y0',
#     'gaussian',
]


def _prefixed(direction):
    """Return '<direction>_<augmentation>' for every enabled augmentation."""
    return [f'{direction}_{suffix}' for suffix in augmentations]


# Augmented folder names per view; the side view also covers 'mix'.
front_augs = _prefixed('front')
back_augs = _prefixed('back')
side_augs = _prefixed('side') + _prefixed('mix')

def random_string(stringLength=10):
    """Return ``stringLength`` lowercase ASCII letters drawn at random."""
    alphabet = string.ascii_lowercase
    picked = []
    for _ in range(stringLength):
        picked.append(random.choice(alphabet))
    return ''.join(picked)

limit = sys.maxsize
for index, row in top_100.iterrows():
    print(row['name'])
    vehicle_test_path = os.path.join(TEST_PATH, row['name'])
    if not os.path.exists(vehicle_test_path):
        os.mkdir(vehicle_test_path)
    else:
        continue
        
    vehicle_train_path = os.path.join(TRAIN_PATH, row['name'])
    if not os.path.exists(vehicle_train_path):
        os.mkdir(vehicle_train_path)
    else:
        continue
        
    origin = os.path.join(ROOT, row['name'])
    imgs_front = [os.path.join(origin, 'front', x) for x in os.listdir(os.path.join(origin, 'front')) if x != '.DS_Store']
    imgs_back = [os.path.join(origin, 'back', x) for x in os.listdir(os.path.join(origin, 'back')) if x != '.DS_Store']
    imgs_side = [os.path.join(origin, 'side', x) for x in os.listdir(os.path.join(origin, 'side')) if x != '.DS_Store']
    imgs_mix = [os.path.join(origin, 'mix', x) for x in os.listdir(os.path.join(origin, 'mix')) if x != '.DS_Store']
    imgs_side += imgs_mix
        
    if (len(imgs_front) + len(imgs_back) + len(imgs_side)) < 1200:
#         curr_augs = [os.path.join(origin, x) for x in front_augs if os.path.exists(os.path.join(origin, x))]
#         extra_data = [[os.path.join(x, y) for y in os.listdir(x) if '.DS_Store' not in y] for x in curr_augs]
#         extra_data = [item for sublist in extra_data for item in sublist]
#         random.shuffle(extra_data)
#         print(f'frontAug: {row["frontAug"]} out of {len(extra_data)}')
#         imgs_front += extra_data[:row['frontAug']]
        imgs_front += [os.path.join(origin, 'front_hflip', x) for x in os.listdir(os.path.join(origin, 'front_hflip')) if x != '.DS_Store']
              
#         curr_augs = [os.path.join(origin, x) for x in back_augs if os.path.exists(os.path.join(origin, x))]
#         extra_data = [[os.path.join(x, y) for y in os.listdir(x) if '.DS_Store' not in y] for x in curr_augs]
#         extra_data = [item for sublist in extra_data for item in sublist]
#         random.shuffle(extra_data)
#         print(f'backAug: {row["backAug"]} out of {len(extra_data)}')
#         imgs_back += extra_data[:row['backAug']]
        imgs_back += [os.path.join(origin, 'back_hflip', x) for x in os.listdir(os.path.join(origin, 'back_hflip')) if x != '.DS_Store']
              
#         curr_augs = [os.path.join(origin, x) for x in side_augs if os.path.exists(os.path.join(origin, x))]
#         extra_data = [[os.path.join(x, y) for y in os.listdir(x) if '.DS_Store' not in y] for x in curr_augs]
#         extra_data = [item for sublist in extra_data for item in sublist]
#         random.shuffle(extra_data)
#         print(f'sideAug: {row["sideAug"]} out of {len(extra_data)}')
#         imgs_side += extra_data[:row['sideAug']]
        imgs_side += [os.path.join(origin, 'side_hflip', x) for x in os.listdir(os.path.join(origin, 'side_hflip')) if x != '.DS_Store']
              
    imgs_all = imgs_front + imgs_back + imgs_side
    if len(imgs_all) < 1200:
        shutil.rmtree(vehicle_test_path, ignore_errors=True)
        shutil.rmtree(vehicle_train_path, ignore_errors=True)
        continue
#         raise ValueError(f'{origin} - not enough images, {len(imgs_all)}')

    random.shuffle(imgs_all)
    
    for im_path in imgs_all[:1000]:
        try:
            rand_name = f'{random_string(20)}-{random.randint(1, limit)}.jpg'
            target_path = os.path.join(vehicle_train_path, rand_name)
            while os.path.exists(target_path):
                target_path = os.path.join(vehicle_train_path, rand_name)
                print(f'{target_path} already exists (train)')
            shutil.copy2(im_path, target_path)
        except Exception as e:
            print(e)
    
    assert(len([x for x in os.listdir(vehicle_train_path) if x != '.DS_Store']) >= 1000)

    for im_path in imgs_all[1000:1200]:
        try:
            rand_name = f'{random_string(20)}-{random.randint(1, limit)}.jpg'
            target_path = os.path.join(vehicle_test_path, rand_name)
            while os.path.exists(target_path):
                target_path = os.path.join(vehicle_test_path, rand_name)
                print(f'{target_path} already exists (test)')
            shutil.copy2(im_path, target_path)
        except Exception as e:
            print(e)
              
    assert(len([x for x in os.listdir(vehicle_test_path) if x != '.DS_Store']) >= 200)
    # failed at 쉐보레(GM대우)토스카토스카프리미엄6(08~11년)
        


기아레이레이(11~17년)
기아스포티지스포티지R(10~13년)
기아모닝올뉴모닝(11~15년)
기아카니발올뉴카니발(14~18년)
현대아반떼더뉴아반떼AD(18년~현재)
기아스포티지더뉴스포티지R(13~15년)
기아카니발그랜드카니발(05~10년)
기아쏘렌토올뉴쏘렌토(14~17년)
르노삼성SM6SM6(16년~현재)
기아K5K52세대(15~18년)
기아K5더뉴K5(13~15년)
기아모닝올뉴모닝(JA)(17년~현재)
기아스포티지스포티지4세대(15~18년)
기아K7더뉴K7(12~16년)
BMW5시리즈5시리즈(F10)(10~16년)
기아K7올뉴K7(16년~현재)
기아쏘렌토쏘렌토R(09~12년)
기아모하비모하비(07~16년)
현대쏘나타LF쏘나타(14~17년)
현대쏘나타쏘나타(DN8)(19년~현재)
현대투싼올뉴투싼(15년~현재)
기아카니발카니발R(10~14년)
제네시스G80G80(16년~현재)
제네시스EQ900EQ900(15~18년)
현대쏘나타YF쏘나타(09~12년)
현대쏘나타쏘나타하이브리드(11~14년)
현대아반떼아반떼MD(10~13년)
현대싼타페싼타페CM(05~12년)
현대아반떼아반떼HD(06~10년)
기아스포티지New스포티지(04~10년)
현대싼타페싼타페DM(12~15년)
르노삼성SM3뉴SM3(09~14년)


In [83]:
train_path = '/Volumes/TriveStorage/code/trive-image-recognition/complete_manual/cars_refined/train'
test_path = '/Volumes/TriveStorage/code/trive-image-recognition/complete_manual/cars_refined/test'


def _report_unexpected_counts(split_root, expected):
    """Print every class folder under ``split_root`` whose file count != ``expected``.

    '.DS_Store' entries are ignored both as class folders and as files.
    """
    class_folders = [os.path.join(split_root, x) for x in os.listdir(split_root) if x != '.DS_Store']
    for folder in class_folders:
        length = len([x for x in os.listdir(folder) if x != '.DS_Store'])
        if length != expected:
            print(folder)
            print(length)
            print('\n')


_report_unexpected_counts(train_path, 1000)
# The test folders are enumerated from test_path itself. The earlier version
# listed train_path here, which raises FileNotFoundError for a class that
# exists only under train and never checks a class that exists only under test.
_report_unexpected_counts(test_path, 200)



In [None]:
#             save_to_gray(im_path, os.path.join(vehicle_test_path, rand_name))

