In [None]:
# 모든 파일 리스팅
import pandas as pd
from glob import glob
from pathlib import Path
from tqdm import tqdm
import random

# Collect the image paths of both source folders.
# glob does not guarantee any particular order, so the listings are sorted to
# make the resulting tables come out in the same order on every run.
train_imgs = sorted(glob('train_imgs/*.png'))
test_imgs = sorted(glob('val_imgs/*.png'))
len(train_imgs), len(test_imgs)

In [None]:
# The AI Hub "train" images are used for train/val,
# and the AI Hub "val" images are used as the test set.
def to_label(imgs, phase):
    """Build a label table from image paths by parsing their file names.

    Each file stem is split on ``'_'`` and must yield exactly five fields,
    which fill the ``INOUT``, ``PID``, ``SN``, ``D1`` and ``D2`` columns; the
    original path goes into ``NAME``.

    Args:
        imgs: Iterable of image path strings.
        phase: Value stored in the ``PHASE`` column of every row.

    Returns:
        A DataFrame with the parsed columns, the cumulative keys ``K_SN``,
        ``K_D1`` and ``K_D2``, the class name ``CLS_NAME`` and ``PHASE``.
    """
    rows = []
    for img_path in imgs:
        fields = Path(img_path).stem.split('_')
        fields.append(img_path)
        rows.append(fields)

    df = pd.DataFrame(rows, columns=['INOUT', 'PID', 'SN', 'D1', 'D2', 'NAME'])

    # Each key extends the previous one with the next field of the file name.
    sn_key = df['PID'] + '_' + df['SN']
    d1_key = sn_key + '_' + df['D1']
    d2_key = d1_key + '_' + df['D2']

    df['K_SN'] = sn_key
    df['K_D1'] = d1_key
    df['K_D2'] = d2_key
    # The class of an image is its PID_SN combination.
    df['CLS_NAME'] = df['K_SN']
    df['PHASE'] = phase
    return df
    
# Label both source folders, then stack them into a single table.
df_train = to_label(train_imgs, 'train')
df_test = to_label(test_imgs, 'test')
# ignore_index=True renumbers the rows; otherwise both halves keep their own
# 0..n-1 index and the combined frame carries duplicate row labels.
df_total = pd.concat([df_train, df_test], ignore_index=True)
df_total

In [None]:
# Summary statistics (count / unique / top / freq) of the key columns.
df_total.loc[:, ['SN', 'PID', 'K_SN', 'K_D1', 'K_D2', 'CLS_NAME']].describe()

In [None]:
# Map every class name to an integer id, numbered in sorted name order,
# and store that id next to each row.
kls_idx = {
    cls_name: idx
    for idx, cls_name in enumerate(sorted(df_total['CLS_NAME'].unique()))
}
df_total['KLS_IDX'] = df_total['CLS_NAME'].map(kls_idx)
df_total

In [None]:
# Split the non-test rows into train/val: per class, the last VAL_PER_CLASS
# rows of a shuffled copy become validation, the rest stay in train.
# NOTE: a class with VAL_PER_CLASS rows or fewer ends up entirely in val.
VAL_PER_CLASS = 30
SPLIT_SEED = 0  # fixed seed so the shuffle is repeatable

df_train_g = (
    df_total.query('PHASE != "test"')
    .sample(frac=1.0, random_state=SPLIT_SEED)
    .groupby('KLS_IDX')
)
df_trains, df_vals = [], []
for _kls_idx, df_i in df_train_g:
    df_trains.append(df_i.iloc[:-VAL_PER_CLASS])
    df_vals.append(df_i.iloc[-VAL_PER_CLASS:])
df_train = pd.concat(df_trains)
df_val = pd.concat(df_vals)
# Tag both splits explicitly. When this cell is re-run, df_total already
# contains rows tagged 'val'; without resetting the tag, any of those that
# land in the train split would keep their stale 'val' label.
df_train['PHASE'] = 'train'
df_val['PHASE'] = 'val'
df_test = df_total.query('PHASE == "test"')
df_total = pd.concat([df_train, df_val, df_test])
df_total

In [None]:
# Distinct class names per phase, followed by how many each phase has.
cls_train = set(df_total.loc[df_total['PHASE'] == 'train', 'CLS_NAME'])
cls_val = set(df_total.loc[df_total['PHASE'] == 'val', 'CLS_NAME'])
cls_test = set(df_total.loc[df_total['PHASE'] == 'test', 'CLS_NAME'])
len(cls_train), len(cls_val), len(cls_test)

In [None]:
# Save the full label table (every phase) as CSV, without the row index.
df_total.to_csv('ai_hub_reid.csv', index=False)
def to_label_txt(df, phase):
    """Write the ``NAME``/``KLS_IDX`` rows of one phase to ``img_list_<phase>.txt``.

    Args:
        df: Label table with at least ``NAME``, ``KLS_IDX`` and ``PHASE`` columns.
        phase: Value of ``PHASE`` whose rows are exported, e.g. ``'train'``.

    Returns:
        The name of the written file, relative to the current working directory.
    """
    # Filter the frame that was passed in rather than a notebook global, so the
    # function exports whatever table the caller hands over.
    txt = df.loc[df['PHASE'] == phase, ['NAME', 'KLS_IDX']]

    file_name = f'img_list_{phase}.txt'
    # Comma-separated with a header row, despite the .txt extension.
    txt.to_csv(file_name, index=False)
    return file_name
    
# Export one image list per phase and keep the resulting file names.
txt_train, txt_val, txt_test = (
    to_label_txt(df_total, phase_name) for phase_name in ('train', 'val', 'test')
)

In [None]:
import cv2
from tqdm.auto import tqdm

# Sanity check: try to decode every image listed in the training list.
df = pd.read_csv(txt_train)
imgs = df.to_dict('records')
unreadable = []
for d in tqdm(imgs):
    try:
        im = cv2.imread(d['NAME'])
    except Exception as e:
        # Report the offending file together with the error.
        print(d['NAME'], e)
        unreadable.append(d['NAME'])
        continue
    # cv2.imread reports a missing or undecodable file by returning None
    # instead of raising, so that case has to be checked explicitly.
    if im is None:
        print(d['NAME'], 'could not be read')
        unreadable.append(d['NAME'])
print(f'{len(unreadable)} of {len(imgs)} images could not be read')
    

In [None]:
import cv2
from PIL import Image
import numpy as np

# Draw one random row from the label table and open the image it points to.
item = df_total.sample(1).iloc[0]
path = item.loc['NAME']
im = Image.open(path)

def rect_pad(pil_image):
    """Zero-pad an image to a square whose side is its longer dimension.

    The padding is split as evenly as possible between the two sides of the
    shorter dimension; when the amount is odd, the extra pixel goes to the
    bottom/right.

    Args:
        pil_image: A PIL image (anything ``np.array`` turns into an array with
            at least two dimensions: height, width and optional channels).

    Returns:
        A new PIL image of size ``max(h, w) x max(h, w)``.
    """
    import numpy as np
    from PIL import Image

    arr = np.array(pil_image)
    h, w = arr.shape[:2]
    size = max(h, w)
    pad_h, pad_w = size - h, size - w

    pad_spec = [
        (pad_h // 2, pad_h - pad_h // 2),
        (pad_w // 2, pad_w - pad_w // 2),
    ]
    # Leave any trailing axes (e.g. colour channels) unpadded. Building the
    # spec from ndim also covers 2-D grayscale arrays, for which a fixed
    # three-axis spec makes np.pad raise.
    pad_spec += [(0, 0)] * (arr.ndim - 2)

    arr = np.pad(arr, pad_spec, mode='constant', constant_values=0)
    return Image.fromarray(arr)
# Show the square-padded version of the sampled image as the cell output.
rect_pad(im)

In [None]:
import shutil
# Copy the images of one phase into reid_data/<phase>/<5-digit class id>/,
# starting from an empty phase folder.
phase = 'test'
df = pd.read_csv(f'img_list_{phase}.txt')
shutil.rmtree(f'reid_data/{phase}', ignore_errors=True)
for path, kls in tqdm(df.values):
    dst = f'reid_data/{phase}/{kls:05d}/{Path(path).name}'
    # exist_ok=True already tolerates an existing class folder.
    Path(dst).parent.mkdir(parents=True, exist_ok=True)
    shutil.copy(path, dst)

In [None]:
import shutil
from PIL import Image

# Build a smaller evaluation set under reid_data/eval from reid_data/test:
# per class folder, keep images whose shorter side exceeds 32 px, sort them,
# and copy every 3rd one of the first 30 (at most 10 images per class).
names = sorted(glob('reid_data/test/*'))

shutil.rmtree('reid_data/eval', ignore_errors=True)
skip = []  # NOTE(review): never filled in the visible code; confirm before removing
for name in tqdm(names):
    # Derive the target folder from the class folder name instead of replacing
    # '/test/' in the path string: glob may return OS-native separators, in
    # which case the replace is a no-op and mkdir hits the source folder.
    dst = str(Path('reid_data/eval') / Path(name).name)
    Path(dst).mkdir(parents=True)
    imgs = []
    for src in glob(f'{name}/*.png'):
        # Open each image in a with-block so its file handle is closed again.
        with Image.open(src) as img:
            if 32 < min(img.size):
                imgs.append(src)
    imgs = sorted(imgs)
    imgs = imgs[:30:3]
    for src in imgs:
        shutil.copy(src, dst)