# DataMaker
이 노트북은 train.csv와 평가용(eval) 이미지 목록을 읽어, 훈련과 추론에 사용하기 편한 형태의 CSV(기본값: train_i.csv, eval_i.csv)로 변환하여 저장합니다.

## Import

In [1]:
import os
import numpy as np
import pandas as pd
import argparse
import sys
from tqdm import tqdm
sys.path.append('../') # import를 위해 경로추가
from utils import Utility as U

## Args

In [2]:
# Notebook configuration expressed as argparse options: the input folders for
# the train/eval data and the file names of the CSVs this notebook writes.
parser = argparse.ArgumentParser()
for _flag, _default in (
    ('--path_train', "./data/train/"),
    ('--path_eval', "./data/eval/"),
    ('--path_train_output', "train_i.csv"),
    ('--path_eval_output', "eval_i.csv"),
):
    parser.add_argument(_flag, type=str, default=_default)
# Parse an explicitly empty argument list so the defaults above are always
# used, independent of whatever is in sys.argv.
args = parser.parse_args(args=[])

## Load Data

In [3]:
# Load the training metadata. Each value in the 'path' column is used below as
# the name of a folder under <path_train>/images.
df_train = pd.read_csv(os.path.join(args.path_train, 'train.csv'))

# One folder per metadata row, in row order.
_train_folders = [os.path.join(args.path_train, 'images', _sub) for _sub in df_train['path']]

# For every folder, collect the full path of each entry whose name does not
# start with '.' (hidden files are skipped). Order follows os.listdir.
images = [
    [os.path.join(_folder, _name) for _name in os.listdir(_folder) if not _name.startswith('.')]
    for _folder in _train_folders
]
df_train['images'] = images

df_train.head()


Unnamed: 0,id,gender,race,age,path,images
0,1,female,Asian,45,000001_female_Asian_45,[./data/train/images/000001_female_Asian_45/ma...
1,2,female,Asian,52,000002_female_Asian_52,[./data/train/images/000002_female_Asian_52/ma...
2,4,male,Asian,54,000004_male_Asian_54,[./data/train/images/000004_male_Asian_54/mask...
3,5,female,Asian,58,000005_female_Asian_58,[./data/train/images/000005_female_Asian_58/ma...
4,6,female,Asian,59,000006_female_Asian_59,[./data/train/images/000006_female_Asian_59/ma...


In [7]:
# Build the list of evaluation image paths: every entry directly under
# <path_eval>/images whose name does not start with '.'.
_eval_image_dir = os.path.join(args.path_eval, 'images')
images = []
for _name in os.listdir(_eval_image_dir):
    if _name.startswith('.'):
        continue  # skip hidden files
    images.append(os.path.join(_eval_image_dir, _name))

# Single-column frame holding the evaluation image paths.
df_eval = pd.DataFrame()
df_eval['images'] = images

df_eval.head()


Unnamed: 0,images
0,./data/eval/images/3d1f8f1721367da53bbe5bf3125...
1,./data/eval/images/d354df341e493f2a45f54bba8f6...
2,./data/eval/images/8d43a2c9162e48024ef8d5d2d16...
3,./data/eval/images/6d6aa28eba8f7755524dabac48b...
4,./data/eval/images/a53d868547b70521f8eee018ebe...


## Make Data

In [9]:
# Column order of the per-image table built below.
image_df_labels = ['gender', 'age', 'mask', 'path']


def _make_image_row(gender, age, image_path):
    """Build one [gender, age label, mask status, path] row for a single image."""
    mask_status = U.ConvertImagePathToMaskStatus(image_path)
    # Encode then decode the raw age so the column stores the decoded label.
    age_label = U.AgeDecoder(U.AgeEncoder(age))
    return [gender, age_label, mask_status, image_path]


# Expand the per-person rows of df_train into one row per image.
image_df_rows = [
    _make_image_row(_gender, _age, _path)
    for _gender, _age, _images in zip(df_train['gender'], df_train['age'], df_train['images'])
    for _path in _images
]
image_df = pd.DataFrame(image_df_rows, columns=image_df_labels)

# Encoded class columns derived from the label columns above.
image_df['gender_class'] = list(map(U.GenderEncoder, (_g.capitalize() for _g in image_df['gender'])))
image_df['age_class'] = list(map(U.AgeEncoder, image_df['age']))
image_df['mask_class'] = list(map(U.MaskEncoder, image_df['mask']))

# Total cell count divided by column count gives the row count (as a float).
_image_count = image_df.size / image_df.columns.size
print('total number of images :', _image_count)
image_df.head()

total number of images : 18900.0


Unnamed: 0,gender,age,mask,path,gender_class,age_class,mask_class
0,female,30<=X<60,Wear,./data/train/images/000001_female_Asian_45/mas...,1,1,0
1,female,30<=X<60,NotWear,./data/train/images/000001_female_Asian_45/nor...,1,1,2
2,female,30<=X<60,Wear,./data/train/images/000001_female_Asian_45/mas...,1,1,0
3,female,30<=X<60,Wear,./data/train/images/000001_female_Asian_45/mas...,1,1,0
4,female,30<=X<60,Incorrect,./data/train/images/000001_female_Asian_45/inc...,1,1,1


In [10]:
# Write both tables to their configured output files, without the index column.
for _frame, _destination in (
    (image_df, args.path_train_output),
    (df_eval, args.path_eval_output),
):
    _frame.to_csv(_destination, index=False)