# DataMaker
이 노트북은 train.csv를 읽어서 훈련에 유용한 형태로 변환하여 저장합니다.

## Import

In [1]:
import os
import numpy as np
import pandas as pd
import argparse
import sys
from tqdm import tqdm
sys.path.append('../') # import를 위해 경로추가
from utils import Utility as U

## Args

In [2]:
parser = argparse.ArgumentParser()
parser.add_argument('--path_train', type=str, default="./data/train/")
parser.add_argument('--path_eval', type=str, default="./data/eval/")
parser.add_argument('--path_train_output', type=str, default="train_i.csv")
parser.add_argument('--path_eval_output', type=str, default="eval_i.csv")
args = parser.parse_args('')

## Load Data

In [3]:
df_train = pd.read_csv(os.path.join(args.path_train, 'train.csv'))
images = []
for path in df_train['path']:
    __path_folder = os.path.join(*[args.path_train, 'images', path])
    __path_image = [os.path.join(*[__path_folder, p])  for p in os.listdir(__path_folder) if p[0] != '.' ]
    images.append(__path_image)
    
df_train['images'] = images

df_train.head()


Unnamed: 0,id,gender,race,age,path,images
0,1,female,Asian,45,000001_female_Asian_45,[./data/train/images/000001_female_Asian_45/ma...
1,2,female,Asian,52,000002_female_Asian_52,[./data/train/images/000002_female_Asian_52/ma...
2,4,male,Asian,54,000004_male_Asian_54,[./data/train/images/000004_male_Asian_54/mask...
3,5,female,Asian,58,000005_female_Asian_58,[./data/train/images/000005_female_Asian_58/ma...
4,6,female,Asian,59,000006_female_Asian_59,[./data/train/images/000006_female_Asian_59/ma...


In [5]:
images = [os.path.join(*[args.path_eval, 'images', p])
          for p in os.listdir(os.path.join(*[args.path_eval, 'images'])) if p[0] != '.']
df_eval = pd.DataFrame()
df_eval['images'] = images

df_eval.head()


Unnamed: 0,images
0,./data/eval/images/f1bca876363b0bb65be84c2b4d9...
1,./data/eval/images/2e670348e354160dbebbcec5ec9...
2,./data/eval/images/2057065ec72010650d1dc2c2871...
3,./data/eval/images/262fc9157664604bdcf9cf1312b...
4,./data/eval/images/bba586d2aacf03113478a1e7865...


## Make Data

In [7]:
image_df_labels = ['id', 'gender', 'age', 'mask', 'path']
image_df_rows = []
for _id, (_gender, _age, _images) in enumerate(zip(df_train['gender'], df_train['age'], df_train['images'])):
    for  _path in _images:
        _mask = U.convertImagePathToMaskStatus(_path)
        image_df_rows.append(
            [_id, _gender, _age, _mask, _path])
image_df = pd.DataFrame(image_df_rows, columns=image_df_labels)
image_df['gender_class'] = [U.encodeGender(g.capitalize()) for g in image_df['gender']]
image_df['age_class'] = [U.encodeAge(a) for a in image_df['age']]
image_df['mask_class'] = [U.encodeMask(m) for m in image_df['mask']]
print('total number of images :', image_df.size / image_df.columns.size)
image_df.sample(5)

total number of images : 18900.0


Unnamed: 0,id,gender,age,mask,path,gender_class,age_class,mask_class
6380,911,male,22,Wear,./data/train/images/001665_male_Asian_22/mask4...,0,0,0
8940,1277,female,19,Wear,./data/train/images/003289_female_Asian_19/mas...,1,0,0
17238,2462,female,18,Wear,./data/train/images/006434_female_Asian_18/mas...,1,0,0
11693,1670,male,57,Wear,./data/train/images/003849_male_Asian_57/mask4...,0,1,0
119,17,female,58,Wear,./data/train/images/000022_female_Asian_58/mas...,1,1,0


In [10]:
image_df.to_csv(args.path_train_output, index=False)
df_eval.to_csv(args.path_eval_output, index=False)