In [7]:
import os
import pandas as pd
import numpy as np
from torch.utils.data import Dataset
from sklearn.model_selection import train_test_split
from torchvision import transforms

In [8]:
# Root directory that is joined with each metadata row's `path` to locate images.
image_path = '/opt/ml/input/data/train/images'

# Tag embedded in the split CSV file names below.
# NOTE(review): presumably the validation share in percent — confirm.
split_ratio = 20

# Locations of the pre-split train / validation metadata tables.
train_data_path, valid_data_path = (
    f'/opt/ml/code/df/train/df_train_{split_ratio}.csv',
    f'/opt/ml/code/df/valid/df_valid_{split_ratio}.csv',
)

In [9]:
# Read the train and validation metadata tables from the configured CSV paths.
df_train = pd.read_csv(filepath_or_buffer=train_data_path)
df_valid = pd.read_csv(filepath_or_buffer=valid_data_path)

In [10]:
def generate_mask_field(df, image_dir=None):
    """Expand each row into one row per mask variant and attach path/label.

    The input must carry three columns:

    * ``path``   - name of the row's sub-folder below the image root,
    * ``gender`` - numeric gender code,
    * ``mask``   - list of file-name prefixes (e.g. ``'mask1'``, ``'normal'``).

    Every list entry in ``mask`` becomes its own row. For each of those rows:

    * ``path`` is replaced by the full path of the first file (in sorted
      order) inside the sub-folder whose name starts with the prefix,
    * ``mask`` is replaced by its class id (``mask1``..``mask5`` -> 0,
      ``incorrect_mask`` -> 1, ``normal`` -> 2; unknown names become NaN),
    * ``label`` is set to ``gender + 2 * mask``.

    Args:
        df: Metadata frame as described above. It is not modified.
        image_dir: Root folder containing the per-row sub-folders. Defaults
            to the notebook-level ``image_path``.

    Returns:
        A new DataFrame with one row per (input row, mask variant). The
        original index values are kept, so they repeat once per variant.

    Raises:
        FileNotFoundError: If a sub-folder holds no file starting with one of
            the requested prefixes (or the sub-folder itself is missing).
    """
    if image_dir is None:
        image_dir = image_path

    mask_map = {'incorrect_mask': 1, 'mask1': 0, 'mask2': 0, 'mask3': 0,
                'mask4': 0, 'mask5': 0, 'normal': 2}

    # Each folder is needed once per mask variant; list it a single time.
    listing_cache = {}

    def _resolve_image(sub_dir, prefix):
        folder = os.path.join(image_dir, sub_dir)
        if folder not in listing_cache:
            # os.listdir gives entries in arbitrary order; sorting makes the
            # chosen file reproducible when several share the same prefix.
            listing_cache[folder] = sorted(os.listdir(folder))
        for file_name in listing_cache[folder]:
            if file_name.startswith(prefix):
                return os.path.join(folder, file_name)
        raise FileNotFoundError(
            f'no file starting with {prefix!r} found in {folder!r}')

    df = df.explode('mask')
    # A plain list is assigned positionally, which is safe despite the
    # duplicated index produced by explode and also works for empty frames.
    df['path'] = [_resolve_image(sub_dir, prefix)
                  for sub_dir, prefix in zip(df['path'], df['mask'])]
    df['mask'] = df['mask'].map(mask_map)
    df['label'] = df['gender'] + 2 * df['mask']

    return df

def preprocess(df):
    """Turn a raw metadata frame into one labelled row per image.

    Drops the ``race``, ``id`` and ``age`` columns, encodes ``gender`` as
    ``male`` -> 0 / ``female`` -> 1 (any other value becomes NaN), gives every
    row the full list of mask-variant prefixes and hands the result to
    ``generate_mask_field`` for expansion.

    Args:
        df: Raw metadata with at least the columns ``race``, ``id``, ``age``,
            ``gender`` and ``path``. The frame is not modified.

    Returns:
        The DataFrame produced by ``generate_mask_field``.

    Raises:
        KeyError: If any of ``race``, ``id``, ``age`` or ``gender`` is missing.
    """
    mask_variants = ['incorrect_mask', 'mask1', 'mask2', 'mask3', 'mask4', 'mask5', 'normal']

    # drop() returns a new frame, so the caller's data stays intact and the
    # function can be called again on the same raw input.
    df = df.drop(columns=['race', 'id', 'age'])
    df['gender'] = df['gender'].map({'male': 0, 'female': 1})
    # One independent list per row; explode() later turns each into 7 rows.
    df['mask'] = [list(mask_variants) for _ in range(len(df))]
    return generate_mask_field(df)

In [11]:
# Swap both loaded splits for their expanded, labelled versions.
df_train, df_valid = preprocess(df_train), preprocess(df_valid)

In [12]:
# Inspect the processed training frame: after explode() each original index
# value repeats once per mask variant.
df_train

Unnamed: 0,gender,path,mask,label
0,0,/opt/ml/input/data/train/images/001024_male_As...,1,2
0,0,/opt/ml/input/data/train/images/001024_male_As...,0,0
0,0,/opt/ml/input/data/train/images/001024_male_As...,0,0
0,0,/opt/ml/input/data/train/images/001024_male_As...,0,0
0,0,/opt/ml/input/data/train/images/001024_male_As...,0,0
...,...,...,...,...
2159,0,/opt/ml/input/data/train/images/003639_male_As...,0,0
2159,0,/opt/ml/input/data/train/images/003639_male_As...,0,0
2159,0,/opt/ml/input/data/train/images/003639_male_As...,0,0
2159,0,/opt/ml/input/data/train/images/003639_male_As...,0,0


In [14]:
# Write both processed splits to CSV; index=False leaves out the duplicated
# row index created by explode().
df_train.to_csv(
    path_or_buf=f'/opt/ml/code/df/df_mask_gender_train_{split_ratio}.csv',
    index=False,
)
df_valid.to_csv(
    path_or_buf=f'/opt/ml/code/df/df_mask_gender_valid_{split_ratio}.csv',
    index=False,
)