In [1]:
import os
import pandas as pd
import numpy as np
from torch.utils.data import Dataset
from torchvision import transforms

In [2]:
# Number substituted into both split file names below.
# NOTE(review): presumably the validation share in percent — confirm against the code that wrote the splits.
split_ratio = 20

# Relative paths: they resolve against the kernel's current working directory.
train_data_path = f'train/df_train_{split_ratio}.csv'
valid_data_path = f'valid/df_valid_{split_ratio}.csv'
# Root directory that each row's `path` value is joined onto when image files are looked up.
image_path = '/opt/ml/input/data/train/images'

In [3]:
# Load the pre-split train / validation tables.
# NOTE(review): the frame displayed further down has an 'Unnamed: 0' column, which
# suggests these CSVs were written with their index; passing index_col=0 here would
# drop it, but that also changes the columns of the CSVs saved at the end — confirm
# with downstream consumers before changing.
df_train = pd.read_csv(train_data_path)
df_valid = pd.read_csv(valid_data_path)

In [4]:
def generate_mask_field(df, image_root=None):
    """Expand each row into one row per mask image and resolve the image file.

    Every row of ``df`` must carry a directory name in ``path`` (relative to
    ``image_root``) and a list of file-name prefixes in ``mask``. The frame is
    exploded on ``mask`` so each prefix gets its own row; ``path`` is replaced
    by the full path of the first file (in sorted order) in that directory whose
    name starts with the prefix, and ``mask`` is replaced by its integer label
    (0 for ``mask1``..``mask5``, 1 for ``incorrect_mask``, 2 for ``normal``).

    Args:
        df: Frame with ``path`` and list-valued ``mask`` columns. Not modified.
        image_root: Directory containing the per-person image folders.
            Defaults to the module-level ``image_path``.

    Returns:
        A new DataFrame with one row per (input row, mask prefix). The index
        values of the input rows are repeated, as produced by ``explode``.

    Raises:
        FileNotFoundError: If a directory has no file starting with a
            requested prefix (or the directory itself does not exist).
    """
    if image_root is None:
        image_root = image_path

    mask_map = {'incorrect_mask': 1, 'mask1': 0, 'mask2': 0, 'mask3': 0,
                'mask4': 0, 'mask5': 0, 'normal': 2}

    # Each directory is needed once per mask prefix; list it only once.
    listings = {}

    def _resolve_image(rel_dir, prefix):
        folder = os.path.join(image_root, rel_dir)
        if folder not in listings:
            # os.listdir order is arbitrary; sort so the chosen file is deterministic.
            listings[folder] = sorted(os.listdir(folder))
        for fname in listings[folder]:
            if fname.startswith(prefix):
                return os.path.join(folder, fname)
        raise FileNotFoundError(
            f'no file starting with {prefix!r} found in {folder}')

    df = df.explode('mask')
    # Assigning a plain list is positional, so the repeated index left by explode is harmless.
    df['path'] = [_resolve_image(rel_dir, prefix)
                  for rel_dir, prefix in zip(df['path'], df['mask'])]
    df['mask'] = df['mask'].map(mask_map)

    return df

def preprocess(df, cut_age=59):
    """Turn a per-person table into a per-image table with encoded labels.

    Steps, in order:
      * drop the ``race`` and ``id`` columns;
      * keep the raw age in ``age_real`` and replace ``age`` by a class label
        using right-closed bins: (0, 29] -> 0, (29, cut_age] -> 1,
        (cut_age, 100] -> 2. Ages outside (0, 100] become NaN;
      * encode ``gender`` as 0 for ``'male'`` and 1 for ``'female'`` (any other
        value becomes NaN);
      * attach the seven mask file prefixes to every row and let
        ``generate_mask_field`` expand them into one row per image.

    Args:
        df: Frame with at least ``race``, ``id``, ``age``, ``gender`` and
            ``path`` columns. It is not modified.
        cut_age: Upper (inclusive) age of the middle age class.

    Returns:
        A new DataFrame as returned by ``generate_mask_field``.

    Raises:
        KeyError: If ``race`` or ``id`` is missing from ``df``.
    """
    # drop() returns a new frame, so none of the assignments below touch the
    # caller's DataFrame (the previous `del df[...]` mutated it in place).
    df = df.drop(columns=['race', 'id'])

    df['age_real'] = df['age'].copy()

    bins = [0, 29, cut_age, 100]
    labels = [0, 1, 2]
    df['age'] = pd.cut(df['age'], bins=bins, labels=labels)

    df['gender'] = df['gender'].map({'male': 0, 'female': 1})

    # One list of file-name prefixes per row; exploded into separate rows downstream.
    df['mask'] = [['incorrect_mask', 'mask1', 'mask2', 'mask3', 'mask4', 'mask5', 'normal'] for _ in range(len(df))]
    df = generate_mask_field(df)

    return df

In [5]:
# Run both splits through the same preprocessing; the names are rebound to the
# processed frames, so re-running this cell requires re-running the load cell first.
df_train, df_valid = preprocess(df_train), preprocess(df_valid)

In [6]:
# Inspect the processed training frame (the notebook display elides the middle rows).
df_train

Unnamed: 0,gender,age,path,age_real,mask
0,0,1,/opt/ml/input/data/train/images/001024_male_As...,58,1
0,0,1,/opt/ml/input/data/train/images/001024_male_As...,58,0
0,0,1,/opt/ml/input/data/train/images/001024_male_As...,58,0
0,0,1,/opt/ml/input/data/train/images/001024_male_As...,58,0
0,0,1,/opt/ml/input/data/train/images/001024_male_As...,58,0
...,...,...,...,...,...
2159,0,2,/opt/ml/input/data/train/images/003639_male_As...,60,0
2159,0,2,/opt/ml/input/data/train/images/003639_male_As...,60,0
2159,0,2,/opt/ml/input/data/train/images/003639_male_As...,60,0
2159,0,2,/opt/ml/input/data/train/images/003639_male_As...,60,0


In [7]:
# Persist the processed splits; the index is left out of the files.
output_targets = (
    ('/opt/ml/code/df/df_actual_age_train.csv', df_train),
    ('/opt/ml/code/df/df_actual_age_valid.csv', df_valid),
)
for csv_path, frame in output_targets:
    frame.to_csv(csv_path, index=False)