In [1]:
import pandas as pd
import numpy as np

In [11]:
# Load the pre-processed training table; the first CSV row is used as the column header.
train_df = pd.read_csv('processed_train.csv', header=0)

<br><br>
## 경로 및 라벨 추출
---

In [3]:
# Pull out the image paths and the class labels as two parallel Series.
paths, labels = train_df['FullPath'], train_df['Class']

# Show the first (label, path) pair as a quick sanity check.
print(f"label, path example: \nlabel: {labels[0]}\npath: {paths[0]}")

label, path example: 
label: 4
path: /opt/ml/input/data/train/images/000001_female_Asian_45/mask1.jpg


<br><br>
## 경로 변경
---
- 훈련 데이터의 이미지 폴더가 `/opt/ml/input/data/train/images`가 아닌 경우, 아래 함수로 `FullPath`를 실제 이미지 폴더 기준으로 다시 만듭니다.

In [4]:
from pathlib import Path

In [6]:
def update_full_path(train_df, dir_images):
    """Return a copy of ``train_df`` whose ``FullPath`` points into ``dir_images``.

    Each ``FullPath`` value is rebuilt as ``<dir_images>/<Path>``, where
    ``Path`` is the existing column of the same row.

    Parameters
    ----------
    train_df : pandas.DataFrame
        Frame containing a string ``Path`` column.
    dir_images : str or path-like
        Directory that holds the image folders.

    Returns
    -------
    pandas.DataFrame
        A new frame; the frame passed in is not modified.
    """
    # Path() normalises the directory text (e.g. 'C://user/images' becomes
    # 'C:/user/images') before the joining separator is appended.
    images_root = str(Path(dir_images)) + '/'

    # assign() works on a copy, so the caller's frame stays as it was.
    relocated = train_df.assign(FullPath=images_root)
    relocated['FullPath'] = relocated['FullPath'].str.cat(relocated['Path'])
    return relocated

# Example target folder — replace with the directory that actually holds the images.
dir_images = 'C://user/images'
# Build a separate frame with rewritten FullPath values; train_df itself is left unchanged.
train_df_updated = update_full_path(train_df, dir_images)

In [7]:
# Re-extract paths and labels, this time from the frame with the rewritten FullPath column.
paths, labels = train_df_updated['FullPath'], train_df_updated['Class']

# Show the first (label, path) pair to confirm the new image folder is in effect.
print(f"label, path example: \nlabel: {labels[0]}\npath: {paths[0]}")

label, path example: 
label: 4
path: C:/user/images/000001_female_Asian_45/mask1.jpg


<br><br>
## 칼럼
---
* 기본 데이터
    * 원본 데이터(from train.csv)
        * gender
        * race
        * age
        * path
    * 파일명
        * file - path 내의 파일이름(확장자 포함)
* 추가 데이터
    * Path - 이미지파일 폴더 및 이미지파일 경로(ex. 000002_female_Asian_52/mask.jpg) 
    * FullPath - 이미지 파일 full path
    * Mask 
    * Age 
    * Gender
    * Class (Competition 18개 class) 
    * ClassMask (Mask 독립 class)
    * ClassGender (Gender 독립 class)
    * ClassAge (Age 독립 Class)

In [13]:
# List every column available in the processed training table.
train_df.columns

Index(['id', 'gender', 'race', 'age', 'path', 'file', 'FullPath', 'Path',
       'Mask', 'Age', 'Gender', 'Class', 'ClassMask', 'ClassGender',
       'ClassAge'],
      dtype='object')

In [14]:
# Preview the human-readable attributes next to their encoded class columns (first 5 rows).
train_df[['Mask', 'Age', 'Gender', 'Class', 'ClassMask', 'ClassGender', 'ClassAge']].head(5)

Unnamed: 0,Mask,Age,Gender,Class,ClassMask,ClassGender,ClassAge
0,Wear,>= 30 and < 60,Female,4,0,1,1
1,Wear,>= 30 and < 60,Female,4,0,1,1
2,Wear,>= 30 and < 60,Female,4,0,1,1
3,Wear,>= 30 and < 60,Female,4,0,1,1
4,Wear,>= 30 and < 60,Female,4,0,1,1


<br><br>
## Label 변경
---

In [22]:
"""
ClassAge 예시
    변경 전
        < 30 : 0
        >= 30 and < 60 : 1
        >= 60 : 2
    변경 후 
        < 20 : 0
        >= 20 and < 40 : 1
        >= 40 and < 60 : 2
        >= 60 : 3
"""
# Re-bin ClassAge from the raw `age` column into four groups:
#   < 20 -> 0, [20, 40) -> 1, [40, 60) -> 2, >= 60 -> 3
age_col = train_df['age']
label_0 = age_col.lt(20)
label_1 = age_col.ge(20) & age_col.lt(40)
label_2 = age_col.ge(40) & age_col.lt(60)
label_3 = age_col.ge(60)

# The position of each mask in the tuple is the new label written to the matching rows.
for age_label, age_rows in enumerate((label_0, label_1, label_2, label_3)):
    train_df.loc[age_rows, 'ClassAge'] = age_label

In [29]:
"""
변경된 ClassAge 반영하여 전체 Class 업데이트(필요한 경우)
"""
# Rebuild the combined Class label from ClassAge / ClassGender / ClassMask
# (only needed after one of the independent labels has been re-binned).
#
# The number of labels per attribute is taken as max + 1 rather than the
# count of distinct values seen in the data: counting distinct values
# under-counts when some label never occurs (and over-counts when NaN is
# present), which makes different (age, gender, mask) combinations collide
# on the same Class value. This assumes each label column is 0-based.
# int() keeps the counts plain Python ints.
num_age_labels = int(train_df['ClassAge'].max()) + 1
num_gender_labels = int(train_df['ClassGender'].max()) + 1
num_mask_labels = int(train_df['ClassMask'].max()) + 1

# Mixed-radix encoding: age is the lowest digit, then gender, then mask.
train_df['Class'] = (
    train_df['ClassAge']
    + num_age_labels * train_df['ClassGender']
    + (num_age_labels * num_gender_labels) * train_df['ClassMask']
)


In [30]:
# Number of distinct combined classes; expected 3 x 2 x 4 = 24, where 4 is the new
# age-group count (3 and 2 are presumably the mask and gender counts — confirm).
train_df['Class'].unique().size # 3 x 2 x 4 = 24

24