## 0. Library 불러오기 및 경로설정

In [1]:
import os
import pandas as pd
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from torchvision import transforms
from torchvision.transforms import Resize, ToTensor, Normalize

import albumentations
from albumentations import *
from albumentations.pytorch import ToTensorV2

import torch.optim as optim
from adamp import AdamP

import timm
import torchvision
import torchvision.models as models

import matplotlib.pyplot as plt

from tqdm.auto import tqdm
import time
import numpy as np
import glob
import re

In [2]:
# Root directories of the training data and of the evaluation (test) data.
# train_dir is joined with 'train.csv' and 'images/...' further down;
# test_dir is joined with 'info.csv' and 'images' in the inference cell.
train_dir = '/opt/ml/input/data/train'
test_dir = '/opt/ml/input/data/eval'

## 1. Custom Model 정의

## 2. Custom Dataset 정의

In [3]:
class CustomDataset(Dataset):
    """Training dataset yielding an image plus its four target labels.

    Each item is ``(image, (label, mask, gender, age))`` where all four
    targets are 0-dim tensors taken from the label tensors built in
    ``__init__`` and the image has been moved to ``device``.
    """

    def __init__(self, img_paths, labels, masks, genders, ages, transform, device):
        """Initialize CustomDataset.

        Parameters:
        img_paths (list of string): list of image paths
        labels (list of int): combined class label per image
        masks (list of int): mask-state label per image
        genders (list of int): gender label per image
        ages (list of int): age-group label per image
        transform: callable invoked as ``transform(image=ndarray)['image']``
            (the albumentations calling convention); a falsy value disables it
        device: device the label tensors and every returned image are moved to
        """
        self.img_paths = img_paths
        # Labels are converted and moved to the target device once, up front.
        self.labels = torch.tensor(labels).to(device)
        self.masks = torch.tensor(masks).to(device)
        self.genders = torch.tensor(genders).to(device)
        self.ages = torch.tensor(ages).to(device)
        self.device = device
        self.transform = transform

    def __getitem__(self, index):
        """Load, transform and return the sample at ``index``."""
        # The context manager closes the underlying file deterministically.
        # Forcing RGB guarantees a 3-channel array even for grayscale/RGBA files.
        with Image.open(self.img_paths[index]) as source:
            pixels = np.array(source.convert('RGB'))

        if self.transform:
            image = self.transform(image=pixels)['image']
        else:
            # Without a transform, still return a channel-first tensor instead of
            # failing on ``.to`` below.
            image = torch.from_numpy(pixels.transpose(2, 0, 1))

        targets = (
            self.labels[index],
            self.masks[index],
            self.genders[index],
            self.ages[index],
        )
        return image.to(self.device), targets

    def __len__(self):
        """Return the number of images."""
        return len(self.img_paths)

## 3. Model의 input data, label data 준비

In [4]:
device = torch.device('cuda')
train_csv = pd.read_csv(os.path.join(train_dir, 'train.csv'))

# Parallel lists of model inputs and targets (images are turned into tensors
# later, inside CustomDataset).
train_image_paths = []
train_labels = []
train_masks = []  # mask:0 / incorrect:1 / notwear:2
train_genders = []  # male:0 / female:1
train_ages = []  # (,30):0 / [30, 58):1 / [58,):2

# Maps an image file stem to its mask-state label.
dict_mask = {
    'mask1': 0,
    'mask2': 0,
    'mask3': 0,
    'mask4': 0,
    'mask5': 0,
    'incorrect_mask': 1,
    'normal': 2,
}

dict_gender = {
    'male': 0,
    'female': 1,
}

# One CSV row corresponds to one image folder (original note: 2700 folders).
# iterrows() does not rely on the frame having a default integer index.
for _, row in train_csv.iterrows():
    # Gender and age are per-folder properties, so derive them once per row.
    gender_label = dict_gender[row['gender']]

    age = row['age']
    age_label = 0
    if 30 <= age < 58:
        age_label = 1
    elif 58 <= age:
        age_label = 2

    # Sorted so the sample order is reproducible between runs.
    folder_images = sorted(
        glob.glob(os.path.join(train_dir, 'images', row['path'], '*.*'))
    )
    for path in folder_images:
        # The file stem (e.g. 'mask1', 'normal') identifies the mask state;
        # an unexpected stem raises KeyError.
        mask = os.path.basename(path).split('.')[0]
        mask_label = dict_mask[mask]

        # Combined class id: mask * 6 + gender * 3 + age.
        label = mask_label * 6 + gender_label * 3 + age_label

        train_image_paths.append(path)
        train_labels.append(label)
        train_masks.append(mask_label)
        train_genders.append(gender_label)
        train_ages.append(age_label)

## 4. DataLoader 정의

In [12]:
# Per-channel statistics handed to Normalize below
# (presumably measured on the training images - TODO confirm).
mean = (0.548, 0.504, 0.479)
std = (0.237, 0.247, 0.246)

# Training pipeline. Transform classes are referenced through the
# `albumentations` module so they cannot be confused with the same-named
# torchvision classes imported at the top of the notebook.
# Steps that were tried and are currently disabled:
#   Resize(img_size[0], img_size[1], p=1.0)
#   Resize(200, 260, p=1.0)
#   HorizontalFlip(p=0.5)
#   ShiftScaleRotate(p=0.5)
#   HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.5)
#   GaussNoise(p=0.5)
transform_train = albumentations.Compose(
    [
        # crop size history: 350/350 -> 400/200 -> 300/300
        albumentations.CenterCrop(height=400, width=200),
        albumentations.RandomBrightnessContrast(
            brightness_limit=(-0.1, 0.1),
            contrast_limit=(-0.1, 0.1),
            p=0.5,
        ),
        albumentations.Normalize(mean=mean, std=std, max_pixel_value=255.0, p=1.0),
        ToTensorV2(p=1.0),
    ],
    p=1.0,
)

# Validation pipeline: same crop and normalization, no random augmentation.
# Disabled alternatives: Resize(img_size[0], img_size[1]), Resize(200, 260)
transform_val = albumentations.Compose(
    [
        albumentations.CenterCrop(height=400, width=200),
        albumentations.Normalize(mean=mean, std=std, max_pixel_value=255.0, p=1.0),
        ToTensorV2(p=1.0),
    ],
    p=1.0,
)



## 5. Accuracy 계산 함수 정의 및 Train

In [14]:
# https://discuss.pytorch.org/t/is-this-a-correct-implementation-for-focal-loss-in-pytorch/43327/8
class FocalLoss(nn.Module):
    """Focal loss: NLL loss on log-probabilities scaled by ``(1 - p) ** gamma``.

    Args:
        weight: optional per-class weights forwarded to ``F.nll_loss``.
        gamma: exponent of the ``(1 - p)`` modulating factor.
        reduction: reduction mode forwarded to ``F.nll_loss``.
    """

    def __init__(self, weight=None,
                 gamma=2., reduction='mean'):
        super().__init__()
        self.weight = weight
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, input_tensor, target_tensor):
        """Return the focal loss of logits ``input_tensor`` against class indices."""
        log_probabilities = F.log_softmax(input_tensor, dim=-1)
        probabilities = log_probabilities.exp()
        # Confident predictions (p close to 1) are down-weighted.
        modulating_factor = (1 - probabilities) ** self.gamma
        focal_term = modulating_factor * log_probabilities
        return F.nll_loss(
            focal_term,
            target_tensor,
            weight=self.weight,
            reduction=self.reduction,
        )
    
class F1Loss(nn.Module):
    """Differentiable (soft) macro-F1 loss: ``1 - mean(per-class F1)``.

    Args:
        classes: number of classes used to one-hot encode the targets.
        epsilon: small constant guarding the divisions and bounding F1.
    """

    def __init__(self, classes=18, epsilon=1e-7):
        super().__init__()
        self.classes = classes
        self.epsilon = epsilon

    def forward(self, y_pred, y_true):
        """Return the loss for 2-D logits ``y_pred`` and 1-D class indices ``y_true``."""
        assert y_pred.ndim == 2
        assert y_true.ndim == 1
        y_true = F.one_hot(y_true, self.classes).to(torch.float32)
        y_pred = F.softmax(y_pred, dim=1)

        # Soft confusion counts per class, summed over the batch dimension.
        # True negatives are not part of precision/recall, so they are not computed.
        tp = (y_true * y_pred).sum(dim=0).to(torch.float32)
        fp = ((1 - y_true) * y_pred).sum(dim=0).to(torch.float32)
        fn = (y_true * (1 - y_pred)).sum(dim=0).to(torch.float32)

        precision = tp / (tp + fp + self.epsilon)
        recall = tp / (tp + fn + self.epsilon)

        f1 = 2 * (precision * recall) / (precision + recall + self.epsilon)
        # Keep F1 strictly inside (0, 1) before averaging over classes.
        f1 = f1.clamp(min=self.epsilon, max=1 - self.epsilon)
        return 1 - f1.mean()


In [22]:
# Full training dataset with the training transform attached; the train/validation
# subsets used in the training loop are taken from this object.
dataset = CustomDataset(train_image_paths, train_labels, train_masks, train_genders, train_ages, transform_train, device)

In [23]:
import torch.optim as optim
import torchvision.models as models
from tqdm.auto import tqdm
import time
from adamp import AdamP
import timm
from sklearn.model_selection import KFold , StratifiedKFold
import copy


# Optimizer hyperparameters; both are passed to the AdamP optimizer created below.
LEARNING_RATE = 3e-4
WEIGHT_DECAY = 1e-5


def func_acc(outputs, labels):
    """Return the fraction of samples whose arg-max prediction equals the label.

    Args:
        outputs: 2-D tensor with one row of class scores per sample.
        labels: 1-D sequence/tensor of integer class ids.

    Returns:
        Accuracy as a float in [0, 1]; 0.0 for an empty batch (instead of
        raising ZeroDivisionError).
    """
    total = len(labels)
    if total == 0:
        return 0.0

    # One vectorized comparison instead of a Python loop over tensor elements.
    predictions = outputs[:total].argmax(dim=1)
    targets = torch.as_tensor(labels, device=predictions.device)
    correct = (predictions == targets).sum().item()
    return correct / total
 
    
    
def func_class_acc(outputs, labels, pre_acc_list, num_classes=18):
    """Append per-class hit/miss flags of one batch to the running lists.

    Args:
        outputs: 2-D tensor with one row of class scores per sample.
        labels: 1-D tensor/sequence of integer class ids.
        pre_acc_list: list whose i-th entry holds the flags collected so far
            for class i (only the first ``num_classes`` entries are read).
        num_classes: number of classes to track (default 18).

    Returns:
        A new list of ``num_classes`` lists, e.g. ``[[True, False], [], ...]``;
        ``pre_acc_list`` itself is not modified.
    """
    batch_flags = [[] for _ in range(num_classes)]

    total = len(labels)
    if total:
        # Convert once to Python ints rather than comparing tensors per sample.
        predictions = outputs[:total].argmax(dim=1).tolist()
        targets = torch.as_tensor(labels).tolist()
        for target, prediction in zip(targets, predictions):
            batch_flags[target].append(prediction == target)

    return [pre_acc_list[i] + batch_flags[i] for i in range(num_classes)]


def func_class_acc_mask(outputs, labels, pre_acc_list, num_classes=3):
    """Append per-class hit/miss flags of one batch for the mask head.

    Args:
        outputs: 2-D tensor with one row of mask-class scores per sample.
        labels: 1-D tensor/sequence of integer mask class ids.
        pre_acc_list: list whose i-th entry holds the flags collected so far
            for class i (only the first ``num_classes`` entries are read).
        num_classes: number of mask classes to track (default 3).

    Returns:
        A new list of ``num_classes`` lists of booleans; ``pre_acc_list`` is
        left untouched.
    """
    batch_flags = [[] for _ in range(num_classes)]

    total = len(labels)
    if total:
        # Convert once to Python ints rather than comparing tensors per sample.
        predictions = outputs[:total].argmax(dim=1).tolist()
        targets = torch.as_tensor(labels).tolist()
        for target, prediction in zip(targets, predictions):
            batch_flags[target].append(prediction == target)

    return [pre_acc_list[i] + batch_flags[i] for i in range(num_classes)]


def func_class_acc_gender(outputs, labels, pre_acc_list, num_classes=2):
    """Append per-class hit/miss flags of one batch for the gender head.

    Args:
        outputs: 2-D tensor with one row of gender-class scores per sample.
        labels: 1-D tensor/sequence of integer gender class ids.
        pre_acc_list: list whose i-th entry holds the flags collected so far
            for class i (only the first ``num_classes`` entries are read).
        num_classes: number of gender classes to track (default 2).

    Returns:
        A new list of ``num_classes`` lists of booleans; ``pre_acc_list`` is
        left untouched.
    """
    batch_flags = [[] for _ in range(num_classes)]

    total = len(labels)
    if total:
        # Convert once to Python ints rather than comparing tensors per sample.
        predictions = outputs[:total].argmax(dim=1).tolist()
        targets = torch.as_tensor(labels).tolist()
        for target, prediction in zip(targets, predictions):
            batch_flags[target].append(prediction == target)

    return [pre_acc_list[i] + batch_flags[i] for i in range(num_classes)]


def func_class_acc_age(outputs, labels, pre_acc_list, num_classes=3):
    """Append per-class hit/miss flags of one batch for the age head.

    Args:
        outputs: 2-D tensor with one row of age-class scores per sample.
        labels: 1-D tensor/sequence of integer age class ids.
        pre_acc_list: list whose i-th entry holds the flags collected so far
            for class i (only the first ``num_classes`` entries are read).
        num_classes: number of age classes to track (default 3).

    Returns:
        A new list of ``num_classes`` lists of booleans; ``pre_acc_list`` is
        left untouched.
    """
    batch_flags = [[] for _ in range(num_classes)]

    total = len(labels)
    if total:
        # Convert once to Python ints rather than comparing tensors per sample.
        predictions = outputs[:total].argmax(dim=1).tolist()
        targets = torch.as_tensor(labels).tolist()
        for target, prediction in zip(targets, predictions):
            batch_flags[target].append(prediction == target)

    return [pre_acc_list[i] + batch_flags[i] for i in range(num_classes)]


def cal_class_acc(epoch_class_acc):
    """Turn per-class hit/miss flags into per-class accuracies (18 classes).

    Args:
        epoch_class_acc: list whose i-th entry is the list of boolean flags
            collected for class i; the first 18 entries are read.

    Returns:
        List of 18 floats. A class without any sample gets 0.0 instead of
        raising ZeroDivisionError.
    """
    output = []
    for class_id in range(18):
        flags = epoch_class_acc[class_id]
        hits = sum(1 for flag in flags if flag)
        output.append(hits / len(flags) if flags else 0.0)
    return output


def cal_class_acc_mask(epoch_class_acc):
    """Turn per-class hit/miss flags into per-class accuracies (3 mask classes).

    Args:
        epoch_class_acc: list whose i-th entry is the list of boolean flags
            collected for mask class i; the first 3 entries are read.

    Returns:
        List of 3 floats. A class without any sample gets 0.0 instead of
        raising ZeroDivisionError.
    """
    output = []
    for class_id in range(3):
        flags = epoch_class_acc[class_id]
        hits = sum(1 for flag in flags if flag)
        output.append(hits / len(flags) if flags else 0.0)
    return output


def cal_class_acc_gender(epoch_class_acc):
    """Turn per-class hit/miss flags into per-class accuracies (2 gender classes).

    Args:
        epoch_class_acc: list whose i-th entry is the list of boolean flags
            collected for gender class i; the first 2 entries are read.

    Returns:
        List of 2 floats. A class without any sample gets 0.0 instead of
        raising ZeroDivisionError.
    """
    output = []
    for class_id in range(2):
        flags = epoch_class_acc[class_id]
        hits = sum(1 for flag in flags if flag)
        output.append(hits / len(flags) if flags else 0.0)
    return output


def cal_class_acc_age(epoch_class_acc):
    """Turn per-class hit/miss flags into per-class accuracies (3 age classes).

    Args:
        epoch_class_acc: list whose i-th entry is the list of boolean flags
            collected for age class i; the first 3 entries are read.

    Returns:
        List of 3 floats. A class without any sample gets 0.0 instead of
        raising ZeroDivisionError.
    """
    output = []
    for class_id in range(3):
        flags = epoch_class_acc[class_id]
        hits = sum(1 for flag in flags if flag)
        output.append(hits / len(flags) if flags else 0.0)
    return output


#outputs_label = func_labels(outputs_mask, outputs_gender, outputs_age)
def func_labels(outputs_mask, outputs_gender, outputs_age, device):
    """Combine the three heads' predictions into one-hot 18-class outputs.

    The predicted class of each head is its arg-max, and the combined class id
    is ``mask * 6 + gender * 3 + age``.

    Args:
        outputs_mask: 2-D tensor of mask-class scores, one row per sample.
        outputs_gender: 2-D tensor of gender-class scores, one row per sample.
        outputs_age: 2-D tensor of age-class scores, one row per sample.
        device: device the result is moved to.

    Returns:
        Float32 tensor of shape ``(batch, 18)`` with a single 1 per row.
    """
    mask_class = outputs_mask.argmax(dim=1)
    gender_class = outputs_gender.argmax(dim=1)
    age_class = outputs_age.argmax(dim=1)

    combined = mask_class * 6 + gender_class * 3 + age_class

    # Vectorized one-hot encoding replaces the per-sample loop and the repeated
    # torch.cat, which re-copied the growing result on every iteration.
    one_hot = torch.nn.functional.one_hot(combined, num_classes=18)
    return one_hot.to(device=device, dtype=torch.float32)
        

class MyModel(nn.Module):
    """EfficientNet-B4 backbone with three linear classification heads.

    The backbone's classifier is replaced by a linear layer producing a
    1024-dimensional vector, which feeds the mask (3 outputs), gender
    (2 outputs) and age (3 outputs) heads.
    """

    def __init__(self):
        super().__init__()
        self.model = timm.create_model('tf_efficientnet_b4', pretrained=True)
        # Read the feature width from the backbone's own classifier instead of
        # hard-coding it (1792 for this architecture in the original code).
        backbone_features = self.model.classifier.in_features
        self.model.classifier = nn.Linear(backbone_features, 1024)
        self.fc1 = nn.Linear(1024, 3)  # mask head
        self.fc2 = nn.Linear(1024, 2)  # gender head
        self.fc3 = nn.Linear(1024, 3)  # age head

    def forward(self, x):
        """Return ``(mask, gender, age)`` score tensors for the batch ``x``."""
        fc_output = self.model(x)
        mask = self.fc1(fc_output)
        gender = self.fc2(fc_output)
        age = self.fc3(fc_output)

        return mask, gender, age

    
# Model, loss, optimizer and LR schedule used by the training loop below.
model = MyModel().to(device)

# The same criterion is applied to each of the three heads.
# Alternatives that were tried: nn.CrossEntropyLoss(), F1Loss()
criterion = FocalLoss(gamma=5)

# Alternatives that were tried:
#   optim.SGD(model.parameters(), lr=LEARNING_RATE)
#   optim.Adam(model.parameters(), lr=1e-3)
optimizer = AdamP(
    model.parameters(),
    lr=LEARNING_RATE,
    betas=(0.9, 0.999),
    weight_decay=WEIGHT_DECAY,
)

# Halves the learning rate at each milestone; the milestones count
# scheduler.step() calls, and the training loop steps once per batch.
scheduler = torch.optim.lr_scheduler.MultiStepLR(
    optimizer,
    milestones=[500, 1000, 1500],
    gamma=0.5,
)

# Lowest epoch training loss seen so far; drives checkpoint saving.
min_loss = float('inf')

# Training driver: every epoch draws a fresh stratified 5-fold split (stratified
# on the age label) and, for each fold, trains on the training part and then
# evaluates on the held-out part.
# NOTE(review): the same model is trained across all folds, so each fold's
# validation images have been (or will be) used for training in another fold;
# the validation numbers are therefore not an unbiased estimate.

# torch.save does not create missing directories.
os.makedirs('./save_model', exist_ok=True)

# Shallow copy that shares the image paths and label tensors but uses the
# validation transform; this avoids deep-copying the whole dataset per fold.
validation_view = copy.copy(dataset)
validation_view.transform = transform_val

for epoch in range(10):
    # Python floats: accumulating the loss tensor itself would keep every
    # batch's autograd graph alive for the whole epoch.
    epoch_loss = 0.0
    epoch_acc = 0.0
    epoch_val_loss = 0.0
    epoch_val_acc = 0.0
    train_batches = 0
    val_batches = 0

    # Per-class hit/miss flags, sized to the number of classes of each target.
    epoch_class_acc = [[] for _ in range(18)]
    epoch_class_val_acc = [[] for _ in range(18)]

    epoch_mask_acc = [[] for _ in range(3)]
    epoch_mask_val_acc = [[] for _ in range(3)]

    epoch_gender_acc = [[] for _ in range(2)]
    epoch_gender_val_acc = [[] for _ in range(2)]

    epoch_age_acc = [[] for _ in range(3)]
    epoch_age_val_acc = [[] for _ in range(3)]

    stratified_kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=None)
    fold_splits = stratified_kfold.split(np.zeros(len(train_ages)), train_ages)
    for k_idx, (train_index, validate_index) in enumerate(fold_splits, start=1):
        print(f'## Stratified_K-Fold :: {k_idx}')
        train_dataset = torch.utils.data.dataset.Subset(dataset, train_index)
        valid_dataset = torch.utils.data.dataset.Subset(validation_view, validate_index)

        train_loader = DataLoader(train_dataset,
                                  batch_size=32,
                                  shuffle=True,
                                  num_workers=0,
                                  drop_last=True)

        # Evaluation order does not matter, so no shuffling is needed.
        val_loader = DataLoader(valid_dataset,
                                batch_size=32,
                                shuffle=False,
                                num_workers=0)

        # Training mode (dropout active, batch-norm statistics updated).
        model.train()
        for inputs, (labels, masks, genders, ages) in tqdm(train_loader, desc=f"epoch-{epoch}", total=len(train_loader)):
            optimizer.zero_grad()
            outputs_mask, outputs_gender, outputs_age = model(inputs)

            loss_masks = criterion(outputs_mask, masks)
            loss_genders = criterion(outputs_gender, genders)
            loss_ages = criterion(outputs_age, ages)
            loss = loss_masks + loss_genders + loss_ages

            loss.backward()
            optimizer.step()
            scheduler.step()

            epoch_loss += loss.item()
            train_batches += 1

            # Metrics only; no gradients needed.
            with torch.no_grad():
                outputs_label = func_labels(outputs_mask, outputs_gender, outputs_age, device)
                epoch_acc += func_acc(outputs_label, labels)

                epoch_class_acc = func_class_acc(outputs_label, labels, epoch_class_acc)
                epoch_mask_acc = func_class_acc_mask(outputs_mask, masks, epoch_mask_acc)
                epoch_gender_acc = func_class_acc_gender(outputs_gender, genders, epoch_gender_acc)
                epoch_age_acc = func_class_acc_age(outputs_age, ages, epoch_age_acc)

        # Evaluation mode: without this, dropout stays active and batch-norm
        # statistics are updated from validation batches.
        model.eval()
        with torch.no_grad():
            for val_inputs, (val_labels, val_masks, val_genders, val_ages) in val_loader:
                val_outputs_mask, val_outputs_gender, val_outputs_age = model(val_inputs)
                val_outputs_label = func_labels(val_outputs_mask, val_outputs_gender, val_outputs_age, device)

                val_loss_mask = criterion(val_outputs_mask, val_masks)
                val_loss_gender = criterion(val_outputs_gender, val_genders)
                val_loss_age = criterion(val_outputs_age, val_ages)
                val_loss = val_loss_mask + val_loss_gender + val_loss_age

                epoch_val_loss += val_loss.item()
                val_batches += 1

                epoch_val_acc += func_acc(val_outputs_label, val_labels)

                epoch_class_val_acc = func_class_acc(val_outputs_label, val_labels, epoch_class_val_acc)
                epoch_mask_val_acc = func_class_acc_mask(val_outputs_mask, val_masks, epoch_mask_val_acc)
                epoch_gender_val_acc = func_class_acc_gender(val_outputs_gender, val_genders, epoch_gender_val_acc)
                epoch_age_val_acc = func_class_acc_age(val_outputs_age, val_ages, epoch_age_val_acc)

    # Average over the batches actually processed across all folds.
    epoch_loss /= max(train_batches, 1)
    epoch_acc /= max(train_batches, 1)
    epoch_class_acc = cal_class_acc(epoch_class_acc)

    epoch_mask_acc = cal_class_acc_mask(epoch_mask_acc)
    epoch_gender_acc = cal_class_acc_gender(epoch_gender_acc)
    epoch_age_acc = cal_class_acc_age(epoch_age_acc)

    epoch_val_loss /= max(val_batches, 1)
    epoch_val_acc /= max(val_batches, 1)
    epoch_class_val_acc = cal_class_acc(epoch_class_val_acc)
    epoch_mask_val_acc = cal_class_acc_mask(epoch_mask_val_acc)
    epoch_gender_val_acc = cal_class_acc_gender(epoch_gender_val_acc)
    epoch_age_val_acc = cal_class_acc_age(epoch_age_val_acc)

    # Checkpoint whenever the epoch's training loss improves.
    # NOTE(review): selection uses the training loss, not the validation loss.
    if min_loss > epoch_loss:
        save_path = f'./save_model/epoch_{epoch+1}_loss_{epoch_loss}.pth'
        torch.save(model.state_dict(), save_path)
        min_loss = epoch_loss

    print(f'epoch: {epoch}, epoch_acc: {epoch_acc}, epoch_loss: {epoch_loss}')

    print(f'epoch: {epoch}, epoch_val_acc: {epoch_val_acc}, epoch_val_loss: {epoch_val_loss}')
    print(f'epoch_class_acc:')
    for class_id in range(18):
        print(f'class{class_id}: {epoch_class_acc[class_id]:.3f}(train_label) / {epoch_class_val_acc[class_id]:.3f}(val_label)')

    print(f'\nepoch_mask_acc:')
    for class_id in range(3):
        print(f'class{class_id}: {epoch_mask_acc[class_id]:.3f}(train_mask) / {epoch_mask_val_acc[class_id]:.3f}(val_mask)')

    print(f'\nepoch_gender_acc:')
    for class_id in range(2):
        print(f'class{class_id}: {epoch_gender_acc[class_id]:.3f}(train_gender) / {epoch_gender_val_acc[class_id]:.3f}(val_gender)')

    print(f'\nepoch_age_acc:')
    for class_id in range(3):
        print(f'class{class_id}: {epoch_age_acc[class_id]:.3f}(train_age) / {epoch_age_val_acc[class_id]:.3f}(val_age)')

## Stratified_K-Fold :: 1


HBox(children=(HTML(value='epoch-0'), FloatProgress(value=0.0, max=472.0), HTML(value='')))


## Stratified_K-Fold :: 2


HBox(children=(HTML(value='epoch-0'), FloatProgress(value=0.0, max=472.0), HTML(value='')))


## Stratified_K-Fold :: 3


HBox(children=(HTML(value='epoch-0'), FloatProgress(value=0.0, max=472.0), HTML(value='')))


## Stratified_K-Fold :: 4


HBox(children=(HTML(value='epoch-0'), FloatProgress(value=0.0, max=472.0), HTML(value='')))


## Stratified_K-Fold :: 5


HBox(children=(HTML(value='epoch-0'), FloatProgress(value=0.0, max=472.0), HTML(value='')))


epoch: 0, epoch_acc: 0.9411943855932203, epoch_loss: 0.00924037117511034
epoch: 0, epoch_val_acc: 0.9625, epoch_loss: 0.005793455988168716
epoch_class_acc:
class0: 0.974(train_label) / 0.991(val_label)
class1: 0.914(train_label) / 0.921(val_label)
class2: 0.901(train_label) / 0.980(val_label)
class3: 0.979(train_label) / 0.992(val_label)
class4: 0.940(train_label) / 0.939(val_label)
class5: 0.884(train_label) / 0.932(val_label)
class6: 0.948(train_label) / 0.978(val_label)
class7: 0.884(train_label) / 0.898(val_label)
class8: 0.850(train_label) / 0.972(val_label)
class9: 0.968(train_label) / 0.993(val_label)
class10: 0.921(train_label) / 0.931(val_label)
class11: 0.861(train_label) / 0.946(val_label)
class12: 0.968(train_label) / 0.987(val_label)
class13: 0.890(train_label) / 0.936(val_label)
class14: 0.873(train_label) / 0.955(val_label)
class15: 0.976(train_label) / 0.995(val_label)
class16: 0.920(train_label) / 0.945(val_label)
class17: 0.842(train_label) / 0.864(val_label)

epoch_

HBox(children=(HTML(value='epoch-1'), FloatProgress(value=0.0, max=472.0), HTML(value='')))




KeyboardInterrupt: 

0: 2745<br/>
1: 2050<br/>
2: 415<br/>
3: 3660<br/>
4: 4085<br/>
5: 545<br/>
6: 549<br/>
7: 410<br/>
8: 83<br/>
9: 732<br/>
10: 817<br/>
11: 109<br/>
12: 549<br/>
13: 410<br/>
14: 83<br/>
15: 732<br/>
16: 817<br/>
17: 109<br/>

총이미지: 2700 * 3 배치사이즈: 32 한 epoch당 253번의 iteration 발생<br/>
총이미지: 2700 * 3 배치사이즈: 64 한 epoch당 126번의 iteration 발생<br/>
총이미지: 2700 * 3 배치사이즈: 128 한 epoch당 63번의 iteration 발생<br/>
총이미지: 2700 * 3 배치사이즈: 256 한 epoch당 31번의 iteration 발생<br/>

총이미지: 18828 배치사이즈:256 한 epoch당 74번의 iteration 발생

## 6. Inference

In [24]:
class TestDataset(Dataset):
    """Inference dataset yielding transformed images only (no labels).

    Args:
        img_paths: list of image file paths.
        transform: callable invoked as ``transform(image=ndarray)['image']``
            (the albumentations calling convention); a falsy value disables it.
        device: device every returned image is moved to.
    """

    def __init__(self, img_paths, transform, device):
        self.img_paths = img_paths
        self.device = device
        self.transform = transform

    def __getitem__(self, index):
        """Load, transform and return the image at ``index``."""
        # The context manager closes the underlying file deterministically.
        # Forcing RGB guarantees a 3-channel array even for grayscale/RGBA files.
        with Image.open(self.img_paths[index]) as source:
            pixels = np.array(source.convert('RGB'))

        if self.transform:
            image = self.transform(image=pixels)['image']
        else:
            # Without a transform, still return a channel-first tensor instead of
            # failing on ``.to`` below.
            image = torch.from_numpy(pixels.transpose(2, 0, 1))

        return image.to(self.device)

    def __len__(self):
        """Return the number of images."""
        return len(self.img_paths)


    
# Load the submission metadata and locate the evaluation images.
submission = pd.read_csv(os.path.join(test_dir, 'info.csv'))
image_dir = os.path.join(test_dir, 'images')

# Defined before the dataset is built so this cell does not rely on a
# `device` left over from an earlier cell.
device = torch.device('cuda')

# Build the test dataset and its DataLoader.
image_paths = [os.path.join(image_dir, img_id) for img_id in submission.ImageID]
# Same deterministic preprocessing as validation (crop + normalize).
# Disabled alternatives: Resize(img_size[0], img_size[1]), Resize(200, 260)
transform = albumentations.Compose([
            albumentations.CenterCrop(height=400, width=200),
            albumentations.Normalize(mean=mean, std=std, max_pixel_value=255.0, p=1.0),
            ToTensorV2(p=1.0),
        ], p=1.0)
dataset = TestDataset(image_paths, transform, device)

# Every image has the same size after the center crop, so batching is safe;
# the default batch_size of 1 makes inference needlessly slow.
loader = DataLoader(
    dataset,
    batch_size=64,
    shuffle=False
)

# Define the model and restore the trained weights.
model = MyModel().to(device)

path = './save_model/epoch_1_loss_0.00924037117511034.pth'
# map_location lets the checkpoint load regardless of the device it was saved from.
model.load_state_dict(torch.load(path, map_location=device))
model.eval()

# Predict the combined 18-class label for every test image.
all_predictions = []
with torch.no_grad():
    for images in loader:
        # TestDataset already returns tensors on `device`.
        pred_outputs_mask, pred_outputs_gender, pred_outputs_age = model(images)
        pred = func_labels(pred_outputs_mask, pred_outputs_gender, pred_outputs_age, device)

        # func_labels returns one-hot rows; argmax recovers the class id.
        pred = pred.argmax(dim=-1)
        all_predictions.extend(pred.cpu().tolist())
submission['ans'] = all_predictions

# Save the submission file.
submission.to_csv(os.path.join(test_dir, 'submission.csv'), index=False)
print('test inference is done!')

test inference is done!
