In [None]:
# cv2, seaborn and timm were not available in the initial environment
#!apt-get -y install libgl1-mesa-glx
#!pip install seaborn

In [86]:
# 대회 데이터 다운로드 후, 'input/data'경로에 압축해제
!wget https://aistages-prod-server-public.s3.amazonaws.com/app/Competitions/000102/data/train.tar.gz && mkdir -p input/data && tar -zxf train.tar.gz -C input/data

# 대회 데이터 경로 내 '._' 파일 삭제
!cd input/data && find . -name '._*' -type f -delete

# timm 라이브러리 설치 
!pip install timm

--2022-02-28 06:37:01--  https://aistages-prod-server-public.s3.amazonaws.com/app/Competitions/000102/data/train.tar.gz
Resolving aistages-prod-server-public.s3.amazonaws.com (aistages-prod-server-public.s3.amazonaws.com)... 52.218.242.147
Connecting to aistages-prod-server-public.s3.amazonaws.com (aistages-prod-server-public.s3.amazonaws.com)|52.218.242.147|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 750259380 (716M) [application/x-tar]
Saving to: ‘train.tar.gz’


2022-02-28 06:37:22 (33.9 MB/s) - ‘train.tar.gz’ saved [750259380/750259380]

Collecting timm
  Downloading timm-0.5.4-py3-none-any.whl (431 kB)
[K     |████████████████████████████████| 431 kB 5.2 MB/s 
Installing collected packages: timm
Successfully installed timm-0.5.4


In [89]:
# Used for locating paths and working with DataFrames
import os
import pandas as pd

# Used for drawing visualization plots
import seaborn as sns
import matplotlib.pyplot as plt

# Used for handling images and data
import cv2
from PIL import Image
from torch.utils.data import Dataset, DataLoader

# Used for building the model
import math
import timm
import torch
from torch import nn
from torch.optim.lr_scheduler import _LRScheduler


# etc
import tqdm
import random

import warnings
import numpy as np
from typing import List

# Suppress FutureWarning messages for the rest of the session
warnings.simplefilter("ignore", FutureWarning)

# Data locations. Every path keeps its trailing slash so that file and folder
# names can be appended by plain string concatenation.
train_path, test_path = 'input/data/train/', 'input/data/eval/'
train_img_path = f'{train_path}images/'
test_img_path = f'{test_path}images/'

In [90]:
# Read train.csv from the train directory and info.csv from the eval directory
train_df = pd.read_csv(f'{train_path}train.csv')
test_df = pd.read_csv(f'{test_path}info.csv')

In [91]:
%%time
# Add the image file path (fullpath), mask state (state) and class label (label) to the training data
def add_info(df: pd.DataFrame, path: str) -> pd.DataFrame:
    """Expand each row of ``df`` into one row per image file, with state and label.

    For every row, the folder ``path + row.path`` is listed and one record is
    produced per file found in it:

    * ``path``     - the folder name (shared with ``df``, so it acts as merge key)
    * ``fullpath`` - ``path + row.path + '/' + file``
    * ``state``    - ``'mask'`` when the file name starts with ``mask``,
      otherwise the file name up to its first dot
    * ``label``    - ``state_idx * 6 + gender_idx * 3 + age_idx`` where
      ``state_idx`` is the position of ``state`` in
      ``['mask', 'incorrect_mask', 'normal']``, ``gender_idx`` is 0 for
      ``'male'`` and 1 otherwise, and ``age_idx`` is ``min(2, age // 30)``

    Args:
        df: Frame with ``path``, ``gender`` and ``age`` columns.
            NOTE(review): the ``gender`` column name is assumed; the previous
            code compared two string literals and never read the row's gender.
        path: Directory holding the per-row folders, including the trailing
            separator (it is concatenated directly with ``row.path``).

    Returns:
        ``df`` merged with the generated records via ``pd.merge`` defaults
        (inner join on the columns both frames share).

    Raises:
        ValueError: If a file's state is not one of the three known states.
    """
    states = ['mask', 'incorrect_mask', 'normal']
    records = []
    for row in df.itertuples(index=False):
        folder = path + row.path
        # Previously this term was `0 if 'female' == 'male' else 1`, a constant
        # comparison that always yielded 1 regardless of the actual gender.
        gender_idx = 0 if row.gender == 'male' else 1
        age_idx = min(2, row.age // 30)
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # resulting row order reproducible between runs and machines.
        for file in sorted(os.listdir(folder)):
            state = 'mask' if file.startswith('mask') else file.split('.')[0]
            records.append({
                'path': row.path,
                'fullpath': folder + '/' + file,
                'state': state,
                'label': states.index(state) * 6 + gender_idx * 3 + age_idx,
            })
    return pd.merge(df, pd.DataFrame(records))

# Rebind train_df to the frame returned by add_info (one row per image file
# found under train_img_path); the bare name on the last line displays it
train_df = add_info(train_df, train_img_path)
train_df

CPU times: user 1.75 s, sys: 173 ms, total: 1.92 s
Wall time: 5.25 s


In [92]:
# Source: https://pytorch.org/docs/stable/data.html?highlight=dataset#torch.utils.data.Dataset
class ImgDataset(Dataset):
    """Dataset that loads images listed in a DataFrame and returns (image, label).

    Args:
        df: Frame with a ``fullpath`` column (image file path) and a ``label``
            column. A re-indexed copy is stored, so the caller's frame is not
            modified.
        transforms: Optional callable invoked as ``transforms(image=img)``
            whose result is indexed with ``['image']`` (this looks like the
            albumentations calling convention - confirm against the caller).
    """

    def __init__(self, df, transforms=None):
        super().__init__()
        # Fresh 0..n-1 index so the integer passed to __getitem__ matches the
        # index labels used for the column lookups below
        self.df = df.reset_index(drop=True).copy()
        self.transforms = transforms

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index: int):
        """Return ``(image, label)`` for row ``index``.

        Raises:
            FileNotFoundError: If OpenCV cannot read the image file.
        """
        img_path = self.df.fullpath[index]
        bgr = cv2.imread(img_path)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor
        if bgr is None:
            raise FileNotFoundError(f"Could not read image file: {img_path}")

        # OpenCV loads images as BGR; convert to RGB
        img = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

        if self.transforms:
            img = self.transforms(image=img)['image']

        return img, self.df.label[index]


# Source: https://pytorch.org/docs/stable/data.html?highlight=dataloader#torch.utils.data.DataLoader
def Dataloader(df, train_idx=None, valid_idx=None):
    """Build the training and validation DataLoaders.

    Batch size and worker count are read from ``CFG['batch_size']`` and
    ``CFG['num_workers']``. The training loader shuffles; the validation
    loader does not. Both pin memory.

    Args:
        df: Frame to split; only used when the index arguments are given.
        train_idx: Positional row indices (``df.iloc``) of the training split.
        valid_idx: Positional row indices (``df.iloc``) of the validation split.

    Returns:
        Tuple ``(train_loader, val_loader)``.

    Raises:
        ValueError: If only one of ``train_idx`` / ``valid_idx`` is given.
        NameError: If no indices are given and the notebook-level frames
            ``train_`` / ``valid_`` do not exist.
    """
    if (train_idx is None) != (valid_idx is None):
        raise ValueError("train_idx and valid_idx must be given together")

    if train_idx is None:
        # Backward-compatible path: the earlier version ignored `df` and always
        # read these two notebook-level frames, so plain `Dataloader(df)` calls
        # keep behaving exactly as before.
        # NOTE(review): `train_` / `valid_` are not defined in the visible part
        # of the notebook - prefer passing explicit indices.
        train_split, valid_split = train_, valid_
    else:
        train_split, valid_split = df.iloc[train_idx], df.iloc[valid_idx]

    train_ds = ImgDataset(train_split)
    valid_ds = ImgDataset(valid_split)

    # Settings shared by both loaders
    loader_kwargs = dict(
        batch_size=CFG['batch_size'],
        num_workers=CFG['num_workers'],
        pin_memory=True,
    )
    train_loader = torch.utils.data.DataLoader(train_ds, shuffle=True, **loader_kwargs)
    val_loader = torch.utils.data.DataLoader(valid_ds, shuffle=False, **loader_kwargs)

    return train_loader, val_loader

In [93]:
# Custom CosineAnnealingWarmRestarts (adds a warm-up start and a decaying maximum)
# Source: https://gaussian37.github.io/dl-pytorch-lr_scheduler/#custom-cosineannealingwarmrestarts-1
class CosineAnnealingWarmUpRestarts(_LRScheduler):
    """Cosine annealing with restarts, a linear warm-up and a decaying peak.

    Inside a cycle of length ``T_i`` each parameter group's learning rate rises
    linearly from its base value to ``eta_max`` during the first ``T_up`` steps
    and then follows a half cosine from ``eta_max`` back towards the base value.
    After every update the peak is set to ``eta_max * gamma ** cycle``.

    Args:
        optimizer: Wrapped optimizer; its initial learning rates are the base values.
        T_0: Length of the first cycle (positive int).
        T_mult: Cycle-length multiplier applied at each restart (int >= 1).
        eta_max: Peak learning rate of the first cycle.
        T_up: Number of warm-up steps at the start of each cycle (int >= 0).
        gamma: Factor applied to the peak once per completed cycle.
        last_epoch: Index of the last epoch; also the initial in-cycle position.

    Raises:
        ValueError: If ``T_0``, ``T_mult`` or ``T_up`` fails validation.
    """

    def __init__(self, optimizer, T_0, T_mult=1, eta_max=0.1, T_up=0, gamma=1., last_epoch=-1):
        if T_0 <= 0 or not isinstance(T_0, int):
            raise ValueError(f"Expected positive integer T_0, but got {T_0}")
        if T_mult < 1 or not isinstance(T_mult, int):
            raise ValueError(f"Expected integer T_mult >= 1, but got {T_mult}")
        if T_up < 0 or not isinstance(T_up, int):
            raise ValueError(f"Expected positive integer T_up, but got {T_up}")

        # Fixed configuration
        self.T_0, self.T_mult, self.T_up, self.gamma = T_0, T_mult, T_up, gamma
        self.base_eta_max = eta_max

        # State that changes while stepping
        self.eta_max = eta_max      # peak of the current cycle
        self.T_i = T_0              # length of the current cycle
        self.cycle = 0              # number of the current cycle
        self.T_cur = last_epoch     # position inside the current cycle

        # The base-class constructor performs the first step() call itself
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        """Return the learning rate of every parameter group for the current position."""
        if self.T_cur == -1:
            return self.base_lrs

        if self.T_cur < self.T_up:
            # Warm-up: linear ramp from the base value to the peak
            ramp = []
            for base_lr in self.base_lrs:
                ramp.append((self.eta_max - base_lr) * self.T_cur / self.T_up + base_lr)
            return ramp

        # Annealing: half cosine over the part of the cycle after the warm-up
        cosine_term = 1 + math.cos(math.pi * (self.T_cur - self.T_up) / (self.T_i - self.T_up))
        return [base_lr + (self.eta_max - base_lr) * cosine_term / 2 for base_lr in self.base_lrs]

    def step(self, epoch=None):
        """Advance by one step, or jump to ``epoch`` when it is given, then apply the new rates."""
        if epoch is None:
            epoch = self.last_epoch + 1
            self.T_cur += 1
            if self.T_cur >= self.T_i:
                # Restart: begin the next cycle and rescale its length
                self.cycle += 1
                self.T_cur -= self.T_i
                self.T_i = (self.T_i - self.T_up) * self.T_mult + self.T_up
        elif epoch >= self.T_0 and self.T_mult == 1:
            # All cycles have length T_0
            self.T_cur = epoch % self.T_0
            self.cycle = epoch // self.T_0
        elif epoch >= self.T_0:
            # Cycle lengths grow geometrically; recover the cycle number from the epoch
            n = int(math.log((epoch / self.T_0 * (self.T_mult - 1) + 1), self.T_mult))
            self.cycle = n
            self.T_cur = epoch - self.T_0 * (self.T_mult ** n - 1) / (self.T_mult - 1)
            self.T_i = self.T_0 * self.T_mult ** (n)
        else:
            # Still inside the first cycle
            self.T_i = self.T_0
            self.T_cur = epoch

        self.eta_max = self.base_eta_max * (self.gamma ** self.cycle)
        self.last_epoch = math.floor(epoch)
        for group, new_lr in zip(self.optimizer.param_groups, self.get_lr()):
            group['lr'] = new_lr

In [94]:

class ModelEfficientNet(nn.Module):
    """timm model whose ``classifier`` layer is replaced by an ``n_class``-way linear head.

    Args:
        model_arch: Architecture name passed to ``timm.create_model``.
        n_class: Number of output features of the new linear head.
        pretrained: Forwarded to ``timm.create_model``.
    """

    def __init__(self, model_arch, n_class, pretrained=True):
        super().__init__()
        self.model = timm.create_model(model_arch, pretrained=pretrained)
        # Read the input width of the existing head BEFORE replacing it; the
        # two statements were previously in the opposite order, so
        # `n_features` was used before assignment (UnboundLocalError).
        n_features = self.model.classifier.in_features
        self.model.classifier = nn.Linear(n_features, n_class)

    def forward(self, x):
        """Run ``x`` through the wrapped model and return its output."""
        return self.model(x)
    
# TODO: add resnet and vit models

In [97]:
# Experiment configuration shared across the notebook
# (e.g. Dataloader reads 'batch_size' and 'num_workers' from it)
CFG = dict(
    # Use the first CUDA device when one is available, otherwise the CPU
    device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
    accum_iter=1,
    lr=1e-4,
    seed=3033,
    epoch=30,
    fold_num=5,

    # DataLoader
    num_workers=4,
    batch_size=32,

    # Optimizer
    optimizer='Adam',

    # Scheduler
    scheduler='CosineAnnealingWarmUpRestarts',

    # Model
    base='efficient',  # efficient

    # Which EfficientNet variant to use
    # Source: https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/efficientnet.py
    model='tf_efficientnet_b4_ns',
)