In [2]:
import pandas as pd
import numpy as np
import random
import cv2
import PIL
from PIL import Image
import torchvision
import os
import time
import glob
import pickle
import random
from pathlib import Path
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as T
from torchvision.transforms import Resize, ToTensor, Normalize
from torch.utils.data import DataLoader, Dataset, Subset, random_split, TensorDataset, SubsetRandomSampler
from efficientnet_pytorch import EfficientNet
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score
from adamp import AdamP
from torch.optim import Adam
from torch.optim.lr_scheduler import StepLR
from torch.utils.tensorboard import SummaryWriter

In [12]:
# seed
# Fix every random number generator used by the notebook so that repeated runs
# under the same conditions produce the same results.
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
# NOTE: setting PYTHONHASHSEED here only affects child processes started later;
# the hash seed of the already-running interpreter cannot be changed.
os.environ["PYTHONHASHSEED"] = str(seed)
torch.backends.cudnn.deterministic = True
# The cuDNN auto-tuner may pick a different convolution algorithm on each run,
# which defeats the purpose of seeding, so it must stay disabled here.
torch.backends.cudnn.benchmark = False
print(f'seed : {seed}')

# device setting
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f'device : {device}')
# Device properties can only be queried for CUDA devices; asking for them on a
# CPU-only machine raises instead of printing anything useful.
if device.type == "cuda":
    print(torch.cuda.get_device_properties(device))

seed : 42
device : cuda:0
_CudaDeviceProperties(name='Tesla V100-PCIE-32GB', major=7, minor=0, total_memory=32510MB, multi_processor_count=80)


In [13]:
# hyper-parameter

# -- output / logging
name = "02_model_results"  # name of the folder the results are written to
train_log_interval = 20  # number of iterations between two training log lines

# -- data loading
num_workers = 4
batch_size = 16
num_classes = 3

# -- optimisation
criterion_name = 'cross_entropy'  # name of the loss
num_epochs = 1  # number of epochs to train for
lr_decay_step = 10
lr = 1e-4

In [14]:
# Build the MaskDataset
class MaskDataset(Dataset):
    """Dataset pairing image files found under ``img_path`` with the ``age`` label column.

    Image paths are collected from the sub-directories of ``img_path``; labels are
    read from ``train_with_labels.csv`` inside ``label_path``.

    NOTE(review): images and labels are paired purely by position. The order of
    ``os.listdir`` is filesystem dependent, so this is only correct if the CSV rows
    were written in the same order -- confirm against how the CSV was generated.

    Args:
        img_path: Root directory whose sub-directories contain the image files.
        label_path: Directory containing ``train_with_labels.csv``.
        transform: Callable applied to each loaded PIL image. Any non-callable
            value (including the legacy default ``True``) means "no transform".
    """

    # Name fragments marking entries that are not real data (macOS resource
    # forks, Jupyter checkpoint folders).
    _IGNORED_FRAGMENTS = ('._', 'ipynb_checkpoints')

    def __init__(self, img_path, label_path, transform=True):
        self.image = self.load_image(img_path)

        self.transform = transform
        self.label_path = label_path
        self.label = self.load_label(label_path)

        # Positional pairing silently goes wrong when the two sources disagree
        # in size, so surface the mismatch instead of hiding it.
        if len(self.image) != len(self.label):
            import warnings
            warnings.warn(
                f"MaskDataset: found {len(self.image)} images but {len(self.label)} labels; "
                "image/label pairs may be misaligned."
            )

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair at position ``idx``."""
        # Force three channels so grayscale / RGBA files do not break transforms
        # that assume RGB input (e.g. a 3-channel Normalize).
        image = Image.open(self.image[idx]).convert("RGB")
        # Positional lookup, independent of the Series' index labels.
        label = self.label.iloc[idx]

        # Only apply the transform when it can actually be called; the default
        # value ``True`` is not callable.
        if callable(self.transform):
            image = self.transform(image)
        return image, label

    def __len__(self):
        """Number of samples, defined by the number of label rows."""
        return len(self.label)

    @classmethod
    def _is_ignored(cls, entry_name):
        """Return True when ``entry_name`` contains one of the ignored fragments."""
        return any(fragment in entry_name for fragment in cls._IGNORED_FRAGMENTS)

    def load_image(self, paths):
        """Collect the paths of all image files one directory level below ``paths``.

        Entries directly under ``paths`` that are not directories are skipped, as
        are entries whose name contains an ignored fragment.

        Returns:
            list[str]: Image file paths, in ``os.listdir`` order.
        """
        img_lst = []
        for dic in os.listdir(paths):
            if self._is_ignored(dic):
                continue
            dir_path = os.path.join(paths, dic)
            # A stray file in the root would make os.listdir() below raise.
            if not os.path.isdir(dir_path):
                continue
            for image in os.listdir(dir_path):
                if self._is_ignored(image):
                    continue
                img_lst.append(os.path.join(dir_path, image))
        return img_lst

    def load_label(self, paths):
        """Read ``train_with_labels.csv`` from ``paths`` and return its ``age`` column.

        Returns:
            pandas.Series: The ``age`` column of the CSV.
        """
        df = pd.read_csv(os.path.join(paths, "train_with_labels.csv"))
        return df['age']


In [15]:
# Build the data loaders
def getDataloader(dataset, train_idx, valid_idx, batch_size, num_workers):
    """Create the train/validation DataLoaders for one cross-validation split.

    Args:
        dataset: Dataset the two subsets are taken from.
        train_idx: Indices of ``dataset`` used for training.
        valid_idx: Indices of ``dataset`` used for validation.
        batch_size: Batch size used by both loaders.
        num_workers: Number of worker processes used by both loaders.

    Returns:
        tuple: ``(train_loader, val_loader)``.
    """
    # Subset of the dataset selected by train_idx
    train_set = torch.utils.data.Subset(dataset,
                                        indices=train_idx)
    # Subset of the dataset selected by valid_idx
    val_set = torch.utils.data.Subset(dataset,
                                      indices=valid_idx)

    # Training loader: shuffled, incomplete last batch dropped so every
    # training batch has exactly batch_size samples.
    train_loader = torch.utils.data.DataLoader(
        train_set,
        batch_size=batch_size,
        num_workers=num_workers,
        drop_last=True,
        shuffle=True
    )
    # Validation loader: keep the incomplete last batch. Dropping it would
    # silently exclude samples from the validation metrics.
    val_loader = torch.utils.data.DataLoader(
        val_set,
        batch_size=batch_size,
        num_workers=num_workers,
        drop_last=False,
        shuffle=False
    )

    return train_loader, val_loader

In [19]:
# Locations of the training images and of the directory holding the label CSV.
label_path = '/opt/ml/input/data/train'
img_path = '/opt/ml/input/data/train/images'

# Preprocessing applied to every image: resize, random horizontal flip,
# tensor conversion, then per-channel normalisation.
# NOTE(review): this single pipeline, including the random flip, is attached to
# the one dataset object created below -- confirm that is intended for any
# validation subset drawn from it.
transform = T.Compose(
    [
        T.Resize(size=(224, 224)),
        T.RandomHorizontalFlip(p=0.5),
        T.ToTensor(),
        T.Normalize(mean=(0.5, 0.5, 0.5), std=(0.2, 0.2, 0.2)),
    ]
)
DATA = MaskDataset(img_path=img_path, label_path=label_path, transform=transform)

In [20]:
# Load an ImageNet-pretrained ResNet-18 and report the channel count of its
# first convolution and the output size of its final fully connected layer.
resnet18 = torchvision.models.resnet18(pretrained=True)
print("네트워크 필요 입력 채널 개수", resnet18.conv1.in_channels)
print("네트워크 출력 채널 개수 (예측 class type 개수)", resnet18.fc.out_features)

네트워크 필요 입력 채널 개수 3
네트워크 출력 채널 개수 (예측 class type 개수) 1000


In [22]:
import math

# Model used as the starting point for training.
target_model = resnet18

# Stratified 5-fold split: five folds are formed and cross validation is run
# five times.
n_splits = 5
skf = StratifiedKFold(n_splits=n_splits)

# Make sure the directory that receives the results exists.
os.makedirs(
    os.path.join(os.getcwd(), 'results', name),
    exist_ok=True,
)

# Gradient accumulation and early-stopping settings.
accumulation_steps = 2
patience = 10
counter = 0

# Best validation metrics seen so far.
best_val_loss = np.inf
best_val_acc = 0

In [24]:
# Training: run one independent train/validation cycle per cross-validation fold.
import copy  # gives every fold its own copy of the starting weights

for i, (train_idx, valid_idx) in enumerate(skf.split(DATA.image, DATA.label)):

    # Build the train/valid DataLoaders for this fold from the generated indices.
    train_loader, val_loader = getDataloader(DATA, train_idx, valid_idx, batch_size, num_workers)

    # -- model
    # Each fold must start from the same weights. Reusing one model object across
    # folds would let later folds train on weights that have already seen their
    # validation samples.
    # NOTE(review): the network keeps its original classification head (1000
    # outputs for the torchvision ResNet-18 loaded above) and `num_classes` is
    # not used. If the labels are class ids, replacing `fc` with a head sized
    # to the label set is probably intended -- confirm the label encoding first.
    model = copy.deepcopy(target_model).to(device)

    # -- loss & optimizer
    criterion = torch.nn.CrossEntropyLoss()  # cross entropy as the classification objective
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)  # Adam updates the weights of this fold's model

    scheduler = StepLR(optimizer, lr_decay_step, gamma=0.5)

    # -- per-fold early-stopping / checkpoint state
    # Reset for every fold so one fold's best score or patience counter cannot
    # suppress checkpoints or trigger early stopping in the next fold.
    counter = 0
    best_val_acc = 0
    best_val_loss = np.inf

    # -- logging
    os.makedirs(f"results/{name}", exist_ok=True)
    logger = SummaryWriter(log_dir=f"results/cv{i}_{name}")
    num_train_batches = len(train_loader)

    for epoch in range(num_epochs):
        # train loop
        model.train()
        optimizer.zero_grad()
        loss_value = 0
        matches = 0
        for idx, train_batch in enumerate(train_loader):
            inputs, labels = train_batch
            inputs = inputs.to(device)
            labels = labels.to(device)

            outs = model(inputs)
            preds = torch.argmax(outs, dim=-1)
            loss = criterion(outs, labels)

            # -- Gradient Accumulation
            # Scale each micro-batch so the accumulated gradient is an average
            # over the accumulation window rather than a sum.
            (loss / accumulation_steps).backward()

            # Step on every full window, and also on the final batch so that
            # leftover gradients are applied instead of leaking into the next epoch.
            is_last_batch = (idx + 1) == num_train_batches
            if (idx + 1) % accumulation_steps == 0 or is_last_batch:
                optimizer.step()
                optimizer.zero_grad()

            loss_value += loss.item()
            matches += (preds == labels).sum().item()
            if (idx + 1) % train_log_interval == 0:
                train_loss = loss_value / train_log_interval
                # Training batches are always full (the train loader drops the
                # incomplete last batch), so batch_size is the exact denominator.
                train_acc = matches / batch_size / train_log_interval
                current_lr = scheduler.get_last_lr()
                print(
                    f"Epoch[{epoch}/{num_epochs}]({idx + 1}/{len(train_loader)}) || "
                    f"training loss {train_loss:4.4} || training accuracy {train_acc:4.2%} || lr {current_lr}"
                )
                global_step = epoch * num_train_batches + idx
                logger.add_scalar("Train/loss", train_loss, global_step)
                logger.add_scalar("Train/accuracy", train_acc, global_step)

                loss_value = 0
                matches = 0

        scheduler.step()

        # val loop
        print("Calculating validation results...")
        model.eval()
        val_loss_sum = 0.0
        val_correct = 0
        val_seen = 0
        with torch.no_grad():
            for val_batch in val_loader:
                inputs, labels = val_batch
                inputs = inputs.to(device)
                labels = labels.to(device)

                outs = model(inputs)
                preds = torch.argmax(outs, dim=-1)

                batch_len = labels.size(0)
                # Weight the batch-mean loss by the batch size so a smaller
                # final batch does not distort the average.
                val_loss_sum += criterion(outs, labels).item() * batch_len
                val_correct += (labels == preds).sum().item()
                val_seen += batch_len

        # Normalise by the number of samples actually evaluated; this stays
        # correct whether or not the loader drops an incomplete batch.
        val_loss = val_loss_sum / max(val_seen, 1)
        val_acc = val_correct / max(val_seen, 1)
        logger.add_scalar("Val/loss", val_loss, epoch)
        logger.add_scalar("Val/accuracy", val_acc, epoch)

        # Callback1: save the model whenever the validation accuracy improves.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
        if val_acc > best_val_acc:
            print("New best model for val accuracy! saving the model..")
            # The fold index is part of the file name so checkpoints from
            # different folds can be told apart and never overwrite each other.
            torch.save(
                model.state_dict(),
                f"results/{name}/cv{i}_{epoch:03}_accuracy_{val_acc:4.2%}.ckpt",
            )
            best_val_acc = val_acc
            counter = 0
        else:
            counter += 1

        print(
            f"[Val] acc : {val_acc:4.2%}, loss: {val_loss:4.2} || "
            f"best acc : {best_val_acc:4.2%}, best loss: {best_val_loss:4.2}"
        )

        # Callback2: stop training when there has been no improvement for more
        # than `patience` consecutive epochs.
        if counter > patience:
            print("Early Stopping...")
            break

    # Flush and release the TensorBoard writer for this fold.
    logger.close()

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


Epoch[0/1](20/945) || training loss 8.786 || training accuracy 0.00% || lr [0.0001]
Epoch[0/1](40/945) || training loss 7.614 || training accuracy 1.56% || lr [0.0001]
Epoch[0/1](60/945) || training loss 6.694 || training accuracy 2.81% || lr [0.0001]
Epoch[0/1](80/945) || training loss 5.831 || training accuracy 6.56% || lr [0.0001]
Epoch[0/1](100/945) || training loss 4.841 || training accuracy 10.62% || lr [0.0001]
Epoch[0/1](120/945) || training loss 4.235 || training accuracy 12.50% || lr [0.0001]
Epoch[0/1](140/945) || training loss 3.873 || training accuracy 13.75% || lr [0.0001]
Epoch[0/1](160/945) || training loss 3.645 || training accuracy 10.00% || lr [0.0001]
Epoch[0/1](180/945) || training loss 3.342 || training accuracy 12.81% || lr [0.0001]
Epoch[0/1](200/945) || training loss 3.359 || training accuracy 13.44% || lr [0.0001]
Epoch[0/1](220/945) || training loss 3.158 || training accuracy 20.00% || lr [0.0001]
Epoch[0/1](240/945) || training loss 3.314 || training accurac