## Libarary 불러오기 및 경로설정

In [15]:
import os, sys
import random
from importlib import import_module
from glob import glob
import numpy as np
import pandas as pd
import cv2
from PIL import Image
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm
from time import time

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, Subset
from torch.optim import SGD, Adam, AdamW
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau, CosineAnnealingLR
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

from torchvision import transforms
from torchvision.transforms import Resize, ToTensor, Normalize
import torchvision.models as models

import matplotlib.pyplot as plt
from dataset import *

In [16]:
def seed_everything(seed):
    """
    동일한 조건으로 학습을 할 때, 동일한 결과를 얻기 위해 seed를 고정시킵니다.
    
    Args:
        seed: seed 정수값
    """
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if use multi-GPU
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    random.seed(seed)
seed_everything(42)

## Parameter 설정

In [22]:
# -- parameters
img_root = '/opt/ml/input/data/train/images'  # 학습 이미지 폴더의 경로
label_path = '/opt/ml/input/data/train/train.csv'  # 학습 메타파일의 경로

model_name = "resnet50"  # 모델 이름
use_pretrained = True  # pretrained-model의 사용 여부
freeze_backbone = False  # classifier head 이 외 부분을 업데이트되지 않게 할 것인지 여부

##[not yet] 변화 시켜보기
val_split = 0.4  # validation dataset의 비율
batch_size = 64 # 2-> 64
num_workers = 0 # 0-> 4로 change
num_classes = 18

num_epochs = 10  # 학습할 epoch의 수 5->10
lr = 1e-4
lr_decay_step = 10

train_log_interval = 20  # logging할 iteration의 주기
name = "res11_resnet50_base_aug_default_batch64_f1loss"  # 결과를 저장하는 폴더의 이름

# -- settings
use_cuda = torch.cuda.is_available()|
device = torch.device("cuda" if use_cuda else "cpu")

## Loss Function
#### 사용해볼 loss Function
#### --> Cross Entropy Loss, Focal Loss, Label Smoothing, F1 Loss

### Cross Entropy Loss

In [4]:
# -- Cross Entropy Loss
class CrossEntropyLoss(nn.Module):
    def __init__(self, weight=None, reduction='mean'):
        nn.Module.__init__(self)
        self.weight = weight
        self.reduction = reduction

    def forward(self, input_tensor, target_tensor):
        log_prob = F.log_softmax(input_tensor, dim=-1)
        prob = torch.exp(log_prob)
        return F.nll_loss(
            log_prob,
            target_tensor,
            weight=self.weight,
            reduction=self.reduction
        )

## Creterion 정의!!!

In [19]:
#creterion 정의
criterion = CrossEntropyLoss()

## Model(Pretrained) && Optimizer 정의 
#### pytorch 기본 제공 pretrained 모델은 여기 참조 https://pytorch.org/vision/stable/models.html 
### efficient net github 주소 :: https://github.com/lukemelas/EfficientNet-PyTorch

### ResNet-50

In [20]:
#pretrained model 이용
model = models.resnet50(pretrained=True).to(device)
'''
# -- model
model_cls = getattr(import_module("model"), model_name)
print(model_cls)
model = model_cls(
    num_classes=num_classes,
    pretrained=use_pretrained,
    freeze=freeze_backbone
).to(device)
'''

'\n# -- model\nmodel_cls = getattr(import_module("model"), model_name)\nprint(model_cls)\nmodel = model_cls(\n    num_classes=num_classes,\n    pretrained=use_pretrained,\n    freeze=freeze_backbone\n).to(device)\n'

### EfficientNet-b7

In [11]:
from efficientnet_pytorch import EfficientNet
model = EfficientNet.from_pretrained('efficientnet-b7').to(device)

Loaded pretrained weights for efficientnet-b7


### EfficientNet-b4

In [6]:
from efficientnet_pytorch import EfficientNet
model = EfficientNet.from_pretrained('efficientnet-b4').to(device)

Loaded pretrained weights for efficientnet-b4


## Optimizer

In [23]:
# -- Adam optimizer
optimizer = Adam(model.parameters(), lr=lr, weight_decay=5e-4)

In [9]:
# -- SGD optimizer
optimizer = SGD(model.parameters(), lr=lr, weight_decay=5e-4)

In [23]:
#list(model.named_children())

## Scheduler
* Scheduler은 optimizer의 learning rate를 동적으로 변경시키는 기능을 합니다.
* Optimizer과 Scheduler를 적절히 활용하면 모델이 좋은 성능으로 Fitting하는데 도움을 줍니다.

In [24]:
# -- scheduler: StepLR
# 지정된 step마다 learning rate를 감소시킵니다.
scheduler = StepLR(optimizer, lr_decay_step, gamma=0.5)

In [16]:
# -- scheduler: ReduceLROnPlateau
# 성능이 향상되지 않을 때 learning rate를 줄입니다. patience=10은 10회 동안 성능 향상이 없을 경우입니다.
scheduler = ReduceLROnPlateau(optimizer, factor=0.1, patience=10)

In [17]:
# -- scheduler: CosineAnnealingLR
# CosineAnnealing은 learning rate를 cosine 그래프처럼 변화시킵니다.
scheduler = CosineAnnealingLR(optimizer, T_max=2, eta_min=0.)

## Training process

### dataset 정의

In [25]:
# -- dataset
dataset_module = getattr(import_module("dataset"), 'MaskBaseDataset')
dataset = dataset_module(
    data_dir=img_root,
)
num_classes = dataset.num_classes  # 18

# -- augmentation
transform_module = getattr(import_module("dataset"), 'BaseAugmentation') #Base -> CustomAugmentation -> 취소
transform = transform_module(
    resize=[128, 96],
    mean=dataset.mean,
    std=dataset.std,
)
dataset.set_transform(transform)


# -- data_loader
train_set, val_set = dataset.split_dataset()

train_loader = torch.utils.data.DataLoader(
    train_set,
    batch_size=batch_size,
    num_workers=num_workers,
    shuffle=True
)

val_loader = torch.utils.data.DataLoader(
    val_set,
    batch_size=batch_size,
    num_workers=num_workers,
    shuffle=False
)

### Callback - Checkpoint, Early Stopping

In [26]:
# -- Callback1: Checkpoint - Accuracy가 높아질 때마다 모델을 저장합니다.
# 학습 코드에서 이어집니다.

# -- Callback2: Early Stopping - 성능이 일정 기간동안 향상이 없을 경우 학습을 종료합니다.
patience = 10
counter = 0
# 학습 코드에서 이어집니다.

### Training Method - Gradient Accumulation

In [27]:
# -- Gradient Accumulation
accumulation_steps = 2
# 학습코드에서 이어집니다.

## Training Loop

In [28]:
os.makedirs(os.path.join(os.getcwd(), 'results', name), exist_ok=True)

counter = 0
best_val_acc = 0
best_val_loss = np.inf
for epoch in range(num_epochs):
    # train loop
    model.train()
    loss_value = 0
    matches = 0
    for idx, train_batch in enumerate(train_loader):
        inputs, labels = train_batch
        inputs = inputs.to(device)
        labels = labels.to(device)

        outs = model(inputs)
        preds = torch.argmax(outs, dim=-1)
        loss = criterion(outs, labels)

        loss.backward()
        
        # -- Gradient Accumulation
        if (idx+1) % accumulation_steps == 0:
            optimizer.step()
            optimizer.zero_grad()

        loss_value += loss.item()
        matches += (preds == labels).sum().item()
        if (idx + 1) % train_log_interval == 0:
            train_loss = loss_value / train_log_interval
            train_acc = matches / batch_size / train_log_interval
            current_lr = scheduler.get_last_lr()
            print(
                f"Epoch[{epoch}/{num_epochs}]({idx + 1}/{len(train_loader)}) || "
                f"training loss {train_loss:4.4} || training accuracy {train_acc:4.2%} || lr {current_lr}"
            )

            loss_value = 0
            matches = 0

    scheduler.step()

    # val loop
    with torch.no_grad():
        print("Calculating validation results...")
        model.eval()
        val_loss_items = []
        val_acc_items = []
        for val_batch in val_loader:
            inputs, labels = val_batch
            inputs = inputs.to(device)
            labels = labels.to(device)

            outs = model(inputs)
            preds = torch.argmax(outs, dim=-1)

            loss_item = criterion(outs, labels).item()
            acc_item = (labels == preds).sum().item()
            val_loss_items.append(loss_item)
            val_acc_items.append(acc_item)

        val_loss = np.sum(val_loss_items) / len(val_loader)
        val_acc = np.sum(val_acc_items) / len(val_set)
        
        # Callback1: validation accuracy가 향상될수록 모델을 저장합니다.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
        if val_acc > best_val_acc:
            print("New best model for val accuracy! saving the model..")
            torch.save(model.state_dict(), f"results/{name}/{epoch:03}_accuracy_{val_acc:4.2%}.ckpt")
            best_val_acc = val_acc
            counter = 0
        else:
            counter += 1
        # Callback2: patience 횟수 동안 성능 향상이 없을 경우 학습을 종료시킵니다.
        if counter > patience:
            print("Early Stopping...")
            break
        
        
        print(
            f"[Val] acc : {val_acc:4.2%}, loss: {val_loss:4.2} || "
            f"best acc : {best_val_acc:4.2%}, best loss: {best_val_loss:4.2}"
        )

RuntimeError: Class values must be smaller than num_classes.

In [50]:
print(os.getcwd())

/opt/ml/code


## Test

In [14]:
test_img_root = '/opt/ml/input/data/eval/'  # 학습 이미지 폴더의 경로
# public, private 테스트셋이 존재하니 각각의 예측결과를 저장합니다.

# meta 데이터와 이미지 경로를 불러옵니다.
submission = pd.read_csv(os.path.join(test_img_root, 'info.csv'))
image_dir = os.path.join(test_img_root, 'images')

# Test Dataset 클래스 객체를 생성하고 DataLoader를 만듭니다.
image_paths = [os.path.join(image_dir, img_id) for img_id in submission.ImageID]
resize = (96, 128)
'''
transform = transforms.Compose([
    Resize((96, 128), Image.BILINEAR),
    ToTensor(),
    Normalize(mean=(0.548, 0.504, 0.479), std=(0.237, 0.247, 0.246)),
])
test_dataset = TestDataset(test_img_root, transform)
'''
test_dataset = TestDataset(image_paths, resize)

test_loader = DataLoader(
    test_dataset,
    shuffle=False
)
print(test_loader.dataset)
# 모델이 테스트 데이터셋을 예측하고 결과를 저장합니다.
all_predictions = []
for images in test_loader:
    with torch.no_grad():
        images = images.to(device)
        pred = model(images)
        pred = pred.argmax(dim=-1)
        all_predictions.extend(pred.cpu().numpy())
submission['ans'] = all_predictions

# 제출할 파일을 저장합니다.
submission.to_csv(os.path.join(test_img_root, 'submission.csv'), index=False)
print('test inference is done!')


<dataset.TestDataset object at 0x7fcf5842f850>
test inference is done!
