In [2]:
import os
from glob import glob
import random

import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold

import torch
from torch.utils.data import Dataset, DataLoader
from albumentations import *
from albumentations.pytorch import ToTensorV2

import torch.nn as nn
import timm
from torchvision.models import resnext50_32x4d
from efficientnet_pytorch import EfficientNet

In [3]:
# 실험의 Randomness를 제거하여 실험이 같은 조건일 때 동일한 결과를 얻게 해줍니다.
def seed_everything(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed) # if use multi-GPU
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    random.seed(seed)
    
    print(f'이 실험은 seed {seed}로 고정되었습니다.')

In [4]:
class conf:
    seed = 2021
    data_dir = '/opt/ml/input/data/train'
    model_dir = '/opt/ml/pstage_01_image_classification/model'
    n_fold = 5

seed_everything(conf.seed)

이 실험은 seed 2021로 고정되었습니다.


# 1. Dataset 전처리 함수 정의

In [5]:
def get_mask_label(image_name):
    """
    이미지 파일 이름을 통해 mask label을 구합니다.

    :param image_name: 학습 이미지 파일 이름
    :return: mask label
    """
    if 'incorrect_mask' in image_name:
        return 1
    elif 'normal' in image_name:
        return 2
    elif 'mask' in image_name:
        return 0
    else:
        raise ValueError(f'No class for {image_name}')


def get_gender_label(gender):
    """
    gender label을 구하는 함수입니다.
    :param gender: `male` or `female`
    :return: gender label
    """
    return 0 if gender == 'male' else 1


def get_age_label(age):
    """
    age label을 구하는 함수입니다.
    :param age: 나이를 나타내는 int.
    :return: age label
    """
    return 0 if int(age) < 30 else 1 if int(age) < 60 else 2

In [6]:
def convert_gender_age(gender, age):
    """
    gender와 age label을 조합하여 고유한 레이블을 만듭니다.
    이를 구하는 이유는 train/val의 성별 및 연령 분포를 맞추기 위함입니다. (by Stratified K-Fold)
    :param gender: `male` or `female`
    :param age: 나이를 나타내는 int.
    :return: gender & age label을 조합한 레이블
    """
    gender_label = get_gender_label(gender)
    age_label = get_age_label(age)
    return gender_label * 3 + age_label


def convert_label(image_path, sep=False):
    """
    이미지의 label을 구하는 함수입니다.
    :param image_path: 이미지 경로를 나타내는 str
    :param sep: 마스크, 성별, 연령 label을 따로 반환할건지 합쳐서 할지 나타내는 bool 인수입니다. 참일 경우 따로 반환합니다.
    :return: 이미지의 label (int or list)
    """
    image_name = image_path.split('/')[-1]
    mask_label = get_mask_label(image_name)

    profile = image_path.split('/')[-2]
    image_id, gender, race, age = profile.split("_")
    gender_label = get_gender_label(gender)
    age_label = get_age_label(age)
    if sep:
        return mask_label, gender_label, age_label
    else:
        return mask_label * 6 + gender_label * 3 + age_label

In [7]:
IMG_EXTENSIONS = [
    ".jpg", ".JPG", ".jpeg", ".JPEG", ".png",
    ".PNG", ".ppm", ".PPM", ".bmp", ".BMP",
]


def is_image_file(filepath):
    """
    해당 파일이 이미지 파일인지 확인합니다.
    """
    return any(filepath.endswith(extension) for extension in IMG_EXTENSIONS)


def remove_hidden_file(filepath):
    """
    `._`로 시작하는 숨김 파일일 경우 False를 반환합니다.
    """
    filename = filepath.split('/')[-1]
    return False if filename.startswith('._') else True

# 2. 데이터셋 함수 정의

In [8]:
mean, std = (0.485, 0.456, 0.406), (0.229, 0.224, 0.225)


def get_transforms(need=('train', 'val'), img_size=(512, 384)):
    """
    Augmentation 함수를 반홥합니다.
    """
    transformations = {}
    if 'train' in need:
        transformations['train'] = Compose([
            CenterCrop(448, 336, p=1.0),
            RandomResizedCrop(img_size[0], img_size[1], p=1.0),
            HorizontalFlip(p=0.5),
            ShiftScaleRotate(p=0.3),
            HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.3),
            RandomBrightnessContrast(brightness_limit=(-0.1, 0.1), contrast_limit=(-0.1, 0.1), p=0.3),
            Normalize(mean=mean, std=std, max_pixel_value=255.0, p=1.0),
            CoarseDropout(p=0.3),
            Cutout(p=0.3),
            ToTensorV2(p=1.0),
        ], p=1.0)
    if 'val' in need:
        transformations['val'] = Compose([
            CenterCrop(448, 336, p=1.0),
            Resize(img_size[0], img_size[1], p=1.0),
            Normalize(mean=mean, std=std, max_pixel_value=255.0, p=1.0),
            ToTensorV2(p=1.0),
        ], p=1.0)
    return transformations

In [26]:
def get_img(path):
    """
    이미지를 불러옵니다.
    """
    im_bgr = cv2.imread(path)
    im_rgb = im_bgr[:, :, ::-1]
    return im_rgb


class MaskDataset(Dataset):
    def __init__(self, image_dir, info, transform=None):
        self.image_dir = image_dir
        self.info = info
        self.transform = transform

        self.image_paths = [path for name in info.path.values for path in glob.glob(os.path.join(image_dir, name, '*'))]
        self.image_paths = list(filter(is_image_file, self.image_paths))
        self.image_paths = list(filter(remove_hidden_file, self.image_paths))

        self.labels = [convert_label(path, sep=False) for path in self.image_paths]

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        label = self.labels[idx]
        image = get_img(image_path)

        if self.transform:
            image = self.transform(image=image)['image']
        label = torch.eye(18)[label]
        return image, label

    def __len__(self):
        return len(self.image_paths)
    

# 3. 학습/검증 데이터셋 나누기

In [27]:
info = pd.read_csv(f'{conf.data_dir}/train.csv')
info.head()

Unnamed: 0,id,gender,race,age,path
0,1,female,Asian,45,000001_female_Asian_45
1,2,female,Asian,52,000002_female_Asian_52
2,4,male,Asian,54,000004_male_Asian_54
3,5,female,Asian,58,000005_female_Asian_58
4,6,female,Asian,59,000006_female_Asian_59


In [28]:
info['gender_age'] = info.apply(lambda x: convert_gender_age(x.gender, x.age), axis=1)

skf = StratifiedKFold(n_splits=conf.n_fold, shuffle=True)
info.loc[:, 'fold'] = 0
for fold_num, (train_index, val_index) in enumerate(skf.split(X=info.index, y=info.gender_age.values)):
    info.loc[info.iloc[val_index].index, 'fold'] = fold_num

# 4. Pytorch 데이터셋을 Fold를 활용하여 정의하기

In [111]:
image_dir = os.path.join(conf.data_dir, 'images')

for fold_idx in range(conf.n_fold):
    train = info[info.fold != fold_idx].reset_index(drop=True)
    val = info[info.fold == fold_idx].reset_index(drop=True)

    transforms = get_transforms()
    train_dataset = MaskDataset(image_dir, train, transforms['train'])
    val_dataset = MaskDataset(image_dir, val, transforms['val'])
    train_loader = DataLoader(train_dataset, batch_size=100, shuffle=True,
                              num_workers=3)
    val_loader = DataLoader(val_dataset, batch_size=100, shuffle=False,
                            num_workers=3)

# 5.Loss

In [30]:
class FocalLoss(nn.Module):
    def __init__(self, weight=None,
                 gamma=2., reduction='mean'):
        nn.Module.__init__(self)
        self.weight = weight
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, input_tensor, target_tensor):
        log_prob = F.log_softmax(input_tensor, dim=-1)
        prob = torch.exp(log_prob)
        return F.nll_loss(
            ((1 - prob) ** self.gamma) * log_prob,
            target_tensor,
            weight=self.weight,
            reduction=self.reduction
        )


In [31]:
class F1Loss(nn.Module):
    def __init__(self, classes=3, epsilon=1e-7):
        super().__init__()
        self.classes = classes
        self.epsilon = epsilon
    def forward(self, y_pred, y_true):
        assert y_pred.ndim == 2
        assert y_true.ndim == 1
        y_true = F.one_hot(y_true, self.classes).to(torch.float32)
        y_pred = F.softmax(y_pred, dim=1)

        tp = (y_true * y_pred).sum(dim=0).to(torch.float32)
        tn = ((1 - y_true) * (1 - y_pred)).sum(dim=0).to(torch.float32)
        fp = ((1 - y_true) * y_pred).sum(dim=0).to(torch.float32)
        fn = (y_true * (1 - y_pred)).sum(dim=0).to(torch.float32)

        precision = tp / (tp + fp + self.epsilon)
        recall = tp / (tp + fn + self.epsilon)

        f1 = 2 * (precision * recall) / (precision + recall + self.epsilon)
        f1 = f1.clamp(min=self.epsilon, max=1 - self.epsilon)
        return 1 - f1.mean()

# 6.model

In [32]:
resNext = timm.create_model('resnext50_32x4d', True)
n_features = resNext.fc.in_features
resNext.fc = nn.Linear(n_features, 18)

In [33]:
efficientNet = EfficientNet.from_pretrained('efficientnet-b0', num_classes = 18)

Loaded pretrained weights for efficientnet-b0


# 7.train

In [34]:
import argparse
import glob
import json
import os
import random
import re
from importlib import import_module
from pathlib import Path

import numpy as np
import matplotlib.pyplot as plt
import torch
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import Subset, DataLoader
from torch.utils.tensorboard import SummaryWriter

from loss import create_criterion

from efficientnet_pytorch import EfficientNet

In [35]:
def increment_path(path, exist_ok=False):
    path = Path(path)
    if (path.exists() and exist_ok) or (not path.exists()):
        return str(path)
    else:
        dirs = glob.glob(f"{path}*")
        matches = [re.search(rf"%s(\d+)" % path.stem, d) for d in dirs]
        i = [int(m.groups()[0]) for m in matches if m]
        n = max(i) + 1 if i else 2
        return f"{path}{n}"

In [109]:
def get_lr(optimizer):
    for param_group in optimizer.param_groups:
        return param_group['lr']

In [113]:
from loss import create_criterion
image_dir = os.path.join(conf.data_dir, 'images')
save_dir = increment_path(os.path.join(conf.model_dir, 'mycode'))
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
epochs = 1

model = resNext.to(device)
model = torch.nn.DataParallel(model)


 # -- loss & metric
criterion = create_criterion('focal') 
opt_module = torch.optim.Adam
optimizer = opt_module(
    filter(lambda p: p.requires_grad, model.parameters()),
    lr=1e-3,
    weight_decay=5e-4
)
scheduler = StepLR(optimizer, 20, gamma=0.5)


# -- logging
logger = SummaryWriter(log_dir=save_dir)
#with open(os.path.join(save_dir, 'config.json'), 'w', encoding='utf-8') as f:
#    json.dump(var(args), f, ensure_ascii=False, indent=4)


for fold_idx in range(conf.n_fold):
    
    train = info[info.fold != fold_idx].reset_index(drop=True)
    val = info[info.fold == fold_idx].reset_index(drop=True)

    transforms = get_transforms()
    train_dataset = MaskDataset(image_dir, train, transforms['train'])
    val_dataset = MaskDataset(image_dir, val, transforms['val'])
    train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True,
                              num_workers=3)
    val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False,
                            num_workers=3)
    
    best_val_acc = 0
    best_val_loss = np.inf
    for epoch in range(epochs):
        # train loop
        model.train()
        loss_value = 0
        matches = 0
        for idx, train_batch in enumerate(train_loader):
            inputs, labels = train_batch
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            #####################################
            outs = model(inputs).to(device).float()
            preds = torch.argmax(outs, dim=-1)            

            loss = criterion(outs, torch.max(labels,1)[1])
            ##########이거 에러 정말 해결하기 힘드네^^,,,
            loss.backward()
            optimizer.step()

            loss_value += loss.item()
            labels = torch.argmax(labels, dim = -1)
            matches += (preds == labels).sum().item()
            if (idx + 1) % 20 == 0:
                train_loss = loss_value / 20
                train_acc = matches / 64 /20
                current_lr = get_lr(optimizer)
                print(
                    f"Epoch[{epoch}/{epochs}]({idx + 1}/{len(train_loader)}) || "
                    f"training loss {train_loss:4.4} || training accuracy {train_acc:4.2%} || lr {current_lr}"
                )
                logger.add_scalar("Train/loss", train_loss, epoch * len(train_loader) + idx)
                logger.add_scalar("Train/accuracy", train_acc, epoch * len(train_loader) + idx)

                loss_value = 0
                matches = 0

        scheduler.step()

        # val loop
        with torch.no_grad():
            print("Calculating validation results...")
            model.eval()
            val_loss_items = []
            val_acc_items = []
            figure = None
            for val_batch in val_loader:
                inputs, labels = val_batch
                inputs = inputs.to(device)
                labels = labels.to(device)
                
                outs = model(inputs)
                
                preds = torch.argmax(outs, dim=-1)

                loss_item = criterion(outs, labels).item()
                acc_item = (labels == preds).sum().item()
                val_loss_items.append(loss_item)
                val_acc_items.append(acc_item)

                if figure is None:
                    inputs_np = torch.clone(inputs).detach().cpu().permute(0, 2, 3, 1).numpy()
                    inputs_np = dataset_module.denormalize_image(inputs_np, dataset.mean, dataset.std)
                    figure = grid_image(inputs_np, labels, preds) #  dataset != "MaskSplitByProfileDataset"

            val_loss = np.sum(val_loss_items) / len(val_loader)
            val_acc = np.sum(val_acc_items) / len(val_set)
            best_val_loss = min(best_val_loss, val_loss)
            if val_acc > best_val_acc:
                print(f"New best model for val accuracy : {val_acc:4.2%}! saving the best model..")
                torch.save(model.module.state_dict(), f"{save_dir}/best.pth")
                best_val_acc = val_acc
            torch.save(model.module.state_dict(), f"{save_dir}/last.pth")
            print(
                f"[Val] acc : {val_acc:4.2%}, loss: {val_loss:4.2} || "
                f"best acc : {best_val_acc:4.2%}, best loss: {best_val_loss:4.2}"
            )
            logger.add_scalar("Val/loss", val_loss, epoch)
            logger.add_scalar("Val/accuracy", val_acc, epoch)
            logger.add_figure("results", figure, epoch)
            print()


Epoch[0/1](20/3780) || training loss 1.819 || training accuracy 1.41% || lr 0.001
Epoch[0/1](40/3780) || training loss 2.049 || training accuracy 1.25% || lr 0.001
Epoch[0/1](60/3780) || training loss 2.005 || training accuracy 1.80% || lr 0.001
Epoch[0/1](80/3780) || training loss 2.122 || training accuracy 1.02% || lr 0.001
Epoch[0/1](100/3780) || training loss 1.906 || training accuracy 1.41% || lr 0.001
Epoch[0/1](120/3780) || training loss 1.809 || training accuracy 1.41% || lr 0.001
Epoch[0/1](140/3780) || training loss 1.822 || training accuracy 1.72% || lr 0.001
Epoch[0/1](160/3780) || training loss 1.851 || training accuracy 1.41% || lr 0.001
Epoch[0/1](180/3780) || training loss 1.793 || training accuracy 1.41% || lr 0.001
Epoch[0/1](200/3780) || training loss 1.814 || training accuracy 1.17% || lr 0.001
Epoch[0/1](220/3780) || training loss 1.636 || training accuracy 1.56% || lr 0.001
Epoch[0/1](240/3780) || training loss 2.123 || training accuracy 1.02% || lr 0.001
Epoch[0/

RuntimeError: Expected object of scalar type Long but got scalar type Float for argument #2 'target' in call to _thnn_nll_loss_forward

# 8.inference

In [None]:
model = resNext
model_path = os.path.join(save_dir, 'best.pth')
model.load_state_dict(torch.load(model_path, map_location=device))

In [None]:
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model.eval()

img_root = os.path.join(data_dir, 'images')
info_path = os.path.join(data_dir, 'info.csv')
info = pd.read_csv(info_path)

img_paths = [os.path.join(img_root, img_id) for img_id in info.ImageID]
dataset = TestDataset(img_paths,(96, 128))
loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=64,
    num_workers=8,
    shuffle=False,
    pin_memory=use_cuda,
    drop_last=False,
)

print("Calculating inference results..")
preds = []
with torch.no_grad():
    for idx, images in enumerate(loader):
        images = images.to(device)
        pred = model(images)
        pred = pred.argmax(dim=-1)
        preds.extend(pred.cpu().numpy())

info['ans'] = preds
info.to_csv(os.path.join(output_dir, f'output.csv'), index=False)
print(f'Inference Done!')

# 9.evaluation