In [1]:
import os
import sys
from glob import glob
import numpy as np
import pandas as pd
import cv2
from PIL import Image
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm
from time import time
from enum import Enum

import torch
import torch.utils.data as data

import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
data_dir = '/opt/ml/input/data/train'
img_dir = f'{data_dir}/images'
df_path = f'{data_dir}/train.csv'

In [3]:
mean, std = (0.56019265, 0.52410305, 0.50145299), (0.23308824, 0.24294489, 0.2456003)

In [4]:
from albumentations import *
from albumentations.pytorch import ToTensorV2


def get_transforms(need=('train', 'val'), img_size=(512, 384), mean=(0.548, 0.504, 0.479), std=(0.237, 0.247, 0.246)):
    """
    train 혹은 validation의 augmentation 함수를 정의합니다. train은 데이터에 많은 변형을 주어야하지만, validation에는 최소한의 전처리만 주어져야합니다.
    
    Args:
        need: 'train', 혹은 'val' 혹은 둘 다에 대한 augmentation 함수를 얻을 건지에 대한 옵션입니다.
        img_size: Augmentation 이후 얻을 이미지 사이즈입니다.
        mean: 이미지를 Normalize할 때 사용될 RGB 평균값입니다.
        std: 이미지를 Normalize할 때 사용될 RGB 표준편차입니다.

    Returns:
        transformations: Augmentation 함수들이 저장된 dictionary 입니다. transformations['train']은 train 데이터에 대한 augmentation 함수가 있습니다.
    """
    """

    추가 항목
    Transpose(p=0.5),
    #HorizontalFlip(p=0.5),
    #VerticalFlip(p=0.5),
    #ShiftScaleRotate(p=0.5),
    ColorJitter(p=0.5),
    RandomCrop(350, 350)
    
    """
    transformations = {}
    if 'train' in need:
        transformations['train'] = Compose([
            Resize(img_size[0], img_size[1], p=1.0),
            Transpose(p=0.5),
            HorizontalFlip(p=0.5),
            # VerticalFlip(p=0.5),
            ShiftScaleRotate(p=0.5),
            ColorJitter(p=0.5),     
            CenterCrop(width =350, height = 350, always_apply = True),
            SafeRotate(limit = 45, p = 0.5),
            HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.5),
            RandomBrightnessContrast(brightness_limit=(-0.1, 0.1), contrast_limit=(-0.1, 0.1), p=0.5),
            GaussNoise(p=0.5),
            Normalize(mean=mean, std=std, max_pixel_value=255.0, p=1.0),
            ToTensorV2(p=1.0),
        ], p=1.0)
    if 'val' in need:
        transformations['val'] = Compose([
            Resize(img_size[0], img_size[1]),
            HorizontalFlip(p=0.5),
            # VerticalFlip(p=0.5),
            CenterCrop(width =350, height = 350,always_apply = True),
            Normalize(mean=mean, std=std, max_pixel_value=255.0, p=1.0),
            ToTensorV2(p=1.0),
        ], p=1.0)
    return transformations

In [5]:
### 마스크 여부, 성별, 나이를 mapping할 클래스를 생성합니다.

class MaskLabels(int, Enum):
    MASK = 0
    INCORRECT = 1
    NORMAL = 2


class GenderLabels(int, Enum):
    MALE = 0
    FEMALE = 1

    @classmethod
    def from_str(cls, value: str) -> int:
        value = value.lower()
        if value == "male":
            return cls.MALE
        elif value == "female":
            return cls.FEMALE
        else:
            raise ValueError(f"Gender value should be either 'male' or 'female', {value}")


class AgeLabels(int, Enum):
    YOUNG = 0
    MIDDLE = 1
    OLD = 2

    @classmethod
    def from_number(cls, value: str) -> int:
        try:
            value = int(value)
        except Exception:
            raise ValueError(f"Age value should be numeric, {value}")

        if value < 30:
            return cls.YOUNG
        elif value < 60:
            return cls.MIDDLE
        else:
            return cls.OLD

In [6]:
class MaskBaseDataset(data.Dataset):
    num_classes = 3 + 2 + 3

    _file_names = {
        "mask1": MaskLabels.MASK,
        "mask2": MaskLabels.MASK,
        "mask3": MaskLabels.MASK,
        "mask4": MaskLabels.MASK,
        "mask5": MaskLabels.MASK,
        "incorrect_mask": MaskLabels.INCORRECT,
        "normal": MaskLabels.NORMAL
    }

    image_paths = []
    mask_labels = []
    gender_labels = []
    age_labels = []

    def __init__(self, img_dir, mean, std, transform=None):
        """
        MaskBaseDataset을 initialize 합니다.

        Args:
            img_dir: 학습 이미지 폴더의 root directory 입니다.
            transform: Augmentation을 하는 함수입니다.
        """
        self.img_dir = img_dir
        self.mean = mean
        self.std = std
        self.transform = transform

        self.setup()

    def set_transform(self, transform):
        """
        transform 함수를 설정하는 함수입니다.
        """
        self.transform = transform
        
    def setup(self):
        """
        image의 경로와 각 이미지들의 label을 계산하여 저장해두는 함수입니다.
        """
        profiles = os.listdir(self.img_dir)
        for profile in profiles:
            if profile.startswith("."):  # "." 로 시작하는 파일은 무시합니다
                continue

            img_folder = os.path.join(self.img_dir, profile)
            for file_name in os.listdir(img_folder):
                _file_name, ext = os.path.splitext(file_name)
                if _file_name not in self._file_names:  # "." 로 시작하는 파일 및 invalid 한 파일들은 무시합니다
                    continue

                img_path = os.path.join(self.img_dir, profile, file_name)  # (resized_data, 000004_male_Asian_54, mask1.jpg)
                mask_label = self._file_names[_file_name]

                id, gender, race, age = profile.split("_")
                gender_label = GenderLabels.from_str(gender)
                age_label = AgeLabels.from_number(age)

                self.image_paths.append(img_path)
                self.mask_labels.append(mask_label)
                self.gender_labels.append(gender_label)
                self.age_labels.append(age_label)

    def __getitem__(self, index):
        """
        데이터를 불러오는 함수입니다. 
        데이터셋 class에 데이터 정보가 저장되어 있고, index를 통해 해당 위치에 있는 데이터 정보를 불러옵니다.
        
        Args:
            index: 불러올 데이터의 인덱스값입니다.
        """
        # 이미지를 불러옵니다.
        image_path = self.image_paths[index]
        image = Image.open(image_path)
        
        # 레이블을 불러옵니다.
        mask_label = self.mask_labels[index]
        gender_label = self.gender_labels[index]
        age_label = self.age_labels[index]
        # multi_class_label = mask_label * 6 + gender_label * 3 + age_label
        
        # 이미지를 Augmentation 시킵니다.
        image_transform = self.transform(image=np.array(image))['image']
        return image_transform, (mask_label, gender_label, age_label)

    def __len__(self):
        return len(self.image_paths)

In [7]:
# 정의한 Augmentation 함수와 Dataset 클래스 객체를 생성합니다.
transform = get_transforms(mean=mean, std=std)

train_dataset = MaskBaseDataset(
    img_dir=img_dir,
    mean=mean,
    std=std,
    transform = transform['train']
)

val_dataset = MaskBaseDataset(
    img_dir=img_dir,
    mean=mean,
    std=std,
    transform = transform['val']
)


# train dataset과 validation dataset을 8:2 비율로 나눕니다.

train_indices, val_indices = train_test_split([i for i in range(len(train_dataset))],test_size=0.2,shuffle = False )

# 각 dataset에 augmentation 함수를 설정합니다.

train_dataset = torch.utils.data.Subset(train_dataset, train_indices)
val_dataset = torch.utils.data.Subset(val_dataset, val_indices)

In [8]:
print(len(train_indices))
print(len(val_indices))

30240
7560


In [9]:
# training dataloader은 데이터를 섞어주어야 합니다. (shuffle=True)
batch_size = 32
train_loader = data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    num_workers=4,
    shuffle=True
)

val_loader = data.DataLoader(
    val_dataset,
    batch_size=batch_size,
    num_workers=4,
    shuffle=True
)

In [10]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import timm

model = timm.create_model('efficientnet_b0', pretrained=True, num_classes=8)

In [11]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)

EfficientNet(
  (conv_stem): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (bn1): BatchNormAct2d(
    32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
    (drop): Identity()
    (act): SiLU(inplace=True)
  )
  (blocks): Sequential(
    (0): Sequential(
      (0): DepthwiseSeparableConv(
        (conv_dw): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
        (bn1): BatchNormAct2d(
          32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): SiLU(inplace=True)
        )
        (se): SqueezeExcite(
          (conv_reduce): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
          (act1): SiLU(inplace=True)
          (conv_expand): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
          (gate): Sigmoid()
        )
        (conv_pw): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn2): BatchNormAct2d(
      

In [12]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim.lr_scheduler import StepLR
import time
import copy
from sklearn.metrics import f1_score, accuracy_score

In [13]:
dataset_sizes = {'train': len(train_dataset), 'val': len(val_dataset)}

In [14]:
epochs = 20

In [15]:
import wandb

In [16]:

wandb.init(project = 'loss function')
config = {
    "learning_rate" : 'ReduceLROnPlateau',
    "model" : "pretrained_efficientnet_b0",
    "epochs" : 20,
    "batch_size" : batch_size,
    "loss function" : 'label smoothing 0.1',
    "label" : 'multi-label classification',
    "accuracy" : "f1 score",
    
}

wandb.config.update(config)
wandb.watch(model)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mlcwon9[0m. Use [1m`wandb login --relogin`[0m to force relogin


[]

In [17]:
# optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# criterion = torch.nn.CrossEntropyLoss()
# scheduler = StepLR(optimizer, step_size=7, gamma=0.1)

# since = time.time()

# best_model_weights = copy.deepcopy(model.state_dict())
# best_acc = 0.0

In [17]:
dataloaders = {'train': train_loader, 'val': val_loader}

In [25]:
class LabelSmoothingLoss(nn.Module):
    def __init__(self, classes, smoothing=0.1, dim=-1):
        super(LabelSmoothingLoss, self).__init__()
        self.confidence = 1.0 - smoothing
        self.smoothing = smoothing
        self.cls = classes
        self.dim = dim

    def forward(self, pred, target):
        pred = pred.log_softmax(dim=self.dim)
        with torch.no_grad():
            true_dist = torch.zeros_like(pred)
            true_dist.fill_(self.smoothing / (self.cls - 1))
            true_dist.scatter_(1, target.data.unsqueeze(1), self.confidence)
        return torch.mean(torch.sum(-true_dist * pred, dim=self.dim))

# criterion_mask =LabelSmoothingLoss(classes = 3).to(device)
# criterion_gender =LabelSmoothingLoss(classes = 2).to(device)
# criterion_age =LabelSmoothingLoss(classes = 3).to(device)
criterion = nn.CrossEntropyLoss().to(device)

In [20]:
def train(model, optimizer, train_loader, val_loader, scheduler, device):
    model.to(device)
    
    
    best_val_score = 0
    best_model = None
    
    for epoch in range(1, epochs):
        model.train()
        train_loss = []
        for images, (mask_labels, gender_labels, age_labels) in tqdm(iter(train_loader)):
            images = images.to(device)

            # labels = labels.to(device)
            mask_labels = mask_labels.to(device)
            gender_labels = gender_labels.to(device)
            age_labels = age_labels.to(device)
            
            optimizer.zero_grad()
            
            # print(images.shape)
            output = model(images)
            (mask_outs, gender_outs, age_outs) = torch.split(output, [3,2,3], dim = 1)
            
            # loss = criterion(output, labels)
            mask_loss = criterion(mask_outs, mask_labels)
            gender_loss = criterion(gender_outs, gender_labels)
            age_loss = criterion(age_outs, age_labels)
            loss = mask_loss + gender_loss + age_loss


            loss.backward()
            optimizer.step()
            
            train_loss.append(loss.item())
                    
        _val_loss, _val_score = validation(model, criterion, val_loader, device)
        _train_loss = np.mean(train_loss)
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Val F1 : [{_val_score:.5f}]')
        wandb.log({"Val_Loss":_val_loss, "Val_Accuracy" : _val_score, "Train_loss" :_train_loss, "mask_loss":mask_loss.item(), "gender_loss":gender_loss.item(), "age_loss" : age_loss.item()  }, step = epoch)
        if scheduler is not None:
            scheduler.step(_val_score)
            
        if best_val_score < _val_score:
            best_val_score = _val_score
            best_model = model
            print('model saved epoch {}'.format(epoch))
            torch.save(best_model.state_dict(), '/opt/ml/test/weights/eff_b0_multi_label_smoothing_0.1_{}_epoch.pt'.format(epoch))
    
    return best_model

In [21]:
def validation(model, criterion, val_loader, device):
    model.eval()
    val_loss = []
    mask_preds, mask_trues = [], []
    gender_preds, gender_trues = [], []
    age_preds, age_trues = [], []
    
    with torch.no_grad():
        for images,(mask_labels, gender_labels, age_labels) in tqdm(iter(val_loader)):
            images = images.to(device)
            mask_labels = mask_labels.to(device)
            gender_labels = gender_labels.to(device)
            age_labels = age_labels.to(device)
            
            logit = model(images)
            (mask_logit, gender_logit, age_logit) = torch.split(logit, [3,2,3], dim = 1)
            
            mask_loss = criterion(mask_logit, mask_labels)
            gender_loss = criterion(gender_logit, gender_labels)
            age_loss = criterion(age_logit, age_labels)
            loss = mask_loss + gender_loss + age_loss
            
            val_loss.append(loss.item())
            

            mask_preds += mask_logit.argmax(1).detach().cpu().numpy().tolist()
            gender_preds += gender_logit.argmax(1).detach().cpu().numpy().tolist()
            age_preds += age_logit.argmax(1).detach().cpu().numpy().tolist()

            mask_trues += mask_labels.detach().cpu().numpy().tolist()
            gender_trues += gender_labels.detach().cpu().numpy().tolist()
            age_trues += age_labels.detach().cpu().numpy().tolist()


        _val_loss = np.mean(val_loss)

    _val_mask_score = f1_score(mask_trues, mask_preds, average='macro')
    _val_gender_score = f1_score(gender_trues, gender_preds, average='macro')
    _val_age_score = f1_score(age_trues, age_preds, average='macro')
    _val_score = (_val_mask_score + _val_gender_score + _val_age_score)/3
    # _val_score = accuracy_score(trues, preds)
    return _val_loss, _val_score

In [None]:
# model.load_state_dict(torch.load('PATH'))

optimizer = torch.optim.Adam(params = model.parameters(), lr = 3e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=2,threshold_mode='abs',min_lr=1e-8, verbose=True)

infer_model = train(model, optimizer, train_loader, val_loader, scheduler, device)

In [26]:
import os
import pandas as pd
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from torchvision import transforms
from torchvision.transforms import Resize, ToTensor, Normalize

In [27]:
# 테스트 데이터셋 폴더 경로를 지정해주세요.
test_dir = '/opt/ml/input/data/eval'

In [28]:
class TestDataset(Dataset):
    def __init__(self, img_paths, transform):
        self.img_paths = img_paths
        self.transform = transform

    def __getitem__(self, index):
        image = Image.open(self.img_paths[index])

        if self.transform:
            image = self.transform(image)
        return image

    def __len__(self):
        return len(self.img_paths)

In [32]:
# meta 데이터와 이미지 경로를 불러옵니다.
submission = pd.read_csv(os.path.join(test_dir, 'info.csv'))
image_dir = os.path.join(test_dir, 'images')

# Test Dataset 클래스 객체를 생성하고 DataLoader를 만듭니다.
image_paths = [os.path.join(image_dir, img_id) for img_id in submission.ImageID]
transform = transforms.Compose([
    Resize((512, 384), Image.BILINEAR),
    ToTensor(),
    Normalize(mean=mean, std=std),
])
dataset = TestDataset(image_paths, transform)

loader = DataLoader(
    dataset,
    shuffle=False
)

# 모델을 정의합니다. (학습한 모델이 있다면 torch.load로 모델을 불러주세요!)
model = timm.create_model('efficientnet_b0', pretrained=True, num_classes=8).to(device)
model.load_state_dict(torch.load('/opt/ml/test/weights/eff_b0_multi_label.pt'))
model.eval()

# 모델이 테스트 데이터셋을 예측하고 결과를 저장합니다.
all_predictions = []
for images in tqdm(loader):
    with torch.no_grad():
        images = images.to(device)
        pred = model(images)
        (pred_m, pred_g, pred_a) = torch.split(pred, [3,2,3], dim = 1)
        pred_m = pred_m.argmax(dim = -1)
        pred_g = pred_g.argmax(dim = -1)
        pred_a = pred_a.argmax(dim = -1)
        pred = pred_m * 6 + pred_g * 3 + pred_a
        all_predictions.extend(pred.cpu().numpy())
submission['ans'] = all_predictions

# 제출할 파일을 저장합니다.
submission.to_csv(os.path.join(test_dir, 'submission_0416_multilabel_smoothing.csv'), index=False)
print('test inference is done!')

  0%|          | 0/12600 [00:00<?, ?it/s]

test inference is done!
