In [1]:
import pandas as pd
import numpy as np
import cv2
import PIL
import os
from glob import glob
import pickle
import random

from pathlib import Path
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as T
from torch.utils.data import DataLoader, Dataset, Subset
from efficientnet_pytorch import EfficientNet
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score
from adamp import AdamP
from tqdm import tqdm, notebook
from PIL import Image

# random seed
seed = 37
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
os.environ["PYTHONHASHSEED"] = str(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = True
print(f'seed : {seed}')

# device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f'device : {device}')
print(torch.cuda.get_device_properties(device))

# root
root = os.getcwd()
print(f'root : {root}')

# Training Name
name = ''
if not os.path.isdir(f'data_file/{name}') :
    os.chdir(os.path.join(root, 'data_file'))
    os.mkdir(f'{name}')
    os.chdir(root)

seed : 37
device : cuda:0
_CudaDeviceProperties(name='Tesla V100-PCIE-32GB', major=7, minor=0, total_memory=32510MB, multi_processor_count=80)
root : /opt/ml


In [2]:
path = Path('input/data/train/images')
image_dirs = [str(x) for x in list(path.glob('*')) if '._' not in str(x)]
# 'input/data/train/images/003277_female_Asian_19' 이런 형태 나옴

image_dirs = np.array(image_dirs)

# 나이와 성별 구분이 문제니까 이 두 개를 기준으로 나눠서 stratified_kfold하면 inbalance를 조금 방지할 수 있지 않을까?
# 나이 성별 정보면 이용해서 데이터 나누기
# 나이가 60세 이상 정보가 너무 부족하니까 59로 나눠서 진행해보기
def label_fold(image_dirs):
    stratified_kfold_label1 = []
    for image_dir in image_dirs :
        code = 0
        if 'female' in image_dir : code += 3
        else : code += 0 
        
        age = int(image_dir.split('/')[4][-2:])
        if age >= 59 : code += 2
        elif 30 <= age < 58 : code += 1
        else : code += 0
        stratified_kfold_label1.append(code)
    label1 = np.array(stratified_kfold_label1)
    return stratified_kfold_label1

    

In [3]:
# 처음부터 kfold로 나눠서 할 수 있다. 데이터의 분포가 다른 경우
# 18개의 클래스의 분포가 일정하지 않은데, 그 중에서도 ID로 K-fold해서 나누면 마스크의 분포는 일정한 상태니까 성별과 나이 위주로 나눠서 진행해보기
# 그리고 마스크는 상대적으로 성별과 나이보다 잘 맞추기 때문에 일단 고려 제외
# 이게 validation 정하는 거랑 비슷한거 같다.
def stratifiedkfold(image_dirs, label1):
    stratified_kfold = StratifiedKFold(n_splits=5, random_state=seed, shuffle=True)
    # Stratified K-Fold는 층화된 folds를 반환하는 기존 K-Fold의 변형된 방식. 각 집합에는 전체 집합과 거의 동일하게 클래스의 표본 비율이 포함된다. 불균형 클래스의 경우 사
    # train에 2700개 중에서 4/5가 들어가고 valid에 1/5가 들어간다. 이걸 train할 때 다섯번 반복하면 된다.
    fold_list = []
    for train_data, valid_data in stratified_kfold.split(image_dirs, label1) : # split(x,y) x training data, y target
        fold_list.append({'train':train_data, 'valid':valid_data})
    return fold_list

In [20]:
# 'Mask까지 포함해서, 다시 labeling하기'
def label_func(image_path) :
    code = 0
    if 'normal' in image_path : code += 12
    elif 'incorrect_mask' in image_path : code += 6
    else : code += 0

    if 'female' in image_path : code += 3
    else : code += 0

    age = int(image_path.split('/')[4][-2:])
    if age >= 58 : code += 2
    elif age < 58 : code += 1
    else : code += 0
    
    return code

class MaskDataset(Dataset) :
    # path input/data/train/images/003277_female_Asian_19/mask3.jpg 이런 식으로 들어옴
    def __init__(self, image_paths, transform, augment = None, training = False):
        self.image_paths = image_paths
        self.transform = transform
        self.augment = augment
        self.training = training

    def __len__(self) :
        return len(self.image_paths)
    
    def __getitem__(self, idx):
        image = np.array(Image.open(self.image_paths[idx]))
        
        if self.augment: # augmentation 안하는 경우에
            image = self.transform(self.augment(image = image)['image'])
        else:    
            image = self.transform(image)
            
        if self.training : # 트레이닝 하는 경우에
            label = label_func(self.image_paths[idx])
            return {'image' : image, 'label' : label}
            
        else:
            return {'image' : image}


    


In [21]:
t = timm.create_model('tf_efficientnet_b4', pretrained=True, num_classes=18)
t

EfficientNet(
  (conv_stem): Conv2dSame(3, 48, kernel_size=(3, 3), stride=(2, 2), bias=False)
  (bn1): BatchNorm2d(48, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  (act1): SiLU(inplace=True)
  (blocks): Sequential(
    (0): Sequential(
      (0): DepthwiseSeparableConv(
        (conv_dw): Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=48, bias=False)
        (bn1): BatchNorm2d(48, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
        (act1): SiLU(inplace=True)
        (se): SqueezeExcite(
          (conv_reduce): Conv2d(48, 12, kernel_size=(1, 1), stride=(1, 1))
          (act1): SiLU(inplace=True)
          (conv_expand): Conv2d(12, 48, kernel_size=(1, 1), stride=(1, 1))
          (gate): Sigmoid()
        )
        (conv_pw): Conv2d(48, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn2): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
        (act2): Identity()
      )
     

In [22]:
# model
# from efficientnet_pytorch import EfficientNet
import timm
'''
B0 224 B1 240 B2 260 B3 300 B4 380 B5 456 B6 528 B7 600

'''
class MyModel(nn.Module) :
    def __init__(self) :
        super().__init__()
        self.model_name = EfficientNet.from_pretrained('efficientnet-b4', 
                                                in_channels=3, 
                                                num_classes=18) # weight가져오고 num_classes(두번째 파라미터로 학습시키는 class 수)

    def forward(self, x) :
        x = F.relu(self.model_name(x))
        return x

class myefficientnet_b4(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.pretrain_model = timm.create_model('tf_efficientnet_b4', pretrained=True, num_classes=18)
        self.pretrain_model.conv_head = nn.Sequential(nn.Conv2d(448, 1792, kernel_size=(3, 3), stride=(1, 1), bias=False),
                                                        nn.BatchNorm2d(1792, eps=0.001, momentum=0.1, affine=True, track_running_stats=True),
                                                        nn.SiLU(inplace=True),
                                                        nn.Dropout(p=0.3),
                                                        nn.Conv2d(1792, 1792, kernel_size=(1, 1),
                                                        stride=(1, 1),
                                                        bias=False))    
    def forward(self,x):
        return self.pretrain_model(x)




## Define Transform and Augs

In [23]:
# Transform
'''
transforms.ToPILImage() - csv 파일로 데이터셋을 받을 경우, PIL image로 바꿔준다.
transforms.CenterCrop(size) - 가운데 부분을 size 크기로 자른다.
transforms.Grayscale(num_output_channels=1) - grayscale로 변환한다.
transforms.RandomAffine(degrees) - 랜덤으로 affine 변형을 한다.
transforms.RandomCrop(size) -이미지를 랜덤으로 아무데나 잘라 size 크기로 출력한다.
transforms.RandomResizedCrop(size) - 이미지 사이즈를 size로 변경한다
transforms.Resize(size) - 이미지 사이즈를 size로 변경한다
transforms.RandomRotation(degrees) 이미지를 랜덤으로 degrees 각도로 회전한다.
transforms.RandomResizedCrop(size, scale=(0.08, 1.0), ratio=(0.75, 1.3333333333333333)) - 이미지를 랜덤으로 변형한다.
transforms.RandomVerticalFlip(p=0.5) - 이미지를 랜덤으로 수직으로 뒤집는다. p =0이면 뒤집지 않는다.
transforms.RandomHorizontalFlip(p=0.5) - 이미지를 랜덤으로 수평으로 뒤집는다.
transforms.ToTensor() - 이미지 데이터를 tensor로 바꿔준다.
transforms.Normalize(mean, std, inplace=False) - 이미지를 정규화한다.
'''

train_transform = T.Compose([
    T.ToPILImage(),
    T.CenterCrop([300,250]),
    T.RandomHorizontalFlip(0.5),
    T.RandomRotation(10),
    T.ToTensor(),
    T.Normalize(mean=(0.548, 0.504, 0.479), std=(0.237, 0.247, 0.246))
])

valid_transform = T.Compose([
    T.ToPILImage(),
    T.CenterCrop([300,250]),
    T.ToTensor(),
    T.Normalize(mean=(0.548, 0.504, 0.479), std=(0.237, 0.247, 0.246))
])

In [24]:
# Hyper-params
batch_size = 32
lr = 1e-4
epochs = 10

In [25]:
# -- Training
# kfold로 데이터 나누기
label1 = label_fold(image_dirs)
fold_list = stratifiedkfold(image_dirs, label1)
k_fold = [1, 2, 3, 4, 5] # 총 5개

for fold in k_fold :
    print(f'k_fold number {fold}')
    min_loss = 5
    early_stop = 0
    
    # -- dataset 만들기 위한 경로 리스트 만들기
    train_image_paths = []
    for train_dir in image_dirs[fold_list[fold-1]['train']] :
        train_image_paths.extend(glob(train_dir+'/*'))
    
    valid_image_paths = []
    for valid_dir in image_dirs[fold_list[fold-1]['valid']] :
        valid_image_paths.extend(glob(valid_dir+'/*'))
    
    # -- dataset
    train_dataset = MaskDataset(train_image_paths, train_transform, training=True)
    valid_dataset = MaskDataset(valid_image_paths, valid_transform, training=True)
    
    # -- data_loader
    train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, num_workers=3)
    valid_loader = DataLoader(dataset=valid_dataset, batch_size=batch_size//4, shuffle=True, num_workers=3)
    
    # -- model
    model = myefficientnet_b4()
    model = model.to(device)
    
    # -- loss & metric
    loss_func = torch.nn.CrossEntropyLoss()
    optimizer = AdamP(model.parameters(), lr=lr)

    # -- epoch
    for epoch in range(epochs) :
        
        # -- Train start
        with tqdm(train_loader, total=train_loader.__len__(), unit='batch') as train_depth :
            train_f1_score = []
            train_loss = []
            for sample in train_depth:
                train_depth.set_description(f'Epoch {epoch+1} / {epochs}')
                images = sample['image'].float().to(device)
                labels = sample['label'].long().to(device)

                model.train()
                optimizer.zero_grad()
                pred = model(images)
                loss = loss_func(pred, labels)
                loss.backward()
                optimizer.step()
                
                # print f1 score and loss
                train_f1_score.append(f1_score(labels.cpu().detach().float(), torch.argmax(pred.cpu().detach(), 1), average='macro'))
                train_loss.append(loss.item())

                train_depth.set_postfix(f1=np.mean(train_f1_score), loss=np.mean(train_loss), Train=epoch+1)
        
        # -- Validation start
        with tqdm(valid_loader, total=valid_loader.__len__(), unit='batch') as valid_depth :
            valid_f1_score = []
            valid_loss = []
            for sample in valid_depth :
                valid_depth.set_description(f'Epoch {epoch+1} / {epochs}')
                imgs = sample['image'].float().to(device)
                labels = sample['label'].long().to(device)
                
                model.eval()
                optimizer.zero_grad()
                with torch.no_grad() : 
                    pred = model(imgs)
                    loss = loss_func(pred, labels)

                # postfix로 출력
                valid_f1_score.append(f1_score(labels.cpu().detach().float(), torch.argmax(pred.cpu().detach(), 1), average='macro'))
                valid_loss.append(loss.item())
                valid_depth.set_postfix(f1=np.mean(valid_f1_score), loss=np.mean(valid_loss), Valid=epoch+1)
        
        # Loss가 낮아질 때, 해당 Model을 저장
        if np.mean(valid_loss) < min_loss :
            min_loss = np.mean(valid_loss) # 갱신
            early_stop = 0
            for f in glob(f'data_file/{name}/{fold}fold_*{name}.pt') :
                open(f, 'w').close()
                os.remove(f)
            torch.save(model.state_dict(), f'data_file/{name}/{fold}fold_{epoch+1}epoch_{np.mean(valid_loss):2.4f}_{name}.pt')
        # loss가 다섯 번 이상 좋아지지 않으면 조기종료
        else :
            early_stop += 1
        if early_stop >= 5 :
            print('Early Stop')
            break

k_fold number 1


Epoch 1 / 10:   4%|▍         | 21/473 [00:08<03:05,  2.43batch/s, Train=1, f1=0.371, loss=1.65]


KeyboardInterrupt: 

In [None]:
# Inference

submission = pd.read_csv('input/data/eval/info.csv')
test_image_paths = [os.path.join('input/data/eval/images', image_file) for image_file in submission.ImageID]

# Test Dataset, DataLoader를 만들기
test_dataset = MaskDataset(test_image_paths, valid_transform, training=False)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

prediction_lst = []
for best_model in glob(f'data_file/{name}/*{name}.pt') :
    model = MyModel()
    model.load_state_dict(torch.load(best_model))
    model.to(device)
    model.eval()
    prediction_list=[]
    
    with tqdm(test_loader, total=test_loader.__len__(), unit='batch') as test_depth :
        for sample in test_depth :
            images = sample['image'].float().to(device)
            pred = model(images)
            pred = pred.cpu().detach().numpy()
            prediction_list.extend(pred)
    
    prediction_lst.append(np.array(prediction_list)[...,np.newaxis])

submission['ans'] = np.argmax(np.mean(np.concatenate(prediction_lst, axis=2), axis=2), axis=1)
submission.to_csv(f'data_file/{name}/{name}.csv', index=False)
print('test inference is done!')

Loaded pretrained weights for efficientnet-b4


100%|██████████| 394/394 [01:27<00:00,  4.49batch/s]


Loaded pretrained weights for efficientnet-b4


100%|██████████| 394/394 [01:26<00:00,  4.57batch/s]


Loaded pretrained weights for efficientnet-b4


100%|██████████| 394/394 [01:26<00:00,  4.54batch/s]


Loaded pretrained weights for efficientnet-b4


100%|██████████| 394/394 [01:27<00:00,  4.49batch/s]


Loaded pretrained weights for efficientnet-b4


100%|██████████| 394/394 [01:27<00:00,  4.48batch/s]


test inference is done!


12600