In [12]:
from glob import glob
from sklearn.model_selection import StratifiedKFold
import cv2
import torch
from torch import nn
import os
import random
import torchvision
import pandas as pd
import numpy as np
from tqdm import tqdm
import math
import sys

import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler
from torch.cuda.amp import autocast, GradScaler
from torch.nn.modules.loss import _WeightedLoss
import torch.nn.functional as F
from torch.optim.lr_scheduler import _LRScheduler

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

import timm

import sklearn
import warnings
import joblib
from sklearn import metrics

import albumentations as A
from albumentations.pytorch import ToTensorV2

# from adamp import AdamP

def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
#     torch.cuda.manualseedall(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

class CFG:
    img_size = 384
    num_workers=4
    scheduler='CosineAnnealingWarmRestarts' # 'CosineAnnealingWarmRestarts', None, 
    epoch=30 # Change epochs
    model = 'tf_efficientnet_b3_ns' # [ecaresnet50t seresnext50_32x4d] #[tf_efficientnet_b3_ns tf_efficientnet_b4_ns] #vit_deit_base_patch16_384 # eca_nfnet_l0
    base = 'efficient' # resnet efficient ViT NfNet
    pretrained = True
    warmup = False
    lr=1e-4
    min_lr=1e-7
    batch_size=32
    weight_decay=1e-6
    accum_iter = 1
    fold_num = 5
    seed=2021
    optimizer = 'Adam'
    mix_prob = 1.
    T_0=10
    T_max=10
    mean= [0.56019358, 0.52410121, 0.501457]
    std= [0.23318603, 0.24300033, 0.24567522]
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    
train = pd.read_csv("/opt/ml/input/data/train/train.csv")#new_mytrain.csv로 되어있었음
old = train[train['age']==2]

# scheduler

In [4]:
class CustomCosineAnnealingWarmUpRestarts(_LRScheduler):
    def __init__(self, optimizer, T_0, T_mult=1, eta_max=0.1, T_up=0, gamma=1., last_epoch=-1):
        if T_0 <= 0 or not isinstance(T_0, int):
            raise ValueError("Expected positive integer T_0, but got {}".format(T_0))
        if T_mult < 1 or not isinstance(T_mult, int):
            raise ValueError("Expected integer T_mult >= 1, but got {}".format(T_mult))
        if T_up < 0 or not isinstance(T_up, int):
            raise ValueError("Expected positive integer T_up, but got {}".format(T_up))
        self.T_0 = T_0
        self.T_mult = T_mult
        self.base_eta_max = eta_max
        self.eta_max = eta_max
        self.T_up = T_up
        self.T_i = T_0
        self.gamma = gamma
        self.cycle = 0
        self.T_cur = last_epoch
        super(CustomCosineAnnealingWarmUpRestarts, self).__init__(optimizer, last_epoch)
        
    
    def get_lr(self):
        if self.T_cur == -1:
            return self.base_lrs
        elif self.T_cur < self.T_up:
            return [(self.eta_max - base_lr)*self.T_cur / self.T_up + base_lr for base_lr in self.base_lrs]
        else:
            return [base_lr + (self.eta_max - base_lr) * (1 + math.cos(math.pi * (self.T_cur-self.T_up) / (self.T_i - self.T_up))) / 2
                    for base_lr in self.base_lrs]

    def step(self, epoch=None):
        if epoch is None:
            epoch = self.last_epoch + 1
            self.T_cur = self.T_cur + 1
            if self.T_cur >= self.T_i:
                self.cycle += 1
                self.T_cur = self.T_cur - self.T_i
                self.T_i = (self.T_i - self.T_up) * self.T_mult + self.T_up
        else:
            if epoch >= self.T_0:
                if self.T_mult == 1:
                    self.T_cur = epoch % self.T_0
                    self.cycle = epoch // self.T_0
                else:
                    n = int(math.log((epoch / self.T_0 * (self.T_mult - 1) + 1), self.T_mult))
                    self.cycle = n
                    self.T_cur = epoch - self.T_0 * (self.T_mult ** n - 1) / (self.T_mult - 1)
                    self.T_i = self.T_0 * self.T_mult ** (n)
            else:
                self.T_i = self.T_0
                self.T_cur = epoch
                
        self.eta_max = self.base_eta_max * (self.gamma**self.cycle)
        self.last_epoch = math.floor(epoch)
        for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()):
            param_group['lr'] = lr

# loss

In [6]:
class F1Loss(nn.Module):
    def __init__(self, classes=18, epsilon=1e-7):
        super().__init__()
        self.classes = classes
        self.epsilon = epsilon
    def forward(self, y_pred, y_true):
        assert y_pred.ndim == 2
        assert y_true.ndim == 1
        y_true = F.one_hot(y_true, self.classes).to(torch.float32)
        y_pred = F.softmax(y_pred, dim=1)

        tp = (y_true * y_pred).sum(dim=0).to(torch.float32)
        tn = ((1 - y_true) * (1 - y_pred)).sum(dim=0).to(torch.float32)
        fp = ((1 - y_true) * y_pred).sum(dim=0).to(torch.float32)
        fn = (y_true * (1 - y_pred)).sum(dim=0).to(torch.float32)

        precision = tp / (tp + fp + self.epsilon)
        recall = tp / (tp + fn + self.epsilon)

        f1 = 2 * (precision * recall) / (precision + recall + self.epsilon)
        f1 = f1.clamp(min=self.epsilon, max=1 - self.epsilon)
        return 1 - f1.mean()

class CustomLoss(nn.Module):
    def __init__(self, classes=18, epsilon=1e-7):
        super().__init__()
        self.classes = classes
        self.epsilon = epsilon
    
    def forward(self, y_pred, y_true):
        assert y_pred.ndim == 2
        assert y_true.ndim == 1
        
        ce_loss =  nn.functional.cross_entropy(y_pred, y_true)
        
        y_true = F.one_hot(y_true, self.classes).to(torch.float32)
        y_pred = F.softmax(y_pred, dim=1)

        tp = (y_true * y_pred).sum(dim=0).to(torch.float32)
        tn = ((1 - y_true) * (1 - y_pred)).sum(dim=0).to(torch.float32)
        fp = ((1 - y_true) * y_pred).sum(dim=0).to(torch.float32)
        fn = (y_true * (1 - y_pred)).sum(dim=0).to(torch.float32)

        precision = tp / (tp + fp + self.epsilon)
        recall = tp / (tp + fn + self.epsilon)

        f1 = 2 * (precision * recall) / (precision + recall + self.epsilon)
        f1 = f1.clamp(min=self.epsilon, max=1 - self.epsilon)
        
        f1_loss = 1 - f1.mean()
        
        return f1_loss + ce_loss

# Dataset

In [7]:
class MaskDataset(Dataset):
    def __init__(
        self, df, transforms=None, output_label=True
    ):
        super().__init__()
        self.df = df.reset_index(drop=True).copy()
        self.transforms = transforms
        self.output_label = output_label
    
    def __len__(self):
        return self.df.shape[0]
    
    def __getitem__(self, index: int):
        
        # get labels
        if self.output_label:
            target = self.df.iloc[index]['label']
          
        path = self.df.iloc[index]['filepath']
        
        img = cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2RGB)
        
        if self.transforms:
            img = self.transforms(image=img)['image']
            
        # do label smoothing
        if self.output_label == True:
            return img, target
        else:
            return img 

# Augmentation

In [8]:
def get_train_transforms():
      return A.Compose([
                      A.CenterCrop(CFG.img_size, CFG.img_size, p=1.),
                      A.HueSaturationValue(),
                      A.OneOf([
                                A.OpticalDistortion(p=0.4),
                                A.GridDistortion(p=0.2),
                                A.IAAPiecewiseAffine(p=0.4),
                      ], p=0.5),
                      A.RandomBrightnessContrast(brightness_limit=(-0.3, 0.3), contrast_limit = (-0.1, 0.1), p = 0.5),
                      A.Normalize(mean=CFG.mean, std=CFG.std, max_pixel_value=255.0, p = 1.0),
                      ToTensorV2(p=1.0),
      ], p = 1.)

def get_valid_transforms():
      return A.Compose([
                      A.CenterCrop(CFG.img_size, CFG.img_size, p=1.),
                      A.Normalize(mean=CFG.mean, std=CFG.std, max_pixel_value=255.0, p=1.0),
                      ToTensorV2(p = 1.0),
      ], p = 1.)

def custom_cutmix(data, target, old_data, old_target, alpha):
    indices = torch.randperm(data.size(0))
    shuffled_old_target = old_target[indices]
    
    size = data.size()
    W = size[2]

    new_data = data.clone()

    new_data[:, :, :, :W//2] = old_data[indices, :, :, W//2:]
    # adjust lambda to exactly match pixel ratio
    lam = 0.5
    targets = (target, shuffled_old_target, lam)

    return new_data, targets

# dataloader

In [9]:
def prepare_dataloader(df, fold):
    
    train_ = df[~df.groups.isin(fold)].reset_index(drop=True)
    valid_ = df[df.groups.isin(fold)].reset_index(drop=True)
    
    train_ds = MaskDataset(train_, transforms=get_train_transforms(), output_label=True)
    valid_ds = MaskDataset(valid_, transforms=get_valid_transforms(), output_label=True)
    
    train_loader = torch.utils.data.DataLoader(
        train_ds,
        batch_size=CFG.batch_size,
        pin_memory=True,
        drop_last=False,
        shuffle=True,        
        num_workers=CFG.num_workers,
    )
    val_loader = torch.utils.data.DataLoader(
        valid_ds, 
        batch_size=CFG.batch_size,
        num_workers=CFG.num_workers,
        shuffle=False,
        pin_memory=True,
    )
    return train_loader, val_loader

# model

In [10]:
#efficientnet
class MaskEffNetClassifier(nn.Module):
    def __init__(self, model_arch, n_class, pretrained=False):
        super().__init__()
        self.model = timm.create_model(model_arch, pretrained=pretrained)
        n_features = self.model.classifier.in_features
        self.model.classifier = nn.Linear(n_features, n_class)
        
    def forward(self, x):
        x = self.model(x)
        return x
# Resnet
class MaskResNetClassifier(nn.Module):
    def __init__(self, model_arch, n_class, pretrained = False):
        super().__init__()
        self.model = timm.create_model(model_arch, pretrained = pretrained)
        n_features = self.model.fc.in_features
        self.model.fc = nn.Linear(n_features, n_class)

    def forward(self, x):
        x = self.model(x)
        return x
# NFNet    
class MaskNFNClassifier(nn.Module):
    def __init__(self, model_arch, n_classes, pretrained=False):
        super().__init__()
        self.model = timm.create_model(model_arch, pretrained=pretrained, num_classes=n_classes)
        n_features = self.model.head.fc.in_features
        self.model.head.fc = nn.Linear(n_features, n_classes)

    def forward(self, x):
        x = self.model(x)
        return x
# VIT
class MaskViTClassifier(nn.Module):
    def __init__(self, model_arch, n_classes, pretrained=False):
        super().__init__()
        self.model = timm.create_model(model_arch, pretrained=pretrained)
        n_features = self.model.head.in_features
        self.model.head = nn.Linear(n_features, n_classes)

    def forward(self, x):
        x = self.model(x)
        return x

    

# Train / Validation

In [13]:
def train_one_epoch(epoch, model, loss_fn, optimizer, train_loader, old_train_loader, device, scheduler=None):
    model.train()

    running_loss = None
    old_iter = iter(old_train_loader)
    old_length = len(old_train_loader)
    old_step = 0
    pbar = tqdm(enumerate(train_loader), total=len(train_loader), position=0, leave=True)
    for step, (imgs, image_labels) in pbar:
        imgs = imgs.to(device).float()
        image_labels = image_labels.to(device).long()
        mix_decision = np.random.rand()
        if mix_decision < CFG.mix_prob:
            if old_step < old_length-1:
                old_imgs, old_labels = next(old_iter)
                old_imgs = old_imgs.to(device).float()
                old_labels = old_labels.to(device).long()
                old_step += 1
            else:
                old_step = 1
                old_iter = iter(old_train_loader)
                old_imgs, old_labels = next(old_iter)
                old_imgs = old_imgs.to(device).float()
                old_labels = old_labels.to(device).long()
                
            imgs, image_labels = custom_cutmix(imgs, image_labels, old_imgs, old_labels, 1.)

        with autocast():
            image_preds = model(imgs.float())   #output = model(input)
            
            if mix_decision < CFG.mix_prob:
                loss = loss_fn(image_preds, image_labels[0]) * image_labels[2] + loss_fn(image_preds, image_labels[1]) * (1. - image_labels[2])
                loss /= CFG.accum_iter
            else:
                loss = loss_fn(image_preds, image_labels)
                loss /= CFG.accum_iter
            
            scaler.scale(loss).backward()

            if running_loss is None:
                running_loss = loss.item()
            else:
                running_loss = running_loss * .99 + loss.item() * .01

            if ((step + 1) % CFG.accum_iter == 0) or ((step + 1) == len(train_loader)):

                scaler.step(optimizer)
                scaler.update()
                optimizer.zero_grad() 

                description = f'epoch {epoch} loss: {running_loss:.4f}'
                
                pbar.set_description(description)
                
    if scheduler is not None:
        scheduler.step()

def valid_one_epoch(epoch, model, loss_fn, val_loader, device):
    model.eval()

    loss_sum = 0
    sample_num = 0
    image_preds_all = []
    image_targets_all = []
    
    pbar = tqdm(enumerate(val_loader), total=len(val_loader), position=0, leave=True)
    for step, (imgs, image_labels) in pbar:
        imgs = imgs.to(device).float()
        image_labels = image_labels.to(device).long()
        
        image_preds = model(imgs)   #output = model(input)
        #print(image_preds.shape, exam_pred.shape)
        image_preds_all += [torch.argmax(image_preds, 1).detach().cpu().numpy()]
        image_targets_all += [image_labels.detach().cpu().numpy()]
        
        loss = loss_fn(image_preds, image_labels)
        
        loss_sum += loss.item()*image_labels.shape[0]
        sample_num += image_labels.shape[0]  

        if ((step + 1) % 1 == 0) or ((step + 1) == len(val_loader)):
            description = f'epoch {epoch} loss: {loss_sum/sample_num:.4f}'
            pbar.set_description(description)
    
    image_preds_all = np.concatenate(image_preds_all)
    image_targets_all = np.concatenate(image_targets_all)
    accuracy = (image_preds_all==image_targets_all).mean()
    f1 = f1_score(image_preds_all, image_targets_all, average='macro')
    print('validation multi-class accuracy = {:.4f}, f1 score = {:.4f}'.format(accuracy, f1))
    
            
    return accuracy, f1

def create_folder(directory):
    try:
        if not os.path.exists(directory):
            os.makedirs(directory)
    except:
        pass

seed_everything(CFG.seed)

for fold in range(CFG.fold_num):

    print('Training with {} started'.format(fold))

    train_loader, val_loader = prepare_dataloader(train, [fold])
    old_train_loader, old_val_loader = prepare_dataloader(old, [fold])
    device = CFG.device
    if CFG.base == 'efficient':
        model = MaskEffNetClassifier(CFG.model, train.label.nunique(), pretrained=True).to(device)
    elif CFG.base == 'resnet':
        model = MaskResNetClassifier(CFG.model, train.label.nunique(), pretrained=True).to(device)
    elif CFG.base == 'ViT':
        model = MaskViTClassifier(CFG.model, train.label.nunique(), pretrained=True).to(device)
    elif CFG.base == 'NfNet':
        model = MaskNFNClassifier(CFG.model, train.label.nunique(), pretrained=True).to(device)
    elif CFG.base == 'regnet':
        model = MaskRegNetClassifier(CFG.model, train.label.nunique(), pretrained=True).to(device)
    elif CFG.base == 'resnest':
        model = MaskResNestClassifier(CFG.model, train.label.nunique(), pretrained=True).to(device)
       
    if CFG.optimizer == 'Adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=CFG.lr, weight_decay=CFG.weight_decay)
    elif CFG.optimizer == 'AdamP':  
        optimizer = AdamP(model.parameters(), lr=CFG.lr)
        
    if CFG.scheduler is not None:
        if CFG.warmup:
            optimizer = torch.optim.Adam(model.parameters(), lr=0)
            scheduler = CustomCosineAnnealingWarmUpRestarts(optimizer, T_0=CFG.epoch, T_mult=1, eta_max=CFG.lr,  T_up=2, gamma=1.)
        else:
            scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=CFG.T_0, T_mult=1, eta_min=CFG.min_lr ,last_epoch=-1)     
    
#     loss_fn = nn.CrossEntropyLoss().to(device)
#     loss_fn = F1Loss().to(device)
    loss_fn = CustomLoss().to(device)


    scaler = GradScaler()
    
    best_accuracy = 0
    best_f1 = 0
    best_epoch = 0
    stop_count = 0
    for epoch in range(CFG.epoch):
        train_one_epoch(epoch, model, loss_fn, optimizer, train_loader, old_train_loader, device, scheduler=scheduler)

        with torch.no_grad():
            epoch_accuracy, epoch_f1 = valid_one_epoch(epoch, model, loss_fn, val_loader, device)

        if epoch_f1 > best_f1:
            stop_count = 0
            dir = f"/opt/ml/myfolder/models/{CFG.base}/{CFG.model}"
            create_folder(dir)
            torch.save(model.state_dict(), f'{dir}/{CFG.model}_{fold}.pth')

            best_f1 = epoch_f1
            best_epoch = epoch
            print('The model is saved!')
        else:
            stop_count += 1
            if stop_count > 3:
                break   

    del model, optimizer, train_loader, val_loader, scaler, scheduler
    torch.cuda.empty_cache()
    print('Best Accuracy: {} in epoch {}'.format(best_f1, best_epoch))
    print()

Training with 0 started


AttributeError: 'DataFrame' object has no attribute 'groups'