# k-Fold
* TinyVit-21m-378 
* Augmentation
    - Transpose # 행렬 스왑
    - HorizontalFlip # 좌우 반전
    - VerticalFlip # 상하 반전
    - ShiftScaleRotate # 랜덤하게 옮기고, scale, 회전
    - HueSaturationValue # 빛깔, 색조, 값 변환
    - RandomBrightnessContrast # 명도 대비
    - ChannelShuffle # RGB 채널 간 shuffle
* lr scheduler
    - Custom CosineAnnealingWarmUpRestarts
* Sampler
* Focal loss
* CumMix
* k-fold
* result
    - fold0 : Epoch [0-44], Train Loss : [0.75912] Val Loss : [0.48666] Val F1 Score : [0.8393] 
    - fold1 : Epoch [1-50], Train Loss : [0.75849] Val Loss : [0.43823] Val F1 Score : [0.8414]
    - fold2 : Epoch [2-30], Train Loss : [0.91718] Val Loss : [0.60007] Val F1 Score : [0.8119]
    - fold3 : Epoch [3-44], Train Loss : [0.80010] Val Loss : [0.52640] Val F1 Score : [0.8362]
    - fold4 : Epoch [4-35], Train Loss : [0.85218] Val Loss : [0.54661] Val F1 Score : [0.8248]
    - 41536
    - public score : 0.8412144012
    - private score : 0.8422106045

In [1]:
import gc
import os
import random
from datetime import datetime

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import cv2

from sklearn import preprocessing
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import f1_score

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from tqdm.auto import tqdm

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

import timm

import warnings
warnings.filterwarnings(action='ignore') 

In [2]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [3]:
CFG = {
    'IMG_SIZE': 260,
    'EPOCHS': 1000,
    'LEARNING_RATE': 8e-6,
    'BATCH_SIZE': 16,
    'K-FOLD': 5,
    'FILENAME': 'kfold-vit',
    'SEED': 6
}

In [4]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything(CFG['SEED']) # Seed 고정

In [5]:
running_colab = 'google.colab' in str(get_ipython()) if hasattr(__builtins__,'__IPYTHON__') else False
if running_colab:
    from google.colab import drive
    drive.mount('/content/drive')
if running_colab:
    data_path = '/content/drive/MyDrive/Colab Notebooks/ai6th/data/optiver/'
else:
    data_path = '../../data/'

In [6]:
df = pd.read_csv(os.path.join(data_path, 'train.csv'))
df.loc[3896, 'artist'] = 'Titian'
df.loc[3986, 'artist'] = 'Alfred Sisley'

In [7]:
artists = df.groupby('artist')[['id']].count().rename(columns={'id':'count'}).reset_index()

In [8]:
# Label Encoding
le = preprocessing.LabelEncoder()
df['artist'] = le.fit_transform(df['artist'].values)

In [9]:
def get_data(df, infer=False):
    if infer:
        return df['img_path'].apply(lambda p: os.path.join(data_path, p)).values
    return df['img_path'].apply(lambda p: os.path.join(data_path, p)).values, df['artist'].values

In [10]:
from torchvision.transforms import ToTensor


class CustomDataset(Dataset):
    def __init__(self, img_paths, labels, transforms=None):
        self.img_paths = img_paths
        self.labels = labels
        self.transforms = transforms if transforms else ToTensor()

    def __getitem__(self, index):
        img_path = self.img_paths[index]
        image = cv2.imread(img_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = self.transforms(image=image)['image']
        
        if self.labels is not None:
            label = self.labels[index]
            return image, label
        else:
            return image
    
    def __len__(self):
        return len(self.img_paths)

In [11]:
train_transform = A.Compose([
    A.Resize(CFG['IMG_SIZE']*2,CFG['IMG_SIZE']*2),
    A.RandomCrop(CFG['IMG_SIZE'],CFG['IMG_SIZE']),
    A.Transpose(p=0.5), # 행렬 스왑
    A.HorizontalFlip(p=0.5), # 좌우 반전
    A.VerticalFlip(p=0.5), # 상하 반전
    A.ShiftScaleRotate(p=0.5), # 랜덤하게 옮기고, scale, 회전
    A.HueSaturationValue(hue_shift_limit=20, sat_shift_limit=20, val_shift_limit=20, p=0.5), # 빛깔, 색조, 값 변환
    A.RandomBrightnessContrast(brightness_limit=(-0.1,0.1), contrast_limit=(-0.1, 0.1), p=0.5), # 명도 대비
    A.ChannelShuffle(), # RGB 채널 간 shuffle
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, always_apply=False, p=1.0),
    ToTensorV2()
])

validation_transform = A.Compose([
    A.Resize(CFG['IMG_SIZE']*2,CFG['IMG_SIZE']*2),
    A.RandomCrop(CFG['IMG_SIZE'],CFG['IMG_SIZE']),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, always_apply=False, p=1.0),
    ToTensorV2()
])

test_transform = A.Compose([
    A.Resize(CFG['IMG_SIZE'],CFG['IMG_SIZE']),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, always_apply=False, p=1.0),
    ToTensorV2()
])

In [12]:
def seed_worker(worker_id):
    worker_seed = torch.initial_seed() % 2 ** 32
    np.random.seed(worker_seed)
    random.seed(worker_seed)
g = torch.Generator()
g.manual_seed(0)

<torch._C.Generator at 0x124b8154770>

In [13]:
class VitModel(nn.Module):
    def __init__(self, num_classes=len(le.classes_)):
        super(VitModel, self).__init__()
        self.backbone = timm.create_model('tiny_vit_21m_384.dist_in22k_ft_in1k', pretrained=True, num_classes=0)
        self.classifier = nn.Sequential(
            nn.LayerNorm(576),
            nn.GELU(),
            nn.Dropout(p=0.4),
            nn.Linear(576, num_classes)
        )
            
    def forward(self, x):
        x = self.backbone(x)
        x = self.classifier(x)
        return x

In [14]:
def clear_mem():
    gc.collect()
    torch.cuda.empty_cache()

In [15]:
class EarlyStopping:
    def __init__(self, patience=10, verbose=False, delta=0):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
        self.delta = delta

    def __call__(self, score):
        if self.best_score is None:
            self.best_score = score
        elif score < self.best_score + self.delta:
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            print(f'Best F1 score from now: {self.best_score}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.counter = 0
        
        return self.early_stop

In [16]:
def rand_bbox(size, lam):   # size : [B, C, W, H]
    W = size[2] # 이미지의 width
    H = size[3] # 이미지의 height
    cut_rat = np.sqrt(1. - lam)  # 패치 크기의 비율 정하기
    cut_w = np.int32(W * cut_rat)  # 패치의 너비
    cut_h = np.int32(H * cut_rat)  # 패치의 높이

    # uniform
    # 기존 이미지의 크기에서 랜덤하게 값을 가져옵니다.(중간 좌표 추출)
    cx = np.random.randint(W)
    cy = np.random.randint(H)

    # 패치 부분에 대한 좌표값을 추출합니다.
    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)

    return bbx1, bby1, bbx2, bby2

In [17]:
import math
from torch.optim.lr_scheduler import LRScheduler

class CosineAnnealingWarmUpRestarts(LRScheduler):
    def __init__(self, optimizer, T_0, T_mult=1, eta_max=0.1, T_up=0, gamma=1., last_epoch=-1):
        if T_0 <= 0 or not isinstance(T_0, int):
            raise ValueError("Expected positive integer T_0, but got {}".format(T_0))
        if T_mult < 1 or not isinstance(T_mult, int):
            raise ValueError("Expected integer T_mult >= 1, but got {}".format(T_mult))
        if T_up < 0 or not isinstance(T_up, int):
            raise ValueError("Expected positive integer T_up, but got {}".format(T_up))
        self.T_0 = T_0
        self.T_mult = T_mult
        self.base_eta_max = eta_max
        self.eta_max = eta_max
        self.T_up = T_up
        self.T_i = T_0
        self.gamma = gamma
        self.cycle = 0
        self.T_cur = last_epoch
        super(CosineAnnealingWarmUpRestarts, self).__init__(optimizer, last_epoch)
    
    def get_lr(self):
        if self.T_cur == -1:
            return self.base_lrs
        elif self.T_cur < self.T_up:
            return [(self.eta_max - base_lr)*self.T_cur / self.T_up + base_lr for base_lr in self.base_lrs]
        else:
            return [base_lr + (self.eta_max - base_lr) * (1 + math.cos(math.pi * (self.T_cur-self.T_up) / (self.T_i - self.T_up))) / 2
                    for base_lr in self.base_lrs]

    def step(self, epoch=None):
        if epoch is None:
            epoch = self.last_epoch + 1
            self.T_cur = self.T_cur + 1
            if self.T_cur >= self.T_i:
                self.cycle += 1
                self.T_cur = self.T_cur - self.T_i
                self.T_i = (self.T_i - self.T_up) * self.T_mult + self.T_up
        else:
            if epoch >= self.T_0:
                if self.T_mult == 1:
                    self.T_cur = epoch % self.T_0
                    self.cycle = epoch // self.T_0
                else:
                    n = int(math.log((epoch / self.T_0 * (self.T_mult - 1) + 1), self.T_mult))
                    self.cycle = n
                    self.T_cur = epoch - self.T_0 * (self.T_mult ** n - 1) / (self.T_mult - 1)
                    self.T_i = self.T_0 * self.T_mult ** (n)
            else:
                self.T_i = self.T_0
                self.T_cur = epoch
                
        self.eta_max = self.base_eta_max * (self.gamma**self.cycle)
        self.last_epoch = math.floor(epoch)
        for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()):
            param_group['lr'] = lr

In [18]:
import torch.nn as nn
import torch.nn.functional as F

class FocalLoss(nn.Module):
    def __init__(self, alpha=1, gamma=2, reduction='mean'):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, inputs, targets):
        ce_loss = F.cross_entropy(inputs, targets, reduction='none')
        pt = torch.exp(-ce_loss)
        focal_loss = self.alpha * (1 - pt) ** self.gamma * ce_loss

        if self.reduction == 'mean':
            return focal_loss.mean()
        elif self.reduction == 'sum':
            return focal_loss.sum()
        else:
            return focal_loss

In [19]:
time_now = datetime.now()
run_id = 0# time_now.strftime("%Y%m%d%H%M%S")
os.makedirs(os.path.join(data_path, f'./runs/{run_id}'), exist_ok=True)
print(f'{run_id=}')

run_id=0


In [20]:
def train(epoch, model, optimizer, criterion, train_loader, device, lr_scheduler=None):
    model.train()
    train_loss = []
    lr_list = []
    cutmix_prob = 0.5
    beta = 1
    for idx, (img, label) in enumerate(train_loader):
        img, label = img.float().to(device), label.long().to(device)
        
        optimizer.zero_grad()

        r = np.random.rand(1)
        if r < cutmix_prob:
            lam = np.random.beta(beta, beta)
            rand_index = torch.randperm(img.size()[0]).cuda()
            target_a = label
            target_b = label[rand_index]
            bbx1, bby1, bbx2, bby2 = rand_bbox(img.size(), lam)
            img[:, :, bbx1:bbx2, bby1:bby2] = img[rand_index, :, bbx1:bbx2, bby1:bby2]

            lam = 1 - ((bbx2 - bbx1) * (bby2 - bby1) / (img.size()[-1] * img.size()[-2]))

            model_pred = model(img)
            loss = criterion(model_pred, target_a) * lam + criterion(model_pred, target_b) * (1. - lam)
        else:
            model_pred = model(img)
            loss = criterion(model_pred, label)

        loss.backward()
        if ((epoch-1)*len(train_loader) + idx)%10 == 0:
            lr_list.append(optimizer.param_groups[0]['lr'])
        optimizer.step()
        train_loss.append(loss.item())

        if lr_scheduler:
            lr_scheduler.step()
    return np.mean(train_loss), lr_list

In [21]:
def competition_metric(true, pred):
    return f1_score(true, pred, average="macro")

def validation(model, criterion, test_loader, device):
    model.eval()
    
    model_preds = []
    true_labels = []
    
    val_loss = []
    
    with torch.no_grad():
        for img, label in iter(test_loader):
            img, label = img.float().to(device), label.long().to(device)
            
            model_pred = model(img)
            
            loss = criterion(model_pred, label)
            
            val_loss.append(loss.item())
            
            model_preds += model_pred.argmax(1).detach().cpu().numpy().tolist()
            true_labels += label.detach().cpu().numpy().tolist()
        
    val_f1 = competition_metric(true_labels, model_preds)
    return np.mean(val_loss), val_f1

In [22]:
def train_epoch(k, model, optimizer, train_loader, test_loader, lr_scheduler, device):
    model.to(device)

    criterion = nn.CrossEntropyLoss().to(device)
    early_stopping = EarlyStopping(patience=CFG['PATIENCE'], verbose=True)
    
    best_score = 0
    lr_list = []
    train_loss_list, val_loss_list = [], []
    
    for epoch in range(1,CFG["EPOCHS"]+1):
        tr_loss, lr_ = train(epoch, model, optimizer, criterion, train_loader, device, lr_scheduler)
        val_loss, val_score = validation(model, criterion, test_loader, device)
        train_loss_list.append(tr_loss)
        val_loss_list.append(val_loss)
        
        if lr_scheduler is not None:
            lr_list.extend(lr_)
            
        if best_score < val_score:
            print(f'**Epoch [{k}-{epoch}], Train Loss : [{tr_loss:.5f}] Val Loss : [{val_loss:.5f}] Val F1 Score : [{val_score:.5f}]')
            best_score = val_score
            torch.save(model, os.path.join(data_path, f'runs/{run_id}/best_model_{k}.pt'))
        else:
            print(f'Epoch [{k}-{epoch}], Train Loss : [{tr_loss:.5f}] Val Loss : [{val_loss:.5f}] Val F1 Score : [{val_score:.5f}]')
        clear_mem()
        if early_stopping(val_score):
            print(f'Epoch [{k}-{epoch}], early stopping')
            break
    if lr_list:
        return (train_loss_list, val_loss_list, best_score, lr_list)
    else:
        return (train_loss_list, val_loss_list, best_score, None)

In [23]:
def k_fold(k): # k-fold
    skf = StratifiedKFold(n_splits=k, shuffle=False)
    f1_sum = 0
    
    for k_, (train_index, valid_index) in enumerate(skf.split(df, df['artist'])):
        print(f'{k_}-fold start')
        
        class_counts = df.loc[train_index, 'artist'].value_counts(sort=False).to_dict()
        num_samples = sum(class_counts.values())
        labels = df.loc[train_index, 'artist'].to_list()
        class_weights = {l:round(num_samples/np.log(class_counts[l]), 2) for l in class_counts.keys()}
        weights = [class_weights[labels[i]] for i in range(int(num_samples))]
        sampler = torch.utils.data.WeightedRandomSampler(torch.DoubleTensor(weights), int(num_samples))
        
        train_img_paths, train_labels = get_data(df.iloc[train_index])
        val_img_paths, val_labels = get_data(df.iloc[valid_index])
        train_dataset = CustomDataset(train_img_paths, train_labels, train_transform)
        train_loader = DataLoader(
            train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False,
            sampler=sampler, worker_init_fn=seed_worker, generator=g, num_workers=0
        )
        val_dataset = CustomDataset(val_img_paths, val_labels, validation_transform)
        val_loader = DataLoader(val_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, worker_init_fn=seed_worker, generator=g, num_workers=0)
        
        model = VitModel()
        model.eval()
        optimizer = torch.optim.AdamW(params = model.parameters(), lr = CFG['LEARNING_RATE'])
        # lr : 5epochs 동안 0.01->0
        lr_scheduler = CosineAnnealingWarmUpRestarts(optimizer, T_0=CFG['WARMUP']*len(train_loader), T_mult=1, eta_max=0.001, T_up=50, gamma=0.8)
        _, _, f1_score, _ = train_epoch(k_, model, optimizer, train_loader, val_loader, lr_scheduler, device)
        f1_sum += f1_score
    return f1_sum/k

In [24]:
f1_score = k_fold(CFG['K-FOLD'])
print(f1_score)

0-fold start
**Epoch [0-1], Train Loss : [3.28671] Val Loss : [2.48123] Val F1 Score : [0.20109]
**Epoch [0-2], Train Loss : [2.65624] Val Loss : [1.83625] Val F1 Score : [0.40101]
**Epoch [0-3], Train Loss : [2.23771] Val Loss : [1.48675] Val F1 Score : [0.47183]
**Epoch [0-4], Train Loss : [1.94627] Val Loss : [1.13654] Val F1 Score : [0.59950]
**Epoch [0-5], Train Loss : [1.79144] Val Loss : [1.08396] Val F1 Score : [0.63148]
Epoch [0-6], Train Loss : [2.16897] Val Loss : [1.50838] Val F1 Score : [0.48367]
EarlyStopping counter: 1 out of 10
Best F1 score from now: 0.6314790122436917
Epoch [0-7], Train Loss : [2.03625] Val Loss : [1.26281] Val F1 Score : [0.56311]
EarlyStopping counter: 2 out of 10
Best F1 score from now: 0.6314790122436917
**Epoch [0-8], Train Loss : [1.74990] Val Loss : [1.00052] Val F1 Score : [0.64949]
**Epoch [0-9], Train Loss : [1.53388] Val Loss : [0.88796] Val F1 Score : [0.68954]
**Epoch [0-10], Train Loss : [1.40995] Val Loss : [0.83565] Val F1 Score : [0.7

In [25]:
test_df = pd.read_csv(os.path.join(data_path, './test.csv'))
test_df.head()

Unnamed: 0,id,img_path
0,TEST_00000,./test/TEST_00000.jpg
1,TEST_00001,./test/TEST_00001.jpg
2,TEST_00002,./test/TEST_00002.jpg
3,TEST_00003,./test/TEST_00003.jpg
4,TEST_00004,./test/TEST_00004.jpg


In [26]:
test_img_paths = get_data(test_df, infer=True)

In [27]:
test_dataset = CustomDataset(test_img_paths, None, test_transform)
test_loader = DataLoader(test_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

In [28]:
def inference(model, test_loader, device):
    model.to(device)
    model.eval()
    
    model_preds = []
    
    with torch.no_grad():
        for idx, img in enumerate(test_loader):
            img = img.float().to(device)
            
            model_pred = model(img).detach().cpu()
            model_pred = F.softmax(model_pred, dim=1)
            model_preds.extend(model_pred.numpy().tolist())
    
    print('Done.')
    return model_preds

In [29]:
CFG['K-FOLD']

5

In [30]:
result_df = pd.DataFrame(np.zeros((test_df.shape[0], len(le.classes_))))
for k_ in range(CFG['K-FOLD']):
    checkpoint = os.path.join(data_path, f'runs/{run_id}/best_model_{k_}.pt')
    print(f'{k_}-fold CHECKPOINT LOADED: {checkpoint}')
    infer_model = torch.load(checkpoint)
    infer_model.to(device)
    infer_model.eval()
    result_df += pd.DataFrame(inference(infer_model, test_loader, device))

0-fold CHECKPOINT LOADED: ../../data/runs/0/best_model_0.pt
Done.
1-fold CHECKPOINT LOADED: ../../data/runs/0/best_model_1.pt
Done.
2-fold CHECKPOINT LOADED: ../../data/runs/0/best_model_2.pt
Done.
3-fold CHECKPOINT LOADED: ../../data/runs/0/best_model_3.pt
Done.
4-fold CHECKPOINT LOADED: ../../data/runs/0/best_model_4.pt
Done.


In [31]:
result_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,0.000391,0.003798,0.001253,0.000659,0.000289,0.000511,0.000893,0.002182,4.3e-05,0.001935,...,0.001037,0.001086,0.001995,0.001114,0.001621,0.002774,0.014539,0.000594,0.012735,0.000502
1,0.000609,0.000465,4.964714,0.000262,0.000161,0.000157,5.6e-05,0.000467,0.000906,0.0002,...,0.000302,0.000129,0.000768,0.000783,0.00035,0.000156,0.000508,5.8e-05,0.001751,0.000337
2,0.000892,0.000559,0.000182,0.000126,0.000139,0.000156,4.750564,0.000163,4.7e-05,0.01919,...,0.000448,0.007567,0.003164,0.001022,0.041968,0.00063,0.111736,0.000179,0.002056,8.3e-05
3,4.975413,0.000614,0.000306,0.000289,0.000143,0.000246,0.000106,0.000523,8.3e-05,0.000281,...,0.001447,0.000128,0.000457,0.000291,0.00036,0.000197,0.000669,0.00039,0.003969,0.00021
4,0.002655,0.010429,0.670103,0.001621,0.001949,0.007229,0.000604,0.006943,0.008683,0.001588,...,0.004716,0.000625,0.002971,0.003787,0.001023,0.000788,0.002099,0.001349,0.502946,0.000813


In [32]:
preds = result_df.idxmax(axis=1)

In [33]:
preds = le.inverse_transform(preds)

In [34]:
submit = pd.read_csv(os.path.join(data_path, './sample_submission.csv'))

In [35]:
submit['artist'] = preds

In [36]:
submit.head()

Unnamed: 0,id,artist
0,TEST_00000,Edgar Degas
1,TEST_00001,Amedeo Modigliani
2,TEST_00002,Caravaggio
3,TEST_00003,Albrecht Du rer
4,TEST_00004,Pablo Picasso


In [37]:
submit.to_csv(os.path.join(data_path, f"./submit_{CFG['FILENAME']}.csv"), index=False)