#  tta
* models
    - vit
    - efficient
    - caformer
- kfold
- tta
* result
    - tta5
        - public score : 0.8613488426
        - private score : 0.8672446236
    - tta7
        - public score : 0.8612270892
        - private score : 0.8644435004

In [1]:
import gc
import os
import random
from datetime import datetime

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import cv2

from sklearn import preprocessing
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import f1_score

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from tqdm.auto import tqdm

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

import timm

import warnings
warnings.filterwarnings(action='ignore') 

In [2]:
# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
# Run configuration shared by every cell below.
CFG = {
    'IMG_SIZE_VIT': 260,        # square input side used by the ViT transforms
    'IMG_SIZE_EFFICIENT': 300,  # square input side used by the EfficientNet transforms
    'IMG_SIZE_CAFORMER': 272,   # square input side used by the CAFormer transforms
    'BATCH_SIZE': 16,           # DataLoader batch size at inference time
    'K-FOLD': 5,                # number of per-fold checkpoints loaded per model family
    'NUM_TTA': 7,               # number of augmented inference passes per checkpoint
    'SEED': 6                   # seed passed to seed_everything
}
# The submission file tag is derived from NUM_TTA so the two can never drift
# apart when the number of TTA passes is changed (e.g. 'tta5' vs 'tta7').
CFG['FILENAME'] = f"tta{CFG['NUM_TTA']}"

In [4]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and CUDA), exports
    ``PYTHONHASHSEED`` and switches cuDNN to deterministic, non-benchmarking
    mode.

    Args:
        seed: Integer seed applied to all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Deterministic cuDNN kernels, and no auto-tuner picking kernels per run.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

seed_everything(CFG['SEED']) # Fix the seed for reproducibility

In [5]:
# Detect Google Colab: only ask IPython for its shell when the interpreter
# advertises the __IPYTHON__ builtin; otherwise assume a local run.
if hasattr(__builtins__, '__IPYTHON__'):
    running_colab = 'google.colab' in str(get_ipython())
else:
    running_colab = False

# On Colab the data lives on a mounted Google Drive; locally it is read from
# a directory relative to the notebook.
if running_colab:
    from google.colab import drive
    drive.mount('/content/drive')
    data_path = '/content/drive/MyDrive/Colab Notebooks/ai6th/data/optiver/'
else:
    data_path = '../../data/'

In [6]:
train_csv_path = os.path.join(data_path, 'train.csv')
df = pd.read_csv(train_csv_path)

# Manual overrides of the artist label for two rows
# (presumably mislabeled in the raw CSV - TODO confirm).
for fixed_row, fixed_artist in ((3896, 'Titian'), (3986, 'Alfred Sisley')):
    df.loc[fixed_row, 'artist'] = fixed_artist

df.head()

Unnamed: 0,id,img_path,artist
0,0,./train/0000.jpg,Diego Velazquez
1,1,./train/0001.jpg,Vincent van Gogh
2,2,./train/0002.jpg,Claude Monet
3,3,./train/0003.jpg,Edgar Degas
4,4,./train/0004.jpg,Hieronymus Bosch


In [7]:
# Label Encoding
# Fit a LabelEncoder on the artist names and overwrite the column with the
# integer codes. `le` is kept because later cells use `len(le.classes_)` for
# the number of classes and `le.inverse_transform` to map predictions back to
# artist names.
# NOTE: this cell is not idempotent - running it a second time would re-fit
# the encoder on the already-encoded integers.
le = preprocessing.LabelEncoder()
df['artist'] = le.fit_transform(df['artist'].values)

In [8]:
def get_data(df, infer=False):
    """Return image paths (and labels) from a dataframe.

    Each entry of ``df['img_path']`` is joined onto the notebook-level
    ``data_path``.

    Args:
        df: Dataframe with an ``img_path`` column and, unless ``infer`` is
            set, an ``artist`` column.
        infer: When true, only the path array is returned.

    Returns:
        The array of joined paths when ``infer`` is true, otherwise a tuple
        ``(paths, labels)`` where ``labels`` is ``df['artist'].values``.
    """
    full_paths = df['img_path'].apply(lambda rel: os.path.join(data_path, rel)).values
    if not infer:
        return full_paths, df['artist'].values
    return full_paths

In [9]:
from torchvision.transforms import ToTensor


class CustomDataset(Dataset):
    """Image dataset that reads files with OpenCV and applies a transform.

    Args:
        img_paths: Indexable collection of image file paths.
        labels: Indexable collection of labels aligned with ``img_paths``, or
            ``None`` for unlabeled (inference) data.
        transforms: Callable invoked albumentations-style, i.e.
            ``transforms(image=array)['image']``. When ``None``, torchvision's
            ``ToTensor`` is used as a fallback.

    Items are ``(image, label)`` when labels were given, otherwise ``image``.
    """

    def __init__(self, img_paths, labels, transforms=None):
        self.img_paths = img_paths
        self.labels = labels
        # The fallback ToTensor() takes the image positionally and returns the
        # tensor directly, unlike an albumentations pipeline; remember which
        # calling convention applies so __getitem__ works in both cases.
        self._uses_default_transform = transforms is None
        self.transforms = ToTensor() if self._uses_default_transform else transforms

    def __getitem__(self, index):
        img_path = self.img_paths[index]
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals a missing/unreadable file by returning None;
            # fail here with the offending path instead of inside cvtColor.
            raise FileNotFoundError(f'Could not read image: {img_path}')
        # OpenCV loads images as BGR; convert to RGB before transforming.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self._uses_default_transform:
            image = self.transforms(image)
        else:
            image = self.transforms(image=image)['image']

        if self.labels is not None:
            label = self.labels[index]
            return image, label
        else:
            return image

    def __len__(self):
        return len(self.img_paths)

In [10]:
def _build_vit_transform(train):
    """Build the albumentations pipeline used for the ViT model.

    The train and test pipelines differ only in how the image is brought to
    ``CFG['IMG_SIZE_VIT']``; the random augmentations are part of both (the
    test pipeline is applied repeatedly for test-time augmentation).
    """
    side = CFG['IMG_SIZE_VIT']
    if train:
        # Resize to twice the target side, then take a random crop.
        sizing = [A.Resize(side*2, side*2), A.RandomCrop(side, side)]
    else:
        sizing = [A.Resize(side, side)]

    augment = [
        A.Transpose(p=0.5),  # swap rows and columns
        A.HorizontalFlip(p=0.5),  # left-right flip
        A.VerticalFlip(p=0.5),  # up-down flip
        A.ShiftScaleRotate(p=0.5),  # random shift, scale and rotation
        A.HueSaturationValue(hue_shift_limit=20, sat_shift_limit=20, val_shift_limit=20, p=0.5),  # hue / saturation / value jitter
        A.RandomBrightnessContrast(brightness_limit=(-0.1,0.1), contrast_limit=(-0.1, 0.1), p=0.5),  # brightness / contrast jitter
        A.ChannelShuffle(),  # shuffle the RGB channels
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, always_apply=False, p=1.0),
        ToTensorV2(),
    ]
    return A.Compose(sizing + augment)


train_transform_vit = _build_vit_transform(train=True)

test_transform_vit = _build_vit_transform(train=False)

In [11]:
def _build_efficient_transform(train):
    """Build the albumentations pipeline used for the EfficientNet model.

    The train and test pipelines differ only in how the image is brought to
    ``CFG['IMG_SIZE_EFFICIENT']``; the random augmentations are part of both
    (the test pipeline is applied repeatedly for test-time augmentation).
    """
    side = CFG['IMG_SIZE_EFFICIENT']
    if train:
        # Resize to twice the target side, then take a random crop.
        sizing = [A.Resize(side*2, side*2), A.RandomCrop(side, side)]
    else:
        sizing = [A.Resize(side, side)]

    augment = [
        A.Transpose(p=0.5),  # swap rows and columns
        A.HorizontalFlip(p=0.5),  # left-right flip
        A.VerticalFlip(p=0.5),  # up-down flip
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, always_apply=False, p=1.0),
        A.CoarseDropout(p=0.5),
        ToTensorV2(),
    ]
    return A.Compose(sizing + augment)


train_transform_efficient = _build_efficient_transform(train=True)

test_transform_efficient = _build_efficient_transform(train=False)

In [12]:
def _build_caformer_transform(train):
    """Build the albumentations pipeline used for the CAFormer model.

    The train and test pipelines differ only in how the image is brought to
    ``CFG['IMG_SIZE_CAFORMER']``; the random augmentations are part of both
    (the test pipeline is applied repeatedly for test-time augmentation).
    """
    side = CFG['IMG_SIZE_CAFORMER']
    if train:
        # Resize to twice the target side, then take a random crop.
        sizing = [A.Resize(side*2, side*2), A.RandomCrop(side, side)]
    else:
        sizing = [A.Resize(side, side)]

    augment = [
        A.Transpose(p=0.5),  # swap rows and columns
        A.HorizontalFlip(p=0.5),  # left-right flip
        A.VerticalFlip(p=0.5),  # up-down flip
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, always_apply=False, p=1.0),
        A.CoarseDropout(p=0.5),
        ToTensorV2(),
    ]
    return A.Compose(sizing + augment)


train_transform_caformer = _build_caformer_transform(train=True)

test_transform_caformer = _build_caformer_transform(train=False)

In [13]:
def seed_worker(worker_id):
    """Re-seed NumPy and Python's ``random`` from PyTorch's current seed.

    Has the ``worker_init_fn`` signature of ``DataLoader``; ``worker_id`` is
    accepted but not used.
    """
    # Fold torch's 64-bit seed into the 32-bit range before re-seeding.
    derived_seed = torch.initial_seed() % (1 << 32)
    for reseed in (np.random.seed, random.seed):
        reseed(derived_seed)


# Dedicated generator with a fixed seed. Note that none of the DataLoaders
# created in this notebook is given `g` or `seed_worker`.
g = torch.Generator()
g.manual_seed(0)

<torch._C.Generator at 0x1b8e1c7deb0>

In [14]:
class VitModel(nn.Module):
    """timm TinyViT-21M backbone followed by a small classification head.

    The backbone is created with ``num_classes=0`` and its output is fed into
    a LayerNorm -> GELU -> Dropout -> Linear head of input width 576.

    Args:
        num_classes: Output size of the head; defaults to the number of
            classes known to the notebook-level label encoder ``le``.
    """

    def __init__(self, num_classes=len(le.classes_)):
        super().__init__()
        head_width = 576
        self.backbone = timm.create_model(
            'tiny_vit_21m_384.dist_in22k_ft_in1k',
            pretrained=True,
            num_classes=0,
        )
        self.classifier = nn.Sequential(
            nn.LayerNorm(head_width),
            nn.GELU(),
            nn.Dropout(p=0.4),
            nn.Linear(head_width, num_classes),
        )

    def forward(self, x):
        """Return the head's logits for the batch ``x``."""
        return self.classifier(self.backbone(x))

In [15]:
class EfficientNetModel(nn.Module):
    """timm EfficientNet-B3 backbone followed by a small classification head.

    The backbone is created with ``num_classes=512``, so its 512-wide output
    is what the LayerNorm -> GELU -> Dropout -> Linear head consumes.

    Args:
        num_classes: Output size of the head; defaults to the number of
            classes known to the notebook-level label encoder ``le``.
    """

    def __init__(self, num_classes=len(le.classes_)):
        super().__init__()
        head_width = 512
        self.backbone = timm.create_model(
            'efficientnet_b3',
            pretrained=True,
            num_classes=head_width,
        )
        self.classifier = nn.Sequential(
            nn.LayerNorm(head_width),
            nn.GELU(),
            nn.Dropout(p=0.4),
            nn.Linear(head_width, num_classes),
        )

    def forward(self, x):
        """Return the head's logits for the batch ``x``."""
        return self.classifier(self.backbone(x))

In [16]:
class CAFormerModel(nn.Module):
    """timm CAFormer-S18 backbone followed by a small classification head.

    The backbone is created with ``num_classes=0`` and its output is fed into
    a LayerNorm -> GELU -> Dropout -> Linear head of input width 512.

    Args:
        num_classes: Output size of the head; defaults to the number of
            classes known to the notebook-level label encoder ``le``.
    """

    def __init__(self, num_classes=len(le.classes_)):
        super().__init__()
        head_width = 512
        self.backbone = timm.create_model(
            'caformer_s18.sail_in22k_ft_in1k_384',
            pretrained=True,
            num_classes=0,
        )
        self.classifier = nn.Sequential(
            nn.LayerNorm(head_width),
            nn.GELU(),
            nn.Dropout(p=0.4),
            nn.Linear(head_width, num_classes),
        )

    def forward(self, x):
        """Return the head's logits for the batch ``x``."""
        return self.classifier(self.backbone(x))

In [17]:
def clear_mem():
    """Run Python's garbage collector, then release PyTorch's cached CUDA memory."""
    gc.collect()
    torch.cuda.empty_cache()

In [18]:
# Record and print the wall-clock time at which inference setup starts.
time_now = datetime.now()
print(f'{time_now=}')

time_now=datetime.datetime(2023, 12, 28, 22, 44, 8, 777890)


In [19]:
# Load the test-set index (one row per test image: id and relative path).
test_csv_path = os.path.join(data_path, './test.csv')
test_df = pd.read_csv(test_csv_path)
test_df.head()

Unnamed: 0,id,img_path
0,TEST_00000,./test/TEST_00000.jpg
1,TEST_00001,./test/TEST_00001.jpg
2,TEST_00002,./test/TEST_00002.jpg
3,TEST_00003,./test/TEST_00003.jpg
4,TEST_00004,./test/TEST_00004.jpg


In [20]:
# Test images have no labels, so only the joined image paths are requested.
test_img_paths = get_data(test_df, infer=True)

In [21]:
# Unlabeled test set seen through the ViT test-time transform; order is kept
# fixed (no shuffling) so predictions line up with the rows of test_df.
test_dataset_vit = CustomDataset(
    img_paths=test_img_paths,
    labels=None,
    transforms=test_transform_vit,
)
test_loader_vit = DataLoader(
    dataset=test_dataset_vit,
    batch_size=CFG['BATCH_SIZE'],
    shuffle=False,
    num_workers=0,
)

In [22]:
# Unlabeled test set seen through the EfficientNet test-time transform; order
# is kept fixed (no shuffling) so predictions line up with the rows of test_df.
test_dataset_efficient = CustomDataset(
    img_paths=test_img_paths,
    labels=None,
    transforms=test_transform_efficient,
)
test_loader_efficient = DataLoader(
    dataset=test_dataset_efficient,
    batch_size=CFG['BATCH_SIZE'],
    shuffle=False,
    num_workers=0,
)

In [23]:
# Unlabeled test set seen through the CAFormer test-time transform; order is
# kept fixed (no shuffling) so predictions line up with the rows of test_df.
test_dataset_caformer = CustomDataset(
    img_paths=test_img_paths,
    labels=None,
    transforms=test_transform_caformer,
)
test_loader_caformer = DataLoader(
    dataset=test_dataset_caformer,
    batch_size=CFG['BATCH_SIZE'],
    shuffle=False,
    num_workers=0,
)

In [24]:
def inference(model, test_loader, device):
    """Run one pass over ``test_loader`` and return softmax probabilities.

    Args:
        model: Module that maps an image batch to per-class logits.
        test_loader: Iterable yielding image batches (no labels).
        device: Device the model and each batch are moved to.

    Returns:
        A list with one entry per sample, each a list of per-class
        probabilities (softmax over dim 1), in iteration order.
    """
    model.to(device)
    model.eval()

    probabilities = []
    with torch.no_grad():
        for batch in test_loader:
            batch = batch.float().to(device)
            logits = model(batch).detach().cpu()
            # Softmax is taken on the CPU copy of the logits.
            batch_probs = F.softmax(logits, dim=1)
            probabilities.extend(batch_probs.numpy().tolist())

    print('Done.')
    return probabilities

In [25]:
# Accumulator of summed softmax probabilities, one row per test image and one
# column per class. Every (model family, fold, TTA pass) adds its probabilities
# here; the sum is not normalised, which does not affect the later argmax.
preds_tta = np.zeros((len(test_df), len(le.classes_)))

# Each entry pairs a test loader (carrying that model's test-time transform)
# with the run directory that holds its per-fold checkpoints.
test_targets = [
    (test_loader_vit, 'kfold_vit'),
    (test_loader_efficient, 'kfold_efficientnet'),
    (test_loader_caformer, 'kfold_caformer')
]
for loader, run_id in test_targets:
    for k_ in range(CFG['K-FOLD']):
        checkpoint = os.path.join(data_path, f'runs/{run_id}/best_model_{k_}.pt')
        # NOTE(security): torch.load unpickles a whole model object and can
        # execute arbitrary code - only load checkpoints you produced yourself.
        # NOTE: newer PyTorch releases may need weights_only=False to load
        # whole-model checkpoints - TODO confirm against the installed version.
        # map_location lets checkpoints saved on a GPU load on a CPU-only host.
        infer_model = torch.load(checkpoint, map_location=device)
        # Report only after the load has actually succeeded.
        print(f'{k_}-fold CHECKPOINT LOADED: {checkpoint}')
        # The test transforms are random, so each pass sees a different
        # augmented view of the same images (test-time augmentation).
        for t_ in range(CFG['NUM_TTA']):
            preds_tta += np.array(inference(infer_model, loader, device))
        # Drop this fold's model before the next checkpoint is loaded so two
        # models are never resident at once.
        del infer_model
        clear_mem()

0-fold CHECKPOINT LOADED: ../../data/runs/kfold_vit/best_model_0.pt
Done.
Done.
Done.
Done.
Done.
Done.
Done.
1-fold CHECKPOINT LOADED: ../../data/runs/kfold_vit/best_model_1.pt
Done.
Done.
Done.
Done.
Done.
Done.
Done.
2-fold CHECKPOINT LOADED: ../../data/runs/kfold_vit/best_model_2.pt
Done.
Done.
Done.
Done.
Done.
Done.
Done.
3-fold CHECKPOINT LOADED: ../../data/runs/kfold_vit/best_model_3.pt
Done.
Done.
Done.
Done.
Done.
Done.
Done.
4-fold CHECKPOINT LOADED: ../../data/runs/kfold_vit/best_model_4.pt
Done.
Done.
Done.
Done.
Done.
Done.
Done.
0-fold CHECKPOINT LOADED: ../../data/runs/kfold_efficientnet/best_model_0.pt
Done.
Done.
Done.
Done.
Done.
Done.
Done.
1-fold CHECKPOINT LOADED: ../../data/runs/kfold_efficientnet/best_model_1.pt
Done.
Done.
Done.
Done.
Done.
Done.
Done.
2-fold CHECKPOINT LOADED: ../../data/runs/kfold_efficientnet/best_model_2.pt
Done.
Done.
Done.
Done.
Done.
Done.
Done.
3-fold CHECKPOINT LOADED: ../../data/runs/kfold_efficientnet/best_model_3.pt
Done.
Done.
Done

In [26]:
# Peek at the first ten rows of the accumulated (summed, unnormalised) scores.
preds_tta[:10]

array([[3.93546038e-02, 1.88333291e-01, 7.17476744e-02, 9.40155775e-02,
        3.13911152e-02, 5.31454685e-02, 8.50061306e-02, 9.66850150e-02,
        1.37719830e-02, 5.77412460e-01, 8.68181823e+01, 9.95824838e-01,
        1.14705161e-01, 3.53681662e-02, 2.96101108e-02, 4.28905097e-01,
        3.22870021e-02, 2.09567065e-02, 2.72733955e-01, 2.81361716e-01,
        1.59169089e-01, 4.10604815e-02, 6.26692310e-02, 8.45099158e-02,
        1.63070876e-01, 9.50511496e-03, 8.62218876e-01, 1.97596509e-02,
        8.02647693e-02, 1.04504121e-01, 1.62442401e-01, 2.39142172e-02,
        1.47428531e-01, 1.64144386e+00, 2.81161956e-02, 2.74751768e-01,
        1.08183028e-01, 4.80738153e-02, 2.97520455e-01, 5.88237604e-02,
        4.51288657e-01, 1.53676032e-01, 5.98268895e-01, 1.73213647e-01,
        1.71598682e-01, 2.85329154e-01, 1.66647303e+00, 5.14733234e-02,
        6.72358208e+00, 6.68696333e-02],
       [1.10446302e-01, 1.04646748e-01, 9.73525238e+01, 4.93605862e-02,
        7.46376227e-02,

In [27]:
# Index of the highest accumulated score per test image.
preds = np.argmax(preds_tta, axis=1)

In [28]:
# Map the winning class indices back to artist names. The indices are taken
# from preds_tta directly (rather than from the current value of `preds`) so
# this cell can be re-run without feeding already-decoded names back into
# inverse_transform.
preds = le.inverse_transform(preds_tta.argmax(axis=1))

In [29]:
# Load the submission template that will receive the predicted artist names.
submit = pd.read_csv(os.path.join(data_path, './sample_submission.csv'))

In [30]:
# Fill in the predictions positionally.
# NOTE(review): assumes sample_submission.csv lists ids in the same order as test.csv - confirm.
submit['artist'] = preds

In [31]:
# Sanity-check the first few rows of the filled-in submission.
submit.head()

Unnamed: 0,id,artist
0,TEST_00000,Edgar Degas
1,TEST_00001,Amedeo Modigliani
2,TEST_00002,Caravaggio
3,TEST_00003,Albrecht Du rer
4,TEST_00004,Pablo Picasso


In [32]:
# Write the submission next to the data, tagged with the configured run name.
submit_path = os.path.join(data_path, f"./submit_{CFG['FILENAME']}.csv")
submit.to_csv(submit_path, index=False)