In [None]:
# Mount Google Drive at /content/drive (this import only exists in the Google Colab runtime).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Import Libraries

In [None]:
!pip install ttach
!pip install timm

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import random
import pandas as pd
import numpy as np
import os
import cv2

from sklearn import preprocessing
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import timm

from tqdm.auto import tqdm
from copy import deepcopy

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
import ttach as tta

import torchvision.models as models

from sklearn.metrics import f1_score

import warnings
warnings.filterwarnings(action='ignore')

In [None]:
# Device selection: use CUDA when it is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Hyper-parameter set (Kwon Seok-won's version)
CFG = dict(
    IMG_SIZE_H=220,        # height passed to A.Resize
    IMG_SIZE_W=275,        # width passed to A.Resize
    EPOCHS=50,             # upper bound on training epochs
    LEARNING_RATE=3e-4,    # initial Adam learning rate
    BATCH_SIZE=64,         # batch size for every DataLoader
    SEED=41,               # seed for the RNGs and the train/validation split
    PATIENCE=5,            # early-stopping patience, in epochs
)

In [None]:
# Random seed
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, ``PYTHONHASHSEED``, NumPy and PyTorch
    (CPU and all CUDA devices), and configures cuDNN for reproducible runs.

    Args:
        seed: integer seed shared by all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # also covers runtimes with more than one GPU
    torch.backends.cudnn.deterministic = True
    # Benchmark mode auto-tunes the convolution algorithm per input shape and the
    # choice can differ between runs, which defeats the deterministic flag above.
    torch.backends.cudnn.benchmark = False

# Seed all random number generators with the configured seed before any data or model work.
seed_everything(CFG['SEED'])

## Data Preprocessing

In [None]:
# Load the training metadata table from Drive and display its (rows, columns) shape.
# NOTE(review): the name `train` is rebound to the training function defined further down,
# so this DataFrame is no longer reachable under that name once that cell has run.
train = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Artist_classification/data/train.csv')
train.shape

(5911, 3)

In [None]:
def img_path_change(img_path, data_root='/content/drive/MyDrive/Colab Notebooks/Artist_classification/data'):
  """Turn a CSV-relative image path into an absolute path under ``data_root``.

  The first character of ``img_path`` (the leading ``.`` of ``./train/...``) is
  dropped and the remainder is appended to ``data_root``.  A path that already
  starts with ``data_root`` is returned unchanged, so applying the function a
  second time (e.g. when the cell is re-run) does not prefix the path twice.

  Args:
    img_path: path as stored in the CSV; converted with ``str`` first.
    data_root: directory that the relative paths are resolved against.

  Returns:
    The absolute path as a string.
  """
  path = str(img_path)
  if path.startswith(data_root):
    return path
  return data_root + path[1:]

# Replace the relative image paths from the CSV with absolute paths on Drive.
train['img_path'] = train['img_path'].apply(img_path_change)

In [None]:
# Sanity check: display the first rewritten image path.
train['img_path'][0]

'/content/drive/MyDrive/데이콘/월간 데이콘 예술 작품 화가 분류 AI 경진대회/train/0000.jpg'

In [None]:
# Label encoding: convert the artists into categorical (label-encoded) data
# 50 artist names
le = preprocessing.LabelEncoder()
train['artist'] = le.fit_transform(train['artist'].values)

## Train / Validation Split

In [None]:
# Hold out 20% of the rows for validation; the split is seeded with CFG['SEED'].
# The two label arrays returned by train_test_split are discarded because each frame keeps its 'artist' column.
# NOTE(review): the split is not stratified by artist; check whether per-class proportions matter for the macro-F1 metric.
train_df, val_df, _, _ = train_test_split(train, train['artist'].values, test_size=0.2, random_state=CFG['SEED'])

In [None]:
# Order the training rows by id and preview the first few.
train_df = train_df.sort_values(by=['id'])
train_df.head()

Unnamed: 0,id,img_path,artist
0,0,/content/drive/MyDrive/데이콘/월간 데이콘 ...,9
2,2,/content/drive/MyDrive/데이콘/월간 데이콘 ...,7
3,3,/content/drive/MyDrive/데이콘/월간 데이콘 ...,10
5,5,/content/drive/MyDrive/데이콘/월간 데이콘 ...,38
6,6,/content/drive/MyDrive/데이콘/월간 데이콘 ...,43


In [None]:
# Order the validation rows by id and preview the first few.
val_df = val_df.sort_values(by=['id'])
val_df.head()

Unnamed: 0,id,img_path,artist
1,1,/content/drive/MyDrive/데이콘/월간 데이콘 ...,48
4,4,/content/drive/MyDrive/데이콘/월간 데이콘 ...,24
17,17,/content/drive/MyDrive/데이콘/월간 데이콘 ...,10
21,21,/content/drive/MyDrive/데이콘/월간 데이콘 ...,29
29,29,/content/drive/MyDrive/데이콘/월간 데이콘 ...,28


## Data Load

In [None]:
# infer=True means the frame is the test set,
# so there is no 'artist' target column to return.
def get_data(df, infer=False):
  """Extract the image paths (and, unless ``infer``, the artist labels) from ``df``.

  Args:
    df: DataFrame with an 'img_path' column and, when ``infer`` is False, an 'artist' column.
    infer: when True only the image paths are returned.

  Returns:
    ``df['img_path'].values`` when ``infer`` is True, otherwise the tuple
    ``(df['img_path'].values, df['artist'].values)``.
  """
  img_paths = df['img_path'].values
  if not infer:
    return img_paths, df['artist'].values
  return img_paths

In [None]:
# File paths and labels for the training and validation splits
train_img_paths, train_labels = get_data(train_df)
val_img_paths, val_labels = get_data(val_df)

In [None]:
# Cut every image into 9 crops here to build the train / validation image and label lists
def split_image(paths,labels):
  """Load each image and cut it into nine overlapping, roughly half-size crops.

  The crops are the four quadrants, the left / right windows centred
  vertically, the top / bottom windows centred horizontally, and the centred
  window.  Every crop carries the label of the image it came from.

  Args:
    paths: iterable of image file paths readable by ``cv2.imread``.
    labels: iterable of labels aligned with ``paths``.

  Returns:
    ``(img_list, label_list)``: the RGB crops (slices of the decoded image
    array) and one label per crop, nine entries per input image.

  Raises:
    FileNotFoundError: if ``cv2.imread`` cannot read one of the paths.
  """
  img_list = []
  label_list = []

  # Give tqdm a total when the input is sized so the progress bar can show progress.
  total = len(paths) if hasattr(paths, '__len__') else None

  for path, label in tqdm(zip(paths, labels), total=total):
    image = cv2.imread(path)
    if image is None:
      # cv2.imread signals an unreadable file by returning None instead of raising.
      raise FileNotFoundError(f'cv2.imread could not read image: {path}')
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    height, width = image.shape[:2]
    half_h, half_w = height // 2, width // 2
    quarter_h, quarter_w = half_h // 2, half_w // 2

    # Each window is (x_start, x_end, y_start, y_end).
    windows = [
        (0, half_w, 0, half_h),                                            # top-left
        (half_w, width, 0, half_h),                                        # top-right
        (0, half_w, half_h, height),                                       # bottom-left
        (half_w, width, half_h, height),                                   # bottom-right
        (0, half_w, quarter_h, height - quarter_h),                        # left, vertically centred
        (half_w, width, quarter_h, height - quarter_h),                    # right, vertically centred
        (quarter_w, width - quarter_w, 0, half_h),                         # top, horizontally centred
        (quarter_w, width - quarter_w, half_h, height),                    # bottom, horizontally centred
        (quarter_w, width - quarter_w, quarter_h, height - quarter_h),     # centre
    ]

    for x0, x1, y0, y1 in windows:
      img_list.append(image[y0:y1, x0:x1])
      label_list.append(label)

  return img_list,label_list

In [None]:
# Expand the training set into 9 crops per image; train_labels is rebound to the per-crop label list.
# NOTE(review): re-running this cell without re-running the get_data cell first would zip the paths with the already-expanded labels.
train_imgs, train_labels = split_image(train_img_paths, train_labels)

0it [00:00, ?it/s]

In [None]:
# Expand the validation set into 9 crops per image; val_labels is rebound to the per-crop label list.
# NOTE(review): re-running this cell without re-running the get_data cell first would zip the paths with the already-expanded labels.
val_imgs, val_labels = split_image(val_img_paths, val_labels)

0it [00:00, ?it/s]

## CustomDataset

In [None]:
# Child class that inherits from torch.utils.data.Dataset
class CustomDataset(Dataset):
  """Dataset over images that are already held in memory.

  Args:
    imgs: indexable collection of images.
    labels: indexable collection of labels aligned with ``imgs``, or None.
    transforms: optional callable invoked as ``transforms(image=img)`` whose
      result is indexed with ``['image']``.
  """

  # Called automatically when the dataset is first constructed;
  # stores the arguments it is given.
  def __init__(self, imgs, labels, transforms=None):
    self.imgs, self.labels, self.transforms = imgs, labels, transforms

  # Length of the dataset (total number of samples).
  # The DataLoader uses it internally to form mini-batches.
  def __len__(self):
    return len(self.imgs)

  # Fetch a single sample; ``index`` selects which one.
  def __getitem__(self, index):
    sample = self.imgs[index]

    # Apply the transform supplied at construction time, if any.
    if self.transforms is not None:
      sample = self.transforms(image=sample)['image']

    # Without labels only the image is returned.
    if self.labels is None:
      return sample
    return sample, self.labels[index]

In [None]:
class TestDataset(Dataset):
  """Dataset that reads each image from disk when it is requested.

  Args:
    img_paths: indexable collection of image file paths.
    labels: indexable collection of labels aligned with ``img_paths``, or None.
    transforms: optional callable invoked as ``transforms(image=img)`` whose
      result is indexed with ``['image']``.
  """

  def __init__(self, img_paths, labels, transforms=None):
    self.img_paths = img_paths
    self.labels = labels
    self.transforms = transforms

  def __getitem__(self, index):
    """Return the RGB image at ``index`` (and its label when labels were given).

    Raises:
      FileNotFoundError: if ``cv2.imread`` cannot read the file.
    """
    img_path = self.img_paths[index]

    image = cv2.imread(img_path)
    if image is None:
      # cv2.imread returns None for a missing or undecodable file; fail with the path
      # instead of letting cvtColor raise an opaque error.
      raise FileNotFoundError(f'cv2.imread could not read image: {img_path}')
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    if self.transforms is not None:
      image = self.transforms(image=image)['image']

    if self.labels is not None:
      label = self.labels[index]
      return image, label
    else:
      return image

  def __len__(self):
    """Number of image paths in the dataset."""
    return len(self.img_paths)

In [None]:
# Albumentations pipelines.
# Both resize to (IMG_SIZE_H, IMG_SIZE_W), normalise with the mean / std below and
# convert to a tensor; only the training pipeline adds the random flips.
_IMG_H, _IMG_W = CFG['IMG_SIZE_H'], CFG['IMG_SIZE_W']
_NORMALIZE_KWARGS = dict(
    mean=(0.485, 0.456, 0.406),
    std=(0.229, 0.224, 0.225),
    max_pixel_value=255.0,
    always_apply=False,
    p=1.0,
)

train_transform = A.Compose([
    A.Resize(_IMG_H, _IMG_W),
    A.Normalize(**_NORMALIZE_KWARGS),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    ToTensorV2(),
])

test_transform = A.Compose([
    A.Resize(_IMG_H, _IMG_W),
    A.Normalize(**_NORMALIZE_KWARGS),
    ToTensorV2(),
])

In [None]:
# Data loaders: the training loader shuffles and uses the augmenting transform,
# the validation loader keeps the order and uses the plain test transform.
train_dataset = CustomDataset(train_imgs, train_labels, train_transform)
train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True, num_workers=2)

val_dataset = CustomDataset(val_imgs, val_labels, test_transform)
val_loader = DataLoader(val_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=2)

## Multi-Modal Model Definition

In [None]:
import torchvision.models as models
class Network_eff_genre(nn.Module):
    """EfficientNet-B0 backbone followed by a linear head that predicts the genre.

    Args:
        num_classes: number of genre classes, i.e. the size of the output layer.
    """

    def __init__(self, num_classes=31):  # number of genre classes
        super(Network_eff_genre, self).__init__()
        # `weights=` replaces the deprecated `pretrained=True` flag and matches how
        # the Swin model in this notebook requests its ImageNet weights.
        self.backbone = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.IMAGENET1K_V1) # b0 ~ b7
        # The head consumes the backbone's 1000-dimensional output.
        self.classifier = nn.Linear(1000, num_classes)

    def forward(self, x):
        """Run the backbone, then the genre head, on a batch of images."""
        x = self.backbone(x)
        x = self.classifier(x)
        return x

# Load the previously trained model that predicts the genre
genre_model = Network_eff_genre()
genre_model.load_state_dict(torch.load('/content/drive/MyDrive/Colab Notebooks/Artist_classification/model/genre.pt', map_location=device))

# Freeze this model: turn off gradients for all of its parameters
for para in genre_model.parameters():
  para.requires_grad = False

# Keep only the backbone (without the genre classifier head) for reuse in the multi-modal model
genre_model_backbone = genre_model.backbone

class Multi_modal_eff(nn.Module):
    """Artist classifier that fuses a trainable backbone with the frozen genre backbone.

    The same image batch goes through a fresh EfficientNet-B0 and through the
    module-level ``genre_model_backbone`` (whose parameters are frozen before
    this class is defined).  The two outputs are concatenated along the feature
    dimension and fed to one linear layer.

    Args:
        num_classes: number of artist classes; defaults to the number of classes
            known to the label encoder ``le`` when the class is defined.
    """

    def __init__(self, num_classes=len(le.classes_)):
        super(Multi_modal_eff, self).__init__()
        # `weights=` replaces the deprecated `pretrained=True` flag and matches how
        # the Swin model in this notebook requests its ImageNet weights.
        self.backbone = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.IMAGENET1K_V1) # b0 ~ b7
        self.genre_backbone = genre_model_backbone
        # Two concatenated 1000-dimensional outputs form the 2000 inputs of the head.
        self.classifier = nn.Linear(2000, num_classes)

    def train(self, mode=True):
        """Set the training mode, but always keep the frozen genre backbone in eval mode.

        Without this, ``model.train()`` would also put the genre backbone into
        train mode, so its normalisation statistics would keep updating and its
        dropout would be active even though its weights are frozen.
        """
        super(Multi_modal_eff, self).train(mode)
        self.genre_backbone.eval()
        return self

    def forward(self, x):
        """Return the artist logits for a batch of images."""
        x_origin = self.backbone(x)
        x_genre = self.genre_backbone(x)
        x = torch.cat([x_origin, x_genre], dim=1)
        x = self.classifier(x)
        return x



Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-3dd342df.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-3dd342df.pth


  0%|          | 0.00/20.5M [00:00<?, ?B/s]

In [None]:
class Network_swin(nn.Module):
    """Swin-T backbone with ImageNet weights followed by a linear artist head.

    Args:
        num_classes: number of artist classes; defaults to the number of classes
            known to the label encoder ``le`` when the class is defined.
    """

    def __init__(self, num_classes=len(le.classes_)):
        super().__init__()
        self.backbone = models.swin_t(weights=models.Swin_T_Weights.IMAGENET1K_V1)
        # The head consumes the backbone's 1000-dimensional output.
        self.classifier = nn.Linear(1000, num_classes)

    def forward(self, x):
        """Run the backbone, then the classification head."""
        features = self.backbone(x)
        return self.classifier(features)

## Train

In [None]:
def train(model, optimizer, train_loader, test_loader, scheduler, device,
          save_path='/content/drive/MyDrive/Colab Notebooks/Artist_classification/model/multimodal_genre.pt'):
  """Train ``model`` with early stopping and return it with its best weights loaded.

  Each epoch runs one pass over ``train_loader`` with cross-entropy loss, then
  scores the model on ``test_loader`` through ``validation``.  Whenever the
  validation F1 score improves, the weights are snapshotted and written to
  ``save_path``.  Training stops once the score has failed to improve for more
  than ``CFG['PATIENCE']`` consecutive epochs, or after ``CFG['EPOCHS']`` epochs.

  Args:
    model: the network to train; moved to ``device``.
    optimizer: optimizer built over the model's parameters.
    train_loader: iterable of ``(img, label)`` training batches.
    test_loader: iterable of ``(img, label)`` validation batches.
    scheduler: learning-rate scheduler stepped once per epoch, or None.
    device: device the model and batches are moved to.
    save_path: file the best ``state_dict`` is saved to.

  Returns:
    ``model`` with the weights of its best-scoring epoch restored, or None if
    no epoch reached a validation score above 0.
  """
  # Move the model to the device
  model.to(device)

  # Loss definition
  criterion = nn.CrossEntropyLoss().to(device)

  # Early-stopping counter: epochs since the last improvement
  es_count = 0

  # Best validation score so far and the weights that produced it
  best_score = 0
  best_state = None

  for epoch in range(1, CFG['EPOCHS'] + 1):
    # Switch the model to train mode
    model.train()

    # Per-batch loss values of this epoch
    train_loss = []

    for img, label in tqdm(iter(train_loader)):
      img, label = img.float().to(device), label.to(device)

      # Clear gradients left over from the previous mini-batch
      optimizer.zero_grad()

      model_pred = model(img)
      loss = criterion(model_pred, label)

      # Backpropagation and parameter update
      loss.backward()
      optimizer.step()

      train_loss.append(loss.item())

    tr_loss = np.mean(train_loss)

    # validation() builds its own criterion and returns (loss, f1, raw predictions);
    # only the first two values are needed here.
    val_loss, val_score = validation(model, test_loader, device)[:2]

    print(f'Epoch [{epoch}], Train Loss : [{tr_loss:.5f}] Val Loss : [{val_loss:.5f}] Val F1 Score : [{val_score:.5f}]')

    # Scheduler
    if scheduler is not None:
      scheduler.step()

    es_count += 1

    # Select the best model by validation score
    if best_score < val_score:
      best_score = val_score
      es_count = 0

      # Checkpoint: keep an independent copy, because `model` keeps changing in later epochs
      best_state = deepcopy(model.state_dict())
      print("model save!!" + os.path.basename(save_path))
      torch.save(best_state, save_path)

    if es_count > CFG['PATIENCE']:
      print('Early Stopping')
      break

  if best_state is None:
    return None

  # Restore the best epoch's weights so the returned model is the best one, not the last one
  model.load_state_dict(best_state)
  return model

In [None]:
# This competition is scored with the F1 score
def competition_metric(true, pred):
    """Return the macro-averaged F1 score of ``pred`` against ``true``."""
    macro_f1 = f1_score(true, pred, average="macro")
    return macro_f1

def validation(model, test_loader, device):
  """Evaluate ``model`` on ``test_loader``.

  Args:
    model: the network to evaluate; switched to eval mode.
    test_loader: iterable of ``(img, label)`` batches.
    device: device the batches are moved to.

  Returns:
    ``(mean_loss, f1, model_preds)``: the mean cross-entropy loss over the
    batches, the score from ``competition_metric`` computed on the predicted
    class indices, and the raw model outputs as a list with one row per sample.
  """
  # Switch the model to evaluation mode (so regularisation such as dropout is not applied)
  model.eval()
  criterion = nn.CrossEntropyLoss().to(device)
  model_preds = []
  pred_classes = []
  true_labels = []

  val_loss = []

  # No gradients are needed during evaluation
  with torch.no_grad():
    for img, label in tqdm(iter(test_loader)):
      img, label = img.float().to(device), label.to(device)
      model_pred = model(img)
      loss = criterion(model_pred, label)
      val_loss.append(loss.item())

      model_preds += model_pred.detach().cpu().numpy().tolist()
      # The metric compares class indices, so reduce each row of scores to its arg-max.
      pred_classes += model_pred.argmax(dim=1).detach().cpu().numpy().tolist()
      true_labels += label.detach().cpu().numpy().tolist()

  val_f1 = competition_metric(true_labels, pred_classes)
  return np.mean(val_loss), val_f1, model_preds

In [None]:
# Build the multi-modal model and an Adam optimizer over its parameters.
model_genre_eff = Multi_modal_eff()
# NOTE(review): train() calls model.train() at the start of every epoch, so this eval() call does not persist into training.
model_genre_eff.eval()
optimizer = torch.optim.Adam(params = model_genre_eff.parameters(), lr = CFG["LEARNING_RATE"])

# scheduler: scale the base learning rate by 0.85 ** epoch
lambda1 = lambda epoch: 0.85 ** epoch
scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda1)

# Run training on the train loader, validating on the validation loader; keep the model that train() returns.
infer_model_genre_eff = train(model_genre_eff, optimizer, train_loader, val_loader, scheduler, device)

  0%|          | 0/665 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.29313] Val Loss : [0.91153] Val F1 Score : [0.72277]
model save!!best_model_effv2_0.72.pt


  0%|          | 0/665 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.05960] Val Loss : [0.96327] Val F1 Score : [0.73544]
model save!!best_model_effv2_0.74.pt


  0%|          | 0/665 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.03859] Val Loss : [1.04353] Val F1 Score : [0.74242]
model save!!best_model_effv2_0.74.pt


  0%|          | 0/665 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.02671] Val Loss : [1.04925] Val F1 Score : [0.74776]
model save!!best_model_effv2_0.75.pt


  0%|          | 0/665 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.02075] Val Loss : [1.10006] Val F1 Score : [0.74801]
model save!!best_model_effv2_0.75.pt


  0%|          | 0/665 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch [6], Train Loss : [0.01687] Val Loss : [1.13292] Val F1 Score : [0.74945]
model save!!best_model_effv2_0.75.pt


  0%|          | 0/665 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch [7], Train Loss : [0.01276] Val Loss : [1.14510] Val F1 Score : [0.74623]


  0%|          | 0/665 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch [8], Train Loss : [0.01237] Val Loss : [1.12382] Val F1 Score : [0.75610]
model save!!best_model_effv2_0.76.pt


  0%|          | 0/665 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch [9], Train Loss : [0.01076] Val Loss : [1.16733] Val F1 Score : [0.74828]


  0%|          | 0/665 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch [10], Train Loss : [0.00868] Val Loss : [1.17205] Val F1 Score : [0.75842]
model save!!best_model_effv2_0.76.pt


  0%|          | 0/665 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch [11], Train Loss : [0.00795] Val Loss : [1.17119] Val F1 Score : [0.75620]


  0%|          | 0/665 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch [12], Train Loss : [0.00776] Val Loss : [1.18334] Val F1 Score : [0.75101]


  0%|          | 0/665 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch [13], Train Loss : [0.00720] Val Loss : [1.17298] Val F1 Score : [0.75287]


  0%|          | 0/665 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch [14], Train Loss : [0.00632] Val Loss : [1.19593] Val F1 Score : [0.75279]


  0%|          | 0/665 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch [15], Train Loss : [0.00528] Val Loss : [1.16294] Val F1 Score : [0.75828]


  0%|          | 0/665 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch [16], Train Loss : [0.00526] Val Loss : [1.17776] Val F1 Score : [0.75573]
Early Stopping


## Inference

In [None]:
# Load the weights from the checkpoint that train() writes
# (the path must match the one train() saves to, which is under .../model/, not .../data/model/).
infer_model_genre_eff = Multi_modal_eff()
infer_model_genre_eff.load_state_dict(torch.load('/content/drive/MyDrive/Colab Notebooks/Artist_classification/model/multimodal_genre.pt', map_location=device))

<All keys matched successfully>

In [None]:
# TTA (test time augmentation)
import ttach as tta

# Augmentations used at test time: horizontal flip and vertical flip.
tta_transforms = tta.Compose(
    [
        tta.HorizontalFlip(),
        tta.VerticalFlip(),
    ]
)

# NOTE(review): despite its name, infer_model_swin wraps the multi-modal EfficientNet model loaded above, not a Swin model.
infer_model_swin = tta.ClassificationTTAWrapper(infer_model_genre_eff, tta_transforms)

In [None]:
# Load the test metadata table from Drive and preview the first rows.
test = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Artist_classification/data/test.csv')
test.head()

Unnamed: 0,id,img_path
0,TEST_00000,./test/TEST_00000.jpg
1,TEST_00001,./test/TEST_00001.jpg
2,TEST_00002,./test/TEST_00002.jpg
3,TEST_00003,./test/TEST_00003.jpg
4,TEST_00004,./test/TEST_00004.jpg


In [None]:
# Rewrite the relative test image paths with the same helper used for the training paths,
# so the data directory is spelled out in one place only.
test['img_path'] = test['img_path'].apply(img_path_change)

In [None]:
# Sanity check: display the first rewritten test image path.
test['img_path'][0]

'/content/drive/MyDrive/데이콘/월간 데이콘 예술 작품 화가 분류 AI 경진대회/test/TEST_00000.jpg'

In [None]:
# The test set has no artist information, so infer=True
test_img_paths = get_data(test, infer=True)

In [None]:
# Test images are read from disk on demand, without labels, in their original order.
test_dataset = TestDataset(test_img_paths, None, test_transform)
test_loader = DataLoader(test_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=2)

In [None]:
def inference(model1, test_loader, device):
    """Run ``model1`` over every batch of ``test_loader`` and collect its outputs.

    Args:
        model1: the model to run; moved to ``device`` and put in eval mode.
        test_loader: iterable of image batches (no labels).
        device: device the model and batches are moved to.

    Returns:
        A list with one entry per sample, each the model's output row as a list.
    """
    model1.to(device)
    model1.eval()

    collected = []
    with torch.no_grad():
        for batch in tqdm(iter(test_loader)):
            outputs = model1(batch.float().to(device))
            collected.extend(outputs.detach().cpu().numpy().tolist())

    print('Done.')
    return collected

In [None]:
# Run the TTA-wrapped model over the test loader and collect its per-sample outputs.
preds = inference(infer_model_swin, test_loader, device)

In [None]:
# Persist the collected test-set outputs to Drive with np.save.
np.save('/content/drive/MyDrive/Colab Notebooks/Artist_classification/pred/pred_genre', preds)