In [1]:
import gc
import os
from glob import glob
import random
import time
import copy
import datetime as dt
import warnings
from collections import Counter
import itertools
import shutil
from pprint import pprint

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
from PIL import Image
from albumentations.pytorch import ToTensorV2
from tqdm.auto import tqdm
import torch
from torch import nn
from torch.nn import functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision
import albumentations as A
import sklearn
from sklearn.metrics import confusion_matrix
from joblib import parallel_backend
import timm
from timm.models.layers import Conv2dSame
from sklearn.metrics import f1_score

In [2]:
data_path = '../../data/'

In [3]:
warnings.filterwarnings(action='ignore') 
time_now = dt.datetime.now()
run_id = time_now.strftime("%Y%m%d%H%M%S")


def seed_everything(seed: int=42):
    random.seed(seed)
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything(42)


def imshow(img):
    img = img / 2 + 0.5 
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()


def plot_confusion_matrix(
                        cm, classes, runid, epoch, 
                        f1, normalize=False, 
                        title='Confusion matrix',
                        cmap=plt.cm.Blues):
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(f'{title}-{runid}-{epoch}-{f1:.4f}')
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j],
                horizontalalignment="center",
                color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # plt.savefig(data_path+f'./cms/cm-{runid}.jpg', dpi=400)
    plt.clf()

In [4]:
transform_train = A.Compose(
    [
        A.RandomResizedCrop(
            height=380, 
            width=380, 
            scale=(0.24, 0.26),
            ratio=(0.90, 1.10),
            always_apply=True
        ),
        A.HorizontalFlip(p=0.5),
        A.Normalize(mean=(0.548, 0.504, 0.479), std=(0.237, 0.247, 0.246)),
        ToTensorV2()
    ]
)

transform_test = A.Compose(
    [
        A.Resize(380, 380),
        A.Normalize(mean=(0.548, 0.504, 0.479), std=(0.237, 0.247, 0.246)),
        ToTensorV2()
    ]
)

In [5]:
class ARTDataset(Dataset):
    def __init__(self, phase, csv, transform) -> None:
        super().__init__()
        df = csv.sort_values(by=['id'])
        self.phase = phase
        self.transform = transform
        self.images = df['img_path']

        if self.phase != 'test':
            self.label = df['artist']
    
    def __getitem__(self, index):
        image_path = self.images.iloc[index]
        if self.phase != 'test':
            label = int(self.label.iloc[index])

        image_path = os.path.join(data_path, f'{image_path}')
        temp = Image.open(image_path).convert("RGB")
        image = np.array(temp).copy()
        temp.close()

        if self.transform:
            transformed = self.transform(image=image)
            image = transformed['image']

        if self.phase != 'test':
            return image, label
        else:
            return image

    def __len__(self):
        return len(self.images)


In [6]:
class EarlyStopping:
    def __init__(self, patience=10, verbose=False, delta=0):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
        self.delta = delta

    def __call__(self, score):
        if self.best_score is None:
            self.best_score = score
        elif score < self.best_score + self.delta:
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            print(f'Best F1 score from now: {self.best_score}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.counter = 0
        
        return self.early_stop

In [7]:
train_csv = pd.read_csv(os.path.join(data_path, 'train.csv'))
train_csv.loc[3896, 'artist'] = 'Titian'
train_csv.loc[3986, 'artist'] = 'Alfred Sisley'
le = sklearn.preprocessing.LabelEncoder()
train_csv['artist'] = le.fit_transform(train_csv['artist'].values)

assert len(le.classes_) == 50

In [8]:
def clear_mem():
    gc.collect()
    torch.cuda.empty_cache()

In [9]:
# skf = sklearn.model_selection.StratifiedKFold(n_splits=20, shuffle=False)
skf = sklearn.model_selection.StratifiedKFold(n_splits=5, shuffle=False)
t = train_csv.artist

for fold, (train_index, test_index) in enumerate(skf.split(np.zeros(len(t)), t)):
    early_stopping = EarlyStopping(patience=10, verbose=True)
    data_train = train_csv.loc[train_index]
    data_validation = train_csv.loc[test_index]

    class_counts = data_train['artist'].value_counts(sort=False).to_dict()
    num_samples = sum(class_counts.values())
    print(f'cls_cnts: {len(class_counts)}\nnum_samples:{num_samples}')
    labels = data_train['artist'].to_list()

    # weight 제작, 전체 학습 데이터 수를 해당 클래스의 데이터 수로 나누어 줌
    class_weights = {l:round(num_samples/class_counts[l], 2) for l in class_counts.keys()}
    
    # weighted vote를 시도해보기 위해 만들었지만 최종 제출에는 사용하지 않았음
    class_weights_keys = le.inverse_transform(list(class_weights.keys()))
    class_weights_values = class_weights.values()
    class_weights2 = dict(zip(class_weights_keys, class_weights_values))
    print(class_weights2)

    # class 별 weight를 전체 trainset에 대응시켜 sampler에 넣어줌
    weights = [class_weights[labels[i]] for i in range(int(num_samples))] 
    sampler = torch.utils.data.WeightedRandomSampler(torch.DoubleTensor(weights), int(num_samples))

    # batch_size=288; GPU 개당 batch 32개 * 9 == 288
    train_dataset = ARTDataset('train', data_train, transform=transform_train)
    validation_dataset = ARTDataset('validation', data_validation, transform=transform_train)
    
    train_loader = DataLoader(
        train_dataset, 
        batch_size=16,
        sampler=sampler,  # trainset에 sampler를 설정해줌
        shuffle=False,
        num_workers=0,
        pin_memory=True
        )
    validation_loader = DataLoader(
        validation_dataset, 
        batch_size=16,
        shuffle=False,
        num_workers=0)

    ss = pd.read_csv(os.path.join(data_path, 'test.csv'))
    test_dataset = ARTDataset('test', ss, transform=transform_test)

    test_loader = DataLoader(
        test_dataset, 
        batch_size=16,
        shuffle=False, 
        num_workers=0,
        pin_memory=True)

    dataloaders = {
        'train': train_loader,
        'val': validation_loader,
        'test': test_loader
    }

    dataset_sizes = {
        'train': len(train_dataset),
        'val': len(validation_dataset),
        'test': len(test_dataset)
    }

    # timm에서 모델을 가져옴
    device =  torch.device("cuda")
    model = timm.create_model('tf_efficientnet_b4_ns', pretrained=True, num_classes=50)
    model.to(device)
    # model = nn.DataParallel(model, device_ids=[0, 1, 2, 3, 4, 5, 6, 7, 8])

    epochs = 300  # 보통 30~40 epoch에서 멈춥니다.
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    # optimizer = optim.AdamW(model.parameters(), lr=1e-3)
    criterion = nn.CrossEntropyLoss()
    scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

    os.makedirs(data_path+f'./runs/{run_id}', exist_ok=True)
    os.makedirs(data_path+f'./cms/', exist_ok=True)
    
    since = time.time()
    best_f1 = 0.0
    scaler = torch.cuda.amp.GradScaler()

    
    fold_run_id = f'{run_id}_fold{str(fold)}'
    
    # 학습
    for epoch in range(epochs):
        print('-'*50)
        print(f'Fold: {fold}')
        print('Epoch {}/{}'.format(epoch, epochs - 1))
        train_loss = 0.0

        for phase in ['train', 'val']:
            running_loss = 0.0
            cm_preds = []
            cm_labels = []
            model_preds = []
            model_labels = []

            if phase == 'train':
                model.train()
            else:
                model.eval()
            
            for x, y in tqdm(iter(dataloaders[phase])):
                x = x.to(device)
                y = y.to(device)
                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == 'train'):
                    with torch.cuda.amp.autocast(enabled=True):
                        y_hat = model(x)
                        loss = criterion(y_hat, y)
                    _, preds = torch.max(y_hat, 1)

                    if phase == 'train':
                        scaler.scale(loss).backward()
                        scaler.step(optimizer)
                        scaler.update()

                running_loss += loss.item() * x.size(0)
                
                model_labels += y.detach().cpu().numpy().tolist()
                model_preds += preds.detach().cpu().numpy().tolist()

            if phase == 'train' and scheduler != None:
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_f1 = f1_score(
                        model_labels, 
                        model_preds, 
                        average='macro')
            print(f'[{phase}] Loss: {epoch_loss:.4f} Macro F1: {epoch_f1:.4f}')

            # 체크포인트 저장
            if phase == 'val':
                if epoch_f1 > best_f1:
                    best_f1 = epoch_f1
                    torch.save(model, data_path+f'./runs/{run_id}/best_model_fold{fold}.pt')
                    confusion_mtx = confusion_matrix(model_labels, model_preds)
                    plot_confusion_matrix(confusion_mtx, classes=class_counts.keys(), runid=fold_run_id, epoch=epoch, f1=best_f1)
            
            # 로그
            if phase == 'val':
                pass
            else:
                train_loss = epoch_loss
                train_f1 = epoch_f1
            
        # EARLY STOPPING
        stop = early_stopping(epoch_f1)
        if stop:
            print("called")   
            break

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val_F1: {:4f}'.format(best_f1))

    # 해당 fold의 checkpoint를 불러와 test
    device =  torch.device("cuda")
    checkpoint = os.path.join(data_path, data_path+f'runs/{run_id}/best_model_fold{fold}.pt')
    print(f'CHECKPOINT LOADED: {checkpoint}')
    model = torch.load(checkpoint)
    model.to(device)
    model.eval()

    test_preds = []

    with torch.no_grad():
        for x in tqdm(iter(dataloaders['test'])):
            x = x.to(device)
            batch_pred = model(x)
            _, pred = torch.max(batch_pred, 1)
            pred = pred.detach().cpu().numpy().tolist()
            test_preds.extend(pred)

    # trainset에 fit_trainsform 되어있는 LabelEncoder로 inverse transform 해줌
    test_preds = le.inverse_transform(test_preds)

    sample_submission = pd.read_csv(data_path+'./data/sample_submission.csv')
    sample_submission['artist'] = test_preds
    os.makedirs(data_path+'./output/', exist_ok=True)
    sample_submission.to_csv(data_path+f'./output/{run_id}_fold{fold}.csv', index=False)
    
    clear_mem()

cls_cnts: 50
num_samples:4728
{'Jackson Pollock': 278.12, 'Eugene Delacroix': 225.14, 'Kazimir Malevich': 64.77, 'Georges Seurat': 197.0, 'Peter Paul Rubens': 60.62, 'Edouard Manet': 94.56, 'Claude Monet': 100.6, 'Paul Klee': 41.47, 'Mikhail Vrubel': 50.3, 'Andrei Rublev': 80.14, 'Pablo Picasso': 19.54, 'Rembrandt': 32.61, 'Marc Chagall': 34.26, 'Michelangelo': 175.11, 'Raphael': 80.14, 'Joan Miro': 77.51, 'Sandro Botticelli': 49.25, 'Gustave Courbet': 139.06, 'Andy Warhol': 45.03, 'Pieter Bruegel': 69.53, 'Albrecht Du rer': 26.86, 'El Greco': 90.92, 'Vincent van Gogh': 9.38, 'Camille Pissarro': 92.71, 'Leonardo da Vinci': 58.37, 'Francisco Goya': 29.01, 'Hieronymus Bosch': 51.39, 'Amedeo Modigliani': 45.03, 'Paul Gauguin': 26.86, 'Vasiliy Kandinskiy': 98.5, 'Henri Rousseau': 112.57, 'Pierre-Auguste Renoir': 25.42, 'Titian': 34.01, 'Edgar Degas': 12.09, 'Alfred Sisley': 35.82, 'Diego Rivera': 118.2, 'William Turner': 135.09, 'Gustav Klimt': 84.43, 'Jan van Eyck': 92.71, 'Diego Velazque

  0%|          | 0/296 [00:00<?, ?it/s]

[train] Loss: 2.0815 Macro F1: 0.4355


  0%|          | 0/74 [00:00<?, ?it/s]

[val] Loss: 1.9581 Macro F1: 0.4603
--------------------------------------------------
Fold: 0
Epoch 1/299


  0%|          | 0/296 [00:00<?, ?it/s]

[train] Loss: 1.2537 Macro F1: 0.6366


  0%|          | 0/74 [00:00<?, ?it/s]

[val] Loss: 1.7908 Macro F1: 0.5542
--------------------------------------------------
Fold: 0
Epoch 2/299


  0%|          | 0/296 [00:00<?, ?it/s]

[train] Loss: 1.0516 Macro F1: 0.6936


  0%|          | 0/74 [00:00<?, ?it/s]

[val] Loss: 1.5305 Macro F1: 0.5882
--------------------------------------------------
Fold: 0
Epoch 3/299


  0%|          | 0/296 [00:00<?, ?it/s]

[train] Loss: 0.8780 Macro F1: 0.7371


  0%|          | 0/74 [00:00<?, ?it/s]

[val] Loss: 1.4912 Macro F1: 0.5766
EarlyStopping counter: 1 out of 10
Best F1 score from now: 0.5882012857175393
--------------------------------------------------
Fold: 0
Epoch 4/299


  0%|          | 0/296 [00:00<?, ?it/s]

[train] Loss: 0.7983 Macro F1: 0.7664


  0%|          | 0/74 [00:00<?, ?it/s]

[val] Loss: nan Macro F1: 0.5452
EarlyStopping counter: 2 out of 10
Best F1 score from now: 0.5882012857175393
--------------------------------------------------
Fold: 0
Epoch 5/299


  0%|          | 0/296 [00:00<?, ?it/s]

[train] Loss: 0.4576 Macro F1: 0.8636


  0%|          | 0/74 [00:00<?, ?it/s]

[val] Loss: nan Macro F1: 0.6935
--------------------------------------------------
Fold: 0
Epoch 6/299


  0%|          | 0/296 [00:00<?, ?it/s]

[train] Loss: 0.3665 Macro F1: 0.8875


  0%|          | 0/74 [00:00<?, ?it/s]

[val] Loss: 1.1104 Macro F1: 0.7001
--------------------------------------------------
Fold: 0
Epoch 7/299


  0%|          | 0/296 [00:00<?, ?it/s]

[train] Loss: 0.2942 Macro F1: 0.9087


  0%|          | 0/74 [00:00<?, ?it/s]

[val] Loss: 1.4828 Macro F1: 0.6977
EarlyStopping counter: 1 out of 10
Best F1 score from now: 0.7001410140028136
--------------------------------------------------
Fold: 0
Epoch 8/299


  0%|          | 0/296 [00:00<?, ?it/s]

[train] Loss: 0.3097 Macro F1: 0.9108


  0%|          | 0/74 [00:00<?, ?it/s]

[val] Loss: 1.2066 Macro F1: 0.7128
--------------------------------------------------
Fold: 0
Epoch 9/299


  0%|          | 0/296 [00:00<?, ?it/s]

[train] Loss: 0.2690 Macro F1: 0.9186


  0%|          | 0/74 [00:00<?, ?it/s]

[val] Loss: 1.1363 Macro F1: 0.7157
--------------------------------------------------
Fold: 0
Epoch 10/299


  0%|          | 0/296 [00:00<?, ?it/s]

[train] Loss: 0.1770 Macro F1: 0.9470


  0%|          | 0/74 [00:00<?, ?it/s]

[val] Loss: 1.2000 Macro F1: 0.7559
--------------------------------------------------
Fold: 0
Epoch 11/299


  0%|          | 0/296 [00:00<?, ?it/s]

[train] Loss: 0.1325 Macro F1: 0.9569


  0%|          | 0/74 [00:00<?, ?it/s]

[val] Loss: 0.9814 Macro F1: 0.7612
--------------------------------------------------
Fold: 0
Epoch 12/299


  0%|          | 0/296 [00:00<?, ?it/s]

[train] Loss: 0.1118 Macro F1: 0.9698


  0%|          | 0/74 [00:00<?, ?it/s]

[val] Loss: 1.1832 Macro F1: 0.7536
EarlyStopping counter: 1 out of 10
Best F1 score from now: 0.7612167488252309
--------------------------------------------------
Fold: 0
Epoch 13/299


  0%|          | 0/296 [00:00<?, ?it/s]

[train] Loss: 0.1246 Macro F1: 0.9601


  0%|          | 0/74 [00:00<?, ?it/s]

[val] Loss: 0.9623 Macro F1: 0.7616
--------------------------------------------------
Fold: 0
Epoch 14/299


  0%|          | 0/296 [00:00<?, ?it/s]

[train] Loss: 0.1054 Macro F1: 0.9712


  0%|          | 0/74 [00:00<?, ?it/s]

[val] Loss: 0.9539 Macro F1: 0.7530
EarlyStopping counter: 1 out of 10
Best F1 score from now: 0.7616028747290031
--------------------------------------------------
Fold: 0
Epoch 15/299


  0%|          | 0/296 [00:00<?, ?it/s]

[train] Loss: 0.0864 Macro F1: 0.9735


  0%|          | 0/74 [00:00<?, ?it/s]

[val] Loss: 0.8109 Macro F1: 0.7808
--------------------------------------------------
Fold: 0
Epoch 16/299


  0%|          | 0/296 [00:00<?, ?it/s]

[train] Loss: 0.0752 Macro F1: 0.9782


  0%|          | 0/74 [00:00<?, ?it/s]

[val] Loss: 0.7748 Macro F1: 0.7759
EarlyStopping counter: 1 out of 10
Best F1 score from now: 0.7808421154035757
--------------------------------------------------
Fold: 0
Epoch 17/299


  0%|          | 0/296 [00:00<?, ?it/s]

[train] Loss: 0.0567 Macro F1: 0.9834


  0%|          | 0/74 [00:00<?, ?it/s]

[val] Loss: 0.8458 Macro F1: 0.7922
--------------------------------------------------
Fold: 0
Epoch 18/299


  0%|          | 0/296 [00:00<?, ?it/s]

[train] Loss: 0.0600 Macro F1: 0.9842


  0%|          | 0/74 [00:00<?, ?it/s]

[val] Loss: 0.9144 Macro F1: 0.7911
EarlyStopping counter: 1 out of 10
Best F1 score from now: 0.7922240935167033
--------------------------------------------------
Fold: 0
Epoch 19/299


  0%|          | 0/296 [00:00<?, ?it/s]

[train] Loss: 0.0502 Macro F1: 0.9848


  0%|          | 0/74 [00:00<?, ?it/s]

[val] Loss: 0.9293 Macro F1: 0.7784
EarlyStopping counter: 2 out of 10
Best F1 score from now: 0.7922240935167033
--------------------------------------------------
Fold: 0
Epoch 20/299


  0%|          | 0/296 [00:00<?, ?it/s]

[train] Loss: 0.0621 Macro F1: 0.9819


  0%|          | 0/74 [00:00<?, ?it/s]

[val] Loss: 0.9670 Macro F1: 0.7687
EarlyStopping counter: 3 out of 10
Best F1 score from now: 0.7922240935167033
--------------------------------------------------
Fold: 0
Epoch 21/299


  0%|          | 0/296 [00:00<?, ?it/s]

[train] Loss: 0.0450 Macro F1: 0.9854


  0%|          | 0/74 [00:00<?, ?it/s]

[val] Loss: 0.8722 Macro F1: 0.7757
EarlyStopping counter: 4 out of 10
Best F1 score from now: 0.7922240935167033
--------------------------------------------------
Fold: 0
Epoch 22/299


  0%|          | 0/296 [00:00<?, ?it/s]

[train] Loss: 0.0436 Macro F1: 0.9883


  0%|          | 0/74 [00:00<?, ?it/s]

[val] Loss: 0.9886 Macro F1: 0.7806
EarlyStopping counter: 5 out of 10
Best F1 score from now: 0.7922240935167033
--------------------------------------------------
Fold: 0
Epoch 23/299


  0%|          | 0/296 [00:00<?, ?it/s]

[train] Loss: 0.0385 Macro F1: 0.9885


  0%|          | 0/74 [00:00<?, ?it/s]

[val] Loss: 0.9114 Macro F1: 0.7964
--------------------------------------------------
Fold: 0
Epoch 24/299


  0%|          | 0/296 [00:00<?, ?it/s]

[train] Loss: 0.0409 Macro F1: 0.9887


  0%|          | 0/74 [00:00<?, ?it/s]

[val] Loss: 0.8450 Macro F1: 0.7951
EarlyStopping counter: 1 out of 10
Best F1 score from now: 0.7964468957245294
--------------------------------------------------
Fold: 0
Epoch 25/299


  0%|          | 0/296 [00:00<?, ?it/s]

[train] Loss: 0.0327 Macro F1: 0.9898


  0%|          | 0/74 [00:00<?, ?it/s]

[val] Loss: 0.7678 Macro F1: 0.7929
EarlyStopping counter: 2 out of 10
Best F1 score from now: 0.7964468957245294
--------------------------------------------------
Fold: 0
Epoch 26/299


  0%|          | 0/296 [00:00<?, ?it/s]

[train] Loss: 0.0231 Macro F1: 0.9950


  0%|          | 0/74 [00:00<?, ?it/s]

[val] Loss: 0.7960 Macro F1: 0.7988
--------------------------------------------------
Fold: 0
Epoch 27/299


  0%|          | 0/296 [00:00<?, ?it/s]

[train] Loss: 0.0280 Macro F1: 0.9902


  0%|          | 0/74 [00:00<?, ?it/s]

[val] Loss: 0.7450 Macro F1: 0.8078
--------------------------------------------------
Fold: 0
Epoch 28/299


  0%|          | 0/296 [00:00<?, ?it/s]

[train] Loss: 0.0321 Macro F1: 0.9914


  0%|          | 0/74 [00:00<?, ?it/s]

[val] Loss: 0.7064 Macro F1: 0.8156
--------------------------------------------------
Fold: 0
Epoch 29/299


  0%|          | 0/296 [00:00<?, ?it/s]

[train] Loss: 0.0299 Macro F1: 0.9905


  0%|          | 0/74 [00:00<?, ?it/s]

[val] Loss: 0.7239 Macro F1: 0.8062
EarlyStopping counter: 1 out of 10
Best F1 score from now: 0.8156220324306078
--------------------------------------------------
Fold: 0
Epoch 30/299


  0%|          | 0/296 [00:00<?, ?it/s]

[train] Loss: 0.0227 Macro F1: 0.9935


  0%|          | 0/74 [00:00<?, ?it/s]

[val] Loss: 0.7843 Macro F1: 0.8014
EarlyStopping counter: 2 out of 10
Best F1 score from now: 0.8156220324306078
--------------------------------------------------
Fold: 0
Epoch 31/299


  0%|          | 0/296 [00:00<?, ?it/s]

[train] Loss: 0.0239 Macro F1: 0.9926


  0%|          | 0/74 [00:00<?, ?it/s]

[val] Loss: 0.8339 Macro F1: 0.8039
EarlyStopping counter: 3 out of 10
Best F1 score from now: 0.8156220324306078
--------------------------------------------------
Fold: 0
Epoch 32/299


  0%|          | 0/296 [00:00<?, ?it/s]

[train] Loss: 0.0215 Macro F1: 0.9953


  0%|          | 0/74 [00:00<?, ?it/s]

[val] Loss: 0.8314 Macro F1: 0.8027
EarlyStopping counter: 4 out of 10
Best F1 score from now: 0.8156220324306078
--------------------------------------------------
Fold: 0
Epoch 33/299


  0%|          | 0/296 [00:00<?, ?it/s]

[train] Loss: 0.0253 Macro F1: 0.9930


  0%|          | 0/74 [00:00<?, ?it/s]

[val] Loss: 1.0346 Macro F1: 0.8135
EarlyStopping counter: 5 out of 10
Best F1 score from now: 0.8156220324306078
--------------------------------------------------
Fold: 0
Epoch 34/299


  0%|          | 0/296 [00:00<?, ?it/s]

[train] Loss: 0.0275 Macro F1: 0.9919


  0%|          | 0/74 [00:00<?, ?it/s]

[val] Loss: 0.8213 Macro F1: 0.8064
EarlyStopping counter: 6 out of 10
Best F1 score from now: 0.8156220324306078
--------------------------------------------------
Fold: 0
Epoch 35/299


  0%|          | 0/296 [00:00<?, ?it/s]

[train] Loss: 0.0228 Macro F1: 0.9934


  0%|          | 0/74 [00:00<?, ?it/s]

[val] Loss: 0.7924 Macro F1: 0.8100
EarlyStopping counter: 7 out of 10
Best F1 score from now: 0.8156220324306078
--------------------------------------------------
Fold: 0
Epoch 36/299


  0%|          | 0/296 [00:00<?, ?it/s]

[train] Loss: 0.0219 Macro F1: 0.9940


  0%|          | 0/74 [00:00<?, ?it/s]

[val] Loss: 0.8073 Macro F1: 0.8037
EarlyStopping counter: 8 out of 10
Best F1 score from now: 0.8156220324306078
--------------------------------------------------
Fold: 0
Epoch 37/299


  0%|          | 0/296 [00:00<?, ?it/s]

[train] Loss: 0.0229 Macro F1: 0.9944


  0%|          | 0/74 [00:00<?, ?it/s]

[val] Loss: 0.7652 Macro F1: 0.8087
EarlyStopping counter: 9 out of 10
Best F1 score from now: 0.8156220324306078
--------------------------------------------------
Fold: 0
Epoch 38/299


  0%|          | 0/296 [00:00<?, ?it/s]

[train] Loss: 0.0191 Macro F1: 0.9946


  0%|          | 0/74 [00:00<?, ?it/s]

[val] Loss: 0.9353 Macro F1: 0.8051
EarlyStopping counter: 10 out of 10
Best F1 score from now: 0.8156220324306078
called
Training complete in 97m 4s
Best val_F1: 0.815622
CHECKPOINT LOADED: ../data/../data/runs/20231212190031/best_model_fold0.pt


  0%|          | 0/792 [00:00<?, ?it/s]

RuntimeError: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same or input should be a MKLDNN tensor and weight is a dense tensor

<Figure size 640x480 with 0 Axes>

In [None]:
import pandas as pd
from collections import Counter
from glob import glob


# 경로 수정(run_id*.csv)
csvs = glob(data_path + 'output/20221010220426*.csv')
csvs2 = glob(data_path + 'output/20221030183609*.csv')
csvs.extend(csvs2)
print(len(csvs))

preds = []
for csv in csvs:
    f = pd.read_csv(csv)
    artist = f['artist'].tolist()
    preds.append(artist)

out = []
cols = list(zip(*preds))
for c in cols:
    most = Counter(c).most_common()[0][0]
    out.append(most)

print(out[:20])
ss = pd.read_csv(os.path.join(data_path, 'sample_submission.csv'))
ss['artist'] = out
ss.to_csv(os.path.join(data_path, 'vote1234.csv'), index=False)  # 구분 가능하게 경로 수정