In [1]:
import common

import os
import time
import random

import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.optim import Adam
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split

from dotenv import load_dotenv
from datetime import datetime
from zoneinfo import ZoneInfo
import wandb

In [2]:
# Pick the compute device once: the GPU when CUDA is usable, otherwise the CPU.
_device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(_device_name)
device

device(type='cuda')

In [3]:
# Ensemble configuration.
# Checkpoints of the models taking part in the ensemble (one entry per member).
model_checkpoint_filename_list = [
    'model_bak/cp-efficientnet_b3.ra2_in1k_sd_42_epc_2_isFull_False_vl0.0596_va_0.9802_vf1_0.9795.pt',
    'model_bak/cp-tiny_vit_21m_384.dist_in22k_ft_in1k_sd_42_epc_0_isFull_False_vl_0.1706_va_0.9366_vf1_0.9290.pt',
    'model_bak/cp-tiny_vit_21m_384.dist_in22k_ft_in1k_sd_42_epc_2_isFull_False_vl_0.1526_va_0.9711_vf1_0.9714.pt',
]

# True selects soft_voting, False selects hard_voting in the final cell;
# the flag is also forwarded to the prediction helper.
is_soft_voting = True

# Forwarded to common.ImageDataset as `augment_ratio` when building the test set.
augment_ratio = 20

In [4]:

# Rebuild every ensemble member from its checkpoint and pair it with a test
# DataLoader configured from the values stored in that same checkpoint.
model_list = []
tst_loader_list = []

for cp_filename in model_checkpoint_filename_list:
    # NOTE(review): depending on the installed torch version / `weights_only`
    # setting, torch.load may unpickle arbitrary objects; only load checkpoints
    # from a trusted source.
    checkpoint = torch.load(cp_filename, map_location = device)

    print(checkpoint['model'], checkpoint['tst_img_size'], checkpoint['batch_size'])

    # pretrained=False: load_state_dict below is strict and overwrites every
    # parameter and buffer, so fetching the ImageNet weights first only costs
    # time and requires network access without changing the final model.
    model = timm.create_model(
        checkpoint['model'],
        pretrained = False,
        num_classes = 17,
    ).to(device)

    model.load_state_dict(checkpoint['model_state_dict'])

    # Inference only: switch dropout / batch-norm layers to evaluation mode.
    # Harmless if the prediction helper also does this.
    model.eval()
    model_list.append(model)

    # Transforms sized to the resolution recorded in the checkpoint: the plain
    # test transform plus the augmentation transform handed to the dataset.
    tst_transform = common.create_tst_transform(checkpoint['tst_img_size'])
    tst_aug_transform = common.create_trn_aug_transform(checkpoint['tst_img_size'])

    tst_dataset = common.ImageDataset(
        "datasets_fin/sample_submission.csv",
        "datasets_fin/test/",
        transform=tst_transform,
        aug_transform=tst_aug_transform,
        augment_ratio=augment_ratio)

    # shuffle=False keeps the row order stable, which the voting step relies on
    # when it lines predictions up across models.
    tst_loader = DataLoader(
        tst_dataset,
        batch_size = checkpoint['batch_size'],
        shuffle = False,
        num_workers = 12,
        pin_memory = True
    )

    tst_loader_list.append(tst_loader)

print(len(model_list), len(tst_loader_list))

efficientnet_b3.ra2_in1k 320 32
tiny_vit_21m_384.dist_in22k_ft_in1k 384 32
tiny_vit_21m_384.dist_in22k_ft_in1k 384 32
3 3


In [5]:
# Collect one prediction set per ensemble member, in the same order as
# model_list / tst_loader_list. The loop variable names are kept on purpose:
# the final cell reads `tst_loader` after this loop has finished.
preds_list = []

for model, tst_loader in zip(model_list, tst_loader_list):
    preds_list.append(
        common.get_preds_list_by_tst_loader(model, tst_loader, device, is_soft_voting)
    )

100%|██████████| 1963/1963 [12:48<00:00,  2.55it/s]
100%|██████████| 1963/1963 [16:52<00:00,  1.94it/s]
100%|██████████| 1963/1963 [16:21<00:00,  2.00it/s]


In [6]:
def get_all_targets_count(csv_path="datasets_fin/sample_submission.csv"):
    """Return the number of data rows in the sample-submission CSV.

    Args:
        csv_path: Path of the CSV file to count. Defaults to the project's
            sample submission file, so existing zero-argument calls behave
            exactly as before.

    Returns:
        int: Number of rows pandas reads from the file (header excluded).
    """
    return len(pd.read_csv(csv_path))
    
def hard_voting(predictions):
    """Majority vote over class labels.

    Args:
        predictions: Array-like of non-negative integer class labels whose
            first axis indexes the voters (np.bincount rejects negative or
            non-integer values).

    Returns:
        np.ndarray: For every position along the remaining axes, the label
        that occurs most often along axis 0. Ties resolve to the smallest
        label, because argmax returns the first maximum.
    """
    def _most_common(labels):
        # Count occurrences of each label and keep the most frequent one.
        return np.bincount(labels).argmax()

    votes = np.asarray(predictions)
    return np.apply_along_axis(_most_common, 0, votes)

def soft_voting(predictions, all_targets_count=None):
    """Average per-model class scores and return the winning class per sample.

    Scores are first averaged across models (axis 0). When the result holds
    more rows than there are test samples, the rows are treated as
    ``aug_size`` consecutive blocks of ``all_targets_count`` rows each and
    the blocks are averaged as well, so every sample ends up with one score
    vector.
    NOTE(review): this assumes the dataset emits the whole test set once per
    augmentation pass, in the same order each time - confirm against
    common.ImageDataset.

    Args:
        predictions: Array-like stacking one score array per model along
            axis 0; each per-model array has rows along its first axis and
            classes along its second.
        all_targets_count: Number of distinct test samples. When None
            (default) it is read from the sample-submission CSV via
            get_all_targets_count(), which is the original behaviour.

    Returns:
        np.ndarray: Predicted class index for each test sample.

    Raises:
        AssertionError: If the number of rows is not a multiple of
            ``all_targets_count``.
    """
    predictions = np.asarray(predictions)

    # Average across ensemble members.
    mean_scores = np.mean(predictions, axis=0)

    # Also take the predictions made on augmented data into account.
    if all_targets_count is None:
        all_targets_count = get_all_targets_count()

    aug_size, remainder = divmod(len(mean_scores), all_targets_count)
    assert remainder == 0, (
        f"{len(mean_scores)} prediction rows is not a multiple of "
        f"{all_targets_count} test samples"
    )

    if aug_size > 1:
        # Row i of block k holds sample i under pass k, so splitting the first
        # axis into (aug_size, all_targets_count) reproduces the consecutive
        # slices; averaging over the new leading axis merges the passes.
        folded_shape = (aug_size, all_targets_count) + mean_scores.shape[1:]
        mean_scores = mean_scores.reshape(folded_shape).mean(axis=0)

    return mean_scores.argmax(axis=1)

# Final prediction: apply the voting scheme selected by `is_soft_voting`.
final_pred = soft_voting(preds_list) if is_soft_voting else hard_voting(preds_list)

# Save to CSV. `tst_loader` is the name left bound by the earlier loops over
# the loaders, i.e. the loader belonging to the last model in the ensemble.
common.preds_list_to_save_to_csv(final_pred, tst_loader, 'pred_ensemble.csv')