## Library 불러오기

In [14]:
import multiprocessing
import os
from importlib import import_module

import pandas as pd
import torch
from torch.utils.data import DataLoader
import numpy as np

from dataset import TestDataset, MaskBaseDataset
from torchmetrics.classification import MulticlassF1Score
from importlib import import_module
from torchvision.transforms import RandomAdjustSharpness, Resize, ToTensor, Normalize, Compose, CenterCrop, ColorJitter, RandomHorizontalFlip, RandomRotation, RandomAffine, RandomGrayscale, Grayscale
from PIL import Image

## 모델 경로 및 모델 구조를 불러옵니다.

모델 경로와 모델 구조는 서로 같은 순서로 나열해야 합니다. 순서에 유의하세요.

In [2]:
# Directory that holds every checkpoint used by the ensemble.
model_dir = '/opt/ml/workspace/teajun/level1_imageclassification_cv-level-cv-19/final_ensemble_models'

# One row per ensemble member: (model class name, checkpoint file name, voting weight).
# Keeping the three values on a single row guarantees that a checkpoint is
# always paired with its own architecture and weight.
#   * soft voting     -> give every member the same weight
#   * weighted voting -> give each member the weight it should receive
# Rows that are commented out are disabled candidates; their class/file pairing
# follows the order of the original lists and the weight 1 is only a placeholder.
ensemble_members = [
    # ('Swin_s_Shallow', 'swinshallow_0.004_5best_f1 0.5_ 1.0.pth', 1),
    # ('ConvNext_Small_Shallow', '8_best_f1_0.49_0.986.pth', 1),
    # ('ConvNext_Small', 'final_melting.pth', 1),
    ('Swin_b_Shallow', 'swinbshallow_5.pth', 1),
    # ('ConvNext_Tiny', 'convtiny_f1_0.838_0.827_submit.pth', 1),
    ('EfficientNet_V2_L_shallow', 'CV_19_EFficientNet_V2_0.04_4_best_f1_0.724_0.804_ensemble.pth', 1),
]

# The names consumed by the following cells; all three lists share one order
# and one length by construction.
model_structs = [struct for struct, _, _ in ensemble_members]
model_paths = [f'{model_dir}/{file_name}' for _, file_name, _ in ensemble_members]
weights = [weight for _, _, weight in ensemble_members]


## 데이터를 가져와 data preparation

data loader를 이용해 모델에 데이터를 feeding할 준비를 합니다.

In [None]:
# Run on the GPU when one is available; `device` and `use_cuda` are reused by
# the cells below.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
num_classes = MaskBaseDataset.num_classes  # 18

# Location of the evaluation (test) data set; change this to your own path.
data_dir = '/opt/ml/input/data/eval'
img_root = os.path.join(data_dir, 'images')
info_path = os.path.join(data_dir, 'info.csv')

# info.csv provides the image file names in its ImageID column; the same
# DataFrame later receives the predicted class in a new column.
info = pd.read_csv(info_path)
img_paths = [os.path.join(img_root, img_id) for img_id in info.ImageID]

batch_size = 64
resize = (256, 192)
dataset = TestDataset(img_paths, resize)

# shuffle=False and drop_last=False keep exactly one prediction per row of
# `info`, in row order, so the results can be written back column-wise.
loader = DataLoader(
    dataset,
    batch_size=batch_size,
    num_workers=multiprocessing.cpu_count() // 2,
    shuffle=False,
    pin_memory=use_cuda,
    drop_last=False,
)

## soft voting을 구현합니다.

참고: https://dhpark1212.tistory.com/entry/%EB%AA%A8%EB%8D%B8-%EC%95%99%EC%83%81%EB%B8%94ensemble-%ED%95%98%EA%B8%B0

In [None]:
# Weighted soft voting: each model's class probabilities are scaled by its
# weight, the scaled probabilities are summed over all models, and the class
# with the largest total becomes the prediction.

# zip() silently stops at the shortest list, which would drop models or
# weights without any warning, so the lengths are verified up front.
if not (len(model_structs) == len(model_paths) == len(weights)):
    raise ValueError(
        'model_structs, model_paths and weights must have the same length'
    )

prediction_array_3d = []
softmax = torch.nn.Softmax(dim=1)
for model_struct, model_path, weight in zip(model_structs, model_paths, weights):
    # Look the architecture up by class name in the project's `model` module
    # and restore its trained parameters.
    model_cls = getattr(import_module('model'), model_struct)
    model = model_cls(num_classes)
    # map_location lets a checkpoint that was saved on a GPU be loaded when
    # `device` is the CPU.
    model.load_state_dict(torch.load(model_path, map_location=device))
    model = model.to(device)
    model.eval()

    batch_probs = []
    with torch.no_grad():
        for images in loader:
            images = images.to(device)
            pred = softmax(model(images))
            batch_probs.append(pred.cpu().numpy())
    # One (num_samples, num_classes) probability matrix per model.
    prediction_array_3d.append(np.concatenate(batch_probs) * weight)

# Stack to (num_models, num_samples, num_classes), add the models together and
# take the most probable class for every sample.
prediction_array_3d = np.array(prediction_array_3d)
prediction = prediction_array_3d.sum(axis=0)
prediction = prediction.argmax(axis=1)
info['ans'] = prediction

### 경로를 지정하여 CSV 파일로 저장합니다.

In [None]:
# File that receives the ensemble result. Names that appear in earlier,
# now disabled, versions of this cell: ensemble_result1.csv ... ensemble_result5.csv.
# NOTE(review): DataFrame.to_csv also writes the row index as an unnamed first
# column by default - confirm that the submission format accepts it.
output_path = 'ensemble_result6.csv'
info.to_csv(output_path)

## Validation Acc, F1 Score

validation dataset의 acc와 f1 score를 구합니다.

In [None]:
# Score the ensemble on the validation split: number of correct samples,
# F1 score and accuracy are printed in that order.
data_dir = '/opt/ml/input/data/train/images'  # path of the training images; change to your own
dataset_module = getattr(import_module("dataset"), "MaskSplitByProfileDatasetByClass")

dataset_val = dataset_module(
    data_dir=data_dir,
)
dataset_val.set_transform(Compose([
    CenterCrop((320, 256)),
    Resize((256, 192), Image.BILINEAR),
    ToTensor(),
    Normalize(mean=(0.548, 0.504, 0.479), std=(0.237, 0.247, 0.246)),
]))
# Only the second part of the split is evaluated here.
_, val_set = dataset_val.split_dataset()

# shuffle=False keeps the predictions in the order of the data set so that
# they can be compared element-wise with the labels fetched below.
val_loader = DataLoader(
    val_set,
    batch_size=1000,
    num_workers=multiprocessing.cpu_count() // 2,
    shuffle=False,
    pin_memory=use_cuda,
    drop_last=False,
)

softmax = torch.nn.Softmax(dim=1)

# zip() silently stops at the shortest list, so mismatched configuration is
# rejected instead of being evaluated partially.
if not (len(model_structs) == len(model_paths) == len(weights)):
    raise ValueError(
        'model_structs, model_paths and weights must have the same length'
    )

prediction_array_3d = []
for model_struct, model_path, weight in zip(model_structs, model_paths, weights):
    # Look the architecture up by class name and restore its parameters.
    model_cls = getattr(import_module('model'), model_struct)
    model = model_cls(num_classes)
    # map_location lets a checkpoint that was saved on a GPU be loaded when
    # `device` is the CPU.
    model.load_state_dict(torch.load(model_path, map_location=device))
    model = model.to(device)
    model.eval()

    batch_probs = []
    with torch.no_grad():
        # The validation set yields (image, label) pairs; labels are read
        # separately below, so they are ignored here.
        for images, _ in val_loader:
            images = images.to(device)
            pred = softmax(model(images))
            batch_probs.append(pred.cpu().numpy())
    # Apply the same voting weight as the inference cell so that the score
    # describes the ensemble that is actually submitted.
    prediction_array_3d.append(np.concatenate(batch_probs) * weight)

# (num_models, num_samples, num_classes) -> summed probabilities -> class id.
prediction_array_3d = np.array(prediction_array_3d)
prediction = prediction_array_3d.sum(axis=0)
prediction = prediction.argmax(axis=1)
labels = np.array(dataset_val.get_multi_labels_val())

# Number of correctly classified validation samples.
acc_item = (labels == prediction).sum().item()
print(acc_item)

# Use the data set's class count instead of a hard-coded 18.
f1 = MulticlassF1Score(num_classes=num_classes)
f1_score = f1(
    torch.Tensor(prediction).type(torch.LongTensor),
    torch.Tensor(labels).type(torch.LongTensor),
).item()

val_acc = acc_item / len(val_set)

print(f1_score)

print(val_acc)