### 라이브러리 가져오기

In [1]:
import os
import random
import timm
import time
import pickle

import torch
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts

import albumentations as A
from albumentations.pytorch import ToTensorV2

import torch.nn as nn

from torch.cuda.amp import GradScaler, autocast
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, f1_score

from PIL import Image
from tqdm import tqdm

import pandas as pd
import numpy as np
import pandas as pd


import logging
import warnings
warnings.filterwarnings(action='ignore')

### CONFIG

In [2]:
class CONFIG:
    """Namespace for notebook-wide setup helpers (seeding and data paths)."""

    @staticmethod
    def set_seed(SEED):
        """Seed the Python, NumPy and PyTorch RNGs and make cuDNN deterministic.

        Args:
            SEED: Integer seed applied to every random number generator.
        """
        # NOTE: PYTHONHASHSEED is read at interpreter start-up, so this only
        # affects subprocesses spawned afterwards (e.g. DataLoader workers
        # started with the "spawn" method), not the running process.
        os.environ['PYTHONHASHSEED'] = str(SEED)
        random.seed(SEED)
        np.random.seed(SEED)
        torch.manual_seed(SEED)
        torch.cuda.manual_seed(SEED)
        torch.cuda.manual_seed_all(SEED)
        # cudnn.benchmark auto-tunes convolution algorithms and may select
        # different kernels from run to run, which defeats the purpose of
        # seeding. Prefer deterministic kernels and disable the auto-tuner.
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    @staticmethod
    def set_path(root_path):
        """Derive the train and test image directories from the data root.

        Args:
            root_path: Directory that contains the ``train`` and ``test``
                sub-directories.

        Returns:
            Tuple ``(root_path, train_path, test_path)``; the two derived
            paths end with a trailing slash.
        """
        train_path = f'{root_path}/train/'
        test_path = f'{root_path}/test/'

        return root_path, train_path, test_path

In [3]:
# Send INFO-level (and above) records both to 'notebook.log' and to the
# notebook output stream, each prefixed with a timestamp and the level name.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', 
                    handlers=[logging.FileHandler('notebook.log'), logging.StreamHandler()])

In [4]:
# Seed the random number generators and derive the train/test image
# directories from the dataset root.
CONFIG.set_seed(0xC0FFE)
root_path, train_path, test_path = CONFIG.set_path('/root/Project/new_data')

logging.info('1. Set Seed')

2024-04-20 02:14:31,778 - INFO - 1. Set Seed


### CustomDataset

In [5]:
# Dataset that serves (image, label) pairs listed in a DataFrame.
class ImageDataset(Dataset):
    """Map-style dataset backed by a DataFrame of file names and labels.

    Args:
        df: DataFrame whose rows unpack to exactly two values, in the order
            ``(file name, target)``.
        path: Directory that contains the image files.
        transform: Optional callable invoked as ``transform(image=array)``
            that returns a mapping with an ``'image'`` entry (the
            albumentations calling convention).
    """

    def __init__(self, df, path, transform=None):
        self.df = df
        self.path = path
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the backing DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the ``idx``-th image (by position) and return ``(image, target)``."""
        name, target = self.df.iloc[idx]
        # The context manager closes the file handle promptly instead of
        # leaving it to the garbage collector. Forcing RGB guarantees three
        # channels, so grayscale / palette / RGBA files do not break the
        # 3-channel mean/std normalisation applied by the transforms.
        with Image.open(os.path.join(self.path, name)) as pil_image:
            img = np.array(pil_image.convert('RGB'))
        if self.transform is not None:
            img = self.transform(image=img)['image']
        return img, target

logging.info('2. Define Dataset Class')

2024-04-20 02:14:31,816 - INFO - 2. Define Dataset Class


In [6]:
# Hyperparameters and preprocessing configuration.

# --- model / runtime ---
model_name = 'tf_efficientnetv2_m'  # timm model identifier
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
img_size = 224

# --- optimisation ---
learning_rate = 1e-4
num_epochs = 10
patience = 3  # early-stopping budget: epochs without a validation F1 gain

# --- data loading ---
batch_size = 32
num_workers = 8

# --- CosineAnnealingWarmRestarts schedule ---
T_0 = 5
T_mult = 2
eta_min = 1e-6

# Training-time preprocessing: resize straight to a square (the aspect ratio
# is not preserved), normalise, and convert to a tensor.
train_transform = A.Compose(
    [
        A.Resize(height=img_size, width=img_size),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]
)

# Test-time preprocessing: scale the longest side to img_size, pad the rest
# with white, normalise, and convert to a tensor.
# NOTE(review): this letterboxing differs from the plain Resize used for
# training/validation, so test images are presented with different geometry
# than the model was trained on - confirm this is intentional.
test_transform = A.Compose(
    [
        A.LongestMaxSize(max_size=img_size, always_apply=True),
        A.PadIfNeeded(
            min_height=img_size,
            min_width=img_size,
            border_mode=0,
            value=(255, 255, 255),
        ),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]
)

logging.info('3. Set Hyperparameter')

2024-04-20 02:14:31,830 - INFO - 3. Set Hyperparameter


In [7]:
# Load the training table and assign each row to one of three coarse classes.
train_file = pd.read_csv(f'{root_path}/combined_data.csv')
train_file['class'] = None

# Coarse class id -> original target labels that belong to it.
class_to_targets = {
    0: [0, 5, 8, 9],
    1: [2, 16],
    2: [1, 3, 4, 6, 7, 10, 11, 12, 13, 14, 15],
}
for cls, targets in class_to_targets.items():
    train_file.loc[train_file['target'].isin(targets), 'class'] = cls


# Build one dataset / loader per coarse class.
# ImageDataset.__getitem__ unpacks every row into exactly (name, target), so
# only the 'ID' and 'target' columns are handed over; passing the frame with
# the extra 'class' column would raise ValueError on the first item access.
# NOTE(review): these datasets therefore yield the original 'target' labels -
# confirm that is the intended label for them.
train_datasets = {cls: ImageDataset(df=train_file.loc[train_file['class'] == cls, ['ID', 'target']],
                                    path=train_path,
                                    transform=train_transform)
                  for cls in range(3)}

train_loaders = {cls: DataLoader(dataset, batch_size=batch_size, shuffle=True)
                 for cls, dataset in train_datasets.items()}

logging.info('4. Load Data')

2024-04-20 02:14:31,934 - INFO - 4. Load Data


### class가 1인 car class에 대해서만 학습

In [8]:
# Rows assigned to coarse class 1, reduced to the file name and label columns.
class_file = train_file.loc[train_file['class'] == 1, ['ID', 'target']]
# Pre-built dataset and loader for the same coarse class.
class_dataset = train_datasets[1]
class_loader = train_loaders[1]

In [9]:
# Remap the two original labels to binary indices:
#   2  --> 0
#   16 --> 1
for original_label, binary_label in ((2, 0), (16, 1)):
    class_file.loc[class_file['target'] == original_label, 'target'] = binary_label

class_file

Unnamed: 0,ID,target
0,002f99746285dfdd.jpg,1
4,00b2f44967580c74.jpg,1
7,00f59b12e9e08f29.jpg,1
26,03ce6a0a86939c04.jpg,0
32,04d106668176a2e6.jpg,1
...,...,...
138771,noised1_mixup_flipud_6dbb0d26b45902b4.jpg_9ac1...,1
138772,noised1_mixup_flipud_6dbb0d26b45902b4.jpg_9ac1...,1
138773,noised1_mixup_flipud_6dbb0d26b45902b4.jpg_9ac1...,1
138774,noised1_mixup_flipud_6dbb0d26b45902b4.jpg_9ac1...,1


### Stratified K-Fold

In [10]:
# Stratified K-Fold
skf = StratifiedKFold(n_splits=5)
# split() returns a one-shot generator; materialise it so the folds can be
# iterated more than once (e.g. when the training cell is re-run). Otherwise a
# second pass would silently see zero folds.
# NOTE(review): the file names suggest augmented copies of the same source
# image may be present; if so they can land in both the train and validation
# parts of a fold - verify, and consider a group-aware split.
train_folds = list(skf.split(class_file['ID'], class_file['target']))

### 모델 학습

In [11]:
# Runs a single training epoch.
def _train_one_epoch(loader, model, optimizer, loss_fn, scheduler, scaler, device):
    """Train ``model`` for one pass over ``loader`` with mixed precision.

    Args:
        loader: Iterable of ``(images, targets)`` batches that supports ``len()``.
        model: Network being trained; it is switched to train mode.
        optimizer: Optimizer stepped once per batch through ``scaler``.
        loss_fn: Callable ``loss_fn(preds, targets)`` returning a scalar loss.
        scheduler: LR scheduler, stepped once after the last batch.
        scaler: Gradient scaler used to scale the loss and step the optimizer.
        device: Device the batches are moved to.

    Returns:
        Dict with ``train_loss`` (mean of the per-batch losses), ``train_acc``
        and ``train_f1`` (macro-averaged) computed over the whole epoch.
    """
    model.train()
    running_loss = 0
    epoch_preds, epoch_targets = [], []

    progress = tqdm(loader)
    for images, targets in progress:
        images = images.to(device)
        # Labels are cast to int64 on the CPU before being moved to the device.
        targets = targets.type(torch.LongTensor).to(device)

        model.zero_grad(set_to_none=True)

        # Forward pass and loss run under autocast (mixed precision).
        with autocast():
            logits = model(images)
            loss = loss_fn(logits, targets)

        # Scaled backward pass, optimizer step, then scale-factor update.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        batch_loss = loss.item()
        running_loss += batch_loss
        epoch_preds.extend(logits.argmax(dim=1).detach().cpu().numpy())
        epoch_targets.extend(targets.detach().cpu().numpy())

        progress.set_description(f"Loss: {batch_loss:.4f}")

    # The learning-rate schedule advances once per epoch.
    scheduler.step()

    return {
        "train_loss": running_loss / len(loader),
        "train_acc": accuracy_score(epoch_targets, epoch_preds),
        "train_f1": f1_score(epoch_targets, epoch_preds, average='macro'),
    }

In [12]:
# Runs a single validation pass.
def _val_one_epoch(loader, model, device):
    """Evaluate ``model`` on ``loader`` without tracking gradients.

    Args:
        loader: Iterable of ``(images, targets)`` batches.
        model: Network to evaluate; it is switched to eval mode.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(metrics, f1)``: ``metrics`` holds accuracy and macro F1 under
        the keys ``train_acc`` / ``train_f1``, and ``f1`` repeats the macro F1.
        The ``train_`` prefix is kept because callers read these exact keys,
        even though the values are validation metrics.
    """
    model.eval()

    all_preds, all_targets = [], []

    for images, targets in tqdm(loader):
        images = images.to(device)
        targets = targets.type(torch.LongTensor).to(device)

        model.zero_grad(set_to_none=True)
        with torch.no_grad():
            logits = model(images)

        all_preds.extend(logits.argmax(dim=1).detach().cpu().numpy())
        all_targets.extend(targets.detach().cpu().numpy())

    accuracy = accuracy_score(all_targets, all_preds)
    macro_f1 = f1_score(all_targets, all_preds, average='macro')

    metrics = {
        "train_acc": accuracy,
        "train_f1": macro_f1,
    }

    return metrics, macro_f1

In [13]:
def train_model(patience, num_epochs, device):
    """Train one binary classifier per fold and save each fold's best model.

    Besides its arguments the function reads these module-level names:
    ``train_folds``, ``class_file``, ``train_path``, ``train_transform``,
    ``model_name``, ``learning_rate``, ``T_0``, ``T_mult``, ``eta_min``,
    ``batch_size``, ``num_workers`` and ``root_path``.

    Args:
        patience: Number of consecutive epochs without a validation F1
            improvement after which training of the current fold stops.
        num_epochs: Maximum number of epochs per fold.
        device: Device the model and batches are placed on.

    Returns:
        List with one dict per fold containing ``model`` (loaded with the best
        weights), ``weights`` (the best state dict, or ``None`` when no epoch
        ran) and ``f1_score`` (the best validation macro F1).
    """

    # Start time (the elapsed time printed per fold is cumulative).
    since = time.time()

    models = []

    for fold_index, (train_index, validation_index) in enumerate(train_folds):

        # Fresh gradient scaler for every fold.
        scaler = GradScaler()
        print()
        print(f'Stratified K-Fold: {fold_index}')
        logging.info(f'Stratified K-Fold: {fold_index + 1} / 5')
        print('-' * 10)

        model = timm.create_model(
            model_name=model_name,
            pretrained=True,
            # binary classification
            num_classes=2
        ).to(device)

        loss_fn = nn.CrossEntropyLoss()
        optimizer = Adam(model.parameters(), lr=learning_rate)
        scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=T_0, T_mult=T_mult, eta_min=eta_min)

        # train
        train_data = class_file.iloc[train_index, :]
        train_dataset = ImageDataset(
            df=train_data,
            path=train_path,
            transform=train_transform
        )
        train_loader = DataLoader(
            train_dataset,
            batch_size=batch_size,
            shuffle=True,
            num_workers=num_workers,
            pin_memory=True,
            drop_last=False
        )

        # validation
        validation_data = class_file.iloc[validation_index, :]
        validation_dataset = ImageDataset(
            df=validation_data,
            path=train_path,
            transform=train_transform
        )
        validation_loader = DataLoader(
            validation_dataset,
            batch_size=batch_size,
            # Accuracy / F1 do not depend on sample order, so shuffling the
            # validation set only adds needless randomness.
            shuffle=False,
            num_workers=num_workers,
            pin_memory=True,
            drop_last=False
        )

        best_epoch = 0
        best_f1_score = 0
        early_stop_counter = 0
        best_model_weights = None

        for epoch in range(num_epochs):
            print(f'Epoch {epoch}/{num_epochs - 1}')
            print('-' * 10)
            logging.info(f"Epoch {epoch + 1}/{num_epochs}")

            if early_stop_counter >= patience:
                logging.info(f"Early Stopping... epoch {epoch + 1}")
                print("Early Stopping....\n")
                break


            # train
            ret = _train_one_epoch(train_loader, model, optimizer, loss_fn, scheduler, scaler, device)
            # validation
            ret2, val_f1 = _val_one_epoch(validation_loader, model, device)

            print(f"Loss: {ret['train_loss']:.4f}, train Accuracy: {ret['train_acc']:.4f}, train F1-Score: {ret['train_f1']:.4f}")
            print(f"validation Accuracy: {ret2['train_acc']:.4f}, validation F1-Score: {ret2['train_f1']:.4f}")
            print('-' * 10)

            # Compare the validation F1 score. The first evaluated epoch is
            # always recorded so a best snapshot exists even if F1 stays at 0.
            if best_model_weights is None or val_f1 > best_f1_score:
                early_stop_counter = 0

                best_epoch = epoch
                best_f1_score = val_f1
                # state_dict() returns references to the live parameter
                # tensors, which later optimizer steps overwrite in place.
                # Clone them so the snapshot really holds this epoch's weights.
                best_model_weights = {
                    key: value.detach().clone()
                    for key, value in model.state_dict().items()
                }

            else:
                early_stop_counter += 1


        print(f'best epoch: {best_epoch}, best f1 score: {best_f1_score}')

        time_elapsed = time.time() - since
        print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')

        # Restore the best weights. The snapshot is None only when no epoch
        # ran (num_epochs == 0 or patience == 0); keep the current weights then.
        if best_model_weights is not None:
            model.load_state_dict(best_model_weights)

        model_path = f'{root_path}/model/{model_name}'
        os.makedirs(model_path, exist_ok=True)

        # NOTE(review): pickling the whole module ties the file to the exact
        # class definitions / library versions and is unsafe to load from
        # untrusted sources. The format is kept because the loading cell reads
        # it with pickle.load; consider saving the state_dict instead.
        with open(f'{model_path}/fold_{fold_index}' + '.pkl', 'wb') as f:
            pickle.dump(model, f)

        models.append({
            'model': model,
            'weights': best_model_weights,
            'f1_score': best_f1_score,
        })

    return models

In [14]:
# Mark the start of the training phase in the log.
logging.info('5. Start Training')
logging.info('----------------------------------------------')

2024-04-20 02:14:31,995 - INFO - 5. Start Training
2024-04-20 02:14:31,996 - INFO - ----------------------------------------------


In [15]:
# Run training over all folds.
models = train_model(patience, num_epochs=num_epochs, device=device)

2024-04-20 02:14:32,004 - INFO - Stratified K-Fold: 1 / 5



Stratified K-Fold: 0
----------


2024-04-20 02:14:32,517 - INFO - Loading pretrained weights from Hugging Face hub (timm/tf_efficientnetv2_m.in21k_ft_in1k)
2024-04-20 02:14:32,877 - INFO - [timm/tf_efficientnetv2_m.in21k_ft_in1k] Safe alternative available for 'pytorch_model.bin' (as 'model.safetensors'). Loading weights using safetensors.
2024-04-20 02:14:33,192 - INFO - Epoch 1/10


Epoch 0/9
----------


Loss: 0.0000: 100%|██████████| 429/429 [00:46<00:00,  9.28it/s]
100%|██████████| 108/108 [00:05<00:00, 18.73it/s]
2024-04-20 02:15:25,222 - INFO - Epoch 2/10


Loss: 0.0535, train Accuracy: 0.9937, train F1-Score: 0.9937
validation Accuracy: 0.9974, validation F1-Score: 0.9974
----------
Epoch 1/9
----------


Loss: 0.0000: 100%|██████████| 429/429 [00:43<00:00,  9.83it/s]
100%|██████████| 108/108 [00:05<00:00, 21.43it/s]
2024-04-20 02:16:13,944 - INFO - Epoch 3/10


Loss: 0.0029, train Accuracy: 0.9995, train F1-Score: 0.9995
validation Accuracy: 1.0000, validation F1-Score: 1.0000
----------
Epoch 2/9
----------


Loss: 0.0000: 100%|██████████| 429/429 [00:43<00:00,  9.82it/s]
100%|██████████| 108/108 [00:05<00:00, 21.13it/s]
2024-04-20 02:17:02,776 - INFO - Epoch 4/10


Loss: 0.0006, train Accuracy: 0.9999, train F1-Score: 0.9999
validation Accuracy: 1.0000, validation F1-Score: 1.0000
----------
Epoch 3/9
----------


Loss: 0.0000: 100%|██████████| 429/429 [00:43<00:00,  9.80it/s]
100%|██████████| 108/108 [00:05<00:00, 21.39it/s]
2024-04-20 02:17:51,623 - INFO - Epoch 5/10


Loss: 0.0001, train Accuracy: 0.9999, train F1-Score: 0.9999
validation Accuracy: 1.0000, validation F1-Score: 1.0000
----------
Epoch 4/9
----------


Loss: 0.0000: 100%|██████████| 429/429 [00:44<00:00,  9.68it/s]
100%|██████████| 108/108 [00:05<00:00, 21.35it/s]
2024-04-20 02:18:41,016 - INFO - Epoch 6/10
2024-04-20 02:18:41,017 - INFO - Early Stopping... epoch 6


Loss: 0.0000, train Accuracy: 1.0000, train F1-Score: 1.0000
validation Accuracy: 1.0000, validation F1-Score: 1.0000
----------
Epoch 5/9
----------
Early Stopping....

best epoch: 1, best f1 score: 1.0
Training complete in 4m 9s


2024-04-20 02:18:41,491 - INFO - Stratified K-Fold: 2 / 5



Stratified K-Fold: 1
----------


2024-04-20 02:18:41,972 - INFO - Loading pretrained weights from Hugging Face hub (timm/tf_efficientnetv2_m.in21k_ft_in1k)
2024-04-20 02:18:42,298 - INFO - [timm/tf_efficientnetv2_m.in21k_ft_in1k] Safe alternative available for 'pytorch_model.bin' (as 'model.safetensors'). Loading weights using safetensors.
2024-04-20 02:18:42,447 - INFO - Epoch 1/10


Epoch 0/9
----------


Loss: 0.0000: 100%|██████████| 429/429 [00:43<00:00,  9.78it/s]
100%|██████████| 108/108 [00:05<00:00, 21.01it/s]
2024-04-20 02:19:31,479 - INFO - Epoch 2/10


Loss: 0.1099, train Accuracy: 0.9909, train F1-Score: 0.9909
validation Accuracy: 0.9985, validation F1-Score: 0.9985
----------
Epoch 1/9
----------


Loss: 0.0000: 100%|██████████| 429/429 [00:43<00:00,  9.85it/s]
100%|██████████| 108/108 [00:05<00:00, 21.19it/s]
2024-04-20 02:20:20,171 - INFO - Epoch 3/10


Loss: 0.0028, train Accuracy: 0.9993, train F1-Score: 0.9993
validation Accuracy: 1.0000, validation F1-Score: 1.0000
----------
Epoch 2/9
----------


Loss: 0.0000: 100%|██████████| 429/429 [00:44<00:00,  9.70it/s]
100%|██████████| 108/108 [00:05<00:00, 21.00it/s]
2024-04-20 02:21:09,576 - INFO - Epoch 4/10


Loss: 0.0001, train Accuracy: 1.0000, train F1-Score: 1.0000
validation Accuracy: 1.0000, validation F1-Score: 1.0000
----------
Epoch 3/9
----------


Loss: 0.0000: 100%|██████████| 429/429 [00:44<00:00,  9.73it/s]
100%|██████████| 108/108 [00:05<00:00, 21.23it/s]
2024-04-20 02:21:58,804 - INFO - Epoch 5/10


Loss: 0.0002, train Accuracy: 0.9999, train F1-Score: 0.9999
validation Accuracy: 1.0000, validation F1-Score: 1.0000
----------
Epoch 4/9
----------


Loss: 0.0000: 100%|██████████| 429/429 [00:44<00:00,  9.74it/s]
100%|██████████| 108/108 [00:05<00:00, 21.13it/s]
2024-04-20 02:22:48,021 - INFO - Epoch 6/10
2024-04-20 02:22:48,022 - INFO - Early Stopping... epoch 6


Loss: 0.0008, train Accuracy: 0.9999, train F1-Score: 0.9999
validation Accuracy: 1.0000, validation F1-Score: 1.0000
----------
Epoch 5/9
----------
Early Stopping....

best epoch: 1, best f1 score: 1.0
Training complete in 8m 16s


2024-04-20 02:22:48,311 - INFO - Stratified K-Fold: 3 / 5



Stratified K-Fold: 2
----------


2024-04-20 02:22:48,752 - INFO - Loading pretrained weights from Hugging Face hub (timm/tf_efficientnetv2_m.in21k_ft_in1k)
2024-04-20 02:22:49,037 - INFO - [timm/tf_efficientnetv2_m.in21k_ft_in1k] Safe alternative available for 'pytorch_model.bin' (as 'model.safetensors'). Loading weights using safetensors.
2024-04-20 02:22:49,153 - INFO - Epoch 1/10


Epoch 0/9
----------


Loss: 0.0000: 100%|██████████| 429/429 [00:44<00:00,  9.64it/s]
100%|██████████| 108/108 [00:05<00:00, 21.04it/s]
2024-04-20 02:23:38,849 - INFO - Epoch 2/10


Loss: 0.0865, train Accuracy: 0.9927, train F1-Score: 0.9927
validation Accuracy: 1.0000, validation F1-Score: 1.0000
----------
Epoch 1/9
----------


Loss: 0.0000: 100%|██████████| 429/429 [00:44<00:00,  9.69it/s]
100%|██████████| 108/108 [00:05<00:00, 20.98it/s]
2024-04-20 02:24:28,324 - INFO - Epoch 3/10


Loss: 0.0051, train Accuracy: 0.9994, train F1-Score: 0.9994
validation Accuracy: 1.0000, validation F1-Score: 1.0000
----------
Epoch 2/9
----------


Loss: 0.0000: 100%|██████████| 429/429 [00:44<00:00,  9.66it/s]
100%|██████████| 108/108 [00:05<00:00, 20.93it/s]
2024-04-20 02:25:17,947 - INFO - Epoch 4/10


Loss: 0.0002, train Accuracy: 0.9999, train F1-Score: 0.9999
validation Accuracy: 1.0000, validation F1-Score: 1.0000
----------
Epoch 3/9
----------


Loss: 0.0000: 100%|██████████| 429/429 [00:44<00:00,  9.64it/s]
100%|██████████| 108/108 [00:05<00:00, 20.95it/s]
2024-04-20 02:26:07,647 - INFO - Epoch 5/10
2024-04-20 02:26:07,648 - INFO - Early Stopping... epoch 5


Loss: 0.0013, train Accuracy: 0.9997, train F1-Score: 0.9997
validation Accuracy: 1.0000, validation F1-Score: 1.0000
----------
Epoch 4/9
----------
Early Stopping....

best epoch: 0, best f1 score: 1.0
Training complete in 11m 36s


2024-04-20 02:26:07,949 - INFO - Stratified K-Fold: 4 / 5



Stratified K-Fold: 3
----------


2024-04-20 02:26:08,397 - INFO - Loading pretrained weights from Hugging Face hub (timm/tf_efficientnetv2_m.in21k_ft_in1k)
2024-04-20 02:26:08,602 - INFO - [timm/tf_efficientnetv2_m.in21k_ft_in1k] Safe alternative available for 'pytorch_model.bin' (as 'model.safetensors'). Loading weights using safetensors.
2024-04-20 02:26:08,717 - INFO - Epoch 1/10


Epoch 0/9
----------


Loss: 0.0000: 100%|██████████| 429/429 [00:44<00:00,  9.68it/s]
100%|██████████| 108/108 [00:05<00:00, 21.05it/s]
2024-04-20 02:26:58,194 - INFO - Epoch 2/10


Loss: 0.0632, train Accuracy: 0.9931, train F1-Score: 0.9931
validation Accuracy: 0.9988, validation F1-Score: 0.9988
----------
Epoch 1/9
----------


Loss: 0.0000: 100%|██████████| 429/429 [00:44<00:00,  9.66it/s]
100%|██████████| 108/108 [00:05<00:00, 20.92it/s]
2024-04-20 02:27:47,804 - INFO - Epoch 3/10


Loss: 0.0025, train Accuracy: 0.9994, train F1-Score: 0.9994
validation Accuracy: 0.9997, validation F1-Score: 0.9997
----------
Epoch 2/9
----------


Loss: 0.0000: 100%|██████████| 429/429 [00:44<00:00,  9.60it/s]
100%|██████████| 108/108 [00:05<00:00, 21.14it/s]
2024-04-20 02:28:37,623 - INFO - Epoch 4/10


Loss: 0.0007, train Accuracy: 0.9999, train F1-Score: 0.9999
validation Accuracy: 1.0000, validation F1-Score: 1.0000
----------
Epoch 3/9
----------


Loss: 0.0000: 100%|██████████| 429/429 [00:44<00:00,  9.62it/s]
100%|██████████| 108/108 [00:05<00:00, 20.86it/s]
2024-04-20 02:29:27,419 - INFO - Epoch 5/10


Loss: 0.0001, train Accuracy: 0.9999, train F1-Score: 0.9999
validation Accuracy: 1.0000, validation F1-Score: 1.0000
----------
Epoch 4/9
----------


Loss: 0.0000: 100%|██████████| 429/429 [00:43<00:00,  9.78it/s]
100%|██████████| 108/108 [00:05<00:00, 21.01it/s]
2024-04-20 02:30:16,479 - INFO - Epoch 6/10


Loss: 0.0000, train Accuracy: 1.0000, train F1-Score: 1.0000
validation Accuracy: 1.0000, validation F1-Score: 1.0000
----------
Epoch 5/9
----------


Loss: 0.0000: 100%|██████████| 429/429 [00:44<00:00,  9.69it/s]
100%|██████████| 108/108 [00:05<00:00, 20.89it/s]
2024-04-20 02:31:05,964 - INFO - Epoch 7/10
2024-04-20 02:31:05,965 - INFO - Early Stopping... epoch 7


Loss: 0.0000, train Accuracy: 1.0000, train F1-Score: 1.0000
validation Accuracy: 1.0000, validation F1-Score: 1.0000
----------
Epoch 6/9
----------
Early Stopping....

best epoch: 2, best f1 score: 1.0
Training complete in 16m 34s


2024-04-20 02:31:06,396 - INFO - Stratified K-Fold: 5 / 5



Stratified K-Fold: 4
----------


2024-04-20 02:31:06,843 - INFO - Loading pretrained weights from Hugging Face hub (timm/tf_efficientnetv2_m.in21k_ft_in1k)
2024-04-20 02:31:07,309 - INFO - [timm/tf_efficientnetv2_m.in21k_ft_in1k] Safe alternative available for 'pytorch_model.bin' (as 'model.safetensors'). Loading weights using safetensors.
2024-04-20 02:31:07,459 - INFO - Epoch 1/10


Epoch 0/9
----------


Loss: 0.0000: 100%|██████████| 429/429 [00:43<00:00,  9.76it/s]
100%|██████████| 108/108 [00:05<00:00, 20.91it/s]
2024-04-20 02:31:56,609 - INFO - Epoch 2/10


Loss: 0.0750, train Accuracy: 0.9932, train F1-Score: 0.9932
validation Accuracy: 1.0000, validation F1-Score: 1.0000
----------
Epoch 1/9
----------


Loss: 0.0000: 100%|██████████| 429/429 [00:44<00:00,  9.61it/s]
100%|██████████| 108/108 [00:05<00:00, 21.15it/s]
2024-04-20 02:32:46,398 - INFO - Epoch 3/10


Loss: 0.0084, train Accuracy: 0.9984, train F1-Score: 0.9984
validation Accuracy: 1.0000, validation F1-Score: 1.0000
----------
Epoch 2/9
----------


Loss: 0.0000: 100%|██████████| 429/429 [00:44<00:00,  9.62it/s]
100%|██████████| 108/108 [00:05<00:00, 20.98it/s]
2024-04-20 02:33:36,163 - INFO - Epoch 4/10


Loss: 0.0006, train Accuracy: 0.9999, train F1-Score: 0.9999
validation Accuracy: 1.0000, validation F1-Score: 1.0000
----------
Epoch 3/9
----------


Loss: 0.0000: 100%|██████████| 429/429 [00:44<00:00,  9.62it/s]
100%|██████████| 108/108 [00:05<00:00, 20.85it/s]
2024-04-20 02:34:25,990 - INFO - Epoch 5/10
2024-04-20 02:34:25,991 - INFO - Early Stopping... epoch 5


Loss: 0.0012, train Accuracy: 0.9998, train F1-Score: 0.9998
validation Accuracy: 1.0000, validation F1-Score: 1.0000
----------
Epoch 4/9
----------
Early Stopping....

best epoch: 0, best f1 score: 1.0
Training complete in 19m 54s


In [16]:
# Mark the end of the training phase in the log.
logging.info('----------------------------------------------')
logging.info('6. Finish Training')

2024-04-20 02:34:26,335 - INFO - ----------------------------------------------
2024-04-20 02:34:26,336 - INFO - 6. Finish Training


In [17]:
# Load the saved per-fold models (folds 0-4) from disk.
# NOTE(review): pickle.load executes arbitrary code embedded in the file;
# only load model files produced by this notebook.
models = []

for i in range(5):
    with open(f'{root_path}/model/{model_name}/fold_{i}.pkl', 'rb') as f:
        model = pickle.load(f)
    models.append(model)
# After the loop the global `model` still refers to the last fold's model.

### classifier로 나눈 3개의 클래스 중 class 1에 해당하는 row들만 가져오기

In [18]:
# Load the CSV with the upstream classifier's results (it provides the
# 'class' column used to filter the test rows).
# NOTE: the variable name is misspelled ("classfier"); it is kept as-is
# because another cell refers to it by this name.
classfier_result = pd.read_csv(f'{root_path}/crossvit_18_dagger_240.in1k_class.csv')


# Submission template; only its 'ID' and 'target' columns are used later.
test_file = pd.read_csv(f'{root_path}/sample_submission.csv')

In [19]:
# Keep only the test rows whose classifier-assigned 'class' equals 1.
# NOTE(review): the boolean mask comes from a different DataFrame and is
# matched to test_file by index label - presumably both CSVs list the same
# images in the same order; verify.
test_file_class_1 = test_file.loc[classfier_result['class'] == 1]
test_file_class_1

Unnamed: 0,ID,target
0,0008fdb22ddce0ce.jpg,0
4,00901f504008d884.jpg,0
12,00f5784903a39fdd.jpg,0
27,021e7c8d9dc19021.jpg,0
33,02f79963274b3c41.jpg,0
...,...,...
3096,fc872debee38eb7d.jpg,0
3099,fcefb15c88bee6d2.jpg,0
3106,fd1049bb59b78775.jpg,0
3114,fd6c4a06eebc95ff.jpg,0


In [20]:
# Dataset over the class-1 test rows, using the test-time transform.
test_dataset = ImageDataset(
    df=test_file_class_1,
    path=test_path,
    transform=test_transform,
)

# shuffle=False keeps predictions in the same order as the DataFrame rows,
# which the later positional assignment of predictions relies on.
test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
    pin_memory=True
)

In [21]:
# Ensemble inference over every loaded fold model.
# Previously only `model` (the last fold left over from the loading loop) was
# used, so the other loaded folds were ignored, and the forward pass ran with
# gradient tracking enabled.
# NOTE(review): assumes `models` is the list of nn.Module objects produced by
# the model-loading cell - confirm that cell ran after training.
preds_list = []

# Make sure every fold model sits on the target device and is in eval mode.
for fold_model in models:
    fold_model.to(device)
    fold_model.eval()

with torch.no_grad():
    for image, _ in tqdm(test_loader):
        image = image.to(device)

        # Soft voting: average the per-fold class probabilities.
        preds = torch.stack(
            [fold_model(image).softmax(dim=1) for fold_model in models]
        ).mean(dim=0)
        preds_list.extend(preds.argmax(dim=1).cpu().numpy())

# Copy the ID/target columns so that writing predictions cannot touch the
# frame held by the dataset; rows stay in loader order (shuffle=False).
pred_df = test_dataset.df[['ID', 'target']].copy()
pred_df['target'] = preds_list

100%|██████████| 13/13 [00:01<00:00,  9.56it/s]


In [22]:
# Map the binary predictions back to the original labels:
#   0 --> 2
#   1 --> 16
for binary_label, original_label in ((0, 2), (1, 16)):
    pred_df.loc[pred_df['target'] == binary_label, 'target'] = original_label

In [23]:
# Sanity check: the prediction rows carry the same IDs, row for row, as the
# filtered test rows.
assert (test_file_class_1['ID'] == pred_df['ID']).all()

In [24]:
# Write the class-1 predictions to <root>/result/<model_name>_class_1.csv.
result_path = f"{root_path}/result"
# exist_ok=True replaces the check-then-create pattern, which can fail if the
# directory appears between the existence check and the makedirs call.
os.makedirs(result_path, exist_ok=True)
pred_df.to_csv(f"{result_path}/{model_name}_class_1.csv", index=False)
pred_df.head()

Unnamed: 0,ID,target
0,0008fdb22ddce0ce.jpg,2
4,00901f504008d884.jpg,2
12,00f5784903a39fdd.jpg,16
27,021e7c8d9dc19021.jpg,16
33,02f79963274b3c41.jpg,2
