In [1]:
import os
from typing import Tuple, Any, Callable, List, Optional, Union

import cv2
import timm
import torch
import numpy as np
import pandas as pd
import albumentations as A
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms
from tqdm.auto import tqdm
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from albumentations.pytorch import ToTensorV2
from torchvision.transforms import AutoAugment, AutoAugmentPolicy

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class CustomDataset(Dataset):
    def __init__(self, root_dir: str, info_df: pd.DataFrame, transform: Callable, is_inference: bool = False):
        self.root_dir = root_dir
        self.transform = transform
        self.is_inference = is_inference
        self.image_paths = info_df['image_path'].tolist()

        if not self.is_inference:
            self.targets = info_df['target'].tolist()

    def __len__(self) -> int:
        return len(self.image_paths)

    def __getitem__(self, index: int) -> Union[Tuple[torch.Tensor, int], torch.Tensor]:
        img_path = os.path.join(self.root_dir, self.image_paths[index])
        image = cv2.imread(img_path, cv2.IMREAD_COLOR)
        # Convert to grayscale
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)  # keep 3 channels for pre-trained models
        
        # 여기에서 PIL로 변환하는 대신 transform에서 처리하도록 넘깁니다.
        image = self.transform(image)  # 이미 numpy로 되어 있기 때문에 변환을 바로 적용

        if self.is_inference:
            return image
        else:
            target = self.targets[index]
            return image, target


In [3]:
from PIL import Image
from torchvision import transforms
from torchvision.transforms import AutoAugment, AutoAugmentPolicy

class TorchvisionTransform:
    def __init__(self, is_train: bool = True):
        if is_train:
            # 훈련 데이터에 대해 AutoAugment와 공통 변환을 적용
            self.transform = transforms.Compose([
                AutoAugment(policy=AutoAugmentPolicy.IMAGENET),  # AutoAugment 적용
                transforms.Resize((224, 224)),  # 이미지를 224x224 크기로 리사이즈
                transforms.ToTensor(),  # 이미지를 PyTorch 텐서로 변환
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # 정규화
            ])
        else:
            # 검증/테스트 데이터에 대해 공통 변환만 적용
            self.transform = transforms.Compose([
                transforms.Resize((224, 224)),  # 이미지를 224x224 크기로 리사이즈
                transforms.ToTensor(),  # 이미지를 PyTorch 텐서로 변환
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # 정규화
            ])

    def __call__(self, image: np.ndarray) -> torch.Tensor:
        # numpy 배열을 PIL 이미지로 변환
        image = Image.fromarray(image)
        
        # 설정된 변환을 적용
        transformed = self.transform(image)
        
        return transformed


In [4]:
class TransformSelector:
    """
    이미지 변환 라이브러리를 선택하기 위한 클래스.
    """
    def __init__(self, transform_type: str):

        # 지원하는 변환 라이브러리인지 확인
        if transform_type in ["torchvision", "albumentations"]:
            self.transform_type = transform_type
        
        else:
            raise ValueError("Unknown transformation library specified.")

    def get_transform(self, is_train: bool):
        
        # 선택된 라이브러리에 따라 적절한 변환 객체를 생성
        if self.transform_type == 'torchvision':
            transform = TorchvisionTransform(is_train=is_train)
        
        elif self.transform_type == 'albumentations':
            transform = AlbumentationsTransform(is_train=is_train)
        
        return transform

In [5]:
# EfficientNetB2 using Timm
class TimmModel(nn.Module):
    def __init__(self, model_name: str, num_classes: int, pretrained: bool):
        super(TimmModel, self).__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained, num_classes=num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.model(x)

In [6]:
# ModelSelector modified to use EfficientNetB2
class ModelSelector:
    def __init__(self, model_type: str, num_classes: int, **kwargs):
        if model_type == 'timm':
            self.model = TimmModel(model_name='efficientnet_b2', num_classes=num_classes, **kwargs)
        else:
            raise ValueError("Unknown model type specified.")

    def get_model(self) -> nn.Module:
        return self.model

In [7]:
class Loss(nn.Module):
    """
    모델의 손실함수를 계산하는 클래스.
    """
    def __init__(self):
        super(Loss, self).__init__()
        self.loss_fn = nn.CrossEntropyLoss()

    def forward(
        self, 
        outputs: torch.Tensor, 
        targets: torch.Tensor
    ) -> torch.Tensor:
    
        return self.loss_fn(outputs, targets)

In [8]:
class Trainer:
    def __init__(
        self, 
        model: nn.Module, 
        device: torch.device, 
        train_loader: DataLoader, 
        val_loader: DataLoader, 
        optimizer: optim.Optimizer,
        scheduler: optim.lr_scheduler,
        loss_fn: torch.nn.modules.loss._Loss, 
        epochs: int,
        result_path: str
    ):
        # 클래스 초기화: 모델, 디바이스, 데이터 로더 등 설정
        self.model = model  # 훈련할 모델
        self.device = device  # 연산을 수행할 디바이스 (CPU or GPU)
        self.train_loader = train_loader  # 훈련 데이터 로더
        self.val_loader = val_loader  # 검증 데이터 로더
        self.optimizer = optimizer  # 최적화 알고리즘
        self.scheduler = scheduler # 학습률 스케줄러
        self.loss_fn = loss_fn  # 손실 함수
        self.epochs = epochs  # 총 훈련 에폭 수
        self.result_path = result_path  # 모델 저장 경로
        self.best_models = [] # 가장 좋은 상위 3개 모델의 정보를 저장할 리스트
        self.lowest_loss = float('inf') # 가장 낮은 Loss를 저장할 변수

    def save_model(self, epoch, loss):
        # 모델 저장 경로 설정
        os.makedirs(self.result_path, exist_ok=True)

        # 현재 에폭 모델 저장
        current_model_path = os.path.join(self.result_path, f'model_epoch_{epoch}_loss_{loss:.4f}.pt')
        torch.save(self.model.state_dict(), current_model_path)

        # 최상위 3개 모델 관리
        self.best_models.append((loss, epoch, current_model_path))
        self.best_models.sort()
        if len(self.best_models) > 3:
            _, _, path_to_remove = self.best_models.pop(-1)  # 가장 높은 손실 모델 삭제
            if os.path.exists(path_to_remove):
                os.remove(path_to_remove)

        # 가장 낮은 손실의 모델 저장
        if loss < self.lowest_loss:
            self.lowest_loss = loss
            best_model_path = os.path.join(self.result_path, 'best_model.pt')
            torch.save(self.model.state_dict(), best_model_path)
            print(f"Save {epoch}epoch result. Loss = {loss:.4f}")

    def train_epoch(self) -> float:
        # 한 에폭 동안의 훈련을 진행
        self.model.train()
        
        total_loss = 0.0
        progress_bar = tqdm(self.train_loader, desc="Training", leave=False)
        
        for images, targets in progress_bar:
            images, targets = images.to(self.device), targets.to(self.device)
            self.optimizer.zero_grad()
            outputs = self.model(images)
            loss = self.loss_fn(outputs, targets)
            loss.backward()
            self.optimizer.step()
            self.scheduler.step()
            total_loss += loss.item()
            progress_bar.set_postfix(loss=loss.item())
        
        return total_loss / len(self.train_loader)

    def validate(self) -> float:
        # 모델의 검증을 진행
        self.model.eval()
        
        total_loss = 0.0
        progress_bar = tqdm(self.val_loader, desc="Validating", leave=False)
        
        with torch.no_grad():
            for images, targets in progress_bar:
                images, targets = images.to(self.device), targets.to(self.device)
                outputs = self.model(images)    
                loss = self.loss_fn(outputs, targets)
                total_loss += loss.item()
                progress_bar.set_postfix(loss=loss.item())
        
        return total_loss / len(self.val_loader)

    def train(self) -> None:
        # 전체 훈련 과정을 관리
        for epoch in range(self.epochs):
            print(f"Epoch {epoch+1}/{self.epochs}")
            
            train_loss = self.train_epoch()
            val_loss = self.validate()
            print(f"Epoch {epoch+1}, Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}\n")

            self.save_model(epoch, val_loss)
            self.scheduler.step()

In [9]:
# 학습에 사용할 장비를 선택.
# torch라이브러리에서 gpu를 인식할 경우, cuda로 설정.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [10]:
# 학습 데이터의 경로와 정보를 가진 파일의 경로를 설정.
traindata_dir = "./data/train"
traindata_info_file = "./data/train.csv"
save_result_path = "./train_result"

In [11]:
# 학습 데이터의 class, image path, target에 대한 정보가 들어있는 csv파일을 읽기.
train_info = pd.read_csv(traindata_info_file)

# 총 class의 수를 측정.
num_classes = len(train_info['target'].unique())

# 각 class별로 8:2의 비율이 되도록 학습과 검증 데이터를 분리.
train_df, val_df = train_test_split(
    train_info, 
    test_size=0.2,
    stratify=train_info['target']
)

In [12]:
# 학습에 사용할 Transform을 선언.
transform_selector = TransformSelector(
    transform_type = "torchvision" 
)
train_transform = transform_selector.get_transform(is_train=True)
val_transform = transform_selector.get_transform(is_train=False)

# 학습에 사용할 Dataset을 선언.
train_dataset = CustomDataset(
    root_dir=traindata_dir,
    info_df=train_df,
    transform=train_transform
)
val_dataset = CustomDataset(
    root_dir=traindata_dir,
    info_df=val_df,
    transform=val_transform
)

# 학습에 사용할 DataLoader를 선언.
train_loader = DataLoader(
    train_dataset, 
    batch_size=64, 
    shuffle=True
)
val_loader = DataLoader(
    val_dataset, 
    batch_size=64, 
    shuffle=False
)

In [13]:
# 학습에 사용할 Model을 선언.
model_selector = ModelSelector(
    model_type='timm', 
    num_classes=num_classes,
    pretrained=True
)
model = model_selector.get_model()

# 선언된 모델을 학습에 사용할 장비로 셋팅.
model.to(device)

TimmModel(
  (model): EfficientNet(
    (conv_stem): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn1): BatchNormAct2d(
      32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
      (drop): Identity()
      (act): SiLU(inplace=True)
    )
    (blocks): Sequential(
      (0): Sequential(
        (0): DepthwiseSeparableConv(
          (conv_dw): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (bn1): BatchNormAct2d(
            32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): SiLU(inplace=True)
          )
          (se): SqueezeExcite(
            (conv_reduce): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (act1): SiLU(inplace=True)
            (conv_expand): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (gate): Sigmoid()
          )
          (conv_pw): Conv2d(32, 16, kernel_size=(1, 1)

In [14]:
# 학습에 사용할 optimizer를 선언하고, learning rate를 지정
optimizer = optim.Adam(
    model.parameters(), 
    lr=0.001
)

In [15]:
# 스케줄러 초기화
scheduler_step_size = 30  # 매 30step마다 학습률 감소
scheduler_gamma = 0.1  # 학습률을 현재의 10%로 감소

# 한 epoch당 step 수 계산
steps_per_epoch = len(train_loader)

# 2 epoch마다 학습률을 감소시키는 스케줄러 선언
epochs_per_lr_decay = 2
scheduler_step_size = steps_per_epoch * epochs_per_lr_decay

scheduler = optim.lr_scheduler.StepLR(
    optimizer, 
    step_size=scheduler_step_size, 
    gamma=scheduler_gamma
)

In [16]:
# 학습에 사용할 Loss를 선언.
loss_fn = Loss()

In [17]:
# 앞서 선언한 필요 class와 변수들을 조합해, 학습을 진행할 Trainer를 선언. 
trainer = Trainer(
    model=model, 
    device=device, 
    train_loader=train_loader,
    val_loader=val_loader, 
    optimizer=optimizer,
    scheduler=scheduler,
    loss_fn=loss_fn, 
    epochs=30,
    result_path=save_result_path
)

In [18]:
# 모델 학습.
trainer.train()

Epoch 1/30


                                                                      

Epoch 1, Train Loss: 3.9047, Validation Loss: 2.1926

Save 0epoch result. Loss = 2.1926
Epoch 2/30


                                                                      

Epoch 2, Train Loss: 1.5989, Validation Loss: 1.4770

Save 1epoch result. Loss = 1.4770
Epoch 3/30


                                                                       

Epoch 3, Train Loss: 0.6205, Validation Loss: 1.0365

Save 2epoch result. Loss = 1.0365
Epoch 4/30


                                                                       

Epoch 4, Train Loss: 0.3905, Validation Loss: 0.9590

Save 3epoch result. Loss = 0.9590
Epoch 5/30


                                                                       

Epoch 5, Train Loss: 0.2997, Validation Loss: 0.9484

Save 4epoch result. Loss = 0.9484
Epoch 6/30


                                                                       

Epoch 6, Train Loss: 0.2857, Validation Loss: 0.9448

Save 5epoch result. Loss = 0.9448
Epoch 7/30


                                                                        

Epoch 7, Train Loss: 0.2792, Validation Loss: 0.9435

Save 6epoch result. Loss = 0.9435
Epoch 8/30


                                                                       

Epoch 8, Train Loss: 0.2743, Validation Loss: 0.9398

Save 7epoch result. Loss = 0.9398
Epoch 9/30


                                                                       

Epoch 9, Train Loss: 0.2770, Validation Loss: 0.9416

Epoch 10/30


                                                                       

Epoch 10, Train Loss: 0.2696, Validation Loss: 0.9446

Epoch 11/30


                                                                       

Epoch 11, Train Loss: 0.2759, Validation Loss: 0.9399

Epoch 12/30


                                                                       

Epoch 12, Train Loss: 0.2764, Validation Loss: 0.9401

Epoch 13/30


                                                                       

Epoch 13, Train Loss: 0.2740, Validation Loss: 0.9417

Epoch 14/30


                                                                       

Epoch 14, Train Loss: 0.2783, Validation Loss: 0.9433

Epoch 15/30


                                                                       

Epoch 15, Train Loss: 0.2787, Validation Loss: 0.9406

Epoch 16/30


                                                                       

Epoch 16, Train Loss: 0.2730, Validation Loss: 0.9391

Save 15epoch result. Loss = 0.9391
Epoch 17/30


                                                                        

Epoch 17, Train Loss: 0.2767, Validation Loss: 0.9439

Epoch 18/30


                                                                       

Epoch 18, Train Loss: 0.2748, Validation Loss: 0.9392

Epoch 19/30


                                                                       

Epoch 19, Train Loss: 0.2702, Validation Loss: 0.9410

Epoch 20/30


                                                                       

Epoch 20, Train Loss: 0.2801, Validation Loss: 0.9431

Epoch 21/30


                                                                       

Epoch 21, Train Loss: 0.2806, Validation Loss: 0.9409

Epoch 22/30


                                                                       

Epoch 22, Train Loss: 0.2767, Validation Loss: 0.9415

Epoch 23/30


                                                                       

Epoch 23, Train Loss: 0.2748, Validation Loss: 0.9421

Epoch 24/30


                                                                       

Epoch 24, Train Loss: 0.2725, Validation Loss: 0.9434

Epoch 25/30


                                                                       

Epoch 25, Train Loss: 0.2765, Validation Loss: 0.9432

Epoch 26/30


                                                                       

Epoch 26, Train Loss: 0.2742, Validation Loss: 0.9443

Epoch 27/30


                                                                       

Epoch 27, Train Loss: 0.2802, Validation Loss: 0.9418

Epoch 28/30


                                                                       

Epoch 28, Train Loss: 0.2737, Validation Loss: 0.9418

Epoch 29/30


                                                                       

Epoch 29, Train Loss: 0.2722, Validation Loss: 0.9418

Epoch 30/30


                                                                       

Epoch 30, Train Loss: 0.2733, Validation Loss: 0.9451





In [19]:
# 모델 추론을 위한 함수
def inference(
    model: nn.Module, 
    device: torch.device, 
    test_loader: DataLoader
):
    # 모델을 평가 모드로 설정
    model.to(device)
    model.eval()
    
    predictions = []
    with torch.no_grad():  # Gradient 계산을 비활성화
        for images in tqdm(test_loader):
            # 데이터를 같은 장치로 이동
            images = images.to(device)
            
            # 모델을 통해 예측 수행
            logits = model(images)
            logits = F.softmax(logits, dim=1)
            preds = logits.argmax(dim=1)
            
            # 예측 결과 저장
            predictions.extend(preds.cpu().detach().numpy())  # 결과를 CPU로 옮기고 리스트에 추가
    
    return predictions

In [20]:
# 추론 데이터의 경로와 정보를 가진 파일의 경로를 설정.
testdata_dir = "./data/test"
testdata_info_file = "./data/test.csv"
save_result_path = "./train_result"

In [21]:
# 추론 데이터의 class, image path, target에 대한 정보가 들어있는 csv파일을 읽기.
test_info = pd.read_csv(testdata_info_file)

# 총 class 수.
num_classes = 500

In [22]:
# 추론에 사용할 Transform을 선언.
transform_selector = TransformSelector(
    transform_type = "torchvision"
)
test_transform = transform_selector.get_transform(is_train=False)

# 추론에 사용할 Dataset을 선언.
test_dataset = CustomDataset(
    root_dir=testdata_dir,
    info_df=test_info,
    transform=test_transform,
    is_inference=True
)

# 추론에 사용할 DataLoader를 선언.
test_loader = DataLoader(
    test_dataset, 
    batch_size=64, 
    shuffle=False,
    drop_last=False
)

In [23]:
# 추론에 사용할 장비를 선택.
# torch라이브러리에서 gpu를 인식할 경우, cuda로 설정.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 추론에 사용할 Model을 선언.
model_selector = ModelSelector(
    model_type='timm', 
    num_classes=num_classes,
    pretrained=False
)
model = model_selector.get_model()


In [24]:
# best epoch 모델을 불러오기.
model.load_state_dict(
    torch.load(
        os.path.join(save_result_path, "best_model.pt"),
        map_location='cpu'
    )
)

  torch.load(


<All keys matched successfully>

In [25]:
# predictions를 CSV에 저장할 때 형식을 맞춰서 저장
# 테스트 함수 호출
predictions = inference(
    model=model, 
    device=device, 
    test_loader=test_loader
)

100%|██████████| 157/157 [01:42<00:00,  1.54it/s]


In [26]:
# 모든 클래스에 대한 예측 결과를 하나의 문자열로 합침
test_info['target'] = predictions
test_info = test_info.reset_index().rename(columns={"index": "ID"})
test_info

Unnamed: 0,ID,image_path,target
0,0,0.JPEG,246
1,1,1.JPEG,414
2,2,2.JPEG,493
3,3,3.JPEG,17
4,4,4.JPEG,388
...,...,...,...
10009,10009,10009.JPEG,235
10010,10010,10010.JPEG,86
10011,10011,10011.JPEG,244
10012,10012,10012.JPEG,351


In [27]:
# DataFrame 저장
test_info.to_csv("EfficientB2_Autoaug_grayscale.csv", index=False)

In [None]:
# GAN으로 train 데이터 셋 증강시켜서 일단 원본 train 데이터 셋만큼만 증강시켜서 총 학습데이터셋은 원본의 2배로 만들기
# gan 모델 학습 시키고 그다음에 증강해야 함..
# gan 모델 학습을 원본 스케치 데이터셋으로 학습하기~~