In [1]:
import pandas as pd

# Load the test-set metadata table from disk.
TEST_CSV_PATH = 'data/test.csv'
test_df = pd.read_csv(TEST_CSV_PATH)

In [2]:
import timm
import torch
import torch.nn as nn
import pytorch_lightning as pl
import torch.nn.functional as F
class EnsembleModel(pl.LightningModule):
    """Ensemble classifier built on three timm backbones.

    Each backbone (CLIP ViT, Swin Transformer, ConvNeXt V2) has its classifier
    layer replaced by ``nn.Identity`` and its parameters frozen. A per-backbone
    branch projects the backbone features to a fixed width, the three
    projections are concatenated, and a fully connected head maps the result to
    ``num_classes`` logits.

    Args:
        num_classes: Number of logits produced by the final linear layer.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.save_hyperparameters()

        # Build the backbones without pretrained weights.
        self.clip_base = timm.create_model(
            'vit_giant_patch14_clip_224', pretrained=False, num_classes=num_classes
        )
        self.swin_base = timm.create_model(
            'swin_base_patch4_window7_224', pretrained=False, num_classes=num_classes
        )
        self.convnext_base = timm.create_model(
            'convnextv2_huge', pretrained=False, num_classes=num_classes
        )

        # Record each classifier's input width before it is swapped out.
        clip_in_features = self.clip_base.head.in_features
        swin_in_features = self.swin_base.head.fc.in_features
        convnext_in_features = self.convnext_base.head.fc.in_features

        # Replace the classifier layers so the backbones emit features.
        self.clip_base.head = nn.Identity()
        self.swin_base.head.fc = nn.Identity()
        self.convnext_base.head.fc = nn.Identity()

        # Freeze every backbone parameter; layers created below stay trainable.
        for backbone in (self.clip_base, self.swin_base, self.convnext_base):
            backbone.requires_grad_(False)

        # Projection width of each branch.
        clip_branch_output_dim = 512
        swin_branch_output_dim = 384
        convnext_branch_output_dim = 768

        self.clip_branch = self.create_branch(
            self.clip_base, clip_in_features, clip_branch_output_dim
        )
        self.swin_branch = self.create_branch(
            self.swin_base, swin_in_features, swin_branch_output_dim
        )
        self.convnext_branch = self.create_branch(
            self.convnext_base, convnext_in_features, convnext_branch_output_dim
        )

        # Classification head applied to the concatenated branch outputs.
        combined_dim = (
            clip_branch_output_dim + swin_branch_output_dim + convnext_branch_output_dim
        )
        self.fc_layers = nn.Sequential(
            nn.Linear(combined_dim, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes),
        )

    def create_branch(self, base_model, result_features, target_features):
        """Wrap a backbone with a flatten + linear projection block.

        Args:
            base_model: Backbone module placed first in the branch.
            result_features: Input width of the projection layer.
            target_features: Output width of the projection layer.

        Returns:
            An ``nn.Sequential`` of backbone, flatten, linear, ReLU and dropout.
        """
        layers = [
            base_model,
            nn.Flatten(start_dim=1),
            nn.Linear(result_features, target_features),
            nn.ReLU(),
            nn.Dropout(0.5),
        ]
        return nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through all three branches and classify the joined features."""
        branches = (self.clip_branch, self.swin_branch, self.convnext_branch)
        branch_outputs = [branch(x) for branch in branches]
        # Concatenate along the feature dimension before the shared head.
        return self.fc_layers(torch.cat(branch_outputs, dim=1))

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from torchvision import transforms

# Preprocessing steps applied to every image, in order.
preprocessing_steps = [
    # Resize to a fixed 224x224 input.
    transforms.Resize(size=(224, 224)),
    # Convert the PIL image to a PyTorch tensor.
    transforms.ToTensor(),
    # Normalise each channel with the given mean and standard deviation.
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
# The individual steps must be wrapped in transforms.Compose to be callable as one.
common_transforms = transforms.Compose(preprocessing_steps)

In [4]:
from torch.utils.data import Dataset,DataLoader
from typing import Callable, Union, Tuple
import cv2
import os
from PIL import Image

class CustomDataset(Dataset):
    """Image dataset driven by a table of image paths.

    Args:
        root_dir: Directory that the paths in ``info_df['image_path']`` are
            joined onto.
        info_df: Table with an ``image_path`` column and, when
            ``is_inference`` is false, a ``target`` column.
        transform: Callable applied to each loaded PIL image.
        is_inference: When true, items are images only and no targets are read.
    """

    def __init__(self, root_dir, info_df, transform, is_inference):
        self.root_dir = root_dir
        self.transform = transform
        self.is_inference = is_inference
        self.info_df = info_df
        self.image_paths = self.info_df['image_path'].tolist()

        # Targets are only read outside inference mode.
        if not self.is_inference:
            self.targets = self.info_df['target'].tolist()

    def __len__(self) -> int:
        """Return the number of image paths in the dataset."""
        return len(self.image_paths)

    def __getitem__(self, index: int) -> Union[Tuple[torch.Tensor, int, int], torch.Tensor]:
        """Load, convert and transform the image at ``index``.

        Returns:
            The transformed image in inference mode, otherwise the tuple
            ``(image, target, index)``.

        Raises:
            FileNotFoundError: If the image file is missing or cannot be decoded.
        """
        img_path = os.path.join(self.root_dir, self.image_paths[index])
        image = cv2.imread(img_path, cv2.IMREAD_COLOR)  # loaded as a BGR numpy array
        if image is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than inside cvtColor.
            raise FileNotFoundError(f"Could not read image file: {img_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # BGR -> RGB

        # Convert the numpy array to a PIL image before applying the transform.
        image = Image.fromarray(image)

        # The transform is called with the image as a positional argument.
        image = self.transform(image)

        if self.is_inference:
            return image

        target = self.targets[index]
        return image, target, index

## Adjust the batch size (`batch_size`) in the DataLoader below

In [6]:
# Inference-mode dataset over the test images (no targets are read).
test_dataset = CustomDataset(
    root_dir='data/test',
    info_df=test_df,
    transform=common_transforms,
    is_inference=True,
)
# Keep shuffle off so batch order follows the row order of test_df.
test_dataloader = DataLoader(
    dataset=test_dataset,
    batch_size=64,
    shuffle=False,
    num_workers=8,
)

In [7]:
import numpy as np

## Add the checkpoint paths to ensemble to the list below

In [10]:
# Directory of the training run that holds one sub-folder per fold.
_RUN_DIR = "result/clip_swin_transformer_7_224_convnextv2_huge-96-0.0002-AdamW-O-ensemble_with_3_models_09-25_0"

# Checkpoints to ensemble; add further fold checkpoints to the tuple below.
ckpt_paths = [
    f"{_RUN_DIR}/{fold_ckpt}"
    for fold_ckpt in (
        "fold0/epoch=52-step=1696.ckpt",
        "fold1/epoch=44-step=1440.ckpt",
        "fold2/epoch=49-step=1600.ckpt",
        "fold3/epoch=34-step=1120.ckpt",
    )
]

In [11]:
import torch
from tqdm import tqdm
import gc

# Ensemble inference: average the softmax probabilities of every checkpoint in
# `ckpt_paths` over the whole test set. The result is left in `test_predictions`
# with shape (len(test_df), NUM_CLASSES).

# Number of classes the model is built with; load_state_dict below is strict,
# so it fails if this does not match the checkpoints.
NUM_CLASSES = 500

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

if not ckpt_paths:
    # Without this guard the final division by len(ckpt_paths) would yield NaN.
    raise ValueError("ckpt_paths is empty; add at least one checkpoint to ensemble.")

# Running sum of per-checkpoint class probabilities; averaged after the loop.
test_predictions = np.zeros((len(test_df), NUM_CLASSES))

with torch.no_grad():  # no gradients are needed for inference
    for ckpt_path in ckpt_paths:
        print(ckpt_path)

        model = EnsembleModel(NUM_CLASSES)

        # map_location="cpu" keeps the checkpoint tensors off the GPU, so the
        # checkpoint and the model never occupy GPU memory at the same time.
        # NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
        ckpt = torch.load(ckpt_path, map_location="cpu")

        # Drop the first dotted component of every key so the names line up
        # with EnsembleModel's own parameter names.
        state_dict = {key.partition('.')[2]: val for key, val in ckpt['state_dict'].items()}
        model.load_state_dict(state_dict)

        # Release the checkpoint copies before moving the model to the device.
        ckpt = None
        state_dict = None
        gc.collect()

        model = model.to(device)
        # Switch to evaluation mode; otherwise the Dropout layers stay active
        # and randomly perturb every prediction.
        model.eval()
        torch.cuda.empty_cache()  # return cached, unused GPU blocks

        fold_logits = []
        for batch in tqdm(test_dataloader):
            images = batch.to(device)
            # Move each batch of logits to the CPU right away to limit GPU usage.
            fold_logits.append(model(images).cpu())

        # Convert this checkpoint's logits to probabilities and accumulate.
        fold_probs = F.softmax(torch.cat(fold_logits, dim=0), dim=1)
        test_predictions += fold_probs.numpy()

# Average the accumulated probabilities over all checkpoints.
test_predictions /= len(ckpt_paths)


./epoch=52-step=1696.ckpt


RuntimeError: CUDA error: out of memory
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [21]:
# Use the index of the highest averaged probability in each row as the prediction.
test_df['target'] = np.argmax(test_predictions, axis=1)


array([328, 414, 493,  17], dtype=int64)

In [24]:
# Display the test table for a visual check.
test_df

In [23]:
# Write the test table to CSV without the DataFrame index.
test_df.to_csv(path_or_buf='output.csv', index=False)

Unnamed: 0,image_path,target
0,0.JPEG,328
1,1.JPEG,414
2,2.JPEG,493
3,3.JPEG,17
