# 작동 방법
1. .bin으로 된 file을 upload한다.
2. 사용했던 model명과 bin의 path를 change에 기록한다
3. check으로 된 부분을 알잘딱깔센하게 수정하거나 맞춘다. (그냥 진행해도 무관)
4. 전부 다 돌려본다!
    - 모델에 따라 size나 normalization 때문에 에러가 날 수 있는데, 그건 augmentation 부분을 참고한다. 
5. output에 train_##.csv, submission_##.csv가 생성된 것을 확인한다. (없으면 refresh 버튼 클릭)
6. output에 있는 file을 download한다. 이제 concat2에서 만나자


In [24]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import torch
import os
from pathlib import Path

# Root directory of the ISIC 2024 challenge files; every metadata CSV and
# HDF5 image archive read below is resolved relative to this path.
path = Path('/kaggle/input/isic-2024-challenge')


## Data Check

In [25]:
# Load the train/test metadata tables and print their (rows, columns) shapes.
# low_memory=False makes pandas parse each file in a single pass rather than
# in chunks, which avoids mixed-dtype inference within a column.
train = pd.read_csv(path / 'train-metadata.csv', low_memory=False)
test = pd.read_csv(path/'test-metadata.csv', low_memory=False)
print(f'train shape : {train.shape}')
print(f'test shape : {test.shape}')

train shape : (401059, 55)
test shape : (3, 44)


In [26]:
from PIL import Image
import os
import gc
import cv2
import math
import copy
import time
import random
from glob import glob

# torch imports
import torch
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.cuda import amp
import torchvision

# Utils
import joblib
from tqdm import tqdm
from collections import defaultdict

import warnings
# Silence every Python warning for the remainder of the session.
warnings.filterwarnings("ignore")

# For colored terminal text
from colorama import Fore, Back, Style
# Shorthand for the blue-foreground and reset-style escape sequences.
# Neither name is referenced in the visible part of this notebook.
b_ = Fore.BLUE
sr_ = Style.RESET_ALL

# For descriptive error messages: with CUDA_LAUNCH_BLOCKING=1 kernel launches
# run synchronously, so a CUDA failure is reported at the call that caused it.
# NOTE(review): this is set after `import torch`; presumably still effective
# because the CUDA context is created lazily — confirm.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

# Fix the random seeds for reproducibility

SEED = 12

def set_seed(SEED):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and all CUDA
    devices), and switches cuDNN to deterministic mode.

    Args:
        SEED: Integer seed applied to all generators.
    """
    # Python's own RNG was previously left unseeded, so anything using
    # `random` differed from run to run.
    random.seed(SEED)
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    # Seed every visible GPU, not only the current device.
    torch.cuda.manual_seed_all(SEED)
    # When running on the CuDNN backend, two further options must be set
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # Set a fixed value for the hash seed. Hash randomisation of the running
    # interpreter is decided at start-up, so this only affects Python
    # processes launched after this point.
    os.environ['PYTHONHASHSEED'] = str(SEED)

set_seed(SEED)


In [27]:
# Inspect the training metadata and add a placeholder prediction column.
# NOTE(review): `train.head()` is not the last expression of the cell, so in a
# notebook its result is presumably not displayed — confirm whether it is needed.
train.head()
# Placeholder for the column that ISICDataset reads as `target`; it is
# overwritten with the model's predictions in the train inference cell.
train['target_eff_semin'] = 0 # check



In [28]:
# Inspect the test metadata and add the same placeholder prediction column.
# NOTE(review): `test.head()` is not the last expression of the cell, so in a
# notebook its result is presumably not displayed — confirm whether it is needed.
test.head()
# Placeholder for the column that ISICDataset reads as `target`; it is
# overwritten with the model's predictions in the test inference cell.
test['target_eff_semin'] = 0 # check

## Create the Dataset

- transform.resize, normalize 필요. 이미지 사이즈 맞춰줘야 하는 문제가 발생.

In [29]:
from io import BytesIO

class ISICDataset(Dataset):
    """Dataset that decodes images stored as encoded bytes in an HDF5 file.

    Each item is looked up in the HDF5 file by its ``isic_id`` and returned
    together with the matching value of the ``target_eff_semin`` column.

    The HDF5 file is opened lazily, on first access in the current process,
    instead of in ``__init__``. A handle opened before the ``DataLoader``
    starts its workers would be shared by every worker process, which h5py
    does not support; opening on demand gives each worker its own handle.
    As a consequence, a wrong path is reported on the first item access
    rather than at construction time.

    Args:
        df: DataFrame with an ``isic_id`` column and a ``target_eff_semin``
            column.
        file_hdf: Path of the HDF5 file whose keys are the ``isic_id`` values.
        transforms: Optional callable invoked as ``transforms(image=img)``
            that returns a mapping with an ``"image"`` entry.
    """

    def __init__(self, df, file_hdf, transforms=None):
        self.df = df
        # Only remember the location; see the `file_hdf` property.
        self._hdf_path = file_hdf
        self._hdf = None
        self.isic_ids = df['isic_id'].values
        self.targets = df['target_eff_semin'].values # check
        self.transforms = transforms

    @property
    def file_hdf(self):
        """Read-only h5py handle, opened on first use in this process."""
        if self._hdf is None:
            # Imported here so the class does not depend on a later cell
            # having imported h5py before it is defined.
            import h5py
            self._hdf = h5py.File(self._hdf_path, mode="r")
        return self._hdf

    def __getstate__(self):
        """Drop the open handle when pickled; the copy reopens the file."""
        state = self.__dict__.copy()
        state['_hdf'] = None
        return state

    def __len__(self):
        return len(self.isic_ids)

    def __getitem__(self, index):
        """Return ``{'image': ..., 'target': ...}`` for row ``index``."""
        isic_id = self.isic_ids[index]
        # The dataset entry holds the encoded image file; decode it with PIL.
        img = np.array(Image.open(BytesIO(self.file_hdf[isic_id][()])))
        target = self.targets[index]

        if self.transforms:
            img = self.transforms(image=img)["image"]

        return {
            'image': img,
            'target': target
        }

## Augmentation (증강시 증강하세요!)

In [30]:
# Albumentations for augmentations
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Deterministic preprocessing applied at inference time: resize to a fixed
# square, normalise each channel, then convert to a tensor. The mean/std
# values are the commonly used ImageNet channel statistics; adjust the size
# and normalisation to whatever the checkpoint was trained with.
_valid_pipeline = [
    A.Resize(256, 256), # check size
    A.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
        max_pixel_value=255.0,
        p=1.0,
    ),
    ToTensorV2(),
]

data_transforms = {"valid": A.Compose(_valid_pipeline, p=1.)}

## GemPooling

In [31]:
class GeM(nn.Module):
    """Generalized-mean (GeM) pooling over the last two dimensions.

    The input is clamped to at least ``eps``, raised to the power ``p``,
    averaged over its full spatial extent and raised to ``1 / p``.

    Args:
        p: Initial value of the pooling exponent, stored as a learnable
            one-element parameter.
        eps: Lower bound applied to the input before exponentiation.
    """

    def __init__(self, p=3, eps=1e-6):
        super().__init__()
        self.eps = eps
        self.p = nn.Parameter(torch.ones(1) * p)

    def forward(self, x):
        """Pool ``x`` with the module's current exponent and epsilon."""
        return self.gem(x, p=self.p, eps=self.eps)

    def gem(self, x, p=3, eps=1e-6):
        """Apply GeM pooling to ``x`` with an explicit ``p`` and ``eps``."""
        height, width = x.size(-2), x.size(-1)
        powered = x.clamp(min=eps).pow(p)
        # A window covering the whole map averages over all spatial positions.
        pooled = F.avg_pool2d(powered, (height, width))
        return pooled.pow(1. / p)

    def __repr__(self):
        p_value = self.p.data.tolist()[0]
        return f"{self.__class__.__name__}(p={p_value:.4f}, eps={self.eps})"

## Modeling

In [33]:
import timm

class SkinModel(nn.Module):
    """timm backbone followed by GeM pooling and a sigmoid-activated linear head.

    The backbone's own classifier and global pooling are replaced with
    identities, so it returns an unpooled feature map that is pooled by
    ``GeM`` and projected to ``num_classes`` outputs.

    Args:
        model_name: Architecture name passed to ``timm.create_model``. The
            created model must expose a ``classifier`` layer with
            ``in_features``.
        num_classes: Number of outputs of the linear head.
        pretrained: Forwarded to ``timm.create_model``.
        checkpoint_path: Forwarded to ``timm.create_model``.
    """

    def __init__(self, model_name, num_classes=1, pretrained=False, checkpoint_path=None):
        super().__init__()
        backbone = timm.create_model(
            model_name,
            pretrained=pretrained,
            checkpoint_path=checkpoint_path,
        )
        # Read the feature width before the classifier is stripped out.
        feature_dim = backbone.classifier.in_features
        backbone.classifier = nn.Identity()
        backbone.global_pool = nn.Identity()

        self.model = backbone
        self.num_classes = num_classes
        self.pooling = GeM()
        self.linear = nn.Linear(feature_dim, num_classes)
        self.sigmoid = nn.Sigmoid()
        self.softmax = nn.Softmax()

    def forward(self, images):
        """Return sigmoid outputs with one row per input image."""
        feature_map = self.model(images)
        pooled = self.pooling(feature_map)
        flat = pooled.flatten(1)
        logits = self.linear(flat)
        return self.sigmoid(logits)

# Rebuild the architecture the checkpoint was trained with and restore its
# weights. Both lines marked "change" must match the uploaded .bin file.
model = SkinModel('tf_efficientnetv2_b0', pretrained=False) # change
model.load_state_dict(
    torch.load(
        '/kaggle/input/seminai_effi/pytorch/ver1/1/AUROC0.5172_Loss0.2831_epoch45.bin', # change
        # Load onto the CPU first so the checkpoint can be read regardless of
        # the device it was saved from; the model is moved to the GPU below.
        map_location='cpu',
    )
)
# eval() is required for inference: a freshly built module is in training
# mode, where BatchNorm normalises with per-batch statistics (and updates its
# running averages), so predictions would depend on batch composition.
model.to('cuda').eval()

SkinModel(
  (model): EfficientNet(
    (conv_stem): Conv2dSame(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (bn1): BatchNormAct2d(
      32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True
      (drop): Identity()
      (act): SiLU(inplace=True)
    )
    (blocks): Sequential(
      (0): Sequential(
        (0): ConvBnAct(
          (conv): Conv2d(32, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn1): BatchNormAct2d(
            16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): SiLU(inplace=True)
          )
          (aa): Identity()
          (drop_path): Identity()
        )
      )
      (1): Sequential(
        (0): EdgeResidual(
          (conv_exp): Conv2dSame(16, 64, kernel_size=(3, 3), stride=(2, 2), bias=False)
          (bn1): BatchNormAct2d(
            64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity

## Create Dataset and DataLoader (EfficientNet)

In [37]:
import h5py

# HDF5 archive with the training images.
train_hdf = path / 'train-image.hdf5'

# The "valid" pipeline (resize + normalise + to-tensor) is used because this
# notebook only runs inference on the training images.
train_dataset = ISICDataset(train, train_hdf, transforms=data_transforms["valid"])
# shuffle=False keeps the batches in the row order of `train`; the inference
# cell relies on that when it assigns the predictions back as a column.
train_loader = DataLoader(train_dataset, batch_size=64, 
                          num_workers=2, shuffle=False, pin_memory=True)

In [38]:
# HDF5 archive with the test images.
test_hdf = path / 'test-image.hdf5'

# Same preprocessing as for the training images so both prediction columns
# are produced under identical conditions.
test_dataset = ISICDataset(test, test_hdf, transforms=data_transforms["valid"])
# shuffle=False keeps the batches in the row order of `test`; the inference
# cell relies on that when it assigns the predictions back as a column.
test_loader = DataLoader(test_dataset, batch_size=64, 
                          num_workers=2, shuffle=False, pin_memory=True)

## Inference using custom model (created by Semin)

In [39]:
# Inference on the training data: collect one prediction per row of `train`.
# Make sure BatchNorm/dropout layers run in inference mode; otherwise the
# outputs depend on which images happen to share a batch.
model.eval()
preds = []
with torch.no_grad():
    bar = tqdm(train_loader, total=len(train_loader))
    for data in bar:
        images = data['image'].to('cuda', dtype=torch.float)
        outputs = model(images)
        # No detach() needed: no graph is recorded under torch.no_grad().
        preds.append(outputs.cpu().numpy())
# Batches come out as (batch, 1) arrays; flatten them into a single vector
# whose order matches the (unshuffled) loader, i.e. the rows of `train`.
preds = np.concatenate(preds).flatten()
train['target_eff_semin'] = preds

100%|██████████| 6267/6267 [11:48<00:00,  8.85it/s]


In [40]:
# Inference on the test data: collect one prediction per row of `test`.
# Make sure BatchNorm/dropout layers run in inference mode; otherwise the
# outputs depend on which images happen to share a batch.
model.eval()
preds = []
with torch.no_grad():
    bar = tqdm(test_loader, total=len(test_loader))
    for data in bar:
        images = data['image'].to('cuda', dtype=torch.float)
        outputs = model(images)
        # No detach() needed: no graph is recorded under torch.no_grad().
        preds.append(outputs.cpu().numpy())
# Batches come out as (batch, 1) arrays; flatten them into a single vector
# whose order matches the (unshuffled) loader, i.e. the rows of `test`.
preds = np.concatenate(preds).flatten()
test['target_eff_semin'] = preds

100%|██████████| 1/1 [00:00<00:00,  3.76it/s]


In [41]:
# Write both frames, now carrying the model's predictions in the
# `target_eff_semin` column, to the notebook's output directory.
# NOTE(review): index=False is not passed, so pandas also writes the row index
# as an unnamed first column — confirm the downstream notebook expects that.
train.to_csv('train_custom_eff_semin0.csv') # check
test.to_csv('submission_custom_eff_semin0.csv') # check