In [36]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import torch
import os
from pathlib import Path

# Root directory holding the metadata CSVs and image HDF5 archives.
# Set the ISIC_DATA_DIR environment variable to relocate it; when the variable
# is unset the original location is used.
path = Path(os.environ.get('ISIC_DATA_DIR', '/USER/data'))

In [37]:
# Read the metadata tables for both splits from the data directory.
# low_memory=False makes pandas parse each file in a single pass.
train = pd.read_csv(path / 'train-metadata.csv', low_memory=False)
test = pd.read_csv(path / 'test-metadata.csv', low_memory=False)

# Report (rows, columns) for each split.
print('train shape :', train.shape)
print('test shape :', test.shape)

train shape : (401059, 55)
test shape : (3, 44)


In [38]:
from PIL import Image
import os
import gc
import cv2
import math
import copy
import time
import random
from glob import glob

# torch imports
import torch
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.cuda import amp
import torchvision

# Utils
import joblib
from tqdm import tqdm
from collections import defaultdict

import warnings
warnings.filterwarnings("ignore")

# For colored terminal text
from colorama import Fore, Back, Style
b_ = Fore.BLUE  # short alias for colorama's blue foreground code
sr_ = Style.RESET_ALL  # short alias for colorama's reset-all-styles code

# For descriptive error messages
# NOTE(review): presumably this must be set before the first CUDA call to take
# effect -- confirm against the CUDA/PyTorch documentation.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

# Fix the random seeds

SEED = 22

def set_seed(SEED):
    """Seed every random number generator this notebook imports.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU and all CUDA devices), and switches cuDNN to deterministic mode.

    Args:
        SEED: Integer seed applied to every generator.
    """
    # The stdlib generator was previously left unseeded even though `random`
    # is imported by this notebook.
    random.seed(SEED)
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    torch.cuda.manual_seed(SEED)
    # Also cover every visible GPU, not only the current device.
    torch.cuda.manual_seed_all(SEED)
    # When running on the CuDNN backend, two further options must be set
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # Set a fixed value for the hash seed.
    # NOTE(review): presumably this only influences interpreters started after
    # this point, not the hashing of the already-running kernel -- confirm.
    os.environ['PYTHONHASHSEED'] = str(SEED)

set_seed(SEED)


In [39]:
# Inspect the metadata before adding columns to it.
# Preview of the first five rows of the training table.
train.head(5)

Unnamed: 0,isic_id,target,patient_id,age_approx,sex,anatom_site_general,clin_size_long_diam_mm,image_type,tbp_tile_type,tbp_lv_A,...,lesion_id,iddx_full,iddx_1,iddx_2,iddx_3,iddx_4,iddx_5,mel_mitotic_index,mel_thick_mm,tbp_lv_dnn_lesion_confidence
0,ISIC_0015670,0,IP_1235828,60.0,male,lower extremity,3.04,TBP tile: close-up,3D: white,20.244422,...,,Benign,Benign,,,,,,,97.517282
1,ISIC_0015845,0,IP_8170065,60.0,male,head/neck,1.1,TBP tile: close-up,3D: white,31.71257,...,IL_6727506,Benign,Benign,,,,,,,3.141455
2,ISIC_0015864,0,IP_6724798,60.0,male,posterior torso,3.4,TBP tile: close-up,3D: XP,22.57583,...,,Benign,Benign,,,,,,,99.80404
3,ISIC_0015902,0,IP_4111386,65.0,male,anterior torso,3.22,TBP tile: close-up,3D: XP,14.242329,...,,Benign,Benign,,,,,,,99.989998
4,ISIC_0024200,0,IP_8313778,55.0,male,anterior torso,2.73,TBP tile: close-up,3D: white,24.72552,...,,Benign,Benign,,,,,,,70.44251


In [40]:
# Add a placeholder label so the test frame exposes the same `target` column
# that ISICDataset reads from its input frame.
test['target'] = 0 # dummy

# Check the test data. The preview has to be the last expression of the cell,
# otherwise its result is discarded and nothing is displayed.
test.head()

In [41]:
from io import BytesIO

class ISICDataset(Dataset):
    """Dataset that decodes images stored as encoded bytes in an HDF5 file.

    Each item is looked up in the HDF5 file by its ``isic_id`` and decoded
    with PIL; the label comes from the ``target`` column of ``df``.

    The HDF5 file is opened lazily, on first access in each process, rather
    than in ``__init__``. This way DataLoader worker processes each open their
    own handle instead of sharing one created in the parent process. As a
    consequence, a missing or unreadable file is reported on the first item
    access rather than at construction time.

    Args:
        df: DataFrame with at least the ``isic_id`` and ``target`` columns.
        file_hdf: Path to the HDF5 file keyed by ``isic_id``.
        transforms: Optional callable invoked as ``transforms(image=img)`` and
            expected to return a mapping with an ``"image"`` entry.
    """

    def __init__(self, df, file_hdf, transforms=None):
        self.df = df
        self.file_hdf_path = file_hdf
        self._file_hdf = None  # opened on demand by the `file_hdf` property
        self.isic_ids = df['isic_id'].values
        self.targets = df['target'].values # check
        self.transforms = transforms

    @property
    def file_hdf(self):
        """Open HDF5 handle for the current process (created on first use)."""
        if self._file_hdf is None:
            # Imported here so the class does not depend on a later notebook
            # cell having imported h5py at module level.
            import h5py
            self._file_hdf = h5py.File(self.file_hdf_path, mode="r")
        return self._file_hdf

    def __getstate__(self):
        """Drop the open handle when pickled; it is reopened lazily afterwards."""
        state = self.__dict__.copy()
        state['_file_hdf'] = None
        return state

    def __len__(self):
        return len(self.isic_ids)

    def __getitem__(self, index):
        """Return ``{'image': ..., 'target': ...}`` for the row at ``index``."""
        isic_id = self.isic_ids[index]
        # The HDF5 entry holds the encoded image bytes; decode them via PIL.
        img = np.array(Image.open(BytesIO(self.file_hdf[isic_id][()])))
        target = self.targets[index]

        if self.transforms:
            img = self.transforms(image=img)["image"]

        return {
            'image': img,
            'target': target
        }

In [42]:
# Albumentations for augmentations
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Preprocessing pipelines keyed by split name. Only the deterministic "valid"
# pipeline is defined: resize, normalise with the given per-channel mean/std,
# then convert to a tensor.
data_transforms = dict(
    valid=A.Compose(
        [
            A.Resize(256, 256),  # check size
            A.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
                max_pixel_value=255.0,
                p=1.0,
            ),
            ToTensorV2(),
        ],
        p=1.,
    ),
)

In [43]:
class GeM(nn.Module):
    """Generalized-mean pooling over the last two dimensions of the input.

    Values are clamped from below at ``eps``, raised to the power ``p``,
    averaged over the full extent of the last two dimensions and raised to
    ``1 / p``. ``p`` is stored as a one-element ``nn.Parameter``.
    """

    def __init__(self, p=3, eps=1e-6):
        super().__init__()
        self.p = nn.Parameter(torch.ones(1) * p)
        self.eps = eps

    def forward(self, x):
        """Pool ``x`` using the module's own exponent and epsilon."""
        return self.gem(x, p=self.p, eps=self.eps)

    def gem(self, x, p=3, eps=1e-6):
        """Apply generalized-mean pooling to ``x`` with exponent ``p``."""
        height, width = x.size(-2), x.size(-1)
        powered = x.clamp(min=eps).pow(p)
        # A kernel covering the whole map leaves a 1x1 output per channel.
        pooled = F.avg_pool2d(powered, (height, width))
        return pooled.pow(1. / p)

    def __repr__(self):
        exponent = self.p.data.tolist()[0]
        return f"{self.__class__.__name__}(p={exponent:.4f}, eps={self.eps})"
    
'''
For ViT, pool in 1-D (the 2-D version is not used).
'''    
class ViTGeM(nn.Module):
    """Generalized-mean pooling over the last dimension of the input.

    Same computation as the 2-D variant but using a 1-D adaptive average
    pool: clamp at ``eps``, raise to ``p``, average the last dimension down
    to length 1 and raise to ``1 / p``. ``p`` is a one-element ``nn.Parameter``.
    """

    def __init__(self, p=3, eps=1e-6):
        super().__init__()
        self.p = nn.Parameter(torch.ones(1) * p)
        self.eps = eps

    def forward(self, x):
        """Pool ``x`` using the module's own exponent and epsilon."""
        return self.gem(x, p=self.p, eps=self.eps)

    def gem(self, x, p=3, eps=1e-6):
        """Apply 1-D generalized-mean pooling to ``x`` with exponent ``p``."""
        powered = x.clamp(min=eps).pow(p)
        pooled = F.adaptive_avg_pool1d(powered, 1)
        return pooled.pow(1. / p)

    def __repr__(self):
        exponent = self.p.data.tolist()[0]
        return f"{self.__class__.__name__}(p={exponent:.4f}, eps={self.eps})"

In [53]:
import timm
import torch.ao.quantization as quant

class SkinModel(nn.Module):
    """timm backbone followed by GeM pooling and a linear head.

    The backbone's own ``classifier`` and ``global_pool`` are replaced with
    ``nn.Identity`` so its output goes straight into ``GeM``; the pooled
    result is flattened and projected to ``num_classes`` outputs.

    Args:
        model_name: Architecture name passed to ``timm.create_model``.
        num_classes: Width of the final linear layer.
        pretrained: Forwarded to ``timm.create_model``.
        checkpoint_path: Forwarded to ``timm.create_model``.
    """

    def __init__(self, model_name, num_classes=1, pretrained=True, checkpoint_path=None):
        super().__init__()
        backbone = timm.create_model(
            model_name,
            pretrained=pretrained,
            checkpoint_path=checkpoint_path,
        )
        # Read the head's input width before the classifier is swapped out.
        feature_dim = backbone.classifier.in_features
        backbone.classifier = nn.Identity()
        backbone.global_pool = nn.Identity()

        self.model = backbone
        self.num_classes = num_classes
        self.pooling = GeM()
        self.linear = nn.Linear(feature_dim, num_classes)

    def forward(self, images):
        """Return the raw (pre-sigmoid) outputs of the linear head."""
        features = self.model(images)
        pooled = self.pooling(features).flatten(1)
        return self.linear(pooled)

# Build the architecture without timm's pretrained weights; the weights come
# from the local checkpoint loaded below.
model = SkinModel("tf_efficientnetv2_m.in21k", pretrained=False)

# map_location='cpu' keeps deserialisation independent of the device the
# checkpoint was saved from; the model is moved to the GPU afterwards.
load_report = model.load_state_dict(
    torch.load('/USER/semin/auc0.9838570894126449_loss4.820306866257279_epoch30.bin',
               map_location='cpu'),
    strict=False,
) # change

# strict=False skips mismatched keys silently, so surface them: a non-empty
# list means part of the network was not restored from the checkpoint.
if load_report.missing_keys or load_report.unexpected_keys:
    print(f'missing keys: {load_report.missing_keys}')
    print(f'unexpected keys: {load_report.unexpected_keys}')

# eval() switches BatchNorm/Dropout layers to inference behaviour; without it
# the predictions depend on batch composition. Both calls return the model,
# so the cell still displays its structure.
model.to('cuda').eval()

SkinModel(
  (model): EfficientNet(
    (conv_stem): Conv2dSame(3, 24, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (bn1): BatchNormAct2d(
      24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True
      (drop): Identity()
      (act): SiLU(inplace=True)
    )
    (blocks): Sequential(
      (0): Sequential(
        (0): ConvBnAct(
          (conv): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn1): BatchNormAct2d(
            24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): SiLU(inplace=True)
          )
          (drop_path): Identity()
        )
        (1): ConvBnAct(
          (conv): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn1): BatchNormAct2d(
            24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): SiLU(inplace=True)
          )


In [55]:
!pip install h5py

Collecting h5py
  Downloading h5py-3.11.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.5 kB)
Downloading h5py-3.11.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.3/5.3 MB[0m [31m34.4 MB/s[0m eta [36m0:00:00[0m
    jupyter-core!=~5.0,>=4.12
                ^[0m[33m
[0mInstalling collected packages: h5py
Successfully installed h5py-3.11.0
[0m

In [56]:
import h5py

# HDF5 archive with the training images.
train_hdf = path / 'train-image.hdf5'

# Training images with the "valid" preprocessing pipeline.
train_dataset = ISICDataset(
    df=train,
    file_hdf=train_hdf,
    transforms=data_transforms["valid"],
)
# shuffle=False keeps batches in the row order of `train`.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=64,
    shuffle=False,
    num_workers=2,
    pin_memory=True,
)

In [57]:
# HDF5 archive with the test images.
test_hdf = path / 'test-image.hdf5'

# Test images with the "valid" preprocessing pipeline.
test_dataset = ISICDataset(
    df=test,
    file_hdf=test_hdf,
    transforms=data_transforms["valid"],
)
# shuffle=False keeps batches in the row order of `test`.
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=64,
    shuffle=False,
    num_workers=2,
    pin_memory=True,
)

In [58]:
# Placeholder column, initialised to 0 for every row.
train["target_eff"] = 0  # eff

In [None]:
# Combine, step 1: run the model over every training image and store the
# sigmoid of its output in the `target_eff` column.

# Make sure BatchNorm/Dropout layers run in inference mode; eval() is
# idempotent, so calling it again here is harmless.
model.eval()

preds = []
with torch.no_grad():
    bar = tqdm(train_loader, total=len(train_loader))
    for data in bar:
        images = data['image'].to('cuda', dtype=torch.float)
        outputs = torch.sigmoid(model(images))
        preds.append(outputs.detach().cpu().numpy())
preds1 = np.concatenate(preds).flatten()

# One prediction per row is required before writing the column.
assert len(preds1) == len(train), (
    f'expected {len(train)} predictions, got {len(preds1)}'
)
train['target_eff'] = preds1

  2%|▏         | 154/6267 [01:08<45:10,  2.26it/s]