# ISIC 2024 - Skin Cancer Detection: PyTorch Model w/ Image + Metadata Inference

Inspired by [motono0223](https://www.kaggle.com/code/motono0223/isic-pytorch-baseline-pseudo-labeling-eva02#Create-Model) and [Jiadi Wang](https://www.kaggle.com/code/hugowjd/isic-2024-pytorch-inference-effnet-b3#Create-Model)

Idea:
* Engineer features from the metadata and feed a small set of them to the model alongside the images
* Augment the images for diversity
* Run inference with multiple folds

Notebooks:
* [Training notebook](https://www.kaggle.com/code/qiaoyingzhang/isic-2024-pytorch-training-baseline-swin/notebook?scriptVersionId=191356799) -- version 16
* Inference notebook (current)

Best models (manually selected from the training notebook) are uploaded to this [dataset](https://www.kaggle.com/datasets/qiaoyingzhang/isic-2024-swin-pytorch-best-models):
* Fold 0 best model from [version 9](https://www.kaggle.com/code/qiaoyingzhang/isic-2024-pytorch-training-baseline-swin?scriptVersionId=191338975) (AUROC0.5181_Loss0.2202_epoch41_fold0.bin)
* Fold 1 best model from [version 8](https://www.kaggle.com/code/qiaoyingzhang/isic-2024-pytorch-training-baseline-swin?scriptVersionId=191338963) (AUROC0.5184_Loss0.3117_epoch31_fold1.bin)
* Fold 2 best model from [version 10](https://www.kaggle.com/code/qiaoyingzhang/isic-2024-pytorch-training-baseline-swin?scriptVersionId=191342208) (AUROC0.5173_Loss0.2800_epoch41_fold2.bin)
* Fold 3 best model from [version 11](https://www.kaggle.com/code/qiaoyingzhang/isic-2024-pytorch-training-baseline-swin?scriptVersionId=191342225) (AUROC0.5175_Loss0.3176_epoch6_fold3.bin)
* Fold 4 best model from [version 12](https://www.kaggle.com/code/qiaoyingzhang/isic-2024-pytorch-training-baseline-swin?scriptVersionId=191346483) (AUROC0.5177_Loss0.2280_epoch49_fold4.bin)

# Import Libraries

In [None]:
import os
import gc
import cv2
import copy
import time
import random
import glob
import h5py
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from PIL import Image
from io import BytesIO

# PyTorch Imports
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

# Utils
import joblib
from tqdm import tqdm
from collections import defaultdict

# Sklearn Imports
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import StratifiedGroupKFold

# For Image Models
import timm

# Albumentations for augmentations
import albumentations as A
from albumentations.pytorch import ToTensorV2

# For colored terminal text
from colorama import Fore, Back, Style
b_ = Fore.BLUE  # short alias: blue foreground colour code
sr_ = Style.RESET_ALL  # short alias: reset-all-styles code

import warnings
# Suppress every warning for the remainder of the notebook.
warnings.filterwarnings("ignore")

# For descriptive error messages
# NOTE(review): this is set after `import torch`; presumably it still takes
# effect because CUDA has not been initialised yet, and it likely slows GPU
# inference by making kernel launches synchronous -- confirm whether it is
# still wanted for the final submission run.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

# Initialize Environment & Configuration

In [None]:
# Notebook-wide inference settings; the cells below read them by key.
CONFIG = dict(
    seed=2024,              # value handed to seed_everything
    img_size=224,           # side length used by the resize transform
    model_name='swin_large_patch4_window7_224',  # name passed to the model constructor
    valid_batch_size=64,    # batch size of the test DataLoader
    multiple_folds=True,
    selected=True,
    n_fold=5,
    # First CUDA device when one is available, otherwise the CPU.
    device=(
        torch.device("cuda:0")
        if torch.cuda.is_available()
        else torch.device("cpu")
    ),
)

# Set seed for reproducibility
def seed_everything(seed):
    """Seed every random number generator this notebook can draw from.

    Covers Python's ``random`` module, NumPy, and PyTorch (CPU plus all CUDA
    devices), and switches cuDNN to deterministic, non-benchmarking mode.

    Args:
        seed: Integer seed applied to all generators.
    """
    # `random` is imported by the notebook but was previously left unseeded,
    # so anything drawing from it was not reproducible.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every CUDA device, not just the current one; safe without a GPU.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # Exported for any Python process started from here on.
    os.environ['PYTHONHASHSEED'] = str(seed)
    
# Seed all generators once, before any data or model code runs.
seed_everything(CONFIG['seed'])

In [None]:
# Competition input files: test metadata table, test image archive, and the
# sample submission used as the output template.
ROOT_DIR = "/kaggle/input/isic-2024-challenge"
TEST_CSV = "/".join((ROOT_DIR, "test-metadata.csv"))
TEST_HDF = "/".join((ROOT_DIR, "test-image.hdf5"))
SAMPLE = "/".join((ROOT_DIR, "sample_submission.csv"))

# Data Setup (Pseudo Labeling)

In [None]:
# Read the test metadata and attach a placeholder label column: the dataset
# class looks up 'target' for every row even though inference has no labels.
df = pd.read_csv(TEST_CSV).assign(target=0)  # dummy

In [None]:
# Feature engineering
# Columns that make up each sample's metadata vector, in the order the model
# receives them (presumably the same order used at training time -- confirm).
num_feat = [
    'age_approx',
    'lesion_size_ratio',
    'color_uniformity',
    'tbp_lv_Lext',
    'tbp_lv_eccentricity',
    '3d_position_distance',
]

# Derived columns: two ratios and the Euclidean norm of the (x, y, z) position.
df['lesion_size_ratio'] = df['tbp_lv_minorAxisMM'].div(df['clin_size_long_diam_mm'])
df['color_uniformity'] = df['tbp_lv_color_std_mean'].div(df['tbp_lv_radial_color_std_max'])
df['3d_position_distance'] = np.sqrt(
    df['tbp_lv_x'].pow(2) + df['tbp_lv_y'].pow(2) + df['tbp_lv_z'].pow(2)
)

# The ratios above can produce +/-inf (zero denominators); treat those as
# missing so they are imputed together with ordinary NaNs.
df[num_feat] = df[num_feat].replace([np.inf, -np.inf], np.nan)

# Impute missing entries with the per-column mean.
df[num_feat] = df[num_feat].fillna(df[num_feat].mean())

# Standardise each feature column.
# NOTE(review): the scaler is fitted on the test metadata itself, not on the
# training statistics -- confirm this matches how the model was trained.
scaler = StandardScaler()
df[num_feat] = scaler.fit(df[num_feat]).transform(df[num_feat])

print("Feature engineering and scaling complete.")

In [None]:
# Sanity check: show the metadata frame (with the engineered, scaled columns)
# as this cell's output.
df

In [None]:
# Sample submission: template frame whose 'target' column is overwritten with
# the model predictions further down.
df_sub = pd.read_csv(SAMPLE)

# Dataset Manipulation

In [None]:
# Dataset
class ISICDataset(Dataset):
    """Serve image / target / metadata samples backed by an HDF5 image archive.

    Args:
        dataframe: Table with ``isic_id`` and ``target`` columns plus every
            column named in ``feat``.
        feat: Column names whose values form each sample's metadata vector.
        file_path: Path of the HDF5 file; each ``isic_id`` is used as a key
            whose entry holds the encoded image bytes.
        transforms: Optional callable invoked as ``transforms(image=...)``
            whose result is indexed with ``"image"``.
    """

    def __init__(self, dataframe, feat, file_path, transforms=None):
        self.df = dataframe
        # Store only the path. The archive is opened on first access (see
        # _hdf5) so that each DataLoader worker process opens its own handle
        # instead of inheriting one opened before the workers were started.
        self.file_path = file_path
        self._hdf5_file = None
        self.transforms = transforms
        # Extract the per-row lookups once instead of building a pandas row
        # object for every sample.
        self.metadata = self.df[feat].values
        self.isic_ids = self.df['isic_id'].values
        self.targets = self.df['target'].values

    def _hdf5(self):
        """Return the read-only HDF5 handle, opening it on first use."""
        if self._hdf5_file is None:
            self._hdf5_file = h5py.File(self.file_path, mode="r")
        return self._hdf5_file

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return a dict with the decoded image, target and metadata of row ``idx``."""
        isic_id = self.isic_ids[idx]
        target = self.targets[idx]
        metadata = self.metadata[idx]

        # The archive entry holds encoded image bytes; decode them via PIL.
        image = np.array(Image.open(BytesIO(self._hdf5()[isic_id][()])))
        # NOTE(review): PIL presumably already yields RGB, so this call swaps
        # the channel order. It is kept unchanged because the checkpoints were
        # presumably trained with the same preprocessing -- confirm against
        # the training notebook before removing it.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transforms:
            image = self.transforms(image=image)["image"]

        return {'image': image, 'target': target, 'metadata': metadata}

# Modeling

In [None]:
class Swish(torch.autograd.Function):
    """Swish activation, ``x * sigmoid(x)``, with a hand-written backward pass."""

    @staticmethod
    def forward(ctx, i):
        """Return ``i * sigmoid(i)`` and stash the input for ``backward``."""
        ctx.save_for_backward(i)
        return i * torch.sigmoid(i)

    @staticmethod
    def backward(ctx, grad_output):
        """Return the gradient of the loss with respect to the forward input."""
        # `saved_tensors` is the supported accessor; `saved_variables` is the
        # deprecated spelling.
        (i,) = ctx.saved_tensors
        sigmoid_i = torch.sigmoid(i)
        # d/dx [x * s(x)] = s(x) * (1 + x * (1 - s(x)))
        return grad_output * (sigmoid_i * (1 + i * (1 - sigmoid_i)))


class Swish_Module(nn.Module):
    """``nn.Module`` wrapper so Swish can be placed inside ``nn.Sequential``."""

    def forward(self, x):
        return Swish.apply(x)
        

class CustomISICModel(nn.Module):
    """Image backbone plus metadata MLP, fused by a small classification head.

    The image branch is a timm model with its classifier removed; the metadata
    branch maps the tabular features to 128 values. Both outputs are
    concatenated and passed through ``final_fc``, which ends in a sigmoid.

    Args:
        model_name: timm architecture name for the image backbone.
        num_classes: Width of the final linear layer.
        pretrained: Forwarded to ``timm.create_model``.
        checkpoint_path: Optional backbone checkpoint forwarded to
            ``timm.create_model``; ``None`` means no checkpoint.
        metadata_input_features: Number of metadata values per sample
            (defaults to 6, the previously hard-coded width).
    """

    def __init__(self, model_name, num_classes=1, pretrained=True, checkpoint_path=None,
                 metadata_input_features=6):
        super().__init__()
        # The keyword is spelled `checkpoint_path`; the earlier misspelling
        # meant the argument never reached timm's checkpoint loading. An empty
        # string stands in for "no checkpoint".
        self.image_model = timm.create_model(model_name, pretrained=pretrained,
                                             checkpoint_path=checkpoint_path or '')
        # Read the feature width before the classifier is dropped.
        self.image_out_features = self.image_model.get_classifier().in_features
        self.image_model.reset_classifier(0)  # Remove the original classifier

        # Metadata part
        metadata_output_features = 128

        # Layer order is kept exactly as before so state-dict keys
        # (metadata_fc.0, metadata_fc.1, ...) still match saved checkpoints.
        self.metadata_fc = nn.Sequential(
            nn.Linear(metadata_input_features, 128),
            nn.BatchNorm1d(128),
            Swish_Module(),
            nn.Dropout(0.3),
            nn.Linear(128, 256),
            nn.BatchNorm1d(256),
            Swish_Module(),
            nn.Dropout(0.3),
            nn.Linear(256, metadata_output_features),
            nn.BatchNorm1d(metadata_output_features),
            Swish_Module(),
        )

        # Combine features from image model and metadata
        combined_features = self.image_out_features + metadata_output_features
        self.final_fc = nn.Sequential(
            nn.Linear(combined_features, 512),
            nn.BatchNorm1d(512),
            Swish_Module(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes),
            nn.Sigmoid(),
        )

    def forward(self, image, metadata):
        """Return the sigmoid output for a batch of images and metadata rows.

        Both inputs are concatenated along dim 1 after their branches, so they
        must share the same batch dimension.
        """
        image_features = self.image_model(image)
        metadata_features = self.metadata_fc(metadata)
        combined_features = torch.cat((image_features, metadata_features), dim=1)
        return self.final_fc(combined_features)

# Data Augmentation

In [None]:
# Define image transformer
# Inference-time pipeline only: resize to the configured square size,
# normalise with fixed per-channel statistics, then convert to a tensor.
data_transforms = dict(
    valid=A.Compose(
        [
            A.Resize(height=CONFIG['img_size'], width=CONFIG['img_size']),
            A.Normalize(
                mean=[0.4815, 0.4578, 0.4082],
                std=[0.2686, 0.2613, 0.2758],
                max_pixel_value=255.0,
            ),
            ToTensorV2(),
        ]
    ),
)

# Inference Function

In [None]:
def inference(df_test, file_test_hdf, test_loader, model, device):
    """Run ``model`` over every batch of ``test_loader`` and collect its outputs.

    Args:
        df_test: Unused; kept so existing call sites keep working.
        file_test_hdf: Unused; kept so existing call sites keep working.
        test_loader: Sized iterable of batches, each a mapping with ``'image'``
            and ``'metadata'`` tensors.
        model: Module called as ``model(images, metadata)``.
        device: Device the batch tensors are moved to before the forward pass.

    Returns:
        A 1-D NumPy array holding all model outputs in loader order; empty
        when the loader yields no batches.
    """
    model.eval()

    preds = []
    with torch.no_grad():
        for data in tqdm(test_loader, total=len(test_loader)):
            images = data['image'].to(device, dtype=torch.float)
            metadata = data['metadata'].to(device, dtype=torch.float)
            # reshape(-1) rather than squeeze(): with a final batch of a single
            # sample, squeeze() produces a 0-d array and np.concatenate then
            # raises. reshape(-1) always yields one dimension.
            outputs = model(images, metadata).reshape(-1)
            preds.append(outputs.cpu().numpy())

    if not preds:
        # np.concatenate raises on an empty list; return an empty result instead.
        return np.empty(0, dtype=np.float32)

    return np.concatenate(preds)

# Submission

In [None]:
# Prepare test loader
# The dataset pairs each metadata row with its image from the HDF5 archive;
# the loader batches it without shuffling.
test_dataset = ISICDataset(
    dataframe=df,
    feat=num_feat,
    file_path=TEST_HDF,
    transforms=data_transforms['valid'],
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=CONFIG['valid_batch_size'],
    shuffle=False,
    num_workers=2,
    pin_memory=True,
)

In [None]:
# model load
model = CustomISICModel(CONFIG['model_name'], pretrained=True)
model_dict = model.state_dict()

# pre-trained metadata model load
# map_location="cpu" lets a checkpoint saved on a GPU load on any machine; the
# model is moved to the target device at the end of this cell.
meta_state_dict = torch.load(
    "/kaggle/input/isic-2024-pytorch-training-baseline-vit/AUROC0.5185_Loss0.3199_epoch31.bin",
    map_location="cpu",
)
# Keep only entries whose name AND shape match this architecture, so a tensor
# from a differently sized model cannot make load_state_dict fail.
meta_pretrained_dict = {
    k: v for k, v in meta_state_dict.items()
    if k in model_dict and v.shape == model_dict[k].shape
}
model_dict.update(meta_pretrained_dict)

# pre-trained swin image model load
image_state_dict = torch.load(
    "/kaggle/input/isic-2024-swin-model-ver-24/AUROC0.5012_Loss0.2076_epoch50.bin",
    map_location="cpu",
)
image_pretrained_dict = {
    k: v for k, v in image_state_dict.items()
    if k in model_dict and v.shape == model_dict[k].shape
}
model_dict.update(image_pretrained_dict)

# Updating the dictionary returned by state_dict() only rebinds its entries and
# leaves the model's parameters untouched; the merged weights have to be loaded
# back explicitly, otherwise the checkpoints above are never used.
model.load_state_dict(model_dict)
print(
    f"Restored {len(meta_pretrained_dict)} tensors from the metadata checkpoint "
    f"and {len(image_pretrained_dict)} from the image checkpoint "
    f"(model has {len(model_dict)})."
)

model.to(CONFIG['device'])

In [None]:
# Score the whole test set, then write the scores into the submission frame.
# NOTE(review): the assignment is positional, so it assumes df_sub lists the
# samples in the same order as df -- confirm.
preds = inference(
    df_test=df,
    file_test_hdf=TEST_HDF,
    test_loader=test_loader,
    model=model,
    device=CONFIG['device'],
)
df_sub = df_sub.assign(target=preds)

In [None]:
# Show the submission frame with the predictions filled in as this cell's output.
df_sub

In [None]:
# Write only the id and prediction columns, without the pandas index.
df_sub.to_csv("submission.csv", columns=['isic_id', 'target'], index=False)