In [1]:
import pandas as pd
import numpy as np
from io import BytesIO
from torch.utils.data import Dataset,DataLoader
import torchvision.transforms as transforms
from PIL import Image
import torch

In [2]:
import os
import gc
import cv2
import math
import copy
import time
import random
import glob
from matplotlib import pyplot as plt

# For data manipulation
import numpy as np
import pandas as pd

# Pytorch Imports
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.cuda import amp
import torchvision

# Utils
import joblib
from tqdm import tqdm
from collections import defaultdict

# Sklearn Imports
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import StratifiedKFold, StratifiedGroupKFold 

# For Image Models
import timm

# Albumentations for augmentations
import albumentations as A
from albumentations.pytorch import ToTensorV2

# For colored terminal text
from colorama import Fore, Back, Style
b_ = Fore.BLUE
sr_ = Style.RESET_ALL

import warnings
warnings.filterwarnings("ignore")

# For descriptive error messages
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [3]:
CONFIG = {
    "seed": 42,
    "neg_sample_ratio" : 0.02,
    "epochs": 50,
    "img_size": 384,
    "model_name": "efficientnet_b3",
    #"checkpoint_path" : "/kaggle/input/tf-efficientnet/pytorch/tf-efficientnet-b0/1/tf_efficientnet_b0_aa-827b6e33.pth",
    "train_batch_size": 32,
    "valid_batch_size": 64,
    "learning_rate": 1e-4,
    "scheduler": 'CosineAnnealingLR',
    "min_lr": 1e-6,
    "T_max": 500,
    "weight_decay": 1e-6,
    "fold" : 0,
    "n_fold": 5,
    "n_accumulate": 1,
    "device": torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
}

In [4]:
import h5py

In [5]:
import pickle

In [6]:
import matplotlib.colors as mcolors

class GradCAM:
    def __init__(self, model, target_layer):
        self.model = model
        self.target_layer = target_layer
        self.gradients = None
        self.activations = None
        
        self.target_layer.register_forward_hook(self.save_activation)
        self.target_layer.register_backward_hook(self.save_gradient)
    
    def save_activation(self, module, input, output):
        self.activations = output.detach()
    
    def save_gradient(self, module, grad_input, grad_output):
        self.gradients = grad_output[0].detach()
    
    def __call__(self, x):
        self.model.eval()
        logit = self.model(x)
        
        score = logit.squeeze()
        
        self.model.zero_grad()
        score.backward(retain_graph=True)
        
        gradients = self.gradients
        activations = self.activations
        b, k, u, v = gradients.size()
        
        alpha = gradients.view(b, k, -1).mean(2)
        weights = alpha.view(b, k, 1, 1)
        
        saliency_map = (weights*activations).sum(1, keepdim=True)
        saliency_map = F.relu(saliency_map)
        saliency_map = F.interpolate(saliency_map, size=(x.size(2), x.size(3)), mode='bilinear', align_corners=False)
        saliency_map_min, saliency_map_max = saliency_map.min(), saliency_map.max()
        saliency_map = (saliency_map - saliency_map_min).div(saliency_map_max - saliency_map_min).data
        
        return saliency_map

In [7]:
class GeM(nn.Module):
    def __init__(self, p=3, eps=1e-6):
        super(GeM, self).__init__()
        self.p = nn.Parameter(torch.ones(1)*p)
        self.eps = eps

    def forward(self, x):
        return self.gem(x, p=self.p, eps=self.eps)
        
    def gem(self, x, p=3, eps=1e-6):
        return F.avg_pool2d(x.clamp(min=eps).pow(p), (x.size(-2), x.size(-1))).pow(1./p)
        
    def __repr__(self):
        return self.__class__.__name__ + \
                '(' + 'p=' + '{:.4f}'.format(self.p.data.tolist()[0]) + \
                ', ' + 'eps=' + str(self.eps) + ')'

In [8]:
class ISICModel(nn.Module):
    def __init__(self, model_name, num_classes=1, pretrained=False, checkpoint_path=None):
        super(ISICModel, self).__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained, checkpoint_path=checkpoint_path)

        in_features = self.model.classifier.in_features
        self.model.classifier = nn.Identity()
        self.model.global_pool = nn.Identity()
        self.pooling = GeM()
        self.linear = nn.Linear(in_features, num_classes)

    def forward(self, images):
        features = self.model(images)
        pooled_features = self.pooling(features).flatten(1)
        output = self.linear(pooled_features)
        return output

    
model = ISICModel(CONFIG['model_name'])
model.to(CONFIG['device']);

In [9]:
class MetaModel(nn.Module):
    def __init__(self,in_features):
        super().__init__()
        self.model=nn.Sequential(
                    nn.BatchNorm1d(in_features),
                    nn.Linear(in_features,512),
                    nn.BatchNorm1d(512),
                    nn.ReLU(),
                    nn.Dropout(p=0.5),
                    nn.Linear(512,256),
                    nn.BatchNorm1d(256),
                    nn.ReLU(),
                    nn.Dropout(p=0.5),
                    nn.Linear(256,128),
                    nn.BatchNorm1d(128),
                    nn.ReLU(),
                    nn.Dropout(p=0.4),
                    nn.Linear(128,1))
    def forward(self,x):
        return self.model(x)

In [10]:
test_path='/kaggle/input/isic-2024-challenge/test-image.hdf5'

In [11]:
test_meta_data=pd.read_csv('/kaggle/input/isic-2024-challenge/test-metadata.csv')

In [12]:
def preprocess(df,df_test,cat_labels):
    for col in df.columns:
        if((df[col].dtype=='float64' or df[col].dtype=='int64') and df[col].isna().sum()!=0):
            df[col].fillna(value=df[col].mean(),inplace=True);
        if(df[col].dtype=='object' and df[col].isna().sum()!=0):
            df[col].fillna(value=df[col].mode()[0],inplace=True);
    for col in df_test.columns:
        if((df_test[col].dtype=='float64' or df_test[col].dtype=='int64') and df_test[col].isna().sum()!=0):
            df_test[col].fillna(value=df_test[col].mean(),inplace=True);
        if(df_test[col].dtype=='object' and df_test[col].isna().sum()!=0):
            df_test[col].fillna(value=df_test[col].mode()[0],inplace=True);
    encoder = OneHotEncoder(sparse_output=False, drop='first')
    encoder.fit(df[cat_labels])
    new_cat_cols=encoder.get_feature_names_out(cat_labels)
    one_hot_encoded=encoder.transform(df_test[cat_labels])
    one_hot_df = pd.DataFrame(one_hot_encoded, columns=encoder.get_feature_names_out(cat_labels))
    df_encoded = pd.concat([df_test.drop(columns=cat_labels), one_hot_df], axis=1)
    df_f=df_encoded.drop(obj_but_not_cat,axis=1)
    return df_f, new_cat_cols

In [13]:
train=pd.read_csv('/kaggle/input/isic-2024-challenge/train-metadata.csv')

In [14]:
cols_unique_train=['lesion_id', 'iddx_full', 'iddx_1', 'iddx_2', 'iddx_3', 'iddx_4', 'iddx_5', 'mel_mitotic_index', 'mel_thick_mm', 'tbp_lv_dnn_lesion_confidence']
train=train.drop(cols_unique_train,axis=1)
cat_cols=['sex','anatom_site_general','tbp_tile_type','tbp_lv_location','tbp_lv_location_simple']
obj_but_not_cat=['image_type','copyright_license','attribution','patient_id','isic_id']

In [15]:
df_test,new_cat_cols=preprocess(train,test_meta_data,cat_cols)

In [16]:
df_test.shape

(3, 67)

In [17]:
df_test.columns

Index(['age_approx', 'clin_size_long_diam_mm', 'tbp_lv_A', 'tbp_lv_Aext',
       'tbp_lv_B', 'tbp_lv_Bext', 'tbp_lv_C', 'tbp_lv_Cext', 'tbp_lv_H',
       'tbp_lv_Hext', 'tbp_lv_L', 'tbp_lv_Lext', 'tbp_lv_areaMM2',
       'tbp_lv_area_perim_ratio', 'tbp_lv_color_std_mean', 'tbp_lv_deltaA',
       'tbp_lv_deltaB', 'tbp_lv_deltaL', 'tbp_lv_deltaLB',
       'tbp_lv_deltaLBnorm', 'tbp_lv_eccentricity', 'tbp_lv_minorAxisMM',
       'tbp_lv_nevi_confidence', 'tbp_lv_norm_border', 'tbp_lv_norm_color',
       'tbp_lv_perimeterMM', 'tbp_lv_radial_color_std_max', 'tbp_lv_stdL',
       'tbp_lv_stdLExt', 'tbp_lv_symm_2axis', 'tbp_lv_symm_2axis_angle',
       'tbp_lv_x', 'tbp_lv_y', 'tbp_lv_z', 'sex_male',
       'anatom_site_general_head/neck', 'anatom_site_general_lower extremity',
       'anatom_site_general_posterior torso',
       'anatom_site_general_upper extremity', 'tbp_tile_type_3D: white',
       'tbp_lv_location_Left Arm', 'tbp_lv_location_Left Arm - Lower',
       'tbp_lv_location_Left 

In [18]:
class MetaModel(nn.Module):
    def __init__(self,in_features):
        super().__init__()
        self.model=nn.Sequential(
                    nn.BatchNorm1d(in_features),
                    nn.Linear(in_features,128),
                    nn.BatchNorm1d(128),
                    nn.ReLU(),
                    nn.Dropout(p=0.6),
                    nn.Linear(128,128),
                    nn.BatchNorm1d(128),
                    nn.ReLU(),
                    nn.Dropout(p=0.6),
                    nn.Linear(128,128),
                    nn.BatchNorm1d(128),
                    nn.ReLU(),
                    nn.Dropout(p=0.6),
                    nn.Linear(128,1))
    def forward(self,x):
        return self.model(x)

In [19]:
meta_model = pickle.load(open("/kaggle/input/metamodel14/pytorch/default/1/meta_model_8_model (6).pkl",'rb'))
# sd1 = torch.load('/kaggle/input/metamodel13/pytorch/default/1/AUROC0.5400_Loss35.1872_epoch1.bin')

In [20]:
model=pickle.load(open('/kaggle/input/eff_net_03/pytorch/default/1/eff_net_2_model (1).pkl','rb'))
# sd2 = torch.load('/kaggle/input/effnet0-2/pytorch/default/1/AUROC0.5434_Loss35.1402_epoch6.bin')

In [21]:
sd=torch.load('/kaggle/input/effnet_5/pytorch/default/1/AUROC0.8463_Loss4.8312_epoch1.bin')

In [22]:
model.load_state_dict(sd)

<All keys matched successfully>

In [23]:
model2 = pickle.load(open('/kaggle/input/eff_net_03/pytorch/default/1/eff_net_2_model (1).pkl','rb'))

In [24]:
df_test.shape

(3, 67)

In [25]:
lgb_models=pickle.load(open('/kaggle/input/lgbm13/pytorch/default/1/lgbm_11 (2).pkl','rb'))

In [26]:
f1 = h5py.File('/kaggle/input/isic-2024-challenge/train-image.hdf5', 'r')

In [27]:
train_keys = list(f1.keys())

In [28]:
f2=h5py.File(test_path,'r')

In [29]:
keys=list(f2.keys())

In [30]:
# img=BytesIO(f2[keys[0]][()])

In [31]:
# test_transform(Image.open(img))

In [32]:
# img=np.array(Image.open(img))

In [33]:
# img=torch.tensor(img)

In [34]:
# img_2=img.to(torch.float32)

In [35]:
# img_2

In [36]:
# type(img)

In [37]:
class ISICDataset(Dataset):
    def __init__(self,transform=None):
        self.transform=transform
    def __len__(self):
        return len(keys)
    def __getitem__(self,idx):
        img=np.array(Image.open(BytesIO(f2[keys[idx]][()])))
        img=self.transform(image=img)['image']
        return img

In [38]:
class dISICDataset(Dataset):
    def __init__(self,transform=None):
        self.transform=transform
    def __len__(self):
        return len(train_keys)
    def __getitem__(self,idx):
        img=np.array(Image.open(BytesIO(f1[train_keys[idx]][()])))
        img=self.transform(image=img)['image']
        return img

In [39]:
data_transforms =A.Compose([
        A.Resize(CONFIG['img_size'], CONFIG['img_size']),
        A.Normalize(
                mean=[0.485, 0.456, 0.406], 
                std=[0.229, 0.224, 0.225], 
                max_pixel_value=255.0, 
                p=1.0
            ),
        ToTensorV2()], p=1.)

In [40]:
test_dataset = ISICDataset(data_transforms)

In [41]:
class TabularDataset_test(Dataset):
    def __init__(self,data,transform=None):
        self.data = data
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        X = self.data.iloc[idx].values.astype(float)
        
        if self.transform:
            X = self.transform(X)
        
        return torch.tensor(X, dtype=torch.float32)

In [42]:
test_dataloader=DataLoader(test_dataset,batch_size=32)

In [43]:
device='cuda' if torch.cuda.is_available else 'cpu'

In [44]:
from tqdm.auto import tqdm

In [45]:
import gc
gc.collect()

60

In [46]:
i = next(iter(test_dataloader))

In [47]:
model(i.to(device))

tensor([[-1.3276],
        [-7.9674],
        [-2.9345]], device='cuda:0', grad_fn=<AddmmBackward0>)

In [48]:
model(i.to(device)).shape

torch.Size([3, 1])

In [49]:
model.eval()
y=np.array([])
model.to(device)
for i,X in enumerate(tqdm(test_dataloader)):
    X=X.to(device)
    with torch.inference_mode():
        y=np.concatenate((y,np.array(torch.nn.functional.sigmoid(model(X)).detach().cpu()).ravel()))
    del X
    torch.cuda.empty_cache()
    gc.collect()

  0%|          | 0/1 [00:00<?, ?it/s]

In [50]:
df_test['connect_y']=y

In [51]:
test_metaset=TabularDataset_test(df_test)

In [52]:
test_metaloader=DataLoader(test_metaset,batch_size=32)

In [53]:
X=next(iter(test_metaloader))

In [54]:
X.shape

torch.Size([3, 68])

In [55]:
meta_model

MetaModel(
  (model): Sequential(
    (0): BatchNorm1d(68, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (1): Linear(in_features=68, out_features=128, bias=True)
    (2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): ReLU()
    (4): Dropout(p=0.6, inplace=False)
    (5): Linear(in_features=128, out_features=128, bias=True)
    (6): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): ReLU()
    (8): Dropout(p=0.6, inplace=False)
    (9): Linear(in_features=128, out_features=128, bias=True)
    (10): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): ReLU()
    (12): Dropout(p=0.6, inplace=False)
    (13): Linear(in_features=128, out_features=1, bias=True)
  )
)

In [56]:
meta_model.eval()
y_meta=np.array([])
meta_model.to(device)
for i,X in enumerate(tqdm(test_metaloader)):
    X=X.to(device)
    with torch.inference_mode():
        y_meta=np.concatenate((y_meta,np.array(torch.nn.functional.sigmoid(meta_model(X)).detach().cpu()).ravel()))
    del X
    torch.cuda.empty_cache()
    gc.collect()

  0%|          | 0/1 [00:00<?, ?it/s]

In [57]:
y_lgb = np.mean([model.predict_proba(df_test)[:, 1] for model in lgb_models], 0)

In [58]:
y

array([0.20955557, 0.00034646, 0.05047354])

In [59]:
y_meta

array([0.08166983, 0.00794605, 0.24012446])

In [60]:
y_lgb

array([0.00550863, 0.00852895, 0.01824066])

In [61]:
a = 0.33
b = 0.33
c = 0.33

In [62]:
yy=a*y+b*y_meta+c*y_lgb

In [63]:
# y = torch.cat(y, dim = 0).reshape(-1)

In [64]:
# y=y.squeeze().detach().cpu()

In [65]:
# y = y.detach().cpu().numpy()

In [66]:
data = {'isic_id': keys,
        'target': yy}

In [67]:
df = pd.DataFrame(data)

In [68]:
df.head()

Unnamed: 0,isic_id,target
0,ISIC_0015657,0.097922
1,ISIC_0015729,0.005551
2,ISIC_0015740,0.101917


In [69]:
df.to_csv("submission.csv", index=False)

In [70]:
df

Unnamed: 0,isic_id,target
0,ISIC_0015657,0.097922
1,ISIC_0015729,0.005551
2,ISIC_0015740,0.101917


In [71]:
#train_d=pd.read_csv('/kaggle/input/isic-2024-challenge/train-metadata.csv')
#cols_unique_train=['lesion_id', 'iddx_full', 'iddx_1', 'iddx_2', 'iddx_3', 'iddx_4', 'iddx_5', 'mel_mitotic_index', 'mel_thick_mm', 'tbp_lv_dnn_lesion_confidence']
#train_d=train_d.drop(cols_unique_train,axis=1)
#cat_cols=['sex','anatom_site_general','tbp_tile_type','tbp_lv_location','tbp_lv_location_simple']
#obj_but_not_cat=['image_type','copyright_license','attribution','patient_id','isic_id']

In [72]:
#train_d=train_d.drop('target',axis=1)

In [73]:
#train_d, new_cat=preprocess(train,train_d,cat_cols)

In [74]:
#train_d.shape

In [75]:
#dtest_metaset=TabularDataset_test(train_d)

In [76]:
#dtest_dataset = dISICDataset(data_transforms)

In [77]:
#dtest_dataloader=DataLoader(dtest_dataset,batch_size=32)

In [78]:
#dtest_metaloader=DataLoader(dtest_metaset,batch_size=32)

In [79]:
# train_dataset = dISICDataset(data_transforms)
# train_dataloader = DataLoader(train_dataset, batch_size = 32)

In [80]:
# for td in tqdm(train_dataloader):
#     with torch.inference_mode():
#         model(td.to(CONFIG['device']))
#     del td
#     torch.cuda.empty_cache()