In [1]:
import pandas as pd
import numpy as np
from io import BytesIO
from torch.utils.data import Dataset,DataLoader
import torchvision.transforms as transforms
from PIL import Image
import torch
from sklearn.preprocessing import StandardScaler

In [2]:
import os
import gc
import cv2
import math
import copy
import time
import random
import glob
from matplotlib import pyplot as plt

# For data manipulation
import numpy as np
import pandas as pd

# Pytorch Imports
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.cuda import amp
import torchvision

# Utils
import joblib
from tqdm import tqdm
from collections import defaultdict

# Sklearn Imports
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import StratifiedKFold, StratifiedGroupKFold 

# For Image Models
import timm

# Albumentations for augmentations
import albumentations as A
from albumentations.pytorch import ToTensorV2

# For colored terminal text
from colorama import Fore, Back, Style
b_ = Fore.BLUE
sr_ = Style.RESET_ALL

import warnings
warnings.filterwarnings("ignore")

# For descriptive error messages
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [3]:
# Device string shared by the cells below; resolved once and reused for CONFIG.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Run-wide settings. Among the visible cells only "img_size" is read
# (by the augmentation pipeline); the other entries are kept as-is.
CONFIG = dict(
    seed=42,
    neg_sample_ratio=0.02,
    epochs=50,
    img_size=384,
    model_name="densenet121",
    #"checkpoint_path" : "/kaggle/input/tf-efficientnet/pytorch/tf-efficientnet-b0/1/tf_efficientnet_b0_aa-827b6e33.pth",
    train_batch_size=32,
    valid_batch_size=64,
    learning_rate=1e-4,
    scheduler="CosineAnnealingLR",
    min_lr=1e-6,
    T_max=500,
    weight_decay=1e-6,
    fold=0,
    n_fold=5,
    n_accumulate=1,
    device=torch.device(device),
)

In [4]:
import h5py

In [5]:
import pickle

In [6]:
class ISICModel(nn.Module):
    """timm backbone followed by GeM pooling, a linear head and a sigmoid.

    NOTE: a later cell in this notebook defines another class with the same
    name (which returns raw logits); once that cell runs, this definition is
    shadowed.

    Args:
        model_name: architecture name passed to ``timm.create_model``.
        num_classes: output width of the linear head.
        pretrained: forwarded to ``timm.create_model``.
        checkpoint_path: forwarded to ``timm.create_model``.
    """

    def __init__(self, model_name, num_classes=1, pretrained=True, checkpoint_path=None):
        super().__init__()
        backbone = timm.create_model(
            model_name,
            pretrained=pretrained,
            checkpoint_path=checkpoint_path,
        )

        # Remember the width of the stock classifier, then strip the
        # backbone's own pooling and classifier so it emits feature maps.
        head_width = backbone.classifier.in_features
        backbone.classifier = nn.Identity()
        backbone.global_pool = nn.Identity()

        self.model = backbone
        self.pooling = GeM()
        self.linear = nn.Linear(head_width, num_classes)
        self.sigmoid = nn.Sigmoid()

    def forward(self, images):
        """Return sigmoid-activated head outputs for ``images``."""
        feature_maps = self.model(images)
        pooled = self.pooling(feature_maps).flatten(1)
        return self.sigmoid(self.linear(pooled))


In [7]:
import matplotlib.colors as mcolors

class GradCAM:
    """Grad-CAM saliency maps for a model that emits one score per sample.

    A forward hook on ``target_layer`` stores that layer's output and attaches
    a tensor hook to it, so the gradient of the score with respect to the
    layer output is captured during ``backward``. This replaces the deprecated
    ``Module.register_backward_hook``.

    Args:
        model: module to explain; it is switched to ``eval()`` on every call.
        target_layer: sub-module of ``model`` whose output is a 4-D feature
            map (the code unpacks ``gradients.size()`` into four values).
    """

    def __init__(self, model, target_layer):
        self.model = model
        self.target_layer = target_layer
        self.gradients = None
        self.activations = None

        self.target_layer.register_forward_hook(self.save_activation)

    def save_activation(self, module, input, output):
        """Forward hook: keep the layer output and arrange gradient capture."""
        self.activations = output.detach()
        # Under no_grad/inference_mode the output carries no graph, so there
        # is nothing to hook.
        if output.requires_grad:
            output.register_hook(
                lambda grad: self.save_gradient(module, None, (grad,))
            )

    def save_gradient(self, module, grad_input, grad_output):
        """Store the gradient w.r.t. the layer output (``grad_output[0]``)."""
        self.gradients = grad_output[0].detach()

    def __call__(self, x):
        """Compute saliency maps for ``x``.

        Returns:
            Detached tensor of shape ``(batch, 1, x.size(2), x.size(3))``,
            min-max scaled per sample. A sample whose map is constant yields
            all zeros instead of NaN.

        Raises:
            RuntimeError: if no gradient reached ``target_layer``.
        """
        self.model.eval()
        self.gradients = None
        logit = self.model(x)

        score = logit.squeeze()

        self.model.zero_grad()
        # Summing lets a batch of scores be back-propagated at once; for a
        # single scalar score this is identical to score.backward().
        score.sum().backward(retain_graph=True)

        if self.gradients is None:
            raise RuntimeError(
                "GradCAM: no gradient was recorded for target_layer; "
                "its output must take part in the autograd graph."
            )

        gradients = self.gradients
        activations = self.activations
        b, k, u, v = gradients.size()

        # Channel weights are the spatial mean of the gradients.
        alpha = gradients.reshape(b, k, -1).mean(2)
        weights = alpha.view(b, k, 1, 1)

        saliency_map = (weights * activations).sum(1, keepdim=True)
        saliency_map = F.relu(saliency_map)
        saliency_map = F.interpolate(
            saliency_map,
            size=(x.size(2), x.size(3)),
            mode='bilinear',
            align_corners=False,
        )

        # Per-sample min-max scaling; a zero range is replaced by 1 so the
        # division cannot produce NaN.
        flat = saliency_map.reshape(b, -1)
        lowest = flat.min(dim=1).values.view(b, 1, 1, 1)
        highest = flat.max(dim=1).values.view(b, 1, 1, 1)
        span = highest - lowest
        span = torch.where(span > 0, span, torch.ones_like(span))
        saliency_map = ((saliency_map - lowest) / span).detach()

        return saliency_map

In [8]:
class GeM(nn.Module):
    """Generalized-mean pooling over the last two dimensions.

    Computes ``avg_pool(clamp(x, eps) ** p) ** (1 / p)`` with a pooling window
    equal to the full spatial extent. ``p`` is a learnable one-element
    parameter initialised to the constructor argument.
    """

    def __init__(self, p=3, eps=1e-6):
        super().__init__()
        self.p = nn.Parameter(torch.ones(1) * p)
        self.eps = eps

    def forward(self, x):
        """Pool ``x`` using the module's own ``p`` and ``eps``."""
        return self.gem(x, p=self.p, eps=self.eps)

    def gem(self, x, p=3, eps=1e-6):
        """Pool ``x`` with an explicit exponent ``p`` and lower clamp ``eps``."""
        height, width = x.size(-2), x.size(-1)
        powered = x.clamp(min=eps).pow(p)
        pooled = F.avg_pool2d(powered, (height, width))
        return pooled.pow(1. / p)

    def __repr__(self):
        exponent = self.p.data.tolist()[0]
        return f"{self.__class__.__name__}(p={exponent:.4f}, eps={self.eps})"

In [9]:
# class MetaModel(nn.Module):
#     def __init__(self,in_features):
#         super().__init__()
#         self.model=nn.Sequential(
#                     nn.BatchNorm1d(in_features),
#                     nn.Linear(in_features,512),
#                     nn.BatchNorm1d(512),
#                     nn.ReLU(),
#                     nn.Dropout(p=0.5),
#                     nn.Linear(512,256),
#                     nn.BatchNorm1d(256),
#                     nn.ReLU(),
#                     nn.Dropout(p=0.5),
#                     nn.Linear(256,128),
#                     nn.BatchNorm1d(128),
#                     nn.ReLU(),
#                     nn.Dropout(p=0.4),
#                     nn.Linear(128,1))
#     def forward(self,x):
#         return self.model(x)

In [10]:
# HDF5 file holding the test images; opened further down with h5py.
test_path='/kaggle/input/isic-2024-challenge/test-image.hdf5'

In [11]:
# Per-lesion test metadata; its 'isic_id' column supplies the image keys.
test_meta_data=pd.read_csv('/kaggle/input/isic-2024-challenge/test-metadata.csv')

In [12]:
# Image identifiers: used to index the HDF5 file and as the submission's 'isic_id' column.
keys = test_meta_data['isic_id']

In [13]:
def _fill_missing(frame):
    """Fill NaNs in ``frame`` column by column, writing the result back.

    float64/int64 columns get their own mean and object columns get their own
    most frequent value. An object column with no non-null value is left
    untouched because it has no mode to fill with.
    """
    for col in frame.columns:
        column = frame[col]
        if not column.isna().any():
            continue
        if column.dtype == 'float64' or column.dtype == 'int64':
            # Assign back rather than chained `inplace=True`, which does not
            # update the frame when pandas Copy-on-Write is active.
            frame[col] = column.fillna(column.mean())
        elif column.dtype == 'object':
            modes = column.mode()
            if len(modes) > 0:
                frame[col] = column.fillna(modes[0])


def preprocess(df, df_test, cat_labels, drop_cols=None):
    """Impute, one-hot encode and standardise ``df_test`` using fits from ``df``.

    Args:
        df: reference frame; the one-hot encoder and the scaler are fitted on
            it. Its NaNs are filled in place.
        df_test: frame to transform. Its NaNs are filled in place using its
            own column statistics.
        cat_labels: names of the categorical columns to one-hot encode.
        drop_cols: columns removed after encoding. Defaults to the
            module-level ``obj_but_not_cat`` list, as before.

    Returns:
        ``(df_f, new_cat_cols)``: the transformed copy of ``df_test`` and the
        array of one-hot column names produced by the encoder.
    """
    if drop_cols is None:
        drop_cols = obj_but_not_cat

    _fill_missing(df)
    _fill_missing(df_test)

    encoder = OneHotEncoder(sparse_output=False, drop='first')
    encoder.fit(df[cat_labels])
    new_cat_cols = encoder.get_feature_names_out(cat_labels)

    # Reuse df_test's index so concat aligns rows even when that index is not
    # the default 0..n-1 range.
    one_hot_df = pd.DataFrame(
        encoder.transform(df_test[cat_labels]),
        columns=new_cat_cols,
        index=df_test.index,
    )
    df_encoded = pd.concat([df_test.drop(columns=cat_labels), one_hot_df], axis=1)
    df_f = df_encoded.drop(drop_cols, axis=1)

    one_hot_names = set(new_cat_cols)
    num_cols = [col for col in df_f.columns if col not in one_hot_names]

    scaler = StandardScaler()
    scaler.fit(df[num_cols])
    df_f[num_cols] = scaler.transform(df_f[num_cols])
    return df_f, new_cat_cols

In [14]:
# Training metadata; passed to preprocess() as the frame the encoder and scaler are fitted on.
train=pd.read_csv('/kaggle/input/isic-2024-challenge/train-metadata.csv')

In [15]:
# Columns removed from the training metadata before preprocessing.
cols_unique_train=['lesion_id', 'iddx_full', 'iddx_1', 'iddx_2', 'iddx_3', 'iddx_4', 'iddx_5', 'mel_mitotic_index', 'mel_thick_mm', 'tbp_lv_dnn_lesion_confidence']
# errors="ignore" keeps this cell re-runnable: on a second execution the
# columns are already gone and a plain drop would raise KeyError.
train=train.drop(columns=cols_unique_train,errors="ignore")
# Categorical columns that preprocess() one-hot encodes.
cat_cols=['sex','anatom_site_general','tbp_tile_type','tbp_lv_location','tbp_lv_location_simple']
# Object-typed columns that preprocess() drops instead of encoding.
obj_but_not_cat=['image_type','copyright_license','attribution','patient_id','isic_id']

In [16]:
# Build the model-ready test table: encoder and scaler are fitted on `train`, then applied to the test metadata.
df_test,new_cat_cols=preprocess(train,test_meta_data,cat_cols)

In [17]:
df_test.shape

(3, 67)

In [18]:
df_test.columns

Index(['age_approx', 'clin_size_long_diam_mm', 'tbp_lv_A', 'tbp_lv_Aext',
       'tbp_lv_B', 'tbp_lv_Bext', 'tbp_lv_C', 'tbp_lv_Cext', 'tbp_lv_H',
       'tbp_lv_Hext', 'tbp_lv_L', 'tbp_lv_Lext', 'tbp_lv_areaMM2',
       'tbp_lv_area_perim_ratio', 'tbp_lv_color_std_mean', 'tbp_lv_deltaA',
       'tbp_lv_deltaB', 'tbp_lv_deltaL', 'tbp_lv_deltaLB',
       'tbp_lv_deltaLBnorm', 'tbp_lv_eccentricity', 'tbp_lv_minorAxisMM',
       'tbp_lv_nevi_confidence', 'tbp_lv_norm_border', 'tbp_lv_norm_color',
       'tbp_lv_perimeterMM', 'tbp_lv_radial_color_std_max', 'tbp_lv_stdL',
       'tbp_lv_stdLExt', 'tbp_lv_symm_2axis', 'tbp_lv_symm_2axis_angle',
       'tbp_lv_x', 'tbp_lv_y', 'tbp_lv_z', 'sex_male',
       'anatom_site_general_head/neck', 'anatom_site_general_lower extremity',
       'anatom_site_general_posterior torso',
       'anatom_site_general_upper extremity', 'tbp_tile_type_3D: white',
       'tbp_lv_location_Left Arm', 'tbp_lv_location_Left Arm - Lower',
       'tbp_lv_location_Left 

In [19]:
# class ISICModel(nn.Module):
#     def __init__(self, model_name, num_classes=1, pretrained=True, checkpoint_path=None):
#         super(ISICModel, self).__init__()
#         self.model = timm.create_model(model_name, pretrained=pretrained, checkpoint_path=checkpoint_path)

#         in_features = self.model.classifier.in_features
#         self.model.classifier = nn.Identity()
#         self.model.global_pool = nn.Identity()
#         self.pooling = nn.AvgPool2d()
#         self.linear = nn.Linear(in_features, num_classes)

#     def forward(self, images):
#         features = self.model(images)
#         pooled_features = self.pooling(features).flatten(1)
#         output = self.linear(pooled_features)
#         return output

In [20]:
class MetaModel(nn.Module):
    """MLP over tabular features that emits one raw score per row.

    Layout: Linear(in_features, 512) -> BN -> ReLU -> Dropout(0.7)
            -> Linear(512, 128) -> BN -> ReLU -> Dropout(0.7) -> Linear(128, 1).
    No activation is applied to the final layer.
    """

    def __init__(self, in_features):
        super().__init__()
        wide, narrow, drop_p = 512, 128, 0.7
        stack = [
            nn.Linear(in_features, wide),
            nn.BatchNorm1d(wide),
            nn.ReLU(),
            nn.Dropout(p=drop_p),
            nn.Linear(wide, narrow),
            nn.BatchNorm1d(narrow),
            nn.ReLU(),
            nn.Dropout(p=drop_p),
            nn.Linear(narrow, 1),
        ]
        # Keep the attribute name and layer order: state-dict keys are
        # "model.<index>.*".
        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the stack and return its output unchanged."""
        scores = self.model(x)
        return scores

In [21]:
# Load the pickled tabular model object.
# SECURITY NOTE: pickle.load executes arbitrary code embedded in the file;
# only load artefacts you produced yourself.
# The context manager closes the file handle, which the bare open() did not.
with open("/kaggle/input/0.1637nn_cv/pytorch/default/1/AUROC0.1637_Loss0.0196_epoch11.pkl",'rb') as meta_model_file:
    meta_model = pickle.load(meta_model_file)
# sd1 = torch.load('/kaggle/input/metamodel13/pytorch/default/1/AUROC0.5400_Loss35.1872_epoch1.bin')

In [22]:
class ISICModel(nn.Module):
    """timm backbone followed by GeM pooling and a linear head (raw logits).

    ``forward`` does not apply a sigmoid; callers are expected to do so. The
    ``sigmoid`` attribute is kept only so the module layout stays the same.

    Args:
        model_name: architecture name passed to ``timm.create_model``.
        num_classes: output width of the linear head. Previously this argument
            was ignored and the head was always width 1; the default of 1
            keeps existing checkpoints loadable.
        pretrained: forwarded to ``timm.create_model``.
        checkpoint_path: forwarded to ``timm.create_model``.
    """

    def __init__(self, model_name, num_classes=1, pretrained=True, checkpoint_path=None):
        super(ISICModel, self).__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained, checkpoint_path=checkpoint_path)
        # Read the classifier width before replacing it, then strip the
        # backbone's own pooling and classifier so it emits feature maps.
        in_features = self.model.classifier.in_features
        self.model.classifier = nn.Identity()
        self.model.global_pool = nn.Identity()
        self.pooling = GeM()
        self.linear = nn.Linear(in_features, num_classes)
        self.sigmoid = nn.Sigmoid()

    def forward(self, images):
        """Return head outputs (logits) for ``images``."""
        features = self.model(images)
        pooled_features = self.pooling(features).flatten(1)
        output = self.linear(pooled_features)
        return output

    
# model = ISICModel(CONFIG['hf_hub:timm/densenet121.tv_in1k'])
# model1 = timm.create_model('dpn68.mx_in1k', pretrained=True)
# model1.classifier = nn.Conv2d(832, 1, kernel_size = (1,1), stride = (1,1))
# Build the image model without downloading pretrained weights, then restore
# the fine-tuned checkpoint.
model = ISICModel('efficientnet_b3', pretrained = False)
# map_location places the tensors on the selected device, so a checkpoint
# saved from a GPU session also loads on a CPU-only host.
model.load_state_dict(torch.load('/kaggle/input/effnet_0.15lb/pytorch/default/1/AUROC0.1576_Loss0.3074_epoch23.bin', map_location=device))
# model = ISICModel(model1)
# , checkpoint_path=CONFIG['checkpoint_path']
model.to(device);

In [23]:
# model_1=pickle.load(open('/kaggle/input/eff_net15/pytorch/default/1/eff_net_2_model (2).pkl','rb'))
# sd2 = torch.load('/kaggle/input/effnet0-2/pytorch/default/1/AUROC0.5434_Loss35.1402_epoch6.bin')

In [24]:
# model_2=pickle.load(open('/kaggle/input/densenet11/pytorch/default/1/densenet121_8_model (3).pkl','rb'))

In [25]:
# model = model.classifier.load_state_dict(sd2)
# meta_model = meta_model.load_state_dict(sd1)

In [26]:
# df_test.shape

In [27]:
# lgb_models=pickle.load(open('/kaggle/input/lgbm15/pytorch/default/1/lgbm_11 (3).pkl','rb'))

In [28]:
#f1 = h5py.File('/kaggle/input/isic-2024-challenge/train-image.hdf5', 'r')

In [29]:
#train_keys = list(f1.keys())

In [30]:
# Read-only handle on the test-image HDF5 file; ISICDataset reads from it by key. It is not closed in the visible cells.
f2=h5py.File(test_path,'r')

In [31]:
# img=BytesIO(f2[keys[0]][()])

In [32]:
# test_transform(Image.open(img))

In [33]:
# img=np.array(Image.open(img))

In [34]:
# img=torch.tensor(img)

In [35]:
# img_2=img.to(torch.float32)

In [36]:
# img_2

In [37]:
# type(img)

In [38]:
class ISICDataset(Dataset):
    """Test-image dataset backed by the module-level ``f2`` file and ``keys``.

    Item ``idx`` is the image whose encoded bytes are stored in ``f2`` under
    ``keys[idx]``, decoded with PIL into a NumPy array.

    Args:
        transform: optional callable invoked as ``transform(image=array)`` and
            returning a mapping with an ``'image'`` entry. When ``None`` the
            decoded array is returned as-is (previously this raised
            ``TypeError``).
    """

    def __init__(self,transform=None):
        self.transform=transform

    def __len__(self):
        return len(keys)

    def __getitem__(self,idx):
        # f2[key][()] reads the stored value, which is wrapped in BytesIO for PIL.
        img=np.array(Image.open(BytesIO(f2[keys[idx]][()])))
        if self.transform is None:
            return img
        img=self.transform(image=img)['image']
        return img

In [39]:
# class dISICDataset(Dataset):
#     def __init__(self,transform=None):
#         self.transform=transform
#     def __len__(self):
#         return len(train_keys)
#     def __getitem__(self,idx):
#         img=np.array(Image.open(BytesIO(f1[train_keys[idx]][()])))
#         img=self.transform(image=img)['image']
#         return img

In [40]:
# Inference-time image pipeline: resize to a CONFIG['img_size'] square,
# normalise with the listed per-channel mean/std, then convert to a tensor.
data_transforms = A.Compose(
    [
        A.Resize(CONFIG['img_size'], CONFIG['img_size']),
        A.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
            max_pixel_value=255.0,
            p=1.0,
        ),
        ToTensorV2(),
    ],
    p=1.0,
)

In [41]:
# Image dataset over every test key, with the inference transforms applied.
test_dataset = ISICDataset(data_transforms)

In [42]:
class TabularDataset_test(Dataset):
    """Row-wise dataset over a DataFrame, yielding float32 feature tensors.

    Args:
        data: frame whose rows are the samples (accessed through ``.iloc``).
        transform: optional callable applied to the row's float array before
            it is converted to a tensor.
    """

    def __init__(self, data, transform=None):
        self.data, self.transform = data, transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx].values.astype(float)
        features = self.transform(row) if self.transform else row
        return torch.tensor(features, dtype=torch.float32)

In [43]:
# Batches of 32 in dataset order (no shuffle argument), so predictions line up with `keys`.
test_dataloader=DataLoader(test_dataset,batch_size=32)

In [44]:
# is_available must be *called*: the bare function object is always truthy,
# which selected 'cuda' even on CPU-only hosts.
device='cuda' if torch.cuda.is_available() else 'cpu'

In [45]:
from tqdm.auto import tqdm

In [46]:
# import gc
# gc.collect()

In [47]:
# i = next(iter(test_dataloader))

In [48]:
# model_1

In [49]:
# model_1(i.to(device)).shape

In [50]:
# Image-model inference: sigmoid probabilities for every test image, in loader order.
model.eval()
model.to(device)
cnn_batches = []
with torch.inference_mode():
    for X in tqdm(test_dataloader):
        batch_probs = torch.sigmoid(model(X.to(device)))
        cnn_batches.append(batch_probs.cpu().numpy().ravel())
# Concatenate once instead of re-copying the growing array on every batch;
# the float64 cast matches the dtype the original accumulator produced.
y = np.concatenate(cnn_batches).astype(np.float64) if cnn_batches else np.array([])
del cnn_batches
gc.collect()

  0%|          | 0/1 [00:00<?, ?it/s]

In [51]:
# model_2.eval()
# yyyy=np.array([])
# model_2.to(device)
# for i,X in enumerate(tqdm(test_dataloader)):
#     X=X.to(device)
#     with torch.inference_mode():
#         yyyy=np.concatenate((yyyy,np.array(torch.nn.functional.sigmoid(model_2(X)).detach().cpu()).ravel()))
#     del X
#     gc.collect()

In [52]:
# Add the image-model probability as one more tabular feature (the metadata batch is 68 wide vs. 67 columns before).
df_test['connect_y']=y

In [53]:
# Row-wise tensor dataset over the preprocessed metadata, including 'connect_y'.
test_metaset=TabularDataset_test(df_test)

In [54]:
# Batches of 32 in row order (no shuffle argument), matching the order of `keys`.
test_metaloader=DataLoader(test_metaset,batch_size=32)

In [55]:
X=next(iter(test_metaloader))

In [56]:
X.shape

torch.Size([3, 68])

In [57]:
# meta_model

In [58]:
# Tabular-model inference: sigmoid probabilities for every metadata row, in loader order.
meta_model.eval()
meta_model.to(device)
meta_batches = []
with torch.inference_mode():
    for X in tqdm(test_metaloader):
        batch_probs = torch.sigmoid(meta_model(X.to(device)))
        meta_batches.append(batch_probs.cpu().numpy().ravel())
# Concatenate once instead of re-copying the growing array on every batch;
# the float64 cast matches the dtype the original accumulator produced.
y_meta = np.concatenate(meta_batches).astype(np.float64) if meta_batches else np.array([])
del meta_batches
gc.collect()

  0%|          | 0/1 [00:00<?, ?it/s]

In [59]:
# y_lgb = np.mean([model.predict_proba(df_test)[:, 1] for model in lgb_models], 0)

In [60]:
# Z-score the image-model probabilities so both models blend on one scale.
# With zero spread (e.g. a single row or identical predictions) dividing by
# the std would give NaN, so fall back to all zeros.
y_cnn_std=np.std(y)
y_cnn=(y-np.mean(y))/y_cnn_std if y_cnn_std>0 else np.zeros_like(y)

In [61]:
# Z-score the tabular-model probabilities (overwrites y_meta with its
# standardised form). With zero spread, dividing by the std would give NaN,
# so fall back to all zeros.
y_meta_std=np.std(y_meta)
y_meta=(y_meta-np.mean(y_meta))/y_meta_std if y_meta_std>0 else np.zeros_like(y_meta)

In [62]:
# y_lgb=(y_lgb-np.mean(y_lgb))/np.std(y_lgb)

In [63]:
# Blend weights: `a` multiplies the tabular-model score and `b` the image-model score in the next cell.
a = 0.5
b = 0.5
# c = 0.333

In [64]:
# yy=a*y+b*y_meta+c*y_lgb

In [65]:
# Final score: weighted sum of the two z-scored prediction vectors.
yy=a*y_meta + b*y_cnn

In [66]:
# y = torch.cat(y, dim = 0).reshape(-1)

In [67]:
# y=y.squeeze().detach().cpu()

In [68]:
# y = y.detach().cpu().numpy()

In [69]:
# Submission columns. 'target' is a blend of z-scores, so it can be negative and is not a probability.
# NOTE(review): confirm the competition metric accepts unbounded scores.
data = {'isic_id': keys,
        'target': yy}

In [70]:
# Submission table with one row per test image id.
df = pd.DataFrame(data)

In [71]:
df.head()

Unnamed: 0,isic_id,target
0,ISIC_0015657,0.374505
1,ISIC_0015729,-0.738441
2,ISIC_0015740,0.363936


In [72]:
# Write the submission file without the index column.
df.to_csv("submission.csv", index=False)

In [73]:
df

Unnamed: 0,isic_id,target
0,ISIC_0015657,0.374505
1,ISIC_0015729,-0.738441
2,ISIC_0015740,0.363936


In [74]:
#train_d=pd.read_csv('/kaggle/input/isic-2024-challenge/train-metadata.csv')
#cols_unique_train=['lesion_id', 'iddx_full', 'iddx_1', 'iddx_2', 'iddx_3', 'iddx_4', 'iddx_5', 'mel_mitotic_index', 'mel_thick_mm', 'tbp_lv_dnn_lesion_confidence']
#train_d=train_d.drop(cols_unique_train,axis=1)
#cat_cols=['sex','anatom_site_general','tbp_tile_type','tbp_lv_location','tbp_lv_location_simple']
#obj_but_not_cat=['image_type','copyright_license','attribution','patient_id','isic_id']

In [75]:
#train_d=train_d.drop('target',axis=1)

In [76]:
#train_d, new_cat=preprocess(train,train_d,cat_cols)

In [77]:
#train_d.shape

In [78]:
#dtest_metaset=TabularDataset_test(train_d)

In [79]:
#dtest_dataset = dISICDataset(data_transforms)

In [80]:
#dtest_dataloader=DataLoader(dtest_dataset,batch_size=32)

In [81]:
#dtest_metaloader=DataLoader(dtest_metaset,batch_size=32)