In [23]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import cv2
import PIL
import os
from tqdm import tqdm
import copy
import shutil

import torchvision.models as models
from torchvision.transforms import transforms

import torch
import torch.nn as nn 
from torch.utils.data import DataLoader, Dataset

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

# Dataset

In [24]:
metadata = pd.read_csv("/kaggle/input/metadata/metadata.csv")
metadata = metadata.reset_index(drop=True)

In [25]:
print(len(metadata))
print(metadata.columns)
print(metadata.shape)

18875
Index(['binary_label', 'id', 'smoke', 'drink', 'background_father',
       'background_mother', 'age', 'gender', 'skin_cancer_history',
       'cancer_history', 'region', 'itch', 'grew', 'hurt', 'changed', 'bleed',
       'elevation', 'biopsed', 'fitzpatrick'],
      dtype='object')
(18875, 19)


In [26]:
metadata["id"].loc[16572:16579]

16572    cd90e491ddaa92f0f4eb07f73aa09f64
16573    5f1ed6de6a9110d7dc580a6a0312af63
16574    f198aaf1f0550c2464b285454d34926e
16575    6214de2e915835014235a1839cbc5938
16576    5a3a4c1f0effb626b298e89c032b1d28
16577               PAT_1516_1765_530.png
16578                  PAT_46_881_939.png
16579               PAT_1545_1867_547.png
Name: id, dtype: object

In [27]:
metadata.binary_label.value_counts()

binary_label
0    14793
1     4082
Name: count, dtype: int64

In [28]:
metadata.fitzpatrick.value_counts()

fitzpatrick
 2.0    5684
 3.0    3700
 1.0    3100
 4.0    2843
 5.0    1543
-1.0    1369
 6.0     636
Name: count, dtype: int64

In [29]:
metadata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18875 entries, 0 to 18874
Data columns (total 19 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   binary_label         18875 non-null  int64  
 1   id                   18875 non-null  object 
 2   smoke                18875 non-null  int64  
 3   drink                18875 non-null  int64  
 4   background_father    18875 non-null  int64  
 5   background_mother    18875 non-null  int64  
 6   age                  18875 non-null  float64
 7   gender               18875 non-null  int64  
 8   skin_cancer_history  18875 non-null  int64  
 9   cancer_history       18875 non-null  int64  
 10  region               18875 non-null  int64  
 11  itch                 18875 non-null  int64  
 12  grew                 18875 non-null  int64  
 13  hurt                 18875 non-null  int64  
 14  changed              18875 non-null  int64  
 15  bleed                18875 non-null 

In [30]:
pad_base_path = "/kaggle/input/skin-cancer"
pad_subfolder_paths = [
    "imgs_part_1/imgs_part_1",
    "imgs_part_2/imgs_part_2",
    "imgs_part_3/imgs_part_3"
]
pad_set = {}

for pad_path in pad_subfolder_paths:
    image_path = os.path.join(pad_base_path, pad_path)
        
    for f in os.listdir(image_path):
        if f.endswith(".png"):
            pad_set[f] = os.path.join(image_path, f)

In [31]:
def resolve_path(image_id):
    if image_id.endswith(".png"):
        return pad_set.get(image_id, None)
    else:
        return image_id

In [32]:
metadata["full_path"] = metadata["id"].apply(resolve_path)

In [33]:
metadata.full_path

0                         5e82a45bc5d78bd24ae9202d194423f8
1                         fa2911a9b13b6f8af79cb700937cc14f
2                         d2bac3c9e4499032ca8e9b07c7d3bc40
3                         0a94359e7eaacd7178e06b2823777789
4                         a39ec3b1f22c08a421fa20535e037bba
                               ...                        
18870    /kaggle/input/skin-cancer/imgs_part_3/imgs_par...
18871    /kaggle/input/skin-cancer/imgs_part_1/imgs_par...
18872    /kaggle/input/skin-cancer/imgs_part_3/imgs_par...
18873    /kaggle/input/skin-cancer/imgs_part_1/imgs_par...
18874    /kaggle/input/skin-cancer/imgs_part_3/imgs_par...
Name: full_path, Length: 18875, dtype: object

In [34]:
metadata_cols = ['smoke', 'drink', 'background_father',
       'background_mother', 'age', 'gender', 'skin_cancer_history',
       'cancer_history', 'region', 'itch', 'grew', 'hurt', 'changed', 'bleed',
       'elevation', 'biopsed', 'fitzpatrick']

In [35]:
class MultimodalSkinCancerDataset(Dataset):

    def __init__(self, df, metadata_cols, transform = None):
        self.df = df
        self.transform = transform
        self.metadata_cols = metadata_cols
       
    
    def __len__(self):
        return len(self.df)


    def __getitem__(self, idx):

        image = None
        image_path = None
        fitz_path = "/kaggle/input/fitzpatrick17k-original/finalfitz17k"

        row = self.df.iloc[idx]
        image_id = self.df.iloc[idx]["id"]
        label = self.df.iloc[idx]["binary_label"]

        
        if(image_id.endswith(".jpg")):
            image_path = os.path.join(fitz_path, image_id)
            image_bgr = cv2.imread(image_path)
            
        elif image_id.endswith(".png"):  
            image_path = self.df.iloc[idx]["full_path"]
        
        else:
            image_path = os.path.join(fitz_path, image_id + ".jpg")
            
        image_bgr = cv2.imread(image_path)

        if image_bgr is None:
            image_bgr = np.zeros((224, 224, 3), dtype=np.uint8)
        
        image = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
        
        if self.transform:
            image = self.transform(image)

        metadata = row[self.metadata_cols].values.astype(np.float32)
            
        return image, metadata, label

In [36]:
image_size = 224  

train_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((image_size, image_size)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])

val_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])

In [37]:
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader

train_df, val_df = train_test_split(metadata, test_size=0.2, random_state=42, stratify=metadata['binary_label'])

train_df = train_df.reset_index(drop=True)
val_df = val_df.reset_index(drop=True)

train_dataset = MultimodalSkinCancerDataset(df = train_df, metadata_cols=metadata_cols, transform=train_transform)
val_dataset = MultimodalSkinCancerDataset(df = val_df, metadata_cols=metadata_cols, transform=val_transform)

batch_size = 32

In [38]:
from torch.utils.data import WeightedRandomSampler

train_class_counts = train_df['binary_label'].value_counts().to_dict()
train_weights = [1.0 / train_class_counts[label] for label in train_df['binary_label']]

sampler = WeightedRandomSampler(train_weights, num_samples=len(train_weights), replacement=True)

train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

# Helper Functions

In [39]:
from tqdm import tqdm

def train_one_epoch(model, loader, optimizer, criterion, device):
    
    model.train()
    running_loss = 0
    
    for images, metadata, labels in tqdm(loader, desc="Training", leave=True):
        
        images = images.to(device)
        metadata = metadata.to(device)
        labels = labels.float().unsqueeze(1).to(device)

        optimizer.zero_grad()
        outputs = model(images, metadata)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        
    return running_loss / len(loader)

In [40]:
def validate(model, loader, criterion, device):
    model.eval()
    running_loss = 0
    all_preds, all_labels = [], []
    
    with torch.no_grad():
        for images, metadata, labels in tqdm(loader, desc="Validation", leave=True):
            images = images.to(device)
            metadata = metadata.to(device)
            labels = labels.float().unsqueeze(1).to(device)
            
            outputs = model(images, metadata)  
            loss = criterion(outputs, labels)
            running_loss += loss.item()
            
            probs = torch.sigmoid(outputs)  
            all_preds.extend(probs.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

            
    return running_loss / len(loader), all_preds, all_labels

In [41]:
def compute_metrics(preds, labels):
    preds_bin = (np.array(preds) > 0.5).astype(int)
    labels = np.array(labels)
    
    acc = accuracy_score(labels, preds_bin)
    prec = precision_score(labels, preds_bin)
    rec = recall_score(labels, preds_bin)
    f1 = f1_score(labels, preds_bin)
    auc = roc_auc_score(labels, preds)
    
    return {'accuracy': acc, 'precision': prec, 'recall': rec, 'f1': f1, 'roc_auc': auc}

# Modeling

### 1. Resnet18

In [42]:
class MultimodalModel(nn.Module):
    def __init__(self, num_metadata_features, pretrained=True):
        super().__init__()
        
        self.cnn = models.resnet18(pretrained=pretrained)
        self.cnn.fc = nn.Identity()
        img_features = 512  
        
        self.metadata_fc = nn.Sequential(
            nn.Linear(num_metadata_features, 32),
            nn.ReLU(),
            nn.BatchNorm1d(32)
        )
        
        self.classifier = nn.Sequential(
            nn.Linear(img_features + 32, 64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, 1)  
        )
    
    def forward(self, image, metadata=None):
        img_out = self.cnn(image)
        if metadata is not None:
            meta_out = self.metadata_fc(metadata)
            combined = torch.cat([img_out, meta_out], dim=1)
        else:
            combined = img_out
        return self.classifier(combined) 

In [43]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = MultimodalModel(num_metadata_features=len(train_dataset.metadata_cols)).to(device)

class_counts = metadata['binary_label'].value_counts()
neg_count, pos_count = class_counts[0], class_counts[1]
pos_weight = torch.tensor([neg_count / pos_count], dtype=torch.float32).to(device)

criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)



In [44]:
num_epochs = 10
best_val_f1 = 0.0  
best_val_loss = float('inf') 
patience = 5
epochs_no_improve = 0

for epoch in range(num_epochs):
    train_loss = train_one_epoch(model, train_loader, optimizer, criterion, device)
    val_loss, val_preds, val_labels = validate(model, val_loader, criterion, device)
    metrics = compute_metrics(val_preds, val_labels)

    print(f"Epoch {epoch+1}/{num_epochs} | Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}")
    print(f"Metrics: {metrics}")

    if metrics['f1'] > best_val_f1:
        best_val_f1 = metrics['f1']
        torch.save(model.state_dict(), "best_multimodal_model.pth")
        print("Model saved! F1-score improved.")
        epochs_no_improve = 0 
    
    else:
        epochs_no_improve += 1
        print(f"F1-score did not improve for {epochs_no_improve} epochs.")

    if epochs_no_improve == patience:
        print(f"Early stopping triggered after {patience} epochs with no improvement.")
        break

Training:   2%|▏         | 9/472 [00:03<02:40,  2.88it/s][ WARN:0@411.252] global loadsave.cpp:268 findDecoder imread_('/kaggle/input/fitzpatrick17k-original/finalfitz17k/e69165b3455bb3a5a8b33a0f6fd8a1d3.jpg'): can't open/read file: check file path/integrity
Training:  32%|███▏      | 151/472 [00:42<01:43,  3.09it/s][ WARN:0@449.910] global loadsave.cpp:268 findDecoder imread_('/kaggle/input/fitzpatrick17k-original/finalfitz17k/bccab73c32aba48c90602de9bb458272.jpg'): can't open/read file: check file path/integrity
Training: 100%|██████████| 472/472 [02:07<00:00,  3.71it/s]
Validation: 100%|██████████| 118/118 [00:22<00:00,  5.14it/s]


Epoch 1/10 | Train Loss: 0.6570 | Val Loss: 0.6176
Metrics: {'accuracy': 0.7952317880794701, 'precision': 0.5153024911032028, 'recall': 0.8872549019607843, 'f1': 0.6519585772174696, 'roc_auc': 0.919701608253981}
Model saved! F1-score improved.


Training:   0%|          | 0/472 [00:00<?, ?it/s][ WARN:0@558.534] global loadsave.cpp:268 findDecoder imread_('/kaggle/input/fitzpatrick17k-original/finalfitz17k/2ea034fc482e9bd21a5dfa2506cc5d6b.jpg'): can't open/read file: check file path/integrity
Training:  15%|█▌        | 71/472 [00:20<01:32,  4.35it/s][ WARN:0@577.687] global loadsave.cpp:268 findDecoder imread_('/kaggle/input/fitzpatrick17k-original/finalfitz17k/e69165b3455bb3a5a8b33a0f6fd8a1d3.jpg'): can't open/read file: check file path/integrity
Training:  96%|█████████▌| 451/472 [01:59<00:04,  4.92it/s][ WARN:0@677.832] global loadsave.cpp:268 findDecoder imread_('/kaggle/input/fitzpatrick17k-original/finalfitz17k/2ea034fc482e9bd21a5dfa2506cc5d6b.jpg'): can't open/read file: check file path/integrity
Training: 100%|██████████| 472/472 [02:05<00:00,  3.77it/s]
Validation: 100%|██████████| 118/118 [00:22<00:00,  5.17it/s]


Epoch 2/10 | Train Loss: 0.4569 | Val Loss: 0.5118
Metrics: {'accuracy': 0.8770860927152317, 'precision': 0.6651031894934334, 'recall': 0.8688725490196079, 'f1': 0.7534537725823591, 'roc_auc': 0.9431072699441387}
Model saved! F1-score improved.


Training:  87%|████████▋ | 409/472 [01:49<00:11,  5.37it/s][ WARN:0@815.717] global loadsave.cpp:268 findDecoder imread_('/kaggle/input/fitzpatrick17k-original/finalfitz17k/bccab73c32aba48c90602de9bb458272.jpg'): can't open/read file: check file path/integrity
Training: 100%|██████████| 472/472 [02:05<00:00,  3.77it/s]
Validation: 100%|██████████| 118/118 [00:22<00:00,  5.14it/s]


Epoch 3/10 | Train Loss: 0.3614 | Val Loss: 0.5536
Metrics: {'accuracy': 0.8309933774834437, 'precision': 0.5689922480620155, 'recall': 0.8995098039215687, 'f1': 0.6970560303893638, 'roc_auc': 0.9438552372621911}
F1-score did not improve for 1 epochs.


Training:  81%|████████  | 383/472 [01:40<00:22,  4.02it/s][ WARN:0@954.941] global loadsave.cpp:268 findDecoder imread_('/kaggle/input/fitzpatrick17k-original/finalfitz17k/bccab73c32aba48c90602de9bb458272.jpg'): can't open/read file: check file path/integrity
Training: 100%|██████████| 472/472 [02:03<00:00,  3.83it/s]
Validation: 100%|██████████| 118/118 [00:22<00:00,  5.19it/s]


Epoch 4/10 | Train Loss: 0.2980 | Val Loss: 0.5521
Metrics: {'accuracy': 0.8437086092715231, 'precision': 0.5902555910543131, 'recall': 0.9056372549019608, 'f1': 0.7147001934235978, 'roc_auc': 0.9449763599255179}
F1-score did not improve for 2 epochs.


Training:  34%|███▎      | 159/472 [00:42<01:11,  4.37it/s][ WARN:0@1042.466] global loadsave.cpp:268 findDecoder imread_('/kaggle/input/fitzpatrick17k-original/finalfitz17k/e69165b3455bb3a5a8b33a0f6fd8a1d3.jpg'): can't open/read file: check file path/integrity
Training:  50%|█████     | 236/472 [01:01<00:43,  5.48it/s][ WARN:0@1061.917] global loadsave.cpp:268 findDecoder imread_('/kaggle/input/fitzpatrick17k-original/finalfitz17k/e69165b3455bb3a5a8b33a0f6fd8a1d3.jpg'): can't open/read file: check file path/integrity
Training:  76%|███████▌  | 358/472 [01:35<00:28,  4.01it/s][ WARN:0@1095.351] global loadsave.cpp:268 findDecoder imread_('/kaggle/input/fitzpatrick17k-original/finalfitz17k/2ea034fc482e9bd21a5dfa2506cc5d6b.jpg'): can't open/read file: check file path/integrity
Training: 100%|██████████| 472/472 [02:02<00:00,  3.84it/s]
Validation: 100%|██████████| 118/118 [00:22<00:00,  5.13it/s]


Epoch 5/10 | Train Loss: 0.2411 | Val Loss: 0.5570
Metrics: {'accuracy': 0.8845033112582782, 'precision': 0.6866404715127702, 'recall': 0.8566176470588235, 'f1': 0.7622682660850599, 'roc_auc': 0.9444938671649802}
Model saved! F1-score improved.


Training:  91%|█████████ | 429/472 [01:50<00:12,  3.57it/s][ WARN:0@1256.733] global loadsave.cpp:268 findDecoder imread_('/kaggle/input/fitzpatrick17k-original/finalfitz17k/bccab73c32aba48c90602de9bb458272.jpg'): can't open/read file: check file path/integrity
Training:  96%|█████████▋| 455/472 [01:57<00:02,  5.90it/s][ WARN:0@1263.383] global loadsave.cpp:268 findDecoder imread_('/kaggle/input/fitzpatrick17k-original/finalfitz17k/2ea034fc482e9bd21a5dfa2506cc5d6b.jpg'): can't open/read file: check file path/integrity
Training: 100%|██████████| 472/472 [02:01<00:00,  3.88it/s]
Validation: 100%|██████████| 118/118 [00:22<00:00,  5.16it/s]


Epoch 6/10 | Train Loss: 0.2028 | Val Loss: 0.6711
Metrics: {'accuracy': 0.8659602649006622, 'precision': 0.6388888888888888, 'recall': 0.8737745098039216, 'f1': 0.7380952380952381, 'roc_auc': 0.9416999648794969}
F1-score did not improve for 1 epochs.


Training:  63%|██████▎   | 297/472 [01:18<00:41,  4.24it/s][ WARN:0@1368.911] global loadsave.cpp:268 findDecoder imread_('/kaggle/input/fitzpatrick17k-original/finalfitz17k/e69165b3455bb3a5a8b33a0f6fd8a1d3.jpg'): can't open/read file: check file path/integrity
Training:  87%|████████▋ | 412/472 [01:47<00:13,  4.41it/s][ WARN:0@1398.028] global loadsave.cpp:268 findDecoder imread_('/kaggle/input/fitzpatrick17k-original/finalfitz17k/2ea034fc482e9bd21a5dfa2506cc5d6b.jpg'): can't open/read file: check file path/integrity
Training: 100%|██████████| 472/472 [02:02<00:00,  3.86it/s]
Validation: 100%|██████████| 118/118 [00:22<00:00,  5.15it/s]


Epoch 7/10 | Train Loss: 0.1977 | Val Loss: 0.6559
Metrics: {'accuracy': 0.898543046357616, 'precision': 0.7267015706806282, 'recall': 0.8504901960784313, 'f1': 0.7837380011293055, 'roc_auc': 0.946316778654686}
Model saved! F1-score improved.


Training:   1%|▏         | 7/472 [00:02<02:37,  2.95it/s][ WARN:0@1438.188] global loadsave.cpp:268 findDecoder imread_('/kaggle/input/fitzpatrick17k-original/finalfitz17k/bccab73c32aba48c90602de9bb458272.jpg'): can't open/read file: check file path/integrity
Training:  94%|█████████▍| 444/472 [01:55<00:10,  2.78it/s][ WARN:0@1551.018] global loadsave.cpp:268 findDecoder imread_('/kaggle/input/fitzpatrick17k-original/finalfitz17k/e69165b3455bb3a5a8b33a0f6fd8a1d3.jpg'): can't open/read file: check file path/integrity
Training: 100%|██████████| 472/472 [02:01<00:00,  3.88it/s]
Validation: 100%|██████████| 118/118 [00:22<00:00,  5.16it/s]


Epoch 8/10 | Train Loss: 0.1695 | Val Loss: 0.7129
Metrics: {'accuracy': 0.9149668874172185, 'precision': 0.8136882129277566, 'recall': 0.7867647058823529, 'f1': 0.7999999999999999, 'roc_auc': 0.9468760975157214}
Model saved! F1-score improved.


Training:  61%|██████    | 287/472 [01:12<00:37,  4.87it/s][ WARN:0@1653.288] global loadsave.cpp:268 findDecoder imread_('/kaggle/input/fitzpatrick17k-original/finalfitz17k/2ea034fc482e9bd21a5dfa2506cc5d6b.jpg'): can't open/read file: check file path/integrity
Training: 100%|██████████| 472/472 [02:01<00:00,  3.90it/s]
Validation: 100%|██████████| 118/118 [00:22<00:00,  5.14it/s]


Epoch 9/10 | Train Loss: 0.1563 | Val Loss: 0.6101
Metrics: {'accuracy': 0.909933774834437, 'precision': 0.7644444444444445, 'recall': 0.8431372549019608, 'f1': 0.8018648018648018, 'roc_auc': 0.9551412192115779}
Model saved! F1-score improved.


Training:   6%|▋         | 30/472 [00:09<01:59,  3.70it/s][ WARN:0@1733.770] global loadsave.cpp:268 findDecoder imread_('/kaggle/input/fitzpatrick17k-original/finalfitz17k/e69165b3455bb3a5a8b33a0f6fd8a1d3.jpg'): can't open/read file: check file path/integrity
Training:  18%|█▊        | 83/472 [00:22<01:12,  5.40it/s][ WARN:0@1747.576] global loadsave.cpp:268 findDecoder imread_('/kaggle/input/fitzpatrick17k-original/finalfitz17k/e69165b3455bb3a5a8b33a0f6fd8a1d3.jpg'): can't open/read file: check file path/integrity
Training:  53%|█████▎    | 252/472 [01:06<00:55,  3.93it/s][ WARN:0@1791.735] global loadsave.cpp:268 findDecoder imread_('/kaggle/input/fitzpatrick17k-original/finalfitz17k/bccab73c32aba48c90602de9bb458272.jpg'): can't open/read file: check file path/integrity
Training: 100%|██████████| 472/472 [02:03<00:00,  3.81it/s]
Validation: 100%|██████████| 118/118 [00:22<00:00,  5.16it/s]


Epoch 10/10 | Train Loss: 0.1429 | Val Loss: 0.6020
Metrics: {'accuracy': 0.9101986754966888, 'precision': 0.7647058823529411, 'recall': 0.8443627450980392, 'f1': 0.8025626092020967, 'roc_auc': 0.9567367585763608}
Model saved! F1-score improved.


In [45]:
model.load_state_dict(torch.load("best_multimodal_model.pth"))

<All keys matched successfully>