In [1]:
# Install fastai with pip via a shell escape.
# NOTE(review): `!pip` uses whichever pip is first on PATH; `%pip install fastai==<version>`
# would target the running kernel's environment and pin the version — confirm which version to pin.
!pip install fastai





[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
from fastai.vision.all import *
from fastcore.parallel import *
import fastai
import pandas as pd
from pathlib import Path
import torch
from torch.utils.data import Dataset

In [3]:
# Install timm with pip via a shell escape (timm is imported in the next cell).
# NOTE(review): `!pip` uses whichever pip is first on PATH; `%pip install timm==<version>`
# would target the running kernel's environment and pin the version — confirm which version to pin.
!pip install timm





[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
import timm
#model = timm.create_model("efficientformerv2_s2", pretrained=True)
#torch.save(model.state_dict(), "efficientformerv2_s2_weights.pth")

In [5]:
import os

# Directory that holds the ISIC files. Set the ISIC_DATA_DIR environment variable to
# point the notebook at another location; when it is unset, the original
# machine-specific folder is used, so existing runs behave exactly as before.
DATA_DIR = Path(os.environ.get("ISIC_DATA_DIR", "C:/Users/Yashwanth/isic"))

# Metadata table (one row per image) read into `df` by the next cell.
metadata_path = DATA_DIR / "train-metadata.csv"

# HDF5 archive that the image dataset opens to fetch images by ID.
hdf5_file = DATA_DIR / "train-image.hdf5"

In [6]:
# Read the metadata CSV at `metadata_path` into `df`, the DataFrame every later cell works from.
# NOTE(review): the captured output below echoes this line, which looks like a pandas parser
# warning (possibly mixed column dtypes) — confirm, and consider passing `dtype=` or `low_memory=False`.
df=pd.read_csv(metadata_path)

  df=pd.read_csv(metadata_path)


In [7]:
import os

# Print the full path of every CSV file found anywhere beneath /kaggle/input.
# Nothing is printed when that directory does not exist.
csv_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk("/kaggle/input")
    for name in names
    if name.endswith(".csv")
)
for csv_path in csv_paths:
    print(csv_path)

In [8]:

# Names of the categorical metadata columns.
new_cat_columns = [
    'sex',
    'anatom_site_general',
    'tbp_lv_location',
    'tbp_lv_location_simple',
]


In [9]:
# Label column and image-ID column.
y_col = 'target'
image_col = 'isic_id'

# Columns that are excluded from the feature set.
columns_to_drop = [
    'copyright_license', 'attribution', 'image_type',
    'iddx_1', 'iddx_2', 'iddx_3', 'iddx_4', 'iddx_5', 'iddx_full',
    'mel_mitotic_index', 'mel_thick_mm', 'tbp_tile_type',
    'tbp_lv_dnn_lesion_confidence', 'lesion_id',
]

# Categorical feature columns; `new_cat_columns` is a second name for the same list object.
cat_names = ['sex', 'anatom_site_general', 'tbp_lv_location', 'tbp_lv_location_simple']
new_cat_columns = cat_names

# Continuous features: every column of `df` that is not categorical, not the label,
# not an ID column and not in the drop list. Column order follows `df.columns`.
_non_continuous = set(cat_names) | {y_col, image_col, 'patient_id'} | set(columns_to_drop)
cont_names = [col for col in df.columns if col not in _non_continuous]


# Create a custom dataset that includes both image and tabular data:

In [10]:
# Required imports for custom dataset class
from torch.utils.data import Dataset
import h5py
import numpy as np
import torch

# Define the ImageTabDataset class to combine image + tabular data
class ImageTabDataset(Dataset):
    """Map-style dataset pairing each metadata row with its image from an HDF5 file.

    Every item is the tuple ``(image, categorical, continuous, label)``.

    Args:
        df: Metadata table. Its index is reset, so items are addressed 0..len-1.
        hdf5_file: Path of the HDF5 file; entries are looked up by ``str(row[img_key])``.
        cat_cols: Columns returned as one int64 tensor. They must already hold
            integer codes; non-numeric values raise when the item is built.
        cont_cols: Columns returned as one float32 tensor.
        target_col: Column holding the label, returned as an int64 scalar tensor.
        img_key: Column whose value is the HDF5 key of the row's image.

    The HDF5 file is opened on first item access rather than in ``__init__``, and
    the handle is left out of the pickled state. An open h5py handle cannot be
    pickled, which is what a ``DataLoader`` with ``num_workers > 0`` needs to do
    when it starts workers by spawning; each process now opens its own handle.
    """

    def __init__(self, df, hdf5_file, cat_cols, cont_cols, target_col, img_key='image_id'):
        self.df = df.reset_index(drop=True)
        self.hdf5_file = hdf5_file
        self.hdf5 = None  # opened lazily by _open_hdf5()
        self.cat_cols = cat_cols
        self.cont_cols = cont_cols
        self.target_col = target_col
        self.img_key = img_key

    def _open_hdf5(self):
        """Return the HDF5 handle of the current process, opening it on first use."""
        if self.hdf5 is None:
            import h5py  # local import: the handle is only needed once items are read
            self.hdf5 = h5py.File(self.hdf5_file, 'r')
        return self.hdf5

    def close(self):
        """Close the HDF5 handle if one is open; it is reopened on the next access."""
        if self.hdf5 is not None:
            self.hdf5.close()
            self.hdf5 = None

    def __getstate__(self):
        # Never ship an open file handle to another process.
        state = self.__dict__.copy()
        state['hdf5'] = None
        return state

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]

        # Load image from HDF5 using the image ID.
        # NOTE(review): assumes each entry is stored as a numeric array, (C, H, W)
        # expected — confirm the file does not hold encoded image bytes instead.
        img_id = str(row[self.img_key])
        img = self._open_hdf5()[img_id][()]
        img = torch.tensor(img, dtype=torch.float32)

        # A row taken from a mixed-dtype frame is an object Series, and torch cannot
        # convert object arrays, so cast to a concrete numeric dtype first.
        cat_values = torch.tensor(row[self.cat_cols].to_numpy(dtype=np.int64), dtype=torch.long)
        cont_values = torch.tensor(row[self.cont_cols].to_numpy(dtype=np.float32), dtype=torch.float32)

        # Extract the label
        label = torch.tensor(row[self.target_col], dtype=torch.long)

        return img, cat_values, cont_values, label


In [11]:
# Importing required libraries: PyTorch, FastAI, Sklearn, and others
import h5py
import numpy as np
import timm
import torch
import torch.nn as nn
import torchvision.models as models
from fastai.tabular.all import TabularPandas, get_emb_sz, TabularModel, Learner, CrossEntropyLossFlat, accuracy
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
import torch.optim as optim

# Train on the GPU when CUDA is usable, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# --- Stratified Train-Validation Split ---

# Custom function to split the dataset into training and validation sets 
# while preserving the original class distribution
def stratified_splitter(df, valid_pct=0.25, seed=42):
    """Split ``df.index`` into training and validation parts, stratified on ``df[y_col]``.

    ``valid_pct`` is forwarded to ``train_test_split`` as ``test_size`` and ``seed``
    as ``random_state``. The target column name is read from the module-level ``y_col``.

    Returns:
        ``(train_indices, valid_indices)``: the two parts of ``df.index``.
    """
    split_kwargs = dict(test_size=valid_pct, stratify=df[y_col], random_state=seed)
    train_part, valid_part = train_test_split(df.index, **split_kwargs)
    return train_part, valid_part

# Split the metadata once, stratified on the target column.
train_indices, valid_indices = stratified_splitter(df)

# The splitter divides `df.index`, so it returns index *labels*. Select rows by label
# with `.loc`; `.iloc` only gives the same rows while the index happens to be 0..n-1.
train_df = df.loc[train_indices]
valid_df = df.loc[valid_indices]

# --- Dataset and DataLoader Preparation ---

# Wrap tabular + image data into the custom datasets. The image ID column is passed
# explicitly: the dataset's default key is 'image_id', whereas this notebook's
# ID column is `image_col`.
train_dataset = ImageTabDataset(train_df, hdf5_file, new_cat_columns, cont_names, y_col,
                                img_key=image_col)
valid_dataset = ImageTabDataset(valid_df, hdf5_file, new_cat_columns, cont_names, y_col,
                                img_key=image_col)

# Batch the datasets; only the training loader shuffles.
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=4)
valid_loader = DataLoader(valid_dataset, batch_size=128, shuffle=False, num_workers=4)

# Bundle both loaders into the object the fastai Learner consumes.
dls = DataLoaders(train_loader, valid_loader)

# --- Tabular Embedding Sizes ---

# This helper function calculates the embedding sizes required 
# for each categorical variable based on its number of unique values
def get_emb_szs(df, new_cat_columns):
    """Return one ``(n_embeddings, embedding_dim)`` pair per categorical column.

    ``n_embeddings`` is the column's number of distinct values plus one, and
    ``embedding_dim`` is half of that (integer division), capped at 50.
    """
    sizes = []
    for col in new_cat_columns:
        n_embeddings = df[col].nunique() + 1
        sizes.append((n_embeddings, min(50, n_embeddings // 2)))
    return sizes

# Number of continuous input features.
n_cont = len(cont_names)

# Number of distinct target values present in the training split.
target_values = train_df[y_col].unique()
out_sz = len(target_values)

# Embedding table sizes, one pair per categorical column, measured on the training split.
emb_szs = get_emb_szs(train_df, new_cat_columns)

# --- Custom Neural Network Models ---

# Model 1: Combines a frozen ResNet50 CNN for image features 
# and a tabular neural network for structured data
class ImageTabularModel_1(nn.Module):
    """ResNet50 image branch and fastai ``TabularModel`` branch joined by a linear head.

    The ResNet50 weights are loaded from the local file ``resnet50-11ad3fa6.pth``.
    All of its parameters are frozen except a newly created final ``fc`` layer that
    outputs ``out_sz`` values. The tabular branch also outputs ``out_sz`` values,
    and ``head`` maps the concatenated ``2 * out_sz`` values to ``out_sz`` outputs.

    Args:
        emb_szs, n_cont, out_sz, layers, ps: forwarded to ``TabularModel`` in that order;
            ``out_sz`` also sizes the image branch and the head.
    """

    def __init__(self, emb_szs, n_cont, out_sz, layers, ps=0.5):
        super().__init__()

        # Image branch: load the checkpoint, freeze everything, then swap in a trainable fc.
        backbone = models.resnet50(weights=None)
        backbone.load_state_dict(torch.load('resnet50-11ad3fa6.pth'))
        for weight in backbone.parameters():
            weight.requires_grad = False
        backbone.fc = nn.Linear(backbone.fc.in_features, out_sz)
        for weight in backbone.fc.parameters():
            weight.requires_grad = True
        self.cnn = backbone

        # Tabular branch.
        self.tab_net = TabularModel(emb_szs, n_cont, out_sz, layers, ps)

        # Fusion layer over the two branches' outputs.
        self.head = nn.Linear(out_sz * 2, out_sz)

    def forward(self, x_img, x_cat, x_cont):
        """Return the head's output for a batch of images and tabular features."""
        # The tabular branch is fed int64 categorical codes.
        x_cat = x_cat if x_cat.dtype == torch.long else x_cat.long()

        branch_outputs = [self.cnn(x_img), self.tab_net(x_cat, x_cont)]
        return self.head(torch.cat(branch_outputs, dim=1))

# Model 2: Uses EfficientFormerV2 instead of ResNet50 for the image component
class ImageTabularModel_2(nn.Module):
    """EfficientFormerV2 image branch and fastai ``TabularModel`` branch joined by a linear head.

    The image branch is the pretrained timm model ``efficientformerv2_s2`` with its
    classifier left in place, so its output is concatenated with the tabular
    branch's ``out_sz`` outputs and mapped by ``head`` to ``out_sz`` outputs.

    Args:
        emb_szs, n_cont, out_sz, layers, ps: forwarded to ``TabularModel`` in that order;
            ``out_sz`` also sizes the head.
    """

    def __init__(self, emb_szs, n_cont, out_sz, layers, ps=0.5):
        super().__init__()
        self.cnn = timm.create_model("efficientformerv2_s2", pretrained=True)
        self.tab_net = TabularModel(emb_szs, n_cont, out_sz, layers, ps)

        # The head sees the image branch's outputs plus `out_sz` tabular outputs.
        # Derive the width instead of hard-coding 1002, which is only correct for
        # a 1000-way classifier combined with out_sz == 2.
        n_img_out = getattr(self.cnn, "num_classes", 1000)
        self.head = nn.Linear(n_img_out + out_sz, out_sz)

    def forward(self, x_img, x_cat, x_cont):
        """Return the head's output for a batch of images and tabular features."""
        # The tabular branch is fed int64 categorical codes.
        if x_cat.dtype != torch.long:
            x_cat = x_cat.long()
        img_out = self.cnn(x_img)
        tab_out = self.tab_net(x_cat, x_cont)
        combined = torch.cat([img_out, tab_out], dim=1)
        return self.head(combined)

# --- Initialize Models and Learners ---

# Build both hybrid models with the same tabular layer sizes and move them to `device`.
model_1 = ImageTabularModel_1(emb_szs, n_cont, out_sz, layers=[512, 256, 128], ps=0.5).to(device)
model_2 = ImageTabularModel_2(emb_szs, n_cont, out_sz, layers=[512, 256, 128], ps=0.5).to(device)

# Wrap both models in DataParallel.
# NOTE(review): the wrap is unconditional, also on single-GPU and CPU machines — confirm
# that this is intended.
model_1 = torch.nn.DataParallel(model_1)
model_2 = torch.nn.DataParallel(model_2)

# Loss shared by both learners.
loss_func = CrossEntropyLossFlat()


def build_learner(model, checkpoint_name):
    """Return a fastai Learner for `model` using the shared loaders, loss and optimiser.

    `checkpoint_name` is the file name SaveModelCallback saves under. Each learner
    needs its own name: with the callback's default name both learners write the
    same file, and the second training run overwrites the first model's best weights.
    """
    return Learner(
        dls, model, loss_func=loss_func,
        opt_func=partial(Adam, lr=0.001),
        metrics=accuracy,
        cbs=SaveModelCallback(monitor='valid_loss', fname=checkpoint_name),
        wd=1e-3
    )


learn_1 = build_learner(model_1, 'model_1')
learn_2 = build_learner(model_2, 'model_2')


This section implements a hybrid deep learning model that combines image features (from an image backbone such as ResNet50 or EfficientFormerV2) with structured tabular features (from fastai's TabularModel). The two branches are trained together as a single multi-modal network using PyTorch and fastai, with model selection and training handled by fastai learners and callbacks. The approach is useful in domains such as medical imaging, finance, or e-commerce, where both images and metadata are available.

In [12]:
# Turn on cuDNN benchmark mode so cuDNN picks the fastest convolution algorithms.
# NOTE(review): the benefit assumes a fixed image size across batches — confirm.
# `torch` is already imported by earlier cells; the re-import is harmless.
import torch
torch.backends.cudnn.benchmark = True


In [13]:
# Switch both learners to mixed-precision (float16) training.
learn_1, learn_2 = learn_1.to_fp16(), learn_2.to_fp16()


In [14]:
def _unwrapped(model):
    """Return `model.module` when `model` has that attribute, otherwise `model` itself."""
    return model.module if hasattr(model, "module") else model


def _set_trainable(module, trainable):
    """Set `requires_grad` to `trainable` on every parameter of `module`."""
    for weight in module.parameters():
        weight.requires_grad = trainable


# Learner 1: freeze the whole image backbone, then re-enable its final `fc` layer.
m1 = _unwrapped(learn_1.model)
if hasattr(m1, "cnn"):
    _set_trainable(m1.cnn, False)
    if hasattr(m1.cnn, "fc"):
        _set_trainable(m1.cnn.fc, True)

# Learner 2: freeze the whole image backbone and make sure the fusion head is trainable.
m2 = _unwrapped(learn_2.model)
if hasattr(m2, "cnn"):
    _set_trainable(m2.cnn, False)
    if hasattr(m2, "head"):
        _set_trainable(m2.head, True)


In [15]:
# Re-enable gradients for every parameter of model 2's image backbone (`m2.cnn`).
# NOTE(review): when the notebook is run top to bottom, this reverts the freeze of `m2.cnn`
# applied in the previous cell before any training has run — confirm whether this cell is
# meant to be executed only after a first training pass.
for p in m2.cnn.parameters(): p.requires_grad = True  # then run a short fine_tune


In [16]:
from torch.utils.data import DataLoader

# Training batch size; the validation loader uses twice this.
_fast_bs = 256

# Worker and transfer settings shared by both loaders.
_loader_opts = dict(num_workers=2, pin_memory=True, persistent_workers=True, prefetch_factor=2)

# Only the training loader shuffles.
train_loader = DataLoader(train_dataset, batch_size=_fast_bs, shuffle=True, **_loader_opts)
valid_loader = DataLoader(valid_dataset, batch_size=_fast_bs*2, shuffle=False, **_loader_opts)

# Bundle the new loaders and attach them to both learners.
dls = DataLoaders(train_loader, valid_loader)
learn_1.dls = learn_2.dls = dls


In [None]:
# Peak learning rate for the one-cycle epoch; fine-tuning uses half of it as base LR.
custom_lr = 0.003

# Quick pass, learner 1 first and then learner 2:
# one one-cycle epoch followed by a two-epoch fine_tune.
for learner in (learn_1, learn_2):
    learner.fit_one_cycle(1, lr_max=custom_lr)
    learner.fine_tune(2, base_lr=custom_lr/2)


  self.autocast,self.learn.scaler,self.scales = autocast(dtype=dtype),GradScaler(**self.kwargs),L()
  self.autocast,self.learn.scaler,self.scales = autocast(dtype=dtype),GradScaler(**self.kwargs),L()


epoch,train_loss,valid_loss,accuracy,time




In [None]:
# Peak learning rate for the one-cycle epochs.
custom_lr = 0.005

# Longer pass, learner 1 first and then learner 2: learner 1 gets one one-cycle epoch,
# learner 2 gets three; each is then fine-tuned for five epochs. No base_lr is passed
# to fine_tune, so it uses its own default.
for learner, n_epochs in ((learn_1, 1), (learn_2, 3)):
    learner.fit_one_cycle(n_epochs, lr_max=custom_lr)
    learner.fine_tune(5)


epoch,train_loss,valid_loss,accuracy,time


# Test part

In [None]:
def process_data(df, cat_names, drop_cols=None):
    """Prepare a metadata frame: drop columns, add a per-patient count, impute, one-hot encode.

    Args:
        df: Metadata table; must contain ``patient_id``. The caller's frame is not modified.
        cat_names: Categorical columns to one-hot encode; each becomes ``<name>_<value>`` columns.
        drop_cols: Columns to remove when present. Defaults to the module-level
            ``columns_to_drop``.

    Returns:
        ``(processed_df, new_cat_columns)``, where ``new_cat_columns`` lists exactly
        the dummy columns created by the encoding, in frame order.
    """
    cat_names = list(cat_names)
    to_drop = columns_to_drop if drop_cols is None else drop_cols

    # `drop` returns a new frame, so the assignments below never touch the caller's data.
    df = df.drop(columns=[col for col in to_drop if col in df.columns])

    # Number of rows (pictures) belonging to each patient.
    df['numb_pic'] = df.groupby('patient_id')['patient_id'].transform('count')

    # Fill missing values with the column's mode. A column with no observed value has
    # no mode, so it is left as is instead of raising on an empty result.
    for col in ('age_approx', 'sex'):
        if col in df.columns:
            modes = df[col].mode()
            if not modes.empty:
                df[col] = df[col].fillna(modes.iloc[0])

    # One-hot encode. The new columns are found by comparing against the columns that
    # existed beforehand; matching on the "<name>_" prefix would also report unrelated
    # columns that merely start with a categorical column's name.
    existing = set(df.columns)
    df = pd.get_dummies(df, columns=cat_names, prefix=cat_names)
    new_cat_columns = [col for col in df.columns if col not in existing]

    # 'isic_id' values are returned unchanged (no file extension is appended).
    return df, new_cat_columns