In [1]:
import os
import gc
#import cv2
import math
import copy
import time
import random
import glob

# Plotting
from matplotlib import pyplot as plt
from matplotlib import image as mpimg
import seaborn as sns
from PIL import Image

# For data manipulation
import numpy as np
import pandas as pd

# Pytorch
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.cuda import amp
import torchvision
from transformers import AutoImageProcessor, ResNetForImageClassification
from datasets import load_dataset
from torcheval.metrics.functional import binary_auroc
from torch.optim import lr_scheduler

# Sklearn
from sklearn.model_selection import KFold, GroupKFold
from sklearn.preprocessing import OrdinalEncoder
from sklearn.metrics import roc_curve, auc, roc_auc_score

import albumentations as A
from albumentations.pytorch import ToTensorV2

In [None]:
ls | grep "v3"

v3_AUROC0.4903_Loss0.3193_epoch1_lossauroc.pth                       
v3_AUROC0.4968_Loss0.2766_epoch2_lossauroc.pth                                          
v3_AUROC0.4988_Loss0.2975_epoch1_lossauroc.pth                                     
v3_AUROC0.5111_Loss0.2549_epoch1_lossauroc.pth                          

In [3]:
# Dataset root. Set the ISIC_ROOT_DIR environment variable to run the notebook
# on another machine; when it is unset the original local path is used.
ROOT_DIR = os.environ.get("ISIC_ROOT_DIR", "/Users/Yashwanth/isic")
# Directory that holds the training JPEGs (one file per isic_id).
TRAIN_DIR = f'{ROOT_DIR}/train-image/image'

# Central hyper-parameter / runtime configuration read by the rest of the notebook.
CONFIG = {
    "seed": 42,
    # Sizes used by the dataset classes to decide how many samples each phase holds.
    "n_samples_train": 5000,
    "n_samples_val": 5000,
    "epochs": 50,
    # Side length (pixels) that the Albumentations pipelines resize images to.
    "img_size": 384,
    "model_name": "tf_efficientnet_b0_ns",
    # NOTE(review): this is a Kaggle input path while ROOT_DIR is a local path, and the
    # file name says "b0_aa" while model_name says "b0_ns" — confirm both are intended.
    "checkpoint_path": "/kaggle/input/tf-efficientnet/pytorch/tf-efficientnet-b0/1/tf_efficientnet_b0_aa-827b6e33.pth",
    "train_batch_size": 400,
    "valid_batch_size": 400,
    "learning_rate": 1e-4,
    "scheduler": 'CosineAnnealingLR',
    "min_lr": 1e-6,
    "T_max": 500,
    "weight_decay": 1e-6,
    "fold": 4,
    "n_fold": 5,
    "n_accumulate": 1,
    # First CUDA device when available, otherwise the CPU.
    "device": torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
}

# NOTE(review): the `ls | grep "v3"` listing earlier in the notebook shows checkpoints
# with a higher AUROC / lower loss in their file names than this one — confirm that
# this is really the checkpoint meant to be treated as "best".
BEST_WEIGHT = ROOT_DIR + '/' + 'v3_AUROC0.4903_Loss0.3193_epoch1_lossauroc.pth'

In [4]:
# Seed every random number generator the notebook uses so runs are repeatable.
seed = CONFIG['seed']
# Python's built-in `random` decides positive vs. negative in the datasets'
# __getitem__, so it has to be seeded as well.
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)

# Data

In [5]:
# Read the train and test metadata tables stored directly under ROOT_DIR.
train_df = pd.read_csv(f"{ROOT_DIR}/train-metadata.csv")
test_df = pd.read_csv(f"{ROOT_DIR}/test-metadata.csv")

# Stack both tables into a single frame with a fresh 0..n-1 index.
all_df = pd.concat([train_df, test_df], ignore_index=True)

# Preview the first rows of each table.
for frame in (train_df, test_df):
    display(frame.head())

  train_df = pd.read_csv(ROOT_DIR+"/train-metadata.csv")


Unnamed: 0,isic_id,target,patient_id,age_approx,sex,anatom_site_general,clin_size_long_diam_mm,image_type,tbp_tile_type,tbp_lv_A,...,lesion_id,iddx_full,iddx_1,iddx_2,iddx_3,iddx_4,iddx_5,mel_mitotic_index,mel_thick_mm,tbp_lv_dnn_lesion_confidence
0,ISIC_0015670,0,IP_1235828,60.0,male,lower extremity,3.04,TBP tile: close-up,3D: white,20.244422,...,,Benign,Benign,,,,,,,97.517282
1,ISIC_0015845,0,IP_8170065,60.0,male,head/neck,1.1,TBP tile: close-up,3D: white,31.71257,...,IL_6727506,Benign,Benign,,,,,,,3.141455
2,ISIC_0015864,0,IP_6724798,60.0,male,posterior torso,3.4,TBP tile: close-up,3D: XP,22.57583,...,,Benign,Benign,,,,,,,99.80404
3,ISIC_0015902,0,IP_4111386,65.0,male,anterior torso,3.22,TBP tile: close-up,3D: XP,14.242329,...,,Benign,Benign,,,,,,,99.989998
4,ISIC_0024200,0,IP_8313778,55.0,male,anterior torso,2.73,TBP tile: close-up,3D: white,24.72552,...,,Benign,Benign,,,,,,,70.44251


Unnamed: 0,isic_id,patient_id,age_approx,sex,anatom_site_general,clin_size_long_diam_mm,image_type,tbp_tile_type,tbp_lv_A,tbp_lv_Aext,...,tbp_lv_radial_color_std_max,tbp_lv_stdL,tbp_lv_stdLExt,tbp_lv_symm_2axis,tbp_lv_symm_2axis_angle,tbp_lv_x,tbp_lv_y,tbp_lv_z,attribution,copyright_license
0,ISIC_0015657,IP_6074337,45.0,male,posterior torso,2.7,TBP tile: close-up,3D: XP,22.80433,20.00727,...,0.304827,1.281532,2.299935,0.479339,20,-155.0651,1511.222,113.9801,Memorial Sloan Kettering Cancer Center,CC-BY
1,ISIC_0015729,IP_1664139,35.0,female,lower extremity,2.52,TBP tile: close-up,3D: XP,16.64867,9.657964,...,0.0,1.27194,2.011223,0.42623,25,-112.36924,629.535889,-15.019287,"Frazer Institute, The University of Queensland...",CC-BY
2,ISIC_0015740,IP_7142616,65.0,male,posterior torso,3.16,TBP tile: close-up,3D: XP,24.25384,19.93738,...,0.230742,1.080308,2.705857,0.366071,110,-84.29282,1303.978,-28.57605,FNQH Cairns,CC-BY


In [6]:
# Collect the path of every training JPEG, sorted lexicographically.
train_images = glob.glob(f"{TRAIN_DIR}/*.jpg")
train_images.sort()

In [7]:
## Images

def get_train_file_path(image_id):
    """Return the path of the training JPEG named ``image_id`` inside TRAIN_DIR."""
    return "{}/{}.jpg".format(TRAIN_DIR, image_id)

def show_im(image_id):
    """Read an image with matplotlib and show it.

    Despite the parameter name, ``image_id`` is handed straight to
    ``mpimg.imread``, so it must be something that function can read
    (for example a file path), not a bare ISIC id.
    """
    pixels = mpimg.imread(image_id)
    plt.imshow(pixels)
    plt.show()

In [8]:
# Sanity check: print the array shape of the first few training images.
# Slicing (rather than indexing with range(10)) avoids an IndexError when
# fewer than 10 images were found.
for image_path in train_images[:10]:
    image = mpimg.imread(image_path)
    print(image.shape)
    

(139, 139, 3)
(127, 127, 3)
(145, 145, 3)
(109, 109, 3)
(125, 125, 3)
(119, 119, 3)
(117, 117, 3)
(157, 157, 3)
(111, 111, 3)
(127, 127, 3)


In [9]:
# Work on a copy so that train_df itself is left untouched.
df = train_df.copy()

# Derive every row's image path from its ISIC id and normalise the path
# separators to forward slashes in one go.
df['image_path'] = (
    df['isic_id']
    .map(get_train_file_path)
    .str.replace('\\', '/', regex=False)
)
# Apply the same separator normalisation to the globbed file list so that
# the two sides compare equal.
train_images = [path.replace('\\', '/') for path in train_images]

# Drop metadata rows that have no matching image file on disk.
has_image = df["image_path"].isin(train_images)
df = df[has_image].reset_index(drop=True)

# Report the size of the resulting dataset.
is_negative = df["target"] == 0
print("# of images , # of positive cases, # of negative cases, # of patients")
print(df.shape, df["target"].sum(), is_negative.sum(), df["patient_id"].nunique())

# Per-class views of the frame.
df_positive = df[df["target"] == 1].reset_index(drop=True)
df_negative = df[is_negative].reset_index(drop=True)


# of images , # of positive cases, # of negative cases, # of patients
(401059, 56) 393 400666 1042


## Start of Deep Learning: PyTorch

In [12]:
# TODO (original author's note): change this — the pipelines were taken from community models.
#
# Albumentations pipelines keyed by phase name. The dataset classes call the selected
# pipeline as `pipeline(image=<array>)["image"]`.
data_transforms = {
    "train": A.Compose([
        # 1) Resize all images to the model's expected input size.
        A.Resize(CONFIG['img_size'], CONFIG['img_size']),
        
        # 2) Random rotation in 90-degree steps, applied half of the time.
        A.RandomRotate90(p=0.5),
        
        # 3) Explicit replacement for A.Flip: when triggered (p=0.5), exactly one of
        #    the two flips is applied.
        A.OneOf([
            A.HorizontalFlip(p=1.0),  # mirror left/right
            A.VerticalFlip(p=1.0)     # mirror up/down
        ], p=0.5),                    # overall 50% chance to apply a flip
        
        # 4) Downscale with the library's default settings, applied 25% of the time.
        A.Downscale(p=0.25),
        
        # 5) Geometric jitter: shift up to 10%, scale up to 15%, rotate up to 60 degrees.
        A.ShiftScaleRotate(
            shift_limit=0.1, 
            scale_limit=0.15, 
            rotate_limit=60, 
            p=0.5
        ),
        
        # 6) Colour jitter (hue/saturation/value).
        #    NOTE(review): Albumentations' documented defaults for these limits are on
        #    the order of 20-30, so 0.2 may be close to a no-op — confirm the intended scale.
        A.HueSaturationValue(
            hue_shift_limit=0.2, 
            sat_shift_limit=0.2, 
            val_shift_limit=0.2, 
            p=0.5
        ),
        
        # 7) Contrast & brightness jitter within +/-0.1.
        A.RandomBrightnessContrast(
            brightness_limit=(-0.1, 0.1), 
            contrast_limit=(-0.1, 0.1), 
            p=0.5
        ),
        
        # 8) Normalize with the ImageNet channel means / standard deviations.
        A.Normalize(
            mean=[0.485, 0.456, 0.406], 
            std=[0.229, 0.224, 0.225], 
            max_pixel_value=255.0, 
            p=1.0
        ),
        
        # 9) Convert to a PyTorch tensor at the very end.
        ToTensorV2()
    ], p=1.0),

    "validation": A.Compose([
        # Validation should be deterministic: resize + normalize only.
        A.Resize(CONFIG['img_size'], CONFIG['img_size']),
        A.Normalize(
            mean=[0.485, 0.456, 0.406], 
            std=[0.229, 0.224, 0.225], 
            max_pixel_value=255.0, 
            p=1.0
        ),
        ToTensorV2()
    ], p=1.0),

    # Alternate train-time pipeline: same augmentations as "train" except that
    # RandomRotate90 is disabled and there is a second Resize.
    # NOTE(review): unlike "train" and "validation", this pipeline has no A.Normalize
    # step before ToTensorV2 — confirm that the model is meant to receive
    # unnormalized pixel values when this pipeline is used.
    "train_testing": A.Compose([
        A.Resize(CONFIG['img_size'], CONFIG['img_size']),
        
        # Optional: 90-degree rotations are intentionally left disabled here.
        # A.RandomRotate90(p=0.5),
        
        # Same explicit replacement for A.Flip as in "train".
        A.OneOf([
            A.HorizontalFlip(p=1.0),
            A.VerticalFlip(p=1.0)
        ], p=0.5),
        
        A.Downscale(p=0.25),
        A.ShiftScaleRotate(
            shift_limit=0.1, 
            scale_limit=0.15, 
            rotate_limit=60, 
            p=0.5
        ),
        
        # Second resize to the same target size; kept to match the original structure
        # and removable if the preceding transforms never change the image size.
        A.Resize(CONFIG['img_size'], CONFIG['img_size']),
        
        # NOTE(review): same 0.2 limits as in "train" — see the note there.
        A.HueSaturationValue(
            hue_shift_limit=0.2, 
            sat_shift_limit=0.2, 
            val_shift_limit=0.2, 
            p=0.5
        ),
        A.RandomBrightnessContrast(
            brightness_limit=(-0.1, 0.1), 
            contrast_limit=(-0.1, 0.1), 
            p=0.5
        ),
        ToTensorV2()
    ], p=1.0)
}

  original_init(self, **validated_kwargs)


In [13]:
class ISICDataset(Dataset):
    """Class-balanced, in-memory image dataset.

    Every image is read and transformed once, inside ``__init__``, and kept in
    two pools: ``positive_samples`` (target 1) and ``negative_samples``
    (target 0). ``__getitem__`` then picks one of the two pools at random with
    equal probability, so the class of the returned item does not depend on
    ``index``.

    Args:
        df: DataFrame with at least a ``target`` column (0/1) and an
            ``image_path`` column.
        phase: ``"train"``, ``"train_testing"`` or ``"validation"``.
            Validation takes the first ``CONFIG["n_samples_val"] // 2``
            shuffled negatives; the two training phases take the following
            ``CONFIG["n_samples_train"] // 2`` negatives.
        transforms: Callable invoked as ``transforms(image=array)["image"]``.
            When ``None`` the raw RGB array is stored unchanged.

    Raises:
        ValueError: If ``phase`` is not one of the supported names.

    NOTE(review): positives are *not* partitioned by phase — every phase uses
    all rows with ``target == 1``, so a train and a validation dataset built
    from the same ``df`` contain the same positive images. Confirm this is
    intended.
    """

    _PHASES = ("train", "train_testing", "validation")

    def __init__(self, df, phase="train", transforms=None):
        # Fail early with a clear message instead of an UnboundLocalError below.
        if phase not in self._PHASES:
            raise ValueError(f"phase must be one of {self._PHASES}, got {phase!r}")

        # Select number of samples based on training/validation phase
        if phase == 'validation':
            offset = 0
            n_samples = CONFIG["n_samples_val"]
        else:  # 'train' and 'train_testing' share the same window
            offset = CONFIG["n_samples_val"]
            n_samples = CONFIG["n_samples_train"]

        # Split positives and negatives
        self.df_positive = df[df["target"] == 1].reset_index(drop=True)
        self.df_negative = df[df["target"] == 0].reset_index(drop=True)

        # Shuffle (fixed seed, so every phase sees the same order) and take this
        # phase's window of negatives
        self.df_negative = self.df_negative.sample(frac=1, random_state=42).reset_index(drop=True)
        self.df_negative = self.df_negative[offset:offset + n_samples // 2]

        self.transforms = transforms
        self.n_samples = n_samples

        # Number of augmented copies per positive image. Guard against a frame
        # without positives (division by zero) and keep at least one copy when
        # there are more positives than n_samples // 2.
        n_positive = len(self.df_positive)
        copies = max(1, n_samples // (2 * n_positive)) if n_positive else 0

        # Augment positives to balance dataset
        self.positive_samples = []
        for img_path in self.df_positive['image_path']:
            img_np = self._load_rgb(img_path)
            for _ in range(copies):
                self.positive_samples.append((self._transform(img_np.copy()), 1))

        # Prepare negatives (one transformed copy each)
        self.negative_samples = []
        for img_path in self.df_negative['image_path']:
            self.negative_samples.append((self._transform(self._load_rgb(img_path)), 0))

    @staticmethod
    def _load_rgb(img_path):
        """Read ``img_path`` as an RGB array and close the file right away."""
        with Image.open(img_path) as img:
            return np.array(img.convert("RGB"))

    def _transform(self, img_np):
        """Apply ``self.transforms`` to one image, or return it as-is when unset."""
        if self.transforms is None:
            return img_np
        return self.transforms(image=img_np)["image"]

    def __len__(self):
        return self.n_samples

    def __getitem__(self, index):
        # Randomly choose positive or negative
        use_positive = random.random() < 0.5
        pool = self.positive_samples if use_positive else self.negative_samples
        if not pool:
            # The chosen class has no samples: fall back to the other pool
            # instead of failing on a modulo by zero.
            pool = self.negative_samples if use_positive else self.positive_samples
        if not pool:
            raise IndexError("ISICDataset holds no samples")
        img, target = pool[index % len(pool)]
        return {'image': img, 'target': target}


This custom dataset handles the class imbalance problem in skin cancer images. It separates positives (cancer) and negatives (non-cancer), shuffles the negatives, and then balances the two pools by generating multiple augmented versions of each positive image when the dataset is constructed. Each item is then drawn from the positive or the negative pool with equal probability, so batches end up with a roughly 50/50 split. This way, the model learns from both classes more evenly, which helps improve performance.

In [14]:
# Inspect the filtered frame; it now carries the image_path column.
df

Unnamed: 0,isic_id,target,patient_id,age_approx,sex,anatom_site_general,clin_size_long_diam_mm,image_type,tbp_tile_type,tbp_lv_A,...,iddx_full,iddx_1,iddx_2,iddx_3,iddx_4,iddx_5,mel_mitotic_index,mel_thick_mm,tbp_lv_dnn_lesion_confidence,image_path
0,ISIC_0015670,0,IP_1235828,60.0,male,lower extremity,3.04,TBP tile: close-up,3D: white,20.244422,...,Benign,Benign,,,,,,,97.517282,/Users/Yashwanth/isic/train-image/image/ISIC_0...
1,ISIC_0015845,0,IP_8170065,60.0,male,head/neck,1.10,TBP tile: close-up,3D: white,31.712570,...,Benign,Benign,,,,,,,3.141455,/Users/Yashwanth/isic/train-image/image/ISIC_0...
2,ISIC_0015864,0,IP_6724798,60.0,male,posterior torso,3.40,TBP tile: close-up,3D: XP,22.575830,...,Benign,Benign,,,,,,,99.804040,/Users/Yashwanth/isic/train-image/image/ISIC_0...
3,ISIC_0015902,0,IP_4111386,65.0,male,anterior torso,3.22,TBP tile: close-up,3D: XP,14.242329,...,Benign,Benign,,,,,,,99.989998,/Users/Yashwanth/isic/train-image/image/ISIC_0...
4,ISIC_0024200,0,IP_8313778,55.0,male,anterior torso,2.73,TBP tile: close-up,3D: white,24.725520,...,Benign,Benign,,,,,,,70.442510,/Users/Yashwanth/isic/train-image/image/ISIC_0...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
401054,ISIC_9999937,0,IP_1140263,70.0,male,anterior torso,6.80,TBP tile: close-up,3D: XP,22.574335,...,Benign,Benign,,,,,,,99.999988,/Users/Yashwanth/isic/train-image/image/ISIC_9...
401055,ISIC_9999951,0,IP_5678181,60.0,male,posterior torso,3.11,TBP tile: close-up,3D: white,19.977640,...,Benign,Benign,,,,,,,99.999820,/Users/Yashwanth/isic/train-image/image/ISIC_9...
401056,ISIC_9999960,0,IP_0076153,65.0,female,anterior torso,2.05,TBP tile: close-up,3D: XP,17.332567,...,Benign,Benign,,,,,,,99.999416,/Users/Yashwanth/isic/train-image/image/ISIC_9...
401057,ISIC_9999964,0,IP_5231513,30.0,female,anterior torso,2.80,TBP tile: close-up,3D: XP,22.288570,...,Benign,Benign,,,,,,,100.000000,/Users/Yashwanth/isic/train-image/image/ISIC_9...


In [15]:
# Metadata columns that are treated as categorical and replaced by integer codes.
disc_cols = [
    'patient_id', 'age_approx', 'sex',
    'anatom_site_general', 'tbp_tile_type',
    'tbp_lv_location', 'tbp_lv_location_simple',
]

# Ordinal encoder producing integer codes:
#   -2 -> category not seen during fit, -1 -> missing value.
category_encoder = OrdinalEncoder(
    categories='auto',
    dtype=int,
    handle_unknown='use_encoded_value',
    unknown_value=-2,
    encoded_missing_value=-1,
)

# Learn the categories from df and encode the same columns in one step.
X_cat = category_encoder.fit_transform(df[disc_cols])

# Overwrite each original column of df with its encoded counterpart.
# NOTE(review): this mutates df in place, so re-running the cell fits the
# encoder on already-encoded values — presumably it should run once per kernel.
for cat_col, codes in zip(disc_cols, X_cat.T):
    df[cat_col] = codes


This block encodes categorical features into numeric form so they can be used in machine learning models. The OrdinalEncoder assigns each unique category an integer, while unknown values get -2 and missing values get -1. Finally, the original categorical columns are replaced with their encoded versions inside the dataframe.

In [16]:
# Inspect the frame again: the columns listed in disc_cols now hold integer codes.
df

Unnamed: 0,isic_id,target,patient_id,age_approx,sex,anatom_site_general,clin_size_long_diam_mm,image_type,tbp_tile_type,tbp_lv_A,...,iddx_full,iddx_1,iddx_2,iddx_3,iddx_4,iddx_5,mel_mitotic_index,mel_thick_mm,tbp_lv_dnn_lesion_confidence,image_path
0,ISIC_0015670,0,120,10,1,2,3.04,TBP tile: close-up,1,20.244422,...,Benign,Benign,,,,,,,97.517282,/Users/Yashwanth/isic/train-image/image/ISIC_0...
1,ISIC_0015845,0,843,10,1,1,1.10,TBP tile: close-up,1,31.712570,...,Benign,Benign,,,,,,,3.141455,/Users/Yashwanth/isic/train-image/image/ISIC_0...
2,ISIC_0015864,0,676,10,1,3,3.40,TBP tile: close-up,0,22.575830,...,Benign,Benign,,,,,,,99.804040,/Users/Yashwanth/isic/train-image/image/ISIC_0...
3,ISIC_0015902,0,397,11,1,0,3.22,TBP tile: close-up,0,14.242329,...,Benign,Benign,,,,,,,99.989998,/Users/Yashwanth/isic/train-image/image/ISIC_0...
4,ISIC_0024200,0,863,9,1,0,2.73,TBP tile: close-up,1,24.725520,...,Benign,Benign,,,,,,,70.442510,/Users/Yashwanth/isic/train-image/image/ISIC_0...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
401054,ISIC_9999937,0,107,12,1,0,6.80,TBP tile: close-up,0,22.574335,...,Benign,Benign,,,,,,,99.999988,/Users/Yashwanth/isic/train-image/image/ISIC_9...
401055,ISIC_9999951,0,563,10,1,3,3.11,TBP tile: close-up,1,19.977640,...,Benign,Benign,,,,,,,99.999820,/Users/Yashwanth/isic/train-image/image/ISIC_9...
401056,ISIC_9999960,0,9,11,0,0,2.05,TBP tile: close-up,0,17.332567,...,Benign,Benign,,,,,,,99.999416,/Users/Yashwanth/isic/train-image/image/ISIC_9...
401057,ISIC_9999964,0,515,4,0,0,2.80,TBP tile: close-up,0,22.288570,...,Benign,Benign,,,,,,,100.000000,/Users/Yashwanth/isic/train-image/image/ISIC_9...


In [17]:
# Continuous (numerical) metadata features
cont_cols = [
    'clin_size_long_diam_mm', 'tbp_lv_A', 'tbp_lv_Aext', 'tbp_lv_B', 'tbp_lv_Bext',
    'tbp_lv_C', 'tbp_lv_Cext', 'tbp_lv_H', 'tbp_lv_Hext', 'tbp_lv_L', 'tbp_lv_Lext',
    'tbp_lv_areaMM2', 'tbp_lv_area_perim_ratio', 'tbp_lv_color_std_mean',
    'tbp_lv_deltaA', 'tbp_lv_deltaB', 'tbp_lv_deltaL', 'tbp_lv_deltaLB',
    'tbp_lv_deltaLBnorm', 'tbp_lv_eccentricity', 'tbp_lv_minorAxisMM',
    'tbp_lv_nevi_confidence', 'tbp_lv_norm_border', 'tbp_lv_norm_color',
    'tbp_lv_perimeterMM', 'tbp_lv_radial_color_std_max', 'tbp_lv_stdL',
    'tbp_lv_stdLExt', 'tbp_lv_symm_2axis', 'tbp_lv_symm_2axis_angle'
]

# Categorical/discrete metadata features
disc_cols = [
    'patient_id', 'age_approx', 'sex', 'anatom_site_general',
    'tbp_tile_type', 'tbp_lv_location', 'tbp_lv_location_simple'
]

# Combine into one list (continuous first, then discrete)
meta_cols = cont_cols + disc_cols


class ISIC_MetaData_Image_Dataset(Dataset):
    """Class-balanced, in-memory dataset of (image, metadata, target) samples.

    Every image is read and transformed once, inside ``__init__``, and stored
    together with its metadata row in one of two pools: ``positive_samples``
    (target 1) and ``negative_samples`` (target 0). ``__getitem__`` picks one of
    the two pools at random with equal probability, so the class of the
    returned item does not depend on ``index``.

    Args:
        df: DataFrame with ``target`` (0/1), ``image_path`` and every column
            named in ``cont_cols`` / ``disc_cols``.
        phase: ``"train"``, ``"train_testing"`` or ``"validation"``.
            Validation takes the first ``CONFIG["n_samples_val"] // 2``
            shuffled negatives; the two training phases take the following
            ``CONFIG["n_samples_train"] // 2`` negatives.
        cont_cols: Continuous metadata columns to include.
        disc_cols: Discrete metadata columns to include (appended after
            ``cont_cols``).
        transforms: Callable invoked as ``transforms(image=array)["image"]``.
            When ``None`` the raw RGB array is stored unchanged.

    Raises:
        ValueError: If ``phase`` is not one of the supported names.

    NOTE(review): positives are *not* partitioned by phase — every phase uses
    all rows with ``target == 1``, so a train and a validation dataset built
    from the same ``df`` contain the same positive images. Confirm this is
    intended.
    """

    _PHASES = ("train", "train_testing", "validation")

    def __init__(self, df, phase="train", cont_cols=cont_cols, disc_cols=disc_cols, transforms=None):
        # Fail early with a clear message instead of an UnboundLocalError below.
        if phase not in self._PHASES:
            raise ValueError(f"phase must be one of {self._PHASES}, got {phase!r}")

        # Decide sample size depending on phase
        if phase == 'validation':
            offset = 0
            n_samples = CONFIG["n_samples_val"]
        else:  # 'train' and 'train_testing' share the same window
            offset = CONFIG["n_samples_val"]
            n_samples = CONFIG["n_samples_train"]

        # Use the column lists the caller passed in. Previously the module-level
        # meta_cols was read here, which silently ignored both parameters.
        self.meta_cols = list(cont_cols) + list(disc_cols)

        # Split into positives and negatives
        self.df_positive = df[df["target"] == 1].reset_index(drop=True)
        self.df_negative = df[df["target"] == 0].reset_index(drop=True)

        # Shuffle negatives (fixed seed, so every phase sees the same order) and
        # take this phase's window
        self.df_negative = self.df_negative.sample(frac=1, random_state=42).reset_index(drop=True)
        self.df_negative = self.df_negative[offset:offset + n_samples // 2]

        self.transforms = transforms
        self.n_samples = n_samples

        # Number of augmented copies per positive image. Guard against a frame
        # without positives (division by zero) and keep at least one copy when
        # there are more positives than n_samples // 2.
        n_positive = len(self.df_positive)
        copies = max(1, n_samples // (2 * n_positive)) if n_positive else 0

        # Build positive samples (image + metadata + label). The metadata matrix
        # is extracted once instead of re-slicing the frame for every row.
        self.positive_samples = []
        positive_meta = self.df_positive[self.meta_cols].to_numpy()
        for idx, img_path in enumerate(self.df_positive['image_path']):
            img_np = self._load_rgb(img_path)
            meta_data = torch.tensor(positive_meta[idx])
            for _ in range(copies):
                self.positive_samples.append((self._transform(img_np.copy()), meta_data, 1))

        # Build negative samples (image + metadata + label)
        self.negative_samples = []
        negative_meta = self.df_negative[self.meta_cols].to_numpy()
        for idx, img_path in enumerate(self.df_negative['image_path']):
            img_np = self._load_rgb(img_path)
            meta_data = torch.tensor(negative_meta[idx])
            self.negative_samples.append((self._transform(img_np), meta_data, 0))

    @staticmethod
    def _load_rgb(img_path):
        """Read ``img_path`` as an RGB array and close the file right away."""
        with Image.open(img_path) as img:
            return np.array(img.convert("RGB"))

    def _transform(self, img_np):
        """Apply ``self.transforms`` to one image, or return it as-is when unset."""
        if self.transforms is None:
            return img_np
        return self.transforms(image=img_np)["image"]

    def __len__(self):
        return self.n_samples

    def __getitem__(self, index):
        # Randomly choose between positive and negative
        use_positive = random.random() < 0.5
        pool = self.positive_samples if use_positive else self.negative_samples
        if not pool:
            # The chosen class has no samples: fall back to the other pool
            # instead of failing on a modulo by zero.
            pool = self.negative_samples if use_positive else self.positive_samples
        if not pool:
            raise IndexError("ISIC_MetaData_Image_Dataset holds no samples")
        img, meta_vals, target = pool[index % len(pool)]

        return {
            'image': img,
            'meta_vals': meta_vals,
            'target': target
        }


This dataset class combines image data with metadata features (continuous + categorical). For each sample, it loads the image, applies transformations, and also extracts patient-level/tabular metadata as a tensor. The dataset balances positives and negatives by augmenting positive samples and sampling negatives evenly. The __getitem__ returns a dictionary containing the transformed image, the metadata values, and the target label, making it suitable for models that process both images and tabular inputs together.

In [18]:
# Alternative setups kept for reference (image-only dataset, with either pipeline):
#train_dataset = ISICDataset(df, phase = "train", transforms=data_transforms["train"])
#valid_dataset = ISICDataset(df, phase = "validation", transforms=data_transforms["validation"])
#train_dataset = ISICDataset(df, phase = "train", transforms=data_transforms["train_testing"])
#valid_dataset = ISICDataset(df, phase = "validation", transforms=data_transforms["train_testing"])

# Active setup: image + metadata dataset for both splits.
# NOTE(review): the validation split also uses the randomized "train_testing"
# pipeline rather than the "validation" one — confirm this is intended.
active_transforms = data_transforms["train_testing"]
train_dataset = ISIC_MetaData_Image_Dataset(df, phase="train", transforms=active_transforms)
valid_dataset = ISIC_MetaData_Image_Dataset(df, phase="validation", transforms=active_transforms)

# Loader settings shared by both splits.
shared_loader_args = dict(num_workers=3, pin_memory=True)

# Training batches are shuffled and the last incomplete batch is dropped;
# validation keeps dataset order and every batch.
train_loader = DataLoader(
    train_dataset,
    batch_size=CONFIG['train_batch_size'],
    shuffle=True,
    drop_last=True,
    **shared_loader_args,
)
valid_loader = DataLoader(
    valid_dataset,
    batch_size=CONFIG['valid_batch_size'],
    shuffle=False,
    **shared_loader_args,
)

This cell creates the PyTorch DataLoader objects for training and validation. Both are built on ISIC_MetaData_Image_Dataset, which returns image data and metadata together. The training loader shuffles the data to avoid learning order bias, while the validation loader keeps shuffle=False for consistency (note that the dataset itself still chooses the class of each item at random). Batching, multiprocessing (num_workers), and pinned memory improve efficiency during training.

In [19]:
# Disabled code: the patient-grouped fold assignment below is wrapped in a string
# literal, so none of it is executed; the cell only evaluates (and echoes) the string.
'''
gkf = GroupKFold(n_splits=CONFIG['n_fold'])
for fold, (train_index, valid_index) in enumerate(gkf.split(train_df, train_df.target, train_df.patient_id)):
    train_df.loc[valid_index, "fold"] = int(fold)
    
display(train_df.groupby('fold').size()), 
display(train_df.head())
'''

'\ngkf = GroupKFold(n_splits=CONFIG[\'n_fold\'])\nfor fold, (train_index, valid_index) in enumerate(gkf.split(train_df, train_df.target, train_df.patient_id)):\n    train_df.loc[valid_index, "fold"] = int(fold)\n    \ndisplay(train_df.groupby(\'fold\').size()), \ndisplay(train_df.head())\n'

The block above is currently disabled: it is wrapped in a string literal, so nothing in it is executed. When enabled, it uses GroupKFold cross-validation, where grouping by patient_id ensures that images from the same patient do not leak across training and validation sets. Each sample in the dataframe is assigned a fold number, stored in a new "fold" column. The distribution across folds is then displayed to confirm balance, along with the first few rows of the updated dataframe.