In [1]:
import os
import gc
#import cv2
import math
import copy
import time
import random
import glob

# Plotting
from matplotlib import pyplot as plt
from matplotlib import image as mpimg
import seaborn as sns
from PIL import Image

# For data manipulation
import numpy as np
import pandas as pd

# Pytorch
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.cuda import amp
import torchvision
from transformers import AutoImageProcessor, ResNetForImageClassification
from datasets import load_dataset
from torcheval.metrics.functional import binary_auroc
from torch.optim import lr_scheduler


from sklearn.model_selection import KFold, GroupKFold

import albumentations as A
from albumentations.pytorch import ToTensorV2

In [20]:
ROOT_DIR = "/Users/Yashwanth/isic"
TRAIN_DIR = f'{ROOT_DIR}/train-image/image'

CONFIG = {
    "seed": 42,
    "n_samples_train":10000,
    "n_samples_val":10000, 
    "epochs": 50,
    "img_size": 384,
    "model_name": "tf_efficientnet_b0_ns",
    "checkpoint_path" : "/kaggle/input/tf-efficientnet/pytorch/tf-efficientnet-b0/1/tf_efficientnet_b0_aa-827b6e33.pth",
    "train_batch_size": 400,
    "valid_batch_size": 400,
    "learning_rate": 1e-4,
    "scheduler": 'CosineAnnealingLR',
    "min_lr": 1e-6,
    "T_max": 500,
    "weight_decay": 1e-6,
    "fold" : 4,
    "n_fold": 5,
    "n_accumulate": 1,
    "device": torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
}

BEST_WEIGHT = 'v2_AUROC0.9324_Loss0.0043_epoch22_lossauroc.pth'#'v2_AUROC0.8736_Loss0.0158_epoch12_lossauroc.pth'#'v2_AUROC0.6942_Loss0.2311_epoch26.pth'


In [16]:
CONFIG['device'] = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")#torch.device("cpu")

In [17]:
seed=CONFIG['seed']
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)

## Data

In [21]:
train_df = pd.read_csv(ROOT_DIR+"/train-metadata.csv")
test_df = pd.read_csv(ROOT_DIR+"/test-metadata.csv")

all_df = pd.concat([train_df, test_df]).reset_index(drop=True)
display(train_df.head())
display(test_df.head())

  train_df = pd.read_csv(ROOT_DIR+"/train-metadata.csv")


Unnamed: 0,isic_id,target,patient_id,age_approx,sex,anatom_site_general,clin_size_long_diam_mm,image_type,tbp_tile_type,tbp_lv_A,...,lesion_id,iddx_full,iddx_1,iddx_2,iddx_3,iddx_4,iddx_5,mel_mitotic_index,mel_thick_mm,tbp_lv_dnn_lesion_confidence
0,ISIC_0015670,0,IP_1235828,60.0,male,lower extremity,3.04,TBP tile: close-up,3D: white,20.244422,...,,Benign,Benign,,,,,,,97.517282
1,ISIC_0015845,0,IP_8170065,60.0,male,head/neck,1.1,TBP tile: close-up,3D: white,31.71257,...,IL_6727506,Benign,Benign,,,,,,,3.141455
2,ISIC_0015864,0,IP_6724798,60.0,male,posterior torso,3.4,TBP tile: close-up,3D: XP,22.57583,...,,Benign,Benign,,,,,,,99.80404
3,ISIC_0015902,0,IP_4111386,65.0,male,anterior torso,3.22,TBP tile: close-up,3D: XP,14.242329,...,,Benign,Benign,,,,,,,99.989998
4,ISIC_0024200,0,IP_8313778,55.0,male,anterior torso,2.73,TBP tile: close-up,3D: white,24.72552,...,,Benign,Benign,,,,,,,70.44251


Unnamed: 0,isic_id,patient_id,age_approx,sex,anatom_site_general,clin_size_long_diam_mm,image_type,tbp_tile_type,tbp_lv_A,tbp_lv_Aext,...,tbp_lv_radial_color_std_max,tbp_lv_stdL,tbp_lv_stdLExt,tbp_lv_symm_2axis,tbp_lv_symm_2axis_angle,tbp_lv_x,tbp_lv_y,tbp_lv_z,attribution,copyright_license
0,ISIC_0015657,IP_6074337,45.0,male,posterior torso,2.7,TBP tile: close-up,3D: XP,22.80433,20.00727,...,0.304827,1.281532,2.299935,0.479339,20,-155.0651,1511.222,113.9801,Memorial Sloan Kettering Cancer Center,CC-BY
1,ISIC_0015729,IP_1664139,35.0,female,lower extremity,2.52,TBP tile: close-up,3D: XP,16.64867,9.657964,...,0.0,1.27194,2.011223,0.42623,25,-112.36924,629.535889,-15.019287,"Frazer Institute, The University of Queensland...",CC-BY
2,ISIC_0015740,IP_7142616,65.0,male,posterior torso,3.16,TBP tile: close-up,3D: XP,24.25384,19.93738,...,0.230742,1.080308,2.705857,0.366071,110,-84.29282,1303.978,-28.57605,FNQH Cairns,CC-BY


In [22]:
train_images = sorted(glob.glob(f"{TRAIN_DIR}/*.jpg"))

In [23]:
## Images

def get_train_file_path(image_id):
    return f"{TRAIN_DIR}/{image_id}.jpg"

def show_im(image_id):
    image = mpimg.imread(image_id)
    plt.imshow(image)
    plt.show()

In [24]:
for i in range(10):
    image = mpimg.imread(train_images[i])
    print(image.shape)
    

(139, 139, 3)
(127, 127, 3)
(145, 145, 3)
(109, 109, 3)
(125, 125, 3)
(119, 119, 3)
(117, 117, 3)
(157, 157, 3)
(111, 111, 3)
(127, 127, 3)


In [None]:
df['image_path'] = df['image_path'].str.replace('\\', '/', regex=False)
train_images = [p.replace('\\', '/') for p in train_images]

# --- your original code as-is ---
df = train_df.copy()
df['image_path'] = df['isic_id'].apply(get_train_file_path)
#df['image'] = df['isic_id'].apply(show_im)
df = df[ df["image_path"].isin(train_images) ].reset_index(drop=True)

print("# of images , # of positive cases, # of negative cases, # of patients")
print(df.shape, df.target.sum(), (df["target"] == 0).sum(), df["patient_id"].unique().shape)

df_positive = df[df["target"] == 1].reset_index(drop=True)
df_negative = df[df["target"] == 0].reset_index(drop=True)


# of images , # of positive cases, # of negative cases, # of patients
(401059, 56) 393 400666 (1042,)


## Start of Deep Learning: Pytorch

In [29]:
# Define the data augmentation and preprocessing steps for training, validation, and training-test phases.
# These transformations help the model generalize better by simulating real-world variations in images.

data_transforms = {
    
    # ---------------------------------------------
    # TRAINING TRANSFORMATIONS
    # ---------------------------------------------
    "train": A.Compose([
        A.Resize(CONFIG['img_size'], CONFIG['img_size']),
        A.RandomRotate90(p=0.5),   # Random 90° rotation
        A.HorizontalFlip(p=0.5),   # Left ↔ right flip
        A.VerticalFlip(p=0.5),     # Top ↔ bottom flip
        A.Downscale(p=0.25),
        A.ShiftScaleRotate(
            shift_limit=0.1,
            scale_limit=0.15,
            rotate_limit=60,
            p=0.5
        ),
        A.HueSaturationValue(
            hue_shift_limit=0.2,
            sat_shift_limit=0.2,
            val_shift_limit=0.2,
            p=0.5
        ),
        A.RandomBrightnessContrast(
            brightness_limit=(-0.1, 0.1),
            contrast_limit=(-0.1, 0.1),
            p=0.5
        ),
        A.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
            max_pixel_value=255.0,
            p=1.0
        ),
        ToTensorV2()
    ], p=1.0),
    
    
    # ---------------------------------------------
    # VALIDATION TRANSFORMATIONS
    # ---------------------------------------------
    "validation": A.Compose([
        A.Resize(CONFIG['img_size'], CONFIG['img_size']),
        A.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
            max_pixel_value=255.0,
            p=1.0
        ),
        ToTensorV2()
    ], p=1.0),
    
    
    # ---------------------------------------------
    # TRAIN-TEST (MIXED) TRANSFORMATIONS
    # ---------------------------------------------
    "train_testing": A.Compose([
        A.Resize(CONFIG['img_size'], CONFIG['img_size']),
        # A.RandomRotate90(p=0.5),  # Disabled optional rotation
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.Downscale(p=0.25),
        A.ShiftScaleRotate(
            shift_limit=0.1,
            scale_limit=0.15,
            rotate_limit=60,
            p=0.5
        ),
        A.Resize(CONFIG['img_size'], CONFIG['img_size']),
        A.HueSaturationValue(
            hue_shift_limit=0.2,
            sat_shift_limit=0.2,
            val_shift_limit=0.2,
            p=0.5
        ),
        A.RandomBrightnessContrast(
            brightness_limit=(-0.1, 0.1),
            contrast_limit=(-0.1, 0.1),
            p=0.5
        ),
        ToTensorV2()
    ], p=1.0)
}


  original_init(self, **validated_kwargs)


In [30]:
class ISICDataset(Dataset):
    def __init__(self, df, phase="train", transforms=None):
        # Store how many samples to use and where to start from, based on phase.
        # Idea: 'train' and 'train_testing' use the training count after skipping a validation-sized offset.
        if phase == 'train':
            offset = CONFIG["n_samples_val"]      # skip this many rows at the start for negatives
            n_samples = CONFIG["n_samples_train"] # total items the dataset will report
        elif phase == 'train_testing':
            offset = CONFIG["n_samples_val"]
            n_samples = CONFIG["n_samples_train"]
        elif phase == 'validation':
            offset = 0
            n_samples = CONFIG["n_samples_val"]

        # Split the dataframe by class label for easy handling
        self.df_positive = df[df["target"] == 1].reset_index(drop=True)
        self.df_negative = df[df["target"] == 0].reset_index(drop=True)
        
        # Shuffle negatives once (deterministic with random_state) and then
        # take a window: start at 'offset' and keep half of n_samples.
        self.df_negative = self.df_negative.sample(frac=1, random_state=42).reset_index(drop=True)
        self.df_negative = self.df_negative[offset:offset + n_samples // 2]

        # Save the transform pipeline and the “length” the dataset should present
        self.transforms = transforms
        self.n_samples = n_samples

        # ----- Prebuild transformed POSITIVE samples -----
        # For each positive image, we create multiple augmented versions up front.
        # This acts like oversampling + augmentation for the minority/positive class.
        self.positive_samples = []
        for idx in range(len(self.df_positive)):
            img_path = self.df_positive['image_path'].iloc[idx]
            img = Image.open(img_path).convert("RGB")   # ensure 3-channel RGB
            img_np = np.array(img).copy()               # work with a NumPy array for Albumentations
            # Create several variants from the same image
            for _ in range(n_samples // (2 * len(self.df_positive))):
                transformed_img = self.transforms(image=img_np.copy())["image"]
                self.positive_samples.append((transformed_img, 1))

        # ----- Prebuild transformed NEGATIVE samples -----
        # For each selected negative image, make exactly one transformed version.
        self.negative_samples = []
        for idx in range(len(self.df_negative)):
            img_path = self.df_negative['image_path'].iloc[idx]
            img = Image.open(img_path).convert("RGB")
            img_np = np.array(img).copy()
            transformed_img = self.transforms(image=img_np)["image"]
            self.negative_samples.append((transformed_img, 0))

    def __len__(self):
        # Dataset reports a fixed length, matching the configured sample budget.
        return self.n_samples

    def __getitem__(self, index):
        # Randomly pick from positive or negative pools for rough 50/50 balance.
        if random.random() < 0.5:
            # Map any index to the available positive pool size
            idx = index % len(self.positive_samples)
            img, target = self.positive_samples[idx]
        else:
            # Map any index to the available negative pool size
            idx = index % len(self.negative_samples)
            img, target = self.negative_samples[idx]

        # Return the tensor image and its label in a dict (common pattern for PyTorch Datasets)
        return {
            'image': img,
            'target': target
        }


This dataset balances classes by oversampling positives with multiple augmentations and using a shuffled subset of negatives. Transforms are applied during dataset creation, so the same augmented images are used each epoch.

In [31]:
df

Unnamed: 0,isic_id,target,patient_id,age_approx,sex,anatom_site_general,clin_size_long_diam_mm,image_type,tbp_tile_type,tbp_lv_A,...,iddx_full,iddx_1,iddx_2,iddx_3,iddx_4,iddx_5,mel_mitotic_index,mel_thick_mm,tbp_lv_dnn_lesion_confidence,image_path
0,ISIC_0015670,0,IP_1235828,60.0,male,lower extremity,3.04,TBP tile: close-up,3D: white,20.244422,...,Benign,Benign,,,,,,,97.517282,/Users/Yashwanth/isic/train-image/image/ISIC_0...
1,ISIC_0015845,0,IP_8170065,60.0,male,head/neck,1.10,TBP tile: close-up,3D: white,31.712570,...,Benign,Benign,,,,,,,3.141455,/Users/Yashwanth/isic/train-image/image/ISIC_0...
2,ISIC_0015864,0,IP_6724798,60.0,male,posterior torso,3.40,TBP tile: close-up,3D: XP,22.575830,...,Benign,Benign,,,,,,,99.804040,/Users/Yashwanth/isic/train-image/image/ISIC_0...
3,ISIC_0015902,0,IP_4111386,65.0,male,anterior torso,3.22,TBP tile: close-up,3D: XP,14.242329,...,Benign,Benign,,,,,,,99.989998,/Users/Yashwanth/isic/train-image/image/ISIC_0...
4,ISIC_0024200,0,IP_8313778,55.0,male,anterior torso,2.73,TBP tile: close-up,3D: white,24.725520,...,Benign,Benign,,,,,,,70.442510,/Users/Yashwanth/isic/train-image/image/ISIC_0...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
401054,ISIC_9999937,0,IP_1140263,70.0,male,anterior torso,6.80,TBP tile: close-up,3D: XP,22.574335,...,Benign,Benign,,,,,,,99.999988,/Users/Yashwanth/isic/train-image/image/ISIC_9...
401055,ISIC_9999951,0,IP_5678181,60.0,male,posterior torso,3.11,TBP tile: close-up,3D: white,19.977640,...,Benign,Benign,,,,,,,99.999820,/Users/Yashwanth/isic/train-image/image/ISIC_9...
401056,ISIC_9999960,0,IP_0076153,65.0,female,anterior torso,2.05,TBP tile: close-up,3D: XP,17.332567,...,Benign,Benign,,,,,,,99.999416,/Users/Yashwanth/isic/train-image/image/ISIC_9...
401057,ISIC_9999964,0,IP_5231513,30.0,female,anterior torso,2.80,TBP tile: close-up,3D: XP,22.288570,...,Benign,Benign,,,,,,,100.000000,/Users/Yashwanth/isic/train-image/image/ISIC_9...


In [32]:
#train_dataset = ISICDataset(df, phase = "train", transforms=data_transforms["train"])
#valid_dataset = ISICDataset(df, phase = "validation", transforms=data_transforms["validation"])
train_dataset = ISICDataset(df, phase = "train", transforms=data_transforms["train_testing"])
valid_dataset = ISICDataset(df, phase = "validation", transforms=data_transforms["train_testing"])


train_loader = DataLoader(train_dataset, batch_size=CONFIG['train_batch_size'], 
                          num_workers=3, shuffle=True, pin_memory=True, drop_last=True)
valid_loader = DataLoader(valid_dataset, batch_size=CONFIG['valid_batch_size'], 
                          num_workers=3, shuffle=False, pin_memory=True)

This block prepares training and validation datasets with the chosen augmentation set, then wraps them in DataLoaders for batching and efficient loading during training and evaluation.

In [33]:

gkf = GroupKFold(n_splits=CONFIG['n_fold'])
for fold, (train_index, valid_index) in enumerate(gkf.split(train_df, train_df.target, train_df.patient_id)):
    train_df.loc[valid_index, "fold"] = int(fold)
    
display(train_df.groupby('fold').size()), 
display(train_df.head())


fold
0.0    80214
1.0    80212
2.0    80211
3.0    80211
4.0    80211
dtype: int64

Unnamed: 0,isic_id,target,patient_id,age_approx,sex,anatom_site_general,clin_size_long_diam_mm,image_type,tbp_tile_type,tbp_lv_A,...,iddx_full,iddx_1,iddx_2,iddx_3,iddx_4,iddx_5,mel_mitotic_index,mel_thick_mm,tbp_lv_dnn_lesion_confidence,fold
0,ISIC_0015670,0,IP_1235828,60.0,male,lower extremity,3.04,TBP tile: close-up,3D: white,20.244422,...,Benign,Benign,,,,,,,97.517282,3.0
1,ISIC_0015845,0,IP_8170065,60.0,male,head/neck,1.1,TBP tile: close-up,3D: white,31.71257,...,Benign,Benign,,,,,,,3.141455,1.0
2,ISIC_0015864,0,IP_6724798,60.0,male,posterior torso,3.4,TBP tile: close-up,3D: XP,22.57583,...,Benign,Benign,,,,,,,99.80404,4.0
3,ISIC_0015902,0,IP_4111386,65.0,male,anterior torso,3.22,TBP tile: close-up,3D: XP,14.242329,...,Benign,Benign,,,,,,,99.989998,1.0
4,ISIC_0024200,0,IP_8313778,55.0,male,anterior torso,2.73,TBP tile: close-up,3D: white,24.72552,...,Benign,Benign,,,,,,,70.44251,0.0


 Splits the dataset into multiple folds for cross-validation, ensuring no patient overlap between train and validation within the same fold. It also labels each sample with its fold number for later use.

In [35]:
## Model
#dataset = load_dataset("huggingface/cats-image")
#image = dataset["test"]["image"][0]

processor = AutoImageProcessor.from_pretrained("microsoft/resnet-50")
model = ResNetForImageClassification.from_pretrained("microsoft/resnet-50")

#inputs = processor(image, return_tensors="pt")

#with torch.no_grad():
#    logits = model(**inputs).logits

## model predicts one of the 1000 ImageNet classes
#predicted_label = logits.argmax(-1).item()
#print(model.config.id2label[predicted_label])
new_classifier = nn.Sequential(
    nn.Flatten(start_dim=1, end_dim=-1),
    nn.Linear(in_features=2048, out_features=2, bias=True),
    nn.Sigmoid()
)

# Replace the old classifier with the new one
model.classifier = new_classifier

#for param in model.parameters():
#    param.requires_grad = False

    
for param in model.classifier.parameters():
    param.requires_grad = True

# Verify which parameters require gradients
#for name, param in model.named_parameters():
#    print(name, param.requires_grad)

BEST_WEIGHT = r"C:\path\to\your\weights\v2_AUROC0.9324_Loss0.0043_epoch22_lossauroc.pth"
model.to(CONFIG['device']);

This part loads pre-trained weights into the modified ResNet model. If the file path in BEST_WEIGHT is wrong or the file is missing, PyTorch will raise FileNotFoundError. Ensure BEST_WEIGHT points to the correct .pth file before calling torch.load().

In [38]:
# from sklearn.metrics import roc_curve, auc, roc_auc_score

def criterion(submission, solution, min_tpr: float = 0.80) -> float:
    # Use only the probability/score for the positive class (index 1)
    submission = submission[:, 1]
    solution = solution[:, 1]
    
    # Print shapes for quick debugging
    print(submission.shape, solution.shape)
    
    '''
    # --- Partial AUC version (commented out) ---
    # Convert ground truth to match negative-class metric style
    v_gt = abs(solution - 1)
    # Predict negative-class probability
    v_pred = np.array([1.0 - x for x in submission])
    # Calculate partial AUC up to a given false positive rate
    max_fpr = abs(1 - min_tpr)
    partial_auc_scaled = roc_auc_score(v_gt, v_pred, max_fpr=max_fpr)
    # Rescale the partial AUC score to a different numeric range
    partial_auc = 0.5 * max_fpr**2 + (max_fpr - 0.5 * max_fpr**2) / (1.0 - 0.5) * (partial_auc_scaled - 0.5)
    return partial_auc
    '''
    
    # --- Focal loss version (active) ---
    gamma = 2
    alpha = 0.8

    # Binary Cross-Entropy loss
    BCE = F.binary_cross_entropy(submission, solution, reduction='mean')
    # Exponential term used in focal loss calculation
    BCE_EXP = torch.exp(-BCE)
    # Apply focal loss formula to give more weight to hard examples
    focal_loss = alpha * (1 - BCE_EXP) ** gamma * BCE

    return focal_loss


This function defines a custom loss. The active part uses focal loss to focus training on harder examples by down-weighting easy ones. A commented-out alternative computes a partial AUC metric for evaluation purposes.

In [39]:
#device = torch.device("cpu")

In [40]:
def train_one_epoch(model, optimizer, scheduler, dataloader, device, epoch):
    device = CONFIG["device"]
    model.train()  # Set model to training mode

    dataset_size = 0
    running_loss = 0.0
    running_auroc = 0.0

    for step, batch in enumerate(train_loader):
        print(step)  # Debug: show current step

        # Move data to device
        images = batch['image'].to(device, dtype=torch.float)
        targets = batch['target'].to(device, dtype=torch.int64)  # Class indices

        batch_size = images.size(0)
        model = model.to(device)

        # Preprocess images using the model's processor
        inputs = processor(images, return_tensors="pt").to(device)

        # Forward pass (no torch.no_grad in training)
        logits = model(**inputs).logits

        # Compute loss (convert targets to one-hot for criterion)
        loss = criterion(
            logits,
            torch.nn.functional.one_hot(targets, num_classes=2) * 1.0
        )
        # Gradient accumulation
        loss = loss / CONFIG['n_accumulate']

        print(loss)  # Debug: show loss

        # Backward pass
        loss.backward()

        # Optimizer step every n_accumulate steps
        if (step + 1) % CONFIG['n_accumulate'] == 0:
            optimizer.step()
            optimizer.zero_grad()
            if scheduler is not None:
                scheduler.step()

        # Calculate AUROC using predicted class indices
        predicted_label = torch.argmax(logits, dim=-1)
        auroc = binary_auroc(input=predicted_label, target=targets).item()

        # Track total loss and AUROC
        running_loss += (loss.item() * batch_size)
        running_auroc += (auroc * batch_size)
        dataset_size += batch_size

        # Compute average loss and AUROC so far
        train_epoch_loss = running_loss / dataset_size
        train_epoch_auroc = running_auroc / dataset_size

    gc.collect()  # Free up unused memory

    return train_epoch_loss, train_epoch_auroc


This function trains the model for one epoch. It processes batches, computes loss, updates weights (with optional gradient accumulation), and tracks average loss and AUROC score for the epoch.

In [41]:
@torch.inference_mode()
def valid_one_epoch(model, dataloader, device, epoch):
    model.eval()  # Set model to evaluation mode
    
    dataset_size = 0
    running_loss = 0.0
    running_auroc = 0.0
    
    for batch in valid_loader:
        # Move data to the correct device
        images = batch['image'].to(device, dtype=torch.float)
        targets = batch['target'].to(device, dtype=torch.int64)  # Class indices
        
        batch_size = images.size(0)
        model = model.to(device)

        # Preprocess images using the model's processor
        inputs = processor(images, return_tensors="pt").to(device)

        # Forward pass to get logits
        logits = model(**inputs).logits

        # Compute loss (convert targets to one-hot for criterion)
        loss = criterion(
            logits,
            torch.nn.functional.one_hot(targets, num_classes=2) * 1.0
        )
        loss = loss / CONFIG['n_accumulate']

        print(loss)  # Debug: print batch loss

        # Get predicted class labels
        predicted_label = torch.argmax(logits, dim=-1)
        auroc = binary_auroc(input=predicted_label, target=targets).item()

        # Accumulate total loss and AUROC
        running_loss += (loss.item() * batch_size)
        running_auroc += (auroc * batch_size)
        dataset_size += batch_size

        # Compute average loss and AUROC so far
        epoch_loss = running_loss / dataset_size
        epoch_auroc = running_auroc / dataset_size
      
    gc.collect()  # Clean up unused memory
    
    return epoch_loss, epoch_auroc


This function evaluates the model for one epoch without gradient updates. It calculates average loss and AUROC across the validation set to measure performance.

In [43]:
# Check if CUDA (GPU support) is available and print the GPU name
if torch.cuda.is_available():
    print("[INFO] Using GPU: {}\n".format(torch.cuda.get_device_name()))


In [44]:
def fetch_scheduler(optimizer):
    # Select and return a learning rate scheduler based on CONFIG
    if CONFIG['scheduler'] == 'CosineAnnealingLR':
        scheduler = lr_scheduler.CosineAnnealingLR(
            optimizer,
            T_max=CONFIG['T_max'],   # Number of iterations for one cycle
            eta_min=CONFIG['min_lr'] # Minimum learning rate
        )
    elif CONFIG['scheduler'] == 'CosineAnnealingWarmRestarts':
        scheduler = lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer,
            T_0=CONFIG['T_0'],       # Iterations before first restart
            eta_min=CONFIG['min_lr'] # Minimum learning rate
        )
    elif CONFIG['scheduler'] is None:
        return None  # No scheduler
    
    return scheduler


Chooses and returns a learning rate scheduler based on configuration. Supports Cosine Annealing and Cosine Annealing with Warm Restarts, or returns None if no scheduler is selected.

In [46]:
# Set PyTorch to use the GPU (CUDA) device
device = torch.device("cuda")

In [48]:
# Create Adam optimizer with learning rate and weight decay from CONFIG
optimizer = optim.Adam(
    model.parameters(),
    lr=CONFIG['learning_rate'],
    weight_decay=CONFIG['weight_decay']
)

# Get the learning rate scheduler based on CONFIG settings
scheduler = fetch_scheduler(optimizer)


Initializes the Adam optimizer for model training and retrieves the appropriate learning rate scheduler according to the configuration.

In [49]:
# Record the start time of training
start = time.time()

# Save an initial copy of the model's weights (best model tracking)
best_model_wts = copy.deepcopy(model.state_dict())

# Initialize best AUROC score as negative infinity
best_epoch_auroc = -np.inf

# Dictionary to log training history for later analysis
history = {
    "Train Loss": [],
    "Valid Loss": [],
    "Train AUROC": [],
    "Valid AUROC": [],
    "lr": []  # Learning rate history
}


Starts a timer, stores an initial copy of model weights, sets a baseline best AUROC, and prepares a dictionary to track loss, AUROC, and learning rate during training.

In [None]:
    

    #start = time.time()
    #best_model_wts = copy.deepcopy(model.state_dict())
    #best_epoch_auroc = -np.inf
    #history = {"Train Loss": [], "Valid Loss": [], 'Train AUROC': [], 'Valid AUROC' : [], 'lr' : []}
    
    for epoch in range(13,13+CONFIG['epochs']): 
        gc.collect()
        train_epoch_loss, train_epoch_auroc = train_one_epoch(model, optimizer, scheduler, 
                                           dataloader=train_loader, 
                                           device=CONFIG['device'], epoch=epoch)
        
        val_epoch_loss, val_epoch_auroc = valid_one_epoch(model, valid_loader, device=CONFIG['device'], 
                                         epoch=epoch)
    
        history['Train Loss'].append(train_epoch_loss)
        history['Valid Loss'].append(val_epoch_loss)
        history['Train AUROC'].append(train_epoch_auroc)
        history['Valid AUROC'].append(val_epoch_auroc)
        history['lr'].append( scheduler.get_lr()[0] )
        
        print(history)
        # deep copy the model
        if 2>1:# best_epoch_auroc <= val_epoch_auroc:
            print(f"Validation AUROC Improved ({best_epoch_auroc} ---> {val_epoch_auroc})")
            best_epoch_auroc = val_epoch_auroc
            best_model_wts = copy.deepcopy(model.state_dict())
            PATH = "/home/mccruz/isic/ISIC2024_Skin_Cancer_Detection/v2_AUROC{:.4f}_Loss{:.4f}_epoch{:.0f}_lossauroc.pth".format(val_epoch_auroc, val_epoch_loss, epoch)
            torch.save(model.state_dict(), PATH)
            # Save a model file from the current directory
            print(f"Model Saved")
            
        print()
    
    end = time.time()
    time_elapsed = end - start
    print('Training complete in {:.0f}h {:.0f}m {:.0f}s'.format(
        time_elapsed // 3600, (time_elapsed % 3600) // 60, (time_elapsed % 3600) % 60))
    print("Best AUROC: {:.4f}".format(best_epoch_auroc))
    
    # load best model weights
    model.load_state_dict(best_model_wts)
    

