# SKIN CANCER CLASSIFICATION - WITH BASELINE MODEL

# 1) Imports and Setup

In [1]:
import warnings
warnings.filterwarnings("ignore")
import os, random, time
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2

from sklearn.model_selection import train_test_split
from sklearn.metrics import (accuracy_score, precision_score, recall_score, 
                             f1_score, roc_auc_score, confusion_matrix)

In [2]:
# Device selection: run on the GPU when CUDA is usable, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

Using device: cuda


# 2) Configuration Class

In [3]:
# Configuration - BASELINE
class BaselineCFG:
    """Static settings for the baseline run: paths, sampling and training hyper-parameters.

    The class is used purely as a namespace of class attributes; the code shown
    in this file never instantiates it. Note that evaluating the class body has
    a side effect: it creates ``OUT_DIR`` on disk.
    """
    SEED = 42  # value used to seed the random / NumPy / torch generators
    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # GPU when CUDA is available, else CPU
    ISIC_CSV = "/kaggle/input/labels/ISIC_2024_Training_GroundTruth.csv"  # label CSV; its isic_id and malignant columns are read
    IMG_DIR = "/kaggle/input/isic-2024-dataset/ISIC_2024_Training_Input"  # directory holding the <isic_id>.jpg files

    BENIGN_DOWNSAMPLE = 10000  # upper bound on the number of benign (malignant == 0) rows kept
    RANDOM_STATE = SEED  # random_state for the pandas sampling and the train/val/test splits

    IMG_SIZE = 320  # images are resized to IMG_SIZE x IMG_SIZE
    BATCH_SIZE = 16  # training batch size; the val/test loaders use twice this value
    EPOCHS = 20  # maximum number of epochs (early stopping may end training sooner)

    MODEL_NAME = "tf_efficientnet_b3_ns"  # model name handed to timm.create_model
    LR = 3e-4  # learning rate given to the Adam optimizer
    NUM_WORKERS = 0  # num_workers value passed to every DataLoader

    OUT_DIR = "/kaggle/working/baseline_outputs"  # destination for the checkpoint and the results CSV
    os.makedirs(OUT_DIR, exist_ok=True)  # executed once, when the class body is evaluated
    MODEL_SAVE = os.path.join(OUT_DIR, "baseline_model.pth")  # path of the best-model checkpoint

In [4]:
# Seed every random number generator used in this notebook with the same value
_seed_value = BaselineCFG.SEED
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    _seed_fn(_seed_value)
if torch.cuda.is_available():
    # Covers all visible CUDA devices
    torch.cuda.manual_seed_all(_seed_value)

# 3) Read CSV + Downsample benign

In [5]:
# Read the labels, keeping only the image id and the binary target column
df_all = pd.read_csv(BaselineCFG.ISIC_CSV).loc[:, ['isic_id', 'malignant']]
_original_counts = df_all['malignant'].value_counts().to_dict()
print(f"Original class counts: {_original_counts}")

# Separate the two classes
benign_df = df_all[df_all['malignant'].eq(0)]
mal_df = df_all[df_all['malignant'].eq(1)]

# Keep at most BENIGN_DOWNSAMPLE benign rows; every malignant row is retained
_n_benign_kept = min(len(benign_df), BaselineCFG.BENIGN_DOWNSAMPLE)
benign_down = benign_df.sample(n=_n_benign_kept, random_state=BaselineCFG.RANDOM_STATE)

# Merge the classes again, shuffle the rows and renumber the index from zero
_merged = pd.concat([benign_down, mal_df])
_shuffled = _merged.sample(frac=1, random_state=BaselineCFG.RANDOM_STATE)
df = _shuffled.reset_index(drop=True)
_downsampled_counts = df['malignant'].value_counts().to_dict()
print(f"After downsample: {_downsampled_counts}")

Original class counts: {0.0: 400666, 1.0: 393}
After downsample: {0.0: 10000, 1.0: 393}


# 4) Stratified Train/Val/Test Split

In [6]:
# Two-stage stratified split: hold out the test set first, then carve the
# validation set out of what remains. Both stages use the same fraction and seed.
_split_args = dict(test_size=0.12, random_state=BaselineCFG.RANDOM_STATE)
train_val_df, test_df = train_test_split(df, stratify=df['malignant'], **_split_args)
train_df, val_df = train_test_split(train_val_df, stratify=train_val_df['malignant'], **_split_args)

# Report the size of each split together with its malignant count
for _split_name, _split_df in (("Train", train_df), ("Val", val_df), ("Test", test_df)):
    print(f"{_split_name}: {len(_split_df)} (Mal: {int(_split_df['malignant'].sum())})")

Train: 8047 (Mal: 304)
Val: 1098 (Mal: 42)
Test: 1248 (Mal: 47)


# 5) Albumentations Transformations 

In [7]:
# TRANSFORMS - MINIMAL (Only Resize + Normalize)
def _make_baseline_transform():
    """Build a fresh resize -> normalize -> to-tensor albumentations pipeline."""
    side = BaselineCFG.IMG_SIZE
    steps = [
        A.Resize(side, side),
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2(),
    ]
    return A.Compose(steps)


# The baseline applies no augmentation, so the training and validation pipelines
# are identical; they are still created as two separate objects.
train_transform = _make_baseline_transform()

val_transform = _make_baseline_transform()

# 6) Dataset Class

In [8]:
class SkinDataset(Dataset):
    """Map-style dataset yielding ``(image_tensor, label)`` pairs.

    Every row of ``df`` must provide an ``isic_id`` (the stem of a ``.jpg`` file
    under ``BaselineCFG.IMG_DIR``) and a ``malignant`` target value.

    An image that cannot be loaded is still replaced by an all-black
    ``IMG_SIZE x IMG_SIZE`` placeholder so that a single bad file does not abort
    an epoch. The substitution is no longer silent: each affected path is
    recorded in ``failed_paths`` and the first failure is printed.
    """

    def __init__(self, df, transform):
        """Store the rows and the transform.

        Args:
            df: DataFrame with ``isic_id`` and ``malignant`` columns. A copy with
                a fresh 0..n-1 index is kept, so positional lookups are safe
                even when ``df`` comes from a shuffled split.
            transform: Callable invoked as ``transform(image=img)`` whose result
                is indexed with ``'image'``.
        """
        self.df = df.reset_index(drop=True)
        self.transform = transform
        # Paths replaced by the placeholder. NOTE: with DataLoader worker
        # processes (num_workers > 0) each worker presumably fills its own copy.
        self.failed_paths = set()

    def __len__(self):
        """Return the number of rows (samples)."""
        return len(self.df)

    def _load_rgb(self, path):
        """Return the image at ``path`` as an RGB uint8 array, or a black placeholder."""
        img = None
        try:
            raw = cv2.imread(path)
            if raw is not None:
                img = cv2.cvtColor(raw, cv2.COLOR_BGR2RGB)
            reason = "file missing or not decodable"
        except cv2.error as err:
            # Only OpenCV failures are treated as "bad image"; anything else
            # (e.g. KeyboardInterrupt) now propagates instead of being swallowed.
            reason = f"OpenCV error: {err}"

        if img is not None:
            return img

        if not self.failed_paths:
            # Report once to keep progress bars readable; the full list of
            # failures stays available in ``failed_paths``.
            print(f"[SkinDataset] WARNING: could not load {path} ({reason}); "
                  f"substituting a blank image. Further failures are recorded "
                  f"in .failed_paths")
        self.failed_paths.add(path)
        return np.zeros((BaselineCFG.IMG_SIZE, BaselineCFG.IMG_SIZE, 3), dtype=np.uint8)

    def __getitem__(self, idx):
        """Return ``(transformed_image, label)`` for the row at position ``idx``.

        The label is a 0-dim ``float32`` tensor built from the ``malignant`` column.
        """
        row = self.df.iloc[idx]
        img_id = str(row['isic_id'])
        path = os.path.join(BaselineCFG.IMG_DIR, img_id + ".jpg")

        img = self._load_rgb(path)

        img_t = self.transform(image=img)['image']
        label = torch.tensor(row['malignant'], dtype=torch.float32)

        return img_t, label

# 7) Create Datasets

In [9]:
# Wrap each split in a SkinDataset. The test split reuses the validation
# transform, exactly like the validation split.
train_dataset, val_dataset, test_dataset = (
    SkinDataset(split_df, split_transform)
    for split_df, split_transform in (
        (train_df, train_transform),
        (val_df, val_transform),
        (test_df, val_transform),
    )
)

# 8) DataLoaders

In [10]:
# SIMPLE (No Sampler, Regular Shuffle)
# Options shared by all three loaders; memory is pinned only when a GPU is present.
_shared_loader_args = dict(
    num_workers=BaselineCFG.NUM_WORKERS,
    pin_memory=torch.cuda.is_available(),
)
# Evaluation loaders use a batch twice the training size.
_eval_batch_size = BaselineCFG.BATCH_SIZE * 2

# Training: plain random shuffling, no weighted sampler
train_loader = DataLoader(
    train_dataset,
    batch_size=BaselineCFG.BATCH_SIZE,
    shuffle=True,
    **_shared_loader_args,
)

# Validation / test: fixed order
val_loader = DataLoader(val_dataset, batch_size=_eval_batch_size, shuffle=False, **_shared_loader_args)

test_loader = DataLoader(test_dataset, batch_size=_eval_batch_size, shuffle=False, **_shared_loader_args)

print(f"DataLoaders: train={len(train_loader)}, val={len(val_loader)}, test={len(test_loader)}")

DataLoaders: train=503, val=35, test=39


# 9) Model Definition

In [11]:
# MODEL - MINIMAL (Just Pretrained Backbone + Single Output)
class BaselineModel(nn.Module):
    """Thin wrapper around a pretrained timm network with a one-unit classifier."""

    def __init__(self, model_name):
        """Create the timm model ``model_name`` with pretrained weights and ``num_classes=1``."""
        super().__init__()
        # Rely on timm's own classifier head rather than a custom one
        self.model = timm.create_model(
            model_name,
            pretrained=True,
            num_classes=1,
        )

    def forward(self, x):
        """Return the wrapped model's output for ``x`` with dimension 1 squeezed away."""
        logits = self.model(x)
        return logits.squeeze(dim=1)

# Build the network, move it to the selected device and report its parameter count
model = BaselineModel(BaselineCFG.MODEL_NAME)
model = model.to(device)
_param_total = sum(p.numel() for p in model.parameters())
print(f"Model params: {_param_total:,}")

model.safetensors:   0%|          | 0.00/49.3M [00:00<?, ?B/s]

Model params: 10,697,769


# 10) Loss & Optimizer

In [12]:
# SIMPLE (No pos_weight, No Scheduler, No Mixed Precision)
# Unweighted binary cross-entropy on raw logits (no pos_weight)
criterion = nn.BCEWithLogitsLoss()
# Adam over every model parameter; no weight_decay is set
_model_params = model.parameters()
optimizer = optim.Adam(_model_params, lr=BaselineCFG.LR)

# 11) Training Function

In [13]:
# TRAINING FUNCTION
def train_one_epoch(model, loader, optimizer, criterion):
    """Run one optimisation pass over ``loader`` and return the mean per-sample loss.

    Args:
        model: Network to train; switched to training mode here.
        loader: Iterable of ``(images, labels)`` batches.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss callable invoked as ``criterion(logits, labels)``.

    Returns:
        The batch losses averaged over the samples actually processed, or
        ``nan`` when the loader yields no batches.

    Batches are moved to the module-level ``device``.
    """
    model.train()
    running_loss = 0.0
    # Count what was really consumed instead of trusting len(loader.dataset):
    # the two differ when the loader drops the last batch or uses a sampler
    # that draws a different number of samples.
    samples_seen = 0

    pbar = tqdm(loader, desc="Training", leave=False)
    for imgs, labels in pbar:
        imgs = imgs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        # Regular forward pass (no mixed precision)
        logits = model(imgs)
        loss = criterion(logits, labels)

        # Regular backward pass
        loss.backward()
        optimizer.step()

        # Read the scalar once and reuse it for both the total and the bar
        batch_loss = loss.item()
        batch_size = imgs.size(0)
        # The criterion returns a batch mean, so weight it by the batch size
        running_loss += batch_loss * batch_size
        samples_seen += batch_size
        pbar.set_postfix({'loss': f'{batch_loss:.4f}'})

    if samples_seen == 0:
        return float("nan")
    return running_loss / samples_seen

# 12) Validation Function

In [14]:
@torch.no_grad()
def validate(model, loader):
    """Run ``model`` over ``loader`` and collect labels and sigmoid probabilities.

    The model is put in eval mode (and left there) and gradients are disabled.
    Inputs are moved to the module-level ``device``; labels never leave the CPU.

    Returns:
        ``(labels, probabilities)`` as NumPy arrays, in loader order.
    """
    model.eval()
    label_chunks = []
    prob_chunks = []

    for batch_imgs, batch_labels in tqdm(loader, desc="Validating", leave=False):
        batch_probs = torch.sigmoid(model(batch_imgs.to(device)))
        prob_chunks.append(batch_probs.cpu().numpy())
        label_chunks.append(batch_labels.numpy())

    if not prob_chunks:
        # Nothing was yielded: hand back two empty arrays
        return np.array([]), np.array([])
    return np.concatenate(label_chunks), np.concatenate(prob_chunks)

# 13) Metric Computation

In [15]:
def compute_metrics(y_true, y_probs, threshold=0.5):
    """Compute binary-classification metrics from labels and predicted probabilities.

    Args:
        y_true: Ground-truth labels (0 = negative, 1 = positive); array-like.
        y_probs: Predicted probability of the positive class; array-like.
        threshold: Probabilities ``>= threshold`` are predicted positive.

    Returns:
        Dict with ``acc``, ``prec``, ``rec``, ``f1``, ``auc``, the ``threshold``
        used, and ``cm``, a confusion matrix that is always 2x2 (rows are the
        true class, columns the predicted class, in the order 0, 1).
        ``auc`` is 0.0 whenever ROC-AUC cannot be computed.
    """
    # Accept plain lists as well as arrays
    y_true = np.asarray(y_true)
    y_probs = np.asarray(y_probs)
    y_pred = (y_probs >= threshold).astype(int)

    acc = accuracy_score(y_true, y_pred)
    prec = precision_score(y_true, y_pred, zero_division=0)
    rec = recall_score(y_true, y_pred, zero_division=0)
    f1 = f1_score(y_true, y_pred, zero_division=0)

    try:
        auc = roc_auc_score(y_true, y_probs)
    except ValueError:
        # Raised when AUC is undefined (e.g. only one class in y_true).
        # Catching just ValueError lets unrelated errors surface.
        auc = 0.0
    else:
        # NOTE(review): newer scikit-learn releases appear to return NaN with a
        # warning instead of raising for the single-class case; confirm against
        # the installed version. NaN is mapped to the same 0.0 fallback.
        if np.isnan(auc):
            auc = 0.0

    # Fix the label set so the matrix stays 2x2 even when only one class occurs;
    # callers index it as cm[1, 1], which fails on a 1x1 matrix.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])

    return {
        'acc': acc, 'prec': prec, 'rec': rec, 'f1': f1, 'auc': auc,
        'threshold': threshold, 'cm': cm
    }

# 14) Training Loop

In [16]:
# SIMPLE (No Scheduler, Basic Early Stopping)
print("Starting BASELINE Training")

# Early-stopping state: training halts after `patience` consecutive epochs
# in which the validation AUC did not beat the best value so far.
best_auc, best_epoch = 0.0, -1
patience, patience_counter = 5, 0

start_time = time.time()

for epoch in range(BaselineCFG.EPOCHS):
    epoch_start = time.time()

    # One pass over the training data, then score the validation split
    train_loss = train_one_epoch(model, train_loader, optimizer, criterion)
    y_true, y_probs = validate(model, val_loader)
    metrics = compute_metrics(y_true, y_probs, threshold=0.5)

    # One-line epoch summary
    epoch_time = time.time() - epoch_start
    summary_parts = [
        f"Epoch {epoch+1:02d}/{BaselineCFG.EPOCHS}",
        f"Loss: {train_loss:.4f}",
        f"AUC: {metrics['auc']:.4f}",
        f"F1: {metrics['f1']:.4f}",
        f"Prec: {metrics['prec']:.4f}",
        f"Rec: {metrics['rec']:.4f}",
        f"Time: {epoch_time:.1f}s",
    ]
    print(" | ".join(summary_parts))

    improved = metrics['auc'] > best_auc
    if not improved:
        # No new best: spend one unit of patience and stop once it runs out
        patience_counter += 1
        if patience_counter < patience:
            continue
        print(f"\nEarly stopping at epoch {epoch+1}")
        break

    # New best validation AUC: reset patience and checkpoint the weights
    best_auc = metrics['auc']
    best_epoch = epoch
    patience_counter = 0

    snapshot = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'best_auc': best_auc,
        'metrics': metrics
    }
    torch.save(snapshot, BaselineCFG.MODEL_SAVE)

    print(f"----> Saved best model (AUC: {best_auc:.4f})")

# Wall-clock duration of the whole run, in minutes
total_time = (time.time() - start_time) / 60
print("Training Complete!!!")
print(f"Best Epoch: {best_epoch+1} | Best AUC: {best_auc:.4f}")
print(f"Total Time: {total_time:.1f} minutes")

Starting BASELINE Training


                                                                        

Epoch 01/20 | Loss: 0.2446 | AUC: 0.8485 | F1: 0.3358 | Prec: 0.2421 | Rec: 0.5476 | Time: 306.4s
----> Saved best model (AUC: 0.8485)


                                                                        

Epoch 02/20 | Loss: 0.1154 | AUC: 0.8674 | F1: 0.2985 | Prec: 0.4000 | Rec: 0.2381 | Time: 207.6s
----> Saved best model (AUC: 0.8674)


                                                                        

Epoch 03/20 | Loss: 0.0860 | AUC: 0.8823 | F1: 0.3697 | Prec: 0.2857 | Rec: 0.5238 | Time: 207.1s
----> Saved best model (AUC: 0.8823)


                                                                        

Epoch 04/20 | Loss: 0.0650 | AUC: 0.9076 | F1: 0.3951 | Prec: 0.4103 | Rec: 0.3810 | Time: 207.6s
----> Saved best model (AUC: 0.9076)


                                                                        

Epoch 05/20 | Loss: 0.0377 | AUC: 0.8574 | F1: 0.2716 | Prec: 0.2821 | Rec: 0.2619 | Time: 207.2s


                                                                        

Epoch 06/20 | Loss: 0.0351 | AUC: 0.8819 | F1: 0.3175 | Prec: 0.4762 | Rec: 0.2381 | Time: 208.3s


                                                                        

Epoch 07/20 | Loss: 0.0373 | AUC: 0.8941 | F1: 0.2740 | Prec: 0.3226 | Rec: 0.2381 | Time: 206.2s


                                                                        

Epoch 08/20 | Loss: 0.0336 | AUC: 0.8664 | F1: 0.2222 | Prec: 0.5000 | Rec: 0.1429 | Time: 207.2s


                                                                        

Epoch 09/20 | Loss: 0.0259 | AUC: 0.8725 | F1: 0.3117 | Prec: 0.3429 | Rec: 0.2857 | Time: 207.7s

Early stopping at epoch 9
Training Complete!!!
Best Epoch: 4 | Best AUC: 0.9076
Total Time: 32.8 minutes




# 15) Final Test Evaluation

In [17]:
# Restore the best checkpoint written during training
checkpoint = torch.load(BaselineCFG.MODEL_SAVE, map_location=device, weights_only=False)
model.load_state_dict(checkpoint['model_state_dict'])

# Score the held-out test split
y_test, probs_test = validate(model, test_loader)

print("BASELINE TEST RESULTS (Threshold = 0.5)")

test_metrics = compute_metrics(y_test, probs_test, threshold=0.5)

# Headline metrics as (display name, value) pairs, in reporting order
_headline_rows = [
    ('Accuracy', test_metrics['acc']),
    ('Precision', test_metrics['prec']),
    ('Recall', test_metrics['rec']),
    ('F1-Score', test_metrics['f1']),
    ('ROC-AUC', test_metrics['auc']),
]

print("\nMetrics:")
for _metric_name, _metric_value in _headline_rows:
    # The name plus colon is padded to ten characters so the values line up
    print(f"  {_metric_name + ':':<10} {_metric_value:.4f}")

# Confusion matrix: rows are the actual class, columns the predicted class
cm = test_metrics['cm']
print("\nConfusion Matrix:")
print("                Predicted")
print("              Neg     Pos")
for _row_label, _row_idx in (("  Actual Neg", 0), ("         Pos", 1)):
    print(f"{_row_label}  {cm[_row_idx,0]:>4}    {cm[_row_idx,1]:>4}")

# Per-class rates, guarded against an empty class
_actual_pos = cm[1,1] + cm[1,0]
_actual_neg = cm[0,0] + cm[0,1]
sensitivity = cm[1,1] / _actual_pos if _actual_pos > 0 else 0
specificity = cm[0,0] / _actual_neg if _actual_neg > 0 else 0
print(f"\n  Sensitivity (Recall): {sensitivity:.4f}")
print(f"  Specificity: {specificity:.4f}")

# Save results
_report_rows = _headline_rows + [('Sensitivity', sensitivity), ('Specificity', specificity)]
results = pd.DataFrame({
    'metric': [_name for _name, _ in _report_rows],
    'value': [_value for _, _value in _report_rows],
})
results.to_csv(os.path.join(BaselineCFG.OUT_DIR, 'baseline_results.csv'), index=False)

print(f"Results saved to {BaselineCFG.OUT_DIR}")
print("BASELINE MODEL EVALUATION COMPLETE!!1")

                                                           

BASELINE TEST RESULTS (Threshold = 0.5)

Metrics:
  Accuracy:  0.9647
  Precision: 0.5349
  Recall:    0.4894
  F1-Score:  0.5111
  ROC-AUC:   0.9248

Confusion Matrix:
                Predicted
              Neg     Pos
  Actual Neg  1181      20
         Pos    24      23

  Sensitivity (Recall): 0.4894
  Specificity: 0.9833
Results saved to /kaggle/working/baseline_outputs
BASELINE MODEL EVALUATION COMPLETE!!1


