# 🔬 Skin Cancer Classification – EfficientNetB3

**Training notebook for the deployed Streamlit application.**

---

## 📊 Quick Results

- **Test Accuracy:** 77.13%
- **Melanoma Recall:** 63.68%
- **Macro AUC:** 0.94

---

## 🔗 Links

- **📖 Full Documentation:** [README.md](../README.md)
- **🚀 Live Demo:** [Streamlit App](https://skin-cancer-classifier-mkzevixv7y2x2wmrnlvaw3.streamlit.app/)
- **📓 Notebook with Outputs:** [Kaggle](https://www.kaggle.com/code/foroughgh95/skin-cancer-efficientnetb3-ham10000)

---

**Note:** This notebook has outputs removed for cleaner version control. For full training logs and visualizations, see the [Kaggle version](https://www.kaggle.com/code/foroughgh95/skin-cancer-efficientnetb3-ham10000) or refer to [README.md](../README.md) for complete project details.

---

**Model:** EfficientNetB3 | **Loss:** Focal Loss | **Framework:** TensorFlow 2.19

In [None]:
# Import Libraries

# Core utilities
import os
import random
import math
import numpy as np
import pandas as pd
from tqdm import tqdm

# Visualization
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
from sklearn.metrics import roc_curve, auc, classification_report, confusion_matrix

# Deep Learning
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout, BatchNormalization, Activation
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetB3
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, LearningRateScheduler

# Preprocessing
from sklearn.preprocessing import label_binarize
from sklearn.model_selection import train_test_split

In [None]:
# Dataset Setup Instructions

# This notebook uses the HAM10000 dataset from Kaggle:
# https://www.kaggle.com/datasets/kmader/skin-cancer-mnist-ham10000
#
# To run locally:
# 1. Download the dataset from the link above
# 2. Extract it to: ./skin_cancer_data/
# 3. Ensure the following files/folders exist inside ./skin_cancer_data/:
#    - HAM10000_metadata.csv
#    - HAM10000_images_part_1/
#    - HAM10000_images_part_2/
#
# On Kaggle/Colab the dataset can be fetched with the commands below instead.
# They are intentionally left commented out so that running every cell locally
# never triggers a download:
#
# !pip install -q kaggle
# !kaggle datasets download -d kmader/skin-cancer-mnist-ham10000
# !unzip -q skin-cancer-mnist-ham10000.zip -d ./skin_cancer_data

# Reminder only: this cell does not check that the folder actually exists.
print("Dataset should be in ./skin_cancer_data")

In [None]:
# Load and Prepare Metadata

# Load dataset metadata
df = pd.read_csv('./skin_cancer_data/HAM10000_metadata.csv')

# Map diagnosis codes to full lesion names
lesion_labels = {
    'nv': 'Melanocytic nevus',
    'mel': 'Melanoma',
    'bkl': 'Benign keratosis-like lesions',
    'bcc': 'Basal cell carcinoma',
    'akiec': 'Actinic keratosis',
    'vasc': 'Vascular lesions',
    'df': 'Dermatofibroma'
}
df['lesion_type'] = df['dx'].map(lesion_labels)

# Create numeric labels with a fixed order so that index i always means
# label_order[i], independent of which codes happen to occur in the CSV.
label_order = ['akiec', 'bcc', 'bkl', 'df', 'mel', 'nv', 'vasc']

# Fail loudly on codes we do not know how to label.
unknown_dx = sorted(set(df['dx'].dropna()) - set(label_order), key=str)
if unknown_dx:
    raise ValueError(f"Unexpected diagnosis codes in metadata: {unknown_dx}")

# Encoding against explicit categories also works when a class is absent from
# the loaded rows (e.g. a subset of the data); the index of every present class
# is unchanged.
df['label_idx'] = pd.Categorical(df['dx'], categories=label_order).codes

# Candidate image directories. Both the upper- and lower-case spellings of the
# folder names are listed because the extracted archive may use either one.
IMAGE_DIRS = [
    './skin_cancer_data/HAM10000_images_part_1',
    './skin_cancer_data/HAM10000_images_part_2',
    './skin_cancer_data/ham10000_images_part_1',
    './skin_cancer_data/ham10000_images_part_2'
]

# Resolve image paths
def get_image_path(image_id):
    """Return the path of `<image_id>.jpg` in the first directory of IMAGE_DIRS
    that contains it, or None when no directory has the file."""
    candidates = (os.path.join(folder, f'{image_id}.jpg') for folder in IMAGE_DIRS)
    return next((candidate for candidate in candidates if os.path.exists(candidate)), None)

df['path'] = df['image_id'].apply(get_image_path)

# Verify data integrity
missing_count = int(df['path'].isna().sum())
if missing_count > 0:
    # Rows without a file on disk cannot be turned into training samples, and
    # keeping them would leave non-string entries in the 'path' column that the
    # later cells pass to the image generators. Drop them explicitly instead.
    print(f"Warning: {missing_count} images not found in dataset folders; dropping those rows.")
    df = df.dropna(subset=['path']).reset_index(drop=True)

if df.empty:
    # Nothing resolved at all: almost certainly a wrong or missing data folder.
    raise FileNotFoundError(
        "No images were found in any of the expected folders: " + ", ".join(IMAGE_DIRS)
    )

print("Dataset loaded successfully")
print(f"Total samples: {len(df)}")
print(f"Missing paths: {missing_count}")

In [None]:
# Visualize Class Distribution

# Raw counts per diagnosis code
print("\nLesion Type Distribution:")
print(df['dx'].value_counts())

# Bar chart of the lesion types, most frequent class first
order = df['lesion_type'].value_counts().index
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(data=df, x='lesion_type', order=order, ax=ax)
ax.set_xlabel('Lesion Type')
ax.set_ylabel('Count')
ax.set_title('Distribution of Lesion Types')
# Long class names: tilt the tick labels and anchor them on their right edge
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
fig.tight_layout()
plt.show()

In [None]:
# Display Sample Images per Lesion Type

# One reproducibly sampled example image per lesion type.
classes = df['lesion_type'].unique()

# Derive the grid from the number of classes instead of assuming it fits 3x3.
n_cols = 3
n_rows = math.ceil(len(classes) / n_cols)

plt.figure(figsize=(15, 10))
for i, lesion in enumerate(classes):
    sample = df[df['lesion_type'] == lesion].sample(1, random_state=42).iloc[0]
    image_filename = sample['image_id'] + '.jpg'

    # Reuse the path already resolved into df['path'] rather than searching the
    # image folders a second time with a duplicated directory list.
    image_path = sample['path']
    if not isinstance(image_path, str) or not os.path.exists(image_path):
        print(f"Image {image_filename} not found in the dataset folders.")
        continue

    img = mpimg.imread(image_path)

    plt.subplot(n_rows, n_cols, i + 1)
    plt.imshow(img)
    plt.title(lesion, fontweight='bold', fontsize=10)
    plt.axis('off')

# tight_layout computes the subplot spacing itself, so no manual
# subplots_adjust call is made before it.
plt.tight_layout()
plt.show()

In [None]:
# Split Dataset into Train, Validation, and Test Sets

# NOTE(review): both splits are made per image row. If the metadata contains
# several images of the same physical lesion, near-duplicates could land in
# different splits and inflate validation/test scores — verify, and consider a
# group-aware split if so.

# Stage 1: hold out 20% of all rows as the test set (stratified by class)
train_val_df, test_df = train_test_split(
    df, test_size=0.2, stratify=df['label_idx'], random_state=42
)

# Stage 2: carve validation out of the remainder.
# 25% of the remaining 80% = 20% of the total, leaving 60% for training.
train_df, val_df = train_test_split(
    train_val_df, test_size=0.25, stratify=train_val_df['label_idx'], random_state=42
)

# Report the size of each split relative to the full dataset
total_samples = len(df)
splits = [("Train", train_df), ("Validation", val_df), ("Test", test_df)]
for split_name, split_df in splits:
    print(f"{split_name} samples: {len(split_df)} ({len(split_df)/total_samples*100:.1f}%)")

# Report the per-class share inside each split (should be nearly identical
# across splits because of the stratification)
print("\nClass distribution in each split:")
for split_name, split_df in splits:
    print(f"{split_name}:")
    split_size = len(split_df)
    for dx_code, n_images in split_df['dx'].value_counts().items():
        print(f"{dx_code}: {n_images} ({(n_images / split_size) * 100:.2f}%)")
    print("-" * 30)

In [None]:
# Data Generators with Optimized Augmentation

# Hyperparameters
TARGET_SIZE = (300, 300)      # EfficientNetB3 optimal input
BATCH_SIZE = 24               # Balanced for memory and gradient stability
RANDOM_STATE = 42

# Seed Python, NumPy and TensorFlow so that augmentation draws, weight
# initialisation and dropout are repeatable between runs (best effort: some
# GPU kernels may remain non-deterministic).
tf.keras.utils.set_random_seed(RANDOM_STATE)

# Moderate augmentation (domain-aware)
ROTATION_RANGE = 20
WIDTH_SHIFT_RANGE = 0.15
HEIGHT_SHIFT_RANGE = 0.15
SHEAR_RANGE = 0.1
ZOOM_RANGE = 0.2
BRIGHTNESS_RANGE = [0.8, 1.2]
HORIZONTAL_FLIP = True
VERTICAL_FLIP = False
# NOTE(review): if the channel shift is applied to raw pixel values on a 0-255
# scale, 0.1 is practically a no-op — confirm the intended magnitude.
CHANNEL_SHIFT_RANGE = 0.1

# Train generator
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=ROTATION_RANGE,
    width_shift_range=WIDTH_SHIFT_RANGE,
    height_shift_range=HEIGHT_SHIFT_RANGE,
    shear_range=SHEAR_RANGE,
    horizontal_flip=HORIZONTAL_FLIP,
    vertical_flip=VERTICAL_FLIP,
    zoom_range=ZOOM_RANGE,
    brightness_range=BRIGHTNESS_RANGE,
    channel_shift_range=CHANNEL_SHIFT_RANGE,
    fill_mode='reflect'
)

# Validation & Test (no augmentation)
val_test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)


def _flow_from_split(datagen, frame, shuffle, seed=None):
    """Create a batch iterator over one split.

    Reads image files from the 'path' column and integer labels from the
    'label_idx' column of `frame`, using the shared TARGET_SIZE / BATCH_SIZE.
    """
    return datagen.flow_from_dataframe(
        dataframe=frame,
        x_col='path',
        y_col='label_idx',
        target_size=TARGET_SIZE,
        batch_size=BATCH_SIZE,
        class_mode='raw',
        shuffle=shuffle,
        seed=seed
    )


# Create generators: only the training stream is shuffled; validation and test
# keep the dataframe order so predictions can be matched back to rows.
train_generator = _flow_from_split(train_datagen, train_df, shuffle=True, seed=RANDOM_STATE)
val_generator = _flow_from_split(val_test_datagen, val_df, shuffle=False)
test_generator = _flow_from_split(val_test_datagen, test_df, shuffle=False)

# Verify generator setup (draws one training batch as a smoke test)
batch_x, batch_y = next(train_generator)
print("Generators created successfully")
print(f"Train batches: {len(train_generator)}")
print(f"Val batches: {len(val_generator)}")
print(f"Test batches: {len(test_generator)}")
print(f"Batch shape: {batch_x.shape}")
print(f"Image range: [{batch_x.min():.2f}, {batch_x.max():.2f}]")

In [None]:
# Build and Compile Model (B3 Optimized)

# Hyperparameters
INPUT_SHAPE = (300, 300, 3)   # input_shape handed to EfficientNetB3 (height, width, channels)
DENSE_UNITS = 384             # width of the first dense layer in the classification head
DROPOUT_RATE = 0.3            # dropout after the first dense layer; the second one uses 0.7x this rate
LEARNING_RATE = 0.0005        # learning rate the Adam optimizer is created with
ACTIVATION = 'swish'          # activation of both hidden dense layers in the head

# Focal Loss (optimized for class imbalance)
def focal_loss(gamma=2.0, alpha=None):
    """Build a class-weighted focal loss for integer class labels.

    Args:
        gamma: Focusing exponent applied to (1 - p); larger values down-weight
            confidently predicted classes more strongly.
        alpha: One weight per class, indexed by class id. Defaults to the
            weights tuned for the seven lesion classes in label order
            [akiec, bcc, bkl, df, mel, nv, vasc].

    Returns:
        A function `(y_true, y_pred) -> scalar loss`, where `y_true` holds
        integer class ids and `y_pred` holds per-class probabilities.
    """
    if alpha is None:
        # Class weights: [akiec, bcc, bkl, df, mel, nv, vasc]
        alpha = [0.5, 0.3, 0.3, 1.2, 0.55, 0.25, 1.5]

    alpha_tensor = tf.constant(alpha, dtype=tf.float32)
    # The one-hot depth must match the number of weights; derive it from alpha
    # instead of repeating a literal that could drift out of sync.
    num_classes = int(alpha_tensor.shape[0])

    def focal_loss_fixed(y_true, y_pred):
        # Flatten the labels so both (batch,) and (batch, 1) layouts work.
        y_true = tf.reshape(tf.cast(y_true, tf.int32), [-1])

        y_true_one_hot = tf.one_hot(y_true, depth=num_classes, dtype=tf.float32)
        # Keep probabilities away from 0 and 1 so the log stays finite.
        y_pred = tf.clip_by_value(y_pred, 1e-7, 1.0 - 1e-7)

        # Weight of each sample's true class, kept as a column for broadcasting.
        alpha_t = tf.reduce_sum(y_true_one_hot * alpha_tensor, axis=-1, keepdims=True)
        ce = -y_true_one_hot * tf.math.log(y_pred)
        weight = tf.pow(1 - y_pred, gamma)
        fl = alpha_t * weight * ce

        # Sum over classes (only the true class is non-zero), mean over the batch.
        return tf.reduce_mean(tf.reduce_sum(fl, axis=-1))

    return focal_loss_fixed

# Load EfficientNet-B3 (pretrained)
base_model = EfficientNetB3(weights='imagenet', include_top=False, input_shape=INPUT_SHAPE)

# Freeze the backbone: only the classification head below is trained
base_model.trainable = False
print(f"EfficientNetB3 loaded: {len(base_model.layers)} layers")

# Custom classification head: pooled features followed by two
# Dense -> BatchNorm -> Dropout stages, the second with a lighter dropout
head = GlobalAveragePooling2D(name='gap')(base_model.output)
stage_specs = [(DENSE_UNITS, DROPOUT_RATE), (128, DROPOUT_RATE * 0.7)]
for stage, (units, drop_rate) in enumerate(stage_specs, start=1):
    head = Dense(units, activation=ACTIVATION, name=f'dense{stage}')(head)
    head = BatchNormalization(name=f'bn{stage}')(head)
    head = Dropout(drop_rate, name=f'dropout{stage}')(head)
predictions = Dense(7, activation='softmax', name='output')(head)

# Create model
model = Model(inputs=base_model.input, outputs=predictions)

# Compile with Focal Loss (default per-class weights)
model.compile(
    optimizer=Adam(learning_rate=LEARNING_RATE),
    loss=focal_loss(gamma=2.0, alpha=None),
    metrics=['accuracy']
)

# Display model info
total_params = model.count_params()
print(f"Model compiled: {total_params:,} total parameters")
trainable = sum(tf.size(w).numpy() for w in model.trainable_weights)
print(f"Trainable: {trainable:,}, Frozen: {model.count_params() - trainable:,}")

In [None]:
# Training Model (Single-Phase, Frozen Base)

# Hyperparameters
TOTAL_EPOCHS = 45             # maximum number of epochs; also the length of the cosine LR schedule
STEPS_PER_EPOCH = 1.0         # fraction of each generator's batches used per epoch (1.0 = all of them)
EARLY_STOPPING_PATIENCE = 15  # epochs without val_loss improvement before training stops
REDUCE_LR_PATIENCE = 5        # plateau-based LR reduction: epochs to wait before reducing
REDUCE_LR_FACTOR = 0.5        # plateau-based LR reduction: multiplicative factor

# Cosine annealing learning rate schedule
def cosine_annealing_schedule(epoch, lr, initial_lr=0.0005, min_lr=1e-7, epochs=45):
    """Return the learning rate for `epoch` on a half-cosine decay.

    The rate starts at `initial_lr` for epoch 0 and decays towards `min_lr`
    over `epochs` epochs; from epoch `epochs` onwards it stays at `min_lr`.
    `lr` (the current rate) is accepted for scheduler compatibility but does
    not influence the result.
    """
    # Past the end of the schedule: hold the floor value.
    if not epoch < epochs:
        return min_lr

    angle = math.pi * epoch / epochs
    amplitude = initial_lr - min_lr
    return min_lr + amplitude * (1 + math.cos(angle)) / 2

# Callbacks
checkpoint_path = 'efficientnetb3_ham10000_model.keras'  # Save path for best model (standardized filename for deployment)

# Writes the model to disk whenever validation accuracy reaches a new maximum.
checkpoint = ModelCheckpoint(
    checkpoint_path,
    monitor='val_accuracy',
    save_best_only=True,
    mode='max',
    verbose=1
)

# Stops once validation loss has not improved for EARLY_STOPPING_PATIENCE epochs.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=EARLY_STOPPING_PATIENCE,
    restore_best_weights=True,
    verbose=1
)

# NOTE(review): the checkpoint tracks val_accuracy while early stopping tracks
# val_loss, so the weights saved to disk and the weights restored into `model`
# may come from different epochs — confirm which of the two the reported
# test metrics are meant to describe.

# Cosine schedule, applied at the start of every epoch. The schedule starts
# from the same LEARNING_RATE the optimizer was created with.
# No ReduceLROnPlateau is registered: this scheduler assigns the learning rate
# at the beginning of each epoch, which overwrites any plateau-based reduction
# made at the end of the previous one (the REDUCE_LR_* settings are therefore
# not used here).
lr_scheduler = LearningRateScheduler(
    lambda epoch, lr: cosine_annealing_schedule(
        epoch, lr, initial_lr=LEARNING_RATE, epochs=TOTAL_EPOCHS
    ),
    verbose=1
)

print("="*60)
print("SINGLE PHASE TRAINING (No Fine-tuning)")
print("Strategy: Keep base model frozen, train only custom head")
print("="*60)

# Train model
history = model.fit(
    train_generator,
    steps_per_epoch=int(len(train_generator) * STEPS_PER_EPOCH),
    validation_data=val_generator,
    validation_steps=int(len(val_generator) * STEPS_PER_EPOCH),
    epochs=TOTAL_EPOCHS,
    callbacks=[checkpoint, early_stopping, lr_scheduler],
    verbose=1
)

# Results
print("\n" + "="*60)
print("TRAINING COMPLETE!")
print("="*60)
best_val_acc = max(history.history['val_accuracy'])
best_epoch = int(np.argmax(history.history['val_accuracy'])) + 1  # epochs are reported 1-based
final_val_acc = history.history['val_accuracy'][-1]
print(f"Best Val Accuracy: {best_val_acc * 100:.2f}% (Epoch {best_epoch})")
print(f"Final Val Accuracy: {final_val_acc * 100:.2f}%")
print(f"Model saved: {checkpoint_path}")

# Optional (Colab only, `files` comes from google.colab): download the best model
# files.download(checkpoint_path)

In [None]:
# Visualize Training History

# Pull the per-epoch curves out of the Keras history object
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(acc) + 1)

# Epoch (1-based) with the highest validation accuracy, marked on both panels
best_epoch = np.argmax(val_acc) + 1

fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(14, 6))

# Left panel: accuracy in percent
ax_acc.plot(epochs, np.asarray(acc) * 100, label='Training Accuracy')
ax_acc.plot(epochs, np.asarray(val_acc) * 100, label='Validation Accuracy')
ax_acc.axvline(best_epoch, color='r', linestyle='--', label=f'Best Epoch = {best_epoch}')
ax_acc.set_title('Model Accuracy')
ax_acc.set_xlabel('Epoch')
ax_acc.set_ylabel('Accuracy (%)')
ax_acc.legend()
ax_acc.grid(True)

# Right panel: loss
ax_loss.plot(epochs, loss, label='Training Loss')
ax_loss.plot(epochs, val_loss, label='Validation Loss')
ax_loss.axvline(best_epoch, color='r', linestyle='--')
ax_loss.set_title('Model Loss')
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')
ax_loss.legend()
ax_loss.grid(True)

fig.tight_layout()
plt.show()

In [None]:
# Evaluate Model on Test Data

# Evaluate on test data
test_loss, test_accuracy = model.evaluate(test_generator, steps=len(test_generator), verbose=1)
print(f"\nTest Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

# Predictions. The test generator is created with shuffle=False, so prediction
# row i is matched with row i of test_df.
test_generator.reset()
y_pred = model.predict(test_generator, steps=len(test_generator), verbose=1)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = test_df['label_idx'].values

# Guard the row-by-row pairing above: a length mismatch means the generator
# did not yield exactly one prediction per test_df row.
if len(y_true) != len(y_pred_classes):
    raise ValueError(
        f"Got {len(y_pred_classes)} predictions for {len(y_true)} test labels; "
        "cannot align predictions with test_df."
    )

# Class ids and full names, both in label_order. Passing the ids explicitly
# keeps every report row / matrix axis aligned with target_names even if some
# class never appears in y_true or the predictions.
class_ids = list(range(len(label_order)))
target_names = [lesion_labels[c] for c in label_order]

# Classification report
print("\nClassification Report:")
print(classification_report(y_true, y_pred_classes, labels=class_ids,
                            target_names=target_names, digits=4))

# Confusion Matrix (absolute)
cm = confusion_matrix(y_true, y_pred_classes, labels=class_ids)
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=target_names, yticklabels=target_names)
plt.title('Confusion Matrix', fontsize=14, fontweight='bold')
plt.xlabel('Predicted Label', fontsize=12, fontweight='bold')
plt.ylabel('True Label', fontsize=12, fontweight='bold')
plt.tight_layout()
plt.show()

In [None]:
# Plot ROC Curves

# One-vs-rest setup: one binary indicator column per class
num_classes = len(label_order)
y_test_bin = label_binarize(y_true, classes=range(num_classes))

# Predicted probabilities come from the evaluation cell
y_pred_probs = y_pred

fpr, tpr, roc_auc = {}, {}, {}

# Per-class curves, plus the micro-average obtained by pooling every
# (sample, class) decision into a single binary problem
curve_inputs = [(i, y_test_bin[:, i], y_pred_probs[:, i]) for i in range(num_classes)]
curve_inputs.append(("micro", y_test_bin.ravel(), y_pred_probs.ravel()))
for key, truth, scores in curve_inputs:
    fpr[key], tpr[key], _ = roc_curve(truth, scores)
    roc_auc[key] = auc(fpr[key], tpr[key])

# Macro-average: interpolate every class curve onto the union of all
# false-positive-rate points, then average the true-positive rates
all_fpr = np.unique(np.concatenate([fpr[i] for i in range(num_classes)]))
mean_tpr = np.zeros_like(all_fpr)
for i in range(num_classes):
    mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])
mean_tpr /= num_classes
fpr["macro"], tpr["macro"] = all_fpr, mean_tpr
roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

# Plot ROC curves
fig, ax = plt.subplots(figsize=(10, 8))
colors = ['aqua', 'darkorange', 'cornflowerblue', 'red', 'green', 'purple', 'brown']
for i, color in zip(range(num_classes), colors):
    class_name = lesion_labels[label_order[i]]
    ax.plot(fpr[i], tpr[i], color=color, lw=2,
            label=f'{class_name} (AUC = {roc_auc[i]:0.2f})')

# Averages are drawn as thick dotted lines
for key, title, color in [("micro", "Micro-average", 'deeppink'),
                          ("macro", "Macro-average", 'navy')]:
    ax.plot(fpr[key], tpr[key], color=color, linestyle=':', linewidth=3,
            label=f'{title} (AUC = {roc_auc[key]:.2f})')

# Chance-level diagonal for reference
ax.plot([0, 1], [0, 1], color='gray', lw=2, linestyle='--', label='Random Guess')

ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate', fontsize=12, fontweight='bold')
ax.set_ylabel('True Positive Rate', fontsize=12, fontweight='bold')
ax.set_title('ROC Curves (One-vs-Rest)', fontsize=14, fontweight='bold')
ax.legend(loc='lower right', fontsize=10)
ax.grid(True)
fig.tight_layout()
plt.show()