In [1]:
import pandas as pd
import numpy as np
import os
from pathlib import Path
import cv2 as cv
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.mixed_precision import set_global_policy

2025-10-24 17:32:37.398289: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Switch Keras to the mixed-precision global dtype policy to reduce GPU memory usage
mixed_precision_policy = 'mixed_float16'
set_global_policy(mixed_precision_policy)

In [3]:
# Dataset locations, relative to the notebook's working directory
data_path, csv_path = (
    '../data/ISIC_2019_Training_Input',            # folder holding the .jpg images
    '../data/ISIC_2019_Training_GroundTruth.csv',  # CSV with the one-hot encoded labels
)

In [4]:
# Read the ground-truth CSV (one row per image) into a DataFrame
df = pd.read_csv(filepath_or_buffer = csv_path)

In [5]:
# Attach the on-disk location of every image: <data_path>/<image id>.jpg
df['image_path'] = [
    os.path.join(data_path, image_id + '.jpg')
    for image_id in df['image']
]

In [6]:
# Collapse the one-hot label columns into a single categorical 'label' column:
# for each row, idxmax returns the name of the column holding the largest value.
class_columns = ['MEL', 'NV', 'BCC', 'AK', 'BKL', 'DF', 'VASC', 'SCC', 'UNK']
one_hot_labels = df[class_columns]
df['label'] = one_hot_labels.idxmax(axis = 'columns')

In [7]:
# Two-stage stratified split: hold out 30% of the data, then halve that hold-out
# into validation and test, giving 70% / 15% / 15% overall.
train_df, temp_df = train_test_split(
    df,
    test_size = 0.3,
    stratify = df['label'],
    random_state = 42,
)
val_df, test_df = train_test_split(
    temp_df,
    test_size = 0.5,
    stratify = temp_df['label'],
    random_state = 42,
)
n_train, n_val, n_test = len(train_df), len(val_df), len(test_df)
print(f"Train: {n_train}, Validation: {n_val}, Test: {n_test}")

Train: 17731, Validation: 3800, Test: 3800


In [8]:
# Set up data augmentation for training and plain ResNet50 preprocessing for validation/test
resnet_preprocess = tf.keras.applications.resnet50.preprocess_input

train_datagen = ImageDataGenerator(
    preprocessing_function = resnet_preprocess,
    rotation_range = 20, # Random rotation up to 20 degrees
    width_shift_range = 0.2, # Random horizontal shift
    height_shift_range = 0.2, # Random vertical shift
    horizontal_flip = True, # Random horizontal flip
    zoom_range = 0.2, # Random zoom
    fill_mode = 'nearest' # Fill new pixels with nearest value
)

# No augmentation for validation/test: only the ResNet50 preprocessing function
val_test_datagen = ImageDataGenerator(preprocessing_function = resnet_preprocess)

# Configure data generators
target_size = (224, 224) # Resize images to the 224x224 input used by the ResNet50 model
batch_size = 16

# Fix the label -> index mapping once, from the training labels, and hand the
# same list to every generator. Otherwise each generator infers its own mapping
# from whatever labels its split happens to contain, and a split missing a rare
# class would silently shift the indices. Sorting reproduces the default order.
class_names = sorted(train_df['label'].unique())


def _make_generator(datagen, frame, shuffle = True):
    """Create a categorical image iterator over `frame` using `datagen`.

    Args:
        datagen: ImageDataGenerator that supplies preprocessing/augmentation.
        frame: DataFrame with an 'image_path' column (file to load) and a
            'label' column (class name).
        shuffle: Whether the iterator shuffles the sample order.

    Returns:
        The iterator returned by `datagen.flow_from_dataframe`, yielding
        batches of `batch_size` images resized to `target_size`.
    """
    return datagen.flow_from_dataframe(
        frame,
        x_col = 'image_path',
        y_col = 'label',
        target_size = target_size,
        batch_size = batch_size,
        class_mode = 'categorical',
        classes = class_names,
        shuffle = shuffle
    )


train_generator = _make_generator(train_datagen, train_df)
val_generator = _make_generator(val_test_datagen, val_df)
# Keep the test data in DataFrame order so predictions line up with labels at evaluation
test_generator = _make_generator(val_test_datagen, test_df, shuffle = False)

print("Class indices:", train_generator.class_indices)

Found 17731 validated image filenames belonging to 8 classes.
Found 3800 validated image filenames belonging to 8 classes.
Found 3800 validated image filenames belonging to 8 classes.
Class indices: {'AK': 0, 'BCC': 1, 'BKL': 2, 'DF': 3, 'MEL': 4, 'NV': 5, 'SCC': 6, 'VASC': 7}


In [9]:
# Per-class loss weights, keyed by class name (rare classes get larger weights).
# NOTE(review): these look like precomputed "balanced" weights
# (n_samples / (n_classes * class_count)) - confirm how they were derived.
class_weights = dict(
    AK = 3.652,
    BCC = 0.953,
    BKL = 1.207,
    DF = 13.248,
    MEL = 0.700,
    NV = 0.246,
    SCC = 5.042,
    VASC = 12.515,
)

In [10]:
# Define ResNet50 model with transfer learning
def build_resnet50_model(num_classes = 8, input_shape = (224, 224, 3), learning_rate = 0.001):
    """Build and compile a ResNet50-based image classifier for transfer learning.

    The network is an ImageNet-pretrained ResNet50 backbone (without its
    original classification head, and frozen) followed by a small trainable
    head: global average pooling -> Dense(512, relu) -> Dropout(0.5) ->
    Dense(num_classes, softmax).

    Class weighting is not handled here; pass `class_weight=` to `model.fit`.

    Args:
        num_classes: Number of output classes (units in the softmax layer).
        input_shape: (height, width, channels) of the input images.
        learning_rate: Initial learning rate for the Adam optimizer.

    Returns:
        A compiled Keras `Sequential` model using categorical cross-entropy
        loss and reporting accuracy.
    """
    # Load ResNet50 with ImageNet weights, excluding the top (classification) layer
    base_model = ResNet50(weights = 'imagenet', include_top = False, input_shape = input_shape)

    # Freeze the backbone so that only the new head is trained initially
    base_model.trainable = False

    # Build the model
    model = Sequential([
        base_model,
        GlobalAveragePooling2D(), # Pool the feature maps to a single vector
        Dense(512, activation = 'relu'), # Add a dense layer for feature learning
        Dropout(0.5), # Add dropout to prevent overfitting
        # The output layer is pinned to float32 so the softmax is computed in
        # full precision even when a mixed-precision global policy is active.
        Dense(num_classes, activation = 'softmax', dtype = 'float32')
    ])

    # Compile the model
    model.compile(
        optimizer = Adam(learning_rate = learning_rate),
        loss = 'categorical_crossentropy',
        metrics = ['accuracy']
    )

    return model

In [11]:
# Size the output layer from the classes the training generator actually found,
# so the model cannot drift out of sync with the data.
model = build_resnet50_model(num_classes = len(train_generator.class_indices))

2025-10-24 17:32:38.851893: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-10-24 17:32:38.880867: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-10-24 17:32:38.884605: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-

In [12]:
# Print the model's layer-by-layer summary as a sanity check before training
model.summary()

In [13]:
# Training hyperparameters: maximum number of epochs and mini-batch size
epochs, batch_size = 20, 16

In [14]:
# Training callbacks, built one by one and then collected for model.fit

# Stop once val_loss has not improved for 5 epochs and roll back to the best weights
stop_early = EarlyStopping(
    monitor = 'val_loss',
    patience = 5,
    restore_best_weights = True,
)

# Write the model to disk only when val_accuracy improves
checkpoint_best = ModelCheckpoint(
    filepath = 'best_resnet50_model.keras',
    monitor = 'val_accuracy',
    save_best_only = True,
)

# Multiply the learning rate by 0.2 after 3 epochs without val_loss improvement (floor: 1e-6)
shrink_lr = ReduceLROnPlateau(
    monitor = 'val_loss',
    factor = 0.2,
    patience = 3,
    min_lr = 1e-6,
)

callbacks = [stop_early, checkpoint_best, shrink_lr]

In [15]:
# Keras expects class weights keyed by integer class index. Derive that mapping
# from the name-keyed `class_weights` and the indices the training generator
# assigned, rather than keeping a second hand-typed copy of the numbers that
# could fall out of sync with either of them.
_classes_without_weight = set(train_generator.class_indices) - set(class_weights)
if _classes_without_weight:
    raise KeyError(f"No class weight defined for: {sorted(_classes_without_weight)}")

class_weight_dict = {
    class_index: class_weights[class_name]
    for class_name, class_index in train_generator.class_indices.items()
}

In [17]:
# Train the model: fit on the augmented training generator, validating after each
# epoch, with per-class loss weights and the callbacks defined above in this notebook.
fit_options = dict(
    epochs = epochs,
    validation_data = val_generator,
    class_weight = class_weight_dict,
    callbacks = callbacks,
    verbose = 1,
)
history = model.fit(train_generator, **fit_options)

  self._warn_if_super_not_called()


Epoch 1/20


I0000 00:00:1761314647.968054    6578 service.cc:145] XLA service 0x72a03c001d80 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1761314647.968076    6578 service.cc:153]   StreamExecutor device (0): NVIDIA GeForce GTX 1650, Compute Capability 7.5
2025-10-24 17:34:08.173682: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-10-24 17:34:09.029908: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:465] Loaded cuDNN version 8902
2025-10-24 17:34:12.017112: E external/local_xla/xla/service/gpu/buffer_comparator.cc:149] Difference at 0: 0.125854, expected -nan
2025-10-24 17:34:12.017139: E external/local_xla/xla/service/gpu/buffer_comparator.cc:149] Difference at 1: 0.232422, expected 0
2025-10-24 17:34:12.017146: E external/local_xla/xla/service/gpu/buffer_comparator.cc:149] Difference at 2: 0.131714, expected -nan
2025-

[1m   2/1109[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:29[0m 81ms/step - accuracy: 0.1094 - loss: 3.5938  

I0000 00:00:1761314666.147323    6578 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m 357/1109[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m [1m3:32[0m 283ms/step - accuracy: 0.2830 - loss: 2.9139

2025-10-24 17:36:09.594601: E external/local_xla/xla/service/gpu/buffer_comparator.cc:149] Difference at 0: 0.199951, expected -nan
2025-10-24 17:36:09.594643: E external/local_xla/xla/service/gpu/buffer_comparator.cc:149] Difference at 1: 0.125244, expected 0
2025-10-24 17:36:09.594649: E external/local_xla/xla/service/gpu/buffer_comparator.cc:149] Difference at 2: 0.204224, expected -nan
2025-10-24 17:36:09.594653: E external/local_xla/xla/service/gpu/buffer_comparator.cc:149] Difference at 3: 0.199829, expected 0
2025-10-24 17:36:09.594656: E external/local_xla/xla/service/gpu/buffer_comparator.cc:149] Difference at 4: 0.182129, expected -nan
2025-10-24 17:36:09.594660: E external/local_xla/xla/service/gpu/buffer_comparator.cc:149] Difference at 5: 0.118286, expected 0
2025-10-24 17:36:09.594663: E external/local_xla/xla/service/gpu/buffer_comparator.cc:149] Difference at 6: 0.128296, expected -nan
2025-10-24 17:36:09.594667: E external/local_xla/xla/service/gpu/buffer_comparator.cc

[1m1109/1109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 288ms/step - accuracy: 0.3273 - loss: 2.3430  

2025-10-24 17:40:35.207356: E external/local_xla/xla/service/gpu/buffer_comparator.cc:149] Difference at 0: 0.142456, expected -nan
2025-10-24 17:40:35.207400: E external/local_xla/xla/service/gpu/buffer_comparator.cc:149] Difference at 1: 0.202148, expected 0
2025-10-24 17:40:35.207406: E external/local_xla/xla/service/gpu/buffer_comparator.cc:149] Difference at 2: 0.123108, expected -nan
2025-10-24 17:40:35.207410: E external/local_xla/xla/service/gpu/buffer_comparator.cc:149] Difference at 3: 0.14856, expected 0
2025-10-24 17:40:35.207413: E external/local_xla/xla/service/gpu/buffer_comparator.cc:149] Difference at 4: 0.164917, expected -nan
2025-10-24 17:40:35.207417: E external/local_xla/xla/service/gpu/buffer_comparator.cc:149] Difference at 5: 0.128174, expected 0
2025-10-24 17:40:35.207420: E external/local_xla/xla/service/gpu/buffer_comparator.cc:149] Difference at 6: 0.130737, expected -nan
2025-10-24 17:40:35.207423: E external/local_xla/xla/service/gpu/buffer_comparator.cc:

[1m1109/1109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m402s[0m 341ms/step - accuracy: 0.3572 - loss: 1.9505 - val_accuracy: 0.4789 - val_loss: 1.3871 - learning_rate: 0.0010
Epoch 2/20
[1m1109/1109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m338s[0m 304ms/step - accuracy: 0.4043 - loss: 1.6454 - val_accuracy: 0.5053 - val_loss: 1.3526 - learning_rate: 0.0010
Epoch 3/20
[1m1109/1109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m339s[0m 305ms/step - accuracy: 0.4208 - loss: 1.5690 - val_accuracy: 0.4861 - val_loss: 1.3311 - learning_rate: 0.0010
Epoch 4/20
[1m1109/1109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m338s[0m 305ms/step - accuracy: 0.4138 - loss: 1.5535 - val_accuracy: 0.5218 - val_loss: 1.2499 - learning_rate: 0.0010
Epoch 5/20
[1m1109/1109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m338s[0m 305ms/step - accuracy: 0.4313 - loss: 1.4995 - val_accuracy: 0.5116 - val_loss: 1.2868 - learning_rate: 0.0010
Epoch 6/20
[1m1109/1109[0m [32m━━━━━━━━━━━━━━━━━

In [None]:
# Persist the per-epoch metrics dict so the curves can be re-plotted without retraining
history_bytes = pickle.dumps(history.history)
Path('training_history.pkl').write_bytes(history_bytes)

In [None]:
# Plot training and validation curves side by side: accuracy (left) and loss (right)
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 4))

# (axes, train key, validation key, train label, validation label, title, y-axis label)
panels = [
    (acc_ax, 'accuracy', 'val_accuracy', 'Train Accuracy', 'Validation Accuracy', 'Model Accuracy', 'Accuracy'),
    (loss_ax, 'loss', 'val_loss', 'Train Loss', 'Validation Loss', 'Model Loss', 'Loss'),
]

for ax, train_key, val_key, train_label, val_label, title, y_label in panels:
    ax.plot(history.history[train_key], label = train_label)
    ax.plot(history.history[val_key], label = val_label)
    ax.set_title(title)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.legend()

plt.show()