In [1]:
Step 2: Data Preparation

import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt

# Load the dataset
metadata = pd.read_csv('HAM10000_metadata.csv')

# Balance the dataset using oversampling
def balance_dataset(metadata):
    class_counts = metadata['dx'].value_counts()
    max_class = class_counts.max()
    balanced_metadata = metadata.groupby('dx').apply(lambda x: x.sample(max_class, replace=True)).reset_index(drop=True)
    return balanced_metadata

balanced_metadata = balance_dataset(metadata)





In [2]:
#Step 3: Data Preprocessing

import cv2

def remove_hair(image):
    grayScale = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    kernel = cv2.getStructuringElement(1, (17, 17))
    blackhat = cv2.morphologyEx(grayScale, cv2.MORPH_BLACKHAT, kernel)
    ret, thresh2 = cv2.threshold(blackhat, 10, 255, cv2.THRESH_BINARY)
    dst = cv2.inpaint(image, thresh2, 1, cv2.INPAINT_TELEA)
    return dst

# Apply preprocessing to all images
def preprocess_images(metadata):
    for i, row in metadata.iterrows():
        image_path = os.path.join('HAM10000_images', row['image_id'] + '.jpg')
        image = cv2.imread(image_path)
        image = remove_hair(image)
        cv2.imwrite(image_path, image)

preprocess_images(balanced_metadata)


In [13]:

#Step 4: Data Augmentation

from keras.preprocessing.image import ImageDataGenerator

datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    validation_split=0.2
)

def create_data_generator(metadata, batch_size=32, img_size=(224, 224)):
    train_gen = datagen.flow_from_dataframe(
        dataframe=metadata,
        directory='HAM10000_images',
        x_col='image_id',
        y_col='dx',
        target_size=img_size,
        batch_size=batch_size,
        class_mode='categorical',
        subset='training',
        seed=42
    )
    val_gen = datagen.flow_from_dataframe(
        dataframe=metadata,
        directory='HAM10000_images',
        x_col='image_id',
        y_col='dx',
        target_size=img_size,
        batch_size=batch_size,
        class_mode='categorical',
        subset='validation',
        seed=42
    )
    return train_gen, val_gen

train_gen, val_gen = create_data_generator(balanced_metadata)


Found 37548 validated image filenames belonging to 7 classes.
Found 9387 validated image filenames belonging to 7 classes.


In [14]:
from keras.applications import DenseNet169
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras.optimizers import Adam
from keras.callbacks import ReduceLROnPlateau

def build_model(base_model, num_classes):
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    predictions = Dense(num_classes, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)
    return model

num_classes = balanced_metadata['dx'].nunique()

# DenseNet169 Model
base_model_densenet = DenseNet169(weights='imagenet', include_top=False)
model_densenet = build_model(base_model_densenet, num_classes)

# Compile model
model_densenet.compile(optimizer=Adam(lr=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

# Callback for reducing learning rate
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.00001)

# Train model
history_densenet = model_densenet.fit(train_gen, validation_data=val_gen, epochs=50, steps_per_epoch=len(train_gen), validation_steps=len(val_gen), callbacks=[reduce_lr])




Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet169_weights_tf_dim_ordering_tf_kernels_notop.h5




Epoch 1/50












  18/1174 [..............................] - ETA: 19:06:00 - loss: 1.9876 - accuracy: 0.3785


KeyboardInterrupt



In [None]:
#Step 6: Model Evaluation
# Evaluate DenseNet169 Model

val_loss, val_accuracy = model_densenet.evaluate(val_gen)
print(f'DenseNet169 Model Accuracy: {val_accuracy*100:.2f}%')


In [None]:
#Step 7: Save the Model

model_densenet.save('densenet169_skin_cancer_model.h5')


In [None]:
#OPTIONAL, Hyperparameter Tuning, Optionally, you can use Keras Tuner for hyperparameter tuning to further optimize the model
from keras_tuner import RandomSearch

def model_builder(hp):
    base_model = DenseNet169(weights='imagenet', include_top=False)
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(hp.Int('units', min_value=512, max_value=2048, step=512), activation='relu')(x)
    predictions = Dense(num_classes, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)
    
    model.compile(optimizer=Adam(hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

tuner = RandomSearch(model_builder, objective='val_accuracy', max_trials=5, executions_per_trial=3)
tuner.search(train_gen, validation_data=val_gen, epochs=10)
best_model = tuner.get_best_models(num_models=1)[0]
best_model.fit(train_gen, validation_data=val_gen, epochs=50)
