In [1]:
import os
import numpy as np
import pandas as pd
from skimage.io import imread
from skimage.transform import resize
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam
import tensorflow as tf

2023-08-24 19:24:41.338750: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-08-24 19:24:45.754294: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:/usr/local/nccl2/lib:/usr/local/cuda/extras/CUPTI/lib64
2023-08-24 19:24:45.755585: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:/usr/local/nccl2/lib:/usr/loca

In [2]:
# Load metadata CSV file
metadata = pd.read_csv('HAM10000_metadata.csv')

In [3]:
# Load images and labels
image_folder = 'HAM10000_Images'
image_size = (96, 96)

In [4]:
images = []
labels = []

for index, row in metadata.iterrows():
    image_path = os.path.join(image_folder, row['image_id'] + '.jpg')
    image = imread(image_path)
    resized_image = resize(image, image_size)
    images.append(resized_image)
    labels.append(row['dx'])

In [5]:
images = np.array(images)
labels = np.array(labels)

In [6]:
# Convert labels to categorical format
label_mapping = {label: idx for idx, label in enumerate(np.unique(labels))}
labels_encoded = np.array([label_mapping[label] for label in labels])
labels_categorical = to_categorical(labels_encoded, num_classes=len(label_mapping))

In [7]:
# Split the dataset into train, validation, and test sets
train_images, test_images, train_labels, test_labels = train_test_split(
    images, labels_categorical, test_size=0.2, random_state=42
)
train_images, val_images, train_labels, val_labels = train_test_split(
    train_images, train_labels, test_size=0.2, random_state=42
)


In [8]:

# Print the shape of each set
print("Train images shape:", train_images.shape)
print("Train labels shape:", train_labels.shape)
print("Validation images shape:", val_images.shape)
print("Validation labels shape:", val_labels.shape)
print("Test images shape:", test_images.shape)
print("Test labels shape:", test_labels.shape)


Train images shape: (6409, 96, 96, 3)
Train labels shape: (6409, 7)
Validation images shape: (1603, 96, 96, 3)
Validation labels shape: (1603, 7)
Test images shape: (2003, 96, 96, 3)
Test labels shape: (2003, 7)


In [9]:
# Create an ImageDataGenerator instance for data augmentation
datagen = ImageDataGenerator(
    rotation_range=30,  # Increase rotation range
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest'
)


In [10]:
# Apply data augmentation only to the training set
train_datagen = datagen.flow(train_images, train_labels, batch_size=32, shuffle=True)

In [11]:
# Load the pre-trained ResNet50 model (excluding top layers and with ImageNet weights)
base_model = ResNet50(include_top=False, weights='imagenet', input_shape=(96, 96, 3))

2023-08-24 19:40:28.738391: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:/usr/local/nccl2/lib:/usr/local/cuda/extras/CUPTI/lib64
2023-08-24 19:40:28.738445: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)
2023-08-24 19:40:28.738527: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (instance-20230816-171017): /proc/driver/nvidia/version does not exist
2023-08-24 19:40:28.739969: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow 

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [12]:
# Freeze the layers of the base model to retain pre-trained weights
for layer in base_model.layers:
    layer.trainable = False

In [20]:
# Add custom classification layers on top of the base model
x = GlobalAveragePooling2D()(base_model.output)
x = Dense(256, activation='relu')(x)  # Increased dense layer size
x = Dropout(0.5)(x)  # Increase dropout rate
output = Dense(7, activation='softmax')(x)

In [21]:
# Create the final model
model = Model(inputs=base_model.input, outputs=output)

In [22]:
# Unfreeze the last block of layers in the base model for fine-tuning
for layer in base_model.layers[:-12]:
    layer.trainable = False
for layer in base_model.layers[-12:]:
    layer.trainable = True

In [23]:
# Adjust the learning rate
def lr_scheduler(epoch):
    if epoch < 10:
        return 0.0001
    else:
        return 0.0001 * np.exp(0.1 * (10 - epoch))

lr_schedule = tf.keras.callbacks.LearningRateScheduler(lr_scheduler)

In [24]:
# Compile the model
model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

In [25]:

# Early stopping
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True
)


In [26]:
# Train the model with the improved settings
epochs = 40
batch_size = 32

history = model.fit(
    train_datagen,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(val_images, val_labels),
    callbacks=[early_stopping, lr_schedule]
)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [27]:
# Evaluate the model on the validation set
val_loss, val_accuracy = model.evaluate(val_images, val_labels, batch_size=batch_size)
print("Validation Loss:", val_loss)
print("Validation Accuracy:", val_accuracy)

Validation Loss: 0.8172417879104614
Validation Accuracy: 0.7236431837081909


In [28]:
# Evaluate the model on the testing set
test_loss, test_accuracy = model.evaluate(test_images, test_labels, batch_size=batch_size)
print("Test Loss:", test_loss)
print("Test Accuracy:", test_accuracy)


Test Loss: 0.8268979787826538
Test Accuracy: 0.7084373235702515


In [29]:
model.save('cnn_model-resnet_96x96')



INFO:tensorflow:Assets written to: cnn_model-resnet_96x96/assets


INFO:tensorflow:Assets written to: cnn_model-resnet_96x96/assets
