In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the dataset paths used below live under this mount point.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# data preprocessing

In [None]:
import os
import numpy as np
from PIL import Image
import pandas as pd

In [None]:
# Class-folder name -> integer label.
# Insertion order is significant: the loading loop iterates this mapping, so it
# decides the order in which class folders are read and rows are appended.
label_mapping = dict(
    akiec=0,
    bcc=1,
    bkl=2,
    df=3,
    nv=4,
    mel=6,
    vasc=5,
)

In [None]:
# Root folder; the loading loop expects one sub-folder per class name inside it.
image_folder = '/content/drive/MyDrive/Skin HAM10000/omar/Synthetic_Images'
# Destination CSV for the flattened pixel table built below.
# NOTE(review): the file name suggests a metadata file, yet a pixel/label table is written
# to it, while the training section reads 'synthetic_images_pixel_values1.csv' — confirm
# that this is the intended target and that nothing valuable gets overwritten.
output_csv = '/content/drive/MyDrive/Skin HAM10000/HAM10000_metadata.csv'

In [None]:
def image_to_pixels(image_path):
    """Load an image, resize it to 28x28 and return its pixels as a flat array.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A 1-D NumPy array with 28 * 28 * 3 values when the resized image has
        exactly three channels; ``None`` for any other shape (for example
        single-channel or four-channel images), so callers can skip the file.
    """
    # Pillow keeps the underlying file open until the image is closed. The
    # context manager releases the handle right away, which matters when this
    # is called for tens of thousands of files in one loop.
    with Image.open(image_path) as img:
        pixels = np.array(img.resize((28, 28)))

    if pixels.shape != (28, 28, 3):
        return None
    return pixels.flatten()

In [None]:
# Build one row per usable image: the flattened pixel values followed by the class label.
data = []
for label_name, label_num in label_mapping.items():
    folder_path = os.path.join(image_folder, label_name)

    # os.listdir returns entries in arbitrary order. Sorting keeps the row order
    # stable between runs, so the seeded sampling and splitting done later on
    # this table select the same rows every time.
    for file_name in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, file_name)
        pixels = image_to_pixels(file_path)
        # image_to_pixels yields None for images that are not 28x28x3 after resizing.
        if pixels is not None:
            data.append(list(pixels) + [label_num])


In [None]:
# One column per flattened value (pixel0000 ... pixel2351), followed by the label column.
pixel_columns = ['pixel{:04d}'.format(i) for i in range(28 * 28 * 3)]
df = pd.DataFrame(data, columns=[*pixel_columns, 'label'])

In [None]:
# Display the assembled pixel/label table (the notebook renders a truncated view).
df

Unnamed: 0,pixel0000,pixel0001,pixel0002,pixel0003,pixel0004,pixel0005,pixel0006,pixel0007,pixel0008,pixel0009,...,pixel2343,pixel2344,pixel2345,pixel2346,pixel2347,pixel2348,pixel2349,pixel2350,pixel2351,label
0,210,154,173,214,157,172,209,155,170,220,...,169,129,150,184,141,155,186,140,149,0
1,193,144,157,194,145,156,191,143,150,195,...,145,116,98,119,83,70,75,66,63,0
2,200,147,161,199,154,163,204,162,170,205,...,202,139,154,183,124,133,157,118,121,0
3,156,113,118,170,118,123,154,111,116,139,...,207,133,139,103,73,64,39,40,30,0
4,166,127,136,169,123,136,181,132,150,189,...,70,53,47,53,33,26,62,51,46,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59980,191,145,154,189,146,154,196,151,160,195,...,158,135,136,165,144,145,194,152,167,5
59981,196,145,161,204,153,168,197,155,165,197,...,186,144,152,179,145,149,192,144,157,5
59982,171,131,142,173,129,142,181,136,149,192,...,171,130,136,161,124,128,177,132,141,5
59983,0,0,0,0,0,0,0,3,3,7,...,175,136,144,168,120,119,115,99,92,5


In [None]:
# Write the full table to output_csv without the index column; the same path is
# written again further down, after some rows have been removed.
df.to_csv(output_csv, index=False)

In [None]:
# Rows per class label, to inspect class balance.
df["label"].value_counts()

Unnamed: 0_level_0,count
label,Unnamed: 1_level_1
3,9885
5,9858
0,9673
1,9486
2,8901
6,8887
4,3295


In [None]:
# Number of label-6 rows to remove from the table.
difference = 134

if difference > 0:
    # Choose the rows to remove among label-6 samples only; the fixed seed makes
    # the selection repeatable for a given table.
    label_6_rows = df.loc[df['label'] == 6]
    drop_indices = label_6_rows.sample(n=difference, random_state=42).index

    # df is rebound here, so executing this cell again removes a further batch of rows.
    df = df.drop(index=drop_indices)

# Show the class counts after trimming.
print(df['label'].value_counts())

label
3    9885
5    9858
0    9673
1    9486
2    8901
6    8753
4    3295
Name: count, dtype: int64


In [None]:
# Overwrite output_csv with the trimmed table (index column omitted).
df.to_csv(output_csv, index=False)

# Hybrid

In [None]:
# import system libs
import os
import time
import shutil
import itertools

# import data handling tools
import cv2
import numpy as np
import pandas as pd
import seaborn as sns
sns.set_style('darkgrid')
import matplotlib.pyplot as plt
# import Deep learning Libraries
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Activation, Dropout, BatchNormalization
from tensorflow.keras.models import Model, load_model, Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import Adam, Adamax, Adagrad, RMSprop, Adadelta, Nadam, Ftrl, SGD
from tensorflow.keras import regularizers
from tensorflow.keras.metrics import categorical_crossentropy


# Ignore Warnings
import warnings
# Suppresses every Python warning for the rest of the session, including
# deprecation notices raised by the libraries imported above.
warnings.filterwarnings("ignore")

# Confirmation that the import cell ran to completion.
print ('modules loaded')

modules loaded


## test set does not contain synthetic data

In [None]:
# Load the pixel table that is split into train and test parts below.
# This cell must be live: without it, `data` is still the Python list built in the
# preprocessing section and `data["label"]` fails on a fresh Restart & Run All.
# NOTE(review): this is the same file that is later read into `synthetic_df`; confirm
# whether the real (non-synthetic) dataset was meant to be loaded here instead.
data_dir = '/content/drive/MyDrive/Skin HAM10000/synthetic_images_pixel_values1.csv'
data = pd.read_csv(data_dir)
data

Unnamed: 0,pixel0000,pixel0001,pixel0002,pixel0003,pixel0004,pixel0005,pixel0006,pixel0007,pixel0008,pixel0009,...,pixel2343,pixel2344,pixel2345,pixel2346,pixel2347,pixel2348,pixel2349,pixel2350,pixel2351,label
0,210,154,173,214,157,172,209,155,170,220,...,169,129,150,184,141,155,186,140,149,0
1,193,144,157,194,145,156,191,143,150,195,...,145,116,98,119,83,70,75,66,63,0
2,200,147,161,199,154,163,204,162,170,205,...,202,139,154,183,124,133,157,118,121,0
3,156,113,118,170,118,123,154,111,116,139,...,207,133,139,103,73,64,39,40,30,0
4,166,127,136,169,123,136,181,132,150,189,...,70,53,47,53,33,26,62,51,46,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59846,191,145,154,189,146,154,196,151,160,195,...,158,135,136,165,144,145,194,152,167,5
59847,196,145,161,204,153,168,197,155,165,197,...,186,144,152,179,145,149,192,144,157,5
59848,171,131,142,173,129,142,181,136,149,192,...,171,130,136,161,124,128,177,132,141,5
59849,0,0,0,0,0,0,0,3,3,7,...,175,136,144,168,120,119,115,99,92,5


In [None]:
# Rows per class label in the loaded table.
data["label"].value_counts()

Unnamed: 0_level_0,count
label,Unnamed: 1_level_1
3,9885
5,9858
0,9673
1,9486
2,8901
6,8753
4,3295


In [None]:
# Hold out 25% of the rows as the test set; stratifying on the label keeps the
# class proportions of `data` in both parts. The seed fixes the partition.
imbalanced_train_data, test_data = train_test_split(
    data,
    test_size=0.25,
    random_state=49,
    stratify=data['label'],
)

In [None]:
# Rows per class label in the held-out test split.
test_data["label"].value_counts()

Unnamed: 0_level_0,count
label,Unnamed: 1_level_1
3,2471
5,2465
0,2418
1,2372
2,2225
6,2188
4,824


In [None]:
synthetic_data_path = '/content/drive/MyDrive/Skin HAM10000/synthetic_images_pixel_values1.csv'

synthetic_df = pd.read_csv(synthetic_data_path)

# Guard against train/test leakage: a synthetic row that is identical to a held-out
# test row must not reach the training set, otherwise test metrics are inflated.
# Rows are compared through per-row hashes over the columns both frames share,
# which stays cheap even with thousands of pixel columns. When the two sources
# are genuinely disjoint this filter removes nothing.
shared_columns = [col for col in test_data.columns if col in synthetic_df.columns]
if shared_columns:
    test_row_hashes = pd.util.hash_pandas_object(test_data[shared_columns], index=False)
    synthetic_row_hashes = pd.util.hash_pandas_object(synthetic_df[shared_columns], index=False)
    leaked_rows = synthetic_row_hashes.isin(test_row_hashes)
    if leaked_rows.any():
        print(f'Dropping {int(leaked_rows.sum())} synthetic rows that also appear in the test set')
        synthetic_df = synthetic_df.loc[~leaked_rows]

# Training set = training part of the split + the (filtered) synthetic rows.
train_data = pd.concat([imbalanced_train_data, synthetic_df], ignore_index=True)

In [None]:
# Rows per class label in the combined training set.
train_data["label"].value_counts()

Unnamed: 0_level_0,count
label,Unnamed: 1_level_1
3,17299
5,17251
0,16928
1,16600
2,15577
6,15318
4,5766


In [None]:
# Separate the label column from the pixel columns of the training frame.
y_train = train_data["label"]
train_pixels = train_data.drop(columns=["label"])
# Reshape each flat row into the 28x28x3 layout the model input expects.
X_train = np.array(train_pixels).reshape(-1, 28, 28, 3)
print('Shape of Data :', X_train.shape)

Shape of Data : (104739, 28, 28, 3)


In [None]:
# Convert the training labels to a plain NumPy array and display it.
y_train = np.array(y_train)
y_train

array([2, 0, 4, ..., 5, 5, 5])

In [None]:
# Separate the label column from the pixel columns of the test frame.
y_test = test_data["label"]
test_pixels = test_data.drop(columns=["label"])
# Reshape each flat row into the 28x28x3 layout the model input expects.
X_test = np.array(test_pixels).reshape(-1, 28, 28, 3)
print('Shape of Data :', X_test.shape)

Shape of Data : (14963, 28, 28, 3)


In [None]:
# Convert the test labels to a plain NumPy array and display it.
y_test = np.array(y_test)
y_test

array([1, 5, 3, ..., 5, 3, 3])

In [None]:
from keras.utils import to_categorical

# Width of the one-hot encoding; matches the 7-unit softmax output of the classifier.
NUM_CLASSES = 7

# Pass num_classes explicitly so train and test always get the same width, even if
# the highest label happens to be missing from one of them. The ndim check makes the
# cell safe to re-run: labels that are already one-hot encoded are left untouched.
if y_train.ndim == 1:
    y_train = to_categorical(y_train, num_classes=NUM_CLASSES)
if y_test.ndim == 1:
    y_test = to_categorical(y_test, num_classes=NUM_CLASSES)

In [None]:
# Integer label -> (short code, description). Entry order and every string
# (including leading spaces) are kept exactly as they were.
classes = {
    4: ('nv', ' melanocytic nevi'),
    6: ('mel', 'melanoma'),
    2: ('bkl', 'benign keratosis-like lesions'),
    1: ('bcc', ' basal cell carcinoma'),
    5: ('vasc', ' pyogenic granulomas and hemorrhage'),
    0: ('akiec', 'Actinic keratoses and intraepithelial carcinomae'),
    3: ('df', 'dermatofibroma'),
}

In [None]:
from keras.callbacks import ReduceLROnPlateau

# Halve the learning rate when validation accuracy has stopped improving for
# 2 epochs, with a floor of 1e-5; verbose=1 logs each reduction.
# Note: the training cell further down rebinds `learning_rate_reduction` to a
# callback that monitors val_loss, so that later definition is the one used by fit().
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_accuracy',
    patience=2,
    verbose=1,
    factor=0.5,
    min_lr=0.00001,
)

In [None]:
from tensorflow.keras.layers import (
    Input, Dense, BatchNormalization, Dropout, MultiHeadAttention,
    Flatten, Conv2D, MaxPooling2D, LeakyReLU, Add, GlobalAveragePooling2D
)
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Multiply

# Multi-Head Attention Block
def enhanced_multi_head_attention(inputs):
    """Run multi-head self-attention over `inputs`, then batch-normalise the result.

    The same tensor is passed as both arguments of the attention layer
    (8 heads, key_dim=128).

    Args:
        inputs: Tensor to attend over.

    Returns:
        The batch-normalised attention output.
    """
    self_attention = MultiHeadAttention(num_heads=8, key_dim=128)
    attended = self_attention(inputs, inputs)
    return BatchNormalization()(attended)

# Class-Specific Attention Block
def enhanced_class_specific_attention(x, num_classes):
    """Gate `x` element-wise with sigmoid scores computed from `x` itself.

    Args:
        x: Input tensor; the size of its last dimension sets the width of the
            scoring layer, so scores and features line up one-to-one.
        num_classes: Part of the signature but not used in the body.

    Returns:
        The gated tensor, flattened.
    """
    gate = Dense(x.shape[-1], activation="sigmoid")(x)
    gated = Multiply()([x, gate])
    return Flatten()(gated)

# Residual Block
def residual_block(inputs, filters):
    """Two 3x3 convolutions plus a 1x1-projected skip connection.

    Args:
        inputs: Feature map entering the block.
        filters: Number of output channels for every convolution in the block.

    Returns:
        LeakyReLU(main path + projected shortcut).
    """
    conv_kwargs = dict(padding='same', kernel_initializer='he_normal')

    # Main path: conv -> LeakyReLU -> BN -> conv -> BN.
    main = Conv2D(filters, (3, 3), **conv_kwargs)(inputs)
    main = LeakyReLU(alpha=0.1)(main)
    main = BatchNormalization()(main)
    main = Conv2D(filters, (3, 3), **conv_kwargs)(main)
    main = BatchNormalization()(main)

    # 1x1 convolution gives the shortcut `filters` channels so it can be added to the main path.
    skip = Conv2D(filters, (1, 1), padding='same')(inputs)

    merged = Add()([main, skip])
    return LeakyReLU(alpha=0.1)(merged)

# Enhanced Model
def build_improved_attention_model():
    """Assemble the classifier: conv stem, two residual stages, pooled features,
    a sigmoid feature gate and a dense head.

    Returns:
        An uncompiled Keras ``Model`` taking (28, 28, 3) inputs and producing
        7 softmax scores from a layer named 'classifier'.
    """
    image_in = Input(shape=(28, 28, 3))

    # Stem: 32-filter 3x3 convolution, activation, normalisation, then max-pooling.
    features = Conv2D(32, (3, 3), padding='same', kernel_initializer='he_normal')(image_in)
    features = LeakyReLU(alpha=0.1)(features)
    features = BatchNormalization()(features)
    features = MaxPooling2D()(features)

    # Two residual stages (64 then 128 filters), each followed by max-pooling.
    features = residual_block(features, 64)
    features = MaxPooling2D()(features)

    features = residual_block(features, 128)
    features = MaxPooling2D()(features)

    # Collapse the spatial dimensions into one vector per image.
    pooled = GlobalAveragePooling2D()(features)

    # Re-weight the pooled features with learned sigmoid gates.
    gated = enhanced_class_specific_attention(pooled, num_classes=7)

    # Dense head with dropout before the output layer.
    head = Dense(256, kernel_initializer='he_normal')(gated)
    head = LeakyReLU(alpha=0.1)(head)
    head = BatchNormalization()(head)
    head = Dropout(0.5)(head)

    class_scores = Dense(7, activation='softmax', name='classifier')(head)
    return Model(image_in, class_scores)

# Build and Compile the Model
improved_attention_model = build_improved_attention_model()
improved_attention_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
improved_attention_model.summary()

# Callbacks
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping

# Halve the learning rate after 3 epochs without val_loss improvement (floor 1e-6).
# This rebinds the name defined in an earlier cell, so this is the callback fit() receives.
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_loss',
    patience=3,
    factor=0.5,
    min_lr=1e-6,
)
# Stop after 8 epochs without val_loss improvement and roll back to the best weights.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=8,
    restore_best_weights=True,
)

# Train the Model
# NOTE(review): the test split is passed as validation data, so learning-rate
# scheduling and early stopping are steered by the test set; consider carving a
# separate validation split out of the training data to keep test metrics unbiased.
training_callbacks = [learning_rate_reduction, early_stopping]
history = improved_attention_model.fit(
    x=X_train,
    y=y_train,
    epochs=10,
    batch_size=128,
    validation_data=(X_test, y_test),
    callbacks=training_callbacks,
)


Epoch 1/10
[1m819/819[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 19ms/step - accuracy: 0.5524 - loss: 1.2657 - val_accuracy: 0.4758 - val_loss: 2.6000 - learning_rate: 0.0010
Epoch 2/10
[1m819/819[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.8789 - loss: 0.3424 - val_accuracy: 0.7019 - val_loss: 1.0825 - learning_rate: 0.0010
Epoch 3/10
[1m819/819[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.9323 - loss: 0.1905 - val_accuracy: 0.7487 - val_loss: 0.9230 - learning_rate: 0.0010
Epoch 4/10
[1m819/819[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.9529 - loss: 0.1348 - val_accuracy: 0.7910 - val_loss: 0.6965 - learning_rate: 0.0010
Epoch 5/10
[1m819/819[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.9618 - loss: 0.1108 - val_accuracy: 0.8765 - val_loss: 0.4042 - learning_rate: 0.0010
Epoch 6/10
[1m819/819[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [