In [45]:
# Step 1: Importing Essential Libraries
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from glob import glob
from PIL import Image
import cv2
import itertools

from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau


In [46]:

# Step 2: Enhance Image with CLAHE
def enhance_image(image_array):
    lab = cv2.cvtColor(image_array, cv2.COLOR_RGB2LAB)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    cl = clahe.apply(l)
    merged = cv2.merge((cl, a, b))
    enhanced = cv2.cvtColor(merged, cv2.COLOR_LAB2RGB)
    return enhanced

In [47]:
# Step 3: Loading images and labels into arrays with CLAHE
def load_images_and_labels(dataset_path, img_size=(224, 224)):
    categories = ['benign', 'malignant']
    data = []
    labels = []

    for category in categories:
        folder_path = os.path.join(dataset_path, category)
        class_label = category
        if not os.path.exists(folder_path):
            print(f"❌ Folder not found: {folder_path}")
            continue
        for img_file in os.listdir(folder_path):
            img_path = os.path.join(folder_path, img_file)
            try:
                img = Image.open(img_path).resize(img_size, Image.LANCZOS).convert('RGB')
                img_np = np.array(img)
                img_enhanced = enhance_image(img_np)
                data.append(img_enhanced)
                labels.append(class_label)
            except Exception as e:
                print(f"⚠️ Skipping file: {img_path} due to error: {e}")
                continue
    print(f"✅ Loaded {len(labels)} images.")
    return np.array(data), np.array(labels)

# Step 4: Categorical Labels
data_path = r"C:\Users\LLR User\Desktop\Coding\code\skin-cancer\Dataset"  # Full path

data, labels = load_images_and_labels(data_path)

✅ Loaded 2109 images.


In [48]:
label_encoder = LabelEncoder()
if len(labels) == 0:
    raise ValueError("No image data found. Check your dataset path and folder names.")
labels_encoded = label_encoder.fit_transform(labels)
labels_categorical = to_categorical(labels_encoded, num_classes=len(np.unique(labels_encoded)))

In [50]:
# Step 5: Normalization using dataset mean and std
mean = np.mean(data, axis=(0, 1, 2), keepdims=True, dtype=np.float32)
std = np.std(data, axis=(0, 1, 2), keepdims=True, dtype=np.float32)
std[std == 0] = 1e-6  # Avoid division by zero
data = (data - mean) / std

In [51]:
# Step 6: Train and Test Split
X_train, X_test, y_train, y_test = train_test_split(data, labels_categorical, test_size=0.2, random_state=42, stratify=labels_encoded)


In [52]:
# Step 7: Data Augmentation
train_datagen = ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.3,
    brightness_range=[0.8, 1.2],
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest')

val_datagen = ImageDataGenerator()
train_generator = train_datagen.flow(X_train[:512], y_train[:512], batch_size=32)
val_generator = val_datagen.flow(X_test[:128], y_test[:128], batch_size=32)


In [53]:
def build_cnn_model(input_shape=(224, 224, 3)):
    model = Sequential()
    model.add(Conv2D(16, (3, 3), activation='relu', input_shape=input_shape))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(32, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(2, activation='softmax'))
    model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])
    return model


In [54]:
# Step 9: Train the model on smaller batches to reduce memory usage
X_train_small = X_train[:256]
y_train_small = y_train[:256]
X_test_small = X_test[:64]
y_test_small = y_test[:64]

train_generator = train_datagen.flow(X_train_small, y_train_small, batch_size=16)
val_generator = val_datagen.flow(X_test_small, y_test_small, batch_size=16)

lr_scheduler = ReduceLROnPlateau(monitor='val_accuracy', patience=5, verbose=1, factor=0.5, min_lr=1e-7)
history = model.fit(train_generator, validation_data=val_generator, epochs=10, callbacks=[lr_scheduler])


  self._warn_if_super_not_called()


Epoch 1/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 723ms/step - accuracy: 0.8032 - loss: 0.4544 - val_accuracy: 0.7969 - val_loss: 0.3834 - learning_rate: 5.0000e-05
Epoch 2/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 442ms/step - accuracy: 0.8154 - loss: 0.4058 - val_accuracy: 0.8281 - val_loss: 0.3744 - learning_rate: 5.0000e-05
Epoch 3/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 495ms/step - accuracy: 0.8528 - loss: 0.3562 - val_accuracy: 0.8125 - val_loss: 0.3881 - learning_rate: 5.0000e-05
Epoch 4/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 468ms/step - accuracy: 0.8385 - loss: 0.3850 - val_accuracy: 0.7969 - val_loss: 0.3628 - learning_rate: 5.0000e-05
Epoch 5/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 464ms/step - accuracy: 0.8395 - loss: 0.3156 - val_accuracy: 0.8281 - val_loss: 0.3700 - learning_rate: 5.0000e-05
Epoch 6/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━

In [55]:
# Step 10: Evaluate the model
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_test, axis=1)

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 130ms/step


In [56]:
acc = accuracy_score(y_true, y_pred_classes)
print("Test Accuracy:", acc)
print("Confusion Matrix:\n", confusion_matrix(y_true, y_pred_classes))
print("Classification Report:\n", classification_report(y_true, y_pred_classes))

Test Accuracy: 0.7867298578199052
Confusion Matrix:
 [[176  55]
 [ 35 156]]
Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.76      0.80       231
           1       0.74      0.82      0.78       191

    accuracy                           0.79       422
   macro avg       0.79      0.79      0.79       422
weighted avg       0.79      0.79      0.79       422

