In [29]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from glob import glob
from PIL import Image
import itertools

from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau


In [31]:
# Step 2: Loading images and labels into arrays
def load_images_and_labels(dataset_path, img_size=(224, 224)):
    categories = ['benign', 'malignant']
    data = []
    labels = []

    for category in categories:
        folder_path = os.path.join(dataset_path, category)
        class_label = category
        for img_file in os.listdir(folder_path):
            img_path = os.path.join(folder_path, img_file)
            try:
                img = Image.open(img_path).resize(img_size).convert('RGB')
                data.append(np.array(img))
                labels.append(class_label)
            except Exception as e:
                continue
    return np.array(data), np.array(labels)

# Step 3: Categorical Labels
# Load data
data_path = r"C:\Users\LLR User\Desktop\Coding\code\skin-cancer\Dataset"  # Replace with your actual dataset folder path
data, labels = load_images_and_labels(data_path)

In [32]:
# Encode labels
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)
labels_categorical = to_categorical(labels_encoded)

In [33]:
# Step 4: Normalization
data = data / 255.0  # Normalize pixel values

# Step 5: Train and Test Split
X_train, X_test, y_train, y_test = train_test_split(data, labels_categorical, test_size=0.2, random_state=42, stratify=labels_encoded)


In [34]:
# Step 6: Model Building (Basic CNN)
def build_cnn_model(input_shape=(224, 224, 3)):
    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(128, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(2, activation='softmax'))

    model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

model = build_cnn_model()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [35]:
# Step 7: Cross-validating model using K-Fold (optional but shown)
kfold = StratifiedKFold(n_splits=3, shuffle=True, random_state=1)
accuracies = []

for train_idx, val_idx in kfold.split(X_train, np.argmax(y_train, axis=1)):
    model = build_cnn_model()
    model.fit(X_train[train_idx], y_train[train_idx], epochs=5, verbose=1, validation_data=(X_train[val_idx], y_train[val_idx]))
    scores = model.evaluate(X_train[val_idx], y_train[val_idx], verbose=0)
    accuracies.append(scores[1])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/5
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 772ms/step - accuracy: 0.5250 - loss: 0.7111 - val_accuracy: 0.6128 - val_loss: 0.5948
Epoch 2/5
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 687ms/step - accuracy: 0.6790 - loss: 0.5834 - val_accuracy: 0.7744 - val_loss: 0.5055
Epoch 3/5
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 692ms/step - accuracy: 0.7568 - loss: 0.5223 - val_accuracy: 0.7531 - val_loss: 0.4826
Epoch 4/5
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 704ms/step - accuracy: 0.7443 - loss: 0.5129 - val_accuracy: 0.7762 - val_loss: 0.4672
Epoch 5/5
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 764ms/step - accuracy: 0.7859 - loss: 0.4596 - val_accuracy: 0.7229 - val_loss: 0.5120
Epoch 1/5
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 2s/step - accuracy: 0.5358 - loss: 0.7325 - val_accuracy: 0.7367 - val_loss: 0.6210
Epoch 2/5
[1m36/36[0m [32m━━

In [36]:
print("Cross-Validation Accuracies:", accuracies)
print("Mean Accuracy:", np.mean(accuracies))

Cross-Validation Accuracies: [0.7229129672050476, 0.7953736782073975, 0.7864768505096436]
Mean Accuracy: 0.7682544986406962


In [37]:
# Step 8: Testing model
model = build_cnn_model()
model.fit(X_train, y_train, epochs=10, validation_split=0.1, verbose=1)

Epoch 1/10
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 711ms/step - accuracy: 0.5792 - loss: 0.6880 - val_accuracy: 0.7633 - val_loss: 0.5480
Epoch 2/10
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 755ms/step - accuracy: 0.7157 - loss: 0.5530 - val_accuracy: 0.7929 - val_loss: 0.4500
Epoch 3/10
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 725ms/step - accuracy: 0.7260 - loss: 0.5196 - val_accuracy: 0.8284 - val_loss: 0.4353
Epoch 4/10
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 688ms/step - accuracy: 0.7712 - loss: 0.4605 - val_accuracy: 0.7929 - val_loss: 0.4466
Epoch 5/10
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 693ms/step - accuracy: 0.7578 - loss: 0.4804 - val_accuracy: 0.8225 - val_loss: 0.4228
Epoch 6/10
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 706ms/step - accuracy: 0.7835 - loss: 0.4638 - val_accuracy: 0.8047 - val_loss: 0.4036
Epoch 7/10
[1m48/48[

<keras.src.callbacks.history.History at 0x18ced52d300>

In [38]:
# Evaluate on test set
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_test, axis=1)

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 149ms/step


In [39]:
# Accuracy & classification report
acc = accuracy_score(y_true, y_pred_classes)
print("Test Accuracy:", acc)
print("Confusion Matrix:\n", confusion_matrix(y_true, y_pred_classes))
print("Classification Report:\n", classification_report(y_true, y_pred_classes))

Test Accuracy: 0.7630331753554502
Confusion Matrix:
 [[173  58]
 [ 42 149]]
Classification Report:
               precision    recall  f1-score   support

           0       0.80      0.75      0.78       231
           1       0.72      0.78      0.75       191

    accuracy                           0.76       422
   macro avg       0.76      0.76      0.76       422
weighted avg       0.77      0.76      0.76       422

