In [4]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import cv2
import os

# Load metadata
metadata_path = r"data/ImageData/HAM10000_metadata.csv"
metadata = pd.read_csv(metadata_path)

# Define image directory (update this path accordingly)
image_dir = r"data/ImageData/HAM10000_images_part_1"

# Load and preprocess images
img_size = 64  # Resize images to 64x64

def load_images(metadata, img_size):
    images = []
    labels = []
    label_map = {label: idx for idx, label in enumerate(metadata['dx'].unique())}  # Encoding labels
    
    for index, row in metadata.iterrows():
        img_path = os.path.join(image_dir, row['image_id'] + ".jpg")
        if os.path.exists(img_path):
            img = cv2.imread(img_path)
            img = cv2.resize(img, (img_size, img_size))
            img = img / 255.0  # Normalize
            images.append(img)
            labels.append(label_map[row['dx']])
    
    return np.array(images), np.array(labels), label_map

# Load dataset
X, y, label_map = load_images(metadata, img_size)

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Define CNN model
def create_cnn_model(input_shape, num_classes):
    model = keras.Sequential([
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax')
    ])
    
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Create and train model
model = create_cnn_model((img_size, img_size, 3), len(label_map))
model.fit(X_train, y_train, epochs=150, validation_data=(X_test, y_test), batch_size=32)

# Evaluate model
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {accuracy * 100:.2f}%')




  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/150
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 36ms/step - accuracy: 0.6520 - loss: 1.1182 - val_accuracy: 0.6680 - val_loss: 0.9408
Epoch 2/150
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 40ms/step - accuracy: 0.6687 - loss: 0.9244 - val_accuracy: 0.6775 - val_loss: 0.8544
Epoch 3/150
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 39ms/step - accuracy: 0.6729 - loss: 0.8829 - val_accuracy: 0.6860 - val_loss: 0.8595
Epoch 4/150
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 38ms/step - accuracy: 0.6865 - loss: 0.8532 - val_accuracy: 0.7039 - val_loss: 0.7870
Epoch 5/150
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 40ms/step - accuracy: 0.6982 - loss: 0.8047 - val_accuracy: 0.7034 - val_loss: 0.7974
Epoch 6/150
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 39ms/step - accuracy: 0.6929 - loss: 0.8254 - val_accuracy: 0.7319 - val_loss: 0.7245
Epoch 7/15

In [5]:
# Save model
model.save(r"data/skin_cancer_cnn.keras")
print("Model saved successfully.")

Model saved successfully.


In [6]:
import numpy as np
import tensorflow as tf
import cv2
import os

# Load the trained model
model_path = "data/skin_cancer_cnn.keras"  # Update if saved elsewhere
model = tf.keras.models.load_model(model_path)

# Define image size (should match training size)
img_size = 64  

# Load label mapping (update based on training)
label_map = {
    0: "akiec",  # Actinic keratoses
    1: "bcc",    # Basal cell carcinoma
    2: "bkl",    # Benign keratosis-like lesions
    3: "df",     # Dermatofibroma
    4: "mel",    # Melanoma
    5: "nv",     # Melanocytic nevi
    6: "vasc"    # Vascular lesions
}

# Directory containing test images
test_dir = r"data/test"

# Function to preprocess a single image
def preprocess_image(img_path):
    img = cv2.imread(img_path)
    if img is None:
        print(f"Error loading image: {img_path}")
        return None
    img = cv2.resize(img, (img_size, img_size))
    img = img / 255.0  # Normalize
    img = np.expand_dims(img, axis=0)  # Add batch dimension
    return img

# Predict on all test images
for filename in os.listdir(test_dir):
    img_path = os.path.join(test_dir, filename)
    img_array = preprocess_image(img_path)

    if img_array is not None:
        prediction = model.predict(img_array)
        predicted_class = np.argmax(prediction)  # Get highest probability class
        predicted_label = label_map[predicted_class]  # Converting index to class name
        
        print(f"Image: {filename} --> Predicted Skin Cancer Type: {predicted_label}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 99ms/step
Image: ISIC_0024319NV.jpg --> Predicted Skin Cancer Type: bcc
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
Image: ISIC_0024403BC.jpg --> Predicted Skin Cancer Type: nv
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
Image: ISIC_0024412BKL.jpg --> Predicted Skin Cancer Type: bcc
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
Image: ISIC_0024459ML.jpg --> Predicted Skin Cancer Type: df
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Image: ISIC_0024575A.jpg --> Predicted Skin Cancer Type: vasc
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
Image: ISIC_0025606VA.jpg --> Predicted Skin Cancer Type: mel
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
Image: ISIC_0025622DF.jpg --> Predicted Skin Cancer Type: bkl
