Libraties

In [23]:
import numpy as np 
import pandas as pd
import tensorflow as tf

import os
from glob import glob
import cv2

In [24]:
base_path = os.path.join(os.getcwd(), 'HAM10000_Images')
for dirname, _, _ in os.walk(base_path):
    print(dirname)

In [25]:
base_path = r"d:\term 4\Studio\Skin\Skin-Cancer-Detection-App\HAM10000_Images"
print("Looking in:", base_path)

d_paths = [x[0] for x in os.walk(base_path)]
print("Directories found:", d_paths)

i_paths = []
for d_path in d_paths:
    i_paths.extend(glob(os.path.join(d_path, '*')))
print("Number of files:", len(i_paths))

Looking in: d:\term 4\Studio\Skin\Skin-Cancer-Detection-App\HAM10000_Images
Directories found: ['d:\\term 4\\Studio\\Skin\\Skin-Cancer-Detection-App\\HAM10000_Images', 'd:\\term 4\\Studio\\Skin\\Skin-Cancer-Detection-App\\HAM10000_Images\\akiec', 'd:\\term 4\\Studio\\Skin\\Skin-Cancer-Detection-App\\HAM10000_Images\\bcc', 'd:\\term 4\\Studio\\Skin\\Skin-Cancer-Detection-App\\HAM10000_Images\\bkl', 'd:\\term 4\\Studio\\Skin\\Skin-Cancer-Detection-App\\HAM10000_Images\\df', 'd:\\term 4\\Studio\\Skin\\Skin-Cancer-Detection-App\\HAM10000_Images\\mel', 'd:\\term 4\\Studio\\Skin\\Skin-Cancer-Detection-App\\HAM10000_Images\\nv', 'd:\\term 4\\Studio\\Skin\\Skin-Cancer-Detection-App\\HAM10000_Images\\vasc']
Number of files: 10022


In [26]:
d_paths

['d:\\term 4\\Studio\\Skin\\Skin-Cancer-Detection-App\\HAM10000_Images',
 'd:\\term 4\\Studio\\Skin\\Skin-Cancer-Detection-App\\HAM10000_Images\\akiec',
 'd:\\term 4\\Studio\\Skin\\Skin-Cancer-Detection-App\\HAM10000_Images\\bcc',
 'd:\\term 4\\Studio\\Skin\\Skin-Cancer-Detection-App\\HAM10000_Images\\bkl',
 'd:\\term 4\\Studio\\Skin\\Skin-Cancer-Detection-App\\HAM10000_Images\\df',
 'd:\\term 4\\Studio\\Skin\\Skin-Cancer-Detection-App\\HAM10000_Images\\mel',
 'd:\\term 4\\Studio\\Skin\\Skin-Cancer-Detection-App\\HAM10000_Images\\nv',
 'd:\\term 4\\Studio\\Skin\\Skin-Cancer-Detection-App\\HAM10000_Images\\vasc']

In [27]:
from wolta.visual_tools import get_extensions

get_extensions(i_paths)

{'none': 7, 'jpg': 10015}

In [28]:

i_paths = []
for subfolder in ['akiec', 'bcc', 'bkl', 'df', 'mel', 'nv', 'vasc']:
    folder_path = os.path.join(base_path, subfolder)
    image_paths = glob(os.path.join(folder_path, '*.*'))  # Get all files
    i_paths.extend(image_paths)

# Print first few paths to verify
print("Number of images found:", len(i_paths))
if len(i_paths) > 0:
    print("First image path:", i_paths[0])
    # Try reading the first image
    img = cv2.imread(i_paths[0])
    if img is not None:
        print('width: {}\nheight: {}'.format(img.shape[1], img.shape[0]))
    else:
        print("Failed to read image")
else:
    print("No images found in the directories")

Number of images found: 10015
First image path: d:\term 4\Studio\Skin\Skin-Cancer-Detection-App\HAM10000_Images\akiec\ISIC_0024329.jpg
width: 128
height: 128


Image Augmentation

In [33]:
# Create a directory for processed images
output_base = 'processed_images'
os.makedirs(output_base, exist_ok=True)

# Get all subdirectories in HAM10000_Images
base_path = r"d:\term 4\Studio\Skin\Skin-Cancer-Detection-App\HAM10000_Images"
d_paths = [os.path.join(base_path, d) for d in os.listdir(base_path) if os.path.isdir(os.path.join(base_path, d))]

for d_path in d_paths:
    # Get directory name from path
    dir_name = os.path.basename(d_path)
    
    # Create subdirectory for each class
    current_dir = os.path.join(output_base, dir_name)
    os.makedirs(current_dir, exist_ok=True)
    
    # Get all images in current directory
    i_paths = glob(os.path.join(d_path, '*'))
    
    for i_path in i_paths:
        # Get image name
        i_name = os.path.basename(i_path)
        
        # Read and process image
        img = cv2.imread(i_path)
        if img is not None:
            edge = min(img.shape[0], img.shape[1])
            img = crop(img, crop_width=edge, crop_height=edge, get_img=True)
            img = cv2.resize(img, (128, 128))
            
            # Save processed image
            output_path = os.path.join(current_dir, i_name)
            try:
                cv2.imwrite(output_path, img)
            except Exception as e:
                print(f"Error saving image: {output_path}")
                print(e)
        else:
            print(f"Failed to read image: {i_path}")

DATA PREPARATION FOR TESTING AND TRAINING

In [34]:
train_ds, test_val_ds = tf.keras.utils.image_dataset_from_directory(
    'processed_images',
    validation_split=0.4,
    subset='both',
    seed=123,
    image_size=(128, 128),
    batch_size=16
)

Found 10015 files belonging to 7 classes.
Using 6009 files for training.
Using 4006 files for validation.


In [38]:
test_val_ds_size = tf.data.experimental.cardinality(test_val_ds).numpy()
test_val_split_size = int(0.5 * test_val_ds_size)

validation_ds = test_val_ds.take(test_val_split_size)
test_ds = test_val_ds.skip(test_val_split_size)

In [39]:
names = train_ds.class_names
num_classes = len(names)

print(names)

['akiec', 'bcc', 'bkl', 'df', 'mel', 'nv', 'vasc']


Model

In [36]:
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

In [40]:
model = Sequential([
    layers.Input(shape=[128, 128, 3]),
    layers.Rescaling(1./255),
    layers.Conv2D(16, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(32, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(64, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(num_classes)
])

In [41]:
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy']
)

In [42]:
model.summary()

In [47]:
model.save("src/skin_cancer_model.keras")


In [43]:
history = model.fit(
    train_ds,
    validation_data=validation_ds,
    epochs=5
)

Epoch 1/5
[1m376/376[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 84ms/step - accuracy: 0.6570 - loss: 1.0809 - val_accuracy: 0.6930 - val_loss: 0.9075
Epoch 2/5
[1m376/376[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 48ms/step - accuracy: 0.6763 - loss: 0.9133 - val_accuracy: 0.6865 - val_loss: 0.8697
Epoch 3/5
[1m376/376[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 48ms/step - accuracy: 0.6947 - loss: 0.8315 - val_accuracy: 0.7100 - val_loss: 0.7808
Epoch 4/5
[1m376/376[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 47ms/step - accuracy: 0.7081 - loss: 0.7760 - val_accuracy: 0.7295 - val_loss: 0.7458
Epoch 5/5
[1m376/376[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 47ms/step - accuracy: 0.7340 - loss: 0.7042 - val_accuracy: 0.7315 - val_loss: 0.7167


In [44]:
loss, acc = model.evaluate(test_ds)

print(f"Test Accuracy: {acc * 100:.2f}%")

[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 21ms/step - accuracy: 0.7494 - loss: 0.6793
Test Accuracy: 72.83%


In [49]:


# Load the trained model
model = tf.keras.models.load_model("src/skin_cancer_model.keras")

# Define class labels (ensure these match your dataset labels)
class_labels = ["akiec", "bcc", "bkl", "df", "mel", "nv", "vasc"]

# Path to test images
test_folder = "src/test"

def preprocess_image(image_path):
    """Load and preprocess the image for model prediction"""
    img = cv2.imread(image_path)
    img = cv2.resize(img, (128, 128))  # Resize to match your model's input size
    img = img / 255.0  # Normalize pixel values
    img = np.expand_dims(img, axis=0)  # Add batch dimension
    return img

def predict_image(image_path):
    """Predict the class of the image"""
    img = preprocess_image(image_path)
    prediction = model.predict(img)
    predicted_class = np.argmax(prediction)
    return class_labels[predicted_class], prediction[0][predicted_class]

# Iterate through test images and predict
for image_name in os.listdir(test_folder):
    image_path = os.path.join(test_folder, image_name)
    predicted_class, confidence = predict_image(image_path)
    print(f"Image: {image_name} | Predicted: {predicted_class} | Confidence: {confidence:.2f}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 133ms/step
Image: ISIC_0024308.jpg | Predicted: df | Confidence: 0.16
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
Image: ISIC_0024371.jpg | Predicted: bkl | Confidence: 0.16
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
Image: ISIC_0024418.jpg | Predicted: bkl | Confidence: 0.16
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
Image: ISIC_0024457.jpg | Predicted: bkl | Confidence: 0.16
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
Image: ISIC_0024459.jpg | Predicted: df | Confidence: 0.16
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
Image: ISIC_0024904.jpg | Predicted: bkl | Confidence: 0.16
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
Image: ISIC_0024973.jpg | Predicted: bkl | Confidence: 0.16
