# Model testing

The purpose of this notebook is to determine which CNN architecture offers the best balance of efficiency and accuracy
for determining whether a photo provided by the end user shows signs of skin cancer.

In [1]:
# Importing relevant packages
import tensorflow as tf
import keras
from tensorflow.keras import Sequential, Model
from tensorflow.keras.models import *
from tensorflow.keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization, Conv2D, MaxPool2D, Lambda
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.metrics import Precision, Recall
import skimage
from skimage import io, color
from skimage.filters import threshold_otsu
import numpy as np
from PIL import Image
import os




In [2]:
# Load the train / validation / test splits for later use.
# image_dataset_from_directory labels every image with the class of the sub-folder
# it is stored in, i.e. an image inside the '1' folder (skin showing skin cancer)
# is automatically flagged as showing skin cancer.
_SPLIT_OPTIONS = dict(image_size=(300,300), batch_size=32)

train_dataset = tf.keras.preprocessing.image_dataset_from_directory('img_data/train', **_SPLIT_OPTIONS)
val_dataset = tf.keras.preprocessing.image_dataset_from_directory('img_data/val', **_SPLIT_OPTIONS)
test_dataset = tf.keras.preprocessing.image_dataset_from_directory('img_data/test', **_SPLIT_OPTIONS)

Found 42639 files belonging to 2 classes.
Found 5329 files belonging to 2 classes.
Found 5332 files belonging to 2 classes.


## Creating the preprocessing custom layer

This layer will:
- first resize the provided pictures to a set dimension for uniformity,
- then save the resized picture in a variable for later use,
- meanwhile, make another copy of the resized picture and convert it to greyscale,
- use the greyscale photo for Otsu thresholding, which produces the mask,
- finally, create the processed photo used by the model by overlaying the mask on the
   previously saved resized picture, leaving only the important features
   of the picture (i.e., just the skin spot, disregarding healthy skin and hair).

Through preprocessing, it is hoped that the model's accuracy and speed will improve due to the removal of
unnecessary features like image size and the presence of healthy skin and hair.
 

In [3]:
class prepro(tf.keras.layers.Layer):
    """Keras layer that blacks out everything except the darker region of each image.

    For every image in the batch the layer:
      1. resizes it to ``target_size``,
      2. scales pixel values by 1/255,
      3. converts a copy to greyscale and computes an Otsu threshold on it,
      4. keeps the pixels whose greyscale value is at or below the threshold and
         sets every other pixel to zero (black).

    Assumes inputs are 4-D batches shaped (batch, height, width, 3) holding pixel
    values in the 0-255 range — TODO confirm against the datasets fed to the model.

    Args:
        target_size: (height, width) every image is resized to.
        **kwargs: forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(self, target_size=(300, 300), **kwargs):
        super().__init__(**kwargs)
        self.target_size = tuple(target_size)

    @staticmethod
    def _otsu_per_image(gray_batch):
        """Return one Otsu threshold per image of a NumPy greyscale batch."""
        flat = gray_batch.reshape(len(gray_batch), -1)
        # Otsu cannot split a single-valued image; using that value as the
        # threshold keeps the whole image instead of failing.
        return np.array(
            [threshold_otsu(g) if g.min() < g.max() else g.max() for g in flat],
            dtype=np.float32,
        )

    def call(self, img):
        # Resize the batch to a consistent size and scale pixel values to [0, 1]
        # (8-bit images top out at 255).
        img = tf.image.resize(tf.cast(img, tf.float32), self.target_size) / 255.0

        # Greyscale copy used only for thresholding; shape (batch, H, W, 1).
        # Note: TensorFlow's luma weights differ slightly from skimage's rgb2gray.
        img_gc = tf.image.rgb_to_grayscale(img)

        # Global Otsu threshold per image. skimage works on NumPy arrays, so the
        # computation is wrapped to make it callable on tensors.
        thresh = tf.numpy_function(self._otsu_per_image, [img_gc], tf.float32)
        thresh = tf.reshape(thresh, (-1, 1, 1, 1))

        # Mask is 1 where the pixel is at or below the threshold, 0 elsewhere.
        mask = tf.cast(img_gc <= thresh, img.dtype)

        # Multiplying broadcasts the mask over the colour channels, blacking out
        # every pixel that is not part of the mask.
        return img * mask

    def get_config(self):
        config = super().get_config()
        config["target_size"] = self.target_size
        return config


# A later cell defines a function that is also called `prepro`, which rebinds the
# name; this alias keeps the layer class reachable afterwards.
PreproLayer = prepro

In [4]:
# Defining the preprocessing function (single-image version)
def prepro(img, size=(300, 300)):
    """Resize, normalise and mask a single picture.

    The picture is resized, scaled to [0, 1], and an Otsu threshold computed on a
    greyscale copy is used as a mask: pixels whose greyscale value is at or below
    the threshold are kept, every other pixel is set to black.

    Args:
        img: a PIL image (anything offering ``convert`` and ``resize`` the way
            ``PIL.Image.Image`` does).
        size: (width, height) the picture is resized to.

    Returns:
        A float32 NumPy array of shape (size[1], size[0], 3) with values in [0, 1].
    """
    # Resize the image to a consistent size; PIL returns a new image, and forcing
    # RGB guarantees exactly three colour channels for the greyscale conversion.
    resized = img.convert("RGB").resize(size)

    # Convert to a NumPy array and normalize pixel values to be between 0 and 1
    # (8-bit channels top out at 255).
    img_array = np.asarray(resized, dtype="float32") / 255.0

    # Converting RGB picture to greyscale for thresholding
    img_gc = color.rgb2gray(img_array)

    # Otsu cannot split a single-valued picture; there is nothing to mask out.
    if img_gc.min() == img_gc.max():
        return img_array

    # Global thresholding with Otsu: True where the pixel is at or below the threshold
    thresh = threshold_otsu(img_gc)
    mask = img_gc <= thresh

    # Overlaying mask on the normalised image: broadcasting the boolean mask over
    # the colour axis keeps masked pixels and turns all others black.
    return img_array * mask[..., np.newaxis]

## Creating initial CNN model

In [5]:
# Creating Sequential object
model_1 = Sequential()

# Adding layers to the Sequential object.
# Conv2D needs a (height, width, channels) input shape; the datasets are loaded
# with image_size=(300,300) and the images are RGB, hence (300, 300, 3).
# NOTE(review): no rescaling or preprocessing layer is applied here, so the network
# sees whatever pixel range the datasets yield — confirm this is intended.
model_1.add(Conv2D(32, kernel_size=(3,3), activation='relu', padding='same', input_shape=(300,300,3))) # Start of neural network
model_1.add(Conv2D(64, (3,3), activation='relu'))
model_1.add(MaxPool2D(pool_size=(2,2)))
model_1.add(Dropout(0.25))
model_1.add(Flatten())
model_1.add(Dense(128, activation='relu'))
model_1.add(Dropout(0.25))
model_1.add(Dense(128, activation='relu'))
model_1.add(Dropout(0.25))
model_1.add(Dense(1, activation='sigmoid')) # Output layer - just one neuron because of binary classification (one or zero)





In [6]:
# Checking the structure of the model: prints each layer with its output shape and parameter count
model_1.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 300, 300, 32)      896       
                                                                 
 conv2d_1 (Conv2D)           (None, 298, 298, 64)      18496     
                                                                 
 max_pooling2d (MaxPooling2  (None, 149, 149, 64)      0         
 D)                                                              
                                                                 
 dropout (Dropout)           (None, 149, 149, 64)      0         
                                                                 
 flatten (Flatten)           (None, 1420864)           0         
                                                                 
 dense (Dense)               (None, 128)               181870720 
                                                        

In [7]:
# Configure training: Adam optimiser, binary cross-entropy loss, and accuracy /
# precision / recall tracked as metrics
model_1.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=['accuracy', Precision(), Recall()],
)




In [9]:
# Training the model; the validation split is scored at the end of each epoch so
# that generalisation can be monitored alongside the training metrics
fit_model_1 = model_1.fit(train_dataset, validation_data=val_dataset, epochs=1)



In [3]:
# Evaluate the model using the test data.
# evaluate() returns the loss followed by one value per compiled metric
# (accuracy, precision, recall), so all four values have to be unpacked.
model_loss, model_accuracy, model_precision, model_recall = model_1.evaluate(test_dataset,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")
print(f"Precision: {model_precision}, Recall: {model_recall}")

NameError: name 'model_1' is not defined

In [16]:
# Persist the trained model to disk as an HDF5 file
model_1.save(filepath="model1.h5")