# Notebook for testing the model on the test set

In [None]:
'''
Feature extraction for the cancer dataset.
Feature 'smoke':
False: 0
True: 1
Unknown: -1
------------------------------
Feature 'drink':
False: 0
True: 1
Unknown: -1
------------------------------
Feature 'background_father':
AUSTRIA: 0
BRASIL: 1
BRAZIL: 2
CZECH: 3
GERMANY: 4
ISRAEL: 5
ITALY: 6
NETHERLANDS: 7
POLAND: 8
POMERANIA: 9
PORTUGAL: 10
SPAIN: 11
Unknown: -1
------------------------------
Feature 'background_mother':
BRAZIL: 0
FRANCE: 1
GERMANY: 2
ITALY: 3
NETHERLANDS: 4
NORWAY: 5
POLAND: 6
POMERANIA: 7
PORTUGAL: 8
SPAIN: 9
Unknown: -1
------------------------------
Feature 'pesticide':
False: 0
True: 1
Unknown: -1
------------------------------
Feature 'gender':
FEMALE: 0
MALE: 1
Unknown: -1
------------------------------
Feature 'skin_cancer_history':
False: 0
True: 1
Unknown: -1
------------------------------
Feature 'cancer_history':
False: 0
True: 1
Unknown: -1
------------------------------
Feature 'has_piped_water':
False: 0
True: 1
Unknown: -1
------------------------------
Feature 'has_sewage_system':
False: 0
True: 1
Unknown: -1
------------------------------
Feature 'grew':
FALSE: 0
TRUE: 1
Unknown: -1
------------------------------
Feature 'changed':
FALSE: 0
TRUE: 1
Unknown: -1
------------------------------
'''

In [4]:
# randomly generate an encoded numeric value for each of the original features
import random

def random_value(mapping):
    """Pick one encoded value out of ``mapping`` at random.

    Only the dictionary's values take part in the draw; the keys are ignored.
    """
    candidates = list(mapping.values())
    return random.choice(candidates)

def generate_random_features():
    """Build one synthetic record of encoded metadata features.

    Each feature gets an integer code drawn by ``random_value`` from that
    feature's encoding table; in every table 'Unknown' is encoded as -1.

    Returns:
        dict: feature name -> randomly chosen integer code.
    """
    def yes_no():
        # Tri-state flag encoding; a fresh table is built for each feature.
        return {False: 0, True: 1, 'Unknown': -1}

    def enumerated(labels):
        # Labels are numbered from 0 in the given order, then 'Unknown' is appended.
        table = {label: code for code, label in enumerate(labels)}
        table['Unknown'] = -1
        return table

    father_origins = [
        'AUSTRIA', 'BRASIL', 'BRAZIL', 'CZECH', 'GERMANY', 'ISRAEL',
        'ITALY', 'NETHERLANDS', 'POLAND', 'POMERANIA', 'PORTUGAL', 'SPAIN',
    ]
    mother_origins = [
        'BRAZIL', 'FRANCE', 'GERMANY', 'ITALY', 'NETHERLANDS',
        'NORWAY', 'POLAND', 'POMERANIA', 'PORTUGAL', 'SPAIN',
    ]

    # Insertion order below fixes the order in which random draws are made.
    encodings = {
        'smoke': yes_no(),
        'drink': yes_no(),
        'background_father': enumerated(father_origins),
        'background_mother': enumerated(mother_origins),
        'pesticide': yes_no(),
        'gender': enumerated(['FEMALE', 'MALE']),
        'skin_cancer_history': yes_no(),
        'cancer_history': yes_no(),
        'has_piped_water': yes_no(),
        'has_sewage_system': yes_no(),
        'grew': yes_no(),
        'changed': yes_no(),
    }

    sampled = {}
    for name, table in encodings.items():
        sampled[name] = random_value(table)
    return sampled

# Draw one random set of encoded feature values and keep it in `random_features`.
random_features = generate_random_features()

In [4]:
# try if the image processor is working
import tensorflow as tf

# image processing for model
def img_preprocessing(img, target_size=(150, 150)):
    """Read a PNG file and convert it to a normalized float tensor.

    Args:
        img: Path of the image file to read.
        target_size: (height, width) the image is resized to. Defaults to
            (150, 150), which matches the (None, 150, 150, 3) input layer
            printed by ``model.summary()`` in this notebook.

    Returns:
        A float32 tensor of shape (height, width, 3) with pixel values
        scaled to the [0, 1] range.
    """
    raw_bytes = tf.io.read_file(img)  # Read the encoded file contents
    pixels = tf.image.decode_png(raw_bytes, channels=3)  # Decode to 3 colour channels
    pixels = tf.image.resize(pixels, target_size)  # Resize to the requested size
    return tf.cast(pixels, tf.float32) / 255.0  # Scale pixel values to [0, 1]

# image path
img_path = 'testing_data/test3.png'

# Load and preprocess the image; `img` is reused by the prediction cell below.
img = img_preprocessing(img_path)

In [12]:
from sklearn.ensemble import RandomForestClassifier

In [7]:
# test if the model is working
from keras.models import load_model

# Load the saved model from the .h5 file and print its layer summary.
# `model` is reused by the prediction cell below.
model = load_model('pretrained_image_model.h5')
model.summary()

Model: "model_20"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_42 (InputLayer)       [(None, 150, 150, 3)]     0         
                                                                 
 xception (Functional)       (None, 5, 5, 2048)        20861480  
                                                                 
 global_average_pooling2d_20  (None, 2048)             0         
  (GlobalAveragePooling2D)                                       
                                                                 
 dense_54 (Dense)            (None, 25)                51225     
                                                                 
 dropout_20 (Dropout)        (None, 25)                0         
                                                                 
 dense_55 (Dense)            (None, 7)                 182       
                                                          

In [25]:
# try to predict the image
def predict_image(model, img):
    """Run ``model`` on a single image and return softmax scores.

    A leading batch axis is added to ``img`` before ``model.predict`` is
    called, and softmax is applied to whatever the model returns.

    Returns:
        The softmax of the model output, converted to a numpy array.
    """
    batch = tf.expand_dims(img, axis=0)  # add the batch axis
    raw_scores = model.predict(batch)
    return tf.nn.softmax(raw_scores).numpy()

# Softmax scores for `img`; both `model` and `img` must already be defined.
prediction = predict_image(model, img)

# map the prediction to its class label
# 1. Actinic keratosis 2. Basal cell carcinoma 3. Benign keratosis-like lesions 4. Dermatofibroma 5. Melanocytic nevi 6. Melanoma 7. Vascular lesions

def get_cancer_type(prediction, threshold=0.5):
    """Map the softmax scores of one image to a lesion label.

    Args:
        prediction: numpy array holding one score per class, in the class
            order of ``cancer_types`` below. It may be 1-D or carry a
            leading batch axis of size 1, e.g. shape (7,) or (1, 7).
        threshold: minimum top score required to report a class.
            Defaults to 0.5.

    Returns:
        str: the label of the highest-scoring class, or 'Not cancer' when
        the highest score is below ``threshold``.

    Raises:
        ValueError: if ``prediction`` does not contain exactly one score
            per class (for example a batch of several images).
    """
    # NOTE: label spellings ("leisons") are kept as-is because callers may
    # compare against these exact strings.
    cancer_types = {
        0: 'Actinic keratosis',
        1: 'Basal cell carcinoma',
        2: 'Benign Keratosis-like leisons',
        3: 'Dermatofibroma',
        4: 'Melanocytic nevi',
        5: 'Melanoma',
        6: 'Vascular leisons'
    }

    # Flatten so argmax is a class index rather than a flat index into a
    # batched array, which could fall outside the label table.
    scores = prediction.reshape(-1)
    if scores.size != len(cancer_types):
        raise ValueError(
            'expected %d class scores for a single image, got array of shape %s'
            % (len(cancer_types), getattr(prediction, 'shape', None))
        )

    # No class is confident enough: report 'Not cancer'.
    if scores.max() < threshold:
        return 'Not cancer'
    return cancer_types[int(scores.argmax())]
    
# try non-cancer image
img_path = 'testing_data/test4.png'

img = img_preprocessing(img_path)

# predict_image already applies softmax, so `prediction` holds softmax scores
prediction = predict_image(model, img)

# no further conversion is needed before looking up the label
cancer_type = get_cancer_type(prediction)
# the cell displays the raw scores; `cancer_type` is computed but not shown
prediction

array([[0.02001181, 0.01874391, 0.07246403, 0.00865481, 0.05187269,
        0.79915637, 0.02909629]], dtype=float32)