# MODEL - IMAGE LOADING & NEURAL NETWORK

In [1]:
#Import libraries
import csv
import os
import io
import cv2
from PIL import Image
import h5py
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
from sklearn import preprocessing
import tensorflow as tf
from tensorflow import keras

2024-09-10 11:52:24.359235: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-09-10 11:52:24.362781: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-09-10 11:52:24.373045: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-10 11:52:24.390100: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-09-10 11:52:24.394973: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-09-10 11:52:24.406830: I tensorflow/core/platform/cpu_feature_gu

## 1) GENERAL FUNCTIONS

In [2]:
#Function to show image
def show_img(image):
    plt.imshow(image, interpolation=None)
    plt.grid(None)
    plt.show()

In [3]:
#Image cropping
def crop_image(images_list, nbPix = 100):
    output_images = []
    for image in images_list:
        #Height adjustments
        h = len(image)
        adj = len(image) - nbPix
        h1 = round(adj / 2) #Top
        h2 = h - (adj - h1) #Bottom

        #Width adjustments
        w = len(image[0])
        w_adj = w - nbPix
        w1 = round(w_adj / 2) #Left
        w2 = w - (w_adj - w1) #Right

        img = image[h1:h2,w1:w2]
        output_images.append(img)
        
    return np.array(output_images)

## 2) IMPORT DATA

### 2.1 - Declare file paths

In [4]:
#General file paths
projectDir = os.getcwd() + "/"
parentDir = os.path.abspath(os.path.join(projectDir, os.pardir)) + "/"
dataPath = os.path.abspath(os.path.join(projectDir, os.pardir)) + "/isic-2024-challenge/"

#Metadata file paths
metaPath = dataPath + "train-metadata.csv"

#Image file path
hdf5_file = dataPath + "train-image.hdf5"

### 2.2 - Load metadata from csv

In [5]:
#Import metadata
metadata = pd.read_csv(metaPath, sep=",")

#METADATA: color and size features having no NAs
metadata = metadata[["isic_id",
                     "target",
                     "clin_size_long_diam_mm",
                     "tbp_lv_areaMM2",
                     "tbp_lv_area_perim_ratio",
                     "tbp_lv_eccentricity",
                     "tbp_lv_minorAxisMM",
                     "tbp_lv_color_std_mean",
                     "tbp_lv_deltaLBnorm",
                     "tbp_lv_radial_color_std_max"]]

#Verify that there are no NAs
print("-- X_meta NA counts --")
print(metadata.isna().sum())

  metadata = pd.read_csv(metaPath, sep=",")


-- X_meta NA counts --
isic_id                        0
target                         0
clin_size_long_diam_mm         0
tbp_lv_areaMM2                 0
tbp_lv_area_perim_ratio        0
tbp_lv_eccentricity            0
tbp_lv_minorAxisMM             0
tbp_lv_color_std_mean          0
tbp_lv_deltaLBnorm             0
tbp_lv_radial_color_std_max    0
dtype: int64


### 2.3 - Clean data

In [6]:
#Add code here


### 2.4 - Train, Validate, Test Split

In [7]:
#Image subset: normal, hairs1, hairs2, wrinkles1, wrinkles2, protrusions, malignant, malignant, other
train_imgs = ["ISIC_0015670", "ISIC_0052213", "ISIC_0075726", "ISIC_0076172", "ISIC_8570031", "ISIC_5071401", "ISIC_0104229", "ISIC_9877311", "ISIC_0024200"]

In [8]:
#Generate the metadata
X_train = metadata[metadata["isic_id"].isin(train_imgs)].iloc[:,2:]

#Generate the target
y_train = metadata[metadata["isic_id"].isin(train_imgs)]["target"]

### 2.5 - Load images and create hybrid tensorflow dataset

In [9]:
#GENERATOR FOR HDF5
#Generates the image (standardized). Avoids multiple file open/read/close operations.
#file: full path for file
#imgs: list of images to load
#imgSize: number of pixels for size/resolution adjustment in square form
class hdf5_generator:
    def __init__(self, file, img_names, imgSize):
        self.file = file
        self.img_names = img_names
        self.imgSize = imgSize
    def __call__(self):
        with h5py.File(self.file, 'r') as h5file:
            for img in self.img_names:
                img = np.array(Image.open(io.BytesIO(h5file[img][()])))
                img = tf.image.resize(img, [self.imgSize, self.imgSize])
                img = tf.constant(np.reshape(img/255,(1,100,100,3)), dtype=tf.float32) #standardized here
                yield img

#DATASET FUNCTION
def make_dataset(hdf5_file, meta, target, img_names, imgSize=100):
    #Generate image dataset
    img_dataset = tf.data.Dataset.from_generator(
        hdf5_generator(hdf5_file, img_names, imgSize),
        output_types=tf.float32,
        output_shapes = tf.TensorShape([1, imgSize,imgSize,3])
        )

    #Generate target dataset
    target = [np.reshape(element, (1,1)) for element in target]
    target = tf.cast(target, dtype=tf.int32)
    target = tf.data.Dataset.from_tensor_slices(target, name = "target")

    #Generate metadata set
    meta = tf.cast(meta, dtype=tf.float32)
    meta = tf.reshape(meta, shape=(9,1,8))
    meta = tf.data.Dataset.from_tensor_slices(meta, name = "metadata")

    #Combine datasets into one
    dataset = tf.data.Dataset.zip((img_dataset, meta))
    dataset = tf.data.Dataset.zip((dataset, target))

    return dataset

In [10]:
#Make datasets
train_dataset = make_dataset(hdf5_file, X_train, y_train, train_imgs)

Instructions for updating:
Use output_signature instead
Instructions for updating:
Use output_signature instead


In [11]:
#Delete duplicate data, keeping only the dataset
del metadata
del y_train
del X_train

In [12]:
#Examine the dataset objects
print(train_dataset, "\n")

#iterator = iter(train_dataset)
#print(iterator.get_next())

<_ZipDataset element_spec=((TensorSpec(shape=(1, 100, 100, 3), dtype=tf.float32, name=None), TensorSpec(shape=(1, 8), dtype=tf.float32, name=None)), TensorSpec(shape=(1, 1), dtype=tf.int32, name=None))> 



## 3) CNN MODEL

### 3.1 - Model class

In [13]:
#Simple CNN model using only images and target
class CNN_model(tf.keras.Model):
    def __init__(self, neurons = 8, activ = 'tanh'):
        super().__init__()
        self.conv1 = tf.keras.layers.Conv2D(filters=16, kernel_size=5, strides=(1, 1), activation='relu', padding='same', input_shape=(100, 100, 3))
        self.pool1 = tf.keras.layers.MaxPool2D(pool_size=(2,2))
        self.flatten = tf.keras.layers.Flatten()
        self.dense1 = tf.keras.layers.Dense(neurons, activation = activ)
        self.dense2 = tf.keras.layers.Dense(1, activation='sigmoid')

    def call(self, inputs):
        x_image, x_meta = inputs

        # Convolutions
        x1 = self.conv1(x_image)
        x1 = self.pool1(x1)

        # Flattening of images for input layer
        x1 = self.flatten(x1)

        # Hidden layers of neural network
        x1 = self.dense1(x1)

        # Output layer of neural network
        output = self.dense2(x1)

        return output

#Hybrid CNN model taking metadata
class Hybrid_model(tf.keras.Model):
    def __init__(self, neurons = 8, activ = 'tanh'):
        super().__init__()
        self.conv1 = tf.keras.layers.Conv2D(filters=16, kernel_size=5, strides=(1, 1), activation='relu', padding='same')
        self.conv2 = tf.keras.layers.Conv2D(32, 5, activation='relu')
        self.pool = tf.keras.layers.MaxPool2D(pool_size=(2,2))
        self.flatten = tf.keras.layers.Flatten()
        self.dense1 = tf.keras.layers.Dense(neurons, activation = activ)
        self.dense2 = tf.keras.layers.Dense(neurons, activation = activ)
        self.dense3 = tf.keras.layers.Dense(1, activation='sigmoid')
        #self.dropout = tf.keras.layers.dropout(0.25)

    def call(self, inputs, training=False):
        x_image, x_meta = inputs
        # Convolutions
        x = self.conv1(x_image)
        x = self.pool(x)
        #x = self.conv2(x)
        #x = self.pool(x)
        # Flattening of images and concatenation with other data
        x = self.flatten(x)
        #x_all = tf.concat([x,x_meta], axis=1)
        x_all = keras.layers.Concatenate(axis=1)([x, x_meta])
        # Neural Network
        x_all = self.dense1(x_all)
        #x_all = self.dense2(x_all)
        #if training:
        #    x_all = self.dropout(x_all, training=training)
        output = self.dense3(x_all)
        return output

### 3.2 - Model compiling

In [14]:
#Set seed
tf.random.set_seed(71)

#Initialize model
#model = CNN_model(neurons=8, activ='tanh')
model = Hybrid_model(neurons=8, activ='tanh')

#Define optimizer and loss function
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
loss = tf.keras.losses.BinaryCrossentropy(from_logits=True,
                                          label_smoothing=0.0,
                                          axis=-1,
                                          reduction='sum_over_batch_size',
                                          name='binary_crossentropy')

#Compile the model with loss, optimizer, and metrics
model.compile(loss = loss,
              optimizer = optimizer,
              metrics = [
                  tf.keras.metrics.BinaryAccuracy(),
                  tf.keras.metrics.FalseNegatives(),
                  tf.keras.metrics.FalsePositives(),
                  tf.keras.metrics.TrueNegatives(),
                  tf.keras.metrics.TruePositives()
                  ]
)

### 3.3 - Model fit

In [17]:
#Fit the model
mod = model.fit(train_dataset, epochs=4)

Epoch 1/4
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - binary_accuracy: 0.9043 - false_negatives: 0.7000 - false_positives: 0.0000e+00 - loss: 0.2831 - true_negatives: 4.7000 - true_positives: 0.0000e+00
Epoch 2/4
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - binary_accuracy: 0.9043 - false_negatives: 0.7000 - false_positives: 0.0000e+00 - loss: 0.2879 - true_negatives: 4.7000 - true_positives: 0.0000e+00  
Epoch 3/4
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - binary_accuracy: 0.9043 - false_negatives: 0.7000 - false_positives: 0.0000e+00 - loss: 0.2982 - true_negatives: 4.7000 - true_positives: 0.0000e+00    
Epoch 4/4
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - binary_accuracy: 0.9043 - false_negatives: 0.7000 - false_positives: 0.0000e+00 - loss: 0.3117 - true_negatives: 4.7000 - true_positives: 0.0000e+00   


2024-09-10 11:52:45.308042: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


In [19]:
mod.history

{'binary_accuracy': [0.699999988079071,
  0.699999988079071,
  0.699999988079071,
  0.699999988079071],
 'false_negatives': [2.0, 2.0, 2.0, 2.0],
 'false_positives': [0.0, 0.0, 0.0, 0.0],
 'loss': [0.5316162705421448,
  0.5050565004348755,
  0.4884614050388336,
  0.4798160195350647],
 'true_negatives': [7.0, 7.0, 7.0, 7.0],
 'true_positives': [0.0, 0.0, 0.0, 0.0]}