# MODEL - IMAGE LOADING & NEURAL NETWORK

In [1]:
#Import libraries
import csv
import os
import io
import cv2
from PIL import Image
import h5py
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
from sklearn import preprocessing
import tensorflow as tf
from tensorflow import keras

2024-09-09 21:15:23.243903: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-09-09 21:15:23.248573: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-09-09 21:15:23.259446: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-09 21:15:23.277350: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-09-09 21:15:23.282917: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-09-09 21:15:23.296323: I tensorflow/core/platform/cpu_feature_gu

## 1) DATA LOADING - BASIC

In [2]:
#General file paths
projectDir = os.getcwd() + "/"
parentDir = os.path.abspath(os.path.join(projectDir, os.pardir)) + "/"
dataPath = os.path.abspath(os.path.join(projectDir, os.pardir)) + "/isic-2024-challenge/"

#Metadata file paths
metaPath = dataPath + "train-metadata.csv"

#Image file path
file = dataPath + "train-image.hdf5"

#Image subset: normal, hairs1, hairs2, wrinkles1, wrinkles2, protrusions, malignant, malignant, other
image_files = ["ISIC_0015670", "ISIC_0052213", "ISIC_0075726", "ISIC_0076172", "ISIC_8570031", "ISIC_5071401", "ISIC_0104229", "ISIC_9877311", "ISIC_0024200"]

In [3]:
#Import metadata
metadata = pd.read_csv(metaPath, sep=",")

#Import images from hdf5 file and one image
images = []
f = h5py.File(file, mode="r")
for isic_id in image_files:
    image = np.array(
        Image.open(
            io.BytesIO(f[isic_id][()])
            )
        )
    images.append(image)

  metadata = pd.read_csv(metaPath, sep=",")


## 2) GENERAL FUNCTIONS

In [4]:
#Function to show image
def show_img(image):
    plt.imshow(image, interpolation=None)
    plt.grid(None)
    plt.show()

In [5]:
#Image cropping
def crop_image(images_list, nbPix = 100):
    output_images = []
    for image in images_list:
        #Height adjustments
        h = len(image)
        adj = len(image) - nbPix
        h1 = round(adj / 2) #Top
        h2 = h - (adj - h1) #Bottom

        #Width adjustments
        w = len(image[0])
        w_adj = w - nbPix
        w1 = round(w_adj / 2) #Left
        w2 = w - (w_adj - w1) #Right

        img = image[h1:h2,w1:w2]
        output_images.append(img)
        
    return np.array(output_images)

### 2.1 - Load metadata and target from csv

In [6]:
#METADATA: color and size features having no NAs
X_meta = metadata[["isic_id",
                   "clin_size_long_diam_mm",
                   "tbp_lv_areaMM2",
                   "tbp_lv_area_perim_ratio",
                   "tbp_lv_eccentricity",
                   "tbp_lv_minorAxisMM",
                   "tbp_lv_color_std_mean",
                   "tbp_lv_deltaLBnorm",
                   "tbp_lv_radial_color_std_max"]]

X_meta = X_meta[X_meta["isic_id"].isin(image_files)].iloc[:,1:]

#TARGET
y = metadata[metadata["isic_id"].isin(image_files)]["target"]

#Verify that there are no NAs
print("-- X_meta NA counts --")
print(X_meta.isna().sum())
print("\n-- y NA count --")
print(y.isna().sum())

-- X_meta NA counts --
clin_size_long_diam_mm         0
tbp_lv_areaMM2                 0
tbp_lv_area_perim_ratio        0
tbp_lv_eccentricity            0
tbp_lv_minorAxisMM             0
tbp_lv_color_std_mean          0
tbp_lv_deltaLBnorm             0
tbp_lv_radial_color_std_max    0
dtype: int64

-- y NA count --
0


In [7]:
#Display X_meta in order to inspect the tensor values and shapes
X_meta

Unnamed: 0,clin_size_long_diam_mm,tbp_lv_areaMM2,tbp_lv_area_perim_ratio,tbp_lv_eccentricity,tbp_lv_minorAxisMM,tbp_lv_color_std_mean,tbp_lv_deltaLBnorm,tbp_lv_radial_color_std_max
0,3.04,3.152561,27.47617,0.901302,1.543016,0.0,5.784302,0.0
4,2.73,2.101708,19.90256,0.946448,0.929916,0.0,6.531302,0.0
26,4.25,7.374742,16.400927,0.862139,2.054795,1.739398,8.80013,1.909609
100,4.5,8.556953,30.651852,0.829499,2.719287,0.794746,4.745211,1.035859
112,2.6,3.152561,16.70167,0.851353,1.372272,0.992478,5.973864,1.134277
1245,6.55,12.10358,38.02596,0.907116,3.481341,1.40143,5.621653,1.340916
201543,10.57,36.742353,21.024594,0.924676,4.549288,9.952932,15.93489,9.01813
343041,2.42,3.640458,14.67419,0.773552,1.549823,1.729533,10.36935,1.601183
396165,8.16,24.544943,21.43401,0.860139,4.503774,2.179194,8.754266,2.432117


### 2.2 - Load images and create hybrid tensorflow dataset

In [8]:
#Generates the image (standardized). Avoids multiple file open/read/close operations.
#file: full path for file
#imgs: list of images to load
#imgSize: number of pixels for size/resolution adjustment in square form
class hdf5_generator:
    def __init__(self, file, imgs, imgSize):
        self.file = file
        self.imgs = imgs
        self.imgSize = imgSize
        self.f = h5py.File(file, mode="r")
    def __call__(self):
        with h5py.File(self.file, 'r') as h5file:
            for img in self.imgs:
                img = np.array(Image.open(io.BytesIO(f[img][()])))
                img = tf.image.resize(img, [self.imgSize, self.imgSize])
                img = tf.constant(np.reshape(img/255,(1,100,100,3)), dtype=tf.float32) #standardized here
                yield img

#Generate image dataset
imgSize = 100
features_dataset = tf.data.Dataset.from_generator(
    hdf5_generator(file, image_files, imgSize),
    output_types=tf.float32,
    output_shapes = tf.TensorShape([1, imgSize,imgSize,3])
    )

#Generate target dataset
y = [np.reshape(element, (1,1)) for element in y]
y = tf.cast(y, dtype=tf.int32)
labels_dataset = tf.data.Dataset.from_tensor_slices(y, name = "target")

#Generate metadata set
X_meta = tf.cast(X_meta, dtype=tf.float32)
X_meta = tf.reshape(X_meta, shape=(9,1,8))
meta_dataset = tf.data.Dataset.from_tensor_slices(X_meta, name = "metadata")

#Combine datasets into one
subset = tf.data.Dataset.zip((features_dataset, meta_dataset))
dataset = tf.data.Dataset.zip((subset, labels_dataset))

Instructions for updating:
Use output_signature instead
Instructions for updating:
Use output_signature instead


In [9]:
#Examine the dataset objects
iterator = iter(dataset)
print(dataset, "\n")
print(iterator.get_next())

<_ZipDataset element_spec=((TensorSpec(shape=(1, 100, 100, 3), dtype=tf.float32, name=None), TensorSpec(shape=(1, 8), dtype=tf.float32, name=None)), TensorSpec(shape=(1, 1), dtype=tf.int32, name=None))> 

((<tf.Tensor: shape=(1, 100, 100, 3), dtype=float32, numpy=
array([[[[0.70879203, 0.5754587 , 0.5283999 ],
         [0.7081765 , 0.57484317, 0.52778435],
         [0.7196471 , 0.57866675, 0.53160787],
         ...,
         [0.7658624 , 0.60919565, 0.5777251 ],
         [0.7445527 , 0.5971605 , 0.56186634],
         [0.73979616, 0.59469813, 0.559404  ]],

        [[0.7322745 , 0.59894115, 0.5518823 ],
         [0.7304597 , 0.5944422 , 0.54738337],
         [0.7361721 , 0.595077  , 0.54801816],
         ...,
         [0.74052656, 0.58833337, 0.55462605],
         [0.7280784 , 0.5797342 , 0.5444401 ],
         [0.72262746, 0.5770821 , 0.541788  ]],

        [[0.7595294 , 0.618549  , 0.57149017],
         [0.7541931 , 0.613098  , 0.5660392 ],
         [0.75264704, 0.6114706 , 0.56441176]

## 4) CNN MODEL

In [10]:
#Simple CNN model using only images and target
class CNN_model(tf.keras.Model):
    def __init__(self, neurons = 8, activ = 'tanh'):
        super().__init__()
        self.conv1 = tf.keras.layers.Conv2D(filters=16, kernel_size=5, strides=(1, 1), activation='relu', padding='same', input_shape=(100, 100, 3))
        self.pool1 = tf.keras.layers.MaxPool2D(pool_size=(2,2))
        self.flatten = tf.keras.layers.Flatten()
        self.dense1 = tf.keras.layers.Dense(neurons, activation = activ)
        self.dense2 = tf.keras.layers.Dense(1, activation='sigmoid')

    def call(self, inputs):
        x_image, x_meta = inputs

        # Convolutions
        x1 = self.conv1(x_image)
        x1 = self.pool1(x1)

        # Flattening of images for input layer
        x1 = self.flatten(x1)

        # Hidden layers of neural network
        x1 = self.dense1(x1)

        # Output layer of neural network
        output = self.dense2(x1)

        return output

#Hybrid CNN model taking metadata
class Hybrid_model(tf.keras.Model):
    def __init__(self, neurons = 8, activ = 'tanh'):
        super().__init__()
        self.conv1 = tf.keras.layers.Conv2D(filters=16, kernel_size=5, strides=(1, 1), activation='relu', padding='same')
        self.conv2 = tf.keras.layers.Conv2D(32, 5, activation='relu')
        self.pool = tf.keras.layers.MaxPool2D(pool_size=(2,2))
        self.flatten = tf.keras.layers.Flatten()
        self.dense1 = tf.keras.layers.Dense(neurons, activation = activ)
        self.dense2 = tf.keras.layers.Dense(neurons, activation = activ)
        self.dense3 = tf.keras.layers.Dense(1, activation='sigmoid')
        #self.dropout = tf.keras.layers.dropout(0.25)

    def call(self, inputs, training=False):
        x_image, x_meta = inputs
        # Convolutions
        x = self.conv1(x_image)
        x = self.pool(x)
        #x = self.conv2(x)
        #x = self.pool(x)
        # Flattening of images and concatenation with other data
        x = self.flatten(x)
        #x_all = tf.concat([x,x_meta], axis=1)
        x_all = keras.layers.Concatenate(axis=1)([x, x_meta])
        # Neural Network
        x_all = self.dense1(x_all)
        #x_all = self.dense2(x_all)
        #if training:
        #    x_all = self.dropout(x_all, training=training)
        output = self.dense3(x_all)
        return output

In [11]:
#Set seed
tf.random.set_seed(71)

#Initialize model
#model = CNN_model(neurons=8, activ='tanh')
model = Hybrid_model(neurons=8, activ='tanh')

#Define optimizer and loss function
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
loss = tf.keras.losses.BinaryCrossentropy(from_logits=True,
                                          label_smoothing=0.0,
                                          axis=-1,
                                          reduction='sum_over_batch_size',
                                          name='binary_crossentropy')

#Compile the model with loss, optimizer, and metrics
model.compile(loss = loss,
              optimizer = optimizer,
              metrics = [
                  tf.keras.metrics.BinaryAccuracy(),
                  tf.keras.metrics.FalseNegatives(),
                  tf.keras.metrics.FalsePositives(),
                  tf.keras.metrics.TrueNegatives(),
                  tf.keras.metrics.TruePositives()
                  ]
)

## 3) DATA PREPARATION

In [12]:
#Fit the model
mod = model.fit(dataset, epochs=4)

Epoch 1/4


  output, from_logits = _get_logits(


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - binary_accuracy: 0.6114 - false_negatives: 0.7000 - false_positives: 1.0000 - loss: 0.5971 - true_negatives: 3.7000 - true_positives: 0.0000e+00
Epoch 2/4
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - binary_accuracy: 0.9043 - false_negatives: 0.7000 - false_positives: 0.0000e+00 - loss: 0.2920 - true_negatives: 4.7000 - true_positives: 0.0000e+00   
Epoch 3/4
[1m1/9[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 26ms/step - binary_accuracy: 1.0000 - false_negatives: 0.0000e+00 - false_positives: 0.0000e+00 - loss: 0.0683 - true_negatives: 1.0000 - true_positives: 0.0000e+00

2024-09-09 21:15:32.641005: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
  self.gen.throw(typ, value, traceback)
2024-09-09 21:15:32.755181: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - binary_accuracy: 0.9043 - false_negatives: 0.7000 - false_positives: 0.0000e+00 - loss: 0.2845 - true_negatives: 4.7000 - true_positives: 0.0000e+00    
Epoch 4/4
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - binary_accuracy: 0.9043 - false_negatives: 0.7000 - false_positives: 0.0000e+00 - loss: 0.2838 - true_negatives: 4.7000 - true_positives: 0.0000e+00    


2024-09-09 21:15:32.967375: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


In [13]:
mod.history

{'binary_accuracy': [0.6000000238418579,
  0.699999988079071,
  0.699999988079071,
  0.699999988079071],
 'false_negatives': [2.0, 2.0, 2.0, 2.0],
 'false_positives': [1.0, 0.0, 0.0, 0.0],
 'loss': [0.7504349946975708,
  0.6019914746284485,
  0.5589548945426941,
  0.524715006351471],
 'true_negatives': [6.0, 7.0, 7.0, 7.0],
 'true_positives': [0.0, 0.0, 0.0, 0.0]}