# MODEL - IMAGE LOADING & NEURAL NETWORK

In [1]:
#Import libraries
import csv
import os
import io
import cv2
from PIL import Image
import h5py
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
from sklearn import preprocessing
import tensorflow as tf
from tensorflow import keras

In [None]:
pip install Tensorflow

In [None]:
pip install numpy==1.26.4

## 1) GENERAL FUNCTIONS

In [159]:
#Function to show image
def show_img(image):
    plt.imshow(image, interpolation=None)
    plt.grid(None)
    plt.show()

In [160]:
#Image cropping
def crop_image(images_list, nbPix = 100):
    output_images = []
    for image in images_list:
        #Height adjustments
        h = len(image)
        adj = len(image) - nbPix
        h1 = round(adj / 2) #Top
        h2 = h - (adj - h1) #Bottom

        #Width adjustments
        w = len(image[0])
        w_adj = w - nbPix
        w1 = round(w_adj / 2) #Left
        w2 = w - (w_adj - w1) #Right

        img = image[h1:h2,w1:w2]
        output_images.append(img)
        
    return np.array(output_images)

## 2) IMPORT DATA

### 2.1 - Declare file paths

In [161]:
#General file paths
projectDir = os.getcwd() + "/"
parentDir = os.path.abspath(os.path.join(projectDir, os.pardir)) + "/"
dataPath = os.path.abspath(os.path.join(projectDir, os.pardir)) + "/isic-2024-challenge/"

#Metadata file paths
metaPath = dataPath + "train-metadata.csv"

#Image file path
hdf5_file = dataPath + "train-image.hdf5"

### 2.2 - Load metadata from csv

In [162]:
#Import metadata
metadata = pd.read_csv(metaPath, sep=",")

#METADATA: color and size features having no NAs
metadata = metadata[["isic_id",
                     "target",
                     "clin_size_long_diam_mm",
                     "tbp_lv_areaMM2",
                     "tbp_lv_area_perim_ratio",
                     "tbp_lv_eccentricity",
                     "tbp_lv_minorAxisMM",
                     "tbp_lv_color_std_mean",
                     "tbp_lv_deltaLBnorm",
                     "tbp_lv_radial_color_std_max"]]

#Verify that there are no NAs
print("-- X_meta NA counts --")
print(metadata.isna().sum())

  metadata = pd.read_csv(metaPath, sep=",")


-- X_meta NA counts --
isic_id                        0
target                         0
clin_size_long_diam_mm         0
tbp_lv_areaMM2                 0
tbp_lv_area_perim_ratio        0
tbp_lv_eccentricity            0
tbp_lv_minorAxisMM             0
tbp_lv_color_std_mean          0
tbp_lv_deltaLBnorm             0
tbp_lv_radial_color_std_max    0
dtype: int64


### 2.3 - Clean data

In [6]:
#Add code here


### 2.4 - Train, Validate, Test Split

In [163]:
#Image subset: normal, hairs1, hairs2, wrinkles1, wrinkles2, protrusions, malignant, malignant, other
train_imgs = ["ISIC_0015670", "ISIC_0052213", "ISIC_0075726", "ISIC_0076172", "ISIC_8570031", "ISIC_5071401", "ISIC_0104229", "ISIC_9877311", "ISIC_0024200"]

In [164]:
#Generate the metadata
X_train = metadata[metadata["isic_id"].isin(train_imgs)].iloc[:,2:]

#Generate the target
y_train = metadata[metadata["isic_id"].isin(train_imgs)]["target"]

### 2.5 - Load images and create hybrid tensorflow dataset

In [165]:
# HDF5 Image Data Generator with Augmentation and Standardization
class hdf5_generator:
    def __init__(self, file, img_names, imgSize):
        self.file = file
        self.img_names = img_names
        self.imgSize = imgSize

    def __call__(self):
        with h5py.File(self.file, 'r') as h5file:
            for img_name in self.img_names:
                try:
                    # Load image data from HDF5
                    img = np.array(Image.open(io.BytesIO(h5file[img_name][()])))
                    
                    # Resize the image
                    img = tf.image.resize(img, [self.imgSize, self.imgSize])
                    
                    # Data Augmentation 
                    img = tf.image.random_flip_left_right(img)
                    img = tf.image.random_flip_up_down(img)
                    img = tf.image.random_brightness(img, max_delta=0.2)

                    # Standardize and return as TensorFlow constant
                    img = tf.constant(img / 255, dtype=tf.float32)  # Standardize here
               
                    
                    yield img
                    
                except Exception as e:
                    print(f"Error loading image {img_name}: {e}")
                    # log the error to a file for later analysis
                    with open('image_errors.log', 'a') as f:
                        f.write(f"Error loading image {img_name}: {e}\n")
                    continue

#Generate the dataset with batch size and prefetching
def make_dataset(hdf5_file, meta, target, img_names, imgSize=100, batch_size=5):
    # Generate image dataset
    img_dataset = tf.data.Dataset.from_generator(
        hdf5_generator(hdf5_file, img_names, imgSize),
        output_types=tf.float32,
        output_shapes=tf.TensorShape([imgSize, imgSize, 3])
    )

    # Generate target dataset
    target = [np.reshape(element, (1, 1)) for element in target]
    target = tf.cast(target, dtype=tf.int32)
    target = tf.data.Dataset.from_tensor_slices(target, name="target")

    # Generate metadata set
    meta = tf.cast(meta, dtype=tf.float32)
    meta = tf.reshape(meta, shape=(9,1,8))
    meta = tf.data.Dataset.from_tensor_slices(meta, name="metadata")

    # Combine datasets into one
    dataset = tf.data.Dataset.zip((img_dataset, meta))
    dataset = tf.data.Dataset.zip((dataset, target))

    # Add shuffling, batching, and prefetching
    dataset = dataset.shuffle(buffer_size=min(len(img_names), 10000)).batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE)

    return dataset


In [166]:
#Make datasets
train_dataset = make_dataset(hdf5_file, X_train, y_train, train_imgs)

In [167]:
#Delete duplicate data, keeping only the dataset
del metadata
del y_train
del X_train

In [168]:
#Examine the dataset objects
print(train_dataset, "\n")

#iterator = iter(train_dataset)
#print(iterator.get_next())

<_PrefetchDataset element_spec=((TensorSpec(shape=(None, 100, 100, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None, 1, 8), dtype=tf.float32, name=None)), TensorSpec(shape=(None, 1, 1), dtype=tf.int32, name=None))> 



## 3) CNN MODEL

### 3.1 - Model class

In [169]:
#Simple CNN model using only images and target
class CNN_model(tf.keras.Model):
    def __init__(self, neurons = 8, activ = 'tanh'):
        super().__init__()
        self.conv1 = tf.keras.layers.Conv2D(filters=16, kernel_size=5, strides=(1, 1), activation='relu', padding='same', input_shape=(100, 100, 3))
        self.pool1 = tf.keras.layers.MaxPool2D(pool_size=(2,2))
        self.flatten = tf.keras.layers.Flatten()
        self.dense1 = tf.keras.layers.Dense(neurons, activation = activ)
        self.dense2 = tf.keras.layers.Dense(1, activation='sigmoid')

    def call(self, inputs):
        x_image, x_meta = inputs

        # Convolutions
        x1 = self.conv1(x_image)
        x1 = self.pool1(x1)

        # Flattening of images for input layer
        x1 = self.flatten(x1)

        # Hidden layers of neural network
        x1 = self.dense1(x1)

        # Output layer of neural network
        output = self.dense2(x1)

        return output

#Hybrid CNN model taking metadata
class Hybrid_model(tf.keras.Model):
    def __init__(self, neurons = 8, activ = 'tanh'):
        super().__init__()
        self.conv1 = tf.keras.layers.Conv2D(filters=16, kernel_size=5, strides=(1, 1), activation='relu', padding='same', input_shape=(100, 100, 3))
        self.conv2 = tf.keras.layers.Conv2D(32, 5, activation='relu')
        self.pool = tf.keras.layers.MaxPool2D(pool_size=(2,2))
        self.flatten = tf.keras.layers.Flatten()
        self.dense1 = tf.keras.layers.Dense(neurons, activation = activ)
        self.dense2 = tf.keras.layers.Dense(neurons, activation = activ)
        self.dense3 = tf.keras.layers.Dense(1, activation='sigmoid')
        #self.dropout = tf.keras.layers.dropout(0.25)

    def call(self, inputs, training=False):
        x_image, x_meta = inputs
        # Convolutions
        x = self.conv1(x_image)
        x = self.pool(x)
        #x = self.conv2(x)
        #x = self.pool(x)
        # Flattening of images and concatenation with other data
        x = self.flatten(x)
        # Reshape metadata to match dimensions
        x_meta = tf.reshape(x_meta, (tf.shape(x_meta)[0], 8))
        #x_all = tf.concat([x,x_meta], axis=1)
        x_all = keras.layers.Concatenate(axis=1)([x, x_meta])
        # Neural Network
        x_all = self.dense1(x_all)
        #x_all = self.dense2(x_all)
        #if training:
        #    x_all = self.dropout(x_all, training=training)
        output = self.dense3(x_all)
        return output

### 3.2 - Model compiling

In [170]:
#Set seed
tf.random.set_seed(71)

#Initialize model
#model = CNN_model(neurons=8, activ='tanh')
model = Hybrid_model(neurons=8, activ='tanh')

#Define optimizer and loss function
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
loss = tf.keras.losses.BinaryCrossentropy(from_logits=True,
                                          label_smoothing=0.0,
                                          axis=-1,
                                          reduction='sum_over_batch_size',
                                          name='binary_crossentropy')

#Compile the model with loss, optimizer, and metrics
model.compile(loss = loss,
              optimizer = optimizer,
              metrics = [
                  tf.keras.metrics.BinaryAccuracy(),
                  tf.keras.metrics.FalseNegatives(),
                  tf.keras.metrics.FalsePositives(),
                  tf.keras.metrics.TrueNegatives(),
                  tf.keras.metrics.TruePositives()
                  ]
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


### 3.3 - Model fit

In [171]:
#Fit the model
mod = model.fit(train_dataset, epochs=4)

Epoch 1/4


  output, from_logits = _get_logits(


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 51ms/step - binary_accuracy: 0.7852 - false_negatives_15: 1.6667 - false_positives_15: 0.0000e+00 - loss: 0.6483 - true_negatives_15: 6.0000 - true_positives_15: 0.0000e+00
Epoch 2/4
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - binary_accuracy: 0.8519 - false_negatives_15: 1.3333 - false_positives_15: 0.0000e+00 - loss: 0.4244 - true_negatives_15: 6.3333 - true_positives_15: 0.0000e+00     
Epoch 3/4


  self.gen.throw(typ, value, traceback)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - binary_accuracy: 0.7185 - false_negatives_15: 2.0000 - false_positives_15: 0.0000e+00 - loss: 0.6340 - true_negatives_15: 5.6667 - true_positives_15: 0.0000e+00 
Epoch 4/4
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - binary_accuracy: 0.7185 - false_negatives_15: 2.0000 - false_positives_15: 0.0000e+00 - loss: 0.6271 - true_negatives_15: 5.6667 - true_positives_15: 0.0000e+00 


In [172]:
mod.history

{'binary_accuracy': [0.7777777910232544,
  0.7777777910232544,
  0.7777777910232544,
  0.7777777910232544],
 'false_negatives_15': [2.0, 2.0, 2.0, 2.0],
 'false_positives_15': [0.0, 0.0, 0.0, 0.0],
 'loss': [0.6349514126777649,
  0.542496919631958,
  0.5415472388267517,
  0.5381039381027222],
 'true_negatives_15': [7.0, 7.0, 7.0, 7.0],
 'true_positives_15': [0.0, 0.0, 0.0, 0.0]}

**BATCHES**

In [190]:
# Iterate through all batches in the dataset and print their shapes
for i, batch in enumerate(train_dataset):
    (img_batch, meta_batch), target_batch = batch
    
    # Print the shapes of the current batch
    print(f"Batch {i+1}:")
    print("  Image Batch Shape:", img_batch.shape)
    print("  Metadata Batch Shape:", meta_batch.shape)
    print("  Target Batch Shape:", target_batch.shape)

Batch 1:
  Image Batch Shape: (5, 100, 100, 3)
  Metadata Batch Shape: (5, 1, 8)
  Target Batch Shape: (5, 1, 1)
Batch 2:
  Image Batch Shape: (4, 100, 100, 3)
  Metadata Batch Shape: (4, 1, 8)
  Target Batch Shape: (4, 1, 1)
