# Prediction model

## Imports and Drive mount

In [None]:
#Only in case you run the project on Colab
from google.colab import drive
drive.mount('/content/drive')

!pip install -U tensorflow-addons

Mounted at /content/drive


In [1]:
# Imports
import tensorflow as tf
import tensorflow_addons as tfa
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

## Project info

In [3]:
# Label range used by normalize_y to min-max scale the regression target
# (presumably expressed in months, cf. mean_absolute_error_months -- confirm).
boneage_min = 1.0
boneage_max = 228.0
# Default spatial size used by preprocess() and by the image input of create_model()
image_size = (512, 512)

# debug: enables the sample-image and model-plot cells
debug = False
# info: enables the '[INFO]' progress message printed before training
info = True
# initial_epoch: when non-zero, training reloads the weights from checkpoints_file
# and appends to the existing CSV log instead of starting a new one
initial_epoch = 0
# model_name: stem of the model, checkpoint, log and plot file names
model_name = 'prediction_model'

In [4]:
# Path to the project and the dataset
# All other paths below are derived from project_path; switch between the two
# assignments depending on where the notebook runs.
project_path = '/content/drive/MyDrive/Uni/HDA/Project' # Used to train in Colab
#project_path = ''                                      # Used to train locally

# Image folders and label CSV files of the train / validation / test splits
train_dataset_path = os.path.join(project_path, 'datasets/boneage-train-dataset-preprocessed-2')
train_labels_file = os.path.join(project_path, 'datasets/train_labels.csv')
val_dataset_path = os.path.join(project_path, 'datasets/boneage-val-dataset-preprocessed-2')
val_labels_file = os.path.join(project_path, 'datasets/val_labels.csv')
test_dataset_path = os.path.join(project_path, 'datasets/boneage-test-dataset-preprocessed-2')
test_labels_file = os.path.join(project_path, 'datasets/test_labels.csv')

# Training artifacts: full model, per-epoch weights checkpoint and CSV training log
model_file = os.path.join(project_path, f'models/{model_name}.h5')
checkpoints_file = os.path.join(project_path, 'checkpoints/', f'{model_name}-checkpoint.h5')
log_file = os.path.join(project_path, f'logs/{model_name}-log.csv')

## Dataset

In [5]:
def deg_to_rad(angle_deg):
  """Convert an angle expressed in degrees to radians."""
  scaled_by_pi = angle_deg * np.pi
  return scaled_by_pi / 180

def read(image_path, male):
  """Load one PNG as a single-channel float image and cast the sex flag.

  Args:
    image_path: path of the PNG file to read.
    male: sex flag of the sample.

  Returns:
    (image, male): the image rescaled to [-1, 1] as float32 and `male` cast
    to float32.
  """
  # Decode the file as grayscale and convert it to float32
  png_bytes = tf.io.read_file(image_path)
  pixels = tf.image.convert_image_dtype(tf.io.decode_png(png_bytes, channels=1), tf.float32)

  # Shift and stretch the converted values to the [-1, 1] range
  pixels = (pixels - 0.5) * 2

  return pixels, tf.cast(male, tf.float32)

# Training-time preprocessing: random augmentation of an already decoded image
def preprocess(image, male, size=image_size):
  """Apply random data augmentation to one image and convert it to RGB.

  Mapped over the training dataset in create_dataset after `read`.
  The steps are, in order: rotation, translation, zoom (random square crop
  resized back to `size`), horizontal flip, contrast adjustment and
  grayscale-to-RGB conversion.

  Args:
    image: image tensor; assumes a single-channel image as produced by `read`
      (required by the final grayscale_to_rgb call).
    male: sex flag, returned untouched.
    size: (height, width) of the output of the zoom step; size[0] also scales
      the translation range.

  Returns:
    (image, male): the augmented 3-channel image and the unchanged `male`.
  """
  # Data augmentation
  # Rotation: one random angle (radians) drawn uniformly from [-20 deg, 21 deg)
  # NOTE(review): the upper bound is 20+1 degrees, so the range is not symmetric;
  # it looks like a leftover of an exclusive integer upper bound -- confirm.
  angle = tf.random.uniform(shape=(), minval=-deg_to_rad(20), maxval=deg_to_rad(20+1))
  image = tfa.image.rotate(images=image, angles=angle, fill_mode='nearest')

  # Translation: two offsets drawn uniformly from [-0.2*size[0], 0.2*size[0] + 1)
  # NOTE(review): both offsets are scaled by size[0]; fine for square images,
  # confirm for non-square sizes. Same "+1" asymmetry as the rotation.
  translation = tf.random.uniform(shape=(2,), minval=-size[0]*0.2, maxval=+size[0]*0.2+1)
  image = tfa.image.translate(images=image, translations=translation, fill_mode='nearest')

  # Zoom: crop a square of side (1 - zoom_scale) in normalized coordinates,
  # with zoom_scale drawn from [0, 0.2), then resize the crop back to `size`
  zoom_scale = tf.random.uniform(shape=(1,), minval=0.0, maxval=0.2, dtype=tf.float64)
  spare_space = zoom_scale[0]
  new_space = 1 - spare_space
  # The top-left corner of the crop is placed at random inside the spare margin;
  # with no margin the crop starts at the origin (avoids an empty sampling range)
  if spare_space == 0:
    x1 = spare_space
    y1 = spare_space
  else:
    x1 = tf.random.uniform(shape=(1,), minval=0, maxval=spare_space, dtype=tf.float64)[0]
    y1 = tf.random.uniform(shape=(1,), minval=0, maxval=spare_space, dtype=tf.float64)[0]
  x2, y2 = x1 + new_space, y1 + new_space
  # crop_and_resize works on batches: add a batch axis, crop, then drop it again
  image = tf.expand_dims(image, axis=0)
  image = tf.image.crop_and_resize(image, boxes=[[y1, x1, y2, x2]], box_indices=[0], crop_size=size)
  image = tf.squeeze(image, 0)

  # Horizontal flip, applied when a uniform draw from [0, 1) exceeds 0.5
  if tf.random.uniform(shape=(), minval=0, maxval=1) > 0.5:
    image = tf.image.flip_left_right(image)

  # Contrast: random factor drawn uniformly from [1, 2)
  contrast_factor = tf.random.uniform(shape=(), minval=1, maxval=2)
  image = tf.image.adjust_contrast(image, contrast_factor)

  # Convert to 3 channels (create_model declares a 3-channel image input)
  image = tf.image.grayscale_to_rgb(image)

  return image, male

def preprocess_test(image, male):
  """Deterministic preprocessing used when no augmentation is wanted.

  Applies a fixed contrast factor of 1.5 and converts the grayscale image to
  RGB; `male` is returned unchanged.
  """
  contrasted = tf.image.adjust_contrast(image, 1.5)
  return tf.image.grayscale_to_rgb(contrasted), male

def normalize_y(boneage):
  """Min-max scale a bone age with boneage_min / boneage_max and cast it to float32."""
  label_span = boneage_max - boneage_min
  scaled = (boneage - boneage_min) / label_span
  return tf.cast(scaled, tf.float32)


In [6]:
def create_dataset(labels_file, dataset_path, batch_size=16, train=True):
  """Build the batched tf.data pipeline for one split.

  Args:
    labels_file: ';'-separated CSV with a header row; its first column is the
      index (used as the PNG file stem) and it provides the 'male' and
      'boneage' columns.
    dataset_path: folder containing the '<index>.png' images.
    batch_size: number of samples per batch.
    train: when True the images are augmented with `preprocess` and the
      samples are shuffled; otherwise `preprocess_test` is applied and the
      fully preprocessed samples are cached.

  Returns:
    A tf.data.Dataset yielding ((image, male), normalized_boneage) batches.
  """
  data_df = pd.read_csv(labels_file, sep=';', index_col=0, header=0)
  data_df['path'] = data_df.index.astype(str)
  data_df['path'] = dataset_path + '/' + data_df['path'] + '.png'

  # Creates a Dataset object
  dataset_x = tf.data.Dataset.from_tensor_slices((data_df['path'], data_df['male'].astype(int)))
  dataset_y = tf.data.Dataset.from_tensor_slices((data_df['boneage'].astype(float)))

  # Reads the images
  dataset_x = dataset_x.map(read, num_parallel_calls=tf.data.AUTOTUNE)

  if train:
    # Cache the decoded images BEFORE augmentation, so that every epoch
    # re-applies fresh random transformations to the cached images
    dataset_x = dataset_x.cache()

    # Preprocess the images
    dataset_x = dataset_x.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
  else:
    # Preprocess (light) the images
    dataset_x = dataset_x.map(preprocess_test, num_parallel_calls=tf.data.AUTOTUNE)

  # Map the normalize function
  dataset_y = dataset_y.map(normalize_y, num_parallel_calls=tf.data.AUTOTUNE)

  # Cache the dataset_y
  if train:
    dataset_y = dataset_y.cache()

  # Zip the dataset
  dataset = tf.data.Dataset.zip((dataset_x, dataset_y))

  if train:
    # Shuffle with a buffer of 5% of the samples. The buffer is clamped to at
    # least 1 because int(len * 0.05) is 0 for datasets with fewer than 20
    # samples, which is not a valid shuffle buffer size.
    shuffle_buffer = max(1, int(len(dataset) * 0.05))
    dataset = dataset.shuffle(shuffle_buffer)
  else:
    # Deterministic pipeline: the preprocessed samples can be cached as a whole
    dataset = dataset.cache()

  # Batch
  dataset = dataset.batch(batch_size=batch_size)

  # Prefetch
  dataset = dataset.prefetch(tf.data.AUTOTUNE)

  return dataset

In [7]:
# Training split: augmented and shuffled (train=True).
# Validation split: light preprocessing only, cached, not shuffled (train=False).
train_dataset = create_dataset(train_labels_file, train_dataset_path, train=True)
val_dataset = create_dataset(val_labels_file, val_dataset_path, train=False)

Metal device set to: Apple M1 Pro


2022-02-15 17:47:16.692049: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-02-15 17:47:16.692264: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [8]:
def show_image(iterator):
  """Plot the first sample of the next batch with its sex flag and bone age.

  Args:
    iterator: iterator over a dataset built by `create_dataset`, i.e. yielding
      ((images, male), normalized_boneage) batches.
  """
  inputs, output = next(iterator)

  image = inputs[0][0]
  male = inputs[1][0]
  # Invert normalize_y, which maps y = (boneage - boneage_min) / (boneage_max - boneage_min)
  boneage = output[0] * (boneage_max - boneage_min) + boneage_min

  plt.figure()
  # Only the first channel is shown: the three RGB channels are copies of the
  # same grayscale image
  plt.imshow(image[:,:,0], cmap='gray', vmin=-1.0, vmax=1.0)
  plt.title(f'Male: {male} - Boneage: {boneage}')

In [9]:
# Debug only: iterator over the training batches, consumed by show_image
if debug:
  train_iterator = iter(train_dataset)

In [10]:
# Debug only: display one augmented training sample (re-run for the next batch)
if debug:
  show_image(train_iterator)

## Model definition

In [11]:
def create_model(dropout_prob=0.2):
  """Build the two-input bone-age regression network.

  An ImageNet-initialised InceptionV3 (without its top) processes the
  radiograph, a small dense layer processes the sex flag, and the two feature
  vectors are concatenated and fed to a dense head with a single linear output.

  Args:
    dropout_prob: dropout rate applied to the pooled image features.

  Returns:
    A tf.keras.Model taking [image, sex] and returning one value per sample.
  """
  # Image branch: InceptionV3 backbone -> average pooling -> dropout -> flatten
  image_shape = (image_size[0], image_size[1], 3)
  image_input = tf.keras.layers.Input(shape=image_shape)
  backbone = tf.keras.applications.InceptionV3(
      include_top=False,
      weights='imagenet',
      input_tensor=image_input,
      input_shape=image_shape)
  image_features = tf.keras.layers.AveragePooling2D()(backbone.output)
  image_features = tf.keras.layers.Dropout(dropout_prob)(image_features)
  image_features = tf.keras.layers.Flatten()(image_features)

  # Sex branch: identity activation followed by a 32-unit dense layer
  sex_input = tf.keras.layers.Input(shape=(1,))
  sex_features = tf.keras.layers.Activation('linear')(sex_input)
  sex_features = tf.keras.layers.Dense(32, activation='relu')(sex_features)

  # Merge both branches
  merged = tf.keras.layers.Concatenate()([image_features, sex_features])

  # Regression head
  hidden = tf.keras.layers.Dense(500, activation='relu')(merged)
  prediction = tf.keras.layers.Dense(1, activation=None)(hidden)

  return tf.keras.Model([image_input, sex_input], prediction)

In [12]:
# Instantiate the network with the default dropout probability
model = create_model()

In [13]:
# Debug only: write a diagram of the architecture (with tensor shapes) to '<model_name>.png'
if debug:
  tf.keras.utils.plot_model(model, to_file=f'{model_name}.png', show_shapes=True)

## Training

In [None]:
def mean_absolute_error_months(y_true, y_pred):
  """Mean absolute error expressed in the original (un-normalized) label units.

  Targets are min-max scaled by normalize_y as
  (boneage - boneage_min) / (boneage_max - boneage_min), so an absolute
  difference in normalized space is converted back by multiplying by the label
  span (boneage_max - boneage_min), not by boneage_max.

  Args:
    y_true: normalized ground-truth bone ages.
    y_pred: normalized predictions.

  Returns:
    The mean absolute error rescaled to the original label range.
  """
  return tf.keras.metrics.mean_absolute_error(y_true, y_pred) * (boneage_max - boneage_min)

# Optimizer, learning-rate schedule and loss
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-03) # Initial: 1e-03
# Halve the learning rate when val_loss has not improved for 25 epochs,
# then wait 5 epochs before monitoring again
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', verbose=1, factor=0.5, patience=25, cooldown=5)
model.compile(optimizer=optimizer, loss=mean_absolute_error_months)

# Resuming a previous run: restore the last checkpointed weights and keep
# appending to the existing CSV log instead of overwriting it
if initial_epoch != 0:
  model.load_weights(checkpoints_file)
  csv_append = True
else:
  csv_append = False

# Weights-only checkpoint written at the end of every epoch (used to resume)
checkpoints = tf.keras.callbacks.ModelCheckpoint(checkpoints_file, save_freq='epoch', save_weights_only=True)
# Full model, overwritten only when the monitored quantity improves.
# No trailing comma here: it would turn `saving` into a 1-tuple instead of a callback.
saving = tf.keras.callbacks.ModelCheckpoint(model_file, save_best_only=True)
csv_logger = tf.keras.callbacks.CSVLogger(log_file, append=csv_append)
if info: print('[INFO] training model...')
model.fit(
    train_dataset,
    validation_data=val_dataset,
    validation_freq=1,
    epochs=500,
    callbacks=[saving, checkpoints, csv_logger, reduce_lr],
    initial_epoch=initial_epoch
)

In [None]:
# Save only if the model has not been saved during training. An unconditional
# save would overwrite the best model kept by the save_best_only checkpoint
# with the weights of the last epoch.
if not os.path.exists(model_file):
  model.save(model_file)

## Evaluation (test loss)

In [None]:
# Test split: light preprocessing only (train=False), one sample per batch
test_dataset = create_dataset(test_labels_file, test_dataset_path, train=False, batch_size=1)

# evaluate() returns the loss the model was compiled with
# (mean_absolute_error_months in the training cell), computed on the test split
mae = model.evaluate(test_dataset)
print(f'MAE: {mae}')