In [None]:
import tensorflow as tf
import tensorflow.keras as keras
import pathlib
import os
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import nibabel as nib
import scipy 
import util
import tensorflow_docs as tfdocs
import tensorflow_docs.modeling
import tensorflow_docs.plots
import math
import time
import os 

#### Cargamos los datos

In [None]:
ALL_DATA = "E:Corrected_FA/ALL_DATA/"
info_data = "idaSearch_8_01_2020.csv"
# NOTE(review): "E:Corrected_FA" has no separator after the drive letter, so on
# Windows it is resolved relative to the current directory of drive E: — confirm
# this is intended.

# Dictionaries with the names of the files that contain the images.
AD_CN, groups = util.obtain_data_files(ALL_DATA, info_data)

# Load the CN images as a float32 array.
CN_imgs = np.array(
    util.load_data(ALL_DATA, AD_CN["CN"]),
    dtype='float32',
)

# Load the AD images and extend that class (the one with fewer examples)
# up to the number of CN images before converting it to a float32 array.
AD_imgs = np.array(
    util.extend_class(util.load_data(ALL_DATA, AD_CN["AD"]), len(CN_imgs)),
    dtype='float32',
)

# Labels as column vectors: 1 = AD, 0 = CN.
AD_labels = np.ones((len(AD_imgs), 1), dtype="int32")
CN_labels = np.zeros((len(CN_imgs), 1), dtype="int32")

#### Función para crear el modelo seleccionando número de filtros y neuronas, dropout rate y parámetro de regularización

In [None]:
def create_model(filters, neurons, dropout_rate, regularization):
    """Build an (uncompiled) 3D convolutional binary classifier.

    The network takes inputs of shape (91, 109, 91, 1) and stacks five
    ``Conv3D`` layers that all use ``filters`` filters, 'valid' padding and
    ReLU activation (kernel sizes 11, 5, 3, 3, 3; only the first one is
    strided, with stride 4). The first four convolutions are followed by
    batch normalisation and dropout, the fourth one additionally by max
    pooling, and the fifth by global average pooling. Three ReLU ``Dense``
    layers of ``neurons`` units and a single sigmoid unit form the head.

    Args:
        filters: number of filters used by every convolutional layer.
        neurons: number of units of each hidden dense layer.
        dropout_rate: rate of the dropout layers placed after the
            convolutional blocks.
        regularization: L2 factor applied as *activity* regulariser to the
            hidden dense layers.

    Returns:
        A ``tf.keras.Sequential`` model.
    """
    L = tf.keras.layers

    def conv(kernel_size, strides=(1, 1, 1), **extra):
        # All convolutions share width, padding and activation.
        return L.Conv3D(filters, kernel_size, strides=strides, padding='valid',
                        activation='relu', **extra)

    # Block 1: strided convolution that also declares the input shape.
    stack = [
        conv(11, strides=(4, 4, 4), input_shape=(91, 109, 91, 1)),
        L.BatchNormalization(),
        L.Dropout(dropout_rate),
    ]

    # Blocks 2 and 3: convolution -> batch norm -> dropout.
    for kernel_size in (5, 3):
        stack += [conv(kernel_size), L.BatchNormalization(), L.Dropout(dropout_rate)]

    # Block 4: same as above, with max pooling before the dropout.
    stack += [conv(3), L.BatchNormalization(), L.MaxPooling3D(), L.Dropout(dropout_rate)]

    # Block 5: last convolution, reduced by global average pooling.
    stack += [conv(3), L.GlobalAveragePooling3D()]

    # Classifier head.
    # NOTE(review): the penalty is set through `activity_regularizer` (layer
    # outputs), not `kernel_regularizer` (weights) — confirm this is intended.
    for _ in range(3):
        stack.append(
            L.Dense(neurons, activation="relu",
                    activity_regularizer=keras.regularizers.l2(regularization))
        )
    stack.append(L.Dense(1, activation='sigmoid'))

    return tf.keras.Sequential(stack)

#### Función para seleccionar únicamente dropout rate y regularización.

Esta es la función que utilicé finalmente, ya que el modelo definitivo usa el número de filtros y neuronas de VGG-16.

In [None]:
def create_model_def(dropout, regularization):
    """Build the (uncompiled) fixed-width 3D convolutional binary classifier.

    The network takes inputs of shape (91, 109, 91, 1). It has five
    ``Conv3D -> BatchNormalization -> ReLU`` blocks with 64, 128, 256, 512 and
    512 filters (kernel sizes 11, 5, 3, 3, 3; 'valid' padding; only the first
    convolution is strided, with stride 4). Max pooling follows the fourth
    block and global average pooling follows the fifth. The head is three
    ReLU ``Dense`` layers of 512 units, each followed by dropout, and a single
    sigmoid output unit.

    Args:
        dropout: rate of the dropout layers placed after each hidden dense
            layer.
        regularization: L2 factor applied as activity regulariser to the
            hidden dense layers, mirroring ``create_model``. This argument
            used to be accepted but silently ignored. A falsy value
            (``None`` or ``0``) disables the penalty.

    Returns:
        A ``tf.keras.Sequential`` model.
    """
    layers = tf.keras.layers

    def conv_block(n_filters, kernel_size, strides=(1, 1, 1), **kwargs):
        # Convolution without activation, then batch norm, then ReLU.
        return [
            layers.Conv3D(n_filters, kernel_size, strides=strides, padding='valid', **kwargs),
            layers.BatchNormalization(),
            layers.ReLU(),
        ]

    def dense_block():
        # A fresh regulariser object per layer, as create_model does.
        regularizer = tf.keras.regularizers.l2(regularization) if regularization else None
        return [
            layers.Dense(512, activation="relu", activity_regularizer=regularizer),
            layers.Dropout(dropout),
        ]

    model = tf.keras.Sequential(
        conv_block(64, 11, strides=(4, 4, 4), input_shape=(91, 109, 91, 1))
        + conv_block(128, 5)
        + conv_block(256, 3)
        + conv_block(512, 3)
        + [layers.MaxPooling3D()]
        + conv_block(512, 3)
        + [layers.GlobalAveragePooling3D()]
        + dense_block()
        + dense_block()
        + dense_block()
        + [layers.Dense(1, activation='sigmoid')]
    )
    return model

#### Funciones para probar un modelo
Modelo dado número de filtros y neuronas

In [None]:
def try_model(train_ds, val_ds, train_size, filters, neurons, batch_size = 32, dropout=0.05, reg=0.003, learning_rate = 3e-7, fold = 0, n_epoch = 200):
    """Build, train and evaluate a model with a given number of filters and neurons.

    The model is created with ``create_model(filters, neurons, dropout, reg)``,
    compiled with Adam and binary cross-entropy (tracking accuracy), and
    trained on ``train_ds.repeat()``. During training the weights with the
    lowest ``val_loss`` are checkpointed to an ``.h5`` file and TensorBoard
    logs are written under ``./my_logs_cv``. After training, the checkpointed
    model is reloaded and evaluated on ``val_ds``.

    Args:
        train_ds: training dataset; ``.repeat()`` is called on it, so it is
            presumably an already batched ``tf.data.Dataset`` — confirm
            against the caller.
        val_ds: validation dataset used for checkpointing and for the final
            evaluation.
        train_size: number of training examples, used together with
            ``batch_size`` to derive the number of steps per epoch.
        filters: number of filters of every convolutional layer.
        neurons: number of units of every hidden dense layer.
        batch_size: batch size that ``train_ds`` is assumed to use.
        dropout: dropout rate forwarded to ``create_model``.
        reg: regularisation factor forwarded to ``create_model``.
        learning_rate: learning rate of the Adam optimiser.
        fold: fold index; only used to name the checkpoint and the log dir.
        n_epoch: number of training epochs.

    Returns:
        dict with keys ``"ev_loss"`` (output of ``evaluate`` for the best-loss
        checkpoint, i.e. loss followed by accuracy), ``"loss_path"`` (path of
        that checkpoint) and ``"history"`` (the Keras ``History`` of the run).
    """
    import math  # local import keeps this cell self-contained

    loss_path = "model_loss_{}_{}_{}_{}_{}.h5".format(filters,neurons, dropout, reg, fold)
    checkpoint_cb_loss = keras.callbacks.ModelCheckpoint(loss_path, monitor="val_loss", save_best_only = True)

    run_logdir = os.path.join(
        os.curdir, "my_logs_cv",
        "run_{}_{}_{}_{}_{}".format(filters,neurons, dropout, reg, fold))
    tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)

    # Keras documents steps_per_epoch as an integer; round up so that one
    # epoch covers every training example at least once.
    steps_per_epoch = max(1, math.ceil(train_size / batch_size))

    # Build the model that actually honours `filters` and `neurons`
    # (previously create_model_def was called and both were ignored).
    model = create_model(filters, neurons, dropout, reg)
    model.compile(optimizer = keras.optimizers.Adam(learning_rate),
                  loss = tf.keras.losses.BinaryCrossentropy(),
                  metrics = ['accuracy'])

    history = model.fit(train_ds.repeat(), epochs = n_epoch, steps_per_epoch = steps_per_epoch,
                        validation_data = val_ds, verbose = 0,
                        callbacks = [checkpoint_cb_loss, tensorboard_cb])

    # Evaluate the checkpoint with the best validation loss, not the weights
    # of the last epoch.
    best_model = keras.models.load_model(loss_path)
    evaluation_loss = best_model.evaluate(val_ds)

    return {"ev_loss": evaluation_loss, "loss_path": loss_path, "history": history}


Modelo dado dropout rate y regularización

In [None]:
def try_model_final(train_ds, val_ds, train_size, batch_size = 32, dropout=0.05, reg=0.003, learning_rate = 3e-7, fold = 0, n_epoch = 200):
    """Build, train and evaluate the fixed-width model for one dropout/regularisation setting.

    The model is created with ``create_model_def(dropout, reg)``, compiled
    with Adam and binary cross-entropy (tracking accuracy), and trained on
    ``train_ds.repeat()``. During training the weights with the lowest
    ``val_loss`` are checkpointed to an ``.h5`` file and TensorBoard logs are
    written under ``./my_logs_cv``. After training, the checkpointed model is
    reloaded and evaluated on ``val_ds``.

    Args:
        train_ds: training dataset; ``.repeat()`` is called on it, so it is
            presumably an already batched ``tf.data.Dataset`` — confirm
            against the caller.
        val_ds: validation dataset used for checkpointing and for the final
            evaluation.
        train_size: number of training examples, used together with
            ``batch_size`` to derive the number of steps per epoch.
        batch_size: batch size that ``train_ds`` is assumed to use.
        dropout: dropout rate forwarded to ``create_model_def``.
        reg: regularisation factor forwarded to ``create_model_def``.
        learning_rate: learning rate of the Adam optimiser.
        fold: fold index; only used to name the checkpoint and the log dir.
        n_epoch: number of training epochs.

    Returns:
        dict with keys ``"ev_loss"`` (output of ``evaluate`` for the best-loss
        checkpoint, i.e. loss followed by accuracy), ``"loss_path"`` (path of
        that checkpoint) and ``"history"`` (the Keras ``History`` of the run).
    """
    import math  # local import keeps this cell self-contained

    loss_path = "model_loss_{}_{}_{}.h5".format(dropout, reg, fold)
    checkpoint_cb_loss = keras.callbacks.ModelCheckpoint(loss_path, monitor="val_loss", save_best_only = True)

    run_logdir = os.path.join(
        os.curdir, "my_logs_cv",
        "run__{}_{}_{}".format(dropout, reg, fold))
    tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)

    # Keras documents steps_per_epoch as an integer; round up so that one
    # epoch covers every training example at least once.
    steps_per_epoch = max(1, math.ceil(train_size / batch_size))

    model = create_model_def(dropout, reg)
    model.compile(optimizer = keras.optimizers.Adam(learning_rate),
                  loss = tf.keras.losses.BinaryCrossentropy(),
                  metrics = ['accuracy'])

    history = model.fit(train_ds.repeat(), epochs = n_epoch, steps_per_epoch = steps_per_epoch,
                        validation_data = val_ds, verbose = 0,
                        callbacks = [checkpoint_cb_loss, tensorboard_cb])

    # Evaluate the checkpoint with the best validation loss, not the weights
    # of the last epoch.
    best_model = keras.models.load_model(loss_path)
    evaluation_loss = best_model.evaluate(val_ds)

    return {"ev_loss": evaluation_loss, "loss_path": loss_path, "history": history}

In [None]:
batch_size = 32

# Split both classes into train and test partitions.
# NOTE(review): 0.15 is presumably the held-out fraction — confirm in
# util.train_test_split.
data = util.train_test_split(CN_imgs, CN_labels, AD_imgs, AD_labels, 0.15)

# Drop the notebook-level references to the image arrays to free memory.
# After this point the cell cannot be re-run without reloading the data.
CN_imgs = AD_imgs = None

In [None]:

# Cross-validated search over the dropout rate of the fixed-width model.
dropouts = [0.2, .3,.4,.5]
n_epoch = 250
batch_size = 32
n_folds = 5           # number of partitions passed to util.k_fold
effective_folds = 3   # only the first folds are actually trained
histories = []

best_model_loss = ""
best_model_acc = ""   # initialised so it exists even if accuracy never improves
best_loss = np.inf
best_acc = 0
dropout = None
reg = 0.003
lr = 1e-6

# [dropout, regularization] of the configuration with the best mean loss.
best_loss_parameters = []

for d in dropouts:
    run_evaluations = []
    for fold in range(effective_folds):
        start = time.time()

        fold_data = util.k_fold(data["train_imgs"], data["train_labels"], n_folds, fold)
        train_ds = (
            fold_data["train_ds"]
            .map(lambda tensor, labels: util.transform(tensor, labels), num_parallel_calls=16)
            .batch(batch_size)
            .prefetch(8)
        )
        # The whole validation split is evaluated as a single batch.
        val_ds = fold_data["val_ds"].batch(fold_data["val_size"])
        train_size = fold_data["train_size"]

        # Pass batch_size and reg explicitly: they used to rely on the
        # defaults of try_model_final matching the values set above.
        evaluations = try_model_final(train_ds, val_ds, train_size,
                                      batch_size=batch_size, dropout=d, reg=reg,
                                      fold=fold, learning_rate=lr, n_epoch=n_epoch)
        evaluation_loss = evaluations["ev_loss"]
        history = evaluations["history"]

        print("{} dropout, {} regularization, {} fold".format(d, reg, fold))
        print("Loss: {}, Accuracy: {}".format(evaluation_loss[0], evaluation_loss[1]))

        histories.append(history)
        run_evaluations.append(evaluation_loss)
        end = time.time()
        print("Tiempo de ejecucion de fold: {}".format(end-start))

    # Mean loss and accuracy of this dropout rate over the trained folds.
    loss, acc = (float(value) for value in np.mean(run_evaluations, axis=0))

    # The stored path is the checkpoint of the last fold of the configuration.
    if loss < best_loss:
        best_loss = loss
        best_model_loss = evaluations["loss_path"]
        best_loss_parameters = [d, reg]
        print("Nuevo mejor modelo de loss con {}".format(best_loss))

    if acc > best_acc:
        best_acc = acc
        best_model_acc = evaluations["loss_path"]
        print("Nuevo mejor modelo de accuracy con {}".format(best_acc))

    # Report the current dropout rate `d`; the previous code printed the
    # never-assigned `dropout` variable, i.e. always None. `fold` is the
    # index of the last fold run, the metrics are the fold averages.
    print("Definitive evaluation of best loss of model with {} dropout, {} regularization, {} fold".format(d, reg, fold))
    print("Loss: {}, Accuracy: {}".format(loss, acc))


print("Terminado")