# Notebook- Preprocessing & Fit : Model13_2.1  Optimization Hyperas
# Author : V.Albors   Date : 21.01.2020
# Purpose : Hyperparameter search with Hyperas


**Input** :  
  * CSV files that identify the images to use as train and validation. CSV files are in directory csv_dir   
  * Images from train and validation. Images are in directory : imag_dir  
  
  
**Output**:  
  * Download of the model trained with train dataset - with overfitting
  * Download the history of the model in order to be evaluated 

**Process**:  
 * Read Train and Validation images ( identified in the .csv files ) from the imag_dir directory   
 * Define Network 
 * Print Network + Save Network Definition
 * Compile Network 
 * Create a train and validation generator   
 * Train the model with the train dataset with 100 epochs  
 * Save the trained model and history of the model in directory model_bin_dir 
 
**To do hyper-parameter optimization on this model, just wrap the parameters you want to optimize into double curly brackets and choose a distribution over which to run the algorithm**

 * Parameters to optimize
 - Check the results from the 1st and 2nd scripts
     * Set structure of script 2
     * One layer + 
     * Batch size to 128 
     



In [1]:
from __future__ import print_function
from hyperopt import Trials, STATUS_OK, tpe
from hyperas import optim
from hyperas.distributions import choice, uniform

from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation

from keras.optimizers import Adam
# Do not import an optimizer if you do not want it among the choices
from keras.optimizers import RMSprop
from keras.optimizers import SGD

#from keras.datasets import mnist
from keras.utils import np_utils
from keras.callbacks import EarlyStopping, ModelCheckpoint

Using TensorFlow backend.


In [2]:
import tensorflow as tf
# Query the visible GPUs once and reuse the result for the report, the
# availability check and the memory-growth setting.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(physical_devices))
# Raise explicitly instead of asserting: assert statements are skipped when
# Python runs with -O, and the indexing below would then fail with a less
# helpful IndexError.
if not physical_devices:
    raise RuntimeError("Not enough GPU hardware devices available")
# Enable memory growth on the first GPU only.
tf.config.experimental.set_memory_growth(physical_devices[0], True)


Num GPUs Available:  1


In [3]:
# Run configuration for this notebook.
# Model_directory is handed to define_dirs() further down to resolve the
# project directories.
Model_directory = "MODELN13"
# NOTE(review): Model_name and TRAIN are not referenced by the cells visible
# here; presumably they name the saved model and toggle training - confirm.
Model_name = "ModelK13_21"
TRAIN = True

In [4]:
# Import routines
import sys  
subrc_dir = "/home/valborsf/Documents/UOC/PFMProject/"

sys.path.append(subrc_dir)  
from  Models_routines import *
import inspect

# List functions inside the module
import Models_routines as module
functions = inspect.getmembers(module, inspect.isfunction)
lsfunctions = [item[0] for item in functions]
print ( lsfunctions )

['confusion_ROC_AUC', 'create_column_tensor', 'create_label_tensor', 'create_val_test', 'define_dirs', 'extract_images_bm', 'extract_images_train', 'load_hist_model', 'load_images', 'load_images_tf', 'model_load', 'plot_save_acc_loss', 'print_network', 'process_clinical_info', 'read_dataframes', 'read_dataframes_tables', 'reproducible_results', 'save_model', 'save_network_json', 'start', 'stop', 'to_one_hot', 'to_one_hot_words', 'xi_squared']


In [5]:
# Define directories: unpack the eight paths returned by define_dirs() for
# the configured Model_directory.
# Note: data() rebuilds its own csv_dir from root_dir instead of using the
# csv_dir returned here.
(root_dir,json_dir,imag_dir,csv_dir,model_json_dir,model_bin_dir,results_dir,Tensor_dir) = define_dirs(Model_directory)

In [6]:
def data():
        # Data provider passed to optim.minimize(data=data): loads the image
        # and label tensors for the train, validation and test splits.
        # NOTE(review): hyperas works from this function's source text, so
        # keep the variable names aligned with create_model's parameters and
        # keep the final statement on one line - confirm against the hyperas
        # documentation before restructuring.
        # NOTE(review): root_dir is a module-level name set by define_dirs();
        # verify it is still resolvable where hyperas executes this code.

        # Local csv_dir, built from root_dir (not the one from define_dirs()).
        csv_dir =  root_dir +"/DataNew4/CSV/"   
        (dftrain, dfval, dftest) = read_dataframes(csv_dir)
        # 150x150 matches the input_shape=(150,150,3) used in create_model.
        height_imag = 150 
        width_imag = 150
        # Image tensors, one per split.
        x_train  = load_images_tf(dftrain,height_imag,width_imag)
        x_val    = load_images_tf(dfval,height_imag,width_imag)
        x_test   = load_images_tf(dftest,height_imag,width_imag)
        # Label tensors, one per split.
        y_train  = create_label_tensor(dftrain)
        y_val    = create_label_tensor(dfval)
        y_test   = create_label_tensor(dftest)
        return x_train, y_train, x_val, y_val, x_test, y_test
        

In [7]:
def create_model(x_train, y_train, x_val, y_val, x_test, y_test):
    """Build, train and score one CNN candidate for the hyperas search.

    The expressions wrapped in double curly brackets are hyperas templates;
    hyperas substitutes a sampled value for each of them before running the
    function. Three values are searched: whether a second dense layer is
    added, the width of that layer, and the batch size.

    Args:
        x_train, y_train: training images and labels given to model.fit.
        x_val, y_val: validation images and labels, used as validation_data.
        x_test, y_test: accepted to match the tuple produced by data();
            not used inside this function.

    Returns:
        A dict with 'loss' (the negated best validation accuracy over the
        epochs that ran, so that minimising it maximises accuracy),
        'status' (STATUS_OK) and 'model' (the trained model).
    """
    # All imports are grouped at the top of the function, one per line.
    # NOTE(review): several are unused in the body; they are kept because
    # hyperas collects the notebook's import statements - confirm before
    # pruning.
    import keras
    from hyperopt import Trials, STATUS_OK, tpe
    from keras import layers
    from keras import models
    from hyperas import optim
    from hyperas.distributions import choice, uniform
    import numpy as np
    from keras.utils import np_utils
    from keras import optimizers
    from keras.optimizers import Adam
    from keras.callbacks import EarlyStopping

    # Convolutional feature extractor: three Conv2D + MaxPooling2D stages.
    model = models.Sequential()
    model.add(layers.Conv2D(64, (3,3), activation='relu', input_shape=(150,150,3)))
    model.add(layers.MaxPooling2D((2,2)))
    model.add(layers.Conv2D(32, (3,3), activation='relu'))
    model.add(layers.MaxPooling2D((2,2)))
    model.add(layers.Conv2D(128, (3,3), activation='relu'))
    model.add(layers.MaxPooling2D((2,2)))

    # Dense head; the second dense layer is optional and its width is searched.
    model.add(layers.Flatten())
    model.add(layers.Dense(512, activation='relu'))
    model.add(layers.Dropout(0.23))
    num_layers = {{choice(['one', 'two'])}}
    if num_layers == 'two':
        model.add(layers.Dense({{choice([32, 64, 128])}}, activation='relu'))

    # Single sigmoid unit for the binary output.
    model.add(layers.Dense(1, activation='sigmoid'))

    # Fixed optimizer for every trial. It is bound to `optimizer` so that it
    # does not rebind the hyperas `optim` module imported above.
    optimizer = Adam(lr=1e-4)

    model.compile(loss='binary_crossentropy',
                  optimizer=optimizer,
                  metrics=['acc'])
    # Stop once val_loss has not improved for 4 consecutive epochs.
    early_stopping = EarlyStopping(monitor='val_loss', patience=4)

    # Upper bound on epochs; early stopping usually ends training sooner.
    epochs = 100

    result = model.fit(x_train, y_train,
                       batch_size={{choice([64, 128])}},
                       epochs=epochs,
                       verbose=2,
                       validation_data=(x_val, y_val),
                       callbacks=[early_stopping])

    # NOTE(review): the score is the best epoch's val_acc, whereas the
    # returned model presumably holds the weights of the last epoch run;
    # consider restore_best_weights or a checkpoint if the two must match.
    validation_acc = np.amax(result.history['val_acc'])
    print('Best validation acc of epoch:', validation_acc)
    return {'loss': -validation_acc, 'status': STATUS_OK, 'model': model}

In [9]:
if __name__ == '__main__':
    import time

    # Time the whole search plus the final evaluation on the test split.
    start_time = time.time()
    # Run the hyperas search: 5 evaluations of create_model using TPE.
    # NOTE(review): a recorded run of this notebook failed with
    # FileNotFoundError for '<working dir>/<notebook_name>.ipynb'; make sure
    # notebook_name matches this notebook's file name in the working directory.
    best_run, best_model = optim.minimize(model=create_model,
                                          data=data,
                                          algo=tpe.suggest,
                                          max_evals=5,
                                          trials=Trials(),
                                          eval_space=True,
                                          notebook_name='ModelK13_Hyperas_21')
    # Reload the data here: minimize() returns only the best run and model,
    # and the test split is needed for the final evaluation.
    x_train, y_train, x_val, y_val, x_test, y_test = data()

    print("Evalutation of best performing model:")
    print(best_model.evaluate(x_test, y_test))
    print("Best performing model chosen hyper-parameters:")
    print(best_run)

    elapsed_time = time.time() - start_time

    # Print the formatted duration; the original built this string and then
    # discarded it, so the timing never appeared in the output.
    # The HH:MM:SS rendering wraps around for runs of 24 hours or longer.
    print(time.strftime('Time spent in TF loading :'"%H:%M:%S", time.gmtime(elapsed_time)))
    

FileNotFoundError: [Errno 2] No such file or directory: '/home/valborsf/Documents/UOC/PFMProject/MODELK13/ModelK13_Hyperas_2.1.ipynb'

In [8]:
# Results :
