# Notebook- Data Augmentation ( Model K3_1 ) 
# Author : V.Albors   Date : 05.02.2020
# Purpose : Preprocess and Train the CNN (Data Augmentation )


**Input** :  
  * CSV files that identify the images to use as train and validation. CSV files are in directory csv_dir   
  * Images from train and validation. Images are in directory : imag_dir  
  
  
**Output**:  
  * The saved model, trained on the train dataset with Data Augmentation
  * The saved training history of the model, so that it can be evaluated later

**Process**:  
 * Read Train and Validation images ( identified in the .csv files ) from the imag_dir directory   
 * Define Network 
 * Print Network + Save Network Definition
 * Compile Network 
 * Create a train and validation generator   
 * Train the model on the train dataset for 100 epochs with Data Augmentation (callbacks: ModelCheckpoint)  
 * Save the trained model and history of the model in directory model_bin_dir 



In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf

# Reset the Keras backend state so this run starts from a clean session.
tf.keras.backend.clear_session()

In [None]:
import tensorflow as tf

# Query the visible GPUs once and reuse the result for the report, the
# sanity check and the memory configuration below.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(physical_devices))

# This notebook expects a GPU: stop early with an explicit error
# (an `assert` would be skipped when Python runs with -O).
if not physical_devices:
    raise RuntimeError("Not enough GPU hardware devices available")

# Allocate GPU memory on demand instead of reserving it all up front.
# TensorFlow requires the memory-growth setting to be the same on every
# visible GPU, so apply it to all of them rather than only to the first one.
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)

# Log the device each operation is placed on.
tf.debugging.set_log_device_placement(True)

In [None]:
# Report the TensorFlow version in use
print(tf.__version__)
from tensorflow.python.platform import build_info as tf_build_info
# Report the CUDA and cuDNN version numbers exposed by the TensorFlow build
# info, in that order (author's note: CUDA 9.0 and cuDNN 7 in v1.10.0).
for _build_attribute in ("cuda_version_number", "cudnn_version_number"):
    print(getattr(tf_build_info, _build_attribute))

In [2]:
# Define the name of the model, its directory and whether to train the model
Model_directory = "MODELK3"   # passed to define_dirs() to resolve the working directories
Model_name = "ModelK3_1"      # used in the names of the saved network, checkpoint and model files
TRAIN = True                  # True: train and save the model; False: load a previously saved model

In [3]:
# Import routines
import os
import sys

# Known locations of the project sources, most preferred first. The first one
# that exists on this machine is used; if none exists, fall back to the
# preferred location so that the import below fails in the usual way.
candidate_dirs = [
    "/home/user/Documentos/UOC/PFM/PFMProject/",
    "/home/valborsf/Documents/UOC/PFMProject/",
]
subrc_dir = next(
    (path for path in candidate_dirs if os.path.isdir(path)),
    candidate_dirs[0],
)
# Do not stack duplicate entries on sys.path when this cell is re-run.
if subrc_dir not in sys.path:
    sys.path.append(subrc_dir)

# The star import is intentional: the following cells call the project
# routines (define_dirs, read_dataframes, save_model, ...) unqualified.
from  Models_routines import *
import inspect

# List functions inside the module
import Models_routines as module
functions = inspect.getmembers(module, inspect.isfunction)
lsfunctions = [name for name, _ in functions]
print ( lsfunctions )

['confusion_ROC_AUC', 'create_column_tensor', 'create_label_tensor', 'create_val_test', 'define_dirs', 'extract_images_train', 'load_hist_model', 'load_images', 'model_load', 'plot_save_acc_loss', 'print_network', 'process_clinical_info', 'read_dataframes', 'read_dataframes_tables', 'reproducible_results', 'save_model', 'save_network_json', 'start', 'stop', 'to_one_hot', 'to_one_hot_words', 'xi_squared']


In [4]:
# Reproducible results
# reproducible_results() is one of the routines star-imported from Models_routines.
# NOTE(review): presumably it fixes the random seeds - confirm in Models_routines.
reproducible_results ()

Using TensorFlow backend.


In [5]:
# Define directories
# define_dirs() (from Models_routines) returns the eight paths unpacked below;
# imag_dir, model_json_dir, model_bin_dir and results_dir are used by later cells.
# NOTE(review): the directory layout itself is decided inside define_dirs - confirm there.
(root_dir,json_dir,imag_dir,csv_dir,model_json_dir,model_bin_dir,results_dir,Tensor_dir) = define_dirs(Model_directory)

In [6]:
from keras import layers
from keras import models

# Convolutional network for binary classification of 150x150 RGB images:
# four Conv2D(3x3, relu) + MaxPooling2D(2x2) stages with 32, 64, 128 and 128
# filters, then a 512-unit dense layer and a single sigmoid output unit.
model = models.Sequential([
    layers.Conv2D(32, (3,3), activation='relu', input_shape=(150,150,3)),
    layers.MaxPooling2D((2,2)),
    layers.Conv2D(64, (3,3), activation='relu'),
    layers.MaxPooling2D((2,2)),
    layers.Conv2D(128, (3,3), activation='relu'),
    layers.MaxPooling2D((2,2)),
    layers.Conv2D(128, (3,3), activation='relu'),
    layers.MaxPooling2D((2,2)),
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

In [7]:
# Print the layer-by-layer summary of the network (output shapes and parameter counts).
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 148, 148, 32)      896       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 74, 74, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 72, 72, 64)        18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 36, 36, 64)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 34, 34, 128)       73856     
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 17, 17, 128)       0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 15, 15, 128)      

In [8]:
# Print Network: hand the results directory, the model and its name to print_network.
# NOTE(review): presumably writes a picture of the network into results_dir - confirm in Models_routines.
print_network (results_dir, model, Model_name)
# Save Network: hand the model JSON directory, the model and its name to save_network_json.
# NOTE(review): presumably stores the architecture as JSON in model_json_dir - confirm in Models_routines.
save_network_json (model_json_dir, model, Model_name)

In [9]:
# Compile Network
from keras import optimizers

# Binary cross-entropy loss with accuracy as the reported metric.
# Optimizer: Adam with learning rate 1e-4 (RMSprop with lr=1e-4 and lr=1e-5
# were the alternatives tried before).
model.compile(
    optimizer=optimizers.Adam(lr=1e-4),
    loss='binary_crossentropy',
    metrics=['acc'],
)

In [10]:
# Load the train, validation and test data frames.
# read_dataframes() comes from Models_routines; dftrain and dfval feed the image generators below.
# NOTE(review): presumably they are read from the CSV files described in the header - confirm in Models_routines.
(dftrain, dfval, dftest) = read_dataframes()

/home/user/Documentos/UOC/PFM/PFMProject/DataNew/CSV/


In [11]:
# Callbacks to be used during training
import keras
from keras.callbacks import ModelCheckpoint
from keras.callbacks import EarlyStopping

# Only checkpointing is active. An EarlyStopping callback (monitoring 'acc')
# and a TensorBoard callback (logging to Tensor_dir) are intentionally left out.
callbacks_list = [
    # Keep on disk the model of the best performing epoch: the file is
    # overwritten only when val_loss has improved.
    ModelCheckpoint(
        filepath=model_bin_dir+"Best_weights"+Model_name+".hdf5",
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
    ),
]

In [12]:
from keras.preprocessing.image import ImageDataGenerator

# Training images: pixel values rescaled by 1/255 plus random augmentation
# (rotation, horizontal/vertical shift, shear, zoom and horizontal flip).
train_datagen = ImageDataGenerator(
    horizontal_flip=True,
    zoom_range=0.2,
    shear_range=0.2,
    height_shift_range=0.2,
    width_shift_range=0.2,
    rotation_range=40,
    rescale=1./255,
)

# Validation images are only rescaled, never augmented.
validation_datagen = ImageDataGenerator(rescale=1./255)

# Options shared by both generators.
# Batch size is 20: with the 2614 train and 871 validation images found by
# flow_from_dataframe, one epoch is 131 train batches and 44 validation batches.
_flow_options = dict(
    directory=imag_dir,        # directory the image files are read from
    x_col='file_name_ext',     # data frame column holding the file names
    y_col='bm',                # data frame column holding the target
    target_size=(150, 150),    # images are resized to these dimensions
    batch_size=20,             # number of images per batch
    class_mode='binary',       # targets are yielded as a 1D array of binary labels
)

train_generator = train_datagen.flow_from_dataframe(dataframe=dftrain, **_flow_options)

validation_generator = validation_datagen.flow_from_dataframe(dataframe=dfval, **_flow_options)
    

Found 2614 validated image filenames belonging to 2 classes.
Found 871 validated image filenames belonging to 2 classes.


In [13]:
# Train the network with the augmented train generator and validate after
# every epoch; the callbacks checkpoint the best model seen so far.
if TRAIN:
    epochs = 100
    history = model.fit_generator(
        train_generator,
        # One epoch is one full pass over each generator. len(generator) is the
        # number of batches needed to cover all of its samples, so the step
        # counts follow the data and the batch size instead of being hard-coded
        # (currently ceil(2614 / 20) = 131 and ceil(871 / 20) = 44).
        steps_per_epoch=len(train_generator),
        epochs=epochs,
        callbacks=callbacks_list,
        validation_data=validation_generator,
        validation_steps=len(validation_generator),
    )

    # Hand the trained model and its training history to save_model
    # (from Models_routines), together with the target directory and name.
    save_model(model, history, model_bin_dir, Model_name)


Epoch 1/100

Epoch 00001: val_loss improved from inf to 0.58528, saving model to /home/user/Documentos/UOC/PFM/PFMProject/MODEL2/BMODEL/Best_weightsModel2_Rep.hdf5
Epoch 2/100

Epoch 00002: val_loss did not improve from 0.58528
Epoch 3/100

Epoch 00003: val_loss did not improve from 0.58528
Epoch 4/100

Epoch 00004: val_loss improved from 0.58528 to 0.50577, saving model to /home/user/Documentos/UOC/PFM/PFMProject/MODEL2/BMODEL/Best_weightsModel2_Rep.hdf5
Epoch 5/100

Epoch 00005: val_loss did not improve from 0.50577
Epoch 6/100

Epoch 00006: val_loss did not improve from 0.50577
Epoch 7/100

Epoch 00007: val_loss did not improve from 0.50577
Epoch 8/100

Epoch 00008: val_loss did not improve from 0.50577
Epoch 9/100

Epoch 00009: val_loss did not improve from 0.50577
Epoch 10/100

Epoch 00010: val_loss improved from 0.50577 to 0.43771, saving model to /home/user/Documentos/UOC/PFM/PFMProject/MODEL2/BMODEL/Best_weightsModel2_Rep.hdf5
Epoch 11/100

Epoch 00011: val_loss did not improve


Epoch 00085: val_loss did not improve from 0.36799
Epoch 86/100

Epoch 00086: val_loss did not improve from 0.36799
Epoch 87/100

Epoch 00087: val_loss did not improve from 0.36799
Epoch 88/100

Epoch 00088: val_loss did not improve from 0.36799
Epoch 89/100

Epoch 00089: val_loss did not improve from 0.36799
Epoch 90/100

Epoch 00090: val_loss did not improve from 0.36799
Epoch 91/100

Epoch 00091: val_loss did not improve from 0.36799
Epoch 92/100

Epoch 00092: val_loss did not improve from 0.36799
Epoch 93/100

Epoch 00093: val_loss did not improve from 0.36799
Epoch 94/100

Epoch 00094: val_loss did not improve from 0.36799
Epoch 95/100

Epoch 00095: val_loss did not improve from 0.36799
Epoch 96/100

Epoch 00096: val_loss did not improve from 0.36799
Epoch 97/100

Epoch 00097: val_loss did not improve from 0.36799
Epoch 98/100

Epoch 00098: val_loss did not improve from 0.36799
Epoch 99/100

Epoch 00099: val_loss did not improve from 0.36799
Epoch 100/100

Epoch 00100: val_loss d

In [None]:
# When training is skipped (TRAIN is False), replace the freshly defined model
# with the one returned by model_load (from Models_routines) for this
# model directory and model name.
if not TRAIN :
    model = model_load ( model_bin_dir, Model_name)