# Neural Network Training
## Author: [Jeremiah Croshaw](https://linktr.ee/jeremiahcroshaw)
#### Last Edited: Sept 23 2020

Since this code was written while employed by [Quantum Silicon Inc.](https://www.quantumsilicon.com/), I have been advised to share it under the GNU-GPL
***
Copyright (C) 2020  Jeremiah Croshaw

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2
of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
***

### This code demonstrates how the neural network is trained from prepared data.
### The NN architecture corresponds to the highest performing NN as shown in Croshaw's PhD Thesis

The network design was done by Croshaw, but some of the code infrastructure was developed with Rashidi for the linked work (below)

### This code was developed for follow up work to our [published work](https://iopscience.iop.org/article/10.1088/2632-2153/ab6d5e) on defect segmentation of scanning probe images of the H-Si(100) surface.  


Author correspondence: croshaw@ualberta.ca

In [1]:
import h5py
import keras
import keras.backend as K 
from keras.models import Sequential,Model,load_model
from keras.layers import *
from keras.layers.normalization import BatchNormalization
from keras.optimizers import Adam
import math
import numpy as np
import tensorflow as tf

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


### dice_coef():
***
input:
- y_true - ground truths (labelled data)
- y_pred - predicted by NN
- smooth - smoothing factor

output:
- returns the calculated dice_coef

### dice_coef_loss():
***
input - same as above

output - negative dice_coef()

In [2]:
def dice_coef(y_true, y_pred,smooth=1):
    """Return the smoothed Dice coefficient between labels and predictions.

    Both tensors are flattened, so the score is computed over every element
    at once rather than per sample or per class.

    Args:
        y_true: ground-truth labels.
        y_pred: network predictions.
        smooth: constant added to numerator and denominator; it keeps the
            ratio defined when both tensors sum to zero.

    Returns:
        (2 * sum(y_true * y_pred) + smooth) / (sum(y_true) + sum(y_pred) + smooth)
    """
    truth_flat = K.flatten(y_true)
    pred_flat = K.flatten(y_pred)
    overlap = K.sum(truth_flat * pred_flat)
    numerator = 2. * overlap + smooth
    denominator = K.sum(truth_flat) + K.sum(pred_flat) + smooth
    return numerator / denominator

def dice_coef_loss(y_true, y_pred):
    """Return the negated Dice coefficient so that minimising it maximises overlap.

    Args:
        y_true: ground-truth labels.
        y_pred: network predictions.

    Returns:
        ``-dice_coef(y_true, y_pred)`` using dice_coef's default smoothing.
    """
    score = dice_coef(y_true, y_pred)
    return -score

### model_seg():
used to define the neural network

inputs:
- pretrained_weights - to be used if you want to initialize the network with pretrained weights
- input_size - (None,None, 1).  This allows any size image to be used as input (resolution is the only restriction)
- dropout_rate - add if you want any drop out
- kernel_size_small - useful if you want to use the same kernel size throughout the network
- kernel_size_large - different kernel sizes are useful to explore
- padding = 'same'
- activation = 'relu'
- pool_size = (2,2)

output:
- model


In [3]:
def model_seg(pretrained_weights = None,
              input_size = (None,None,1),
              dropout_rate = 0.0,
              kernel_size_small = (3,3),
              kernel_size_large = (5,5),
              padding = 'same',
              activation = 'relu',
              pool_size = (2,2)):
    """Build the encoder/decoder segmentation network.

    The encoder is three stages of two conv units (64, 128, 256 filters),
    each stage followed by max pooling.  The decoder mirrors it with three
    stages of upsampling followed by two conv units (256, 128, 64 filters).
    A final 14-filter convolution is reshaped to (pixels, 14) and passed
    through a softmax, giving one 14-way distribution per pixel.

    Args:
        pretrained_weights: path to a weights file to load into the model.
            Any falsy value (None, 0, '') means "do not load anything", so
            existing callers that pass 0 keep working.
        input_size: shape passed to ``Input``; (None, None, 1) accepts
            single-channel images of any height and width.
        dropout_rate: rate used by every Dropout layer.
        kernel_size_small: kernel size of every conv unit.
        kernel_size_large: currently unused; kept so the signature stays
            compatible with existing callers.
        padding: padding mode of every conv unit.
        activation: activation applied after each batch normalisation.
        pool_size: window of each max pooling layer; the same factor is
            used for the matching upsampling layers so that the decoder
            undoes the encoder's downsampling.

    Returns:
        The (uncompiled) Keras ``Model``.
    """
    def encoder_unit(tensor, filters):
        # Encoder ordering: conv -> dropout -> batch norm -> activation.
        tensor = Conv2D(filters = filters, kernel_size=kernel_size_small, padding=padding)(tensor)
        tensor = Dropout(dropout_rate)(tensor)
        tensor = BatchNormalization()(tensor)
        return Activation(activation)(tensor)

    def decoder_unit(tensor, filters):
        # Decoder ordering differs: dropout -> conv -> batch norm -> activation.
        tensor = Dropout(dropout_rate)(tensor)
        tensor = Conv2D(filters = filters, kernel_size=kernel_size_small, padding=padding)(tensor)
        tensor = BatchNormalization()(tensor)
        return Activation(activation)(tensor)

    inputs = Input(input_size)
    tensor = inputs

    # Layers are created in the same order as a hand-unrolled version would
    # create them, so auto-generated layer names and the weight ordering of
    # previously saved models are unaffected.
    for filters in (64, 128, 256):
        tensor = encoder_unit(tensor, filters)
        tensor = encoder_unit(tensor, filters)
        tensor = MaxPool2D(pool_size=pool_size)(tensor)

    for filters in (256, 128, 64):
        tensor = UpSampling2D(size=pool_size)(tensor)
        tensor = decoder_unit(tensor, filters)
        tensor = decoder_unit(tensor, filters)

    # Classification head: 14 output channels, flattened to one row per pixel
    # so the softmax normalises over the 14 channels of each pixel.
    tensor = Conv2D(filters = 14, kernel_size=(3,3),padding='same')(tensor)
    tensor = Reshape((-1,14))(tensor)
    outputs = Activation('softmax')(tensor)

    model = Model(inputs = [inputs],outputs = [outputs])

    if pretrained_weights:
        model.load_weights(pretrained_weights)

    return model

### Main
***
Imports the training data (this data should be shuffled beforehand to avoid biases when using mini-batches),
creates the model, then trains.

Model checkpoints are saved during training, and the per-epoch metrics are logged to a csv file so they can be plotted while training is running.

In [None]:
# Data should already be divided into train, test, and validate sets to keep
# the split constant throughout training.
# Here X corresponds to images, y corresponds to labels.
#
# The file is opened in a `with` block so it is always closed.  Every dataset
# is therefore read fully into memory first: `[...]` materialises the X
# datasets as NumPy arrays and np.reshape does the same for the y datasets,
# so all six arrays stay valid after the file is closed.
# NOTE(review): assumes the X datasets fit in RAM; the y datasets were already
# being loaded in full by np.reshape -- confirm for very large files.
with h5py.File('.\\data_file.h5','r') as h5f:
    X_train = h5f['X_train'][...]
    # reshape the labels to (samples, 16384, 14) to match the network output of
    # one 14-way vector per position
    # (presumably 16384 = 128*128 pixels per image -- TODO confirm)
    y_train = np.reshape(h5f['y_train'],(-1,16384,14))
    X_test = h5f['X_test'][...]
    y_test = np.reshape(h5f['y_test'],(-1,16384,14))
    X_val = h5f['X_val'][...]
    y_val = np.reshape(h5f['y_val'],(-1,16384,14))

# print the shape of the train, test, and validation data sets
print(
"X_train shape:", X_train.shape,'\n',
"y_train shape:", y_train.shape,'\n',
"X_test shape:", X_test.shape,'\n',
"y_test shape:", y_test.shape, '\n',
"X_val shape:", X_val.shape,'\n',
"y_val shape:", y_val.shape,'\n'
)

# create the model; None states explicitly that no pretrained weights are used
# (previously the integer 0 was passed in the pretrained_weights slot)
model = model_seg(pretrained_weights=None)

# define the optimizer
optimizer = Adam() # parameters for Adam() can be tuned for training

# Optional learning-rate schedule, handed to the LearningRateScheduler callback.
def lr_decay(epoch):
    """Return the learning rate for the given epoch.

    The rate is ``initial_lrate * drop**epoch``.  With ``drop`` equal to 1
    the schedule is constant at 0.01; set ``drop`` below 1 to obtain an
    exponential decay.
    """
    drop = 1
    initial_lrate = 0.01
    decay_multiplier = math.pow(drop, epoch)
    return initial_lrate * decay_multiplier

import os

################################################################
#define some checkpoint saves to monitor how training is going

# name used for the per-epoch CSV log written by CSVLogger below
model_name = "model_8c_256_double-double-conv"

# Make sure the output directory exists before training starts: the callbacks
# below write into it and are not guaranteed to create it themselves.
os.makedirs('./checkpoints_model_8c', exist_ok=True)

# The scheduler sets the optimizer's learning rate from lr_decay at the start
# of every epoch, so lr_decay takes precedence over the optimizer's own rate.
lrRate_callback=keras.callbacks.LearningRateScheduler(lr_decay, verbose=1)

# NOTE(review): the checkpoint is selected on the *training* loss
# (monitor='loss') while the file name records the validation metrics --
# confirm whether monitor='val_loss' was intended.
model_checkpoint=keras.callbacks.ModelCheckpoint("./checkpoints_model_8c/weights_256_.{epoch:02d}-{val_loss:.2f}-{val_dice_coef:.2f}.hdf5", 
                 monitor='loss', verbose=1, save_best_only=True,save_weights_only=False, mode='min', period=5)
csv_callback=keras.callbacks.CSVLogger('./checkpoints_model_8c/{}.csv'.format(model_name), separator=',', append=False)
callbacks=[lrRate_callback,model_checkpoint,csv_callback]
################################################################

#compile the model
model.compile(optimizer=optimizer,loss='categorical_crossentropy',metrics=['accuracy',dice_coef])
model.summary()

# from here on model_name is the path the final model is saved to; the CSV
# log above keeps the longer name it was created with
model_name = "model_8c"
#run this instead if you want to load a previously trained model
#model=load_model('.\\previously_trained_model.hdf5',custom_objects={'dice_coef': dice_coef,'dice_coef_loss':dice_coef_loss})

#define training epochs
epochs = 100

#start the training process
# validation_data is given explicitly, so no fraction of the training data is
# split off (validation_split=0)
model.fit(x=X_train,y=y_train,epochs=epochs, batch_size=50,validation_split=0,validation_data=(X_val,y_val),
          callbacks=callbacks,shuffle="batch")

model.save(model_name)

### Plot the training
***
if you are interested in seeing how the training is going, this code will plot based on the .csv files saved

In [None]:
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math

# Apply matplotlib's 'ggplot' style sheet to the figures created below.
plt.style.use('ggplot')

# Learning-rate schedule, repeated here so that the rate can be plotted as a
# function of the epoch number; keep it in sync with the one used for training.
def lr_decay(epoch):
    """Return the learning rate for the given epoch.

    Computes ``initial_lrate * drop**epoch``; with ``drop`` equal to 1 this
    is a constant 0.01 for every epoch.
    """
    initial_lrate, drop = 0.01, 1
    scale = math.pow(drop, epoch)
    return initial_lrate * scale

#read in the desired CSV file
# NOTE(review): this path is not the one the training cell writes its log to
# ('./checkpoints_model_8c/model_8c_256_double-double-conv.csv') -- point it
# at the run you want to inspect.
df=pd.read_csv('./checkpoints_model_8/model_8_128_double-conv.csv')   
df['lr']=df['epoch'].apply(lr_decay) 

# Depending on the Keras version the validation accuracy is logged as
# 'val_acc' or as 'val_accuracy'; use whichever column the log contains.
val_acc_col = 'val_acc' if 'val_acc' in df.columns else 'val_accuracy'

fig,ax=plt.subplots(2,3,figsize=(15,6))

# One entry per panel: (axes, CSV column, y-axis label, colour).
# Top row: training loss, training dice and learning rate; bottom row: the
# validation loss, dice and accuracy.
panels = [
    (ax[0][0], 'loss',          'loss',          'blue'),
    (ax[0][1], 'dice_coef',     'dice_coef',     'red'),
    (ax[0][2], 'lr',            'learning rate', 'green'),
    (ax[1][0], 'val_loss',      'val_loss',      'blue'),
    (ax[1][1], 'val_dice_coef', 'val_dice_coef', 'red'),
    (ax[1][2], val_acc_col,     val_acc_col,     'green'),
]

for axis, column, label, colour in panels:
    # markers plus a connecting line for every metric
    axis.scatter(df['epoch'],df[column],color=colour)
    axis.plot(df['epoch'],df[column],color=colour)
    axis.set_xlabel('epoch')
    axis.set_ylabel(label)

# the learning rate is shown on a logarithmic axis
ax[0][2].set_yscale('log')

plt.subplots_adjust(wspace = 0.3,hspace = 0.3)

#plt.show()
fig.savefig('./checkpoints_model_8/training_plots.pdf')