# Simple ConvNet Baseline
This simple model is used as baseline to be compared with other models to be constructed. It acts as a simple benchmark.

In [1]:
import os
import random
import numpy as np
import tensorflow as tf
import cv2
# Used to split test and train sets
from sklearn.model_selection import train_test_split
# Keras is a high level wrapper on top of tensorflow (machine learning library)
# The Sequential container is a linear stack of layers
from tensorflow.python.keras.models import Sequential
# Popular optimization strategy that uses gradient descent 
from tensorflow.python.keras.optimizers import Adam
# To save our model periodically as checkpoints for loading later
from tensorflow.python.keras.callbacks import ModelCheckpoint, EarlyStopping
# Types of layers do we want our model to have
from tensorflow.python.keras.layers import Lambda, Conv2D, MaxPooling2D, Dropout, Dense, Flatten, Cropping2D, BatchNormalization, ELU

%matplotlib inline

In [2]:
# Set a seed value
seed_value= 523
    
def reset_random_seed():
    '''
    Set all the random seed generator to a fixed value to reproduce the same results at every training
    '''
    # 1. Set `PYTHONHASHSEED` environment variable at a fixed value
    os.environ['PYTHONHASHSEED']=str(seed_value)
    # 2. Set `python` built-in pseudo-random generator at a fixed value
    random.seed(seed_value)
    # 3. Set `numpy` pseudo-random generator at a fixed value
    np.random.seed(seed_value)
    # 4. Set `tensorflow` pseudo-random generator at a fixed value
    tf.compat.v1.set_random_seed(seed_value)    

### Import Data

In [3]:
DATA_PATH = "training_data/baseline_batch/"
data = "path"
x_training = np.load(DATA_PATH + "input.npy")
y_training = np.load(DATA_PATH + "output.npy")

INPUT_SHAPE = np.shape(x_training)[1:]
print(INPUT_SHAPE)

(6, 128, 256)


### Baseline ConvNet

In [4]:
def construct_model(data = "path"):
    """
    NVIDIA model used, referenced as a starting point
    Image normalization to avoid saturation and make gradients work better.
    Convolution: 5x5, filter: 24, strides: 2x2, activation: ELU
    Convolution: 5x5, filter: 36, strides: 2x2, activation: ELU
    Convolution: 5x5, filter: 48, strides: 2x2, activation: ELU
    Convolution: 3x3, filter: 64, strides: 1x1, activation: ELU
    Convolution: 3x3, filter: 64, strides: 1x1, activation: ELU
    Drop out (0.5)
    Fully connected: neurons: 100, activation: ELU
    Fully connected: neurons: 50, activation: ELU
    Fully connected: neurons: 10, activation: ELU
    Fully connected: neurons: 1 (output)

    ELU(Exponential linear unit) function takes care of the Vanishing gradient problem. 
    """
    #reset_random_seed()
    
    model = Sequential()

    # Image normalization to avoid saturation and make gradients work better.
    #model.add(Lambda(lambda x: x/127.5-1.0, input_shape=INPUT_SHAPE))
    model.add(Lambda(lambda x: x /255.0 - 0.5, input_shape=INPUT_SHAPE ))
    # Convolutions
    model.add(Conv2D(24, 5, strides=(2, 2), padding = "same", activation='elu'))
    model.add(Conv2D(36, 3, strides=(2, 2), padding = "same", activation='elu'))
    model.add(Conv2D(256, 3, strides=(1, 1), padding = "same", activation='elu'))
    model.add(BatchNormalization())
    model.add(ELU())
    # Drop out (0.5)
    model.add(Dropout(0.5, seed=seed_value))
    model.add(Flatten())
    
    if data == "path":
        model.add(Dense(512, activation='relu'))
        model.add(Dropout(0.1, seed=seed_value))
        model.add(Dense(100, activation='relu'))
    else:
        # FCNs
        model.add(Dense(100, activation='elu'))
        model.add(Dense(50, activation='elu'))
        model.add(Dense(10, activation='elu'))
        model.add(Dense(1, activation='elu'))
        
    model.summary()

    return model

In [5]:
epochs = 20
batch_size = 32

model = construct_model(data)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lambda (Lambda)              (None, 6, 128, 256)       0         
_________________________________________________________________
conv2d (Conv2D)              (None, 3, 64, 24)         153624    
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 2, 32, 36)         7812      
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 2, 32, 256)        83200     
_________________________________________________________________
batch_normalization (BatchNo (None, 2, 32, 256)        1024      
_________________________________________________________________
elu (ELU)                    (None, 2, 32, 256)        0         
_________________________________________________________________
dropout (Dropout)            (None, 2, 32, 256)        0

For a dumb baseline, we should make sure the following:
    1. Random seed is fixed so that the data is training to reproduce the same thing very single time. This removes a factor of variation and will help keep you sane. For more information, read [here](https://medium.com/@ODSC/properly-setting-the-random-seed-in-ml-experiments-not-as-simple-as-you-might-imagine-219969c84752) CHECKED
    2. Simplify the data, no augmentation is done. CHECKED
    3. Verify init loss. Make sure the loss starts at the correct loss value. CHECKED
    4. Init well so that the first few epochs don't need to waste the time to learn the biases. We know from the data processing that the path data has a mean around 1, hence the default initialiser is good enough. CHECKED 

In [6]:
from keras import backend as K
session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(), config=session_conf)
tf.compat.v1.keras.backend.set_session(sess)

# Using adam optimizer and also mean squared error as the loss function
adam = Adam(lr=0.0001)
model.compile(optimizer='adam', loss="mse")


checkpoint = ModelCheckpoint("simple_v1.h5", monitor='val_loss', verbose=1,
                                  save_best_only=True, mode='min')

early_stop = EarlyStopping(monitor='loss', min_delta=0.0001, patience=50,
                                verbose=1, mode='min')


model.fit(x_training, y_training, batch_size=batch_size, epochs=epochs, verbose=1,
                      callbacks=[checkpoint, early_stop], validation_split=0.18, shuffle=True)

model.save('simple_v1.h5')

Using TensorFlow backend.


Epoch 1/20
Epoch 00001: val_loss improved from inf to 3.04270, saving model to simple_v1.h5
Epoch 2/20
Epoch 00002: val_loss did not improve from 3.04270
Epoch 3/20
Epoch 00003: val_loss improved from 3.04270 to 1.33940, saving model to simple_v1.h5
Epoch 4/20
Epoch 00004: val_loss improved from 1.33940 to 0.94715, saving model to simple_v1.h5
Epoch 5/20
Epoch 00005: val_loss improved from 0.94715 to 0.73490, saving model to simple_v1.h5
Epoch 6/20
Epoch 00006: val_loss improved from 0.73490 to 0.50588, saving model to simple_v1.h5
Epoch 7/20
Epoch 00007: val_loss improved from 0.50588 to 0.41339, saving model to simple_v1.h5
Epoch 8/20
Epoch 00008: val_loss improved from 0.41339 to 0.30039, saving model to simple_v1.h5
Epoch 9/20
Epoch 00009: val_loss did not improve from 0.30039
Epoch 10/20
Epoch 00010: val_loss did not improve from 0.30039
Epoch 11/20
Epoch 00011: val_loss improved from 0.30039 to 0.28071, saving model to simple_v1.h5
Epoch 12/20
Epoch 00012: val_loss did not improv