In [None]:
%%HTML
<link rel="stylesheet" type="text/css" href="//fonts.googleapis.com/css?family=Quicksand:300" />
<link rel="stylesheet" type="text/css" href="custom.css">

### Tell Keras to use tensorflow as backend

At the moment this is the default option.

In [None]:
import os

# Select the Keras backend through the environment, ahead of the `keras` imports below.
os.environ.update(KERAS_BACKEND='tensorflow')

In [None]:
import inspect

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from utils import plot_training_summary
from utils import TimeSummary
from utils import set_seed

%matplotlib inline
# Default (width, height) used for every figure drawn in this notebook.
plt.rcParams['figure.figsize'] = (15, 6)

# Basics of Keras sequential API

- Rodrigo Agundez
- Amsterdam @ Booking.com
- Friday May 25th, 2018

![footer_logo](images/logo.png)

**Goal**

- Get familiar with the Keras sequential API
- Build a feed forward neural network

### Import `Sequential` model API

In [None]:
from keras.models import Sequential

### Import predefined `layers`

In [None]:
from keras import layers
# Public names exposed by `keras.layers` (everything not starting with an underscore).
[name for name in dir(layers) if not name.startswith('_')]

### Import `optimizers`

In [None]:
from keras import optimizers
# Public names exposed by `keras.optimizers` (everything not starting with an underscore).
[name for name in dir(optimizers) if not name.startswith('_')]

### Build dummy  model

![simple nn](images/model_diagram.gif)

first initialize the model with the sequential API

In [None]:
# Start from an empty Sequential model; layers are appended to it in the cells below.
model = Sequential(name='DummyModel')

we can immediately add hidden layers using `model.add()`

In [None]:
# Show the documentation of the method used to append layers to the model.
help(model.add)

for each layer you can define its metadata parameters. For example, the `Dense()` layer has a name, a number of units, an activation function, etc.

In [None]:
# Show the documentation of the `Dense` layer class.
help(layers.Dense)

create the first hidden layer of 3 units

In [None]:
# Adding the first layer without `input_dim`/`input_shape` is expected to raise a
# ValueError. Catch and print it so the demonstration is still shown while
# "Restart & Run All" can continue past this cell.
# NOTE(review): if the installed Keras version accepts this call instead of raising,
# the layer stays in `model` and the next cell adds a second one -- confirm.
try:
    model.add(layers.Dense(name='FullyConnected_1', units=3, activation='relu'))
except ValueError as error:
    print('ValueError:', error)

Keras takes a simple approach and defines the input layer together with the first hidden layer via the parameter `input_dim` or `input_shape`. 

In [None]:
# First hidden layer: 3 ReLU units; `input_dim=2` also declares the two input features.
model.add(
    layers.Dense(
        units=3,
        activation='relu',
        input_dim=2,
        name='FullyConnected_1',
    )
)

let's see the structure of the model

In [None]:
# Print the layer-by-layer overview of the model, including the parameter counts.
model.summary()

**Question:** why are there 9 parameters?

> #### extra
>
> Which other activations could you use? Check out the [list of activations](https://keras.io/activations/).

now we can add the next hidden layer of 2 units

In [None]:
# Second hidden layer: 2 ReLU units (its input size follows from the previous layer).
model.add(
    layers.Dense(
        units=2,
        activation='relu',
        name='FullyConnected_2',
    )
)
model.summary()

add the output layer of a single unit and use a `sigmoid` activation function

In [None]:
# Output layer: a single sigmoid unit.
model.add(
    layers.Dense(
        units=1,
        activation='sigmoid',
        name='FullyConnected_OutputLayer',
    )
)
model.summary()

Models have to be compiled before training, we need to add:

- optimizer
- loss function
- metrics

The loss function defines the goal of our model. In this case binary classification.

The metric(s) can be used to evaluate the model on the test dataset. In addition, if we define `validation_split` at training time, a validation step is performed after each epoch. This is very helpful to assess the health of our model (overfitting, for example).

In [None]:
# Binary cross-entropy loss for the binary classification task, optimised with Adam;
# accuracy is the metric reported during training and evaluation.
model.compile(loss='binary_crossentropy', optimizer='Adam', metrics=['accuracy'])

> #### extra
>
> - Which other optimizers are available? Check the [list of optimizers](https://keras.io/optimizers/)
> - How many losses are there available? Check the [list of loss functions](https://keras.io/losses/)
> - What other metrics? [List of metrics](https://keras.io/metrics/)

If you really want the complete picture, check the configuration. Don't worry if you don't understand what all the parameters mean

In [None]:
# Display the full configuration of the model as the cell output.
model.get_config()

### Model is ready to be trained but where is the data?

Normally you would of course look at the data first, before creating the model, but this notebook focuses on the Keras API.

In [None]:
# Load the dataset and report its dimensions.
moons = pd.read_csv('data/moons.csv')
print('(rows, columns):', moons.shape)
# Show 3 randomly picked rows; no `random_state` is set, so they differ between runs.
moons.sample(3)

non-linearly separable data

In [None]:
# Scatter plot of the two features coloured by class; `fit_reg=False` turns off
# the regression fit. The trailing semicolon hides the returned object's repr.
sns.lmplot(
    x='x1',
    y='x2',
    hue='y',
    data=moons,
    fit_reg=False,
);

separate into train and test data

In [None]:
# Draw 80% of the rows (fixed seed) for training; every remaining row is test data.
train_index = moons.sample(frac=0.8, random_state=21).index

# `train_index` holds index *labels*, so select with the label-based `.loc`
# (positional `.iloc` only matches when the index is the default 0..n-1 range).
moons_train = moons.loc[train_index]
moons_test = moons.drop(index=train_index)

X_train, y_train = moons_train[['x1', 'x2']], moons_train['y']
X_test, y_test = moons_test[['x1', 'x2']], moons_test['y']

### Add custom callback to collect execution time

In [None]:
# Instantiate the custom callback that is handed to `model.fit` below.
time_summary = TimeSummary()
# Print the class's source code to show how the custom callback is written.
print(inspect.getsource(TimeSummary))

> #### extra
>
> Which default callbacks are available? Check the [list of callbacks](https://keras.io/callbacks/)

In [None]:
# Train silently (verbose=0) in batches of 100 rows for 10 epochs, with
# `validation_split=0.1`; the callback records execution time.
summary = model.fit(
    x=X_train,
    y=y_train,
    epochs=10,
    batch_size=100,
    validation_split=0.1,
    callbacks=[time_summary],
    verbose=0,
)

# `evaluate` returns the loss followed by the compiled metrics (accuracy here).
score = model.evaluate(x=X_test, y=y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

plot_training_summary(summary, time_summary)

### Explicit creation of layers

The model we created looks like

In [None]:
# Same three-layer network as before, with the activation folded into each Dense layer.
model = Sequential()
model.add(layers.Dense(3, activation='relu', input_dim=2, name='FullyConnected_1'))
model.add(layers.Dense(2, activation='relu', name='FullyConnected_2'))
model.add(layers.Dense(1, activation='sigmoid', name='FullyConnected_OutputLayer'))
model.summary()

we could have explicitly defined each of the activation functions as an extra layer

In [None]:
model = Sequential()

# No transformation is applied to the input itself.

# Hidden layer 1: linear Dense followed by its own ReLU activation layer.
model.add(layers.Dense(3, input_dim=2, name='HiddenLayer_1'))
model.add(layers.Activation('relu', name='ReLu_1'))

# Hidden layer 2: same pattern with 2 units.
model.add(layers.Dense(2, name='HiddenLayer_2'))
model.add(layers.Activation('relu', name='ReLu_2'))

# Output: one linear unit followed by a sigmoid activation layer.
model.add(layers.Dense(1, name='OutputLayer'))
model.add(layers.Activation('sigmoid', name='Sigmoid_3'))

model.summary()

this is very important to understand, as it allows for flexibility in the layer order when customizing a deep neural network.

NOTE: we use this explicit layer declaration in exercise 2

## Exercise 1

Change the parameters:

- batch_size
- epochs

and observe what happens to the execution time and learning curves

In [None]:
def make_model():
    """Return a fresh, uncompiled copy of the small feed-forward network.

    Architecture: 2 inputs -> Dense(3, relu) -> Dense(2, relu) -> Dense(1, sigmoid).
    """
    return Sequential([
        layers.Dense(units=3, activation='relu', input_dim=2, name='FullyConnected_1'),
        layers.Dense(units=2, activation='relu', name='FullyConnected_2'),
        layers.Dense(units=1, activation='sigmoid', name='FullyConnected_OutputLayer'),
    ])

# Build one instance right away to check that the factory works.
model = make_model()

In [None]:
# Fix the random seed before building the model, for reproducibility.
set_seed(123)

model = make_model()
model.compile(loss='binary_crossentropy', optimizer='Adam', metrics=['accuracy'])

# Exercise 1: vary `batch_size` and `epochs` here and compare the results.
summary = model.fit(
    x=X_train,
    y=y_train,
    epochs=10,
    batch_size=100,
    validation_split=0.1,
    callbacks=[time_summary],
    verbose=0,
)

# `evaluate` returns the loss followed by the compiled metrics (accuracy here).
score = model.evaluate(x=X_test, y=y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

plot_training_summary(summary, time_summary)

## Exercise 2 (solution in `utils.py`)

Make an overkill model for this data

- 3 hidden dense layers
- use a [`BatchNormalization`](https://keras.io/layers/normalization/) layer after each neuron layer before activation
- add `Dropout` layers
- use `relu` and `tanh` or other [activation functions](https://keras.io/activations/)

In [None]:
def make_overkill_model():
    """Exercise 2 skeleton: build and return the "overkill" Sequential model.

    As given, it returns an empty `Sequential`; the layers are meant to be
    added at the commented placeholders below.
    """
    model = Sequential()
    # input layer batch normalization (what does this do qualitatively speaking?)
    
    # 1st hidden
    
    # 2nd hidden
    
    # 3rd hidden
    
    # output layer
    
    return model

# Instantiate the factory defined in this cell (not `make_model` from exercise 1)
# to check that the overkill model builds.
model = make_overkill_model()

In [None]:
# Fix the random seed before building the model, for reproducibility.
set_seed(123)

model = make_overkill_model()
model.compile(loss='binary_crossentropy', optimizer='Adam', metrics=['accuracy'])

# Same training setup as in exercise 1, applied to the overkill model.
summary = model.fit(
    x=X_train,
    y=y_train,
    epochs=10,
    batch_size=100,
    validation_split=0.1,
    callbacks=[time_summary],
    verbose=0,
)

# `evaluate` returns the loss followed by the compiled metrics (accuracy here).
score = model.evaluate(x=X_test, y=y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

plot_training_summary(summary, time_summary)

#  [Next: Build your first convolutional neural network](04_build_your_first_convolutional_neural_network.ipynb)