### Bake a Deep Learning Classifier with Keras
---------------------------------------------------

Keras is a library that simplifies the construction of neural networks.

This notebook will highlight how to construct a simple feed-forward neural network to predict the final rankings of bakers from episode 2.

The features used in the model include the mean ranking for technical challenges and the ranking of the technical challenge for episode 2

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import vapeplot
import seaborn as sns
import scipy.stats
from datetime import datetime
%matplotlib inline

In [2]:
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Dense, Activation, Embedding, Flatten, Dropout
from keras.activations import relu, sigmoid, tanh

from sklearn.preprocessing import QuantileTransformer
from sklearn.metrics import roc_curve, auc
import warnings
warnings.filterwarnings("ignore")

def timestamp(): return datetime.today().strftime('%Y%m%d')

def quantile_scale(df,feats):
    qua = df
    scaler = QuantileTransformer(
        n_quantiles=10,
        random_state=42,
        ignore_implicit_zeros=True, #sparse matrix
    )
    # fit the scaler
    scaler.fit(qua[feats])
    # transform values
    qua[feats] = scaler.transform(qua[feats])
    return qua

def calc_95ci(a,confidence=0.95):
    a = 1.0 * np.array(a)
    n = len(a)
    m, se = np.nanmean(a), scipy.stats.sem(a)
    h = se * scipy.stats.t.ppf((1 + confidence) / 2., n-1)
    return h

def return_feats(df,feats,label):
    df = df.sample(frac=1.)
    X = np.matrix(df[feats])
    y = np.array(df[label])
    return X,y


Using TensorFlow backend.


In [3]:
# load data
episode=2
season=7
tech = pd.read_csv("../RESULTS/gbbo.techinical.data.20190907.tsv",sep='\t')

def transform_labels(classes):
    return np.where(classes<=7, classes, 8)

def tiered(classes):
    trans = []
    for x in classes:
        if x==1: c=0
        if x==2: c=1
        if x>=3 and x<=4: c=2
        if x>=5 and x<=7: c=3
        if x>=8: c=4
        trans.append(c)
    return trans

def _4tiers(classes):
    trans = []
    for x in classes:
        if x<=2: c=0
        if x>=3 and x<=4: c=1
        if x>=5 and x<=7: c=2
        if x>=8: c=3
        trans.append(c)
    return trans

classes = tiered(np.array(tech['place']))

tech['place']=classes
feats = ['tech_mean','tech']
tech = tech.loc[tech['episode']==episode]

tech = quantile_scale(tech,feats)
X,y = return_feats(tech,feats,'place')
X_test, y_test = return_feats(tech.loc[tech['season']==season],feats,'place')
X_train, y_train = return_feats(tech.loc[tech['season']!=season],feats,'place')

In [4]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import RandomizedSearchCV

input_shape = [np.matrix(X).shape[1]]
output_shape = [len(set(y))]

def create_model( nl1=1, nl2=1,  nl3=1, 
                 nn1=1000, nn2=500, nn3 = 200, lr=0.01, decay=0., l1=0.01, l2=0.01,
                act = 'relu', dropout=0,input_shape=input_shape,output_shape=output_shape):
    '''This is a model generating function so that we can search over neural net 
    parameters and architecture
    https://www.kaggle.com/arrogantlymodest/randomised-cv-search-over-keras-neural-network
    '''

    opt = keras.optimizers.Adam(lr=lr, beta_1=0.9, beta_2=0.999,  decay=decay)
    reg = keras.regularizers.l1_l2(l1=l1, l2=l2)
                                                     
    model = Sequential()
    
    # for the firt layer we need to specify the input dimensions
    first=True
    
    for i in range(nl1):
        if first:
            model.add(Dense(nn1, input_dim=input_shape, activation=act, kernel_regularizer=reg))
            first=False
        else: 
            model.add(Dense(nn1, activation=act, kernel_regularizer=reg))
        if dropout!=0:
            model.add(Dropout(dropout))
            
    for i in range(nl2):
        if first:
            model.add(Dense(nn2, input_dim=input_shape, activation=act, kernel_regularizer=reg))
            first=False
        else: 
            model.add(Dense(nn2, activation=act, kernel_regularizer=reg))
        if dropout!=0:
            model.add(Dropout(dropout))
            
    for i in range(nl3):
        if first:
            model.add(Dense(nn3, input_dim=input_shape, activation=act, kernel_regularizer=reg))
            first=False
        else: 
            model.add(Dense(nn3, activation=act, kernel_regularizer=reg))
        if dropout!=0:
            model.add(Dropout(dropout))
            
    model.add(Dense(output_shape, activation='sigmoid'))
    model.compile(loss='sparse_categorical_crossentropy', optimizer=opt, metrics=['accuracy'],)
    return model

model = KerasClassifier(build_fn=create_model, epochs=10, batch_size=30, verbose=0)

#### Hyperparameters
----------------------

Hyperparameters are model settings that are defined before training. 
For Neural Networks, this include the learning rate, the number of hidden layers, number of neurons in hidden layers, and neuron activation functions

We will evaluate the performance of a neural network across different hyperparameter conditions

In [5]:
print('Number of Baker Classes: {}'.format(output_shape))

#################
# learning algorithm parameters
lr=[1e-2, 1e-3, 1e-4]
decay=[1e-6,1e-9,0]
activation=['relu', 'sigmoid']
# numbers of layers
nl1 = [0,1,2]
nl2 = [0,1,2]
nl3 = [0,1,2]
# neurons in each layer
nn1=[50,300,700,1400,2100]
nn2=[10,100,400,800]
nn3=[10,50,150,300]
# dropout and regularisation
dropout = [0, 0.1, 0.2, 0.3,0.5]
l1 = [0, 0.01, 0.003, 0.001,0.0001]
l2 = [0, 0.01, 0.003, 0.001,0.0001]
################

param_grid = dict(
                    nl1=nl1, nl2=nl2, nl3=nl3, nn1=nn1, nn2=nn2, nn3=nn3,
                    act=activation, l1=l1, l2=l2, lr=lr, decay=decay, dropout=dropout, 
                    input_shape=input_shape, output_shape = output_shape,
                 )

# Leave One (Season) Out Cross Validation
# leave one out CV
from sklearn.model_selection import LeaveOneGroupOut
loo = LeaveOneGroupOut()
cv=loo.split(X,groups=tech['season'])

grid = RandomizedSearchCV(estimator=model, cv=cv, param_distributions=param_grid, 
                          verbose=10,  n_iter=10, n_jobs=8)


Number of Baker Classes: [5]


Now we do the Leave One Out Cross Validation over all the different combinations of hyperparameters. 

-------------------------------------------------
#### This will take a while so let it bake!
-------------------------------------------------

#### Results

##### Tiers
* epochs 10; 30 batch

```
{'output_shape': 5,
 'nn3': 300,
 'nn2': 800,
 'nn1': 1400,
 'nl3': 1,
 'nl2': 1,
 'nl1': 1,
 'lr': 0.0001,
 'l2': 0,
 'l1': 0.003,
 'input_shape': 2,
 'dropout': 0,
 'decay': 1e-06,
 'act': 'relu'}
 0.35922330328561725
```

* epochs 6; 20 batch; more neurons
```
{'output_shape': 5,
 'nn3': 150,
 'nn2': 400,
 'nn1': 50,
 'nl3': 0,
 'nl2': 1,
 'nl1': 0,
 'lr': 0.001,
 'l2': 0,
 'l1': 0,
 'input_shape': 2,
 'dropout': 0,
 'decay': 0,
 'act': 'relu'}
 0.35922330328561725
```


In [6]:
grid_result = grid.fit(X,y)

Fitting 9 folds for each of 10 candidates, totalling 90 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 tasks      | elapsed:    3.2s
[Parallel(n_jobs=8)]: Done   9 tasks      | elapsed:    5.3s
[Parallel(n_jobs=8)]: Done  16 tasks      | elapsed:   23.1s
[Parallel(n_jobs=8)]: Done  25 tasks      | elapsed:   25.8s
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:   32.6s
[Parallel(n_jobs=8)]: Done  45 tasks      | elapsed:   37.5s
[Parallel(n_jobs=8)]: Done  56 tasks      | elapsed:   48.4s
[Parallel(n_jobs=8)]: Done  69 tasks      | elapsed:   54.1s
[Parallel(n_jobs=8)]: Done  85 out of  90 | elapsed:  1.0min remaining:    3.6s






Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.




[Parallel(n_jobs=8)]: Done  90 out of  90 | elapsed:  1.0min finished


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [7]:
grid_result.best_params_

{'output_shape': 5,
 'nn3': 300,
 'nn2': 800,
 'nn1': 2100,
 'nl3': 1,
 'nl2': 2,
 'nl1': 0,
 'lr': 0.0001,
 'l2': 0.0001,
 'l1': 0.0001,
 'input_shape': 2,
 'dropout': 0.1,
 'decay': 1e-06,
 'act': 'relu'}

In [8]:
grid_result.best_score_

0.349514568025626

In [9]:
clf = grid_result.best_estimator_

In [10]:
clf.get_params()

{'epochs': 10,
 'batch_size': 30,
 'verbose': 0,
 'output_shape': 5,
 'nn3': 300,
 'nn2': 800,
 'nn1': 2100,
 'nl3': 1,
 'nl2': 2,
 'nl1': 0,
 'lr': 0.0001,
 'l2': 0.0001,
 'l1': 0.0001,
 'input_shape': 2,
 'dropout': 0.1,
 'decay': 1e-06,
 'act': 'relu',
 'build_fn': <function __main__.create_model(nl1=1, nl2=1, nl3=1, nn1=1000, nn2=500, nn3=200, lr=0.01, decay=0.0, l1=0.01, l2=0.01, act='relu', dropout=0, input_shape=[2], output_shape=[5])>}

In [13]:
best_reg = keras.regularizers.l1_l2(l1=0.0001, l2=0.0001)
best_opt = keras.optimizers.Adam(lr=0.0001, beta_1=0.9, beta_2=0.999,  decay=1e-09)

best_clf = Sequential([
    
    Dense(800, input_shape=(2, ), activation='relu',kernel_regularizer=best_reg),
    Dropout(0.1),
    Dense(800, activation='relu',kernel_regularizer=best_reg),
    Dropout(0.1),
    Dense(300, activation='relu',kernel_regularizer=best_reg),
    Dropout(0.1),
    Dense(5, activation='softmax')
])

best_clf.summary()
best_clf.compile(optimizer=best_opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
best_clf.fit(X, y, validation_split=0., batch_size=30, epochs=10, shuffle=False, verbose=3)


Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_7 (Dense)              (None, 800)               2400      
_________________________________________________________________
dropout_5 (Dropout)          (None, 800)               0         
_________________________________________________________________
dense_8 (Dense)              (None, 800)               640800    
_________________________________________________________________
dropout_6 (Dropout)          (None, 800)               0         
_________________________________________________________________
dense_9 (Dense)              (None, 300)               240300    
_________________________________________________________________
dropout_7 (Dropout)          (None, 300)               0         
_________________________________________________________________
dense_10 (Dense)             (None, 5)                

<keras.callbacks.History at 0x7fe1642e94d0>

In [12]:
y

array([3, 1, 3, 3, 4, 4, 0, 3, 1, 2, 4, 4, 1, 3, 4, 4, 2, 4, 4, 2, 4, 4,
       1, 3, 0, 3, 0, 4, 4, 2, 4, 1, 3, 3, 4, 4, 3, 3, 1, 1, 4, 2, 3, 4,
       3, 4, 2, 0, 3, 4, 2, 4, 1, 3, 1, 4, 0, 4, 2, 2, 3, 3, 3, 2, 2, 3,
       4, 2, 4, 2, 3, 0, 4, 3, 1, 3, 4, 2, 1, 0, 1, 4, 1, 1, 4, 3, 1, 4,
       2, 4, 4, 3, 0, 2, 0, 1, 2, 2, 4, 4, 3, 1, 4])