### Bake a Deep Learning Classifier with Keras
---------------------------------------------------

Keras is a library that simplifies the construction of neural networks.

This notebook will highlight how to construct a simple feed-forward neural network to predict the final rankings of bakers from episode 2.

The features used in the model include the mean ranking for technical challenges and the ranking of the technical challenge for episode 2

In [37]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import vapeplot
import seaborn as sns
import scipy.stats
from datetime import datetime
%matplotlib inline

In [39]:
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Dense, Activation, Embedding, Flatten, Dropout
from keras.activations import relu, sigmoid, tanh

from sklearn.preprocessing import QuantileTransformer
from sklearn.metrics import roc_curve, auc
import warnings
warnings.filterwarnings("ignore")

def timestamp(): return datetime.today().strftime('%Y%m%d')

def quantile_scale(df,feats):
    # force data into a normal distribution
    qua = df
    scaler = QuantileTransformer(
        n_quantiles=10,
        random_state=42,
        ignore_implicit_zeros=True, #sparse matrix
    )
    # fit the scaler
    scaler.fit(qua[feats])
    # transform values
    qua[feats] = scaler.transform(qua[feats])
    return qua

def calc_95ci(a,confidence=0.95):
    a = 1.0 * np.array(a)
    n = len(a)
    m, se = np.nanmean(a), scipy.stats.sem(a)
    h = se * scipy.stats.t.ppf((1 + confidence) / 2., n-1)
    return h

def return_feats(df,feats,label):
    # returns a matrix of features and labels
    df = df.sample(frac=1.)
    X = np.matrix(df[feats])
    y = np.array(df[label])
    return X,y
# functions to transform class labels into tiers
def transform_labels(classes):
    """converts all places >=8 to 8"""
    return np.where(classes<=7, classes, 8)
def tiered(classes):
    """
    0 = 1st place
    1 = runner-ups
    2 = 3rd-4th place
    3 = 5th-7th place
    4 = 8th and below
    """
    trans = []
    for x in classes:
        if x==1: c=0
        if x==2: c=1
        if x>=3 and x<=4: c=2
        if x>=5 and x<=7: c=3
        if x>=8: c=4
        trans.append(c)
    return trans

def _4tiers(classes):
    """
    0 = 1st and runner-up    
    1 = 3rd-4th place 
    rest follows tiered()
    """
    trans = []
    for x in classes:
        if x<=2: c=0
        if x>=3 and x<=4: c=1
        if x>=5 and x<=7: c=2
        if x>=8: c=3
        trans.append(c)
    return trans

In [42]:
# episode 2 classifier
episode=2
season=7

tech = pd.read_csv("../RESULTS/gbbo.features.20190909.tsv",sep='\t')
feats = ['tech_mean','tech','mean_star','star','mean_good','good','mean_bad','bad']

# transform class labels into tiers
classes = tiered(np.array(tech['place']))
tech['place']=classes
tech = tech.loc[tech['episode']==episode]

# noramlize features
tech = quantile_scale(tech,feats)
X,y = return_feats(tech,feats,'place')

In [43]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import RandomizedSearchCV

input_shape = [np.matrix(X).shape[1]]
output_shape = [len(set(y))]

"""
nl[1-3]:
the number of hidden neural layers.
this function takes a combination of different 
parameters to construct networks with varying dimensions
----------------------------------------------------
number of hidden layers =  1 - sum(max(nl1,nl2,nl3))

nn[1-3]: 
the number of neurons to spawn for each hidden layer
this variable is paired to nl, so nl1 layer will have nn1 neurons
"""

def create_model( nl1=1, nl2=1,  nl3=1, 
                 nn1=1000, nn2=500, nn3 = 200, lr=0.01, decay=0., l1=0.01, l2=0.01,
                act = 'relu', dropout=0,input_shape=input_shape,output_shape=output_shape):
    
    '''This is a model generating function so that we can search over neural net 
    parameters and architecture
    https://www.kaggle.com/arrogantlymodest/randomised-cv-search-over-keras-neural-network
    '''

    opt = keras.optimizers.Adam(lr=lr, beta_1=0.9, beta_2=0.999,  decay=decay)
    reg = keras.regularizers.l1_l2(l1=l1, l2=l2)
                                                     
    model = Sequential()
    
    # for the first layer we need to specify the input dimensions
    first=True
    
    for i in range(nl1):
        if first:
            model.add(Dense(nn1, input_dim=input_shape, activation=act, kernel_regularizer=reg))
            first=False
        else: 
            model.add(Dense(nn1, activation=act, kernel_regularizer=reg))
        if dropout!=0:
            model.add(Dropout(dropout))
            
    for i in range(nl2):
        if first:
            model.add(Dense(nn2, input_dim=input_shape, activation=act, kernel_regularizer=reg))
            first=False
        else: 
            model.add(Dense(nn2, activation=act, kernel_regularizer=reg))
        if dropout!=0:
            model.add(Dropout(dropout))
            
    for i in range(nl3):
        if first:
            model.add(Dense(nn3, input_dim=input_shape, activation=act, kernel_regularizer=reg))
            first=False
        else: 
            model.add(Dense(nn3, activation=act, kernel_regularizer=reg))
        if dropout!=0:
            model.add(Dropout(dropout))
            
    model.add(Dense(output_shape, activation='softmax'))
    model.compile(loss='sparse_categorical_crossentropy', optimizer=opt, metrics=['accuracy'],)
    return model


#### Hyperparameters
----------------------

Hyperparameters are model settings that are defined before training. 
For Neural Networks, this include the learning rate, the number of hidden layers, number of neurons in hidden layers, and neuron activation functions

We will evaluate the performance of a neural network across different hyperparameter conditions

In [49]:
print('Number of Baker Classes: {}'.format(output_shape))

#################
# learning algorithm parameters
lr=[1e-2, 1e-3, 1e-4]
decay=[1e-6,1e-9,0]
activation=['relu', 'sigmoid']
# numbers of layers
nl1 = [0,1,2,3,4]
nl2 = [0,1,2,3,4]
nl3 = [0,1,2,3,4]
# neurons in each layer
nn1=[2,4,8,16,32,64,128,300,700,1400,2100]
nn2=[10,100,400,800]
nn3=[10,50,150,300]
# dropout and regularisation
dropout = [0, 0.1, 0.2, 0.3,0.5]
l1 = [0, 0.01, 0.003, 0.001,0.0001]
l2 = [0, 0.01, 0.003, 0.001,0.0001]
################
EPOCHS, BATCH = 25, 36
param_grid = dict(
                    nl1=nl1, nl2=nl2, nl3=nl3, nn1=nn1, nn2=nn2, nn3=nn3,
                    act=activation, l1=l1, l2=l2, lr=lr, decay=decay, dropout=dropout, 
                    input_shape=input_shape, output_shape = output_shape,
                 )

model = KerasClassifier(build_fn=create_model, epochs=EPOCHS, batch_size=BATCH, verbose=0)
# Leave One (Season) Out Cross Validation
# leave one out CV
from sklearn.model_selection import LeaveOneGroupOut
loo = LeaveOneGroupOut()
cv=loo.split(X,groups=tech['season'])

grid = RandomizedSearchCV(estimator=model, cv=cv, param_distributions=param_grid, 
                          verbose=10,  n_iter=10, n_jobs=8)


Number of Baker Classes: [5]


Now we do the Leave One Out Cross Validation over all the different combinations of hyperparameters. 

-------------------------------------------------
#### This will take a while so let it bake!
-------------------------------------------------

#### Results
--------------------
 0.3301886835328813 

epochs = 6, batch_size = 20
{'output_shape': 5, 'nn3': 150, 'nn2': 10, 'nn1': 16, 'nl3': 0, 'nl2': 4, 'nl1': 0, 'lr': 0.01, 'l2': 0, 'l1': 0.003, 'input_shape': 8, 'dropout': 0, 'decay': 1e-06, 'act': 'sigmoid'}

 0.36792453462785146 
 
epochs = 100, batch_size = 24
{'output_shape': 5, 'nn3': 50, 'nn2': 100, 'nn1': 32, 'nl3': 1, 'nl2': 1, 'nl1': 0, 'lr': 0.01, 'l2': 0, 'l1': 0.01, 'input_shape': 8, 'dropout': 0.2, 'decay': 1e-06, 'act': 'relu'}

---------------------------------------------

 0.48113208053246986 
 
 epochs = 25 batch_size=36
{'output_shape': 5, 'nn3': 300, 'nn2': 400, 'nn1': 128, 'nl3': 0, 'nl2': 3, 'nl1': 2, 'lr': 0.001, 'l2': 0.003, 'l1': 0.0001, 'input_shape': 8, 'dropout': 0.3, 'decay': 0, 'act': 'relu'}

---------------------------------------------

In [50]:
grid_result = grid.fit(X,y)

Fitting 9 folds for each of 10 candidates, totalling 90 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 tasks      | elapsed:   11.0s
[Parallel(n_jobs=8)]: Done   9 tasks      | elapsed:   17.7s
[Parallel(n_jobs=8)]: Done  16 tasks      | elapsed:   23.7s
[Parallel(n_jobs=8)]: Done  25 tasks      | elapsed:   31.0s
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:   37.8s
[Parallel(n_jobs=8)]: Done  45 tasks      | elapsed:   51.2s
[Parallel(n_jobs=8)]: Done  56 tasks      | elapsed:  1.1min
[Parallel(n_jobs=8)]: Done  69 tasks      | elapsed:  1.6min
[Parallel(n_jobs=8)]: Done  85 out of  90 | elapsed:  2.0min remaining:    6.9s
[Parallel(n_jobs=8)]: Done  90 out of  90 | elapsed:  2.1min finished


In [51]:
print('-'*45)
print('\n',grid_result.best_score_,'\n')
print(grid_result.best_params_)
print('-'*45)

---------------------------------------------

 0.48113208053246986 

{'output_shape': 5, 'nn3': 300, 'nn2': 400, 'nn1': 128, 'nl3': 0, 'nl2': 3, 'nl1': 2, 'lr': 0.001, 'l2': 0.003, 'l1': 0.0001, 'input_shape': 8, 'dropout': 0.3, 'decay': 0, 'act': 'relu'}
---------------------------------------------


In [53]:
grid_result.best_params_

{'output_shape': 5,
 'nn3': 300,
 'nn2': 400,
 'nn1': 128,
 'nl3': 0,
 'nl2': 3,
 'nl1': 2,
 'lr': 0.001,
 'l2': 0.003,
 'l1': 0.0001,
 'input_shape': 8,
 'dropout': 0.3,
 'decay': 0,
 'act': 'relu'}

In [48]:
params = grid_result.best_params_
l1 = 0.0001
l2 = 0
lr = 0.0001
nl1 = 0
nl2 = 2
nl3 = 2
nn1 = 2100
nn2 = 10
nn3 = 300
dropout = 0.3
decay = 0
act='relu'
n_dims = np.matrix(X).shape[1]
n_classes = len(set(y))
clf = create_model( nl1=nl1, nl2=nl2,  nl3=nl3, 
                     nn1=nn1, nn2=nn2, nn3 = nn3, 
                     lr=lr, decay=decay, l1=l1, l2=l2,
                     act = act, dropout=dropout,
                     input_shape=n_dims,
                     output_shape=n_classes)


In [28]:
clf.summary()
clf.fit(X, y, validation_split=0.2, batch_size=24, epochs=100, verbose=3)

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_26 (Dense)             (None, 10)                90        
_________________________________________________________________
dense_27 (Dense)             (None, 10)                110       
_________________________________________________________________
dense_28 (Dense)             (None, 10)                110       
_________________________________________________________________
dense_29 (Dense)             (None, 10)                110       
_________________________________________________________________
dense_30 (Dense)             (None, 5)                 55        
Total params: 475
Trainable params: 475
Non-trainable params: 0
_________________________________________________________________
Epoch 1/1


<keras.callbacks.History at 0x7f8064397d90>

In [54]:
# now iterate the number of neurons per layer
#################
# learning algorithm parameters
lr=[1e-2, 1e-3, 1e-4]
decay=[1e-6,1e-9,0]
activation=['relu', 'sigmoid']
# numbers of layers
nl1 = [2,3,4]
nl2 = [2,3,4]
nl3 = [0,1,2,3,4]
# neurons in each layer
nn1=[2,4,8,16,32,64,128,300,512,700,1024,1400,2048,2100,2500]
nn2=nn1
nn3=nn1
# dropout and regularisation
dropout = [0, 0.1, 0.2, 0.3,0.5]
l1 = [0, 0.01, 0.003, 0.001,0.0001]
l2 = [0, 0.01, 0.003, 0.001,0.0001]
################
EPOCHS, BATCH = 25, 36
param_grid = dict(
                    nl1=nl1, nl2=nl2, nl3=nl3, nn1=nn1, nn2=nn2, nn3=nn3,
                    act=activation, l1=l1, l2=l2, lr=lr, decay=decay, dropout=dropout, 
                    input_shape=input_shape, output_shape = output_shape,
                 )

model = KerasClassifier(build_fn=create_model, epochs=EPOCHS, batch_size=BATCH, verbose=0)
loo = LeaveOneGroupOut()
cv=loo.split(X,groups=tech['season'])
grid_2cv = RandomizedSearchCV(estimator=model, cv=cv, param_distributions=param_grid, 
                          verbose=10,  n_iter=10, n_jobs=8)
grid_2nd = grid_2cv.fit(X,y)
print('-'*45)
print('\n',grid_2nd.best_score_,'\n')
print(grid_2nd.best_params_)
print('-'*45)

Fitting 9 folds for each of 10 candidates, totalling 90 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 tasks      | elapsed:  1.6min
[Parallel(n_jobs=8)]: Done   9 tasks      | elapsed:  3.1min
[Parallel(n_jobs=8)]: Done  16 tasks      | elapsed:  3.2min
[Parallel(n_jobs=8)]: Done  25 tasks      | elapsed:  3.3min
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:  4.1min
[Parallel(n_jobs=8)]: Done  45 tasks      | elapsed:  7.3min
[Parallel(n_jobs=8)]: Done  56 tasks      | elapsed:  8.2min
[Parallel(n_jobs=8)]: Done  69 tasks      | elapsed:  8.9min
[Parallel(n_jobs=8)]: Done  85 out of  90 | elapsed:  9.2min remaining:   32.6s
[Parallel(n_jobs=8)]: Done  90 out of  90 | elapsed:  9.3min finished


---------------------------------------------

 0.36792452886419474 

{'output_shape': 5, 'nn3': 8, 'nn2': 4, 'nn1': 300, 'nl3': 2, 'nl2': 2, 'nl1': 2, 'lr': 0.01, 'l2': 0, 'l1': 0.001, 'input_shape': 8, 'dropout': 0.2, 'decay': 1e-09, 'act': 'relu'}
---------------------------------------------


In [55]:
grid_2nd.best_params_

{'output_shape': 5,
 'nn3': 8,
 'nn2': 4,
 'nn1': 300,
 'nl3': 2,
 'nl2': 2,
 'nl1': 2,
 'lr': 0.01,
 'l2': 0,
 'l1': 0.001,
 'input_shape': 8,
 'dropout': 0.2,
 'decay': 1e-09,
 'act': 'relu'}

In [9]:
reg = keras.regularizers.l1_l2(l1=0.003, l2=0.0001)
opt = keras.optimizers.Adam(lr=0.0001, beta_1=0.9, beta_2=0.999,  decay=1e-09)

best_clf = Sequential([
    
    Dense(800, input_shape=(2, ), activation='relu',kernel_regularizer=best_reg),
    Dropout(0.1),
    Dense(800, activation='relu',kernel_regularizer=best_reg),
    Dropout(0.1),
    Dense(300, activation='relu',kernel_regularizer=best_reg),
    Dropout(0.1),
    Dense(5, activation='softmax')
])

best_clf.summary()
best_clf.compile(optimizer=best_opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
best_clf.fit(X, y, validation_split=0., batch_size=30, epochs=10, shuffle=False, verbose=3)


In [10]:
clf.get_params()

{'epochs': 10,
 'batch_size': 30,
 'verbose': 0,
 'output_shape': 5,
 'nn3': 300,
 'nn2': 800,
 'nn1': 2100,
 'nl3': 1,
 'nl2': 2,
 'nl1': 0,
 'lr': 0.0001,
 'l2': 0.0001,
 'l1': 0.0001,
 'input_shape': 2,
 'dropout': 0.1,
 'decay': 1e-06,
 'act': 'relu',
 'build_fn': <function __main__.create_model(nl1=1, nl2=1, nl3=1, nn1=1000, nn2=500, nn3=200, lr=0.01, decay=0.0, l1=0.01, l2=0.01, act='relu', dropout=0, input_shape=[2], output_shape=[5])>}

In [12]:
y

array([3, 1, 3, 3, 4, 4, 0, 3, 1, 2, 4, 4, 1, 3, 4, 4, 2, 4, 4, 2, 4, 4,
       1, 3, 0, 3, 0, 4, 4, 2, 4, 1, 3, 3, 4, 4, 3, 3, 1, 1, 4, 2, 3, 4,
       3, 4, 2, 0, 3, 4, 2, 4, 1, 3, 1, 4, 0, 4, 2, 2, 3, 3, 3, 2, 2, 3,
       4, 2, 4, 2, 3, 0, 4, 3, 1, 3, 4, 2, 1, 0, 1, 4, 1, 1, 4, 3, 1, 4,
       2, 4, 4, 3, 0, 2, 0, 1, 2, 2, 4, 4, 3, 1, 4])