<a href="https://colab.research.google.com/github/georgeliu1998/keras_model_tuning/blob/master/keras.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import sklearn
import pickle
from time import time

#from sklearn import datasets
#from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import StratifiedKFold, cross_val_score
#from sklearn.metrics import confusion_matrix, mean_squared_error
from sklearn.datasets import load_iris

import xgboost as xgb

from keras.models import Sequential
from keras.layers import Dense
#from keras import optimizers
#from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import to_categorical

Using TensorFlow backend.


In [0]:
seed = np.random.RandomState(6)

In [0]:
iris = load_iris()

X = iris['data']
y = iris['target']

In [4]:
clf = xgb.XGBClassifier()
cv = StratifiedKFold(n_splits=5, random_state=seed)

scores = cross_val_score(clf, X, y, cv=cv)

print("Mean Accuracy: {:.2%}, Standard Deviation: {:.2%}".format(scores.mean(), scores.std()))


Mean Accuracy: 95.33%, Standard Deviation: 3.40%


In [5]:
print("X shape: {}, y shape: {}".format(X.shape, y.shape))

X shape: (150, 4), y shape: (150,)


In [0]:
class SequentialModel:
  
  def __init__(self, input_dim, num_layers, num_units, 
               activation, activation_out, 
               loss, initializer, optimizer, 
               metrics, epochs, batch_size, one_hot=False):
    """
    Params:
      input_dim: int, number of features
      num_layers: int, number of layers of the model (excluding the input layer)
      num_units: list, number of units in each layer(excluding the input layer)
      activation: str, activation function used in all layers except output
      activation_out: str, activation function used in output layer
      loss: str, loss functon
      initializer: str, kernel initializer
      optimizer: str, optimizer
      metrics: list of strings, metrics used
      epochs: int, number of epochs to train for
      batch_size: int, number of samples per batch
      one_hot: bool, whether one hot encoding is needed
    """
    self.input_dim = input_dim
    self.num_layers = num_layers
    self.num_units = num_units
    self.activation = activation
    self.activation_out = activation_out
    self.loss = loss
    self.initializer = initializer
    self.optimizer = optimizer
    self.metrics = metrics
    self.epochs = epochs
    self.batch_size = batch_size
    self.one_hot = one_hot
    
    # Initialize the sequential model
    self.model = Sequential()
  
    
  def build_model(self):
    """
    Adds layers and compiles the model
    """
    # Ensure num_units tuple's length is the same as num_layers
    if self.num_layers != len(self.num_units):
      # Expand the list by repeating number of nodes except for last layer
      num_nodes, num_nodes_out = self.num_units[0], self.num_units[-1]
      self.num_units = [i for i in range(num_layers-1) for i in [num_nodes]]
      self.num_units.append(num_nodes_out) 
    
    # Loop thru all the layers
    for i in range(self.num_layers):
      # Different layers should have different setups
      if i == 0: # first layer
        self.model.add(Dense(units=self.num_units[i],
                             input_dim=self.input_dim,
                             kernel_initializer=initializer,
                             activation=activation)) 
      elif i+1 == self.num_layers: # output layer
        self.model.add(Dense(units=self.num_units[i],
                             kernel_initializer=initializer,
                             activation=activation_out))
      else:
        self.model.add(Dense(units=self.num_units[i],
                            kernel_initializer=initializer,
                            activation=activation))
    
    self.model.compile(loss=self.loss,
                       optimizer=self.optimizer,
                       metrics=self.metrics)
      
  
  
  def evaluate_model(self, X, y, n_splits=3):
    """
    Evaluates the model using cross-validation.
    
    Params:
      X: np.array, features
      y: np.array, labels
      n_splits: int, number of folds for the cross-validation
    """
    score_lst = []
    t1 = time()
    
    print("Starting {}-fold cross-validation...".format(n_splits))
    
    kfold = StratifiedKFold(n_splits=n_splits, 
                            shuffle=True, 
                            random_state=seed)
    
    # Loop through the different folds
    for train_index, test_index in kfold.split(X, y):
      # Do one-hot encoding when needed
      if self.one_hot:
        y_one_hot = to_categorical(y)
      else:
        y_one_hot = y
        
      self.model.fit(X[train_index],
                     y_one_hot[train_index],
                     epochs=self.epochs,
                     batch_size=self.batch_size,
                     verbose=1)
        
      scores = self.model.evaluate(X[test_index],
                                   y_one_hot[test_index], 
                                   verbose=1)
            
      # The second item is accuracy
      score_lst.append(scores[1])

    t2 = time()
    t = t2 - t1
    # Convert time to mintues
    t /= 60

    print("Finished cross-valiation. Took {:.1f} mintues.".format(t))

    # Convert to np.array and calculate mean and sd
    score_lst = np.array(score_lst)
    mean_acc = score_lst.mean()
    sd_acc = score_lst.std()

    print("Mean Accuracy: {:.2%}, Standard Deviation: {:.2%}".format(mean_acc, sd_acc))
    return mean_acc

In [7]:
input_dim = 4
num_layers = 2
num_units = (4, 3) 
activation = 'relu'
activation_out = 'softmax'
loss = 'binary_crossentropy'
initializer = 'random_uniform'
optimizer = 'adam'
metrics = ['accuracy']
epochs = 100
batch_size = 5
one_hot = True


model = SequentialModel(input_dim, num_layers, num_units,
                        activation, activation_out, 
                        loss, initializer, optimizer, 
                        metrics, epochs, batch_size, one_hot)

model.build_model()
model.evaluate_model(X, y)

Starting 3-fold cross-validation...
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/10

0.9055011003625159

In [8]:
pima = pd.read_csv("/content/pima-indians-diabetes.csv", header=None)
pima.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [9]:
X_pima, y_pima = pima.values[:, 0:8], pima.values[:, 8]
print(X_pima.shape, y_pima.shape)

(768, 8) (768,)


In [10]:
input_dim = 8
num_layers = 2
num_units = (8, 1) 
activation = 'relu'
activation_out = 'sigmoid'
loss = 'binary_crossentropy'
initializer = 'random_uniform'
optimizer = 'adam'
metrics = ['accuracy']
epochs = 10
batch_size = 5
one_hot = False


model = SequentialModel(input_dim, num_layers, num_units,
                        activation, activation_out, 
                        loss, initializer, optimizer, 
                        metrics, epochs, batch_size, one_hot)

model.build_model()
model.evaluate_model(X_pima, y_pima)

Starting 3-fold cross-validation...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Finished cross-valiation. Took 0.3 mintues.
Mean Accuracy: 68.36%, Standard Deviation: 0.75%


0.6836159832613208

In [0]:
width = [8, 16, 32, 64]
depth = [2, 4, 8, 16]
loss = ['binary_crossentropy', 'categorical_crossentropy', 'sparse_categorical_crossentropy']
initializer = ['random_uniform', 'random_normal', 'glorot_normal', 'glorot_uniform']
optimizer = ['adam', 'adagrad', 'sgd', 'rmsprop']
epochs = [10, 20, 40, 100]
batch_size = [1, 5, 10, 15]


tuning_options = {'width': width,
                  'depth': depth, 
                  'loss': loss, 
                  'initializer': initializer, 
                  'optimizer': optimizer, 
                  'epochs': epochs, 
                  'batch_size': batch_size}

In [12]:
results = {}
error_options = {}

for parameter, options in tuning_options.items():
  
  results[parameter] = []
  
  input_dim = 8
  num_layers = 2
  num_units = (8, 1) 
  activation = 'relu'
  activation_out = 'sigmoid'
  loss = 'binary_crossentropy'
  initializer = 'random_uniform'
  optimizer = 'adam'
  metrics = ['accuracy']
  epochs = 10
  batch_size = 5
  one_hot = False

  for option in options:
    
    if parameter == 'width':
      num_units = (option, 1)
    elif parameter == 'depth':
      num_layers = option
    elif parameter == 'loss':
      loss = option
    elif parameter == 'initializer':
      initializer = option
    elif parameter == 'optimizer':
      optimizer = option
    elif parameter == 'epochs':
      epochs = option
    else:
      batch_size = option
    
    print("Evaluating for parameter {} as {}...".format(parameter, option))
    
    model = SequentialModel(input_dim, num_layers, num_units,
                        activation, activation_out, 
                        loss, initializer, optimizer, 
                        metrics, epochs, batch_size, one_hot)
    try:
      model.build_model()
      result = model.evaluate_model(X_pima, y_pima)  
      results[parameter].append(result)
    except:
      error_options[parameter] = option
      print('Error, skipped.')
      pass

# Save the dict    
with open('cross_validation_results.pkl', 'wb') as f:
    pickle.dump(results, f, pickle.HIGHEST_PROTOCOL)

Evaluating for parameter width as 8...
Starting 3-fold cross-validation...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Finished cross-valiation. Took 0.3 mintues.
Mean Accuracy: 70.05%, Standard Deviation: 1.57%
Evaluating for parameter width as 16...
Starting 3-fold cross-validation...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Finished cross-valiation. Took 0.3 mintues.
Mean Accuracy: 69.79%, Standard Deviation: 1.7

In [13]:
results

{'batch_size': [0.7200593556005289,
  0.6954772290264626,
  0.6875476450180492,
  0.687573096511814],
 'depth': [0.7018808127492487,
  0.6745013590222327,
  0.6510431969321328,
  0.6510431968542182],
 'epochs': [0.6797657027988788,
  0.7123028239693684,
  0.7448754525191301,
  0.7526115569762523],
 'initializer': [0.6862454413828033,
  0.6783667991639808,
  0.6679652920207912,
  0.5989493334400384],
 'loss': [0.7031371786224464, 0.0],
 'optimizer': [0.7032541046956041,
  0.6758491597377873,
  0.6457889065477279,
  0.6731179339758199],
 'width': [0.7005382384668426,
  0.697928727246723,
  0.6901721570472449,
  0.7161684229034323]}

In [14]:
error_options

{'loss': 'categorical_crossentropy'}

In [0]:
with open('cross_validation_results.pkl', 'rb') as f:
    results_load = pickle.load(f)

In [16]:
results_load

{'batch_size': [0.7200593556005289,
  0.6954772290264626,
  0.6875476450180492,
  0.687573096511814],
 'depth': [0.7018808127492487,
  0.6745013590222327,
  0.6510431969321328,
  0.6510431968542182],
 'epochs': [0.6797657027988788,
  0.7123028239693684,
  0.7448754525191301,
  0.7526115569762523],
 'initializer': [0.6862454413828033,
  0.6783667991639808,
  0.6679652920207912,
  0.5989493334400384],
 'loss': [0.7031371786224464, 0.0],
 'optimizer': [0.7032541046956041,
  0.6758491597377873,
  0.6457889065477279,
  0.6731179339758199],
 'width': [0.7005382384668426,
  0.697928727246723,
  0.6901721570472449,
  0.7161684229034323]}