## This example shows how to use hyperopt to pick hyperparameters for an MLP classifier on the MNIST handwritten digits dataset

In [1]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from sklearn.datasets import fetch_mldata
from sklearn.neural_network import MLPClassifier
import numpy as np

# Fetch the MNIST digits dataset
mnist = fetch_mldata('MNIST original')

# Rescale pixel intensities into [0, 1] (assumes raw values lie in 0..255)
X = mnist.data / 255.
y = mnist.target

# The first 60000 samples are used for training ...
X_train = X[:60000]
y_train = y[:60000]

# ... and everything after them for testing
X_test = X[60000:]
y_test = y[60000:]

In [2]:
# Define an objective function to minimize
# The classifier will be created, trained, and scored within this function
def objective(args):
    """Train an MLP with the sampled hyperparameters and report its loss.

    Parameters
    ----------
    args : dict
        One sample from the search space. The keys read here are
        'layer_size', 'alpha', 'algorithm', 'activation' and 'learning_rate'.

    Returns
    -------
    dict
        'loss'   -- negated score of the fitted classifier on (X_test, y_test),
                    so that minimizing the loss maximizes the score
        'status' -- STATUS_OK
        'model'  -- the fitted MLPClassifier, kept so it can be retrieved from
                    the Trials object afterwards

    NOTE(review): `algorithm=` matches the scikit-learn build this notebook was
    run with (see the MLPClassifier repr in the saved outputs); released
    scikit-learn versions appear to call this keyword `solver` -- confirm
    before running against a newer install.
    """
    # 'layer_size' is sampled as a float, but the layer width must be an integer
    n_hidden = int(args['layer_size'])

    # Single hidden layer; iteration cap, tolerance and seed are held fixed
    classifier = MLPClassifier(
        hidden_layer_sizes=(n_hidden,),
        max_iter=10,
        alpha=args['alpha'],
        algorithm=args['algorithm'],
        tol=1e-4,
        random_state=1,
        activation=args['activation'],
        learning_rate_init=args['learning_rate'],
    )
    classifier.fit(X_train, y_train)

    # NOTE: Normally a separate 'validation' set should be scored here, with
    #       the 'test' set reserved for the final classifier only. Once the
    #       parameters have been selected, the final classifier can be
    #       retrained on both the 'training' and 'validation' sets.
    accuracy = classifier.score(X_test, y_test)

    # hyperopt requires 'loss' and 'status'; extra keys are stored alongside them
    return {'loss': -accuracy, 'status': STATUS_OK, 'model': classifier}

In [21]:
# Search space handed to fmin. The objective function takes one dictionary
# argument, so the space is a dictionary with the same keys.
space = dict(
    # Hidden layer width: whole-number steps between 25 and 100
    layer_size=hp.quniform('layer_size', 25, 100, 1),
    # Regularization strength: log-normal centred on 1e-4
    alpha=hp.lognormal('alpha', mu=np.log(1e-4), sigma=1),
    algorithm=hp.choice('algorithm', ['l-bfgs', 'sgd', 'adam']),
    activation=hp.choice('activation', ['logistic', 'tanh', 'relu']),
    # Initial learning rate: log-uniform between 1e-4 and 1
    # (linear-scale alternative: hp.uniform('learning_rate', low=0.001, high=0.999))
    learning_rate=hp.loguniform('learning_rate', low=np.log(1e-4), high=np.log(1.)),
)

In [24]:
# Trials collects the result dictionary returned by every evaluation
trials = Trials()

# Minimize the objective over the space with the TPE suggestion algorithm,
# stopping after 10 evaluations
best = fmin(objective, space=space, algo=tpe.suggest, max_evals=10, trials=trials)

In [25]:
# The objective stored each fitted classifier under 'model'; pull out the one
# belonging to the best trial
best_model = trials.best_trial['result']['model']

# Score that classifier on both splits, then report
train_accuracy = best_model.score(X_train, y_train)
test_accuracy = best_model.score(X_test, y_test)
print("Training Accuracy: %f" % train_accuracy)
print("Testing Accuracy: %f" % test_accuracy)

Training Accuracy: 0.985050
Testing Accuracy: 0.972200


In [16]:
# Show the parameter assignment returned by fmin.
# For hp.choice parameters ('activation', 'algorithm') the value shown is an
# integer index into the list of options, not the option itself;
# presumably hyperopt.space_eval(space, best) maps it back -- verify.
# NOTE(review): this cell's execution count (In [16]) predates the search cell
# (In [24]), and the saved values differ from trials.best_trial, so the output
# looks stale -- re-run to refresh.
best

{'activation': 0,
 'algorithm': 1,
 'alpha': 5.7616756046266688e-05,
 'layer_size': 92.0,
 'learning_rate': 0.36964613992731787}

In [26]:
# Show the full record of the best trial: the sampled values live under
# 'misc' -> 'vals', and the dictionary returned by objective() is under 'result'.
trials.best_trial

{'book_time': datetime.datetime(2016, 5, 24, 21, 9, 31, 41000),
 'exp_key': None,
 'misc': {'cmd': ('domain_attachment', 'FMinIter_Domain'),
  'idxs': {'activation': [3],
   'algorithm': [3],
   'alpha': [3],
   'layer_size': [3],
   'learning_rate': [3]},
  'tid': 3,
  'vals': {'activation': [2],
   'algorithm': [1],
   'alpha': [0.00016887477537756929],
   'layer_size': [45.0],
   'learning_rate': [0.085753033052009875]},
  'workdir': None},
 'owner': None,
 'refresh_time': datetime.datetime(2016, 5, 24, 21, 9, 37, 523000),
 'result': {'loss': -0.9722,
  'model': MLPClassifier(activation='relu', algorithm='sgd', alpha=0.000168874775378,
         batch_size='auto', beta_1=0.9, beta_2=0.999, early_stopping=False,
         epsilon=1e-08, hidden_layer_sizes=(45,), learning_rate='constant',
         learning_rate_init=0.085753033052, max_iter=10, momentum=0.9,
         nesterovs_momentum=True, power_t=0.5, random_state=1, shuffle=True,
         tol=0.0001, validation_fraction=0.1, verbose

In [27]:
# Show the result dictionaries ('loss', 'model', 'status') returned by
# objective(), one entry per evaluation.
trials.results

[{'loss': -0.9315,
  'model': MLPClassifier(activation='relu', algorithm='sgd', alpha=9.14078888052e-05,
         batch_size='auto', beta_1=0.9, beta_2=0.999, early_stopping=False,
         epsilon=1e-08, hidden_layer_sizes=(73,), learning_rate='constant',
         learning_rate_init=0.00284028790736, max_iter=10, momentum=0.9,
         nesterovs_momentum=True, power_t=0.5, random_state=1, shuffle=True,
         tol=0.0001, validation_fraction=0.1, verbose=False,
         warm_start=False),
  'status': 'ok'},
 {'loss': -0.9395,
  'model': MLPClassifier(activation='tanh', algorithm='adam', alpha=2.04779917725e-05,
         batch_size='auto', beta_1=0.9, beta_2=0.999, early_stopping=False,
         epsilon=1e-08, hidden_layer_sizes=(53,), learning_rate='constant',
         learning_rate_init=0.000204837302568, max_iter=10, momentum=0.9,
         nesterovs_momentum=True, power_t=0.5, random_state=1, shuffle=True,
         tol=0.0001, validation_fraction=0.1, verbose=False,
         warm_s