In [1]:
!pip install keras-tuner -q

[?25l[K     |█████▏                          | 10kB 16.6MB/s eta 0:00:01[K     |██████████▍                     | 20kB 18.0MB/s eta 0:00:01[K     |███████████████▋                | 30kB 11.7MB/s eta 0:00:01[K     |████████████████████▉           | 40kB 9.5MB/s eta 0:00:01[K     |██████████████████████████      | 51kB 5.4MB/s eta 0:00:01[K     |███████████████████████████████▎| 61kB 6.3MB/s eta 0:00:01[K     |████████████████████████████████| 71kB 3.8MB/s 
[?25h  Building wheel for keras-tuner (setup.py) ... [?25l[?25hdone
  Building wheel for terminaltables (setup.py) ... [?25l[?25hdone


In [13]:
import collections
import numpy as np
import os
import pickle
from sklearn import model_selection
from sklearn.pipeline import Pipeline
import tensorflow as tf
import kerastuner as kt


class ShallowTuner(kt.engine.base_tuner.BaseTuner):
    """Tuner for models that expose ``fit``/``score`` and are trained in one call.

    Each trial builds a model from the trial's hyperparameters, fits it,
    scores it on the supplied validation data, reports that value to the
    oracle under the metric name ``'score'`` and pickles the fitted model
    into the trial directory.
    """

    def run_trial(self, trial, X, y, validation_data):
        """Build, fit, score and persist the model for one trial.

        Args:
            trial: The trial to run; its ``hyperparameters`` are passed to
                ``self.hypermodel.build`` and its ``trial_id`` identifies
                the result and the saved model.
            X: Training inputs, passed to ``model.fit``.
            y: Training targets, passed to ``model.fit``.
            validation_data: A two-item ``(X_val, y_val)`` sequence passed
                to ``model.score``.
        """
        estimator = self.hypermodel.build(trial.hyperparameters)
        estimator.fit(X, y)

        X_val, y_val = validation_data
        val_score = estimator.score(X_val, y_val)

        # The oracle expects a dict keyed by metric name.
        metrics = {'score': val_score}
        self.oracle.update_trial(trial.trial_id, metrics)

        # Persist the fitted model so it can be reloaded later.
        self.save_model(trial.trial_id, estimator)

    def save_model(self, trial_id, model, step=0):
        """Pickle ``model`` to ``model.pickle`` inside the trial directory.

        ``step`` is accepted but not used by this implementation.
        """
        trial_dir = self.get_trial_dir(trial_id)
        target_path = os.path.join(trial_dir, 'model.pickle')
        with tf.io.gfile.GFile(target_path, 'wb') as out_file:
            pickle.dump(model, out_file)

    def load_model(self, trial):
        """Unpickle and return the model stored for ``trial``.

        Reads ``model.pickle`` from the directory of ``trial.trial_id``.
        """
        trial_dir = self.get_trial_dir(trial.trial_id)
        source_path = os.path.join(trial_dir, 'model.pickle')
        with tf.io.gfile.GFile(source_path, 'rb') as in_file:
            restored = pickle.load(in_file)
        return restored

In [14]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

# Load the hand-written digits dataset
digits = load_digits()

# Get the images and corresponding labels
images, labels = digits.images, digits.target
n_samples = len(images)

# Sanity check: there must be exactly one label per image.
assert len(labels) == n_samples, "images and labels differ in length"

# Flatten every image into a single feature vector (one row per sample)
X = images.reshape((n_samples, -1))

# Split data into train / validation / test subsets.
# shuffle=False keeps the original sample order: the last 20% of the data
# becomes the test set, and the last 20% of the remainder becomes the
# validation set (roughly 64% / 16% / 20% overall).
X_train, X_test, y_train, y_test = train_test_split(
    X, labels, test_size=0.2, shuffle=False)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.2, shuffle=False)



In [15]:
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from kerastuner.engine import hyperparameters as hp


def build_model(hp):
    """Build an unfitted classifier from the sampled hyperparameters.

    The search space first chooses ``model_type`` and then, inside a
    conditional scope tied to that choice, the hyperparameters of the
    selected model:

    * ``'svm'``: ``SVC`` with ``C`` in [1e-3, 10] (linear sampling,
      default 1) and ``kernel_type`` in {'linear', 'rbf'}.
    * ``'random_forest'``: ``RandomForestClassifier`` with ``n_estimators``
      in [10, 200] (step 10) and ``max_depth`` in [3, 10].

    Both models use ``random_state=42`` so that a given set of
    hyperparameters always yields the same fitted model.

    Note: the parameter name ``hp`` shadows the module alias ``hp`` imported
    at the top of this cell; inside this function ``hp`` is always the
    hyperparameter container passed in by the tuner.

    Args:
        hp: Hyperparameter container providing ``Choice``, ``Float``,
            ``Int`` and ``conditional_scope``.

    Returns:
        An unfitted ``SVC`` or ``RandomForestClassifier``.

    Raises:
        ValueError: If ``model_type`` is neither ``'svm'`` nor
            ``'random_forest'``.
    """
    model_type = hp.Choice('model_type', ['svm', 'random_forest'])
    if model_type == 'svm':
        with hp.conditional_scope('model_type', 'svm'):
            model = SVC(C=hp.Float('C', 1e-3, 10, sampling='linear', default=1),
                        kernel=hp.Choice("kernel_type", ['linear', 'rbf'], default="linear"),
                        random_state=42)
    elif model_type == 'random_forest':
        with hp.conditional_scope('model_type', 'random_forest'):
            model = RandomForestClassifier(
                n_estimators=hp.Int('n_estimators', 10, 200, step=10),
                max_depth=hp.Int('max_depth', 3, 10),
                # Seeded like the SVC branch; without it the forest (and so
                # the trial score) changes from run to run.
                random_state=42)
    else:
        raise ValueError('Unrecognized model_type')
    return model

# Random search over the `build_model` space: three trials, each ranked by
# the 'score' metric (higher is better), with a fixed seed for the sampling.
random_tuner = ShallowTuner(
    hypermodel=build_model,
    oracle=kt.oracles.RandomSearch(
        objective=kt.Objective(name='score', direction='max'),
        seed=42,
        max_trials=3,
    ),
    project_name='random_tuner',
    overwrite=True,
)

# The arguments line up with ShallowTuner.run_trial's X, y and
# validation_data parameters.
random_tuner.search(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
)

Trial 3 Complete [00h 00m 00s]
score: 0.9861111111111112

Best score So Far: 0.9861111111111112
Total elapsed time: 00h 00m 00s
INFO:tensorflow:Oracle triggered exit


In [16]:
# Score the top-ranked model from the random search on the held-out test set
from sklearn.metrics import accuracy_score

# Take the single best model found by the search ...
best_model = random_tuner.get_best_models(num_models=1)[0]

# ... refit it on the training split and predict the test split
# (scikit-learn's fit() returns the estimator itself, so the calls chain).
y_pred_test = best_model.fit(X_train, y_train).predict(X_test)

test_acc = accuracy_score(y_true=y_test, y_pred=y_pred_test)
print("The prediction accuracy on test set: {:.2f} %".format(100 * test_acc))

The prediction accuracy on test set: 95.00 %


In [17]:
from kerastuner.engine import hyperparameters as hp
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline


def build_pipeline(hp):
    """Build an unfitted PCA + classifier pipeline from sampled hyperparameters.

    The search space contains:

    * ``n_components`` in {2, 5, 10} (default 5) for the PCA step.
    * ``model_type`` in {'svm', 'random_forest'} and, inside a conditional
      scope tied to that choice, the hyperparameters of the selected
      classifier (``C`` and ``kernel_type`` for the SVM; ``n_estimators``
      and ``max_depth`` for the random forest).

    Every step uses ``random_state=42`` so that a given set of
    hyperparameters always yields the same fitted pipeline.

    Note: the parameter name ``hp`` shadows the module alias ``hp`` imported
    at the top of this cell; inside this function ``hp`` is always the
    hyperparameter container passed in by the tuner.

    Args:
        hp: Hyperparameter container providing ``Choice``, ``Float``,
            ``Int`` and ``conditional_scope``.

    Returns:
        An unfitted ``Pipeline`` with steps ``'pca'`` and ``'clf'``.

    Raises:
        ValueError: If ``model_type`` is neither ``'svm'`` nor
            ``'random_forest'``.
    """

    # select hyperparameters of PCA
    n_components = hp.Choice("n_components", [2, 5, 10], default=5)
    # random_state only has an effect when PCA picks a randomized solver;
    # setting it keeps the projection reproducible in that case.
    pca = PCA(n_components=n_components, random_state=42)

    # select model type
    model_type = hp.Choice('model_type', ['svm', 'random_forest'])
    if model_type == 'svm':
        with hp.conditional_scope('model_type', 'svm'):
            model = SVC(C=hp.Float('C', 1e-3, 10, sampling='linear', default=1),
                        kernel=hp.Choice("kernel_type", ['linear', 'rbf'], default="linear"),
                        random_state=42)
    elif model_type == 'random_forest':
        with hp.conditional_scope('model_type', 'random_forest'):
            model = RandomForestClassifier(
                n_estimators=hp.Int('n_estimators', 10, 200, step=10),
                max_depth=hp.Int('max_depth', 3, 10),
                # Seeded like the SVC branch; without it the forest (and so
                # the trial score) changes from run to run.
                random_state=42)
    else:
        raise ValueError('Unrecognized model_type')

    # create pipeline
    pipeline = Pipeline([
        ('pca', pca),
        ('clf', model)
        ])

    return pipeline

In [18]:
# Random search over the `build_pipeline` space: three trials, each ranked by
# the 'score' metric (higher is better).
tuner = ShallowTuner(
        oracle=kt.oracles.RandomSearch(
            objective=kt.Objective('score', 'max'),
            max_trials=3,
            # Fixed seed, as for the first tuner, so the sampled trials are
            # the same on every run of the notebook.
            seed=42),
        hypermodel=build_pipeline,
        overwrite=True)
tuner.search(X_train, y_train, validation_data=(X_val, y_val))

Trial 3 Complete [00h 00m 00s]
score: 0.5763888888888888

Best score So Far: 0.8159722222222222
Total elapsed time: 00h 00m 01s
INFO:tensorflow:Oracle triggered exit


In [19]:
# Score the top-ranked pipeline from the search on the held-out test set
from sklearn.metrics import accuracy_score

# Take the single best pipeline found by the search ...
best_pipeline = tuner.get_best_models(num_models=1)[0]

# ... refit it on the training split and predict the test split
# (scikit-learn's fit() returns the estimator itself, so the calls chain).
y_pred_test = best_pipeline.fit(X_train, y_train).predict(X_test)

test_acc = accuracy_score(y_true=y_test, y_pred=y_pred_test)
print("The prediction accuracy on test set: {:.2f} %".format(100 * test_acc))

The prediction accuracy on test set: 76.67 %
