### Sequential model-based optimization



* **HyperOpt**: Sequential model-based optimization (SMBO) is a Bayesian optimization technique that uses the results of past trials to choose the next set of hyperparameters to evaluate. Two variants of this algorithm are used in practice: (i) one based on Gaussian processes and (ii) one based on the Tree-structured Parzen Estimator (TPE). The HyperOpt package implements the TPE algorithm.

*****************************

**Step 1:** Create the conda environment with the requirements listed in `requirements.txt`. Instructions are [here](https://github.com/erikapat/python-tips/blob/master/Conda_enviroments.ipynb).

In [2]:
#!pip install numpy==1.13.1
#!conda install -c conda-forge hyperopt
#!pip install hyperopt==0.2.2
#!pip install scipy==0.19.1

In [22]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score
#from sklearn.preprocessing import scale
from sklearn.preprocessing import normalize
from hyperopt import hp, tpe, fmin, Trials, STATUS_OK

In [34]:
from sklearn.naive_bayes import BernoulliNB
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

  from numpy.core.umath_tests import inner1d


## **KNeighborsClassifier**

In [3]:
from sklearn import datasets
# Load the iris dataset and split it into a feature matrix and a label vector.
iris = datasets.load_iris()
X, y = iris.data, iris.target
def hyperopt_train_test(params):
    """Return the mean cross-validation score of a k-NN classifier.

    `params` is unpacked into `KNeighborsClassifier` as keyword arguments and
    the model is scored with `cross_val_score` on the module-level `X` / `y`.
    """
    knn = KNeighborsClassifier(**params)
    fold_scores = cross_val_score(knn, X, y)
    return fold_scores.mean()
# Search space for k-NN: a categorical choice over the neighbour counts 1..99.
space4knn = dict(
    n_neighbors=hp.choice('n_neighbors', range(1, 100)),
)
def f(params):
    """Objective handed to `fmin`.

    The score is negated because `fmin` minimizes the 'loss' entry, so a
    higher cross-validation score must map to a lower loss.
    """
    score = hyperopt_train_test(params)
    outcome = {'loss': -score, 'status': STATUS_OK}
    return outcome
trials = Trials()
best = fmin(f, space4knn, algo=tpe.suggest, max_evals=100, trials=trials)

100%|██████████| 100/100 [00:01<00:00, 89.87it/s, best loss: -0.9869281045751634]


In [4]:
# NOTE: for `hp.choice` parameters `best` holds the index of the selected
# option, not the option itself (index i of range(1, 100) is n_neighbors=i+1).
print('best:', best, sep='\n')

best:
{'n_neighbors': 4}


## **SVC**

In [27]:
# Reload the iris dataset for the SVC search.
iris = datasets.load_iris()
X, y = iris.data, iris.target
def hyperopt_train_test(params):
    """Return the mean cross-validation score of an SVC built from `params`.

    `params` is unpacked into `SVC` as keyword arguments; scoring uses a full
    slice of the module-level `X` together with `y`.
    """
    features = X[:]
    svc = SVC(**params)
    fold_scores = cross_val_score(svc, features, y)
    return fold_scores.mean()
# Search space for SVC: continuous C and gamma in [0, 20] plus a categorical
# choice between four kernels.
space4svm = dict(
    C=hp.uniform('C', 0, 20),
    kernel=hp.choice('kernel', ['linear', 'sigmoid', 'poly', 'rbf']),
    gamma=hp.uniform('gamma', 0, 20),
)
def f(params):
    """Objective handed to `fmin`: the negated cross-validation score."""
    score = hyperopt_train_test(params)
    outcome = {'loss': -score, 'status': STATUS_OK}
    return outcome
trials = Trials()
best = fmin(f, space4svm, algo=tpe.suggest, max_evals=100, trials=trials)
print('best:')
print(best)

100%|██████████| 100/100 [00:02<00:00, 32.10it/s, best loss: -0.9934640522875817]
best:
{'C': 1.432441579188317, 'gamma': 17.599407611852758, 'kernel': 0}


## **DecisionTreeClassifier**

In [29]:

# Load the iris dataset for the decision-tree search.
iris = datasets.load_iris()
X_original, y_original = iris.data, iris.target
def hyperopt_train_test(params):
    """Return the mean cross-validation score of a decision tree.

    `params` is unpacked into `DecisionTreeClassifier` as keyword arguments.
    Scoring uses `X_original` / `y_original`, the data loaded for this
    section, rather than whatever `X` / `y` a previously executed cell left
    in the namespace.
    """
    clf = DecisionTreeClassifier(**params)
    return cross_val_score(clf, X_original, y_original).mean()
# Search space for the decision tree: depth 1..19, 1..4 features per split,
# and the split criterion.
space4dt = dict(
    max_depth=hp.choice('max_depth', range(1, 20)),
    max_features=hp.choice('max_features', range(1, 5)),
    criterion=hp.choice('criterion', ["gini", "entropy"]),
)
def f(params):
    """Objective handed to `fmin`: the negated cross-validation score."""
    score = hyperopt_train_test(params)
    outcome = {'loss': -score, 'status': STATUS_OK}
    return outcome
trials = Trials()
best = fmin(f, space4dt, algo=tpe.suggest, max_evals=300, trials=trials)
print('best:')
print(best)


100%|██████████| 300/300 [00:01<00:00, 164.39it/s, best loss: -0.9738562091503268]
best:
{'criterion': 1, 'max_depth': 17, 'max_features': 2}


## **RandomForestClassifier**

In [38]:
# Load the iris dataset for the random-forest search.
iris = datasets.load_iris()
X_original, y_original = iris.data, iris.target
def hyperopt_train_test(params):
    """Return the mean cross-validation score of a random forest.

    `params` is unpacked into `RandomForestClassifier` as keyword arguments.
    Scoring uses `X_original` / `y_original`, the data loaded for this
    section, rather than whatever `X` / `y` a previously executed cell left
    in the namespace.
    """
    clf = RandomForestClassifier(**params)
    return cross_val_score(clf, X_original, y_original).mean()
# Search space for the random forest: depth 1..19, 1..4 features per split,
# 1..19 trees, and the split criterion.
space4rf = dict(
    max_depth=hp.choice('max_depth', range(1, 20)),
    max_features=hp.choice('max_features', range(1, 5)),
    n_estimators=hp.choice('n_estimators', range(1, 20)),
    criterion=hp.choice('criterion', ["gini", "entropy"]),
)
# Highest score seen so far; `f` raises it whenever a trial does better.
best = 0
def f(params):
    """Objective handed to `fmin`: the negated cross-validation score.

    Also keeps the module-level `best` at the highest score observed.
    """
    global best
    score = hyperopt_train_test(params)
    best = max(best, score)
    #print('new best:', best, params)
    outcome = {'loss': -score, 'status': STATUS_OK}
    return outcome

trials = Trials()
best = fmin(f, space4rf, algo=tpe.suggest, max_evals=300, trials=trials)
print('best:')
print(best)

100%|██████████| 300/300 [00:12<00:00, 24.45it/s, best loss: -0.9738562091503268]
best:
{'criterion': 1, 'max_depth': 5, 'max_features': 3, 'n_estimators': 0}


## **ALL AT ONCE**

In [19]:
from sklearn.naive_bayes import BernoulliNB
from sklearn.svm import SVC

In [None]:
# Switch to the digits dataset for the combined model search.
digits = datasets.load_digits()
X, y = digits.data, digits.target
print(X.shape, y.shape)
def hyperopt_train_test(params):
    """Cross-validate the classifier described by `params`; return the mean score.

    Args:
        params: dict with a 'type' key naming the model family ('naive_bayes',
            'svm', 'dtree', 'randomforest' or 'knn'). The optional 0/1 flags
            'normalize' and 'scale' are consumed here as preprocessing
            switches; every remaining key is passed to the estimator as a
            keyword argument. The caller's dict is not modified.

    Returns:
        The mean of `cross_val_score` on the module-level `X` / `y`, or 0
        when the model type is not recognised.

    Raises:
        KeyError: if `params` has no 'type' entry.
    """
    # Work on a copy so that popping keys never mutates the caller's dict.
    params = dict(params)
    model_type = params.pop('type')

    estimators = {
        'naive_bayes': BernoulliNB,
        'svm': SVC,
        'dtree': DecisionTreeClassifier,
        # The search space labels this family 'randomforest'; without this
        # entry every such trial fell through to the 0-score fallback.
        'randomforest': RandomForestClassifier,
        'knn': KNeighborsClassifier,
    }
    if model_type not in estimators:
        return 0

    # The preprocessing flags are not estimator arguments: strip them before
    # building the model and apply the requested transforms to the features.
    features = X
    if params.pop('normalize', 0) == 1:
        features = normalize(features)
    if params.pop('scale', 0) == 1:
        # Imported here because the file-level import of `scale` is commented out.
        from sklearn.preprocessing import scale
        features = scale(features)

    clf = estimators[model_type](**params)
    return cross_val_score(clf, features, y).mean()
# Combined search space: first pick a model family, then draw that family's
# own hyperparameters. The 'type' entry tells the objective which estimator
# to build.
space = hp.choice('classifier_type', [
    dict(
        type='naive_bayes',
        alpha=hp.uniform('alpha', 0.0, 2.0),
    ),
    dict(
        type='svm',
        C=hp.uniform('C', 0, 10.0),
        kernel=hp.choice('kernel', ['linear', 'rbf']),
        gamma=hp.uniform('gamma', 0, 20.0),
    ),
    dict(
        type='randomforest',
        max_depth=hp.choice('max_depth', range(1, 20)),
        max_features=hp.choice('max_features', range(1, 5)),
        n_estimators=hp.choice('n_estimators', range(1, 20)),
        criterion=hp.choice('criterion', ["gini", "entropy"]),
        # 0/1 flags rather than estimator keyword arguments -- presumably
        # preprocessing toggles to be consumed by the objective (TODO confirm).
        scale=hp.choice('scale', [0, 1]),
        normalize=hp.choice('normalize', [0, 1]),
    ),
    dict(
        type='knn',
        n_neighbors=hp.choice('knn_n_neighbors', range(1, 50)),
    ),
])
# Module-level progress state shared with `f`: number of evaluations so far
# and the highest score observed.
count = 0
best = 0
def f(params):
    """Objective handed to `fmin`, with progress logging.

    Prints a line whenever a trial beats the best score seen so far and a
    status line on every 50th evaluation, then returns the negated score as
    the loss.
    """
    global best, count
    count += 1
    # Pass a copy so `params` keeps its 'type' entry for the log lines below.
    score = hyperopt_train_test(params.copy())
    improved = score > best
    if improved:
        print('new best:', score, 'using', params['type'])
        best = score
    on_report_tick = count % 50 == 0
    if on_report_tick:
        print('iters:', count, ', acc:', score, 'using', params)
    outcome = {'loss': -score, 'status': STATUS_OK}
    return outcome
trials = Trials()
best = fmin(f, space, algo=tpe.suggest, max_evals=1500, trials=trials)

(1797, 64) (1797,)
new best:                                             
0.9437997276286755                                    
using                                                 
svm                                                   
new best:                                                                       
0.968293886616605                                                               
using                                                                           
knn                                                                             
iters:                                                                          
50                                                                              
, acc:                                                                          
0.824145826346896                                                               
using                                                                           
{'alpha': 1.5328813416239089, 't

## REFERENCES:
1. [HyperOpt](https://blog.dominodatalab.com/hyperopt-bayesian-hyperparameter-optimization/?utm_source=house-list&utm_medium=email&utm_campaign=General&mkt_tok=eyJpIjoiTkdNeE9UZzJOV1l3Wm1SaiIsInQiOiJcL2VobUxHYnpHRHdaQWJ6eVwvNHZBSjVKVlJKbURwZ2wzdjliNHVtbVdTa2FGM0FVU2NkQVN6QVhFNHFOVnA4TTNVeDBLY2U0YlgwXC9KNFwvYlhIVFJyR0NsYTFscjh2TzR5MjB4T1UxSzgrdHM3eHNYcFRnZHdJZlpJZHB4bFNSdXAifQ%3D%3D).
* http://henrymossblog.blogspot.com/2017/02/bayesian-hyperparameter-tuning-with_16.html
* https://machinelearningmastery.com/what-is-bayesian-optimization/