In [1]:
from functools import partial
import pandas as pd
import numpy as np

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

from hyperopt import hp, fmin, tpe, Trials, STATUS_OK

from utils import HPOpt

# Load data

In [2]:
iris_data = load_iris()

col_names = ['_'.join(feat.split(' ')[:2]) for feat in iris_data['feature_names']]
X, y = pd.DataFrame(iris_data['data'],columns=col_names),pd.Series(iris_data['target'])
X_test, X_train, y_test, y_train = train_test_split(X, y, train_size=.8, random_state=42, stratify=y)


In [3]:
print(X.shape)
X.head()

(150, 4)


Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


# Defining model parameters and training parameters

In [4]:
classifier_params_space = {
    'learning_rate':    hp.choice('learning_rate',    np.arange(0.05, .31, 0.05)),
    'max_depth':        hp.choice('max_depth',        np.arange(1, 4, 1, dtype=int)),
    'min_child_weight': hp.choice('min_child_weight', np.arange(1, 8, 1, dtype=int)),
    'min_child_samples':hp.choice('min_child_samples', np.arange(1, 10, 1, dtype=int)),
    'colsample_bytree': hp.choice('colsample_bytree', np.arange(0.3, 0.8, 0.1)),
    'subsample':        hp.uniform('subsample', 0.8, 1),
    'n_estimators':     hp.choice('n_estimators', np.arange(5, 20, 1)),
}
fit_params = {
    'eval_metric': 'logloss',
    'early_stopping_rounds': 3,
    'verbose': False
}
lgb_para = dict()
lgb_para['params_space'] = classifier_params_space
lgb_para['fit_params'] = fit_params
lgb_para['scoring_func'] = partial(f1_score, average='micro')



# First experiment 

In [5]:
obj = HPOpt(X_train, X_test, y_train, y_test, 'experiment 1')

hp_results = obj.process(space=lgb_para, trials=Trials(), algo=tpe.suggest, max_evals=20)

100%|██████████| 20/20 [00:02<00:00,  6.42it/s, best loss: 0.3114945837518382]


# Second experiment 
Lets add some other features

In [6]:
X['sepal_area'] = X.sepal_length*X.sepal_width
X['petal_area'] = X.petal_length*X.petal_width
X_test, X_train, y_test, y_train = train_test_split(X, y, train_size=.8, random_state=42, stratify=y)


In [7]:
obj = HPOpt(X_train, X_test, y_train, y_test, 'experiment 2')

hp_results = obj.process(space=lgb_para, trials=Trials(), algo=tpe.suggest, max_evals=20)

100%|██████████| 20/20 [00:02<00:00,  7.74it/s, best loss: 0.21655618619350206]
