# Optimization using hyperopt

# Ensemble Regressor

In [None]:
import pandas as pd
import numpy as np
from mypipes_trees import *

In [None]:
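# NOTE: the search cells below read `x_train` and `y_train`; uncomment this cell
# (or define those two variables another way) before running them.
# in_train=pd.read_csv('/content/insurance.csv')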
# cat_vars=['sex','region','smoker']
# num_vars=['age','bmi','children']
# p1=pdPipeline([
#     ('cat_select',VarSelector(cat_vars)),
#     ('missing_trt',DataFrameImputer()),
#     ('create_dummies',get_dummies_Pipe(15))
# ])

# p2=pdPipeline([
#     ('num_select',VarSelector(num_vars)),
#     ('missing_trt',DataFrameImputer())
# ])

# data_pipe=FeatureUnion([
#     ('cat_pipe',p1),
#     ('num_pipe',p2)
# ])
# x_train=pd.DataFrame(data=data_pipe.fit_transform(in_train),
#                      columns=data_pipe.get_feature_names())
# y_train=in_train['charges']


In [None]:
!pip install hyperopt
from hyperopt import hp, fmin, tpe, Trials
from hyperopt.pyll.base import scope
from functools import partial
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor, ExtraTreesRegressor
from sklearn.model_selection import cross_val_score



In [None]:
def optimize_ensemble(param, x, y):
  """Hyperopt objective for a random forest regressor.

  Builds a ``RandomForestRegressor`` from ``param`` and scores it with
  10-fold cross-validation on ``(x, y)``.

  Args:
    param: dict of keyword arguments forwarded to ``RandomForestRegressor``.
    x: features handed to ``cross_val_score``.
    y: targets handed to ``cross_val_score``.

  Returns:
    The negated mean of the ``neg_mean_absolute_error`` fold scores, i.e. a
    loss where lower is better.
  """
  forest = RandomForestRegressor(**param)
  fold_scores = cross_val_score(
      forest,
      x,
      y,
      scoring='neg_mean_absolute_error',
      cv=10,
      n_jobs=-1,
  )
  # The scorer is negated MAE, so flip the sign to obtain a value to minimise.
  return -np.mean(fold_scores)


In [None]:
# Search space for the RandomForestRegressor hyperparameters.
# `hp.quniform` samples floats (the recorded run shows e.g. 'max_depth': 7.0),
# so integer-valued parameters are wrapped in `scope.int`.
param_space = {
    "max_depth": scope.int(hp.quniform("max_depth", 3, 15, 1)),
    "n_estimators": scope.int(hp.quniform("n_estimators", 100, 1500, 1)),
    # Lower bound is 2, not 1: scikit-learn rejects an integer
    # min_samples_split below 2, so sampling 1 would make the trial fail.
    "min_samples_split": scope.int(hp.quniform("min_samples_split", 2, 10, 1)),
    "min_samples_leaf": scope.int(hp.quniform("min_samples_leaf", 1, 10, 1)),
    "max_features": hp.uniform("max_features", 0.01, 1),
    "ccp_alpha": hp.uniform("ccp_alpha", 0.01, 1),
    "bootstrap": hp.choice("bootstrap", [True, False]),
}

In [None]:
# Trial log that hyperopt fills in during the search.
trials = Trials()

# Bind the training data so hyperopt only has to supply `param`.
optimization_function = partial(optimize_ensemble, x=x_train, y=y_train)

# Run 15 TPE-guided evaluations over `param_space`.
hopt = fmin(
    fn=optimization_function,
    space=param_space,
    algo=tpe.suggest,
    max_evals=15,
    trials=trials,
)

# NOTE(review): `hp.choice` parameters appear to come back as an index into
# their option list (the recorded run shows 'bootstrap': 0) — decode them
# before reusing the values.
print(hopt)

100%|██████████| 15/15 [03:14<00:00, 12.94s/it, best loss: 2487.3314403926843]
{'bootstrap': 0, 'ccp_alpha': 0.01636239429699251, 'max_depth': 7.0, 'max_features': 0.8513851222127036, 'min_samples_leaf': 9.0, 'min_samples_split': 10.0, 'n_estimators': 212.0}


# Linear Models

In [None]:
from sklearn.linear_model import Lasso
def optimize_linear(param, x, y):
  """Hyperopt objective for a Lasso regression.

  Builds a ``Lasso`` model from ``param`` and scores it with 10-fold
  cross-validation on ``(x, y)``.

  Args:
    param: dict of keyword arguments forwarded to ``Lasso``.
    x: features handed to ``cross_val_score``.
    y: targets handed to ``cross_val_score``.

  Returns:
    The negated mean of the ``neg_mean_absolute_error`` fold scores, i.e. a
    loss where lower is better.
  """
  lasso = Lasso(**param)
  fold_scores = cross_val_score(
      lasso,
      x,
      y,
      scoring='neg_mean_absolute_error',
      cv=10,
      n_jobs=-1,
  )
  # The scorer is negated MAE, so flip the sign to obtain a value to minimise.
  return -np.mean(fold_scores)


In [None]:
# Search space for the Lasso hyperparameters.
# NOTE(review): `normalize` is only accepted by older scikit-learn releases
# (believed removed in 1.2) — confirm against the installed version.
param_space = dict(
    alpha=hp.uniform("alpha", 0.01, 100),
    normalize=hp.choice("normalize", [True, False]),
    fit_intercept=hp.choice("fit_intercept", [True, False]),
)

In [None]:
# Trial log that hyperopt fills in during the search.
trials = Trials()

# Bind the training data so hyperopt only has to supply `param`.
optimization_function = partial(optimize_linear, x=x_train, y=y_train)

# Run 15 TPE-guided evaluations over `param_space`.
hopt = fmin(
    fn=optimization_function,
    space=param_space,
    algo=tpe.suggest,
    max_evals=15,
    trials=trials,
)

# NOTE(review): `hp.choice` parameters appear to come back as an index into
# their option list (the recorded run shows 'fit_intercept': 0,
# 'normalize': 1) — decode them before reusing the values.
print(hopt)

100%|██████████| 15/15 [00:02<00:00,  6.93it/s, best loss: 4207.828696547045]
{'alpha': 23.9372676294255, 'fit_intercept': 0, 'normalize': 1}


# KNN Regressor

In [None]:
from sklearn.neighbors import KNeighborsRegressor
def optimize_knn(param, x, y):
  """Hyperopt objective for a k-nearest-neighbours regressor.

  Builds a ``KNeighborsRegressor`` from ``param`` and scores it with
  10-fold cross-validation on ``(x, y)``.

  Args:
    param: dict of keyword arguments forwarded to ``KNeighborsRegressor``.
    x: features handed to ``cross_val_score``.
    y: targets handed to ``cross_val_score``.

  Returns:
    The negated mean of the ``neg_mean_absolute_error`` fold scores, i.e. a
    loss where lower is better.
  """
  knn = KNeighborsRegressor(**param)
  fold_scores = cross_val_score(
      knn,
      x,
      y,
      scoring='neg_mean_absolute_error',
      cv=10,
      n_jobs=-1,
  )
  # The scorer is negated MAE, so flip the sign to obtain a value to minimise.
  return -np.mean(fold_scores)


In [None]:
# Search space for the KNeighborsRegressor hyperparameters.
param_space = dict(
    # Neighbour count is quantised with q=2 over [3, 100] and cast to int.
    n_neighbors=scope.int(hp.quniform("n_neighbors", 3, 100, 2)),
    # Candidate values for the `p` argument of KNeighborsRegressor.
    p=hp.choice("p", [2, 3]),
)

In [None]:
# Trial log that hyperopt fills in during the search.
trials = Trials()

# Bind the training data so hyperopt only has to supply `param`.
optimization_function = partial(optimize_knn, x=x_train, y=y_train)

# Run 15 TPE-guided evaluations over `param_space`.
hopt = fmin(
    fn=optimization_function,
    space=param_space,
    algo=tpe.suggest,
    max_evals=15,
    trials=trials,
)

# NOTE(review): `hp.choice` parameters appear to come back as an index into
# their option list (the recorded run shows 'p': 1 although the options are
# [2, 3]) — decode them before reusing the values.
print(hopt)

100%|██████████| 15/15 [00:03<00:00,  4.62it/s, best loss: 8378.389123738776]
{'n_neighbors': 8.0, 'p': 1}


# SVR

In [None]:
from sklearn.svm import SVR
def optimize_svr(param, x, y):
  """Hyperopt objective for a support vector regressor.

  Builds an ``SVR`` model from ``param`` and scores it with 10-fold
  cross-validation on ``(x, y)``.

  Args:
    param: dict of keyword arguments forwarded to ``SVR``.
    x: features handed to ``cross_val_score``.
    y: targets handed to ``cross_val_score``.

  Returns:
    The negated mean of the ``neg_mean_absolute_error`` fold scores, i.e. a
    loss where lower is better.
  """
  svr = SVR(**param)
  fold_scores = cross_val_score(
      svr,
      x,
      y,
      scoring='neg_mean_absolute_error',
      cv=10,
      n_jobs=-1,
  )
  # The scorer is negated MAE, so flip the sign to obtain a value to minimise.
  return -np.mean(fold_scores)


In [None]:
# Search space for the SVR hyperparameters.
param_space = dict(
    epsilon=hp.uniform("epsilon", 0.01, 2),
    kernel=hp.choice("kernel", ["rbf", "poly"]),
    # NOTE(review): `degree` is sampled for every trial regardless of kernel;
    # presumably only the 'poly' kernel uses it — confirm.
    degree=scope.int(hp.quniform("degree", 3, 10, 1)),
    C=hp.uniform("C", 0.001, 1),
)

In [None]:
# Trial log that hyperopt fills in during the search.
trials = Trials()

# Bind the training data so hyperopt only has to supply `param`.
optimization_function = partial(optimize_svr, x=x_train, y=y_train)

# Run 15 TPE-guided evaluations over `param_space`.
hopt = fmin(
    fn=optimization_function,
    space=param_space,
    algo=tpe.suggest,
    max_evals=15,
    trials=trials,
)

# NOTE(review): `hp.choice` parameters appear to come back as an index into
# their option list (the recorded run shows 'kernel': 1) — decode them
# before reusing the values.
print(hopt)

100%|██████████| 15/15 [00:12<00:00,  1.24it/s, best loss: 7275.674405431926]
{'C': 0.9751350566630774, 'degree': 7.0, 'epsilon': 0.407010017688621, 'kernel': 1}


# Decision Tree

In [None]:
from sklearn.tree import DecisionTreeRegressor
def optimize_dtr(param, x, y):
  """Hyperopt objective for a decision tree regressor.

  Builds a ``DecisionTreeRegressor`` from ``param`` and scores it with
  10-fold cross-validation on ``(x, y)``.

  Args:
    param: dict of keyword arguments forwarded to ``DecisionTreeRegressor``.
    x: features handed to ``cross_val_score``.
    y: targets handed to ``cross_val_score``.

  Returns:
    The negated mean of the ``neg_mean_absolute_error`` fold scores, i.e. a
    loss where lower is better.
  """
  tree = DecisionTreeRegressor(**param)
  fold_scores = cross_val_score(
      tree,
      x,
      y,
      scoring='neg_mean_absolute_error',
      cv=10,
      n_jobs=-1,
  )
  # The scorer is negated MAE, so flip the sign to obtain a value to minimise.
  return -np.mean(fold_scores)


In [None]:
# Search space for the DecisionTreeRegressor hyperparameters.
# `hp.quniform` samples floats (the recorded run shows e.g. 'max_depth': 5.0),
# so integer-valued parameters are wrapped in `scope.int`.
param_space = {
    "max_depth": scope.int(hp.quniform("max_depth", 3, 15, 1)),
    # Lower bound is 2, not 1: scikit-learn rejects an integer
    # min_samples_split below 2, so sampling 1 would make the trial fail.
    "min_samples_split": scope.int(hp.quniform("min_samples_split", 2, 10, 1)),
    "min_samples_leaf": scope.int(hp.quniform("min_samples_leaf", 1, 10, 1)),
    "ccp_alpha": hp.uniform("ccp_alpha", 0.01, 1),
    # NOTE(review): 'mse' is the criterion name used by older scikit-learn
    # releases (believed renamed to 'squared_error' in 1.0 and removed in
    # 1.2) — confirm against the installed version.
    "criterion": hp.choice("criterion", ["mse", "friedman_mse"]),
    "splitter": hp.choice("splitter", ["best", "random"]),
}

In [None]:
# Trial log that hyperopt fills in during the search.
trials = Trials()

# Bind the training data so hyperopt only has to supply `param`.
optimization_function = partial(optimize_dtr, x=x_train, y=y_train)

# Run 15 TPE-guided evaluations over `param_space`.
hopt = fmin(
    fn=optimization_function,
    space=param_space,
    algo=tpe.suggest,
    max_evals=15,
    trials=trials,
)

# NOTE(review): `hp.choice` parameters appear to come back as an index into
# their option list (the recorded run shows 'criterion': 0, 'splitter': 0) —
# decode them before reusing the values.
print(hopt)

100%|██████████| 15/15 [00:02<00:00,  6.51it/s, best loss: 2581.233894787322]
{'ccp_alpha': 0.6075098245079362, 'criterion': 0, 'max_depth': 5.0, 'min_samples_leaf': 10.0, 'min_samples_split': 9.0, 'splitter': 0}
