# Optimization using skopt 

# Ensemble Regressor (skopt)



In [None]:
import pandas as pd
import numpy as np

In [None]:
!pip install scikit-optimize
from skopt import gp_minimize
from skopt import space
from functools import partial
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor, ExtraTreesRegressor
from sklearn.model_selection import cross_val_score



In [None]:
def optimize_ensemble(param, param_names, x, y):
    """Objective function: cross-validated MAE of a RandomForestRegressor.

    Args:
        param: Hyperparameter values, ordered like ``param_names``.
        param_names: Estimator keyword names matching ``param`` positionally.
        x: Feature data handed to ``cross_val_score``.
        y: Target data handed to ``cross_val_score``.

    Returns:
        The mean absolute error averaged over 10 folds. The scorer yields
        negated errors, so the sign is flipped to give a value to minimise.
    """
    hyperparams = {name: value for name, value in zip(param_names, param)}
    estimator = RandomForestRegressor(**hyperparams)
    fold_scores = cross_val_score(
        estimator,
        x,
        y,
        cv=10,
        n_jobs=-1,
        scoring='neg_mean_absolute_error',
    )
    return -np.mean(fold_scores)


In [None]:
# Search space for RandomForestRegressor. Each dimension's ``name`` is the
# estimator keyword argument it tunes.
param_space = [
    space.Integer(3, 15, name="max_depth"),
    space.Integer(100, 1500, name="n_estimators"),
    # scikit-learn rejects an integer min_samples_split below 2, so the
    # range starts at 2; sampling 1 would make every CV fit fail.
    space.Integer(2, 10, name='min_samples_split'),
    space.Integer(1, 10, name='min_samples_leaf'),
    # A float max_features is interpreted as a fraction of the features.
    space.Real(0.01, 1, prior="uniform", name="max_features"),
    space.Real(0.01, 1, prior="uniform", name="ccp_alpha"),
    space.Categorical([True, False], name='bootstrap'),
]

# Keyword names in the same order as the dimensions above. Deriving them
# from the space keeps the two sequences from drifting apart.
param_names = [dimension.name for dimension in param_space]


In [None]:
# Bind the data and the parameter-name order so the optimizer only has to
# supply the list of hyperparameter values.
# NOTE(review): x_train / y_train are not defined in the visible part of this
# notebook -- confirm they are created before this cell runs.
optimization_function = partial(
    optimize_ensemble,
    param_names=param_names,
    x=x_train,
    y=y_train,
)

# 30 objective evaluations in total: the first 20 are random samples of the
# space, the rest are proposed by the Gaussian-process surrogate.
# ``n_initial_points`` replaces ``n_random_starts``, which scikit-optimize
# deprecated in 0.8.
result = gp_minimize(
    optimization_function,
    dimensions=param_space,
    n_calls=30,
    n_initial_points=20,
    verbose=10,
)

# result.x is the best point found, ordered like param_space / param_names.
best_params = dict(zip(param_names, result.x))
print(best_params)

Iteration No: 1 started. Evaluating function at random point.
Iteration No: 1 ended. Evaluation done at random point.
Time taken: 3.3689
Function value obtained: 3773.2624
Current minimum: 3773.2624
Iteration No: 2 started. Evaluating function at random point.
Iteration No: 2 ended. Evaluation done at random point.
Time taken: 6.8884
Function value obtained: 2963.3407
Current minimum: 2963.3407
Iteration No: 3 started. Evaluating function at random point.
Iteration No: 3 ended. Evaluation done at random point.
Time taken: 26.1776
Function value obtained: 2524.0377
Current minimum: 2524.0377
Iteration No: 4 started. Evaluating function at random point.
Iteration No: 4 ended. Evaluation done at random point.
Time taken: 11.7575
Function value obtained: 4949.1737
Current minimum: 2524.0377
Iteration No: 5 started. Evaluating function at random point.
Iteration No: 5 ended. Evaluation done at random point.
Time taken: 17.3565
Function value obtained: 2625.9587
Current minimum: 2524.0377
It

# Linear Models (skopt)

In [None]:
from sklearn.linear_model import Lasso
def optimize_linear(param, param_names, x, y):
    """Objective function: cross-validated MAE of a Lasso model.

    Args:
        param: Hyperparameter values, ordered like ``param_names``.
        param_names: Estimator keyword names matching ``param`` positionally.
        x: Feature data handed to ``cross_val_score``.
        y: Target data handed to ``cross_val_score``.

    Returns:
        The mean absolute error averaged over 10 folds (sign flipped from the
        negated scorer so that smaller is better).
    """
    lasso_kwargs = {key: val for key, val in zip(param_names, param)}
    regressor = Lasso(**lasso_kwargs)
    neg_mae_per_fold = cross_val_score(
        regressor,
        x,
        y,
        cv=10,
        n_jobs=-1,
        scoring='neg_mean_absolute_error',
    )
    return -np.mean(neg_mae_per_fold)


In [None]:
# Hyperparameters tuned for Lasso, in the order the optimizer supplies them.
# NOTE: the former ``normalize`` entry is dropped. That constructor argument
# was deprecated in scikit-learn 1.0 and removed in 1.2, so passing it makes
# ``Lasso(**param)`` raise TypeError on current releases. If feature scaling
# is wanted, scale the data before fitting (e.g. a StandardScaler pipeline).
param_names = [
    'alpha',
    'fit_intercept',
]

# One dimension per entry of param_names, in the same order.
param_space = [
    space.Real(0.01, 100, prior="uniform", name="alpha"),
    space.Categorical([True, False], name='fit_intercept'),
]


In [None]:
# Bind the data and the parameter-name order so the optimizer only has to
# supply the list of hyperparameter values.
# NOTE(review): x_train / y_train are not defined in the visible part of this
# notebook -- confirm they are created before this cell runs.
optimization_function = partial(
    optimize_linear,
    param_names=param_names,
    x=x_train,
    y=y_train,
)

# 15 objective evaluations in total, the first 10 at random points.
# ``n_initial_points`` replaces ``n_random_starts``, which scikit-optimize
# deprecated in 0.8.
result = gp_minimize(
    optimization_function,
    dimensions=param_space,
    n_calls=15,
    n_initial_points=10,
    verbose=10,
)

# result.x is the best point found, ordered like param_space / param_names.
best_params = dict(zip(param_names, result.x))
print(best_params)

Iteration No: 1 started. Evaluating function at random point.
Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.0632
Function value obtained: 4802.7715
Current minimum: 4802.7715
Iteration No: 2 started. Evaluating function at random point.
Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.0564
Function value obtained: 5185.3528
Current minimum: 4802.7715
Iteration No: 3 started. Evaluating function at random point.
Iteration No: 3 ended. Evaluation done at random point.
Time taken: 0.0664
Function value obtained: 4804.0252
Current minimum: 4802.7715
Iteration No: 4 started. Evaluating function at random point.
Iteration No: 4 ended. Evaluation done at random point.
Time taken: 0.0584
Function value obtained: 4201.4513
Current minimum: 4201.4513
Iteration No: 5 started. Evaluating function at random point.
Iteration No: 5 ended. Evaluation done at random point.
Time taken: 0.0576
Function value obtained: 4801.2650
Current minimum: 4201.4513
Itera

# KNN Regressor

In [None]:
from sklearn.neighbors import KNeighborsRegressor
def optimize_knn(param, param_names, x, y):
    """Objective function: cross-validated MAE of a KNeighborsRegressor.

    Args:
        param: Hyperparameter values, ordered like ``param_names``.
        param_names: Estimator keyword names matching ``param`` positionally.
        x: Feature data handed to ``cross_val_score``.
        y: Target data handed to ``cross_val_score``.

    Returns:
        The mean absolute error averaged over 10 folds (the scorer returns
        negated errors, hence the leading minus).
    """
    knn_kwargs = {name: value for name, value in zip(param_names, param)}
    knn = KNeighborsRegressor(**knn_kwargs)
    cv_scores = cross_val_score(
        knn,
        x,
        y,
        cv=10,
        n_jobs=-1,
        scoring='neg_mean_absolute_error',
    )
    return -np.mean(cv_scores)


In [None]:
# Search space for KNeighborsRegressor: neighbour count and the power
# parameter ``p`` of the distance metric.
param_space = [
    space.Integer(3, 100, name="n_neighbors"),
    space.Categorical([2, 3], name='p'),
]

# Keyword names, read off the dimensions so both lists share one order.
param_names = [dimension.name for dimension in param_space]


In [None]:
# Bind the data and the parameter-name order so the optimizer only has to
# supply the list of hyperparameter values.
# NOTE(review): x_train / y_train are not defined in the visible part of this
# notebook -- confirm they are created before this cell runs.
optimization_function = partial(
    optimize_knn,
    param_names=param_names,
    x=x_train,
    y=y_train,
)

# 50 objective evaluations in total, the first 40 at random points.
# ``n_initial_points`` replaces ``n_random_starts``, which scikit-optimize
# deprecated in 0.8.
result = gp_minimize(
    optimization_function,
    dimensions=param_space,
    n_calls=50,
    n_initial_points=40,
    verbose=10,
)

# result.x is the best point found, ordered like param_space / param_names.
best_params = dict(zip(param_names, result.x))
print(best_params)

Iteration No: 1 started. Evaluating function at random point.
Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.1424
Function value obtained: 8956.6780
Current minimum: 8956.6780
Iteration No: 2 started. Evaluating function at random point.
Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.1889
Function value obtained: 9002.6658
Current minimum: 8956.6780
Iteration No: 3 started. Evaluating function at random point.
Iteration No: 3 ended. Evaluation done at random point.
Time taken: 0.0775
Function value obtained: 8845.8637
Current minimum: 8845.8637
Iteration No: 4 started. Evaluating function at random point.
Iteration No: 4 ended. Evaluation done at random point.
Time taken: 0.0691
Function value obtained: 8661.9856
Current minimum: 8661.9856
Iteration No: 5 started. Evaluating function at random point.
Iteration No: 5 ended. Evaluation done at random point.
Time taken: 0.0755
Function value obtained: 8845.8637
Current minimum: 8661.9856
Itera



Iteration No: 44 ended. Search finished for the next optimal point.
Time taken: 0.6678
Function value obtained: 7683.4501
Current minimum: 7223.5789
Iteration No: 45 started. Searching for the next optimal point.
Iteration No: 45 ended. Search finished for the next optimal point.
Time taken: 0.6856
Function value obtained: 7525.3617
Current minimum: 7223.5789
Iteration No: 46 started. Searching for the next optimal point.




Iteration No: 46 ended. Search finished for the next optimal point.
Time taken: 0.6964
Function value obtained: 7525.3617
Current minimum: 7223.5789
Iteration No: 47 started. Searching for the next optimal point.




Iteration No: 47 ended. Search finished for the next optimal point.
Time taken: 0.6836
Function value obtained: 7525.3617
Current minimum: 7223.5789
Iteration No: 48 started. Searching for the next optimal point.




Iteration No: 48 ended. Search finished for the next optimal point.
Time taken: 0.6719
Function value obtained: 7525.3617
Current minimum: 7223.5789
Iteration No: 49 started. Searching for the next optimal point.




Iteration No: 49 ended. Search finished for the next optimal point.
Time taken: 0.7124
Function value obtained: 7525.3617
Current minimum: 7223.5789
Iteration No: 50 started. Searching for the next optimal point.




Iteration No: 50 ended. Search finished for the next optimal point.
Time taken: 0.6904
Function value obtained: 7223.5789
Current minimum: 7223.5789
{'n_neighbors': 3, 'p': 2}


# SVR

In [None]:
from sklearn.svm import SVR
def optimize_svr(param, param_names, x, y):
    """Objective function: cross-validated MAE of an SVR model.

    Args:
        param: Hyperparameter values, ordered like ``param_names``.
        param_names: Estimator keyword names matching ``param`` positionally.
        x: Feature data handed to ``cross_val_score``.
        y: Target data handed to ``cross_val_score``.

    Returns:
        The mean absolute error averaged over 10 folds, as a positive number
        to be minimised.
    """
    svr_kwargs = {name: value for name, value in zip(param_names, param)}
    svr = SVR(**svr_kwargs)
    negated_errors = cross_val_score(
        svr,
        x,
        y,
        cv=10,
        n_jobs=-1,
        scoring='neg_mean_absolute_error',
    )
    return -np.mean(negated_errors)


In [None]:
# Hyperparameters tuned for SVR, in the order the optimizer supplies them.
param_names = [
    'kernel',
    'C',
    'epsilon',
    'degree',
]

# One dimension per entry of param_names, in the same order. Each dimension
# name is spelled exactly like the SVR keyword it tunes (the kernel dimension
# was previously labelled 'Kernel', which did not match param_names).
param_space = [
    space.Categorical(['poly', 'rbf'], name='kernel'),
    space.Real(0.001, 10, prior="uniform", name="C"),
    space.Real(0.01, 2, prior="uniform", name="epsilon"),
    space.Integer(3, 10, name='degree'),
]


In [None]:
# Bind the data and the parameter-name order so the optimizer only has to
# supply the list of hyperparameter values.
# NOTE(review): x_train / y_train are not defined in the visible part of this
# notebook -- confirm they are created before this cell runs.
optimization_function = partial(
    optimize_svr,
    param_names=param_names,
    x=x_train,
    y=y_train,
)

# 15 objective evaluations in total, the first 10 at random points.
# ``n_initial_points`` replaces ``n_random_starts``, which scikit-optimize
# deprecated in 0.8.
result = gp_minimize(
    optimization_function,
    dimensions=param_space,
    n_calls=15,
    n_initial_points=10,
    verbose=10,
)

# result.x is the best point found, ordered like param_space / param_names.
best_params = dict(zip(param_names, result.x))
print(best_params)

Iteration No: 1 started. Evaluating function at random point.
Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.6691
Function value obtained: 7152.8083
Current minimum: 7152.8083
Iteration No: 2 started. Evaluating function at random point.
Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.6902
Function value obtained: 7139.4868
Current minimum: 7139.4868
Iteration No: 3 started. Evaluating function at random point.
Iteration No: 3 ended. Evaluation done at random point.
Time taken: 0.9092
Function value obtained: 7360.7458
Current minimum: 7139.4868
Iteration No: 4 started. Evaluating function at random point.
Iteration No: 4 ended. Evaluation done at random point.
Time taken: 0.7030
Function value obtained: 7421.5572
Current minimum: 7139.4868
Iteration No: 5 started. Evaluating function at random point.
Iteration No: 5 ended. Evaluation done at random point.
Time taken: 0.6540
Function value obtained: 6954.4183
Current minimum: 6954.4183
Itera

# Decision Tree

In [None]:
from sklearn.tree import DecisionTreeRegressor
def optimize_dtr(param, param_names, x, y):
    """Objective function: cross-validated MAE of a DecisionTreeRegressor.

    Args:
        param: Hyperparameter values, ordered like ``param_names``.
        param_names: Estimator keyword names matching ``param`` positionally.
        x: Feature data handed to ``cross_val_score``.
        y: Target data handed to ``cross_val_score``.

    Returns:
        The mean absolute error averaged over 10 folds (negated scorer output
        turned back into a positive loss).
    """
    tree_kwargs = {name: value for name, value in zip(param_names, param)}
    tree = DecisionTreeRegressor(**tree_kwargs)
    scores_by_fold = cross_val_score(
        tree,
        x,
        y,
        cv=10,
        n_jobs=-1,
        scoring='neg_mean_absolute_error',
    )
    return -np.mean(scores_by_fold)


In [None]:
# Hyperparameters tuned for DecisionTreeRegressor, in the order the optimizer
# supplies them.
param_names = [
    'criterion',
    'splitter',
    'max_depth',
    'min_samples_split',
    'min_samples_leaf',
    'ccp_alpha',
]

# One dimension per entry of param_names, in the same order.
param_space = [
    # 'mse' was renamed to 'squared_error' in scikit-learn 1.0 and the old
    # spelling was removed in 1.2, where it is rejected as an invalid value.
    space.Categorical(['squared_error', 'friedman_mse'], name='criterion'),
    space.Categorical(['best', 'random'], name='splitter'),
    space.Integer(3, 10, name='max_depth'),
    # scikit-learn rejects an integer min_samples_split below 2, so the
    # range starts at 2; sampling 1 would make every CV fit fail.
    space.Integer(2, 10, name='min_samples_split'),
    space.Integer(1, 10, name='min_samples_leaf'),
    space.Real(0.01, 1, prior='uniform', name='ccp_alpha'),
]


In [None]:
# Bind the data and the parameter-name order so the optimizer only has to
# supply the list of hyperparameter values.
# NOTE(review): x_train / y_train are not defined in the visible part of this
# notebook -- confirm they are created before this cell runs.
optimization_function = partial(
    optimize_dtr,
    param_names=param_names,
    x=x_train,
    y=y_train,
)

# ``n_initial_points`` replaces ``n_random_starts``, which scikit-optimize
# deprecated in 0.8.
# NOTE(review): all 20 calls are random initial points, so no evaluation is
# ever proposed by the Gaussian-process surrogate; this behaves like a plain
# random search. Lower n_initial_points (or raise n_calls) if guided search
# was intended.
result = gp_minimize(
    optimization_function,
    dimensions=param_space,
    n_calls=20,
    n_initial_points=20,
    verbose=10,
)

# result.x is the best point found, ordered like param_space / param_names.
best_params = dict(zip(param_names, result.x))
print(best_params)

# XGB

In [None]:
from xgboost import XGBRegressor
def optimize_xgb(param, param_names, x, y):
    """Objective function: cross-validated MAE of an XGBRegressor.

    Args:
        param: Hyperparameter values, ordered like ``param_names``.
        param_names: Estimator keyword names matching ``param`` positionally.
        x: Feature data handed to ``cross_val_score``.
        y: Target data handed to ``cross_val_score``.

    Returns:
        The mean absolute error averaged over 10 folds, sign-flipped from the
        negated scorer so that lower is better.
    """
    booster_kwargs = {name: value for name, value in zip(param_names, param)}
    booster = XGBRegressor(**booster_kwargs)
    per_fold = cross_val_score(
        booster,
        x,
        y,
        cv=10,
        n_jobs=-1,
        scoring='neg_mean_absolute_error',
    )
    return -np.mean(per_fold)


In [None]:
# Search space for XGBRegressor; one dimension per tuned keyword.
param_space = [
    space.Integer(3, 15, name="max_depth"),
    space.Integer(100, 1500, name="n_estimators"),
    space.Real(0.01, 1, prior="uniform", name="eta"),
    space.Real(0.01, 1, prior="uniform", name="gamma"),
    space.Integer(1, 10, name='min_child_weight'),
]

# Keyword names, listed in the same order as the dimensions above.
param_names = ["max_depth", "n_estimators", 'eta', 'gamma', "min_child_weight"]


In [None]:
# Bind the data and the parameter-name order so the optimizer only has to
# supply the list of hyperparameter values.
# NOTE(review): x_train / y_train are not defined in the visible part of this
# notebook -- confirm they are created before this cell runs.
optimization_function = partial(
    optimize_xgb,
    param_names=param_names,
    x=x_train,
    y=y_train,
)

# ``n_initial_points`` replaces ``n_random_starts``, which scikit-optimize
# deprecated in 0.8.
# NOTE(review): all 20 calls are random initial points, so no evaluation is
# ever proposed by the Gaussian-process surrogate; this behaves like a plain
# random search. Lower n_initial_points (or raise n_calls) if guided search
# was intended.
result = gp_minimize(
    optimization_function,
    dimensions=param_space,
    n_calls=20,
    n_initial_points=20,
    verbose=10,
)

# result.x is the best point found, ordered like param_space / param_names.
best_params = dict(zip(param_names, result.x))
print(best_params)