# Bayesian optimization

With baselines ready it's now time to find optimal hyperparams for LightGBM model 

In [1]:
import os
import skopt
import pandas as pd
from importlib.machinery import SourceFileLoader

In [2]:
output_data_path = os.path.join("..", "data", "final")

## 1. Load data

In [3]:
x_train = pd.read_csv(os.path.join(output_data_path, 'x_train.csv'))
y_train = pd.read_csv(os.path.join(output_data_path, 'y_train.csv'))

In [4]:
x_train.head()

Unnamed: 0,start_lat,start_long,start_alt,end_lat,end_long,end_alt,distance
0,37.789756,-122.394643,3.633262,37.776619,-122.417385,16.152735,2.476302
1,37.776619,-122.417385,16.152735,37.786305,-122.404966,12.183614,1.533845
2,37.795392,-122.394203,2.596962,37.80477,-122.403234,2.659792,1.310756
3,37.771058,-122.402717,3.03987,37.790302,-122.390637,2.818376,2.389468
4,37.786978,-122.398108,7.070823,37.776617,-122.39526,2.990568,1.179335


## 2. Define search space

In [5]:
static_params = {
    'objective': 'regression',
    'metric': 'l2',
    'boosting': 'gbdt',
    'random_state': 42,
    'verbosity': -1
}

opt_params = [
    skopt.space.Real(0.01, 0.1, name='learning_rate', prior='log-uniform'),
    skopt.space.Integer(2, 50, name='num_leaves'),
    skopt.space.Integer(2, 10, name='max_depth'),
    skopt.space.Integer(10, 50, name='min_data_in_leaf'),
    skopt.space.Real(0.8, 1.0, name='bagging_fraction', prior='log-uniform'),
    skopt.space.Integer(0, 5, name='bagging_freq')
]

## 3. Define objective function

In [6]:
utils = SourceFileLoader('utils', os.path.join("..", "app", "backend", 'utils.py')).load_module()

In [7]:
@skopt.utils.use_named_args(opt_params)
def objective(**params):
  return utils.lgbm_regression_cv(x_train.values, y_train.values, {**params, **static_params}, cv=5)

## 4. Callbacks

In [8]:
early_stopping = skopt.callbacks.EarlyStopper()
verbosity = skopt.callbacks.VerboseCallback(n_total=1)

Iteration No: 1 started. Searching for the next optimal point.


## 5. Run search


In [9]:
results = skopt.gp_minimize(objective, opt_params, callback=[verbosity])

Iteration No: 1 ended. Search finished for the next optimal point.
Time taken: 138.9630
Function value obtained: 3.7294
Current minimum: 3.7294
Iteration No: 2 ended. Search finished for the next optimal point.
Time taken: 291.0264
Function value obtained: 3.6959
Current minimum: 3.6959
Iteration No: 3 ended. Search finished for the next optimal point.
Time taken: 374.9009
Function value obtained: 3.6612
Current minimum: 3.6612
Iteration No: 4 ended. Search finished for the next optimal point.
Time taken: 522.4568
Function value obtained: 3.6835
Current minimum: 3.6612
Iteration No: 5 ended. Search finished for the next optimal point.
Time taken: 581.8512
Function value obtained: 3.8562
Current minimum: 3.6612
Iteration No: 6 ended. Search finished for the next optimal point.
Time taken: 644.7115
Function value obtained: 3.6714
Current minimum: 3.6612
Iteration No: 7 ended. Search finished for the next optimal point.
Time taken: 779.1050
Function value obtained: 3.7200
Current minimum:

In [11]:
skopt.dump(results, os.path.join(output_data_path, 'gp_optim_results.pkl'), store_objective=False)