In [1]:
from sklearn.metrics import make_scorer, mean_squared_log_error
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import Pipeline

from bayes_opt import BayesianOptimization

import pandas as pd
import numpy as np
import dill
import time

seed = 101 # Lucky seed; passed as random_seed to CatBoost and random_state to BayesianOptimization below

In [2]:
def load_dill(fname):
    """Read the dill-serialized object stored at path `fname` and return it.

    The file is opened in binary mode and closed again before returning.
    Like pickle, dill can run arbitrary code while loading, so only use this
    on files you produced yourself.
    """
    with open(fname, 'rb') as handle:
        payload = dill.load(handle)
    return payload
    
def rmsle(y_true, y_pred):
    """Root mean squared logarithmic error, computed on the original price scale.

    Both arguments are expected in log1p space (price was transformed with
    log1p previously), so they are mapped back with expm1 before being scored.

    Parameters
    ----------
    y_true : array-like
        Ground-truth targets in log1p space.
    y_pred : array-like
        Model predictions in log1p space.

    Returns
    -------
    float
        Square root of sklearn's mean_squared_log_error on the inverted values.
    """
    # mean_squared_log_error raises ValueError on any negative input. A
    # regressor may predict slightly below 0 in log space, which expm1 turns
    # into a negative price, so clamp predictions at 0 (i.e. price 0) instead
    # of letting one stray value abort a long cross-validation run.
    # y_true is deliberately left untouched so bad targets still surface.
    y_pred = np.maximum(y_pred, 0)
    return np.sqrt(mean_squared_log_error(np.expm1(y_true), np.expm1(y_pred)))

# Wrap rmsle as a scorer. greater_is_better=False marks it as a loss, so the
# scorer reports the negated value and "higher is better" holds for callers.
neg_rmsle = make_scorer(
    rmsle,
    greater_is_better=False,
)

Load data from mercari_process.ipynb.

In [3]:
# Unpack the training features and target from the saved bundle.
data = load_dill('mercari.dill')
X_train = data['X_train']
y_train = data['y_train']

Remember the features are as follows:
- item_condition_id
- shipping
- cat_0
- cat_1
- cat_2
- brand_name

The remaining features are the full descriptions, transformed via LSA (latent semantic analysis).

In [6]:
from catboost import CatBoostRegressor

def target(**params):
    """Objective function: mean 3-fold cross-validated score of a CatBoost model.

    Expects the keyword arguments 'learning_rate', 'iterations' and 'depth'.
    'iterations' and 'depth' are truncated to int before being passed to
    CatBoostRegressor. The model is scored on X_train / y_train with the
    neg_rmsle scorer, and the mean over the folds is returned.
    """
    hyperparams = {
        'learning_rate': params['learning_rate'],
        # These two arrive as floats from the search space; cast them to int.
        'iterations': int(params['iterations']),
        'depth': int(params['depth']),
    }
    regressor = CatBoostRegressor(logging_level='Silent',
                                  random_seed=seed,
                                  **hyperparams)
    fold_scores = cross_val_score(regressor, X_train, y_train,
                                  scoring=neg_rmsle, cv=3)
    return fold_scores.mean()
    
# Search space: (lower, upper) bounds for each keyword argument of `target`.
params = dict(
    learning_rate=(1e-4, 1),
    iterations=(10, 300),
    depth=(1, 10),
)
bo = BayesianOptimization(target, params, random_state=seed)
bo.gp.set_params(alpha=1e-8)
# 5 initialization evaluations, then 5 optimization steps using the 'ucb'
# acquisition function with kappa=5.
bo.maximize(init_points=5, n_iter=5, acq='ucb', kappa=5)

[31mInitialization[0m
[94m--------------------------------------------------------------------------[0m
 Step |   Time |      Value |     depth |   iterations |   learning_rate | 
    1 | 30m01s | [35m  -0.55789[0m | [32m   5.9880[0m | [32m    251.8301[0m | [32m         0.5164[0m | 
    2 | 13m23s |   -0.59087 |    4.1692 |      99.0202 |          0.5707 | 
    3 | 16m32s |   -0.67912 |    2.6370 |     269.1478 |          0.0286 | 
    4 | 43m53s |   -0.55971 |    8.0704 |     219.2477 |          0.1716 | 
    5 | 17m10s |   -0.57276 |    9.6893 |      65.0823 |          0.6853 | 
[31mBayesian Optimization[0m
[94m--------------------------------------------------------------------------[0m
 Step |   Time |      Value |     depth |   iterations |   learning_rate | 
    6 | 05m34s |   -3.06894 |    1.0000 |      10.0000 |          0.0001 | 
    7 | 41m58s |   -0.56062 |   10.0000 |     159.0267 |          1.0000 | 
    8 | 74m14s |   -0.56245 |   10.0000 |     300.0000 | 