In [12]:
# Taken from https://github.com/Microsoft/LightGBM/blob/master/examples/python-guide/simple_example.py
import numpy as np
import pandas as pd
import lightgbm as lgb

from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV

print('Loading data...')
# Both files are read as tab-separated text without a header row.
# Column 0 is used as the target below; every other column is a feature.
df_train = pd.read_csv('./regression.train.txt', sep='\t', header=None)
df_test = pd.read_csv('./regression.test.txt', sep='\t', header=None)

X_train, y_train = df_train.drop(columns=[0]), df_train[0]
X_test, y_test = df_test.drop(columns=[0]), df_test[0]

print('Starting training...')
# Fit a small gradient-boosted regressor and monitor L1 error on the
# evaluation set. Early stopping is requested through the callback API:
# the `early_stopping_rounds` keyword of `fit()` was removed in LightGBM 4.0,
# so passing it raises a TypeError on current releases.
gbm = lgb.LGBMRegressor(num_leaves=31,
                        learning_rate=0.05,
                        n_estimators=20)
gbm.fit(X_train, y_train,
        eval_set=[(X_test, y_test)],
        eval_metric='l1',
        callbacks=[lgb.early_stopping(stopping_rounds=5)])

print('Starting predicting...')
# Predict on the evaluation rows, limiting the model to its best iteration.
best_iter = gbm.best_iteration_
y_pred = gbm.predict(X_test, num_iteration=best_iter)

# Root-mean-squared error of those predictions.
rmse = mean_squared_error(y_test, y_pred) ** 0.5
print('The rmse of prediction is:', rmse)

# Importance scores exposed by the fitted estimator, as a plain list.
importances = list(gbm.feature_importances_)
print('Feature importances:', importances)


# self-defined eval metric
# f(y_true: array, y_pred: array) -> name: string, eval_result: float, is_higher_better: bool
# Root Mean Squared Logarithmic Error (RMSLE)
def rmsle(y_true, y_pred):
    """Root Mean Squared Logarithmic Error, shaped as a custom eval metric.

    Parameters
    ----------
    y_true : array
        Observed target values.
    y_pred : array
        Predicted values.

    Returns
    -------
    tuple
        ``('RMSLE', value, False)``: the metric name, its value, and a flag
        stating that a higher value is not better.
    """
    # Difference of log(1 + x) between predictions and targets.
    log_gap = np.log1p(y_pred) - np.log1p(y_true)
    mean_sq_log_err = np.mean(np.power(log_gap, 2))
    return 'RMSLE', np.sqrt(mean_sq_log_err), False


print('Starting training with custom eval function...')
# Refit the same estimator, scoring the evaluation set with the user-defined
# `rmsle` callable. Early stopping uses the callback API because the
# `early_stopping_rounds` keyword of `fit()` was removed in LightGBM 4.0.
gbm.fit(X_train, y_train,
        eval_set=[(X_test, y_test)],
        eval_metric=rmsle,
        callbacks=[lgb.early_stopping(stopping_rounds=5)])


# another self-defined eval metric
# f(y_true: array, y_pred: array) -> name: string, eval_result: float, is_higher_better: bool
# Relative Absolute Error (RAE)
def rae(y_true, y_pred):
    """Relative Absolute Error, shaped as a custom eval metric.

    The value is ``sum(|y_pred - y_true|) / sum(|mean(y_true) - y_true|)``.

    Parameters
    ----------
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Predicted values, same length as ``y_true``.

    Returns
    -------
    tuple
        ``('RAE', value, False)``: the metric name, its value, and a flag
        stating that a higher value is not better.

    Notes
    -----
    When every entry of ``y_true`` equals the mean, the denominator is zero
    and the value follows NumPy's division-by-zero behaviour (``inf``/``nan``).
    """
    # Coerce to ndarrays so plain Python sequences are accepted and the
    # subtraction is always element-wise by position.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    abs_error = np.sum(np.abs(y_pred - y_true))
    baseline_error = np.sum(np.abs(np.mean(y_true) - y_true))
    return 'RAE', abs_error / baseline_error, False


print('Starting training with multiple custom eval functions...')
# Refit with two custom metrics at once: the callable returns a list holding
# one (name, value, is_higher_better) tuple per metric. Early stopping uses
# the callback API because the `early_stopping_rounds` keyword of `fit()`
# was removed in LightGBM 4.0.
gbm.fit(X_train, y_train,
        eval_set=[(X_test, y_test)],
        eval_metric=lambda y_true, y_pred: [rmsle(y_true, y_pred), rae(y_true, y_pred)],
        callbacks=[lgb.early_stopping(stopping_rounds=5)])

print('Starting predicting...')
# Predict on the evaluation rows, limiting the model to its best iteration.
y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration_)

# Each custom metric returns (name, value, is_higher_better); index 1 is the value.
rmsle_value = rmsle(y_test, y_pred)[1]
print('The rmsle of prediction is:', rmsle_value)
rae_value = rae(y_test, y_pred)[1]
print('The rae of prediction is:', rae_value)

# other scikit-learn modules
# Tune the regressor's learning rate and tree count with a grid search.
estimator = lgb.LGBMRegressor(num_leaves=31)

param_grid = dict(
    learning_rate=[0.01, 0.1, 1],
    n_estimators=[20, 40],
)

# NOTE: from here on `gbm` is bound to the GridSearchCV object rather than
# the LGBMRegressor fitted earlier.
gbm = GridSearchCV(estimator=estimator, param_grid=param_grid, cv=3)
gbm.fit(X_train, y_train)

print('Best parameters found by grid search are:', gbm.best_params_)

Loading data...
Starting training...
[1]	valid_0's l2: 0.242763	valid_0's l1: 0.491735
Training until validation scores don't improve for 5 rounds.
[2]	valid_0's l2: 0.237895	valid_0's l1: 0.486563
[3]	valid_0's l2: 0.233277	valid_0's l1: 0.481489
[4]	valid_0's l2: 0.22925	valid_0's l1: 0.476848
[5]	valid_0's l2: 0.226155	valid_0's l1: 0.47305
[6]	valid_0's l2: 0.222963	valid_0's l1: 0.469049
[7]	valid_0's l2: 0.220364	valid_0's l1: 0.465556
[8]	valid_0's l2: 0.217872	valid_0's l1: 0.462208
[9]	valid_0's l2: 0.215328	valid_0's l1: 0.458676
[10]	valid_0's l2: 0.212743	valid_0's l1: 0.454998
[11]	valid_0's l2: 0.210805	valid_0's l1: 0.452047
[12]	valid_0's l2: 0.208945	valid_0's l1: 0.449158
[13]	valid_0's l2: 0.206986	valid_0's l1: 0.44608
[14]	valid_0's l2: 0.205513	valid_0's l1: 0.443554
[15]	valid_0's l2: 0.203728	valid_0's l1: 0.440643
[16]	valid_0's l2: 0.201865	valid_0's l1: 0.437687
[17]	valid_0's l2: 0.200639	valid_0's l1: 0.435454
[18]	valid_0's l2: 0.199522	valid_0's l1: 0.433

In [19]:
import blazee
import os
import logging
# Show INFO-level log records emitted while this cell runs.
logging.basicConfig(level=logging.INFO)
# IPython autoreload in mode 2.
%load_ext autoreload
%autoreload 2

# Deploy the model on Blazee
# The API key is read from the environment, so it is never written into the
# notebook; a missing BLAZEE_API_KEY variable raises KeyError here.
api_key = os.environ['BLAZEE_API_KEY']
bz = blazee.Blazee(api_key)
# NOTE(review): when cells are run top to bottom, `gbm` is the GridSearchCV
# object created in the previous cell, not a bare LGBMRegressor -- confirm
# that this is the object meant to be deployed.
bm = bz.deploy_model(gbm, model_name="SK LightGBM Regressor")
# Bare expression so the notebook displays the returned model handle.
bm

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


INFO:root:Uploading model version to Blazee  (108.8 KB)...
INFO:root:Deploying new model version: v1...
INFO:root:Successfully deployed model version 0ecf3524-a003-4367-be6b-79ce3856d989


<BlazeeModel 'SK LightGBM Regressor'
	id=2adba5ba-9a0c-4571-9072-9ba0f18468da>

In [21]:
# Predictions from Blazee model
# Collect the `.prediction` attribute of every item returned by predict_batch.
blazee_results = bm.predict_batch(X_test)
[result.prediction for result in blazee_results]

[0.8257646282148502,
 0.4123071018043463,
 0.24269368102594482,
 0.4696884764490895,
 0.24659772613405778,
 0.33670517896588925,
 0.36510372822253734,
 0.384108996126978,
 0.7135510119323984,
 0.36821617725769756,
 0.618979758082155,
 0.7787357351263059,
 0.7583265448995183,
 0.7469671278799553,
 0.46549842187233004,
 0.766383794393904,
 0.39381455084446637,
 0.6029948474423068,
 0.6134054497397179,
 0.5399952990183657,
 0.7392494515151498,
 0.6492501428744724,
 0.5316622297336435,
 0.4655239343539504,
 0.5003303077752469,
 0.6289086395189045,
 0.5961926540349629,
 0.8153805638279291,
 0.4501762904530452,
 0.7867929020048664,
 0.6289618620986811,
 0.6653706406929076,
 0.33620955250842777,
 0.6021165275273311,
 0.4827693556053621,
 0.28082298844689957,
 0.2672589691845787,
 0.10695389007437166,
 0.5296159758279994,
 0.8082771894726081,
 0.08489917657112316,
 0.704823865967765,
 0.3063660357841089,
 0.4148757439305074,
 0.2866171410703307,
 -0.04961621997673224,
 0.5774784170012971,
 0.4

In [22]:
# Predictions computed in the kernel by `gbm` on the same X_test rows,
# presumably shown for comparison with the values returned by the
# deployed Blazee model.
gbm.predict(X_test)

array([ 0.82576463,  0.4123071 ,  0.24269368,  0.46968848,  0.24659773,
        0.33670518,  0.36510373,  0.384109  ,  0.71355101,  0.36821618,
        0.61897976,  0.77873574,  0.75832654,  0.74696713,  0.46549842,
        0.76638379,  0.39381455,  0.60299485,  0.61340545,  0.5399953 ,
        0.73924945,  0.64925014,  0.53166223,  0.46552393,  0.50033031,
        0.62890864,  0.59619265,  0.81538056,  0.45017629,  0.7867929 ,
        0.62896186,  0.66537064,  0.33620955,  0.60211653,  0.48276936,
        0.28082299,  0.26725897,  0.10695389,  0.52961598,  0.80827719,
        0.08489918,  0.70482387,  0.30636604,  0.41487574,  0.28661714,
       -0.04961622,  0.57747842,  0.47075764,  0.89740194,  0.47968334,
        1.02339966,  0.39447373,  0.13866534,  0.8374728 ,  0.78923258,
        0.49993217,  0.14629382,  0.38747214,  0.61929202,  0.79523654,
        0.37456597,  0.83894295,  0.81924898,  0.32747004,  0.78078941,
        0.28393855,  0.52053557,  0.15417987,  0.87339303,  0.80