In [1]:
import pandas as pd
import lightgbm as lgb
from sklearn.metrics import mean_squared_error

In [2]:
%%capture
!pip install wandb

In [3]:
import wandb
from wandb.lightgbm import wandb_callback

# Log in to Weights & Biases before the run is initialised further down.
# The recorded output below shows an already-authenticated session being reused.
wandb.login()

wandb: Currently logged in as: dbhadore (use `wandb login --relogin` to force relogin)


True

In [4]:
!curl https://raw.githubusercontent.com/microsoft/LightGBM/master/examples/regression/regression.train --output regression.train --silent
!curl https://raw.githubusercontent.com/microsoft/LightGBM/master/examples/regression/regression.test --output regression.test --silent

In [5]:
# Read the train and test files; both are parsed as tab-separated text with
# no header row, so the columns are labelled 0, 1, 2, ...
df_train, df_test = (
    pd.read_csv(path, header=None, sep='\t')
    for path in ('regression.train', 'regression.test')
)

# Column 0 is used as the label; every remaining column is a feature.
X_train, y_train = df_train.drop(columns=0), df_train[0]
X_test, y_test = df_test.drop(columns=0), df_test[0]

# Wrap the frames as LightGBM datasets; the evaluation set references the
# training set.
lgb_train = lgb.Dataset(data=X_train, label=y_train)
lgb_eval = lgb.Dataset(data=X_test, label=y_test, reference=lgb_train)

In [6]:
# Training configuration; the same dict is handed to LightGBM and recorded as
# the W&B run config.
params = {
    'boosting_type': 'gbdt',
    'objective': 'regression',
    'metric': ['rmse', 'l2', 'l1', 'huber'],
    'num_leaves': 31,
    'learning_rate': 0.05,
    'feature_fraction': 0.9,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'verbosity': 0
}

# Trailing ';' suppresses the repr of the returned run object.
wandb.init(project='my-lightgbm-integration-dhiman', config=params);

# Train for at most 30 rounds, evaluating on the held-out set each round.
# - wandb_callback() forwards the per-iteration evaluation results to W&B.
# - lgb.early_stopping(5) stops once the validation scores have not improved
#   for 5 rounds. It replaces the `early_stopping_rounds=` keyword, which
#   lgb.train() no longer accepts as of LightGBM 4.0; the callback form works
#   on both older and newer releases.
# valid_sets / valid_names are passed as real one-element lists: the previous
# ('validation') was just a parenthesised string, not a tuple.
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=30,
                valid_sets=[lgb_eval],
                valid_names=['validation'],
                callbacks=[wandb_callback(),
                           lgb.early_stopping(stopping_rounds=5)])

# Predict with the best iteration found during training.
y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)

# Compute the test RMSE once, then print it and log it to the W&B run.
rmse = mean_squared_error(y_test, y_pred) ** 0.5
print('The rmse of prediction is:', rmse)
wandb.log({'rmse_prediction': rmse})

You can set `force_col_wise=true` to remove the overhead.
[1]	validation's rmse: 0.494041	validation's l2: 0.244076	validation's l1: 0.493018	validation's huber: 0.122038
Training until validation scores don't improve for 5 rounds
[2]	validation's rmse: 0.490201	validation's l2: 0.240297	validation's l1: 0.489056	validation's huber: 0.120148
[3]	validation's rmse: 0.485523	validation's l2: 0.235733	validation's l1: 0.484089	validation's huber: 0.117866
[4]	validation's rmse: 0.480991	validation's l2: 0.231352	validation's l1: 0.479088	validation's huber: 0.115676
[5]	validation's rmse: 0.478476	validation's l2: 0.228939	validation's l1: 0.476159	validation's huber: 0.11447
[6]	validation's rmse: 0.475321	validation's l2: 0.22593	validation's l1: 0.472664	validation's huber: 0.112965
[7]	validation's rmse: 0.471715	validation's l2: 0.222515	validation's l1: 0.468425	validation's huber: 0.111258
[8]	validation's rmse: 0.468582	validation's l2: 0.219569	validation's l1: 0.464594	validatio