In [9]:
import pandas as pd
import lightgbm as lgb
from sklearn.metrics import mean_squared_error

#### Install Weights & Biases

In [10]:
%%capture
!pip install wandb

#### Log in to wandb

In [11]:
# wandb is installed by the preceding cell, so it is imported here rather
# than alongside the imports in the first cell.
import wandb
from wandb.lightgbm import wandb_callback

# Authenticate this session with Weights & Biases.
wandb.login()

True

#### Load training and test data

In [12]:
!curl https://raw.githubusercontent.com/microsoft/LightGBM/master/examples/regression/regression.train --output regression.train --silent
!curl https://raw.githubusercontent.com/microsoft/LightGBM/master/examples/regression/regression.test --output regression.test --silent

In [13]:
# Both files are read as tab-separated with no header row, so pandas assigns
# integer column labels; column 0 is taken as the target, the rest as features.
df_train, df_test = (
    pd.read_csv(path, sep='\t', header=None)
    for path in ('regression.train', 'regression.test')
)

X_train, y_train = df_train.drop(columns=0), df_train[0]
X_test, y_test = df_test.drop(columns=0), df_test[0]

# Wrap both splits as LightGBM datasets; the evaluation set is created with
# the training set as its reference.
lgb_train = lgb.Dataset(X_train, label=y_train)
lgb_eval = lgb.Dataset(X_test, label=y_test, reference=lgb_train)

#### 1. `wandb.init()`: initialize a wandb run
#### 2. `wandb_callback()`: LightGBM callback that integrates training with wandb
#### 3. `wandb.log()`: log results so they can be seen in the wandb UI

In [14]:
# Training configuration; the same dict is passed to wandb.init() as the
# run config and to lgb.train() as the booster parameters.
params = {
    'boosting_type': 'gbdt',
    'objective': 'regression',
    'metric': ['rmse', 'l2', 'l1', 'huber'],
    'num_leaves': 31,
    'learning_rate': 0.06,
    'feature_fraction': 0.9,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'verbosity': 0
}

wandb.init(project='gbm-dhiman', config=params);

# Train with the wandb callback attached.
# - valid_names is a real list: ('validation') is only a parenthesised
#   string, not a one-element tuple.
# - Early stopping is requested through the lgb.early_stopping callback; the
#   `early_stopping_rounds` keyword of lgb.train() was removed in LightGBM 4.0.
gbm = lgb.train(
    params,
    lgb_train,
    num_boost_round=30,
    valid_sets=[lgb_eval],
    valid_names=['validation'],
    callbacks=[
        wandb_callback(),
        lgb.early_stopping(stopping_rounds=5),
    ],
)

# Predict on the test features using the booster's best iteration.
y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)

# Compute the test RMSE once, then print it and log it to the wandb run.
rmse = mean_squared_error(y_test, y_pred) ** 0.5
print('The rmse of prediction is:', rmse)
wandb.log({'Prediction': rmse})

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[1]	validation's rmse: 0.493229	validation's l2: 0.243275	validation's l1: 0.492165	validation's huber: 0.121637
Training until validation scores don't improve for 5 rounds
[2]	validation's rmse: 0.488682	validation's l2: 0.23881	validation's l1: 0.487438	validation's huber: 0.119405
[3]	validation's rmse: 0.483228	validation's l2: 0.233509	validation's l1: 0.48156	validation's huber: 0.116755
[4]	validation's rmse: 0.478201	validation's l2: 0.228676	validation's l1: 0.475989	validation's huber: 0.114338
[5]	validation's rmse: 0.475061	validation's l2: 0.225683	validation's l1: 0.472263	validation's huber: 0.112842
[6]	validation's rmse: 0.471554	validation's l2: 0.222363	validation's l1: 0.468271	validation's huber: 0.111182
[7]	validation's rmse: 0.467668	validation's l2: 0.218713	validation's l1: 0.463499	validation's huber: 0.109357
[8]	validation's rmse: 0.4641