# LightGBM & ipywidgets을 통한 시각화
* LightGBM 공식 github에 있는 파일
https://github.com/microsoft/LightGBM/blob/master/examples/python-guide/notebooks/interactive_plot_example.ipynb
* datafile: https://github.com/microsoft/LightGBM/blob/master/examples/regression/regression.train , https://github.com/microsoft/LightGBM/blob/master/examples/regression/regression.test

In [None]:
!pip install lightgbm --upgrade   # 2.2.3 ---> 3.21

## Load libraries

In [1]:
import pandas as pd
import lightgbm as lgb

import matplotlib.pyplot as plt

%matplotlib inline

try:
    # To enable interactive mode you should install ipywidgets
    # https://github.com/jupyter-widgets/ipywidgets
    from ipywidgets import interact, SelectMultiple
    INTERACTIVE = True
    !jupyter nbextension enable --py widgetsnbextension
    import cufflinks as cf
    cf.go_offline(connected=True)
    print('INTERACTIVE On')
except ImportError:
    INTERACTIVE = False
    print('INTERACTIVE Off')

print('INTERACTIVE: ',INTERACTIVE)

Enabling notebook extension jupyter-js-widgets/extension...
      - Validating: [32mOK[0m


INTERACTIVE On
INTERACTIVE:  True


## Load data

In [2]:
# Both files are read as tab-separated text without a header row, so the
# columns get integer labels starting at 0.
df_train, df_test = (
    pd.read_csv(data_path, header=None, sep='\t')
    for data_path in ('regression.train', 'regression.test')
)

# Preview the first rows of the training frame.
df_train.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28
0,1,0.869,-0.635,0.226,0.327,-0.69,0.754,-0.249,-1.092,0.0,1.375,-0.654,0.93,1.107,1.139,-1.578,-1.047,0.0,0.658,-0.01,-0.046,3.102,1.354,0.98,0.978,0.92,0.722,0.989,0.877
1,1,0.908,0.329,0.359,1.498,-0.313,1.096,-0.558,-1.588,2.173,0.813,-0.214,1.271,2.215,0.5,-1.261,0.732,0.0,0.399,-1.139,-0.001,0.0,0.302,0.833,0.986,0.978,0.78,0.992,0.798
2,1,0.799,1.471,-1.636,0.454,0.426,1.105,1.282,1.382,0.0,0.852,1.541,-0.82,2.215,0.993,0.356,-0.209,2.548,1.257,1.129,0.9,0.0,0.91,1.108,0.986,0.951,0.803,0.866,0.78
3,0,1.344,-0.877,0.936,1.992,0.882,1.786,-1.647,-0.942,0.0,2.423,-0.676,0.736,2.215,1.299,-1.431,-0.365,0.0,0.745,-0.678,-1.36,0.0,0.947,1.029,0.999,0.728,0.869,1.027,0.958
4,1,1.105,0.321,1.522,0.883,-1.205,0.681,-1.07,-0.922,0.0,0.801,1.021,0.971,2.215,0.597,-0.35,0.631,0.0,0.48,-0.374,0.113,0.0,0.756,1.361,0.987,0.838,1.133,0.872,0.808


In [3]:
# Column 0 (the left-most one) is the target label, noted as 0 or 1.
y_train, y_test = df_train[0], df_test[0]

# Every remaining column is a feature: drop the label column from each frame.
X_train = df_train.drop(columns=0)
X_test = df_test.drop(columns=0)

print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(7000, 28) (7000,) (500, 28) (500,)


## Create Dataset object for LightGBM

In [4]:
# Wrap the training features/labels in a LightGBM Dataset.
lgb_train = lgb.Dataset(data=X_train, label=y_train)
# The evaluation Dataset is created with the training Dataset as its reference.
lgb_test = lgb.Dataset(data=X_test, label=y_test, reference=lgb_train)

## Configuration dictionary

In [5]:
# Training configuration handed to lgb.train. The 'metric' list is also
# reused later to populate the metric-selection widget.
params = dict(
    num_leaves=5,
    metric=['l1', 'l2'],
    verbose=-1,
)

## Training

In [6]:
# Filled in by lgb.train with the per-iteration metric values; the metric
# plot below reads from it.
evals_result = dict()

gbm = lgb.train(
    params=params,
    train_set=lgb_train,
    num_boost_round=100,
    # Evaluate on the training set as well as the test set.
    valid_sets=[lgb_train, lgb_test],
    # Name the features f1, f2, ... one per column of X_train.
    feature_name=['f{}'.format(col_no)
                  for col_no in range(1, X_train.shape[-1] + 1)],
    categorical_feature=[21],
    evals_result=evals_result,
    # Log the metrics every 10 boosting rounds.
    verbose_eval=10,
)

[10]	training's l1: 0.457448	training's l2: 0.217995	valid_1's l1: 0.456464	valid_1's l2: 0.21641
[20]	training's l1: 0.436869	training's l2: 0.205099	valid_1's l1: 0.434057	valid_1's l2: 0.201616
[30]	training's l1: 0.421302	training's l2: 0.197421	valid_1's l1: 0.417019	valid_1's l2: 0.192514
[40]	training's l1: 0.411107	training's l2: 0.192856	valid_1's l1: 0.406303	valid_1's l2: 0.187258
[50]	training's l1: 0.403695	training's l2: 0.189593	valid_1's l1: 0.398997	valid_1's l2: 0.183688
[60]	training's l1: 0.398704	training's l2: 0.187043	valid_1's l1: 0.393977	valid_1's l2: 0.181009
[70]	training's l1: 0.394876	training's l2: 0.184982	valid_1's l1: 0.389805	valid_1's l2: 0.178803
[80]	training's l1: 0.391147	training's l2: 0.1828	valid_1's l1: 0.386476	valid_1's l2: 0.176799
[90]	training's l1: 0.388101	training's l2: 0.180817	valid_1's l1: 0.384404	valid_1's l2: 0.175775
[100]	training's l1: 0.385174	training's l2: 0.179171	valid_1's l1: 0.382929	valid_1's l2: 0.175321



categorical_feature in Dataset is overridden.
New categorical_feature is [21]


Overriding the parameters from Reference Dataset.


categorical_column in param dict is overridden.



## Plot metrics recorded during training

In [7]:
def render_metric(metric_name):
    """Draw the curve of one metric recorded during training.

    Reads the module-level ``evals_result`` dict and shows the figure
    immediately, so it can be used directly as an ``interact`` callback.

    Args:
        metric_name: Name of the metric to plot; forwarded to
            ``lgb.plot_metric`` as ``metric``.
    """
    lgb.plot_metric(booster=evals_result, metric=metric_name, figsize=(10, 5))
    plt.show()

In [8]:
# Show whether interactive (ipywidgets) mode was enabled by the setup cell.
print(INTERACTIVE)

True


In [9]:
if not INTERACTIVE:
    # Static fallback: plot only the first configured metric.
    render_metric(params['metric'][0])
else:
    # Dropdown widget for switching between the configured metrics.
    print('widget!!!!')
    interact(render_metric, metric_name=params['metric'])

widget!!!!


interactive(children=(Dropdown(description='metric_name', options=('l1', 'l2'), value='l1'), Output()), _dom_c…

## Plot feature importances

In [10]:
def render_plot_importance(importance_type, max_features=10,
                           ignore_zero=True, precision=3):
    """Draw the feature-importance chart of the trained model ``gbm``.

    Every argument is forwarded to ``lgb.plot_importance``; the figure is
    shown immediately so the function can serve as an ``interact`` callback.

    Args:
        importance_type: Forwarded as ``importance_type``.
        max_features: Forwarded as ``max_num_features``.
        ignore_zero: Forwarded as ``ignore_zero``.
        precision: Forwarded as ``precision``.
    """
    plot_options = dict(
        importance_type=importance_type,
        max_num_features=max_features,
        ignore_zero=ignore_zero,
        figsize=(12, 8),
        precision=precision,
    )
    lgb.plot_importance(gbm, **plot_options)
    plt.show()

In [11]:
if not INTERACTIVE:
    # Static fallback: a single chart using the 'split' importance type.
    render_plot_importance(importance_type='split')
else:
    # Widget controls: importance type, number of features shown
    # (1 .. number of columns in X_train) and label precision (0 .. 10).
    interact(
        render_plot_importance,
        importance_type=['split', 'gain'],
        max_features=(1, X_train.shape[-1]),
        precision=(0, 10),
    )

interactive(children=(Dropdown(description='importance_type', options=('split', 'gain'), value='split'), IntSl…

## Plot split value histogram

In [12]:
def render_histogram(feature):
    """Draw the split-value histogram of one feature of the model ``gbm``.

    The figure is shown immediately so the function can serve as an
    ``interact`` callback.

    Args:
        feature: Feature to plot; forwarded to
            ``lgb.plot_split_value_histogram`` as ``feature``.
    """
    histogram_options = dict(feature=feature, bins='auto', figsize=(10, 5))
    lgb.plot_split_value_histogram(gbm, **histogram_options)
    plt.show()

In [13]:
if not INTERACTIVE:
    # Static fallback: show the histogram for feature 'f26' only.
    render_histogram(feature='f26')
else:
    # Dropdown widget listing every feature name known to the model.
    interact(render_histogram, feature=gbm.feature_name())

interactive(children=(Dropdown(description='feature', options=('f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8',…

## Plot trees

In [14]:
def render_tree(tree_index, show_info, precision=3):
    """Build the graph of a single tree of the trained model ``gbm``.

    Args:
        tree_index: Index of the tree; forwarded as ``tree_index``.
        show_info: Collection of info names to show in the nodes. If it
            contains the string ``'None'``, ``None`` is forwarded instead.
        precision: Forwarded as ``precision``.

    Returns:
        Whatever ``lgb.create_tree_digraph`` returns for these arguments.
    """
    # 'None' is the widget's "no extra info" choice and overrides anything
    # else that was selected alongside it.
    if 'None' in show_info:
        node_info = None
    else:
        node_info = show_info
    return lgb.create_tree_digraph(
        gbm,
        tree_index=tree_index,
        show_info=node_info,
        precision=precision,
    )

In [15]:
# Requires Graphviz: install it from https://graphviz.org/download/ and
# register it on the PATH.

if not INTERACTIVE:
    # Static fallback: render tree 53 with no extra node info.
    tree = render_tree(53, ['None'])
else:
    # Widget controls: a tree-index slider, a multi-select of node info
    # fields, and a precision slider.
    interact(
        render_tree,
        tree_index=(0, gbm.num_trees() - 1),
        show_info=SelectMultiple(  # several values may be selected at once
            options=['None',
                     'split_gain',
                     'internal_value',
                     'internal_count',
                     'internal_weight',
                     'leaf_count',
                     'leaf_weight',
                     'data_percentage'],
            value=['None'],
        ),
        precision=(0, 10),
    )
    # The widget renders the tree itself, so there is nothing extra to display.
    tree = None
tree

interactive(children=(IntSlider(value=49, description='tree_index', max=99), SelectMultiple(description='show_…

In [20]:
# Display the LightGBM version currently loaded in this kernel.
lgb.__version__

'2.2.3'

In [21]:
!pip install lightgbm --upgrade

Collecting lightgbm
  Downloading lightgbm-3.2.1-py3-none-manylinux1_x86_64.whl (2.0 MB)
[K     |████████████████████████████████| 2.0 MB 31.3 MB/s 
Installing collected packages: lightgbm
  Attempting uninstall: lightgbm
    Found existing installation: lightgbm 2.2.3
    Uninstalling lightgbm-2.2.3:
      Successfully uninstalled lightgbm-2.2.3
Successfully installed lightgbm-3.2.1
