# Import

* https://github.com/SauceCat/PDPbox
* http://scikit-learn.org/stable/auto_examples/ensemble/plot_partial_dependence.html
* https://christophm.github.io/interpretable-ml-book/pdp.html

In [1]:
import logging

import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
from sklearn.datasets.california_housing import fetch_california_housing
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble.partial_dependence import (partial_dependence,
                                                 plot_partial_dependence)
from sklearn.model_selection import train_test_split

# Use the root logger (no name passed) so every cell in the notebook shares it.
logger = logging.getLogger()
# Lower the threshold to DEBUG so the info-level progress messages below are not filtered out.
logger.setLevel(logging.DEBUG)

  from numpy.core.umath_tests import inner1d


# Build GBR Model

In [2]:
# Fetch the California housing dataset and keep its column labels for plotting.
cal_housing = fetch_california_housing()
names = cal_housing.feature_names

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    cal_housing.data,
    cal_housing.target,
    test_size=0.2,
    random_state=1,
)

# Fit a gradient-boosted regressor with Huber loss on the training portion.
logger.info("Training GBRT...")
clf = GradientBoostingRegressor(
    loss='huber',
    learning_rate=0.1,
    n_estimators=100,
    max_depth=4,
    random_state=1,
).fit(X_train, y_train)
logger.info("done.")

In [3]:
# Bare expression: the notebook echoes the loaded dataset object as the cell output.
cal_housing

{'data': array([[   8.3252    ,   41.        ,    6.98412698, ...,    2.55555556,
           37.88      , -122.23      ],
        [   8.3014    ,   21.        ,    6.23813708, ...,    2.10984183,
           37.86      , -122.22      ],
        [   7.2574    ,   52.        ,    8.28813559, ...,    2.80225989,
           37.85      , -122.24      ],
        ...,
        [   1.7       ,   17.        ,    5.20554273, ...,    2.3256351 ,
           39.43      , -121.22      ],
        [   1.8672    ,   18.        ,    5.32951289, ...,    2.12320917,
           39.43      , -121.32      ],
        [   2.3886    ,   16.        ,    5.25471698, ...,    2.61698113,
           39.37      , -121.24      ]]),
 'target': array([4.526, 3.585, 3.521, ..., 0.923, 0.847, 0.894]),
 'feature_names': ['MedInc',
  'HouseAge',
  'AveRooms',
  'AveBedrms',
  'Population',
  'AveOccup',
  'Latitude',
  'Longitude'],
 'DESCR': 'California housing dataset.\n\nThe original database is available from StatLib\n\n 

# Convenience plot with ``plot_partial_dependence``

In [4]:
import ipywidgets as widgets
from IPython.display import HTML, Javascript, clear_output, display


def on_pdp_plot(f1, f2):
    """Draw partial-dependence plots of the fitted model for two features.

    Two figures are created from the module-level ``clf`` and ``X_train``:

    1. ``plot_partial_dependence`` panels for ``f1``, ``f2`` and the
       pair ``[f1, f2]``.
    2. A 3D surface of the two-feature partial dependence returned by
       ``partial_dependence``.

    Parameters
    ----------
    f1, f2 : int
        Feature indices. They select the features to analyse and are used
        to look up the axis labels in ``names``.
    """
    logger.info('Custom 3d plot via ``partial_dependence``')
    # NOTE: this writes to the global rcParams, so the size also applies to
    # any figure created after this call.
    plt.rcParams['figure.figsize'] = [13, 5]  # [width, height]

    # --- Figure 1: one panel per single feature plus one for the pair ---
    features = (f1, f2, [f1, f2])
    fig, _ = plot_partial_dependence(
        clf, X_train, features, feature_names=names, n_jobs=3, grid_resolution=50)
    fig.suptitle('Partial dependence of house value on nonlocation features\n'
                 'for the California housing dataset')
    plt.subplots_adjust(top=0.9)  # tight_layout causes overlap with suptitle

    # --- Figure 2: 3D surface of the joint partial dependence ---
    fig_3d = plt.figure()

    target_feature = (f1, f2)
    pdp, axes = partial_dependence(
        clf, target_feature, X=X_train, grid_resolution=50)
    XX, YY = np.meshgrid(axes[0], axes[1])
    # Reshape the flat response onto the grid and transpose it to line up
    # with the meshgrid layout above.
    Z = pdp[0].reshape([np.size(grid_axis) for grid_axis in axes]).T

    # Create the 3D axes through the figure instead of ``Axes3D(fig)``:
    # recent Matplotlib releases no longer attach a directly constructed
    # Axes3D to the figure, which leaves the plot empty. The rectangle
    # covers the whole figure, as ``Axes3D(fig)`` did by default.
    ax = fig_3d.add_axes([0.0, 0.0, 1.0, 1.0], projection='3d')
    surf = ax.plot_surface(
        XX, YY, Z, rstride=1, cstride=1, cmap=plt.cm.BuPu, edgecolor='k')
    ax.set_xlabel(names[target_feature[0]])
    ax.set_ylabel(names[target_feature[1]])
    ax.set_zlabel('Partial dependence')
    #  pretty init view
    ax.view_init(elev=22, azim=122)
    plt.colorbar(surf)
    plt.suptitle('Partial dependence of house value')
    # NOTE(review): top=1.2 is above the figure's upper edge (1.0); confirm
    # this value is intentional.
    plt.subplots_adjust(top=1.2)


# Offer every feature index of the dataset for both arguments. The count is
# taken from ``names`` rather than hard-coded, so it follows the dataset.
# ``interact`` calls ``on_pdp_plot`` with the currently selected values.
picker_w = widgets.interact(
    on_pdp_plot, f1=range(len(names)), f2=range(len(names)))

interactive(children=(Dropdown(description='f1', options=(0, 1, 2, 3, 4, 5, 6, 7), value=0), Dropdown(descript…