# Partial Dependence Plots

In [None]:
# Render matplotlib figures inline, directly beneath the cell that draws them.
%matplotlib inline

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report, roc_curve, roc_auc_score
from sklearn.cross_validation import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn import datasets
#import seaborn as sns

# these are new imports for the partial dependence plots
from sklearn.ensemble.partial_dependence import plot_partial_dependence
from sklearn.ensemble.partial_dependence import partial_dependence
from sklearn.datasets.california_housing import fetch_california_housing
from mpl_toolkits.mplot3d import Axes3D

You can read more on partial dependence plots [here](http://scikit-learn.org/stable/auto_examples/ensemble/plot_partial_dependence.html).  Let's load a dataset on California housing broken out by census block.  Info on the dataset can be found here: [http://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.html](http://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.html)

In [None]:
# Fetch the California housing dataset through the loader imported at the top
# of the notebook (the same function the `datasets.california_housing`
# attribute path resolves to).
cal_housing = fetch_california_housing()

In [None]:
# Keep the dataset's feature names handy; they are passed to the plotting
# call later so the axes get readable labels.
names = cal_housing.feature_names
# Echo the names as the cell output.
names

The target is the median house value of each census block group, in units of \$100,000.

In [None]:
# Echo the regression target array as the cell output.
cal_housing.target

In [None]:
# Preview the feature matrix as a labelled table instead of echoing the raw
# array: column headers come from the dataset's own feature names, and
# .head() limits the output to the first five rows.
pd.DataFrame(cal_housing.data, columns=cal_housing.feature_names).head()

We'll make a train/test split and fit a gradient-boosted trees regressor to the training data.

In [None]:
# Hold out 20% of the rows for testing. Pinning random_state makes the
# shuffle repeatable, so a fresh "Restart & Run All" produces the same split
# (and therefore the same downstream model and plots).
X_train, X_test, y_train, y_test = train_test_split(
    cal_housing.data,
    cal_housing.target,
    test_size=0.2,
    random_state=42)

In [None]:
# Gradient boosting with 100 stages of depth-4 trees and least-squares loss.
# random_state is fixed so that refitting on the same training data gives the
# same model.
gbt = GradientBoostingRegressor(n_estimators=100, max_depth=4, learning_rate=0.1,
                                loss='ls', random_state=42)
# Fit on the training split; the fitted estimator is echoed as the cell output.
gbt.fit(X_train, y_train)

The `plot_partial_dependence` function draws one partial dependence plot for each predictor (or pair of predictors) in a given list.

In [None]:
# Column indices of the predictors to plot. The trailing (5, 1) pair asks for
# a joint plot of those two features, which is drawn as a 2D plot.
features = [0, 5, 1, 2, (5, 1)]

fig, axs = plot_partial_dependence(
    gbt,
    X_train,
    features,
    feature_names=names,
    n_jobs=4,
    grid_resolution=50)
fig.suptitle('Partial dependence of house value on nonlocation features\n'
             'for the California housing dataset')
# Shrink the subplot area slightly so the two-line title has room above it.
fig.subplots_adjust(top=0.9)
fig = plt.figure()