In [1]:
import numpy as np
import sklearn.ensemble
import sklearn.model_selection
from sklearn.datasets import load_boston

In [2]:
# Boston housing dataset: 506 instances, 14 attributes (13 features plus the MEDV target).
# Attribute information (in order):
    #        - CRIM     per capita crime rate by town
    #        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
    #        - INDUS    proportion of non-retail business acres per town
    #        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
    #        - NOX      nitric oxides concentration (parts per 10 million)
    #        - RM       average number of rooms per dwelling
    #        - AGE      proportion of owner-occupied units built prior to 1940
    #        - DIS      weighted distances to five Boston employment centres
    #        - RAD      index of accessibility to radial highways
    #        - TAX      full-value property-tax rate per $10,000
    #        - PTRATIO  pupil-teacher ratio by town
    #        - B        1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town
    #        - LSTAT    % lower status of the population
    #        - MEDV     Median value of owner-occupied homes in $1000's

In [3]:
# Load the Boston housing data (features in boston.data, target in boston.target).
# NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2, so this
# cell requires scikit-learn < 1.2.
boston = load_boston()

# Fixed seed so the train/test split, the fitted forest, and the reported errors
# are identical on every Restart & Run All.
SEED = 0

rf = sklearn.ensemble.RandomForestRegressor(n_estimators=1000, random_state=SEED)

# Keep 80% of the rows for training; the remaining rows are held out for evaluation.
train, test, labels_train, labels_test = sklearn.model_selection.train_test_split(
    boston.data, boston.target, train_size=0.80, random_state=SEED
)

rf.fit(train, labels_train)

# Mean squared error of the forest on the held-out rows.
print('Random Forest MSError', np.mean((rf.predict(test) - labels_test) ** 2))

# Baseline for comparison: always predict the mean of the training targets.
print('MSError when predicting the mean', np.mean((labels_train.mean() - labels_test) ** 2))

Random Forest MSError 11.601019224803833
MSError when predicting the mean 84.55438956268226


In [4]:
# Indices of the feature columns that take at most 10 distinct values; these
# columns are the ones handed to LIME as categorical.
categorical_features = np.flatnonzero(
    [len(set(column)) <= 10 for column in boston.data.T]
)

In [5]:
import lime
import lime.lime_tabular

ModuleNotFoundError: No module named 'lime'

In [None]:
# Build a LIME explainer from the training rows. LIME explains a prediction by
# sampling random perturbations around the instance, so random_state is fixed to
# keep the explanation the same from run to run.
explainer = lime.lime_tabular.LimeTabularExplainer(
    train,
    feature_names=boston.feature_names,
    class_names=['price'],
    categorical_features=categorical_features,
    verbose=True,
    mode='regression',
    random_state=0,
)

# Explain the random forest's prediction for one held-out row, reporting the
# 5 most influential features.
i = 25
exp = explainer.explain_instance(test[i], rf.predict, num_features=5)

In [None]:
# Render the explanation inline in the notebook. show_table=True presumably adds
# the table of feature values alongside the plot — confirm against the installed
# LIME version.
exp.show_in_notebook(show_table=True)

# Last expression of the cell, so its value is displayed as the cell output;
# expected to be the explanation as (feature description, weight) pairs.
exp.as_list()