In [28]:
import pandas as pd
import xgboost as xgb
import eli5 as eli

from sklearn.datasets import california_housing
from sklearn.model_selection import train_test_split
from sklearn import metrics

In [2]:
features = california_housing.fetch_california_housing().get('data')
target = california_housing.fetch_california_housing().get('target')

In [4]:
X_train, X_test, Y_train, Y_test = train_test_split(features, target, train_size=0.70)

# Base Model - XGBoost

In [5]:
model = xgb.sklearn.XGBRegressor(max_depth=6, n_estimators=20)

In [6]:
# training the model
model.fit(X_train, Y_train)

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
             importance_type='gain', interaction_constraints='',
             learning_rate=0.300000012, max_delta_step=0, max_depth=6,
             min_child_weight=1, missing=nan, monotone_constraints='()',
             n_estimators=20, n_jobs=4, num_parallel_tree=1,
             objective='reg:squarederror', random_state=0, reg_alpha=0,
             reg_lambda=1, scale_pos_weight=1, subsample=1, tree_method='exact',
             validate_parameters=1, verbosity=None)

In [7]:
# testing the model
Y_pred = model.predict(X_test)
print(Y_pred)

[2.2353919 2.7690594 3.5250585 ... 2.352295  4.020177  1.4882139]


In [8]:
print(Y_test)

[3.75  3.446 4.153 ... 2.761 3.519 1.584]


In [11]:
metrics.r2_score(Y_test, Y_pred)

0.8081565146078142

# Explaining the Model

In [13]:
california_housing.fetch_california_housing().get('feature_names')

['MedInc',
 'HouseAge',
 'AveRooms',
 'AveBedrms',
 'Population',
 'AveOccup',
 'Latitude',
 'Longitude']

In [12]:
eli.xgboost.explain_weights(model)

Weight,Feature
0.5399,f0
0.1525,f5
0.0745,f7
0.0716,f1
0.0678,f6
0.0489,f2
0.0237,f3
0.0211,f4


In [14]:
eli.xgboost.explain_weights_xgboost(model)

Weight,Feature
0.5399,f0
0.1525,f5
0.0745,f7
0.0716,f1
0.0678,f6
0.0489,f2
0.0237,f3
0.0211,f4


In [16]:
import pandas as pd

In [19]:
pd.DataFrame(X_test)

Unnamed: 0,0,1,2,3,4,5,6,7
0,5.3269,19.0,9.142857,1.214286,101.0,3.607143,34.02,-117.53
1,5.1765,34.0,6.784173,0.971223,378.0,2.719424,37.29,-121.92
2,5.7979,52.0,5.981432,1.087533,823.0,2.183024,37.88,-122.26
3,1.6747,20.0,3.989648,1.082816,3555.0,3.680124,32.74,-117.10
4,4.4000,44.0,4.644068,1.016949,480.0,2.711864,33.88,-118.35
...,...,...,...,...,...,...,...,...
6188,4.7773,37.0,3.535461,0.929078,531.0,1.882979,33.74,-118.10
6189,1.5536,39.0,3.296593,1.096192,1339.0,2.683367,33.77,-118.18
6190,3.9038,35.0,4.535831,1.079805,1631.0,2.656352,34.18,-118.41
6191,5.2490,52.0,5.904255,0.992908,656.0,2.326241,33.73,-118.31


In [21]:
pd.DataFrame(X_test[0])

Unnamed: 0,0
0,5.3269
1,19.0
2,9.142857
3,1.214286
4,101.0
5,3.607143
6,34.02
7,-117.53


In [23]:
eli.xgboost.explain_prediction(model, X_test[0])

Contribution?,Feature
1.559,<BIAS>
0.561,x0
0.414,x2
0.012,x6
0.002,x1
-0.009,x3
-0.115,x4
-0.181,x7
-0.508,x5


In [22]:
eli.xgboost.explain_prediction_xgboost(model, X_test[0])

Contribution?,Feature
1.559,<BIAS>
0.561,x0
0.414,x2
0.012,x6
0.002,x1
-0.009,x3
-0.115,x4
-0.181,x7
-0.508,x5


In [25]:
pd.DataFrame(X_test[1])

Unnamed: 0,0
0,5.1765
1,34.0
2,6.784173
3,0.971223
4,378.0
5,2.719424
6,37.29
7,-121.92


In [24]:
eli.xgboost.explain_prediction(model, X_test[1])

Contribution?,Feature
1.559,<BIAS>
0.421,x0
0.206,x7
0.087,x6
0.079,x2
0.022,x1
-0.033,x3
-0.034,x4
-0.039,x5


In [26]:
pd.DataFrame(X_test[2])

Unnamed: 0,0
0,5.7979
1,52.0
2,5.981432
3,1.087533
4,823.0
5,2.183024
6,37.88
7,-122.26


In [27]:
eli.xgboost.explain_prediction(model, X_test[2])

Contribution?,Feature
1.559,<BIAS>
0.761,x0
0.444,x5
0.285,x1
0.103,x3
0.005,x4
-0.002,x7
-0.009,x2
-0.121,x6
