In [17]:
import numpy as np
import pandas as pd
import pickle
from sklearn.inspection import permutation_importance
from sklearn.metrics import make_scorer, mean_squared_error

In [2]:
with open('model/model.pickle', 'rb') as f:
    model = pickle.load(f)

In [5]:
feature_cols = ['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude']
target_col = 'MedHouseVal'

In [7]:
df_importance = pd.DataFrame({
    'feature': feature_cols,
    'gain': model.feature_importances_
}).sort_values('gain', ascending=False)

df_importance

Unnamed: 0,feature,gain
0,MedInc,51570.542924
5,AveOccup,12306.917106
7,Longitude,11006.996057
6,Latitude,10759.996946
1,HouseAge,4437.360295
2,AveRooms,3125.337069
3,AveBedrms,1569.160782
4,Population,1399.797162


In [8]:
x_train = pd.read_csv('data/x_train.csv')
x_test = pd.read_csv('data/x_test.csv')
y_train = pd.read_csv('data/y_train.csv')
y_test = pd.read_csv('data/y_test.csv')

x_train.shape, x_test.shape, y_train.shape, y_test.shape

((14448, 8), (6192, 8), (14448, 1), (6192, 1))

In [9]:
x_train.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,1.975,52.0,2.8,0.7,193.0,4.825,36.73,-119.79
1,2.2604,43.0,3.67148,1.184116,836.0,3.018051,37.77,-122.21
2,6.299,17.0,6.478022,1.087912,1387.0,3.81044,33.87,-118.04
3,1.7199,17.0,2.518,1.196,3051.0,3.051,34.06,-118.28
4,2.2206,50.0,4.622754,1.161677,606.0,3.628743,36.73,-119.81


In [10]:
y_train.head()

Unnamed: 0,MedHouseVal
0,0.475
1,1.0
2,2.858
3,1.75
4,0.592


In [18]:
def rmse(y_true, y_pred):
    return np.sqrt(mean_squared_error(y_true, y_pred))

mse_scorer = make_scorer(rmse)

pi = permutation_importance(
    model, x_train, y_train, scoring=mse_scorer,
    n_repeats=10, n_jobs=-1, random_state=0
)

In [21]:
df_pi = pd.DataFrame({
    'feature': feature_cols,
    'pi_mean': pi['importances_mean'],
    'pi_std': pi['importances_std']
}).sort_values('pi_mean')

df_pi

Unnamed: 0,feature,pi_mean,pi_std
6,Latitude,-1.003821,0.004385
7,Longitude,-0.910353,0.004251
0,MedInc,-0.522493,0.0043
5,AveOccup,-0.286416,0.005044
2,AveRooms,-0.234351,0.003603
1,HouseAge,-0.126653,0.002381
3,AveBedrms,-0.075737,0.00081
4,Population,-0.068257,0.000971
