In [1]:
import tensorflow as tf
import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
from sklearn.datasets import fetch_california_housing
california_housing = fetch_california_housing(as_frame=True)

In [29]:
dataset = california_housing.frame
X = dataset.drop(['MedHouseVal'], axis=1)
y = dataset['MedHouseVal']
cal_features = california_housing.feature_names

In [4]:
def build_model(shape):
    model = tf.keras.Sequential([
        tf.keras.layers.Input(shape=shape),
        tf.keras.layers.Dense(40, activation=tf.nn.relu),
        tf.keras.layers.Dense(20, activation=tf.nn.relu),
        tf.keras.layers.Dense(1)
    ])

    model.compile(optimizer=tf.keras.optimizers.Adam(), 
                  loss='mse',
                  metrics=['mae', 'mse'])
    return model

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [6]:
(X_train.shape[1],)

(8,)

In [7]:
y_train.shape

(16512,)

In [8]:
model = build_model((X_train.shape[1],))
# X_train.shape
model.fit(X_train, y_train, epochs=30)

Epoch 1/30
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 59.7947 - mae: 3.8189 - mse: 59.7947
Epoch 2/30
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 2.6883 - mae: 1.1078 - mse: 2.6883
Epoch 3/30
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 8.2718 - mae: 1.4385 - mse: 8.2718
Epoch 4/30
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.7295 - mae: 0.9255 - mse: 1.7295
Epoch 5/30
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.1945 - mae: 0.7946 - mse: 1.1945
Epoch 6/30
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 60.7107 - mae: 3.6062 - mse: 60.7107
Epoch 7/30
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.8554 - mae: 0.7000 - mse: 0.8554
Epoch 8/30
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss:

<keras.src.callbacks.history.History at 0x1ac38a474c0>

In [9]:
df_perturb = X_test.copy()
df_perturb['MedInc'] = df_perturb['MedInc'].sample(frac=1.0).values

In [10]:
def compute_mse(preds, labels):
    return np.mean((preds - labels)**2)

In [18]:
model_rmse = 0.51
preds = model.predict(df_perturb.values)
feature_rmse = np.sqrt(compute_mse(preds.flatten(), y_test))
permutation_feature_importance = model_rmse - feature_rmse
permutation_feature_importance

[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


np.float64(-1.0169420403335876)

In [16]:
df_perturb.columns

Index(['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup',
       'Latitude', 'Longitude'],
      dtype='object')

In [14]:
preds.shape


(4128, 1)

In [15]:
y_test.shape

(4128,)

In [31]:
from sklearn.neural_network import MLPRegressor 
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline

mlp_reg = MLPRegressor(hidden_layer_sizes=[30, 20, 10, 5], max_iter=700)
transformer = ColumnTransformer([('numerical', MinMaxScaler(feature_range=(-1,1)), cal_features),])

mlp_pipeline = Pipeline(steps=[
    ('transform', transformer),
    ('model', mlp_reg)
])

mlp_pipeline.fit(X_train, y_train)

In [33]:
mlp_pipeline.score(X_test, y_test)

0.6766190927476115

In [32]:
from sklearn.inspection import permutation_importance
r = permutation_importance(mlp_pipeline, X_test, y_test,
                           n_repeats=30,
                           scoring=['r2'],
                           random_state=0)

In [39]:
r.keys()

dict_keys(['r2'])

In [56]:
result = r['r2']
result.keys()

dict_keys(['importances_mean', 'importances_std', 'importances'])

In [57]:
result.importances_mean

array([7.11315049e-01, 4.56463517e-02, 2.15880921e-03, 6.55836117e-03,
       1.63873924e-03, 8.34421412e-04, 2.72175878e+00, 2.53769390e+00])

In [58]:
for i in result.importances_mean.argsort()[::-1]:
    print(f'{cal_features[i]}: \t'
          f'{result.importances_mean[i]:.3f} +/- {result.importances_std[i]:.3f}')

Latitude: 	2.722 +/- 0.042
Longitude: 	2.538 +/- 0.036
MedInc: 	0.711 +/- 0.017
HouseAge: 	0.046 +/- 0.003
AveBedrms: 	0.007 +/- 0.001
AveRooms: 	0.002 +/- 0.001
Population: 	0.002 +/- 0.001
AveOccup: 	0.001 +/- 0.000
