In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import xgboost as xgb

## Data

In [2]:
# Synthetic regression data: one million rows, ten features, a single target,
# generated with a fixed seed.
X, y = make_regression(
    n_samples=1_000_000,
    n_features=10,
    n_targets=1,
    random_state=0,
)

# Seeded split that holds out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [3]:
# 1-based labels, one per column of X: 'feat1', 'feat2', ...
feature_names = [
    f'feat{i}'
    for i in range(1, X.shape[1] + 1)
]

## Model Setup and Training

In [4]:
# Boosted-tree regressor: 100 estimators, depth capped at 3, fixed seed.
xgb_model = xgb.XGBRegressor(
    n_estimators=100,
    max_depth=3,
    n_jobs=-1,
    random_state=0,
)

# Fit on the training split only; X_test / y_test stay held out for evaluation.
xgb_model.fit(X_train, y_train)

## Evaluate

In [5]:
# Predict on the held-out split.
y_pred = xgb_model.predict(X_test)

# Both metrics are called as (y_true, y_pred). A single print call with a
# newline separator emits one metric per line.
print(
    f"MSE:{mean_squared_error(y_test, y_pred)}",
    f"R2:{r2_score(y_test, y_pred)}",
    sep="\n",
)

MSE:581.5312491959091
R2:0.9864208364342159


## In-sample, model-specific feature importance

In [7]:
# Importance scores reported by the fitted model itself, one value per input
# column (presumably in the same order as feature_names — confirm against the
# XGBoost docs before labelling them).
xgb_model.feature_importances_

array([0.20755406, 0.14071487, 0.00143316, 0.01944364, 0.1161005 ,
       0.10012282, 0.12812941, 0.01756507, 0.11897319, 0.14996333],
      dtype=float32)

## Model fingerprint

In [8]:
from model_fingerprint import ModelFingerprint

In [10]:
# Build the fingerprint explainer and run it for the fitted model on the
# held-out rows. Each tuple names one feature pair passed via
# `pairwise_combinations`; the names must match entries of feature_names.
mf = ModelFingerprint()
mf.explain(
    model=xgb_model,
    explained_data=X_test,
    feature_names=feature_names,
    pairwise_combinations=[
        ('feat1', 'feat2'),
        ('feat2', 'feat3'),
        ('feat5', 'feat6'),
        ('feat7', 'feat10'),
        ('feat3', 'feat1'),
        ('feat4', 'feat8'),
        ('feat1', 'feat5'),
        ('feat9', 'feat10'),
        ('feat2', 'feat4'),
    ],
)

Output()

In [11]:
# Table of linear and nonlinear effect per feature (presumably populated by
# the earlier mf.explain() call).
mf.lin_nonlin_effect

Unnamed: 0,linear_effect,nonlinear_effect
feat1,81.10831,4.795171
feat2,62.63687,3.856829
feat3,0.98707,0.24984
feat4,15.709137,1.340632
feat5,61.603147,4.011959
feat6,53.513274,3.456115
feat7,59.951516,3.413286
feat8,14.595197,1.233599
feat9,61.249386,4.27056
feat10,69.403883,4.15753


In [12]:
# Table of pairwise effect per (feat_x, feat_y) pair; the rows match the pairs
# passed as `pairwise_combinations` to mf.explain().
mf.pairwise_effect

Unnamed: 0,feat_x,feat_y,pairwise_effect
0,feat1,feat2,2.90141
1,feat2,feat3,0.009259
2,feat5,feat6,1.555042
3,feat7,feat10,0.559223
4,feat3,feat1,0.017416
5,feat4,feat8,0.368591
6,feat1,feat5,3.642698
7,feat9,feat10,4.811069
8,feat2,feat4,0.021311


In [13]:
# Render the fingerprint's built-in plot; what it draws is defined by the
# model_fingerprint package, not by this notebook.
mf.plot()