In [8]:
import sys
sys.path.append("../../")

import warnings
warnings.filterwarnings("ignore")

import pandas as pd
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.preprocessing import StandardScaler

from holisticai.datasets import load_adult

# data and simple preprocessing
dataset = load_adult()['frame']
X = pd.get_dummies(dataset.iloc[:1000,:].drop(columns=['class']), drop_first=True)
scaler = StandardScaler()
X_standard = scaler.fit_transform(X)

y_clf = pd.DataFrame(dataset.iloc[:1000,:]['class'].apply(lambda x: 1 if x == '>50K' else 0))
y_reg = pd.DataFrame(dataset.iloc[:1000,:]['fnlwgt'])
y_reg = scaler.fit_transform(y_reg)

# instantiate and fit models

# regression
reg = LinearRegression()
reg.fit(X_standard, y_reg)

# classification
clf = LogisticRegression(random_state=42, max_iter=100)
clf.fit(X_standard, y_clf)

# import Explainer
from holisticai.explainability import Explainer

In [9]:
# instantiate explainer permutation
explainer = Explainer(based_on='feature_importance', 
                      strategy_type='permutation', 
                      model_type='binary_classification', 
                      model=clf, 
                      x=X_standard,
                      y=y_clf)

explainer.metrics()

Unnamed: 0,Value,Reference
Fourth Fifths,0.237113,0
Importance Spread Divergence,1.107565,-
Importance Spread Ratio,0.757894,0
Global Overlap Score [label=0],1.0,1
Global Overlap Score [label=1],1.0,1
Global Range Overlap Score [label=0],1.0,1
Global Range Overlap Score [label=1],1.0,1
Global Explainability Score,1.0,1


In [10]:
# instantiate explainer permutation
explainer = Explainer(based_on='feature_importance', 
                      strategy_type='permutation', 
                      model_type='regression', 
                      model=reg, 
                      x=X_standard, 
                      y=y_reg)

explainer.metrics()

Unnamed: 0,Value,Reference
Fourth Fifths,0.0,0
Importance Spread Divergence,4.563263,-
Importance Spread Ratio,0.002502,0
Global Overlap Score [Q0-Q1],0.0,1
Global Overlap Score [Q1-Q2],0.0,1
Global Overlap Score [Q2-Q3],0.0,1
Global Overlap Score [Q3-Q4],0.0,1
Global Range Overlap Score [Q0-Q1],0.987641,1
Global Range Overlap Score [Q1-Q2],0.967959,1
Global Range Overlap Score [Q2-Q3],0.936898,1


In [11]:
# instantiate explainer surrogate classification
explainer = Explainer(based_on='feature_importance', 
                      strategy_type='surrogate', 
                      model_type='binary_classification',
                      model=clf, 
                      x=X_standard, 
                      y=y_clf)

explainer.metrics()

Unnamed: 0,Value,Reference
Fourth Fifths,0.020619,0
Importance Spread Divergence,3.328649,-
Importance Spread Ratio,0.27238,0
Global Explainability Score,1.0,1
Surrogate Efficacy Classification,0.822,1


In [12]:
# instantiate explainer surrogate regression
explainer = Explainer(based_on='feature_importance', 
                      strategy_type='surrogate', 
                      model_type='regression', 
                      model=reg, 
                      x=X_standard, 
                      y=y_reg)

explainer.metrics()

Unnamed: 0,Value,Reference
Fourth Fifths,0.0,0
Importance Spread Divergence,4.574711,-
Importance Spread Ratio,0.0,0
Global Explainability Score,1.0,1
Surrogate Efficacy Regression,0.174344,0


In [13]:
# instantiate explainer lime classification
explainer = Explainer(based_on='feature_importance', 
                      strategy_type='lime', 
                      model_type='binary_classification', 
                      model=clf, 
                      x=X_standard, 
                      y=y_clf)

explainer.metrics()

Unnamed: 0,Value,Reference
Dataset Spread Stability,0.004298,0
Dataset Spread Mean,1.049769,0
Dataset Spread Ratio,0.998901,0
Features Spread Stability,0.345545,0
Features Spread Mean,0.109666,0
Features Spread Ratio,0.924466,0


In [14]:
# instantiate explainer lime regression
explainer = Explainer(based_on='feature_importance', 
                      strategy_type='lime', 
                      model_type='regression', 
                      model=clf, 
                      x=X_standard, 
                      y=y_reg)

explainer.metrics()

Unnamed: 0,Value,Reference
Dataset Spread Stability,0.004537,0
Dataset Spread Mean,1.112952,0
Dataset Spread Ratio,0.99884,0
Features Spread Stability,0.30676,0
Features Spread Mean,0.111707,0
Features Spread Ratio,0.932944,0
