In [1]:
import ethik
import lightgbm as lgb
import numpy as np
import pandas as pd
import plotly.graph_objs as go
from sklearn import metrics, model_selection

# One seed shared by the train/test split and the model, so the value only
# has to be changed in a single place to re-run the notebook differently.
SEED = 42

# Load the adult dataset through ethik's dataset helper and hold out a
# shuffled test split.
X, y = ethik.datasets.load_adult()
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, shuffle=True, random_state=SEED
)

# Fit a LightGBM classifier; no hyper-parameter other than the seed is set.
model = lgb.LGBMClassifier(random_state=SEED).fit(X_train, y_train)

# Keep column 1 of predict_proba and wrap it in a named Series in one step,
# so `y_pred` is never bound to a bare ndarray. The Series name is reused
# further down as the "label" value of the explainer query.
# NOTE(review): this Series gets a fresh RangeIndex, whereas X_test keeps the
# shuffled index produced by the split — presumably ethik pairs rows by
# position rather than by label; confirm before relying on index alignment.
y_pred = pd.Series(model.predict_proba(X_test)[:, 1], name=">$50k")

exp = ethik.ClassificationExplainer()
age = X_test["age"]

In [2]:
# Explainer query written row by row: each record names a feature, the target
# value requested for it, and the prediction label it applies to.
# Both rows carry the same "group" id.
# NOTE(review): rows sharing a group id are presumably evaluated jointly by
# ethik — confirm against the library's documentation.
query = pd.DataFrame(
    [
        {"group": 0, "feature": "age", "target": 30, "label": y_pred.name},
        {"group": 0, "feature": "education-num", "target": 10, "label": y_pred.name},
    ],
    columns=["group", "feature", "target", "label"],
)
query

Unnamed: 0,group,feature,target,label
0,0,age,30,>$50k
1,0,education-num,10,>$50k


In [3]:
# The displayed result is the query table with an extra `ksi` column.
# NOTE(review): `_fill_ksis` is underscore-prefixed, so it is presumably an
# internal helper whose signature may change — confirm before depending on it.
query_with_ksis = exp._fill_ksis(X_test, query)
query_with_ksis

Unnamed: 0,group,feature,target,label,ksi
0,0,age,30,>$50k,-0.818156
1,0,education-num,10,>$50k,0.056918


In [4]:
# The displayed result extends the query with `influence`, `influence_low`
# and `influence_high` columns.
# NOTE(review): underscore-prefixed method, presumably internal API — confirm.
influence_for_query = exp._explain_influence(X_test, y_pred, query)
influence_for_query

100%|██████████| 1/1 [00:00<00:00, 313.90it/s]


Unnamed: 0,group,feature,target,label,ksi,influence,influence_low,influence_high
0,0,age,30,>$50k,-0.818156,0.160545,0.160545,0.160545
1,0,education-num,10,>$50k,0.056918,0.160545,0.160545,0.160545


In [5]:
# Threshold the predicted scores at 0.5 to obtain boolean predictions that
# can be scored with accuracy against y_test.
y_pred_binary = y_pred > 0.5

# The displayed result extends the query with `accuracy_score`,
# `accuracy_score_low` and `accuracy_score_high` columns.
# NOTE(review): underscore-prefixed method, presumably internal API — confirm.
exp._explain_performance(X_test, y_test, y_pred_binary, metrics.accuracy_score, query)

100%|██████████| 1/1 [00:00<00:00, 91.84it/s]


Unnamed: 0,group,feature,target,label,ksi,accuracy_score,accuracy_score_low,accuracy_score_high
0,0,age,30,>$50k,-0.818156,0.91305,0.91305,0.91305
1,0,education-num,10,>$50k,0.056918,0.91305,0.91305,0.91305


In [6]:
# Two individuals taken from the test set by row position; each row is
# renamed so the resulting Series carries a readable name.
mary = X_test.iloc[1].rename("mary")
bob = X_test.iloc[2].rename("bob")

# Show mary's feature values.
mary

age                                45
workclass                   State-gov
fnlwgt                          50567
education                     HS-grad
education-num                       9
marital-status     Married-civ-spouse
occupation            Exec-managerial
relationship                     Wife
race                            White
gender                         Female
capital-gain                        0
capital-loss                        0
hours-per-week                     40
native-country          United-States
Name: mary, dtype: object

In [7]:
# Restrict the comparison to a handful of features; `bob` is the reference
# individual and `mary` the one compared against him.
compared_features = ["age", "education-num", "hours-per-week", "gender"]

exp.compare_influence(
    X_test=X_test[compared_features],
    y_pred=y_pred,
    reference=bob,
    compared=mary,
)

100%|██████████| 10/10 [00:00<00:00, 380.83it/s]


Unnamed: 0,feature,label,reference,compared
0,hours-per-week,>$50k,0.301345,0.235129
1,age,>$50k,0.143152,0.272689
2,education-num,>$50k,0.456936,0.1907
3,gender,>$50k,0.303191,0.109424


In [8]:
# Target values requested for the "age" feature; the displayed result is a
# dict keyed by target, each value being a Series indexed like X_test.
age_weight_targets = [25, 45]
exp.compute_weights(X_test["age"], targets=age_weight_targets)

{25: 14160    0.000187
 27048    0.000020
 28868    0.000145
 5667     0.000128
 7827     0.000145
            ...   
 26658    0.000068
 29401    0.000003
 27265    0.000240
 30339    0.000010
 8610     0.000272
 Name: age, Length: 8141, dtype: float64, 45: 14160    0.000078
 27048    0.000136
 28868    0.000083
 5667     0.000085
 7827     0.000083
            ...   
 26658    0.000100
 29401    0.000218
 27265    0.000073
 30339    0.000159
 8610     0.000071
 Name: age, Length: 8141, dtype: float64}

In [9]:
# Target values requested for the "age" feature; the displayed result is a
# dict keyed by target holding `edges`, `hist`, `kde` and `average` entries.
age_distribution_targets = [25, 45]
exp.compute_distributions(X_test["age"], targets=age_distribution_targets)

{25: {'edges': array([17.        , 25.11111111, 33.22222222, 41.33333333, 49.44444444,
         57.55555556, 65.66666667, 73.77777778, 81.88888889, 90.        ]),
  'hist': array([7.88496185e-02, 2.89191048e-02, 1.11288275e-02, 3.28967152e-03,
         8.91664809e-04, 1.80337767e-04, 2.53055037e-05, 2.82029276e-06,
         3.20559548e-07]),
  'kde': None,
  'average': 25},
 45: {'edges': array([17.        , 25.11111111, 33.22222222, 41.33333333, 49.44444444,
         57.55555556, 65.66666667, 73.77777778, 81.88888889, 90.        ]),
  'hist': array([0.0131571 , 0.01727102, 0.02257766, 0.0232401 , 0.02106497,
         0.01461035, 0.00704278, 0.00288985, 0.00143383]),
  'kde': None,
  'average': 45}}