### Load the data and train ACXplainer to generate Counterfactual Rules

In [1]:
import numpy as np
from acv_explainers import ACXplainer
from acv_explainers.utils import *
from sklearn.linear_model import LogisticRegression
from lightgbm import LGBMClassifier
from utils import MyTabNetClassifier
from utils import DatasetHelper, DATASETS_NAME
from sklearn.metrics import roc_auc_score, accuracy_score
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

# Classifier selector: 'L' = LogisticRegression, 'X' = LightGBM, 'T' = TabNet.
model = 'X'
seed = 2022  # random_state of the train/test split
np.random.seed(0)

if model == 'L':
    print('* Classifier: LogisticRegression')
    mdl = LogisticRegression(penalty='l2', C=1.0, solver='liblinear')
    print('\t* C: {}'.format(mdl.C))
    print('\t* penalty: {}'.format(mdl.penalty))
elif model == 'X':
    print('* Classifier: LightGBM')
    mdl = LGBMClassifier(n_estimators=50, num_leaves=8)
    print('\t* n_estimators: {}'.format(mdl.n_estimators))
    print('\t* num_leaves: {}'.format(mdl.num_leaves))
elif model == 'T':
    print('* Classifier: TabNet')
    # NOTE(review): `D` is not defined in the visible cells, so this branch raises
    # NameError as written; presumably a DatasetHelper instance -- confirm.
    mdl = MyTabNetClassifier(D.feature_types, verbose=0)
else:
    # Fail early instead of continuing with an undefined / stale `mdl`.
    raise ValueError("Unknown model code {!r}; expected 'L', 'X' or 'T'".format(model))

X, y = load_breast_cancer(return_X_y=True, as_frame=True)
X_tr, X_ts, y_tr, y_ts = train_test_split(X, y, test_size=0.20, random_state=seed)

# Fitted on the training split; the later cells read `results.isolation` rather
# than this object. Kept in place so the sequence of fits is unchanged.
isolation = IsolationForest()
isolation.fit(X_tr)

# Train the classifier selected above. (Previously `mdl` was re-created here as a
# LightGBM model regardless of `model`, so the printed header could disagree with
# the model that was actually trained.)
mdl.fit(X_tr, y_tr)

# The explainer is trained on the classifier's predictions, not on the true labels.
y_train = mdl.predict(X_tr)
y_test = mdl.predict(X_ts)

# Plain NumPy copies of the feature frames for the explainer.
x_train = X_tr.copy().values
x_test = X_ts.copy().values

### Train Explainer (ACXplainer)
ac_explainer = ACXplainer(classifier=True, n_estimators=20, max_depth=8)
ac_explainer.fit(x_train, y_train)

print('# Trained ACXplainer -- score = {}'.format(accuracy_score(y_test, ac_explainer.predict(x_test))))

# Subsets handed to the experiment runs below. Note that `y` is rebound here from
# the dataset labels to the classifier's predictions on the test split.
x, y = x_test[:500], y_test[:500]
x_rules, y_rules = x_train[:1000], y_train[:1000]

# Feature names with spaces stripped.
columns_name = [col.replace(' ', '') for col in X_tr.columns]

* Classifier: LightGBM
	* n_estimators: 50
	* num_leaves: 8
# Trained ACXplainer -- score = 0.9649122807017544


In [2]:
# Bundle the explainer, the data splits, the feature names and the classifier
# into the experiment runner used by every cell below.
results = RunExperiments(
    ac_explainer,
    x_train,
    x_test,
    y_train,
    y_test,
    columns_name,
    model=mdl,
)

In [3]:
# Compute the local divergent sets for the selected points; `x` and `y` are the
# slices of x_test / y_test taken in the first cell.
results.run_local_divergent_set(x, y)

### Computing the local divergent set of (x, y)


100%|██████████████████████████████████████████| 20/20 [00:00<00:00, 400.73it/s]
100%|███████████████████████████████████████████| 10/10 [00:34<00:00,  3.42s/it]


In [4]:
# Compute the local counterfactual rules for the selected points (x, y).
results.run_local_counterfactual_rules(
    x,
    y,
    acc_level=0.9,
    pi_level=0.9,
)

### Computing the local counterfactual rules of (x, y)


100%|█████████████████████████████████████████| 114/114 [02:06<00:00,  1.11s/it]


In [5]:
# NOTE(review): this call is identical to the one in the preceding cell;
# presumably a leftover re-run -- confirm whether it is still needed.
results.run_local_counterfactual_rules(
    x,
    y,
    acc_level=0.9,
    pi_level=0.9,
)

### Computing the local counterfactual rules of (x, y)


100%|█████████████████████████████████████████| 114/114 [01:58<00:00,  1.04s/it]


In [6]:
# Sample counterfactuals for (x, y) using the local counterfactual rules.
results.run_sampling_local_counterfactuals(
    x,
    y,
    batch=1000,
    max_iter=1000,
    temp=0.5,
)

### Sampling using the local counterfactual rules of (x, y)


100%|█████████████████████████████████████████| 114/114 [00:56<00:00,  2.01it/s]


In [7]:
# Report the accuracy and coverage stored on `results` for the local counterfactuals.
local_report = 'Local Accuracy = {} -- Local Coverage = {}'
print(local_report.format(results.accuracy_local, results.coverage_local))

Local Accuracy = 0.9565217391304348 -- Local Coverage = 0.20175438596491227


In [8]:
# Compute the sufficient explanations / sufficient rules on the training subset.
results.run_sufficient_rules(
    x_rules,
    y_rules,
    pi_level=0.8,
)

### Computing the Sufficient Explanations and the Sufficient Rules


100%|███████████████████████████████████████████| 10/10 [00:08<00:00,  1.21it/s]
100%|█████████████████████████████████████████| 455/455 [00:46<00:00,  9.74it/s]


In [9]:
# Compute the regional divergent sets.
results.run_regional_divergent_set(
    stop=True,
    pi_level=0.8,
)

### Computing the regional divergent set of (x, y)


100%|███████████████████████████████████████████| 10/10 [01:30<00:00,  9.00s/it]


In [10]:
# Compute the regional counterfactual rules.
results.run_regional_counterfactual_rules(
    acc_level=0.8,
    pi_level=0.8,
)

### Computing the regional counterfactual rules of (x, y)


100%|█████████████████████████████████████████| 455/455 [07:21<00:00,  1.03it/s]


In [11]:
# Sample counterfactuals from the regional rules; max_obs is the number of test rows.
results.run_sampling_regional_counterfactuals_alltests(
    max_obs=len(x_test),
    batch=1000,
    max_iter=1000,
    temp=0.5,
)

### Sampling using the regional counterfactual rules


100%|█████████████████████████████████████████| 114/114 [06:22<00:00,  3.36s/it]


In [12]:
# Report the accuracy and coverage stored on `results` for the regional counterfactuals.
regional_report = 'Regional Accuracy = {} -- Regional Coverage = {}'
print(regional_report.format(results.accuracy_regional, results.coverage_regional))

Regional Accuracy = 0.9298245614035088 -- Regional Coverage = 1.0


In [27]:
# Re-display the local accuracy that an earlier cell already printed.
results.accuracy_local

0.9565217391304348

In [13]:
# Sanity check: the labels stored in `results` must equal the classifier's
# predictions on the stored test set for EVERY row. `np.mean(...)` was truthy for
# any non-zero agreement rate, so the check could only fail at 0% agreement.
if np.all(mdl.predict(results.x_test) == results.y_test):
    print('CONSISTENT')
else:
    raise ValueError('results.y_test does not match mdl.predict(results.x_test)')


CONSISTENT


In [32]:
# Keep only the test points for which local sampling returned at least one sample.
# NOTE(review): this rebinds `x`, which the earlier run_local_* cells received as
# the test slice -- re-running those cells after this one uses the filtered array.
x = np.array([
    results.x_test[idx]
    for idx, samples in enumerate(results.counterfactuals_samples_local)
    if len(samples) != 0
])
ce = np.array(results.dist_local)
ce_r = np.array(results.dist_regional)

# Fraction of test points whose regional counterfactual gets a different prediction.
flipped_r = mdl.predict(x_test) != mdl.predict(ce_r)
print('all acc', np.mean(flipped_r))

all acc 0.8947368421052632


In [33]:
# Points predicted as class 1 and their local counterfactuals.
pos_mask = mdl.predict(x) == 1
x_pos = x[pos_mask]
ce_pos = ce[pos_mask]

# Share of those points whose counterfactual receives a different prediction.
flipped_pos = mdl.predict(x_pos) != mdl.predict(ce_pos)
print('LOCAL positive accuracy', np.mean(flipped_pos))

LOCAL positive accuracy 1.0


In [34]:
# Average number of features that differ between a point and its counterfactual.
changed_pos = (x_pos - ce_pos) != 0
print('LOCAL positive sparsity', np.mean(np.sum(changed_pos, axis=1)))

LOCAL positive sparsity 9.625


In [35]:
# Share of counterfactuals labelled 1 by `results.isolation`
# (presumably an IsolationForest, where 1 means inlier -- confirm in RunExperiments).
iso_labels_pos = results.isolation.predict(ce_pos)
inlier_pos = np.mean(iso_labels_pos == 1)
print('LOCAL positive inlier', inlier_pos)

LOCAL positive inlier 0.9375


In [36]:
# Points predicted as class 0 and their local counterfactuals.
neg_mask = mdl.predict(x) == 0
x_neg = x[neg_mask]
ce_neg = ce[neg_mask]

# Share of those points whose counterfactual receives a different prediction.
flipped_neg = mdl.predict(x_neg) != mdl.predict(ce_neg)
print('LOCAL negative accuracy', np.mean(flipped_neg))

LOCAL negative accuracy 1.0


In [37]:
# Average number of features that differ between a point and its counterfactual.
changed_neg = (x_neg - ce_neg) != 0
print('LOCAL negative sparsity', np.mean(np.sum(changed_neg, axis=1)))

LOCAL negative sparsity 7.285714285714286


In [38]:
# Share of counterfactuals labelled 1 by `results.isolation`
# (presumably an IsolationForest, where 1 means inlier -- confirm in RunExperiments).
iso_labels_neg = results.isolation.predict(ce_neg)
inlier_neg = np.mean(iso_labels_neg == 1)
print('LOCAL negative inlier', inlier_neg)

LOCAL negative inlier 1.0


In [39]:
# Test points predicted as class 1 and their regional counterfactuals.
pos_mask_r = mdl.predict(x_test) == 1
x_pos_r = x_test[pos_mask_r]
ce_pos_r = ce_r[pos_mask_r]

# Share of those points whose counterfactual receives a different prediction.
flipped_pos_r = mdl.predict(x_pos_r) != mdl.predict(ce_pos_r)
print('REGIONAL positive accuracy', np.mean(flipped_pos_r))

REGIONAL positive accuracy 0.9117647058823529


In [40]:
# Average number of features that differ between a point and its counterfactual.
changed_pos_r = (x_pos_r - ce_pos_r) != 0
print('REGIONAL positive sparsity', np.mean(np.sum(changed_pos_r, axis=1)))

REGIONAL positive sparsity 9.955882352941176


In [41]:
# Share of regional positive counterfactuals labelled 1 by `results.isolation`
# (presumably an IsolationForest, where 1 means inlier -- confirm in RunExperiments).
# Stored under an `_r` name, matching `inlier_neg_r`, so the LOCAL `inlier_pos`
# computed earlier is no longer overwritten by the regional value.
inlier_pos_r = np.mean(results.isolation.predict(ce_pos_r) == 1)
print('REGIONAL positive inlier', inlier_pos_r)

REGIONAL positive inlier 0.9117647058823529


In [42]:
# Test points predicted as class 0 and their regional counterfactuals.
neg_mask_r = mdl.predict(x_test) == 0
x_neg_r = x_test[neg_mask_r]
ce_neg_r = ce_r[neg_mask_r]

# Share of those points whose counterfactual receives a different prediction.
flipped_neg_r = mdl.predict(x_neg_r) != mdl.predict(ce_neg_r)
print('REGIONAL negative accuracy', np.mean(flipped_neg_r))

REGIONAL negative accuracy 0.8695652173913043


In [43]:
# Average number of features that differ between a point and its counterfactual.
changed_neg_r = (x_neg_r - ce_neg_r) != 0
print('REGIONAL negative sparsity', np.mean(np.sum(changed_neg_r, axis=1)))

REGIONAL negative sparsity 10.0


In [44]:
# Share of counterfactuals labelled 1 by `results.isolation`
# (presumably an IsolationForest, where 1 means inlier -- confirm in RunExperiments).
iso_labels_neg_r = results.isolation.predict(ce_neg_r)
inlier_neg_r = np.mean(iso_labels_neg_r == 1)
print('REGIONAL negative inlier', inlier_neg_r)

REGIONAL negative inlier 0.9782608695652174


In [45]:
# Side-by-side coverage summary.
# NOTE(review): the second value is `results.coverage_regional` although the label
# reads "Global" -- confirm which wording is intended.
coverage_report = 'Local Coverage = {} -- Global Coverage {}'
print(coverage_report.format(results.coverage_local, results.coverage_regional))

Local Coverage = 0.20175438596491227 -- Global Coverage 1.0
