In [1]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from lightgbm import LGBMClassifier
from utils import MyTabNetClassifier
from acv_explainers.utils import *
from ares import AReS
from cet import CounterfactualExplanationTree
from ce import ActionExtractor
from utils import DatasetHelper, submodular_picking, DATASETS_NAME
import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

# Seed passed as random_state to the train/test split.
seed = 2022
# NOTE(review): the global NumPy RNG is seeded with 0 rather than `seed` — confirm this is intentional.
np.random.seed(0)

In [2]:
# Load the scikit-learn breast cancer data as pandas objects, then hold out
# 20% of the rows as a test split (the split is driven by `seed`).

X, y = load_breast_cancer(as_frame=True, return_X_y=True)
X_tr, X_ts, y_tr, y_ts = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=seed,
)

In [3]:
# Fit an Isolation Forest on the training features; later cells compare its
# predictions against 1 to score the recourse-modified points.

from sklearn.ensemble import IsolationForest

# Default hyper-parameters; no random_state is passed here.
isolation = IsolationForest()
# Kept as the cell's last expression, so the fitted estimator is echoed as output.
isolation.fit(X_tr)

IsolationForest()

In [4]:
# Train the classifier whose predictions the recourse methods will explain.

# LightGBM with 50 estimators and num_leaves=8; no random_state is passed here.
mdl = LGBMClassifier(n_estimators=50, num_leaves=8)
# Kept as the cell's last expression, so the fitted estimator is echoed as output.
mdl.fit(X_tr, y_tr)

LGBMClassifier(n_estimators=50, num_leaves=8)

In [5]:
# Column names with every space removed; this list is what the explainers
# receive as `feature_names`. Must run while X_tr is still a DataFrame.
feature_names = [column_name.replace(' ', '') for column_name in X_tr.columns]
feature_names

['meanradius',
 'meantexture',
 'meanperimeter',
 'meanarea',
 'meansmoothness',
 'meancompactness',
 'meanconcavity',
 'meanconcavepoints',
 'meansymmetry',
 'meanfractaldimension',
 'radiuserror',
 'textureerror',
 'perimetererror',
 'areaerror',
 'smoothnesserror',
 'compactnesserror',
 'concavityerror',
 'concavepointserror',
 'symmetryerror',
 'fractaldimensionerror',
 'worstradius',
 'worsttexture',
 'worstperimeter',
 'worstarea',
 'worstsmoothness',
 'worstcompactness',
 'worstconcavity',
 'worstconcavepoints',
 'worstsymmetry',
 'worstfractaldimension']

In [6]:
# Display strings handed to the explainers: the target's name and its class labels.
# NOTE(review): presumably target_labels is indexed by class id; scikit-learn
# encodes this dataset as 0 = malignant, 1 = benign — confirm the label order is intended.
target_name, target_labels = 'Cancer', ['Good', 'Bad']

In [7]:
# One empty constraint string per feature
# (presumably '' means "unconstrained" — confirm against the explainer library).
feature_constraints = [''] * len(feature_names)
# Sanity check echoed as the cell output: one constraint per data column.
X_tr.shape[1] == len(feature_constraints)

True

In [8]:
# Empty list: no feature categories are declared for this dataset.
feature_categories = list()

In [9]:
# Every feature gets the type code 'C'
# (presumably "continuous" — confirm against the explainer library).
feature_types = ['C'] * len(feature_names)

In [10]:
# Convert the feature frames to plain NumPy arrays.
# np.asarray yields the same array as `.values` on the first run and is a
# no-op on an ndarray, so re-running this cell no longer raises AttributeError.
X_tr = np.asarray(X_tr)
X_ts = np.asarray(X_ts)

In [11]:
# Convert the label Series to plain NumPy arrays.
# np.asarray yields the same array as `.values` on the first run and is a
# no-op on an ndarray, so re-running this cell no longer raises AttributeError.
y_tr = np.asarray(y_tr)
y_ts = np.asarray(y_ts)

In [12]:
# Keep only the rows the model assigns to class 1: X from the training split,
# X_vl from the test split. Note that X is rebound here (it previously held
# the full feature DataFrame).
X, X_vl = (split[mdl.predict(split) == 1] for split in (X_tr, X_ts))

## Actionable Recourse Summary

In [15]:
print('## Actionable Recourse Summary')

# Build the AReS explainer around the trained model and the full training matrix.
ares = AReS(
    mdl,
    X_tr,
    max_rule=4,
    max_rule_length=4,
    discretization_bins=10,
    minimum_support=0.05,
    print_objective=False,
    feature_names=feature_names,
    feature_types=feature_types,
    feature_categories=feature_categories,
    feature_constraints=feature_constraints,
    target_name=target_name,
    target_labels=target_labels,
)
# Fit on X, the training rows the model predicts as class 1.
ares = ares.fit(
    X,
    max_change_num=4,
    cost_type='MPS',
    lambda_acc=1.0,
    lambda_cov=1.0,
    lambda_cst=1.0,
)

# Report the settings and timings read back from the fitted object.
print('* Parameters:')
print('\t* lambda_acc: {}'.format(ares.lambda_acc))
print('\t* lambda_cov: {}'.format(ares.lambda_cov))
print('\t* lambda_cst: {}'.format(ares.lambda_cst))
print('\t* minimum support: {}'.format(ares.rule_miner_.minsup_))
print('\t* discretization bins: {}'.format(ares.rule_miner_.fd_.bins))
print('\t* pre-processing time[s]: {}'.format(ares.preprocess_time_))
print('\t* max rule: {}'.format(ares.max_rule_))
print('\t* max rule length: {}'.format(ares.max_rule_length_))
print('\t* Time[s]:', ares.time_)

# Diagnostics computed on X_vl, the test rows predicted as class 1.
print('\t* uncover test: {}'.format(ares.uncover(X_vl)))
print('\t* conflict: {}'.format(ares.conflict(X_vl)))
print()

print('### Learned AReS')
print(ares.to_markdown())

## Actionable Recourse Summary
* Parameters:
	* lambda_acc: 1.0
	* lambda_cov: 1.0
	* lambda_cst: 1.0
	* minimum support: 0.05
	* discretization bins: 10
	* pre-processing time[s]: 0.41532748501049355
	* max rule: 4
	* max rule length: 4
	* Time[s]: 276.7805148810003
	* uncover test: 0.6764705882352942
	* conflict: 0.0

### Learned AReS
| | Rule | Action |
| :---: | --- | --- |
| Recourse <br> rule 1 <br> (probability: 11.4%) | If 'meanradius<10.21' <br> AND 'worstradius<11.16' <br> AND 'worstperimeter<71.83' <br> AND 'worstarea<380.4' | meanradius>=19.55 <br> AND worstradius>=23.93 <br> AND worstperimeter>=159.8 <br> AND worstarea>=1.72e+03 |
| Recourse <br> rule 2 <br> (probability: 4.2%) | If '16.34<=worstradius<17.74' <br> AND '108.5<=worstperimeter<118.1' <br> AND '812.3<=worstarea<971.7' | 20.4<=worstradius<23.93 <br> AND 136.6<=worstperimeter<159.8 <br> AND 1.293e+03<=worstarea<1.72e+03 |
| Recourse <br> rule 3 <br> (probability: 7.3%) | If '14.48<=worstradius<15.34' <br> AND '9

In [16]:
# The output of ares.predict is added to X_vl, so it is presumably a
# per-feature change for each row; x_ares holds the modified rows.
x_ares = ares.predict(X_vl) + X_vl
y_ares = mdl.predict(x_ares)

# Share of rows whose predicted class differs after applying the action.
print('Positive accuracy of AReS = {}'.format(np.mean(y_ares != mdl.predict(X_vl))))
# IsolationForest.predict returns +1 for inliers, so this is the inlier share
# (the label previously read "Oulier"; it now matches the CET cell's wording).
print('Positive INlier AReS = {}'.format(np.mean(isolation.predict(x_ares) == 1)))

Positve accuracy of AReS = 0.6323529411764706
Positve Oulier AReS = 0.8235294117647058


In [19]:
# Average number of features per row that differ between x_ares and X_vl.
print('Sparsity', ((x_ares - X_vl) != 0).sum(axis=1).mean())

Sparsity 3.764705882352941


# Counterfactual Explanation Tree (CET)

### 1- CET All

In [22]:
# Values passed to fit() as C and gamma respectively.
LAMBDA = 0.01
GAMMA = 1.0

print('## Counterfactual Explanation Tree')

# Build the CET explainer around the trained model and the full training data.
cet = CounterfactualExplanationTree(
    mdl,
    X_tr,
    y_tr,
    max_iteration=500,
    lime_approximation=False,
    feature_names=feature_names,
    feature_types=feature_types,
    feature_categories=feature_categories,
    feature_constraints=feature_constraints,
    target_name=target_name,
    target_labels=target_labels,
)

# Fit on X, the training rows the model predicts as class 1; verbose=True
# makes the search print its progress.
cet = cet.fit(
    X,
    max_change_num=8,
    cost_type='MPS',
    C=LAMBDA,
    gamma=GAMMA,
    max_leaf_size=-1,
    time_limit=180,
    verbose=True,
)

# Report the settings and timings read back from the fitted object.
print('* Parameters:')
print('\t* lambda: {}'.format(cet.lambda_))
print('\t* gamma: {}'.format(cet.gamma_))
print('\t* max_iteration: {}'.format(cet.max_iteration_))
print('\t* leaf size bound:', cet.leaf_size_bound_)
print('\t* leaf size:', cet.n_leaves_)
print('\t* LIME approximation:', cet.lime_approximation_)
print('\t* Time[s]:', cet.time_)
print()

print('### Learned CET')
cet.print_tree()

## Counterfactual Explanation Tree
## Stochastic Local Searching ...
### Iteration: 10
#### Before:
- If meanfractaldimension<0.0676:
	- If compactnesserror<0.0117:
		* Action [Cancer: Bad -> Good] (74/74 = 100.0% / MeanCost = 0.738):
			* meanradius: +10.2081
			* radiuserror: +2.3715
			* areaerror: +501.1211
			* worstradius: +16.6584
			* worsttexture: +4.0068
			* worstperimeter: +108.5082
			* worstarea: +3099.6640
			* worstconcavepoints: +0.0466
	- Else:
		* Action [Cancer: Bad -> Good] (164/165 = 99.4% / MeanCost = 0.602):
			* meanconcavepoints: +0.0452
			* areaerror: +503.2741
			* worstradius: +7.3990
			* worsttexture: +4.8724
			* worstperimeter: +53.3870
			* worstarea: +756.7600
			* worstconcavity: +0.1903
			* worstconcavepoints: +0.0897
- Else:
	- If meanarea<494.52:
		- If meancompactness<0.1088:
			- If meanarea<396.4:
				- If smoothnesserror<0.0059:
					* Action [Cancer: Bad -> Good] (1/1 = 100.0% / MeanCost = 0.582):
						* meanconcavepoints: +0.0607
						* 

- If meanfractaldimension<0.0676:
	- If compactnesserror<0.0117:
		* Action [Cancer: Bad -> Good] (74/74 = 100.0% / MeanCost = 0.738):
			* meanradius: +10.2081
			* radiuserror: +2.3715
			* areaerror: +501.1211
			* worstradius: +16.6584
			* worsttexture: +4.0068
			* worstperimeter: +108.5082
			* worstarea: +3099.6640
			* worstconcavepoints: +0.0466
	- Else:
		* Action [Cancer: Bad -> Good] (164/165 = 99.4% / MeanCost = 0.602):
			* meanconcavepoints: +0.0452
			* areaerror: +503.2741
			* worstradius: +7.3990
			* worsttexture: +4.8724
			* worstperimeter: +53.3870
			* worstarea: +756.7600
			* worstconcavity: +0.1903
			* worstconcavepoints: +0.0897
- Else:
	- If meanarea<494.52:
		- If meancompactness<0.1088:
			- If meanarea<396.4:
				- If smoothnesserror<0.0059:
					* Action [Cancer: Bad -> Good] (1/1 = 100.0% / MeanCost = 0.582):
						* meanconcavepoints: +0.0607
						* worstradius: +6.6718
						* worsttexture: +10.0328
						* worstperimeter: +57.9776
						* worstar

### Iteration: 40
#### Before:
- If worstperimeter<91.102:
	- If worstsmoothness<0.0712:
		* Action [Cancer: Bad -> Good] (1/1 = 100.0% / MeanCost = 0.635):
			* meantexture: +21.0600
			* radiuserror: +2.5743
			* perimetererror: +19.7500
			* areaerror: +520.5100
			* worstradius: +6.9572
			* worsttexture: +11.8918
			* worstperimeter: +48.3970
			* worstarea: +695.1600
	- Else:
		- If textureerror<1.556:
			* Action [Cancer: Bad -> Good] (130/130 = 100.0% / MeanCost = 0.764):
				* meanradius: +8.8742
				* meanconcavepoints: +0.0684
				* areaerror: +513.9821
				* worstradius: +17.4282
				* worsttexture: +9.3492
				* worstperimeter: +120.4740
				* worstarea: +3661.9200
				* worstconcavepoints: +0.1164
		- Else:
			- If worstconcavity<0.0906:
				- If meanarea<396.4:
					- If worstsymmetry<0.2682:
						* Action [Cancer: Bad -> Good] (5/7 = 71.4% / MeanCost = 0.645):
							* meantexture: +6.8280
							* radiuserror: +0.4624
							* areaerror: +521.4120
							* worstradius

- If worstperimeter<91.102:
	- If worstsmoothness<0.0712:
		* Action [Cancer: Bad -> Good] (1/1 = 100.0% / MeanCost = 0.635):
			* meantexture: +21.0600
			* radiuserror: +2.5743
			* perimetererror: +19.7500
			* areaerror: +520.5100
			* worstradius: +6.9572
			* worsttexture: +11.8918
			* worstperimeter: +48.3970
			* worstarea: +695.1600
	- Else:
		- If textureerror<1.556:
			* Action [Cancer: Bad -> Good] (130/130 = 100.0% / MeanCost = 0.764):
				* meanradius: +8.8742
				* meanconcavepoints: +0.0684
				* areaerror: +513.9821
				* worstradius: +17.4282
				* worsttexture: +9.3492
				* worstperimeter: +120.4740
				* worstarea: +3661.9200
				* worstconcavepoints: +0.1164
		- Else:
			- If worstconcavity<0.0906:
				- If meantexture<15.698:
					- If worstsymmetry<0.2682:
						* Action [Cancer: Bad -> Good] (1/1 = 100.0% / MeanCost = 0.786):
							* meanconcavepoints: +0.1754
							* radiuserror: +2.7209
							* areaerror: +523.1200
							* worstradius: +12.8838
							* 


KeyboardInterrupt



In [23]:
# The output of cet.predict is added to X_vl, so it is presumably a
# per-feature change for each row; x_cet holds the modified rows.
x_cet = cet.predict(X_vl) + X_vl
y_cet = mdl.predict(x_cet)

# Share of rows whose predicted class differs after applying the action.
print('Positive accuracy of CET = {}'.format((y_cet != mdl.predict(X_vl)).mean()))
# Share of modified rows for which the Isolation Forest predicts 1.
print('Positive INlier CET = {}'.format((isolation.predict(x_cet) == 1).mean()))

Positive accuracy of CET = 0.9558823529411765
Positive INlier CET = 0.5882352941176471


In [24]:
# Average number of features per row that differ between x_cet and X_vl.
print('Sparsity', ((x_cet - X_vl) != 0).sum(axis=1).mean())

Sparsity 8.0


In [25]:
# Print the tree currently held by `cet`.
cet.print_tree()

- If worstperimeter<105.38:
	- If compactnesserror<0.0117:
		* Action [Cancer: Bad -> Good] (78/78 = 100.0% / MeanCost = 0.727):
			* meanradius: +9.2968
			* radiuserror: +0.4956
			* areaerror: +511.8291
			* worstradius: +19.6770
			* worsttexture: +4.0068
			* worstperimeter: +120.4740
			* worstarea: +3417.7920
			* worstconcavity: +0.2003
	- Else:
		* Action [Cancer: Bad -> Good] (175/182 = 96.2% / MeanCost = 0.616):
			* meanconcavepoints: +0.0483
			* radiuserror: +0.4965
			* areaerror: +471.1502
			* worstradius: +5.9984
			* worsttexture: +4.8724
			* worstperimeter: +44.1096
			* worstarea: +612.6080
			* worstconcavepoints: +0.0815
- Else:
	* Action [Cancer: Bad -> Good] (29/29 = 100.0% / MeanCost = 0.242):
		* meanradius: +3.8829
		* meantexture: +1.9784
		* worstradius: +6.1228
		* worsttexture: +3.8302
		* worstperimeter: +40.9892
		* worstarea: +615.4640
		* worstconcavity: +0.0955
		* worstconcavepoints: +0.0469

