# This notebook contains the experiments on Banknote dataset with LionForests

In [1]:
import sys
# `!pwd` is an IPython shell escape: it captures the working directory as a list of output lines.
cpath = !pwd
# Strip the last 18 characters of the working directory before adding it to the import path
# (presumably this climbs from the notebook's folder to the repository root — TODO confirm).
sys.path.append(cpath[0][:-18])
# NOTE(review): machine-specific absolute Windows paths; they only resolve on the original
# author's machine and should be replaced by paths relative to the repository.
sys.path.append('C:\\Users\\iamollas\\Downloads\\LionForests Journal\\')
sys.path.append('C:\\Users\\iamollas\\Downloads\\LionForests Journal\\algorithms\\')

In [2]:
from lionforests import LionForests
from algorithms.simpleSurrogate import GlobalSurrogateTree, LocalSurrogateTree
from algorithms.DefragTrees.defragTrees import DefragModel
from CHIRPS.structures import data_container
import CHIRPS.routines as rt
import CHIRPS.structures as strcts

from scipy import sparse

from datasets.dataset import Dataset
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import pandas as pd 
import numpy as np
np.seterr(invalid='ignore')
import warnings
warnings.filterwarnings("ignore")
import time

from sklearn.model_selection import train_test_split, LeaveOneOut
from sklearn.metrics import f1_score, precision_score

from anchor import anchor_tabular


Firstly, we load the dataset and we set the feature and class names

In [7]:
# Load the banknote data together with its feature and class names.
banknotes = Dataset()
X, y, feature_names, class_names = banknotes.load_banknote()

# Size of the held-out test split: 10% of the data, capped at 100 instances.
one_h_percent = int(min(10*len(X)/100, 100))
print("Instances:",one_h_percent)

# Feature names must not contain spaces (rules are later parsed by splitting on ' ').
new_fn = [original_name.replace(' ','_') for original_name in feature_names]
feature_names = new_fn

Instances: 100


In this set of experiments we use the following (admittedly not very polished) code to run a few experiments/comparisons across different datasets and algorithms!

In [66]:
def measure(X_train, X_test, y_train, y_test, feature_names, class_names, tech=False):
    """Fit LionForests on one split and benchmark six rule-based explainers.

    A LionForests model is fitted on the training split (features scaled to
    [-1, 1] with a MinMaxScaler). Six explanation techniques are then built
    around the fitted model ``lf.model``: global surrogate ('gs'), local
    surrogate ('ls'), Anchors ('an'), LionForests ('lf'), DefragTrees ('df')
    and CHIRPS ('ch'). Unless ``tech`` is true, every test instance is
    explained in a leave-one-out loop and the resulting rule is scored on the
    remaining test instances against the model's own predictions.

    Parameters
    ----------
    X_train, X_test : array-like
        Unscaled feature matrices of the train / test split.
    y_train : array
        Training labels (must support ``reshape``; used to build the CHIRPS frame).
    y_test : array
        Not used inside this function; kept for interface compatibility.
    feature_names : list of str
        Column names; rules are keyed by these names.
    class_names : list
        Class names forwarded to LionForests and Anchors.
    tech : bool, default False
        If true, return right after the explainers are built.

    Returns
    -------
    If ``tech`` is true: ``(interpretation, lf)`` — the dict of explainer
    callables keyed by technique and the fitted LionForests object.
    Otherwise: ``(interpretation, full_coverage, rule_length, f_precision)``
    where ``full_coverage`` and ``rule_length`` hold per-technique sums over
    all leave-one-out iterations and ``f_precision`` holds per-technique lists
    of precision scores.
    """
    parameters = [{
        'max_depth': [10],
        'max_features': [0.75],
        'bootstrap': [True],
        'min_samples_leaf' : [1],
        'n_estimators': [500]
    }]
    scaler = MinMaxScaler(feature_range=(-1,1))
    lf = LionForests(None, False, scaler, feature_names, class_names)
    lf.fit(X_train, y_train, params=parameters)

    # Every technique except LionForests itself works on the scaled data.
    train = lf.utilizer.transform(X_train)
    test = lf.utilizer.transform(X_test)

    # The explainers describe the model, so the model's predictions are the targets.
    predictions = lf.model.predict(train)
    test_predictions = lf.model.predict(test)

    def techniques(model, train, y_train, predictions, test, feature_names, class_names, lf, task):
        """Build the six explainers and return them as a dict of callables."""

        #BaselineTechnique ==============================================================================
        gt = GlobalSurrogateTree(train, predictions, feature_names, task)
        lt = LocalSurrogateTree(train, predictions, feature_names, task, 150)

        #DefragTechnique ================================================================================
        Kmax = 10
        splitter = DefragModel.parseSLtrees(model) # parse sklearn tree ensembles into the array of (feature index, threshold)
        mdl = DefragModel(modeltype=task, maxitr=100, qitr=0, tol=1e-6, restart=20, verbose=0)
        mdl.fit(train, predictions, splitter, Kmax, fittype='FAB', featurename=feature_names)
        def def_cov(instances):
            # Row 0 is the explained instance; rows 1: are the instances the rules are evaluated on.
            score, cover, coll = mdl.evaluate(instances[1:],lf.model.predict(instances[1:]))
            def_predictions = mdl.predict(instances[1:])
            rule = mdl.find_rule_length(instances[0])
            # The rule set is printed on every call; the notebook relies on this to display it.
            print(mdl)
            return rule, cover, def_predictions

        #Anchors =======================================================================================
        explainer = anchor_tabular.AnchorTabularExplainer(class_names, feature_names, train)
        def anchors_method(instance):
            exp = explainer.explain_instance(instance, lf.model.predict, threshold=0.95)
            anchors_dict = {}
            for i in exp.names():
                terms = i.split(' ')
                if len(terms) == 3:
                    # "feature op value"
                    anchors_dict[terms[0]] = [[terms[1],float(terms[2])]]
                else:
                    # "low op feature op high" -> stored as [upper bound, lower bound]
                    anchors_dict[terms[2]] = [[terms[3],float(terms[4])],[terms[1],float(terms[0])]]
            # The second element is a placeholder, not a prediction.
            return anchors_dict, 0

        #CHIRPS =======================================================================================
        # NOTE(review): machine-specific absolute path; consider making it configurable.
        project_dir = 'C:\\Users\\iamollas\\Downloads\\LionForests Journal\\algorithms\\CHIRPS'
        temp_frame = pd.DataFrame(np.hstack((train,y_train.reshape(len(y_train),1))),columns=feature_names+['class'])
        temp_frame['class']=temp_frame['class'].astype(int)

        # Shift the labels from {0, 1} to {1, 2}; 1 -> 2 must happen first so the two
        # replacements do not collide.
        temp_frame = temp_frame.replace({"class": {1: 2}})
        temp_frame = temp_frame.replace({"class": {0: 1}})

        # NOTE(review): var_names is hard-coded for the banknote data; presumably it
        # should be `feature_names` — confirm before reusing on another dataset.
        mydata = data_container(
                data = temp_frame, class_col = 'class', var_names = ['variance', 'skew', 'curtosis', 'entropy'],
                project_dir = project_dir, save_dir = 'banknote', random_state=123)
        meta_data = mydata.get_meta()
        f_walker = strcts.classification_trees_walker(forest=model, meta_data=meta_data)
        f_walker.forest_walk(instances = test, labels = model.predict(test), forest_walk_async = True)

        explanations = strcts.CHIRPS_container(f_walker.path_detail,
                                        forest=model,
                                        sample_instances=sparse.csr_matrix(train), # any representative sample can be used
                                        sample_labels=predictions,
                                        meta_data=meta_data)

        explanations.run_explanations(target_classes=model.predict(test), # we're explaining the prediction, not the true label!
                                explanation_async=False,
                                random_state=123,
                                which_trees='majority',
                                alpha_paths=0.0,
                                support_paths=0.1,
                                score_func=1,
                                precis_threshold=0.99,
                                disc_path_bins=4,
                                merging_bootstraps=20,
                                pruning_bootstraps=20,
                                delta=0.2,
                                weighting='kldiv')
        def chirps_method(idx):
            # CHIRPS explanations are precomputed for the whole test set, hence the lookup by index.
            chirps_dict = {}
            for i in explanations.explainers[idx].pruned_rule:
                if i[1]:
                    chirps_dict[i[0]] = [['<=',float(i[2])]]
                else:
                    chirps_dict[i[0]] = [['>',float(i[2])]]
            # The second element is a placeholder, not a prediction.
            return chirps_dict, 0

        #LionForests
        def lf_rule(instance):
            # Element 5 of the explanation is used as a {feature: (low, high)} mapping.
            temp = lf.explain(instance)[5]
            rule = {}
            for key,value in temp.items():
                rule[key] = [['<=',value[1]],['>',value[0]]]
            return rule

        return {'gs':gt.rule,'ls':lt.rule,'an':anchors_method,'lf':lf_rule, 'df': def_cov, 'ch':chirps_method}

    interpretation = techniques(lf.model, train, y_train, predictions, test, feature_names, class_names, lf, 'classification')
    if tech:
        return interpretation, lf

    def rule_cov(instance, feature_names, rule):
        """Return 1 if `instance` satisfies every condition of `rule`, otherwise 0."""
        for k in range(len(instance)):
            feature = feature_names[k]
            if feature not in rule:
                continue
            conditions = rule[feature]
            if len(conditions) == 2:
                # Two-sided range stored as [upper bound (inclusive), lower bound (exclusive)].
                if instance[k] > conditions[0][1] or instance[k] <= conditions[1][1]:
                    return 0
            elif conditions[0][0] == '>':
                if instance[k] <= conditions[0][1]:
                    return 0
            elif instance[k] > conditions[0][1]: # any other operator is treated as '<='
                return 0
        return 1

    def _coverage_and_precision(rule, instances, instance_predictions, explained_label):
        """Score `rule` on `instances`.

        Returns (fraction of instances covered, precision). Precision compares
        `explained_label` with the model's prediction for each covered instance
        and is None unless more than one instance is covered (the threshold the
        original code used).
        """
        pairs = []
        for row, row_prediction in zip(instances, instance_predictions):
            if rule_cov(row, feature_names, rule) == 1:
                pairs.append([explained_label, row_prediction])
        precision = None
        if len(pairs) > 1:
            pairs = np.array(pairs)
            precision = precision_score(pairs[:,:1],pairs[:,1:],average='micro')
        return len(pairs)/len(instances), precision

    loo = LeaveOneOut()
    full_coverage = {'gs':0, 'ls':0, 'an':0, 'lf':0, 'df':0, 'ch': 0}
    rule_length = {'gs':0, 'ls':0, 'an':0, 'lf':0, 'df':0, 'ch': 0}
    f_precision = {'gs':[], 'ls':[], 'an':[], 'lf':[], 'df':[], 'ch':[]}

    # Each iteration explains the single held-out test instance and scores the rule
    # on all other test instances.
    for train_index, test_index in loo.split(test):
        x_train_temp, x_test_temp = test[train_index], test[test_index]
        y_train_temp, y_test_temp = test_predictions[train_index], test_predictions[test_index]
        # LionForests produces rules over the original (unscaled) feature values.
        x_train_temp_lf = lf.utilizer.inverse_transform(x_train_temp)
        x_test_temp_lf = lf.utilizer.inverse_transform(x_test_temp)
        for name, method in interpretation.items():
            if name == 'df':
                #FIX RULE LENGTH!
                # TODO: `rule` is whatever find_rule_length returns; len(rule) below may
                # not be the actual rule length.
                rule, cover, df_predictions = method(np.concatenate((x_test_temp,x_train_temp)))
                f_precision[name].append(precision_score(df_predictions,y_train_temp,average='micro'))
                full_coverage[name] = full_coverage[name] + cover
            else:
                if name == 'ch':
                    rule, _ = method(test_index[0])
                    # NOTE(review): CHIRPS is built from the scaled matrices, yet its rules
                    # are scored on inverse-transformed instances here — confirm that the
                    # thresholds really are in the unscaled space.
                    instances, explained_label = x_train_temp_lf, y_test_temp[0]
                elif name == 'lf':
                    rule = method(x_test_temp_lf[0])
                    instances, explained_label = x_train_temp_lf, y_test_temp[0]
                elif name == 'an':
                    # Fix: the original compared against 'anchors', a key that never occurs,
                    # so Anchors was scored against the placeholder 0 returned by
                    # anchors_method instead of the model's prediction.
                    rule, _ = method(x_test_temp[0])
                    instances, explained_label = x_train_temp, y_test_temp[0]
                else:
                    # Surrogates ('gs', 'ls') return their own predicted label with the rule.
                    rule, explained_label = method(x_test_temp[0])
                    instances = x_train_temp
                coverage, precision = _coverage_and_precision(rule, instances, y_train_temp, explained_label)
                if precision is not None:
                    f_precision[name].append(precision)
                full_coverage[name] = full_coverage[name] + coverage
            rule_length[name] = rule_length[name] + len(rule)
    return interpretation, full_coverage, rule_length, f_precision

In [None]:
# One stratified split per seed; each split is benchmarked independently.
rs = [7, 77, 777, 7777, 77777, 5, 55, 555, 5555, 55555]
total_results = []
for random_s in rs:
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=one_h_percent, stratify=y, random_state=random_s)
    results = measure(X_train, X_test, y_train, y_test, feature_names, class_names)
    total_results += [results]

In [None]:
# Aggregate the per-split results of measure() into per-technique mean/std of
# coverage, rule length and precision.
technique_names = ['gs', 'ls', 'an', 'lf', 'df', 'ch']
full_coverage = {name: [] for name in technique_names}
rule_length = {name: [] for name in technique_names}
f_precision = {name: [] for name in technique_names}

for _, split_coverage, split_rule_length, split_precision in total_results:
    for name in technique_names:
        # measure() returns sums over the leave-one-out iterations, i.e. one per test
        # instance; the test split holds `one_h_percent` instances.
        full_coverage[name].append(split_coverage[name]/one_h_percent)
        rule_length[name].append(split_rule_length[name]/one_h_percent)
        # An empty precision list (no rule ever covered more than one instance) yields nan.
        f_precision[name].append(np.array(split_precision[name]).mean())

# Iterate over the technique names directly instead of relying on a variable
# leaked from the loop above (which fails when total_results is empty).
for name in technique_names:
    print(name,np.array(full_coverage[name]).mean(),'+-',np.array(full_coverage[name]).std(),
          np.array(rule_length[name]).mean(),'+-',np.array(rule_length[name]).std(),
          np.array(f_precision[name]).mean(),'+-',np.array(f_precision[name]).std())

# Conclusiveness Check:

Let's test a few examples!

In [None]:
# Hold out only 10 instances. With tech=True, measure() returns
# (explainer callables, fitted LionForests) right after building the explainers
# and skips the leave-one-out benchmark.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=10, stratify=y, random_state=7)
techniques = measure(X_train, X_test, y_train, y_test, feature_names, class_names, True)

We prepare our explanation techniques. They are trained on almost the whole training set.

In [482]:
# measure(..., True) returned (explainers dict, fitted LionForests). Unpack it only
# while `techniques` is still that tuple, so re-running this cell does not fail
# with a KeyError on the already-unpacked dict.
if isinstance(techniques, tuple):
    techniques, lf = techniques
X_test_scaled = lf.utilizer.transform(X_test)
X_train_scaled = lf.utilizer.transform(X_train)

We select the instance 9

In [487]:
temp_id = 9
# Unscaled copy of the instance; LionForests explains unscaled values.
temp = X_test[temp_id].copy()
# The row is passed twice, presumably only to hand transform() a 2-D input;
# element [0] is the scaled instance used by the other techniques.
temp_scaled = lf.utilizer.transform([X_test[temp_id],X_test[temp_id]])[0]
temp, temp_scaled

(array([ 3.583  , -3.7971 ,  3.4391 , -0.12501]),
 array([ 0.53244056, -0.25342473, -0.24826502,  0.53180938]))

Now let's generate the explanations.

In [488]:
techniques['ls'](temp_scaled) # author's note: faulty for id 9; temp2[2] = -5 for id 8 and id 5; curtosis for id 4 (presumably the perturbations that expose an inconclusive rule — TODO confirm)

({}, 0)

Local Surrogate said 0 without giving any info

In [489]:
techniques['an'](temp_scaled) # author's note: faulty for id 9 when temp2[2] = -5; the trailing 0 in the output is a placeholder returned by anchors_method, not a prediction

({'variance': [['>', 0.42]]}, 0)

Anchors said 0 if variance > 0.42

In [490]:
techniques['ch'](temp_id) # CHIRPS explanations are looked up by test index. Author's note: faulty with temp2[2] = -5 for id 9, id 8 and id 4; with temp2[1] = -13.7731 for id 5

({'variance': [['>', 0.23071]]}, 0)

CHIRPS said 0 if variance > 0.23071

In [491]:
# The global surrogate takes the scaled instance and returns (rule, surrogate's predicted label).
techniques['gs'](temp_scaled)

({'skew': [['<=', 0.66949972230517]],
  'variance': [['>', 0.1280646916062096]],
  'curtosis': [['>', -0.6443884608512067]]},
 0)

The Global Surrogate said 0 because variance > 0.1280646916062096 and skew <= 0.66949972230517 and curtosis > -0.6443884608512067

In [492]:
# Unlike the other techniques, LionForests is given the unscaled instance and
# returns ranges over the original feature values.
techniques['lf'](temp)

{'variance': [['<=', 6.8248], ['>', 2.3942999407537267]],
 'curtosis': [['<=', 17.9274], ['>', -0.8297101872697472]]}

Finally, LF said 0 if 0.36099632 < variance < 1 and -0.61605188 < curtosis < 1. Those values are transformed, because LF provides ranges for the real values of an instance and not for the scaled values.

In [498]:
temp2 = temp.copy()
temp2[2] = -5 # Change the curtosis value (index 2) to roughly its minimum; the scaled value is about -1
temp2_scaled = lf.utilizer.transform([temp2,temp2])[0]
# Predict the original and the perturbed instance side by side.
lf.model.predict([temp_scaled,temp2_scaled])

array([0, 1], dtype=int64)

The Local Surrogate did not give any valuable information either. Anchors and CHIRPS did not provide any condition for the curtosis feature, and when we modified its value the prediction changed. Therefore, we can say that these two rules were inconclusive. The Global Surrogate provided a conclusive answer, like LF.

In [500]:
temp_id = 6
# Unscaled copy of the instance; LionForests explains unscaled values.
temp = X_test[temp_id].copy()
# The row is passed twice, presumably only to hand transform() a 2-D input;
# element [0] is the scaled instance used by the other techniques.
temp_scaled = lf.utilizer.transform([X_test[temp_id],X_test[temp_id]])[0]
temp, temp_scaled

(array([ 3.5156, 10.1891, -4.2759, -4.978 ]),
 array([ 0.52271957,  0.79326241, -0.91296444, -0.35073697]))

In [485]:
# def_cov treats row 0 as the explained instance and rows 1: as the instances the
# rules are evaluated on, so the instance is repeated; it also prints the rule set.
techniques['df'](np.array([temp_scaled,temp_scaled, temp_scaled, temp_scaled]))

[Rule  1]
y = 0 when
	 skew > 0.337126
	 curtosis > -0.974536

[Rule  2]
y = 0 when
	 variance > 0.078377
	 skew <= 0.400540
	 curtosis > -0.726306
	 entropy > 0.160262

[Rule  3]
y = 1 when
	 variance <= 0.118145
	 skew <= 0.301429
	 curtosis > -0.484685

[Rule  4]
y = 1 when
	 -0.645371 < variance <= 0.308764
	 -0.135811 < skew <= 0.420517
	 curtosis <= -0.274181
	 entropy <= 0.777904

[Otherwise]
y = 0



([0, 0], 1.0, array([0, 0, 0], dtype=int64))

In [484]:
# LionForests is given the unscaled instance and returns ranges over the original feature values.
techniques['lf'](temp)

{'variance': [['<=', 6.8248], ['>', 2.8434999517112955]],
 'curtosis': [['<=', -4.02399996997267], ['>', -4.364150089024007]]}

In [505]:
temp2 = temp.copy()
temp2[0] = -7 # Change the variance value (index 0) to roughly its minimum; the scaled value is about -1 (presumably — TODO confirm against the scaler's data_min_)
temp2_scaled = lf.utilizer.transform([temp2,temp2])[0]
# Predict the original and the perturbed instance side by side.
lf.model.predict([temp_scaled,temp2_scaled])

array([0, 1], dtype=int64)

Let's try one more example to prove that GS is also inconclusive

In [None]:
# Same setup as before with a different seed: 10 held-out instances; tech=True makes
# measure() return (explainer callables, fitted LionForests) without running the benchmark.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=10, stratify=y, random_state=77)
techniques = measure(X_train, X_test, y_train, y_test, feature_names, class_names, True)

In [465]:
# measure(..., True) returned (explainers dict, fitted LionForests). Unpack it only
# while `techniques` is still that tuple, so re-running this cell does not fail
# with a KeyError on the already-unpacked dict.
if isinstance(techniques, tuple):
    techniques, lf = techniques
X_test_scaled = lf.utilizer.transform(X_test)
X_train_scaled = lf.utilizer.transform(X_train)

We select a random instance again

In [466]:
temp_id = 0
# Unscaled copy of the instance; LionForests explains unscaled values.
temp = X_test[temp_id].copy()
# The row is passed twice, presumably only to hand transform() a 2-D input;
# element [0] is the scaled instance used by the other techniques.
temp_scaled = lf.utilizer.transform([X_test[temp_id],X_test[temp_id]])[0]
temp, temp_scaled

(array([-1.6936,  2.7852, -2.1835, -1.9276]),
 array([-0.22859471,  0.23917574, -0.73269003,  0.2039972 ]))

In [467]:
# The global surrogate takes the scaled instance and returns (rule, surrogate's predicted label).
techniques['gs'](temp_scaled)

({'variance': [['<=', 0.05603344863243409], ['>', -0.25483223672374644]],
  'skew': [['<=', 0.32315588433361764], ['>', 0.22769872546014208]]},
 1)

GS said 1 if -0.2548322 < Variance <= 0.05603344 and 0.2276987254 < skew <= 0.32315588433361764

In [468]:
# LionForests is given the unscaled instance and returns ranges over the original feature values.
techniques['lf'](temp)

{'variance': [['<=', -0.966239995934629], ['>', -1.698349986315147]],
 'skew': [['<=', 3.9254000015519557], ['>', -13.773100000000001]],
 'curtosis': [['<=', -0.2843348647490144], ['>', -5.286099999999999]]}

And LF said 1 because: -0.229 < variance <= -0.12368878 and -1 < skew <= 0.32 and -1 < curtosis <= -0.56906411 (values shown in the scaled space)

In [480]:
temp2 = temp.copy()

# Push curtosis (index 2) to 17.9274, the upper end of the curtosis range LionForests
# reported earlier (presumably the feature's maximum — TODO confirm). GS's rule has no
# condition on curtosis.
temp2[2] = 17.9274 

temp2_scaled = lf.utilizer.transform([temp2,temp2])[0]
# Predict the original and the perturbed instance side by side.
lf.model.predict([temp_scaled,temp2_scaled])

array([1, 0], dtype=int64)

We changed the value of curtosis, which was not included in GS's rule, and the prediction changed. Therefore, GS's rule was not conclusive.