## Functions

In [1]:
from anchor import utils

import time
import csv
import newlime_utils

from tabulate import tabulate

def sample(index, dataset, dataset_name, dataset_folder, write_file=False):
    """Fetch one target instance, print a summary of it, and optionally save it.

    The instance, its label and a two-column description table come from
    ``newlime_utils.get_trg_sample``. The prediction line uses the
    notebook-level classifier ``rf``, which must exist before this is called.

    Args:
        index: Position of the instance; also used to index
            ``dataset.labels_test`` for the "True" line.
        dataset: Dataset object exposing ``class_names`` and ``labels_test``.
        dataset_name: Name forwarded to ``get_trg_sample`` and used in the
            CSV output path.
        dataset_folder: Not used inside this function.
        write_file: When true, write the table to
            ``img/<dataset_name>/<index>-instance.csv``.

    Returns:
        The ``(trg, label)`` pair obtained from ``get_trg_sample``.
    """
    trg, label, tab = newlime_utils.get_trg_sample(index, dataset, dataset_name)

    # The classifier expects a 2-D batch, hence the reshape to a single row.
    predicted_class = rf.predict(trg.reshape(1, -1))[0]
    print('Prediction:', dataset.class_names[predicted_class])
    true_class = dataset.labels_test[index]
    print('True:      ', dataset.class_names[true_class])
    print(tabulate(tab))

    if not write_file:
        return trg, label

    csv_path = 'img/%s/%05d-instance.csv' % (dataset_name, index)
    with open(csv_path, 'w', newline='') as csv_file:
        table_writer = csv.writer(csv_file)
        table_writer.writerow(['feature', 'value'])
        table_writer.writerows(tab)
    return trg, label

In [2]:
from lime import explanation
from lime import lime_tabular

def lime_original(trg, pred_label):
    """Explain ``trg`` with the stock LIME tabular explainer and plot the weights.

    Relies on the notebook-level ``dataset`` and ``rf`` objects. LIME is asked
    for the 5 strongest features of the single top label, without
    discretizing continuous features.

    Args:
        trg: The instance to explain.
        pred_label: Key used to look up ``local_exp``; it is also turned into
            a sign factor (``1 -> +1``, ``0 -> -1``) applied to every weight.

    Returns:
        None. The weights are handed to ``newlime_utils.plot_weights``.
    """
    explainer = lime_tabular.LimeTabularExplainer(
        dataset.train,
        feature_names=dataset.feature_names,
        class_names=dataset.class_names,
        discretize_continuous=False)
    lime_result = explainer.explain_instance(
        trg, rf.predict_proba, num_features=5, top_labels=1)
    # lime_result.show_in_notebook(show_table=True, show_all=True)

    # Maps label 1 to +1 and label 0 to -1.
    # NOTE(review): presumably this orients the weights toward class 1 for a
    # binary task -- confirm.
    sign = pred_label * 2 - 1

    # Features LIME did not report keep a weight of 0.
    weights = [0 for _ in dataset.feature_names]
    for feature_idx, feature_weight in lime_result.local_exp[pred_label]:
        weights[feature_idx] = feature_weight * sign
    newlime_utils.plot_weights(weights, dataset.feature_names)

In [3]:
from anchor import anchor_tabular

def anchor_original(trg, threshold=0.85):
    """Explain ``trg`` with the original Anchor explainer and print the result.

    Relies on the notebook-level ``dataset`` and ``rf`` objects.

    Args:
        trg: The instance to explain.
        threshold: Value forwarded to ``explain_instance`` and echoed in the
            printed report.

    Returns:
        None. Prints the threshold, the anchor rule (conditions joined with
        ``AND``), and the explanation's precision and coverage.
    """
    explainer = anchor_tabular.AnchorTabularExplainer(
        dataset.class_names,
        dataset.feature_names,
        dataset.train,
        dataset.categorical_names)
    result = explainer.explain_instance(trg, rf.predict, threshold)

    print('Threshold:  %.2f' % threshold)
    # print('Prediction:', explainer.class_names[rf.predict(trg.reshape(1, -1))[0]])
    rule_text = ' AND '.join(result.names())
    print('Anchor:     %s' % rule_text)
    print('Precision:  %.2f' % result.precision())
    print('Coverage:   %.2f' % result.coverage())

In [4]:
import newlime_base
import newlime_tabular
import newlime_utils

def new_lime(trg, dataset, threshold, epsilon, beam_size, my_verbose):
    """Run the NewLIME explainer on ``trg``, print the rule and plot the weights.

    The prediction function is ``rf.predict`` of the notebook-level
    classifier ``rf``.

    Args:
        trg: The instance to explain.
        dataset: Dataset object exposing ``class_names``, ``feature_names``,
            ``train`` and ``categorical_names``.
        threshold: Passed to the explainer as ``desired_confidence``.
        epsilon: Passed to the explainer as ``epsilon``.
        beam_size: Passed to the explainer as ``beam_size``.
        my_verbose: Accepted but not used inside this function.

    Returns:
        None. Prints the threshold, rule, precision and coverage, then either
        plots the surrogate weights or prints an error if there are none.
    """
    print("-----------")
    print("Threshold: ", threshold)

    explainer = newlime_tabular.NewLimeTabularExplainer(
        dataset.class_names,
        dataset.feature_names,
        dataset.train,
        dataset.categorical_names,
    )
    search_params = newlime_tabular.HyperParam(
        delta=0.05,
        epsilon=epsilon,
        epsilon_stop=0.05,
        beam_size=beam_size,
        batch_size=100,
        desired_confidence=threshold,
        coverage_samples_num=10000,
        max_rule_length=None,
    )
    anchor_exp, surrogate_model = explainer.my_explain_instance(
        trg, rf.predict, search_params)

    # Lay the rule out with at most three conditions per printed line.
    names = anchor_exp.names()
    rule_lines = [
        ' AND '.join(names[start:start + 3])
        for start in range(0, len(names), 3)
    ]

    weights = list(surrogate_model['LogisticRegression'].weights.values())
    feat = dataset.feature_names
    rule = ' AND \n'.join(rule_lines)
    prec = anchor_exp.precision()
    cov = anchor_exp.coverage()
    print("Rule     : ", rule)
    print("Precision: ", prec)
    print("Coverage : ", cov)

    if weights:
        newlime_utils.plot_weights(weights, feat, rule, prec, cov)
    else:
        print("Error! Surrogate model has no weights!")

## Generating Explanations

In [5]:
# Folder handed to the dataset loader.
dataset_folder = 'datasets/'

# Active dataset; uncomment one of the alternatives to switch.
dataset_name = 'recidivism'
# dataset_name = 'adult'
# dataset_name = 'diabetes'
# dataset_name = 'default'
# dataset_name = 'lending'

# Previous loader call, kept for reference:
# dataset = utils.load_dataset(
    # dataset_name, balance=True, dataset_folder=dataset_folder, discretize=True)

dataset = newlime_utils.load_dataset(dataset_name, dataset_folder)
print(dataset.class_names)

# Row counts, one per line: train split, test split, and their sum.
print(
    dataset.train.shape[0],
    dataset.test.shape[0],
    dataset.train.shape[0] + dataset.test.shape[0],
    sep='\n')

['No more crimes', 'Re-arrested']
5635
705
6340


In [6]:
import sklearn.ensemble
# Imported explicitly: `import sklearn.ensemble` alone does not guarantee that
# the `sklearn.metrics` submodule is loaded and reachable as an attribute.
import sklearn.metrics

# Classifier whose predictions are explained in the rest of the notebook.
# NOTE(review): no random_state is passed, so the fitted forest and the
# accuracies printed below can differ from run to run.
rf = sklearn.ensemble.RandomForestClassifier(n_estimators=50, n_jobs=5)
rf.fit(dataset.train, dataset.labels_train)

# Accuracy on the split the model was fitted on, then on the held-out split.
print('Train', sklearn.metrics.accuracy_score(
    dataset.labels_train, rf.predict(dataset.train)))
print('Test', sklearn.metrics.accuracy_score(
    dataset.labels_test, rf.predict(dataset.test)))

Train 0.935226264418811
Test 0.7531914893617021


In [7]:
import random

# Draw a random test-set row. randrange excludes its upper bound, so the
# result is always a valid row index; randint includes it and could return
# dataset.test.shape[0], which is one past the last row.
i = random.randrange(10, dataset.test.shape[0])

# adult
# 730, 956, 1196, 1522
# i = 1443 # --- Capital Gain = 2
# Manual override of the random draw above: pin the instance to explain.
i = 0

# recidivism
# i = 266
# i = 599

trg, trg_label = sample(i, dataset, dataset_name, dataset_folder)

Prediction: Re-arrested
True:       Re-arrested
----------------------  -------------------------------
Race                    White (1)
Alcohol                 No (0)
Junky                   No (0)
Supervised Release      Yes (1)
Married                 No (0)
Felony                  Yes (1)
WorkRelease             Yes (1)
Crime against Property  Yes (1)
Crime against Person    No (0)
Gender                  Male (1)
Priors                  1
YearsSchool             8.00 < YearsSchool <= 10.00 (1)
PrisonViolations        0
Age                     Age <= 21.00 (0)
MonthsServed            4.00 < MonthsServed <= 9.00 (1)
Recidivism              Re-arrested (1)
----------------------  -------------------------------


In [8]:
import importlib
# Re-import the local modules so edits made on disk take effect without
# restarting the kernel.
importlib.reload(newlime_base)
importlib.reload(newlime_tabular)
importlib.reload(newlime_utils)
write_file = False
if write_file:
    # pyplot is only needed when figures are saved. It was never imported in
    # the notebook, so enabling write_file used to fail with a NameError.
    import matplotlib.pyplot as plt

# randrange excludes its upper bound, so the draw is always a valid row index
# (randint could return dataset.test.shape[0] itself).
i = random.randrange(10, dataset.test.shape[0])
# Manual override of the random draw above: pin the instance to explain.
i = 0
trg, trg_label = sample(i, dataset, dataset_name, dataset_folder)

# Explain the same instance at increasing confidence thresholds.
for t in [0.70, 0.80, 0.90, 0.95]:
    new_lime(trg, dataset, threshold=t, beam_size=1, epsilon=0.1, my_verbose=True)
    if write_file:
        # round() rather than %d's truncation: a float product such as
        # 0.29 * 100 is 28.999..., which would otherwise be written as 028.
        img_file = 'img/%s/%05d-%03d.png' % (dataset_name, i, round(t * 100))
        plt.savefig(img_file, bbox_inches='tight')

Prediction: Re-arrested
True:       Re-arrested
----------------------  -------------------------------
Race                    White (1)
Alcohol                 No (0)
Junky                   No (0)
Supervised Release      Yes (1)
Married                 No (0)
Felony                  Yes (1)
WorkRelease             Yes (1)
Crime against Property  Yes (1)
Crime against Person    No (0)
Gender                  Male (1)
Priors                  1
YearsSchool             8.00 < YearsSchool <= 10.00 (1)
PrisonViolations        0
Age                     Age <= 21.00 (0)
MonthsServed            4.00 < MonthsServed <= 9.00 (1)
Recidivism              Re-arrested (1)
----------------------  -------------------------------
-----------
Threshold:  0.7
Rule     :  Felony = Yes AND Priors = 1
Precision:  0.7096897740329375
Coverage :  0.2041
-----------
Threshold:  0.8
Rule     :  Priors = 1 AND 4.00 < MonthsServed <= 9.00 AND PrisonViolations = 0 AND 
WorkRelease = Yes
Precision:  0.8396274343776

In [9]:
# Explain the selected instance with stock LIME, using the label the
# classifier predicts for test row i.
lime_original(trg, rf.predict([dataset.test[i]])[0])

write_file = False
if write_file:
    # pyplot was never imported in the notebook, so enabling write_file used
    # to fail with a NameError; import it here, only when it is needed.
    import matplotlib.pyplot as plt

    img_file = 'img/%s/%05d-LIME.png' % (dataset_name, i)
    plt.savefig(img_file, bbox_inches='tight')

In [10]:
# Display the test-set index currently held in `i`.
i

0

In [11]:
# Run the original Anchor explainer on the selected instance with a 0.80 threshold.
anchor_original(trg, threshold=0.80)

Threshold:  0.80
Anchor:     Age <= 21.00 AND Felony = Yes AND Married = No AND 8.00 < YearsSchool <= 10.00 AND Crime against Property = Yes AND WorkRelease = Yes
Precision:  0.85
Coverage:   0.01
