In [6]:
import pandas as pd 
import numpy as np
select_blackbox = ['dt', 'nn', 'rf'][1]

df = pd.read_csv("data/output/result.csv", header=None)
df.columns = ['accuracy', 'blackbox', 'dataset', 'explainer', 'explanation', 'explanation size', 'positive counterexamples', 'query', 'random words checked', 'syntactic grammar', 'terminate', 'time', 'time learner', 'time verifier', 'total counterexamples']
df = df[df['blackbox'] == select_blackbox]
print("minimum iterations:", min(df.groupby(['query'])['explanation'].count()), "max iterations:",max(df.groupby(['query'])['explanation'].count()))
df.head()

minimum iterations: 2 max iterations: 2


Unnamed: 0,accuracy,blackbox,dataset,explainer,explanation,explanation size,positive counterexamples,query,random words checked,syntactic grammar,terminate,time,time learner,time verifier,total counterexamples
0,0.545455,nn,zoo,sygus,(and (not fins) (and tail (< legs (/ 3 4)))),3.0,0.142857,"- threshold: 0.1\n- specific_input: [1.0, 0.0,...",3329.0,1.0,0.0,11.534405,9.81197,0.717693,21.0
1,1.0,nn,zoo,sygus,(and hair milk airborne toothed backbone brea...,16.0,0.333333,"- threshold: 0.1\n- specific_input: [1.0, 0.0,...",89.0,0.0,1.0,0.049068,0.022897,0.025799,3.0


In [7]:
# take median
group_list = ['dataset','explainer','query', 'syntactic grammar']
df_med = df.groupby(group_list).median()
df_med.reset_index(inplace=True)
df_med

Unnamed: 0,dataset,explainer,query,syntactic grammar,accuracy,explanation size,positive counterexamples,random words checked,terminate,time,time learner,time verifier,total counterexamples
0,zoo,sygus,"- threshold: 0.1\n- specific_input: [1.0, 0.0,...",0.0,1.0,16.0,0.333333,89.0,1.0,0.049068,0.022897,0.025799,3.0
1,zoo,sygus,"- threshold: 0.1\n- specific_input: [1.0, 0.0,...",1.0,0.545455,3.0,0.142857,3329.0,0.0,11.534405,9.81197,0.717693,21.0


In [8]:
# find representative explanation
query_explanations = {}
for key, item in df.groupby(['dataset','explainer','query', 'syntactic grammar'], as_index = False):
    item.reset_index(inplace=True, drop=True)
    if(len(item) == 1):
        query_explanations[key] = item['explanation'].item()
    else:
        # nearest explanation to the average accuracy
        explanation = item.iloc[item.index[(item['accuracy']-df_med['accuracy'][(df_med['dataset'] == key[0]) & (df_med['explainer'] == key[1]) & (df_med['syntactic grammar'] == key[3]) & (df_med['query'] == key[2])].item()).abs().argsort()][0]]['explanation']
        query_explanations[key] = explanation

In [9]:
df_output = df_med
df_output = df_output.drop("terminate", axis=1)
df_output['explanation'] = np.nan 
for key in query_explanations:
    df_output['explanation'][(df_output['dataset'] == key[0]) & (df_output['explainer'] == key[1]) & (df_output['query'] == key[2])] = query_explanations[key]

# reorganise columns
df_output = df_output[[ 'dataset',  'syntactic grammar', 'explainer','query', 'explanation', 'accuracy', 'time', 'time learner', 'time verifier', 'random words checked', 'total counterexamples', 'positive counterexamples']]
df_output = df_output.round(2)
df_output.head()

Unnamed: 0,dataset,syntactic grammar,explainer,query,explanation,accuracy,time,time learner,time verifier,random words checked,total counterexamples,positive counterexamples
0,zoo,0.0,sygus,"- threshold: 0.1\n- specific_input: [1.0, 0.0,...",(and (not fins) (and tail (< legs (/ 3 4)))),1.0,0.05,0.02,0.03,89.0,3.0,0.33
1,zoo,1.0,sygus,"- threshold: 0.1\n- specific_input: [1.0, 0.0,...",(and (not fins) (and tail (< legs (/ 3 4)))),0.55,11.53,9.81,0.72,3329.0,21.0,0.14


In [14]:
import matplotlib.pyplot as plt
import pickle
from IPython.display import Markdown, display
from trustable_explanation import helper_functions
import numpy as np


features = ['hair', 'feathers', 'eggs', 'milk', 'airborne', 'aquatic', 'predator', 'toothed', 'backbone', 'breathes', 'venomous', 'fins', 'legs', 'tail', 'domestic', 'catsize']
# print results in an explainable manner
for key, item in df_output.groupby(['dataset','query','explainer','syntactic grammar'], as_index = False):
    print("\n\n")
    display(Markdown("### Query"))
    for i in key:
        print(i)
    display(Markdown("### Result"))
    if(key[1] == "logistic regression"):
        feature_importance = np.fromstring(item['explanation'].item()[1:-1], dtype=np.float, sep=' ')
        feature_importance = 100.0 * (feature_importance / (abs(feature_importance).max()))
        sorted_idx = np.argsort(abs(feature_importance))
        pos = np.arange(sorted_idx.shape[0]) + .5
        featfig = plt.figure()
        featax = featfig.add_subplot(1, 1, 1)
        featax.barh(pos, feature_importance[sorted_idx], align='center')
        featax.set_yticks(pos)
        featax.set_yticklabels(np.array(features)[sorted_idx])
        featax.set_xlabel('Relative Feature Importance')
        plt.tight_layout()   
        plt.show()
    elif(key[1] == "decision tree"):
        dt = None
        with open(item['explanation'].item(), 'rb') as fid:
            dt = pickle.load(fid)
        print(helper_functions.tree_to_code(dt,features))
    else:
        print(item['explanation'].item())
    print("\n\naccuracy",item['accuracy'].item())







### Query

zoo
- threshold: 0.1
- specific_input: [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.25, 1.0, 0.0, 0.0]
- detailed_input: [('hair', 1.0), ('feathers', 0.0), ('eggs', 0.0), ('milk', 1.0), ('airborne', 1.0), ('aquatic', 0.0), ('predator', 0.0), ('toothed', 1.0), ('backbone', 1.0), ('breathes', 1.0), ('venomous', 0.0), ('fins', 0.0), ('legs', 0.25), ('tail', 1.0), ('domestic', 0.0), ('catsize', 0.0)]
sygus
0.0


### Result

(and (not fins) (and tail (< legs (/ 3 4))))


accuracy 1.0





### Query

zoo
- threshold: 0.1
- specific_input: [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.25, 1.0, 0.0, 0.0]
- detailed_input: [('hair', 1.0), ('feathers', 0.0), ('eggs', 0.0), ('milk', 1.0), ('airborne', 1.0), ('aquatic', 0.0), ('predator', 0.0), ('toothed', 1.0), ('backbone', 1.0), ('breathes', 1.0), ('venomous', 0.0), ('fins', 0.0), ('legs', 0.25), ('tail', 1.0), ('domestic', 0.0), ('catsize', 0.0)]
sygus
1.0


### Result

(and (not fins) (and tail (< legs (/ 3 4))))


accuracy 0.55


In [70]:
# to remote 
# df_output.to_csv("data/output/summary.csv", index = False)
df_output.to_csv("/home/bishwamittra/Dropbox/trustable_explanations/result.csv", index = False)