## This notebook experiments with the inconsistency issue of LIME and shows how BayLIME (with its 3 options) improves on it -- image datasets


In [None]:
import sys
import os
sys.path.append("..")# allow the notebook to find the parent folder
import keras
from keras.applications import inception_v3 as inc_net
from keras.preprocessing import image
from keras.applications.imagenet_utils import decode_predictions
from skimage.io import imread
import matplotlib.pyplot as plt
#%matplotlib inline
import numpy as np
from skimage.segmentation import mark_boundaries
#from lime.lime_image import *
from lime import lime_image
import csv
import math
from lime import calculate_posteriors
print('Notebook run using keras:', keras.__version__)

# Create a standard InceptionV3 pretrained model. Images are passed to it
# only after going through the preprocessing tools defined below.
inet_model = inc_net.InceptionV3()


def transform_img_fn(path_list):
    """Load the images at the given paths as one preprocessed batch.

    Each path is loaded at 299x299, converted to an array, given a leading
    batch axis and passed through ``inc_net.preprocess_input``. The
    per-image arrays are then stacked along the first axis.
    """
    def _load_single(img_path):
        # one image -> array with a leading batch axis of length 1
        pixels = image.img_to_array(
            image.load_img(img_path, target_size=(299, 299)))
        return inc_net.preprocess_input(np.expand_dims(pixels, axis=0))

    return np.vstack([_load_single(img_path) for img_path in path_list])


# Load and preprocess the single image studied in this notebook (data/5.jpg).
images = transform_img_fn([os.path.join('data','5.jpg')])
# The preprocessed image is divided by 2 and shifted by 0.5 before display
# because of how this Inception represents images
# (presumably values in [-1, 1] -- TODO confirm).
plt.imshow(images[0] / 2 + 0.5)
plt.show()
# Run the network on the batch and print the decoded predictions of the image.
preds = inet_model.predict(images)
for x in decode_predictions(preds)[0]:
    print(x)

In [None]:
# Explain the image once, show the explanation, then combine it with the prior
# knowledge through calculate_posteriors.get_posterior and show it again.
explainer = lime_image.LimeImageExplainer(feature_selection='none')#kernel_width=0.1   feature_selection='none'

# Hide color is the color for a superpixel turned OFF. Alternatively, if it is NONE, the superpixel will be replaced by the average of its pixels
explanation = explainer.explain_instance(images[0], inet_model.predict,
                                         top_labels=1, hide_color=0, batch_size=10,
                                         num_samples=100,model_regressor='Bay_info_prior')#'non_Bay' 'Bay_non_info_prior' 'Bay_info_prior','BayesianRidge_inf_prior_fit_alpha'

# Alternative visualisations (kept for convenience):
# temp, mask = explanation.get_image_and_mask(explanation.top_labels[0], positive_only=True, num_features=2, hide_rest=True)
# plt.imshow(mark_boundaries(temp / 2 + 0.5, mask))
# plt.show()

# temp, mask = explanation.get_image_and_mask(explanation.top_labels[0], positive_only=True, num_features=5, hide_rest=False)
# plt.imshow(mark_boundaries(temp / 2 + 0.5, mask))
# plt.show()

temp, mask = explanation.get_image_and_mask(explanation.top_labels[0], positive_only=False, num_features=5, hide_rest=False)
plt.imshow(mark_boundaries(temp / 2 + 0.5, mask))
plt.show()
print("**")
print(explanation.as_list(explanation.top_labels[0]))

# Hyper-parameters of the prior: both default to 1 and are overridden by the
# second row (index 1) of the configuration file when that row exists.
alpha_init=1
lambda_init=1
with open('./posterior_configure.csv', newline='') as csv_file:
    csv_reader=csv.reader(csv_file)
    for line_count, row in enumerate(csv_reader):
        if line_count == 1:
            alpha_init=float(row[0])
            lambda_init=float(row[1])

# Build the path with os.path.join: the former literal '.\data\...' only
# resolved on Windows and relied on the invalid escapes '\d' and '\p'.
prior_knowledge_path=os.path.join('data','prior_knowledge_5_jpg.csv')
explanation=calculate_posteriors.get_posterior(explanation,prior_knowledge_path ,hyper_para_alpha=alpha_init, hyper_para_lambda=lambda_init,
                                        label=explanation.top_labels[0])



temp, mask = explanation.get_image_and_mask(explanation.top_labels[0], positive_only=False, num_features=5, hide_rest=False)
plt.imshow(mark_boundaries(temp / 2 + 0.5, mask))
plt.show()

print("**")
print(explanation.as_list(explanation.top_labels[0]))


In [None]:
# Toy run: explain the same image k times and store the explanations in one
# flat array (reshaped into one row per explanation in a later cell).
k=3#number of explanations
m=63# number of features
instance=3
explanations=np.array([])
for i in range(1, k+1):
    explainer = lime_image.LimeImageExplainer(feature_selection='none')#kernel_width=0.1   feature_selection='none'

    exp = explainer.explain_instance(images[0], inet_model.predict,
                                         top_labels=1, hide_color=0, batch_size=10,
                                         num_samples=100,model_regressor='Bay_info_prior')#'non_Bay' 'Bay_non_info_prior' 'Bay_info_prior','BayesianRidge_inf_prior_fit_alpha'

    # Ask the explanation for its own top label instead of depending on the
    # `explanation` object left behind by a previously executed cell.
    temp_list=exp.as_list(exp.top_labels[0])
    temp_array = np.array(temp_list)
    explanations=np.append(explanations,temp_array)

In [None]:
# One row per explanation. Each row has 2*m entries because the cells below
# read feature names from the even positions and importances from the odd ones.
exps=explanations.reshape(k,2*m)# k exps, 63 features for this instance.. 

In [None]:
# Scale the importance values of every explanation to unit L2 norm, in place.
for exp in exps:
    # the importances sit at the odd positions 1, 3, ..., 2*m-1 of the row
    temp_vector=np.array([float(exp[pos]) for pos in range(1, 2*m, 2)])
    normlised_temp_vector=temp_vector/np.linalg.norm(temp_vector)
    # write each normalised value back into the slot it was read from
    for slot, value in enumerate(normlised_temp_vector):
        exp[2*slot+1]=value

In [None]:
# Feature names occupy the even positions 0, 2, ..., 2*m-2 of a row; they are
# read from the first explanation.
feature_names=np.append(np.array([]), exps[0, 0:2*m:2])
print(feature_names)

In [None]:
def rankings_in_k_exp (feature, k_exps):
    """Return the 1-based rank of `feature` in each explanation of `k_exps`.

    Every row of `k_exps` is searched for `feature`; a match at flat position
    p yields rank ceil(p/2)+1. The ranks come back as a float array with one
    entry per row. A row that does not contain `feature` raises ValueError.
    """
    positions = [math.ceil(row.tolist().index(feature)/2)+1 for row in k_exps]
    return np.append(np.array([]), positions)

# Example: rank of feature '7' in each of the k explanations.
print(rankings_in_k_exp('7',exps))

def importance_in_k_exp (feature, k_exps):
    """Return the importance stored next to `feature` in each explanation.

    For every row of `k_exps` the entry immediately after the first
    occurrence of `feature` is collected; the result holds one entry per row.
    A row that does not contain `feature` raises ValueError.
    """
    found = [row[row.tolist().index(feature)+1] for row in k_exps]
    return np.append(np.array([]), found)

# Example: importance of feature '7' in each of the k explanations.
print(importance_in_k_exp('7',exps))

In [None]:
# For every feature collect its importance (g) and its rank (f) in each of
# the k explanations, giving two matrices with one row per feature.
importance_rows=[importance_in_k_exp(feature,exps) for feature in feature_names]
ranking_rows=[rankings_in_k_exp(feature,exps) for feature in feature_names]
g_i_s=np.append(np.array([]),importance_rows).reshape(m,k)
f_i_s=np.append(np.array([]),ranking_rows).reshape(m,k)
print(g_i_s)
print(f_i_s)

In [None]:
# Index of dispersion (variance / mean) of the ranks of each feature.
IoD_f_i_s=np.array([])
for f_i in f_i_s:
    mean_rank=np.mean(f_i)
    # a zero mean would make the ratio undefined; report 0 in that case
    IoD_f_i=0 if mean_rank==0 else np.var(f_i)/mean_rank
    IoD_f_i_s=np.append(IoD_f_i_s,IoD_f_i)
print(IoD_f_i_s)

In [None]:
# Weight of each feature: the mean absolute importance over the k
# explanations, normalised so that all weights sum to 1.
weights_g_i_s=np.array([])
for g_i in g_i_s:
    # builtin float replaces the np.float alias, which was removed from NumPy
    weight=np.mean(np.abs(g_i.astype(float)))
    weights_g_i_s=np.append(weights_g_i_s,weight)
weights_g_i_s=weights_g_i_s/sum(weights_g_i_s)
print(weights_g_i_s)

In [None]:
# Inconsistency score: per-feature index of dispersion of the ranks, weighted
# by the normalised importance of the feature.
np.dot(weights_g_i_s,IoD_f_i_s)

In [None]:
def kendall_w(expt_ratings):
    """Return Kendall's coefficient of concordance W of a ratings matrix.

    Parameters
    ----------
    expt_ratings : 2-D array-like
        One row per rater and one column per rated item.

    Returns
    -------
    float
        ``12*S / (m**2 * (n**3 - n))`` with m raters, n items and S the sum
        of squared deviations of the per-item rating sums from their mean.
        No correction for tied ranks is applied.

    Raises
    ------
    ValueError
        If `expt_ratings` is not 2-dimensional.
    """
    expt_ratings = np.asarray(expt_ratings)
    if expt_ratings.ndim!=2:
        # raising a plain string is itself a TypeError in Python 3
        raise ValueError('ratings matrix must be 2-dimensional')
    m, n = expt_ratings.shape  # m raters, n items rated
    denom = m**2*(n**3-n)
    rating_sums = np.sum(expt_ratings, axis=0)
    # n * variance == sum of squared deviations from the mean rating sum
    S = n*np.var(rating_sums)
    return 12*S/denom

In [None]:
# f_i_s has one row per feature, so it is transposed to put the k explanations
# on the rows (raters) and the m features on the columns (items) for kendall_w.
ken_w=kendall_w(f_i_s.T)
print(ken_w)

## So far we have shown a toy example of calculating the inconsistency of k explanations for a fixed number of samples. What we really want is the inconsistency as a function of the number of samples, so let us compute that now.

In [None]:

# Inconsistency and Kendall's W of BayLIME with non-informative priors as a
# function of the number of perturbed samples n (n = 10, 60, ..., 960).
n=10 #number of samples
inconsistency_non_info=np.array([])
ken_w_non_info=np.array([])
while n<=1000:

    k=20#number of explanations
    m=63# number of features
    explanations=np.array([])
    for run in range(k):
        explainer = lime_image.LimeImageExplainer(feature_selection='none')#kernel_width=0.1   feature_selection='none'

        exp = explainer.explain_instance(images[0], inet_model.predict,
                                         top_labels=1, hide_color=0, batch_size=10,
                                         num_samples=n,model_regressor='Bay_non_info_prior')#'non_Bay' 'Bay_non_info_prior' 'Bay_info_prior','BayesianRidge_inf_prior_fit_alpha'

        # Use the explanation's own top label instead of depending on the
        # `explanation` object left behind by an earlier cell.
        temp_array = np.array(exp.as_list(exp.top_labels[0]))
        explanations=np.append(explanations,temp_array)

    exps=explanations.reshape(k,2*m)# k exps, 63 features for this instance
    # Scale the importances (odd positions of each row) to unit L2 norm, in place.
    for exp in exps:
        temp_vector=np.array([float(exp[pos]) for pos in range(1, 2*m, 2)])
        normlised_temp_vector=temp_vector/np.linalg.norm(temp_vector)
        for slot, value in enumerate(normlised_temp_vector):
            exp[2*slot+1]=value
    # Feature names sit at the even positions; read them from the first explanation.
    feature_names=exps[0, 0:2*m:2].copy()

    # One row per feature, one column per explanation.
    g_i_s=np.array([importance_in_k_exp(feature,exps) for feature in feature_names]).reshape(m,k)
    f_i_s=np.array([rankings_in_k_exp(feature,exps) for feature in feature_names]).reshape(m,k)

    # Index of dispersion (variance / mean) of the ranks of each feature;
    # 0 is reported when the mean is 0 to avoid a division by zero.
    IoD_f_i_s=np.array([np.var(f_i)/np.mean(f_i) if np.mean(f_i)!=0 else 0 for f_i in f_i_s])

    # Mean absolute importance of each feature, normalised to sum to 1.
    # Builtin float replaces the np.float alias, which was removed from NumPy.
    weights_g_i_s=np.array([np.mean(np.abs(g_i.astype(float))) for g_i in g_i_s])
    weights_g_i_s=weights_g_i_s/sum(weights_g_i_s)

    inconsistency_non_info=np.append(inconsistency_non_info,np.dot(weights_g_i_s,IoD_f_i_s))

    ken_w_non_info=np.append(ken_w_non_info,kendall_w(f_i_s.T))

    print(n)
    n=n+50

In [None]:
# Inconsistency and Kendall's W of plain LIME ('non_Bay' regressor) as a
# function of the number of perturbed samples n (n = 10, 60, ..., 960).
n=10 #number of samples
inconsistency_non_Bay=np.array([])
ken_w_non_Bay=np.array([])
while n<=1000:

    k=20#number of explanations
    m=63# number of features
    explanations=np.array([])
    for run in range(k):
        explainer = lime_image.LimeImageExplainer(feature_selection='none')#kernel_width=0.1   feature_selection='none'

        exp = explainer.explain_instance(images[0], inet_model.predict,
                                         top_labels=1, hide_color=0, batch_size=10,
                                         num_samples=n,model_regressor='non_Bay')#'non_Bay' 'Bay_non_info_prior' 'Bay_info_prior','BayesianRidge_inf_prior_fit_alpha'

        # Use the explanation's own top label instead of depending on the
        # `explanation` object left behind by an earlier cell.
        temp_array = np.array(exp.as_list(exp.top_labels[0]))
        explanations=np.append(explanations,temp_array)

    exps=explanations.reshape(k,2*m)# k exps, 63 features for this instance
    # Scale the importances (odd positions of each row) to unit L2 norm, in place.
    for exp in exps:
        temp_vector=np.array([float(exp[pos]) for pos in range(1, 2*m, 2)])
        normlised_temp_vector=temp_vector/np.linalg.norm(temp_vector)
        for slot, value in enumerate(normlised_temp_vector):
            exp[2*slot+1]=value
    # Feature names sit at the even positions; read them from the first explanation.
    feature_names=exps[0, 0:2*m:2].copy()

    # One row per feature, one column per explanation.
    g_i_s=np.array([importance_in_k_exp(feature,exps) for feature in feature_names]).reshape(m,k)
    f_i_s=np.array([rankings_in_k_exp(feature,exps) for feature in feature_names]).reshape(m,k)

    # Index of dispersion (variance / mean) of the ranks of each feature;
    # 0 is reported when the mean is 0 to avoid a division by zero.
    IoD_f_i_s=np.array([np.var(f_i)/np.mean(f_i) if np.mean(f_i)!=0 else 0 for f_i in f_i_s])

    # Mean absolute importance of each feature, normalised to sum to 1.
    # Builtin float replaces the np.float alias, which was removed from NumPy.
    weights_g_i_s=np.array([np.mean(np.abs(g_i.astype(float))) for g_i in g_i_s])
    weights_g_i_s=weights_g_i_s/sum(weights_g_i_s)

    inconsistency_non_Bay=np.append(inconsistency_non_Bay,np.dot(weights_g_i_s,IoD_f_i_s))

    ken_w_non_Bay=np.append(ken_w_non_Bay,kendall_w(f_i_s.T))

    print(n)
    n=n+50

In [None]:
# Persist the raw curves of plain LIME and of BayLIME with non-informative
# priors. The output folder is created first so that a missing directory does
# not make np.savetxt fail after the long experiment above.
os.makedirs("./figures/inconsistency_image_raw_data", exist_ok=True)
np.savetxt("./figures/inconsistency_image_raw_data/lime_incon.csv", inconsistency_non_Bay, delimiter=",")
np.savetxt("./figures/inconsistency_image_raw_data/lime_ken_w.csv", ken_w_non_Bay, delimiter=",")
np.savetxt("./figures/inconsistency_image_raw_data/BayLIME_non_info_incon.csv", inconsistency_non_info, delimiter=",")
np.savetxt("./figures/inconsistency_image_raw_data/BayLIME_non_info_ken_w.csv", ken_w_non_info, delimiter=",")

In [None]:
# Inconsistency and Kendall's W of BayLIME with partially informative priors
# ('BayesianRidge_inf_prior_fit_alpha') as a function of the number of
# perturbed samples n (n = 10, 60, ..., 960).

# Hyper-parameters of the prior: both default to 1 and are overridden by the
# second row (index 1) of the configuration file when that row exists. The
# file is read once here instead of once per explanation, and the paths no
# longer use Windows-only backslash literals.
alpha_init=1
lambda_init=1
with open('./posterior_configure.csv', newline='') as csv_file:
    csv_reader=csv.reader(csv_file)
    for line_count, row in enumerate(csv_reader):
        if line_count == 1:
            alpha_init=float(row[0])
            lambda_init=float(row[1])
prior_knowledge_path=os.path.join('data','prior_knowledge_5_jpg.csv')

n=10 #number of samples
inconsistency_info_prior_fit_alpha=np.array([])
ken_w_info_prior_fit_alpha=np.array([])
while n<=1000:

    k=20#number of explanations
    m=63# number of features
    explanations=np.array([])
    for run in range(k):
        explainer = lime_image.LimeImageExplainer(feature_selection='none')#kernel_width=0.1   feature_selection='none'

        exp = explainer.explain_instance(images[0], inet_model.predict,
                                         top_labels=1, hide_color=0, batch_size=10,
                                         num_samples=n,model_regressor='BayesianRidge_inf_prior_fit_alpha')#'non_Bay' 'Bay_non_info_prior' 'Bay_info_prior','BayesianRidge_inf_prior_fit_alpha'

        # Use the explanation's own top label instead of depending on the
        # `explanation` object left behind by an earlier cell.
        label=exp.top_labels[0]
        exp=calculate_posteriors.get_posterior(exp,prior_knowledge_path ,hyper_para_alpha=alpha_init, hyper_para_lambda=lambda_init,
                                        label=label)

        temp_array = np.array(exp.as_list(label))
        explanations=np.append(explanations,temp_array)

    exps=explanations.reshape(k,2*m)# k exps, 63 features for this instance
    # Scale the importances (odd positions of each row) to unit L2 norm, in place.
    for exp in exps:
        temp_vector=np.array([float(exp[pos]) for pos in range(1, 2*m, 2)])
        normlised_temp_vector=temp_vector/np.linalg.norm(temp_vector)
        for slot, value in enumerate(normlised_temp_vector):
            exp[2*slot+1]=value
    # Feature names sit at the even positions; read them from the first explanation.
    feature_names=exps[0, 0:2*m:2].copy()

    # One row per feature, one column per explanation.
    g_i_s=np.array([importance_in_k_exp(feature,exps) for feature in feature_names]).reshape(m,k)
    f_i_s=np.array([rankings_in_k_exp(feature,exps) for feature in feature_names]).reshape(m,k)

    # Index of dispersion (variance / mean) of the ranks of each feature;
    # 0 is reported when the mean is 0 to avoid a division by zero.
    IoD_f_i_s=np.array([np.var(f_i)/np.mean(f_i) if np.mean(f_i)!=0 else 0 for f_i in f_i_s])

    # Mean absolute importance of each feature, normalised to sum to 1.
    # Builtin float replaces the np.float alias, which was removed from NumPy.
    weights_g_i_s=np.array([np.mean(np.abs(g_i.astype(float))) for g_i in g_i_s])
    weights_g_i_s=weights_g_i_s/sum(weights_g_i_s)

    inconsistency_info_prior_fit_alpha=np.append(inconsistency_info_prior_fit_alpha,np.dot(weights_g_i_s,IoD_f_i_s))

    ken_w_info_prior_fit_alpha=np.append(ken_w_info_prior_fit_alpha,kendall_w(f_i_s.T))

    n=n+50

In [None]:
# Persist the raw curves of BayLIME with partially informative priors. The
# output folder is created first so that a missing directory does not make
# np.savetxt fail after the long experiment above.
os.makedirs("./figures/inconsistency_image_raw_data", exist_ok=True)
np.savetxt("./figures/inconsistency_image_raw_data/BayLIME_info_fit_alpha_lambda200_incon.csv", inconsistency_info_prior_fit_alpha, delimiter=",")
np.savetxt("./figures/inconsistency_image_raw_data/BayLIME_info_fit_alpha_lambda200_ken_w.csv", ken_w_info_prior_fit_alpha, delimiter=",")

In [None]:
# Inconsistency and Kendall's W of BayLIME with fully informative priors
# ('Bay_info_prior') as a function of the number of perturbed samples n
# (n = 10, 60, ..., 960).

# Hyper-parameters of the prior: both default to 1 and are overridden by the
# second row (index 1) of the configuration file when that row exists. The
# file is read once here instead of once per explanation, and the paths no
# longer use Windows-only backslash literals.
alpha_init=1
lambda_init=1
with open('./posterior_configure.csv', newline='') as csv_file:
    csv_reader=csv.reader(csv_file)
    for line_count, row in enumerate(csv_reader):
        if line_count == 1:
            alpha_init=float(row[0])
            lambda_init=float(row[1])
prior_knowledge_path=os.path.join('data','prior_knowledge_5_jpg.csv')

n=10 #number of samples
inconsistency_full_info_prior=np.array([])
ken_w_full_info_prior=np.array([])
while n<=1000:

    k=20#number of explanations
    m=63# number of features
    explanations=np.array([])
    for run in range(k):
        explainer = lime_image.LimeImageExplainer(feature_selection='none')#kernel_width=0.1   feature_selection='none'

        exp = explainer.explain_instance(images[0], inet_model.predict,
                                         top_labels=1, hide_color=0, batch_size=10,
                                         num_samples=n,model_regressor='Bay_info_prior')#'non_Bay' 'Bay_non_info_prior' 'Bay_info_prior','BayesianRidge_inf_prior_fit_alpha'

        # Use the explanation's own top label instead of depending on the
        # `explanation` object left behind by an earlier cell.
        label=exp.top_labels[0]
        exp=calculate_posteriors.get_posterior(exp,prior_knowledge_path ,hyper_para_alpha=alpha_init, hyper_para_lambda=lambda_init,
                                        label=label)

        temp_array = np.array(exp.as_list(label))
        explanations=np.append(explanations,temp_array)

    exps=explanations.reshape(k,2*m)# k exps, 63 features for this instance
    # Scale the importances (odd positions of each row) to unit L2 norm, in place.
    for exp in exps:
        temp_vector=np.array([float(exp[pos]) for pos in range(1, 2*m, 2)])
        normlised_temp_vector=temp_vector/np.linalg.norm(temp_vector)
        for slot, value in enumerate(normlised_temp_vector):
            exp[2*slot+1]=value
    # Feature names sit at the even positions; read them from the first explanation.
    feature_names=exps[0, 0:2*m:2].copy()

    # One row per feature, one column per explanation.
    g_i_s=np.array([importance_in_k_exp(feature,exps) for feature in feature_names]).reshape(m,k)
    f_i_s=np.array([rankings_in_k_exp(feature,exps) for feature in feature_names]).reshape(m,k)

    # Index of dispersion (variance / mean) of the ranks of each feature;
    # 0 is reported when the mean is 0 to avoid a division by zero.
    IoD_f_i_s=np.array([np.var(f_i)/np.mean(f_i) if np.mean(f_i)!=0 else 0 for f_i in f_i_s])

    # Mean absolute importance of each feature, normalised to sum to 1.
    # Builtin float replaces the np.float alias, which was removed from NumPy.
    weights_g_i_s=np.array([np.mean(np.abs(g_i.astype(float))) for g_i in g_i_s])
    weights_g_i_s=weights_g_i_s/sum(weights_g_i_s)

    inconsistency_full_info_prior=np.append(inconsistency_full_info_prior,np.dot(weights_g_i_s,IoD_f_i_s))

    ken_w_full_info_prior=np.append(ken_w_full_info_prior,kendall_w(f_i_s.T))

    n=n+50

In [None]:
# Persist the raw curves of BayLIME with fully informative priors. The output
# folder is created first so that a missing directory does not make
# np.savetxt fail after the long experiment above.
os.makedirs("./figures/inconsistency_image_raw_data", exist_ok=True)
np.savetxt("./figures/inconsistency_image_raw_data/BayLIME_full_info_alpha1_lambda200_incon.csv", inconsistency_full_info_prior, delimiter=",")
np.savetxt("./figures/inconsistency_image_raw_data/BayLIME_full_info_alpha1_lambda200_ken_w.csv", ken_w_full_info_prior, delimiter=",")

In [None]:
import matplotlib.pyplot as plt
# x axis: the sample sizes visited by the experiment loops (10, 60, ..., 960)
x_index=np.append(np.array([]), range(10, 1001, 50))

# One inconsistency curve per explainer variant, drawn in this order.
# NOTE(review): the labels say lambda=20 while the saved CSV names say
# lambda200 -- confirm which value was actually used.
inconsistency_curves=(
    (inconsistency_non_Bay,'red','LIME'),
    (inconsistency_non_info,'green','BayLIME with non-informative priors'),
    (inconsistency_info_prior_fit_alpha,'blue',r'BayLIME with partial informative priors ($\lambda=20$)'),
    (inconsistency_full_info_prior,'orange',r'BayLIME with full informative priors ($\alpha=1,\lambda=20$)'),
)
for curve, colour, caption in inconsistency_curves:
    plt.plot(x_index,curve,linestyle='-',color=colour,label=caption)

#plt.xscale('log')
plt.legend(loc='upper right',fontsize=10)#bbox_to_anchor=(1,1)
#plt.axis([10, 300, 0,10])# set the ranges of axis
plt.xlabel('$n$')
plt.ylabel('inconsistency')
plt.grid(True)

plt.show()

In [None]:
# One Kendall's W curve per explainer variant, drawn in this order.
# NOTE(review): the labels say lambda=20 while the saved CSV names say
# lambda200 -- confirm which value was actually used.
kendall_curves=(
    (ken_w_non_Bay,'red','LIME'),
    (ken_w_non_info,'green','BayLIME with non-informative priors'),
    (ken_w_info_prior_fit_alpha,'blue',r'BayLIME with partial informative priors ($\lambda=20$)'),
    (ken_w_full_info_prior,'orange',r'BayLIME with full informative priors ($\alpha=1,\lambda=20$)'),
)
for curve, colour, caption in kendall_curves:
    plt.plot(x_index,curve,linestyle='-',color=colour,label=caption)

#plt.xscale('log')
plt.legend(fontsize=10)#bbox_to_anchor=(1,1) ,loc='upper right'
#plt.axis([10, 300, 0,10])# set the ranges of axis
plt.xlabel('$n$')
plt.ylabel('Kendall’s W')
plt.grid(True)

plt.show()