# Faithful and Robust Local Interpretability for Textual Predictions
## Example of FRED for text classification

In [1]:
import numpy as np
from sklearn.metrics import accuracy_score, confusion_matrix
# Seed NumPy's global RNG so that np.random-based steps later in the
# notebook give the same result after "Restart Kernel -> Run All".
np.random.seed(42)

In [2]:
# Helpers for rendering rich (HTML) output inline in the notebook.
# Imported from the public IPython.display module: importing `display` from
# IPython.core.display has been deprecated since IPython 7.14.
from IPython.display import display, HTML

In [3]:
import warnings
# Suppress every warning for the rest of the session (presumably to keep the
# demo output uncluttered). NOTE(review): this also hides deprecation notices.
warnings.filterwarnings("ignore")

In [4]:
# One-time setup: uncomment to install the dependencies into the kernel's environment.
# %pip install -r requirements.txt

In [5]:
# One-time setup: uncomment to download the large English spaCy model.
# !python -m spacy download en_core_web_lg

In [6]:
from fred.explainer import Fred

In [7]:
from utils.setup import *

In [8]:
# Pick the corpus and the classifier this demo runs on.
dataset_name = "yelp"
model_name = "forest_classifier"

# Load the train/test split together with the class labels.
(X_train, X_test, y_train, y_test, class_names) = setup_dataset(dataset_name)

# Build the classifier, then fit it on the training split.
model = setup_model(model_name)
model.train(X_train, y_train)

In [9]:
# Score the trained model on the held-out test split.
y_pred = model.predict(X_test)

# Compute both metrics first, then report them.
cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

print(f"Confusion matrix: \n {cm}")
print(f"accuracy: {accuracy}")

Confusion matrix: 
 [[105  23]
 [ 10 112]]
accuracy: 0.868


In [10]:
# Running example: a review that mixes positive and negative statements.
example = 'The food is great and location is amazing but the service is really awful'
print(f"Example: '{example}'\n")

# Query the model with a one-element batch and look up the name of the
# top-scoring class for that single row.
prediction = model.predict_proba([example])
predicted_label = class_names[prediction.argmax(1)[0]]
print(f"Classified as '{predicted_label}'")

Example: 'The food is great and location is amazing but the service is really awful'

Classified as 'positive'


### Part-of-speech sampling scheme

In [11]:
# Initialize the explainer.
# Draw 100 randomly chosen test documents for `pos_dataset` WITHOUT mutating
# X_test: shuffling X_test in place would break its row alignment with
# y_test / y_pred and would hand the explainer different data each time this
# cell is re-run on the same kernel.
pos_idx = np.random.permutation(len(X_test))[:100]
if isinstance(X_test, np.ndarray):
    # Fancy indexing keeps the container type of the original slice.
    pos_dataset = X_test[pos_idx]
else:
    # Sequence input (e.g. a list of str): keep plain elements in a list.
    pos_dataset = [X_test[i] for i in pos_idx]

fredpos_explainer = Fred(
    classifier_fn=model.predict_proba,
    class_names=class_names,
    pos=True,
    pos_dataset=pos_dataset,
)

In [12]:
# Explain the running example with the POS-sampling explainer.
fredpos_exp = fredpos_explainer.explain_instance(
    example,
    perturb_proba=0.1,
    n_sample=5000,
    verbose=True,
)


FRED mode: 'pos sampling'.
Example to explain: 
	'the food is great and location is amazing but the service is really awful'
Original prediction: 'positive'
Average confidence over the sample: 0.68

Explaining class 'positive':
The minimal subset of tokens that make the confidence drop by 15.0% if perturbed is 
	['great']

Saliency weights: 
	[('great', 0.126), ('amazing', 0.059), ('and', 0.059), ('but', 0.032), ('service', 0.013), ('location', 0.011), ('really', 0.006), ('is', 0.004), ('food', -0.003), ('is', -0.005)]


In [13]:
# Build the HTML rendering of the POS-sampling explanation and show it inline.
visualization = fredpos_exp.html_explanation()
display(visualization)

#### Counterfactual explanations

In [14]:
# Ask for up to k=5 perturbed samples that the model labels as class_names[0].
counter_sample, perturbed_tokens = fredpos_exp.counterfactual(
    counter_label=class_names[0],
    verbose=True,
    k=5,
)


Counterfactual explanation for the example
	'the food is great and location is amazing but the service is really awful'

FRED mode: 'pos sampling'.
Original prediction: 'positive'

Sample(s) with minimal perturbation predicted as 'negative':

['the food is mundane and location is least but the service is really awful'
 'the food is dirty so location is amazing but the service is really awful'
 'the food is hostile yet location is amazing but the service is really awful'
 'the food is mexican and location is neat but the service is really awful'
 'the food is longer or location is amazing but the service is really awful']

Perturbed tokens: 
	[['great', 'amazing'], ['great', 'and'], ['great', 'and'], ['great', 'amazing'], ['great', 'and']]


### MASK sampling scheme

In [15]:
# Build a second explainer with POS sampling switched off (mask sampling).
fred_explainer = Fred(
    classifier_fn=model.predict_proba,
    class_names=class_names,
    pos=False,
)

In [16]:
# Explain the same running example with the mask-sampling explainer.
fred_exp = fred_explainer.explain_instance(
    example,
    perturb_proba=0.2,
    verbose=True,
)


FRED mode: 'mask sampling'.
Example to explain: 
	'the food is great and location is amazing but the service is really awful'
Original prediction: 'positive'
Average confidence over the sample: 0.68

Explaining class 'positive':
The minimal subset of tokens that make the confidence drop by 15.0% if perturbed is 
	['great']

Saliency weights: 
	[('great', 0.127), ('amazing', 0.073), ('and', 0.072), ('but', 0.035), ('service', 0.03), ('location', 0.021), ('really', 0.02), ('is', 0.017), ('is', 0.016), ('is', 0.015)]


In [17]:
# Build the HTML rendering of the mask-sampling explanation and show it inline.
visualization = fred_exp.html_explanation()
display(visualization)

#### Counterfactual explanations

In [18]:
# Ask for up to k=5 masked samples that the model labels as class_names[0].
counter_sample, perturbed_tokens = fred_exp.counterfactual(
    counter_label=class_names[0],
    verbose=True,
    k=5,
)


Counterfactual explanation for the example
	'the food is great and location is amazing but the service is really awful'

FRED mode: 'mask sampling'.
Original prediction: 'positive'

Sample(s) with minimal perturbation predicted as 'negative':

['the food is UNK UNK location is amazing but the service is really awful'
 'the food is UNK UNK location is amazing but the service is really awful'
 'the food is UNK and location is UNK but the service is really awful'
 'the food is UNK UNK location is amazing but the service is really awful'
 'the food is UNK UNK location is amazing but the service is really awful']

Perturbed tokens: 
	[['great', 'and'], ['great', 'and'], ['great', 'amazing'], ['great', 'and'], ['great', 'and']]
