In [14]:
# Load the autoreload extension and enable it for all modules (mode 2).
%load_ext autoreload
%autoreload 2
# Device flag read by later cells (Polyjuice wrapper and the classification pipeline).
is_cuda = False

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# General setup and perturbation

In [None]:
# Instantiate the Polyjuice wrapper.
# The model path defaults to the portable model hosted at
# https://huggingface.co/uw-hai/polyjuice
# There is no need to change it unless you are using a customized model.
from polyjuice import Polyjuice
pj = Polyjuice(model_path="uw-hai/polyjuice", is_cuda=is_cuda)

In [16]:
# The base sentence to perturb.
text = "It is great for kids."

# Perturb the sentence with a single call.
# When run for the first time, the wrapper automatically loads the
# related models, e.g. the generator and the perplexity filter.
perturbations = pj.perturb(text)
perturbations

INFO:polyjuice.polyjuice_wrapper:Setup Polyjuice.
Device set to use cpu
INFO:polyjuice.polyjuice_wrapper:Setup SpaCy processor.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Both `max_new_tokens` (=256) and `max_length`(=1000) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Both `max_new_tokens` (=256) and `max_length`(=1000) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Both `max_new_tokens` (=256) and `max_length`(=1000) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for m

['It is great to be.', "It's not for kids.", 'It is great for kids to watch.']

In [17]:
# To perturb with more control, pass additional arguments:

perturbations = pj.perturb(
    orig_sent=text,
    # Where to put the blank. If omitted, it is selected automatically.
    # Can be a list or a single sentence.
    blanked_sent=["It is [BLANK] for kids.", "It is great for [BLANK]."],
    # The control code (a list or a single code).
    # The code should be one of 'resemantic', 'restructure', 'negation', 'insert', 'lexical', 'shuffle', 'quantifier', 'delete'.
    ctrl_code="negation",
    # Customize the perplexity score.
    perplex_thred=20,
    # Number of perturbations to return.
    num_perturbations=3,
    # The function also accepts additional arguments for Hugging Face generators.
    num_beams=3
)
perturbations

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Both `max_new_tokens` (=256) and `max_length`(=1000) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Both `max_new_tokens` (=256) and `max_length`(=1000) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


['It is great for kids but not for anyone.',
 'It is great for kids but not for any adults.',
 'It is not great for kids.']

In [18]:
# Detect the control code for a given (original, perturbed) sentence pair.
pj.detect_ctrl_code("it's great for kids.", 'It is great for kids but not for any adults.')

'negation'

# Select for diversity

With each perturbation represented by its token changes, control code, and dependency tree structure, we greedily select the ones that are least similar to those already selected. This avoids redundancy among common perturbations such as black -> white.


In [19]:
# Over-generate some examples to select from.

orig_text = "It is great for kids."
# NOTE(review): num_perturbations=None presumably lifts the cap on the number of
# returned perturbations -- confirm against the Polyjuice documentation.
perturb_texts = pj.perturb(
    orig_sent=orig_text, perplex_thred=10, num_perturbations=None, num_beams=3)
# Pair every perturbation with the original sentence.
orig_and_perturb_pairs = [(orig_text, perturb_text) for perturb_text in perturb_texts]
orig_and_perturb_pairs

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Both `max_new_tokens` (=256) and `max_length`(=1000) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Both `max_new_tokens` (=256) and `max_length`(=1000) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Both `max_new_tokens` (=256) and `max_length`(=1000) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Setting `pad_token_id` to `eos_

[('It is great for kids.', 'It is bad for kids.'),
 ('It is great for kids.', 'It is good for kids too.'),
 ('It is great for kids.', 'It is terrible for kids.'),
 ('It is great for kids.', 'is great for kids.'),
 ('It is great for kids.', 'It is great for any audience.'),
 ('It is great for kids.', 'It is not for kids.'),
 ('It is great for kids.', 'It is good for kids.'),
 ('It is great for kids.', 'It is great for adults.'),
 ('It is great for kids.', 'It is great for anyone.'),
 ('It is great for kids.', 'It is not great for kids.'),
 ('It is great for kids.', 'It is not good for kids.'),
 ('It is great for kids.', 'It is great for any child.'),
 ('It is great for kids.', 'It is great for kids but not for adults.'),
 ('It is great for kids.', 'It is bad for kids too.')]

In [20]:
# Select nsamples=3 of the over-generated (original, perturbed) pairs.
# NOTE(review): the recorded output of this cell is a TypeError
# ("hypernyms() got an unexpected keyword argument 'recursive'") raised from
# inside the library call; possibly a dependency version mismatch -- confirm.
sampled = pj.select_diverse_perturbations(
    orig_and_perturb_pairs=orig_and_perturb_pairs, nsamples=3)
sampled

TypeError: hypernyms() got an unexpected keyword argument 'recursive'

# Select surprising perturbations as counterfactual explanations

Because different models and explainers may produce predictions and compute feature weights in different ways, Polyjuice's selection expects all predictions and feature weights to be precomputed. Here, we give an example using Quora Question Pair detection.


In [None]:
# Set the base instance to perturb: a pair of questions.
orig = (
    "How can I help a friend experiencing serious depression?",
    "How do I help a friend who is in depression?"
)
# NOTE(review): label 1 presumably marks the two questions as duplicates -- confirm.
orig_label = 1

# We perturb the second question.

In [None]:
# Load the model to explain (presumably a BERT classifier fine-tuned on
# Quora Question Pairs, judging by the checkpoint name -- confirm).
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
import torch
model_name = "textattack/bert-base-uncased-QQP"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
# "sentiment-analysis" is the general Hugging Face pipeline name for text classification tasks.
# Set device=-1 if you don't have a GPU; here the device follows the is_cuda flag.
# NOTE(review): is_cuda is defined in the first cell, so that cell must be run before
# this one (the recorded NameError below suggests it was not).
# NOTE(review): return_all_scores appears to be deprecated in newer transformers
# releases in favor of top_k -- confirm before upgrading.
pipe = pipeline(
    "sentiment-analysis", model=model, tokenizer=tokenizer, 
    framework="pt", device=0 if is_cuda else -1, return_all_scores=True)

NameError: name 'is_cuda' is not defined

In [None]:
# some wrapper for prediction
import numpy as np
def extract_predict_label(raw_pred):
    """Return the label of the highest-scoring entry in ``raw_pred``.

    Args:
        raw_pred: iterable of dicts, each with a ``"label"`` and a numeric
            ``"score"`` key.

    Returns:
        The ``"label"`` of the entry with the largest score (the earliest
        one on ties), or ``None`` when ``raw_pred`` is empty.
    """
    # min() over the negated score picks the first entry with the highest score.
    best = min(raw_pred, key=lambda entry: -entry["score"], default=None)
    return None if best is None else best["label"]
def predict(examples, predictor, batch_size=128):
    """Run ``predictor`` over ``examples`` in batches and normalize the labels.

    Args:
        examples: sliceable sequence of inputs accepted by ``predictor``.
        predictor: callable that takes a batch (a slice of ``examples``) and
            returns one result per example; each result is either a dict or
            a list of dicts with ``"label"`` and ``"score"`` keys.
        batch_size: number of examples handed to ``predictor`` per call.

    Returns:
        The list of raw results, one per example, where every string label
        of the form ``"<prefix>_<n>"`` has been replaced in place by the
        integer ``n``.
    """
    raw_preds = []
    with torch.no_grad():
        for start in range(0, len(examples), batch_size):
            raw_preds.extend(predictor(examples[start:start + batch_size]))
    for raw_pred in raw_preds:
        entries = raw_pred if isinstance(raw_pred, list) else [raw_pred]
        for entry in entries:
            label = entry["label"]
            # Only string labels need parsing; entries that were already
            # converted (e.g. result objects reused by the predictor) are
            # left untouched instead of failing on int.split.
            if isinstance(label, str):
                entry["label"] = int(label.split("_")[1])
    return raw_preds

# Sanity check: predict on the original pair and show the per-label scores
# together with the top-scoring label.
p = predict([orig], predictor=pipe)[0]
(p, extract_predict_label(p))

([{'label': 0, 'score': 0.0179734043776989},
  {'label': 1, 'score': 0.9820265769958496}],
 1)

In [None]:
## Collect some base perturbations.
from polyjuice.generations import ALL_CTRL_CODES

# Perturb the second question in orig.
# Note: this rebinds perturb_texts, which an earlier cell also assigns.
perturb_idx = 1
perturb_texts = pj.perturb(
    orig[perturb_idx], 
    ctrl_code=ALL_CTRL_CODES, 
    num_perturbations=None, perplex_thred=10)

perturb_texts


['How do I help a suicidal girl?',
 'How do I help a friend who is suicidal?',
 'How do I get help a friend who is in depression?',
 'How do I help a friend who is in really bad health?',
 'How do I help a friend who is in deep trouble?',
 'How would I help a friend who is in depression?',
 'How do I help a friend who is in health?',
 'How do I help a friend?',
 'How do I help a suicidal student?',
 'How do I not help a friend who is in depression?',
 'How can I help a friend who is in depression?']

In [None]:
# To estimate feature importance, we set up shap explainer
# install shap
!pip install shap

Looking in indexes: https://pypi.org/simple, https://packagecloud.io/github/git-lfs/pypi/simple


In [None]:
import shap
import functools
from copy import deepcopy
# setup a prediction function for computing the shap feature importance

def wrap_perturbed_instances(perturb_texts, orig, perturb_idx=1):
    """Build one full instance per perturbed text.

    Args:
        perturb_texts: iterable of perturbed texts.
        orig: the original instance (a sequence of fields).
        perturb_idx: position in ``orig`` that each perturbed text replaces.

    Returns:
        A list of tuples; each is a deep copy of ``orig`` with the field at
        ``perturb_idx`` replaced by one perturbed text, in input order.
    """
    def _swap_in(new_text):
        # Work on a fresh copy so the original instance is never modified.
        fields = deepcopy(list(orig))
        fields[perturb_idx] = new_text
        return tuple(fields)

    return [_swap_in(new_text) for new_text in perturb_texts]

def predict_on_perturbs(perturb_texts, orig, predictor, perturb_idx=1):
    """Score perturbed instances against the originally predicted label.

    Each perturbed text is substituted into ``orig`` at ``perturb_idx`` and
    run through ``predictor``; the original instance is predicted as well to
    find its top label.

    Returns:
        A list with, for every perturbed instance, the score assigned to the
        label that was predicted for the original instance.
    """
    candidates = wrap_perturbed_instances(perturb_texts, orig, perturb_idx)
    # One {label: score} mapping per perturbed instance.
    score_by_label = []
    for scored_entries in predict(candidates, predictor=predictor):
        score_by_label.append(
            {entry["label"]: entry["score"] for entry in scored_entries})
    reference_label = extract_predict_label(
        predict([orig], predictor=predictor)[0])
    # The return is the probability of the originally predicted label.
    return [scores[reference_label] for scores in score_by_label]
def normalize_shap_importance(features, importances, is_use_abs=True):
    """Merge sub-word token importances into per-word importances.

    Tokens are stripped of the ``'Ġ'`` marker; a token starting with ``"##"``
    is treated as a continuation of the preceding token, and its text and
    importance are accumulated into the same word.

    Args:
        features: sequence of token strings.
        importances: sequence of numeric importances aligned with ``features``.
        is_use_abs: when true, store the absolute value of each accumulated
            importance; otherwise store the signed sum.

    Returns:
        A dict mapping each merged word to its importance. Words whose text
        is empty after removing ``"#"`` characters are skipped, and a word
        that occurs more than once keeps the value of its last occurrence.
    """
    normalized_features = {}
    # Initialize the accumulator up front so that a leading "##" continuation
    # piece does not reference unbound locals.
    key, val = "", 0
    last_idx = len(features) - 1
    for idx, (f, v) in enumerate(zip(features, importances)):
        f = f.strip('Ġ')
        if not f.startswith("##"):
            # A new word starts here: reset the accumulator.
            key, val = "", 0
        key += f.replace("#", "").strip()
        val += v
        # The word is complete when the next token is not a continuation piece.
        ends_word = idx == last_idx or not features[idx + 1].startswith("##")
        if ends_word and key != "":
            normalized_features[key] = abs(val) if is_use_abs else val
    return normalized_features
def explain_with_shap(orig, predictor=pipe, tokenzier=pipe.tokenizer, perturb_idx=1):
    """Compute per-word SHAP importances for one field of ``orig``.

    Args:
        orig: the original instance (a sequence of texts).
        predictor: prediction pipeline forwarded to ``predict_on_perturbs``.
        tokenzier: tokenizer handed to ``shap.Explainer`` as its second
            argument. The misspelled parameter name is kept so existing
            keyword callers keep working.
        perturb_idx: index of the field in ``orig`` that is explained.

    Returns:
        The dict produced by ``normalize_shap_importance`` for the explained
        text.
    """
    predict_for_shap_func = functools.partial(
        predict_on_perturbs, orig=orig, predictor=predictor, perturb_idx=perturb_idx)
    # Use the tokenizer supplied by the caller; previously the argument was
    # ignored and the notebook-global `tokenizer` was read instead.
    shap_explainer = shap.Explainer(predict_for_shap_func, tokenzier)
    exp = shap_explainer([str(orig[perturb_idx])])
    return normalize_shap_importance(exp.data[0], exp.values[0])

# Explain the original pair with the default pipeline; the result maps each
# word of the explained question to its (absolute) importance.
feature_importance_dict = explain_with_shap(orig)
feature_importance_dict

{'How': 0.052321239694720134,
 'do': 0.052321239694720134,
 'I': 0.05254904864705168,
 'help': 0.05254904864705168,
 'a': 0.03752649684611242,
 'friend': 0.03752649684611242,
 'who': 0.03752649684611242,
 'is': 0.03752649684611242,
 'in': 0.2708918958087452,
 'depression': 0.2708918958087452,
 '?': 0.07552210992434993}

In [None]:
# Get the predictions for the original and for the perturbed instances.
orig_pred = predict([orig], predictor=pipe)[0]

# Rebuild full question pairs, with each perturbation replacing the question at perturb_idx.
perturb_instances = wrap_perturbed_instances(perturb_texts, orig, perturb_idx)
perturb_preds = predict(perturb_instances, predictor=pipe)

# The selection works on the precomputed predictions and feature importances.
surprises = pj.select_surprise_explanations(
    orig_text=orig[perturb_idx], 
    perturb_texts=perturb_texts, 
    orig_pred=orig_pred, 
    perturb_preds=perturb_preds, 
    feature_importance_dict=feature_importance_dict)
surprises

[Munch({'case': 'Suprise flip', 'pred': 0, 'changed_features': ['help'], 'perturb_text': 'How do I not help a friend who is in depression?'}),
 Munch({'case': 'Suprise unflip', 'pred': 1, 'changed_features': ['depression'], 'perturb_text': 'How do I help a friend who is in really bad health?'})]