In [1]:
import json
import random
from transformers import AutoTokenizer
import numpy as np
from allennlp.common.util import import_module_and_submodules as import_submodules
from allennlp.models.archival import load_archive
from allennlp.predictors import Predictor

# Dataset key; it is part of the experiment directory built below.
dataset = "agnews"

# Display name for each label id, used when printing predictions and labels.
news_labels = dict(zip(
    ('1', '2', '3', '4'),
    ('World/Politics', 'Sports', 'Business', 'Science/Technology'),
))

# Model name and the directory that holds its trained archive.
model_name = "roberta-large"
model_path = "experiments/models/{}/{}".format(dataset, model_name)

In [3]:
# Load the trained model archive from the experiment directory set in the config cell.
archive = load_archive(model_path + '/model.tar.gz')
model = archive.model

# Build the 'text_classifier' predictor on top of the loaded archive.
predictor = Predictor.from_archive(archive, 'text_classifier')

# Use the configured model name instead of repeating the literal, so the
# tokenizer cannot silently drift from the model selected in the config cell.
# NOTE: `tok` is rebound to a word detokenizer in a later cell.
tok = AutoTokenizer.from_pretrained(model_name)

In [10]:
# Example input and the label id the prediction is compared against.
text = (
    "BlueGene sneaks past Earth Simulator: The Earth Simulator, an NEC supercomputer, "
    "is surpassed, at last. IBM announced yesterday that its Blue Gene/L "
    "supercomputer had achieved a sustained performance of 36."
)
label = '3'

# Run the predictor on the unmodified text and keep its predicted label id.
out = predictor.predict(text)
prediction = out['label']

# Show the input next to the predicted and reference category names.
print('Text:', text)
print('Prediction:', news_labels[prediction])
print('Label:', news_labels[label])

Text: BlueGene sneaks past Earth Simulator: The Earth Simulator, an NEC supercomputer, is surpassed, at last. IBM announced yesterday that its Blue Gene/L supercomputer had achieved a sustained performance of 36.
Prediction: Science/Technology
Label: Business


In [11]:
# In the automatic procedure, we attempt to find the longest highlight that produces the foil. Let's do that.

import nltk
from nltk.tokenize.treebank import TreebankWordDetokenizer

tokenized_text = nltk.word_tokenize(text)
tok = TreebankWordDetokenizer()

# Slide a window over the tokens, longest window first. Tokens outside the
# window are replaced by '<mask>'; the first window for which the predictor
# returns `label` is stored as inclusive token indices [start, end].
#
# Window starts are restricted so that every window lies fully inside the
# text. Otherwise windows running past the last token are silently truncated,
# which lets a shorter suffix win before a longer window is tried and records
# an end index beyond the last token. Windows of a single token are included
# as well, so "no highlight found" really means no highlight of any length.
contrast_highlight = None
num_tokens = len(tokenized_text)
for window_len in range(num_tokens, 0, -1):
    for i in range(num_tokens - window_len + 1):
        last = i + window_len - 1  # inclusive index of the window's final token

        masked_text = [token if i <= k <= last else '<mask>' for k, token in enumerate(tokenized_text)]
        masked_text = tok.detokenize(masked_text)

        out = predictor.predict(masked_text)
        if out['label'] == label:
            contrast_highlight = [i, last]
            break
    # Stop at the first (longest) window length that yields a highlight.
    if contrast_highlight is not None:
        break

In [12]:
from IPython.display import display, Markdown

# Render the text with the contrast highlight wrapped in <mark> tags,
# or report that the search produced nothing.
if contrast_highlight is not None:
    print('Highlight found:')
    highlight_start, highlight_end = contrast_highlight[0], contrast_highlight[1]
    marked_text = [
        *tokenized_text[:highlight_start],
        '<mark>',
        *tokenized_text[highlight_start:highlight_end + 1],  # end index is inclusive
        '</mark>',
        *tokenized_text[highlight_end + 1:],
    ]
    display(Markdown(tok.detokenize(marked_text)))
else:
    print('No contrast highlight found. This means the model never predicts the foil for any highlight.')

Highlight found:


BlueGene sneaks past Earth Simulator: The Earth Simulator, an NEC supercomputer <mark>, is surpassed, at last . IBM announced yesterday that its </mark> Blue Gene/L supercomputer had achieved a sustained performance of 36.

In [13]:
# Now let's find the shortest highlight deltas that flip the prediction back to the model's normal prediction.

def highlight_contains_span(highlight, span):
    """Return True when `span` lies entirely inside `highlight`.

    Both arguments are indexable pairs (start, end); the comparison treats
    both ends as inclusive, so a span equal to the highlight is contained.
    """
    starts_inside = highlight[0] <= span[0]
    ends_inside = span[1] <= highlight[1]
    return bool(starts_inside and ends_inside)

# The delta search needs a contrast highlight; fail with a clear message
# instead of an opaque TypeError when the previous search found none.
if contrast_highlight is None:
    raise ValueError('No contrast highlight available; cannot search for delta highlights.')

highlight_start, highlight_end = contrast_highlight[0], contrast_highlight[1]

# Baseline input: only the contrast highlight is visible, everything else is masked.
contrast_text = [token if highlight_start <= k <= highlight_end else '<mask>' for k, token in enumerate(tokenized_text)]

delta_spans = []
num_tokens = len(tokenized_text)

# Try extra spans from shortest (j == 0, one token) upwards and collect every
# span of the first length at which un-masking it, on top of the contrast
# highlight, makes the predictor return `prediction` again.
for j in range(0, num_tokens - 1):
    # Only start positions whose span [i, i + j] fits inside the text are
    # visited: a span running past the last token un-masks exactly the same
    # tokens as a shorter span already tried, so it would only repeat inference.
    for i in range(num_tokens - j):
        span_end = i + j

        # Skip spans that start or end inside the contrast highlight ...
        if highlight_start <= i <= highlight_end or highlight_start <= span_end <= highlight_end:
            continue
        # ... and spans that cover the contrast highlight entirely.
        if highlight_start >= i and span_end >= highlight_end:
            continue

        masked_text = [tokenized_text[k] if i <= k <= span_end else token for k, token in enumerate(contrast_text)]
        masked_text = tok.detokenize(masked_text)

        out = predictor.predict(masked_text)
        if out['label'] == prediction:
            delta_spans.append([i, span_end])

    # Keep only the shortest successful length.
    if len(delta_spans) >= 1:
        break


In [14]:
def _wrap_in_mark(before, inside, after):
    """Concatenate three token lists, enclosing `inside` in <mark> tags."""
    return before + ['<mark>'] + inside + ['</mark>'] + after


# NOTE(review): the search above only tries spans lying entirely before or
# entirely after the contrast highlight, so the guarantee stated in this
# message may not hold when the highlight is in the middle of the text - confirm.
assert len(delta_spans) > 0, "We should be guaranteed to find a delta highlight (in the worst case, the entire text)"

print('Contrast highlight:')
hl_start, hl_end = contrast_highlight[0], contrast_highlight[1]
marked_text = _wrap_in_mark(
    tokenized_text[:hl_start],
    tokenized_text[hl_start:hl_end + 1],
    tokenized_text[hl_end + 1:],
)
display(Markdown(tok.detokenize(marked_text)))

print('Delta highlights:')
for span in delta_spans:
    first, last = span[0], span[1]
    if first < hl_start:
        # Delta span precedes the contrast highlight: indices into the plain
        # tokens and into `marked_text` still agree up to the span's end.
        marked_text_2 = _wrap_in_mark(
            tokenized_text[:first],
            tokenized_text[first:last + 1],
            marked_text[last + 1:],
        )
    else:
        # Delta span follows the contrast highlight: positions in `marked_text`
        # are shifted by the two mark tokens already inserted.
        marked_text_2 = _wrap_in_mark(
            marked_text[:first + 2],
            marked_text[first + 2:last + 3],
            tokenized_text[last + 1:],
        )
    display(Markdown(tok.detokenize(marked_text_2)))

Contrast highlight:


BlueGene sneaks past Earth Simulator: The Earth Simulator, an NEC supercomputer <mark>, is surpassed, at last . IBM announced yesterday that its </mark> Blue Gene/L supercomputer had achieved a sustained performance of 36.

Delta highlights:


<mark> BlueGene </mark> sneaks past Earth Simulator: The Earth Simulator, an NEC supercomputer <mark>, is surpassed, at last . IBM announced yesterday that its </mark> Blue Gene/L supercomputer had achieved a sustained performance of 36.

BlueGene sneaks past <mark> Earth </mark> Simulator: The Earth Simulator, an NEC supercomputer <mark>, is surpassed, at last . IBM announced yesterday that its </mark> Blue Gene/L supercomputer had achieved a sustained performance of 36.

BlueGene sneaks past Earth <mark> Simulator </mark>: The Earth Simulator, an NEC supercomputer <mark>, is surpassed, at last . IBM announced yesterday that its </mark> Blue Gene/L supercomputer had achieved a sustained performance of 36.

BlueGene sneaks past Earth Simulator: The <mark> Earth </mark> Simulator, an NEC supercomputer <mark>, is surpassed, at last . IBM announced yesterday that its </mark> Blue Gene/L supercomputer had achieved a sustained performance of 36.

BlueGene sneaks past Earth Simulator: The Earth <mark> Simulator </mark>, an NEC supercomputer <mark>, is surpassed, at last . IBM announced yesterday that its </mark> Blue Gene/L supercomputer had achieved a sustained performance of 36.

BlueGene sneaks past Earth Simulator: The Earth Simulator, an <mark> NEC </mark> supercomputer <mark>, is surpassed, at last . IBM announced yesterday that its </mark> Blue Gene/L supercomputer had achieved a sustained performance of 36.

BlueGene sneaks past Earth Simulator: The Earth Simulator, an NEC <mark> supercomputer </mark> <mark>, is surpassed, at last . IBM announced yesterday that its </mark> Blue Gene/L supercomputer had achieved a sustained performance of 36.

BlueGene sneaks past Earth Simulator: The Earth Simulator, an NEC supercomputer <mark>, is surpassed, at last . IBM announced yesterday that its </mark> <mark> Blue </mark> Gene/L supercomputer had achieved a sustained performance of 36.

BlueGene sneaks past Earth Simulator: The Earth Simulator, an NEC supercomputer <mark>, is surpassed, at last . IBM announced yesterday that its </mark> Blue <mark> Gene/L </mark> supercomputer had achieved a sustained performance of 36.