MIT License

Copyright (c) 2020 Marco Tulio Correia Ribeiro

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

Notebook modified from: https://github.com/marcotcr/checklist/blob/master/notebooks/Sentiment.ipynb

# SST CheckList Capabilities on Test Set

In this notebook, we generate the CheckList capabilities for robustness testing. Specifically, we generate test cases from scratch and augment instances from the SST-2 test set. 

## Import and Load

In [1]:
import checklist
from checklist.editor import Editor
from checklist.perturb import Perturb
from checklist.test_types import MFT, INV, DIR
from checklist.test_suite import TestSuite
from checklist.expect import Expect

In [None]:
import sys
import spacy
import json
import numpy as np
processor = spacy.load('en_core_web_sm')

from tqdm import tqdm

In [3]:
import json
import re
from checklist.perturb import process_ret

def change_names_sst2(doc, meta=False, n=10, seed=None, negative_names=False): 
    """
    
    Function that perturbs names present in the test set with names from the training set that were mostly only present in positive or negative
    instances to check for possible biases of the model. 
    
    Args: 
        negative_names (bool): Boolean indicating if instances should be perturbed only with names that occur in negative train instances.
    """
    if seed is not None:
            np.random.seed(seed)
    
    with open("entities_sst2_train.json", "r") as f:
        entities_sst2 = json.load(f)
        
    with open("entities_sst2_test.json", "r") as f: 
        entities_sst2_test = json.load(f)

    names = [k for k, v in entities_sst2.items() if np.isclose(v["mean"], 1.0) and np.isclose(v["std"], 0.0)]
    if negative_names: 
        names = [k for k, v in entities_sst2.items() if np.isclose(v["mean"], 0.0) and np.isclose(v["std"], 0.0)]
    ents = [x.text for x in doc.ents if np.all([a.ent_type_ == 'PERSON' for a in x])]
    ret = []
    ret_m = []
    for x in ents:
        if x not in entities_sst2_test.keys(): 
            continue
        to_use = np.random.choice(names, n)
        for y in to_use:
            ret.append(re.sub(r'\b%s\b' % re.escape(x), y, doc.text))
            ret_m.append((f, y))            
    return process_ret(ret, ret_m=ret_m, n=n, meta=meta)

In [73]:
def change_hollywood(doc, meta=False, seed=None):
    """
    
    Function that perturbs the name Hollywood (if present in the instance) with other movie industry names. 
    
    """
    if seed is not None:
        np.random.seed(seed)

    movie_industries = ["Bollywood", "Nollywood", "Cantonwood", "Chinawood", "Taiwood", "Hallyuwood", "Hogawood", "Tollywood", "Kollywood", "Tamalewood", "Aussiewood", "Peruliwood", "Ghollywood"]
    n = len(movie_industries)
    ret = []
    ret_m = []
    x = "Hollywood"
    if x in doc.text:
        sub_re = re.compile(r'\b%s\b' % re.escape(x))
        to_use = np.random.choice(movie_industries, n, replace=False)
        ret.extend([sub_re.sub(n, doc.text) for n in to_use])
        ret_m.extend([(x, n) for n in to_use])

    return process_ret(ret, ret_m=ret_m, n=n, meta=meta)

In [5]:
from generalize_checklist.utils import get_dataset
            
dataset = get_dataset("glue", "albert-large-v2", "sst2", split="test")

Reusing dataset glue (/Users/urjakhurana/.cache/huggingface/datasets/glue/sst2/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)


Since the test dataset from HuggingFace does not contain uppercasing, we extract the original sentences with correct uppercasing from the original GLUE SST-2 dataset (https://gluebenchmark.com/tasks) to detect and perturb names. 

In [6]:
with open("sst2_test_labels.json", "r") as f: 
    test_labels = json.load(f)

# Filter out sentences for which we cannot retrieve original test labels and yield positive and negative sentences in test set.
all_sentences = [d["sentence"] for d in dataset if d["sentence"] in test_labels]
labels = [test_labels[sent] for sent in all_sentences]
positive_sentences = set([sent for idx, sent in enumerate(all_sentences) if labels[idx] == 1])
negative_sentences = set([sent for idx, sent in enumerate(all_sentences) if labels[idx] == 0])

dictionary_path = "../../../../Downloads/SST-2/original/dictionary.txt"

with open(dictionary_path, "r") as f: 
    original_sentences = f.read().splitlines()
    
original_sentences = [h.split("|")[0] for h in original_sentences][1:]
lower_originals = [s.lower() for s in original_sentences]

# Create mapping between lower and uppercased sentences. 
matched = {}
matched_positive = {}
matched_negative = {}
sentence_to_label = {}
for i, new_sentence in tqdm(enumerate(all_sentences)): 
    if new_sentence in lower_originals: 
        idx = lower_originals.index(new_sentence)
        matched[new_sentence] = original_sentences[idx]
        if new_sentence in positive_sentences: 
            matched_positive[new_sentence] = original_sentences[idx]
        elif new_sentence in negative_sentences: 
            matched_negative[new_sentence] = original_sentences[idx]

1815it [00:01, 1295.45it/s]


In [8]:
assert len(all_sentences) == len(positive_sentences) + len(negative_sentences)
assert len(matched.keys()) == len(matched_positive.keys()) + len(matched_negative.keys())

In [9]:
sents = list(matched.values())
processed_qs = list(tqdm(processor.pipe(sents, batch_size=32)))
spacy_map = {q: processed_q for (q, processed_q) in zip(sents, processed_qs)}
parsed_qs = [spacy_map[q] for q in sents]

sents_pos = list(matched_positive.values())
processed_pos_qs = list(tqdm(processor.pipe(sents_pos, batch_size=32)))
spacy_map_pos = {q: processed_q for (q, processed_q) in zip(sents_pos, processed_pos_qs)}
parsed_qs_pos = [spacy_map_pos[q] for q in sents_pos]

sents_neg = list(matched_negative.values())
processed_neg_qs = list(tqdm(processor.pipe(sents_neg, batch_size=32)))
spacy_map_neg = {q: processed_q for (q, processed_q) in zip(sents_neg, processed_neg_qs)}
parsed_qs_neg = [spacy_map_neg[q] for q in sents_neg]

1795it [00:03, 452.61it/s]
893it [00:01, 551.54it/s]
902it [00:01, 558.97it/s]


In [10]:
suite = TestSuite()
editor = Editor()

## Capability: Vocabulary

### Synonyms and Antonyms

In [11]:
from nltk.corpus import wordnet

def get_antonyms_synonyms(word, antonym=False): 
    words = set([])
    for syn in wordnet.synsets(word): 
        for l in syn.lemmas():
            if antonym and l.antonyms(): 
                words.add(l.antonyms()[0].name().lower())
            elif not antonym: 
                words.add(l.name().lower())
    return list(words)

        

positive_words = ["nice", "beautiful", "good", "entertaining", "interesting"]
negative_words = ["bad", "horrible", "boring", "annoying"]

positive_synonyms = [word for pos_word in positive_words for word in get_antonyms_synonyms(pos_word)]
positive_antonyms = [word for pos_word in positive_words for word in get_antonyms_synonyms(pos_word, antonym=True)]
negative_synonyms = [word for neg_word in negative_words for word in get_antonyms_synonyms(neg_word)]
negative_antonyms = [word for neg_word in negative_words for word in get_antonyms_synonyms(neg_word, antonym=True)]

In [12]:
print(", ".join(positive_synonyms))
print("\n")
print(", ".join(positive_antonyms))
print("\n")
print(", ".join(negative_synonyms))
print("\n")
print(", ".join(negative_antonyms))

prissy, overnice, squeamish, gracious, decent, courteous, skillful, nice, dainty, beautiful, well, honest, goodness, in_force, skillful, salutary, upright, undecomposed, unspoiled, expert, trade_good, dear, respectable, practiced, near, serious, ripe, soundly, sound, proficient, safe, effective, thoroughly, in_effect, skilful, honorable, full, unspoilt, commodity, good, beneficial, right, just, secure, adept, dependable, estimable, entertaining, think_of, toy_with, nurse, think_about, hold, harbour, harbor, entertain, flirt_with, worry, matter_to, concern, interesting, interest, occupy


nasty, ugly, bad, badness, ill, evilness, evil, uninteresting, bore


big, risky, uncollectible, badly, sorry, bad, badness, unsound, forged, defective, tough, spoilt, speculative, spoiled, high-risk, regretful, unfit, frightful, horrible, horrifying, ugly, atrocious, drill, ho-hum, tire, slow, tedious, oil_production, drilling, dull, tiresome, deadening, boring, irksome, wearisome, bore, nark, irritat

In [13]:
positive_words += ["decent", "honest"]
negative_words += ["nasty", "ugly", "evil", "uninteresting", "tough", "horrifying", "irritating", "bothering", "tiresome"]

In [14]:
editor.add_lexicon("positive_words", positive_words)
editor.add_lexicon("negative_words", negative_words)

In [15]:
t = editor.template("This movie is {positive_words}.", labels=1, save=True, seed=0)
t += editor.template("This movie was so {positive_words} to watch.", labels=1, save=True, seed=0)
t += editor.template("This movie is so {negative_words}.", labels=0, save=True, seed=0)
t += editor.template("This movie was so {negative_words} to watch.", labels=0, save=True, seed=0)
t += editor.template("The acting in this movie was {positive_words}.", labels=1, save=True, seed=0)
t += editor.template("The acting in this movie was {negative_words}.", labels=0, save=True, seed=0)

ignore = ['This movie is beautiful.', 'This movie is good.']

new_data = []
new_labels = []
for sample, label in zip(t.data, t.labels): 
    if sample not in ignore: 
        new_data.append(sample)
        new_labels.append(label)
        
t.data = new_data
t.labels = new_labels

test = MFT(**t)
suite.add(test, "Movie sentiments", "Synonym/Antonym", "Use positive and negative words with their synonyms.")

In [16]:
rt_noun = ["movie", "director", "actor", "show", "scene"]
editor.add_lexicon("rt_noun", rt_noun)

In [17]:
print(', '.join(editor.suggest('It was {a:mask} {rt_noun}.')[:10]))

beautiful, great, good, wonderful, terrible, fantastic, bad, terrific, brilliant, amazing


In [18]:
pos_adj = ["beautiful", "great", "good", "wonderful", "fantastic", "brilliant", "amazing"]
neg_adj = ["terrible", "bad"]
neutral_adj = ["American", "British", "New", "Old"]
editor.add_lexicon('pos_adj', pos_adj, overwrite=True)
editor.add_lexicon('neg_adj', neg_adj, overwrite=True )
editor.add_lexicon('neutral_adj', neutral_adj, overwrite=True)

In [19]:
pos_verb_present = ['like', 'enjoy', 'appreciate', 'love',  'recommend', 'admire', 'value', 'welcome']
neg_verb_present = ['hate', 'dislike', 'regret',  'abhor', 'dread', 'despise' ]
neutral_verb_present = ['see', 'find']
pos_verb_past = ['liked', 'enjoyed', 'appreciated', 'loved', 'admired', 'valued', 'welcomed']
neg_verb_past = ['hated', 'disliked', 'regretted',  'abhorred', 'dreaded', 'despised']
neutral_verb_past = ['saw', 'found']
editor.add_lexicon('pos_verb_present', pos_verb_present, overwrite=True)
editor.add_lexicon('neg_verb_present', neg_verb_present, overwrite=True)
editor.add_lexicon('neutral_verb_present', neutral_verb_present, overwrite=True)
editor.add_lexicon('pos_verb_past', pos_verb_past, overwrite=True)
editor.add_lexicon('neg_verb_past', neg_verb_past, overwrite=True)
editor.add_lexicon('neutral_verb_past', neutral_verb_past, overwrite=True)
editor.add_lexicon('pos_verb', pos_verb_present+ pos_verb_past, overwrite=True)
editor.add_lexicon('neg_verb', neg_verb_present + neg_verb_past, overwrite=True)
editor.add_lexicon('neutral_verb', neutral_verb_present + neutral_verb_past, overwrite=True)

In [20]:
# Individual positive words
t = MFT(pos_adj + pos_verb_present + pos_verb_past, labels=1)
suite.add(t, 'Single positive words', 'Vocabulary', '')

In [21]:
# Individual negative words
t = MFT(neg_adj + neg_verb_present + neg_verb_past, labels=0)
suite.add(t, 'Single negative words', 'Vocabulary', '')

In [22]:
t = editor.template('{it} {rt_noun} {be} {pos_adj}.', it=['The', 'This', 'That'], be=['is', 'was'], labels=1, save=True, seed=0)
t += editor.template('{it} {be} {a:pos_adj} {rt_noun}.', it=['It', 'This', 'That'], be=['is', 'was'], labels=1, save=True, seed=0)
t += editor.template('{i} {pos_verb} {the} {rt_noun}.', i=['I', 'We'], the=['this', 'that', 'the'], labels=1, save=True, seed=0)
t += editor.template('{it} {rt_noun} {be} {neg_adj}.', it=['That', 'This', 'The'], be=['is', 'was'], labels=0, save=True, seed=0)
t += editor.template('{it} {be} {a:neg_adj} {rt_noun}.', it=['It', 'This', 'That'], be=['is', 'was'], labels=0, save=True, seed=0)
t += editor.template('{i} {neg_verb} {the} {rt_noun}.', i=['I', 'We'], the=['this', 'that', 'the'], labels=0, save=True, seed=0)

test = MFT(**t)
suite.add(test, 'Sentiment-laden words in context', 'Vocabulary', 'Use positive and negative verbs and adjectives')

### Add Phrases

In [123]:
positive = editor.template('I {pos_verb_present} it.').data
positive += editor.template('It is {pos_adj}.').data
positive += ['I would watch this again.']
negative = editor.template('I {neg_verb_present} it.').data
negative += editor.template('It is {neg_adj}.').data
negative += ['Never watching this again.']

def add_phrase_function(phrases):
    def pert(d):
        while d[-1].pos_ == 'PUNCT':
            d = d[:-1]
        d = d.text
        ret = [d + '. ' + x for x in phrases]
        idx = np.random.choice(len(ret), min(10, len(ret)), replace=False)
        ret = [ret[i] for i in idx]
        return ret
    return pert

In [124]:
def positive_change(orig_conf, conf):
    softmax = type(orig_conf) in [np.array, np.ndarray]
    return orig_conf[0] - conf[0] + conf[1] - orig_conf[1]

def diff_up(orig_pred, pred, orig_conf, conf, labels=None, meta=None):
    tolerance = 0.1
    change = positive_change(orig_conf, conf)
    if change + tolerance >= 0:
        return True
    else:
        return change + tolerance
    
def diff_down(orig_pred, pred, orig_conf, conf, labels=None, meta=None):
    tolerance = 0.1
    change = positive_change(orig_conf, conf)
    if change - tolerance <= 0:
        return True
    else:
        return -(change - tolerance)
    
goes_up = Expect.pairwise(diff_up)
goes_down = Expect.pairwise(diff_down)

In [125]:
t = Perturb.perturb(parsed_qs, add_phrase_function(positive), nsamples=500)
test = DIR(t.data, goes_up)
description = 'Add very positive phrases (e.g. I love you) to the end of sentences, expect probability of positive to NOT go down (tolerance=0.1)'
suite.add(test, 'add positive phrases', 'Vocabulary', description, overwrite=True)

In [127]:
t = Perturb.perturb(parsed_qs, add_phrase_function(negative), nsamples=500)
test = DIR(t.data, goes_down)
description = 'Add very negative phrases (e.g. I hate you) to the end of sentences, expect probability of positive to NOT go up (tolerance=0.1)'
suite.add(test, 'add negative phrases', 'Vocabulary', description, overwrite=True)

## Capability: Negation

In [27]:
t = editor.template('{it} {rt_noun} {nt} {pos_adj}.', it=['This', 'That', 'The'], nt=['is not', 'isn\'t'], save=True, seed=0)
t += editor.template('{it} {benot} {a:pos_adj} {rt_noun}.', it=['It', 'This', 'That'], benot=['is not',  'isn\'t', 'was not', 'wasn\'t'], save=True, seed=0)
neg = ['I can\'t say I', 'I don\'t', 'I would never say I', 'I don\'t think I', 'I didn\'t' ]
t += editor.template('{neg} {pos_verb_present} {the} {rt_noun}.', neg=neg, the=['this', 'that', 'the'], save=True, seed=0)
t += editor.template('No one {pos_verb_present}s {the} {rt_noun}.', neg=neg, the=['this', 'that', 'the'], save=True, seed=0)
test = MFT(t.data, labels=0, templates=t.templates)
suite.add(test, 'Simple negations: negative', 'Negation', 'Very simple negations of positive statements')

In [134]:
new_neg = neg[:-1]
neutral =['my history with movies', 'all that I\'ve seen over the years', 'it\'s a Friday', "that I bought it last week", "that we watched a lot"]
t = editor.template('{neg}, given {neutral}, that {it} {rt_noun} {be} {pos_adj}.', neutral=neutral, neg=['I don\'t think', 'I can\'t say', 'I wouldn\'t say'], it=['this', 'that', 'the'], be=['is', 'was'], save=True, seed=0)
t += editor.template('{neg}, given {neutral}, that {it} {be} {a:pos_adj} {rt_noun}.',neutral=neutral,  neg=['I don\'t think', 'I can\'t say', 'I wouldn\'t say'], it=['this', 'that', 'the'], be=['is', 'was'], save=True, seed=0)
t += editor.template('{neg}, given {neutral}, that {i} {pos_verb_present} {the} {rt_noun}.',neutral=neutral,  neg=['I don\'t think', 'I can\'t say', 'I wouldn\'t say'], i=['I', 'we'], the=['this', 'that', 'the'], save=True, seed=0)
t.data = list(np.random.choice(t.data, 500, replace=False))
test = MFT(t.data, labels=0, templates=t.templates)
suite.add(test, 'Hard: Negation of positive with neutral stuff in the middle (should be negative)', 'Negation', '', overwrite=True)

## Genre-Specific Sentiments

In [52]:
start = ["The", "This"]
be = ["is", "was"]
positive_horror = ["scary", "terrifying", "frightening"]
positive_romantic = ["wholesome", "feel-good", "charming"]
positive_comedy = ["funny", "light-hearted", "rib-tickling"]
positive_drama = ["serious", "moving"]

negative_horror = ["calming"]
negative_comedy = ["serious", "unamusing"]
negative_children = ["scary"]

start_be=start_be = ["This is", "It's", "That is"]
positive_feeling = ["liked", "enjoyed", "loved"]
genres = ["horror", "romantic", "comedy", "drama", "children"]
audience=["I", "we", "they", "everyone"]

t = editor.template("{start} horror movie {be} {positive_horror}", start=start, be=be, positive_horror=positive_horror, save=True, seed=0, labels=1)
t += editor.template("{start} romantic movie {be} {positive_romantic}", start=start, be=be, positive_romantic=positive_romantic, save=True, seed=0, labels=1)
t += editor.template("{start} comedy movie {be} {positive_comedy}", start=start, be=be, positive_comedy=positive_comedy, save=True, seed=0, labels=1)
t += editor.template("{start} drama movie {be} {positive_drama}", start=start, be=be, positive_drama=positive_drama, save=True, seed=0, labels=1)

t += editor.template("{start} horror movie {be} {negative_horror}", start=start, be=be, negative_horror=negative_horror, save=True, seed=0, labels=0)
t += editor.template("{start} comedy movie {be} {negative_comedy}", start=start, be=be, negative_comedy=negative_comedy, save=True, seed=0, labels=0)
t += editor.template("{start} children movie {be} {negative_children}", start=start, be=be, negative_children=negative_children, save=True, seed=0, labels=0)

t += editor.template("{start} comedy movie {be} scary rather than {positive_comedy}", start=start, be=be, positive_comedy=positive_comedy, save=True, seed=0, labels=0)
t += editor.template("{start} horror movie {be} laughable rather than {positive_horror}", start=start, be=be, positive_horror=positive_horror, save=True, seed=0, labels=0)
t += editor.template("{start} drama movie {be} funny rather than {positive_drama}", start=start, be=be, positive_drama=positive_drama, save=True, seed=0, labels=0)
t += editor.template("{start} romantic movie {be} cold rather than {positive_romantic}", start=start, be=be, positive_romantic=positive_romantic, save=True, seed=0, labels=0)

t += editor.template("{start} comedy movie {be} {negative_comedy}, not {positive_comedy}", start=start, be=be, negative_comedy=negative_comedy, positive_comedy=positive_comedy, save=True, seed=0, labels=0)
t += editor.template("{start} horror movie {be} laughable, not {positive_horror}", start=start, be=be, positive_horror=positive_horror, save=True, seed=0, labels=0)
t += editor.template("{start} drama movie {be} funny, not {positive_drama}", start=start, be=be, positive_drama=positive_drama, save=True, seed=0, labels=0)
t += editor.template("{start} romantic movie {be} cold, not {positive_romantic}", start=start, be=be, positive_romantic=positive_romantic, save=True, seed=0, labels=0)

t += editor.template("{start_be} a {positive_horror} movie but {audience} {positive_feeling} it", start_be=start_be, positive_feeling=positive_feeling, positive_horror=positive_horror, audience=audience, save=True, seed=0, labels=1)
t += editor.template("{start_be} a {positive_romantic} movie but {audience} {positive_feeling} it", start_be=start_be, positive_feeling=positive_feeling, positive_romantic=positive_romantic, audience=audience, save=True, seed=0, labels=1)
t += editor.template("{start_be} a {positive_comedy} movie but {audience} {positive_feeling} it", start_be=start_be, positive_feeling=positive_feeling, positive_comedy=positive_comedy, audience=audience, save=True, seed=0, labels=1)
t += editor.template("{start_be} a {positive_drama} movie but {audience} {positive_feeling} it", start_be=start_be, positive_feeling=positive_feeling, positive_drama=positive_drama, audience=audience, save=True, seed=0, labels=1)

t += editor.template("{start_be} a {genres} movie but {audience} actually {positive_feeling} it", start_be=start_be, genres=genres, audience=audience, positive_feeling=positive_feeling, save=True, seed=0, labels=1)

new_data = []
new_labels = []

for sample, label in zip(t.data, t.labels): 
    if sample not in new_data: 
        new_data.append(sample)
        new_labels.append(label)

t.data = new_data
t.labels = new_labels

test = MFT(**t)
suite.add(test, "Movie genre specific sentiments", 'Sentiment', "Sentiment sentences that are specific to movie genres.", overwrite=True)
print(t.data[0])
print(t.data[1])
print(t.data[2])

The horror movie is scary
The horror movie was scary
The horror movie is terrifying


## Capability: NER

In [55]:
t = Perturb.perturb(parsed_qs, Perturb.change_names, nsamples=500, seed=0)
test = INV(t.data)
suite.add(test, 'Change names', 'NER', 'Replace names with other common names')

print(t.data[0][0])
print(t.data[0][1])
print(t.data[0][2])

Director Kapur is a filmmaker with a real flair for epic landscapes and adventure , and this is a better film than his earlier English-language movie , the overpraised Elizabeth .
Director Kapur is a filmmaker with a real flair for epic landscapes and adventure , and this is a better film than his earlier English-language movie , the overpraised Alyssa .
Director Kapur is a filmmaker with a real flair for epic landscapes and adventure , and this is a better film than his earlier English-language movie , the overpraised Maria .


In [63]:
t = Perturb.perturb(parsed_qs_pos, change_names_sst2, nsamples=200, seed=0, negative_names=True)
test = INV(t.data)
suite.add(test, "Polarizing Negative Names - Positive Instances", "NER", "Replaces names with polarizing negative celebrity names in positive instances of the training set.", overwrite=True)

print(t.data[0][0])
print(t.data[0][1])
print(t.data[0][2])

De Niro and McDormand give solid performances , but their screen time is sabotaged by the story 's inability to create interest .
De Niro and Reginald Hudlin give solid performances , but their screen time is sabotaged by the story 's inability to create interest .
Einstein and McDormand give solid performances , but their screen time is sabotaged by the story 's inability to create interest .


In [65]:
t = Perturb.perturb(parsed_qs_neg, change_names_sst2, nsamples=200, seed=0, negative_names=False)
test = INV(t.data)
suite.add(test, "Polarizing Positive Names - Negative Instances", "NER", "Replaces names with polarizing positive celebrity names in negative instances of the training set.", overwrite=True)

print(t.data[0][0])
print(t.data[0][1])
print(t.data[0][2])

Feel bad for King , who 's honestly trying , and Schwartzman , who 's shot himself in the foot .
Feel bad for King , who 's honestly trying , and Smokey Robinson , who 's shot himself in the foot .
Feel bad for King , who 's honestly trying , and Craig Bartlett , who 's shot himself in the foot .


In [67]:
t = Perturb.perturb(parsed_qs_neg, change_names_sst2, nsamples=200, seed=0, negative_names=True)
test = INV(t.data)
suite.add(test, "Polarizing Negative Names - Negative Instances", "NER", "Replaces names with polarizing negative celebrity names in negative instances of the training set.", overwrite=True)

print(t.data[0][0])
print(t.data[0][1])
print(t.data[0][2])

Feel bad for King , who 's honestly trying , and Schwartzman , who 's shot himself in the foot .
Feel bad for King , who 's honestly trying , and Crispin Glover , who 's shot himself in the foot .
Feel bad for King , who 's honestly trying , and Yvan Attal , who 's shot himself in the foot .


In [68]:
t = Perturb.perturb(parsed_qs_pos, change_names_sst2, nsamples=200, seed=0, negative_names=False)
test = INV(t.data)
suite.add(test, "Polarizing Positive Names - Positive Instances", "NER", "Replaces names with polarizing positive celebrity names in positive instances of the training set.", overwrite=True)

print(t.data[0][0])
print(t.data[0][1])
print(t.data[0][2])

Often gruelling and heartbreaking to witness , but Seldahl and Wollter 's sterling performances raise this far above the level of the usual maudlin disease movie .
Often gruelling and heartbreaking to witness , but Seldahl and Smokey Robinson 's sterling performances raise this far above the level of the usual maudlin disease movie .
Often gruelling and heartbreaking to witness , but Seldahl and Craig Bartlett 's sterling performances raise this far above the level of the usual maudlin disease movie .


In [75]:
t = Perturb.perturb(parsed_qs, change_hollywood, nsamples=200, seed=0)
test = INV(t.data)
suite.add(test, 'Change Movie Industries', 'NER', 'Replace the movie industry of Hollywood with other industries in the world', overwrite=True)

print(t.data[0][0])
print(t.data[0][1])
print(t.data[0][2])

It 's one of the most honest films ever made about Hollywood .
It 's one of the most honest films ever made about Hogawood .
It 's one of the most honest films ever made about Peruliwood .


In [103]:
movie_industries = ["Hollywood", "Bollywood", "Nollywood", "Cantonwood", "Chinawood", "Taiwood", "Hallyuwood", "Hogawood", "Tollywood", "Kollywood", "Tamalewood", "Aussiewood"]

t = editor.template("{movie_industries} movies are {positive_words}", movie_industries=movie_industries, positive_words=positive_words, save=True, seed=0, labels=1)
t += editor.template("{movie_industries} movies are {negative_words}", movie_industries=movie_industries, negative_words=negative_words, save=True, seed=0, labels=0)
t += editor.template("{start} {movie_industries} movie {be} {positive_words}", start=start, movie_industries=movie_industries, be=be, positive_words=positive_words, save=True, seed=0, labels=1)
t += editor.template("{start} {movie_industries} movie {be} {negative_words}", start=start, movie_industries=movie_industries, be=be, negative_words=negative_words, save=True, seed=0, labels=0)

new_data = []
new_labels = []

for sample, label in zip(t.data, t.labels): 
    if sample not in new_data: 
        new_data.append(sample)
        new_labels.append(label)

t.data = new_data
t.labels = new_labels

test = MFT(**t)
suite.add(test, "Movie Industries specific sentiments", 'Sentiment', "Sentiment sentences about movie industries.", overwrite=True)
print(t.data[0])
print(t.data[1])
print(t.data[2])

Hollywood movies are nice
Hollywood movies are beautiful
Hollywood movies are good


In [112]:
neutral_words = set(
    ['.', 'the', 'The', ',', 'a', 'A', 'and', 'of', 'to', 'it', 'that', 'in',
     'this', 'for',  'you', 'there', 'or', 'an', 'by', 'about', 'movie', 'show' 'my',
     'in', 'of', 'have', 'with', 'was', 'at', 'it', 'get', 'from', 'this',
    ])
forbidden = set(['No', 'no', 'Not', 'not', 'Nothing', 'nothing', 'without', 'but'])
def change_neutral(d):
    examples = []
    subs = []
    words_in = [x for x in d.capitalize().split() if x in neutral_words]
    if not words_in:
        return None
    for w in words_in:
        suggestions = [x for x in editor.suggest_replace(d, w, beam_size=5, words_and_sentences=True) if x[0] not in forbidden]
        examples.extend([x[1] for x in suggestions])
        subs.extend(['%s -> %s' % (w, x[0]) for x in suggestions])
    if examples:
        idxs = np.random.choice(len(examples), min(len(examples), 10), replace=False)
        return list(set([examples[i] for i in idxs]))

In [115]:
t = Perturb.perturb(sents, change_neutral, nsamples=500)
test = INV(t.data)
description = 'Change a set of neutral words with other context-appropriate neutral words (using BERT).'
suite.add(test, 'change neutral words with BERT', 'Vocabulary', description, overwrite=True)

print(t.data[0][0])
print(t.data[0][1])
print(t.data[0][2])

It 's a decent glimpse into a time period , and an outcast , that is no longer accessible , but it does n't necessarily shed more light on its subject than the popular predecessor .
It 's a decent glimpse into a time period , considered an outcast , that is no longer accessible , but it does n't necessarily shed more light on its subject than the popular predecessor .
It 's a decent glimpse into a time period , and social outcast , that is no longer accessible , but it does n't necessarily shed more light on its subject than the popular predecessor .


## Capability: Temporality

In [118]:
change = ['but', 'even though', 'although', '']
t = editor.template(['I used to think this movie was {neg_adj}, {change} now I think it is {pos_adj}.',
                                 'I think this movie is {pos_adj}, {change} I used to think it was {neg_adj}.',
                                 'In the past I thought this movie was {neg_adj}, {change} now I think it is {pos_adj}.',
                                 'I think this movie is {pos_adj}, {change} in the past I thought it was {neg_adj}.',
                                ] ,
                                 change=change, unroll=True, save=True, labels=1, seed=0)
t += editor.template(['I used to {neg_verb_present} this movie, {change} now I {pos_verb_present} it.',
                                 'I {pos_verb_present} this movie, {change} I used to {neg_verb_present} it.',
                                 'In the past I would {neg_verb_present} this movie, {change} now I {pos_verb} it.',
                                 'I {pos_verb_present} this movie, {change} in the past I would {neg_verb_present} it.',
                                ] ,
                                change=change, unroll=True, save=True, labels=1, seed=0)

t += editor.template(['I used to think this movie was {pos_adj}, {change} now I think it is {neg_adj}.',
                                 'I think this movie is {neg_adj}, {change} I used to think it was {pos_adj}.',
                                 'In the past I thought this movie was {pos_adj}, {change} now I think it is {neg_adj}.',
                                 'I think this movie is {neg_adj}, {change} in the past I thought it was {pos_adj}.',
                                ] ,
                                 change=change, unroll=True, save=True, labels=0, seed=0)
t += editor.template(['I used to {pos_verb_present} this movie, {change} now I {neg_verb_present} it.',
                                 'I {neg_verb_present} this movie, {change} I used to {pos_verb_present} it.',
                                 'In the past I would {pos_verb_present} this movie, {change} now I {neg_verb_present} it.',
                                 'I {neg_verb_present} this movie, {change} in the past I would {pos_verb_present} it.',
                                ] ,
                                change=change, unroll=True, save=True, labels=0, seed=0)

new_data = []
new_labels = []

for sample, label in zip(t.data, t.labels): 
    if sample not in new_data: 
        new_data.append(sample)
        new_labels.append(label)

t.data = new_data
t.labels = new_labels
test = MFT(**t)
description = '''Have two conflicing statements, one about the past and one about the present.
Expect the present to carry the sentiment. Examples:
I used to love this movie, now I hate it -> should be negative
I love this movie, although I used to hate it -> should be positive
'''
suite.add(test, 'used to, but now', 'Temporal', description, overwrite=True)
len(t.data)

2152

In [130]:
for test in suite.tests: 
    print(suite.tests[test])

<checklist.test_types.MFT object at 0x7fec2de71d90>
<checklist.test_types.MFT object at 0x7fec2e1f3f50>
<checklist.test_types.MFT object at 0x7fec2e1c79d0>
<checklist.test_types.MFT object at 0x7fec2e1d6910>
<checklist.test_types.DIR object at 0x7fec2e1e3950>
<checklist.test_types.DIR object at 0x7fec2e853bd0>
<checklist.test_types.MFT object at 0x7fec2e1ec490>
<checklist.test_types.MFT object at 0x7fec2e1f3090>
<checklist.test_types.MFT object at 0x7fec2e408390>
<checklist.test_types.INV object at 0x7fec2e1fb2d0>
<checklist.test_types.INV object at 0x7fec2e1dd950>
<checklist.test_types.INV object at 0x7fec5da20cd0>
<checklist.test_types.INV object at 0x7fec2e40b150>
<checklist.test_types.INV object at 0x7fec2f8591d0>
<checklist.test_types.INV object at 0x7fec2e679950>
<checklist.test_types.MFT object at 0x7fec2e6819d0>
<checklist.test_types.INV object at 0x7fec2e850bd0>
<checklist.test_types.MFT object at 0x7fec2e74b410>


In [135]:
# suite.summary()
suite.save("testset_19_07_21.pkl")