# Get test data from checklist test
Base code retrieved from: https://github.com/sophiamyang/NLP_testing/blob/main/SQuAD-get-checklist-data.ipynb

This file is copied from `1. SQuAD-create-test-suite.ipynb`. I removed the irrelavant cells. 


In [1]:
from transformers import pipeline 

model = pipeline('question-answering', model="distilbert-base-cased-distilled-squad")

In [2]:
%load_ext autoreload
%autoreload 2

import checklist
import itertools

import checklist.editor
import checklist.text_generation
from checklist.expect import Expect
import numpy as np
from checklist.perturb import Perturb
import datasets
import pandas as pd
import json

In [3]:
# SAVE SQUAD DATASET TO CSV 

dataset = datasets.load_dataset('squad')
def format_dataset(example):
    """
    format answers from dict to json
    so that data looks consistent when exporting to csv
    """
    example['answers'] = json.dumps(example['answers'])
    return example

dataset = dataset.map(format_dataset)
dataset['train'].to_csv('train.csv', index=None)
df_train = pd.read_csv('train.csv')
df_train

Found cached dataset squad (C:/Users/fgmal/.cache/huggingface/datasets/squad/plain_text/1.0.0/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453)


  0%|          | 0/2 [00:00<?, ?it/s]

Loading cached processed dataset at C:\Users\fgmal\.cache\huggingface\datasets\squad\plain_text\1.0.0\d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453\cache-6882b6599f6aef9b.arrow
Loading cached processed dataset at C:\Users\fgmal\.cache\huggingface\datasets\squad\plain_text\1.0.0\d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453\cache-ccb8f8f6fcce10c6.arrow


Creating CSV from Arrow format:   0%|          | 0/88 [00:00<?, ?ba/s]

Unnamed: 0,id,title,context,question,answers
0,5733be284776f41900661182,University_of_Notre_Dame,"Architecturally, the school has a Catholic cha...",To whom did the Virgin Mary allegedly appear i...,"{""text"": [""Saint Bernadette Soubirous""], ""answ..."
1,5733be284776f4190066117f,University_of_Notre_Dame,"Architecturally, the school has a Catholic cha...",What is in front of the Notre Dame Main Building?,"{""text"": [""a copper statue of Christ""], ""answe..."
2,5733be284776f41900661180,University_of_Notre_Dame,"Architecturally, the school has a Catholic cha...",The Basilica of the Sacred heart at Notre Dame...,"{""text"": [""the Main Building""], ""answer_start""..."
3,5733be284776f41900661181,University_of_Notre_Dame,"Architecturally, the school has a Catholic cha...",What is the Grotto at Notre Dame?,"{""text"": [""a Marian place of prayer and reflec..."
4,5733be284776f4190066117e,University_of_Notre_Dame,"Architecturally, the school has a Catholic cha...",What sits on top of the Main Building at Notre...,"{""text"": [""a golden statue of the Virgin Mary""..."
...,...,...,...,...,...
87594,5735d259012e2f140011a09d,Kathmandu,"Kathmandu Metropolitan City (KMC), in order to...",In what US state did Kathmandu first establish...,"{""text"": [""Oregon""], ""answer_start"": [229]}"
87595,5735d259012e2f140011a09e,Kathmandu,"Kathmandu Metropolitan City (KMC), in order to...",What was Yangon previously known as?,"{""text"": [""Rangoon""], ""answer_start"": [414]}"
87596,5735d259012e2f140011a09f,Kathmandu,"Kathmandu Metropolitan City (KMC), in order to...",With what Belorussian city does Kathmandu have...,"{""text"": [""Minsk""], ""answer_start"": [476]}"
87597,5735d259012e2f140011a0a0,Kathmandu,"Kathmandu Metropolitan City (KMC), in order to...",In what year did Kathmandu create its initial ...,"{""text"": [""1975""], ""answer_start"": [199]}"


In [5]:
import pandas as pd
def dataset_fmt(t, file_name):
    """
    format t to acceptable dataframe format 
    """
    df = pd.DataFrame([
        {'context':i[k][0], 'question':i[k][1], 'answers_text': j[k]}
        for i, j in zip(t.data, t.labels)
        for k in range(len(i))
    ])
    df['answer_start'] = df.apply(lambda row: row['context'].index(row['answers_text']), axis=1)
    df['answers'] = df.apply(lambda row: {'text': [row['answers_text']], 'answer_start': [row['answer_start']]}, axis=1)
    df = df.drop(columns=['answers_text', 'answer_start'])
    df['answers'] = df['answers'].apply(json.dumps)
    df = df.drop_duplicates().reset_index(drop=True)
    df.to_csv(f'new_data/{file_name}.csv', index=False)
    return df


def inv_dataset_fmt(t, file_name, df_train=df_train):
    """
    use original answers for invariance cases
    note that in many cases start position changes, that's why we find the correct index 
    """
    df = pd.DataFrame([{'context':i[0][0], 'question':i[0][1], 'context_modified':i[1][0], 'question_modified':i[1][1]} for i in t.data])
    df = (
        df
        .merge(df_train, on=['context','question'], how='inner')
        .drop(columns=['context', 'question', 'id', 'title'])
        .rename(columns={'context_modified': 'context', 'question_modified':'question'})
        .drop_duplicates(subset=['context','question'])
        .reset_index(drop=True)
    )
    # df = find_matching_index(df)
    df['answers'] = df['answers'].apply(json.dumps)
    df.to_csv(f'new_data/{file_name}.csv', index=False)
    return df

In [6]:
import re
def find_all_matching(string, sentence):
    matching_lst = []
    for m in re.finditer(re.escape(string), sentence): #re.escape deals with weird character issues
        matching_lst.append(m.start())
    return matching_lst

def find_matching_index(df):
    for index, row in df.iterrows(): 
        row['answers'] = json.loads(row['answers'])
        for i in range(len(row['answers']['text'])):
            original_index = row['answers']['answer_start'][i]
            all_matched_indexes = find_all_matching(row['answers']['text'][i], row['context'])
            if len(all_matched_indexes) ==0:
                pass
            elif original_index in all_matched_indexes:
                # if the original index is in the matched indexes, do nothing
                pass
            else:
                print(index)
                print(row)
                print('changed')
                # if not, choose the index that's the closest to the original index
                row['answers']['answer_start'][i] = min(all_matched_indexes, key=lambda x: abs(x - original_index))
                print(original_index)
                print(row['answers']['answer_start'][i])
    return df

In [7]:
editor = checklist.editor.Editor()
editor.tg

<checklist.text_generation.TextGenerator at 0x1e711234520>

## Vocabulary

In [9]:
adj = ['old', 'smart', 'tall', 'young', 'strong', 'short', 'tough', 'cool', 'fast', 'nice', 'small', 'dark', 'wise', 'rich', 'great', 'weak', 'high', 'slow', 'strange', 'clean']
adj = [(x.rstrip('e'), x) for x in adj]


In [11]:
t = editor.template(
    [(
    '{first_name} is {adj[0]}er than {first_name1}.',
    'Who is {adj[0]}er?'
    )
    ],
    labels = ['{first_name}'],
    adj=adj,
    remove_duplicates=True,
    nsamples=500,
    save=True
    )
name = 'A is COMP than B. Who is more COMP?'
# description = ''
# test = MFT(**t, name=name, description=description, capability='Vocabulary')
# suite.add(test)

In [12]:
dataset_fmt(t, "compare_more")

Unnamed: 0,context,question,answers
0,Amanda is cleaner than Jim.,Who is cleaner?,"{""text"": [""Amanda""], ""answer_start"": [0]}"
1,Lucy is richer than Anthony.,Who is richer?,"{""text"": [""Lucy""], ""answer_start"": [0]}"
2,Patricia is smarter than Donald.,Who is smarter?,"{""text"": [""Patricia""], ""answer_start"": [0]}"
3,Jessica is stronger than Francis.,Who is stronger?,"{""text"": [""Jessica""], ""answer_start"": [0]}"
4,Bob is cooler than Eleanor.,Who is cooler?,"{""text"": [""Bob""], ""answer_start"": [0]}"
...,...,...,...
493,Bobby is cleaner than Arthur.,Who is cleaner?,"{""text"": [""Bobby""], ""answer_start"": [0]}"
494,Michael is wiser than Joseph.,Who is wiser?,"{""text"": [""Michael""], ""answer_start"": [0]}"
495,Grace is stronger than Dick.,Who is stronger?,"{""text"": [""Grace""], ""answer_start"": [0]}"
496,Claire is richer than Caroline.,Who is richer?,"{""text"": [""Claire""], ""answer_start"": [0]}"


In [13]:
t = editor.template(
    [(
    '{first_name} is {adj[0]}er than {first_name1}.',
    'Who is less {adj[1]}?'
    )
    ],
    labels = ['{first_name1}'],
    adj=adj,
    remove_duplicates=True,
    nsamples=500,
    save=True
    )
name = 'A is COMP than B. Who is less COMP?'
# description = ''
# test = MFT(**t, name=name, description=description, capability='Vocabulary')
# suite.add(test)

In [14]:
dataset_fmt(t, "compare_less")

Unnamed: 0,context,question,answers
0,Joseph is older than George.,Who is less old?,"{""text"": [""George""], ""answer_start"": [21]}"
1,Frances is smaller than Evelyn.,Who is less small?,"{""text"": [""Evelyn""], ""answer_start"": [24]}"
2,Ashley is wiser than Donna.,Who is less wise?,"{""text"": [""Donna""], ""answer_start"": [21]}"
3,Larry is shorter than Caroline.,Who is less short?,"{""text"": [""Caroline""], ""answer_start"": [22]}"
4,Thomas is darker than Jimmy.,Who is less dark?,"{""text"": [""Jimmy""], ""answer_start"": [22]}"
...,...,...,...
489,Sam is greater than Charles.,Who is less great?,"{""text"": [""Charles""], ""answer_start"": [20]}"
490,Francis is higher than Julia.,Who is less high?,"{""text"": [""Julia""], ""answer_start"": [23]}"
491,Matthew is weaker than Nicole.,Who is less weak?,"{""text"": [""Nicole""], ""answer_start"": [23]}"
492,Don is cleaner than Kevin.,Who is less clean?,"{""text"": [""Kevin""], ""answer_start"": [20]}"


In [15]:
def crossproduct(t):
    # takes the output of editor.template and does the cross product of contexts and qas
    ret = []
    ret_labels = []
    for x in t.data:
        cs = x['contexts']
        qas = x['qas']
        d = list(itertools.product(cs, qas))
        ret.append([(x[0], x[1][0]) for x in d])
        ret_labels.append([x[1][1] for x in d])
    t.data = ret
    t.labels = ret_labels
    return t


In [16]:
state = editor.suggest('John is very {mask} about the project.')[:20]
print(', '.join(editor.suggest('John is {mask} {state} about the project.', state=state)[:30]))
very = ['very', 'extremely', 'really', 'quite', 'incredibly', 'particularly', 'highly', 'super']
somewhat = ['a little', 'somewhat', 'slightly', 'mildly']

very, pretty, extremely, also, still, quite, more, really, not, clearly, fairly, incredibly, particularly, now, understandably, rather, cautiously, surprisingly, certainly, feeling, so, especially, definitely, generally, most, highly, super, reportedly, being, obviously


In [17]:
t = crossproduct(editor.template(
    {
        'contexts': [
            '{first_name} is {very} {s} about the project. {first_name1} is {s} about the project.',
            '{first_name1} is {s} about the project. {first_name} is {very} {s} about the project.',
            '{first_name} is {s} about the project. {first_name1} is {somewhat} {s} about the project.',
            '{first_name1} is {somewhat} {s} about the project. {first_name} is {s} about the project.',
            '{first_name} is {very} {s} about the project. {first_name1} is {somewhat} {s} about the project.',
            '{first_name1} is {somewhat} {s} about the project. {first_name} is {very} {s} about the project.',
        ],
        'qas': [
            (
                'Who is most {s} about the project?',
                '{first_name}'
            ), 
            (
                'Who is least {s} about the project?',
                '{first_name1}'
            ), 
            
        ]
        
    },
    s = state,
    very=very,
    somewhat=somewhat,
    remove_duplicates=True,
    nsamples=500,
    save=True
    ))
name = 'Intensifiers (very, super, extremely) and reducers (somewhat, kinda, etc)?'
# desc = ''
# test = MFT(**t, name=name, description=desc, capability='Vocabulary')
# suite.add(test)


In [18]:
dataset_fmt(t, "intensifiers_reducers")

Unnamed: 0,context,question,answers
0,Ken is incredibly curious about the project. D...,Who is most curious about the project?,"{""text"": [""Ken""], ""answer_start"": [0]}"
1,Ken is incredibly curious about the project. D...,Who is least curious about the project?,"{""text"": [""Diana""], ""answer_start"": [45]}"
2,Diana is curious about the project. Ken is inc...,Who is most curious about the project?,"{""text"": [""Ken""], ""answer_start"": [36]}"
3,Diana is curious about the project. Ken is inc...,Who is least curious about the project?,"{""text"": [""Diana""], ""answer_start"": [0]}"
4,Ken is curious about the project. Diana is sli...,Who is most curious about the project?,"{""text"": [""Ken""], ""answer_start"": [0]}"
...,...,...,...
5959,Pamela is mildly hopeful about the project. Ad...,Who is least hopeful about the project?,"{""text"": [""Pamela""], ""answer_start"": [0]}"
5960,Adam is super hopeful about the project. Pamel...,Who is most hopeful about the project?,"{""text"": [""Adam""], ""answer_start"": [0]}"
5961,Adam is super hopeful about the project. Pamel...,Who is least hopeful about the project?,"{""text"": [""Pamela""], ""answer_start"": [41]}"
5962,Pamela is mildly hopeful about the project. Ad...,Who is most hopeful about the project?,"{""text"": [""Adam""], ""answer_start"": [44]}"


## Taxonomy

### Size, chape, color, age, material

In [19]:
import munch
order = ['size', 'shape', 'age', 'color']
props = []
properties = {
    'color' : ['red', 'blue','yellow', 'green', 'pink', 'white', 'black', 'orange', 'grey', 'purple', 'brown'],
    'size' : ['big', 'small', 'tiny', 'enormous'],
    'age' : ['old', 'new'],
    'shape' : ['round', 'oval', 'square', 'triangular'],
    'material' : ['iron', 'wooden', 'ceramic', 'glass', 'stone']
}
for i in range(len(order)):
    for j in range(i + 1, len(order)):
        p1, p2 = order[i], order[j]
        for v1, v2 in itertools.product(properties[p1], properties[p2]):
            props.append(munch.Munch({
                'p1': p1,
                'p2': p2,
                'v1': v1,
                'v2': v2,
            }))


In [20]:
print(', '.join(editor.suggest('There is {a:p.v1} {p.v2} {mask} in the room.', p=props, verbose=False)[:30]))
objects = ['box', 'clock', 'table', 'object', 'toy', 'painting', 'sculpture', 'thing', 'figure']


sofa, couch, wall, carpet, chair, table, light, lamp, door, clock, mirror, desk, bed, TV, bar, television, window, box, tree, painting, curtain, fan, fridge, screen, wallpaper, piano, rug, shelf, camera, candle


In [21]:
t = crossproduct(editor.template(
    {
        'contexts': [
            'There is {a:p.v1} {p.v2} {obj} in the room.',
            'There is {a:obj} in the room. The {obj} is {p.v1} and {p.v2}.',
        ],
        'qas': [
            (
                'What {p.p1} is the {obj}?',
                '{p.v1}'
            ), 
            (
                'What {p.p2} is the {obj}?',
                '{p.v2}'
            ), 
            
        ]
        
    },
    obj=objects,
    p=props,
    remove_duplicates=True,
    nsamples=500,
    save=True
    ))
name = 'size, shape, age, color'
desc = ''
# test = MFT(**t, name=name, description=desc, capability='Taxonomy')
# test.run(predconfs, n=100)
# test.summary(n=3, format_example_fn=format_squad_with_context)
# suite.add(test)

In [22]:
dataset_fmt(t, "size_shape_age_color")

Unnamed: 0,context,question,answers
0,There is an oval brown table in the room.,What shape is the table?,"{""text"": [""oval""], ""answer_start"": [12]}"
1,There is an oval brown table in the room.,What color is the table?,"{""text"": [""brown""], ""answer_start"": [17]}"
2,There is a table in the room. The table is ova...,What shape is the table?,"{""text"": [""oval""], ""answer_start"": [43]}"
3,There is a table in the room. The table is ova...,What color is the table?,"{""text"": [""brown""], ""answer_start"": [52]}"
4,There is a big oval toy in the room.,What size is the toy?,"{""text"": [""big""], ""answer_start"": [11]}"
...,...,...,...
1635,There is a box in the room. The box is enormou...,What color is the box?,"{""text"": [""blue""], ""answer_start"": [52]}"
1636,There is an enormous black thing in the room.,What size is the thing?,"{""text"": [""enormous""], ""answer_start"": [12]}"
1637,There is an enormous black thing in the room.,What color is the thing?,"{""text"": [""black""], ""answer_start"": [21]}"
1638,There is a thing in the room. The thing is eno...,What size is the thing?,"{""text"": [""enormous""], ""answer_start"": [43]}"


### Professions vs nationalities

In [23]:
professions = editor.suggest('{first_name} works as {a:mask}.')[:30]
professions += editor.suggest('{first_name} {last_name} works as {a:mask}.')[:30]
professions = list(set(professions))
if 'translator' in professions:
    professions.remove('translator')

In [24]:
def clean(string):
    return string.lstrip('[a,the,an,in,at] ').rstrip('.')

In [25]:
def expect_squad(x, pred, conf, label=None, meta=None):
    return clean(pred) == clean(label)
expect_squad = Expect.single(expect_squad)

In [26]:
t = crossproduct(editor.template(
    {
        'contexts': [
            '{first_name} is {a:nat} {prof}.',
            '{first_name} is {a:prof}. {first_name} is {nat}.',
            '{first_name} is {nat}. {first_name} is {a:prof}.',
            '{first_name} is {nat} and {a:prof}.',
            '{first_name} is {a:prof} and {nat}.',
        ],
        'qas': [
            (
                'What is {first_name}\'s job?',
                '{prof}'
            ), 
            (
                'What is {first_name}\'s nationality?',
                '{nat}'
            ), 
            
        ]
        
    },
    nat = editor.lexicons['nationality'][:10],
    prof=professions,
    remove_duplicates=True,
    nsamples=500,
    save=True,
    ))
name = 'Profession vs nationality'
# test = MFT(**t, name=name, expect=expect_squad, description='',  capability='Taxonomy')
# test.run(predconfs, n=100)
# test.summary(n=3, format_example_fn=format_squad_with_context)
# suite.add(test)

In [27]:
dataset_fmt(t, "profession_nationality")

Unnamed: 0,context,question,answers
0,Catherine is a Bangladeshi waitress.,What is Catherine's job?,"{""text"": [""waitress""], ""answer_start"": [27]}"
1,Catherine is a Bangladeshi waitress.,What is Catherine's nationality?,"{""text"": [""Bangladeshi""], ""answer_start"": [15]}"
2,Catherine is a waitress. Catherine is Banglade...,What is Catherine's job?,"{""text"": [""waitress""], ""answer_start"": [15]}"
3,Catherine is a waitress. Catherine is Banglade...,What is Catherine's nationality?,"{""text"": [""Bangladeshi""], ""answer_start"": [38]}"
4,Catherine is Bangladeshi. Catherine is a waitr...,What is Catherine's job?,"{""text"": [""waitress""], ""answer_start"": [41]}"
...,...,...,...
4985,Martha is Pakistani. Martha is an engineer.,What is Martha's nationality?,"{""text"": [""Pakistani""], ""answer_start"": [10]}"
4986,Martha is Pakistani and an engineer.,What is Martha's job?,"{""text"": [""engineer""], ""answer_start"": [27]}"
4987,Martha is Pakistani and an engineer.,What is Martha's nationality?,"{""text"": [""Pakistani""], ""answer_start"": [10]}"
4988,Martha is an engineer and Pakistani.,What is Martha's job?,"{""text"": [""engineer""], ""answer_start"": [13]}"


### Animal vs vehicle

In [28]:
animals = ['dog', 'cat', 'bull', 'cow', 'fish', 'serpent', 'snake', 'lizard', 'hamster', 'rabbit', 'guinea pig', 'iguana', 'duck']
vehicles = ['car', 'truck', 'train', 'motorcycle', 'bike', 'firetruck', 'tractor', 'van', 'SUV', 'minivan']
t = crossproduct(editor.template(
    {
        'contexts': [
            '{first_name} has {a:animal} and {a:vehicle}.',
            '{first_name} has {a:vehicle} and {a:animal}.',
        ],
        'qas': [
            (
                'What animal does {first_name} have?',
                '{animal}'
            ), 
            (
                'What vehicle does {first_name} have?',
                '{vehicle}'
            ), 
            
        ]
        
    },
    animal=animals,
    vehicle=vehicles,
    remove_duplicates=True,
    nsamples=500,
    save=True
    ))
name = 'Animal vs Vehicle'
# test = MFT(**t, name=name, description='', capability='Taxonomy', expect=expect_squad)
# test.run(predconfs, n=100)
# test.summary(n=3, format_example_fn=format_squad_with_context)
# suite.add(test, overwrite=True)


In [29]:
dataset_fmt(t, "animal_vehicle")

Unnamed: 0,context,question,answers
0,Jessica has a bull and a motorcycle.,What animal does Jessica have?,"{""text"": [""bull""], ""answer_start"": [14]}"
1,Jessica has a bull and a motorcycle.,What vehicle does Jessica have?,"{""text"": [""motorcycle""], ""answer_start"": [25]}"
2,Jessica has a motorcycle and a bull.,What animal does Jessica have?,"{""text"": [""bull""], ""answer_start"": [31]}"
3,Jessica has a motorcycle and a bull.,What vehicle does Jessica have?,"{""text"": [""motorcycle""], ""answer_start"": [14]}"
4,Jonathan has a bull and a minivan.,What animal does Jonathan have?,"{""text"": [""bull""], ""answer_start"": [15]}"
...,...,...,...
1971,Larry has a tractor and a lizard.,What vehicle does Larry have?,"{""text"": [""tractor""], ""answer_start"": [12]}"
1972,Thomas has an iguana and a tractor.,What animal does Thomas have?,"{""text"": [""iguana""], ""answer_start"": [14]}"
1973,Thomas has an iguana and a tractor.,What vehicle does Thomas have?,"{""text"": [""tractor""], ""answer_start"": [27]}"
1974,Thomas has a tractor and an iguana.,What animal does Thomas have?,"{""text"": [""iguana""], ""answer_start"": [28]}"


In [30]:
animals = ['dog', 'cat', 'bull', 'cow', 'fish', 'serpent', 'snake', 'lizard', 'hamster', 'rabbit', 'guinea pig', 'iguana', 'duck']
vehicles = ['car', 'truck', 'train', 'motorcycle', 'bike', 'firetruck', 'tractor', 'van', 'SUV', 'minivan']
t = crossproduct(editor.template(
    {
        'contexts': [
            '{first_name} bought {a:animal}. {first_name2} bought {a:vehicle}.',
            '{first_name2} bought {a:vehicle}. {first_name} bought {a:animal}.',
        ],
        'qas': [
            (
                'Who bought an animal?',
                '{first_name}'
            ), 
            (
                'Who bought a vehicle?',
                '{first_name2}'
            ), 
            
        ]
        
    },
    animal=animals,
    vehicle=vehicles,
    remove_duplicates=True,
    nsamples=500,
    save=True
    ))
name = 'Animal vs Vehicle v2'
# test = MFT(**t, name=name, description='', capability='Taxonomy', expect=expect_squad)
# test.run(predconfs, n=100)
# test.summary(n=3, format_example_fn=format_squad_with_context)
# suite.add(test, overwrite=True)

In [31]:
dataset_fmt(t, "animal_vehicle2")

Unnamed: 0,context,question,answers
0,Cynthia bought a lizard. Pamela bought a truck.,Who bought an animal?,"{""text"": [""Cynthia""], ""answer_start"": [0]}"
1,Cynthia bought a lizard. Pamela bought a truck.,Who bought a vehicle?,"{""text"": [""Pamela""], ""answer_start"": [25]}"
2,Pamela bought a truck. Cynthia bought a lizard.,Who bought an animal?,"{""text"": [""Cynthia""], ""answer_start"": [23]}"
3,Pamela bought a truck. Cynthia bought a lizard.,Who bought a vehicle?,"{""text"": [""Pamela""], ""answer_start"": [0]}"
4,David bought a dog. Linda bought a truck.,Who bought an animal?,"{""text"": [""David""], ""answer_start"": [0]}"
...,...,...,...
1987,Julie bought a firetruck. Grace bought a rabbit.,Who bought a vehicle?,"{""text"": [""Julie""], ""answer_start"": [0]}"
1988,Elizabeth bought a snake. Ruth bought a SUV.,Who bought an animal?,"{""text"": [""Elizabeth""], ""answer_start"": [0]}"
1989,Elizabeth bought a snake. Ruth bought a SUV.,Who bought a vehicle?,"{""text"": [""Ruth""], ""answer_start"": [26]}"
1990,Ruth bought a SUV. Elizabeth bought a snake.,Who bought an animal?,"{""text"": [""Elizabeth""], ""answer_start"": [19]}"


In [32]:
synonyms = [ ('spiritual', 'religious'), ('angry', 'furious'), ('organized', 'organised'),
            ('vocal', 'outspoken'), ('grateful', 'thankful'), ('intelligent', 'smart'),
            ('humble', 'modest'), ('courageous', 'brave'), ('happy', 'joyful'), ('scared', 'frightened'),
           ]

t = crossproduct(editor.template(
    {
        'contexts': [
            '{first_name} is very {s1[0]}. {first_name2} is very {s2[0]}.',
            '{first_name2} is very {s2[0]}. {first_name} is very {s1[0]}.',
        ],
        'qas': [
            (
                'Who is {s1[1]}?',
                '{first_name}'
            ), 
            (
                'Who is {s2[1]}?',
                '{first_name2}'
            ), 
            
        ]
        
    },
    s=synonyms,
    remove_duplicates=True,
    nsamples=250,
    save=True
   ))
t += crossproduct(editor.template(
    {
        'contexts': [
            '{first_name} is very {s1[1]}. {first_name2} is very {s2[1]}.',
            '{first_name2} is very {s2[1]}. {first_name} is very {s1[1]}.',
        ],
        'qas': [
            (
                'Who is {s1[0]}?',
                '{first_name}'
            ), 
            (
                'Who is {s2[0]}?',
                '{first_name2}'
            ), 
            
        ]
        
    },
    s=synonyms,
    remove_duplicates=True,
    nsamples=250,
    save=True
    )) 
name = 'Synonyms'
# test = MFT(**t, name=name, description='', capability='Taxonomy', expect=expect_squad)
# test.run(predconfs, n=100)
# test.summary(n=3, format_example_fn=format_squad_with_context)
# suite.add(test)

In [33]:
dataset_fmt(t, "synonyms")

Unnamed: 0,context,question,answers
0,Sam is very angry. Kevin is very vocal.,Who is furious?,"{""text"": [""Sam""], ""answer_start"": [0]}"
1,Sam is very angry. Kevin is very vocal.,Who is outspoken?,"{""text"": [""Kevin""], ""answer_start"": [19]}"
2,Kevin is very vocal. Sam is very angry.,Who is furious?,"{""text"": [""Sam""], ""answer_start"": [21]}"
3,Kevin is very vocal. Sam is very angry.,Who is outspoken?,"{""text"": [""Kevin""], ""answer_start"": [0]}"
4,Kevin is very spiritual. Sam is very intelligent.,Who is religious?,"{""text"": [""Kevin""], ""answer_start"": [0]}"
...,...,...,...
1847,Michael is very thankful. Betty is very fright...,Who is grateful?,"{""text"": [""Michael""], ""answer_start"": [0]}"
1848,Emma is very brave. Heather is very thankful.,Who is courageous?,"{""text"": [""Emma""], ""answer_start"": [0]}"
1849,Emma is very brave. Heather is very thankful.,Who is grateful?,"{""text"": [""Heather""], ""answer_start"": [20]}"
1850,Heather is very thankful. Emma is very brave.,Who is courageous?,"{""text"": [""Emma""], ""answer_start"": [26]}"


In [34]:
comp_pairs = [('better', 'worse'), ('older', 'younger'), ('smarter', 'dumber'), ('taller', 'shorter'), ('bigger', 'smaller'), ('stronger', 'weaker'), ('faster', 'slower'), ('darker', 'lighter'), ('richer', 'poorer'), ('happier', 'sadder'), ('louder', 'quieter'), ('warmer', 'colder')]
comp_pairs = list(set(comp_pairs))#list(set(comp_pairs + [(x[1], x[0]) for x in comp_pairs]))

In [35]:
t = crossproduct(editor.template(
    {
        'contexts': [
            '{first_name} is {comp[0]} than {first_name1}.',
            '{first_name1} is {comp[1]} than {first_name}.',
        ],
        'qas': [
            (
                'Who is {comp[1]}?',
                '{first_name1}',
            ),
            (
                'Who is {comp[0]}?',
                '{first_name}',
            )
            
        ]
        ,
    },
    comp=comp_pairs,
    remove_duplicates=True,
    nsamples=500,
    save=True
    ))
name = 'A is COMP than B. Who is antonym(COMP)? B'
# test = MFT(**t, name=name, description='', capability='Taxonomy')
# test.run(predconfs, n=100)
# test.summary(n=3, format_example_fn=format_squad_with_context)
# suite.add(test)

In [36]:
dataset_fmt(t, "compare_antonym")

Unnamed: 0,context,question,answers
0,Frederick is better than Dorothy.,Who is worse?,"{""text"": [""Dorothy""], ""answer_start"": [25]}"
1,Frederick is better than Dorothy.,Who is better?,"{""text"": [""Frederick""], ""answer_start"": [0]}"
2,Dorothy is worse than Frederick.,Who is worse?,"{""text"": [""Dorothy""], ""answer_start"": [0]}"
3,Dorothy is worse than Frederick.,Who is better?,"{""text"": [""Frederick""], ""answer_start"": [22]}"
4,Sally is faster than Tom.,Who is slower?,"{""text"": [""Tom""], ""answer_start"": [21]}"
...,...,...,...
1971,Judy is younger than Cynthia.,Who is older?,"{""text"": [""Cynthia""], ""answer_start"": [21]}"
1972,Joseph is stronger than Anna.,Who is weaker?,"{""text"": [""Anna""], ""answer_start"": [24]}"
1973,Joseph is stronger than Anna.,Who is stronger?,"{""text"": [""Joseph""], ""answer_start"": [0]}"
1974,Anna is weaker than Joseph.,Who is weaker?,"{""text"": [""Anna""], ""answer_start"": [0]}"


In [37]:
antonym_adjs = [('progressive', 'conservative'),('religious', 'secular'),('positive', 'negative'),('defensive', 'offensive'),('rude',  'polite'),('optimistic', 'pessimistic'),('stupid', 'smart'),('negative', 'positive'),('unhappy', 'happy'),('active', 'passive'),('impatient', 'patient'),('powerless', 'powerful'),('visible', 'invisible'),('fat', 'thin'),('bad', 'good'),('cautious', 'brave'), ('hopeful', 'hopeless'),('insecure', 'secure'),('humble', 'proud'),('passive', 'active'),('dependent', 'independent'),('pessimistic', 'optimistic'),('irresponsible', 'responsible'),('courageous', 'fearful')]
t = crossproduct(editor.template(
    {
        'contexts': [
            '{first_name} is more {a[0]} than {first_name1}.',
            '{first_name1} is more {a[1]} than {first_name}.',
            '{first_name} is less {a[1]} than {first_name1}.',
            '{first_name1} is less {a[0]} than {first_name}.',
        ],
        'qas': [
            (
                'Who is more {a[0]}?',
                '{first_name}',
            ),
            (
                'Who is less {a[0]}?',
                '{first_name1}',
            ),
            (
                'Who is more {a[1]}?',
                '{first_name1}',
            ),
            (
                'Who is less {a[1]}?',
                '{first_name}',
            ),
        ]
        ,
    },
    a = antonym_adjs,
    remove_duplicates=True,
    nsamples=500,
    save=True
    ))
name = 'A is more X than B. Who is more antonym(X)? B. Who is less X? B. Who is more X? A. Who is less antonym(X)? A.'
# test = MFT(**t, name=name, description='', capability='Taxonomy')
# test.run(predconfs, n=100)
# test.summary(n=3, format_example_fn=format_squad_with_context)
# suite.add(test)

In [38]:
dataset_fmt(t, "compare_moreless_antonym")

Unnamed: 0,context,question,answers
0,Stephen is more courageous than Leslie.,Who is more courageous?,"{""text"": [""Stephen""], ""answer_start"": [0]}"
1,Stephen is more courageous than Leslie.,Who is less courageous?,"{""text"": [""Leslie""], ""answer_start"": [32]}"
2,Stephen is more courageous than Leslie.,Who is more fearful?,"{""text"": [""Leslie""], ""answer_start"": [32]}"
3,Stephen is more courageous than Leslie.,Who is less fearful?,"{""text"": [""Stephen""], ""answer_start"": [0]}"
4,Leslie is more fearful than Stephen.,Who is more courageous?,"{""text"": [""Stephen""], ""answer_start"": [28]}"
...,...,...,...
7979,Charlie is less secure than Fred.,Who is less secure?,"{""text"": [""Charlie""], ""answer_start"": [0]}"
7980,Fred is less insecure than Charlie.,Who is more insecure?,"{""text"": [""Charlie""], ""answer_start"": [27]}"
7981,Fred is less insecure than Charlie.,Who is less insecure?,"{""text"": [""Fred""], ""answer_start"": [0]}"
7982,Fred is less insecure than Charlie.,Who is more secure?,"{""text"": [""Fred""], ""answer_start"": [0]}"


## Robustness

typos

In [40]:
pairs = [(x['context'], x['question']) for x in dataset['train']]

In [41]:
import pickle
spacy_map =  pickle.load(open('processed_squad.pkl', 'rb'))


In [42]:
processed_pairs = [(spacy_map[x[0]], spacy_map[x[1]]) for x in pairs]

In [43]:
def question_typo(x):
    """
    x[0]: context
    x[1]: question 
    Perturb.add_typos(x[1]): add a typo to question 
    """
    return (x[0], Perturb.add_typos(x[1]))
t = Perturb.perturb(pairs, question_typo, nsamples=500)
# test = INV(**t, name='Question typo', capability='Robustness', description='')
# test.run(predconfs, n=100)
# test.summary(n=3, format_example_fn=format_squad)
# suite.add(test, overwrite=True)

In [44]:
inv_dataset_fmt(t, 'typo')

Unnamed: 0,context,question,answers
0,"On the other hand, in his study on nine of ""th...",BeDuhn clarifies that the differencesa re actu...,"""{\""text\"": [\""greater accuracy\""], \""answer_s..."
1,People can be exposed to asphalt in the workpl...,How must asphalt be tretaed to be workable?,"""{\""text\"": [\""heated or diluted\""], \""answer_..."
2,"In the past, Qutb Shahi rulers and Nizams attr...",When was Lazzat UnN isa completed?,"""{\""text\"": [\""the 15th century\""], \""answer_s..."
3,Every major company selling the antipsychotics...,Pfizer settled th eGeodon lawsuit for how much...,"""{\""text\"": [\""$301 million\""], \""answer_start..."
4,"Arsenal fans often refer to themselves as ""Goo...",Where did the attenadnce at Arsenal games rank...,"""{\""text\"": [\""second-highest average\""], \""an..."
...,...,...,...
495,St. John's is one of the oldest settlements in...,Which cityi s considered one of the oldest set...,"""{\""text\"": [\""St. John's\""], \""answer_start\""..."
496,Species that have no long-distance migratory r...,What species has no long-distanc emigratory re...,"""{\""text\"": [\""the waxwings Bombycilla\""], \""a..."
497,The names for the nation of Greece and the Gre...,From what owrd is Greece derived?,"""{\""text\"": [\""Graecia\""], \""answer_start\"": [..."
498,"In the United Kingdom, sociologists and other ...",What made up Leisrue activities and goods as v...,"""{\""text\"": [\""art, music, film, food, sports,..."


Contractions

In [45]:
def contractions(x):
    conts = Perturb.contractions(x[1])
    return [(x[0], a) for a in conts]
t = Perturb.perturb(pairs, contractions, nsamples=500)
# test = INV(**t, name='Question contractions', capability='Robustness', description='')
# test.run(predconfs, n=100)
# test.summary(n=3, format_example_fn=format_squad)
# suite.add(test)

In [46]:
inv_dataset_fmt(t, 'contractions')

Unnamed: 0,context,question,answers
0,Consistent with the missions and priorities ou...,What diplomatic effort does the CAF perform as...,"""{\""text\"": [\""relationship-building efforts\""..."
1,Lateral-cut disc records were developed in the...,What's the name of lateral cut disc records?,"""{\""text\"": [\""gramophone\""], \""answer_start\""..."
2,The bandwidth characteristics of a resonant an...,What's the largest Q that could be achieved wi...,"""{\""text\"": [\""15\""], \""answer_start\"": [980]}"""
3,Until the 1950s guns firing ballistic munition...,Which range didn't use guided missiles?,"""{\""text\"": [\""the very shortest ranges\""], \""..."
4,"The ""Jeltoqsan"" (Kazakh for ""December"") of 198...",What's the English translation of the word Jel...,"""{\""text\"": [\""December\""], \""answer_start\"": ..."
...,...,...,...
495,The Cineteca Nacional (the Mexican Film Librar...,Where's the Mexican Film Library located?,"""{\""text\"": [\""near the Coyoac\\u00e1n suburb\..."
496,The transcribed pre-mRNA contains untranslated...,What's at both ends of the transcribed pre-mRNA?,"""{\""text\"": [\""untranslated regions\""], \""answ..."
497,"In the 20th century, Greek composers have had ...",Who's one of the notable Greek opera singers i...,"""{\""text\"": [\""Maria Callas\""], \""answer_start..."
498,Students attending BYU are required to follow ...,What's the source of much of BYU's funding?,"""{\""text\"": [\""the church's tithing funds\""], ..."


Add random sentence

In [47]:
random_sentences = set()
for x, _ in processed_pairs:
    for y in x.sents:
        random_sentences.add(y.text)
random_sentences = list(random_sentences)

In [50]:
def add_random_sentence(x, **kwargs):
    random_s = np.random.choice(random_sentences)
    while random_s in x[0]:
        random_s = np.random.choice(random_sentences)
    random_s = random_s.strip('.') + '. '
    meta = ['add to end: %s' % random_s, 'add to beg: %s' % random_s]
    return [(x[0] + random_s, x[1]), (random_s + x[0], x[1])], meta

t = Perturb.perturb(pairs, add_random_sentence, nsamples=500, meta=True)
# test = INV(**t, name='Add random sentence to context', capability='Robustness', description='')
# test.run(predconfs, n=100)
# test.summary(n=3, format_example_fn=format_add)
# suite.add(test)

In [None]:
inv_dataset_fmt(t, 'random_sentence')

Unnamed: 0,context,question,answers
0,"As for Mac OS, System 7 was a 32-bit rewrite f...",How did the Mac System 7 improve multitasking?,"""{\""text\"": [\""co-operative multitasking\""], \..."
1,"During the Hellenistic period, Judea became a ...",What religion rose in Judea durring the Hellen...,"""{\""text\"": [\""Judaism\""], \""answer_start\"": [..."
2,"Historically, the cuisine of Estonia has been ...",What food gathering behaviors are now seen as ...,"""{\""text\"": [\""Hunting and fishing\""], \""answe..."
3,Ptolemy's Geography divided Asia on a similar ...,"""India on this side of the Ganges"" is located ...","""{\""text\"": [\""To the south\""], \""answer_start..."
4,Energy transformations in the universe over ti...,What is a process ultimately using the gravita...,"""{\""text\"": [\""nucleosynthesis\""], \""answer_st..."
...,...,...,...
495,The method of execution of federal prisoners f...,According to what law are federal prisoners ex...,"""{\""text\"": [\""Violent Crime Control and Law E..."
496,Rapid environmental changes typically cause ma...,How many species currently live on earth?,"""{\""text\"": [\""Earth's current species range f..."
497,Religious beliefs in the Eastern Empire and Pe...,In what yer did Muhammad die?,"""{\""text\"": [\""632\""], \""answer_start\"": [468]}"""
498,The annual Southampton Boat Show is held in Se...,What Southampton festival culminates in the Bo...,"""{\""text\"": [\""Sea City\""], \""answer_start\"": ..."


## NER

In [None]:
import re
def change_thing(change_fn):
    def change_both(cq, **kwargs):
        context, question = cq
        a = change_fn(context, meta=True)
        if not a:
            return None
        changed, meta = a
        ret = []
        for c, m in zip(changed, meta):
            new_q = re.sub(r'\b%s\b' % re.escape(m[0]), m[1], question.text)
            ret.append((c, new_q))
        return ret, meta
    return change_both
            

In [None]:
t = Perturb.perturb(processed_pairs, change_thing(Perturb.change_names), nsamples=500, meta=True)

# test = INV(**t, name='Change name everywhere', capability='NER',
#           description='', expect=Expect.pairwise(expect_same))
# test.run(predconfs, n=100)
# test.summary(3, format_example_fn=format_replace)
# suite.add(test, overwrite=True)

In [None]:
inv_dataset_fmt(t, 'name_change')

Unnamed: 0,context,question,answers
0,"After just 100 hours of ground combat, and wit...",Why did Coalition nations fear the removal of ...,"""{\""text\"": [\""it would create a power vacuum ..."
1,"Mithridates the Great was the ruler of Pontus,...",How many Romans lived in Mithridate the Great'...,"""{\""text\"": [\""80,000\""], \""answer_start\"": [3..."
2,"On 6 September 2007, Belgian-based Internation...",How much did the Princess Elizabeth station cost?,"""{\""text\"": [\""$16.3 million\""], \""answer_star..."
3,"On September 13, 2009, during the 2009 MTV Vid...",What artist's award reception did Kanye interr...,"""{\""text\"": [\""Taylor Swift\""], \""answer_start..."
4,Eisenhower returned to the U.S. in December 19...,What event contributed to Eisenhower receiving...,"""{\""text\"": [\""Louisiana Maneuvers\""], \""answe..."
...,...,...,...
495,An alternative view offered by Michael Sanders...,To what part of the prey does Michael Sanders ...,"""{\""text\"": [\""the body\""], \""answer_start\"": ..."
496,Beyoncé's vocal range spans four octaves. Jody...,New York Times' Jon Pareles calls Beyoncé's vo...,"""{\""text\"": [\""tart\""], \""answer_start\"": [630]}"""
497,"In 1867, the university opened the first priva...",When did Washington University establish its m...,"""{\""text\"": [\""1891\""], \""answer_start\"": [290]}"""
498,"The team worked on a Wii control scheme, adapt...",What kind of movement interfaced with the swor...,"""{\""text\"": [\""swinging gesture\""], \""answer_s..."


In [None]:
t = Perturb.perturb(processed_pairs, change_thing(Perturb.change_location), nsamples=500, meta=True)

# test = INV(**t, name='Change location everywhere', capability='NER',
#           description='', expect=Expect.pairwise(expect_same))
# test.run(predconfs, n=100)
# test.summary(3, format_example_fn=format_replace)
# suite.add(test, overwrite=True)

In [None]:
inv_dataset_fmt(t, 'location_change')

Unnamed: 0,context,question,answers
0,Argentine activists told a news conference tha...,What is the name of the activist who promised ...,"""{\""text\"": [\""Jorge Carcavallo\""], \""answer_s..."
1,"By 1976, Queen were back in the studio recordi...",What Queen album was released in 1976?,"""{\""text\"": [\""A Day at the Races\""], \""answer..."
2,"In the 1874 general election, Disraeli was ret...",What removed Catholic Rituals from Anglican se...,"""{\""text\"": [\""Public Worship Regulation Act 1..."
3,"Imperial College Union, the students' union at...",How long is the tenure for an officer to run t...,"""{\""text\"": [\""one year\""], \""answer_start\"": ..."
4,"In 2010, 24.9 percent of households reported h...",Percentage of unwed births?,"""{\""text\"": [\""56\""], \""answer_start\"": [635]}"""
...,...,...,...
495,Coyotes and big cats have also been known to a...,What big cats in Indonesia also attack dogs?,"""{\""text\"": [\""Tigers\""], \""answer_start\"": [2..."
496,Spectre opened in Germany with $22.45 million ...,How much more did Spectre earn compared with S...,"""{\""text\"": [\""4%\""], \""answer_start\"": [849]}"""
497,Portland is known for being a trailblazer in v...,How many $10+ donations must Portland city cou...,"""{\""text\"": [\""200\""], \""answer_start\"": [257]}"""
498,The first boardwalk was built in 1870 along a ...,Why was the first boardwalk built in New Orleans?,"""{\""text\"": [\""to help hotel owners keep sand ..."


## Temporal

In [None]:
t = crossproduct(editor.template(
    {
        'contexts': [
            'Both {first_name} and {first_name2} were {prof1}s, but there was a change in {first_name}, who is now {a:prof2}.',
            'Both {first_name2} and {first_name} were {prof1}s, but there was a change in {first_name}, who is now {a:prof2}.',
        ],
        'qas': [
            (
                'Who is {a:prof2}?',
                '{first_name}'
            ), 
        ]
        
    },
    save=True,
    prof=professions,
    remove_duplicates=True,
    nsamples=500,
    ))
name = 'There was a change in profession'
# test = MFT(**t, expect=expect_squad, capability='Temporal', name=name, description='' )
# test.run(predconfs, n=100)
# test.summary(n=3, format_example_fn=format_squad_with_context)
# suite.add(test)

In [64]:
dataset_fmt(t, "temproal")

Unnamed: 0,context,question,answers
0,"Both Suzanne and Kenneth were models, but ther...",Who is a producer?,"{""text"": [""Suzanne""], ""answer_start"": [5]}"
1,"Both Kenneth and Suzanne were models, but ther...",Who is a producer?,"{""text"": [""Suzanne""], ""answer_start"": [17]}"
2,"Both Edith and Kathy were agents, but there wa...",Who is an escort?,"{""text"": [""Edith""], ""answer_start"": [5]}"
3,"Both Kathy and Edith were agents, but there wa...",Who is an escort?,"{""text"": [""Edith""], ""answer_start"": [15]}"
4,"Both David and Adam were organizers, but there...",Who is an interpreter?,"{""text"": [""David""], ""answer_start"": [5]}"
...,...,...,...
961,"Both Martha and Howard were educators, but the...",Who is a nurse?,"{""text"": [""Howard""], ""answer_start"": [16]}"
962,"Both Lawrence and Pamela were reporters, but t...",Who is an educator?,"{""text"": [""Lawrence""], ""answer_start"": [5]}"
963,"Both Pamela and Lawrence were reporters, but t...",Who is an educator?,"{""text"": [""Lawrence""], ""answer_start"": [16]}"
964,"Both Lawrence and Carl were attorneys, but the...",Who is an author?,"{""text"": [""Lawrence""], ""answer_start"": [5]}"


In [65]:
t = crossproduct(editor.template(
    {
        'contexts': [
            '{first_name} became a {prof} before {first_name2} did.',
            '{first_name2} became a {prof} after {first_name} did.',
        ],
        'qas': [
            (
                'Who became a {prof} first?',
                '{first_name}'
            ), 
            (
                'Who became a {prof} last?',
                '{first_name2}'
            ), 
        ]
        
    },
    save=True,
    prof=professions,
    remove_duplicates=True,
    nsamples=500,
    ))
name = 'Understanding before / after -> first / last.'
# test = MFT(**t, expect=expect_squad, capability='Temporal', name=name, description='' )
# test.run(predconfs, n=100)
# test.summary(n=3, format_example_fn=format_squad_with_context)
# suite.add(test)


In [66]:
dataset_fmt(t, "before_after")

Unnamed: 0,context,question,answers
0,Katherine became a accountant before Alfred did.,Who became a accountant first?,"{""text"": [""Katherine""], ""answer_start"": [0]}"
1,Katherine became a accountant before Alfred did.,Who became a accountant last?,"{""text"": [""Alfred""], ""answer_start"": [37]}"
2,Alfred became a accountant after Katherine did.,Who became a accountant first?,"{""text"": [""Katherine""], ""answer_start"": [33]}"
3,Alfred became a accountant after Katherine did.,Who became a accountant last?,"{""text"": [""Alfred""], ""answer_start"": [0]}"
4,Donald became a educator before Thomas did.,Who became a educator first?,"{""text"": [""Donald""], ""answer_start"": [0]}"
...,...,...,...
1991,Tom became a photographer after Cynthia did.,Who became a photographer last?,"{""text"": [""Tom""], ""answer_start"": [0]}"
1992,Wendy became a nurse before James did.,Who became a nurse first?,"{""text"": [""Wendy""], ""answer_start"": [0]}"
1993,Wendy became a nurse before James did.,Who became a nurse last?,"{""text"": [""James""], ""answer_start"": [28]}"
1994,James became a nurse after Wendy did.,Who became a nurse first?,"{""text"": [""Wendy""], ""answer_start"": [27]}"


## Negation

In context

In [67]:
t = crossproduct(editor.template(
    {
        'contexts': [
            '{first_name} is not {a:prof}. {first_name2} is.',
            '{first_name2} is {a:prof}. {first_name} is not.',
        ],
        'qas': [
            (
                'Who is {a:prof}?',
                '{first_name2}'
            ), 
            (
                'Who is not {a:prof}?',
                '{first_name}'
            ), 
        ]
        
    },
    save=True,
    prof=professions,
    remove_duplicates=True,
    nsamples=500,
    ))
name = 'Negation in context, may or may not be in question'
# test = MFT(**t, expect=expect_squad, capability='Negation', name=name, description='' )
# test.run(predconfs, n=100)
# test.summary(n=3, format_example_fn=format_squad_with_context)
# suite.add(test)

In [68]:
dataset_fmt(t, "negation")

Unnamed: 0,context,question,answers
0,Carl is not an author. Wendy is.,Who is an author?,"{""text"": [""Wendy""], ""answer_start"": [23]}"
1,Carl is not an author. Wendy is.,Who is not an author?,"{""text"": [""Carl""], ""answer_start"": [0]}"
2,Wendy is an author. Carl is not.,Who is an author?,"{""text"": [""Wendy""], ""answer_start"": [0]}"
3,Wendy is an author. Carl is not.,Who is not an author?,"{""text"": [""Carl""], ""answer_start"": [20]}"
4,Richard is not an assistant. Nick is.,Who is an assistant?,"{""text"": [""Nick""], ""answer_start"": [29]}"
...,...,...,...
1975,Ruth is an artist. Kevin is not.,Who is not an artist?,"{""text"": [""Kevin""], ""answer_start"": [19]}"
1976,Louis is not an entrepreneur. Richard is.,Who is an entrepreneur?,"{""text"": [""Richard""], ""answer_start"": [30]}"
1977,Louis is not an entrepreneur. Richard is.,Who is not an entrepreneur?,"{""text"": [""Louis""], ""answer_start"": [0]}"
1978,Richard is an entrepreneur. Louis is not.,Who is an entrepreneur?,"{""text"": [""Richard""], ""answer_start"": [0]}"


Not in context:

In [69]:

t = crossproduct(editor.template(
    {
        'contexts': [
            '{first_name} is {a:prof}. {first_name2} is {a:prof2}.',
            '{first_name2} is {a:prof2}. {first_name} is {a:prof}.',
        ],
        'qas': [
            (
                'Who is {a:prof}?',
                '{first_name}'
            ), 
            (
                'Who is not {a:prof}?',
                '{first_name2}'
            ), 
            (
                'Who is {a:prof2}?',
                '{first_name2}'
            ), 
            (
                'Who is not {a:prof2}?',
                '{first_name}'
            ), 
        ]
        
    },
    prof=professions,
    remove_duplicates=True,
    nsamples=500,
    ))
name = 'Negation in question only.'
# test = MFT(**t, expect=expect_squad, capability='Negation', name=name, description='' )
# test.run(predconfs, n=100)
# test.summary(n=3, format_example_fn=format_squad_with_context)
# suite.add(test)

In [70]:
dataset_fmt(t, "negation_question")

Unnamed: 0,context,question,answers
0,Nick is an agent. Roy is an economist.,Who is an agent?,"{""text"": [""Nick""], ""answer_start"": [0]}"
1,Nick is an agent. Roy is an economist.,Who is not an agent?,"{""text"": [""Roy""], ""answer_start"": [18]}"
2,Nick is an agent. Roy is an economist.,Who is an economist?,"{""text"": [""Roy""], ""answer_start"": [18]}"
3,Nick is an agent. Roy is an economist.,Who is not an economist?,"{""text"": [""Nick""], ""answer_start"": [0]}"
4,Roy is an economist. Nick is an agent.,Who is an agent?,"{""text"": [""Nick""], ""answer_start"": [21]}"
...,...,...,...
3867,Lisa is a journalist. Sam is an executive.,Who is not an executive?,"{""text"": [""Lisa""], ""answer_start"": [0]}"
3868,Sam is an executive. Lisa is a journalist.,Who is a journalist?,"{""text"": [""Lisa""], ""answer_start"": [21]}"
3869,Sam is an executive. Lisa is a journalist.,Who is not a journalist?,"{""text"": [""Sam""], ""answer_start"": [0]}"
3870,Sam is an executive. Lisa is a journalist.,Who is an executive?,"{""text"": [""Sam""], ""answer_start"": [0]}"


## Fairness spinoff

In [71]:
fewer_profs = ['doctor', 'nurse', 'secretary', 'CEO']
t = editor.template(
    [
        ('{male} is not {a:prof}, {female} is.', 'Who is {a:prof}?', '{female}', 'woman', '{prof}'),
        ('{female} is not {a:prof}, {male} is.', 'Who is {a:prof}?', '{male}', 'man', '{prof}'),
    ],
#     prof=professions + ['doctor'],
    prof=fewer_profs,
    remove_duplicates=True,
    nsamples=1000,
    unroll=True,
    save=True,
    )
data = [(d[0], d[1]) for d in t.data]
labels = [d[2] for d in t.data]
meta = [(d[3], d[4]) for d in t.data]

# test = MFT(data, expect=expect_squad, labels=labels, meta=meta, templates=t.templates,
#           name='M/F failure rates should be similar for different professions', capability='Fairness',
#           description='Using negation in context.')
# test.run(predconfs, n=100)

# suite.add(test)

In [72]:
t.data = [[i] for i in data]
t.labels = [[i] for i in labels]

In [73]:
dataset_fmt(t, "fair")

Unnamed: 0,context,question,answers
0,"Adam is not a nurse, Sharon is.",Who is a nurse?,"{""text"": [""Sharon""], ""answer_start"": [21]}"
1,"Sharon is not a nurse, Adam is.",Who is a nurse?,"{""text"": [""Adam""], ""answer_start"": [23]}"
2,"Paul is not a doctor, Jill is.",Who is a doctor?,"{""text"": [""Jill""], ""answer_start"": [22]}"
3,"Jill is not a doctor, Paul is.",Who is a doctor?,"{""text"": [""Paul""], ""answer_start"": [22]}"
4,"Thomas is not a CEO, Betty is.",Who is a CEO?,"{""text"": [""Betty""], ""answer_start"": [21]}"
...,...,...,...
1969,"Martha is not a doctor, Tim is.",Who is a doctor?,"{""text"": [""Tim""], ""answer_start"": [24]}"
1970,"Chris is not a secretary, Diane is.",Who is a secretary?,"{""text"": [""Diane""], ""answer_start"": [26]}"
1971,"Diane is not a secretary, Chris is.",Who is a secretary?,"{""text"": [""Chris""], ""answer_start"": [26]}"
1972,"Brian is not a CEO, Kathryn is.",Who is a CEO?,"{""text"": [""Kathryn""], ""answer_start"": [20]}"


## Coref

Basic coref

In [74]:
if 'actress' in professions:
    professions.remove('actress')

In [75]:
t = crossproduct(editor.template(
    {
        'contexts': [
            '{male} and {female} are friends. He is {a:prof1}, and she is {a:prof2}.',
            '{female} and {male} are friends. He is {a:prof1}, and she is {a:prof2}.',
            '{male} and {female} are friends. She is {a:prof2}, and he is {a:prof1}.',
            '{female} and {male} are friends. She is {a:prof2}, and he is {a:prof1}.',
        ],
        'qas': [
            (
                'Who is {a:prof1}?',
                '{male}'
            ), 
            (
                'Who is {a:prof2}?',
                '{female}'
            ), 
        ]
        
    },
    save=True,
    prof=professions,
    remove_duplicates=True,
    nsamples=500,
    ))
name = 'Basic coref, he / she'
# test = MFT(**t, expect=expect_squad, name=name, description='', capability='Coref')
# test.run(predconfs, n=100)
# test.summary(n=3, format_example_fn=format_squad_with_context)
# suite.add(test)

In [76]:
dataset_fmt(t, "basic_coref")

Unnamed: 0,context,question,answers
0,"Joe and Katie are friends. He is an assistant,...",Who is an assistant?,"{""text"": [""Joe""], ""answer_start"": [0]}"
1,"Joe and Katie are friends. He is an assistant,...",Who is a reporter?,"{""text"": [""Katie""], ""answer_start"": [8]}"
2,"Katie and Joe are friends. He is an assistant,...",Who is an assistant?,"{""text"": [""Joe""], ""answer_start"": [10]}"
3,"Katie and Joe are friends. He is an assistant,...",Who is a reporter?,"{""text"": [""Katie""], ""answer_start"": [0]}"
4,"Joe and Katie are friends. She is a reporter, ...",Who is an assistant?,"{""text"": [""Joe""], ""answer_start"": [0]}"
...,...,...,...
3915,Rose and Henry are friends. He is an architect...,Who is an intern?,"{""text"": [""Rose""], ""answer_start"": [0]}"
3916,"Henry and Rose are friends. She is an intern, ...",Who is an architect?,"{""text"": [""Henry""], ""answer_start"": [0]}"
3917,"Henry and Rose are friends. She is an intern, ...",Who is an intern?,"{""text"": [""Rose""], ""answer_start"": [10]}"
3918,"Rose and Henry are friends. She is an intern, ...",Who is an architect?,"{""text"": [""Henry""], ""answer_start"": [9]}"


In [77]:
t = crossproduct(editor.template(
    {
        'contexts': [
            '{male} and {female} are friends. His mom is {a:prof}.',
            '{female} and {male} are friends. His mom is {a:prof}.',
        ],
        'qas': [
            (
                'Whose mom is {a:prof}?',
                '{male}'
            ), 
        ]
        
    },
    save=True,
    prof=professions,
    remove_duplicates=True,
    nsamples=250,
    ))
t += crossproduct(editor.template(
    {
        'contexts': [
            '{male} and {female} are friends. Her mom is {a:prof}.',
            '{female} and {male} are friends. Her mom is {a:prof}.',
        ],
        'qas': [
            (
                'Whose mom is {a:prof}?',
                '{female}'
            ), 
        ]
        
    },
    save=True,
    prof=professions,
    remove_duplicates=True,
    nsamples=250,
    ))

name = 'Basic coref, his / her'
# test = MFT(**t, expect=expect_squad, name=name, description='', capability='Coref')
# test.run(predconfs, n=100)
# test.summary(n=3, format_example_fn=format_squad_with_context)
# suite.add(test)

In [78]:
dataset_fmt(t, "basic_coref2")

Unnamed: 0,context,question,answers
0,Albert and Ellen are friends. His mom is an at...,Whose mom is an attorney?,"{""text"": [""Albert""], ""answer_start"": [0]}"
1,Ellen and Albert are friends. His mom is an at...,Whose mom is an attorney?,"{""text"": [""Albert""], ""answer_start"": [10]}"
2,Tim and Grace are friends. His mom is an entre...,Whose mom is an entrepreneur?,"{""text"": [""Tim""], ""answer_start"": [0]}"
3,Grace and Tim are friends. His mom is an entre...,Whose mom is an entrepreneur?,"{""text"": [""Tim""], ""answer_start"": [10]}"
4,Steve and Amy are friends. His mom is an entre...,Whose mom is an entrepreneur?,"{""text"": [""Steve""], ""answer_start"": [0]}"
...,...,...,...
995,Nicole and Sam are friends. Her mom is an orga...,Whose mom is an organizer?,"{""text"": [""Nicole""], ""answer_start"": [0]}"
996,Eric and Diana are friends. Her mom is a produ...,Whose mom is a producer?,"{""text"": [""Diana""], ""answer_start"": [9]}"
997,Diana and Eric are friends. Her mom is a produ...,Whose mom is a producer?,"{""text"": [""Diana""], ""answer_start"": [0]}"
998,Harold and Donna are friends. Her mom is an or...,Whose mom is an organizer?,"{""text"": [""Donna""], ""answer_start"": [11]}"


Former, latter

In [79]:
t = crossproduct(editor.template(
    {
        'contexts': [
            '{first_name} and {first_name2} are friends. The former is {a:prof1}.',
            '{first_name2} and {first_name} are friends. The latter is {a:prof1}.',
            '{first_name} and {first_name2} are friends. The former is {a:prof1} and the latter is {a:prof2}.',
            '{first_name2} and {first_name} are friends. The former is {a:prof2} and the latter is {a:prof1}.',
        ],
        'qas': [
            (
                'Who is {a:prof1}?',
                '{first_name}'
            ), 
        ]
        
    },
    prof=professions,
    remove_duplicates=True,
    nsamples=500,
    save=True
    ))
name = 'Former / Latter'
# test = MFT(**t, expect=expect_squad, name=name, description='', capability='Coref')
# test.run(predconfs, n=100)
# test.summary(n=3, format_example_fn=format_squad_with_context)
# suite.add(test)

In [80]:
dataset_fmt(t, "former")

Unnamed: 0,context,question,answers
0,Grace and Suzanne are friends. The former is a...,Who is an engineer?,"{""text"": [""Grace""], ""answer_start"": [0]}"
1,Suzanne and Grace are friends. The latter is a...,Who is an engineer?,"{""text"": [""Grace""], ""answer_start"": [12]}"
2,Grace and Suzanne are friends. The former is a...,Who is an engineer?,"{""text"": [""Grace""], ""answer_start"": [0]}"
3,Suzanne and Grace are friends. The former is a...,Who is an engineer?,"{""text"": [""Grace""], ""answer_start"": [12]}"
4,Sam and Benjamin are friends. The former is an...,Who is an investigator?,"{""text"": [""Sam""], ""answer_start"": [0]}"
...,...,...,...
1959,Deborah and Suzanne are friends. The former is...,Who is a nurse?,"{""text"": [""Suzanne""], ""answer_start"": [12]}"
1960,Anne and Dorothy are friends. The former is an...,Who is an assistant?,"{""text"": [""Anne""], ""answer_start"": [0]}"
1961,Dorothy and Anne are friends. The latter is an...,Who is an assistant?,"{""text"": [""Anne""], ""answer_start"": [12]}"
1962,Anne and Dorothy are friends. The former is an...,Who is an assistant?,"{""text"": [""Anne""], ""answer_start"": [0]}"


## SRL

In [81]:
import pattern
import pattern.en
pverb = ['love', 'hate', 'like', 'remember', 'recognize', 'trust', 'deserve', 'understand', 'blame', 'dislike', 'prefer', 'follow', 'notice', 'hurt', 'bother', 'support', 'believe', 'accept', 'attack']
a = pattern.en.tenses('loves')[0]
b = pattern.en.tenses('stolen')[0]
pverb = [(pattern.en.conjugate(v, *a), pattern.en.conjugate(v, *b)) for v in pverb]

t = crossproduct(editor.template(
    {
        'contexts': [
            '{first_name} {v[0]} {first_name2}.',
            '{first_name2} is {v[1]} by {first_name}.',
        ],
        'qas': [
            (
                'Who {v[0]}?',
                '{first_name}'
            ), 
            (
                'Who is {v[1]}?',
                '{first_name2}'
            ), 
        ]
        
    },
    v=pverb,
    remove_duplicates=True,
    nsamples=500,
    ))
name = 'Agent / object distinction'
# test = MFT(**t, expect=expect_squad, name=name, description='', capability='SRL')
# test.run(predconfs, n=100)
# test.summary(n=3, format_example_fn=format_squad_with_context)
# suite.add(test)

RuntimeError: generator raised StopIteration

In [None]:
dataset_fmt(t, "agent")

Unnamed: 0,context,question,answers
0,Martin dislikes Jean.,Who dislikes?,"{""text"": [""Martin""], ""answer_start"": [0]}"
1,Martin dislikes Jean.,Who is disliked?,"{""text"": [""Jean""], ""answer_start"": [16]}"
2,Jean is disliked by Martin.,Who dislikes?,"{""text"": [""Martin""], ""answer_start"": [20]}"
3,Jean is disliked by Martin.,Who is disliked?,"{""text"": [""Jean""], ""answer_start"": [0]}"
4,Katherine trusts Frances.,Who trusts?,"{""text"": [""Katherine""], ""answer_start"": [0]}"
...,...,...,...
1991,Suzanne is believed by Kate.,Who is believed?,"{""text"": [""Suzanne""], ""answer_start"": [0]}"
1992,Dan attacks Greg.,Who attacks?,"{""text"": [""Dan""], ""answer_start"": [0]}"
1993,Dan attacks Greg.,Who is attacked?,"{""text"": [""Greg""], ""answer_start"": [12]}"
1994,Greg is attacked by Dan.,Who attacks?,"{""text"": [""Dan""], ""answer_start"": [20]}"


In [82]:
t = crossproduct(editor.template(
    {
        'contexts': [
            '{first_name} {v[0]} {first_name2}. {first_name2} {v[0]} {first_name3}.',
            '{first_name} {v[0]} {first_name2}. {first_name3} is {v[1]} by {first_name2}.',
            '{first_name2} is {v[1]} by {first_name}. {first_name2} {v[0]} {first_name3}.',
            '{first_name2} is {v[1]} by {first_name}. {first_name3} is {v[1]} by {first_name2}.',
        ],
        'qas': [
            (
                'Who {v[0]} {first_name2}?',
                '{first_name}'
            ), 
            (
                'Who {v[0]} {first_name3}?',
                '{first_name2}'
            ), 
            (
                'Who is {v[1]} by {first_name}?',
                '{first_name2}'
            ), 
            (
                'Who is {v[1]} by {first_name2}?',
                '{first_name3}'
            ), 
        ]
        
    },
    save=True,
    v=pverb,
    remove_duplicates=True,
    nsamples=500,
    ))
name = 'Agent / object distinction with 3 agents'
# test = MFT(**t, expect=expect_squad, name=name, description='', capability='SRL')
# test.run(predconfs, n=100)
# test.summary(n=3, format_example_fn=format_squad_with_context)
# suite.add(test)


In [83]:
dataset_fmt(t, "agent2")

Unnamed: 0,context,question,answers
0,Edwin a Jerry. Jerry a Catherine.,Who a Jerry?,"{""text"": [""Edwin""], ""answer_start"": [0]}"
1,Edwin a Jerry. Jerry a Catherine.,Who a Catherine?,"{""text"": [""Jerry""], ""answer_start"": [8]}"
2,Edwin a Jerry. Jerry a Catherine.,Who is c by Edwin?,"{""text"": [""Jerry""], ""answer_start"": [8]}"
3,Edwin a Jerry. Jerry a Catherine.,Who is c by Jerry?,"{""text"": [""Catherine""], ""answer_start"": [23]}"
4,Edwin a Jerry. Catherine is c by Jerry.,Who a Jerry?,"{""text"": [""Edwin""], ""answer_start"": [0]}"
...,...,...,...
7851,Ruth is a by Diane. Ruth h Jane.,Who is a by Ruth?,"{""text"": [""Jane""], ""answer_start"": [27]}"
7852,Ruth is a by Diane. Jane is a by Ruth.,Who h Ruth?,"{""text"": [""Diane""], ""answer_start"": [13]}"
7853,Ruth is a by Diane. Jane is a by Ruth.,Who h Jane?,"{""text"": [""Ruth""], ""answer_start"": [0]}"
7854,Ruth is a by Diane. Jane is a by Ruth.,Who is a by Diane?,"{""text"": [""Ruth""], ""answer_start"": [0]}"


## Combine all

In [84]:
import pandas as pd
import glob

path = 'new_data' 
all_files = glob.glob(path + "/*.csv")

li = []

for filename in all_files:
    df = pd.read_csv(filename, index_col=None, header=0)
    li.append(df)

frame = pd.concat(li, axis=0, ignore_index=True)
del li

In [86]:
frame

Unnamed: 0,context,question,answers
0,Edwin a Jerry. Jerry a Catherine.,Who a Jerry?,"{""text"": [""Edwin""], ""answer_start"": [0]}"
1,Edwin a Jerry. Jerry a Catherine.,Who a Catherine?,"{""text"": [""Jerry""], ""answer_start"": [8]}"
2,Edwin a Jerry. Jerry a Catherine.,Who is c by Edwin?,"{""text"": [""Jerry""], ""answer_start"": [8]}"
3,Edwin a Jerry. Jerry a Catherine.,Who is c by Jerry?,"{""text"": [""Catherine""], ""answer_start"": [23]}"
4,Edwin a Jerry. Catherine is c by Jerry.,Who a Jerry?,"{""text"": [""Edwin""], ""answer_start"": [0]}"
...,...,...,...
57407,The dipole component of the magnetic field at ...,What is Neptune's dipole magnetic moment ?,"""{\""text\"": [\""2.2 \\u00d7 1017 T\\u00b7m3\""],..."
57408,Abu el-Haj argues that genomics and the mappin...,Hammer and others recently aimed to test what ...,"""{\""text\"": [\""neighbouring non-Jewish populat..."
57409,Ancient and medieval Hindu texts identify six ...,How many pramanas are there in Hnidu philosophy?,"""{\""text\"": [\""six\""], \""answer_start\"": [42]}"""
57410,The Eritrean highway system is named according...,What are the three levels of road lcassificati...,"""{\""text\"": [\""primary (P), secondary (S), and..."


In [102]:
json.loads(json.loads(frame.iloc[57410].answers))

{'text': ['primary (P), secondary (S), and tertiary (T)'],
 'answer_start': [115]}

In [87]:
frame.to_csv('checklist_train.csv', index=None)

In [103]:
combined = pd.concat([
    df_train[['context','question','answers']],
    frame]
)

In [105]:
## shuffle
combined.iloc[np.random.permutation(len(combined))].reset_index(drop=True)

Unnamed: 0,context,question,answers
0,The university owns several centers around the...,At which location is the London Center operate...,"{""text"": [""1 Suffolk Street in Trafalgar Squar..."
1,Caroline is more offensive than Ashley.,Who is less defensive?,"{""text"": [""Caroline""], ""answer_start"": [0]}"
2,Alexandra is r by Dan. Alexandra p Jerry.,Who is r by Alexandra?,"{""text"": [""Jerry""], ""answer_start"": [35]}"
3,Kathryn has a lizard and a minivan.,What vehicle does Kathryn have?,"{""text"": [""minivan""], ""answer_start"": [27]}"
4,The song was released as a digital download on...,The release of Writing's on the Wall caused wh...,"{""text"": [""Shirley Bassey""], ""answer_start"": [..."
...,...,...,...
145006,"Beyoncé's music is generally R&B, but she also...",What language does she mainly sing?,"{""text"": [""English""], ""answer_start"": [267]}"
145007,BYU alumni in academia include former Dean of ...,What is former alumnus Paul D. Boyer known for...,"{""text"": [""Nobel Prize winner""], ""answer_start..."
145008,Marshall Field & Company originated in 1852. I...,In what year was Marshall Field and company es...,"{""text"": [""1852""], ""answer_start"": [39]}"
145009,"Until 1998, the network carried a variety of A...",In what year did the network end American prog...,"{""text"": [""1998""], ""answer_start"": [6]}"


In [106]:
combined.to_csv('combined_train.csv', index=None)