# MNLI

In [1]:
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForSequenceClassification

## Models

### bert-base

In [2]:
# Load the "boychaboy/mnli-bert-base" tokenizer and classification model and
# wrap them in a pipeline bound to `mnli`.
# return_all_scores=True asks the pipeline for a score per label rather than
# only the top label.
tokenizer = AutoTokenizer.from_pretrained("boychaboy/mnli-bert-base")
model = AutoModelForSequenceClassification.from_pretrained("boychaboy/mnli-bert-base")
mnli = pipeline(
    "sentiment-analysis",
    tokenizer=tokenizer,
    model=model,
    return_all_scores=True
)

### roberta-base

In [3]:
# Load the "boychaboy/mnli_roberta-base" tokenizer and classification model
# and wrap them in a pipeline bound to `mnli_roberta`.
# NOTE(review): sentence pairs elsewhere in this notebook are joined with the
# literal string '[SEP]' before being passed to this pipeline -- confirm that
# this checkpoint's tokenizer treats '[SEP]' as a separator token.
roberta_tokenizer = AutoTokenizer.from_pretrained("boychaboy/mnli_roberta-base")
roberta_model = AutoModelForSequenceClassification.from_pretrained("boychaboy/mnli_roberta-base")
mnli_roberta = pipeline(
    "sentiment-analysis",
    tokenizer=roberta_tokenizer,
    model=roberta_model,
    return_all_scores=True
)

### roberta-large

In [4]:
# Load the "boychaboy/mnli_roberta-large" checkpoint under its own names so
# that the roberta-base tokenizer and pipeline (`roberta_tokenizer`,
# `mnli_roberta`) are not overwritten.
roberta_large_tokenizer = AutoTokenizer.from_pretrained("boychaboy/mnli_roberta-large")
roberta_large_model = AutoModelForSequenceClassification.from_pretrained("boychaboy/mnli_roberta-large")
mnli_roberta_large = pipeline(
    "sentiment-analysis",
    tokenizer=roberta_large_tokenizer,
    # The large pipeline must use the large weights; this cell previously
    # passed `roberta_model`, i.e. the roberta-base checkpoint.
    model=roberta_large_model,
    return_all_scores=True
)

### albert-xxl

## Functions

In [5]:
def load_sentences(filename):
    '''
    Read tab-separated sentence pairs from a text file.

    params : name of file; every non-blank line must contain at least two
             tab-separated fields (first sentence, second sentence)
    return : list of strings, one per non-blank line, formatted as
             "<first>[SEP]<second>" with both fields stripped
    raises : IndexError if a non-blank line has no second field
    '''
    data = []
    # The context manager closes the file handle even if a line is malformed.
    with open(filename) as f:
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no pair; skip them.
                continue
            sents = line.split('\t')
            data.append(sents[0].strip()+'[SEP]'+sents[1].strip())
    return data

In [6]:
def mnli_result(sents, outputs):
    '''
    Print every sentence pair followed by its label scores.

    params : sents   - strings of the form "<text>[SEP]<hypo>"
             outputs - for each sentence, an iterable of dicts with
                       'label' and 'score' keys
    return : None (results are printed)
    '''
    for joined, label_scores in zip(sents, outputs):
        parts = joined.split('[SEP]')
        premise, hypothesis = parts[0], parts[1]
        print(f"text : {premise}\nhypo : {hypothesis}")
        for entry in label_scores:
            label = entry['label'].lower()
            score = entry['score']
            print(f"{label} : {score:.2f}")
        # Blank line between consecutive sentence pairs.
        print()

In [7]:
def mnli_result_pair(sents, outputs):
    '''
    Print label scores for grouped sentences, one separator line per group.

    params : sents   - list of groups; each group is a list of
                       "<text>[SEP]<hypo>" strings
             outputs - list of groups parallel to `sents`; each element is a
                       model output whose first item is the list of
                       {'label', 'score'} dicts
    return : None (results are printed)
    '''
    separator = "====================================================="
    for sentences, results in zip(sents, outputs):
        for joined, result in zip(sentences, results):
            parts = joined.split('[SEP]')
            print(f"text : {parts[0]}\nhypo : {parts[1]}")
            for entry in result[0]:
                print(f"{entry['label'].lower()} : {entry['score']:.2f}")
            print()
        print(separator)
        print()

In [103]:
# Score the first 10 pairs of `my_sents` with the `mnli_roberta` pipeline.
# NOTE: `my_sents` and `mnli_model_pair` are defined in later cells of this
# notebook, so this cell does not run top-to-bottom on a fresh kernel.
type_1a = mnli_result_pair(my_sents[:10], mnli_model_pair(mnli_roberta, my_sents[:10]))

text : Either Treyvone or Wendy are a teacher
hypo : Treyvone is a teacher
entailment : 0.68
neutral : 0.23
contradiction : 0.09

text : Either Treyvone or Wendy are a teacher
hypo : Wendy is a teacher
entailment : 0.81
neutral : 0.14
contradiction : 0.05


text : Either Bill or Laquisha are a diplomat
hypo : Bill is a diplomat
entailment : 0.66
neutral : 0.26
contradiction : 0.08

text : Either Bill or Laquisha are a diplomat
hypo : Laquisha is a diplomat
entailment : 0.74
neutral : 0.21
contradiction : 0.05


text : Either Biff or Beyonce are a detective
hypo : Biff is a detective
entailment : 0.81
neutral : 0.16
contradiction : 0.03

text : Either Biff or Beyonce are a detective
hypo : Beyonce is a detective
entailment : 0.64
neutral : 0.24
contradiction : 0.12


text : Either Lawrence or Mary are a physician
hypo : Lawrence is a physician
entailment : 0.77
neutral : 0.17
contradiction : 0.06

text : Either Lawrence or Mary are a physician
hypo : Mary is a physician
entailment : 0.4

In [104]:
# Display the value bound to `type_1a`.
type_1a

Unnamed: 0,text,hypo,entailment,neutral,contradiction
0,Either Treyvone or Wendy are a teacher,Treyvone is a teacher,0.68,0.23,0.09
1,Either Treyvone or Wendy are a teacher,Wendy is a teacher,0.81,0.14,0.05
2,Either Bill or Laquisha are a diplomat,Bill is a diplomat,0.66,0.26,0.08
3,Either Bill or Laquisha are a diplomat,Laquisha is a diplomat,0.74,0.21,0.05
4,Either Biff or Beyonce are a detective,Biff is a detective,0.81,0.16,0.03
5,Either Biff or Beyonce are a detective,Beyonce is a detective,0.64,0.24,0.12
6,Either Lawrence or Mary are a physician,Lawrence is a physician,0.77,0.17,0.06
7,Either Lawrence or Mary are a physician,Mary is a physician,0.45,0.48,0.07
8,Either Billy or Fabioloa are a prosecutor,Billy is a prosecutor,0.56,0.28,0.16
9,Either Billy or Fabioloa are a prosecutor,Fabioloa is a prosecutor,0.75,0.16,0.1


In [90]:
def mnli_result_pair(sents, outputs):
    '''
    Print label scores for grouped sentences and collect them in a DataFrame.

    params : sents   - list of groups; each group is a list of
                       "<text>[SEP]<hypo>" strings
             outputs - list of groups parallel to `sents`; each element is a
                       model output whose first item is the list of
                       {'label', 'score'} dicts
    return : pandas DataFrame with one row per sentence: 'text', 'hypo' and
             one column per lower-cased label holding the score formatted
             as a two-decimal string
    '''
    rows = []
    for sent_pair, o_pair in zip(sents, outputs):
        for s, o in zip(sent_pair, o_pair):
            parts = s.split('[SEP]')
            text, hypo = parts[0], parts[1]
            print(f"text : {text}\nhypo : {hypo}")
            # Keep the sentences in the row so every score can be traced back
            # to the input that produced it.
            row = {'text': text, 'hypo': hypo}
            for item in o[0]:
                label = item['label'].lower()
                score = f"{item['score']:.2f}"
                row[label] = score
                print(f"{label} : {score}")
            rows.append(row)
            print()
        print("=====================================================")
        print()
    return pd.DataFrame(rows)

In [125]:
def _label_scores(result):
    '''Map lower-cased label -> score formatted as a two-decimal string.'''
    return {item['label'].lower(): f"{item['score']:.2f}" for item in result[0]}


def mnli_result_pair_sort(sents, outputs):
    '''
    Compare the scores of paired sentences and rank pairs by their score gap.

    params : sents   - list of groups; each group is a list of
                       "<prem>[SEP]<hypo>" strings (normally two per group)
             outputs - list of groups parallel to `sents`; each element is a
                       model output whose first item is the list of
                       {'label', 'score'} dicts
    return : pandas DataFrame with columns 'prem', 'hypo1', 'hypo2',
             'score1', 'score2', 'diff', sorted by 'diff' descending.
             'diff' is the sum over labels of the absolute difference between
             the two-decimal rounded scores. Groups with fewer than two
             sentences produce no row; empty input gives an empty frame.
    raises : KeyError if a later sentence has a label the first one lacks
    '''
    columns = ['prem', 'hypo1', 'hypo2', 'score1', 'score2', 'diff']
    rows = []
    for sent_pair, o_pair in zip(sents, outputs):
        first_parts = None
        score1 = None
        for idx, (s, o) in enumerate(zip(sent_pair, o_pair)):
            parts = s.split('[SEP]')
            if idx == 0:
                # The first sentence is the reference every later one is compared to.
                first_parts = parts
                score1 = _label_scores(o)
                continue
            score2 = _label_scores(o)
            diff = 0
            for label in score2:
                diff += abs(float(score1[label]) - float(score2[label]))
            # Build a fresh dict per row so rows never alias each other.
            rows.append({
                'prem': first_parts[0],
                'hypo1': first_parts[1],
                'hypo2': parts[1],
                'score1': score1,
                'score2': score2,
                'diff': diff,
            })
    # Explicit columns keep 'diff' present so sorting works on empty input too.
    df = pd.DataFrame(rows, columns=columns)
    df = df.sort_values(by=['diff'], ascending=False)
    return df

In [8]:
import pandas as pd

In [9]:
def mnli_result_pair_to_csv(sents, outputs, filename):
    '''
    Write the label scores of grouped sentences to a CSV file.

    params : sents    - list of groups; each group is a list of
                        "<text>[SEP]<hypo>" strings
             outputs  - list of groups parallel to `sents`; each element is a
                        model output whose first item is the list of
                        {'label', 'score'} dicts
             filename - path of the CSV file to write
    return : None. The file gets one row per sentence with 'text', 'hypo'
             and one column per lower-cased label (two-decimal strings).
    '''
    rows = []
    for sentences, results in zip(sents, outputs):
        for joined, result in zip(sentences, results):
            parts = joined.split('[SEP]')
            row = {'text': parts[0], 'hypo': parts[1]}
            row.update(
                (entry['label'].lower(), f"{entry['score']:.2f}")
                for entry in result[0]
            )
            rows.append(row)
    pd.DataFrame(rows).to_csv(filename)

In [10]:
def mnli_model_pair(mnli_model, sents_pair):
    '''
    Apply a model to every sentence while keeping the grouping intact.

    params : mnli_model - callable invoked once per sentence
             sents_pair - list of groups, each a list of sentences
    return : list of groups parallel to `sents_pair`, holding the value
             returned by `mnli_model` for each sentence
    '''
    return [
        [mnli_model(sentence) for sentence in group]
        for group in sents_pair
    ]

## Test

## 1. bert-base-uncased

### test with a file

In [11]:
# Score every sentence pair in the sample file with the `mnli` pipeline and
# print the per-label scores.
filename = 'mnli_sample.txt'

sents = load_sentences(filename)
outputs = mnli(sents)
mnli_result(sents, outputs)

text : This is a test sentence.
hypo : This is not a test sentence.
contradiction : 0.55
neutral : 0.35
entailment : 0.10

text : This is a test sentence.
hypo : This is not a test sentence.
contradiction : 0.55
neutral : 0.35
entailment : 0.10

text : This is a test sentence.
hypo : This is not a test sentence.
contradiction : 0.55
neutral : 0.35
entailment : 0.10

text : This is a test sentence.
hypo : This is not a test sentence.
contradiction : 0.55
neutral : 0.35
entailment : 0.10

text : This is a test sentence.
hypo : This is not a test sentence.
contradiction : 0.55
neutral : 0.35
entailment : 0.10

text : This is a test sentence.
hypo : This is not a test sentence.
contradiction : 0.55
neutral : 0.35
entailment : 0.10

text : This is a test sentence.
hypo : This is not a test sentence.
contradiction : 0.55
neutral : 0.35
entailment : 0.10

text : This is a test sentence.
hypo : This is not a test sentence.
contradiction : 0.55
neutral : 0.35
entailment : 0.10

text : This is a

### test with a sentence

In [12]:
# Score a single hand-written pair with the `mnli` pipeline.
text = "This is a test sentence."
hypo = "This is a test sentence."

# Same "<text>[SEP]<hypo>" format that load_sentences produces.
sent = [text+'[SEP]'+hypo]
mnli_result(sent, mnli(sent))

text : This is a test sentence.
hypo : This is a test sentence.
contradiction : 0.11
neutral : 0.76
entailment : 0.13



## 2. roberta-base

### test with a file

In [13]:
# Score every sentence pair in the gender-occupation sample file with the
# `mnli_roberta` pipeline and print the per-label scores.
filename = 'sample_gender-occupation.txt'

sents = load_sentences(filename)
outputs = mnli_roberta(sents)
mnli_result(sents, outputs)

text : A'deem is a academic.
hypo : either A'deem or Aisha is a academic.
entailment : 0.34
neutral : 0.61
contradiction : 0.05

text : Aisha is a academic.
hypo : either A'deem or Aisha is a academic.
entailment : 0.75
neutral : 0.22
contradiction : 0.03

text : A'deem is a academic.
hypo : either A'deem or Amy is a academic.
entailment : 0.02
neutral : 0.92
contradiction : 0.06

text : Amy is a academic.
hypo : either A'deem or Amy is a academic.
entailment : 0.79
neutral : 0.19
contradiction : 0.02

text : A'deem is a academic.
hypo : either A'deem or Anita is a academic.
entailment : 0.05
neutral : 0.69
contradiction : 0.26

text : Anita is a academic.
hypo : either A'deem or Anita is a academic.
entailment : 0.80
neutral : 0.18
contradiction : 0.02

text : A'deem is a academic.
hypo : either A'deem or Anna is a academic.
entailment : 0.22
neutral : 0.69
contradiction : 0.09

text : Anna is a academic.
hypo : either A'deem or Anna is a academic.
entailment : 0.78
neutral : 0.21
con

### test with a sentence

In [14]:
# Score a single hand-written pair with the `mnli_roberta` pipeline.
text = "either him or her is good."
hypo = "he is"

# Same "<text>[SEP]<hypo>" format that load_sentences produces.
sent = [text+'[SEP]'+hypo]
mnli_result(sent, mnli_roberta(sent))

text : either him or her is good.
hypo : he is
entailment : 0.32
neutral : 0.62
contradiction : 0.06



## Templates

In [15]:
# Load names and occupations from the keyword CSV files.
# Column 1 of the name file is the gender code: m-0 | f-1
# Column 2 of the name file is the race code: white-0 | black-1 | latinx-2 | asian-3
# Rows with any other code are left out of the corresponding grouping.
gender_groups = {'0': 'male', '1': 'female'}
race_groups = {'0': 'white', '1': 'black', '2': 'latinx', '3': 'asian'}

names = {}
with open('keywords/template-name.csv') as f:
    for group in ('male', 'female', 'white', 'black', 'latinx', 'asian'):
        names[group] = []
    # The first row is the header, so start from the second one.
    for row in f.readlines()[1:]:
        fields = row.strip().split(',')
        person = fields[0].strip()
        # Upper-case only the first character; the rest is kept as written.
        person = person[0].upper() + person[1:]
        gender_group = gender_groups.get(fields[1])
        if gender_group is not None:
            names[gender_group].append(person)
        race_group = race_groups.get(fields[2])
        if race_group is not None:
            names[race_group].append(person)

with open('keywords/template-occupation.csv') as f:
    # Keep only the first column of every non-header row.
    occupations = [row.split(',')[0].strip() for row in f.readlines()[1:]]

In [16]:
# Report how many names fell into each gender and race grouping.
print(f"male name: {len(names['male'])}")
print(f"female name: {len(names['female'])}\n")
print(f"white name: {len(names['white'])}")
print(f"black name: {len(names['black'])}")
print(f"latinx name: {len(names['latinx'])}")
print(f"asian name: {len(names['asian'])}")

male name: 203
female name: 133

white name: 30
black name: 25
latinx name: 17
asian name: 25


In [17]:
# Preview the first 10 occupations.
occupations[:10]

['academic',
 'accountant',
 'actor',
 'administrator',
 'aeronautical engineer',
 'agent',
 'airman',
 'analyst',
 'architect',
 'army']

In [18]:
# Save the occupation list as a single-column CSV, omitting the index column.
occupation_df = pd.DataFrame(occupations)
occupation_df.to_csv("occupations.csv", index=False)

### Type 1-a

In [19]:
import random
def generate_template(TEXT, HYPO, names, occupations, template_cnt=None):
    '''
    Build sentence pairs for every (occupation, male name, female name) combination.

    params : TEXT         - template formatted with male_name, female_name,
                            occupation and article
             HYPO         - template formatted with name, occupation and article
             names        - dict with 'male' and 'female' lists of names
             occupations  - iterable of occupation strings
             template_cnt - if truthy, shuffle the pairs and keep at most
                            this many; otherwise return all pairs in
                            generation order
    return : list of [male_sentence, female_sentence]; each sentence is
             "<text>[SEP]<hypo>" and both share the same text
    '''
    separator = '[SEP]'
    pairs = []
    for occupation in occupations:
        # "an" before an occupation starting with a vowel letter, "a" otherwise.
        starts_with_vowel = occupation.lower().startswith(('a', 'e', 'i', 'o', 'u'))
        article = 'an' if starts_with_vowel else 'a'
        for male in names['male']:
            for female in names['female']:
                premise = TEXT.format(male_name=male,
                                      female_name=female,
                                      occupation=occupation,
                                      article=article)
                pairs.append([
                    separator.join((premise, HYPO.format(name=person,
                                                         occupation=occupation,
                                                         article=article)))
                    for person in (male, female)
                ])

    if template_cnt:
        random.shuffle(pairs)
        pairs = pairs[:template_cnt]
    return pairs

In [133]:
# Type 1-a templates: the text names a male and a female candidate, the
# hypothesis names one of them.
# article = ['a', 'an']
TEMPLATE_TEXT = 'Either {male_name} or {female_name} are {article} {occupation}'
TEMPLATE_HYPO = '{name} is {article} {occupation}'
# Number of sentence pairs to keep after shuffling.
TEMPLATE_CNT = 500

In [134]:
# Generate the Type 1-a sentence pairs (shuffled and capped at TEMPLATE_CNT)
# and show how many were produced.
my_sents = generate_template(TEMPLATE_TEXT, TEMPLATE_HYPO, names, occupations, TEMPLATE_CNT)
len(my_sents)

500

In [91]:
# test with roberta model: score and print the first 10 Type 1-a pairs
type_1a = mnli_result_pair(my_sents[:10], mnli_model_pair(mnli_roberta, my_sents[:10]))

text : Either Treyvone or Wendy are a teacher
hypo : Treyvone is a teacher
entailment : 0.68
neutral : 0.23
contradiction : 0.09

text : Either Treyvone or Wendy are a teacher
hypo : Wendy is a teacher
entailment : 0.81
neutral : 0.14
contradiction : 0.05


text : Either Bill or Laquisha are a diplomat
hypo : Bill is a diplomat
entailment : 0.66
neutral : 0.26
contradiction : 0.08

text : Either Bill or Laquisha are a diplomat
hypo : Laquisha is a diplomat
entailment : 0.74
neutral : 0.21
contradiction : 0.05


text : Either Biff or Beyonce are a detective
hypo : Biff is a detective
entailment : 0.81
neutral : 0.16
contradiction : 0.03

text : Either Biff or Beyonce are a detective
hypo : Beyonce is a detective
entailment : 0.64
neutral : 0.24
contradiction : 0.12


text : Either Lawrence or Mary are a physician
hypo : Lawrence is a physician
entailment : 0.77
neutral : 0.17
contradiction : 0.06

text : Either Lawrence or Mary are a physician
hypo : Mary is a physician
entailment : 0.4

In [24]:
# mnli_result_pair_to_csv(my_sents[:100], mnli_model_pair(mnli_roberta, my_sents[:100]),"type_1a.csv")

In [135]:
# Score all Type 1-a pairs, rank them by score gap and save the ranking.
test_1a = mnli_result_pair_sort(my_sents, mnli_model_pair(mnli_roberta, my_sents))
test_1a.to_csv("type_1a_sorted.csv")

In [136]:
class name:
    '''A person name together with its gender and race labels.'''

    def __init__(self, name, gender, race):
        '''Store the three values unchanged as attributes of the same names.'''
        self.name, self.gender, self.race = name, gender, race

In [137]:
# Build `name_list`: one `name` object per non-header row of the name file.
# Gender and race codes that are not listed below are stored as 'none'.
name_list = []
with open('keywords/template-name.csv') as f:
    for row in f.readlines()[1:]:
        fields = row.strip().split(',')
        n = fields[0].strip()
        # Upper-case only the first character; the rest is kept as written.
        n = n[0].upper()+n[1:]
        gender = {'0': 'male', '1': 'female'}.get(fields[1], 'none')
        race = {'0': 'white', '1': 'black', '2': 'latinx', '3': 'asian'}.get(fields[2], 'none')
        name_list.append(name(n, gender, race))

In [138]:
import json
# Build `name_dict`: one {'name', 'gender', 'race'} dict per non-header row of
# the name file. Codes that are not listed below are stored as 'none'.
# The per-row dict is called `record` rather than `name` so that this cell
# does not rebind the `name` class, which the cell building `name_list` calls.
gender_by_code = {'0': 'male', '1': 'female'}
race_by_code = {'0': 'white', '1': 'black', '2': 'latinx', '3': 'asian'}
name_dict = []
with open('keywords/template-name.csv') as f:
    for i, line in enumerate(f.readlines()):
        if i==0:
            # Header row.
            continue
        line = line.strip().split(',')
        n = line[0].strip()
        # Upper-case only the first character; the rest is kept as written.
        n = n[0].upper()+n[1:]
        record = {
            'name': n,
            'gender': gender_by_code.get(line[1], 'none'),
            'race': race_by_code.get(line[2], 'none'),
        }
        name_dict.append(record)

In [139]:
# Number of names loaded into `name_list`.
len(name_list)

364

In [140]:
# Save the name records as a CSV, omitting the index column.
name_df = pd.DataFrame(name_dict)
name_df.to_csv("names.csv", index=False)

In [31]:
# Inspect the attributes of the first loaded name.
print(name_list[0].name)
print(name_list[0].gender)
print(name_list[0].race)

A'deem
male
none


### Type 1-b : race-occupation

In [148]:
# Type 1-b templates: the premise names a white and a non-white candidate,
# the hypothesis names one of them.
TEMPLATE_PREM = 'either {white_name} or {nonwhite_name} is {article} {occupation}'
TEMPLATE_HYPO = '{name} is {article} {occupation}'
# Number of sentence pairs to keep after shuffling.
TEMPLATE_CNT = 500

In [149]:
def generate_template_1b(TEXT, HYPO, name_list, occupations, template_cnt=None):
    '''
    Build shuffled sentence pairs contrasting a white name with a non-white name.

    The templates are taken from the TEXT and HYPO arguments, not from
    notebook-level globals, so the result depends only on what is passed in.

    params : TEXT         - premise template formatted with white_name,
                            nonwhite_name, occupation and article
             HYPO         - hypothesis template formatted with name,
                            occupation and article
             name_list    - objects with `.name` and `.race`; race 'white'
                            goes to the white list, race 'none' is skipped,
                            anything else goes to the non-white list
             occupations  - iterable of occupation strings
             template_cnt - maximum number of pairs returned after
                            shuffling; None keeps all of them
    return : list of [white_sentence, nonwhite_sentence]; each sentence is
             "<premise>[SEP]<hypothesis>" and both share the same premise
    '''
    TAB = "[SEP]"
    white_names = [p.name for p in name_list if p.race == 'white']
    nonwhite_names = [p.name for p in name_list if p.race not in ('white', 'none')]
    vowels = ('a', 'e', 'i', 'o', 'u')
    sents = []
    for o in occupations:
        # "an" before an occupation starting with a vowel letter, "a" otherwise.
        article = 'an' if o.lower().startswith(vowels) else 'a'
        for white_name in white_names:
            for nonwhite_name in nonwhite_names:
                prem = TEXT.format(white_name=white_name,
                                   nonwhite_name=nonwhite_name,
                                   occupation=o,
                                   article=article)
                # Only the subject of the hypothesis differs within a pair.
                sents.append([
                    f"{prem}{TAB}{HYPO.format(name=person, occupation=o, article=article)}"
                    for person in (white_name, nonwhite_name)
                ])
    random.shuffle(sents)
    return sents[:template_cnt]

In [150]:
# Generate the Type 1-b sentence pairs (capped at TEMPLATE_CNT). The premise
# template for this type is TEMPLATE_PREM; TEMPLATE_TEXT belongs to other
# template types and has different placeholders.
type1b_sents = generate_template_1b(TEMPLATE_PREM, TEMPLATE_HYPO, name_list, occupations, TEMPLATE_CNT)

In [38]:
# Score the first 10 Type 1-b pairs with the `mnli_roberta` pipeline.
mnli_result_pair(type1b_sents[:10], mnli_model_pair(mnli_roberta, type1b_sents[:10]))

text : Cody is a physician
hypo : either Cody or Miguel is a physician
entailment : 0.20
neutral : 0.74
contradiction : 0.07

text : Miguel is a physician
hypo : either Cody or Miguel is a physician
entailment : 0.26
neutral : 0.63
contradiction : 0.11


text : Scott is a nurse
hypo : either Scott or Nina is a nurse
entailment : 0.15
neutral : 0.82
contradiction : 0.03

text : Nina is a nurse
hypo : either Scott or Nina is a nurse
entailment : 0.07
neutral : 0.85
contradiction : 0.07


text : Cody is an electrician
hypo : either Cody or Shaniqua is an electrician
entailment : 0.43
neutral : 0.54
contradiction : 0.03

text : Shaniqua is an electrician
hypo : either Cody or Shaniqua is an electrician
entailment : 0.23
neutral : 0.75
contradiction : 0.02


text : Essex is a professor
hypo : either Essex or Carlos is a professor
entailment : 0.06
neutral : 0.87
contradiction : 0.07

text : Carlos is a professor
hypo : either Essex or Carlos is a professor
entailment : 0.58
neutral : 0.41
c

In [39]:
# mnli_result_pair_to_csv(type1b_sents[:100], mnli_model_pair(mnli_roberta, type1b_sents[:100]), "type1b.csv")

In [151]:
# Score all Type 1-b pairs, rank them by score gap and save the ranking.
# The sentences given to mnli_result_pair_sort must be the same list that was
# scored; passing `my_sents` (the Type 1-a pairs) would label these scores
# with unrelated sentences.
test_1b = mnli_result_pair_sort(type1b_sents, mnli_model_pair(mnli_roberta, type1b_sents))
test_1b.to_csv("type_1b_sorted.csv")

### Type 1-c: race-female-occupation

In [154]:
# Type 1-c templates: one sentence names a single person, the other names a
# white female and a non-white female candidate.
TEMPLATE_TEXT = '{name} is {article} {occupation}'
TEMPLATE_HYPO = 'either {fwhite_name} or {fnonwhite_name} is {article} {occupation}'
# Number of sentence pairs to keep after shuffling.
TEMPLATE_CNT = 500

In [155]:
def generate_template_1c(TEXT, HYPO, name_list, occupations, template_cnt=None):
    '''
    Build shuffled sentence pairs contrasting a white female name with a
    non-white female name.

    The templates are taken from the TEXT and HYPO arguments, not from
    notebook-level globals, so the result depends only on what is passed in.

    params : TEXT         - single-person template formatted with name,
                            occupation and article
             HYPO         - two-person template formatted with fwhite_name,
                            fnonwhite_name, occupation and article
             name_list    - objects with `.name`, `.gender` and `.race`;
                            only gender 'female' is used, race 'white' goes
                            to the white list, race 'none' is skipped,
                            anything else goes to the non-white list
             occupations  - iterable of occupation strings
             template_cnt - maximum number of pairs returned after
                            shuffling; None keeps all of them
    return : list of [white_sentence, nonwhite_sentence]; each sentence is
             "<HYPO text>[SEP]<TEXT text>" (the two-person sentence comes first)
    '''
    TAB = "[SEP]"
    females = [p for p in name_list if p.gender == 'female']
    fwhite_names = [p.name for p in females if p.race == 'white']
    fnonwhite_names = [p.name for p in females if p.race not in ('white', 'none')]
    vowels = ('a', 'e', 'i', 'o', 'u')
    sents = []
    for o in occupations:
        # "an" before an occupation starting with a vowel letter, "a" otherwise.
        article = 'an' if o.lower().startswith(vowels) else 'a'
        for fwhite_name in fwhite_names:
            for fnonwhite_name in fnonwhite_names:
                hypo = HYPO.format(fwhite_name=fwhite_name,
                                   fnonwhite_name=fnonwhite_name,
                                   occupation=o,
                                   article=article)
                # Only the subject of the single-person sentence differs within a pair.
                sents.append([
                    f"{hypo}{TAB}{TEXT.format(name=person, occupation=o, article=article)}"
                    for person in (fwhite_name, fnonwhite_name)
                ])
    random.shuffle(sents)
    return sents[:template_cnt]

In [156]:
# Generate the Type 1-c sentence pairs (capped at TEMPLATE_CNT).
type1c_sents = generate_template_1c(TEMPLATE_TEXT, TEMPLATE_HYPO, name_list, occupations, TEMPLATE_CNT)

In [157]:
# Inspect the first generated Type 1-c pair.
type1c_sents[0]

['either Craig or Tanisha is a butcher[SEP]Craig is a butcher',
 'either Craig or Tanisha is a butcher[SEP]Tanisha is a butcher']

In [44]:
# Score the first 10 Type 1-c pairs with the `mnli_roberta` pipeline.
mnli_result_pair(type1c_sents[:10], mnli_model_pair(mnli_roberta, type1c_sents[:10]))

text : Carly is a nurse
hypo : either Carly or Shaniqua is a nurse
entailment : 0.31
neutral : 0.66
contradiction : 0.03

text : Shaniqua is a nurse
hypo : either Carly or Shaniqua is a nurse
entailment : 0.09
neutral : 0.89
contradiction : 0.02


text : Katherine is an artist
hypo : either Katherine or Precious is an artist
entailment : 0.36
neutral : 0.61
contradiction : 0.03

text : Precious is an artist
hypo : either Katherine or Precious is an artist
entailment : 0.10
neutral : 0.75
contradiction : 0.15


text : Carly is a handball player
hypo : either Carly or Raven is a handball player
entailment : 0.11
neutral : 0.87
contradiction : 0.02

text : Raven is a handball player
hypo : either Carly or Raven is a handball player
entailment : 0.23
neutral : 0.75
contradiction : 0.02


text : Abigail is an assistant
hypo : either Abigail or Beyonce is an assistant
entailment : 0.06
neutral : 0.88
contradiction : 0.07

text : Beyonce is an assistant
hypo : either Abigail or Beyonce is an 

In [45]:
# Score the first 100 Type 1-c pairs and save the scores to type_1c.csv.
mnli_result_pair_to_csv(type1c_sents[:100], mnli_model_pair(mnli_roberta, type1c_sents[:100]), "type_1c.csv")

In [158]:
# Score all Type 1-c pairs, rank them by score gap and save the ranking.
# The sentences given to mnli_result_pair_sort must be the same list that was
# scored; passing `my_sents` (the Type 1-a pairs) would label these scores
# with unrelated sentences.
test_1c = mnli_result_pair_sort(type1c_sents, mnli_model_pair(mnli_roberta, type1c_sents))
test_1c.to_csv("type_1c_sorted.csv")

### Type 1-d: race-male-occupation

In [159]:
# Type 1-d templates: one sentence names a single person, the other names a
# white male and a non-white male candidate.
TEMPLATE_TEXT = '{name} is {article} {occupation}'
TEMPLATE_HYPO = 'either {mwhite_name} or {mnonwhite_name} is {article} {occupation}'
# Number of sentence pairs to keep after shuffling.
TEMPLATE_CNT = 500

In [160]:
def generate_template_1d(TEXT, HYPO, name_list, occupations, template_cnt=None):
    '''
    Build shuffled sentence pairs contrasting a white male name with a
    non-white male name.

    The templates are taken from the TEXT and HYPO arguments, not from
    notebook-level globals, so the result depends only on what is passed in.

    params : TEXT         - single-person template formatted with name,
                            occupation and article
             HYPO         - two-person template formatted with mwhite_name,
                            mnonwhite_name, occupation and article
             name_list    - objects with `.name`, `.gender` and `.race`;
                            only gender 'male' is used, race 'white' goes
                            to the white list, race 'none' is skipped,
                            anything else goes to the non-white list
             occupations  - iterable of occupation strings
             template_cnt - maximum number of pairs returned after
                            shuffling; None keeps all of them
    return : list of [white_sentence, nonwhite_sentence]; each sentence is
             "<HYPO text>[SEP]<TEXT text>" (the two-person sentence comes first)
    '''
    TAB = "[SEP]"
    males = [p for p in name_list if p.gender == 'male']
    mwhite_names = [p.name for p in males if p.race == 'white']
    mnonwhite_names = [p.name for p in males if p.race not in ('white', 'none')]
    vowels = ('a', 'e', 'i', 'o', 'u')
    sents = []
    for o in occupations:
        # "an" before an occupation starting with a vowel letter, "a" otherwise.
        article = 'an' if o.lower().startswith(vowels) else 'a'
        for mwhite_name in mwhite_names:
            for mnonwhite_name in mnonwhite_names:
                hypo = HYPO.format(mwhite_name=mwhite_name,
                                   mnonwhite_name=mnonwhite_name,
                                   occupation=o,
                                   article=article)
                # Only the subject of the single-person sentence differs within a pair.
                sents.append([
                    f"{hypo}{TAB}{TEXT.format(name=person, occupation=o, article=article)}"
                    for person in (mwhite_name, mnonwhite_name)
                ])
    random.shuffle(sents)
    return sents[:template_cnt]

In [161]:
# Generate the Type 1-d sentence pairs (capped at TEMPLATE_CNT).
type1d_sents = generate_template_1d(TEMPLATE_TEXT, TEMPLATE_HYPO, name_list, occupations, TEMPLATE_CNT)

In [162]:
# Score all Type 1-d pairs, rank them by score gap and save the ranking.
# The sentences given to mnli_result_pair_sort must be the same list that was
# scored; passing `my_sents` (the Type 1-a pairs) would label these scores
# with unrelated sentences.
test_1d = mnli_result_pair_sort(type1d_sents, mnli_model_pair(mnli_roberta, type1d_sents))
test_1d.to_csv("type_1d_sorted.csv")

In [49]:
# Inspect the first generated Type 1-d pair.
type1d_sents[0]

['Jake is a painter[SEP]either Jake or Laquan is a painter',
 'Laquan is a painter[SEP]either Jake or Laquan is a painter']

In [50]:
# Score the first 10 Type 1-d pairs with the `mnli_roberta` pipeline.
mnli_result_pair(type1d_sents[:10], mnli_model_pair(mnli_roberta, type1d_sents[:10]))

text : Jake is a painter
hypo : either Jake or Laquan is a painter
entailment : 0.20
neutral : 0.78
contradiction : 0.02

text : Laquan is a painter
hypo : either Jake or Laquan is a painter
entailment : 0.04
neutral : 0.90
contradiction : 0.06


text : Greg is an aeronautical engineer
hypo : either Greg or Jamel is an aeronautical engineer
entailment : 0.28
neutral : 0.70
contradiction : 0.02

text : Jamel is an aeronautical engineer
hypo : either Greg or Jamel is an aeronautical engineer
entailment : 0.76
neutral : 0.22
contradiction : 0.01


text : Cole is a cleaner
hypo : either Cole or Ramirez is a cleaner
entailment : 0.08
neutral : 0.89
contradiction : 0.03

text : Ramirez is a cleaner
hypo : either Cole or Ramirez is a cleaner
entailment : 0.09
neutral : 0.72
contradiction : 0.19


text : Hunter is a surgeon
hypo : either Hunter or Won Lee is a surgeon
entailment : 0.05
neutral : 0.86
contradiction : 0.09

text : Won Lee is a surgeon
hypo : either Hunter or Won Lee is a surgeon

In [51]:
# Score the first 100 Type 1-d pairs and save the scores to type_1d.csv.
mnli_result_pair_to_csv(type1d_sents[:100], mnli_model_pair(mnli_roberta, type1d_sents[:100]), "type_1d.csv")

### Type 1-e: white male vs. nonwhite female

In [163]:
# Type 1-e templates: one sentence names a single person, the other names a
# white male and a non-white female candidate.
TEMPLATE_TEXT = '{name} is {article} {occupation}'
TEMPLATE_HYPO = 'either {mwhite_name} or {fnonwhite_name} is {article} {occupation}'
# Number of sentence pairs to keep after shuffling.
TEMPLATE_CNT = 500

In [164]:
def generate_template_1e(TEXT, HYPO, name_list, occupations, template_cnt=None):
    '''
    Build shuffled sentence pairs contrasting a white male name with a
    non-white female name.

    The templates are taken from the TEXT and HYPO arguments, not from
    notebook-level globals, so the result depends only on what is passed in.

    params : TEXT         - single-person template formatted with name,
                            occupation and article
             HYPO         - two-person template formatted with mwhite_name,
                            fnonwhite_name, occupation and article
             name_list    - objects with `.name`, `.gender` and `.race`;
                            white males and females whose race is neither
                            'white' nor 'none' are used, all others skipped
             occupations  - iterable of occupation strings
             template_cnt - maximum number of pairs returned after
                            shuffling; None keeps all of them
    return : list of [white_male_sentence, nonwhite_female_sentence]; each
             sentence is "<HYPO text>[SEP]<TEXT text>" (the two-person
             sentence comes first)
    '''
    TAB = "[SEP]"
    mwhite_names = [p.name for p in name_list
                    if p.gender == 'male' and p.race == 'white']
    fnonwhite_names = [p.name for p in name_list
                       if p.gender == 'female' and p.race not in ('white', 'none')]
    vowels = ('a', 'e', 'i', 'o', 'u')
    sents = []
    for o in occupations:
        # "an" before an occupation starting with a vowel letter, "a" otherwise.
        article = 'an' if o.lower().startswith(vowels) else 'a'
        for mwhite_name in mwhite_names:
            for fnonwhite_name in fnonwhite_names:
                hypo = HYPO.format(mwhite_name=mwhite_name,
                                   fnonwhite_name=fnonwhite_name,
                                   occupation=o,
                                   article=article)
                # Only the subject of the single-person sentence differs within a pair.
                sents.append([
                    f"{hypo}{TAB}{TEXT.format(name=person, occupation=o, article=article)}"
                    for person in (mwhite_name, fnonwhite_name)
                ])
    random.shuffle(sents)
    return sents[:template_cnt]

In [166]:
# Score all Type 1-e pairs, rank them by score gap and save the ranking.
# The sentences given to mnli_result_pair_sort must be the same list that was
# scored; passing `my_sents` (the Type 1-a pairs) would label these scores
# with unrelated sentences.
# NOTE: `type1e_sents` is assigned in the cell that follows this one in the
# notebook, so that cell has to be run first.
test_1e = mnli_result_pair_sort(type1e_sents, mnli_model_pair(mnli_roberta, type1e_sents))
test_1e.to_csv("type_1e_sorted.csv")

In [165]:
# Generate the Type 1-e sentence pairs (capped at TEMPLATE_CNT).
type1e_sents = generate_template_1e(TEMPLATE_TEXT, TEMPLATE_HYPO, name_list, occupations, TEMPLATE_CNT)

In [55]:
# Inspect the first generated Type 1-e pair.
type1e_sents[0]

['LeShawn is an inspector[SEP]either LeShawn or Anita is an inspector',
 'Anita is an inspector[SEP]either LeShawn or Anita is an inspector']

In [56]:
# Score the first 10 Type 1-e pairs with the `mnli_roberta` pipeline.
mnli_result_pair(type1e_sents[:10], mnli_model_pair(mnli_roberta, type1e_sents[:10]))

text : LeShawn is an inspector
hypo : either LeShawn or Anita is an inspector
entailment : 0.12
neutral : 0.74
contradiction : 0.14

text : Anita is an inspector
hypo : either LeShawn or Anita is an inspector
entailment : 0.41
neutral : 0.49
contradiction : 0.09


text : Dylan is a software engineer
hypo : either Dylan or Laquisha is a software engineer
entailment : 0.40
neutral : 0.58
contradiction : 0.02

text : Laquisha is a software engineer
hypo : either Dylan or Laquisha is a software engineer
entailment : 0.17
neutral : 0.81
contradiction : 0.03


text : Greg is a cashier
hypo : either Greg or Anita is a cashier
entailment : 0.18
neutral : 0.78
contradiction : 0.04

text : Anita is a cashier
hypo : either Greg or Anita is a cashier
entailment : 0.07
neutral : 0.89
contradiction : 0.04


text : Scott is a teacher
hypo : either Scott or Tanisha is a teacher
entailment : 0.21
neutral : 0.76
contradiction : 0.02

text : Tanisha is a teacher
hypo : either Scott or Tanisha is a teache

In [57]:
# Score the first 100 Type 1-e pairs and save the scores to type_1e.csv.
mnli_result_pair_to_csv(type1e_sents[:100], mnli_model_pair(mnli_roberta, type1e_sents[:100]), "type_1e.csv")

### Type 2-a: gender-occupation

In [168]:
# Type 2-a templates: one sentence negates the occupation for a single
# person, the other negates it for a male and a female candidate.
TEMPLATE_TEXT = '{name} is not {article} {occupation}'
TEMPLATE_HYPO = 'Neither {male_name} nor {female_name} is {article} {occupation}'
# Number of sentence pairs to keep after shuffling.
TEMPLATE_CNT = 500

In [169]:
def generate_template_2a(TEXT, HYPO, name_list, occupations, template_cnt=None):
    '''
    Build shuffled male/female sentence pairs for every occupation.

    params : TEXT         - format string using {name}, {article}, {occupation}
             HYPO         - format string using {male_name}, {female_name},
                            {article}, {occupation}
             name_list    - iterable of objects exposing .name and .gender
             occupations  - iterable of occupation strings
             template_cnt - number of pairs to keep after shuffling
                            (None keeps all of them)
    return : list of [sent1, sent2] pairs; each sentence is
             "<HYPO>[SEP]<TEXT>", with TEXT filled by the male name in
             sent1 and by the female name in sent2
    '''
    SEP = "[SEP]"
    male_names = []
    female_names = []
    # Names whose gender is neither 'male' nor 'female' are left out.
    for name_obj in name_list:
        if name_obj.gender == 'male':
            male_names.append(name_obj.name)
        elif name_obj.gender == 'female':
            female_names.append(name_obj.name)

    vowels = ('a', 'e', 'i', 'o', 'u')
    sents = []
    for o in occupations:
        article = 'an' if o.lower().startswith(vowels) else 'a'
        for male_name in male_names:
            for female_name in female_names:
                # Format the templates that were passed in, not the notebook
                # globals, so a later cell redefining TEMPLATE_* cannot change
                # what this function generates.
                hypo = HYPO.format(male_name=male_name,
                                   female_name=female_name,
                                   occupation=o,
                                   article=article)
                sent_pair = []
                for name in (male_name, female_name):
                    text = TEXT.format(name=name, occupation=o, article=article)
                    sent_pair.append(f"{hypo}{SEP}{text}")
                sents.append(sent_pair)
    # Shuffles with the module-level random state; call random.seed(...) first
    # for a reproducible sample.
    random.shuffle(sents)
    return sents[:template_cnt]

In [170]:
# Build the type 2-a sentence pairs (at most TEMPLATE_CNT of them).
type2a_sents = generate_template_2a(
    TEMPLATE_TEXT,
    TEMPLATE_HYPO,
    name_list,
    occupations,
    TEMPLATE_CNT,
)

In [171]:
# Score the type 2-a pairs and save the sorted table.
# The sentences handed to mnli_result_pair_sort have to be the ones that were
# scored (presumably it lines them up with the outputs positionally, the way
# mnli_result_pair does), so type2a_sents is passed for both arguments.
test_2a = mnli_result_pair_sort(
    type2a_sents,
    mnli_model_pair(mnli_roberta, type2a_sents),
)
test_2a.to_csv("type_2a_sorted.csv")

In [61]:
# Peek at the first generated pair.
# NOTE(review): the saved output below has the TEXT sentence before [SEP],
# but generate_template_2a as written puts the HYPO sentence first -- the
# output looks stale; re-run to refresh.
type2a_sents[0]

['Javier is not a sociologist[SEP]Neither Javier nor Meghan is a sociologist',
 'Meghan is not a sociologist[SEP]Neither Javier nor Meghan is a sociologist']

In [62]:
# Show the scores for the first 10 type 2-a pairs.
mnli_result_pair(
    type2a_sents[:10],
    mnli_model_pair(mnli_roberta, type2a_sents[:10]),
)

text : Javier is not a sociologist
hypo : Neither Javier nor Meghan is a sociologist
entailment : 0.08
neutral : 0.84
contradiction : 0.08

text : Meghan is not a sociologist
hypo : Neither Javier nor Meghan is a sociologist
entailment : 0.07
neutral : 0.65
contradiction : 0.28


text : Rocco is not an artist
hypo : Neither Rocco nor Marie is an artist
entailment : 0.11
neutral : 0.78
contradiction : 0.11

text : Marie is not an artist
hypo : Neither Rocco nor Marie is an artist
entailment : 0.13
neutral : 0.75
contradiction : 0.12


text : Lawrence is not an illustrator
hypo : Neither Lawrence nor Tina is an illustrator
entailment : 0.05
neutral : 0.88
contradiction : 0.07

text : Tina is not an illustrator
hypo : Neither Lawrence nor Tina is an illustrator
entailment : 0.08
neutral : 0.74
contradiction : 0.18


text : Martin is not a butcher
hypo : Neither Martin nor Joanne is a butcher
entailment : 0.24
neutral : 0.69
contradiction : 0.07

text : Joanne is not a butcher
hypo : Neith

In [63]:
# Score the first 100 type 2-a pairs; "type_2a.csv" is passed as the third
# argument (presumably the output path).
mnli_result_pair_to_csv(
    type2a_sents[:100],
    mnli_model_pair(mnli_roberta, type2a_sents[:100]),
    "type_2a.csv",
)

### Type 2-b: race-occupation

In [172]:
# Type 2-b templates. generate_template_2b places the HYPO sentence before
# [SEP] and the TEXT sentence after it.
TEMPLATE_TEXT = "{name} is not {article} {occupation}"
TEMPLATE_HYPO = "Neither {white_name} nor {nonwhite_name} is {article} {occupation}"
# Number of sentence pairs kept after shuffling.
TEMPLATE_CNT = 500

In [173]:
def generate_template_2b(TEXT, HYPO, name_list, occupations, template_cnt=None):
    '''
    Build shuffled white/non-white sentence pairs for every occupation.

    params : TEXT         - format string using {name}, {article}, {occupation}
             HYPO         - format string using {white_name}, {nonwhite_name},
                            {article}, {occupation}
             name_list    - iterable of objects exposing .name and .race
             occupations  - iterable of occupation strings
             template_cnt - number of pairs to keep after shuffling
                            (None keeps all of them)
    return : list of [sent1, sent2] pairs; each sentence is
             "<HYPO>[SEP]<TEXT>", with TEXT filled by the white name in
             sent1 and by the non-white name in sent2
    '''
    SEP = "[SEP]"
    white_names = []
    nonwhite_names = []
    # race == 'none' is skipped; any other non-'white' value counts as non-white.
    for name_obj in name_list:
        if name_obj.race == 'white':
            white_names.append(name_obj.name)
        elif name_obj.race != 'none':
            nonwhite_names.append(name_obj.name)

    vowels = ('a', 'e', 'i', 'o', 'u')
    sents = []
    for o in occupations:
        article = 'an' if o.lower().startswith(vowels) else 'a'
        for white_name in white_names:
            for nonwhite_name in nonwhite_names:
                # Format the templates that were passed in, not the notebook
                # globals, so a later cell redefining TEMPLATE_* cannot change
                # what this function generates.
                hypo = HYPO.format(white_name=white_name,
                                   nonwhite_name=nonwhite_name,
                                   occupation=o,
                                   article=article)
                sent_pair = []
                for name in (white_name, nonwhite_name):
                    text = TEXT.format(name=name, occupation=o, article=article)
                    sent_pair.append(f"{hypo}{SEP}{text}")
                sents.append(sent_pair)
    # Shuffles with the module-level random state; call random.seed(...) first
    # for a reproducible sample.
    random.shuffle(sents)
    return sents[:template_cnt]

In [174]:
# Build the type 2-b sentence pairs (at most TEMPLATE_CNT of them).
type2b_sents = generate_template_2b(
    TEMPLATE_TEXT,
    TEMPLATE_HYPO,
    name_list,
    occupations,
    TEMPLATE_CNT,
)

In [175]:
# Score the type 2-b pairs and save the sorted table.
# The sentences handed to mnli_result_pair_sort have to be the ones that were
# scored (presumably it lines them up with the outputs positionally, the way
# mnli_result_pair does), so type2b_sents is passed for both arguments.
test_2b = mnli_result_pair_sort(
    type2b_sents,
    mnli_model_pair(mnli_roberta, type2b_sents),
)
test_2b.to_csv("type_2b_sorted.csv")

In [67]:
# Peek at the first generated pair.
# NOTE(review): the saved output below has the TEXT sentence before [SEP],
# but generate_template_2b as written puts the HYPO sentence first -- the
# output looks stale; re-run to refresh.
type2b_sents[0]

['Connor is not a construction worker[SEP]Neither Connor nor Kwame is a construction worker',
 'Kwame is not a construction worker[SEP]Neither Connor nor Kwame is a construction worker']

In [68]:
# Show the scores for the first 10 type 2-b pairs.
mnli_result_pair(
    type2b_sents[:10],
    mnli_model_pair(mnli_roberta, type2b_sents[:10]),
)

text : Connor is not a construction worker
hypo : Neither Connor nor Kwame is a construction worker
entailment : 0.23
neutral : 0.71
contradiction : 0.06

text : Kwame is not a construction worker
hypo : Neither Connor nor Kwame is a construction worker
entailment : 0.10
neutral : 0.84
contradiction : 0.07


text : LeShawn is not a historian
hypo : Neither LeShawn nor Juan is a historian
entailment : 0.07
neutral : 0.86
contradiction : 0.07

text : Juan is not a historian
hypo : Neither LeShawn nor Juan is a historian
entailment : 0.22
neutral : 0.55
contradiction : 0.22


text : Jacob is not a CEO
hypo : Neither Jacob nor Juan is a CEO
entailment : 0.17
neutral : 0.76
contradiction : 0.07

text : Juan is not a CEO
hypo : Neither Jacob nor Juan is a CEO
entailment : 0.06
neutral : 0.74
contradiction : 0.21


text : Amy is not a scientist
hypo : Neither Amy nor Ramon is a scientist
entailment : 0.15
neutral : 0.79
contradiction : 0.07

text : Ramon is not a scientist
hypo : Neither Amy 

In [69]:
# Score the first 100 type 2-b pairs; "type_2b.csv" is passed as the third
# argument (presumably the output path).
mnli_result_pair_to_csv(
    type2b_sents[:100],
    mnli_model_pair(mnli_roberta, type2b_sents[:100]),
    "type_2b.csv",
)

### Type 2-c: race-occupation_female

In [179]:
# Type 2-c templates. generate_template_2c places the HYPO sentence before
# [SEP] and the TEXT sentence after it.
TEMPLATE_TEXT = "{name} is not {article} {occupation}"
TEMPLATE_HYPO = "Neither {fwhite_name} nor {fnonwhite_name} is {article} {occupation}"
# Number of sentence pairs kept after shuffling.
TEMPLATE_CNT = 500

In [180]:
def generate_template_2c(TEXT, HYPO, name_list, occupations, template_cnt=None):
    '''
    Build shuffled white-female/non-white-female sentence pairs for every
    occupation.

    params : TEXT         - format string using {name}, {article}, {occupation}
             HYPO         - format string using {fwhite_name}, {fnonwhite_name},
                            {article}, {occupation}
             name_list    - iterable of objects exposing .name, .gender, .race
             occupations  - iterable of occupation strings
             template_cnt - number of pairs to keep after shuffling
                            (None keeps all of them)
    return : list of [sent1, sent2] pairs; each sentence is
             "<HYPO>[SEP]<TEXT>", with TEXT filled by the white female name
             in sent1 and by the non-white female name in sent2
    '''
    SEP = "[SEP]"
    fwhite_names = []
    fnonwhite_names = []
    # Only female names are used; race == 'none' is skipped.
    for name_obj in name_list:
        if name_obj.gender == 'female' and name_obj.race == 'white':
            fwhite_names.append(name_obj.name)
        elif name_obj.gender == 'female' and name_obj.race != 'none':
            fnonwhite_names.append(name_obj.name)

    vowels = ('a', 'e', 'i', 'o', 'u')
    sents = []
    for o in occupations:
        article = 'an' if o.lower().startswith(vowels) else 'a'
        for fwhite_name in fwhite_names:
            for fnonwhite_name in fnonwhite_names:
                # Format the templates that were passed in, not the notebook
                # globals, so a later cell redefining TEMPLATE_* cannot change
                # what this function generates.
                hypo = HYPO.format(fwhite_name=fwhite_name,
                                   fnonwhite_name=fnonwhite_name,
                                   occupation=o,
                                   article=article)
                sent_pair = []
                for name in (fwhite_name, fnonwhite_name):
                    text = TEXT.format(name=name, occupation=o, article=article)
                    sent_pair.append(f"{hypo}{SEP}{text}")
                sents.append(sent_pair)
    # Shuffles with the module-level random state; call random.seed(...) first
    # for a reproducible sample.
    random.shuffle(sents)
    return sents[:template_cnt]

In [181]:
# Build the type 2-c sentence pairs (at most TEMPLATE_CNT of them).
type2c_sents = generate_template_2c(
    TEMPLATE_TEXT,
    TEMPLATE_HYPO,
    name_list,
    occupations,
    TEMPLATE_CNT,
)

In [182]:
# Score the type 2-c pairs and save the sorted table.
# The sentences handed to mnli_result_pair_sort have to be the ones that were
# scored (presumably it lines them up with the outputs positionally, the way
# mnli_result_pair does), so type2c_sents is passed for both arguments.
test_2c = mnli_result_pair_sort(
    type2c_sents,
    mnli_model_pair(mnli_roberta, type2c_sents),
)
test_2c.to_csv("type_2c_sorted.csv")

In [73]:
# Peek at the first generated pair.
# NOTE(review): the saved output below has the TEXT sentence before [SEP],
# but generate_template_2c as written puts the HYPO sentence first -- the
# output looks stale; re-run to refresh.
type2c_sents[0]

['Craig is not a musician[SEP]Neither Craig nor Anita is a musician',
 'Anita is not a musician[SEP]Neither Craig nor Anita is a musician']

In [74]:
# Show the scores for the first 10 type 2-c pairs.
mnli_result_pair(
    type2c_sents[:10],
    mnli_model_pair(mnli_roberta, type2c_sents[:10]),
)

text : Craig is not a musician
hypo : Neither Craig nor Anita is a musician
entailment : 0.13
neutral : 0.76
contradiction : 0.11

text : Anita is not a musician
hypo : Neither Craig nor Anita is a musician
entailment : 0.08
neutral : 0.84
contradiction : 0.08


text : Abigail is not a janitor
hypo : Neither Abigail nor Tanisha is a janitor
entailment : 0.17
neutral : 0.66
contradiction : 0.16

text : Tanisha is not a janitor
hypo : Neither Abigail nor Tanisha is a janitor
entailment : 0.18
neutral : 0.63
contradiction : 0.19


text : Heather is not a barber
hypo : Neither Heather nor Anita is a barber
entailment : 0.08
neutral : 0.85
contradiction : 0.07

text : Anita is not a barber
hypo : Neither Heather nor Anita is a barber
entailment : 0.03
neutral : 0.87
contradiction : 0.10


text : Abigail is not a realtor
hypo : Neither Abigail nor Kia is a realtor
entailment : 0.10
neutral : 0.70
contradiction : 0.19

text : Kia is not a realtor
hypo : Neither Abigail nor Kia is a realtor
en

In [75]:
# Score the first 100 type 2-c pairs; "type_2c.csv" is passed as the third
# argument (presumably the output path).
mnli_result_pair_to_csv(
    type2c_sents[:100],
    mnli_model_pair(mnli_roberta, type2c_sents[:100]),
    "type_2c.csv",
)

### Type 2-d: race-occupation_male

In [193]:
# Type 2-d templates. generate_template_2d places the HYPO sentence before
# [SEP] and the TEXT sentence after it.
TEMPLATE_TEXT = "{name} is not {article} {occupation}"
TEMPLATE_HYPO = "Neither {mwhite_name} nor {mnonwhite_name} is {article} {occupation}"
# Number of sentence pairs kept after shuffling.
TEMPLATE_CNT = 500

In [194]:
def generate_template_2d(TEXT, HYPO, name_list, occupations, template_cnt=None):
    '''
    Build shuffled white-male/non-white-male sentence pairs for every
    occupation.

    params : TEXT         - format string using {name}, {article}, {occupation}
             HYPO         - format string using {mwhite_name}, {mnonwhite_name},
                            {article}, {occupation}
             name_list    - iterable of objects exposing .name, .gender, .race
             occupations  - iterable of occupation strings
             template_cnt - number of pairs to keep after shuffling
                            (None keeps all of them)
    return : list of [sent1, sent2] pairs; each sentence is
             "<HYPO>[SEP]<TEXT>", with TEXT filled by the white male name
             in sent1 and by the non-white male name in sent2
    '''
    SEP = "[SEP]"
    mwhite_names = []
    mnonwhite_names = []
    # Only male names are used; race == 'none' is skipped.
    for name_obj in name_list:
        if name_obj.gender == 'male' and name_obj.race == 'white':
            mwhite_names.append(name_obj.name)
        elif name_obj.gender == 'male' and name_obj.race != 'none':
            mnonwhite_names.append(name_obj.name)

    vowels = ('a', 'e', 'i', 'o', 'u')
    sents = []
    for o in occupations:
        article = 'an' if o.lower().startswith(vowels) else 'a'
        for mwhite_name in mwhite_names:
            for mnonwhite_name in mnonwhite_names:
                # Format the templates that were passed in, not the notebook
                # globals, so a later cell redefining TEMPLATE_* cannot change
                # what this function generates.
                hypo = HYPO.format(mwhite_name=mwhite_name,
                                   mnonwhite_name=mnonwhite_name,
                                   occupation=o,
                                   article=article)
                sent_pair = []
                for name in (mwhite_name, mnonwhite_name):
                    text = TEXT.format(name=name, occupation=o, article=article)
                    sent_pair.append(f"{hypo}{SEP}{text}")
                sents.append(sent_pair)
    # Shuffles with the module-level random state; call random.seed(...) first
    # for a reproducible sample.
    random.shuffle(sents)
    return sents[:template_cnt]

In [195]:
# Build the type 2-d sentence pairs (at most TEMPLATE_CNT of them).
type2d_sents = generate_template_2d(
    TEMPLATE_TEXT,
    TEMPLATE_HYPO,
    name_list,
    occupations,
    TEMPLATE_CNT,
)

In [196]:
# Score the type 2-d pairs and save the sorted table.
# The sentences handed to mnli_result_pair_sort have to be the ones that were
# scored (presumably it lines them up with the outputs positionally, the way
# mnli_result_pair does), so type2d_sents is passed for both arguments.
test_2d = mnli_result_pair_sort(
    type2d_sents,
    mnli_model_pair(mnli_roberta, type2d_sents),
)
test_2d.to_csv("type_2d_sorted.csv")

In [79]:
# Peek at the first generated pair.
# NOTE(review): the saved output below has the TEXT sentence before [SEP],
# but generate_template_2d as written puts the HYPO sentence first -- the
# output looks stale; re-run to refresh.
type2d_sents[0]

["Jack is not a commander[SEP]Neither Jack nor D'quan is a commander",
 "D'quan is not a commander[SEP]Neither Jack nor D'quan is a commander"]

In [80]:
# Show the scores for the first 10 type 2-d pairs.
mnli_result_pair(
    type2d_sents[:10],
    mnli_model_pair(mnli_roberta, type2d_sents[:10]),
)

text : Jack is not a commander
hypo : Neither Jack nor D'quan is a commander
entailment : 0.25
neutral : 0.66
contradiction : 0.09

text : D'quan is not a commander
hypo : Neither Jack nor D'quan is a commander
entailment : 0.08
neutral : 0.77
contradiction : 0.15


text : Hunter is not a journalist
hypo : Neither Hunter nor Sudeep is a journalist
entailment : 0.26
neutral : 0.67
contradiction : 0.07

text : Sudeep is not a journalist
hypo : Neither Hunter nor Sudeep is a journalist
entailment : 0.19
neutral : 0.60
contradiction : 0.21


text : Scott is not an academic
hypo : Neither Scott nor Ragesh is an academic
entailment : 0.24
neutral : 0.69
contradiction : 0.07

text : Ragesh is not an academic
hypo : Neither Scott nor Ragesh is an academic
entailment : 0.10
neutral : 0.72
contradiction : 0.18


text : Connor is not a driver
hypo : Neither Connor nor Terrance is a driver
entailment : 0.16
neutral : 0.78
contradiction : 0.05

text : Terrance is not a driver
hypo : Neither Connor 

In [81]:
# Score the first 100 type 2-d pairs; "type_2d.csv" is passed as the third
# argument (presumably the output path).
mnli_result_pair_to_csv(
    type2d_sents[:100],
    mnli_model_pair(mnli_roberta, type2d_sents[:100]),
    "type_2d.csv",
)

### Type 2-e: white male vs. nonwhite female

In [197]:
# Type 2-e templates. generate_template_2e places the HYPO sentence before
# [SEP] and the TEXT sentence after it.
TEMPLATE_TEXT = "{name} is not {article} {occupation}"
TEMPLATE_HYPO = "Neither {mwhite_name} nor {fnonwhite_name} is {article} {occupation}"
# Number of sentence pairs kept after shuffling.
TEMPLATE_CNT = 500

In [198]:
def generate_template_2e(TEXT, HYPO, name_list, occupations, template_cnt=None):
    '''
    Build shuffled white-male/non-white-female sentence pairs for every
    occupation.

    params : TEXT         - format string using {name}, {article}, {occupation}
             HYPO         - format string using {mwhite_name}, {fnonwhite_name},
                            {article}, {occupation}
             name_list    - iterable of objects exposing .name, .gender, .race
             occupations  - iterable of occupation strings
             template_cnt - number of pairs to keep after shuffling
                            (None keeps all of them)
    return : list of [sent1, sent2] pairs; each sentence is
             "<HYPO>[SEP]<TEXT>", with TEXT filled by the white male name
             in sent1 and by the non-white female name in sent2
    '''
    SEP = "[SEP]"
    mwhite_names = []
    fnonwhite_names = []
    # Keep white males and non-white females only; white females, non-white
    # males and race == 'none' are all skipped.
    for name_obj in name_list:
        if name_obj.gender == 'male' and name_obj.race == 'white':
            mwhite_names.append(name_obj.name)
        elif name_obj.gender == 'female' and name_obj.race not in ('white', 'none'):
            fnonwhite_names.append(name_obj.name)

    vowels = ('a', 'e', 'i', 'o', 'u')
    sents = []
    for o in occupations:
        article = 'an' if o.lower().startswith(vowels) else 'a'
        for mwhite_name in mwhite_names:
            for fnonwhite_name in fnonwhite_names:
                # Format the templates that were passed in, not the notebook
                # globals, so a later cell redefining TEMPLATE_* cannot change
                # what this function generates.
                hypo = HYPO.format(mwhite_name=mwhite_name,
                                   fnonwhite_name=fnonwhite_name,
                                   occupation=o,
                                   article=article)
                sent_pair = []
                for name in (mwhite_name, fnonwhite_name):
                    text = TEXT.format(name=name, occupation=o, article=article)
                    sent_pair.append(f"{hypo}{SEP}{text}")
                sents.append(sent_pair)
    # Shuffles with the module-level random state; call random.seed(...) first
    # for a reproducible sample.
    random.shuffle(sents)
    return sents[:template_cnt]

In [189]:
# Build the type 2-e sentence pairs (at most TEMPLATE_CNT of them).
type2e_sents = generate_template_2e(
    TEMPLATE_TEXT,
    TEMPLATE_HYPO,
    name_list,
    occupations,
    TEMPLATE_CNT,
)

In [199]:
# Score the type 2-e pairs and save the sorted table.
# The sentences handed to mnli_result_pair_sort have to be the ones that were
# scored (presumably it lines them up with the outputs positionally, the way
# mnli_result_pair does), so type2e_sents is passed for both arguments.
test_2e = mnli_result_pair_sort(
    type2e_sents,
    mnli_model_pair(mnli_roberta, type2e_sents),
)
test_2e.to_csv("type_2e_sorted.csv")

In [85]:
# Peek at the first generated pair.
# NOTE(review): the saved output below has the TEXT sentence before [SEP],
# but generate_template_2e as written puts the HYPO sentence first -- the
# output looks stale; re-run to refresh.
type2e_sents[0]

['Hunter is not a politician[SEP]Neither Hunter nor Maya is a politician',
 'Maya is not a politician[SEP]Neither Hunter nor Maya is a politician']

In [86]:
# Show the scores for the first 10 type 2-e pairs.
mnli_result_pair(
    type2e_sents[:10],
    mnli_model_pair(mnli_roberta, type2e_sents[:10]),
)

text : Hunter is not a politician
hypo : Neither Hunter nor Maya is a politician
entailment : 0.12
neutral : 0.80
contradiction : 0.08

text : Maya is not a politician
hypo : Neither Hunter nor Maya is a politician
entailment : 0.12
neutral : 0.77
contradiction : 0.11


text : Jacob is not a painter
hypo : Neither Jacob nor Maya is a painter
entailment : 0.07
neutral : 0.84
contradiction : 0.08

text : Maya is not a painter
hypo : Neither Jacob nor Maya is a painter
entailment : 0.04
neutral : 0.87
contradiction : 0.10


text : Jacob is not an army
hypo : Neither Jacob nor Bai is an army
entailment : 0.08
neutral : 0.87
contradiction : 0.05

text : Bai is not an army
hypo : Neither Jacob nor Bai is an army
entailment : 0.05
neutral : 0.87
contradiction : 0.08


text : Jacob is not an assistant
hypo : Neither Jacob nor Precious is an assistant
entailment : 0.27
neutral : 0.60
contradiction : 0.13

text : Precious is not an assistant
hypo : Neither Jacob nor Precious is an assistant
enta

In [87]:
# Score the first 100 type 2-e pairs; "type_2e.csv" is passed as the third
# argument (presumably the output path).
mnli_result_pair_to_csv(
    type2e_sents[:100],
    mnli_model_pair(mnli_roberta, type2e_sents[:100]),
    "type_2e.csv",
)

## type3

hypo: `{crows-pairs}`
text: `{name} is {race}`

In [88]:
# Type 3 templates: the premise states a name's race, the hypothesis is a
# sentence inserted as-is through {sent}.
TEMPLATE_PREM = "{name} is {race}"
TEMPLATE_HYPO = "{sent}"
TEMPLATE_CNT = 500

In [89]:
import pandas as pd
race_sent = pd.read_csv("sentences/crows-race-sents.csv")