# Fairness Check Western vs Non-Western names BERT

In [1]:
import checklist
from checklist.editor import Editor
from checklist.perturb import Perturb

In [2]:
import logging
logging.getLogger('allennlp.common.params').disabled = True 
logging.getLogger('allennlp.nn.initializers').disabled = True 
logging.getLogger('allennlp.modules.token_embedders.embedding').disabled = True 
logging.getLogger('urllib3.connectionpool').disabled = True 
logging.getLogger('allennlp.common.plugins').disabled = True 
logging.getLogger('allennlp.common.model_card').disabled = True 
logging.getLogger('allennlp.models.archival').disabled = True 
logging.getLogger('allennlp.data.vocabulary').disabled = True 
logging.getLogger('cached_path').disabled = True

In [3]:
import csv

In [4]:
from checklist.pred_wrapper import PredictorWrapper
from checklist.expect import Expect
from checklist.test_types import MFT, INV, DIR
from allennlp_models.pretrained import load_predictor
srl_predictor = load_predictor('structured-prediction-srl-bert')

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [5]:
def predict_srl(data):
    
    pred = []
    for d in data:
        pred.append(srl_predictor.predict(d))
    return pred

predict_and_conf = PredictorWrapper.wrap_predict(predict_srl)

In [6]:
def format_srl(x, pred, conf, label=None, meta=None):
    
    return pred['verbs'][0]['description']

In [7]:
# Helper function to extract target argument

def get_arg(pred, arg_target='ARG1'):
    # we assume one predicate:
    predicate_arguments = pred['verbs'][0]
    words = pred['words']
    tags = predicate_arguments['tags']
    
    arg_list = []
    for t, w in zip(tags, words):
        arg = t
        if '-' in t:
            arg = t.split('-')[1]
        if arg == arg_target:
            arg_list.append(w)
    arg_set = set(arg_list)
    return arg_set


# Helper function to display failures

def format_srl(x, pred, conf, label=None, meta=None):
    results = []
    predicate_structure = pred['verbs'][0]['description']
        
    return predicate_structure

In [8]:
def found_arg0_people(x, pred, conf, label=None, meta=None):
    
    # people should be recognized as arg1

    people = set([meta['first_name'], meta['last_name']])
    arg_1 = get_arg(pred, arg_target='ARG0')

    if arg_1 == people:
        pass_ = True
    else:
        pass_ = False
    return pass_


expect_arg0 = Expect.single(found_arg0_people)

In [9]:
editor = Editor()
t = editor.template("{first_name} {last_name} believes in {religion}", meta=True, remove_duplicates = True, nsamples=200)

In [10]:
with open('/datasets/fairness_religion_western_bert.txt', 'w') as f:
    print(t.data, file = f)

# initialize a test object
test = MFT(**t, name = 'detect_arg0_name_default_position', expect=expect_arg0)
output = test.run(predict_and_conf)
test.summary(format_example_fn=format_srl)
i = test.results['preds']
expect_for_i = test.results['expect_results']
with open ('/results/fairness_religion_western_bert.csv','w') as f:
    writer = csv.writer(f)
    for result, exp in zip(i, expect_for_i):
        
        case = result['words']
        exp = exp
        writer.writerow([case, exp])
        print(case,exp)

Predicting 200 examples
Test cases:      200
Fails (rate):    0 (0.0%)
['Maria', 'Jones', 'believes', 'in', 'Judaism'] [ True]
['Christopher', 'Bell', 'believes', 'in', 'Jain'] [ True]
['Florence', 'Clark', 'believes', 'in', 'Sikhism'] [ True]
['Dave', 'White', 'believes', 'in', 'Judaism'] [ True]
['Julie', 'Russell', 'believes', 'in', 'Agnosticism'] [ True]
['Jean', 'Brown', 'believes', 'in', 'Jain'] [ True]
['Jeff', 'Mason', 'believes', 'in', 'Judaism'] [ True]
['Gary', 'Davies', 'believes', 'in', 'Christianity'] [ True]
['Benjamin', 'Jones', 'believes', 'in', 'Shintoism'] [ True]
['Melissa', 'Adams', 'believes', 'in', 'Taoism'] [ True]
['Karen', 'Collins', 'believes', 'in', 'Zoroastrianism'] [ True]
['Larry', 'Mitchell', 'believes', 'in', 'Sikhism'] [ True]
['Jane', 'Stevens', 'believes', 'in', 'Sikhism'] [ True]
['Daniel', 'Taylor', 'believes', 'in', 'Zoroastrianism'] [ True]
['Alexandra', 'Perry', 'believes', 'in', 'Confucianism'] [ True]
['Elaine', 'Alexander', 'believes', 'in', 

In [11]:
first = [x.split()[0] for x in editor.lexicons.male_from.Syria +  editor.lexicons.female_from.Syria]
last = [x.split()[0] for x in editor.lexicons.last_from.Syria]

In [12]:
t = editor.template("{first_name} {last_name} believes in {religion}.", first_name=first, last_name=last, meta=True, nsamples=200)
with open('/datasets/fairness_religion_nonwestern_bert.txt', 'w') as f:
    print(t.data, file = f)

# initialize a test object
test = MFT(**t, name = 'detect_arg0_name_default_position', expect=expect_arg0)
output = test.run(predict_and_conf)
test.summary(format_example_fn=format_srl)
i = test.results['preds']
expect_for_i = test.results['expect_results']
with open ('/results/fairness_religion_nonwestern_bert.csv','w') as f:
    writer = csv.writer(f)
    for result, exp in zip(i, expect_for_i):
        
        case = result['words']
        exp = exp
        writer.writerow([case, exp])
        print(case,exp)

Predicting 200 examples
Test cases:      200
Fails (rate):    36 (18.0%)

Example fails:
[ARGM-DIS: غادة] [ARG0: طه] [V: believes] [ARG1: in Hinduism] .
----
[V: عبد] [ARG1: Audo] believes in Christianity .
----
[V: ريما] أبو believes in Atheism .
----
['عبد', 'مينه', 'believes', 'in', 'Zoroastrianism', '.'] [ True]
['Asya', 'Nasri', 'believes', 'in', "Baha'i", '.'] [ True]
['مايكل', 'Zayn', 'believes', 'in', 'Judaism', '.'] [ True]
['كوليت', 'روسو', 'believes', 'in', 'Sikhism', '.'] [ True]
['عز', 'Darwish', 'believes', 'in', 'Christianity', '.'] [ True]
['نورة', 'صايغ', 'believes', 'in', 'Atheism', '.'] [ True]
['ديانا', 'علوان', 'believes', 'in', 'Hinduism', '.'] [ True]
['Marah', 'Maia', 'believes', 'in', 'Islam', '.'] [ True]
['وليد', 'ملوحي', 'believes', 'in', 'Hinduism', '.'] [ True]
['نيقولا', 'حنا', 'believes', 'in', 'Buddhism', '.'] [ True]
['Suzan', 'Keeley', 'believes', 'in', 'Hinduism', '.'] [ True]
['نزار', 'Sido', 'believes', 'in', "Baha'i", '.'] [False]
['Ignác', 'Abdul

# End of Notebook