Check whether the occupational gender stereotypes derived from labor statistics (Zhao et al. 2018), which are used
for WinoMT, correlate with the distribution of female and male occupation forms in the WMT19 EN–DE parallel data.

In [1]:
import json
from collections import defaultdict
from pathlib import Path

import numpy as np
from scipy import stats

In [2]:
# Percentage of women per occupation, as listed in Zhao et al. (2018):
# https://aclanthology.org/N18-2003/
# The entry order is kept as-is on purpose (dicts preserve insertion order).
winobias_female_percentages = {
    'carpenter': 2,
    'editor': 52,
    'mechanic': 4,
    'designer': 54,
    'construction worker': 4,
    'accountant': 61,
    'laborer': 4,
    'auditor': 61,
    'driver': 6,
    'writer': 63,
    'sheriff': 14,
    'baker': 65,
    'mover': 18,
    'clerk': 72,
    'developer': 20,
    'cashier': 73,
    'farmer': 22,
    'counselor': 73,
    'guard': 22,
    'attendant': 76,
    'librarian': 84,
    'chief': 27,
    'teacher': 78,
    'janitor': 34,
    'tailor': 80,
    'lawyer': 35,
    'cook': 38,
    'assistant': 85,
    'physician': 38,
    'cleaner': 89,
    'CEO': 39,
    'housekeeper': 89,
    'analyst': 41,
    'nurse': 90,
    'manager': 43,
    'receptionist': 90,
    'supervisor': 44,
    'hairdresser': 92,
    'salesperson': 48,
    'secretary': 95,
}

# Same figures rescaled from percentages (0-100) to fractions (0-1).
winobias_female_ratios = {
    occupation: percent / 100
    for occupation, percent in winobias_female_percentages.items()
}

In [3]:
# Compute ratios of female forms in the training data
#
# Each row of the JSON file is indexed as row[0] = occupation, row[1] = a label
# string and row[2] = a count. Only rows whose label contains "_original_" and
# either "_male" or "_female" are used.
# JSON is UTF-8 by specification, so do not rely on the platform default encoding.
with open(Path(".") / "results" / "occupations_en-de.count_results.json", encoding="utf-8") as f:
    wmt19_data = json.load(f)
wmt19_totals = defaultdict(int)
wmt19_female_counts = dict()
for row in wmt19_data:
    label = row[1]
    is_female = "_female" in label
    if "_original_" not in label or not (is_female or "_male" in label):
        continue
    occupation = row[0]
    count = row[2]
    wmt19_totals[occupation] += count
    if is_female:
        # Accumulate instead of overwriting: the total above sums every matching
        # row, so the numerator must do the same when an occupation has more than
        # one female row; otherwise the ratio would be understated.
        wmt19_female_counts[occupation] = wmt19_female_counts.get(occupation, 0) + count
# Occupations whose total count is zero have no defined ratio and are left out
# (dividing would raise ZeroDivisionError).
wmt19_female_ratios = {
    k: v / wmt19_totals[k]
    for k, v in wmt19_female_counts.items()
    if wmt19_totals[k] > 0
}
print("occupation\tfemale ratio")
for occupation, female_ratio in sorted(wmt19_female_ratios.items(), key=lambda t: t[1], reverse=True):
    print(f"{occupation}\t{female_ratio:.2f}")

occupation	female ratio
nurse	0.91
cleaner	0.80
writer	0.40
secretary	0.32
assistant	0.24
cook	0.22
editor	0.17
therapist	0.17
teacher	0.17
painter	0.17
scientist	0.12
lawyer	0.12
student	0.12
designer	0.12
guard	0.09
physician	0.09
resident	0.08
CEO	0.08
instructor	0.08
tailor	0.08
manager	0.07
advisor	0.07
patient	0.07
chief	0.06
owner	0.06
witness	0.06
architect	0.05
farmer	0.05
worker	0.04
practitioner	0.04
engineer	0.04
specialist	0.04
doctor	0.04
employee	0.03
planner	0.03
officer	0.03
client	0.02
chef	0.02
passenger	0.01
buyer	0.01
visitor	0.01
administrator	0.01
pedestrian	0.01
developer	0.01
broker	0.00
inspector	0.00
customer	0.00
driver	0.00


In [4]:
# Compute correlation
#
# Only occupations present in both sources are compared. They are sorted so that
# both series are built in the same, reproducible order on every run: iterating
# a set of strings directly can yield a different order after a kernel restart
# (string hashes are randomized per process), which may change the last digits
# of the floating-point result.
compared_occupations = sorted(set(wmt19_female_ratios).intersection(winobias_female_ratios))
winobias_series = np.array([winobias_female_ratios[occupation] for occupation in compared_occupations])
wmt19_series = np.array([wmt19_female_ratios[occupation] for occupation in compared_occupations])
# Pearson correlation coefficient and its two-sided p-value.
r, p = stats.pearsonr(winobias_series, wmt19_series)
print(f"r = {r}")
print(f"p-value = {p}")

r = 0.6892201290415162
p-value = 0.0010979118324630748
