# Identifying Bias in SpaCy word vectors

In [1]:
import spacy
from scipy import spatial
import tqdm

In [2]:
nlp = spacy.load('en_core_web_lg')

In [8]:
# Add in roles and gender pairs to compare
neutral_roles = ["doctor", "teacher", "scientist", "dancer", "builder", "boss", "engineer", "professor"]
gender_pairs = [
    ("male", "female"),
    ("man", "woman")
]

In [4]:
def cosine_similarity(term1: str, term2: str) -> float:
    """
    Gets the cosine similarity between two vectors
    """
    return 1 - spatial.distance.cosine(term1, term2)

In [5]:
def get_diff_roles(roles1: list, roles2: list) -> list:
    """
    Gets difference in cosine similarity of two role lists, returning a list of role-similarity difference pairs
    """
    results = {}
    roles1_dict = dict(roles1)
    roles2_dict = dict(roles2)
    keys = set(roles1_dict.keys()) | set(roles2_dict.keys())
    for k in tqdm.tqdm(keys, desc="Getting similarity difference between roles", unit="roles"):
        s1 = roles1_dict.get(k, 0)
        s2 = roles2_dict.get(k, 0)
        results[k] = ((s1, s1-s2), (s2, s2-s1))  # 1st similarity, 2nd similarity, similarity diff
    
    return results.items()


def get_top_roles(roles: list, similarity_index: int, top_n: int = 20, min_sim: float = 0.5) -> str:
    """
    Gets top N roles from similarity comparison list in get_diff_roles
    """
    return list(sorted(filter(lambda x: x[1][similarity_index][0 ] >= min_sim, roles), key=lambda x: x[1][similarity_index][0], reverse=True)[:top_n])

In [6]:
from IPython.display import Markdown, display


def get_table_row(role: list, similarity_index: int):
    """
    Formats the nested inner table for display_roles_table
    """
    return f'<tr><td>{role[0]}</td><td>{100*role[1][similarity_index][0]:.1f}% ({100*role[1][similarity_index][1]:.1f}%)</td></tr>'


def display_roles_table(role: str, m_label: str, f_label: str, m_roles: list, f_roles: list, diff_roles: list) -> str:
    m_rows = "<table>" + "".join([get_table_row(role, similarity_index=0) for role in m_roles]) + "</table>"
    f_rows = "<table>" + "".join([get_table_row(role, similarity_index=1) for role in f_roles]) + "</table>"
    form_row = f'<tr><td>{role}</td><td>{m_rows}</td><td>{f_rows}</td></tr>'
    form_table = f'<table><tr><th>Role</th><th>{m_label}</th><th>{f_label}</th></tr>' + form_row + "</table>"
    display(Markdown(form_table))

In [7]:
for role in neutral_roles:
    # Create role vector
    role_vect = nlp.vocab[role].vector
    for m, f in gender_pairs:
        # Create gender vectors
        m_vect = nlp.vocab[m].vector
        f_vect = nlp.vocab[f].vector
        m_role = role_vect - f_vect + m_vect
        f_role = role_vect - m_vect + f_vect
        
        # Find cosine similarity of all words to these words
        m_role_sims = []
        f_role_sims = []
        for word in tqdm.tqdm(nlp.vocab, desc=f"Looking for gender bias in {role!r} for {m!r} and {f!r}", unit=" words"):
            if word.has_vector and word.is_lower and word.is_alpha:
                m_role_sims.append((word.text, cosine_similarity(m_role, word.vector)))
                f_role_sims.append((word.text, cosine_similarity(f_role, word.vector)))
        
        # Find difference between genders in roles
        diff_roles = get_diff_roles(m_role_sims, f_role_sims)
        top_m_roles = get_top_roles(diff_roles, similarity_index=0, min_sim=0.0)
        top_f_roles = get_top_roles(diff_roles, similarity_index=1, min_sim=0.0)
        
        # Display data as table
        display_roles_table(role, m, f, top_m_roles, top_f_roles, diff_roles)

Looking for gender bias in 'doctor' for 'male' and 'female': 100%|█████████▉| 1343765/1344233 [00:24<00:00, 55832.35 words/s]
Getting similarity difference between roles: 100%|██████████| 247512/247512 [00:00<00:00, 499711.22roles/s]


<table><tr><th>Role</th><th>male</th><th>female</th></tr><tr><td>doctor</td><td><table><tr><td>doctor</td><td>93.2% (0.1%)</td></tr><tr><td>physician</td><td>75.1% (2.4%)</td></tr><tr><td>doctors</td><td>72.6% (0.4%)</td></tr><tr><td>surgeon</td><td>67.4% (3.5%)</td></tr><tr><td>pharmacist</td><td>66.3% (-1.4%)</td></tr><tr><td>medical</td><td>64.7% (-1.2%)</td></tr><tr><td>medicine</td><td>64.2% (0.6%)</td></tr><tr><td>medication</td><td>63.9% (2.6%)</td></tr><tr><td>patient</td><td>62.8% (1.0%)</td></tr><tr><td>pediatrician</td><td>62.1% (0.5%)</td></tr><tr><td>medications</td><td>61.3% (5.5%)</td></tr><tr><td>psychiatrist</td><td>60.9% (-1.1%)</td></tr><tr><td>meds</td><td>60.6% (4.6%)</td></tr><tr><td>cardiologist</td><td>60.0% (4.6%)</td></tr><tr><td>dentist</td><td>59.5% (1.7%)</td></tr><tr><td>clinic</td><td>59.4% (-0.3%)</td></tr><tr><td>surgery</td><td>59.2% (5.0%)</td></tr><tr><td>urologist</td><td>59.1% (14.0%)</td></tr><tr><td>dermatologist</td><td>58.6% (3.0%)</td></tr><tr><td>prescribe</td><td>57.7% (3.9%)</td></tr></table></td><td><table><tr><td>doctor</td><td>93.1% (-0.1%)</td></tr><tr><td>physician</td><td>72.6% (-2.4%)</td></tr><tr><td>doctors</td><td>72.3% (-0.4%)</td></tr><tr><td>nurse</td><td>70.9% (13.6%)</td></tr><tr><td>pharmacist</td><td>67.7% (1.4%)</td></tr><tr><td>medical</td><td>65.9% (1.2%)</td></tr><tr><td>surgeon</td><td>63.9% (-3.5%)</td></tr><tr><td>medicine</td><td>63.6% (-0.6%)</td></tr><tr><td>psychiatrist</td><td>62.0% (1.1%)</td></tr><tr><td>patient</td><td>61.9% (-1.0%)</td></tr><tr><td>midwife</td><td>61.9% (15.4%)</td></tr><tr><td>pediatrician</td><td>61.7% (-0.5%)</td></tr><tr><td>medication</td><td>61.4% (-2.6%)</td></tr><tr><td>hospital</td><td>60.0% (3.8%)</td></tr><tr><td>clinic</td><td>59.7% (0.3%)</td></tr><tr><td>gynecologist</td><td>59.5% (2.3%)</td></tr><tr><td>oncologist</td><td>58.8% (2.8%)</td></tr><tr><td>dentist</td><td>57.8% (-1.7%)</td></tr><tr><td>veterinarian</td><td>57.7% (4.4%)</td></tr><tr><td>neurologist</td><td>56.4% (-0.8%)</td></tr></table></td></tr></table>

Looking for gender bias in 'doctor' for 'man' and 'woman': 100%|█████████▉| 1343765/1344233 [00:23<00:00, 57964.39 words/s]
Getting similarity difference between roles: 100%|██████████| 247512/247512 [00:00<00:00, 476519.92roles/s]


<table><tr><th>Role</th><th>man</th><th>woman</th></tr><tr><td>doctor</td><td><table><tr><td>doctor</td><td>78.5% (-6.4%)</td></tr><tr><td>physician</td><td>62.8% (-4.0%)</td></tr><tr><td>doctors</td><td>59.2% (-8.4%)</td></tr><tr><td>medicine</td><td>56.9% (1.3%)</td></tr><tr><td>surgeon</td><td>56.3% (-2.6%)</td></tr><tr><td>pharmacist</td><td>54.8% (-7.7%)</td></tr><tr><td>medical</td><td>54.4% (-5.5%)</td></tr><tr><td>he</td><td>53.9% (29.0%)</td></tr><tr><td>man</td><td>53.7% (35.7%)</td></tr><tr><td>psychiatrist</td><td>52.8% (-2.3%)</td></tr><tr><td>medication</td><td>52.5% (-4.7%)</td></tr><tr><td>meds</td><td>52.2% (1.7%)</td></tr><tr><td>vet</td><td>51.1% (12.7%)</td></tr><tr><td>him</td><td>51.1% (24.8%)</td></tr><tr><td>cardiologist</td><td>50.9% (0.2%)</td></tr><tr><td>dentist</td><td>50.6% (-1.9%)</td></tr><tr><td>drugs</td><td>50.2% (8.0%)</td></tr><tr><td>neurologist</td><td>50.1% (0.3%)</td></tr><tr><td>medications</td><td>49.6% (-3.5%)</td></tr><tr><td>patient</td><td>49.0% (-10.8%)</td></tr></table></td><td><table><tr><td>doctor</td><td>84.8% (6.4%)</td></tr><tr><td>nurse</td><td>70.2% (30.1%)</td></tr><tr><td>doctors</td><td>67.6% (8.4%)</td></tr><tr><td>physician</td><td>66.8% (4.0%)</td></tr><tr><td>pregnant</td><td>66.6% (45.0%)</td></tr><tr><td>gynecologist</td><td>62.9% (25.0%)</td></tr><tr><td>woman</td><td>62.7% (46.8%)</td></tr><tr><td>pharmacist</td><td>62.5% (7.7%)</td></tr><tr><td>midwife</td><td>61.0% (29.2%)</td></tr><tr><td>medical</td><td>59.9% (5.5%)</td></tr><tr><td>pediatrician</td><td>59.9% (11.8%)</td></tr><tr><td>patient</td><td>59.8% (10.8%)</td></tr><tr><td>pregnancy</td><td>59.7% (37.9%)</td></tr><tr><td>surgeon</td><td>58.9% (2.6%)</td></tr><tr><td>clinic</td><td>57.5% (11.2%)</td></tr><tr><td>obstetrician</td><td>57.5% (20.8%)</td></tr><tr><td>medication</td><td>57.3% (4.7%)</td></tr><tr><td>medicine</td><td>55.6% (-1.3%)</td></tr><tr><td>hospital</td><td>55.4% (9.3%)</td></tr><tr><td>nurses</td><td>55.4% (24.8%)</td></tr></table></td></tr></table>

Looking for gender bias in 'teacher' for 'male' and 'female': 100%|█████████▉| 1343765/1344233 [00:24<00:00, 55889.75 words/s]
Getting similarity difference between roles: 100%|██████████| 247512/247512 [00:00<00:00, 491449.12roles/s]


<table><tr><th>Role</th><th>male</th><th>female</th></tr><tr><td>teacher</td><td><table><tr><td>teacher</td><td>92.6% (-1.4%)</td></tr><tr><td>teachers</td><td>75.0% (-0.5%)</td></tr><tr><td>school</td><td>70.2% (-1.1%)</td></tr><tr><td>teaching</td><td>68.2% (0.1%)</td></tr><tr><td>student</td><td>68.0% (-2.4%)</td></tr><tr><td>tutor</td><td>67.9% (2.3%)</td></tr><tr><td>classroom</td><td>64.1% (1.3%)</td></tr><tr><td>taught</td><td>62.4% (2.3%)</td></tr><tr><td>educator</td><td>62.2% (-4.7%)</td></tr><tr><td>elementary</td><td>61.8% (1.2%)</td></tr><tr><td>kindergarten</td><td>61.4% (2.5%)</td></tr><tr><td>instructor</td><td>60.8% (-4.2%)</td></tr><tr><td>teach</td><td>60.3% (1.5%)</td></tr><tr><td>education</td><td>59.1% (0.5%)</td></tr><tr><td>students</td><td>58.9% (-1.6%)</td></tr><tr><td>lesson</td><td>58.5% (0.4%)</td></tr><tr><td>curriculum</td><td>58.0% (1.6%)</td></tr><tr><td>schools</td><td>55.6% (0.5%)</td></tr><tr><td>lessons</td><td>55.5% (-1.1%)</td></tr><tr><td>pupil</td><td>55.4% (8.4%)</td></tr></table></td><td><table><tr><td>teacher</td><td>94.0% (1.4%)</td></tr><tr><td>teachers</td><td>75.5% (0.5%)</td></tr><tr><td>school</td><td>71.3% (1.1%)</td></tr><tr><td>student</td><td>70.4% (2.4%)</td></tr><tr><td>teaching</td><td>68.1% (-0.1%)</td></tr><tr><td>educator</td><td>66.8% (4.7%)</td></tr><tr><td>tutor</td><td>65.6% (-2.3%)</td></tr><tr><td>instructor</td><td>65.0% (4.2%)</td></tr><tr><td>classroom</td><td>62.8% (-1.3%)</td></tr><tr><td>elementary</td><td>60.5% (-1.2%)</td></tr><tr><td>students</td><td>60.5% (1.6%)</td></tr><tr><td>taught</td><td>60.1% (-2.3%)</td></tr><tr><td>kindergarten</td><td>58.9% (-2.5%)</td></tr><tr><td>teach</td><td>58.8% (-1.5%)</td></tr><tr><td>librarian</td><td>58.7% (9.8%)</td></tr><tr><td>education</td><td>58.7% (-0.5%)</td></tr><tr><td>lesson</td><td>58.1% (-0.4%)</td></tr><tr><td>graduate</td><td>57.7% (3.1%)</td></tr><tr><td>lecturer</td><td>56.9% (7.2%)</td></tr><tr><td>lessons</td><td>56.6% (1.1%)</td></tr></table></td></tr></table>

Looking for gender bias in 'teacher' for 'man' and 'woman': 100%|█████████▉| 1343765/1344233 [00:24<00:00, 55912.18 words/s]
Getting similarity difference between roles: 100%|██████████| 247512/247512 [00:00<00:00, 468414.32roles/s]


<table><tr><th>Role</th><th>man</th><th>woman</th></tr><tr><td>teacher</td><td><table><tr><td>teacher</td><td>79.3% (-5.1%)</td></tr><tr><td>teachers</td><td>65.4% (-1.3%)</td></tr><tr><td>teaching</td><td>62.3% (4.7%)</td></tr><tr><td>school</td><td>62.1% (-0.1%)</td></tr><tr><td>tutor</td><td>61.7% (5.8%)</td></tr><tr><td>taught</td><td>59.0% (9.9%)</td></tr><tr><td>lesson</td><td>56.8% (10.6%)</td></tr><tr><td>student</td><td>56.8% (-7.6%)</td></tr><tr><td>teach</td><td>56.4% (7.8%)</td></tr><tr><td>classroom</td><td>55.0% (-1.4%)</td></tr><tr><td>elementary</td><td>54.2% (0.9%)</td></tr><tr><td>instructor</td><td>53.7% (-3.0%)</td></tr><tr><td>mentor</td><td>53.6% (12.1%)</td></tr><tr><td>lessons</td><td>53.0% (7.0%)</td></tr><tr><td>learning</td><td>52.1% (9.1%)</td></tr><tr><td>math</td><td>51.8% (9.9%)</td></tr><tr><td>he</td><td>50.9% (27.4%)</td></tr><tr><td>educator</td><td>50.9% (-11.1%)</td></tr><tr><td>curriculum</td><td>50.2% (-0.1%)</td></tr><tr><td>students</td><td>50.1% (-4.4%)</td></tr></table></td><td><table><tr><td>teacher</td><td>84.4% (5.1%)</td></tr><tr><td>teachers</td><td>66.7% (1.3%)</td></tr><tr><td>student</td><td>64.4% (7.6%)</td></tr><tr><td>school</td><td>62.2% (0.1%)</td></tr><tr><td>educator</td><td>62.0% (11.1%)</td></tr><tr><td>nurse</td><td>59.2% (33.2%)</td></tr><tr><td>woman</td><td>58.3% (48.3%)</td></tr><tr><td>teaching</td><td>57.7% (-4.7%)</td></tr><tr><td>instructor</td><td>56.8% (3.0%)</td></tr><tr><td>mother</td><td>56.4% (31.7%)</td></tr><tr><td>classroom</td><td>56.4% (1.4%)</td></tr><tr><td>librarian</td><td>56.2% (18.6%)</td></tr><tr><td>kindergarten</td><td>56.1% (7.1%)</td></tr><tr><td>tutor</td><td>55.9% (-5.8%)</td></tr><tr><td>schoolteacher</td><td>55.8% (23.2%)</td></tr><tr><td>counselor</td><td>55.3% (15.5%)</td></tr><tr><td>girl</td><td>54.9% (35.2%)</td></tr><tr><td>mom</td><td>54.6% (26.0%)</td></tr><tr><td>students</td><td>54.6% (4.4%)</td></tr><tr><td>she</td><td>54.2% (26.0%)</td></tr></table></td></tr></table>

Looking for gender bias in 'scientist' for 'male' and 'female': 100%|█████████▉| 1343765/1344233 [00:23<00:00, 57009.31 words/s]
Getting similarity difference between roles: 100%|██████████| 247512/247512 [00:00<00:00, 459411.56roles/s]


<table><tr><th>Role</th><th>male</th><th>female</th></tr><tr><td>scientist</td><td><table><tr><td>scientist</td><td>93.2% (-0.8%)</td></tr><tr><td>physicist</td><td>74.0% (0.4%)</td></tr><tr><td>researcher</td><td>72.5% (-2.1%)</td></tr><tr><td>scientists</td><td>72.0% (1.7%)</td></tr><tr><td>biologist</td><td>71.3% (1.8%)</td></tr><tr><td>chemist</td><td>63.5% (4.7%)</td></tr><tr><td>biochemist</td><td>61.5% (1.4%)</td></tr><tr><td>geologist</td><td>60.8% (-0.3%)</td></tr><tr><td>geneticist</td><td>60.8% (3.4%)</td></tr><tr><td>professor</td><td>60.8% (-2.9%)</td></tr><tr><td>scientific</td><td>60.7% (4.3%)</td></tr><tr><td>mathematician</td><td>60.4% (2.8%)</td></tr><tr><td>astronomer</td><td>59.9% (0.6%)</td></tr><tr><td>science</td><td>59.2% (-1.5%)</td></tr><tr><td>researchers</td><td>57.4% (2.7%)</td></tr><tr><td>inventor</td><td>57.1% (3.0%)</td></tr><tr><td>botanist</td><td>57.0% (3.0%)</td></tr><tr><td>ecologist</td><td>56.5% (-0.7%)</td></tr><tr><td>investigator</td><td>55.9% (-3.3%)</td></tr><tr><td>engineer</td><td>55.5% (0.5%)</td></tr></table></td><td><table><tr><td>scientist</td><td>94.0% (0.8%)</td></tr><tr><td>researcher</td><td>74.6% (2.1%)</td></tr><tr><td>physicist</td><td>73.5% (-0.4%)</td></tr><tr><td>scientists</td><td>70.2% (-1.7%)</td></tr><tr><td>biologist</td><td>69.5% (-1.8%)</td></tr><tr><td>professor</td><td>63.6% (2.9%)</td></tr><tr><td>microbiologist</td><td>61.2% (8.7%)</td></tr><tr><td>geologist</td><td>61.1% (0.3%)</td></tr><tr><td>astrophysicist</td><td>60.9% (7.7%)</td></tr><tr><td>science</td><td>60.7% (1.5%)</td></tr><tr><td>biochemist</td><td>60.1% (-1.4%)</td></tr><tr><td>astronomer</td><td>59.3% (-0.6%)</td></tr><tr><td>investigator</td><td>59.2% (3.3%)</td></tr><tr><td>chemist</td><td>58.8% (-4.7%)</td></tr><tr><td>mathematician</td><td>57.6% (-2.8%)</td></tr><tr><td>anthropologist</td><td>57.5% (2.3%)</td></tr><tr><td>geneticist</td><td>57.3% (-3.4%)</td></tr><tr><td>ecologist</td><td>57.2% (0.7%)</td></tr><tr><td>archaeologist</td><td>56.6% (4.6%)</td></tr><tr><td>scientific</td><td>56.4% (-4.3%)</td></tr></table></td></tr></table>

Looking for gender bias in 'scientist' for 'man' and 'woman': 100%|█████████▉| 1343765/1344233 [00:23<00:00, 56149.73 words/s]
Getting similarity difference between roles: 100%|██████████| 247512/247512 [00:00<00:00, 472280.08roles/s]


<table><tr><th>Role</th><th>man</th><th>woman</th></tr><tr><td>scientist</td><td><table><tr><td>scientist</td><td>83.1% (1.0%)</td></tr><tr><td>physicist</td><td>66.6% (3.1%)</td></tr><tr><td>scientists</td><td>63.7% (1.9%)</td></tr><tr><td>biologist</td><td>61.7% (-0.9%)</td></tr><tr><td>researcher</td><td>59.1% (-11.8%)</td></tr><tr><td>geologist</td><td>57.7% (7.8%)</td></tr><tr><td>science</td><td>55.3% (4.7%)</td></tr><tr><td>scientific</td><td>54.3% (5.4%)</td></tr><tr><td>mathematician</td><td>54.1% (4.1%)</td></tr><tr><td>chemist</td><td>54.1% (0.4%)</td></tr><tr><td>inventor</td><td>53.7% (9.4%)</td></tr><tr><td>astronomer</td><td>53.6% (2.2%)</td></tr><tr><td>professor</td><td>52.6% (-4.8%)</td></tr><tr><td>engineer</td><td>52.3% (7.2%)</td></tr><tr><td>genius</td><td>52.2% (24.6%)</td></tr><tr><td>botanist</td><td>51.5% (5.2%)</td></tr><tr><td>biochemist</td><td>51.0% (-5.2%)</td></tr><tr><td>physicists</td><td>51.0% (7.8%)</td></tr><tr><td>geneticist</td><td>50.9% (-2.5%)</td></tr><tr><td>astrophysicist</td><td>50.4% (-0.2%)</td></tr></table></td><td><table><tr><td>scientist</td><td>82.1% (-1.0%)</td></tr><tr><td>researcher</td><td>70.9% (11.8%)</td></tr><tr><td>physicist</td><td>63.5% (-3.1%)</td></tr><tr><td>biologist</td><td>62.6% (0.9%)</td></tr><tr><td>scientists</td><td>61.8% (-1.9%)</td></tr><tr><td>professor</td><td>57.4% (4.8%)</td></tr><tr><td>biochemist</td><td>56.3% (5.2%)</td></tr><tr><td>psychologist</td><td>55.8% (16.6%)</td></tr><tr><td>investigator</td><td>55.4% (9.1%)</td></tr><tr><td>anthropologist</td><td>54.7% (9.8%)</td></tr><tr><td>microbiologist</td><td>54.2% (7.7%)</td></tr><tr><td>chemist</td><td>53.7% (-0.4%)</td></tr><tr><td>researchers</td><td>53.4% (7.7%)</td></tr><tr><td>geneticist</td><td>53.4% (2.5%)</td></tr><tr><td>ecologist</td><td>52.8% (5.1%)</td></tr><tr><td>woman</td><td>51.8% (52.2%)</td></tr><tr><td>astronomer</td><td>51.5% (-2.2%)</td></tr><tr><td>science</td><td>50.6% (-4.7%)</td></tr><tr><td>astrophysicist</td><td>50.5% (0.2%)</td></tr><tr><td>mathematician</td><td>50.0% (-4.1%)</td></tr></table></td></tr></table>

Looking for gender bias in 'dancer' for 'male' and 'female': 100%|█████████▉| 1343765/1344233 [00:22<00:00, 59051.91 words/s]
Getting similarity difference between roles: 100%|██████████| 247512/247512 [00:00<00:00, 472273.64roles/s]


<table><tr><th>Role</th><th>male</th><th>female</th></tr><tr><td>dancer</td><td><table><tr><td>dancer</td><td>92.4% (-2.0%)</td></tr><tr><td>dancers</td><td>70.8% (-0.9%)</td></tr><tr><td>dance</td><td>66.7% (-1.3%)</td></tr><tr><td>dancing</td><td>66.6% (2.0%)</td></tr><tr><td>choreographer</td><td>63.9% (-5.6%)</td></tr><tr><td>dances</td><td>62.2% (2.9%)</td></tr><tr><td>performer</td><td>60.6% (-4.2%)</td></tr><tr><td>ballet</td><td>60.2% (-2.9%)</td></tr><tr><td>danced</td><td>58.5% (4.0%)</td></tr><tr><td>ballerina</td><td>57.9% (-6.7%)</td></tr><tr><td>entertainer</td><td>53.9% (0.1%)</td></tr><tr><td>choreography</td><td>53.4% (-5.2%)</td></tr><tr><td>musician</td><td>53.2% (-0.3%)</td></tr><tr><td>tango</td><td>52.3% (4.1%)</td></tr><tr><td>troupe</td><td>52.3% (-2.0%)</td></tr><tr><td>magician</td><td>52.0% (10.4%)</td></tr><tr><td>stripper</td><td>51.9% (4.6%)</td></tr><tr><td>gymnast</td><td>51.2% (-6.8%)</td></tr><tr><td>singer</td><td>50.1% (-9.2%)</td></tr><tr><td>choreographed</td><td>50.0% (1.5%)</td></tr></table></td><td><table><tr><td>dancer</td><td>94.4% (2.0%)</td></tr><tr><td>dancers</td><td>71.7% (0.9%)</td></tr><tr><td>choreographer</td><td>69.5% (5.6%)</td></tr><tr><td>dance</td><td>68.0% (1.3%)</td></tr><tr><td>performer</td><td>64.8% (4.2%)</td></tr><tr><td>ballerina</td><td>64.6% (6.7%)</td></tr><tr><td>dancing</td><td>64.5% (-2.0%)</td></tr><tr><td>ballet</td><td>63.1% (2.9%)</td></tr><tr><td>actress</td><td>59.8% (17.8%)</td></tr><tr><td>singer</td><td>59.3% (9.2%)</td></tr><tr><td>dances</td><td>59.3% (-2.9%)</td></tr><tr><td>choreography</td><td>58.5% (5.2%)</td></tr><tr><td>gymnast</td><td>58.0% (6.8%)</td></tr><tr><td>burlesque</td><td>56.3% (8.5%)</td></tr><tr><td>diva</td><td>54.9% (12.7%)</td></tr><tr><td>danced</td><td>54.4% (-4.0%)</td></tr><tr><td>troupe</td><td>54.3% (2.0%)</td></tr><tr><td>entertainer</td><td>53.8% (-0.1%)</td></tr><tr><td>girl</td><td>53.6% (6.4%)</td></tr><tr><td>musician</td><td>53.5% (0.3%)</td></tr></table></td></tr></table>

Looking for gender bias in 'dancer' for 'man' and 'woman': 100%|█████████▉| 1343765/1344233 [00:22<00:00, 58957.29 words/s]
Getting similarity difference between roles: 100%|██████████| 247512/247512 [00:00<00:00, 502649.97roles/s]


<table><tr><th>Role</th><th>man</th><th>woman</th></tr><tr><td>dancer</td><td><table><tr><td>dancer</td><td>77.7% (-8.4%)</td></tr><tr><td>dance</td><td>62.5% (5.5%)</td></tr><tr><td>dancers</td><td>61.6% (-2.0%)</td></tr><tr><td>choreographer</td><td>61.6% (4.9%)</td></tr><tr><td>dancing</td><td>59.5% (3.3%)</td></tr><tr><td>performer</td><td>57.1% (3.2%)</td></tr><tr><td>choreography</td><td>55.2% (10.5%)</td></tr><tr><td>dances</td><td>54.2% (1.4%)</td></tr><tr><td>musician</td><td>53.8% (12.4%)</td></tr><tr><td>drummer</td><td>53.0% (33.3%)</td></tr><tr><td>magician</td><td>51.5% (19.1%)</td></tr><tr><td>danced</td><td>51.4% (3.3%)</td></tr><tr><td>entertainer</td><td>50.3% (5.1%)</td></tr><tr><td>percussionist</td><td>50.2% (21.1%)</td></tr><tr><td>troupe</td><td>50.0% (5.4%)</td></tr><tr><td>actor</td><td>48.8% (16.6%)</td></tr><tr><td>ballet</td><td>48.4% (-10.7%)</td></tr><tr><td>man</td><td>48.3% (36.3%)</td></tr><tr><td>singer</td><td>47.7% (-1.4%)</td></tr><tr><td>dude</td><td>47.7% (35.6%)</td></tr></table></td><td><table><tr><td>dancer</td><td>86.1% (8.4%)</td></tr><tr><td>ballerina</td><td>63.7% (21.3%)</td></tr><tr><td>dancers</td><td>63.7% (2.0%)</td></tr><tr><td>actress</td><td>62.1% (37.2%)</td></tr><tr><td>ballet</td><td>59.2% (10.7%)</td></tr><tr><td>woman</td><td>58.9% (45.1%)</td></tr><tr><td>girl</td><td>58.4% (30.9%)</td></tr><tr><td>dance</td><td>56.9% (-5.5%)</td></tr><tr><td>choreographer</td><td>56.7% (-4.9%)</td></tr><tr><td>dancing</td><td>56.2% (-3.3%)</td></tr><tr><td>gymnast</td><td>55.2% (15.4%)</td></tr><tr><td>lady</td><td>54.4% (30.8%)</td></tr><tr><td>performer</td><td>53.9% (-3.2%)</td></tr><tr><td>diva</td><td>53.8% (23.8%)</td></tr><tr><td>showgirl</td><td>53.6% (23.1%)</td></tr><tr><td>dances</td><td>52.8% (-1.4%)</td></tr><tr><td>female</td><td>52.6% (38.5%)</td></tr><tr><td>burlesque</td><td>52.0% (13.1%)</td></tr><tr><td>bellydancer</td><td>51.4% (22.8%)</td></tr><tr><td>bellydance</td><td>51.4% (21.2%)</td></tr></table></td></tr></table>

Looking for gender bias in 'builder' for 'male' and 'female': 100%|█████████▉| 1343765/1344233 [00:22<00:00, 58456.44 words/s]
Getting similarity difference between roles: 100%|██████████| 247512/247512 [00:00<00:00, 483604.20roles/s]


<table><tr><th>Role</th><th>male</th><th>female</th></tr><tr><td>builder</td><td><table><tr><td>builder</td><td>92.8% (0.3%)</td></tr><tr><td>builders</td><td>71.3% (1.6%)</td></tr><tr><td>contractor</td><td>56.4% (-0.9%)</td></tr><tr><td>handyman</td><td>54.5% (6.6%)</td></tr><tr><td>architect</td><td>54.1% (-3.3%)</td></tr><tr><td>carpenter</td><td>51.8% (6.4%)</td></tr><tr><td>developer</td><td>49.0% (-2.7%)</td></tr><tr><td>build</td><td>48.6% (-1.6%)</td></tr><tr><td>homebuilder</td><td>48.5% (0.3%)</td></tr><tr><td>installer</td><td>48.1% (1.7%)</td></tr><tr><td>building</td><td>47.9% (-4.5%)</td></tr><tr><td>electrician</td><td>47.5% (5.3%)</td></tr><tr><td>plumber</td><td>46.9% (5.4%)</td></tr><tr><td>built</td><td>45.5% (0.9%)</td></tr><tr><td>roofer</td><td>45.3% (2.3%)</td></tr><tr><td>engineer</td><td>45.2% (-4.0%)</td></tr><tr><td>construction</td><td>45.1% (-1.4%)</td></tr><tr><td>landscaper</td><td>44.9% (-0.6%)</td></tr><tr><td>remodeler</td><td>44.8% (-0.3%)</td></tr><tr><td>fabricator</td><td>44.5% (-1.8%)</td></tr></table></td><td><table><tr><td>builder</td><td>92.5% (-0.3%)</td></tr><tr><td>builders</td><td>69.8% (-1.6%)</td></tr><tr><td>contractor</td><td>57.4% (0.9%)</td></tr><tr><td>architect</td><td>57.4% (3.3%)</td></tr><tr><td>building</td><td>52.4% (4.5%)</td></tr><tr><td>developer</td><td>51.8% (2.7%)</td></tr><tr><td>build</td><td>50.2% (1.6%)</td></tr><tr><td>engineer</td><td>49.2% (4.0%)</td></tr><tr><td>decorator</td><td>49.1% (7.2%)</td></tr><tr><td>realtor</td><td>49.0% (5.5%)</td></tr><tr><td>homebuilder</td><td>48.3% (-0.3%)</td></tr><tr><td>handyman</td><td>48.0% (-6.6%)</td></tr><tr><td>designer</td><td>47.7% (12.8%)</td></tr><tr><td>construction</td><td>46.4% (1.4%)</td></tr><tr><td>installer</td><td>46.4% (-1.7%)</td></tr><tr><td>fabricator</td><td>46.3% (1.8%)</td></tr><tr><td>landscaper</td><td>45.5% (0.6%)</td></tr><tr><td>carpenter</td><td>45.4% (-6.4%)</td></tr><tr><td>remodeler</td><td>45.1% (0.3%)</td></tr><tr><td>homeowner</td><td>44.9% (3.5%)</td></tr></table></td></tr></table>

Looking for gender bias in 'builder' for 'man' and 'woman': 100%|█████████▉| 1343765/1344233 [00:22<00:00, 59121.32 words/s]
Getting similarity difference between roles: 100%|██████████| 247512/247512 [00:00<00:00, 471051.10roles/s]


<table><tr><th>Role</th><th>man</th><th>woman</th></tr><tr><td>builder</td><td><table><tr><td>builder</td><td>81.7% (2.2%)</td></tr><tr><td>builders</td><td>64.4% (6.2%)</td></tr><tr><td>contractor</td><td>51.1% (3.2%)</td></tr><tr><td>architect</td><td>50.9% (4.9%)</td></tr><tr><td>handyman</td><td>48.8% (8.6%)</td></tr><tr><td>build</td><td>47.8% (10.0%)</td></tr><tr><td>developer</td><td>46.5% (5.5%)</td></tr><tr><td>building</td><td>46.4% (5.6%)</td></tr><tr><td>carpenter</td><td>45.9% (7.3%)</td></tr><tr><td>built</td><td>44.9% (11.8%)</td></tr><tr><td>engineer</td><td>44.4% (6.7%)</td></tr><tr><td>installer</td><td>43.1% (4.2%)</td></tr><tr><td>construction</td><td>42.9% (6.4%)</td></tr><tr><td>fabricator</td><td>42.8% (6.8%)</td></tr><tr><td>electrician</td><td>42.4% (6.9%)</td></tr><tr><td>craftsman</td><td>42.2% (10.2%)</td></tr><tr><td>builds</td><td>41.7% (9.2%)</td></tr><tr><td>surveyor</td><td>41.6% (10.2%)</td></tr><tr><td>foreman</td><td>41.2% (14.7%)</td></tr><tr><td>master</td><td>40.9% (15.1%)</td></tr></table></td><td><table><tr><td>builder</td><td>79.5% (-2.2%)</td></tr><tr><td>builders</td><td>58.2% (-6.2%)</td></tr><tr><td>realtor</td><td>48.6% (16.4%)</td></tr><tr><td>contractor</td><td>47.9% (-3.2%)</td></tr><tr><td>architect</td><td>46.0% (-4.9%)</td></tr><tr><td>woman</td><td>45.8% (55.1%)</td></tr><tr><td>homeowner</td><td>45.0% (14.4%)</td></tr><tr><td>homebuilder</td><td>44.3% (4.3%)</td></tr><tr><td>designer</td><td>44.1% (15.9%)</td></tr><tr><td>decorator</td><td>43.9% (8.4%)</td></tr><tr><td>housewife</td><td>43.2% (46.3%)</td></tr><tr><td>planner</td><td>41.6% (15.4%)</td></tr><tr><td>developer</td><td>41.0% (-5.5%)</td></tr><tr><td>building</td><td>40.7% (-5.6%)</td></tr><tr><td>landscaper</td><td>40.6% (2.4%)</td></tr><tr><td>remodeler</td><td>40.5% (2.8%)</td></tr><tr><td>handyman</td><td>40.2% (-8.6%)</td></tr><tr><td>seamstress</td><td>39.1% (33.0%)</td></tr><tr><td>gallery</td><td>39.1% (25.1%)</td></tr><tr><td>installer</td><td>39.0% (-4.2%)</td></tr></table></td></tr></table>

Looking for gender bias in 'boss' for 'male' and 'female': 100%|█████████▉| 1343765/1344233 [00:22<00:00, 59081.30 words/s]
Getting similarity difference between roles: 100%|██████████| 247512/247512 [00:00<00:00, 483698.62roles/s]


<table><tr><th>Role</th><th>male</th><th>female</th></tr><tr><td>boss</td><td><table><tr><td>boss</td><td>92.2% (-0.1%)</td></tr><tr><td>bosses</td><td>67.9% (-4.1%)</td></tr><tr><td>man</td><td>48.8% (15.0%)</td></tr><tr><td>mate</td><td>48.0% (10.6%)</td></tr><tr><td>guy</td><td>47.9% (7.4%)</td></tr><tr><td>insists</td><td>47.7% (1.2%)</td></tr><tr><td>manager</td><td>47.0% (-4.8%)</td></tr><tr><td>buddy</td><td>46.9% (9.6%)</td></tr><tr><td>dad</td><td>46.8% (7.5%)</td></tr><tr><td>boyfriend</td><td>46.3% (6.4%)</td></tr><tr><td>brother</td><td>46.0% (12.6%)</td></tr><tr><td>bully</td><td>45.3% (9.8%)</td></tr><tr><td>wants</td><td>45.2% (-3.8%)</td></tr><tr><td>uncle</td><td>44.3% (10.2%)</td></tr><tr><td>mates</td><td>44.3% (7.4%)</td></tr><tr><td>admits</td><td>43.7% (1.0%)</td></tr><tr><td>bloke</td><td>43.6% (9.8%)</td></tr><tr><td>thug</td><td>43.1% (10.0%)</td></tr><tr><td>chief</td><td>42.9% (-4.4%)</td></tr><tr><td>honcho</td><td>42.9% (-1.0%)</td></tr></table></td><td><table><tr><td>boss</td><td>92.4% (0.1%)</td></tr><tr><td>bosses</td><td>72.0% (4.1%)</td></tr><tr><td>manager</td><td>51.8% (4.8%)</td></tr><tr><td>wants</td><td>49.1% (3.8%)</td></tr><tr><td>chief</td><td>47.4% (4.4%)</td></tr><tr><td>secretary</td><td>46.9% (13.3%)</td></tr><tr><td>insists</td><td>46.4% (-1.2%)</td></tr><tr><td>telling</td><td>46.3% (6.3%)</td></tr><tr><td>told</td><td>45.6% (7.2%)</td></tr><tr><td>head</td><td>45.5% (3.5%)</td></tr><tr><td>knows</td><td>44.6% (3.9%)</td></tr><tr><td>mad</td><td>44.3% (4.3%)</td></tr><tr><td>honcho</td><td>43.9% (1.0%)</td></tr><tr><td>lady</td><td>43.8% (17.1%)</td></tr><tr><td>colleague</td><td>43.5% (5.2%)</td></tr><tr><td>friend</td><td>43.3% (3.6%)</td></tr><tr><td>furious</td><td>43.1% (4.6%)</td></tr><tr><td>mum</td><td>42.9% (7.2%)</td></tr><tr><td>assistant</td><td>42.9% (9.9%)</td></tr><tr><td>angry</td><td>42.9% (2.1%)</td></tr></table></td></tr></table>

Looking for gender bias in 'boss' for 'man' and 'woman': 100%|█████████▉| 1343765/1344233 [00:24<00:00, 55612.27 words/s]
Getting similarity difference between roles: 100%|██████████| 247512/247512 [00:00<00:00, 452017.35roles/s]


<table><tr><th>Role</th><th>man</th><th>woman</th></tr><tr><td>boss</td><td><table><tr><td>boss</td><td>81.9% (4.1%)</td></tr><tr><td>bosses</td><td>58.2% (-4.9%)</td></tr><tr><td>man</td><td>49.1% (28.1%)</td></tr><tr><td>bro</td><td>46.6% (38.3%)</td></tr><tr><td>buddy</td><td>46.4% (20.9%)</td></tr><tr><td>dude</td><td>46.3% (27.6%)</td></tr><tr><td>guy</td><td>45.5% (15.2%)</td></tr><tr><td>mate</td><td>45.0% (16.8%)</td></tr><tr><td>honcho</td><td>43.6% (12.6%)</td></tr><tr><td>he</td><td>43.6% (20.7%)</td></tr><tr><td>manager</td><td>43.4% (1.4%)</td></tr><tr><td>chief</td><td>42.7% (7.6%)</td></tr><tr><td>hell</td><td>42.7% (15.3%)</td></tr><tr><td>hero</td><td>42.7% (17.8%)</td></tr><tr><td>reckons</td><td>42.4% (19.1%)</td></tr><tr><td>mad</td><td>42.4% (12.4%)</td></tr><tr><td>henchman</td><td>42.2% (22.2%)</td></tr><tr><td>hes</td><td>42.1% (26.8%)</td></tr><tr><td>brother</td><td>41.7% (15.4%)</td></tr><tr><td>enforcer</td><td>41.7% (22.4%)</td></tr></table></td><td><table><tr><td>boss</td><td>77.8% (-4.1%)</td></tr><tr><td>bosses</td><td>63.1% (4.9%)</td></tr><tr><td>woman</td><td>57.9% (57.7%)</td></tr><tr><td>lady</td><td>53.4% (43.6%)</td></tr><tr><td>girl</td><td>48.8% (44.2%)</td></tr><tr><td>wife</td><td>48.7% (35.4%)</td></tr><tr><td>girlfriend</td><td>48.5% (29.9%)</td></tr><tr><td>mom</td><td>48.5% (35.5%)</td></tr><tr><td>mother</td><td>48.5% (41.2%)</td></tr><tr><td>housewife</td><td>48.4% (48.3%)</td></tr><tr><td>herself</td><td>47.8% (44.4%)</td></tr><tr><td>pregnant</td><td>47.7% (55.8%)</td></tr><tr><td>coworker</td><td>47.4% (29.4%)</td></tr><tr><td>husband</td><td>47.1% (27.1%)</td></tr><tr><td>secretary</td><td>46.8% (22.9%)</td></tr><tr><td>she</td><td>46.8% (35.3%)</td></tr><tr><td>mistress</td><td>46.5% (34.8%)</td></tr><tr><td>boyfriend</td><td>46.0% (16.6%)</td></tr><tr><td>her</td><td>45.8% (39.6%)</td></tr><tr><td>businesswoman</td><td>45.7% (49.9%)</td></tr></table></td></tr></table>

Looking for gender bias in 'engineer' for 'male' and 'female': 100%|█████████▉| 1343765/1344233 [00:23<00:00, 56611.97 words/s]
Getting similarity difference between roles: 100%|██████████| 247512/247512 [00:00<00:00, 492958.52roles/s]


<table><tr><th>Role</th><th>male</th><th>female</th></tr><tr><td>engineer</td><td><table><tr><td>engineer</td><td>92.7% (-0.4%)</td></tr><tr><td>engineers</td><td>67.7% (-0.9%)</td></tr><tr><td>engineering</td><td>67.1% (-3.5%)</td></tr><tr><td>technician</td><td>64.6% (0.6%)</td></tr><tr><td>architect</td><td>58.9% (-0.5%)</td></tr><tr><td>electrician</td><td>58.6% (7.6%)</td></tr><tr><td>contractor</td><td>56.2% (1.7%)</td></tr><tr><td>mechanic</td><td>55.2% (1.7%)</td></tr><tr><td>surveyor</td><td>53.1% (3.5%)</td></tr><tr><td>programmer</td><td>52.9% (3.6%)</td></tr><tr><td>geologist</td><td>52.3% (-2.7%)</td></tr><tr><td>scientist</td><td>52.2% (-5.3%)</td></tr><tr><td>machinist</td><td>51.9% (4.0%)</td></tr><tr><td>inventor</td><td>51.1% (1.0%)</td></tr><tr><td>carpenter</td><td>50.9% (8.6%)</td></tr><tr><td>foreman</td><td>50.6% (8.5%)</td></tr><tr><td>chemist</td><td>50.3% (2.4%)</td></tr><tr><td>technologist</td><td>50.1% (-3.6%)</td></tr><tr><td>supervisor</td><td>49.9% (-4.1%)</td></tr><tr><td>electrical</td><td>49.4% (5.4%)</td></tr></table></td><td><table><tr><td>engineer</td><td>93.1% (0.4%)</td></tr><tr><td>engineering</td><td>70.6% (3.5%)</td></tr><tr><td>engineers</td><td>68.6% (0.9%)</td></tr><tr><td>technician</td><td>64.0% (-0.6%)</td></tr><tr><td>architect</td><td>59.4% (0.5%)</td></tr><tr><td>scientist</td><td>57.5% (5.3%)</td></tr><tr><td>consultant</td><td>55.6% (7.4%)</td></tr><tr><td>geologist</td><td>55.0% (2.7%)</td></tr><tr><td>contractor</td><td>54.5% (-1.7%)</td></tr><tr><td>supervisor</td><td>54.0% (4.1%)</td></tr><tr><td>technologist</td><td>53.7% (3.6%)</td></tr><tr><td>assistant</td><td>53.6% (8.5%)</td></tr><tr><td>mechanic</td><td>53.5% (-1.7%)</td></tr><tr><td>aerospace</td><td>53.2% (4.6%)</td></tr><tr><td>technical</td><td>51.4% (4.6%)</td></tr><tr><td>electrician</td><td>51.1% (-7.6%)</td></tr><tr><td>manager</td><td>51.0% (3.7%)</td></tr><tr><td>inventor</td><td>50.2% (-1.0%)</td></tr><tr><td>accountant</td><td>49.8% (2.1%)</td></tr><tr><td>physicist</td><td>49.7% (2.9%)</td></tr></table></td></tr></table>

Looking for gender bias in 'engineer' for 'man' and 'woman': 100%|█████████▉| 1343765/1344233 [00:23<00:00, 57471.17 words/s]
Getting similarity difference between roles: 100%|██████████| 247512/247512 [00:00<00:00, 410492.87roles/s]


<table><tr><th>Role</th><th>man</th><th>woman</th></tr><tr><td>engineer</td><td><table><tr><td>engineer</td><td>82.6% (3.0%)</td></tr><tr><td>engineering</td><td>63.3% (6.5%)</td></tr><tr><td>engineers</td><td>62.6% (6.3%)</td></tr><tr><td>architect</td><td>53.5% (3.6%)</td></tr><tr><td>technician</td><td>53.4% (-5.9%)</td></tr><tr><td>electrician</td><td>50.5% (5.4%)</td></tr><tr><td>geologist</td><td>49.9% (6.2%)</td></tr><tr><td>contractor</td><td>49.5% (2.3%)</td></tr><tr><td>surveyor</td><td>49.1% (8.8%)</td></tr><tr><td>mechanic</td><td>49.1% (3.3%)</td></tr><tr><td>inventor</td><td>48.1% (7.9%)</td></tr><tr><td>scientist</td><td>47.8% (-0.2%)</td></tr><tr><td>foreman</td><td>46.9% (13.3%)</td></tr><tr><td>technical</td><td>46.0% (6.3%)</td></tr><tr><td>machinist</td><td>45.9% (4.8%)</td></tr><tr><td>programmer</td><td>45.8% (2.3%)</td></tr><tr><td>aerospace</td><td>45.5% (2.1%)</td></tr><tr><td>journeyman</td><td>44.7% (14.3%)</td></tr><tr><td>draftsman</td><td>44.3% (4.5%)</td></tr><tr><td>manager</td><td>44.0% (2.1%)</td></tr></table></td><td><table><tr><td>engineer</td><td>79.7% (-3.0%)</td></tr><tr><td>technician</td><td>59.2% (5.9%)</td></tr><tr><td>engineering</td><td>56.8% (-6.5%)</td></tr><tr><td>engineers</td><td>56.3% (-6.3%)</td></tr><tr><td>technologist</td><td>52.7% (14.0%)</td></tr><tr><td>consultant</td><td>51.6% (12.0%)</td></tr><tr><td>supervisor</td><td>50.4% (9.6%)</td></tr><tr><td>worker</td><td>49.9% (20.6%)</td></tr><tr><td>architect</td><td>49.8% (-3.6%)</td></tr><tr><td>assistant</td><td>49.2% (11.8%)</td></tr><tr><td>scientist</td><td>48.1% (0.2%)</td></tr><tr><td>accountant</td><td>47.9% (10.2%)</td></tr><tr><td>woman</td><td>47.8% (54.9%)</td></tr><tr><td>contractor</td><td>47.2% (-2.3%)</td></tr><tr><td>nurse</td><td>47.0% (41.0%)</td></tr><tr><td>designer</td><td>47.0% (16.6%)</td></tr><tr><td>educator</td><td>46.6% (21.1%)</td></tr><tr><td>researcher</td><td>46.1% (13.3%)</td></tr><tr><td>freelance</td><td>46.0% (18.6%)</td></tr><tr><td>mechanic</td><td>45.8% (-3.3%)</td></tr></table></td></tr></table>

Looking for gender bias in 'professor' for 'male' and 'female': 100%|█████████▉| 1343765/1344233 [00:26<00:00, 50794.04 words/s]
Getting similarity difference between roles: 100%|██████████| 247512/247512 [00:00<00:00, 456328.33roles/s]


<table><tr><th>Role</th><th>male</th><th>female</th></tr><tr><td>professor</td><td><table><tr><td>professor</td><td>93.7% (-0.7%)</td></tr><tr><td>emeritus</td><td>69.3% (2.5%)</td></tr><tr><td>lecturer</td><td>67.9% (-7.6%)</td></tr><tr><td>dean</td><td>63.9% (-2.7%)</td></tr><tr><td>associate</td><td>63.8% (-0.7%)</td></tr><tr><td>researcher</td><td>63.7% (-2.4%)</td></tr><tr><td>professors</td><td>62.7% (-1.1%)</td></tr><tr><td>scholar</td><td>62.4% (-2.1%)</td></tr><tr><td>sociology</td><td>62.0% (0.2%)</td></tr><tr><td>scientist</td><td>61.3% (-2.5%)</td></tr><tr><td>graduate</td><td>61.2% (-4.5%)</td></tr><tr><td>doctorate</td><td>60.8% (-0.0%)</td></tr><tr><td>doctoral</td><td>60.1% (-2.5%)</td></tr><tr><td>faculty</td><td>59.0% (-4.4%)</td></tr><tr><td>sociologist</td><td>58.7% (0.9%)</td></tr><tr><td>prof</td><td>57.6% (2.6%)</td></tr><tr><td>adjunct</td><td>57.0% (-0.9%)</td></tr><tr><td>assistant</td><td>57.0% (-5.5%)</td></tr><tr><td>psychology</td><td>56.9% (2.9%)</td></tr><tr><td>university</td><td>56.6% (-4.3%)</td></tr></table></td><td><table><tr><td>professor</td><td>94.4% (0.7%)</td></tr><tr><td>lecturer</td><td>75.5% (7.6%)</td></tr><tr><td>emeritus</td><td>66.8% (-2.5%)</td></tr><tr><td>dean</td><td>66.6% (2.7%)</td></tr><tr><td>researcher</td><td>66.1% (2.4%)</td></tr><tr><td>graduate</td><td>65.8% (4.5%)</td></tr><tr><td>associate</td><td>64.6% (0.7%)</td></tr><tr><td>scholar</td><td>64.5% (2.1%)</td></tr><tr><td>professors</td><td>63.8% (1.1%)</td></tr><tr><td>scientist</td><td>63.8% (2.5%)</td></tr><tr><td>faculty</td><td>63.4% (4.4%)</td></tr><tr><td>doctoral</td><td>62.6% (2.5%)</td></tr><tr><td>assistant</td><td>62.5% (5.5%)</td></tr><tr><td>sociology</td><td>61.8% (-0.2%)</td></tr><tr><td>university</td><td>61.0% (4.3%)</td></tr><tr><td>doctorate</td><td>60.8% (0.0%)</td></tr><tr><td>student</td><td>58.8% (5.5%)</td></tr><tr><td>undergraduate</td><td>58.2% (2.0%)</td></tr><tr><td>adjunct</td><td>57.9% (0.9%)</td></tr><tr><td>sociologist</td><td>57.8% (-0.9%)</td></tr></table></td></tr></table>

Looking for gender bias in 'professor' for 'man' and 'woman': 100%|█████████▉| 1343765/1344233 [00:24<00:00, 55010.67 words/s]
Getting similarity difference between roles: 100%|██████████| 247512/247512 [00:00<00:00, 394307.01roles/s]


<table><tr><th>Role</th><th>man</th><th>woman</th></tr><tr><td>professor</td><td><table><tr><td>professor</td><td>83.3% (-1.0%)</td></tr><tr><td>emeritus</td><td>65.6% (10.0%)</td></tr><tr><td>lecturer</td><td>61.5% (-4.7%)</td></tr><tr><td>scholar</td><td>58.9% (4.8%)</td></tr><tr><td>dean</td><td>58.4% (0.6%)</td></tr><tr><td>scientist</td><td>58.0% (4.5%)</td></tr><tr><td>associate</td><td>56.0% (-2.3%)</td></tr><tr><td>professors</td><td>55.7% (-1.2%)</td></tr><tr><td>alumnus</td><td>54.6% (9.2%)</td></tr><tr><td>researcher</td><td>53.6% (-8.2%)</td></tr><tr><td>prof</td><td>52.2% (4.3%)</td></tr><tr><td>graduate</td><td>52.1% (-8.9%)</td></tr><tr><td>economics</td><td>52.0% (3.6%)</td></tr><tr><td>sociology</td><td>51.7% (-6.6%)</td></tr><tr><td>faculty</td><td>51.7% (-5.6%)</td></tr><tr><td>historian</td><td>51.6% (7.4%)</td></tr><tr><td>physicist</td><td>50.6% (5.9%)</td></tr><tr><td>doctorate</td><td>50.6% (-7.0%)</td></tr><tr><td>biologist</td><td>50.5% (2.0%)</td></tr><tr><td>economist</td><td>50.4% (6.4%)</td></tr></table></td><td><table><tr><td>professor</td><td>84.2% (1.0%)</td></tr><tr><td>lecturer</td><td>66.3% (4.7%)</td></tr><tr><td>researcher</td><td>61.9% (8.2%)</td></tr><tr><td>graduate</td><td>61.0% (8.9%)</td></tr><tr><td>doctoral</td><td>58.8% (8.3%)</td></tr><tr><td>sociology</td><td>58.3% (6.6%)</td></tr><tr><td>associate</td><td>58.3% (2.3%)</td></tr><tr><td>dean</td><td>57.9% (-0.6%)</td></tr><tr><td>doctorate</td><td>57.6% (7.0%)</td></tr><tr><td>faculty</td><td>57.3% (5.6%)</td></tr><tr><td>university</td><td>56.9% (9.2%)</td></tr><tr><td>professors</td><td>56.9% (1.2%)</td></tr><tr><td>psychologist</td><td>56.6% (13.5%)</td></tr><tr><td>assistant</td><td>56.5% (6.7%)</td></tr><tr><td>student</td><td>56.4% (13.1%)</td></tr><tr><td>alumna</td><td>56.3% (30.1%)</td></tr><tr><td>emeritus</td><td>55.6% (-10.0%)</td></tr><tr><td>librarian</td><td>54.9% (22.1%)</td></tr><tr><td>educator</td><td>54.7% (16.0%)</td></tr><tr><td>sociologist</td><td>54.6% (5.7%)</td></tr></table></td></tr></table>

Notable points:
- For Doctors, Nurses and midwives appear to be more female weighted and urologists appear to be more male weighted, despite male and female versions having the same title.
- For dancers, female roles feature ballet, burlesque and gymnast, whereas these are less male weighted
- Some fields in general seem to be male-dominated, with overall slightly higher percentages across the board

More analysis required to do direct comparisons rather than just a quick "head" of the data. Should also evaluate the other SpaCy models.