In [None]:
import os, re
import pickle
from copy import deepcopy
from tqdm import tqdm
import pandas as pd
import numpy as np
from datasets import load_dataset
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction import _stop_words

In [2]:
# Paths of the three Biasbios JSON splits, relative to this notebook.
data_files = dict(
    train="../datasets/Biasbios/train.json",
    valid="../datasets/Biasbios/valid.json",
    test="../datasets/Biasbios/test.json",
)
# Load all splits with the generic "json" builder; later cells address them
# as dataset["train"], dataset["valid"] and dataset["test"].
dataset = load_dataset("json", data_files=data_files)

Using custom data configuration default-c180d2606892c537
Reusing dataset json (/home/hannahchen/.var/app/com.visualstudio.code/cache/huggingface/datasets/json/default-c180d2606892c537/0.0.0/83d5b3a2f62630efc6b5315f00f20209b4ad91a00ac586597caee3a4da0bef02)


In [42]:
# Bag-of-words featuriser (max_features=85000); its vocabulary is learned from
# the training split only.
vectorizer = CountVectorizer(max_features=85000)
X_train = vectorizer.fit_transform(dataset["train"]["title_scrubbed"])

# The remaining texts, including the gender-swapped test biographies, are
# projected onto the vocabulary fitted above.
X_valid = vectorizer.transform(dataset["valid"]["title_scrubbed"])
X_test = vectorizer.transform(dataset["test"]["title_scrubbed"])
X_gender_swap_test = vectorizer.transform(dataset["test"]["title_scrubbed_gender_swapped"])

# Targets are the "title" column of each split.
y_train, y_valid, y_test = (
    dataset[split]["title"] for split in ("train", "valid", "test")
)

In [None]:
# Logistic regression on the bag-of-words counts.
# max_iter is raised above scikit-learn's default because the previous run of
# this cell ended with "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT", i.e.
# lbfgs hit the iteration cap before converging. Raise it further if the
# ConvergenceWarning still appears.
model = LogisticRegression(solver="lbfgs", max_iter=1000, n_jobs=12)
model.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [3]:
# One-off export of the fitted vectorizer and model, kept for reference.
# NOTE(review): these commented-out paths ("models/BoW/") differ from the
# "models/biasbios/BoW/" paths read below - confirm which location is current.
# with open("models/BoW/bow_vectorizer.pkl", "wb") as f:
#     pickle.dump(vectorizer, f)
# with open("models/BoW/bow_model.pkl", "wb") as f:
#     pickle.dump(model, f)

# Restore the persisted vectorizer and classifier. This rebinds `vectorizer`
# and `model`, replacing whatever objects those names held before this cell.
# NOTE(review): any X_* matrices already built in the kernel came from the
# previously bound vectorizer; presumably the pickled one has the same
# vocabulary - verify, otherwise feature columns will not line up with
# model.coef_ or with vectorizer.vocabulary_ lookups made after this cell.
# NOTE(review): pickle.load can execute arbitrary code from the file; only
# load artifacts produced by a trusted run of this project.
with open("models/biasbios/BoW/bow_vectorizer.pkl", "rb") as f:
    vectorizer = pickle.load(f)
with open("models/biasbios/BoW/bow_model.pkl", "rb") as f:
    model = pickle.load(f)

In [6]:
def get_accuracy_by_occcupation(preds):
    """Compute per-occupation accuracy of the factual predictions.

    Parameters
    ----------
    preds : pandas.DataFrame
        Must contain a "label" column (true occupation) and a
        "title_scrubbed_pred" column (predicted occupation).

    Returns
    -------
    pandas.DataFrame
        One row per distinct label, sorted by label, with columns
        "label" and "acc" (fraction of that label's rows predicted correctly).
        Labels with no correct prediction are reported with acc == 0.0 rather
        than being dropped.
    """
    is_correct = preds["label"] == preds["title_scrubbed_pred"]
    # The mean of a boolean indicator per group is the accuracy. Grouping the
    # full frame (not only the correct rows) keeps zero-accuracy labels.
    per_label = is_correct.groupby(preds["label"]).mean()
    acc_df = per_label.rename("acc").rename_axis("label").reset_index()
    return acc_df[["label", "acc"]]

def TPR_gap(df, occupation, causal=False):
    """Gender true-positive-rate gap for one occupation.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs columns "label", "gender" (rows with "F" or "M" are used) and
        "title_scrubbed_pred"; when ``causal`` is true it also needs
        "title_scrubbed_gender_swapped_pred".
    occupation : str
        Occupation label to evaluate; only rows whose "label" equals it count.
    causal : bool, default False
        False: return female TPR minus male TPR on the factual predictions.
        True: compare factual and gender-swapped predictions row by row.
        Female rows contribute (factual correct - counterfactual correct),
        male rows contribute (counterfactual correct - factual correct), and
        the summed contributions are divided by the number of F and M rows.

    Returns
    -------
    float
        The gap, or NaN when it is undefined because the occupation has no
        female or no male rows (non-causal) or no F/M rows at all (causal).
    """
    in_occupation = df["label"] == occupation
    female = df.loc[in_occupation & (df.gender == "F")]
    male = df.loc[in_occupation & (df.gender == "M")]

    if causal:
        n_rows = len(female) + len(male)
        if n_rows == 0:
            # No rows to average over; avoid a 0/0 division.
            return float("nan")

        def n_correct(rows, pred_column):
            # Number of rows whose prediction in `pred_column` matches the label.
            return int((rows[pred_column] == rows["label"]).sum())

        female_gap = (n_correct(female, "title_scrubbed_pred")
                      - n_correct(female, "title_scrubbed_gender_swapped_pred"))
        male_gap = (n_correct(male, "title_scrubbed_gender_swapped_pred")
                    - n_correct(male, "title_scrubbed_pred"))
        return (female_gap + male_gap) / n_rows

    if female.empty or male.empty:
        # A TPR is undefined for a group with no positives.
        return float("nan")

    female_tpr = int((female.title_scrubbed_pred == occupation).sum()) / len(female)
    male_tpr = int((male.title_scrubbed_pred == occupation).sum()) / len(male)
    return female_tpr - male_tpr

def get_results(preds):
    """Summarise both TPR gaps for every occupation present in ``preds``.

    ``preds`` is passed unchanged to ``TPR_gap``, once per occupation in
    statistical mode and then once per occupation in causal mode.

    Returns a DataFrame with columns "occupation", "statistical_parity" and
    "causal_parity", sorted by occupation.
    """
    occupations = preds["label"].unique().tolist()

    statistical_gaps = [TPR_gap(preds, name) for name in occupations]
    causal_gaps = [TPR_gap(preds, name, causal=True) for name in occupations]

    summary = pd.DataFrame(
        {
            "occupation": occupations,
            "statistical_parity": statistical_gaps,
            "causal_parity": causal_gaps,
        }
    )
    return summary.sort_values("occupation")

In [7]:
# Evaluation frame for the unmodified model: test-set gender, text and true
# title (stored as "label"), plus predictions on the original and on the
# gender-swapped biographies.
base_preds = pd.DataFrame(
    {
        column: dataset["test"][source]
        for column, source in [
            ("gender", "gender"),
            ("title_scrubbed", "title_scrubbed"),
            ("label", "title"),
        ]
    }
).assign(
    title_scrubbed_pred=model.predict(X_test),
    title_scrubbed_gender_swapped_pred=model.predict(X_gender_swap_test),
)
base_preds

Unnamed: 0,gender,title_scrubbed,label,title_scrubbed_pred,title_scrubbed_gender_swapped_pred
0,M,"Mark Chauvin Bezinque, Esq. is an _ who focuse...",attorney,attorney,attorney
1,M,Farid Abdel-Nour is an _ of political science ...,professor,professor,professor
2,M,Sergio F. Ochoa is an _ of Computer Science at...,professor,professor,professor
3,M,Phillip Coppola is an _ of law. He teaches Con...,professor,professor,professor
4,M,Alan Farmer is a former _ trainer. He discusse...,teacher,teacher,teacher
...,...,...,...,...,...
98334,F,Ani Vartazarian is a staff _ at Counseling and...,psychologist,psychologist,psychologist
98335,F,Julia Stacey is a paraprofessional _ at APL. S...,teacher,teacher,teacher
98336,F,Mariam Noronha is a _ with over nine years of ...,teacher,teacher,teacher
98337,F,Dr. Kelly Bowers is a licensed _. She received...,psychologist,psychologist,psychologist


In [8]:
# Gendered tokens whose model coefficients are rescaled later in the notebook.
female_gender_tokens = ["she", "her", "hers", "herself", "mrs", "ms"]
male_gender_tokens = ["he", "his", "him", "himself", "mr"]

# Column index of each token in the vectorizer vocabulary. A token missing
# from the vocabulary raises KeyError here.
female_gender_token_idxs = [vectorizer.vocabulary_[tok] for tok in female_gender_tokens]
male_gender_token_idxs = [vectorizer.vocabulary_[tok] for tok in male_gender_tokens]

# Combined list: female indices first, then male.
gender_token_idxs = female_gender_token_idxs + male_gender_token_idxs

In [26]:
# Distinct occupations in the test split. Sorted so the row order of every
# frame built from this list is the same on each run; a bare set of strings
# iterates in a hash-dependent order that can change between kernels.
occupation_list = sorted(set(dataset["test"]["title"]))

def _gender_token_idxs(gender):
    """Return the vocabulary column indices for the requested token group."""
    if gender == "F":
        return female_gender_token_idxs
    if gender == "M":
        return male_gender_token_idxs
    if gender == "both":
        return gender_token_idxs
    raise ValueError(f'gender must be "F", "M" or "both", got {gender!r}')


def _weight_grid(w_min, w_max, interval):
    """Return the multipliers from w_min up to and including w_max."""
    bounds = (w_min, w_max, interval)
    if all(isinstance(value, (int, np.integer)) for value in bounds):
        # Integer sweep: yields Python ints, so column names read "w=1".
        return list(range(w_min, w_max + 1, interval))
    if interval <= 0:
        raise ValueError(f"interval must be positive for a float sweep, got {interval!r}")
    # range() rejects floats, so build the grid by step count. The small
    # epsilon keeps w_max in the grid despite floating-point division error.
    n_steps = int(np.floor((w_max - w_min) / interval + 1e-9)) + 1
    return [round(float(w_min + k * interval), 10) for k in range(max(n_steps, 0))]


def _model_with_scaled_tokens(token_idxs, w):
    """Return `model` with the coefficient columns in token_idxs multiplied by w."""
    if w == 1:
        # Weights are unchanged, so the global model can be used without a copy.
        return model
    scaled_model = deepcopy(model)
    scaled_model.coef_[:, token_idxs] = scaled_model.coef_[:, token_idxs] * w
    return scaled_model


def _test_frame():
    """Build the gender / text / label frame of the test split."""
    return pd.DataFrame({
        "gender": dataset["test"]["gender"],
        "title_scrubbed": dataset["test"]["title_scrubbed"], "label": dataset["test"]["title"]
    })


def get_reweighted_statistical_TPR_gap(X_test, gender="F", w_min=1.0, w_max=3.0, interval=1.0):
    """Statistical TPR gap per occupation while scaling gender-token weights.

    For each multiplier w in [w_min, w_max] (step ``interval``, upper bound
    inclusive) the coefficients of the selected gender tokens in the global
    ``model`` are multiplied by w on a copy, the copy predicts ``X_test``, and
    ``TPR_gap`` is evaluated for every entry of the global ``occupation_list``.

    Parameters
    ----------
    X_test : feature matrix accepted by ``model.predict``; its rows must
        correspond to the rows of ``dataset["test"]``.
    gender : "F", "M" or "both"
        Which token group to scale. Any other value raises ValueError.
    w_min, w_max, interval : int or float
        Sweep bounds and step. All-integer arguments give integer multipliers
        (columns named e.g. "statistical_parity w=1"); otherwise multipliers
        are floats (e.g. "statistical_parity w=1.0").

    Returns
    -------
    pandas.DataFrame
        Column "occupation" plus one "statistical_parity w=<w>" column per
        multiplier.
    """
    token_idxs = _gender_token_idxs(gender)
    occupations = list(occupation_list)
    result = pd.DataFrame({"occupation": occupations})
    # The test-split frame does not depend on w, so build it once per call.
    preds = _test_frame()

    for w in _weight_grid(w_min, w_max, interval):
        temp_model = _model_with_scaled_tokens(token_idxs, w)
        preds["title_scrubbed_pred"] = temp_model.predict(X_test)
        result[f"statistical_parity w={w}"] = [
            TPR_gap(preds, occupation) for occupation in occupations
        ]

    return result


def get_reweighted_causal_TPR_gap(X_test, X_gender_swap_test, gender="F", w_min=1.0, w_max=3.0, interval=1.0):
    """Causal TPR gap per occupation while scaling gender-token weights.

    Same sweep as ``get_reweighted_statistical_TPR_gap``, but each reweighted
    model also predicts ``X_gender_swap_test`` and ``TPR_gap`` is called with
    ``causal=True``.

    Parameters
    ----------
    X_test, X_gender_swap_test : feature matrices accepted by
        ``model.predict``; rows must correspond to ``dataset["test"]``.
    gender : "F", "M" or "both"
        Which token group to scale. Any other value raises ValueError.
    w_min, w_max, interval : int or float
        Sweep bounds and step (upper bound inclusive).

    Returns
    -------
    pandas.DataFrame
        Column "occupation" plus one "causal_parity w=<w>" column per
        multiplier.
    """
    token_idxs = _gender_token_idxs(gender)
    occupations = list(occupation_list)
    result = pd.DataFrame({"occupation": occupations})
    # The test-split frame does not depend on w, so build it once per call.
    preds = _test_frame()

    for w in _weight_grid(w_min, w_max, interval):
        temp_model = _model_with_scaled_tokens(token_idxs, w)
        preds["title_scrubbed_pred"] = temp_model.predict(X_test)
        preds["title_scrubbed_gender_swapped_pred"] = temp_model.predict(X_gender_swap_test)
        result[f"causal_parity w={w}"] = [
            TPR_gap(preds, occupation, causal=True) for occupation in occupations
        ]

    return result

In [10]:
# Average number of gendered tokens per biography, per occupation:
# female tokens in women's bios, male tokens in men's bios, and all gender
# tokens over every bio of that occupation.
average_gender_tokens = pd.DataFrame({"occupation": occupation_list})
avg_female_token, avg_male_token, avg_token = [], [], []

for occupation in occupation_list:
    # Index labels of base_preds are used below as row positions into X_test,
    # which relies on base_preds keeping its default 0..n-1 index.
    all_idxs = base_preds.loc[(base_preds["label"] == occupation)].index.tolist()
    male_idxs = base_preds.loc[(base_preds.gender == "M")&(base_preds["label"] == occupation)].index.tolist()
    female_idxs = base_preds.loc[(base_preds.gender == "F")&(base_preds["label"] == occupation)].index.tolist()

    # Per row: sum the counts over the male-token columns, then average the
    # row sums across the male rows of this occupation.
    male_embeds = X_test[male_idxs, :]
    male_token_counts = male_embeds[:, male_gender_token_idxs]
    avg_male_token.append(male_token_counts.toarray().sum(axis=1).mean())

    # Same for female-token columns on the female rows.
    female_embeds = X_test[female_idxs, :]
    female_token_counts = female_embeds[:, female_gender_token_idxs]
    avg_female_token.append(female_token_counts.toarray().sum(axis=1).mean())

    # All gender-token columns over every row of this occupation.
    embeds = X_test[all_idxs, :]
    token_counts = embeds[:, gender_token_idxs]
    avg_token.append(token_counts.toarray().sum(axis=1).mean())

average_gender_tokens["female_avg_gender_tokens"] = avg_female_token
average_gender_tokens["male_avg_gender_tokens"] = avg_male_token
average_gender_tokens["avg_gender_tokens"] = avg_token
# Positive values mean women's bios carry more female tokens on average than
# men's bios carry male tokens.
average_gender_tokens["avg_gender_token_diff"] = average_gender_tokens["female_avg_gender_tokens"] - average_gender_tokens["male_avg_gender_tokens"]

In [21]:
# Colour list the plotting cells index by weight to colour one line per multiplier.
colorscale = px.colors.sequential.GnBu
# Alternative diverging palette, currently disabled:
# colorscale = px.colors.diverging.RdYlBu

## Increase Female Gender Token Weights

In [27]:
# Sweep the female-token multiplier over 1..5 and order occupations by their
# unmodified (w=1) statistical TPR gap.
all_sp_result = get_reweighted_statistical_TPR_gap(
    X_test, gender="F", w_min=1, w_max=5, interval=1
).sort_values("statistical_parity w=1")
occupation_list = all_sp_result["occupation"].tolist()

# One line per multiplier, coloured by colorscale[w + 2].
fig = go.Figure()
for w in range(1, 6):
    sp = all_sp_result[f"statistical_parity w={w}"].tolist()
    fig.add_trace(
        go.Scatter(
            x=occupation_list,
            y=sp,
            mode="lines",
            name=f"w={w}",
            line={"color": colorscale[w + 2]},
        )
    )

fig.update_layout(
    autosize=False,
    width=695,
    height=500,
    plot_bgcolor="white",
    font={"size": 15.5},
    xaxis_title="Occupation",
    yaxis_title="Statistical TPR Gap",
    margin={"l": 15, "r": 15, "t": 20, "b": 20},
)
# Both axes share the same mirrored frame, light-grey grid and zero line.
fig.update_xaxes(mirror=True, showgrid=True, gridcolor="lightgrey",
                 zeroline=True, zerolinecolor="lightgrey")
fig.update_yaxes(mirror=True, showgrid=True, gridcolor="lightgrey",
                 zeroline=True, zerolinecolor="lightgrey")

fig.show()

In [22]:
# Sweep the female-token multiplier over 1..5 and order occupations by their
# unmodified (w=1) causal TPR gap.
all_cp_result = get_reweighted_causal_TPR_gap(
    X_test, X_gender_swap_test, gender="F", w_min=1, w_max=5, interval=1
).sort_values("causal_parity w=1")
occupation_list = all_cp_result["occupation"].tolist()

# One line per multiplier, coloured by colorscale[w + 2].
fig = go.Figure()
for w in range(1, 6):
    cp = all_cp_result[f"causal_parity w={w}"].tolist()
    fig.add_trace(
        go.Scatter(
            x=occupation_list,
            y=cp,
            mode="lines",
            name=f"w={w}.0",
            line={"color": colorscale[w + 2]},
        )
    )

fig.update_layout(
    autosize=False,
    width=695,
    height=500,
    plot_bgcolor="white",
    font={"size": 15},
    xaxis_title="Occupation",
    yaxis_title="Causal TPR Gap",
    margin={"l": 15, "r": 15, "t": 20, "b": 20},
)
# Both axes share the same mirrored frame, light-grey grid and zero line.
fig.update_xaxes(mirror=True, showgrid=True, gridcolor="lightgrey",
                 zeroline=True, zerolinecolor="lightgrey")
fig.update_yaxes(mirror=True, showgrid=True, gridcolor="lightgrey",
                 zeroline=True, zerolinecolor="lightgrey")

fig.show()

## Increase Male Gender Token Weights

In [15]:
# Sweep the male-token multiplier over 1..5 and order occupations by their
# unmodified (w=1) statistical TPR gap.
all_sp_result = get_reweighted_statistical_TPR_gap(
    X_test, gender="M", w_min=1, w_max=5, interval=1
).sort_values("statistical_parity w=1")
occupation_list = all_sp_result["occupation"].tolist()

# One line per multiplier, coloured by colorscale[w + 2].
fig = go.Figure()
for w in range(1, 6):
    sp = all_sp_result[f"statistical_parity w={w}"].tolist()
    fig.add_trace(
        go.Scatter(
            x=occupation_list,
            y=sp,
            mode="lines",
            name=f"w={w}",
            line={"color": colorscale[w + 2]},
        )
    )

fig.update_layout(
    autosize=False,
    width=695,
    height=500,
    plot_bgcolor="white",
    font={"size": 15.5},
    xaxis_title="Occupation",
    yaxis_title="Statistical TPR Gap",
    margin={"l": 15, "r": 15, "t": 20, "b": 20},
)
# Both axes share the same mirrored frame, light-grey grid and zero line.
fig.update_xaxes(mirror=True, showgrid=True, gridcolor="lightgrey",
                 zeroline=True, zerolinecolor="lightgrey")
fig.update_yaxes(mirror=True, showgrid=True, gridcolor="lightgrey",
                 zeroline=True, zerolinecolor="lightgrey")
# Marker styling applied to every trace (the traces are drawn in 'lines' mode).
fig.update_traces(
    marker={"size": 8.5, "line": {"width": 1.5, "color": "DarkSlateGrey"}}
)

fig.show()

In [24]:
# Sweep the male-token multiplier over 1..5 and order occupations by their
# unmodified (w=1) causal TPR gap.
all_cp_result = get_reweighted_causal_TPR_gap(
    X_test, X_gender_swap_test, gender="M", w_min=1, w_max=5, interval=1
).sort_values("causal_parity w=1")
occupation_list = all_cp_result["occupation"].tolist()

# One line per multiplier, coloured by colorscale[w + 2].
fig = go.Figure()
for w in range(1, 6):
    cp = all_cp_result[f"causal_parity w={w}"].tolist()
    fig.add_trace(
        go.Scatter(
            x=occupation_list,
            y=cp,
            mode="lines",
            name=f"w={w}.0",
            line={"color": colorscale[w + 2]},
        )
    )

fig.update_layout(
    autosize=False,
    width=695,
    height=500,
    plot_bgcolor="white",
    font={"size": 15},
    xaxis_title="Occupation",
    yaxis_title="Causal TPR Gap",
    margin={"l": 15, "r": 15, "t": 20, "b": 20},
)
# Both axes share the same mirrored frame, light-grey grid and zero line.
fig.update_xaxes(mirror=True, showgrid=True, gridcolor="lightgrey",
                 zeroline=True, zerolinecolor="lightgrey")
fig.update_yaxes(mirror=True, showgrid=True, gridcolor="lightgrey",
                 zeroline=True, zerolinecolor="lightgrey")

fig.show()

## Increase All Gender Token Weights

In [29]:
# Palette for the sweep over negative and positive multipliers; the plotting
# cells below pick colours at offsets around the middle of this list.
colorscale = px.colors.sequential.thermal

In [18]:
# Scale the coefficients of all gender tokens by each integer in -3..3 and
# order occupations by their unmodified (w=1) statistical TPR gap.
all_sp_result = get_reweighted_statistical_TPR_gap(
    X_test, gender="both", w_min=-3, w_max=3, interval=1
).sort_values("statistical_parity w=1")
occupation_list = all_sp_result["occupation"].tolist()

# Colours are taken symmetrically around the middle of the colour list.
mid = len(colorscale) // 2

fig = go.Figure()
for w in range(-3, 4):
    sp = all_sp_result[f"statistical_parity w={w}"].tolist()
    fig.add_trace(
        go.Scatter(
            x=occupation_list,
            y=sp,
            mode="lines",
            name=f"w={w}",
            # The unmodified model (w == 1) is drawn dashed.
            line={"color": colorscale[mid + w], **({"dash": "dash"} if w == 1 else {})},
        )
    )

fig.update_layout(
    autosize=False,
    width=695,
    height=500,
    plot_bgcolor="white",
    font={"size": 15},
    xaxis_title="Occupation",
    yaxis_title="Statistical TPR Gap",
    margin={"l": 15, "r": 15, "t": 20, "b": 20},
)
# Both axes share the same mirrored frame, light-grey grid and zero line.
fig.update_xaxes(mirror=True, showgrid=True, gridcolor="lightgrey",
                 zeroline=True, zerolinecolor="lightgrey")
fig.update_yaxes(mirror=True, showgrid=True, gridcolor="lightgrey",
                 zeroline=True, zerolinecolor="lightgrey")

fig.show()

In [30]:
# Scale the coefficients of all gender tokens by each integer in -3..3 and
# order occupations by their unmodified (w=1) causal TPR gap.
all_cp_result = get_reweighted_causal_TPR_gap(
    X_test, X_gender_swap_test, gender="both", w_min=-3, w_max=3, interval=1
).sort_values("causal_parity w=1")
occupation_list = all_cp_result["occupation"].tolist()

# Colours are taken symmetrically around the middle of the colour list.
mid = len(colorscale) // 2

fig = go.Figure()
for w in range(-3, 4):
    cp = all_cp_result[f"causal_parity w={w}"].tolist()
    fig.add_trace(
        go.Scatter(
            x=occupation_list,
            y=cp,
            mode="lines",
            name=f"w={w}",
            # The unmodified model (w == 1) is drawn dashed.
            line={"color": colorscale[mid + w], **({"dash": "dash"} if w == 1 else {})},
        )
    )

fig.update_layout(
    autosize=False,
    width=695,
    height=500,
    plot_bgcolor="white",
    font={"size": 15},
    xaxis_title="Occupation",
    yaxis_title="Causal TPR Gap",
    margin={"l": 15, "r": 15, "t": 20, "b": 20},
)
# Both axes share the same mirrored frame, light-grey grid and zero line.
fig.update_xaxes(mirror=True, showgrid=True, gridcolor="lightgrey",
                 zeroline=True, zerolinecolor="lightgrey")
fig.update_yaxes(mirror=True, showgrid=True, gridcolor="lightgrey",
                 zeroline=True, zerolinecolor="lightgrey")

fig.show()