In [1]:
import sys
import shutil
from pathlib import Path

p = str(Path.home() / "miniconda3/envs/bias-pmi/lib/python3.9/site-packages")
sys.path.append(p)

In [2]:
import pandas as pd
import numpy as np
import cv2 as cv
import matplotlib.pyplot as plt

from scripts.utils.figures import pearson_cor, scatter_color_plt, scatter_plt

In [3]:
pd.set_option("display.max_columns", 100)

## Data

Read final bias results

In [4]:
df_final = pd.read_csv("results/figures_data.csv")

In [5]:
df_final[["experiment","corpus","bias"]].value_counts().sort_index()

experiment        corpus    bias     
glasgow-gender    wiki2021  gender        4661
mturk-affluence   wiki2021  affluence       59
mturk-race        wiki2021  race            59
warriner-valence  wiki2021  valence      13565
dtype: int64

In [6]:
df_final.head(2)

Unnamed: 0,word,score,experiment,corpus,bias,idx,pmi_a,pmi_b,cooc_target_a,cooc_target_b,lor,lor_se,lor_pvalue,dpmi,dpmi_lower,dpmi_upper,freq,wefat_score_glovewc,sims_a_glovewc,sims_b_glovewc,wefat_score_sgns,sims_a_sgns,sims_b_sgns,wefat_score_ft,sims_a_ft,sims_b_ft,sgns_pvalue,ft_pvalue,glovewc_pvalue,lor_pvaluecor,sgns_pvaluecor,ft_pvaluecor,glovewc_pvaluecor,se_glovewc,se_sgns,se_ft,lower_glovewc,lower_sgns,lower_ft,upper_glovewc,upper_sgns,upper_ft
0,abattoir,2.609,glasgow-gender,wiki2021,gender,70162,-1.826518,-1.54063,21.8,85.8,-0.285888,0.239847,0.2332768,-0.285888,-0.75598,0.184203,779,0.315923,0.00543179|-0.09380331|-0.07327254|-0.12758706...,-0.03121008|-0.10647151|-0.04088708|-0.1480419...,-1.200106,0.07165016|0.08849885|0.11103344|0.06113353|0....,0.06226957|0.13884724|0.17500055|0.11445295|0....,-0.92987,0.13312729|0.17621896|0.2004441|0.1055144|0.15...,0.12088852|0.1969999|0.26026373|0.16933484|0.2...,0.013299,0.067893,0.549145,0.2653901,0.37954,0.66385,0.857763,0.490376,0.337565,0.384732,-0.668904,-1.756082,-1.589689,1.28903,-0.477644,-0.102452
1,abbey,4.697,glasgow-gender,wiki2021,gender,3131,0.034713,0.153478,4321.8,14391.8,-0.118769,0.017346,7.53423e-12,-0.118766,-0.152763,-0.084768,78053,-0.414972,0.00678171|0.00046867|-0.01932356|0.17157976|0...,0.03549158|0.05031704|0.00023039|0.19645763|0....,-0.138819,0.11105664|0.08128471|0.11001417|0.17782994|0....,0.14162242|0.14122758|0.11725641|0.21673883|0....,-0.303276,0.17868634|0.17785942|0.19954302|0.28001034|0....,0.21887751|0.22333494|0.21375681|0.32006515|0....,0.79922,0.575442,0.430257,1.490537e-11,0.971613,0.892894,0.800684,0.48783,0.525099,0.494075,-1.322785,-1.051208,-1.186511,0.599782,1.005707,0.684654


In [7]:
experiments_info = {
    "glasgow-gender": {
        "bias_label": "Sesgo de género",
        "col_label": "Juicio humano de la\nasociación con el género",
        "point_size": 2,
        "edgecolor": None,
        "error_bars": False,
    },
    "warriner-valence": {
        "bias_label": "Sesgo de sentimiento",
        "col_label": "Juicio humano del\nsentimiento",
        "point_size": 2,
        "edgecolor": None,
        "error_bars": False,
    },
    "mturk-affluence": {
        "bias_label": "Sesgo de riqueza",
        "col_label": "Juicio humano de la\nasociación con la riqueza",
        "point_size": 35,
        "edgecolor": "black",
        "error_bars": True,
    },
    "mturk-race": {
        "bias_label": "Sesgo étnico",
        "col_label": "Juicio humano de la\nasociación con la etnia",
        "point_size": 35,
        "edgecolor": "black",
        "error_bars": True,
    },
}

## Correlations

In [8]:
bias_and_se = [
    ("wefat_score_sgns", None),
    ("wefat_score_sgns", "se_sgns"),
    ("wefat_score_glovewc", None),
    ("wefat_score_glovewc", "se_glovewc"),
    ("wefat_score_ft", None),
    ("wefat_score_ft", "se_ft"),
    ("dpmi", None),
    ("dpmi", "lor_se"),
]

In [9]:
corpus_dict = {"wiki2021" : "Wikipedia"}
metrics_dict = {"dpmi" : "PMI", "wefat_score_glovewc" : "GloVe", 
                "wefat_score_sgns": "SGNS", "wefat_score_ft" : "FastText"}

In [10]:
res = []

for metric, se in bias_and_se:
    res_ = df_final.groupby(["corpus","experiment"], as_index=False).apply(
        lambda d: pearson_cor(d, "score", metric, weight_se_var=se))
    res_["correlation"] = "$r$ ponderado" if se else "$r$"
    res_["metric"] = metric
    res_ = res_.replace({"corpus": corpus_dict, "metric": metrics_dict})
    # res_["experiment"] = res_["experiment"].str.title()
    res_.columns = res_.columns.str.title()
    res += [res_.copy()]

df_res = pd.concat(res)
df_res.rename(columns={None: "value"}, inplace=True)
df_res["Experiment"] = df_res["Experiment"].map(
    lambda x: experiments_info[x]["bias_label"])
df_res = df_res.set_index(["Corpus", "Experiment", "Correlation", "Metric"],).unstack()
df_res.columns = df_res.columns.droplevel(0)
df_res = df_res[["PMI","SGNS","FastText","GloVe",]]

In [11]:
df_res.round(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,Metric,PMI,SGNS,FastText,GloVe
Corpus,Experiment,Correlation,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Wikipedia,Sesgo de género,$r$,0.51,0.49,0.48,0.46
Wikipedia,Sesgo de género,$r$ ponderado,0.45,0.63,0.63,0.69
Wikipedia,Sesgo de riqueza,$r$,0.2,0.16,0.12,0.33
Wikipedia,Sesgo de riqueza,$r$ ponderado,0.09,0.16,0.14,0.36
Wikipedia,Sesgo de sentimiento,$r$,0.43,0.6,0.59,0.58
Wikipedia,Sesgo de sentimiento,$r$ ponderado,0.34,0.66,0.66,0.64
Wikipedia,Sesgo étnico,$r$,0.14,0.44,0.36,0.3
Wikipedia,Sesgo étnico,$r$ ponderado,0.15,0.51,0.2,0.43


In [12]:
corr_tab = df_res.droplevel(0)
corr_tab = corr_tab.query("Experiment != 'Sesgo de riqueza'").copy()
corr_tab = corr_tab.round(3)
corr_tab.index.names = ["Sesgo", "Correlación"]
corr_tab.columns.name = None
corr_tab

Unnamed: 0_level_0,Unnamed: 1_level_0,PMI,SGNS,FastText,GloVe
Sesgo,Correlación,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Sesgo de género,$r$,0.513,0.493,0.475,0.462
Sesgo de género,$r$ ponderado,0.453,0.625,0.633,0.687
Sesgo de sentimiento,$r$,0.427,0.595,0.59,0.576
Sesgo de sentimiento,$r$ ponderado,0.338,0.663,0.658,0.644
Sesgo étnico,$r$,0.137,0.442,0.363,0.3
Sesgo étnico,$r$ ponderado,0.146,0.508,0.205,0.426


In [13]:
print(
    corr_tab
        .style.format("{:.2f}")
        .to_latex(
            clines="skip-last;data", multicol_align="|c|", hrules=True,
            # column_format="c", multirow_align="naive", multicol_align="c"
        )
)

\begin{tabular}{llrrrr}
\toprule
 &  & PMI & SGNS & FastText & GloVe \\
Sesgo & Correlación &  &  &  &  \\
\midrule
\multirow[c]{2}{*}{Sesgo de género} & $r$ & 0.51 & 0.49 & 0.47 & 0.46 \\
 & $r$ ponderado & 0.45 & 0.62 & 0.63 & 0.69 \\
\cline{1-6}
\multirow[c]{2}{*}{Sesgo de sentimiento} & $r$ & 0.43 & 0.59 & 0.59 & 0.58 \\
 & $r$ ponderado & 0.34 & 0.66 & 0.66 & 0.64 \\
\cline{1-6}
\multirow[c]{2}{*}{Sesgo étnico} & $r$ & 0.14 & 0.44 & 0.36 & 0.30 \\
 & $r$ ponderado & 0.15 & 0.51 & 0.20 & 0.43 \\
\cline{1-6}
\bottomrule
\end{tabular}



## Plots & tables

In [14]:
metrics_info = {
    "wefat_score_sgns": {
        "label": "SGNS",
        "pvalue_col": "sgns_pvaluecor",
        "se_and_ci_cols": [[None, None], ["se_sgns", ["lower_sgns","upper_sgns"]]],
        # "se_and_ci_cols": [[None, None], ],
        },
    "wefat_score_ft": {
        "label": "FastText",
        "pvalue_col": "ft_pvaluecor",
        "se_and_ci_cols": [[None, None], ["se_ft", ["lower_ft","upper_ft"]]],
        # "se_and_ci_cols": [[None, None], ],
        },
    "wefat_score_glovewc": {
        "label": "GloVe",
        "pvalue_col": "glovewc_pvaluecor",
        "se_and_ci_cols": [[None, None], ["se_glovewc", ["lower_glovewc","upper_glovewc"]]],
        # "se_and_ci_cols": [[None, None], ],
        },
    "dpmi": {
        "label": "PMI",
        "pvalue_col": "lor_pvaluecor",
        "se_and_ci_cols": [[None, None], ["lor_se", ["dpmi_lower","dpmi_upper"]]],
        },
    }

In [15]:
corpora_info = {
    "wiki2021": {"label": "Wikipedia"},
}

In [16]:
df_settings = df_final[["corpus", "experiment"]].drop_duplicates()

In [17]:
grid_info = {
    "experiments": ["glasgow-gender", "warriner-valence", "mturk-race"],
    "metrics": ["PMI", "SGNS", "FastText", "GloVe"],
    "metric_types": ["uw", "w"],
    "corpora": ["wiki2021"],
}

In [18]:
def concat_tile(lista_2d: list, orient: str = "h"):
    if orient == "v":
        return cv.hconcat([cv.vconcat(list_h) for list_h in lista_2d])
    elif orient == "h":
        return cv.vconcat([cv.hconcat(list_h) for list_h in lista_2d])
    else:
        raise ValueError("orient must be 'h' or 'v'")

### Correlations

In [39]:
# save PNGs
dpi = 300
width = 1400
height = 1050
plt.ioff()

for _, (corpus, experiment) in df_settings.iterrows():
    for metric, metric_info in metrics_info.items():
        for se_col, ci_cols in metric_info["se_and_ci_cols"]:
            
            experiment_info = experiments_info[experiment]
            corpus_info = corpora_info[corpus]
            metric_type = "w" if se_col else "uw"
            xlabel = experiment_info['col_label']
            ylabel = f"{experiment_info['bias_label']} ({metric_info['label']})"
            corpus_name = corpus_info["label"]
            error_bars = experiment_info["error_bars"]
            point_size = experiment_info["point_size"]
            edgecolor = experiment_info["edgecolor"]
            ci_cols = ci_cols if error_bars else None
            outfile = f"results/plots/{corpus}_{experiment}_{metric_info['label']}_{metric_type}.png"

            df_ = df_final.query("experiment == @experiment & corpus == @corpus")

            fig_, ax_ = scatter_color_plt(
                "score", metric, df_, weight_se_var=se_col, error_cols=ci_cols,
                xlabel=xlabel, ylabel=ylabel, title=None, point_size=point_size,
                edgecolor=edgecolor, print_pearson=False
            )
            fig_.set_size_inches(width/dpi, height/dpi)
            plt.savefig(outfile, dpi=dpi, bbox_inches='tight')
            plt.close(fig_)


In [40]:
plot_files = Path("results/plots").glob("*.png")
plot_files = sorted(list(plot_files))

In [41]:
plot_files_wefat = [f for f in plot_files if not f.stem.startswith("pvalues")]
plot_files_dict = {}
for f in plot_files_wefat:
    f_name = f.stem
    plot_files_dict[f_name] = str(f)

In [42]:
img_lists = {}
img_grids = {}
for experiment in grid_info["experiments"]:
    img_lists[experiment] = []
    for corpus in grid_info["corpora"]:
        for metric_type in grid_info["metric_types"]:
            img_row = []
            for metric in grid_info["metrics"]:
                k = f"{corpus}_{experiment}_{metric}_{metric_type}"
                f = plot_files_dict[k]
                img_ = cv.imread(f)
                img_ = cv.resize(img_, (width, height))
                img_row.append(img_)
            img_lists[experiment].append(img_row)
    img_grids[experiment] = concat_tile(img_lists[experiment], orient="v")
    outfile = f"results/plots/grid_{experiment}.png"
    cv.imwrite(outfile, img_grids[experiment])
    # copy the file to latex/img/:
    shutil.copy(outfile, f"latex/img/grid_{experiment}.png")


### p-values

In [23]:
# save PNGs
dpi = 300
width = 1600
height = 1050
plt.ioff()

for _, (corpus, experiment) in df_settings.iterrows():
    for metric, metric_info in metrics_info.items():

        experiment_info = experiments_info[experiment]
        corpus_info = corpora_info[corpus]
        corpus_name = corpus_info["label"]
        point_size = experiment_info["point_size"]
        edgecolor = experiment_info["edgecolor"]
        pvalue_col = metric_info["pvalue_col"]
        xlabel = f"{experiment_info['bias_label']} ({metric_info['label']})"
        ylabel = f"p-valor ({metric_info['label']})"
        outfile = f"results/plots/pvalues_{corpus}_{experiment}_{metric_info['label']}.png"
        
        df_ = df_final.query("experiment == @experiment & corpus == @corpus")

        fig_, ax_ = scatter_plt(
            metric, pvalue_col, df_, xlabel=xlabel, ylabel=ylabel, 
            title=None, point_size=point_size, edgecolor=edgecolor)
        fig_.set_size_inches(width/dpi, height/dpi)
        plt.savefig(outfile, dpi=dpi, bbox_inches='tight')
        plt.close(fig_)

In [24]:
plot_files = Path("results/plots").glob("*.png")
plot_files = sorted(list(plot_files))

In [25]:
plot_files_pvalues = [f for f in plot_files if f.stem.startswith("pvalues")]
plot_files_dict = {}
for f in plot_files_pvalues:
    f_name = f.stem
    plot_files_dict[f_name] = str(f)

In [26]:
img_list = []
for metric in grid_info["metrics"]:
    for corpus in grid_info["corpora"]:
        img_row = []
        for experiment in grid_info["experiments"]:
            k = f"pvalues_{corpus}_{experiment}_{metric}"
            f = plot_files_dict[k]
            img_ = cv.imread(f)
            img_ = cv.resize(img_, (width, height))
            img_row.append(img_)
        img_list.append(img_row)

img_grid = concat_tile(img_list)

In [27]:
cv.imwrite(f"results/plots/grid_pvalues.png", img_grid)

True

In [28]:
# copy the file to latex/img/
shutil.copy("results/plots/grid_pvalues.png", "latex/img/grid_pvalues.png")

'latex/img/grid_pvalues.png'

In [29]:
experiments_info

{'glasgow-gender': {'bias_label': 'Sesgo de género',
  'col_label': 'Juicio humano de la\nasociación con el género',
  'point_size': 2,
  'edgecolor': None,
  'error_bars': False},
 'warriner-valence': {'bias_label': 'Sesgo de sentimiento',
  'col_label': 'Juicio humano del\nsentimiento',
  'point_size': 2,
  'edgecolor': None,
  'error_bars': False},
 'mturk-affluence': {'bias_label': 'Sesgo de riqueza',
  'col_label': 'Juicio humano de la\nasociación con la riqueza',
  'point_size': 35,
  'edgecolor': 'black',
  'error_bars': True},
 'mturk-race': {'bias_label': 'Sesgo étnico',
  'col_label': 'Juicio humano de la\nasociación con la etnia',
  'point_size': 35,
  'edgecolor': 'black',
  'error_bars': True}}

In [30]:
# Tabla: % de palabras con p-valor < 0.1
pval_cols = [c for c in df_final.columns if c.endswith("_pvaluecor")]

# % of pvalues < 0.05
df_tmp = df_final.query("experiment != 'mturk-affluence'").copy()
df_tmp = df_tmp.rename(columns={"experiment": "Sesgo"})
df_tmp["Sesgo"] = df_tmp["Sesgo"].map(
    lambda x: experiments_info[x]["bias_label"]
)

pval_tab = (
    df_tmp
        .groupby(["corpus", "Sesgo"])
        .apply(lambda x: (x[pval_cols] < 0.1).mean() * 100)
        .round(2)
)

pval_tab = pval_tab.droplevel(0)
pval_tab.columns = metrics_dict.values()
pval_tab.index.name = None
pval_tab

Unnamed: 0,PMI,GloVe,SGNS,FastText
Sesgo de género,82.51,0.3,0.0,0.32
Sesgo de sentimiento,66.44,29.72,33.85,21.33
Sesgo étnico,77.97,0.0,0.0,0.0


In [31]:
# format values with % sign
print(
    pval_tab
        .style.format("{:.2f}\%")
        .to_latex(
            clines="skip-last;data", multicol_align="|c|", hrules=True,
        )
)
# TODO no mostrar weighted corr?

\begin{tabular}{lrrrr}
\toprule
 & PMI & GloVe & SGNS & FastText \\
\midrule
Sesgo de género & 82.51\% & 0.30\% & 0.00\% & 0.32\% \\
Sesgo de sentimiento & 66.44\% & 29.72\% & 33.85\% & 21.33\% \\
Sesgo étnico & 77.97\% & 0.00\% & 0.00\% & 0.00\% \\
\bottomrule
\end{tabular}



### Confidence intervals

In [32]:
# add amplitud IC
df_tmp = df_final.copy()

for metric_data in metrics_info.values():
    metric_name = metric_data["label"]
    ci_cols = metric_data["se_and_ci_cols"][1][1]
    df_tmp[f"{metric_name}_amplitud"] = df_tmp[ci_cols].apply(
        lambda x: x[1] - x[0], axis=1
    )

In [33]:
# save PNGs
dpi = 300
width = 1600
height = 1050
plt.ioff()

# compute max amplitud among WE metrics
amplitud_cols = [c for c in df_tmp.columns if c.endswith("_amplitud")]
amplitud_cols = [c for c in amplitud_cols if not c.startswith("PMI_")]
max_amplitud = df_tmp[amplitud_cols].max().max()

for _, (corpus, experiment) in df_settings.iterrows():
    
    df_ = df_tmp.query("experiment == @experiment & corpus == @corpus")
    
    for metric, metric_info in metrics_info.items():

        experiment_info = experiments_info[experiment]
        corpus_info = corpora_info[corpus]
        corpus_name = corpus_info["label"]
        point_size = experiment_info["point_size"]
        edgecolor = experiment_info["edgecolor"]
        amplitud_col = f"{metric_info['label']}_amplitud"
        xlabel = f"{experiment_info['bias_label']} ({metric_info['label']})"
        ylabel = f"Amplitud IC ({metric_info['label']})"
        outfile = f"results/plots/ics_{corpus}_{experiment}_{metric_info['label']}.png"
        
        mean_amplitud = df_[amplitud_col].mean()

        fig_, ax_ = scatter_plt(
            metric, amplitud_col, df_, xlabel=xlabel, ylabel=ylabel, 
            title=None, point_size=point_size, edgecolor=edgecolor, 
            ylim=(0, max_amplitud+0.1))
        ax_.axhline(mean_amplitud, color="red", linestyle="--")

        fig_.set_size_inches(width/dpi, height/dpi)
        plt.savefig(outfile, dpi=dpi, bbox_inches='tight')
        plt.close(fig_)

In [34]:
# make grid:
plot_files = Path("results/plots").glob("*.png")
plot_files = sorted(list(plot_files))

In [35]:
plot_files_ics = [f for f in plot_files if f.stem.startswith("ics")]
plot_files_dict = {}
for f in plot_files_ics:
    f_name = f.stem
    plot_files_dict[f_name] = str(f)

In [36]:
# grid withouy PMI
img_list = []
for metric in grid_info["metrics"]:
    if metric == "PMI":
        continue
    for corpus in grid_info["corpora"]:
        img_row = []
        for experiment in grid_info["experiments"]:
            k = f"ics_{corpus}_{experiment}_{metric}"
            f = plot_files_dict[k]
            img_ = cv.imread(f)
            img_ = cv.resize(img_, (width, height))
            img_row.append(img_)
        img_list.append(img_row)

img_grid = concat_tile(img_list)

In [37]:
cv.imwrite(f"results/plots/grid_ics.png", img_grid)

True

In [38]:
# copy the file to latex/img/
shutil.copy("results/plots/grid_ics.png", "latex/img/grid_ics.png")

'latex/img/grid_ics.png'

### Casos particulares

In [34]:
pvalues_sgns = df_final.query(
    "experiment == 'glasgow-gender' & corpus == 'wiki2021'")["sgns_pvaluecor"]
sum(pvalues_sgns < .1), len(pvalues_sgns)

(14, 4661)

In [38]:
df_final.query("experiment == 'mturk-race' & corpus == 'wiki2021'").sort_values("dpmi").head()

Unnamed: 0,word,score,experiment,corpus,bias,idx,pmi_a,pmi_b,cooc_target_a,cooc_target_b,lor,lor_se,lor_pvalue,dpmi,dpmi_lower,dpmi_upper,freq,wefat_score_glovewc,sims_a_glovewc,sims_b_glovewc,wefat_score_sgns,sims_a_sgns,sims_b_sgns,wefat_score_ft,sims_a_ft,sims_b_ft,sgns_pvalue,ft_pvalue,glovewc_pvalue,lor_pvaluecor,sgns_pvaluecor,ft_pvaluecor,glovewc_pvaluecor,se_glovewc,se_sgns,se_ft,lower_glovewc,lower_sgns,lower_ft,upper_glovewc,upper_sgns,upper_ft
18331,shanice,82.21732,mturk-race,wiki2021,race,155217,-3.295075,-1.130062,0.4,4.4,-2.165013,1.651446,0.1898644,-2.165013,-5.401787,1.071761,220,0.355081,-0.13475664|-0.0938984|-0.11529653|0.15575527,-0.16770209|-0.04582499|-0.15099636|0.03045385,1.073313,0.16050113|0.09058352|0.15551023|0.23649386,0.17408269|0.08407194|0.01000986|0.0950039,0.856022,0.35198426|0.18942437|0.29601163|0.39890833,0.34351458|0.21070067|0.20471367|0.21746322,0.239437,0.323944,0.661972,0.2154231,0.672703,0.878927,0.937069,0.767541,0.512532,0.662149,-1.301975,-0.198294,-0.584049,1.499586,1.805948,1.926286
8602,wine,32.88507,mturk-race,wiki2021,race,3061,0.19499,1.907384,541.4,3787.4,-1.712561,0.045948,4.712653e-304,-1.712394,-1.80245,-1.622338,80395,-0.550261,0.19322053|0.00318124|0.15543147|-0.03381905,0.24212|0.07438218|0.18607633|0.02517726,-0.570132,0.15551669|0.14656915|0.135274|0.14546574,0.23697968|0.21188954|0.12459675|0.10451385,-0.393708,0.32062669|0.25741166|0.26957862|0.27048343,0.39186503|0.35066625|0.25539279|0.20785616,0.43662,0.619718,0.492958,2.780465e-302,0.884598,0.878927,0.937069,0.705557,0.814389,0.808158,-1.689272,-1.951555,-1.841,1.08512,1.818873,1.509647
18055,volleyball,25.8108,mturk-race,wiki2021,race,4335,0.11926,1.795665,319.4,2155.4,-1.676499,0.059958,4.805119e-172,-1.676405,-1.79392,-1.55889,52672,0.230169,0.0554065|0.03925283|0.19043749|0.08584033,0.00571295|0.01081299|0.28257461|-0.01749973,0.175906,0.12699347|0.04724094|0.17582731|0.19573528,0.11881582|0.01339721|0.29422971|0.05892098,0.322985,0.15996007|0.12798664|0.24864846|0.25916849,0.15228492|0.11699417|0.33886819|0.08207445,0.84507,0.71831,0.71831,1.41751e-170,0.918458,0.878927,0.937069,0.900793,0.810771,0.809205,-1.078745,-1.238798,-1.066277,1.86423,1.816623,1.824407
5580,pastry,38.23769,mturk-race,wiki2021,race,22029,-0.923927,0.498769,14.4,75.4,-1.4227,0.287589,7.536726e-07,-1.422696,-1.98636,-0.859033,4926,-1.497758,0.02016928|-0.03816994|-0.01013557|0.01865838,0.08866569|0.04038082|0.01940015|0.07409429,-0.937405,0.12285666|0.03234944|0.06409766|0.16480639,0.16675819|0.14399771|0.10490418|0.13614772,-0.658856,0.27512363|0.18123644|0.20132631|0.28340675,0.32538267|0.31298639|0.22630286|0.2101853,0.239437,0.408451,0.056338,1.710257e-06,0.672703,0.878927,0.937069,0.214465,0.612929,0.673564,-1.907243,-1.871103,-1.744373,-1.120401,0.632038,0.981006
12493,hamburger,49.86356,mturk-race,wiki2021,race,19867,-0.63841,0.713546,21.4,104.4,-1.35196,0.237292,1.216149e-08,-1.351956,-1.817041,-0.886872,5761,-0.503146,-0.02550893|-0.03033849|-0.03495901|0.11411147,-0.0032219|-0.03822681|0.05049457|0.15619492,-0.330147,0.12615389|0.12026184|0.14154667|0.20185548,0.10414716|0.17344884|0.22808314|0.13734863,-0.732654,0.2659967|0.21164216|0.27859087|0.28552659,0.28405756|0.27968898|0.3305458|0.24247514,0.661972,0.380282,0.535211,2.870111e-08,0.910888,0.878927,0.937069,0.697779,0.730674,0.618423,-1.710805,-1.582422,-1.621123,0.94538,1.14598,0.818303


In [96]:
df_final.query("word in ['shanice','wine'] & experiment == 'mturk-race'")[[
    "word","experiment", "dpmi", "lor_pvalue", "dpmi_lower", "dpmi_upper",
    "wefat_score_sgns", "sgns_pvalue", "lower_sgns", "upper_sgns",
    ]]

Unnamed: 0,word,experiment,dpmi,lor_pvalue,dpmi_lower,dpmi_upper,wefat_score_sgns,sgns_pvalue,lower_sgns,upper_sgns
8602,wine,mturk-race,-1.712394,4.712653e-304,-1.80245,-1.622338,-0.570132,0.43662,-1.951555,1.818873
18331,shanice,mturk-race,-2.165013,0.1898644,-5.401787,1.071761,1.073313,0.239437,-0.198294,1.805948


In [23]:
df_final.query("word in ['basketball','jazz'] & experiment == 'mturk-race'")[[
    "word","experiment", "dpmi", "lor_pvalue", "dpmi_lower", "dpmi_upper",
    "wefat_score_sgns", "sgns_pvalue", "lower_sgns", "upper_sgns",
    ]]

Unnamed: 0,word,experiment,dpmi,lor_pvalue,dpmi_lower,dpmi_upper,wefat_score_sgns,sgns_pvalue,lower_sgns,upper_sgns
632,basketball,mturk-race,-0.625285,1.817817e-58,-0.701313,-0.549257,0.831574,0.239437,-0.612249,1.952965
4211,jazz,mturk-race,0.840956,4.781434e-128,0.772481,0.909432,1.337575,0.028169,1.14227,1.9164


In [35]:
# df_tmp = df_final.query("experiment == 'warriner-valence' & corpus == 'wiki2021'")
# plt.scatter(df_tmp["wefat_score_ft"], df_tmp["ft_pvaluecor"], s=1)
# plt.show()
# plt.scatter(df_tmp["wefat_score_ft"], df_tmp["ft_pvalue"], s=1)
# plt.show()
# # plt.scatter(df_tmp["ft_pvalue"], df_tmp["ft_pvaluecor"], s=1)
# # plt.show()

In [30]:
# df_final.query("experiment == 'mturk-race' & corpus == 'wiki2021'").sort_values("dpmi")

In [31]:
# df_final.query("experiment == 'warriner-valence' & corpus == 'wiki2021'").sort_values("ft_pvaluecor")

In [None]:
# # example pvalues figure
# file_pmi = plot_files_dict["pvalues_wiki2021_names-gender_PMI"]
# file_we = plot_files_dict["pvalues_wiki2021_names-gender_SGNS"]
# img_pmi = cv.imread(file_pmi)
# img_we = cv.imread(file_we)
# img_pmi = cv.resize(img_pmi, (width, height))
# img_we =  cv.resize(img_we, (width, height))
# img_grid = cv.vconcat([img_we, img_pmi])

# cv.imwrite(f"results/plots/grid_pvalues_wiki2021_names-gender.png", img_grid)

In [52]:
from gensim.parsing.preprocessing import STOPWORDS

In [55]:
"cry" in STOPWORDS # weird

True

In [63]:
tab_stopwords = (
    df_final
        .sort_values("idx")
        # .query("experiment == 'glasgow-gender'")[[
        .query("word in @STOPWORDS & experiment == 'glasgow-gender'")[[
    "word", "dpmi", "wefat_score_sgns", "wefat_score_ft", "wefat_score_glovewc"
    ]]
        .rename(
            columns={
                "word": "Palabra", "dpmi": "PMI", "wefat_score_sgns": "SGNS",
                "wefat_score_ft": "FastText", "wefat_score_glovewc": "GloVe"
            }
        )
        .set_index("Palabra")
        .head(20)
)

# make it latex:
print(
    tab_stopwords
        .style.format("{:.2f}")
        .to_latex(
            clines="skip-last;data", multicol_align="|c|", hrules=True,
            # column_format="c", multirow_align="naive", multicol_align="c"
        )
)    

\begin{tabular}{lrrrr}
\toprule
 & PMI & SGNS & FastText & GloVe \\
Palabra &  &  &  &  \\
\midrule
which & -0.08 & -0.50 & -0.59 & -0.51 \\
first & 0.03 & -0.15 & -0.28 & -0.50 \\
after & -0.06 & -0.60 & -0.73 & -0.62 \\
have & 0.13 & -0.42 & -0.35 & -0.45 \\
other & 0.07 & -0.57 & -0.45 & -0.44 \\
all & -0.05 & -0.55 & -0.58 & -0.64 \\
over & -0.20 & -0.80 & -0.87 & -0.76 \\
only & 0.02 & -0.46 & -0.56 & -0.60 \\
most & -0.13 & -0.53 & -0.56 & -0.62 \\
up & 0.11 & -0.63 & -0.67 & -0.64 \\
used & -0.13 & -0.72 & -0.38 & -0.68 \\
under & -0.16 & -0.96 & -1.12 & -1.15 \\
part & 0.07 & -0.31 & -0.41 & -0.40 \\
many & -0.15 & -0.42 & -0.60 & -0.64 \\
well & 0.04 & -0.29 & -0.44 & -0.52 \\
name & 0.13 & -0.26 & -0.28 & -0.50 \\
several & -0.09 & -0.36 & -0.53 & -0.53 \\
same & 0.05 & -0.06 & -0.40 & -0.54 \\
former & -0.21 & -0.60 & -0.59 & -0.59 \\
system & -0.55 & -1.01 & -0.77 & -0.93 \\
\bottomrule
\end{tabular}



## Save results summary

In [None]:
cols_to_save = [
    "word", "corpus", "experiment", "bias", "score",
    "dpmi", "lor_se", "dpmi_lower", "dpmi_upper", "lor_pvalue", "lor_pvaluecor",
    "wefat_score_sgns", "se_sgns", "lower_sgns", "upper_sgns", "sgns_pvalue", "sgns_pvaluecor",
    "wefat_score_glovewc", "se_glovewc", "lower_glovewc", "upper_glovewc", "glovewc_pvalue", "glovewc_pvaluecor",
]
df_to_save = (
    df_final[cols_to_save]
    .rename(
        columns={"score": "external_score", "dpmi": "bias_pmi",
                 "wefat_score_sgns": "bias_sgns", "wefat_score_glovewc": "bias_glovewc"})
    .sort_values(["corpus", "experiment", "word", ])
    .reset_index(drop=True)
)
df_to_save.to_csv("results/experiments_results.csv", index=False)