# Heatmaps: IR vs GR

In [4]:
import gc
import os
import warnings
from os import path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Install a blanket "ignore" filter so that no warning of any category is
# printed into the cell output for the rest of the session.
warnings.simplefilter('ignore')

In [34]:
# Run configuration: the sample size selects the `n<size>` sub-directory used
# both for the pre-computed inputs and for the generated plots.
sample_size = 56        # 24 or 56

calculations_dir = os.path.join('out', 'calculations', f'n{sample_size}')
plots_dir = os.path.join('out', 'plots', 'heatmap', f'n{sample_size}')

# Create both directories up front; already-existing ones are left untouched.
for _out_dir in (plots_dir, calculations_dir):
    os.makedirs(_out_dir, exist_ok=True)

# Maps each metric's file name (inside `calculations_dir`) to the
# human-readable label used for column names, plot titles and output files.
# Insertion order is the processing order.
metrics = dict((
    # ('sb.bin', 'Stereotypical bias'),     # TODO add to shared calculations
    ('acc_equality_diff.bin', 'Accuracy equality difference'),
    ('acc_equality_ratio.bin', 'Accuracy equality ratio'),
    ('disp_impact.bin', 'Disparate impact'),
    ('equal_opp_diff.bin', 'Equal opportunity difference'),
    ('equal_opp_ratio.bin', 'Equal opportunity ratio'),
    ('pred_equality_diff.bin', 'Predictive equality difference'),
    ('pred_equality_ratio.bin', 'Predictive equality ratio'),
    ('stat_parity.bin', 'Statistical parity'),
    ('neg_pred_parity_diff.bin', 'Negative predictive parity difference'),
    ('neg_pred_parity_ratio.bin', 'Negative predictive parity ratio'),
    ('pos_pred_parity_diff.bin', 'Positive predictive parity difference'),
    ('pos_pred_parity_ratio.bin', 'Positive predictive parity ratio'),
))

In [3]:
# Load the two axis variables of the heatmaps as single-column frames.
# np.fromfile reads the raw file as float64 (its default dtype); the values
# are then downcast to float16.
# NOTE(review): 'gr' / 'ir' presumably stand for group ratio / imbalance
# ratio -- confirm against the code that writes these files.
gr = pd.DataFrame(
    {'gr': np.fromfile(path.join(calculations_dir, 'gr.bin')).astype(np.float16)}
)
ir = pd.DataFrame(
    {'ir': np.fromfile(path.join(calculations_dir, 'ir.bin')).astype(np.float16)}
)

## v1: mean metric value per (GR, IR) cell

In [14]:
# Walk through the reshaping pipeline on the first metric only, printing each
# intermediate frame.
metric_f, metric_n = [*metrics.items()][0]

# Raw file -> float64 array (np.fromfile default) -> float16 column named
# after the metric's label.
metric = pd.DataFrame(
    {metric_n: np.fromfile(path.join(calculations_dir, metric_f)).astype(np.float16)}
)

# Step 1: put gr, ir and the metric value side by side, aligned on row index.
df = pd.concat([gr, ir, metric], axis='columns')
print(df.head())
print('===')

# Step 2: reshape to long format. With a single metric the 'metric' label
# column carries no information, so it is dropped again.
df = df.melt(id_vars=['gr', 'ir'], var_name='metric', value_name='value')
df = df.drop(columns='metric')
print(df.head(20))
print('===')

# Step 3: build the gr x ir grid, aggregating all rows that share the same
# (gr, ir) pair with np.nanmean.
df = df.pivot_table(index='gr', columns='ir', values='value', aggfunc=np.nanmean)
print(df.head())

         gr        ir  Accuracy equality difference
0  0.000000  1.000000                           NaN
1  0.000000  0.958496                           NaN
2  0.000000  0.958496                           NaN
3  0.000000  1.000000                           NaN
4  0.041656  1.000000                           0.0
===
          gr        ir     value
0   0.000000  1.000000       NaN
1   0.000000  0.958496       NaN
2   0.000000  0.958496       NaN
3   0.000000  1.000000       NaN
4   0.041656  1.000000  0.000000
5   0.041656  0.958496 -1.000000
6   0.041656  0.958496  0.000000
7   0.041656  1.000000 -1.000000
8   0.000000  0.916504       NaN
9   0.000000  0.916504       NaN
10  0.000000  0.958496       NaN
11  0.041656  0.958496  0.043488
12  0.041656  0.916504 -0.956543
13  0.041656  0.916504  0.043488
14  0.041656  0.958496 -0.956543
15  0.000000  0.916504       NaN
16  0.000000  0.958496       NaN
17  0.041656  0.958496  0.000000
18  0.041656  0.916504 -1.000000
19  0.041656  0.916504  

In [None]:
# Render and save one gr x ir heatmap per metric.
for metric_f, metric_n in metrics.items():
    # Raw file -> float64 array (np.fromfile default) -> float16 column named
    # after the metric's label.
    metric = pd.DataFrame(
        {metric_n: np.fromfile(path.join(calculations_dir, metric_f)).astype(np.float16)}
    )

    # Align gr, ir and the metric by row, turn +inf into NaN, then discard
    # every row that has a NaN in any of the three columns.
    df = pd.concat([gr, ir, metric], axis='columns')
    df = df.replace(np.inf, np.nan).dropna()

    # Long format with a single 'value' column, then a gr x ir grid.
    # pivot_table's default aggregation is the mean of each (gr, ir) cell.
    df = df.melt(id_vars=['gr', 'ir'], var_name='metric', value_name='value')
    df = df.drop(columns='metric')
    df = df.pivot_table(index='gr', columns='ir', values='value')

    # Tick labels: the axis values formatted to four decimals.
    ir_labels = [format(col, '.4f') for col in df.columns]
    gr_labels = [format(row, '.4f') for row in df.index]

    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(
        df,
        ax=ax,
        cmap='coolwarm',
        center=0,           # diverging colormap centred on zero
        square=True,
        xticklabels=ir_labels,
        yticklabels=gr_labels,
    )
    ax.set_title(f'{metric_n} - mean')

    # The heatmap draws the first row at the top; flip the y-axis so that
    # row ends up at the bottom instead.
    ax.invert_yaxis()

    fig.tight_layout()
    fig.savefig(path.join(plots_dir, f'{metric_n}_hm_v1.png'))

    # Free the per-metric frames and the figure before the next iteration.
    del df
    del metric
    gc.collect()
    plt.close(fig)