This notebook generates the figures shown in the paper from the saved experiment results.

In [9]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from glob import glob
import re
import os

In [10]:
# Location of the results repository, relative to this notebook
repo_path = '../'
# Prefix prepended to every figure filename when saving
save_folder_path = 'figures/'

In [11]:
# Initialize figure settings: three font sizes derived from one base size
SMALL_SIZE = 22
MEDIUM_SIZE = SMALL_SIZE + 2
BIGGER_SIZE = SMALL_SIZE + 5

plt.rcParams.update({
    'font.size': SMALL_SIZE,            # default text size
    'axes.titlesize': SMALL_SIZE,       # axes title
    'axes.labelsize': MEDIUM_SIZE,      # x and y axis labels
    'xtick.labelsize': SMALL_SIZE,      # x tick labels
    'ytick.labelsize': SMALL_SIZE,      # y tick labels
    'legend.fontsize': SMALL_SIZE,      # legend entries
    'figure.titlesize': BIGGER_SIZE,    # figure-level title
})

# Render all text through LaTeX with the packages listed in the preamble
rc_fonts = {
    "text.usetex": True,
    'text.latex.preamble':
        r"""
        \usepackage{amsmath}
        \usepackage{libertine}
        \usepackage[libertine]{newtxmath}
        """,
}

plt.rcParams.update(rc_fonts)

# Read Results Functions

In [12]:
def get_delta(df, index, tau=False):
    """Add ``delta_<metric>`` columns measuring each metric against a baseline.

    The baseline is a scalar taken from the row labelled ``index``:

    * ``accuracy`` / ``coverage``: that row's own value of the metric;
    * ``privacy cost``: that row's ``epsilon``;
    * ``max fairness gap``: that row's ``max fairness gap`` when ``tau`` is
      true, otherwise that row's ``gamma``.

    :param df: frame holding the metric columns plus ``epsilon`` and ``gamma``
    :param index: index label (looked up with ``.loc``) of the baseline row
    :param tau: selects the baseline column for the fairness gap (see above)
    :return: the same ``df`` object, modified in place with the new columns
    """
    # metric column -> column that supplies the baseline value
    baseline_column = {
        'accuracy': 'accuracy',
        'coverage': 'coverage',
        'privacy cost': 'epsilon',
        'max fairness gap': 'max fairness gap' if tau else 'gamma',
    }
    for metric, reference in baseline_column.items():
        df[f'delta_{metric}'] = df[metric] - df[reference].loc[index]
    return df

In [13]:
def read_all(folder, two_datasets=False, tau = False):
    """Load every run stored under ``results/<folder>`` and add delta columns.

    Each entry of ``<repo_path>results/<folder>/`` whose name starts with a
    digit is treated as one run directory containing ``df.parquet.gzip``.
    Rows whose ``agent`` is ``'calibration'`` are dropped, the ``delta_*``
    columns are added with ``get_delta`` and all runs are concatenated.

    :param folder: sub-folder of ``results/`` to read
    :param two_datasets: when true, the run is split by the ``dataset`` values
        found in the rows labelled 0 and 1, and each part gets its own
        baseline row (0 and 1 respectively)
    :param tau: forwarded to ``get_delta``
    :return: one DataFrame with the rows of all runs
    :raises ValueError: if no run directory is found
    """
    pattern = repo_path + f"results/{folder}/*"
    # `\d+` requires a leading digit. The previous `\d*` also matched the empty
    # string, so every entry passed the filter regardless of its name.
    # basename() instead of split("/") so the check also works with
    # backslash-separated paths.
    run_dirs = [path for path in glob(pattern) if re.match(r'\d+', os.path.basename(path))]
    if not run_dirs:
        raise ValueError(f"No run directories starting with a digit found for pattern {pattern!r}")

    dfs = []
    for path in run_dirs:
        df = pd.read_parquet(f"{path}/df.parquet.gzip")
        df = df[df['agent'] != 'calibration'].copy()
        if two_datasets:
            # NOTE(review): assumes the rows labelled 0 and 1 survive the
            # calibration filter and carry the two different dataset names.
            df1 = df[df['dataset'] == df['dataset'].loc[0]].copy()
            df2 = df[df['dataset'] == df['dataset'].loc[1]].copy()
            df = pd.concat([get_delta(df1, 0, tau), get_delta(df2, 1, tau)])
        else:
            df = get_delta(df, 0, tau)

        dfs.append(df)

    return pd.concat(dfs)

In [14]:
def read_all_updated_C(folder):
    """Load runs that were continued over several stages and stitch them together.

    For every entry of ``results/<folder>/`` whose name matches ``\\d_.*`` the
    stage stored at that path is read, together with the paths obtained by
    replacing ``round_1`` with ``round_2`` (required) and ``round_3`` (used only
    if its parquet file exists). Each stage is tagged with an ``experiment``
    label, later stages get their ``round`` shifted by 20 and 40, calibration
    rows are dropped, and ``get_delta`` is applied with row 0 of the stitched
    frame as baseline.

    :param folder: sub-folder of ``results/`` to read
    :return: one DataFrame with the rows of all stitched runs
    """
    def _load_stage(stage_dir, label, round_shift=None):
        # Read one stage, tag it, optionally shift its rounds, drop calibration rows
        stage = pd.read_parquet(f"{stage_dir}/df.parquet.gzip")
        stage["experiment"] = label
        if round_shift is not None:
            stage["round"] = stage["round"]  + round_shift
        return stage[stage['agent'] != 'calibration']

    stitched_runs = []
    for first_dir in glob(repo_path+f"results/{folder}/*"):
        if not re.match(r'\d_.*', first_dir.split("/")[-1]):
            continue
        second_dir = first_dir.replace('round_1', 'round_2')
        third_dir = first_dir.replace('round_1', 'round_3')

        first = _load_stage(first_dir, '$C_{{fair}}$ = 1.0, $C_{{priv}}$ = 1.0')
        second = _load_stage(second_dir, '$C_{{fair}}$ = 3.0, $C_{{priv}}$ = 3.0', 20)
        run = pd.concat([first, second]).reset_index(drop=True)

        # The third stage is optional
        if os.path.exists(f"{third_dir}/df.parquet.gzip"):
            third = _load_stage(third_dir, '$C_{{fair}}$ = 3.0, $C_{{priv}}$ = 4.5', 40)
            run = pd.concat([run, third]).reset_index(drop=True)

        stitched_runs.append(get_delta(run, 0))

    return pd.concat(stitched_runs)

In [15]:
def read_all_comparison(folder):
    """Load paired runs and compute the per-row difference between them.

    For every entry of ``results/<folder>/`` whose name matches ``\\d_.*`` two
    frames are read: the one at that path and the one at the same path with
    ``'regulator'`` replaced by ``'builder'``. ``coverage`` is multiplied by 100
    in both. For each metric, ``delta_<metric>`` is the second frame minus the
    first and ``delta_relative_<metric>`` is that difference divided by the
    first frame's value (index-aligned). Calibration rows are dropped last.

    :param folder: sub-folder of ``results/`` to read
    :return: one DataFrame with the first frame's rows and the delta columns
    """
    metrics = ('accuracy', 'coverage', 'privacy cost', 'max fairness gap')
    compared = []
    for first_dir in glob(repo_path+f"results/{folder}/*"):
        if re.match(r'\d_.*', first_dir.split("/")[-1]) is None:
            continue
        second_dir = first_dir.replace('regulator', 'builder')
        first = pd.read_parquet(f"{first_dir}/df.parquet.gzip")
        second = pd.read_parquet(f"{second_dir}/df.parquet.gzip")

        # Scale coverage by 100 before differencing
        first['coverage'] = first['coverage'] * 100
        second['coverage'] = second['coverage'] * 100

        for metric in metrics:
            first[f'delta_{metric}'] = second[metric] - first[metric]
            first[f'delta_relative_{metric}'] = (second[metric] - first[metric]) / first[metric]

        compared.append(first[first['agent'] != 'calibration'])

    return pd.concat(compared)

# Plotting Functions

In [24]:
def plot_paper_twin_axis(results_df, save_folder_path, filename, fairpd=False, y_lim=1.6):
    '''
        Plot the privacy and fairness violations per round on twin y-axes.

        The left axis (orange) shows ``delta_privacy cost``; the right axis
        (blue) shows ``delta_max fairness gap``. The figure is saved to
        ``save_folder_path + filename`` and then shown.

        :param: results_df: frame with ``agent``, ``round``,
                ``delta_privacy cost`` and ``delta_max fairness gap`` columns
        :param: save_folder_path: prefix prepended to ``filename`` when saving
        :param: filename: name of the save file
        :param: fairpd: if True the right axis is labelled as disparate impact
                (tau) and keeps automatic limits; otherwise it is labelled as
                demographic disparity (gamma) and limited to [0, 0.11]
        :param: y_lim: upper limit of the left (privacy) axis
    '''
    # we are only plotting the agent moves
    results_df = results_df[results_df['agent'] != 'calibration']

    fig, ax1 = plt.subplots(figsize=(6, 4))
    fig.tight_layout()

    # Left axis: privacy budget violation. The target axes is passed
    # explicitly instead of relying on pyplot's "current axes".
    privacy_color = 'tab:orange'
    ax1.set_ylabel("Privacy Budget\n Violation $\\varepsilon - \\varepsilon_\\text{reg}$", color=privacy_color)
    sns.lineplot(data=results_df, x="round", y="delta_privacy cost", color=privacy_color, ax=ax1)
    ax1.tick_params(axis='y', labelcolor=privacy_color)
    ax1.set_ylim(0, y_lim)

    # Right axis: fairness violation; only the label and the y-limits depend on fairpd
    fairness_color = 'tab:blue'
    if fairpd:
        fairness_label = "Disp. Impact\nViolation $\\tau - \\tau_\\text{reg}$"
    else:
        fairness_label = "Dem. Disparity\nViolation $\\gamma - \\gamma_\\text{reg}$"

    ax2 = ax1.twinx()
    ax2.set_ylabel(fairness_label, color=fairness_color)
    sns.lineplot(data=results_df, x="round", y="delta_max fairness gap", color=fairness_color, ax=ax2)
    ax2.tick_params(axis='y', labelcolor=fairness_color, color=fairness_color)
    if not fairpd:
        ax2.set_ylim(0, .11)

    fig.tight_layout()

    fig.savefig(save_folder_path+filename, bbox_inches="tight")
    plt.show()

In [17]:
def _remove_legend(ax):
    '''Remove the legend attached to ``ax``; return True if there was one.'''
    legend = ax.get_legend()
    if legend is None:
        return False
    legend.remove()
    return True


def plot_paper_separate(results_df, save_folder_path, filename, options, agents, use_relative=False):
    '''
        Plot acc or cov or eps or lambda for paper figures, one panel per metric.

        The figure is saved to ``save_folder_path + filename`` and then shown.

        :param: results_df: frame with ``agent``, ``round`` and the ``delta_*``
                columns being plotted (plus ``dataset`` / ``experiment`` when
                used as hue)
        :param: save_folder_path: prefix prepended to ``filename`` when saving
        :param: filename: name of the save file
        :param: options:
                1. "two datasets": one line per ``dataset`` value
                2. "two experiments": plotting the difference between two
                   experiments; changes the axis labels
                3. "strategy change": one line per ``experiment`` value
                4. anything else: a single line per panel
        :param: agents:
                1. "regulators": plotting fairness gap and privacy cost
                2. "builder": plotting accuracy and coverage
                3. "all": all four panels in one row
        :param: use_relative: only read when agents == "regulators"; plots the
                ``delta_relative_*`` columns instead of ``delta_*``
        :raises ValueError: if ``agents`` is not one of the three values above
    '''
    if agents not in ("builder", "regulators", "all"):
        raise ValueError(f"agents must be 'builder', 'regulators' or 'all', got {agents!r}")

    # Backslashes are doubled so the strings contain no invalid escape sequences.
    titles = [
        "Privacy Budget Violation\n$\\varepsilon - \\varepsilon_\\text{reg}$",
        "Demographic Disparity Violation\n$\\gamma - \\gamma_\\text{reg}$",
        "Accuracy Gains\n$\\operatorname{Acc}(\\boldsymbol{s}) - \\operatorname{Acc}(\\boldsymbol{s}_\\text{reg})$",
        "Coverage Gains\n$\\operatorname{Covr}(\\boldsymbol{s}) - \\operatorname{Covr}(\\boldsymbol{s}_\\text{reg})$",
    ]

    # we are only plotting the agent moves
    results_df = results_df[results_df['agent'] != 'calibration']

    if options == "two datasets":
        # this is when regulators and builder use different datasets
        hue = "dataset"
        alpha = 0.7
    elif options == 'strategy change':
        hue = "experiment"
        alpha = 1
    else:
        hue = None
        alpha = 1
    two_experiments = options == "two experiments"

    if agents == "builder":
        fig, axes = plt.subplots(1, 2, figsize=(10, 5), sharex=True)
        fig.tight_layout()
        ax1 = sns.lineplot(data=results_df, x="round", y="delta_accuracy", hue=hue, ax=axes[0], linewidth=2.5, color='tab:olive', alpha=alpha)
        if two_experiments:
            ax1.set_ylabel("Change in Accuracy")
            ax1.set_xlabel(ax1.get_xlabel()+"\n(a)")
        else:
            ax1.set_ylabel("$Acc. - Acc.^{(0)}$")

        ax2 = sns.lineplot(data=results_df, x="round", y="delta_coverage", hue=hue, ax=axes[1], linewidth=2.5, color='tab:purple', alpha=alpha)
        if two_experiments:
            ax2.set_ylabel("Change in Coverage")
            ax2.set_xlabel(ax2.get_xlabel()+"\n(b)")
        else:
            ax2.set_ylabel("$Covr. - Covr.^{(0)}$")

    elif agents == "regulators":
        fig, axes = plt.subplots(1, 2, figsize=(10, 5), sharex=True)
        fig.tight_layout()
        column_prefix = "delta_relative_" if use_relative else "delta_"

        ax1 = sns.lineplot(data=results_df, x="round", y=column_prefix + "max fairness gap", hue=hue, ax=axes[0], linewidth=2.5, alpha=alpha)
        if two_experiments:
            ax1.set_ylabel("$\\text{Change in Disparity } \\gamma$")
            if use_relative:
                ax1.set_title("Relative Change from\nRegulator-led to Builder-led")
            ax1.set_xlabel(ax1.get_xlabel()+"\n(c)")
        else:
            ax1.set_title("$\\text{Specification Violations } \\boldsymbol{s} - \\boldsymbol{s}_\\text{reg}$")
            ax1.set_ylabel("$\\text{Disparity } \\gamma$")

        ax2 = sns.lineplot(data=results_df, x="round", y=column_prefix + "privacy cost", hue=hue, ax=axes[1], linewidth=2.5, color='tab:orange', alpha=alpha)
        if two_experiments:
            ax2.set_ylabel("$\\text{Change in Privacy Budget } \\varepsilon$")
            if use_relative:
                ax2.set_title("Relative Change from\nRegulator-led to Builder-led")
            ax2.set_xlabel(ax2.get_xlabel()+"\n(d)")
        else:
            ax2.set_title("$\\text{Specification Violations } \\boldsymbol{s} - \\boldsymbol{s}_\\text{reg}$")
            ax2.set_ylabel("$\\text{Privacy Budget } \\varepsilon$")

    else:  # agents == "all"
        # Four panels in one row: disparity, privacy, accuracy, coverage
        fig, axes = plt.subplots(1, 4, figsize=(16, 4), sharex=True)
        fig.tight_layout()

        ax1 = sns.lineplot(data=results_df, x="round", y="delta_max fairness gap", hue=hue, ax=axes[0], linewidth=2.5, alpha=alpha)
        if two_experiments:
            ax1.set_ylabel("$\\gamma_{1st} - \\gamma_{2nd}$")
        else:
            ax1.set_ylabel("")
            ax1.set_title(titles[1])

        ax2 = sns.lineplot(data=results_df, x="round", y="delta_privacy cost", hue=hue, ax=axes[1], linewidth=2.5, color='tab:orange', alpha=alpha)
        if two_experiments:
            ax2.set_ylabel("$\\varepsilon_{1st} - \\varepsilon_{2nd}$")
        else:
            ax2.set_ylabel("")
            ax2.set_title(titles[0])

        ax3 = sns.lineplot(data=results_df, x="round", y="delta_accuracy", hue=hue, ax=axes[2], linewidth=2.5, color='tab:olive', alpha=alpha)
        if two_experiments:
            ax3.set_ylabel("$\\operatorname{Acc}_{1st} - \\operatorname{Acc}_{2nd}$")
        else:
            ax3.set_ylabel("")
            ax3.set_title(titles[2])

        ax4 = sns.lineplot(data=results_df, x="round", y="delta_coverage", hue=hue, ax=axes[3], linewidth=2.5, color='tab:purple', alpha=alpha)
        if two_experiments:
            ax4.set_ylabel("$\\operatorname{Covr}_{1st} - \\operatorname{Covr}_{2nd}$")
        else:
            ax4.set_ylabel("")
            ax4.set_title(titles[3])

        # Without a hue there is no legend; calling .remove() on None used to crash here.
        _remove_legend(ax3)
        _remove_legend(ax4)

    # Keep a single shared legend: drop the one on ax2 and reposition the one
    # on ax1. When no legend exists there is nothing to do. The previous
    # condition `options == "two datasets" or "strategy change"` was always
    # true, so this layout is what was applied for every value of `options`.
    if _remove_legend(ax2) and ax1.get_legend() is not None:
        if agents == "all":
            sns.move_legend(
                ax1, "lower center",
                bbox_to_anchor=(2, -0.5), ncol=5, columnspacing=1, title=None, frameon=False,
            )
        else:
            sns.move_legend(
                ax1, "lower center",
                bbox_to_anchor=(1, 1), ncol=2, columnspacing=0.8, title=None, frameon=False,
            )
    fig.subplots_adjust(bottom=0.1, right=1.2, top=0.9)

    fig.savefig(save_folder_path+filename, bbox_inches="tight")
    plt.show()