In [1]:
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
import seml.database as db_utils
from pathlib import Path


from itertools import product


import pandas as pd

import os

import sys
sys.path.append('../../../..')
from utils import load_results, merge_guarantees

import pickle

In [2]:
# Name of the seml MongoDB collection that is queried below.
collection = 'group_amplification_neurips24_adp'

# MongoDB connection settings. Every entry can be overridden through an
# environment variable so that real credentials never have to be written into
# the notebook; when a variable is unset, the original placeholder is used.
jk_config = {
    'username': os.environ.get('MONGODB_USERNAME', 'YOURUSERNAME'),
    'password': os.environ.get('MONGODB_PASSWORD', 'YOURPASSWORD'),
    'host': os.environ.get('MONGODB_HOST', 'YOURDATABASEHOST'),
    # Environment values are strings, so convert the port to an int.
    'port': int(os.environ.get('MONGODB_PORT', 27017)),
    'db_name': os.environ.get('MONGODB_DB_NAME', 'YOURDATABASENAME')
}

# Collection handle used by get_experiments().
col = db_utils.get_collection(collection, mongodb_config=jk_config)

In [3]:
def get_experiments(col, restrictions=None):
    """Find all completed experiments in ``col`` that match ``restrictions``.

    Parameters
    ----------
    col
        Collection object providing ``count_documents(query)`` and
        ``find(query, projection)``.
    restrictions : dict, optional
        Query restrictions. The dict is not modified; a ``'status'`` entry in
        it is overridden with ``'COMPLETED'``.

    Returns
    -------
    Whatever ``col.find`` returns for the query, projected onto the
    ``config``, ``result`` and ``_id`` fields.

    Raises
    ------
    ValueError
        If no document matches the query.
    """
    # Build the query on a copy: mutating the argument would leak the
    # 'status' key into the caller's dict (and, with a ``{}`` default, into
    # every later call that relies on the default).
    query = dict(restrictions) if restrictions is not None else {}
    query['status'] = 'COMPLETED'

    if col.count_documents(query) == 0:
        raise ValueError('No matches!')

    return col.find(query, {'config': 1, 'result': 1, '_id': 1})

In [4]:
def get_dp_guarantees(save_file):
    """Load a pickled result file and return its epsilon/delta arrays.

    The pickle is expected to contain a mapping with the keys ``'epsilons'``
    and ``'deltas'``. Both are converted to numpy arrays and the deltas are
    capped at 1 from above.

    NOTE(review): ``pickle.load`` executes arbitrary code from the file; only
    use this on result files that you produced yourself.
    """
    with open(save_file, 'rb') as handle:
        raw = pickle.load(handle)

    epsilons = np.array(raw['epsilons'])
    capped_deltas = np.minimum(np.array(raw['deltas']), 1)

    return {'epsilons': epsilons, 'deltas': capped_deltas}

In [5]:
def generate_exp_result_dict(exp):
    """Flatten one experiment document into a single result dictionary.

    Reads the relevant settings from ``exp['config']``, records the path of
    the raw result file from ``exp['result']['save_file']`` and adds the
    ``'epsilons'`` / ``'deltas'`` arrays loaded from that file via
    ``get_dp_guarantees``.
    """
    config = exp['config']

    # Epsilon grid settings.
    epsilons_config = config['epsilons']
    flat = {
        'space': epsilons_config['space'],
        'start': epsilons_config['params']['start'],
    }

    # Base mechanism settings.
    flat['true_response_prob'] = config['base_mechanism']['params']['true_response_prob']

    # Amplification settings.
    amplification = config['amplification']
    amplification_params = amplification['params']
    flat['subsampling_rate'] = amplification_params['subsampling_rate']
    flat['group_size'] = amplification_params['group_size']
    flat['insertions'] = amplification_params['insertions']
    flat['tight'] = bool(amplification['tight'])
    flat['eval_method'] = amplification_params['eval_method']

    # Location of the pickled guarantees, and the guarantees themselves.
    flat['raw_results_file'] = exp['result']['save_file']
    flat.update(get_dp_guarantees(flat['raw_results_file']))

    return flat

In [None]:
# Completed runs that use Poisson subsampling with randomized response.
# (A restriction on 'config.epsilons.space', e.g. {'$in': ['linear_continuous']},
# can be added to the query if needed.)
experiments = get_experiments(
    col,
    {
        'config.amplification.subsampling_scheme': 'poisson',
        'config.base_mechanism.name': 'randomizedresponse',
    },
)

# NOTE(review): load_results comes from the project's utils module; presumably
# it applies generate_exp_result_dict to each experiment and caches the table
# under results_file -- confirm against utils.
results = load_results(
    generate_exp_result_dict,
    experiments,
    results_file='./raw_data_randomized_response',
    overwrite=False,
)

# Keep only the two evaluation methods and the group sizes that are plotted.
# (A further filter on 'subsampling_rate', e.g. [0.2, 0.1, 0.001], is optional.)
results = results.loc[
    results['eval_method'].isin(['improved', 'quadrature'])
    & results['group_size'].isin([1, 2, 4, 8, 16])
]


In [None]:
# Spot check: show the rows of a single configuration.
results[
    results['true_response_prob'].eq(0.75)
    & results['subsampling_rate'].eq(0.5)
    & results['group_size'].eq(2)
]

In [8]:
# Legend label for each evaluation method (insertion order: improved, quadrature).
method_label_map = dict(improved='Post-hoc', quadrature='Specific')

In [9]:
def prepare_plot_dict(data):
    """Collect the (epsilons, deltas) curve per evaluation method and group size.

    ``data`` is a DataFrame with the columns ``'epsilons'``, ``'deltas'``,
    ``'eval_method'`` and ``'group_size'``. The result maps each evaluation
    method to a dict that holds one ``(epsilons, deltas)`` tuple per group
    size plus a ``'label'`` entry taken from ``method_label_map``. When
    several rows share the same method and group size, their curves are
    combined with ``merge_guarantees(..., max)``.
    """
    curves_by_method = {}

    for _, row in data.iterrows():
        epsilons, deltas, eval_method, group_size = row.loc[
            ['epsilons', 'deltas', 'eval_method', 'group_size']
        ]

        assert eval_method in ['improved', 'quadrature']

        if eval_method == 'improved':
            # Hockeystick is non-increasing, so a running minimum is valid
            # and helps the baseline.
            deltas = np.minimum.accumulate(deltas)

        method_curves = curves_by_method.get(eval_method)

        if method_curves is None:
            # First row seen for this method.
            curves_by_method[eval_method] = {
                group_size: (epsilons, deltas),
                'label': method_label_map[eval_method],
            }
            continue

        if group_size in method_curves:
            # A curve for this group size already exists: merge the two.
            previous_epsilons, previous_deltas = method_curves[group_size]
            epsilons, deltas = merge_guarantees(previous_epsilons, epsilons,
                                                previous_deltas, deltas,
                                                max)

        method_curves[group_size] = epsilons, deltas

    return curves_by_method

In [10]:
def plot_plot_dict(plot_dict, draw_legend_group_size=False, draw_legend_method=False, width=0.49, xlim=None):
    """Plot the ADP delta(epsilon) curves of all methods and group sizes.

    A new figure is created and left as the current pyplot figure so that the
    caller can save and close it; nothing is returned.

    Parameters
    ----------
    plot_dict : dict
        Maps an evaluation method to a dict whose non-string keys are group
        sizes with ``(epsilons, deltas)`` values. String keys (such as
        ``'label'``) are ignored here.
    draw_legend_group_size : bool
        Draw a legend that maps colors to group sizes (lower right).
    draw_legend_method : bool
        Draw a legend that maps line styles to method labels (upper left).
    width : float
        Currently unused; kept so that existing calls remain valid.
    xlim : sequence, optional
        If given, unpacked into ``ax.set_xlim``.
    """
    def linestyle_for(eval_method):
        # Single source of truth for curves and for the method legend.
        return 'solid' if eval_method == 'quadrature' else 'dashed'

    sns.set_theme()

    _, ax = plt.subplots()

    # Five colors, reversed. The index below is log2(group_size) - 1, so group
    # size 1 yields -1 and picks the last entry, while 2, 4, 8, 16 pick 0..3.
    pal = sns.color_palette('colorblind', 5)[::-1]

    for eval_method, eval_method_dict in plot_dict.items():
        group_sizes = np.sort([k for k in eval_method_dict if not isinstance(k, str)])

        # Largest group size first.
        for group_size in group_sizes[::-1]:
            epsilons, deltas = eval_method_dict[group_size]

            # Only one method carries labels, so that the group-size legend
            # lists every group size once.
            prob_label = group_size if eval_method == 'quadrature' else None

            ax.plot(epsilons, deltas, label=prob_label,
                    c=pal[int(np.log2(group_size)) - 1],
                    linestyle=linestyle_for(eval_method), zorder=3)

    if xlim is not None:
        ax.set_xlim(*xlim)

    ax.set_ylim(bottom=0)

    ax.tick_params('both', which='major', length=2.5, width=0.75)
    ax.tick_params('both', which='minor', length=1.5, width=0.75, left=False)

    ax.set_xlabel('ADP $\\varepsilon$', fontsize=9)
    ax.set_ylabel('ADP $\\delta(\\varepsilon)$', fontsize=9)

    if draw_legend_group_size:
        legend_group_size = ax.legend(loc='lower right', title='Group size', title_fontsize=9)

    if draw_legend_method:
        # Empty proxy lines, one per method, built from the same mapping that
        # supplies the labels so that handles and labels cannot get out of sync.
        method_handles = [
            ax.plot([], [], c='black', ls=linestyle_for(method))[0]
            for method in method_label_map
        ]

        ax.legend(method_handles, list(method_label_map.values()), loc='upper left')

        if draw_legend_group_size:
            # A second ax.legend() call replaces the first legend, so the
            # group-size legend has to be added back explicitly.
            ax.add_artist(legend_group_size)

In [11]:
# x-axis limits per (subsampling_rate, true_response_prob) combination.
# Every range starts at 0; the table below lists only the upper bound.
xlim_dict = {
    (rate, prob): [0, upper]
    for rate, upper_by_prob in [
        (0.1, {0.7: 2, 0.8: 6, 0.9: 6}),
        (0.2, {0.7: 2, 0.8: 4, 0.9: 10}),
        (0.5, {0.7: 6, 0.8: 10, 0.9: 10}),
    ]
    for prob, upper in upper_by_prob.items()
}

In [12]:
# Export one figure per (subsampling_rate, true_response_prob) combination,
# with both the group-size legend and the method legend drawn.
save_dir = '/ceph/hdd/staff/schuchaj/group_amplification_plots/neurips24/adp/poisson/specific_vs_posthoc/randomized_response/half_page/both_legends'

draw_legend_group_size = True
draw_legend_method = True

# savefig does not create missing directories, so make sure the target exists.
Path(save_dir).mkdir(parents=True, exist_ok=True)

for (subsampling_rate, true_response_prob), group_results in results.groupby(['subsampling_rate', 'true_response_prob']):

    # Only combinations with a configured x-range are plotted.
    xlim = xlim_dict.get((subsampling_rate, true_response_prob))
    if xlim is None:
        continue

    plot_dict = prepare_plot_dict(group_results)

    plot_plot_dict(plot_dict,
                   draw_legend_group_size=draw_legend_group_size,
                   draw_legend_method=draw_legend_method,
                   width=0.49, xlim=xlim)
    plt.savefig(f'{save_dir}/{subsampling_rate}_{true_response_prob}.png', dpi=256)
    # Close the figure so that figures do not accumulate across iterations.
    plt.close()


In [14]:
# Export one figure per (subsampling_rate, true_response_prob) combination,
# without any legend.
save_dir = '/ceph/hdd/staff/schuchaj/group_amplification_plots/neurips24/adp/poisson/specific_vs_posthoc/randomized_response/half_page/no_legend'

draw_legend_group_size = False
draw_legend_method = False

# savefig does not create missing directories, so make sure the target exists.
Path(save_dir).mkdir(parents=True, exist_ok=True)

for (subsampling_rate, true_response_prob), group_results in results.groupby(['subsampling_rate', 'true_response_prob']):

    # Only combinations with a configured x-range are plotted.
    xlim = xlim_dict.get((subsampling_rate, true_response_prob))
    if xlim is None:
        continue

    plot_dict = prepare_plot_dict(group_results)

    plot_plot_dict(plot_dict,
                   draw_legend_group_size=draw_legend_group_size,
                   draw_legend_method=draw_legend_method,
                   width=0.49, xlim=xlim)
    plt.savefig(f'{save_dir}/{subsampling_rate}_{true_response_prob}.png', dpi=256)
    # Close the figure so that figures do not accumulate across iterations.
    plt.close()
