In [1]:
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
import seml.database as db_utils
from pathlib import Path


from itertools import product


import pandas as pd

import os

import sys
sys.path.append('../../../..')
from utils import load_results, merge_guarantees


import pickle

In [2]:
# Name of the MongoDB collection that is queried for experiment runs below.
collection = 'group_amplification_neurips24_pld'

# MongoDB connection settings. Each value can be supplied through an environment
# variable so that real credentials never have to be written into the notebook.
# When a variable is unset, the original placeholder value is used unchanged.
jk_config = {
    'username': os.environ.get('MONGODB_USERNAME', 'YOURUSERNAME'),
    'password': os.environ.get('MONGODB_PASSWORD', 'YOURPASSWORD'),
    'host': os.environ.get('MONGODB_HOST', 'YOURDATABASEHOST'),
    # Environment variables are strings, so the port is converted explicitly.
    'port': int(os.environ.get('MONGODB_PORT', 27017)),
    'db_name': os.environ.get('MONGODB_DB_NAME', 'YOURDATABASENAME'),
}

col = db_utils.get_collection(collection, mongodb_config=jk_config)

In [3]:
def get_experiments(col, restrictions=None):
    """Return all completed experiments in ``col`` that match ``restrictions``.

    Parameters
    ----------
    col
        Collection object exposing ``count_documents(filter)`` and
        ``find(filter, projection)``.
    restrictions : dict, optional
        Query filter. It is copied before use, so the caller's dictionary is
        never modified. A ``'status'`` entry, if present, is overridden with
        ``'COMPLETED'``.

    Returns
    -------
    The result of ``col.find`` restricted to the ``config``, ``result`` and
    ``_id`` fields.

    Raises
    ------
    ValueError
        If no document matches the filter.
    """
    # Work on a copy: mutating the argument would leak the 'status' key into the
    # caller's dict (and, with a mutable default, into every later call).
    query = dict(restrictions) if restrictions else {}
    query['status'] = 'COMPLETED'

    if col.count_documents(query) == 0:
        raise ValueError('No matches!')

    return col.find(query, {'config': 1, 'result': 1, '_id': 1})

In [4]:
def get_dp_guarantees(save_file):
    """Load pickled results from ``save_file`` and return epsilons and capped deltas.

    The pickle must contain a mapping with an ``'epsilons'`` entry and a
    ``'results'`` mapping. The values of ``'results'`` are stacked row-wise
    (in the mapping's iteration order) into one array, and every entry is
    capped at 1.

    Returns a dict with the keys ``'epsilons'`` and ``'deltas'``.
    """
    # NOTE: pickle.load can execute arbitrary code; only open trusted files.
    with open(save_file, 'rb') as handle:
        payload = pickle.load(handle)

    eps_grid = np.array(payload['epsilons'])
    stacked = np.vstack([row for row in payload['results'].values()])

    return {
        'epsilons': eps_grid,
        'deltas': np.minimum(stacked, 1),
    }

In [5]:
def generate_exp_result_dict(exp):
    """Flatten one experiment document into a single result dictionary.

    Reads the epsilon-grid settings, the base-mechanism standard deviation and
    the amplification settings from ``exp['config']``, derives ``eval_method``
    from the ``tight`` flag, and merges in the guarantees that
    ``get_dp_guarantees`` loads from ``exp['result']['save_file']``.
    """
    config = exp['config']
    eps_config = config['epsilons']

    flat = {
        'space': eps_config['space'],
        'start': eps_config['params']['start'],
        'stop': eps_config['params']['stop'],
        'standard_deviation': config['base_mechanism']['params']['standard_deviation'],
    }

    amplification = config['amplification']
    for field in ('subsampling_rate', 'group_size', 'insertions'):
        flat[field] = amplification['params'][field]

    is_tight = bool(amplification['tight'])
    flat['tight'] = is_tight
    # Tight runs are labelled 'specific', all others 'posthoc'.
    flat['eval_method'] = 'specific' if is_tight else 'posthoc'

    flat['raw_results_file'] = exp['result']['save_file']

    # Adds the 'epsilons' and 'deltas' entries loaded from the raw results file.
    flat.update(get_dp_guarantees(flat['raw_results_file']))

    return flat

In [None]:
# Query every completed run that uses Poisson subsampling with a Gaussian base mechanism.
experiments = get_experiments(
    col,
    {
        'config.amplification.subsampling_scheme': 'poisson',
        'config.base_mechanism.name': 'gaussian',
        # Optional extra filter, currently disabled:
        # 'config.epsilons.space': {'$in': ['linear_continuous']},
    },
)

# Convert the experiments into a table via generate_exp_result_dict.
# NOTE(review): presumably load_results caches the table in `results_file` and
# reuses it unless overwrite=True -- confirm against utils.load_results.
results = load_results(
    generate_exp_result_dict,
    experiments,
    results_file='./raw_data_gaussian',
    overwrite=False,
)

# Keep only the parameter values of interest, one column at a time.
allowed_values = {
    'group_size': [1, 2, 4, 8, 16],
    'standard_deviation': [1.0, 5.0],
    # Alternative subsampling rates, currently disabled: [0.2, 0.1, 0.001]
    'subsampling_rate': [0.01, 0.001],
}
for column, allowed in allowed_values.items():
    results = results.loc[results[column].isin(allowed)]


In [None]:
# Show the rows for one specific setting (sigma = 5, rate = 0.001, group size = 2).
is_inspected_setting = (
    results['standard_deviation'].eq(5)
    & results['subsampling_rate'].eq(0.001)
    & results['group_size'].eq(2)
)
results.loc[is_inspected_setting]

In [8]:
# Legend labels for the evaluation methods. The insertion order matters:
# plot_plot_dict pairs list(method_label_map.values()) with its
# [dashed, solid] legend handles.
method_label_map = dict(
    posthoc='Post-hoc',
    specific='Specific',
)

In [9]:
def prepare_plot_dict(data, epsilon):
    """Collect, per evaluation method and group size, the delta curve at ``epsilon``.

    ``data`` is iterated row by row; each row must provide the columns
    ``'epsilons'``, ``'deltas'``, ``'eval_method'`` and ``'group_size'``.
    Rows whose ``epsilons`` do not contain ``epsilon`` are skipped.

    Returns a dict ``{eval_method: {group_size: (iterations, deltas), ...,
    'label': <legend label>}}`` where ``iterations`` counts from 1. If several
    rows share the same method and group size, their curves are combined with
    ``merge_guarantees(..., max)``.
    """
    curves_by_method = {}

    for _, row in data.iterrows():
        eps_grid = row['epsilons']
        all_deltas = row['deltas']
        eval_method = row['eval_method']
        group_size = row['group_size']

        # NOTE(review): this is an exact float comparison; an epsilon that is
        # only approximately on the grid is treated as missing.
        if epsilon not in eps_grid:
            continue

        # First row of deltas whose epsilon equals the requested one.
        selected = all_deltas[eps_grid == epsilon][0]
        iterations = np.arange(len(selected)) + 1

        assert eval_method in ('posthoc', 'specific')

        method_curves = curves_by_method.get(eval_method)

        if method_curves is None:
            # First curve seen for this method: also store its legend label.
            curves_by_method[eval_method] = {
                group_size: (iterations, selected),
                'label': method_label_map[eval_method],
            }
            continue

        if group_size in method_curves:
            prev_iterations, prev_deltas = method_curves[group_size]
            merged_iterations, merged_deltas = merge_guarantees(
                prev_iterations, iterations, prev_deltas, selected, max
            )
            method_curves[group_size] = (merged_iterations, merged_deltas)
        else:
            method_curves[group_size] = (iterations, selected)

    return curves_by_method

In [10]:
def plot_plot_dict(plot_dict, draw_legend_group_size=False, draw_legend_method=False, width=0.49, xlim=None):
    """Plot delta over iterations for every method and group size in ``plot_dict``.

    ``plot_dict`` maps an evaluation method to a dict whose non-string keys are
    group sizes with ``(iterations, deltas)`` values. Curves of the
    ``'specific'`` method are solid and labelled with their group size; all
    other curves are dashed and unlabelled. The y axis is logarithmic.

    Parameters
    ----------
    draw_legend_group_size : bool
        Draw a 'Group size' legend in the lower right corner.
    draw_legend_method : bool
        Draw a line-style legend (dashed / solid) in the lower left corner.
    width
        Accepted but not used by this function.
    xlim : tuple, optional
        Unpacked into ``ax.set_xlim`` when given.

    A new figure is created on every call; nothing is returned.
    """
    sns.set_theme()

    fig, ax = plt.subplots()

    # Five colour-blind-friendly colours in reversed order. The colour index is
    # log2(group_size) - 1, so group size 1 yields index -1, the last entry.
    palette = sns.color_palette('colorblind', 5)[::-1]

    for eval_method, curves in plot_dict.items():
        is_specific = eval_method == 'specific'

        # Entries with string keys (e.g. 'label') are metadata, not curves.
        numeric_keys = [key for key in curves if not isinstance(key, str)]

        # Largest group size is drawn first.
        for group_size in np.sort(numeric_keys)[::-1]:
            iterations, deltas = curves[group_size]

            ax.plot(
                iterations,
                deltas,
                label=group_size if is_specific else None,
                c=palette[int(np.log2(group_size)) - 1],
                linestyle='solid' if is_specific else 'dashed',
                zorder=3 if is_specific else 4,
            )

    ax.set_yscale('log')

    if xlim is not None:
        ax.set_xlim(*xlim)

    ax.tick_params('both', which='major', length=2.5, width=0.75)
    ax.tick_params('both', which='minor', length=1.5, width=0.75, left=False)

    ax.set_xlabel('Iteration $t$', fontsize=9)
    ax.set_ylabel('ADP $\\delta(\\varepsilon)$', fontsize=9)

    if draw_legend_group_size:
        legend_group_size = ax.legend(loc='lower right', title='Group size', title_fontsize=9)

    if draw_legend_method:
        # Empty proxy lines that only carry the line style for the legend.
        style_handles = [
            ax.plot([], [], c='black', ls=line_style)[0]
            for line_style in ('dashed', 'solid')
        ]

        ax.legend(style_handles, list(method_label_map.values()), loc='lower left')

        # The second ax.legend call replaces the first legend, so re-attach it.
        if draw_legend_group_size:
            ax.add_artist(legend_group_size)

In [None]:
# Output directory for the plots that carry both legends.
save_dir = '/ceph/hdd/staff/schuchaj/group_amplification_plots/neurips24/pld/poisson/specific_vs_posthoc/gaussian/half_page/both_legends'

# savefig does not create missing directories, so make sure the target exists.
os.makedirs(save_dir, exist_ok=True)

draw_legend_group_size = True
draw_legend_method = True

# One figure per (epsilon, subsampling rate, standard deviation) combination.
for epsilon in [0.5, 1.0, 2.0]:

    for x in results.groupby(['subsampling_rate', 'standard_deviation']):

        # Each item is (group key, sub-frame); the key is the pair of grouping values.
        subsampling_rate, standard_deviation = x[0]

        plot_dict = prepare_plot_dict(x[1], epsilon)

        # Pass the flags defined above instead of repeating the literals.
        plot_plot_dict(plot_dict, draw_legend_group_size=draw_legend_group_size, draw_legend_method=draw_legend_method, width=0.49, xlim=None)
        plt.savefig(f'{save_dir}/{epsilon}_{subsampling_rate}_{standard_deviation}.png', dpi=256)
        # NOTE(review): figures are not closed here (plt.close() was commented
        # out), presumably so that they are rendered inline -- confirm.

In [13]:
# Output directory for the plots without any legend.
save_dir = '/ceph/hdd/staff/schuchaj/group_amplification_plots/neurips24/pld/poisson/specific_vs_posthoc/gaussian/half_page/no_legend'

# savefig does not create missing directories, so make sure the target exists.
os.makedirs(save_dir, exist_ok=True)

draw_legend_group_size = False
draw_legend_method = False

# One figure per (epsilon, subsampling rate, standard deviation) combination.
for epsilon in [0.5, 1.0, 2.0]:

    for x in results.groupby(['subsampling_rate', 'standard_deviation']):

        # Each item is (group key, sub-frame); the key is the pair of grouping values.
        subsampling_rate, standard_deviation = x[0]

        plot_dict = prepare_plot_dict(x[1], epsilon)

        plot_plot_dict(plot_dict, draw_legend_group_size=draw_legend_group_size, draw_legend_method=draw_legend_method, width=0.49, xlim=None)
        plt.savefig(f'{save_dir}/{epsilon}_{subsampling_rate}_{standard_deviation}.png', dpi=256)
        # Close the current figure so that open figures do not accumulate.
        plt.close()
