In [1]:
import os

from seml import get_results
import matplotlib.pyplot as plt
import torch
import pandas as pd

import seaborn as sns

In [None]:
# Query the experiment collection for runs that use with-replacement sampling
# at the bottom level and a future length of 1, keeping only the swept
# privacy-loss settings and each run's log directory. The database '_id'
# column is dropped straight away because nothing below uses it.
df = get_results(
    'dp_timeseries_eval_pld_deterministic_vs_random_top_level',
    fields=[
        'config.privacy_loss_kwargs.num_sequences',
        'config.privacy_loss_kwargs.top_level_mode',
        'config.privacy_loss_kwargs.min_sequence_length',
        'config.privacy_loss_kwargs.batch_size',
        'result.log_dir',
    ],
    filter_dict={
        'config.privacy_loss_kwargs.bottom_level_mode': 'sampling_with_replacement',
        'config.privacy_loss_kwargs.future_length': 1,
    },
    to_data_frame=True,
).drop(columns=['_id'])

df

In [3]:
columns = df.columns

# Shorten every dotted column path to its final segment,
# e.g. 'config.privacy_loss_kwargs.batch_size' -> 'batch_size'.
df = df.rename(columns=lambda name: name.split('.')[-1])

In [4]:
def epsilons_deltas_from_log_dir(df):
    """Load the stored epsilons/deltas for every run listed in ``df``.

    For each entry of ``df['log_dir']`` the file
    ``<log_dir>/config_and_results.pyt`` is read with ``torch.load`` and the
    values under ``['results']['epsilons']`` and ``['results']['deltas']``
    are collected.

    Args:
        df: DataFrame with a ``'log_dir'`` column holding one directory path
            per run.

    Returns:
        DataFrame with columns ``'epsilons'`` and ``'deltas'`` and one row per
        row of ``df``. The result shares ``df``'s index, so it can be joined
        back with ``pd.concat(..., axis=1)`` even when ``df`` was filtered or
        otherwise does not carry a default 0..n-1 index.
    """
    data_dict = {
        'epsilons': [],
        'deltas': []}

    for log_dir in df['log_dir']:
        # NOTE(review): torch.load unpickles the file, so only point this at
        # trusted log directories. Depending on the installed torch version
        # and on what was saved, `weights_only` / `map_location` may need to
        # be passed explicitly -- confirm against the saved files.
        save_dict = torch.load(os.path.join(log_dir, 'config_and_results.pyt'))
        results = save_dict['results']
        data_dict['epsilons'].append(results['epsilons'])
        data_dict['deltas'].append(results['deltas'])

    # Reuse the caller's index: an index-aligned concat would otherwise pair
    # rows by label and silently introduce NaNs for non-default indices.
    return pd.DataFrame(data_dict, index=df.index)

In [None]:
# Attach the per-run epsilons/deltas loaded from disk as two extra columns
# next to the run configuration.
privacy_curves = epsilons_deltas_from_log_dir(df)
df_results = pd.concat([df, privacy_curves], axis=1)
df_results.head()

In [6]:
def plot_tradeoff(df, min_sequence_length, batch_size, xlim=None, max_epoch=10):
    """Plot delta(epsilon) curves for deterministic vs. sampled top-level modes.

    Rows of ``df`` matching ``min_sequence_length`` and ``batch_size`` are
    selected, and for each of them three curves are drawn on one shared axis:
    the first step (dotted), the end of the first epoch (dashed) and the end
    of epoch ``max_epoch`` (solid). Colour encodes ``top_level_mode``.

    ``deltas`` is indexed as ``deltas[row, epsilon_index]``:

    * ``'iteration'``: ``len(deltas)`` is treated as the number of epochs, so
      the "first step" and "first epoch" curves are both row 0 and coincide.
    * ``'sampling_without_replacement'``: ``len(deltas)`` is treated as the
      number of steps, with ``num_sequences // batch_size`` steps per epoch.

    Args:
        df: DataFrame with columns ``'min_sequence_length'``, ``'batch_size'``,
            ``'num_sequences'``, ``'top_level_mode'``, ``'epsilons'`` and
            ``'deltas'``. ``epsilons``/``deltas`` are presumably array-like
            objects supporting boolean-mask indexing and ``.min()``/``.max()``
            -- TODO confirm against the stored results.
        min_sequence_length: Value of ``'min_sequence_length'`` to select.
        batch_size: Value of ``'batch_size'`` to select; must divide each
            selected row's ``num_sequences``.
        xlim: Optional upper bound on epsilon. Larger epsilons are dropped
            before plotting and the x-axis ends at this value.
        max_epoch: Epoch (1-based) whose curve is drawn with a solid line.

    Raises:
        ValueError: If no row matches the requested settings, or if a row has
            an unknown ``top_level_mode``.
        AssertionError: If ``batch_size`` does not divide ``num_sequences``,
            if the number of stored steps is not a whole number of epochs, or
            if fewer than ``max_epoch`` epochs are available.
    """
    df = df.loc[(df['min_sequence_length'] == min_sequence_length)
                & (df['batch_size'] == batch_size)]

    df = df[['num_sequences', 'top_level_mode', 'epsilons', 'deltas']]
    df = df.sort_values('top_level_mode')

    # Fail early and clearly; otherwise the axis-limit code at the bottom
    # would hit an unbound `epsilons` after an empty loop.
    if df.empty:
        raise ValueError(
            f'No results for min_sequence_length={min_sequence_length}, '
            f'batch_size={batch_size}')

    sns.set_theme()
    pal = sns.color_palette('colorblind', 2)

    fig, ax = plt.subplots(1, 1)

    for _, (num_sequences, top_level_mode, epsilons, deltas) in df.iterrows():
        assert (num_sequences % batch_size) == 0

        steps_per_epoch = num_sequences // batch_size

        if top_level_mode == 'iteration':
            # One row of `deltas` per epoch.
            num_epochs = len(deltas)
            assert max_epoch <= num_epochs

            i_first_step = 0
            i_first_epoch = 0
            i_last_epoch = max_epoch - 1
            color = pal[0]
            label_method = 'Deterministic'
        elif top_level_mode == 'sampling_without_replacement':
            # One row of `deltas` per step; epochs end every `steps_per_epoch` rows.
            assert (len(deltas) % steps_per_epoch) == 0
            num_epochs = len(deltas) // steps_per_epoch
            assert max_epoch <= num_epochs

            i_first_step = 0
            i_first_epoch = steps_per_epoch - 1
            i_last_epoch = steps_per_epoch * max_epoch - 1
            color = pal[1]
            label_method = 'Sampling (WOR)'
        else:
            raise ValueError(f'Unknown top_level_mode: {top_level_mode!r}')

        if xlim is not None:
            # Filter `deltas` first: the mask must be built from the unfiltered epsilons.
            keep = epsilons <= xlim
            deltas = deltas[:, keep]
            epsilons = epsilons[keep]

        # Only the solid curve carries a label, so the method legend gets one
        # entry per plotted row.
        ax.plot(epsilons, deltas[i_last_epoch], label=label_method, color=color,
                linestyle='solid', clip_on=False, zorder=3)
        ax.plot(epsilons, deltas[i_first_epoch], color=color,
                linestyle='dashed', clip_on=False, zorder=3)
        ax.plot(epsilons, deltas[i_first_step], color=color,
                linestyle='dotted', clip_on=False, zorder=3)

    # Log-scaled epsilon axis; the delta axis stays linear.
    ax.set_xscale('log')

    ax.minorticks_off()

    legend_1 = ax.legend(loc='upper right')

    # Separate legend for linestyle, listed in the same order as the styles:
    # solid -> last epoch, dashed -> first epoch, dotted -> first step.
    linestyles = ['solid', 'dashed', 'dotted']
    labels = [f'Epoch {max_epoch}', 'Epoch $1$', 'Step $1$']
    dummy_lines = [ax.plot([], [], c="black", linestyle=linestyle)[0]
                   for linestyle in linestyles]
    ax.legend(dummy_lines, labels, loc='lower right')

    # The second ax.legend call replaced the first legend; add it back.
    ax.add_artist(legend_1)

    # Raw strings: '\e' and '\d' are not valid escape sequences.
    ax.set_xlabel(r'$\epsilon$')
    ax.set_ylabel(r'$\delta(\epsilon)$')
    ax.set_ylim(bottom=0)
    # The limits use the epsilon grid of the last plotted row; this assumes
    # all selected rows share the same grid -- TODO confirm.
    if xlim is not None:
        ax.set_xlim(left=epsilons.min(), right=xlim)
    else:
        ax.set_xlim(left=epsilons.min(), right=epsilons.max())

In [None]:
# Target directory for this experiment's figures (not used within this cell).
save_dir = '/ceph/hdd/staff/schuchaj/dp_timeseries_plots_camera_icml25/eval_pld_deterministic_vs_random_top_level'

# Paired settings: the i-th sequence length is plotted with the i-th batch size.
min_sequence_lengths = [4, 4, 20, 20]
batch_sizes = [32, 160, 32, 160]
batch_size = 32  # rebound by the loop variable below

for min_sequence_length, batch_size in zip(min_sequence_lengths, batch_sizes):
    plot_tradeoff(df_results, min_sequence_length, batch_size, xlim=100)


In [9]:
# Target directory for this experiment's figures (not used within this cell).
save_dir = '/ceph/hdd/staff/schuchaj/dp_timeseries_plots_camera_icml25/eval_pld_deterministic_vs_random_top_level'

# Single setting, zoomed in to epsilon <= 10.
min_sequence_lengths = [20]
batch_sizes = [32]
batch_size = 32  # rebound by the loop variable below

for min_sequence_length, batch_size in zip(min_sequence_lengths, batch_sizes):
    plot_tradeoff(df_results, min_sequence_length, batch_size, xlim=10)
