In [None]:
import numpy as np
import attacks
import matplotlib.pyplot as plt
import os
from attacks import calculate_synthetic_random_baseline
import torch
from utils import Timer
import pickle
import pandas as pd

## General results on FedSGD with known label (top halves of table 1 in the main paper, and tables 17-19 in the Appendix)

In [None]:
# set to include only the datasets on which you have already obtained the data
dataset_names = [
    'ADULT',
    'German',
    'Lawschool',
    'HealthHeritage',
]

In [None]:
# Experiment number -> display name of the attack. The number selects the result
# folder/file to load; the name becomes the column title in the results tables.
experiments = dict([
    (46, 'TabLeak'),
    (47, 'TabLeak (no pooling)'),
    (4103, 'TabLeak (no softmax)'),
    (0, 'Inverting Gradients'),
    (1000, 'Deep Gradient Leakage'),
])

In [None]:
# Batch sizes covered by the stored results: the powers of two from 1 to 128.
batch_sizes = [2 ** exponent for exponent in range(8)]

# Name of each summary statistic -> index used to pick it out of the loaded arrays.
display_map = {
    statistic: position
    for position, statistic in enumerate(('mean', 'std', 'median', 'min', 'max'))
}

# Training epoch -> index used for that epoch in the loaded arrays.
training_epochs = {0: 0}

show_all_features = False
display = 'mean'  # which statistic from display_map is reported

# Restrict the epoch lookup to the epochs that should actually be shown.
training_epochs_display = [0]
training_epochs_to_display = dict(
    (epoch, training_epochs[epoch]) for epoch in training_epochs_display
)

In [None]:
# Build one results table per dataset: a 'Batch Size' column, one column per experiment
# whose result file exists on disk, and a 'Random Baseline' column.
# Every cell is a tuple (100 - 100 * <selected statistic>, 100 * <entry 1, i.e. 'std' in
# display_map>), both rounded to one decimal.
# NOTE(review): the arrays appear to store error fractions that are turned into accuracy
# percentages here; axis 0 of the experiment arrays is indexed with 0 (presumably the
# epoch index) and axis 2 with 0 (presumably the overall error) -- confirm against the
# code that writes these files.
dataframes_per_dataset = {}

for dataset_name in dataset_names:

    # load random baseline (raises if the file is missing, so dataset_names must only
    # contain datasets whose data has been generated)
    random_baseline = np.load(f'experiment_data/initial_experiments/random_inversion/{dataset_name}/random_baseline_{batch_sizes[-1]}_0.319_all_empirical.npy')

    # load all available experiments; experiments without a result file are skipped
    experiments_data = {}
    for experiment_num in experiments:
        base_path = f'experiment_data/large_scale_experiments/{dataset_name}/experiment_{experiment_num}'
        # experiments 46 and 4103 are stored under the '50_30_1500' file tag, all others under '50_1_1500'
        run_tag = '50_30_1500' if experiment_num in [46, 4103] else '50_1_1500'
        specific_file_path = base_path + f'/inversion_data_all_{experiment_num}_{dataset_name}_{run_tag}_{batch_sizes[-1]}_0.319_42.npy'
        if os.path.isfile(specific_file_path):
            experiments_data[experiment_num] = np.load(specific_file_path).astype(np.float32)

    stat_idx = display_map[display]
    batch_indices = range(len(batch_sizes))

    dataframe_of_dataset = pd.DataFrame()
    dataframe_of_dataset['Batch Size'] = batch_sizes
    for experiment_num, experiment_data in experiments_data.items():
        dataframe_of_dataset[experiments[experiment_num]] = [
            (np.around(100 - 100 * experiment_data[0, l, 0, stat_idx], 1), np.around(100 * experiment_data[0, l, 0, 1], 1))
            for l in batch_indices
        ]

    # The baseline column is built once per dataset and outside the experiment loop, so it
    # exists even when no experiment file was found (previously that case raised a
    # NameError, or silently reused the baseline of the previously processed dataset).
    random_baseline_col = [
        (np.around(100 - 100 * random_baseline[l, 0, stat_idx], 1), np.around(100 * random_baseline[l, 0, 1], 1))
        for l in batch_indices
    ]
    dataframe_of_dataset['Random Baseline'] = random_baseline_col
    dataframes_per_dataset[dataset_name] = dataframe_of_dataset

### Adult

In [None]:
# Show the table built for 'ADULT' (KeyError if 'ADULT' was not in dataset_names when the tables were built).
dataframes_per_dataset['ADULT']

### German

In [None]:
# Show the table built for 'German' (KeyError if 'German' was not in dataset_names when the tables were built).
dataframes_per_dataset['German']

### Lawschool

In [None]:
# Show the table built for 'Lawschool' (KeyError if 'Lawschool' was not in dataset_names when the tables were built).
dataframes_per_dataset['Lawschool']

### Health Heritage

In [None]:
# Show the table built for 'HealthHeritage' (KeyError if 'HealthHeritage' was not in dataset_names when the tables were built).
dataframes_per_dataset['HealthHeritage']

## General results on FedSGD with unknown label (bottom halves of table 1 in the main paper, and tables 17-19 in the Appendix)

In [None]:
# set to include only the datasets on which you have already obtained the data
dataset_names = [
    'ADULT',
    'German',
    'Lawschool',
    'HealthHeritage',
]

In [None]:
# Experiment number -> display name of the attack. The number selects the result
# folder/file to load; the name becomes the column title in the results tables.
experiments = dict([
    (946, 'TabLeak'),
    (947, 'TabLeak (no pooling)'),
    (94103, 'TabLeak (no softmax)'),
    (90, 'Inverting Gradients'),
    (91000, 'Deep Gradient Leakage'),
])

In [None]:
# Batch sizes covered by the stored results: the powers of two from 1 to 128.
batch_sizes = [2 ** exponent for exponent in range(8)]

# Name of each summary statistic -> index used to pick it out of the loaded arrays.
display_map = {
    statistic: position
    for position, statistic in enumerate(('mean', 'std', 'median', 'min', 'max'))
}

# Training epoch -> index used for that epoch in the loaded arrays.
training_epochs = {0: 0}

show_all_features = False
display = 'mean'  # which statistic from display_map is reported

# Restrict the epoch lookup to the epochs that should actually be shown.
training_epochs_display = [0]
training_epochs_to_display = dict(
    (epoch, training_epochs[epoch]) for epoch in training_epochs_display
)

In [None]:
# Build one results table per dataset: a 'Batch Size' column, one column per experiment
# whose result file exists on disk, and a 'Random Baseline' column.
# Every cell is a tuple (100 - 100 * <selected statistic>, 100 * <entry 1, i.e. 'std' in
# display_map>), both rounded to one decimal.
# NOTE(review): the arrays appear to store error fractions that are turned into accuracy
# percentages here; axis 0 of the experiment arrays is indexed with 0 (presumably the
# epoch index) and axis 2 with 0 (presumably the overall error) -- confirm against the
# code that writes these files.
dataframes_per_dataset = {}

for dataset_name in dataset_names:

    # load random baseline (raises if the file is missing, so dataset_names must only
    # contain datasets whose data has been generated)
    random_baseline = np.load(f'experiment_data/initial_experiments/random_inversion/{dataset_name}/random_baseline_{batch_sizes[-1]}_0.319_all_empirical.npy')

    # load all available experiments; experiments without a result file are skipped
    experiments_data = {}
    for experiment_num in experiments:
        base_path = f'experiment_data/large_scale_experiments/{dataset_name}/experiment_{experiment_num}'
        # experiments 946 and 94103 are stored under the '50_30_1500' file tag, all others under '50_1_1500'
        run_tag = '50_30_1500' if experiment_num in [946, 94103] else '50_1_1500'
        specific_file_path = base_path + f'/inversion_data_all_{experiment_num}_{dataset_name}_{run_tag}_{batch_sizes[-1]}_0.319_42.npy'
        if os.path.isfile(specific_file_path):
            experiments_data[experiment_num] = np.load(specific_file_path).astype(np.float32)

    stat_idx = display_map[display]
    batch_indices = range(len(batch_sizes))

    dataframe_of_dataset = pd.DataFrame()
    dataframe_of_dataset['Batch Size'] = batch_sizes
    for experiment_num, experiment_data in experiments_data.items():
        dataframe_of_dataset[experiments[experiment_num]] = [
            (np.around(100 - 100 * experiment_data[0, l, 0, stat_idx], 1), np.around(100 * experiment_data[0, l, 0, 1], 1))
            for l in batch_indices
        ]

    # The baseline column is built once per dataset and outside the experiment loop, so it
    # exists even when no experiment file was found (previously that case raised a
    # NameError, or silently reused the baseline of the previously processed dataset).
    random_baseline_col = [
        (np.around(100 - 100 * random_baseline[l, 0, stat_idx], 1), np.around(100 * random_baseline[l, 0, 1], 1))
        for l in batch_indices
    ]
    dataframe_of_dataset['Random Baseline'] = random_baseline_col
    dataframes_per_dataset[dataset_name] = dataframe_of_dataset

### Adult

In [None]:
# Show the table built for 'ADULT' (KeyError if 'ADULT' was not in dataset_names when the tables were built).
dataframes_per_dataset['ADULT']

### German

In [None]:
# Show the table built for 'German' (KeyError if 'German' was not in dataset_names when the tables were built).
dataframes_per_dataset['German']

### Lawschool

In [None]:
# Show the table built for 'Lawschool' (KeyError if 'Lawschool' was not in dataset_names when the tables were built).
dataframes_per_dataset['Lawschool']

### Health Heritage

In [None]:
# Show the table built for 'HealthHeritage' (KeyError if 'HealthHeritage' was not in dataset_names when the tables were built).
dataframes_per_dataset['HealthHeritage']

## Categorical vs. Continuous feature reconstruction errors (Figures 4 and 12)

In [None]:
# Experiment number -> legend prefix. The trailing ', \n' lets the feature type
# that is appended when plotting start on its own legend line.
experiments = dict([
    (0, 'Inverting Gradients, \n'),
    (46, 'TabLeak, \n'),
])

In [None]:
# Batch sizes covered by the stored results: the powers of two from 1 to 128.
batch_sizes = [2 ** exponent for exponent in range(8)]

# Name of each summary statistic -> index used to pick it out of the loaded arrays.
display_map = {
    statistic: position
    for position, statistic in enumerate(('mean', 'std', 'median', 'min', 'max'))
}

# Training epoch -> index used for that epoch in the loaded arrays.
training_epochs = dict([(0, 0), (100, 1)])

show_all_features = False
display = 'mean'  # which statistic from display_map is plotted

# Restrict the epoch lookup to the epochs that should actually be shown.
training_epochs_display = [0]
training_epochs_to_display = dict(
    (epoch, training_epochs[epoch]) for epoch in training_epochs_display
)

In [None]:
# Experiment number -> matplotlib color used for that experiment's lines and bands.
experiment_colors = dict([
    (0, 'cornflowerblue'),
    (46, 'indigo'),
])

In [None]:
# For every dataset, draw one figure of reconstruction accuracy (1 - stored value) against
# batch size, with one dashed line per available experiment and per feature type
# ('disc' / 'cont'). For the 'mean' and 'median' statistics a shaded band of +/- entry 1
# ('std' in display_map) is added, capped at 1 from above.
# NOTE(review): indices 1 and 2 along the third array axis are assumed to hold the errors
# on discrete and continuous features respectively -- confirm against the code that
# writes these files.
for dataset_name in dataset_names:
    error_types = ['disc', 'cont']
    legend_suffix = {'disc': 'discrete', 'cont': 'continuous'}

    # load all available experiments; experiments without a result file are skipped
    experiments_data = {}
    for experiment_num in experiments:
        base_path = f'experiment_data/large_scale_experiments/{dataset_name}/experiment_{experiment_num}'
        # experiment 46 is stored under the '50_30_1500' file tag, all others under '50_1_1500'
        run_tag = '50_30_1500' if experiment_num == 46 else '50_1_1500'
        specific_file_path = base_path + f'/inversion_data_all_{experiment_num}_{dataset_name}_{run_tag}_{batch_sizes[-1]}_0.319_42.npy'
        if os.path.isfile(specific_file_path):
            experiments_data[experiment_num] = np.load(specific_file_path).astype(np.float32)

    plt.figure(figsize=(11, 10))
    ax = plt.axes()
    ax.set_facecolor('white')
    ax.tick_params(axis='both', which='both', length=0)

    # start=1: the feature types are read from indices 1 ('disc') and 2 ('cont') of the third axis
    for feature_idx, error_type in enumerate(error_types, start=1):
        marker = f'${error_type[0].capitalize()}$'  # '$D$' / '$C$' rendered as the line marker
        for experiment_num, experiment_data in experiments_data.items():
            color = experiment_colors[experiment_num]
            for epoch_indx in training_epochs_to_display.values():
                stats = experiment_data[epoch_indx, :, feature_idx]
                accuracy = 1 - stats[:, display_map[display]]
                plt.plot(batch_sizes, accuracy, '--', marker=marker, c=color, markersize=20,
                         label=experiments[experiment_num] + legend_suffix[error_type])
                if display in ['mean', 'median']:
                    spread = stats[:, 1]
                    plt.fill_between(batch_sizes, accuracy - spread,
                                     np.minimum(accuracy + spread, np.ones(len(batch_sizes))),
                                     color=color, alpha=0.05)

    plt.xscale('log')
    plt.xlabel('Batchsize (log scale)', fontsize=30, labelpad=15)
    plt.xticks(batch_sizes, batch_sizes, fontsize=30)
    plt.ylabel('Reconstruction Accuracy [%]', fontsize=30, labelpad=15)
    # ticks sit at fractions 0.2 .. 1.0 but are labelled as percentages 20 .. 100
    plt.yticks(0.2 + 0.1 * np.arange(9), 20 + 10 * np.arange(9), fontsize=30)
    # The legend position is fixed in axes coordinates; the former, never-used `loc`
    # variable that depended on the last error type has been removed.
    ax.legend(fancybox=True, fontsize=30, loc=(.03, .04), framealpha=0.5)
    plt.title(f'{dataset_name}', fontsize=30)
    plt.grid(True, alpha=.3)
    plt.box(False)
    plt.ylim([0.18, 1.02])
    plt.tight_layout()

    plt.show()
    print('\n', '\n', '\n')