In [46]:
# NOTE(review): `sweep` and `utils` are imported here as modules only, yet later cells
# call create_table, fix_zeros, create_data, epsilon_sweep_time, calc_IG_metrics,
# calc_chi2_metrics, calc_information_gain, smooth and standardize unqualified, read
# `epsilon_ticks`, and call `sweep(...)` itself as a function. None of those names are
# bound by the imports in this cell — confirm how they are meant to come into scope
# (e.g. explicit `from utils import ...`) so the notebook survives Restart & Run All.
import numpy as np
import pandas as pd
import random
import timeit
import pickle
import sweep
import utils
import matplotlib.pyplot as plt
import matplotlib as mpl
from scipy.stats import chi2_contingency
from scipy.spatial.distance import hamming

## Initializing parameters

In [None]:
# Experiment-wide configuration.
# NOTE(review): the cms_* / rappor_* values are not referenced by any visible cell;
# presumably they configure the CMS and RAPPOR protocols elsewhere — confirm.
cms_k, cms_m = 128, 1024
rappor_k, rappor_m = 64, 1024

# Values swept on the epsilon axis of every experiment and plot below.
epsilons = [
    0.1, 0.25, 0.5, 0.75, 1,
    2, 3, 5, 7, 10,
]

# Passed to every sweep call below.
seed = 123456789

## Preprocessing

### Census Dataset

The [Census-Income (KDD) Dataset](https://archive.ics.uci.edu/ml/datasets/Census-Income+%28KDD%29) consists of 41 features, 2 classes and 198,649 entries. We will select the categorical features, as well as the continuous feature "age", which has to be discretized. Based on the values and their frequency, we discretize it into 6 categories.

The processed dataset consists of 15 features, 2 classes and 198,649 entries.

All of the zeros in the created table are replaced with 10. 

In [None]:
# Load the census data and drop every row that has a missing value.
census_df = pd.read_csv("./data/census-income.data", skipinitialspace=True)
census_df = census_df.dropna()

# Discretize the continuous age into six ordinal groups using right-closed bins:
#   (-inf, 20] -> 0, (20, 30] -> 1, (30, 40] -> 2, (40, 50] -> 3, (50, 60] -> 4, (60, inf) -> 5
# The codes are cast to float so the column keeps the dtype that the previous
# mask-by-mask assignment produced.
age_edges = [-np.inf, 20, 30, 40, 50, 60, np.inf]
census_df = (
    census_df
    .assign(agegroup=pd.cut(census_df['age'], bins=age_edges, labels=False).astype(float))
    .drop('age', axis=1)
)

# The 15 features kept for the experiment (the discretized age plus 14 categorical columns).
census_features = ['agegroup', 'class_of_worker', 'education', 'marital_stat',
                   'race', 'hispanic_origin', 'sex', 'member_of_a_labor_union',
                   'reason_for_unemployment', 'full_or_part_time_employment_stat',
                   'tax_filer_stat', 'region_of_previous_residence',
                   'num_persons_worked_for_employer',
                   'citizenship', 'own_business_or_self_employed']
census_X = census_df[census_features]
# NOTE(review): the label column was previously read as 'icnome', which looks like a
# typo for 'income' and would raise KeyError — confirm against the CSV header.
census_Y = census_df['income']

In [None]:
# Build the census tables from the selected features and the class labels.
# NOTE(review): create_table, fix_zeros and create_data are not defined in the visible
# cells; the remarks below are inferred from their names and the markdown above — confirm.
census_values, census_counts = create_table(census_X, census_Y, census_features)
# The return value is discarded, so fix_zeros presumably edits census_counts in place
# (zeros replaced with 10, per the markdown above). Keep it ahead of create_data,
# which receives census_counts.
fix_zeros(census_counts, 10) 

census_data = create_data(census_features, census_counts)

### Nursery Dataset

The [Nursery Dataset](https://archive.ics.uci.edu/ml/datasets/nursery) consists of 8 features, 5 classes and 12,960 entries. All of the features are categorical; however, the "health" feature is not considered. Because of very low frequency, entries with the class "recommend" are removed.

The processed dataset consists of 7 features, 4 classes and 12,958 entries.

All of the zeros in the created table are replaced with 10. 

In [None]:
# Load the nursery data and discard the rows whose class is "recommend".
nursery_df = pd.read_csv("nursery.data", skipinitialspace=True)
is_recommend = nursery_df['class'] == 'recommend'
nursery_df = nursery_df.drop(index=nursery_df.loc[is_recommend].index)

# Feature columns used for the experiment ("health" is deliberately left out).
nursery_features = [
    'parents', 'has_nurs', 'form', 'children',
    'housing', 'finance', 'social',
]
nursery_X = nursery_df[nursery_features]
nursery_Y = nursery_df['class']

In [None]:
# Build the nursery tables from the selected features and the class labels.
# NOTE(review): create_table, fix_zeros and create_data are not defined in the visible
# cells; the remarks below are inferred from their names and the markdown above — confirm.
nursery_values, nursery_counts = create_table(nursery_X, nursery_Y, nursery_features)
# The return value is discarded, so fix_zeros presumably edits nursery_counts in place
# (zeros replaced with 10, per the markdown above). Keep it ahead of create_data.
fix_zeros(nursery_counts, 10)
  
nursery_data = create_data(nursery_features, nursery_counts)

### Contraceptive Dataset

The [Contraceptive Method Choice (CMC) Dataset](https://archive.ics.uci.edu/ml/datasets/Contraceptive+Method+Choice) consists of 9 features, 3 classes and 1,473 entries. All of the features except "age" are categorical. Hence, we discretize the "age" feature into 4 categories. Furthermore, the "children" feature has been altered so that entries with more than 6 children are represented as having 6.

The processed dataset consists of 9 features, 3 classes and 1,473 entries.

All of the zeros in the created table are replaced with 5. 

In [None]:
cmc_df = pd.read_csv("cmc.data", skipinitialspace=True)

# Bin age into four ordinal groups with right-closed intervals:
#   (-inf, 20] -> 0, (20, 30] -> 1, (30, 40] -> 2, (40, inf) -> 3
# Codes are stored as floats, as a column filled through successive masks would be.
cmc_age_edges = [-np.inf, 20, 30, 40, np.inf]
cmc_df['agegroup'] = pd.cut(cmc_df['age'], bins=cmc_age_edges, labels=False).astype(float)
cmc_df = cmc_df.drop(columns='age')

# Cap the number of children at 6.
cmc_df['children'] = cmc_df['children'].clip(upper=6)

# Remove incomplete rows only after the derived columns exist.
cmc_df = cmc_df.dropna()

cmc_features = [
    'agegroup', 'education', 'husband_education', 'children',
    'religion', 'working', 'husband_occupation', 'living_index',
    'exposure',
]
cmc_X = cmc_df[cmc_features]
cmc_Y = cmc_df['contraceptive']

In [None]:
# Build the contraceptive (CMC) tables from the selected features and the class labels.
# NOTE(review): create_table, fix_zeros and create_data are not defined in the visible
# cells; the remarks below are inferred from their names and the markdown above — confirm.
cmc_values, cmc_counts = create_table(cmc_X, cmc_Y, cmc_features)
# The return value is discarded, so fix_zeros presumably edits cmc_counts in place
# (zeros replaced with 5, per the markdown above). Keep it ahead of create_data.
fix_zeros(cmc_counts, 5)

cmc_data = create_data(cmc_features, cmc_counts)

### Mushroom Dataset

The [Mushroom Dataset](https://archive.ics.uci.edu/ml/datasets/mushroom) consists of 22 features, 2 classes and 8,124 entries. All of the features are categorical. The feature "veil_type" has been removed, since it only has one possible value.

The processed dataset consists of 21 features, 2 classes and 8,124 entries.

All of the zeros in the created table are replaced with 10. 

In [None]:
mushroom_df = pd.read_csv("agaricus-lepiota.data", skipinitialspace=True)

# Every column is a feature except the label ("edible") and "veil_type",
# which the markdown above says has a single possible value.
mushroom_features = list(mushroom_df.columns.drop(['edible', 'veil_type']))

mushroom_X = mushroom_df[mushroom_features]
mushroom_Y = mushroom_df['edible']

In [None]:
# Build the mushroom tables from the selected features and the class labels.
# NOTE(review): create_table, fix_zeros and create_data are not defined in the visible
# cells; the remarks below are inferred from their names and the markdown above — confirm.
mushroom_values, mushroom_counts = create_table(mushroom_X, mushroom_Y, mushroom_features)
# The return value is discarded, so fix_zeros presumably edits mushroom_counts in place
# (zeros replaced with 10, per the markdown above). Keep it ahead of create_data.
fix_zeros(mushroom_counts, 10)
  
mushroom_data = create_data(mushroom_features, mushroom_counts)

### Obesity Dataset

The [Estimation of Obesity Dataset](https://archive.ics.uci.edu/ml/datasets/Estimation+of+obesity+levels+based+on+eating+habits+and+physical+condition+) consists of 16 features, 7 classes and 2,111 entries. Some of the useful features including "Age", "Height", "Weight", "FCVC", "NCP", and "CH2O" are continuous numerical features limited to a range. Therefore, they can be discretized according to their values.

The Processed dataset consists of 14 features, 7 classes and 2,111 entries.

All of the zeros in the created table are replaced with 1. 

In [None]:
obesity_df = pd.read_csv("obesity.csv", skipinitialspace=True)

# Discretization plan: (continuous column, derived group column, interior cut points).
# Bins are right-closed, so a value equal to a cut point falls into the lower group;
# group 0 is everything up to the first cut point, the last group everything above
# the final one.
binning_plan = [
    ('Age', 'Agegroup', [18, 20, 25, 30]),
    ('Height', 'Heightgroup', [1.5, 1.6, 1.7, 1.8, 1.9]),
    ('Weight', 'Weightgroup', [50, 60, 70, 80, 90, 100, 110, 120]),
    ('FCVC', 'FCVCgroup', [1, 1.5, 2, 2.5]),
    ('NCP', 'NCPgroup', [1, 2, 3]),
    ('CH2O', 'CH2Ogroup', [1.5, 2, 2.5]),
]
for source_col, group_col, cut_points in binning_plan:
    edges = [-np.inf, *cut_points, np.inf]
    # Float codes, matching a column that is filled one mask at a time.
    obesity_df[group_col] = pd.cut(obesity_df[source_col], bins=edges, labels=False).astype(float)
    # The continuous column is replaced by its group column.
    obesity_df = obesity_df.drop(columns=source_col)

obesity_df = obesity_df.dropna()

obesity_features = [
    'Gender', 'Agegroup', 'Heightgroup', 'Weightgroup',
    'family_history_with_overweight', 'FAVC', 'FCVCgroup',
    'NCPgroup', 'CAEC', 'SMOKE', 'CH2Ogroup', 'SCC', 'CALC', 'MTRANS',
]
obesity_X = obesity_df[obesity_features]
obesity_Y = obesity_df['NObeyesdad']

In [None]:
# Build the obesity tables from the selected features and the class labels.
# NOTE(review): create_table, fix_zeros and create_data are not defined in the visible
# cells; the remarks below are inferred from their names and the markdown above — confirm.
obesity_values, obesity_counts = create_table(obesity_X, obesity_Y, obesity_features)
# The return value is discarded, so fix_zeros presumably edits obesity_counts in place
# (zeros replaced with 1, per the markdown above). Keep it ahead of create_data.
fix_zeros(obesity_counts, 1)
  
obesity_data = create_data(obesity_features, obesity_counts)

### Biology Dataset

The [Molecular Biology (Splice-junction Gene Sequences) Dataset](https://archive.ics.uci.edu/ml/datasets/Molecular+Biology+%28Splice-junction+Gene+Sequences%29) consists of 60 features, 3 classes and 3,190 entries. All of the features are categorical. Entries containing ambiguity values (any value other than the four DNA bases) are removed.

The Processed dataset consists of 60 features, 3 classes and 3,175 entries.

All of the zeros in the created table are replaced with 5. 

In [None]:
splice_df = pd.read_csv("splice.data", skipinitialspace=True)

# Every column except the label is a sequence-position feature.
splice_features = list(splice_df.columns)
splice_features.remove('class')

# Keep only the rows in which every feature is one of the four DNA bases.
# A single vectorized mask replaces one in-place DataFrame.drop per feature column;
# the surviving rows are the same (missing values fail isin and are dropped as well).
dna_bases = ['G', 'C', 'T', 'A']
only_bases = splice_df[splice_features].isin(dna_bases).all(axis=1)
splice_df = splice_df.loc[only_bases]

splice_X = splice_df[splice_features]
splice_Y = splice_df['class']

In [None]:
# Build the splice (biology) tables from the selected features and the class labels.
# NOTE(review): create_table, fix_zeros and create_data are not defined in the visible
# cells; the remarks below are inferred from their names and the markdown above — confirm.
splice_values, splice_counts = create_table(splice_X, splice_Y, splice_features)
# The return value is discarded, so fix_zeros presumably edits splice_counts in place
# (zeros replaced with 5, per the markdown above). Keep it ahead of create_data.
fix_zeros(splice_counts, 5)
  
splice_data = create_data(splice_features, splice_counts)

## Runtime

In this part, we will measure the runtime of each LDP algorithm for given epsilons. 

In [None]:
# Time every dataset over the same epsilon grid and seed, in the order
# census, nursery, cmc, mushroom, obesity, splice.
runtime_inputs = [
    (census_data, census_Y),
    (nursery_data, nursery_Y),
    (cmc_data, cmc_Y),
    (mushroom_data, mushroom_Y),
    (obesity_data, obesity_Y),
    (splice_data, splice_Y),
]
(census_times, nursery_times, cmc_times,
 mushroom_times, obesity_times, splice_times) = [
    epsilon_sweep_time(epsilons, data, labels, seed)
    for data, labels in runtime_inputs
]

In [None]:
def _time_stats(times):
    """Return (mean, std) along axis 1 of the stacked values of a timing dict."""
    stacked = list(times.values())
    return np.mean(stacked, axis=1), np.std(stacked, axis=1)


census_time_means, census_time_std = _time_stats(census_times)
nursery_time_means, nursery_time_std = _time_stats(nursery_times)
cmc_time_means, cmc_time_std = _time_stats(cmc_times)
mushroom_time_means, mushroom_time_std = _time_stats(mushroom_times)
obesity_time_means, obesity_time_std = _time_stats(obesity_times)
splice_time_means, splice_time_std = _time_stats(splice_times)

In [24]:
# Print one "mean±std" list (three decimals) per dataset, under its heading.
runtime_report = [
    ("Census Dataset:", census_time_means, census_time_std),
    ("Nursery Dataset:", nursery_time_means, nursery_time_std),
    ("Contraceptive Dataset:", cmc_time_means, cmc_time_std),
    ("Mushroom Dataset:", mushroom_time_means, mushroom_time_std),
    ("Obesity Dataset:", obesity_time_means, obesity_time_std),
    ("Biology Dataset:", splice_time_means, splice_time_std),
]
for heading, time_means, time_stds in runtime_report:
    print(heading)
    print(["{:.3f}±{:.3f}".format(mean, std) for mean, std in zip(time_means, time_stds)])

array([ 0.36117169,  0.06001478,  0.05324954,  0.1824318 ,  0.71201507,
       14.94801237])

## Information Gain

In [None]:
# Run the estimation sweep for every dataset over the same epsilon grid and seed.
# NOTE(review): the import cell binds `sweep` to a module (`import sweep`), so calling
# it directly raises TypeError unless the name has been rebound to a function —
# confirm which function of that module is intended here.
sweep_inputs = [
    (census_data, census_Y),
    (nursery_data, nursery_Y),
    (cmc_data, cmc_Y),
    (mushroom_data, mushroom_Y),
    (obesity_data, obesity_Y),
    (splice_data, splice_Y),
]
(census_estimates, nursery_estimates, cmc_estimates,
 mushroom_estimates, obesity_estimates, splice_estimates) = [
    sweep(epsilons, data, labels, seed)
    for data, labels in sweep_inputs
]

In [None]:
# For every dataset, compare the true count table with the swept estimates.
# calc_IG_metrics returns three values per dataset, unpacked as (IG, RMSE, kendall).
ig_inputs = [
    (census_counts, census_estimates),
    (nursery_counts, nursery_estimates),
    (cmc_counts, cmc_estimates),
    (mushroom_counts, mushroom_estimates),
    (obesity_counts, obesity_estimates),
    (splice_counts, splice_estimates),
]
(
    (census_IG, census_RMSE, census_kendall),
    (nursery_IG, nursery_RMSE, nursery_kendall),
    (cmc_IG, cmc_RMSE, cmc_kendall),
    (mushroom_IG, mushroom_RMSE, mushroom_kendall),
    (obesity_IG, obesity_RMSE, obesity_kendall),
    (splice_IG, splice_RMSE, splice_kendall),
) = [
    calc_IG_metrics(epsilons, counts, estimates)
    for counts, estimates in ig_inputs
]

### RMSE Plots

In [None]:
# Census dataset: smoothed RMSE curve per protocol against epsilon.
# Reset matplotlib to its default style before drawing the first figure.
mpl.rcParams.update(mpl.rcParamsDefault)
fig = plt.figure(figsize=(10, 8))

# (protocol key, which doubles as legend label; marker; colour)
curve_styles = [
    ('OLH', 'D', 'b'),
    ('OUE', 's', 'r'),
    ('THE', 'v', 'm'),
    ('HR', '^', 'k'),
    ('CMS', 'x', 'saddlebrown'),
    ('RAPPOR', 'o', 'g'),
]
for protocol, marker, colour in curve_styles:
    plt.plot(epsilons, smooth(census_RMSE[protocol], 5), label=protocol,
             fillstyle='none', marker=marker, color=colour,
             markersize=16, linewidth=2.2)

plt.xlabel(r'$\epsilon$', fontsize=28)
plt.ylabel("RMSE", fontsize=24)
plt.xticks(epsilon_ticks, fontsize=24, rotation=90)
plt.yticks(fontsize=24)
plt.ylim(top=0.35)
plt.legend(ncol=2, prop={'size': 22})

plt.savefig("./results/figures/census_RMSE.pdf", bbox_inches='tight')

In [None]:
# Nursery dataset: smoothed RMSE curve per protocol against epsilon.
fig = plt.figure(figsize=(10, 8))

# (protocol key, which doubles as legend label; marker; colour)
curve_styles = [
    ('OLH', 'D', 'b'),
    ('OUE', 's', 'r'),
    ('THE', 'v', 'm'),
    ('HR', '^', 'k'),
    ('CMS', 'x', 'saddlebrown'),
    ('RAPPOR', 'o', 'g'),
]
for protocol, marker, colour in curve_styles:
    plt.plot(epsilons, smooth(nursery_RMSE[protocol], 5), label=protocol,
             fillstyle='none', marker=marker, color=colour,
             markersize=16, linewidth=2.2)

plt.xlabel(r'$\epsilon$', fontsize=28)
plt.ylabel("RMSE", fontsize=24)
plt.xticks(epsilon_ticks, fontsize=24, rotation=90)
plt.yticks(fontsize=24)
plt.ylim(top=0.35)
plt.legend(ncol=2, prop={'size': 22})

plt.savefig("./results/figures/nursery_RMSE.pdf", bbox_inches='tight')

In [None]:
# Contraceptive (CMC) dataset: smoothed RMSE curve per protocol against epsilon.
fig = plt.figure(figsize=(10, 8))

# (protocol key, which doubles as legend label; marker; colour)
curve_styles = [
    ('OLH', 'D', 'b'),
    ('OUE', 's', 'r'),
    ('THE', 'v', 'm'),
    ('HR', '^', 'k'),
    ('CMS', 'x', 'saddlebrown'),
    ('RAPPOR', 'o', 'g'),
]
for protocol, marker, colour in curve_styles:
    plt.plot(epsilons, smooth(cmc_RMSE[protocol], 5), label=protocol,
             fillstyle='none', marker=marker, color=colour,
             markersize=16, linewidth=2.2)

plt.xlabel(r'$\epsilon$', fontsize=28)
plt.ylabel("RMSE", fontsize=24)
plt.xticks(epsilon_ticks, fontsize=24, rotation=90)
plt.yticks(fontsize=24)
plt.ylim(top=0.35)
plt.legend(ncol=2, prop={'size': 22})

plt.savefig("./results/figures/cmc_RMSE.pdf", bbox_inches='tight')

In [None]:
# Mushroom dataset: smoothed RMSE curve per protocol against epsilon.
fig = plt.figure(figsize=(10, 8))

# (protocol key, which doubles as legend label; marker; colour)
curve_styles = [
    ('OLH', 'D', 'b'),
    ('OUE', 's', 'r'),
    ('THE', 'v', 'm'),
    ('HR', '^', 'k'),
    ('CMS', 'x', 'saddlebrown'),
    ('RAPPOR', 'o', 'g'),
]
for protocol, marker, colour in curve_styles:
    plt.plot(epsilons, smooth(mushroom_RMSE[protocol], 5), label=protocol,
             fillstyle='none', marker=marker, color=colour,
             markersize=16, linewidth=2.2)

plt.xlabel(r'$\epsilon$', fontsize=28)
plt.ylabel("RMSE", fontsize=24)
plt.xticks(epsilon_ticks, fontsize=24, rotation=90)
plt.yticks(fontsize=24)
plt.ylim(top=0.35)
plt.legend(ncol=2, prop={'size': 22})

plt.savefig("./results/figures/mushroom_RMSE.pdf", bbox_inches='tight')

In [None]:
# Obesity dataset: smoothed RMSE curve per protocol against epsilon.
fig = plt.figure(figsize=(10, 8))

# (protocol key, which doubles as legend label; marker; colour)
curve_styles = [
    ('OLH', 'D', 'b'),
    ('OUE', 's', 'r'),
    ('THE', 'v', 'm'),
    ('HR', '^', 'k'),
    ('CMS', 'x', 'saddlebrown'),
    ('RAPPOR', 'o', 'g'),
]
for protocol, marker, colour in curve_styles:
    plt.plot(epsilons, smooth(obesity_RMSE[protocol], 5), label=protocol,
             fillstyle='none', marker=marker, color=colour,
             markersize=16, linewidth=2.2)

plt.xlabel(r'$\epsilon$', fontsize=28)
plt.ylabel("RMSE", fontsize=24)
plt.xticks(epsilon_ticks, fontsize=24, rotation=90)
plt.yticks(fontsize=24)
plt.ylim(top=0.35)
plt.legend(ncol=2, prop={'size': 22})

plt.savefig("./results/figures/obesity_RMSE.pdf", bbox_inches='tight')

In [None]:
# Biology (splice) dataset: smoothed RMSE curve per protocol against epsilon.
fig = plt.figure(figsize=(10, 8))

# (protocol key, which doubles as legend label; marker; colour)
curve_styles = [
    ('OLH', 'D', 'b'),
    ('OUE', 's', 'r'),
    ('THE', 'v', 'm'),
    ('HR', '^', 'k'),
    ('CMS', 'x', 'saddlebrown'),
    ('RAPPOR', 'o', 'g'),
]
for protocol, marker, colour in curve_styles:
    plt.plot(epsilons, smooth(splice_RMSE[protocol], 5), label=protocol,
             fillstyle='none', marker=marker, color=colour,
             markersize=16, linewidth=2.2)

plt.xlabel(r'$\epsilon$', fontsize=28)
plt.ylabel("RMSE", fontsize=24)
plt.xticks(epsilon_ticks, fontsize=24, rotation=90)
plt.yticks(fontsize=24)
plt.ylim(top=0.35)
plt.legend(ncol=2, prop={'size': 22})

plt.savefig("./results/figures/splice_RMSE.pdf", bbox_inches='tight')

### Kendall Plots

In [None]:
# Census dataset: smoothed Kendall's tau distance per protocol against epsilon.
fig = plt.figure(figsize=(10, 8))

# (protocol key, which doubles as legend label; marker; colour)
curve_styles = [
    ('OLH', 'D', 'b'),
    ('OUE', 's', 'r'),
    ('THE', 'v', 'm'),
    ('HR', '^', 'k'),
    ('CMS', 'x', 'saddlebrown'),
    ('RAPPOR', 'o', 'g'),
]
for protocol, marker, colour in curve_styles:
    plt.plot(epsilons, smooth(census_kendall[protocol], 5), label=protocol,
             fillstyle='none', marker=marker, color=colour,
             markersize=16, linewidth=2.2)

plt.xlabel(r'$\epsilon$', fontsize=28)
plt.ylabel("Kendall's Tau Distance", fontsize=24)
plt.xticks(epsilon_ticks, fontsize=24, rotation=90)
plt.yticks(fontsize=24)
plt.ylim(top=0.6)
plt.legend(ncol=2, prop={'size': 22})

plt.savefig("./results/figures/census_kendall.pdf", bbox_inches='tight')

In [None]:
# Nursery dataset: smoothed Kendall's tau distance per protocol against epsilon.
fig = plt.figure(figsize=(10, 8))

# (protocol key, which doubles as legend label; marker; colour)
curve_styles = [
    ('OLH', 'D', 'b'),
    ('OUE', 's', 'r'),
    ('THE', 'v', 'm'),
    ('HR', '^', 'k'),
    ('CMS', 'x', 'saddlebrown'),
    ('RAPPOR', 'o', 'g'),
]
for protocol, marker, colour in curve_styles:
    plt.plot(epsilons, smooth(nursery_kendall[protocol], 5), label=protocol,
             fillstyle='none', marker=marker, color=colour,
             markersize=16, linewidth=2.2)

plt.xlabel(r'$\epsilon$', fontsize=28)
plt.ylabel("Kendall's Tau Distance", fontsize=24)
plt.xticks(epsilon_ticks, fontsize=24, rotation=90)
plt.yticks(fontsize=24)
plt.ylim(top=0.6)
plt.legend(ncol=2, prop={'size': 22})

plt.savefig("./results/figures/nursery_kendall.pdf", bbox_inches='tight')

In [None]:
# Contraceptive (CMC) dataset: smoothed Kendall's tau distance per protocol against epsilon.
fig = plt.figure(figsize=(10, 8))

# (protocol key, which doubles as legend label; marker; colour)
curve_styles = [
    ('OLH', 'D', 'b'),
    ('OUE', 's', 'r'),
    ('THE', 'v', 'm'),
    ('HR', '^', 'k'),
    ('CMS', 'x', 'saddlebrown'),
    ('RAPPOR', 'o', 'g'),
]
for protocol, marker, colour in curve_styles:
    plt.plot(epsilons, smooth(cmc_kendall[protocol], 5), label=protocol,
             fillstyle='none', marker=marker, color=colour,
             markersize=16, linewidth=2.2)

plt.xlabel(r'$\epsilon$', fontsize=28)
plt.ylabel("Kendall's Tau Distance", fontsize=24)
plt.xticks(epsilon_ticks, fontsize=24, rotation=90)
plt.yticks(fontsize=24)
plt.ylim(top=0.6)
plt.legend(ncol=2, prop={'size': 22})

plt.savefig("./results/figures/cmc_kendall.pdf", bbox_inches='tight')

In [None]:
# Mushroom dataset: smoothed Kendall's tau distance per protocol against epsilon.
fig = plt.figure(figsize=(10, 8))

# (protocol key, which doubles as legend label; marker; colour)
curve_styles = [
    ('OLH', 'D', 'b'),
    ('OUE', 's', 'r'),
    ('THE', 'v', 'm'),
    ('HR', '^', 'k'),
    ('CMS', 'x', 'saddlebrown'),
    ('RAPPOR', 'o', 'g'),
]
for protocol, marker, colour in curve_styles:
    plt.plot(epsilons, smooth(mushroom_kendall[protocol], 5), label=protocol,
             fillstyle='none', marker=marker, color=colour,
             markersize=16, linewidth=2.2)

plt.xlabel(r'$\epsilon$', fontsize=28)
plt.ylabel("Kendall's Tau Distance", fontsize=24)
plt.xticks(epsilon_ticks, fontsize=24, rotation=90)
plt.yticks(fontsize=24)
plt.ylim(top=0.6)
plt.legend(ncol=2, prop={'size': 22})

plt.savefig("./results/figures/mushroom_kendall.pdf", bbox_inches='tight')

In [None]:
# Obesity dataset: smoothed Kendall's tau distance per protocol against epsilon.
fig = plt.figure(figsize=(10, 8))

# (protocol key, which doubles as legend label; marker; colour)
curve_styles = [
    ('OLH', 'D', 'b'),
    ('OUE', 's', 'r'),
    ('THE', 'v', 'm'),
    ('HR', '^', 'k'),
    ('CMS', 'x', 'saddlebrown'),
    ('RAPPOR', 'o', 'g'),
]
for protocol, marker, colour in curve_styles:
    plt.plot(epsilons, smooth(obesity_kendall[protocol], 5), label=protocol,
             fillstyle='none', marker=marker, color=colour,
             markersize=16, linewidth=2.2)

plt.xlabel(r'$\epsilon$', fontsize=28)
plt.ylabel("Kendall's Tau Distance", fontsize=24)
plt.xticks(epsilon_ticks, fontsize=24, rotation=90)
plt.yticks(fontsize=24)
plt.ylim(top=0.6)
plt.legend(ncol=2, prop={'size': 22})

plt.savefig("./results/figures/obesity_kendall.pdf", bbox_inches='tight')

In [None]:
# Biology (splice) dataset: smoothed Kendall's tau distance per protocol against epsilon.
fig = plt.figure(figsize=(10, 8))

# (protocol key, which doubles as legend label; marker; colour)
curve_styles = [
    ('OLH', 'D', 'b'),
    ('OUE', 's', 'r'),
    ('THE', 'v', 'm'),
    ('HR', '^', 'k'),
    ('CMS', 'x', 'saddlebrown'),
    ('RAPPOR', 'o', 'g'),
]
for protocol, marker, colour in curve_styles:
    plt.plot(epsilons, smooth(splice_kendall[protocol], 5), label=protocol,
             fillstyle='none', marker=marker, color=colour,
             markersize=16, linewidth=2.2)

plt.xlabel(r'$\epsilon$', fontsize=28)
plt.ylabel("Kendall's Tau Distance", fontsize=24)
plt.xticks(epsilon_ticks, fontsize=24, rotation=90)
plt.yticks(fontsize=24)
plt.ylim(top=0.6)
plt.legend(ncol=2, prop={'size': 22})

plt.savefig("./results/figures/splice_kendall.pdf", bbox_inches='tight')

## Barplots

In [None]:
# RMSE results of all six datasets, in the order used for every protocol below.
_rmse_by_dataset = (cmc_RMSE, nursery_RMSE, obesity_RMSE,
                    mushroom_RMSE, splice_RMSE, census_RMSE)


def _rmse_across_datasets(protocol):
    """Return (per-dataset RMSE arrays, position-wise mean over datasets) for a protocol."""
    per_dataset = [scores[protocol] for scores in _rmse_by_dataset]
    # zip(*...) groups the values at the same position of every dataset's array.
    averaged = [np.mean(column) for column in zip(*per_dataset)]
    return per_dataset, averaged


OLH_RMSE_arrays, OLH_RMSE_means = _rmse_across_datasets('OLH')
OUE_RMSE_arrays, OUE_RMSE_means = _rmse_across_datasets('OUE')
THE_RMSE_arrays, THE_RMSE_means = _rmse_across_datasets('THE')
HR_RMSE_arrays, HR_RMSE_means = _rmse_across_datasets('HR')
CMS_RMSE_arrays, CMS_RMSE_means = _rmse_across_datasets('CMS')
RAPPOR_RMSE_arrays, RAPPOR_RMSE_means = _rmse_across_datasets('RAPPOR')

In [None]:
# Grouped bar chart: one group per epsilon, one bar per protocol, showing the RMSE
# averaged over all datasets.
# plt.subplots creates the Axes that the ax.* calls below need; previously only a
# Figure was created and `ax` was never defined, so the cell raised NameError.
fig, ax = plt.subplots(figsize=(25, 12))
# One group per epsilon (was hard-coded to 10, which only matched the current grid).
index = np.arange(len(epsilons))
bar_width = 0.13
opacity = 0.6

# (legend label, bar heights, colour, hatch) — drawn left to right within each group.
bar_specs = [
    ('OLH', OLH_RMSE_means, 'b', '//'),
    ('OUE', OUE_RMSE_means, 'skyblue', '\\\\'),
    ('THE', THE_RMSE_means, 'darkmagenta', '||'),
    ('HR', HR_RMSE_means, 'c', '*'),
    ('CMS', CMS_RMSE_means, 'mediumpurple', 'O'),
    ('RAPPOR', RAPPOR_RMSE_means, 'midnightblue', '.'),
]
for slot, (protocol, heights, colour, hatch) in enumerate(bar_specs):
    ax.bar(index + slot * bar_width, heights, bar_width, alpha=opacity,
           color=colour, label=protocol, hatch=hatch)

ax.set_xlabel(r"$\epsilon$", size=30)
ax.set_ylabel('RMSE', size=28)
# Six bars sit at offsets 0..5 bar widths, so the group centre is at 2.5 bar widths.
ax.set_xticks(index + 2.5*bar_width)
ax.set_xticklabels(epsilons)
ax.tick_params(axis='both', labelsize=24)
ax.legend(ncol=3, prop={'size':35})

plt.savefig("./results/RMSE_barplot.pdf", bbox_inches='tight')

In [None]:
# Grouped bar chart: one group per epsilon, one bar per protocol, showing the
# Kendall's tau distance averaged over all datasets.

# The *_kendall_means series were never computed in the notebook (only the RMSE
# means were), so they are derived here the same way: position-wise mean over datasets.
kendall_by_dataset = (cmc_kendall, nursery_kendall, obesity_kendall,
                      mushroom_kendall, splice_kendall, census_kendall)


def _kendall_mean_over_datasets(protocol):
    """Return the position-wise mean of a protocol's Kendall distances over all datasets."""
    per_dataset = [scores[protocol] for scores in kendall_by_dataset]
    return [np.mean(column) for column in zip(*per_dataset)]


OLH_kendall_means = _kendall_mean_over_datasets('OLH')
OUE_kendall_means = _kendall_mean_over_datasets('OUE')
THE_kendall_means = _kendall_mean_over_datasets('THE')
HR_kendall_means = _kendall_mean_over_datasets('HR')
CMS_kendall_means = _kendall_mean_over_datasets('CMS')
RAPPOR_kendall_means = _kendall_mean_over_datasets('RAPPOR')

# plt.subplots creates the Axes that the ax.* calls below need; previously only a
# Figure was created and `ax` was never defined, so the cell raised NameError.
fig, ax = plt.subplots(figsize=(25, 12))
# One group per epsilon (was hard-coded to 10, which only matched the current grid).
index = np.arange(len(epsilons))
bar_width = 0.13
opacity = 0.6

# (legend label, bar heights, colour, hatch) — drawn left to right within each group.
bar_specs = [
    ('OLH', OLH_kendall_means, 'b', '//'),
    ('OUE', OUE_kendall_means, 'skyblue', '\\\\'),
    ('THE', THE_kendall_means, 'darkmagenta', '||'),
    ('HR', HR_kendall_means, 'c', '*'),
    ('CMS', CMS_kendall_means, 'mediumpurple', 'O'),
    ('RAPPOR', RAPPOR_kendall_means, 'midnightblue', '.'),
]
for slot, (protocol, heights, colour, hatch) in enumerate(bar_specs):
    ax.bar(index + slot * bar_width, heights, bar_width, alpha=opacity,
           color=colour, label=protocol, hatch=hatch)

ax.set_xlabel(r"$\epsilon$", size=30)
ax.set_ylabel("Kendall's Tau Distance", size=28)
# Six bars sit at offsets 0..5 bar widths, so the group centre is at 2.5 bar widths.
ax.set_xticks(index + 2.5*bar_width)
ax.set_xticklabels(epsilons)
ax.tick_params(axis='both', labelsize=24)
ax.legend(ncol=3, prop={'size':35})

plt.savefig("./results/kendall_barplot.pdf", bbox_inches='tight')

### Boxplot

In order to make the boxplot more readable, two of the outliers have been removed.

In [None]:
# Information gain of every dataset, computed from its count table.
count_tables = (cmc_counts, nursery_counts, obesity_counts,
                splice_counts, mushroom_counts, census_counts)
(IG_cmc, IG_nursery, IG_obesity,
 IG_splice, IG_mushroom, IG_census) = [
    calc_information_gain(counts) for counts in count_tables
]

# One entry is removed from the mushroom (position 4) and obesity (position 3) arrays;
# presumably these are the two outliers the markdown above mentions — the positions
# are hard-coded, so re-check them if the feature lists change.
mushroom_trimmed = np.delete(IG_mushroom, 4)
obesity_trimmed = np.delete(IG_obesity, 3)

# Order matches the boxplot labels: Census, Nursery, Contraceptive, Mushroom, Obesity, Biology.
IGs = [IG_census, IG_nursery, IG_cmc, mushroom_trimmed, obesity_trimmed, IG_splice]

In [None]:
# Boxplot of the information-gain values, one box per dataset.
fig = plt.figure(figsize=(18, 12))

# Thicker lines and larger flier markers so the figure stays legible when scaled down.
line_props = {'linewidth': 2}
plt.boxplot(IGs, labels=['Census', 'Nursery', 'Contraceptive', 'Mushroom', 'Obesity', 'Biology'],
            boxprops=line_props, whiskerprops=line_props, capprops=line_props,
            flierprops={'markersize': 10, 'markeredgewidth': 2},
            medianprops={'linewidth': 2, 'linestyle': '--', 'color': 'black'})

plt.xlabel("Datasets", fontsize=28)
# Was fontsize=14, smaller than the tick labels (24) and half the x-axis label (28);
# both axis labels now use the same size.
plt.ylabel("Information Gain", fontsize=28)
plt.xticks(size=24)
plt.yticks(size=24)

plt.savefig("./results/boxplot.pdf", bbox_inches='tight')

## Chi-squared

In [None]:
def _chi2_rmse_and_kendall(counts, estimates):
    """Run calc_chi2_metrics for one dataset and keep only its last two results.

    calc_chi2_metrics returns four values; the first two are not used in this
    notebook section, the third and fourth are the RMSE and Kendall results.
    """
    _, _, rmse, kendall = calc_chi2_metrics(epsilons, counts, estimates)
    return rmse, kendall


census_chi2_rmse, census_chi2_kendall = _chi2_rmse_and_kendall(census_counts, census_estimates)
nursery_chi2_rmse, nursery_chi2_kendall = _chi2_rmse_and_kendall(nursery_counts, nursery_estimates)
cmc_chi2_rmse, cmc_chi2_kendall = _chi2_rmse_and_kendall(cmc_counts, cmc_estimates)
mushroom_chi2_rmse, mushroom_chi2_kendall = _chi2_rmse_and_kendall(mushroom_counts, mushroom_estimates)
obesity_chi2_rmse, obesity_chi2_kendall = _chi2_rmse_and_kendall(obesity_counts, obesity_estimates)
splice_chi2_rmse, splice_chi2_kendall = _chi2_rmse_and_kendall(splice_counts, splice_estimates)


In [None]:
# Apply standardize (defined elsewhere in the notebook) to the chi-squared
# RMSE results of every dataset.
(census_chi2_rmse_std, nursery_chi2_rmse_std, cmc_chi2_rmse_std,
 mushroom_chi2_rmse_std, obesity_chi2_rmse_std, splice_chi2_rmse_std) = map(
    standardize,
    (census_chi2_rmse, nursery_chi2_rmse, cmc_chi2_rmse,
     mushroom_chi2_rmse, obesity_chi2_rmse, splice_chi2_rmse),
)

### (Standardized) RMSE Plots

In [None]:
# Census: standardized chi-squared RMSE against epsilon, one line per method.
fig = plt.figure(figsize=(10, 8))

# Marker and colour used for each method's line (legend order = dict order).
line_styles = {
    "OLH": {"marker": 'D', "color": 'b'},
    "OUE": {"marker": 's', "color": 'r'},
    "THE": {"marker": 'v', "color": 'm'},
    "HR": {"marker": '^', "color": 'k'},
    "CMS": {"marker": 'x', "color": 'saddlebrown'},
    "RAPPOR": {"marker": 'o', "color": 'g'},
}
for method, style in line_styles.items():
    plt.plot(epsilons, smooth(census_chi2_rmse_std[method], 5), label=method,
             fillstyle='none', markersize=16, linewidth=2.2, **style)

plt.xlabel(r'$\epsilon$', fontsize=28)
plt.ylabel("Standardized RMSE", fontsize=24)
plt.xticks(epsilon_ticks, fontsize=24, rotation=90)
plt.yticks(fontsize=24)
plt.legend(ncol=2, prop={'size': 22})

plt.savefig("./results/figures/census_chi2_rmse.pdf", bbox_inches='tight')

In [None]:
# Nursery: standardized chi-squared RMSE against epsilon, one line per method.
fig = plt.figure(figsize=(10, 8))

# Marker and colour used for each method's line (legend order = dict order).
line_styles = {
    "OLH": {"marker": 'D', "color": 'b'},
    "OUE": {"marker": 's', "color": 'r'},
    "THE": {"marker": 'v', "color": 'm'},
    "HR": {"marker": '^', "color": 'k'},
    "CMS": {"marker": 'x', "color": 'saddlebrown'},
    "RAPPOR": {"marker": 'o', "color": 'g'},
}
for method, style in line_styles.items():
    plt.plot(epsilons, smooth(nursery_chi2_rmse_std[method], 5), label=method,
             fillstyle='none', markersize=16, linewidth=2.2, **style)

plt.xlabel(r'$\epsilon$', fontsize=28)
plt.ylabel("Standardized RMSE", fontsize=24)
plt.xticks(epsilon_ticks, fontsize=24, rotation=90)
plt.yticks(fontsize=24)
plt.legend(ncol=2, prop={'size': 22})

plt.savefig("./results/figures/nursery_chi2_rmse.pdf", bbox_inches='tight')

In [None]:
# CMC: standardized chi-squared RMSE against epsilon, one line per method.
fig = plt.figure(figsize=(10, 8))

# Marker and colour used for each method's line (legend order = dict order).
line_styles = {
    "OLH": {"marker": 'D', "color": 'b'},
    "OUE": {"marker": 's', "color": 'r'},
    "THE": {"marker": 'v', "color": 'm'},
    "HR": {"marker": '^', "color": 'k'},
    "CMS": {"marker": 'x', "color": 'saddlebrown'},
    "RAPPOR": {"marker": 'o', "color": 'g'},
}
for method, style in line_styles.items():
    plt.plot(epsilons, smooth(cmc_chi2_rmse_std[method], 5), label=method,
             fillstyle='none', markersize=16, linewidth=2.2, **style)

plt.xlabel(r'$\epsilon$', fontsize=28)
plt.ylabel("Standardized RMSE", fontsize=24)
plt.xticks(epsilon_ticks, fontsize=24, rotation=90)
plt.yticks(fontsize=24)
plt.legend(ncol=2, prop={'size': 22})

plt.savefig("./results/figures/cmc_chi2_rmse.pdf", bbox_inches='tight')

In [None]:
# Mushroom: standardized chi-squared RMSE against epsilon, one line per method.
fig = plt.figure(figsize=(10, 8))

# Marker and colour used for each method's line (legend order = dict order).
line_styles = {
    "OLH": {"marker": 'D', "color": 'b'},
    "OUE": {"marker": 's', "color": 'r'},
    "THE": {"marker": 'v', "color": 'm'},
    "HR": {"marker": '^', "color": 'k'},
    "CMS": {"marker": 'x', "color": 'saddlebrown'},
    "RAPPOR": {"marker": 'o', "color": 'g'},
}
for method, style in line_styles.items():
    plt.plot(epsilons, smooth(mushroom_chi2_rmse_std[method], 5), label=method,
             fillstyle='none', markersize=16, linewidth=2.2, **style)

plt.xlabel(r'$\epsilon$', fontsize=28)
plt.ylabel("Standardized RMSE", fontsize=24)
plt.xticks(epsilon_ticks, fontsize=24, rotation=90)
plt.yticks(fontsize=24)
plt.legend(ncol=2, prop={'size': 22})

plt.savefig("./results/figures/mushroom_chi2_rmse.pdf", bbox_inches='tight')

In [None]:
# Obesity: standardized chi-squared RMSE against epsilon, one line per method.
fig = plt.figure(figsize=(10, 8))

# Marker and colour used for each method's line (legend order = dict order).
line_styles = {
    "OLH": {"marker": 'D', "color": 'b'},
    "OUE": {"marker": 's', "color": 'r'},
    "THE": {"marker": 'v', "color": 'm'},
    "HR": {"marker": '^', "color": 'k'},
    "CMS": {"marker": 'x', "color": 'saddlebrown'},
    "RAPPOR": {"marker": 'o', "color": 'g'},
}
for method, style in line_styles.items():
    plt.plot(epsilons, smooth(obesity_chi2_rmse_std[method], 5), label=method,
             fillstyle='none', markersize=16, linewidth=2.2, **style)

plt.xlabel(r'$\epsilon$', fontsize=28)
plt.ylabel("Standardized RMSE", fontsize=24)
plt.xticks(epsilon_ticks, fontsize=24, rotation=90)
plt.yticks(fontsize=24)
plt.legend(ncol=2, prop={'size': 22})

plt.savefig("./results/figures/obesity_chi2_rmse.pdf", bbox_inches='tight')

In [None]:
# Splice: standardized chi-squared RMSE against epsilon, one line per method.
fig = plt.figure(figsize=(10, 8))

# Marker and colour used for each method's line (legend order = dict order).
line_styles = {
    "OLH": {"marker": 'D', "color": 'b'},
    "OUE": {"marker": 's', "color": 'r'},
    "THE": {"marker": 'v', "color": 'm'},
    "HR": {"marker": '^', "color": 'k'},
    "CMS": {"marker": 'x', "color": 'saddlebrown'},
    "RAPPOR": {"marker": 'o', "color": 'g'},
}
for method, style in line_styles.items():
    plt.plot(epsilons, smooth(splice_chi2_rmse_std[method], 5), label=method,
             fillstyle='none', markersize=16, linewidth=2.2, **style)

plt.xlabel(r'$\epsilon$', fontsize=28)
plt.ylabel("Standardized RMSE", fontsize=24)
plt.xticks(epsilon_ticks, fontsize=24, rotation=90)
plt.yticks(fontsize=24)
plt.legend(ncol=2, prop={'size': 22})

plt.savefig("./results/figures/splice_chi2_rmse.pdf", bbox_inches='tight')

### Kendall Plots

In [None]:
# Census: chi-squared Kendall's tau distance against epsilon, one line per method.
fig = plt.figure(figsize=(10, 8))

# Marker and colour used for each method's line (legend order = dict order).
line_styles = {
    "OLH": {"marker": 'D', "color": 'b'},
    "OUE": {"marker": 's', "color": 'r'},
    "THE": {"marker": 'v', "color": 'm'},
    "HR": {"marker": '^', "color": 'k'},
    "CMS": {"marker": 'x', "color": 'saddlebrown'},
    "RAPPOR": {"marker": 'o', "color": 'g'},
}
for method, style in line_styles.items():
    plt.plot(epsilons, smooth(census_chi2_kendall[method], 5), label=method,
             fillstyle='none', markersize=16, linewidth=2.2, **style)

plt.xlabel(r'$\epsilon$', fontsize=28)
plt.ylabel("Kendall's Tau Distance", fontsize=24)
plt.xticks(epsilon_ticks, fontsize=24, rotation=90)
plt.yticks(fontsize=24)
# Fixed upper y-limit, identical in all Kendall plots of this section.
plt.ylim(top=0.6)
plt.legend(ncol=2, prop={'size': 22})

plt.savefig("./results/figures/census_chi2_kendall.pdf", bbox_inches='tight')

In [None]:
# Nursery: chi-squared Kendall's tau distance against epsilon, one line per method.
fig = plt.figure(figsize=(10, 8))

# Marker and colour used for each method's line (legend order = dict order).
line_styles = {
    "OLH": {"marker": 'D', "color": 'b'},
    "OUE": {"marker": 's', "color": 'r'},
    "THE": {"marker": 'v', "color": 'm'},
    "HR": {"marker": '^', "color": 'k'},
    "CMS": {"marker": 'x', "color": 'saddlebrown'},
    "RAPPOR": {"marker": 'o', "color": 'g'},
}
for method, style in line_styles.items():
    plt.plot(epsilons, smooth(nursery_chi2_kendall[method], 5), label=method,
             fillstyle='none', markersize=16, linewidth=2.2, **style)

plt.xlabel(r'$\epsilon$', fontsize=28)
plt.ylabel("Kendall's Tau Distance", fontsize=24)
plt.xticks(epsilon_ticks, fontsize=24, rotation=90)
plt.yticks(fontsize=24)
# Fixed upper y-limit, identical in all Kendall plots of this section.
plt.ylim(top=0.6)
plt.legend(ncol=2, prop={'size': 22})

plt.savefig("./results/figures/nursery_chi2_kendall.pdf", bbox_inches='tight')

In [None]:
# CMC: chi-squared Kendall's tau distance against epsilon, one line per method.
fig = plt.figure(figsize=(10, 8))

# Marker and colour used for each method's line (legend order = dict order).
line_styles = {
    "OLH": {"marker": 'D', "color": 'b'},
    "OUE": {"marker": 's', "color": 'r'},
    "THE": {"marker": 'v', "color": 'm'},
    "HR": {"marker": '^', "color": 'k'},
    "CMS": {"marker": 'x', "color": 'saddlebrown'},
    "RAPPOR": {"marker": 'o', "color": 'g'},
}
for method, style in line_styles.items():
    plt.plot(epsilons, smooth(cmc_chi2_kendall[method], 5), label=method,
             fillstyle='none', markersize=16, linewidth=2.2, **style)

plt.xlabel(r'$\epsilon$', fontsize=28)
plt.ylabel("Kendall's Tau Distance", fontsize=24)
plt.xticks(epsilon_ticks, fontsize=24, rotation=90)
plt.yticks(fontsize=24)
# Fixed upper y-limit, identical in all Kendall plots of this section.
plt.ylim(top=0.6)
plt.legend(ncol=2, prop={'size': 22})

plt.savefig("./results/figures/cmc_chi2_kendall.pdf", bbox_inches='tight')

In [None]:
# Mushroom: chi-squared Kendall's tau distance against epsilon, one line per method.
fig = plt.figure(figsize=(10, 8))

# Marker and colour used for each method's line (legend order = dict order).
line_styles = {
    "OLH": {"marker": 'D', "color": 'b'},
    "OUE": {"marker": 's', "color": 'r'},
    "THE": {"marker": 'v', "color": 'm'},
    "HR": {"marker": '^', "color": 'k'},
    "CMS": {"marker": 'x', "color": 'saddlebrown'},
    "RAPPOR": {"marker": 'o', "color": 'g'},
}
for method, style in line_styles.items():
    plt.plot(epsilons, smooth(mushroom_chi2_kendall[method], 5), label=method,
             fillstyle='none', markersize=16, linewidth=2.2, **style)

plt.xlabel(r'$\epsilon$', fontsize=28)
plt.ylabel("Kendall's Tau Distance", fontsize=24)
plt.xticks(epsilon_ticks, fontsize=24, rotation=90)
plt.yticks(fontsize=24)
# Fixed upper y-limit, identical in all Kendall plots of this section.
plt.ylim(top=0.6)
plt.legend(ncol=2, prop={'size': 22})

plt.savefig("./results/figures/mushroom_chi2_kendall.pdf", bbox_inches='tight')

In [None]:
# Obesity: chi-squared Kendall's tau distance against epsilon, one line per method.
fig = plt.figure(figsize=(10, 8))

# Marker and colour used for each method's line (legend order = dict order).
line_styles = {
    "OLH": {"marker": 'D', "color": 'b'},
    "OUE": {"marker": 's', "color": 'r'},
    "THE": {"marker": 'v', "color": 'm'},
    "HR": {"marker": '^', "color": 'k'},
    "CMS": {"marker": 'x', "color": 'saddlebrown'},
    "RAPPOR": {"marker": 'o', "color": 'g'},
}
for method, style in line_styles.items():
    plt.plot(epsilons, smooth(obesity_chi2_kendall[method], 5), label=method,
             fillstyle='none', markersize=16, linewidth=2.2, **style)

plt.xlabel(r'$\epsilon$', fontsize=28)
plt.ylabel("Kendall's Tau Distance", fontsize=24)
plt.xticks(epsilon_ticks, fontsize=24, rotation=90)
plt.yticks(fontsize=24)
# Fixed upper y-limit, identical in all Kendall plots of this section.
plt.ylim(top=0.6)
plt.legend(ncol=2, prop={'size': 22})

plt.savefig("./results/figures/obesity_chi2_kendall.pdf", bbox_inches='tight')

In [None]:
# Splice: chi-squared Kendall's tau distance against epsilon, one line per method.
fig = plt.figure(figsize=(10, 8))

# Marker and colour used for each method's line (legend order = dict order).
line_styles = {
    "OLH": {"marker": 'D', "color": 'b'},
    "OUE": {"marker": 's', "color": 'r'},
    "THE": {"marker": 'v', "color": 'm'},
    "HR": {"marker": '^', "color": 'k'},
    "CMS": {"marker": 'x', "color": 'saddlebrown'},
    "RAPPOR": {"marker": 'o', "color": 'g'},
}
for method, style in line_styles.items():
    plt.plot(epsilons, smooth(splice_chi2_kendall[method], 5), label=method,
             fillstyle='none', markersize=16, linewidth=2.2, **style)

plt.xlabel(r'$\epsilon$', fontsize=28)
plt.ylabel("Kendall's Tau Distance", fontsize=24)
plt.xticks(epsilon_ticks, fontsize=24, rotation=90)
plt.yticks(fontsize=24)
# Fixed upper y-limit, identical in all Kendall plots of this section.
plt.ylim(top=0.6)
plt.legend(ncol=2, prop={'size': 22})

plt.savefig("./results/figures/splice_chi2_kendall.pdf", bbox_inches='tight')

In [None]:
# Kendall results of all datasets, in the order they are aggregated below.
kendall_results = (cmc_kendall, nursery_kendall, obesity_kendall,
                   mushroom_kendall, splice_kendall, census_kendall)


def _positionwise_mean(series_list):
    """Average the i-th entries of all series; stops at the shortest series.

    Presumably every series holds one value per epsilon -- verify upstream.
    """
    return [np.mean(values) for values in zip(*series_list)]


# For every method: collect its series from each dataset, then average them
# position by position to obtain one mean per position.
OLH_kendall_arrays = [result['OLH'] for result in kendall_results]
OLH_kendall_means = _positionwise_mean(OLH_kendall_arrays)

OUE_kendall_arrays = [result['OUE'] for result in kendall_results]
OUE_kendall_means = _positionwise_mean(OUE_kendall_arrays)

THE_kendall_arrays = [result['THE'] for result in kendall_results]
THE_kendall_means = _positionwise_mean(THE_kendall_arrays)

HR_kendall_arrays = [result['HR'] for result in kendall_results]
HR_kendall_means = _positionwise_mean(HR_kendall_arrays)

CMS_kendall_arrays = [result['CMS'] for result in kendall_results]
CMS_kendall_means = _positionwise_mean(CMS_kendall_arrays)

RAPPOR_kendall_arrays = [result['RAPPOR'] for result in kendall_results]
RAPPOR_kendall_means = _positionwise_mean(RAPPOR_kendall_arrays)
