In [302]:
import numpy as np
from plotnine import *
import pandas as pd
import patchworklib as pw

# Input CSVs, indexed positionally by the loading cell:
# 0 = base, 1 = k-anonymous, 2 = GDP, 3 = naive LDP, 4 = CMS.
# Every entry ends with a comma so that appending another path can never be
# silently glued onto the previous one by implicit string concatenation.
_args = [
    "../output/analytics/base_analytics/departure-diffusion_exp/base_analytics_2019_01_01.csv",
    "../output/analytics/k_anonymous/departure-diffusion_exp/k_anonymous_analytics_2019_01_01.csv",
    "../output/analytics/gdp/departure-diffusion_exp/gdp_analytics_2019_01_01.csv",
    "../output/analytics/naive_ldp/departure-diffusion_exp/naive_ldp_analytics_2019_01_01.csv",
    "../output/analytics/cms/departure-diffusion_exp/cms_analytics_2019_01_01.csv",
]

In [303]:
def _read_counts(path, count_name=None):
    """Read one analytics CSV, keeping both geoid columns as strings.

    When ``count_name`` is given, the ``count`` column is renamed to it so the
    frame can later be merged next to the baseline counts without a clash.
    """
    frame = pd.read_csv(path, dtype={'geoid_o': str, 'geoid_d': str})
    if count_name is None:
        return frame
    return frame.rename(columns={'count': count_name})


# The baseline keeps its plain `count` column; every mechanism gets its own name.
base = _read_counts(_args[0])
k_anonymous = _read_counts(_args[1], 'count_k_anonymous')
gdp = _read_counts(_args[2], 'count_gdp')
ldp = _read_counts(_args[3], 'count_ldp')
cms = _read_counts(_args[4], 'count_cms')


In [304]:
# Order rows from largest to smallest `count`, then number them 1..N in that
# order; `id` is the x position used by the plots below.
base = (
    base
    .sort_values(by='count', ascending=False)
    .assign(id=lambda ranked: range(1, len(ranked) + 1))
)

In [305]:
# Attach each mechanism's count column to the baseline rows. The left join
# keeps every baseline row, and `validate` raises if a (geoid_o, geoid_d) key
# is duplicated on either side.
for _mechanism_counts in (k_anonymous, gdp, ldp, cms):
    base = base.merge(
        _mechanism_counts,
        on=['geoid_o', 'geoid_d'],
        how='left',
        validate='one_to_one',
    )

In [306]:
# Reshape to long form: one row per (baseline row, mechanism), with the
# mechanism's count in `value` and its column name in `mechanism`.
base_long = base.melt(
    id_vars=['id', 'geoid_o', 'geoid_d', 'count'],
    value_vars=['count_k_anonymous', 'count_gdp', 'count_ldp', 'count_cms'],
    var_name='mechanism',
)

In [307]:
# Exponent range for the power-of-ten breaks on the y axis.
start_exp = 0
max_value = base_long['value'].max(skipna=True)
end_exp = np.log10(max_value)

# k-anonymity panel: `count` in blue and the mechanism's `value` in red, both
# on pseudo-log axes. The dashed line marks y = 10 (presumably the k threshold
# -- TODO confirm).
pa = (
    ggplot(base_long[base_long['mechanism'].eq('count_k_anonymous')])
    + geom_point(aes(x='id', y='count'), color='blue', size=0.1)
    + geom_point(aes(x='id', y='value'), color='red', size=0.1)
    + geom_hline(yintercept=10, color='black', linetype='dashed')
    + scale_y_continuous(
        trans='pseudo_log',
        # Limits span every mechanism's values, not only the rows plotted here.
        limits=[base_long['value'].min(skipna=True), max_value],
        breaks=[10**exponent for exponent in range(start_exp, int(end_exp) + 2)],
    )
    + scale_x_continuous(trans='pseudo_log')
    + labs(x="Origin-destination pair", y="Number of trips")
    + theme_classic()
    # The x axis is only a rank, so its tick labels and marks are hidden.
    + theme(axis_text_x=element_blank(), axis_ticks_major_x=element_blank())
)

In [308]:
def plot_privacy(data, mechanism):
    """Scatter one mechanism's `value` (red) against `count` (blue) by `id`.

    Parameters
    ----------
    data : pandas.DataFrame
        Long-format frame with columns ``id``, ``count``, ``mechanism`` and
        ``value``.
    mechanism : str
        Value of the ``mechanism`` column selecting the rows to draw.

    Returns
    -------
    ggplot
        Plot with pseudo-log x and y axes. The y limits and breaks are taken
        from *all* rows of ``data`` (not only the selected mechanism), so
        plots built from the same frame share one y scale.
    """
    # Build the mask from `data` itself. Using another frame's mask only works
    # when both frames share an identical index.
    rows = data.loc[data['mechanism'] == mechanism]

    y_min = data['value'].min(skipna=True)
    y_max = data['value'].max(skipna=True)
    # log10 is undefined for non-positive or missing maxima; fall back to
    # exponent 0 so the breaks are still well-formed ([1, 10]).
    top_exp = int(np.log10(y_max)) if y_max > 0 else 0
    breaks = [10**x for x in range(0, top_exp + 2)]

    return (ggplot(rows) +
        geom_point(aes(x='id', y='value'), color='red', size=0.1) +
        geom_point(aes(x='id', y='count'), color='blue', size=0.1) +
        scale_y_continuous(trans='pseudo_log', limits=[y_min, y_max],
                           breaks=breaks) +
        scale_x_continuous(trans='pseudo_log') +
        labs(x = "Origin-destination pair",
            y = "Number of trips") +
            theme_classic() +
            # The x axis is only a rank, so its tick labels and marks are hidden.
            theme(axis_text_x=element_blank(),
                axis_ticks_major_x=element_blank())
        )

In [309]:
pb = plot_privacy(base_long, 'count_gdp')
pc = plot_privacy(base_long, 'count_ldp')
pd = plot_privacy(base_long, 'count_cms')

In [310]:
width = 7
height = 4
pa.save('../output/figs/k_anonymity_construction.png', width=width, height=height, dpi=300)
pb.save('../output/figs/gdp_construction.png', width=width, height=height, dpi=300)
pc.save('../output/figs/naive_ldp_construction.png', width=width, height=height, dpi=300)
pd.save('../output/figs/cms_construction.png', width=width, height=height, dpi=300)
