In [1]:
import pandas as pd
import os
import tqdm
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
import statsmodels.api as sm
from pystout import pystout
import warnings
import numpy as np
import scipy.stats
import itertools

# Hide pandas' "frame.append is deprecated" warning (the filter matches on the message text).
warnings.filterwarnings('ignore', message='The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.')

# Typography applied to matplotlib figures drawn after this cell.
plt.rcParams["font.family"] = "Times New Roman"
plt.rcParams["font.size"] = 16

# Project root inside the user's home directory; the notebook stops here if it is missing.
project_path = os.path.join(os.path.expanduser('~'), 'Dropbox (MIT)/Privacy_Paradox/2023-recovery')
print(project_path)
assert os.path.exists(project_path)

# Output folders: summary tables and main regression tables live under <project>/out.
out_dir = os.path.join(project_path, 'out')
dir_summary = os.path.join(out_dir, 'summary')
dir_main = os.path.join(out_dir, 'main')

# exist_ok=True makes directory creation idempotent without a separate
# "does it exist?" check (and fails loudly if a regular file is in the way).
for d in [out_dir, dir_summary, dir_main]:
    os.makedirs(d, exist_ok=True)

# Colour cycle shared by all seaborn plots.
primary_colors = {
    0: 'cornflowerblue',
    1: 'indianred',
    2: 'darkseagreen',
    3: 'mediumpurple',
}

sns.set_palette(sns.color_palette(list(primary_colors.values())))

# Thickness passed to \toprule / \bottomrule by clean_reg_table.
top_rule = '2.3pt'

ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject

In [None]:
# Read the public data file (comma separated) into the working frame.
data_file = os.path.join(project_path, 'data_public.csv')
df = pd.read_table(data_file, sep=',', index_col=None).copy()

# 0/1 indicators: is the selected wallet (walletx2) one of the wallets listed
# for the given indicator column?
privacy_maximizing_wallets = {
    'max_priv_intermediary': ['Blockchain', 'Electrum'],
    'max_priv_gov': ['Electrum'],
    'max_priv_public': ['Coinbase', 'Circle'],
}
for indicator, wallet_names in privacy_maximizing_wallets.items():
    df[indicator] = df['walletx2'].isin(wallet_names).astype(int)

In [None]:
# Copy two raw columns under the readable names used by the summary table.
for alias, raw_column in (
    ('treatment_encryption', 'rpgpextra'),
    ('cash_out', 'sb_cashing_out_btc_05'),
):
    df[alias] = df[raw_column]

In [None]:
def clean_reg_table(file_name, split_rows, multicol_width='3cm', verbose=False, rule_width=None):
    """Post-process a LaTeX regression table in place.

    The file is read, rewritten and saved back under the same name:

    * a double ``\\hline`` directly before ``\\end{tabular}`` becomes
      ``\\bottomrule[<rule_width>]``; any other double ``\\hline`` becomes
      ``\\toprule[<rule_width>]``;
    * centred 2- and 3-column ``\\multicolumn`` headers become fixed-width
      ``m{<multicol_width>}`` headers;
    * two ``\\hline`` on consecutive lines become a ``\\midrule``;
    * when ``split_rows`` is true, the text after ``$\\times$`` in a row label
      is moved to the start of the following line, and an extra ``\\\\`` is
      appended to lines containing ``(`` while inside a section opened by an
      ``\\hline`` line.

    Parameters
    ----------
    file_name : str
        Path of the ``.tex`` file to rewrite.
    split_rows : bool
        Enable the interaction-label splitting and extra row breaks.
    multicol_width : str
        LaTeX length used for the ``m{...}`` column specification.
    verbose : bool
        Print the rewritten table before saving it.
    rule_width : str or None
        Thickness of the top/bottom rule. ``None`` (default) falls back to the
        notebook-level ``top_rule`` setting, which is the original behaviour.
    """
    if rule_width is None:
        rule_width = top_rule

    with open(file_name, 'r') as f:
        tex = f.read()

    # The bottom rule must be handled first: the generic double-\hline rule
    # below would otherwise also consume the one closing the tabular.
    tex = tex.replace('\\hline\\hline\n\\end{tabular}', f'\\bottomrule[{rule_width}]\n' + '\\end{tabular}')
    tex = tex.replace('\\hline\\hline', f'\\toprule[{rule_width}]')
    # Backslashes are escaped explicitly ('\m' is not a valid escape sequence).
    for n_cols in (2, 3):
        tex = tex.replace(
            '\\multicolumn{' + str(n_cols) + '}{c}',
            '\\multicolumn{' + str(n_cols) + '}{m{' + multicol_width + '}}',
        )
    tex = tex.replace('\\hline\n\\hline', '\\midrule\n')

    to_add = None         # label fragment carried over to the next line
    var_section = False   # toggled by every line that contains \hline
    new_tex = []
    for el in tex.split('\n'):
        if isinstance(to_add, str):
            el = to_add + ' ' + el
            to_add = None

        if split_rows and '$\\times$' in el:
            # Text between "$\times$" and the first column separator.
            to_add = el.split('$\\times$')[1].split('&')[0]
            el = el.replace(to_add, '')

        if '\\hline' in el:
            var_section = not var_section
        if split_rows and var_section and '(' in el:
            el = el + '\\\\'
        new_tex.append(el)

    tex = '\n'.join(new_tex)

    if verbose:
        print(tex)

    with open(file_name, 'w') as f:
        f.write(tex)

# Summary Tables

In [None]:
# Descriptive statistics: one row per labelled variable, computed on the
# non-missing observations of that variable.
summary_columns = ['Variable', 'Mean', 'Std. Dev.', 'Min.', 'Max.', 'N']
tab = pd.DataFrame(columns=summary_columns)

# Display label -> column of df. A value may also be a (column, mask) tuple,
# in which case the statistics are computed on the rows selected by the mask.
col_map = {
    'Encryption Randomization': 'treatment_encryption',
    'Cash Out': 'cash_out',
    'Bank-Like Wallet': 'banklike',
    'Year': 'ir_studyr',
    'Male': 'male',
    'Top Coder': 'topcoder',
    'Expected Price Decay': 'expectpricedown',
    'Open Source Browser': 'open_source_browser',
    'High Privacy from Peers': 'am_privacy_peers',
    'High Privacy from Intermediary': 'am_privacy_int',  # am == above medium
    'High Privacy from Government': 'am_privacy_gov',
    'High Trust in Government': 'trust_gov',
    'High Trust in Startup': 'trust_startup',
    'High Trust in Retailer': 'trust_retailer',
    'Selected Wallet Max. Priv. from Public': 'max_priv_public',
    'Selected Wallet Max. Priv. from Intermediary': 'max_priv_intermediary',
    'Selected Wallet Max. Priv. from Government': 'max_priv_gov',
    'Wallet High Priv. Public Not Listed 1st': 'best_priv_public_n1st',
    'Wallet High Priv. Intermediary Not Listed 1st': 'best_priv_int_n1st',
    'Wallet High Priv. Government Not Listed 1st': 'best_priv_gov_n1st',
    'Escaping Surveillance from Public': 'es_pub',
    'Escaping Surveillance from Intermediary': 'es_int',
    'Escaping Surveillance from Government': 'es_gov'
}
for clean_name, spec in tqdm.tqdm(col_map.items()):
    sample = df
    column = spec
    if isinstance(spec, tuple):
        sample = df.loc[spec[1].astype(bool), :]
        column = spec[0]
    observed = sample.loc[~pd.isna(sample[column])][column]

    row_id = len(tab)
    row_values = {
        'Variable': clean_name,
        'Mean': observed.mean(),
        'Std. Dev.': observed.std(),
        'Min.': observed.min(),
        'Max.': observed.max(),
        'N': len(observed),
    }
    for stat_name, stat_value in row_values.items():
        tab.loc[row_id, stat_name] = stat_value

tab.to_csv(os.path.join(dir_summary, 'summary_table.csv'))

# Three decimals for moments, whole numbers for the range and sample size.
decimal_format = '{:.3f}'
int_format = '{:.0f}'
formats = {
    **dict.fromkeys(['Mean', 'Std. Dev.'], decimal_format),
    **dict.fromkeys(['Min.', 'Max.', 'N'], int_format),
}

styler = tab.style.hide(axis='index').format(formatter=formats)
tex = styler.to_latex(
    hrules=True,
    column_format='l' + 'c'*(len(tab.columns) - 1),
    multicol_align='c',
)

# Insert vertical space before the first row of each block of variables.
# NOTE(review): 'Incentive Randomization' matches no label in col_map, so that
# entry currently changes nothing - confirm whether it is intentional.
new_line_locs = ['Incentive Randomization', 'Cash Out', 'Year', 'High Privacy from Peers', 'High Trust in Government', 'Selected Wallet Max. Priv. from Public', 'Wallet High Priv. Public Not Listed 1st', 'Escaping Surveillance from Public']
for label in new_line_locs:
    tex = tex.replace(label, '\\vspace{2\\baselineskip}\\\\ \n' + label)

with open(os.path.join(dir_summary, 'summary_table.tex'), 'w') as f:
    f.write(tex)
tab

In [None]:
# randomization and balance table
# For every covariate, regress it on the treatment indicator(s) of each
# treatment group and store the p-value of the joint Wald test that all
# treatment coefficients are zero.
treatment_cols = {
    'Small cost': [f'best_priv_{audience}_n1st' for audience in ('public', 'gov')],
    'Small talk': ['rpgpextra'],
}

# Labels containing any of these fragments are not treated as covariates.
excluded_fragments = ('invalid', 'randomization', 'not listed', 'wallet', 'cash out')
covariates = [
    label for label in col_map
    if not any(fragment in label.lower() for fragment in excluded_fragments)
]

balance_df = pd.DataFrame()
for group, regressors in treatment_cols.items():
    rhs = '+'.join(regressors)
    test_str = ','.join(f'{name}=0' for name in regressors)
    for cov_name in covariates:
        balance_df.loc[cov_name, 'Covariate'] = cov_name
        cov = col_map[cov_name]
        mod = smf.ols(f'{cov} ~ 1 + {rhs}', data=df).fit(cov_type='hc3')
        balance_df.loc[cov_name, group] = mod.wald_test(test_str, scalar=True).pvalue

balance_df.to_csv(os.path.join(dir_summary, 'balance_table.csv'))

# Every column except the covariate label is a p-value shown with 3 decimals.
decimal_format = '{:.3f}'
formats = {
    column: decimal_format
    for column in balance_df.columns
    if 'covariate' not in column.lower()
}

styler = balance_df.style.hide(axis='index').format(formatter=formats)
tex = styler.to_latex(
    hrules=True,
    column_format='l' + 'c'*(len(balance_df.columns) - 1),
    multicol_align='c',
)

# Insert vertical space before the first row of each block of variables.
new_line_locs = ['Incentive Randomization', 'Cash Out', 'Year', 'High Privacy from Peers', 'High Trust in Government', 'Selected Wallet Max. Priv. from Public', 'Wallet High Priv. Public Not Listed 1st', 'Escaping Surveillance from Public']
for label in new_line_locs:
    tex = tex.replace(label, '\\vspace{2\\baselineskip}\\\\ \n' + label)

with open(os.path.join(dir_summary, 'balance_table.tex'), 'w') as f:
    f.write(tex)
balance_df

In [None]:
# Randomization test
# Checks whether each wallet is listed first equally often.
wallets = {'circle': 'Circle', 'coinbase': 'Coinbase', 'blockchain': 'blockchain.info', 'electrum': 'Electrum'}
cols = ['r{0}1'.format(el) for el in wallets]
print(df[cols].mean())

# scipy.stats.chisquare expects observed and expected *frequencies* (counts).
# Passing shares understates the statistic by a factor of the sample size, so
# the test is run on counts against a uniform split.
# Assumes the r<wallet>1 columns are 0/1 indicators (the choice-model cell
# asserts they sum to 1 within each row).
first_position_counts = df[cols].sum()
expected_counts = [first_position_counts.sum() / len(cols)] * len(cols)
print(scipy.stats.chisquare(f_obs=first_position_counts, f_exp=expected_counts))

# Effects of small costs

In [None]:
# Audience key -> name of the df column read as the continuous stated-privacy
# measure for that audience (all names share the 'fw_notrace' prefix).
stated_privacy_col_map = {
    audience: 'fw_notrace' + suffix
    for audience, suffix in [('peers', 'PEERS'), ('int', 'ISP'), ('gov', 'GOV')]
}

In [None]:
# Small-cost regressions: for each audience, three OLS models (HC3 standard
# errors) of "selected wallet maximizes privacy from <audience>" on whether
# the privacy-maximizing wallet was NOT listed first:
#   (1) treatment only, (2) x high stated preference, (3) x rmatrix.
mods = []
endog_names = []
groups = ['public', 'intermediary', 'gov']
varlabels = {}
mgroups = {}
print(df.groupby('walletx2')[['max_priv_intermediary', 'max_priv_gov', 'max_priv_public']].mean())

notes = {'Average of DV': []}
for g in groups:
    # Column-name fragments differ between variable families.
    best_key = g.replace('intermediary', 'int')
    stated_key = g.replace('public', 'peers').replace('intermediary', 'int')

    # Working columns for the current audience (overwritten on every pass).
    df['best_n1st'] = df[f'best_priv_{best_key}_n1st']
    df['am_stated'] = df[f'am_privacy_{stated_key}']
    df['stated_cont'] = df[stated_privacy_col_map[stated_key]].copy()
    df['stated'] = df.am_stated

    colname = '\\centering Maximized Privacy from the {0}'.format(g.replace('gov', 'government').title())
    dv = f'max_priv_{g}'
    for rhs in ('best_n1st', 'best_n1st*stated', 'best_n1st*rmatrix'):
        formula = f'{dv} ~ 1 + {rhs}'
        mod = smf.ols(formula, data=df).fit(cov_type='hc3')
        mods.append(mod)
        notes['Average of DV'].append('{0:.3f}'.format(df[dv].mean()))

    # The three models just added form one column group in the table.
    mgroups[colname] = [len(mods) - 2, len(mods)]

    varlabels.update({
        'best_n1st': 'Privacy Max. Wallet Not 1st',
        'am_stated': 'Stated Preference for Privacy',
        'stated': 'High Stated Preference for Privacy',
        'best_n1st:am_stated': 'Privacy Max. Wallet Not 1st $\\times$ Stated Preference for Priv.',
        'best_n1st:stated': 'Privacy Max. Wallet Not 1st $\\times$ High Stated Preference for Priv.',
        'rmatrix': 'Increased Transparency',
        'best_n1st:rmatrix': 'Privacy Max. Wallet Not 1st $\\times$ Increased Transparency',
    })

# Prefix each extra row with a rule so it is separated from the coefficients.
notes = {'\\hline\n' + label: values for label, values in notes.items()}

In [None]:
# Export the small-cost regressions to LaTeX, then restyle the file in place.
fn = os.path.join(dir_main, 'costs_te_table.tex')
table_options = dict(
    models=mods,
    file=fn,
    digits=3,
    varlabels=varlabels,
    mgroups=mgroups,
    addrows=notes,
    modstat={'nobs': 'Observations', 'rsquared': 'R-Squared'},
    stars={0.1: '*', 0.05: '**', 0.01: '***'},
)
pystout(**table_options)
clean_reg_table(fn, split_rows=True, multicol_width='4cm', verbose=True)

In [None]:
# balance tests
# Mean of each covariate by the wallet that was listed first, plus the p-value
# of a Wald test that all four means are equal.
wallet_cols = list(wallets.keys())
first_position_cols = ['r{0}1'.format(el) for el in wallet_cols]
df['r_wallet_first'] = df[first_position_cols].apply(lambda row: wallet_cols[row.argmax()], axis=1)

cols = [el for el in col_map if 'all invalid' not in el.lower() and 'random' not in el.lower() and 'bank' not in el.lower() and 'select' not in el.lower() and 'wallet' not in el.lower()]
tab = pd.DataFrame()
for c in tqdm.tqdm(cols):
    cc = col_map[c]
    # No intercept: each coefficient is the covariate mean for one first-listed wallet.
    mod = smf.ols('{0} ~ 0 + C(r_wallet_first)'.format(cc), data=df).fit(cov_type='hc3')
    for p in mod.params.index:
        # Parameter names look like 'C(r_wallet_first)[<wallet key>]'.
        wallet = wallets[p.split('[')[1].replace(']', '')]
        tab.loc[c, wallet] = mod.params[p]

    # Joint hypothesis: every coefficient equals the first one.
    first_coef = mod.params.index[0]
    test_form = ', '.join('{0}={1}'.format(first_coef, coef) for coef in mod.params.index[1:])
    # scalar=True returns a plain float p-value (as in the other balance
    # table) instead of a 1x1 array being written into a single table cell.
    test = mod.wald_test(test_form, scalar=True)
    tab.loc[c, '$p$-value'] = test.pvalue

fn = os.path.join(dir_main, 'costs_balance.tex')
tex = tab.to_latex(
    float_format="{:.2f}".format,
)
with open(fn, 'w') as f:
    f.write(tex)
tab

## Choice model

In [None]:

# Reshape the wide frame into long format: one row per participant (uid) and
# list position, describing the wallet shown at that position.
wallet_keys = list(wallets)
records = []

assert len(df.uid.unique()) == len(df)
for _, row in tqdm.tqdm(df.iterrows(), total=len(df)):
    for r in range(1, 5):
        # Exactly one wallet must be flagged for position r.
        cols = ['r{0}{1}'.format(el, r) for el in wallet_keys]
        assert row[cols].sum() == 1
        wallet = wallet_keys[row[cols].argmax()]

        # Stated preference paired with this wallet (binary and continuous).
        if wallet in ['coinbase', 'circle']:
            stated_preference = row.am_privacy_peers
            stated_preference_cont = row[stated_privacy_col_map['peers']]
        elif wallet == 'blockchain':
            stated_preference = row.am_privacy_int
            stated_preference_cont = row[stated_privacy_col_map['int']]
        elif wallet == 'electrum':
            stated_preference = max(row.am_privacy_int, row.am_privacy_gov)
            stated_preference_cont = (row[stated_privacy_col_map['gov']] + row[stated_privacy_col_map['int']]) / 2
        else:
            # NotImplemented is a constant, not an exception class.
            raise NotImplementedError(wallet)

        records.append({
            'uid': row.uid,
            'wallet': wallet,
            'position': r - 1,  # zero-based list position
            'selected': row['walletx2'].lower() == wallet,
            'stated_public': row.am_privacy_peers,
            'stated_intermediary': row.am_privacy_int,
            'stated_gov': row.am_privacy_gov,
            'stated_preference': stated_preference,
            'stated_preference_cont': stated_preference_cont,
            'rmatrix': row.rmatrix,
        })

# Build the frame once from the list of row dicts.
long_df = pd.DataFrame(records)

for c in ['selected', 'position', 'stated_preference', 'stated_preference_cont']:
    long_df[c] = long_df[c].astype(float)

# Missing continuous stated preferences are replaced by the overall mean.
long_df['stated_preference_cont'] = long_df['stated_preference_cont'].fillna(long_df.stated_preference_cont.mean())

# Both labels are plain strings (the open-source label used to be a one-element list).
is_bank_like = long_df.wallet.isin(['coinbase', 'circle'])
long_df['wallet_type'] = np.where(is_bank_like, 'Bank-Like Wallet', 'Open-Source Wallet')
long_df['bank_like'] = is_bank_like.astype(int)

In [None]:
def choice_probs(d, mod):
    """Return multinomial-logit choice probabilities for the rows of ``d``.

    Parameters
    ----------
    d : pandas.DataFrame
        One row per alternative of a single choice set. It must contain every
        column named in ``mod.params.index``; other columns are ignored.
    mod : object
        Fitted model exposing ``params`` as a pandas Series of coefficients
        indexed by column name.

    Returns
    -------
    pandas.Series
        ``exp(x_i' b) / sum_j exp(x_j' b)``, indexed like ``d``.
    """
    utilities = d[mod.params.index] @ mod.params
    # Subtracting the maximum leaves the ratios unchanged but keeps exp()
    # from overflowing to inf (and the result from becoming NaN).
    exps = np.exp(utilities - utilities.max())
    return exps / exps.sum()

In [None]:
# Conditional logit models of wallet choice, grouped by participant (uid).
# Four specifications are fitted and collected in `mods` for the table cell.
mods = []
long_df['const'] = 1

# (1) Wallet type only.
mod = sm.ConditionalLogit(endog=long_df['selected'], exog=long_df[['bank_like']], groups=long_df['uid']).fit(cov_type='hc3')
mods.append(mod)

# (2) Wallet type + list position.
mod = sm.ConditionalLogit(endog=long_df['selected'], exog=long_df[['bank_like', 'position']], groups=long_df['uid']).fit(cov_type='hc3')

# Counterfactual: swap the positions of the first- and last-listed wallets and
# compare the predicted probability of the originally first-listed wallet.
actual_choice_probs = []
counter_choice_probs = []
assert long_df.position.min() == 0 and long_df.position.max() == 3
for _, tmp in long_df.groupby('uid'):
    # Rows within a participant are expected in position order 0..3.
    assert tmp.position.iloc[0] == 0
    assert tmp.position.iloc[-1] == 3
    actual_choice_probs.append(choice_probs(d=tmp, mod=mod).iloc[0])
    tmp = tmp.copy()
    tmp.loc[tmp.index[0], 'position'] = 3
    tmp.loc[tmp.index[-1], 'position'] = 0
    assert len(tmp.position.unique()) == len(tmp.position)
    counter_choice_probs.append(choice_probs(d=tmp, mod=mod).iloc[0])
effect = np.mean(actual_choice_probs) - np.mean(counter_choice_probs)
print('Effect of moving from last position to first: {0:.1f} pp'.format(100*effect))
mods.append(mod)

# (3) + continuous stated preference and its interaction with position.
long_df['position_stated_preference'] = (long_df.position * long_df.stated_preference_cont).astype(float)
mod = sm.ConditionalLogit(endog=long_df['selected'], exog=long_df[['bank_like', 'position', 'stated_preference_cont', 'position_stated_preference']], groups=long_df['uid']).fit(cov_type='hc3')

# Counterfactual: raise one alternative's stated preference to the sample
# maximum and compare its predicted choice probability with the actual one.
max_stated_preference = long_df['stated_preference_cont'].max()
actual_choice_probs = []
counter_choice_probs = []
for _, tmp in long_df.groupby('uid'):
    cps = choice_probs(d=tmp, mod=mod)
    for ix in range(len(tmp)):
        actual_choice_probs.append(cps.iloc[ix])
        tmp_inner = tmp.copy()
        row_label = tmp_inner.index[ix]
        tmp_inner.loc[row_label, 'stated_preference_cont'] = max_stated_preference
        # The interaction term is a model regressor too, so it must follow the
        # counterfactual value; leaving it stale applies only the main effect.
        tmp_inner.loc[row_label, 'position_stated_preference'] = tmp_inner.loc[row_label, 'position'] * max_stated_preference
        counter_choice_probs.append(choice_probs(d=tmp_inner, mod=mod).iloc[ix])
mods.append(mod)
effect = np.mean(counter_choice_probs) - np.mean(actual_choice_probs)
print('Effect of stated preference: {0:.1f} pp'.format(100*effect))
print(long_df.groupby('uid').stated_preference.mean().describe())

# (4) Preferred specification: adds interactions with rmatrix.
long_df['position_rmatrix'] = (long_df.position * long_df.rmatrix).astype(float)
long_df['stated_preference_rmatrix'] = (long_df.stated_preference_cont * long_df.rmatrix).astype(float)
long_df['bank_like_rmatrix'] = (long_df.rmatrix * long_df.bank_like).astype(float)
mod = sm.ConditionalLogit(endog=long_df['selected'], exog=long_df[['bank_like', 'position', 'bank_like_rmatrix', 'position_rmatrix', 'position_stated_preference', 'stated_preference_cont', 'stated_preference_rmatrix']], groups=long_df['uid']).fit(cov_type='hc3')
preferred_mod = mod
mods.append(mod)
mod.summary()

In [None]:
# Display labels for model terms in the choice-model table. Repetitive
# families (wallet dummies, rank dummies) are generated; order is unchanged.
varlabels = {
    **{
        f'C(wallet)[{key}]': label
        for key, label in (
            ('blockchain', 'Blockchain.info'),
            ('circle', 'Circle'),
            ('coinbase', 'Coinbase'),
            ('electrum', 'Electrum'),
        )
    },
    'max_priv_public': 'Max Privacy Public',
    'max_priv_gov': 'Max Privacy Government',
    'max_priv_int': 'Max Privacy Intermediary',
    'bank_like': 'Bank-Like Wallet',
    'position': 'Wallet Order',
    **{f'C(position)[T.{rank}.0]': f'Rank: {rank}' for rank in (2, 3, 4)},
    'stated_preference': 'Stated Preference',
    'stated_preference_cont': 'Stated Preference',
    'position_stated_preference': 'Stated Preference $\\times$ Wallet Order',
    'C(rmatrix)[T.1]': 'Increased Transparency',
    'position_rmatrix': 'Wallet Order $\\times$ Increased Transparency',
    'stated_preference_rmatrix': 'Stated Preference $\\times$ Increased Transparency',
    'bank_like_rmatrix': 'Bank-Like Wallet $\\times$ Increased Transparency',
}

# write regression table
fn = os.path.join(dir_main, 'choice_model.tex')
table_options = dict(
    models=mods,
    file=fn,
    digits=3,
    varlabels=varlabels,
    modstat={'nobs': 'Observations', 'rsquared': 'R-Squared'},
    stars={0.1: '*', 0.05: '**', 0.01: '***'},
)
pystout(**table_options)
clean_reg_table(fn, split_rows=True, multicol_width='4cm', verbose=True)

In [None]:
# calculate difference in choice probability from an algorithm that maximizes stated privacy preferences
# For every participant, evaluate all orderings of the four wallets under the
# preferred model and record the highest and lowest predicted probability of
# choosing the wallet(s) with the highest stated preference.
actual_prob_max = []
counter_prob_max = []
counter_prob_min = []
variation_in_stated = []

for _, tmp in tqdm.tqdm(long_df.groupby('uid')):
    # Work on a private copy before adding or overwriting any column.
    tmp = tmp.copy()
    max_ixs = tmp.stated_preference_cont == tmp.stated_preference_cont.max()
    variation_in_stated.append(len(tmp.stated_preference_cont.unique()) > 1)

    tmp['cps'] = choice_probs(d=tmp, mod=preferred_mod)
    assert np.abs(tmp.cps.sum() - 1) < 0.000001, tmp.cps.sum()
    actual_prob_max.append(tmp.loc[max_ixs].cps.sum())

    reranked_probs = []
    for new_rank in itertools.permutations(tmp.position):
        tmp['position'] = new_rank
        # The preferred model also contains position interactions; they must
        # be rebuilt from the permuted position, otherwise the counterfactual
        # mixes new main effects with interaction terms from the old order.
        tmp['position_stated_preference'] = (tmp.position * tmp.stated_preference_cont).astype(float)
        tmp['position_rmatrix'] = (tmp.position * tmp.rmatrix).astype(float)
        tmp['cps'] = choice_probs(d=tmp, mod=preferred_mod)
        reranked_probs.append(tmp.loc[max_ixs].cps.sum())
    counter_prob_max.append(max(reranked_probs))
    counter_prob_min.append(min(reranked_probs))

print('Prob selecting max stated. Actual: {0:.3f}, Max: {1:.3f}, Min: {2:.3f}'.format(np.mean(actual_prob_max), np.mean(counter_prob_max), np.mean(counter_prob_min)))
print('Among those with variation in stated. Actual: {0:.3f}, Max: {1:.3f}, Min: {2:.3f}'.format(
    np.mean([actual_prob_max[ix] for ix, el in enumerate(variation_in_stated) if el]),
    np.mean([counter_prob_max[ix] for ix, el in enumerate(variation_in_stated) if el]),
    np.mean([counter_prob_min[ix] for ix, el in enumerate(variation_in_stated) if el]),
))

# Effects of small talk

In [None]:
# Small-talk regressions: for each audience, three OLS models (HC3 standard
# errors) of the es_<audience> outcome on the encryption treatment:
#   (1) treatment only, (2) x top coder, (3) x high stated preference.
# For the public outcome every model is re-fitted on the rows where es_gov is
# observed; those fits go to `mods_robust`.
mods = []
mods_robust = []
endog_names = []
groups = ['public', 'intermediary', 'gov']
varlabels = {}
mgroups = {}
notes = {'Average of DV': []}
notes_robust = {'Average of DV': []}

for g in groups:
    # Column-name fragments differ between variable families.
    best_key = g.replace('intermediary', 'int')
    stated_key = g.replace('public', 'peers').replace('intermediary', 'int')

    # Working columns for the current audience (overwritten on every pass).
    df['best_n1st'] = df[f'best_priv_{best_key}_n1st']
    df['am_stated'] = df[f'am_privacy_{stated_key}']
    df['stated_cont'] = df[stated_privacy_col_map[stated_key]].copy()
    df['stated'] = df.am_stated

    colname = '\\centering Remove Visibility from {0}'.format(g.replace('gov', 'government').title())
    dv = 'es_{0}'.format(g[0:3])
    for rhs in ('rpgpextra', 'rpgpextra*topcoder', 'rpgpextra*stated'):
        formula = f'{dv} ~ 1 + {rhs}'
        mod = smf.ols(formula, data=df).fit(cov_type='hc3')
        mods.append(mod)
        notes['Average of DV'].append('{0:.3f}'.format(df[dv].mean()))

        if g == 'public':
            reg_df = df.loc[~pd.isna(df.es_gov)]
            mod = smf.ols(formula, data=reg_df).fit(cov_type='hc3')
            mods_robust.append(mod)
            notes_robust['Average of DV'].append('{0:.3f}'.format(reg_df[dv].mean()))

    # The three models just added form one column group in the table.
    mgroups[colname] = [len(mods) - 2, len(mods)]

    varlabels.update({
        'rpgpextra': 'Encryption Randomization',
        'stated': 'High Stated Preference for Privacy',
        'topcoder': 'Top Coder',
        'rpgpextra:topcoder': 'Encryption Randomization $\\times$ Top Coder',
        'rpgpextra:stated': 'Encryption Randomization $\\times$ High Stated Preference for Priv.',
    })

# Prefix each extra row with a rule so it is separated from the coefficients.
notes = {'\\hline\n' + label: values for label, values in notes.items()}
notes_robust = {'\\hline\n' + label: values for label, values in notes_robust.items()}
mods

In [None]:
# write regression table
fn = os.path.join(dir_main, 'talk_te_table.tex')
pystout(
    models=mods,
    file=fn,
    digits=3,
    varlabels=varlabels,
    mgroups=mgroups,
    addrows=notes,
    modstat={'nobs': 'Observations', 'rsquared': 'R-Squared'},
    stars={0.1: '*', 0.05: '**', 0.01: '***'},
)
clean_reg_table(fn, split_rows=True, multicol_width='4cm')
with open(fn, 'r') as f:
    print(f.read())

In [None]:
# Export the robustness fits (public outcome, rows with es_gov observed),
# restyle the file in place and echo it.
fn = os.path.join(dir_main, 'talk_te_table_robust.tex')
table_options = dict(
    models=mods_robust,
    file=fn,
    digits=3,
    varlabels=varlabels,
    addrows=notes_robust,
    mgroups={'\\centering Remove Visibility from Public': [1, 3]},
    modstat={'nobs': 'Observations', 'rsquared': 'R-Squared'},
    stars={0.1: '*', 0.05: '**', 0.01: '***'},
    # optional footnote, currently disabled:
    #     addnotes=['*$p<0.1$; **$p<0.05$, ***$p<0.01$ '],
)
pystout(**table_options)
clean_reg_table(fn, split_rows=True, multicol_width='4cm')
with open(fn, 'r') as f:
    table_tex = f.read()
print(table_tex)