In [1]:
import numpy as np
import os
from sklearn.metrics import mean_squared_error as mse_f
from scipy import sparse
from scipy.stats import gamma
from scipy.stats import ttest_ind
import warnings
import pandas as pd
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

In [2]:
paper_model_names = models = {'pif.z-only':'PIF-Item+Net',
          'pif.z-theta-joint':'PIF-Joint',
            'spf.main':'MSPF', 
          'unadjusted.main':'Unadjusted',
          'network_pref_only.main':'Network Only',
          'item_only.main':'PIF-Item',
         'no_unobs.main':'BD Adj.',
         'item_only_oracle.main':'PIF-Item (No region)'}

In [22]:
def print_table(exp_results, regimes, models, exps=10):
    ncols = len(regimes.keys())
    nrows = len(models)
    results=np.zeros((nrows, ncols))
    std = np.zeros((nrows, ncols))
    
    col_idx=0
    for regime, c in regimes.items():
        row_idx=0
        for model in models:
            mse = np.zeros(exps)
            for i in range(exps):
                try:
                    beta_predicted = exp_results[c][model][i][0]
                    truth = exp_results[c][model][i][1]  
                    sq_err = (beta_predicted - truth) ** 2
                    mse[i] = sq_err.mean()
                except:
                    print(model, 'exp',i,'not found')
            results[row_idx][col_idx]= "{:.2e}".format(mse.mean()) #round(mse.mean()*1000, 3)
            std[row_idx][col_idx]= round(mse.std(),4)
            row_idx += 1
        col_idx += 1
    
    proper_names = [paper_model_names[m] for m in models]
    col_names = list(regimes.keys())
    df = pd.DataFrame(results, index=proper_names, columns=col_names, dtype=str)
    std_df = pd.DataFrame(std, index=proper_names, columns=col_names, dtype=str)
    df = df + '$\pm$' + std_df
    return df

### Load results

In [23]:
pokec_out = '../../out/pokec_low_inf'
exps = 10
embed='user'
models = ['unadjusted.main',
          'spf.main',
          'network_pref_only.main',
          'pif.z-theta-joint']

conf_types = ['homophily', 'exog', 'both']
confounding_strengths = [(50, 10), (50, 50), (50, 100)]
exp_results = {}

for i in range(1, exps+1):
    for model in models:
        for (cov1conf, cov2conf) in confounding_strengths:
            for ct in conf_types:
                try:
                    base_file_name = 'conf=' + str((cov1conf, cov2conf)) +';conf_type=' +ct + '.npz'
                    result_file = os.path.join(pokec_out, str(i), model + '_model_fitted_params', base_file_name)
                    res = np.load(result_file)
                    params = res['fitted'] 
                    truth = res['true']

                    if (ct, (cov1conf,cov2conf)) in exp_results:
                        if model in exp_results[(ct, (cov1conf,cov2conf))]:
                            exp_results[(ct, (cov1conf,cov2conf))][model].append((params, truth))
                        else:
                            exp_results[(ct, (cov1conf,cov2conf))][model]= [(params, truth)]
                    else:
                        exp_results[(ct, (cov1conf,cov2conf))] = {model:[(params, truth)]}
                except:
                    print(result_file,' not found')

### Confounding bias from per-item confounders only

In [26]:
confounding_type='exog'
models = list(exp_results[(confounding_type, confounding_strengths[1])].keys())
regime1 = {'Low':(confounding_type, confounding_strengths[0]), 
           'Med.':(confounding_type, confounding_strengths[1]), 
           'High':(confounding_type, confounding_strengths[2])}

df1 = print_table(exp_results, regime1, models)
df1

Unnamed: 0,Low,Med.,High
Unadjusted,0.00142$\pm$0.0002,0.00186$\pm$0.0002,0.00203$\pm$0.0003
MSPF,5.12e-05$\pm$0.0,0.000107$\pm$0.0001,8.86e-05$\pm$0.0
Network Only,0.000278$\pm$0.0001,0.000584$\pm$0.0001,0.000623$\pm$0.0002
PIF-Joint,2.64e-05$\pm$0.0,4.61e-05$\pm$0.0,6.48e-05$\pm$0.0


### Confounding bias from per-person confounders only

In [25]:
confounding_type='homophily'
models = list(exp_results[(confounding_type, confounding_strengths[0])].keys())
regime1 = {'Low':(confounding_type, confounding_strengths[0]), 
           'Med.':(confounding_type, confounding_strengths[1]), 
           'High':(confounding_type, confounding_strengths[2])}

df2= print_table(exp_results, regime1, models)
df2

Unnamed: 0,Low,Med.,High
Unadjusted,0.00172$\pm$0.0002,0.00217$\pm$0.0003,0.00223$\pm$0.0003
MSPF,0.000139$\pm$0.0,0.000196$\pm$0.0001,0.000216$\pm$0.0001
Network Only,0.000502$\pm$0.0002,0.00083$\pm$0.0003,0.000888$\pm$0.0004
PIF-Joint,0.0001$\pm$0.0,0.000139$\pm$0.0,0.000134$\pm$0.0


### Confounding bias from both sources

In [27]:
confounding_type='both'
models = list(exp_results[(confounding_type, confounding_strengths[0])].keys())
regime1 = {'Low':(confounding_type, confounding_strengths[0]), 
           'Med.':(confounding_type, confounding_strengths[1]), 
           'High':(confounding_type, confounding_strengths[2])}

df3 = print_table(exp_results, regime1, models)
df3

Unnamed: 0,Low,Med.,High
Unadjusted,0.00209$\pm$0.0002,0.00232$\pm$0.0003,0.00241$\pm$0.0003
MSPF,9.39e-05$\pm$0.0,0.00014$\pm$0.0,0.000137$\pm$0.0
Network Only,0.000346$\pm$0.0001,0.00052$\pm$0.0001,0.000562$\pm$0.0001
PIF-Joint,7.59e-05$\pm$0.0,9.4e-05$\pm$0.0,0.000124$\pm$0.0


### Visualize all results

In [28]:
all_results = pd.concat([df1, df2, df3], axis=1)
all_results

Unnamed: 0,Low,Med.,High,Low.1,Med..1,High.1,Low.2,Med..2,High.2
Unadjusted,0.00142$\pm$0.0002,0.00186$\pm$0.0002,0.00203$\pm$0.0003,0.00172$\pm$0.0002,0.00217$\pm$0.0003,0.00223$\pm$0.0003,0.00209$\pm$0.0002,0.00232$\pm$0.0003,0.00241$\pm$0.0003
MSPF,5.12e-05$\pm$0.0,0.000107$\pm$0.0001,8.86e-05$\pm$0.0,0.000139$\pm$0.0,0.000196$\pm$0.0001,0.000216$\pm$0.0001,9.39e-05$\pm$0.0,0.00014$\pm$0.0,0.000137$\pm$0.0
Network Only,0.000278$\pm$0.0001,0.000584$\pm$0.0001,0.000623$\pm$0.0002,0.000502$\pm$0.0002,0.00083$\pm$0.0003,0.000888$\pm$0.0004,0.000346$\pm$0.0001,0.00052$\pm$0.0001,0.000562$\pm$0.0001
PIF-Joint,2.64e-05$\pm$0.0,4.61e-05$\pm$0.0,6.48e-05$\pm$0.0,0.0001$\pm$0.0,0.000139$\pm$0.0,0.000134$\pm$0.0,7.59e-05$\pm$0.0,9.4e-05$\pm$0.0,0.000124$\pm$0.0
