In [1]:
import numpy as np
import os
from sklearn.metrics import mean_squared_error as mse_f
from scipy import sparse
from scipy.stats import gamma
from scipy.stats import ttest_ind
import warnings
import pandas as pd
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

In [2]:
# Maps internal model identifiers (as used in result directory names) to the
# display names used in the results tables.
paper_model_names = {
    "pif.z-only": "PIF-Item+Net",
    "pif.z-theta-joint": "PIF-Item+Net+Purchase",
    "spf.main": "MSPF",
    "unadjusted.main": "Unadjusted",
    "network_pref_only.main": "Network Only",
    "item_only.main": "PIF-Item",
    "no_unobs.main": "BD Adj.",
    "item_only_oracle.main": "PIF-Item (No region)",
}
# `models` is bound to the very same dict object here.
models = paper_model_names

In [3]:
def print_table(exp_results, regimes, models, exps=10, model_names=None):
    """Build a table of per-model MSE (mean and std over experiments), one column per regime.

    For every (regime, model) pair the mean squared error between the fitted
    and true values is computed for each available experiment, scaled by 1000
    and rounded to two decimals. Each cell is the string
    ``<mean>`` + LaTeX plus-minus marker + ``<std>``.

    Experiments that are missing from ``exp_results`` are reported on stdout
    and excluded from the statistics (they are NOT counted as zero error). A
    cell with no available experiment at all is rendered with ``nan`` values.

    Parameters
    ----------
    exp_results : dict
        ``exp_results[regime_key][model]`` is a sequence of
        ``(fitted, truth)`` pairs, one per experiment.
    regimes : dict
        Maps column label -> key into ``exp_results``.
    models : sequence of str
        Model identifiers; one table row each, in this order.
    exps : int, default 10
        Number of experiment slots to look up per (regime, model).
    model_names : dict, optional
        Maps model identifier -> row label. Defaults to the notebook-level
        ``paper_model_names`` mapping.

    Returns
    -------
    pandas.DataFrame
        String-valued frame indexed by display name with one column per regime.

    Raises
    ------
    KeyError
        If a model identifier has no entry in ``model_names``.
    """
    if model_names is None:
        # Fall back to the display-name mapping defined at notebook level.
        model_names = paper_model_names

    regime_keys = list(regimes.values())
    results = np.full((len(models), len(regime_keys)), np.nan)
    std = np.full((len(models), len(regime_keys)), np.nan)

    for col_idx, c in enumerate(regime_keys):
        for row_idx, model in enumerate(models):
            mse = []
            for i in range(exps):
                # Only a missing regime/model/experiment is tolerated; any
                # other error (e.g. shape mismatch) should surface.
                try:
                    run = exp_results[c][model][i]
                except (KeyError, IndexError):
                    print(model, 'exp', i, 'not found')
                    continue
                beta_predicted, truth = run[0], run[1]
                sq_err = (beta_predicted - truth) ** 2
                mse.append(sq_err.mean())
            if mse:
                # Statistics are taken over the experiments actually found.
                results[row_idx, col_idx] = round(float(np.mean(mse)) * 1000, 2)
                std[row_idx, col_idx] = round(float(np.std(mse)) * 1000, 2)

    proper_names = [model_names[m] for m in models]
    col_names = list(regimes.keys())
    # Build the cell strings explicitly so the formatting does not depend on
    # how pandas coerces floats (or NaN) to strings.
    cells = [
        [str(float(mean_val)) + r'$\pm$' + str(float(std_val))
         for mean_val, std_val in zip(mean_row, std_row)]
        for mean_row, std_row in zip(results, std)
    ]
    return pd.DataFrame(cells, index=proper_names, columns=col_names)

In [4]:
# Root directory holding one sub-directory per experiment run (1..exps).
pokec_out = '../../out/pokec_paper_results'
exps = 10
embed='user'
models = ['unadjusted.main',
          'spf.main',
          'network_pref_only.main',
          'pif.z-theta-joint']

conf_types = ['homophily', 'exog', 'both']
# Confounding-strength pairs exactly as they appear in the result file names.
confounding_strengths = [(50, 10), (50, 50), (50, 100)]

# exp_results[(conf_type, (cov1conf, cov2conf))][model] -> list of
# (fitted, true) array pairs, appended in increasing experiment order.
exp_results = {}

for i in range(1, exps+1):
    for model in models:
        for (cov1conf, cov2conf) in confounding_strengths:
            for ct in conf_types:
                base_file_name = 'conf=' + str((cov1conf, cov2conf)) +';conf_type=' +ct + '.npz'
                result_file = os.path.join(pokec_out, str(i), model + '_model_fitted_params', base_file_name)
                try:
                    # The context manager closes the underlying .npz archive
                    # once both arrays have been read out.
                    with np.load(result_file) as res:
                        params = res['fitted']
                        truth = res['true']
                except FileNotFoundError:
                    # Missing runs are expected; anything else (corrupt file,
                    # missing array key) is a real error and should surface.
                    print(result_file,' not found')
                    continue

                regime_key = (ct, (cov1conf, cov2conf))
                exp_results.setdefault(regime_key, {}).setdefault(model, []).append((params, truth))

../../out/pokec_low_inf/1/spf.main_model_fitted_params/conf=(50, 10);conf_type=homophily.npz  not found
../../out/pokec_low_inf/1/spf.main_model_fitted_params/conf=(50, 10);conf_type=exog.npz  not found
../../out/pokec_low_inf/1/spf.main_model_fitted_params/conf=(50, 10);conf_type=both.npz  not found
../../out/pokec_low_inf/1/spf.main_model_fitted_params/conf=(50, 50);conf_type=homophily.npz  not found
../../out/pokec_low_inf/1/spf.main_model_fitted_params/conf=(50, 50);conf_type=exog.npz  not found
../../out/pokec_low_inf/1/spf.main_model_fitted_params/conf=(50, 100);conf_type=homophily.npz  not found
../../out/pokec_low_inf/1/spf.main_model_fitted_params/conf=(50, 100);conf_type=exog.npz  not found
../../out/pokec_low_inf/1/spf.main_model_fitted_params/conf=(50, 100);conf_type=both.npz  not found
../../out/pokec_low_inf/1/network_pref_only.main_model_fitted_params/conf=(50, 10);conf_type=homophily.npz  not found
../../out/pokec_low_inf/1/network_pref_only.main_model_fitted_params/con

In [None]:
# Results table for the 'exog' confounding type, one column per strength.
confounding_type = 'exog'
# NOTE(review): the model list is read from the 'Med.' entry (strength index 1)
# — confirm that this is the intended source for the row set.
models = list(exp_results[(confounding_type, confounding_strengths[1])])
regime1 = {
    label: (confounding_type, confounding_strengths[idx])
    for idx, label in enumerate(('Low', 'Med.', 'High'))
}

df1 = print_table(exp_results, regime1, models)

In [None]:
# Results table for the 'homophily' confounding type, one column per strength.
confounding_type = 'homophily'
# Rows are the models that have results for the 'Low' entry (strength index 0).
models = list(exp_results[(confounding_type, confounding_strengths[0])])
regime1 = {
    label: (confounding_type, confounding_strengths[idx])
    for idx, label in enumerate(('Low', 'Med.', 'High'))
}

df2 = print_table(exp_results, regime1, models)

In [None]:
# Results table for the 'both' confounding type, one column per strength.
confounding_type = 'both'
# Rows are the models that have results for the 'Low' entry (strength index 0).
models = list(exp_results[(confounding_type, confounding_strengths[0])])
regime1 = {
    label: (confounding_type, confounding_strengths[idx])
    for idx, label in enumerate(('Low', 'Med.', 'High'))
}

df3 = print_table(exp_results, regime1, models)

In [9]:
# Join the three per-confounding-type tables column-wise, aligned on the row
# index. The column labels ('Low', 'Med.', 'High') repeat once per table.
all_results = pd.concat([df1, df2, df3], axis='columns')
all_results

Unnamed: 0,Low,Med.,High,Low.1,Med..1,High.1,Low.2,Med..2,High.2
BD Adj.,0.17$\pm$0.07,0.17$\pm$0.16,0.2$\pm$0.17,0.29$\pm$0.06,0.27$\pm$0.08,0.25$\pm$0.06,0.13$\pm$0.06,0.13$\pm$0.09,0.12$\pm$0.06
Unadjusted,1.56$\pm$0.21,2.0$\pm$0.35,2.16$\pm$0.31,1.91$\pm$0.25,2.4$\pm$0.4,2.44$\pm$0.33,2.21$\pm$0.25,2.49$\pm$0.35,2.56$\pm$0.35
MSPF,0.53$\pm$0.42,0.56$\pm$0.35,1.1$\pm$1.55,0.66$\pm$0.38,0.67$\pm$0.37,0.76$\pm$0.48,0.55$\pm$0.34,0.62$\pm$0.43,1.5$\pm$2.67
Network Only,0.48$\pm$0.14,0.84$\pm$0.22,0.84$\pm$0.25,0.76$\pm$0.2,1.08$\pm$0.36,1.16$\pm$0.36,0.55$\pm$0.11,0.73$\pm$0.15,0.78$\pm$0.17
PIF-Item,0.17$\pm$0.11,0.15$\pm$0.12,0.21$\pm$0.12,0.28$\pm$0.08,0.38$\pm$0.17,0.48$\pm$0.23,0.24$\pm$0.12,0.21$\pm$0.12,0.23$\pm$0.12
PIF-Item+Net,0.31$\pm$0.2,0.31$\pm$0.22,0.35$\pm$0.18,0.43$\pm$0.2,0.53$\pm$0.22,0.5$\pm$0.23,0.32$\pm$0.21,0.4$\pm$0.2,0.37$\pm$0.2
PIF-Item+Net+Purchase,0.26$\pm$0.19,0.2$\pm$0.15,0.3$\pm$0.18,0.38$\pm$0.19,0.46$\pm$0.22,0.42$\pm$0.24,0.29$\pm$0.2,0.35$\pm$0.19,0.32$\pm$0.19
PIF-Item (No region),0.1$\pm$0.06,0.11$\pm$0.04,0.13$\pm$0.07,1.15$\pm$0.14,1.65$\pm$0.27,1.7$\pm$0.22,0.3$\pm$0.04,0.35$\pm$0.05,0.38$\pm$0.05
