In [1]:
import numpy as np
import os
from sklearn.metrics import mean_squared_error as mse_f
from scipy import sparse
from scipy.stats import gamma
from scipy.stats import ttest_ind
import warnings
import pandas as pd
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

In [8]:
# Maps each model's result key (as used in `exp_results` and in the
# '<key>_model_fitted_params' result directories) to the display name that
# `print_table` uses as the row label.
#
# This cell deliberately binds only `paper_model_names`. It used to also bind
# the same dict to `models`, which other cells use for the list of model keys
# to tabulate; re-running this cell then silently replaced that list.
paper_model_names = {
    'pif.z-only': 'PIF-Item+Net',
    'pif.z-theta-joint': 'PIF-Joint',
    'spf.main': 'MSPF',
    'unadjusted.main': 'Unadjusted',
    'network_pref_only.main': 'Network Only',
    'item_only.main': 'PIF-Item',
    'no_unobs.main': 'BD Adj.',
    'item_only_oracle.main': 'PIF-Item (No region)',
}

In [3]:
def print_table(exp_results, regimes, models, exps=10):
    """Build a table of mean squared errors, one row per model and one column per regime.

    Parameters
    ----------
    exp_results : mapping
        Indexed as ``exp_results[condition][model][i]``; each entry is a pair
        whose element 0 holds the predicted values and element 1 the true
        values (objects supporting ``-``, ``** 2`` and ``.mean()``, e.g.
        NumPy arrays).
    regimes : dict
        Maps a column label to the condition key used to index ``exp_results``.
    models : iterable of str
        Model keys to tabulate. Each must be a key of the module-level
        ``paper_model_names`` dict, which supplies the row labels.
    exps : int, optional
        Number of experiment slots (indices ``0 .. exps - 1``) looked up for
        every (regime, model) cell.

    Returns
    -------
    pandas.DataFrame
        String-typed frame holding, per cell, the MSE averaged over the
        experiments that were found, rounded to three significant digits.
        A cell with no experiments at all holds NaN rather than 0.

    Notes
    -----
    A missing experiment is reported on stdout and excluded from the average.
    Previously it was counted as an MSE of 0, which pulled the reported mean
    towards zero. Only lookup failures (``KeyError`` / ``IndexError``) are
    treated as "not found"; any other error propagates.
    """
    models = list(models)
    col_names = list(regimes.keys())
    # NaN marks cells for which not a single experiment could be found.
    results = np.full((len(models), len(col_names)), np.nan)

    for col_idx, condition in enumerate(regimes.values()):
        for row_idx, model in enumerate(models):
            found_mse = []
            for i in range(exps):
                try:
                    entry = exp_results[condition][model][i]
                except (KeyError, IndexError):
                    print(model, 'exp', i, 'not found')
                    continue
                beta_predicted, truth = entry[0], entry[1]
                sq_err = (beta_predicted - truth) ** 2
                found_mse.append(sq_err.mean())
            if found_mse:
                # Round-trip through the '.2e' format to keep three significant digits.
                results[row_idx][col_idx] = float("{:.2e}".format(np.mean(found_mse)))

    proper_names = [paper_model_names[m] for m in models]
    return pd.DataFrame(results, index=proper_names, columns=col_names, dtype=str)

In [4]:
# Root output directory; results are read from
#   <pokec_out>/<run number>/<model>_model_fitted_params/conf=(c1, c2);conf_type=<type>.npz
pokec_out = '../../out/pokec_low_inf'
exps = 10  # runs are numbered 1..exps on disk
embed='user'
models = ['unadjusted.main',
          'spf.main',
          'network_pref_only.main',
          'pif.z-theta-joint']

conf_types = ['homophily', 'exog', 'both']
confounding_strengths = [(50, 10), (50, 50), (50, 100)]

# exp_results[(conf_type, (cov1conf, cov2conf))][model] is a list of
# (fitted, true) array pairs in ascending run order. Runs whose file cannot be
# loaded are skipped, so a list position need not equal the run number.
exp_results = {}

for i in range(1, exps+1):
    for model in models:
        for (cov1conf, cov2conf) in confounding_strengths:
            for ct in conf_types:
                base_file_name = 'conf=' + str((cov1conf, cov2conf)) +';conf_type=' +ct + '.npz'
                result_file = os.path.join(pokec_out, str(i), model + '_model_fitted_params', base_file_name)
                try:
                    # The context manager closes the archive's file handle;
                    # the arrays are read into memory when indexed.
                    with np.load(result_file) as res:
                        params = res['fitted']
                        truth = res['true']
                except FileNotFoundError:
                    print(result_file,' not found')
                    continue
                except (OSError, KeyError, ValueError) as err:
                    # Unreadable file or missing 'fitted'/'true' array: keep the
                    # best-effort behaviour but report the real cause.
                    print(result_file, ' could not be loaded:', repr(err))
                    continue

                condition = (ct, (cov1conf, cov2conf))
                exp_results.setdefault(condition, {}).setdefault(model, []).append((params, truth))

In [5]:
# MSE table for the 'exog' confounding type, one column per confounding strength.
confounding_type = 'exog'

# Rows: the models recorded under the second (index 1) confounding strength.
models = list(exp_results[(confounding_type, confounding_strengths[1])])

# Column label -> key into exp_results.
regime1 = {
    label: (confounding_type, confounding_strengths[idx])
    for idx, label in enumerate(('Low', 'Med.', 'High'))
}

df1 = print_table(exp_results, regime1, models)
print(df1.to_markdown())

|                       |      Low |     Med. |     High |
|:----------------------|---------:|---------:|---------:|
| Unadjusted            | 0.00142  | 0.00186  | 0.00204  |
| MSPF                  | 5.36e-05 | 0.000106 | 8.91e-05 |
| Network Only          | 0.000278 | 0.000586 | 0.00062  |
| PIF-Item+Net+Purchase | 2.55e-05 | 4.56e-05 | 6.33e-05 |


In [6]:
# MSE table for the 'homophily' confounding type, one column per confounding strength.
confounding_type = 'homophily'

# Rows: the models recorded under the first (index 0) confounding strength.
models = list(exp_results[(confounding_type, confounding_strengths[0])])

# Column label -> key into exp_results.
regime1 = {
    label: (confounding_type, confounding_strengths[idx])
    for idx, label in enumerate(('Low', 'Med.', 'High'))
}

df2 = print_table(exp_results, regime1, models)
print(df2.to_markdown())

|                       |      Low |     Med. |     High |
|:----------------------|---------:|---------:|---------:|
| Unadjusted            | 0.00174  | 0.00217  | 0.00222  |
| MSPF                  | 0.000135 | 0.000198 | 0.000219 |
| Network Only          | 0.000526 | 0.000828 | 0.000888 |
| PIF-Item+Net+Purchase | 9.59e-05 | 0.000138 | 0.000135 |


In [9]:
# MSE table for the 'both' confounding type, one column per confounding strength.
confounding_type = 'both'

# Rows: the models recorded under the first (index 0) confounding strength.
models = list(exp_results[(confounding_type, confounding_strengths[0])])

# Column label -> key into exp_results.
regime1 = {
    label: (confounding_type, confounding_strengths[idx])
    for idx, label in enumerate(('Low', 'Med.', 'High'))
}

df3 = print_table(exp_results, regime1, models)
print(df3.to_markdown())

|              |      Low |     Med. |     High |
|:-------------|---------:|---------:|---------:|
| Unadjusted   | 0.00209  | 0.00232  | 0.00241  |
| MSPF         | 9.33e-05 | 0.00014  | 0.000139 |
| Network Only | 0.000347 | 0.000521 | 0.000562 |
| PIF-Joint    | 7.56e-05 | 9.48e-05 | 0.000126 |


In [21]:
# Put the three per-confounding-type tables side by side, aligned on the row
# labels. Each table uses the column labels 'Low', 'Med.' and 'High', so those
# labels repeat in the combined frame.
all_results = pd.concat((df1, df2, df3), axis='columns')
all_results

Unnamed: 0,Low,Med.,High,Low.1,Med..1,High.1,Low.2,Med..2,High.2
Unadjusted,0.00142,0.00186,0.00204,0.00174,0.00217,0.00222,0.00209,0.00232,0.00241
MSPF,5.36e-05,0.000106,8.91e-05,0.000135,0.000198,0.000219,9.33e-05,0.00014,0.000139
Network Only,0.000278,0.000586,0.00062,0.000526,0.000828,0.000888,0.000347,0.000521,0.000562
PIF-Item+Net+Purchase,2.55e-05,4.56e-05,6.33e-05,9.59e-05,0.000138,0.000135,7.56e-05,9.48e-05,0.000126
