In [1]:
import numpy as np
import os
from sklearn.metrics import mean_squared_error as mse_f
from scipy import sparse
from scipy.stats import gamma
from scipy.stats import ttest_ind
import warnings
import pandas as pd

warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)


In [2]:
paper_model_names = models = {
    "dsbmm_dpf.z-only": "DPIF-DD-Topic+Net",
    "dsbmm_dpf.z-theta-joint": "DPIF-DD-Topic+Net+Pubs",
    # 'spf.main':'MSPF',
    "unadjusted.main": "Unadjusted",
    "network_pref_only.main": "Network Only",
    "topic_only.main": "DPIF-DD-Topic",
    "no_unobs.main": "Oracle",
    "topic_only_oracle.main": "DPIF-Topic (No region)",
}


In [3]:
tmp_kv = tuple(paper_model_names.items())
for k, v in tmp_kv:
    if "dsbmm_dpf" in k:
        paper_model_names[k + "-ndc"] = v + " (No DC)"

tmp_dict = {
    k + "old_subs" + "_ewcnone" + "_rcolmain_adm1_1hot": v
    for k, v in paper_model_names.items()
    if "dsbmm_dpf" not in k
}
sub_choice_pretty = {"old_subs": " (prev. subs.) ", "upd_subs": " (subs. w. trans.) "}
reg_choice_pretty = {"adm1": "(ADM1)", "ctry": "(Ctry.)"}
for k, v in paper_model_names.items():
    if "dsbmm_dpf" in k:
        for sub_choice in ["old_subs", "upd_subs"]:
            for region in ["adm1", "ctry"]:
                tmp_dict[k + f"{sub_choice}_ewcnone_rcolmain_{region}_1hot"] = (
                    v + sub_choice_pretty[sub_choice] + reg_choice_pretty[region]
                )


In [4]:
full_paper_model_names = tmp_dict

In [77]:
def print_table(exp_results, regimes, models, exps=10, print_notfound=False):
    ncols = len(regimes.keys())
    nrows = len(models)
    results = np.zeros((nrows, ncols))
    std = np.zeros((nrows, ncols))
    alt_results = np.zeros((nrows, ncols))
    alt_std = np.zeros((nrows, ncols))

    col_idx = 0
    for regime, c in regimes.items():
        row_idx = 0
        for model in models:
            mse = np.zeros((exps,4))
            for i in range(exps):
                try:
                    beta_predicted = exp_results[c][model][i][0]
                    truth = exp_results[c][model][i][1]
                    sq_err = (beta_predicted - truth) ** 2
                    mse[i] = sq_err.mean(axis=0)
                except:
                    if print_notfound:
                        print(model, "exp", i, "not found")
            results[row_idx][col_idx] = round(mse.mean() * 1000, 2)
            std[row_idx][col_idx] = round(mse.std() * 1000, 2)
            
            alt_results[row_idx][col_idx] = round(mse[:,:-1].mean() * 1000, 2)
            alt_std[row_idx][col_idx] = round(mse[:,:-1].std() * 1000, 2)
            
            row_idx += 1
        col_idx += 1

    proper_names = [full_paper_model_names[m] for m in models]
    col_names = list(regimes.keys())
    df = pd.DataFrame(results, index=proper_names, columns=col_names, dtype=str)
    std_df = pd.DataFrame(std, index=proper_names, columns=col_names, dtype=str)
    df = df + "$\pm$" + std_df
    
    alt_df = pd.DataFrame(alt_results, index=proper_names, columns=col_names, dtype=str)
    alt_std_df = pd.DataFrame(alt_std, index=proper_names, columns=col_names, dtype=str)
    alt_df = alt_df + "$\pm$" + alt_std_df
    return df, alt_df


### Load results

In [78]:
from pathlib import Path

res_dir = Path("/scratch/fitzgeraldj/data/caus_inf_data/results")
exps = 5
# embed = "user"
sub_choices = ["old_subs","upd_subs"] 
regions = ["adm1","ctry"]  # or "ctry" for dsbmm_dpf models
base_models = [
    "unadjusted.main",
    "network_pref_only.main",
    "topic_only.main",
    "no_unobs.main",
    "topic_only_oracle.main",
    "dsbmm_dpf.z-only",
    "dsbmm_dpf.z-theta-joint",
    "dsbmm_dpf.z-theta-joint-ndc",
]
models = [m + f"{sub_choice}_ewcnone_rcolmain_{region}_1hot" for m in base_models for sub_choice in sub_choices for region in regions]

conf_types = ["homophily", "exog", "both"]
confounding_strengths = [(50, 10), (50, 50), (50, 100)]
exp_results = {}
found = set()

for i in range(1, exps + 1):
    for model in models:
        for (cov1conf, cov2conf) in confounding_strengths:
            for ct in conf_types:
                try:
                    base_file_name = (
                        "conf="
                        + str((cov1conf, cov2conf))
                        + ";conf_type="
                        + ct
                        + ".npz"
                    )
                    result_file = (
                        (res_dir / str(i)) / (model + "_model_fitted_params")
                    ) / base_file_name
                    res = np.load(result_file)
                    params = res["fitted"]
                    truth = res["true"]

                    if (ct, (cov1conf, cov2conf)) in exp_results:
                        if model in exp_results[(ct, (cov1conf, cov2conf))]:
                            exp_results[(ct, (cov1conf, cov2conf))][model].append(
                                (params, truth)
                            )
                        else:
                            exp_results[(ct, (cov1conf, cov2conf))][model] = [
                                (params, truth)
                            ]
                    else:
                        exp_results[(ct, (cov1conf, cov2conf))] = {
                            model: [(params, truth)]
                        }
                    if model not in found:
                        print(model, "found")
                        found |= set([model])
                except:
                    # print(result_file, " not found")
                    continue


dsbmm_dpf.z-theta-jointold_subs_ewcnone_rcolmain_adm1_1hot found
dsbmm_dpf.z-theta-jointold_subs_ewcnone_rcolmain_ctry_1hot found
unadjusted.mainold_subs_ewcnone_rcolmain_adm1_1hot found
no_unobs.mainold_subs_ewcnone_rcolmain_adm1_1hot found
topic_only_oracle.mainold_subs_ewcnone_rcolmain_adm1_1hot found


In [79]:
# dsbmm_dpf.z-onlyupd_subs_ewcnone_rcolmain_adm1_1hot_model_fitted_params
found

{'dsbmm_dpf.z-theta-jointold_subs_ewcnone_rcolmain_adm1_1hot',
 'dsbmm_dpf.z-theta-jointold_subs_ewcnone_rcolmain_ctry_1hot',
 'no_unobs.mainold_subs_ewcnone_rcolmain_adm1_1hot',
 'topic_only_oracle.mainold_subs_ewcnone_rcolmain_adm1_1hot',
 'unadjusted.mainold_subs_ewcnone_rcolmain_adm1_1hot'}

### Visualize results

In [80]:
confounding_type = "exog"
models = list(exp_results[(confounding_type, confounding_strengths[1])].keys())
regime1 = {
    "Low": (confounding_type, confounding_strengths[0]),
    "Med.": (confounding_type, confounding_strengths[1]),
    "High": (confounding_type, confounding_strengths[2]),
}

df1,alt_df1 = print_table(exp_results, regime1, models)


In [81]:
confounding_type = "homophily"
models = list(exp_results[(confounding_type, confounding_strengths[0])].keys())
regime1 = {
    "Low": (confounding_type, confounding_strengths[0]),
    "Med.": (confounding_type, confounding_strengths[1]),
    "High": (confounding_type, confounding_strengths[2]),
}

df2,alt_df2 = print_table(exp_results, regime1, models)


In [82]:
confounding_type = "both"
models = list(exp_results[(confounding_type, confounding_strengths[0])].keys())
regime1 = {
    "Low": (confounding_type, confounding_strengths[0]),
    "Med.": (confounding_type, confounding_strengths[1]),
    "High": (confounding_type, confounding_strengths[2]),
}

df3,alt_df3 = print_table(exp_results, regime1, models)


In [99]:
all_results = pd.concat([df1, df2, df3], axis=1, keys=["Exog.", "Homophily", "Both"])


In [100]:
all_results[all_results == all_results.min(axis=0)] = '\textbf{' + all_results[all_results == all_results.min(axis=0)] + '}'

In [101]:
all_results.to_latex('./results/semi-synth.tex',escape=False)

In [85]:
alt_all_results = pd.concat([alt_df1, alt_df2, alt_df3], axis=1, keys=["Exog.", "Homophily", "Both"])
alt_all_results


Unnamed: 0_level_0,Exog.,Exog.,Exog.,Homophily,Homophily,Homophily,Both,Both,Both
Unnamed: 0_level_1,Low,Med.,High,Low,Med.,High,Low,Med.,High
DPIF-DD-Topic+Net+Pubs (prev. subs.) (ADM1),0.8$\pm$2.22,1.24$\pm$3.68,1.15$\pm$2.39,5.86$\pm$11.87,4.45$\pm$8.62,4.3$\pm$8.28,6.23$\pm$12.32,4.82$\pm$9.2,4.96$\pm$9.38
DPIF-DD-Topic+Net+Pubs (prev. subs.) (Ctry.),0.81$\pm$2.26,1.24$\pm$3.76,1.07$\pm$2.4,6.56$\pm$12.94,4.47$\pm$8.64,4.31$\pm$8.31,6.23$\pm$12.34,4.82$\pm$9.19,4.97$\pm$9.38
Unadjusted,0.07$\pm$0.22,0.11$\pm$0.34,0.1$\pm$0.33,1.43$\pm$5.4,1.44$\pm$5.46,1.18$\pm$4.13,2.64$\pm$11.58,1.71$\pm$6.77,1.42$\pm$5.3
Oracle,0.03$\pm$0.1,0.03$\pm$0.11,0.03$\pm$0.12,2.03$\pm$10.8,1.26$\pm$6.68,0.63$\pm$3.35,2.62$\pm$13.92,1.27$\pm$6.75,0.87$\pm$4.6
DPIF-Topic (No region),0.01$\pm$0.03,0.01$\pm$0.04,0.01$\pm$0.04,1.86$\pm$9.26,1.31$\pm$6.08,0.9$\pm$3.69,2.49$\pm$12.63,1.75$\pm$8.41,1.08$\pm$4.68


In [96]:
alt_all_results[alt_all_results == alt_all_results.min(axis=0)] = '\textbf{' + alt_all_results[alt_all_results == alt_all_results.min(axis=0)] + '}'

In [97]:
alt_all_results

Unnamed: 0_level_0,Exog.,Exog.,Exog.,Homophily,Homophily,Homophily,Both,Both,Both
Unnamed: 0_level_1,Low,Med.,High,Low,Med.,High,Low,Med.,High
DPIF-DD-Topic+Net+Pubs (prev. subs.) (ADM1),0.8$\pm$2.22,1.24$\pm$3.68,1.15$\pm$2.39,5.86$\pm$11.87,4.45$\pm$8.62,4.3$\pm$8.28,6.23$\pm$12.32,4.82$\pm$9.2,4.96$\pm$9.38
DPIF-DD-Topic+Net+Pubs (prev. subs.) (Ctry.),0.81$\pm$2.26,1.24$\pm$3.76,1.07$\pm$2.4,6.56$\pm$12.94,4.47$\pm$8.64,4.31$\pm$8.31,6.23$\pm$12.34,4.82$\pm$9.19,4.97$\pm$9.38
Unadjusted,0.07$\pm$0.22,0.11$\pm$0.34,0.1$\pm$0.33,\textbf{1.43$\pm$5.4},1.44$\pm$5.46,1.18$\pm$4.13,2.64$\pm$11.58,1.71$\pm$6.77,1.42$\pm$5.3
Oracle,0.03$\pm$0.1,0.03$\pm$0.11,0.03$\pm$0.12,2.03$\pm$10.8,\textbf{1.26$\pm$6.68},\textbf{0.63$\pm$3.35},2.62$\pm$13.92,\textbf{1.27$\pm$6.75},\textbf{0.87$\pm$4.6}
DPIF-Topic (No region),\textbf{0.01$\pm$0.03},\textbf{0.01$\pm$0.04},\textbf{0.01$\pm$0.04},1.86$\pm$9.26,1.31$\pm$6.08,0.9$\pm$3.69,\textbf{2.49$\pm$12.63},1.75$\pm$8.41,1.08$\pm$4.68


In [98]:
alt_all_results.to_latex('./results/alt-semi-synth.tex',escape=False)

In [76]:
import pickle
# load up PPC results
with open(res_dir / "dsbmm_ppc_results.pkl", "rb") as f:
    dsbmm_ppc_results = pickle.load(f)
with open(res_dir / "dpf_ppc_results.pkl", "rb") as f:
    dpf_ppc_results = pickle.load(f)
with open(res_dir / "dpf_auc_results.pkl", "rb") as f:
    dpf_auc_results = pickle.load(f)

FileNotFoundError: [Errno 2] No such file or directory: '/scratch/fitzgeraldj/data/caus_inf_data/results/dsbmm_ppc_results.pkl'