In [None]:
import seaborn as sns
import pandas as pd
import numpy as np
import statsmodels.api as sm
import statsmodels
import matplotlib.pyplot as plt
import dodiscover as dod
import hyppo
import scipy as sp
import sklearn as sk
from sims import *
import dask
from dask.distributed import Client, progress
import dask.dataframe as ddf
import logging
import warnings
warnings.filterwarnings("ignore")
import contextlib

# Simulations

In [None]:
# docker run -ti --entrypoint /bin/bash -v /cis/home/ebridge2/Documents/research/hypo-repos/cdcorr:/cdcorr neurodata/cdcorr:0.0.1
# cd cdcorr/cdcorr/simulations

import seaborn as sns
import pandas as pd
import numpy as np
import statsmodels.api as sm
import statsmodels
import matplotlib.pyplot as plt
import dodiscover as dod
import hyppo
import scipy as sp
import sklearn as sk
from sims import *
import dask
from dask.distributed import Client, progress
import dask.dataframe as ddf
import logging
import warnings
warnings.filterwarnings("ignore")
import contextlib

# ---- Experiment grid ------------------------------------------------------
nsamp = 100              # "#Samples" value used for every experiment row
plow, phigh = 10, 101    # the two "Dimensionality" values swept
blow, bhigh = 0.4, 0.8   # the two "Balance" values swept
nbreaks = 8              # number of effect sizes taken from np.linspace(0, 2, nbreaks)
nsims = 100              # repetitions ("i") per grid point
covar_sz = 1             # forwarded to the simulators as covar_effect_size
K = 3                    # presumably the class count for the K-class setting -- TODO confirm

# ---- Execution ------------------------------------------------------------
ncores = 50              # dask worker count and number of dataframe partitions
Nrep = 1000              # default `nrep` handed to each test by run_row

def _run_tests(fns_to_run, Y, T, X, nrep):
    """Run every test in ``fns_to_run`` on ``(Y, T, X)`` and collect p-values.

    Returns a list with exactly one entry per test, in the dict's iteration
    order. A test that raises contributes NaN, so positions stay aligned with
    the result columns.
    """
    pvals = []
    for fn in fns_to_run.values():
        try:
            # Silence anything the test writes to stderr while it runs.
            with contextlib.redirect_stderr(None):
                pval, _ = fn(Y, T, X, nrep=nrep)
        except Exception:
            # Best effort: record the failure as NaN. `Exception` (not a bare
            # except) lets KeyboardInterrupt/SystemExit stop a long run.
            pval = float("NaN")
        pvals.append(pval)
    return pvals


def run_row(row, nrep=Nrep):
    """Simulate one experiment row and run all tests on it.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Must provide "index", "Setting", "#Samples", "Dimensionality",
        "Balance", "Effect Size" and "i".
    nrep : int
        Forwarded to every test function as ``nrep``.

    Returns
    -------
    tuple
        14 entries: Setting, #Samples, Dimensionality, Balance, Effect Size, i,
        then the p-values of cMANOVA, KernelCDTest, cDCorr and DCorr on the
        full sample, then the same four on the subset selected by
        ``causal_prep``. A failed test yields NaN in its slot.
    """
    # Lightweight progress indicator: one line every 100 rows.
    if row["index"] % 100 == 0:
        print("Row {:d}...".format(row["index"]))
    fns_to_run = {"cMANOVA": cond_manova, "KernelCDTest" : codite, "cDCorr" : cond_dcorr, "DCorr" : dcorr}
    sim_fn = simulations[row["Setting"]]

    sim_kwargs = dict(causal_effect_size=row["Effect Size"],
                      covar_effect_size=covar_sz,
                      balance=row["Balance"])
    # The K-class setting is registered as "K-Class Rotation"; the previous
    # exact comparison against "K-Class" never matched, so K was never passed.
    # NOTE(review): assumes the K-class simulator accepts a `K` keyword, as the
    # original branch did -- confirm against sims.
    if row["Setting"].startswith("K-Class"):
        sim_kwargs["K"] = K
    Y, T, X, _, _, _ = sim_fn(row["#Samples"], row["Dimensionality"], **sim_kwargs)

    # Tests on the full simulated sample.
    pvals = _run_tests(fns_to_run, Y, T, X, nrep)

    # Tests on the subset of observations selected by causal_prep
    # (its stdout is suppressed).
    with contextlib.redirect_stdout(None):
        balanced_ids = causal_prep(X, T)
    X_bal = X[balanced_ids]; T_bal = T[balanced_ids]; Y_bal = Y[balanced_ids,:]
    pvals += _run_tests(fns_to_run, Y_bal, T_bal, X_bal, nrep)

    return tuple([row["Setting"], row["#Samples"], row["Dimensionality"], row["Balance"],
                  row["Effect Size"], row["i"], *pvals])

# Registry of simulation settings: display name -> generator function.
# run_row looks the generator up by the row's "Setting" value.
simulations = dict([
    ("Non-Monotone", nonmonotonic_sim_cate),
    ("Sigmoidal Rotation", sigmoidal_sim_cate),
    ("K-Class Rotation", kclass_rotation_cate),
])

# Full factorial grid: setting x dimensionality x balance x effect size x repetition.
# Each row ends with a running row number ("index") that run_row uses for
# progress output.
dimensionalities = [plow, phigh]
balances = [blow, bhigh]
effect_sizes = np.linspace(0, 2, nbreaks)
grid_columns = ["Setting", "#Samples", "Dimensionality", "Balance", "Effect Size", "i", "index"]

exps = []
for sim in simulations:
    print(sim)
    for p in dimensionalities:
        for balance in balances:
            for eff_sz in effect_sizes:
                for i in range(nsims):
                    # len(exps) is the number of rows appended so far, i.e. the next row number.
                    exps.append([sim, nsamp, p, balance, eff_sz, i, len(exps)])
counter = len(exps)

sim_exps = pd.DataFrame(exps, columns=grid_columns)
print(sim_exps.head(10))
print(sim_exps.shape)

# Start a local dask.distributed client with `ncores` single-threaded workers;
# log output below ERROR is silenced.
# NOTE(review): the compute call below requests scheduler="multiprocessing",
# which appears to bypass this client -- confirm which scheduler is intended.
client = Client(threads_per_worker=1, n_workers=ncores, silence_logs=logging.ERROR)
print(client)

# Bind the partitioned frame to a new name instead of overwriting `sim_exps`,
# so `sim_exps` stays a pandas DataFrame and `ddf.from_pandas(sim_exps, ...)`
# can be executed again without failing on a dask collection.
sim_exps_dask = ddf.from_pandas(sim_exps, npartitions=ncores)

# Lazily apply run_row to every experiment row. `meta` declares the 14 output
# columns: 0-5 are the row's settings, 6-13 are the eight p-values.
sim_results_lazy = sim_exps_dask.apply(lambda x: run_row(x), axis=1, result_type='expand',
                                       meta={0: str, 1: int, 2: int, 3: float, 4: float,
                                             5: int, 6: float, 7: float, 8: float, 9: float, 10: float, 11: float,
                                             12: float, 13: float})

# Execute the task graph and collect the results.
sim_results = sim_results_lazy.compute(scheduler="multiprocessing")
# io43 = "Linear"
# io44 = "Sigmoidal"
# io46 = "K-Class"

In [None]:
# Local dask.distributed client: `ncores` workers, one thread each, with log
# output below ERROR silenced. The bare `client` shows it as the cell output.
client = Client(
    n_workers=ncores,
    threads_per_worker=1,
    silence_logs=logging.ERROR,
)
client

In [None]:
# Partition the experiment table into `ncores` dask partitions. The conversion
# is guarded because ddf.from_pandas only accepts pandas objects: if `sim_exps`
# is already a dask collection (this step has run before), reuse it as is.
if isinstance(sim_exps, pd.DataFrame):
    sim_exps = ddf.from_pandas(sim_exps, npartitions=ncores)

# Lazily apply run_row to every experiment row. `meta` declares the 14 output
# columns: 0-5 are the row's settings, 6-13 are the eight p-values.
sim_results = sim_exps.apply(lambda x: run_row(x), axis=1, result_type='expand',
                             meta={0: str, 1: int, 2: int, 3: float, 4: float,
                                   5: int, 6: float, 7: float, 8: float, 9: float, 10: float, 11: float,
                                   12: float, 13: float})

In [None]:
# Execute the lazy dask computation with the "multiprocessing" scheduler and
# rebind `sim_results` to the computed result.
sim_results = sim_results.compute(
    scheduler="multiprocessing",
)

In [None]:
# Descriptive names for the positional result columns 0..13, listed in
# column order: six experiment settings, then four p-values on the full
# sample, then the same four on the causal_prep subset.
result_columns = [
    "Simulation", "#Samples", "Dimensionality", "Balance", "Effect Size", "i",
    "cMANOVA", "CoDITE", "cDCorr", "DCorr",
    "Causal cMANOVA", "Causal CoDITE", "Causal cDCorr", "Causal DCorr",
]
sim_results = sim_results.rename(columns=dict(enumerate(result_columns)))

# Persist the labelled results for later analysis.
sim_results.to_pickle('./data/sim_results_covarfixed_sig.pkl')

In [None]:
# Show the results table as the cell's output.
sim_results