In [1]:
import seaborn as sns
import pandas as pd
import numpy as np
import statsmodels.api as sm
import statsmodels
import matplotlib.pyplot as plt
import dodiscover as dod
import hyppo
import scipy as sp
import sklearn as sk
from sims import *
import dask
from dask.distributed import Client, progress
import dask.dataframe as ddf
import logging
import warnings
warnings.filterwarnings("ignore")
import contextlib

# Simulations

In [2]:
# Configuration for the simulation study; related knobs are grouped together.
nbreaks, nsims = 8, 100   # effect-size grid points; repetitions per configuration
nsamp = 100               # value stored in the "#Samples" column of every experiment
plow, phigh = 10, 101     # the two "Dimensionality" values that are swept
blow, bhigh = 0.4, 0.8    # the two "Balance" values that are swept
K = 3                     # presumably the class count for the "K-Class" setting -- TODO confirm
ncores = 10               # Dask worker count, also used as the partition count
Nrep = 1_000              # default `nrep` forwarded by run_row to each test

def run_row(row, nrep=Nrep):
    """Simulate one experiment configuration and run every test on it.

    The three tests are run twice: once on the full simulated sample and once
    on the subset of observations selected by ``causal_prep(X, T)``.

    Parameters
    ----------
    row : mapping
        One experiment row with the keys "Setting", "#Samples",
        "Dimensionality", "Balance", "Effect Size" and "i".
    nrep : int, optional
        Forwarded as ``nrep=`` to every test function.

    Returns
    -------
    tuple
        A 12-tuple: the six row identifiers (in the key order listed above),
        then the cMANOVA / CoDITE / cDCorr p-values on the full sample, then
        the same three p-values on the balanced subset. A test that raises
        contributes NaN in its slot.
    """
    fns_to_run = {"cMANOVA": cond_manova, "CoDITE" : codite, "cDCorr" : cond_dcorr}
    sim_fn = simulations[row["Setting"]]

    sim_kwargs = {"causal_effect_size": row["Effect Size"], "balance": row["Balance"]}
    if row["Setting"] == "K-Class":
        # Use the notebook-level K constant rather than a second hard-coded 3.
        sim_kwargs["K"] = K
    Y, T, X, _, _, _ = sim_fn(row["#Samples"], row["Dimensionality"], **sim_kwargs)

    def _pvalues(Y_in, T_in, X_in):
        # One p-value per test, in fns_to_run order; NaN marks a failed test.
        collected = []
        for fn in fns_to_run.values():
            try:
                pval, _ = fn(Y_in, T_in, X_in, nrep=nrep)
            except Exception:
                # `Exception` (not a bare except) so KeyboardInterrupt and
                # SystemExit still stop a long-running simulation.
                pval = float("NaN")
            collected.append(pval)
        return collected

    pvals = _pvalues(Y, T, X)

    # causal_prep is silenced because only its return value is needed here.
    with contextlib.redirect_stdout(None):
        balanced_ids = causal_prep(X, T)
    X_bal = X[balanced_ids]
    T_bal = T[balanced_ids]
    Y_bal = Y[balanced_ids, :]
    pvals += _pvalues(Y_bal, T_bal, X_bal)

    return (row["Setting"], row["#Samples"], row["Dimensionality"], row["Balance"],
            row["Effect Size"], row["i"], *pvals)

# Registry mapping each setting label to its data generator; run_row looks the
# generator up by the row's "Setting" value.
simulations = dict([
    ("Linear", linear_sim),
    ("Sigmoidal", sigmoidal_sim),
    ("K-Class", kclass_sim),
])

# Full factorial grid: setting x dimensionality x balance x effect size x repetition.
exps = []
for sim in simulations:
    print(sim)
    exps.extend(
        [sim, nsamp, dim, bal, effect, rep]
        for dim in (plow, phigh)
        for bal in (blow, bhigh)
        for effect in np.linspace(0, 2, nbreaks)
        for rep in range(nsims)
    )

sim_exps = pd.DataFrame(
    exps,
    columns=["Setting", "#Samples", "Dimensionality", "Balance", "Effect Size", "i"],
)
print(sim_exps.head(n=10))
print(sim_exps.shape)

Linear
Sigmoidal
K-Class
  Setting  #Samples  Dimensionality  Balance  Effect Size  i
0  Linear       100              10      0.4          0.0  0
1  Linear       100              10      0.4          0.0  1
2  Linear       100              10      0.4          0.0  2
3  Linear       100              10      0.4          0.0  3
4  Linear       100              10      0.4          0.0  4
5  Linear       100              10      0.4          0.0  5
6  Linear       100              10      0.4          0.0  6
7  Linear       100              10      0.4          0.0  7
8  Linear       100              10      0.4          0.0  8
9  Linear       100              10      0.4          0.0  9
(9600, 6)


In [3]:
# Start a local Dask cluster: `ncores` workers with one thread each, and only
# ERROR-level messages let through from the cluster logs.
client = Client(
    n_workers=ncores,
    threads_per_worker=1,
    silence_logs=logging.ERROR,
)
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 10
Total threads: 10,Total memory: 32.00 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:59872,Workers: 10
Dashboard: http://127.0.0.1:8787/status,Total threads: 10
Started: Just now,Total memory: 32.00 GiB

0,1
Comm: tcp://127.0.0.1:59897,Total threads: 1
Dashboard: http://127.0.0.1:59904/status,Memory: 3.20 GiB
Nanny: tcp://127.0.0.1:59875,
Local directory: /var/folders/r_/8hcfnmrs7z5160l03b04bjpw0000gn/T/dask-worker-space/worker-aazfa7ez,Local directory: /var/folders/r_/8hcfnmrs7z5160l03b04bjpw0000gn/T/dask-worker-space/worker-aazfa7ez

0,1
Comm: tcp://127.0.0.1:59895,Total threads: 1
Dashboard: http://127.0.0.1:59899/status,Memory: 3.20 GiB
Nanny: tcp://127.0.0.1:59876,
Local directory: /var/folders/r_/8hcfnmrs7z5160l03b04bjpw0000gn/T/dask-worker-space/worker-v3hwa20s,Local directory: /var/folders/r_/8hcfnmrs7z5160l03b04bjpw0000gn/T/dask-worker-space/worker-v3hwa20s

0,1
Comm: tcp://127.0.0.1:59896,Total threads: 1
Dashboard: http://127.0.0.1:59901/status,Memory: 3.20 GiB
Nanny: tcp://127.0.0.1:59877,
Local directory: /var/folders/r_/8hcfnmrs7z5160l03b04bjpw0000gn/T/dask-worker-space/worker-7uriy1id,Local directory: /var/folders/r_/8hcfnmrs7z5160l03b04bjpw0000gn/T/dask-worker-space/worker-7uriy1id

0,1
Comm: tcp://127.0.0.1:59898,Total threads: 1
Dashboard: http://127.0.0.1:59906/status,Memory: 3.20 GiB
Nanny: tcp://127.0.0.1:59878,
Local directory: /var/folders/r_/8hcfnmrs7z5160l03b04bjpw0000gn/T/dask-worker-space/worker-cjpr64zk,Local directory: /var/folders/r_/8hcfnmrs7z5160l03b04bjpw0000gn/T/dask-worker-space/worker-cjpr64zk

0,1
Comm: tcp://127.0.0.1:59911,Total threads: 1
Dashboard: http://127.0.0.1:59915/status,Memory: 3.20 GiB
Nanny: tcp://127.0.0.1:59879,
Local directory: /var/folders/r_/8hcfnmrs7z5160l03b04bjpw0000gn/T/dask-worker-space/worker-wp4odf2_,Local directory: /var/folders/r_/8hcfnmrs7z5160l03b04bjpw0000gn/T/dask-worker-space/worker-wp4odf2_

0,1
Comm: tcp://127.0.0.1:59903,Total threads: 1
Dashboard: http://127.0.0.1:59908/status,Memory: 3.20 GiB
Nanny: tcp://127.0.0.1:59880,
Local directory: /var/folders/r_/8hcfnmrs7z5160l03b04bjpw0000gn/T/dask-worker-space/worker-jb722ghm,Local directory: /var/folders/r_/8hcfnmrs7z5160l03b04bjpw0000gn/T/dask-worker-space/worker-jb722ghm

0,1
Comm: tcp://127.0.0.1:59909,Total threads: 1
Dashboard: http://127.0.0.1:59913/status,Memory: 3.20 GiB
Nanny: tcp://127.0.0.1:59881,
Local directory: /var/folders/r_/8hcfnmrs7z5160l03b04bjpw0000gn/T/dask-worker-space/worker-jus172th,Local directory: /var/folders/r_/8hcfnmrs7z5160l03b04bjpw0000gn/T/dask-worker-space/worker-jus172th

0,1
Comm: tcp://127.0.0.1:59912,Total threads: 1
Dashboard: http://127.0.0.1:59918/status,Memory: 3.20 GiB
Nanny: tcp://127.0.0.1:59882,
Local directory: /var/folders/r_/8hcfnmrs7z5160l03b04bjpw0000gn/T/dask-worker-space/worker-lwjrf1g5,Local directory: /var/folders/r_/8hcfnmrs7z5160l03b04bjpw0000gn/T/dask-worker-space/worker-lwjrf1g5

0,1
Comm: tcp://127.0.0.1:59917,Total threads: 1
Dashboard: http://127.0.0.1:59920/status,Memory: 3.20 GiB
Nanny: tcp://127.0.0.1:59883,
Local directory: /var/folders/r_/8hcfnmrs7z5160l03b04bjpw0000gn/T/dask-worker-space/worker-hh2h4u3o,Local directory: /var/folders/r_/8hcfnmrs7z5160l03b04bjpw0000gn/T/dask-worker-space/worker-hh2h4u3o

0,1
Comm: tcp://127.0.0.1:59922,Total threads: 1
Dashboard: http://127.0.0.1:59923/status,Memory: 3.20 GiB
Nanny: tcp://127.0.0.1:59884,
Local directory: /var/folders/r_/8hcfnmrs7z5160l03b04bjpw0000gn/T/dask-worker-space/worker-2n958jz9,Local directory: /var/folders/r_/8hcfnmrs7z5160l03b04bjpw0000gn/T/dask-worker-space/worker-2n958jz9


In [4]:
# Split the experiment grid into one partition per worker. The guard keeps this
# cell re-runnable: after the first run `sim_exps` is already a Dask frame, and
# from_pandas expects a pandas object.
if isinstance(sim_exps, pd.DataFrame):
    sim_exps = ddf.from_pandas(sim_exps, npartitions=ncores)

# Lazily apply run_row to every row (nothing executes until .compute()).
# `meta` declares the 12 output columns: six identifiers (0-5) followed by the
# six p-values (6-11) that run_row returns.
sim_results = sim_exps.apply(
    run_row,
    axis=1,
    result_type='expand',
    meta={0: str, 1: int, 2: int, 3: float, 4: float, 5: int,
          6: float, 7: float, 8: float, 9: float, 10: float, 11: float},
)

In [None]:
# Execute the lazy apply and collect the result, rebinding `sim_results` from
# the lazy Dask object to the computed one (so this cell is not re-runnable
# without first re-running the cell that builds the lazy frame).
# NOTE(review): an explicit scheduler="multiprocessing" appears to take
# precedence over the distributed Client created earlier -- confirm whether
# the LocalCluster is actually being used for this computation.
sim_results = sim_results.compute(scheduler="multiprocessing")

  eigv1 = np.array([i / (1 - i) for i in eigv2])
  eigv1 = np.array([i / (1 - i) for i in eigv2])
  F = (1 - lmd) / lmd * df2 / df1
  F = (1 - lmd) / lmd * df2 / df1
  F = df2 / df1 / s * U
  F = df2 / df1 * sigma
  eigv1 = np.array([i / (1 - i) for i in eigv2])
  eigv1 = np.array([i / (1 - i) for i in eigv2])
  F = (1 - lmd) / lmd * df2 / df1
  F = (1 - lmd) / lmd * df2 / df1
  F = df2 / df1 / s * U
  F = df2 / df1 * sigma
  eigv1 = np.array([i / (1 - i) for i in eigv2])
  eigv1 = np.array([i / (1 - i) for i in eigv2])
  F = (1 - lmd) / lmd * df2 / df1
  F = (1 - lmd) / lmd * df2 / df1
  F = df2 / df1 / s * U
  F = df2 / df1 * sigma
  eigv1 = np.array([i / (1 - i) for i in eigv2])
  eigv1 = np.array([i / (1 - i) for i in eigv2])
  F = (1 - lmd) / lmd * df2 / df1
  F = (1 - lmd) / lmd * df2 / df1
  F = df2 / df1 / s * U
  F = df2 / df1 * sigma
  eigv1 = np.array([i / (1 - i) for i in eigv2])
  eigv1 = np.array([i / (1 - i) for i in eigv2])
  F = (1 - lmd) / lmd * df2 / df1
  F = (1 - l

In [None]:
# Replace the positional column labels produced by the expanded apply with
# descriptive names: six experiment identifiers, then the three unadjusted and
# the three "Causal" p-value columns.
sim_results = sim_results.rename(columns={0: "Simulation", 1: "#Samples", 2: "Dimensionality", 3: "Balance",
                                          4: "Effect Size", 5: "i", 6: "cMANOVA", 7: "CoDITE", 8: "cDCorr",
                                          9: "Causal MANOVA", 10: "Causal DITE", 11: "Causal DCorr"})
# Persist the results so the expensive simulation need not be re-run.
# NOTE(review): this presumably requires ./data to exist already -- confirm.
sim_results.to_pickle('./data/sim_results.pkl')

In [None]:
# Show the results table as the cell output.
sim_results