In [3]:
import pandas as pd
import numpy as np
import glob
import graspologic as gp
import seaborn as sns
import os
import re
import matplotlib.pyplot as plt
import sys
sys.path.append("./")
sys.path.append("../simulations/")
sys.path.append("../mice/")
from sims import *
import dask
from dask.distributed import Client, progress
import dask.dataframe as ddf
import logging
import warnings
warnings.filterwarnings("ignore")
import contextlib

def read_connectome(path):
    """Load one connectome spreadsheet and return its rank-transformed matrix.

    The Excel sheet at ``path`` is read with pandas, its first column is
    discarded (presumably a row-label column -- confirm against the data),
    the diagonal of the remaining matrix is set to zero, and the result is
    passed through ``gp.utils.pass_to_ranks``.

    Parameters
    ----------
    path : str
        Location of the ``.xlsx`` file to read.

    Returns
    -------
    The value returned by ``gp.utils.pass_to_ranks`` for the zero-diagonal
    matrix.
    """
    sheet = pd.read_excel(path)
    first_column = sheet.columns[0]
    # np.array() makes a fresh copy, so the in-place diagonal write below
    # never touches the DataFrame's own buffer.
    adjacency = np.array(sheet.drop(columns=first_column))
    np.fill_diagonal(adjacency, 0)
    return gp.utils.pass_to_ranks(adjacency)

# Paths of every connectome spreadsheet under ./Connectomes.
# NOTE(review): glob.glob does not promise any particular ordering, and
# grab_covariates() uses each path's position in this list as the "Id" that
# later indexes the first axis of `graphs` -- confirm the order here matches
# the order used when graphs.npy was written.
conn_paths = glob.glob("./Connectomes/*.xlsx")

def grab_covariates(conn_paths, covar_path, filter_genotypes=None):
    """Build the covariate table for a list of connectome files.

    Each connectome is identified by the part of its file name before the
    first underscore.  That identifier is looked up in the ``DWI`` column of
    the CSV at ``covar_path`` (after truncating each ``DWI`` value at its
    first ``_`` or ``-``).  Connectomes with exactly one matching row get
    their covariates; connectomes with no match or with several matches are
    reported on stdout and recorded with missing covariates, so they are
    removed by the final ``dropna``.

    Parameters
    ----------
    conn_paths : list of str
        Connectome file paths.  The position of a path in this list becomes
        the ``Id`` of that connectome in the returned table.
    covar_path : str
        CSV file with columns ``DWI``, ``Treatment``, ``Age_Years``,
        ``Sex_other`` and ``Geno``.
    filter_genotypes : list of str, optional
        Genotypes to exclude from the result.  Defaults to
        ``["APOE44HN", "APOE33HN", "HN", "APOE22HN"]``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Mouse``, ``Id``, ``Diet``, ``Age``, ``Sex``, ``Genotype``;
        one row per connectome that matched exactly one covariate row, has no
        missing value, and whose genotype is not in ``filter_genotypes``.
    """
    if filter_genotypes is None:
        filter_genotypes = ["APOE44HN", "APOE33HN", "HN", "APOE22HN"]

    basenames = [os.path.basename(path).split("_")[0] for path in conn_paths]

    covars = pd.read_csv(covar_path)
    covars["DWI"] = [re.split("_|-", str(name))[0] for name in covars["DWI"]]

    columns = ["Mouse", "Id", "Diet", "Age", "Sex", "Genotype"]
    records = []
    for i, basename in enumerate(basenames):
        pd_row = covars[covars["DWI"] == basename]
        n_matches = pd_row.shape[0]
        if n_matches == 1:
            record = {
                "Mouse": basename,
                "Id": i,
                "Diet": pd_row["Treatment"].item(),
                "Age": float(pd_row["Age_Years"].item()),
                "Sex": pd_row["Sex_other"].item().strip(),
                "Genotype": pd_row["Geno"].item(),
            }
        else:
            if n_matches > 1:
                # Ambiguous match: there is no way to pick the right row.
                print("Error: {:s}".format(basename))
            else:
                print("No row: {:s}".format(basename))
            # Record the connectome with missing covariates so that the
            # dropna() below removes it, instead of re-using the record
            # left over from the previous iteration.
            record = {
                "Mouse": basename,
                "Id": i,
                "Diet": np.nan,
                "Age": np.nan,
                "Sex": np.nan,
                "Genotype": np.nan,
            }
        records.append(record)

    # Passing `columns` keeps the schema stable even when conn_paths is empty.
    covar_df = pd.DataFrame(records, columns=columns)
    keep = ~covar_df["Genotype"].isin(filter_genotypes)
    return covar_df[keep].dropna()

# Load the pre-computed stack of connectomes and the covariates of the
# subjects that survive grab_covariates()' filtering.
graphs = np.load("./Connectomes/graphs.npy")
scrubbed_covariates = grab_covariates(conn_paths, "./phenotypic.csv")
# "Id" is each connectome's position in conn_paths; it is used here to pick
# the matching slices along the first axis of `graphs`.
# NOTE(review): assumes graphs.npy stores the connectomes in the same order
# as conn_paths -- confirm.
graphs_without_nas = graphs[scrubbed_covariates["Id"].tolist(),:,:]

# Read the node dictionary and derive short codes from it:
#   "Hem"   -- one-letter hemisphere code mapped from "Hemisphere"
#   "L1"    -- one-letter code mapped from "Level_1"
#   "Label" -- hemisphere code followed by the "Level_2_abbrev" value
# Values missing from either mapping become NaN.
node_labels = pd.read_csv("./node_label_dictionary.csv")
node_labels = node_labels.assign(
    Hem=node_labels["Hemisphere"].map({"Left": "L", "Right": "R"}),
    L1=node_labels["Level_1"].map(
        {
            "1_forebrain": "F",
            "2_midbrain": "M",
            "3_hindbrain": "H",
            "4_white_matter_tracts": "W",
            "5_ventricular_system": "V",
        }
    ),
)
node_labels["Label"] = node_labels["Hem"] + node_labels["Level_2_abbrev"]

results = []

# Store the discrete covariates as categoricals.
scrubbed_covariates["Diet"] = pd.Categorical(scrubbed_covariates["Diet"])
scrubbed_covariates["Sex"] = pd.Categorical(scrubbed_covariates["Sex"])
scrubbed_covariates["Genotype"] = pd.Categorical(scrubbed_covariates["Genotype"])

# One-hot encode Sex and Diet; the "female" and "HFD" indicator columns are
# dropped from the encodings.
ohe_sex = pd.get_dummies(scrubbed_covariates["Sex"]).drop(columns="female")
ohe_diet = pd.get_dummies(scrubbed_covariates["Diet"]).drop(columns="HFD")

# Covariate matrix: Age followed by the remaining Sex and Diet indicators.
Xs = scrubbed_covariates[["Age"]].join(ohe_sex).join(ohe_diet)

# Standardise every column to zero mean and unit standard deviation.
Xs_normalized = (Xs - Xs.mean(axis=0)) / Xs.std(axis=0)

# Integer category code of each subject's genotype.
Ts = scrubbed_covariates["Genotype"].cat.codes.to_numpy()

# Restrict covariates, genotype codes and graphs to the subjects selected by
# causal_prep (imported from `sims`; its implementation is not shown here).
# NOTE(review): Xs_normalized is a DataFrame, so `Xs_normalized[balanced_ids]`
# selects rows only if balanced_ids is a boolean mask; an integer index array
# would be interpreted as column labels. Confirm what causal_prep returns.
balanced_ids = causal_prep(Xs_normalized.to_numpy(), Ts)
Xs_causal = Xs_normalized[balanced_ids]
Ts_causal = Ts[balanced_ids]
graphs_causal = graphs_without_nas[balanced_ids,:,:]

from itertools import combinations

# Number of dask workers/partitions and the `nrep` value handed to cond_dcorr.
ncores = 50
nrep = 10000
nodelabs = node_labels["Label"]

# Every unordered pair of distinct community labels, plus each label paired
# with itself: [label_k, label_l] with label_l at or after label_k in the
# sorted unique labels.
un_labs = np.unique(node_labels["Label"])
exps = [
    [first_lab, second_lab]
    for start, first_lab in enumerate(un_labs)
    for second_lab in un_labs[start:]
]

def run_comb(row, num_combs=len(exps)):
    """Run the conditional test for one pair of communities.

    The edges between the nodes labelled ``row["Community k"]`` and those
    labelled ``row["Community l"]`` are cut out of every graph in
    ``graphs_causal``, flattened to one feature vector per subject, and
    passed to ``cond_dcorr`` together with ``Ts_causal`` and ``Xs_causal``.

    Parameters
    ----------
    row : mapping
        Must provide ``"index"``, ``"Community k"`` and ``"Community l"``.
    num_combs : int
        Total number of combinations; only used in the progress message
        printed for every 100th index.

    Returns
    -------
    tuple
        ``(community_k, community_l, pval, stat)``.  ``pval`` and ``stat``
        are NaN when the test could not be computed.
    """
    comm_k = row["Community k"]
    comm_l = row["Community l"]
    if row["index"] % 100 == 0:
        print("Combination: {:d} of {:d}".format(row["index"], num_combs))
    try:
        in_k = nodelabs == comm_k
        in_l = nodelabs == comm_l
        # Sub-block of each graph: rows in community k, columns in community l.
        block_edges = graphs_causal[:, in_k, :][:, :, in_l]
        features = block_edges.reshape(graphs_causal.shape[0], -1)
        # NOTE(review): cond_dcorr comes from `sims` (not shown); confirm it
        # returns (p-value, statistic) in this order.
        pval, stat = cond_dcorr(features, Ts_causal, Xs_causal, nrep=nrep)
    except Exception as err:
        # Best effort: one failing pair must not abort the whole sweep, but
        # the failure is reported rather than silently discarded.  Catching
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate.
        print("Combination failed ({}, {}): {!r}".format(comm_k, comm_l, err))
        pval, stat = np.nan, np.nan
    return (comm_k, comm_l, pval, stat)

# Table of community pairs to test, one row per pair, with a running index
# that run_comb uses for its progress messages.
realdat_exps = pd.DataFrame(exps, columns=["Community k", "Community l"])
realdat_exps["index"] = range(0, len(realdat_exps))
print(realdat_exps.head())
# NOTE(review): a dask.distributed Client with `ncores` single-threaded
# workers is started here, but compute() below is called with
# scheduler="multiprocessing", which looks like it would bypass this client.
# Confirm which scheduler is intended; the client is also never closed.
client = Client(threads_per_worker=1, n_workers=ncores, silence_logs=logging.ERROR)
print(client)

# Split the table into `ncores` partitions and apply run_comb to every row;
# `meta` declares the four output columns (two labels, p-value, statistic).
realdat_exps = ddf.from_pandas(realdat_exps, npartitions=ncores)
realdat_results = realdat_exps.apply(lambda x: run_comb(x), axis=1, result_type='expand',
                             meta={0: str, 1: str, 2: float, 3: float})
realdat_results = realdat_results.compute(scheduler="multiprocessing")

# Give the positional columns readable names and save the result.
# NOTE(review): assumes the ./data directory already exists -- confirm.
realdat_results = realdat_results.rename(columns={0: "Community k", 1: "Community l", 2: "p-value", 3: "Test Statistic"})
realdat_results.to_pickle('./data/mouse_processed_L2.pkl')

  from .autonotebook import tqdm as notebook_tqdm


FileNotFoundError: [Errno 2] No such file or directory: './Connectomes/graphs.npy'