# DAP on simulation results

In [8]:
import numpy as np
import pandas as pd
from pprint import pprint
# from pandasql import sqldf
from scipy import stats
# import pickle
import feather
from collections import Counter
from datetime import datetime
from utils import run_dap, load_data
import os
# Prefix used for the DAP result files written by this notebook.
fileout = "data/calcium_pathway_4000_shape_25.data"
# Pickled simulation blocks that are loaded as input.
filename = 'data/calcium_pathway_4000_shape_25.data.blocks.pkl'
print(fileout)

data/calcium_pathway_4000_shape_25.data


## Data transform: $y \in \{0,1\}$ to $y \in \{0,t\}$

In [9]:
data = load_data(filename)
# Relative risk; according to https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1140960/
# it is normally between 2 and 10.
RR = 2.5
# The lifetime prevalence of schizophrenia is 4.0/1000, not 1%; the original note says
# 0.01 was used as a first setting. The value actually used here is whatever the
# simulation arguments stored in the loaded data say.
prevalence = data['debug']['args']['prevalence']
# Frozen N(0, 1) distribution, shared by both quantile look-ups below.
std_normal = stats.norm(0, 1)
# t is the N(0, 1) quantile at 1 - prevalence.
t = std_normal.ppf(1 - prevalence)
# effect size of beta is z_mu, prior: the distance between t and the quantile
# at 1 - RR * prevalence.
z_mu = t - std_normal.ppf(1 - RR * prevalence)
print(t, z_mu)

2.57582930355 0.334426575944


In [10]:
# create prior file
# Gene names of the calcium pathway (the first two lines of the file are skipped).
cal_pthwy_genes = pd.read_table("../data/calciumgeneset.txt", skiprows = 2, header = None, names = ["gene_name"])
# Chromosome and gene name (columns 2 and 12 of refGene), one row per gene name.
ref_genes = pd.read_table("../data/refGene.txt.gz", compression="gzip", sep="\t", header = None,
                          usecols=(2,12), names=["chrom", "gene_name"]).drop_duplicates(subset=("gene_name"))
calpath_gene_pos = pd.merge(ref_genes, cal_pthwy_genes, how="inner", on=["gene_name"])
# Pathway genes that found no match in refGene.
diff = set(cal_pthwy_genes["gene_name"].tolist()) - set(calpath_gene_pos["gene_name"].tolist())
# A set of strings has no stable iteration order across interpreter runs (hash
# randomization), so indexing list(diff) would pair genes and chromosomes arbitrarily.
# Sorting makes the pairing reproducible.
missing_genes = sorted(diff)
# The chromosomes below are hard-coded for exactly two genes; fail loudly otherwise.
assert len(missing_genes) == 2, \
    "expected exactly 2 pathway genes missing from refGene, got {}: {}".format(len(missing_genes), missing_genes)
# add the two missing genes and their chromosome in the dataframe
# NOTE(review): the first gene in alphabetical order gets chr2 and the second gets chr15;
# confirm that this is the intended gene-to-chromosome mapping.
calpath_gene_pos.loc[calpath_gene_pos.shape[0]] = ["chr2", missing_genes[0]]
calpath_gene_pos.loc[calpath_gene_pos.shape[0]] = ["chr15", missing_genes[1]]

In [11]:
# Label of the form "<chrom>.<gene_name>", built column-wise instead of row by row.
calpath_gene_pos["chr.gene"] = (calpath_gene_pos["chrom"].astype(str) + "."
                                + calpath_gene_pos["gene_name"].astype(str))
# One prior value per gene, drawn uniformly from [0, z_mu).
# A dedicated, seeded generator keeps the draws identical on every run of this cell
# and leaves the global numpy random state untouched.
prior_rng = np.random.RandomState(42)
calpath_gene_pos["prior.pr"] = prior_rng.uniform(0, z_mu, size=calpath_gene_pos.shape[0])

In [12]:
# beta: effect size passed to DAP as the grid value; y_tran: value that replaces
# phenotype 1.0 before DAP is run.
beta, y_tran = z_mu, t

## Run DAP

In [None]:
dap_method = 'dap-g'
multiplier = 10
# Append the run suffix only when it is not already there, so that re-running this
# cell with the same settings does not keep lengthening the output name.
run_suffix = '_multiplier{}.{}'.format(multiplier, dap_method)
if not fileout.endswith(run_suffix):
    fileout = fileout + run_suffix
# clean it up, very important!
if os.path.isfile(fileout):
    os.remove(fileout)
# Remove leftover /tmp/F*dat, /tmp/F*prior and /tmp/F*grid files.
# This is done in Python because os.system() runs the command through /bin/sh, and
# brace expansion ({dat,prior,grid}) is not available in every sh implementation.
tmp_dir = '/tmp'
if os.path.isdir(tmp_dir):
    for tmp_name in os.listdir(tmp_dir):
        if not (tmp_name.startswith('F') and tmp_name.endswith(('dat', 'prior', 'grid'))):
            continue
        tmp_path = os.path.join(tmp_dir, tmp_name)
        if not os.path.isfile(tmp_path):
            continue
        try:
            os.remove(tmp_path)
        except OSError as err:
            # Keep going like `rm -f` would, but report what could not be removed.
            print('Could not remove {}: {}'.format(tmp_path, err))
n_jobs = len(data['data'])
for idx, df in enumerate(data['data']):
    # Recode the phenotype (1.0 -> y_tran, anything else -> 0) on a copy. The loaded
    # frames stay untouched, so re-running this cell recodes from the original values
    # instead of turning every phenotype into 0.
    df_tran = df.assign(phenotype=np.where(df["phenotype"] == 1.0, y_tran, 0))
    times = run_dap(df_tran, fileout, cal_pthwy_genes, ref_genes, grid = [(0, beta)],
                    multiplier = multiplier, ncpu = 4, dry_run = False, dap_method = dap_method)
    # times[0], times[1], times[2] are used as start, end-of-prepare and end-of-DAP marks.
    print('Job {2}/{3}, elapsed {0:.2f}s prepare & {1:.2f}s DAP'.\
          format(times[1] - times[0], times[2] - times[1], idx + 1, n_jobs))

Job 1/1848, elapsed 2.32s prepare & 1382.34s DAP
Job 2/1848, elapsed 0.09s prepare & 1.35s DAP
Job 3/1848, elapsed 0.03s prepare & 0.68s DAP
Job 4/1848, elapsed 0.81s prepare & 1315.79s DAP
Job 5/1848, elapsed 1.43s prepare & 620.19s DAP
Job 6/1848, elapsed 0.33s prepare & 2.46s DAP
Job 7/1848, elapsed 0.15s prepare & 1.26s DAP
Job 8/1848, elapsed 2.83s prepare & 201.48s DAP
Job 9/1848, elapsed 0.51s prepare & 91.10s DAP
Job 10/1848, elapsed 0.41s prepare & 17.29s DAP
Job 11/1848, elapsed 0.95s prepare & 1385.32s DAP
Job 12/1848, elapsed 0.02s prepare & 0.52s DAP
Job 13/1848, elapsed 0.84s prepare & 787.88s DAP
Job 14/1848, elapsed 0.35s prepare & 4.58s DAP
Job 15/1848, elapsed 0.17s prepare & 2.07s DAP
Job 16/1848, elapsed 0.34s prepare & 4.49s DAP
Job 17/1848, elapsed 0.32s prepare & 4.26s DAP
Job 18/1848, elapsed 0.21s prepare & 2.63s DAP
Job 19/1848, elapsed 1.51s prepare & 3869.17s DAP
Job 20/1848, elapsed 0.45s prepare & 3.42s DAP
Job 21/1848, elapsed 0.11s prepare & 1.54s DAP
Jo

Job 173/1848, elapsed 0.03s prepare & 0.66s DAP
Job 174/1848, elapsed 0.03s prepare & 0.66s DAP
Job 175/1848, elapsed 0.04s prepare & 0.82s DAP
Job 176/1848, elapsed 0.15s prepare & 2.17s DAP
Job 177/1848, elapsed 0.16s prepare & 2.19s DAP
Job 178/1848, elapsed 0.03s prepare & 0.67s DAP
Job 179/1848, elapsed 0.13s prepare & 1.82s DAP
Job 180/1848, elapsed 0.02s prepare & 0.50s DAP
Job 181/1848, elapsed 0.02s prepare & 0.52s DAP
Job 182/1848, elapsed 0.27s prepare & 3.33s DAP
Job 183/1848, elapsed 0.09s prepare & 1.36s DAP
Job 184/1848, elapsed 0.23s prepare & 2.92s DAP
Job 185/1848, elapsed 0.28s prepare & 3.67s DAP
Job 186/1848, elapsed 0.10s prepare & 1.51s DAP
Job 187/1848, elapsed 0.02s prepare & 0.51s DAP
Job 188/1848, elapsed 0.07s prepare & 1.19s DAP
Job 189/1848, elapsed 0.21s prepare & 2.87s DAP
Job 190/1848, elapsed 0.05s prepare & 0.86s DAP
Job 191/1848, elapsed 0.02s prepare & 0.51s DAP
Job 192/1848, elapsed 0.04s prepare & 0.85s DAP
Job 193/1848, elapsed 0.02s prepare & 0.

Job 344/1848, elapsed 0.02s prepare & 0.52s DAP
Job 345/1848, elapsed 0.03s prepare & 0.68s DAP
Job 346/1848, elapsed 0.02s prepare & 0.51s DAP
Job 347/1848, elapsed 0.02s prepare & 0.51s DAP
Job 348/1848, elapsed 0.05s prepare & 0.84s DAP
Job 349/1848, elapsed 0.02s prepare & 0.51s DAP
Job 350/1848, elapsed 0.13s prepare & 1.85s DAP
Job 351/1848, elapsed 0.03s prepare & 0.67s DAP
Job 352/1848, elapsed 0.02s prepare & 0.52s DAP
Job 353/1848, elapsed 0.05s prepare & 0.68s DAP
Job 354/1848, elapsed 0.07s prepare & 1.15s DAP
Job 355/1848, elapsed 0.05s prepare & 0.85s DAP
Job 356/1848, elapsed 0.05s prepare & 0.84s DAP
Job 357/1848, elapsed 0.15s prepare & 2.03s DAP
Job 358/1848, elapsed 0.02s prepare & 0.52s DAP
Job 359/1848, elapsed 0.06s prepare & 1.01s DAP
Job 360/1848, elapsed 0.04s prepare & 0.67s DAP
Job 361/1848, elapsed 0.02s prepare & 0.52s DAP
Job 362/1848, elapsed 0.02s prepare & 0.51s DAP
Job 363/1848, elapsed 0.02s prepare & 0.51s DAP
Job 364/1848, elapsed 1.01s prepare & 29

Job 514/1848, elapsed 0.45s prepare & 3.70s DAP
Job 515/1848, elapsed 0.62s prepare & 105.91s DAP
Job 516/1848, elapsed 1.27s prepare & 1566.14s DAP
Job 517/1848, elapsed 0.53s prepare & 92.53s DAP
Job 518/1848, elapsed 0.95s prepare & 1338.93s DAP
Job 519/1848, elapsed 0.11s prepare & 1.52s DAP
Job 520/1848, elapsed 0.03s prepare & 0.69s DAP
Job 521/1848, elapsed 0.03s prepare & 0.72s DAP
Job 522/1848, elapsed 0.42s prepare & 23.35s DAP
Job 523/1848, elapsed 0.17s prepare & 2.58s DAP
Job 524/1848, elapsed 0.11s prepare & 1.58s DAP
Job 525/1848, elapsed 0.07s prepare & 1.09s DAP
Job 526/1848, elapsed 0.02s prepare & 0.56s DAP
Job 527/1848, elapsed 0.20s prepare & 2.66s DAP
Job 528/1848, elapsed 0.02s prepare & 0.52s DAP
Job 529/1848, elapsed 0.05s prepare & 0.87s DAP
Job 530/1848, elapsed 0.21s prepare & 1.79s DAP
Job 531/1848, elapsed 0.91s prepare & 52.33s DAP
Job 532/1848, elapsed 0.20s prepare & 1.85s DAP
Job 533/1848, elapsed 0.11s prepare & 1.52s DAP
Job 534/1848, elapsed 0.03s p

In [1]:
# dap/dap -d /tmp/F1503014064.6944265.dat -g /tmp/F1503014064.6944265.grid -it 0.05 -prior /tmp/F1503014064.6944265.prior -t 2 > data/calcium_pathway_N4000_shape25_nomask.data.dap 

In [9]:
# calpath_gene_pos.to_csv("test.csv", sep="\t")
# Show every gene name of the calcium pathway as a plain Python list.
gene_names = cal_pthwy_genes["gene_name"].tolist()
print(gene_names)

['ADCY1', 'ADCY2', 'ADCY3', 'ADCY4', 'ADCY7', 'ADCY8', 'ADCY9', 'ADORA2A', 'ADORA2B', 'ADRA1A', 'ADRA1B', 'ADRA1D', 'ADRB1', 'ADRB2', 'ADRB3', 'AGTR1', 'ATP2A1', 'ATP2A2', 'ATP2A3', 'ATP2B1', 'ATP2B2', 'ATP2B3', 'ATP2B4', 'AVPR1A', 'AVPR1B', 'BDKRB1', 'BDKRB2', 'BST1', 'CACNA1A', 'CACNA1B', 'CACNA1C', 'CACNA1D', 'CACNA1E', 'CACNA1F', 'CACNA1G', 'CACNA1H', 'CACNA1I', 'CACNA1S', 'CALM1', 'CALM2', 'CALM3', 'CALML3', 'CALML5', 'CALML6', 'CAMK2A', 'CAMK2B', 'CAMK2D', 'CAMK2G', 'CAMK4', 'CCKAR', 'CCKBR', 'CD38', 'CHP', 'CHP2', 'CHRM1', 'CHRM2', 'CHRM3', 'CHRM5', 'CHRNA7', 'CYSLTR1', 'CYSLTR2', 'DRD1', 'DRD5', 'EDNRA', 'EDNRB', 'EGFR', 'ERBB2', 'ERBB3', 'ERBB4', 'F2R', 'GNA11', 'GNA14', 'GNA15', 'GNAL', 'GNAQ', 'GNAS', 'GRIN1', 'GRIN2A', 'GRIN2C', 'GRIN2D', 'GRM1', 'GRM5', 'GRPR', 'HRH1', 'HRH2', 'HTR2A', 'HTR2B', 'HTR2C', 'HTR4', 'HTR5A', 'HTR6', 'HTR7', 'ITPKA', 'ITPKB', 'ITPR1', 'ITPR2', 'ITPR3', 'LHCGR', 'LOC729317', 'LTB4R2', 'MYLK', 'MYLK2', 'MYLK3', 'NOS1', 'NOS2', 'NOS3', 'NTSR1', 'OX

In [None]:
# chrom
# chrom = ["chr{}".format(i) for i in list(range(1,23))+["X"]]
# print ((chrom))

In [None]:
# [I 15:34:54.151 NotebookApp] Accepting one-time-token-authenticated connection from ::1
# [I 15:34:54.814 NotebookApp] Kernel started: 60079e22-d81d-4f83-a293-c5750b5456a1
# [I 15:36:54.808 NotebookApp] Saving file at /20170726_Dap_on_simulation.ipynb
# [W 15:42:01.170 NotebookApp] WebSocket ping timeout after 92790 ms.
# [I 15:44:57.820 NotebookApp] KernelRestarter: restarting kernel (1/5)