In [1]:
import os
# Cap the OpenMP thread count. This is set before graph_tool is imported in the
# next cell (presumably so its OpenMP runtime reads the value at load time — TODO confirm).
os.environ.update(OMP_NUM_THREADS="32")

In [2]:
from graph_tool.all import *
import pandas as pd
import numpy as np
import scipy as sp
from sklearn.covariance import LedoitWolf, OAS
#import matplotlib.pyplot as py
#import seaborn as sns
import statsmodels.api as sm
from multipy.fdr import qvalue
from multipy.fdr import lsu

In [3]:
from trim_networks import *
from fit_sbm import *

In [4]:
### Input expression data
# One tab-separated table per tissue; the per-tissue dict stores the transposed tables.
folder = "/Genomics/ayroleslab2/lamaya/bigProject/eQTLcatalog/modularity/matrices/"
gene_expr_body = pd.read_table(f"{folder}VOOMCounts_CPM1_body_ctrl_939ind_covfree_Aug3121.txt")
gene_expr_head = pd.read_table(f"{folder}VOOMCounts_CPM1_head_ctrl_940ind_covfree_Aug3121.txt")
gene_expr_dict = dict(head=gene_expr_head.T, body=gene_expr_body.T)

In [5]:
# Load the stored graph of each tissue (body first, then head) and index them by tissue.
folder = "../data/output/SBM/graphs/"
g_body, g_head = (
    load_graph(folder + graph_file)
    for graph_file in (
        "VOOMCounts_CPM1_body_ctrl_939ind_covfree_Aug3121.xml.gz",
        "VOOMCounts_CPM1_head_ctrl_940ind_covfree_Aug3121.xml.gz",
    )
)

g_dict = dict(head=g_head, body=g_body)

In [6]:
# Per-tissue result of filterByFDR at level 1e-2 (helper comes from the star imports above).
g_dict_fdr_1e2 = {
    tissue: filterByFDR(graph, 1e-2, False)
    for tissue, graph in (("head", g_head), ("body", g_body))
}

In [46]:
def corrTable(tissue, graphs=None):
    """Build a long-format table of the edges of one tissue's graph.

    Parameters
    ----------
    tissue : str
        Key selecting the graph; also written to the ``Tissue`` column.
    graphs : mapping, optional
        Mapping from tissue name to graph. Defaults to the notebook-level
        ``g_dict_fdr_1e2`` so existing ``corrTable(tissue)`` calls keep working.

    Returns
    -------
    pandas.DataFrame
        One row per edge with columns ``Tissue``, ``Gene1``, ``Gene2`` and
        ``SpearmanCorr`` (the edge's "spearman" property value).
    """
    if graphs is None:
        graphs = g_dict_fdr_1e2

    gr = graphs[tissue]
    correlation = gr.edge_properties["spearman"]
    genes = gr.vertex_properties["genes"]

    # One call returns (source, target, spearman) for every edge. This avoids a
    # gr.edge(s, t) lookup per edge, which is slow and would return the same
    # edge repeatedly if parallel edges exist.
    edge_table = gr.get_edges([correlation])

    # With a floating-point edge property the whole array is floating point,
    # so the endpoint indices are cast back to int before the gene lookup.
    gene1 = [genes[int(source)] for source in edge_table[:, 0]]
    gene2 = [genes[int(target)] for target in edge_table[:, 1]]

    return pd.DataFrame(
        {
            "Tissue": [tissue] * len(gene1),
            "Gene1": gene1,
            "Gene2": gene2,
            "SpearmanCorr": edge_table[:, 2].astype(float),
        },
        columns=["Tissue", "Gene1", "Gene2", "SpearmanCorr"],
    )

In [None]:
# Edge-correlation table for each tissue, built head first and then body.
corr_dict_fdr_1e2 = {tissue: corrTable(tissue) for tissue in ("head", "body")}

In [50]:
# Stack the body rows on top of the head rows and write them to a single CSV.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0; row order and the original indices are preserved.
pd.concat([corr_dict_fdr_1e2["body"], corr_dict_fdr_1e2["head"]]).to_csv("../data/output/spearmanCorrelations_VOOMCounts_CPM1_headbody_ctrl_covfree_Aug3121.csv")

In [6]:
def _meanAbsEdgeWeight(graph, v, weight):
    """Mean absolute value of `weight` over the edges of `graph` at vertex `v`; NaN if it has none."""
    incident = graph.get_all_edges(v, [weight])
    if incident.shape[0] == 0:
        # Same value np.mean would give for an empty array, without the RuntimeWarning.
        return np.nan
    # Column 2 holds the requested edge property (columns 0 and 1 are the endpoints).
    return np.mean(np.abs(incident[:, 2]))


def getGeneNetworkStats(g, tissue):
    """Per-gene connectivity summary of `g` under several edge filters.

    For every vertex the table holds the gene name, the tissue label, the mean
    absolute "spearman" edge value in the unfiltered graph, and, for each
    filtered view, the vertex's total degree and its mean absolute "spearman"
    value within that view. The ``WeightedDegree_*`` columns therefore contain
    a mean absolute correlation, not a sum of weights; the column names are
    kept unchanged for downstream compatibility.

    The views come from ``filterByEdge(g, "spearman", cutoff, False)`` for
    cutoffs 0.1 to 0.5 and ``filterByFDR(g, level, False)`` for levels 5e-2 to
    1e-6. Both helpers are defined outside this notebook, so the meaning of the
    trailing ``False`` is not visible here; the calls are kept as in the
    original code.

    Parameters
    ----------
    g : graph
        Graph with a "genes" vertex property and a "spearman" edge property.
    tissue : str
        Label written to the ``Tissue`` column of every row.

    Returns
    -------
    pandas.DataFrame
        One row per vertex, indexed by the vertex index. Vertices with no
        edges in a view get degree 0 and NaN for that view's mean.
    """
    genes = g.vertex_properties["genes"]
    corr = g.edge_properties["spearman"]

    corr_cutoffs = (0.1, 0.2, 0.3, 0.4, 0.5)
    # Labels are spelled out because str(5e-2) is "0.05", not the "5e-2" used in the column names.
    fdr_levels = (("5e-2", 5e-2), ("1e-2", 1e-2), ("1e-3", 1e-3),
                  ("1e-4", 1e-4), ("1e-5", 1e-5), ("1e-6", 1e-6))

    # (column suffix, filtered view) pairs, in output column order.
    views = [("thr_%s" % cutoff, filterByEdge(g, "spearman", cutoff, False))
             for cutoff in corr_cutoffs]
    views += [("fdr_%s" % label, filterByFDR(g, level, False))
              for label, level in fdr_levels]

    columns = ["Gene", "Tissue", "Average_Spearman"]
    for suffix, _ in views:
        columns += ["Degree_" + suffix, "WeightedDegree_" + suffix]

    # Collect plain rows and build the frame once; assigning block_df.loc[v]
    # row by row re-allocates the frame on every vertex.
    vertex_ids = []
    rows = []
    for v in g.vertex_index:
        row = [genes[v], tissue, _meanAbsEdgeWeight(g, v, corr)]
        for _, view in views:
            row.append(view.get_total_degrees([v])[0])
            row.append(_meanAbsEdgeWeight(view, v, corr))
        vertex_ids.append(v)
        rows.append(row)

    return pd.DataFrame(rows, columns=columns, index=vertex_ids)


In [83]:
# Connectivity statistics for the full graph of each tissue (body first, then head).
gene_stats = {
    tissue: getGeneNetworkStats(g_dict[tissue], tissue)
    for tissue in ("body", "head")
}

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [84]:
# Preview the stacked table (body rows followed by head rows).
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
pd.concat([gene_stats["body"], gene_stats["head"]])

Unnamed: 0,Gene,Tissue,Average_Spearman,Degree_thr_0.1,WeightedDegree_thr_0.1,Degree_thr_0.2,WeightedDegree_thr_0.2,Degree_thr_0.3,WeightedDegree_thr_0.3,Degree_thr_0.4,...,Degree_fdr_1e-2,WeightedDegree_fdr_1e-2,Degree_fdr_1e-3,WeightedDegree_fdr_1e-3,Degree_fdr_1e-4,WeightedDegree_fdr_1e-4,Degree_fdr_1e-5,WeightedDegree_fdr_1e-5,Degree_fdr_1e-6,WeightedDegree_fdr_1e-6
0,FBgn0031081,body,0.038366,270,0.113440,0,,0,,0,...,158,0.120611,26,0.143284,6,0.157670,1,0.168032,0,
1,FBgn0031080,body,0.034086,142,0.114631,0,,0,,0,...,93,0.120563,18,0.139415,1,0.161738,0,,0,
2,FBgn0053217,body,0.027842,26,0.110019,0,,0,,0,...,15,0.113821,0,,0,,0,,0,
3,FBgn0052350,body,0.057266,1374,0.134153,49,0.217163,0,,0,...,1141,0.140415,604,0.160620,336,0.177346,212,0.188960,120,0.201009
4,FBgn0024733,body,0.108547,3554,0.193872,1127,0.305479,342,0.461753,152,...,3320,0.200239,2563,0.224517,2033,0.246402,1676,0.265091,1404,0.282785
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8872,FBgn0031309,head,0.030923,77,0.112013,0,,0,,0,...,30,0.124679,7,0.145454,2,0.164880,1,0.169586,0,
8873,FBgn0031305,head,0.093991,2858,0.203305,1041,0.313529,442,0.411247,228,...,2565,0.214554,2016,0.240131,1665,0.260940,1420,0.278580,1231,0.294615
8874,FBgn0016926,head,0.053447,1227,0.134456,68,0.223093,0,,0,...,934,0.143815,489,0.165292,279,0.183672,179,0.197259,117,0.209819
8875,FBgn0031299,head,0.031718,81,0.110465,0,,0,,0,...,24,0.123319,6,0.137336,0,,0,,0,


In [85]:
# Write the stacked per-gene statistics (body rows followed by head rows) to CSV.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# NOTE(review): the file name carries "Jul21.21" and "onlygenesinmainchr" while the
# inputs above are the "Aug3121" tables — confirm the name is still the intended one.
pd.concat([gene_stats["body"], gene_stats["head"]]).to_csv("../data/output/connectivity_stats_VOOMCounts_CPM1_headbody_ctrl_onlygenesinmainchr_covfree_Jul21.21.csv")

In [91]:
# Rebind g_body / g_head to different stored graphs (body loaded first, then head).
# g_dict, built in an earlier cell, still holds the previously loaded graph objects.
g_body, g_head = map(
    load_graph,
    (
        "../data/output/SBM/graphs/body_fdrLevel-1e-05_genes-3253_density-0.052.xml.gz",
        "../data/output/SBM/graphs/head_fdrLevel-0.0001_genes-3589_density-0.021.xml.gz",
    ),
)

In [92]:
### Input expression data
# This rebinds folder, gene_expr_body, gene_expr_head and gene_expr_dict to a second
# pair of expression tables; the per-tissue dict again stores the transposed tables.
folder = "/Genomics/ayroleslab2/lamaya/bigProject/eQTLcatalog/modularity/matrices/"
gene_expr_body = pd.read_table(f"{folder}VOOMCounts_CPM1_body_ctrl_249ind_counts3M_covfree_Aug3121.txt")
gene_expr_head = pd.read_table(f"{folder}VOOMCounts_CPM1_head_ctrl_248ind_counts3M_covfree_Aug3121.txt")
gene_expr_dict = dict(head=gene_expr_head.T, body=gene_expr_body.T)

In [96]:
# Export the body gene table for the graph loaded from the fdrLevel-1e-05 file.
# saveGeneTable is not defined in this notebook (presumably it comes from the
# trim_networks / fit_sbm star imports — TODO confirm); the call's return value
# is what the cell displays.
saveGeneTable(g_body, gene_expr_dict["body"], "body_table_MMC_fdrLevel-1e-05.csv", transpose = True)

Unnamed: 0,108_C8,112_A12,112_B12,112_D7,112_E12,112_F8,112_F7,112_F6,112_G12,112_G8,...,25_G6,25_G5,25_H3,25_H10,25_H9,25_H8,25_H7,25_H6,25_H5,25_H4
FBgn0052350,4.666673,4.725106,4.489804,3.965429,5.386417,4.324769,4.433128,6.174101,4.319237,3.724704,...,4.692688,4.409530,4.268967,5.256499,4.765600,4.934845,5.246194,5.178693,3.789783,4.106381
FBgn0024733,12.696374,12.551650,12.695390,12.785993,12.257801,12.279272,12.827803,12.811845,12.271877,12.999786,...,12.474309,12.239402,12.591404,12.538048,12.576038,12.740517,12.474053,12.941661,12.691720,12.554564
FBgn0040372,5.999112,6.643936,6.998040,6.097233,5.450841,6.910218,6.854690,3.627779,7.375418,4.251846,...,5.992034,6.756598,5.765587,6.081621,6.030482,5.814569,6.047758,5.682712,5.900457,5.977577
FBgn0023537,7.341523,7.535076,7.322965,7.131314,7.383753,7.240970,6.833788,7.366229,6.790949,7.085584,...,7.255819,7.256800,7.840498,7.454328,7.359523,6.979546,6.974246,7.039977,7.509133,7.429816
FBgn0025640,5.985546,6.945339,5.470562,5.908373,5.451714,5.281303,6.323340,6.275304,4.018803,6.583160,...,5.363152,5.521469,6.018057,6.078494,6.418687,6.831483,5.814769,5.670190,5.304898,5.699454
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
FBgn0040723,6.105179,4.802297,6.123859,6.164717,5.895083,6.352962,5.822638,6.679689,4.604728,6.239176,...,6.180312,5.523158,5.969279,7.037078,5.334698,4.759996,6.025050,7.270884,6.816498,7.129507
FBgn0031313,5.918264,6.171835,6.340808,6.089399,6.149789,5.875911,6.067641,6.070273,5.481993,6.455578,...,5.014249,5.829295,6.554390,6.272824,5.218400,5.947117,5.727823,5.698852,6.603801,5.761063
FBgn0031305,4.761648,4.611826,5.461739,4.421460,5.260187,4.115146,6.167421,5.307882,3.420034,3.390454,...,5.140925,4.868978,5.307307,5.497945,4.869616,3.957237,5.127294,4.636585,5.642494,4.077977
FBgn0016926,7.339149,7.180547,7.404588,7.464626,7.876966,7.098806,7.164017,7.463492,7.077461,8.049890,...,7.531820,7.181787,7.834116,7.635872,7.140892,7.287072,7.043663,7.567542,6.972575,7.554466


In [97]:
# Export the head gene table for the graph loaded from the fdrLevel-0.0001 file.
# saveGeneTable is not defined in this notebook (presumably it comes from the
# trim_networks / fit_sbm star imports — TODO confirm); the call's return value
# is what the cell displays.
saveGeneTable(g_head, gene_expr_dict["head"], "head_table_MMC_fdrLevel-1e-04.csv", transpose = True)

Unnamed: 0,106_B7,106_C6,106_A11,106_E4,106_A2,106_G6,106_H4,106_A7,108_A3,108_A4,...,91_B8,91_H6,91_A10,91_A7,91_D5,91_F10,91_F8,91_G6,91_H4,91_H5
FBgn0031081,4.930257,5.116940,5.130395,5.117535,4.872438,5.076564,4.784349,5.357564,5.073539,5.139573,...,5.109125,4.658383,5.422209,4.767662,5.276421,5.906908,4.916268,4.853495,4.645266,4.926815
FBgn0024733,12.446084,12.371157,11.731869,11.536977,11.666225,11.777192,11.766539,11.846365,11.537174,12.145302,...,11.802386,11.528199,11.825508,11.981789,12.187418,11.900221,11.917156,11.858870,11.719484,12.206980
FBgn0023537,7.765844,7.265553,7.970708,7.647486,7.343377,7.745795,7.583874,7.530910,7.425124,7.276209,...,7.798779,7.696549,7.529240,7.623707,7.940260,7.262788,7.338602,7.895260,7.453813,7.527935
FBgn0000108,8.747210,9.105088,9.252584,9.413587,9.091358,9.318069,9.211433,9.299391,9.121894,9.351398,...,9.167501,8.765370,9.034739,9.082118,9.380982,9.121801,9.371622,9.282389,9.431019,9.160859
FBgn0025640,5.321905,5.467671,4.679742,5.502545,5.263348,5.582923,5.526607,5.361313,5.245509,5.590256,...,5.651988,5.179148,5.954432,5.589088,5.518766,4.719776,4.981229,5.424587,4.862859,5.500694
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
FBgn0031312,4.676809,5.328336,4.376806,4.824994,4.889144,4.580169,5.376145,4.660123,4.291399,4.706139,...,4.643149,6.227836,4.751461,4.498571,4.750080,4.331867,4.345303,5.127194,4.671457,5.043739
FBgn0031318,5.120889,4.640974,5.259675,5.388349,4.783267,4.146392,5.420461,5.074531,5.706937,4.782010,...,4.961650,4.671093,4.434347,5.286002,4.878753,5.522341,4.591386,5.235887,5.004348,5.207365
FBgn0031313,5.460565,7.500843,7.167708,7.832304,6.245783,7.513841,4.573734,7.412862,7.887702,5.401785,...,7.246642,5.514437,5.821060,7.529048,7.289258,6.137463,6.810809,6.855183,7.378696,6.551953
FBgn0031305,6.004027,5.168781,4.750899,5.241088,4.865616,5.907712,6.308029,4.944960,4.849428,5.510896,...,3.624463,5.692812,5.359399,5.772243,6.361252,4.719096,5.021679,6.053737,5.605705,5.778503
