In [2]:
import os
os.environ["OMP_NUM_THREADS"] = "32"
from graph_tool.all import *
import pandas as pd
import numpy as np
import scipy as sp
from sklearn.covariance import LedoitWolf, OAS
import matplotlib.pyplot as plt
import matplotlib.cm as mpl
import seaborn as sns
import statsmodels.api as sm
from multipy.fdr import qvalue
from multipy.fdr import lsu

import dill

In [3]:
import sys
sys.path.append('/Genomics/argo/users/damelo/projects/HS-Expression-GxE/SBM/snakemake/scripts/')

from trim_networks import *

In [4]:
# Loading blocks...
def load_blocks(blocks, n_levels=6):
    """Load a dill-serialized object and keep only its leading entries.

    Parameters
    ----------
    blocks : str or path-like
        Path of the dill file to read. The deserialized object must
        support slicing.
    n_levels : int, optional
        How many leading entries to keep. Defaults to 6, the value that
        used to be hard-coded here.

    Returns
    -------
    The slice ``[:n_levels]`` of the deserialized object.
    """
    # NOTE(review): dill.load can execute arbitrary code while unpickling;
    # only point this at files produced by a trusted pipeline.
    with open(blocks, "rb") as fh:
        return dill.load(fh)[:n_levels]
# For each unique element in x, find its first appearance in x
def first_occurrence(x):
    """Return the index of the first occurrence of every distinct value in ``x``.

    The indices are ordered by the sorted distinct values (the order used
    by ``np.unique``) and refer to the flattened input.
    """
    unique_result = np.unique(x, return_index=True)
    first_positions = unique_result[1]
    return first_positions

def labelVertices(state):
    """Annotate ``state.g`` with block ids and one text label per block.

    Two vertex properties are written on the graph:

    * ``level_0`` ("double"): the values of ``state.get_bs()[0]``.
    * ``labels`` ("string"): the integer block id, as text, on the first
      vertex at which each distinct ``level_0`` value appears, and the
      empty string on every other vertex.

    Parameters
    ----------
    state : object exposing ``.g`` (a graph) and ``.get_bs()``.

    Returns
    -------
    The same ``state`` object, with the graph modified in place.
    """
    g = state.g
    g.vp.level_0 = g.new_vertex_property("double", state.get_bs()[0])
    # A set gives O(1) membership tests; checking `i in <ndarray>` for every
    # vertex rescans the whole index array each time.
    first = set(first_occurrence(np.asarray(g.vp.level_0.a)).tolist())
    state.g.vp.labels = g.new_vp(
        "string",
        [str(int(x)) if i in first else "" for i, x in enumerate(g.vp.level_0.a)],
    )
    return state

In [6]:
# Locations of the cached trimmed graphs and MCMC block files.
g_path = '/Genomics/argo/users/damelo/projects/HS-Expression-GxE/SBM/snakemake/cache/trimmed_graph/fdr-1e-3/layered/'
b_path = '/Genomics/argo/users/damelo/projects/HS-Expression-GxE/SBM/snakemake/cache/MCMC/blocks/fdr-1e-3/layered/'

tissues = ['head', 'body']
conditions = ['hs', 'ctrl']

# One graph per tissue, keyed by tissue name.
graphs = {}
for tissue in tissues:
    graphs[f'{tissue}'] = load_graph(g_path + f'{tissue}.xml.gz')

# One block list per tissue, keyed by tissue name.
blocks = {}
for tissue in tissues:
    blocks[f'{tissue}'] = load_blocks(b_path + f'{tissue}.dill')

# Every tissue/condition combination, tissue-major order.
labels = []
for tissue in tissues:
    for condition in conditions:
        labels.append(f'{tissue}-{condition}')

In [7]:
# Build one standalone graph per tissue/condition pair, keyed '<tissue>-<condition>'.
cond_graphs = {}
for t in tissues:
    tissue_graph = graphs[t]
    edge_dataset = tissue_graph.ep.dataset
    for c in conditions:
        label = f'{t}-{c}'
        print(label)
        # Keep only the edges whose `dataset` edge property equals this condition.
        condition_view = GraphView(tissue_graph, efilt=lambda e: edge_dataset[e] == c)
        # Copy the filtered view into an independent Graph object.
        cond_graphs[label] = Graph(condition_view, prune=True)

head-hs
head-ctrl
body-hs
body-ctrl


In [9]:
# Display the dict of per-condition graphs (keys are '<tissue>-<condition>').
cond_graphs

{'head-hs': <Graph object, undirected, with 6826 vertices and 4767735 edges, 2 internal vertex properties, 5 internal edge properties, at 0x7f6a39ec1990>,
 'head-ctrl': <Graph object, undirected, with 6826 vertices and 5097528 edges, 2 internal vertex properties, 5 internal edge properties, at 0x7f6a3950c2d0>,
 'body-hs': <Graph object, undirected, with 6575 vertices and 1303683 edges, 2 internal vertex properties, 5 internal edge properties, at 0x7f6a3b212cd0>,
 'body-ctrl': <Graph object, undirected, with 6575 vertices and 2924794 edges, 2 internal vertex properties, 5 internal edge properties, at 0x7f6b8858e390>}

In [12]:
# Pick one condition-specific graph and look up two of its internal properties.
g = cond_graphs['head-hs']
# Not the last expression of the cell: evaluated, but its value is not displayed.
g.vertex_properties["genes"]
# Last expression of the cell: its repr is what the notebook shows.
g.edge_properties["spearman"]

<EdgePropertyMap object with value type 'double', for Graph 0x7f6a39ec1990, at 0x7f6a3956c050>

In [18]:
output_folder = '/Genomics/argo/users/damelo/projects/HS-Expression-GxE/output/'

# Write one per-gene connectivity table (CSV) for each head condition graph.
for t in ['head']:
    for c in ['hs', 'ctrl']:
        label = t + "-" + c
        print(label)
        g = cond_graphs[label]
        genes = g.vertex_properties["genes"]
        corr = g.edge_properties["spearman"]

        # Collect plain records and build the DataFrame once; growing a frame
        # with `.loc[v] = ...` re-allocates on every row.
        records = []
        for v in g.iter_vertices():
            # Column 2 of the returned edge array holds the requested
            # `spearman` edge property for every edge touching v.
            abs_corr = np.abs(g.get_all_edges(v, [corr])[:, 2])
            # A vertex without edges gets NaN (what np.mean would return)
            # without triggering the empty-slice RuntimeWarning.
            mean_abs_corr = abs_corr.mean() if abs_corr.size else np.nan
            records.append((genes[v], t, c,
                            mean_abs_corr,
                            g.get_total_degrees([v])[0],
                            # Same quantity as Average_Spearman, as in the
                            # original cell; computed once and reused.
                            mean_abs_corr))

        block_df = pd.DataFrame(records,
                                columns=('Gene', 'Tissue', 'Condition',
                                         "Average_Spearman",
                                         'Degree',
                                         'WeightedDegree'))
        block_df.to_csv(output_folder + "gene_connectivity-" + t + "_" + c + "-fdr1e-3.csv")

head-hs


head-ctrl


In [83]:
# Per-gene network statistics, keyed by the tissue each entry was computed for.
# The keys must be the tissue names: the cells that follow index this dict
# with "body" and "head", which raised KeyError with the old "ctrl"/"hs" keys.
gene_stats = {"body": getGeneNetworkStats(g_dict["body"], "body"),
              "head": getGeneNetworkStats(g_dict["head"], "head")}

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [84]:
# Stack the body and head tables for display. DataFrame.append was removed in
# pandas 2.0; pd.concat keeps each table's original index, as append did.
pd.concat([gene_stats["body"], gene_stats["head"]])

Unnamed: 0,Gene,Tissue,Average_Spearman,Degree_thr_0.1,WeightedDegree_thr_0.1,Degree_thr_0.2,WeightedDegree_thr_0.2,Degree_thr_0.3,WeightedDegree_thr_0.3,Degree_thr_0.4,...,Degree_fdr_1e-2,WeightedDegree_fdr_1e-2,Degree_fdr_1e-3,WeightedDegree_fdr_1e-3,Degree_fdr_1e-4,WeightedDegree_fdr_1e-4,Degree_fdr_1e-5,WeightedDegree_fdr_1e-5,Degree_fdr_1e-6,WeightedDegree_fdr_1e-6
0,FBgn0031081,body,0.038366,270,0.113440,0,,0,,0,...,158,0.120611,26,0.143284,6,0.157670,1,0.168032,0,
1,FBgn0031080,body,0.034086,142,0.114631,0,,0,,0,...,93,0.120563,18,0.139415,1,0.161738,0,,0,
2,FBgn0053217,body,0.027842,26,0.110019,0,,0,,0,...,15,0.113821,0,,0,,0,,0,
3,FBgn0052350,body,0.057266,1374,0.134153,49,0.217163,0,,0,...,1141,0.140415,604,0.160620,336,0.177346,212,0.188960,120,0.201009
4,FBgn0024733,body,0.108547,3554,0.193872,1127,0.305479,342,0.461753,152,...,3320,0.200239,2563,0.224517,2033,0.246402,1676,0.265091,1404,0.282785
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8872,FBgn0031309,head,0.030923,77,0.112013,0,,0,,0,...,30,0.124679,7,0.145454,2,0.164880,1,0.169586,0,
8873,FBgn0031305,head,0.093991,2858,0.203305,1041,0.313529,442,0.411247,228,...,2565,0.214554,2016,0.240131,1665,0.260940,1420,0.278580,1231,0.294615
8874,FBgn0016926,head,0.053447,1227,0.134456,68,0.223093,0,,0,...,934,0.143815,489,0.165292,279,0.183672,179,0.197259,117,0.209819
8875,FBgn0031299,head,0.031718,81,0.110465,0,,0,,0,...,24,0.123319,6,0.137336,0,,0,,0,


In [85]:
# Save the stacked body + head table. DataFrame.append was removed in
# pandas 2.0; pd.concat keeps each table's original index, as append did.
pd.concat([gene_stats["body"], gene_stats["head"]]).to_csv("../data/output/connectivity_stats_VOOMCounts_CPM1_headbody_ctrl_onlygenesinmainchr_covfree_Jul21.21.csv")

In [5]:
# Load the saved graph for each tissue.
# NOTE(review): the two file names carry different fdrLevel values
# (body: 0.001, head: 0.01) -- confirm this asymmetry is intended.
g_body = load_graph("../data/output/SBM/graphs/body_fdrLevel-0.001_genes-5124_density-0.059.xml.gz")
g_head = load_graph("../data/output/SBM/graphs/head_fdrLevel-0.01_genes-5261_density-0.033.xml.gz")

In [6]:
### Input expression data
# Directory holding the expression tables, and the per-tissue file names.
folder = "/Genomics/ayroleslab2/lamaya/bigProject/eQTLcatalog/modularity/matrices/"
body_expr_file = "VOOMCounts_CPM1_body_ctrl_249ind_counts3M_covfree_Aug3121.txt"
head_expr_file = "VOOMCounts_CPM1_head_ctrl_248ind_counts3M_covfree_Aug3121.txt"

# Read each table as-is.
gene_expr_body = pd.read_table(folder + body_expr_file)
gene_expr_head = pd.read_table(folder + head_expr_file)

# Keep the transposed tables, keyed by tissue.
head_transposed = gene_expr_head.T
body_transposed = gene_expr_body.T
gene_expr_dict = {"head": head_transposed, "body": body_transposed}

In [11]:
# Write two gene tables from the body graph and the body expression table.
# Going by the output file names, the first (transpose = True) is the "MMC"
# table and the second is the "WGCNA" table.
# NOTE(review): saveGeneTable is not defined in this part of the notebook --
# confirm what `transpose` controls. Only the second call's return value is
# displayed, since it is the last expression of the cell.
saveGeneTable(g_body, gene_expr_dict["body"], "../data/output/SBM/body_table_MMC_fdrLevel-1e-03.csv", transpose = True)
saveGeneTable(g_body, gene_expr_dict["body"], "../data/output/SBM/body_table_WGCNA_fdrLevel-1e-03.csv")

Unnamed: 0,FBgn0052350,FBgn0024733,FBgn0040372,FBgn0023536,FBgn0023537,FBgn0029525,FBgn0040373,FBgn0025640,FBgn0025635,FBgn0001341,...,FBgn0031318,FBgn0040723,FBgn0031320,FBgn0031317,FBgn0024314,FBgn0031313,FBgn0031309,FBgn0031305,FBgn0016926,FBgn0003310
108_C8,4.666673,12.696374,5.999112,4.013996,7.341523,4.550885,3.873485,5.985546,4.935493,4.609722,...,3.887492,6.105179,3.545199,3.857863,5.732161,5.918264,4.747071,4.761648,7.339149,5.698606
112_A12,4.725106,12.551650,6.643936,2.969763,7.535076,4.682938,4.290474,6.945339,5.127323,4.440915,...,3.181654,4.802297,1.913350,4.012728,6.013415,6.171835,4.721000,4.611826,7.180547,6.533150
112_B12,4.489804,12.695390,6.998040,4.894730,7.322965,4.661880,2.322034,5.470562,4.769568,5.487188,...,2.604839,6.123859,6.037593,3.444710,5.307450,6.340808,5.773387,5.461739,7.404588,6.472540
112_D7,3.965429,12.785993,6.097233,4.212551,7.131314,5.011071,3.361021,5.908373,4.055271,2.084384,...,5.489655,6.164717,3.102528,4.673237,6.327470,6.089399,4.731841,4.421460,7.464626,4.820018
112_E12,5.386417,12.257801,5.450841,5.299829,7.383753,4.747985,3.254762,5.451714,5.404206,4.075736,...,6.150032,5.895083,2.927556,3.969305,4.674796,6.149789,4.877856,5.260187,7.876966,6.547870
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25_H8,4.934845,12.740517,5.814569,4.389145,6.979546,3.923241,3.855806,6.831483,5.016134,5.336107,...,4.835807,4.759996,3.034198,3.481802,6.131827,5.947117,4.160251,3.957237,7.287072,5.192880
25_H7,5.246194,12.474053,6.047758,4.473454,6.974246,4.955966,3.912796,5.814769,5.129959,4.667557,...,3.714206,6.025050,3.507084,4.688855,5.502931,5.727823,4.944825,5.127294,7.043663,5.596092
25_H6,5.178693,12.941661,5.682712,3.528161,7.039977,4.668518,5.097504,5.670190,3.784694,4.072969,...,3.077406,7.270884,2.972862,3.123105,6.492631,5.698852,4.874241,4.636585,7.567542,4.995451
25_H5,3.789783,12.691720,5.900457,2.585071,7.509133,4.178104,2.483198,5.304898,4.878069,4.461653,...,2.789720,6.816498,4.159668,3.689030,5.933878,6.603801,5.941063,5.642494,6.972575,5.989087


In [12]:
# Write two gene tables from the head graph and the head expression table.
# Going by the output file names, the first (transpose = True) is the "MMC"
# table and the second is the "WGCNA" table.
# NOTE(review): saveGeneTable is not defined in this part of the notebook --
# confirm what `transpose` controls. Only the second call's return value is
# displayed, since it is the last expression of the cell.
saveGeneTable(g_head, gene_expr_dict["head"], "../data/output/SBM/head_table_MMC_fdrLevel-1e-02.csv", transpose = True)
saveGeneTable(g_head, gene_expr_dict["head"], "../data/output/SBM/head_table_WGCNA_fdrLevel-1e-02.csv")

Unnamed: 0,FBgn0031081,FBgn0052350,FBgn0024733,FBgn0040372,FBgn0023534,FBgn0023537,FBgn0000108,FBgn0025640,FBgn0003575,FBgn0025634,...,FBgn0028481,FBgn0031304,FBgn0031306,FBgn0031310,FBgn0031312,FBgn0031318,FBgn0031313,FBgn0031305,FBgn0016926,FBgn0003310
106_B7,4.930257,4.508820,12.446084,2.998503,4.254751,7.765844,8.747210,5.321905,3.958489,3.301961,...,5.523266,3.767689,7.302393,5.547475,4.676809,5.120889,5.460565,6.004027,7.364620,4.434400
106_C6,5.116940,3.499342,12.371157,4.103512,4.454292,7.265553,9.105088,5.467671,3.894698,3.434972,...,5.343685,3.771723,7.567253,4.495246,5.328336,4.640974,7.500843,5.168781,8.030241,4.459652
106_A11,5.130395,2.191575,11.731869,3.629449,4.523700,7.970708,9.252584,4.679742,3.868023,3.940965,...,5.598387,4.216300,7.801604,4.072101,4.376806,5.259675,7.167708,4.750899,7.640526,4.701143
106_E4,5.117535,3.808431,11.536977,4.321388,3.246516,7.647486,9.413587,5.502545,4.305092,3.661570,...,6.119292,3.977472,7.398697,4.565921,4.824994,5.388349,7.832304,5.241088,7.831294,4.304306
106_A2,4.872438,3.596606,11.666225,4.027889,4.576394,7.343377,9.091358,5.263348,3.886851,3.667850,...,5.342655,3.586015,7.432748,5.171387,4.889144,4.783267,6.245783,4.865616,7.747890,4.193615
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91_F10,5.906908,4.191011,11.900221,3.736749,4.391849,7.262788,9.121801,4.719776,4.465559,3.827256,...,5.771786,3.389047,7.277369,4.911614,4.331867,5.522341,6.137463,4.719096,7.863104,3.439760
91_F8,4.916268,3.996113,11.917156,4.312322,3.593919,7.338602,9.371622,4.981229,3.287729,3.394885,...,5.850625,3.340536,7.515939,3.818818,4.345303,4.591386,6.810809,5.021679,7.349856,5.105646
91_G6,4.853495,3.128001,11.858870,3.682921,4.684516,7.895260,9.282389,5.424587,4.239814,3.922597,...,5.467999,3.693077,7.371023,4.714244,5.127194,5.235887,6.855183,6.053737,7.485812,4.088535
91_H4,4.645266,3.177461,11.719484,3.737951,5.076319,7.453813,9.431019,4.862859,4.008583,3.549151,...,6.087302,4.122579,7.725842,4.689232,4.671457,5.004348,7.378696,5.605705,7.757252,5.000932
