In [32]:
import os
os.environ["OMP_NUM_THREADS"] = "32"
from graph_tool.all import *
import pandas as pd
import numpy as np
import scipy as sp
from sklearn.covariance import LedoitWolf, OAS
import matplotlib.pyplot as plt
import matplotlib.cm as mpl
import seaborn as sns
import statsmodels.api as sm
from multipy.fdr import qvalue
from multipy.fdr import lsu

import dill

In [34]:
# Loading blocks...
def load_blocks(blocks):
    with open (blocks, "rb") as fh:
        bs = dill.load(fh)[0:6]
    return bs

def filterByFDR(g, level, pval, keepOnlyMain=False):
    # Filtering edges
    pvals = np.array(g.edge_properties[pval].a)

    fdr_ep = g.new_ep("bool", True)
    fdr_ep.a = lsu(pvals, q=level)

    tv = GraphView(g, efilt=fdr_ep)

    # Keeping largest component
    if keepOnlyMain:
        comp, hist = label_components(tv)
        main_component = tv.new_vp("bool", (comp.a == np.where(hist == max(hist))[0][0]))
        tv.vertex_properties["main_component"] = main_component
        tv.set_vertex_filter(main_component)
    return tv

def filterBySign(g, ep, positive = True, keepOnlyMain = False):
    # Filtering edges
    corr = g.edge_properties[ep]
    sign = g.new_ep("bool", True)
    if positive:
        sign.a = np.array(corr.a > 0)
    else:
        sign.a = np.array(corr.a < 0)    

    tv = GraphView(g, efilt=sign)

    # Keeping largest component
    if keepOnlyMain:
        comp, hist = label_components(tv)
        main_component = tv.new_vp("bool", (comp.a == np.where(hist == max(hist))[0][0]))
        tv.vertex_properties["main_component"] = main_component
        tv.set_vertex_filter(main_component)
    return tv
    
# For each unique element in x, find its first apperance in x
def first_occurrence(x):
    _, idx = np.unique(x, return_index=True)
    return idx

def labelVertices(state):
    g = state.g
    g.vp.level_0 = g.new_vertex_property("double", state.get_bs()[0])
    first = first_occurrence(np.array([g.vp.level_0.a]))
    state.g.vp.labels = g.new_vp("string", [str(int(x)) if i in first else "" for i, x in enumerate(g.vp.level_0.a)])
    return state

In [35]:
g_path = '../../SBM/snakemake/cache/trimmed_graph/fdr-1e-3/layered/'
tissues = ['head', 'body']
conditions = ['hs', 'ctrl']
labels = [f'{tissue}-{condition}' for tissue in tissues for condition in conditions]

In [None]:
# graphs = {f'{tissue}':load_graph(g_path + f'{tissue}.xml.gz') for tissue in tissues}
b_path = '../../SBM/snakemake/cache/MCMC/blocks/fdr-1e-3/layered/'
blocks = {f'{tissue}':load_blocks(b_path + f'{tissue}.dill') for tissue in tissues}

In [37]:
# Read cond graphs
cond_path = '../../cache/'
cond_graphs = {f'{tissue}-{condition}':load_graph(cond_path + f'cond_graph-{tissue}-{condition}.xml.gz') for tissue in tissues for condition in conditions}

In [None]:
# Read clip graphs
clip_path = '../../cache/'
clip_graphs = {f'{tissue}':load_graph(clip_path + f'clip_g_{tissue}.xml.gz') for tissue in tissues}

In [39]:
def makeClipGraph (current_tissue, clip_fdr, pos, n_edges = 50000, make_plots = False):

    # output path
    output_path = "../../tmp/clip/" + "fdr-" + str(clip_fdr) + "/" + current_tissue + "-nEdges_" + str(n_edges) 
    # create output directory
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    g_clip = clip_graphs[current_tissue]
    gNeg= filterByFDR(g_clip, clip_fdr, 'clip_p')
    gNeg = Graph(gNeg, prune=True)

    gNeg = filterBySign(gNeg, 'clip_c', positive = False)
    gNeg = Graph(gNeg, prune=True)
    gNeg.ep.clip_c.a = np.abs(gNeg.ep.clip_c.a)
    gNeg.vp.pos = gNeg.copy_property(pos)

    bs = blocks[current_tissue]

    s_Neg = NestedBlockState(gNeg, bs=bs,
                            state_args=dict(recs=[gNeg.ep.clip_c],
                                            rec_types=["real-normal"]))
    labelVertices(s_Neg)
    if make_plots:
        s_Neg.draw(eorder=gNeg.ep.clip_c,
                    pos = gNeg.vp.pos, 
                    edge_color=prop_to_size(gNeg.ep.clip_c, mi=0, ma=1, power=1, log=False),
                    ecmap=(mpl.inferno, .6), 
                    edge_gradient=[], 
                    vertex_size = 10,
                    vertex_text = s_Neg.g.vp.labels,
                    vertex_text_position='centered',
                    hvertex_size = 25,
                    hedge_pen_width = 3,
                    subsample_edges = n_edges,
                    output = output_path + "-decohere.png", 
                    output_size=(2000, 2000))

    g_clip = clip_graphs[current_tissue]
    gPos = filterByFDR(g_clip, clip_fdr, 'clip_p')
    gPos = Graph(gPos, prune=True)
    gPos = filterBySign(gPos, 'clip_c', positive = True)
    gPos = Graph(gPos, prune=True)
    gPos.vp.pos = gPos.copy_property(pos)

    bs = blocks[current_tissue]
    s_Pos = NestedBlockState(gPos, bs=bs,
                            state_args=dict(recs=[gPos.ep.clip_c],
                                            rec_types=["real-normal"]))
    labelVertices(s_Pos)
    if make_plots:
        s_Pos.draw( eorder=gPos.ep.clip_c,
                    pos = gPos.vp.pos, 
                    edge_color=prop_to_size(gPos.ep.clip_c, mi=0, ma=1, power=1, log=False),
                    ecmap=(mpl.inferno, .6), 
                    edge_gradient=[], 
                    vertex_size = 10,
                    vertex_text = s_Pos.g.vp.labels,
                    vertex_text_position='centered',
                    hvertex_size = 25,
                    hedge_pen_width = 3,
                    subsample_edges = n_edges,
                    output = output_path + "-integrate.png", 
                    output_size=(2000, 2000))
    return {"decohere": s_Neg, "integrate": s_Pos}


In [42]:
# Make clip graphs for various FDRs
fdrs = [1e-1, 1e-2, 1e-3, 1e-4]
n_edges = 50000
clip_state = {}
for tissue in tissues:
    print(f"Tissue: {tissue}")
    clip_state[tissue] = {}
    for fdr in fdrs:
        print(f"Making clip graphs for FDR {fdr}")
        l = tissue + '-ctrl'
        clip_state[tissue][fdr] = makeClipGraph(tissue, fdr, cond_graphs[l].vp.pos, n_edges)

Tissue: head
Making clip graphs for FDR 0.1
Making clip graphs for FDR 0.01
Making clip graphs for FDR 0.001
Making clip graphs for FDR 0.0001
Tissue: body
Making clip graphs for FDR 0.1
Making clip graphs for FDR 0.01
Making clip graphs for FDR 0.001
Making clip graphs for FDR 0.0001


In [66]:
def create_nestedBlock_df(g, corr, state):
    genes = g.vertex_properties["genes"]
    nested_block_df = pd.DataFrame(columns=('Gene', "Degree", "Average", 'Sum', 'B1', "B2", "B3", "B4", "B5", "B6"))
    for v in g.vertex_index:
        line = [genes[v]]
        line.append(g.get_total_degrees([v])[0])
        line.append(np.mean(g.get_all_edges(v, [corr] )[:,2]))
        line.append(np.sum(g.get_all_edges(v, [corr] )[:,2]))
        [line.append(i) for i in get_group(v, state)]
        nested_block_df.loc[v] = line
    nested_block_df = nested_block_df[nested_block_df.Degree > 0]
    return nested_block_df
def get_group(x, state):
    levels = state.get_levels()
    n_levels = len(levels)
    r = np.zeros(n_levels)
    r[0] = levels[0].get_blocks()[x]
    for i in range(1, n_levels):
        r[i] = levels[i].get_blocks()[r[i-1]]
    r = r.astype(int)
    return r

In [73]:
for t in tissues:
    decohere_df = create_nestedBlock_df(clip_state[t][1e-2]['decohere'].g, 
                                        clip_state[t][1e-2]['decohere'].g.ep.clip_c, 
                                        clip_state[t][1e-2]['decohere'])
    integrate_df = create_nestedBlock_df(clip_state[t][1e-2]['integrate'].g, 
                                         clip_state[t][1e-2]['integrate'].g.ep.clip_c, 
                                         clip_state[t][1e-2]['integrate'])
    merge = pd.merge(decohere_df, integrate_df, on='Gene', suffixes=('_decohere', '_integrate'))
    clip_Df = pd.concat([decohere_df, integrate_df],
                    axis=0, 
                    keys=['decohere', 'integrate'],
                    names=['direction']).reset_index(level=[0])
    # Write df to file
    clip_Df.to_csv("../../cache/" + f'clip_fdr-1e2_{t}.csv')

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=

In [64]:
genes = g.vertex_properties["genes"]
nested_block_df = pd.DataFrame(columns=('Gene', "Degree", "E_corr", 'B1', "B2", "B3", "B4", "B5", "B6"))
for v in g.vertex_index:
    line = [genes[v]]
    line.append(g.get_total_degrees([v])[0])
    line.append(np.mean(np.abs(g.get_all_edges(v, [corr] )[:,2])))
    [line.append(i) for i in get_group(v, state)]
    nested_block_df.loc[v] = line
# filter Degree = zero
#nested_block_df = nested_block_df[nested_block_df.Degree > 0]
# sort by Degree and E_corr
nested_block_df = nested_block_df.sort_values(by=['Degree', 'E_corr'], ascending=False)
nested_block_df

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=

Unnamed: 0,Gene,Degree,E_corr,B1,B2,B3,B4,B5,B6
6459,FBgn0053113,389,0.246210,230,46,9,1,1,1
303,FBgn0026372,301,0.250451,168,54,12,0,0,0
990,FBgn0014002,276,0.261460,247,63,14,0,0,0
151,FBgn0035423,268,0.304063,115,53,14,0,0,0
1816,FBgn0000042,265,0.313162,244,46,9,1,1,1
...,...,...,...,...,...,...,...,...,...
6819,FBgn0025684,0,,35,27,12,0,0,0
6821,FBgn0030738,0,,52,6,5,3,0,0
6822,FBgn0036973,0,,52,6,5,3,0,0
6823,FBgn0034439,0,,71,36,7,1,1,1


In [49]:

state = NestedBlockState(gNeg, bs=bs,
                        state_args=dict(recs=[gNeg.ep.clip_c],
                                        rec_types=["real-normal"]))
labelVertices(state)
pos, t, tpos = state.draw(eorder=state.g.ep.clip_c,
                        edge_color=prop_to_size(state.g.ep.clip_c, mi=0, ma=1, power=1, log=False),
                        ecmap=(mpl.inferno, .6), 
                        edge_gradient=[], 
                        vertex_size = 10,
                        vertex_text = state.g.vp.labels,
                        vertex_text_position='centered',
                        hvertex_size = 25,
                        hvertex_text = state.g.vp.labels,
                        hedge_pen_width = 3,
                        subsample_edges = 1000, 
                        output = "../../tmp/test.png", 
                        output_size=(2000, 2000))