In [7]:
import os
os.environ["OMP_NUM_THREADS"] = "32"
from graph_tool.all import *
import pandas as pd
import numpy as np
import scipy as sp
from sklearn.covariance import LedoitWolf, OAS
import matplotlib.pyplot as plt
import matplotlib.cm as mpl
import seaborn as sns
import statsmodels.api as sm
from multipy.fdr import qvalue
from multipy.fdr import lsu

import dill

In [8]:
# Loading blocks...
def load_blocks(blocks, n_levels=6):
    """Load the leading entries of a block hierarchy stored in a dill file.

    Parameters
    ----------
    blocks : str or path-like
        Path of the dill file. The unpickled object must support slicing
        (presumably a list with one block assignment per hierarchy level --
        confirm against the code that wrote the file).
    n_levels : int or None, optional
        How many leading entries to keep. The default of 6 is the value that
        used to be hard-coded here; ``None`` keeps every entry.

    Returns
    -------
    The first ``n_levels`` entries of the unpickled object.
    """
    # NOTE(review): dill.load, like pickle.load, can run arbitrary code while
    # deserialising -- only point this at files produced by a trusted pipeline.
    with open(blocks, "rb") as fh:
        return dill.load(fh)[:n_levels]

def filterByFDR(g, level, pval, keepOnlyMain=False):
    """Return a view of ``g`` keeping only edges that pass an FDR threshold.

    The values of the edge property named ``pval`` are handed to ``lsu``
    (multipy's linear step-up procedure) with ``q=level``; the boolean result
    becomes the edge filter of a ``GraphView`` over ``g``.

    With ``keepOnlyMain`` the view is further restricted to the vertices of
    its largest component (the first one on ties), and that vertex mask is
    stored on the view as the ``main_component`` vertex property.
    """
    p_values = np.array(g.edge_properties[pval].a)
    significant = g.new_ep("bool", True)
    significant.a = lsu(p_values, q=level)
    view = GraphView(g, efilt=significant)

    if not keepOnlyMain:
        return view

    # argmax yields the first index of the maximum, i.e. the first largest component.
    component_of, component_sizes = label_components(view)
    in_main = view.new_vp("bool", component_of.a == np.argmax(component_sizes))
    view.vertex_properties["main_component"] = in_main
    view.set_vertex_filter(in_main)
    return view

def filterBySign(g, ep, positive=True, keepOnlyMain=False):
    """Return a view of ``g`` keeping only edges whose ``ep`` value has one sign.

    ``positive=True`` keeps edges with a strictly positive value of the edge
    property named ``ep``; ``positive=False`` keeps the strictly negative
    ones. Edges whose value is exactly zero are dropped in both modes.

    With ``keepOnlyMain`` the view is further restricted to the vertices of
    its largest component (the first one on ties), and that vertex mask is
    stored on the view as the ``main_component`` vertex property.
    """
    weights = g.edge_properties[ep]
    keep = g.new_ep("bool", True)
    keep.a = np.array(weights.a > 0 if positive else weights.a < 0)
    view = GraphView(g, efilt=keep)

    if keepOnlyMain:
        component_of, component_sizes = label_components(view)
        # argmax yields the first index of the maximum, i.e. the first largest component.
        largest = np.argmax(component_sizes)
        in_main = view.new_vp("bool", component_of.a == largest)
        view.vertex_properties["main_component"] = in_main
        view.set_vertex_filter(in_main)
    return view
    
def first_occurrence(x):
    """Return, for each unique element of ``x``, the index of its first appearance.

    The indices refer to the flattened input and are ordered by sorted unique
    value, exactly as produced by ``np.unique(..., return_index=True)``.
    """
    return np.unique(x, return_index=True)[1]

def labelVertices(state):
    """Attach level-0 block ids and sparse text labels to ``state.g``.

    Two vertex properties are written on ``state.g``:

    * ``level_0`` ("double"): the first entry of ``state.get_bs()``
      (presumably the lowest-level block assignment -- confirm against the
      graph-tool version in use).
    * ``labels`` ("string"): the integer block id for the first vertex (in
      index order) of every distinct ``level_0`` value and an empty string
      for every other vertex, so each block is labelled exactly once.

    Returns
    -------
    The same ``state`` object, to allow chaining.
    """
    g = state.g
    g.vp.level_0 = g.new_vertex_property("double", state.get_bs()[0])
    block_ids = np.asarray(g.vp.level_0.a)

    # Mark first occurrences in a boolean mask so the per-vertex test is O(1);
    # `i in <ndarray>` would rescan the index array for every vertex.
    is_first = np.zeros(len(block_ids), dtype=bool)
    is_first[first_occurrence(block_ids)] = True

    g.vp.labels = g.new_vp(
        "string",
        [str(int(b)) if first else "" for b, first in zip(block_ids, is_first)],
    )
    return state

In [9]:
# Locations of the cached inputs, relative to the notebook's working directory.
g_path = '../../SBM/snakemake/cache/trimmed_graph/fdr-1e-3/layered/'
b_path = '../../SBM/snakemake/cache/MCMC/blocks/fdr-1e-3/layered/'
cond_path = '../../cache/'
clip_path = '../../cache/clip/'

tissues = ['head', 'body']
conditions = ['hs', 'ctrl']
# One "<tissue>-<condition>" label per combination, tissue-major.
labels = [
    f'{tissue}-{condition}'
    for tissue in tissues
    for condition in conditions
]

# The trimmed per-tissue graphs are currently not loaded:
# graphs = {f'{tissue}':load_graph(g_path + f'{tissue}.xml.gz') for tissue in tissues}

# Saved block assignments, one entry per tissue.
blocks = {
    tissue: load_blocks(b_path + f'{tissue}.dill')
    for tissue in tissues
}

# Per-condition graphs, keyed "<tissue>-<condition>".
cond_graphs = {
    f'{tissue}-{condition}': load_graph(cond_path + f'cond_graph-{tissue}-{condition}.xml.gz')
    for tissue in tissues
    for condition in conditions
}

# Clip graphs, one entry per tissue.
clip_graphs = {
    tissue: load_graph(clip_path + f'clip_g_{tissue}.xml.gz')
    for tissue in tissues
}

In [10]:
def _makeSignedClipState(current_tissue, clip_fdr, pos, positive, n_edges, plot_file=None):
    """Build (and optionally draw) the block state for one sign of the clip shift.

    Edges of ``clip_graphs[current_tissue]`` are filtered by FDR on ``clip_p``
    and then by the sign of ``clip_shift``; each filter is followed by a pruned
    copy, so the module-level clip graph itself is never modified. When
    ``plot_file`` is not None the state is drawn to that path.
    """
    g = filterByFDR(clip_graphs[current_tissue], clip_fdr, 'clip_p')
    g = Graph(g, prune=True)
    g = filterBySign(g, 'clip_shift', positive=positive)
    g = Graph(g, prune=True)
    if not positive:
        # Negative shifts are stored as magnitudes on the pruned copy.
        g.ep.clip_shift.a = np.abs(g.ep.clip_shift.a)
    g.vp.pos = g.copy_property(pos)

    state = NestedBlockState(g, bs=blocks[current_tissue],
                             state_args=dict(recs=[g.ep.clip_shift],
                                             rec_types=["real-normal"]))
    labelVertices(state)

    if plot_file is not None:
        state.draw(eorder=g.ep.clip_shift,
                   pos=g.vp.pos,
                   edge_color=prop_to_size(g.ep.clip_shift, mi=0, ma=1, power=1, log=False),
                   ecmap=(mpl.inferno, .6),
                   edge_gradient=[],
                   vertex_size=10,
                   vertex_text=state.g.vp.labels,
                   vertex_text_position='centered',
                   hvertex_size=25,
                   hedge_pen_width=3,
                   subsample_edges=n_edges,
                   output=plot_file,
                   output_size=(2000, 2000))
    return state


def makeClipGraph(current_tissue, clip_fdr, pos, n_edges=50000, make_plots=False):
    """Build nested block states for the negative and positive clip-shift edges.

    Reads the module-level ``clip_graphs`` and ``blocks`` dictionaries, both
    keyed by tissue.

    Parameters
    ----------
    current_tissue : str
        Key into ``clip_graphs`` and ``blocks``.
    clip_fdr : float
        Level passed to ``filterByFDR`` for the ``clip_p`` edge property.
    pos : vertex property map
        Copied onto each filtered graph as ``vp.pos`` and used as the layout
        when drawing.
    n_edges : int, optional
        Used in the output file name and as ``subsample_edges`` when drawing.
    make_plots : bool, optional
        When true, each state is drawn to
        ``../../tmp/clip/fdr-<clip_fdr>/<tissue>-nEdges_<n_edges>-<name>.png``.

    Returns
    -------
    dict
        ``{"decohere": state, "integrate": state}``. "decohere" is built from
        the edges with negative ``clip_shift`` (stored as absolute values),
        "integrate" from the edges with positive ``clip_shift``.
    """
    output_path = "../../tmp/clip/" + "fdr-" + str(clip_fdr) + "/" + current_tissue + "-nEdges_" + str(n_edges)
    # The output directory is created even when no plot is requested.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    states = {}
    # Same order as before the refactor: negative-shift graph first, then positive.
    for name, positive in (("decohere", False), ("integrate", True)):
        plot_file = output_path + "-" + name + ".png" if make_plots else None
        states[name] = _makeSignedClipState(current_tissue, clip_fdr, pos,
                                            positive, n_edges, plot_file)
    return states


In [11]:
# Build the clip block states for every tissue at each FDR threshold (no plots).
fdrs = [1e-1, 1e-2, 1e-3, 1e-4]
n_edges = 50000
clip_state = {}
for tissue in tissues:
    print(f"Tissue: {tissue}")
    # The vertex layout comes from the tissue's control-condition graph.
    l = tissue + '-ctrl'
    clip_state[tissue] = {}
    for fdr in fdrs:
        print(f"Making clip graphs for FDR {fdr}")
        clip_state[tissue][fdr] = makeClipGraph(
            tissue,
            fdr,
            cond_graphs[l].vp.pos,
            n_edges=n_edges,
            make_plots=False,
        )

Tissue: head
Making clip graphs for FDR 0.1
Making clip graphs for FDR 0.01
Making clip graphs for FDR 0.001
Making clip graphs for FDR 0.0001
Tissue: body
Making clip graphs for FDR 0.1
Making clip graphs for FDR 0.01
Making clip graphs for FDR 0.001
Making clip graphs for FDR 0.0001


In [20]:
# Write the block matrix of hierarchy levels 0..N_E_LEVELS-1 to CSV, one file
# per (FDR, tissue, decohere/integrate, level) combination.
E_folder = "../../tmp/clip/Ematrices"
N_E_LEVELS = 5  # number of hierarchy levels exported, starting at level 0
for fdr in fdrs:
    for t in tissues:
        out_folder = E_folder + "/" + f'fdr-{fdr}' + "/" + t + "/"
        os.makedirs(os.path.dirname(out_folder), exist_ok=True)
        for d in ['decohere', 'integrate']:
            state = clip_state[t][fdr][d]
            # Fetch the level list once per state rather than once per level.
            levels = state.get_levels()
            for i in range(N_E_LEVELS):
                e_mat = levels[i].get_matrix().todense()
                output_file = out_folder + f'{d}' + "_E_matrix_level" + str(i) + ".csv"
                pd.DataFrame(e_mat).to_csv(output_file)

In [29]:
# Dump the head clip graph as a long-format edge list: one CSV row per edge
# holding both endpoint gene names, the clip shift and the clip p-value.
g = clip_graphs['head']
genes = g.vp.genes
with open('../../cache/long_clip.csv', 'w') as file:
    file.write('Gene1,Gene2,shift,pval\n')
    # Rows are generated lazily while the file is still open.
    file.writelines(
        f'{genes[e.source()]},{genes[e.target()]},{g.ep.clip_shift[e]},{g.ep.clip_p[e]}\n'
        for e in g.edges()
    )