In [1]:
import os
os.environ["OMP_NUM_THREADS"] = "32"
from graph_tool.all import *
import pandas as pd
import numpy as np
import scipy as sp
from sklearn.covariance import LedoitWolf, OAS
import matplotlib.pyplot as plt
import matplotlib.cm as mpl
import seaborn as sns
import statsmodels.api as sm
from multipy.fdr import qvalue
from multipy.fdr import lsu

import dill

In [2]:
# Loading blocks...
def load_blocks(blocks):
    """Load a dill file and return the first six items of its contents.

    Parameters
    ----------
    blocks : str or path-like
        Path to the ``.dill`` file to read (opened in binary mode).

    Returns
    -------
    The ``[0:6]`` slice of the deserialized object.

    Notes
    -----
    dill, like pickle, can run arbitrary code while deserializing; only
    point this at files produced by a trusted pipeline.
    """
    with open(blocks, "rb") as handle:
        loaded = dill.load(handle)
    return loaded[0:6]

def filterByFDR(g, level, pval, keepOnlyMain=False):
    """Return a view of ``g`` keeping only edges that pass an FDR cut-off.

    Parameters
    ----------
    g : graph
        Graph whose edge property ``pval`` holds one p-value per edge.
    level : float
        Level passed to ``lsu`` as ``q``.
    pval : str
        Name of the edge property containing the p-values.
    keepOnlyMain : bool, optional
        When true, additionally restrict the view to the vertices of its
        largest component (first one on ties) and store that mask as the
        vertex property ``"main_component"``.

    Returns
    -------
    GraphView
        Edge-filtered (and optionally vertex-filtered) view of ``g``.
    """
    # Edge mask: the result of lsu() over the edge p-values is written
    # straight into a boolean edge property.
    significant = g.new_ep("bool", True)
    significant.a = lsu(np.array(g.edge_properties[pval].a), q=level)

    view = GraphView(g, efilt=significant)
    if not keepOnlyMain:
        return view

    # Label components on the filtered view and keep the most populous one.
    comp, hist = label_components(view)
    largest = np.argmax(hist)
    in_main = view.new_vp("bool", (comp.a == largest))
    view.vertex_properties["main_component"] = in_main
    view.set_vertex_filter(in_main)
    return view

In [3]:
# Input locations: trimmed graphs and MCMC block files, one per tissue.
g_path = '../../SBM/snakemake-layer/cache/trimmed_graph/fdr-1e-3/layered/'
b_path = '../../SBM/snakemake-layer/cache/MCMC/blocks/fdr-1e-3/layered/'
tissues = ['head', 'body']
conditions = ['hs', 'ctrl']

# One graph per tissue, keyed by tissue name.
graphs = {}
for t in tissues:
    graphs[t] = load_graph(g_path + t + '.xml.gz')

# Matching block files, keyed the same way.
blocks = {}
for t in tissues:
    blocks[t] = load_blocks(b_path + t + '.dill')

# Every tissue/condition combination, tissue-major ("head-hs", "head-ctrl", ...).
labels = [
    f'{tissue}-{condition}'
    for tissue in tissues
    for condition in conditions
]

# Collapse parallel edges in place.
for t in tissues:
    remove_parallel_edges(graphs[t])

In [4]:
# Directory holding one expression table (.tsv) per layer.
# NOTE(review): absolute, machine-specific path — consider deriving it from a
# configurable project root.
data_dir = '/Genomics/argo/users/damelo/projects/HS-Expression-GxE/SBM/rawData/layered/head'

# os.listdir() returns entries in arbitrary order. Sort so that the layer
# order (and therefore which tables sit at gene_expr[0] / gene_expr[1], and
# how they pair with input_names) is reproducible across runs and machines.
input_list = sorted(file for file in os.listdir(data_dir) if file.endswith(".tsv"))

# Layer names are the file names with the final extension removed.
input_names = [file.rpartition('.')[0] for file in input_list]

# Read each table and transpose it before storing.
gene_expr = []
for file in input_list:
    gene_expr_raw = pd.read_table(os.path.join(data_dir, file))
    gene_expr.append(gene_expr_raw.T)

n_layers = len(gene_expr)

In [5]:
from sklearn.linear_model import LinearRegression

# Stack the first two layers row-wise; the layer name becomes a regular
# 'source' column after the outer index level is reset.
gene_expr_concat = pd.concat(
    [gene_expr[0], gene_expr[1]],
    axis=0,
    keys=input_names,
    names=['source'],
).reset_index(level=[0])

# Layer label of every row.
source = gene_expr_concat['source']

# Expression values only, standardized per column over the stacked rows
# (column mean removed, divided by the square root of the column variance).
ge = gene_expr_concat.drop(columns=['source'])
z_scores = ge.sub(ge.mean()).div(np.sqrt(ge.var()))

# Model matrix: a column of ones (intercept) followed by the dummy coding of
# 'source' with its first level dropped.
X = pd.get_dummies(source, drop_first=True)
X = np.column_stack((np.ones(len(X)), X))

In [7]:
# Single gene pair: regress the product of the two z-scored columns on the
# model matrix and read the estimate and p-value of the second coefficient
# (row 1 of the coefficient table; row 0 is the column of ones).
y = z_scores['FBgn0031081'] * z_scores['FBgn0031080']
mod = sm.OLS(endog=y, exog=X)
fii = mod.fit()

# Build the summary table once and pull both numbers from it.
coef_table = fii.summary2().tables[1]
coef, pvalue = coef_table.iloc[1, 0], coef_table.iloc[1, 3]
coef, pvalue

(0.005205305951119194, 0.9323166606097686)

In [10]:
g = graphs['head']

# Vertex property mapping each vertex to the identifier used as a column
# name in z_scores.
genes = g.vp.genes

# Per-edge results: estimate and p-value of the second model coefficient.
clip_c = g.new_ep("double", 0)
clip_p = g.new_ep("double", 0)

# Only pairs below this p-value are printed; every edge is stored regardless.
report_threshold = 0.0001

for e in g.edges():
    v_i, v_j = e.source(), e.target()
    gene_i = genes[v_i]
    gene_j = genes[v_j]

    # Response: element-wise product of the two z-scored columns.
    y = z_scores[gene_i] * z_scores[gene_j]
    mod = sm.OLS(y, X)
    fii = mod.fit()

    # Read the fitted values directly instead of rendering summary2() twice
    # per edge: the summary table is built from these same arrays, so the
    # numbers are identical but the per-edge cost is far lower.
    # np.asarray makes the lookup positional whether statsmodels returns a
    # Series or an ndarray.
    coef = np.asarray(fii.params)[1]
    pvalue = np.asarray(fii.pvalues)[1]

    if pvalue < report_threshold:
        print(gene_i, gene_j, coef, pvalue)
    clip_c[e] = coef
    clip_p[e] = pvalue

FBgn0262738 FBgn0028369 0.1815104822158657 6.131331229436349e-05
FBgn0262738 FBgn0031681 -0.20168205305755632 4.8327839520932854e-05
FBgn0262738 FBgn0053113 -0.2170024614418307 1.927390164995191e-05
FBgn0262738 FBgn0045063 0.179120842440678 1.7302873783820657e-05
FBgn0262738 FBgn0027525 0.23600722113835812 1.07872771426916e-06
FBgn0262738 FBgn0087008 0.16741568263174694 6.40271559970323e-05
FBgn0262738 FBgn0260429 0.18746415867125563 2.790728295405866e-05
FBgn0262738 FBgn0038858 0.2001171441841852 1.712870545593612e-05
FBgn0262738 FBgn0250788 -0.18779078253579126 4.869985734275991e-05
FBgn0003896 FBgn0053113 -0.19118794243662146 4.894773430721694e-05
FBgn0030505 FBgn0025626 -0.19276689308451572 5.766832358407526e-05
FBgn0030505 FBgn0028369 0.28858936289976717 2.155870406888485e-10
FBgn0030505 FBgn0029768 0.19798036361738391 3.7747787903727115e-05
FBgn0030505 FBgn0029095 -0.24183891788528708 2.9974133157083783e-06
FBgn0030505 FBgn0017550 -0.27768480090178665 1.4653276445078363e-07
FBgn0

In [None]:
# Attach the per-edge coefficient and p-value maps to the graph under the
# names "clip_c" and "clip_p" (g.ep is graph-tool's alias for
# g.edge_properties).
g.ep["clip_c"] = clip_c
g.ep["clip_p"] = clip_p

In [None]:
# Write the graph to 'clip_g_head.xml.gz' in the current working directory.
g.save('clip_g_head.xml.gz')