# Are there any benefits to using direction constraints?

In [1]:
import numpy as np
import networkx as nx
from importlib import reload
from iddn_paper import sim3_h5op, tool_sys, sim3_network_toy
from iddn_paper.old_functions import sim0_synthetic_wang

A bipartite graph: the nodes on the left side form a directed scale-free graph, and they point to the nodes on the right side.
Under what conditions is it harmful to ignore the directions?

In [None]:
# Scratch cell, fully commented out: running it does nothing.
# Kept for reference: two candidate generators for the directed graph
# (sim0_synthetic_wang.barabasi_albert_digraph vs. nx.scale_free_graph)
# and a quick look at np.tril on a random 5x5 matrix.
# reload(sim0_synthetic_wang)
# n_node = 100
# G = sim0_synthetic_wang.barabasi_albert_digraph(n_node, m_min=1, m_max=1, n_input=1, rep_init=1)
# tf_tf_mat = 1 * (nx.adjacency_matrix(G).todense() > 0)
# tf_tf_mat[np.arange(n_node), np.arange(n_node)] = 0
# G = nx.scale_free_graph(n_node)
# nx.draw(G)
# xx = np.random.randn(5,5)
# (np.tril(xx, -1))


In [2]:
# Layer sizes: n1 nodes in the first (TF) block, n2 nodes in the second block.
n1 = 100
n2 = 150
layer_count = np.array([n1, n2])

# Number of independently generated replicates.
n_rep = 50

# One accumulator per quantity; every replicate contributes one entry to each.
dat1_lst, dat2_lst = [], []
con_mat1_lst, con_mat2_lst = [], []
comm_gt_lst, diff_gt_lst = [], []
dep_mat_prior_lst, dep_mat_prior_loose_lst = [], []
omega1_lst, omega2_lst = [], []
adj_mat1_lst, adj_mat2_lst = [], []

for n in range(n_rep):
    # Directed scale-free graph among the n1 first-layer nodes, binarized and
    # with its diagonal cleared.
    # (Original author's note: this matrix is upper triangular.)
    G = sim0_synthetic_wang.barabasi_albert_digraph(
        n1, m_min=1, m_max=1, n_input=1, rep_init=1
    )
    tf_tf_mat = 1 * (nx.adjacency_matrix(G).todense() > 0)
    diag_idx = np.arange(n1)
    tf_tf_mat[diag_idx, diag_idx] = 0

    # Every second-layer node gets edges from 5 distinct first-layer nodes
    # drawn at random.
    regu_mat = np.zeros((n1, n2))
    for i in range(n2):
        parent = np.random.choice(n1, 5, replace=False)
        regu_mat[parent, i] = 1

    # Full (n1 + n2) square adjacency: only rows of first-layer nodes carry
    # edges (first -> first and first -> second).
    n12 = n1 + n2
    adj_mat = np.zeros((n12, n12))
    adj_mat[:n1, :n1] = tf_tf_mat
    adj_mat[:n1, n1:] = regu_mat

    # Directed prior: rows of the first n1 nodes are all ones, the rest zero.
    dep_mat_prior = np.zeros((n12, n12))
    dep_mat_prior[:n1, :] = 1

    # Symmetrized (direction-free) versions of the prior and of the network.
    dep_mat_prior_loose = 1 * ((dep_mat_prior + dep_mat_prior.T) > 0)
    con_mat = 1 * ((adj_mat + adj_mat.T) > 0)

    # post_process presumably derives the two conditions from the undirected
    # network -- see sim3_network_toy for the exact semantics of its options.
    sim_out = sim3_network_toy.post_process(
        con_mat,
        dep_mat_prior=dep_mat_prior,
        ratio_diff=0.25,
        make_diff="remove",
        diag_scale=None,
        ratio_diag=0.8,
    )
    dat1, dat2, con_mat1, con_mat2, comm_gt, diff_gt, om1, om2 = sim_out

    # Back to a directed version (used later for simulation on the directed
    # graph): keep the upper triangle and clear the rows of second-layer nodes.
    adj_mat1, adj_mat2 = np.triu(con_mat1), np.triu(con_mat2)
    adj_mat1[n1:, :] = 0
    adj_mat2[n1:, :] = 0

    # Store this replicate's results.
    for store, item in (
        (dat1_lst, dat1),
        (dat2_lst, dat2),
        (con_mat1_lst, con_mat1),
        (con_mat2_lst, con_mat2),
        (comm_gt_lst, comm_gt),
        (diff_gt_lst, diff_gt),
        (dep_mat_prior_lst, dep_mat_prior),
        (dep_mat_prior_loose_lst, dep_mat_prior_loose),
        (omega1_lst, om1),
        (omega2_lst, om2),
        (adj_mat1_lst, adj_mat1),
        (adj_mat2_lst, adj_mat2),
    ):
        store.append(item)

# Stack the per-replicate priors; from here on these names refer to the
# stacked arrays (replicate index first).
dep_mat_prior = np.array(dep_mat_prior_lst)
dep_mat_prior_loose = np.array(dep_mat_prior_loose_lst)

Smallest eigen values  0.4537929421842978 (0.4629811948945235+0j)
Smallest eigen values  0.4217136459994748 0.37247539053344825
Smallest eigen values  0.5005664158810905 0.2854587438029894
Smallest eigen values  0.4507061114194293 (0.41052064675363115+0j)
Smallest eigen values  0.24055625815513076 0.043722232180836484
Smallest eigen values  0.36992032061533164 0.34531538454546684
Smallest eigen values  0.4145279047152862 0.7120961383299207
Smallest eigen values  0.3024096217372545 0.2783625091411785
Smallest eigen values  (0.35923564719909784+0j) 0.4514090825125418
Smallest eigen values  (0.4034224752972131+0j) 0.4574245406678142
Smallest eigen values  0.35422824685799337 0.20368350681240383
Smallest eigen values  (0.5656065975502766+0j) 0.6892143780047177
Smallest eigen values  (0.3517567545154164+0j) 0.3751579995152144
Smallest eigen values  0.25877875770198616 0.7600855655930348
Smallest eigen values  0.5008256988426969 0.3299993762454
Smallest eigen values  0.4241922416503452 0.527

GGM version

In [3]:
# Random tag that identifies this batch; the linear-flow export reuses it.
idx = np.random.randint(1000000)
f_out = f"sim3_direction_two_layer_batch_{idx}_ggm"

work_folder = tool_sys.get_work_folder()
dat_file = work_folder + f"/experiment_iddn_paper/sim_input/{f_out}.hdf5"

# Everything written for the GGM batch, stacked over replicates.
ggm_fields = dict(
    dat1=np.array(dat1_lst),
    dat2=np.array(dat2_lst),
    con_mat1=np.array(con_mat1_lst),
    con_mat2=np.array(con_mat2_lst),
    comm_gt=np.array(comm_gt_lst),
    diff_gt=np.array(diff_gt_lst),
    # "Null" prior: all ones, same shape as the directed prior.
    dep_mat_null=np.ones_like(dep_mat_prior),
    dep_mat_prior=dep_mat_prior,
    dep_mat_prior_loose=dep_mat_prior_loose,
    layer_count=layer_count,
)
sim3_h5op.make_new_sim_data(dat_file, **ggm_fields)
print(f_out)

sim3_direction_two_layer_batch_428416_ggm


Linear flow version

In [4]:
# Pick up any edits made to the module since it was first imported.
reload(sim0_synthetic_wang)

# Linear steady-state data for both conditions, one entry per replicate.
dat1_lst_lin, dat2_lst_lin = [], []
for adj_mat1, adj_mat2 in zip(adj_mat1_lst, adj_mat2_lst):
    # Condition 1 is simulated before condition 2 for each replicate.
    dat1 = sim0_synthetic_wang.sim_steady_state_linear(adj_mat1, 500)
    dat2 = sim0_synthetic_wang.sim_steady_state_linear(adj_mat2, 500)
    dat1_lst_lin.append(dat1)
    dat2_lst_lin.append(dat2)


5 250.0
5 250.0
5 250.0
6 250.0
7 250.0
4 250.0
7 250.0
6 250.0
5 250.0
6 250.0
4 250.0
4 250.0
5 250.0
5 250.0
5 250.0
6 250.0
6 250.0
5 250.0
6 250.0
8 250.0
5 250.0
5 250.0
5 250.0
6 250.0
5 250.0
5 250.0
5 250.0
5 250.0
6 250.0
6 250.0
5 250.0
4 250.0
7 250.0
6 250.0
5 250.0
5 250.0
5 250.0
5 250.0
4 250.0
4 250.0
4 250.0
5 250.0
5 250.0
7 250.0
6 250.0
7 250.0
7 250.0
5 250.0
5 250.0
5 250.0
5 250.0
5 250.0
6 250.0
5 250.0
5 250.0
5 250.0
5 250.0
4 250.0
5 250.0
6 250.0
6 250.0
5 250.0
4 250.0
5 250.0
6 250.0
7 250.0
6 250.0
5 250.0
5 250.0
5 250.0
4 250.0
5 250.0
5 250.0
7 250.0
5 250.0
5 250.0
6 250.0
5 250.0
5 250.0
5 250.0
5 250.0
5 250.0
5 250.0
4 250.0
5 250.0
5 250.0
6 250.0
6 250.0
6 250.0
4 250.0
5 250.0
5 250.0
5 250.0
4 250.0
5 250.0
5 250.0
5 250.0
5 250.0
5 250.0
5 250.0


In [6]:
# Same batch tag (idx) as the GGM export, with the "_lin" suffix.
f_out = f"sim3_direction_two_layer_batch_{idx}_lin"

work_folder = tool_sys.get_work_folder()
dat_file = work_folder + f"/experiment_iddn_paper/sim_input/{f_out}.hdf5"

# Linear-flow samples combined with the ground truth and priors built earlier.
lin_fields = dict(
    dat1=np.array(dat1_lst_lin),
    dat2=np.array(dat2_lst_lin),
    con_mat1=np.array(con_mat1_lst),
    con_mat2=np.array(con_mat2_lst),
    comm_gt=np.array(comm_gt_lst),
    diff_gt=np.array(diff_gt_lst),
    # "Null" prior: all ones, same shape as the directed prior.
    dep_mat_null=np.ones_like(dep_mat_prior),
    dep_mat_prior=dep_mat_prior,
    dep_mat_prior_loose=dep_mat_prior_loose,
    layer_count=layer_count,
)
sim3_h5op.make_new_sim_data(dat_file, **lin_fields)
print(f_out)

sim3_direction_two_layer_batch_428416_lin


In [None]:
# Disabled sanity check, fully commented out: running this cell does nothing.
# It would simulate one linear-flow dataset and display the absolute inverse
# of its correlation matrix.
# NOTE(review): it uses `plt`, which the import cell shown at the top of this
# notebook does not import -- add the matplotlib import before re-enabling.
# adj_mat1 = adj_mat1_lst[0]
# dat1 = sim0_synthetic_wang.sim_steady_state_linear(adj_mat1, 500)
# cc = np.corrcoef(dat1.T)
# ccinv = np.linalg.inv(cc)
# plt.imshow(np.abs(ccinv))
# plt.clim([0,2])

Non-linear flow version