# GGM simulation with three layers

In [1]:
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
from importlib import reload
from ddn3 import simulation
from ddn3_extra import simulation_r
from iddn_paper import sim3_h5op, tool_sys, sim3_network_toy


## Three layers
This is the older version of the simulation. For the simulation used in the paper, use v2 in the next section.

In [2]:
# Simulate a small batch of three-layer (mRNA / TF / miRNA) GGM replicates with
# `sim3_network_toy.toy_three_layer` and keep every returned item per replicate.

# Per-replicate accumulators: entry i of each list comes from replicate i.
dat1_lst = []
dat2_lst = []
con_mat1_lst = []
con_mat2_lst = []
comm_gt_lst = []
diff_gt_lst = []
dep_mat_prior_lst = []
omega1_lst = []
omega2_lst = []

# Number of nodes in each layer.
n_mrna, n_tf, n_mirna = 50, 50, 50

# Settings forwarded unchanged to the simulator; the commented lines are
# alternative configurations.
n_conn_tf_mrna, n_conn_mirna_mrna, ratio_diag = 2, 2, 0.8
# n_conn_tf_mrna, n_conn_mirna_mrna, ratio_diag = 5, 5, 0.75
# n_conn_tf_mrna, n_conn_mirna_mrna, ratio_diag = 8, 8, 0.73  # Showing the benefits of multi-omics

layer_count = np.array([n_mrna, n_tf, n_mirna])  # mRNA, TF, miRNA

# Pick up on-disk edits to the project modules without restarting the kernel.
# Reload the lower-level module first: a module that did `from X import name`
# keeps the old objects unless it is itself reloaded after X.
# NOTE(review): presumably sim3_network_toy builds on simulation_r - confirm;
# if the two are independent the order makes no difference.
reload(simulation_r)
reload(sim3_network_toy)

for n in range(2):
    print(n)  # progress marker, one line per replicate
    dat1, dat2, con_mat1, con_mat2, comm_gt, diff_gt, dep_mat_prior, om1, om2 = sim3_network_toy.toy_three_layer(
        n_mrna=n_mrna,
        n_tf=n_tf,
        n_mirna=n_mirna,
        n_sample_gen=500,
        ratio_diag=ratio_diag,
        n_conn_tf_mrna=n_conn_tf_mrna,
        n_conn_mirna_mrna=n_conn_mirna_mrna,
    )
    dat1_lst.append(dat1)
    dat2_lst.append(dat2)
    con_mat1_lst.append(con_mat1)
    con_mat2_lst.append(con_mat2)
    comm_gt_lst.append(comm_gt)
    diff_gt_lst.append(diff_gt)
    dep_mat_prior_lst.append(dep_mat_prior)
    omega1_lst.append(om1)
    omega2_lst.append(om2)

# From here on `dep_mat_prior` is the stack over replicates (replicate index on
# axis 0), no longer the single per-replicate value unpacked inside the loop.
dep_mat_prior = np.array(dep_mat_prior_lst)


0
Smallest eigen values  0.35292366273161796 0.007064696374069484
1
Smallest eigen values  0.13107961829398473 0.31871475416001793


In [3]:
# Batch tag: current local time rendered as YYYY_MM_DD_HH_MM_SS.
idx = f"{datetime.now():%Y_%m_%d_%H_%M_%S}"
print(idx)
# Alternative tag (disabled): idx = np.random.randint(1000000000)

2024_08_21_22_54_44


In [4]:
# Write the simulated batch to a new HDF5 file named after the batch tag `idx`.
f_out = f"sim3_ggm_three_layer_batch_{idx}"
dat_file = tool_sys.get_work_folder() + f"/experiment_iddn_paper/sim_input/{f_out}.hdf5"

# Every per-replicate list is stacked into one array before being handed over.
sim_payload = dict(
    dat1=np.array(dat1_lst),
    dat2=np.array(dat2_lst),
    con_mat1=np.array(con_mat1_lst),
    con_mat2=np.array(con_mat2_lst),
    comm_gt=np.array(comm_gt_lst),
    diff_gt=np.array(diff_gt_lst),
    # All-ones array with the same shape and dtype as the stacked prior.
    dep_mat_null=np.ones_like(dep_mat_prior),
    # The same stacked prior is stored under both the regular and the "loose" key.
    dep_mat_prior=dep_mat_prior,
    dep_mat_prior_loose=dep_mat_prior,
    layer_count=layer_count,
)
sim3_h5op.make_new_sim_data(dat_file, **sim_payload)
print(f_out)

sim3_ggm_three_layer_batch_2024_08_21_22_54_44


## Three layers v2
This version includes networks among TFs and uses more detailed constraints.

In [2]:
# Simulate a batch of three-layer (mRNA / TF / miRNA) GGM replicates with
# `sim3_network_toy.toy_three_layer_v2` and keep every returned item per replicate.

# Per-replicate accumulators: entry i of each list comes from replicate i.
dat1_lst = []
dat2_lst = []
con_mat1_lst = []
con_mat2_lst = []
comm_gt_lst = []
diff_gt_lst = []
dep_mat_prior_lst = []
omega1_lst = []
omega2_lst = []

# Number of nodes in each layer; the commented lines are alternative sizes.
# n_mrna, n_tf, n_mirna = 50, 50, 50

# For speed comparison
# n_mrna, n_tf, n_mirna = 25, 13, 12
# n_mrna, n_tf, n_mirna = 50, 25, 25
n_mrna, n_tf, n_mirna = 100, 50, 50
# n_mrna, n_tf, n_mirna = 200, 100, 100

# Settings forwarded unchanged to the simulator.
n_conn_tf_mrna, n_conn_mirna_mrna, ratio_diag = 2, 2, 0.8
# n_conn_tf_mrna, n_conn_mirna_mrna, ratio_diag = 5, 5, 0.72

layer_count = np.array([n_mrna, n_tf, n_mirna])  # mRNA, TF, miRNA

# Pick up on-disk edits to the project modules without restarting the kernel.
# One reload per module is enough. Reload the lower-level module first: a module
# that did `from X import name` keeps the old objects unless it is itself
# reloaded after X.
# NOTE(review): presumably sim3_network_toy builds on simulation_r - confirm;
# if the two are independent the order makes no difference.
reload(simulation_r)
reload(sim3_network_toy)

for n in range(50):
    print(n)  # progress marker, one line per replicate
    dat1, dat2, con_mat1, con_mat2, comm_gt, diff_gt, dep_mat_prior, om1, om2 = sim3_network_toy.toy_three_layer_v2(
        n_mrna=n_mrna,
        n_tf=n_tf,
        n_mirna=n_mirna,
        # n_sample_gen=2000,
        n_sample_gen=500,
        ratio_diag=ratio_diag,
        n_conn_tf_mrna=n_conn_tf_mrna,
        n_conn_mirna_mrna=n_conn_mirna_mrna,
    )
    dat1_lst.append(dat1)
    dat2_lst.append(dat2)
    con_mat1_lst.append(con_mat1)
    con_mat2_lst.append(con_mat2)
    comm_gt_lst.append(comm_gt)
    diff_gt_lst.append(diff_gt)
    dep_mat_prior_lst.append(dep_mat_prior)
    omega1_lst.append(om1)
    omega2_lst.append(om2)

# From here on `dep_mat_prior` is the stack over replicates (replicate index on
# axis 0), no longer the single per-replicate value unpacked inside the loop.
dep_mat_prior = np.array(dep_mat_prior_lst)


0
Smallest eigen values  (-0.11837723337776214+0j) 0.2834449150816196
Smallest eigen values  0.42362763912050344 0.351872150132205
1
Smallest eigen values  (0.03994476213054021+0j) 0.4006498337892226
2
Smallest eigen values  (0.534970759336634+0j) 0.37908035733851797
3
Smallest eigen values  0.0049702135507697955 0.2569344301906335
4
Smallest eigen values  0.10606736328500282 0.26955110818629713
5
Smallest eigen values  0.023856085841697565 (0.3018179005750703+0j)
6
Smallest eigen values  0.15992145489498005 0.25724969480259574
7
Smallest eigen values  0.13794712175634613 0.03048788603380487
8
Smallest eigen values  0.27460713432601763 0.06565895839019742
9
Smallest eigen values  (0.42409642738472453+0j) -0.02911873479176747
Smallest eigen values  0.21790479552098924 0.05974973613180339
10
Smallest eigen values  0.007822716104105279 (0.29896961737976113+0j)
11
Smallest eigen values  0.4140765125469717 0.059094022587302675
12
Smallest eigen values  0.12334824442470119 -0.011784173297633

In [None]:
# Optional quick-look plots (disabled): one slice of the stacked `dep_mat_prior`
# and the first entry of `comm_gt_lst`. Uncomment a line to display it.
# plt.imshow(dep_mat_prior[0,10])
# plt.imshow(comm_gt_lst[0])

In [3]:
# Write the simulated v2 batch to a new HDF5 file tagged with the current local
# time (YYYY_MM_DD_HH_MM_SS).
idx = f"{datetime.now():%Y_%m_%d_%H_%M_%S}"
f_out = f"sim3_ggm_three_layer_v2_batch_{idx}"
dat_file = tool_sys.get_work_folder() + f"/experiment_iddn_paper/sim_input/{f_out}.hdf5"

# Every per-replicate list is stacked into one array before being handed over.
# The null and regular priors are single slices taken along axis 1 of the
# stacked `dep_mat_prior`.
# NOTE(review): `dep_mat_prior_loose` receives the full, un-sliced stack, so it
# has one more axis than the other two priors - confirm this is intended.
sim_payload = dict(
    dat1=np.array(dat1_lst),
    dat2=np.array(dat2_lst),
    con_mat1=np.array(con_mat1_lst),
    con_mat2=np.array(con_mat2_lst),
    comm_gt=np.array(comm_gt_lst),
    diff_gt=np.array(diff_gt_lst),
    dep_mat_null=dep_mat_prior[:, 0],
    dep_mat_prior=dep_mat_prior[:, 6],  # 50% removed
    dep_mat_prior_loose=dep_mat_prior,
    layer_count=layer_count,
)
sim3_h5op.make_new_sim_data(dat_file, **sim_payload)
print(f_out)

sim3_ggm_three_layer_v2_batch_2024_08_15_13_31_59
