In [1]:
import numpy as np
import pandas as pd
from SERGIO.sergio import sergio

# Simulate Clean Data: Steady-State Simulation

In [None]:
# Configure the steady-state simulator (100 genes, 9 bins, 300 cells per bin).
sim = sergio(
    number_genes=100,
    number_bins=9,
    number_sc=300,
    noise_params=1,
    decays=0.8,
    sampling_state=15,
    noise_type='dpd',
)

# Input files for this data set. The spelling "taregts" is intentional: it is
# the keyword name build_graph() is called with below, and the variable is
# reused by later cells.
input_file_taregts = 'data_sets/De-noised_100G_9T_300cPerT_4_DS1/Interaction_cID_4.txt'
input_file_regs = 'data_sets/De-noised_100G_9T_300cPerT_4_DS1/Regs_cID_4.txt'

sim.build_graph(
    input_file_taregts=input_file_taregts,
    input_file_regs=input_file_regs,
    shared_coop_state=2,
)
sim.simulate()

# Join the pieces returned by getExpressions() along axis 1 into one array
# (a later cell's recorded output shows expr_clean.T as 2700 x 100).
expr = sim.getExpressions()
expr_clean = np.concatenate(expr, axis=1)

Start simulating new level
There are 4 genes to simulate in this layer
Done with current level
Start simulating new level
There are 6 genes to simulate in this layer
Done with current level
Start simulating new level
There are 90 genes to simulate in this layer


# Add Technical Noise: Steady-State Simulation

In [None]:
"""
Add outlier genes
"""
# Each step consumes the previous step's result; the suffixes record which
# effects have been applied so far (_O outliers, _L library size, _D dropouts).
expr_O = sim.outlier_effect(
    expr,
    outlier_prob=0.01,
    mean=0.8,
    scale=1,
)

"""
Add Library Size Effect
"""
libFactor, expr_O_L = sim.lib_size_effect(
    expr_O,
    mean=4.6,
    scale=0.4,
)

"""
Add Dropouts
"""
binary_ind = sim.dropout_indicator(
    expr_O_L,
    shape=6.5,
    percentile=82,
)
# Element-wise product with the indicator (presumably 0/1 -- confirm against
# SERGIO), so entries whose indicator is 0 become 0.
expr_O_L_D = np.multiply(binary_ind, expr_O_L)

"""
Convert to UMI count
"""
count_matrix = sim.convert_to_UMIcounts(expr_O_L_D)

"""
Make a 2d gene expression matrix
"""
# Rebinds count_matrix: the converted result is replaced by its axis-1 concatenation.
count_matrix = np.concatenate(count_matrix, axis=1)

# Simulate Clean Data: Differentiation Simulation

In [None]:
# Load the tab-separated, header-less matrix file and keep its raw values;
# it is passed to the simulator as bifurcation_matrix.
df = pd.read_csv(
    'data_sets/De-noised_100G_6T_300cPerT_dynamics_7_DS6/bMat_cID7.tab',
    sep='\t',
    header=None,
    index_col=None,
)
bMat = df.values

# NOTE: this rebinds `sim`, replacing the steady-state simulator created in an
# earlier cell; cells that use `sim` after this point get the dynamics one.
sim = sergio(
    number_genes=100,
    number_bins=6,
    number_sc=300,
    noise_params=0.2,
    decays=0.8,
    sampling_state=1,
    noise_params_splice=0.07,
    noise_type='dpd',
    dynamics=True,
    bifurcation_matrix=bMat,
)
sim.build_graph(
    input_file_taregts='data_sets/De-noised_100G_6T_300cPerT_dynamics_7_DS6/Interaction_cID_7.txt',
    input_file_regs='data_sets/De-noised_100G_6T_300cPerT_dynamics_7_DS6/Regs_cID_7.txt',
    shared_coop_state=2,
)
sim.simulate_dynamics()

# getExpressions_dynamics() returns two results (named U and S here); each is
# concatenated along axis 1 into a single array.
exprU, exprS = sim.getExpressions_dynamics()
exprU_clean = np.concatenate(exprU, axis=1)
exprS_clean = np.concatenate(exprS, axis=1)

# Add Technical Noise: Differentiation Simulation

In [None]:
"""
Add outlier genes
"""
# Each step consumes the previous step's U/S pair; the suffixes record which
# effects have been applied so far (_O outliers, _L library size, _D dropouts).
exprU_O, exprS_O = sim.outlier_effect_dynamics(
    exprU,
    exprS,
    outlier_prob=0.01,
    mean=0.8,
    scale=1,
)

"""
Add Library Size Effect
"""
libFactor, exprU_O_L, exprS_O_L = sim.lib_size_effect_dynamics(
    exprU_O,
    exprS_O,
    mean=4.6,
    scale=0.4,
)

"""
Add Dropouts
"""
binary_indU, binary_indS = sim.dropout_indicator_dynamics(
    exprU_O_L,
    exprS_O_L,
    shape=6.5,
    percentile=82,
)
# Element-wise product with each indicator (presumably 0/1 -- confirm against
# SERGIO), so entries whose indicator is 0 become 0.
exprU_O_L_D = np.multiply(binary_indU, exprU_O_L)
exprS_O_L_D = np.multiply(binary_indS, exprS_O_L)

"""
Convert to UMI count
"""
count_matrix_U, count_matrix_S = sim.convert_to_UMIcounts_dynamics(
    exprU_O_L_D,
    exprS_O_L_D,
)

"""
Make 2d spliced and unspliced expression matrices
"""
# Rebinds both names: each converted result is replaced by its axis-1 concatenation.
count_matrix_U = np.concatenate(count_matrix_U, axis=1)
count_matrix_S = np.concatenate(count_matrix_S, axis=1)

### Create a network from here

In [None]:
import scanpy as sc
import pandas as pd
import csv
import networkx as nx
from SERGIO.utils import convert_interaction_net_to_networkx

In [107]:

# Wrap the transposed clean expression matrix in an AnnData object
# (the recorded output reports n_obs x n_vars = 2700 x 100).
adata = sc.AnnData(np.transpose(expr_clean))
# Bare last expression so the notebook displays the object's summary.
adata

  adata = sc.AnnData(expr_clean.T)


AnnData object with n_obs × n_vars = 2700 × 100

In [109]:

# Convert the interaction file into a graph via the SERGIO.utils helper
# (presumably a networkx graph, going by the helper's name -- confirm).
# NOTE(review): the recorded output of this cell is
# "NameError: name 'np' is not defined". numpy is imported as `np` in this
# notebook's first cell, so the error presumably originates inside the
# helper's own module rather than here -- confirm and fix it there.
G = convert_interaction_net_to_networkx(input_file_taregts)

NameError: name 'np' is not defined

In [105]:
# Display the expression matrix stored in the AnnData object.
# The original `adata.X[]` is a SyntaxError (an empty subscript is not valid
# Python); the recorded output is the full array, so the subscript is dropped.
adata.X

array([[12.460231  , 13.338497  ,  0.3373751 , ...,  7.00116   ,
        18.650307  ,  6.639076  ],
       [ 4.8804607 ,  3.7748215 ,  1.0696927 , ...,  5.180775  ,
         8.515057  ,  5.4331756 ],
       [ 8.568037  , 12.35842   ,  3.8735723 , ...,  4.542981  ,
         9.333601  ,  7.4984584 ],
       ...,
       [ 3.7847576 , 10.021104  ,  0.28865016, ...,  4.229526  ,
         8.635693  ,  0.5633098 ],
       [ 4.6068873 ,  5.993948  ,  1.4192683 , ...,  5.8673964 ,
        14.021153  ,  4.031814  ],
       [ 5.6650996 ,  3.6499734 ,  3.9508939 , ...,  4.011389  ,
         7.4217916 ,  5.4101853 ]], dtype=float32)

In [88]:
# Collect the first comma-separated field of every row of the interaction file
# (presumably the target gene id -- confirm against the SERGIO input format)
# and display the distinct values after rounding to the nearest integer.
data = []
# newline="" is what the csv module asks for when it is handed a file object.
with open(input_file_taregts, newline="") as f:
    # With a tab delimiter each comma-separated line arrives as a single cell,
    # which is then split on "," by hand.
    reader = csv.reader(f, delimiter="\t")
    for line in reader:
        if not line:
            # csv.reader yields [] for a blank line; indexing line[0] on it
            # would raise IndexError (e.g. on a trailing empty line).
            continue
        data.append(line[0].split(',')[0])
np.unique(np.rint(np.array(data, dtype=float)))

array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12.,
       13., 14., 15., 16., 18., 19., 20., 21., 22., 23., 24., 25., 26.,
       27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39.,
       40., 41., 42., 43., 45., 46., 47., 48., 49., 50., 51., 52., 53.,
       54., 55., 57., 58., 59., 60., 61., 63., 64., 65., 66., 68., 69.,
       70., 71., 72., 73., 74., 75., 76., 77., 78., 79., 80., 81., 82.,
       83., 85., 86., 87., 88., 89., 90., 91., 92., 94., 95., 96., 97.,
       98., 99.])