In [1]:
import os

import numpy as np
import pandas as pd
from SERGIO.sergio import sergio

# Simulate Clean Data: Steady-State Simulation

In [None]:
# Seed NumPy's legacy global RNG so that Restart & Run All reproduces the same
# simulated expression values.
# NOTE(review): this assumes SERGIO draws its randomness through the np.random
# module -- confirm against the installed SERGIO version.
STEADY_STATE_SEED = 0
np.random.seed(STEADY_STATE_SEED)

# Build the steady-state simulator.
sim = sergio(
    number_genes=1277,
    number_bins=9,
    number_sc=300,
    noise_params=1,
    decays=0.8,
    sampling_state=15,
    noise_type='dpd',
)

# Load the network description files.
# NOTE: the keyword `input_file_taregts` is spelled exactly as in the original
# call; presumably it mirrors SERGIO's own signature, so do not "correct" the
# spelling without checking the library.
sim.build_graph(
    input_file_taregts='Input_with_Autoregulation/Ecoli_1200_net4_input_extended_GRN_1277_genes.txt',
    input_file_regs='Input_with_Autoregulation/Ecoli_1200_net4_input_MRs.txt',
    shared_coop_state=2,
)

# Run the simulation (prints per-level progress) and fetch the result.
sim.simulate()
expr = sim.getExpressions()

# Join the entries of `expr` along axis 1 into a single clean 2-D matrix.
expr_clean = np.concatenate(expr, axis=1)

Finding levels
Start simulating new level
There are 2 genes to simulate in this layer
Done with current level
Start simulating new level
There are 3 genes to simulate in this layer
Done with current level
Start simulating new level
There are 5 genes to simulate in this layer
Done with current level
Start simulating new level
There are 9 genes to simulate in this layer
Done with current level
Start simulating new level
There are 27 genes to simulate in this layer
Done with current level
Start simulating new level
There are 83 genes to simulate in this layer
Done with current level
Start simulating new level
There are 1148 genes to simulate in this layer


In [7]:
# np.save does not create missing parent directories, so make sure the output
# folder exists before writing the clean expression matrix to disk.
os.makedirs("autoregulatory_output", exist_ok=True)
np.save("autoregulatory_output/clean_gene_expression.npy", expr_clean)

# Add Technical Noise: Steady-State Simulations

In [8]:
# np.save does not create missing parent directories; create the output folder
# once up front so the first replicate does not fail with FileNotFoundError.
os.makedirs("autoregulatory_output", exist_ok=True)

# Produce 100 noisy replicates, each derived from the same clean `expr`.
# NOTE: `sim` and `expr` must be the steady-state simulator and its output.
# The differentiation section further down rebinds `sim`, so this cell has to
# run before that one (or after re-running the steady-state simulation cell).
for replicate in range(100):
    # Add outlier genes
    expr_O = sim.outlier_effect(expr, outlier_prob=0.01, mean=0.8, scale=1)

    # Add library size effect (libFactor is returned but not used below)
    libFactor, expr_O_L = sim.lib_size_effect(expr_O, mean=4.6, scale=0.4)

    # Add dropouts: multiply the expression element-wise by the indicator
    binary_ind = sim.dropout_indicator(expr_O_L, shape=6.5, percentile=82)
    expr_O_L_D = np.multiply(binary_ind, expr_O_L)

    # Convert to UMI counts
    count_matrix = sim.convert_to_UMIcounts(expr_O_L_D)

    # Make a 2-D gene expression matrix by joining along axis 1
    count_matrix = np.concatenate(count_matrix, axis=1)

    # One .npy file per replicate, indexed by the loop counter
    np.save(
        "autoregulatory_output/noisy_gene_expression_replicate_{}.npy".format(replicate),
        count_matrix,
    )

# Simulate Clean Data: Differentiation Simulation

In [4]:
# Seed NumPy's legacy global RNG so that re-running this cell reproduces the
# same simulated trajectories.
# NOTE(review): this assumes SERGIO draws its randomness through the np.random
# module -- confirm against the installed SERGIO version.
DYNAMICS_SEED = 0
np.random.seed(DYNAMICS_SEED)

# Read the tab-separated, header-less bifurcation matrix as a plain array.
df = pd.read_csv(
    'data_sets/De-noised_100G_6T_300cPerT_dynamics_7_DS6/bMat_cID7.tab',
    sep='\t',
    header=None,
    index_col=None,
)
bMat = df.values

# NOTE: this rebinds `sim`; any cell that still needs the steady-state
# simulator must be run before this one.
sim = sergio(
    number_genes=100,
    number_bins=6,
    number_sc=300,
    noise_params=0.2,
    decays=0.8,
    sampling_state=1,
    noise_params_splice=0.07,
    noise_type='dpd',
    dynamics=True,
    bifurcation_matrix=bMat,
)

# NOTE: `input_file_taregts` is spelled exactly as in the original call;
# presumably it mirrors SERGIO's own signature -- check before renaming.
sim.build_graph(
    input_file_taregts='data_sets/De-noised_100G_6T_300cPerT_dynamics_7_DS6/Interaction_cID_7.txt',
    input_file_regs='data_sets/De-noised_100G_6T_300cPerT_dynamics_7_DS6/Regs_cID_7.txt',
    shared_coop_state=2,
)

# Run the dynamics simulation (prints per-cell-type progress).
sim.simulate_dynamics()

# Two expression sets come back (presumably U = unspliced, S = spliced);
# each is joined along axis 1 into a clean 2-D matrix.
exprU, exprS = sim.getExpressions_dynamics()
exprU_clean = np.concatenate(exprU, axis=1)
exprS_clean = np.concatenate(exprS, axis=1)

Start simulating new cell type
binID: 0
number of initial cells: 21
Done with current cell type
Start simulating new cell type
binID: 1
number of initial cells: 24
Done with current cell type
Start simulating new cell type
binID: 2
number of initial cells: 27
Done with current cell type
Start simulating new cell type
binID: 5
number of initial cells: 24
Done with current cell type
Start simulating new cell type
binID: 4
number of initial cells: 24
Done with current cell type
Start simulating new cell type
binID: 3
number of initial cells: 24
Done with current cell type


# Add Technical Noise: Differentiation Simulations

In [5]:
"""
Add outlier genes
"""
exprU_O, exprS_O = sim.outlier_effect_dynamics(exprU, exprS, outlier_prob = 0.01, mean = 0.8, scale = 1)

"""
Add Library Size Effect
"""
libFactor, exprU_O_L, exprS_O_L = sim.lib_size_effect_dynamics(exprU_O, exprS_O, mean = 4.6, scale = 0.4)

"""
Add Dropouts
"""
binary_indU, binary_indS = sim.dropout_indicator_dynamics(exprU_O_L, exprS_O_L, shape = 6.5, percentile = 82)
exprU_O_L_D = np.multiply(binary_indU, exprU_O_L)
exprS_O_L_D = np.multiply(binary_indS, exprS_O_L)

"""
Convert to UMI count
"""
count_matrix_U, count_matrix_S = sim.convert_to_UMIcounts_dynamics(exprU_O_L_D, exprS_O_L_D)

"""
Make 2d spliced and unspliced expression matrices
"""
count_matrix_U = np.concatenate(count_matrix_U, axis = 1)
count_matrix_S = np.concatenate(count_matrix_S, axis = 1)