In [71]:
from pathlib import Path

import numpy as np
import scanpy as sc
import scvelo as scv
import sdevelo as sv

In [None]:
# Random seed shared by the simulator (`seed=`), the UMAP embedding (`random_state=`)
# and the output file name.
SEED = 0
# Number of genes requested from the simulator (`n_vars=`); also names the output folder.
n_genes = 100

In [73]:
# Configure the sdevelo simulator from the notebook-level settings and draw one dataset.
# NOTE(review): K=8 is hard-coded here; its meaning is defined by sv.SimData — confirm.
simulation = sv.SimData(n_vars=n_genes, K=8, seed=SEED)
adata = simulation.generate()
# Bare last expression so the notebook renders the AnnData summary.
adata

AnnData object with n_obs × n_vars = 1600 × 500
    obs: 'true_t'
    var: 'true_t_', 'true_alpha', 'true_beta', 'true_gamma', 'true_sigma_1', 'true_sigma_2', 'true_u0', 'true_s0'
    layers: 'unspliced', 'spliced'

In [74]:
# Grab the two simulated abundance layers (references to the arrays held by `adata`).
unspliced, spliced = (
    adata.layers[layer_key] for layer_key in ("unspliced", "spliced")
)
# Copy the per-gene ground-truth rate columns out of `adata.var` as float64 arrays.
beta, gamma = (
    adata.var[rate_column].to_numpy().astype(np.float64)
    for rate_column in ("true_beta", "true_gamma")
)

In [75]:
# Ground-truth velocity from the simulated parameters: beta * unspliced - gamma * spliced.
# assumes beta/gamma are per-gene vectors that broadcast across the cell axis
# of the (cells x genes) layers — TODO confirm
true_velocity = beta * unspliced - gamma * spliced
# Attach the result to `adata` as its own layer.
adata.layers["true_velocity"] = true_velocity

In [76]:
# Preprocessing pipeline. Every call below mutates `adata` in place, so the
# statement order matters and re-running this cell on an already-processed
# object will not give the same result.
scv.pp.remove_duplicate_cells(adata)
# Keep integer-rounded copies of both layers before filter_and_normalize runs
# (presumably so the un-normalized counts stay available — confirm).
adata.layers['raw_spliced'] = np.rint(adata.layers['spliced']).astype(int)
adata.layers['raw_unspliced'] = np.rint(adata.layers['unspliced']).astype(int)
# With fewer genes than n_top_genes the captured output reports that the
# dispersion filter is skipped; X is still logarithmized.
scv.pp.filter_and_normalize(adata, min_shared_counts=20, n_top_genes=2000)
# NOTE(review): n_top_genes=adata.n_vars with subset=True looks like it keeps
# every remaining gene and only adds the highly-variable annotations — confirm intent.
if adata.n_vars < 2000:
    sc.pp.highly_variable_genes(adata, n_top_genes=adata.n_vars, subset=True)
# NOTE(review): the captured log shows scv.pp.moments computing neighbors itself;
# sc.pp.neighbors then runs again with the same n_neighbors/n_pcs — confirm both are needed.
scv.pp.moments(adata, n_neighbors=30, n_pcs=30)
sc.pp.neighbors(adata, n_neighbors=30, n_pcs=30)
# UMAP is seeded with the notebook-level SEED.
sc.tl.umap(adata, random_state=SEED)
# Per-cell totals of the rounded raw layers, summed over the last axis
# (the gene axis, assuming layers are cells x genes).
adata.obs['u_lib_size_raw'] = adata.layers['raw_unspliced'].sum(-1) 
adata.obs['s_lib_size_raw'] = adata.layers['raw_spliced'].sum(-1)

Skip filtering by dispersion since number of variables are less than `n_top_genes`.
Logarithmized X.
computing neighbors
    finished (0:00:00) --> added 
    'distances' and 'connectivities', weighted adjacency matrices (adata.obsp)
computing moments based on connectivities


  log1p(adata)
  scv.pp.moments(adata, n_neighbors=30, n_pcs=30)
  neighbors(


    finished (0:00:00) --> added 
    'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)


In [77]:
# Persist the processed dataset under a folder named after the gene count,
# with the seed in the file name.
output_path = Path(f"sde{n_genes}") / f"sim_sde_{SEED}.h5ad"
# Make sure the target folder exists: on a fresh checkout (or after changing
# n_genes) it is missing and the write would fail.
output_path.parent.mkdir(parents=True, exist_ok=True)
adata.write_h5ad(output_path)