In [5]:
import scvelo as scv
import numpy as np
import scanpy as sc
from scipy import sparse

In [6]:
# Random seed shared by the data simulation and the UMAP embedding below.
SEED: int = 0
# Number of genes requested from the simulator (passed as `n_vars`).
n_genes: int = 500

In [None]:
# Cell counts to simulate; one dataset is generated and written per entry.
cells = [1_000_000]

: 

In [None]:
# For every requested dataset size: simulate spliced/unspliced counts,
# attach the ground-truth velocity, preprocess, embed, and write to disk.
# The loop is intentionally kept flat so that `adata` (and the intermediate
# arrays) of the last iteration remain available in the notebook namespace.
for n_cell in cells:
    adata = scv.datasets.simulation(n_obs=n_cell,
                                    n_vars=n_genes,
                                    alpha=5,
                                    beta=0.3,
                                    gamma=0.5,
                                    random_seed=SEED)

    # Ground-truth velocity from the simulated layers and the per-gene rates
    # stored in `adata.var`: v = beta * u - gamma * s.
    # assumes the simulated layers are dense (n_obs, n_vars) arrays so the
    # per-gene rate vectors broadcast across cells — TODO confirm
    spliced = adata.layers["spliced"].astype(np.float64)
    unspliced = adata.layers["unspliced"].astype(np.float64)
    beta = adata.var['true_beta'].values.astype(np.float64)
    gamma = adata.var['true_gamma'].values.astype(np.float64)
    true_velocity = beta * unspliced - gamma * spliced
    adata.layers["true_velocity"] = true_velocity

    # Integer-rounded copies of the simulated layers, taken before any
    # filtering/normalisation touches `spliced` / `unspliced`.
    adata.layers['raw_spliced'] = np.rint(adata.layers['spliced']).astype(int)
    adata.layers['raw_unspliced'] = np.rint(adata.layers['unspliced']).astype(int)

    # Convert X and the count layers to CSR sparse matrices.
    adata.X = sparse.csr_matrix(adata.X)
    for layer_key in ('spliced', 'unspliced', 'raw_spliced', 'raw_unspliced'):
        adata.layers[layer_key] = sparse.csr_matrix(adata.layers[layer_key])

    scv.pp.remove_duplicate_cells(adata)
    scv.pp.filter_and_normalize(adata, min_shared_counts=20, n_top_genes=2000)
    # NOTE(review): presumably scvelo skips its own variable-gene selection
    # when fewer than `n_top_genes` genes remain, so the selection is run
    # here over all remaining genes instead — confirm against scvelo.
    if adata.n_vars < 2000:
        sc.pp.highly_variable_genes(adata, n_top_genes=adata.n_vars, subset=True)

    scv.pp.moments(adata, n_neighbors=30, n_pcs=30)
    # NOTE(review): neighbors are (re)computed with scanpy right after
    # `scv.pp.moments` — confirm this recomputation is intentional.
    sc.pp.neighbors(adata, n_neighbors=30, n_pcs=30)
    sc.tl.umap(adata, random_state=SEED)

    # Per-cell library sizes of the raw (rounded) counts. Summing a sparse
    # matrix over axis 1 yields an (n_obs, 1) matrix, so flatten it to a
    # plain 1-D array before storing it as an `obs` column.
    adata.obs['u_lib_size_raw'] = np.asarray(
        adata.layers['raw_unspliced'].sum(axis=1)
    ).ravel()
    adata.obs['s_lib_size_raw'] = np.asarray(
        adata.layers['raw_spliced'].sum(axis=1)
    ).ravel()

    # File name reflects the requested simulation size (cells and genes),
    # not the post-filtering shape of `adata`.
    adata.write_h5ad(f"sim_cells_{n_cell}_genes_{n_genes}.h5ad")