# Dynamo-scvelo benchmark

Here you will learn the basics of RNA velocity analysis.

For illustration, it is applied to endocrine development in the pancreas, with lineage commitment to four major fates: α, β, δ and ε-cells. <br/> 
See [here](https://scvelo.readthedocs.io/scvelo.datasets.pancreas.html) for more details. It can be applied to your own data along the same lines. 

The notebook is also available at
[Google Colab](https://colab.research.google.com/github/theislab/scvelo_notebooks/blob/master/VelocityBasics.ipynb)
and [nbviewer](https://nbviewer.jupyter.org/github/theislab/scvelo_notebooks/blob/master/VelocityBasics.ipynb).

In [None]:
# update to the latest version, if not done yet.
!pip install scvelo --upgrade --quiet

In [None]:
import scvelo as scv
# Report the scvelo version in use for this benchmark run.
scv.logging.print_version()

In [None]:
# NOTE(review): dynamo is imported under both `dynamo` and `dyn`; both names
# are referenced by cells in this notebook, so both imports are kept.
import dynamo
import dynamo as dyn
from dynamo.preprocessing import Preprocessor
# NOTE(review): not referenced by name in the visible cells — presumably a
# local helper module imported for its side effects or left over; confirm.
import pearson_residual_normalization_recipe
# Wildcard import; presumably the source of `plot_scatter_sparse` used in the
# comparison plots — verify against benchmark_utils.
from benchmark_utils import *
# Load the zebrafish sample dataset that the dynamo pipeline will preprocess.
dyn_adata = dyn.sample_data.zebrafish()
print("original data shape:", dyn_adata.shape)
dyn_adata


In [None]:
# Annotation key passed as `color=` to both UMAP plots at the end of the notebook.
celltype_key = "Cell_type"

In [None]:

# Configure the dynamo preprocessor, then run the individual steps by hand;
# the one-call seurat recipe is left commented out below.
preprocessor = Preprocessor()
preprocessor.config_monocle_recipe(dyn_adata) # use monocle as default base config
# NOTE(review): presumably overrides parts of the monocle config with the
# seurat settings — confirm against the Preprocessor implementation.
preprocessor.config_seurat_recipe()
# preprocessor.preprocess_adata_seurat(dyn_adata)

dyn.preprocessing.utils.unique_var_obs_adata(dyn_adata)
# shared_count=20 parallels min_shared_counts=20 in the scvelo call later on.
preprocessor.filter_genes_by_outliers(dyn_adata, shared_count=20)
# Normalize with skip_log=True; log1p is applied to X explicitly after gene
# selection, two lines below.
preprocessor.normalize_by_cells(dyn_adata, skip_log=True)
# n_top_genes=2000 with the seurat recipe parallels the scvelo settings.
preprocessor.select_genes(dyn_adata, recipe="seurat", n_top_genes=2000)
preprocessor.log1p(dyn_adata, layers=["X"])
# Store the embedding under "X_pca", the same obsm key read from the scvelo
# object in the comparison cells.
preprocessor.pca(dyn_adata, pca_key="X_pca")

In [None]:
# Inspect the "spliced" layer of the dynamo object after the preprocessing cell.
dyn_adata.layers["spliced"]

In [None]:
# Display the dynamo AnnData summary after preprocessing.
dyn_adata

In [None]:
# Global scvelo logging and plotting configuration for the rest of the notebook.
scv.settings.verbosity = 3  # show errors(0), warnings(1), info(2), hints(3)
scv.settings.presenter_view = True  # set max width size for presenter view
scv.set_figure_params('scvelo')  # for beautified visualization


### Load the Data

## Preprocess by scv.pp

In [None]:
# Load the zebrafish sample data again into a separate variable; this object
# is preprocessed with scvelo, while `dyn_adata` holds the dynamo result.
adata = dynamo.sample_data.zebrafish()

### Preprocess the Data by scvelo

In [None]:
# scvelo preprocessing with settings mirroring the dynamo cell
# (shared-count threshold 20, 2000 top genes, seurat flavor).
scv.pp.filter_and_normalize(adata, min_shared_counts=20, n_top_genes=2000, flavor="seurat")
# NOTE(review): presumably this is also the step that fills adata.obsm["X_pca"]
# read by the comparison cells — confirm against the scvelo documentation.
scv.pp.moments(adata, n_pcs=30, n_neighbors=30)


## Check that the gene subsets used for PCA are the same

In [None]:
# Genes dynamo flagged with `use_for_pca`, and all genes currently in `adata`.
dyn_pca_genes = set(dyn_adata.var_names[dyn_adata.var["use_for_pca"]])
scv_genes = set(adata.var_names)

# Compute each set relation once and reuse it for the reports and checks below.
shared_genes = scv_genes & dyn_pca_genes
only_in_scv = scv_genes - dyn_pca_genes
only_in_dyn = dyn_pca_genes - scv_genes

# The overlap must cover both gene sets completely.
print(len(shared_genes))
assert len(shared_genes) == len(adata.var_names)
assert len(shared_genes) == len(dyn_pca_genes)

# Report genes unique to either side; dynamo must not have any extras.
print(len(only_in_scv))
print(len(only_in_dyn))
assert len(only_in_dyn) == 0

print(len(dyn_pca_genes))
print(adata.n_vars)

In [None]:
# Display the dynamo AnnData summary again for reference next to the checks above.
dyn_adata

In [None]:
# Summary statistics of the "initial_size" column of the scvelo object's obs table.
adata.obs["initial_size"].describe()

In [None]:
# Print the PCA embedding shapes of the scvelo object and the dynamo object;
# the scatter comparison below flattens both arrays against each other.
print(adata.obsm["X_pca"].shape)
print(dyn_adata.obsm["X_pca"].shape)

## Comparison of X_pca, spliced and unspliced data  
Note: scvelo does not log1p-transform the unspliced and spliced layers.

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd


def _scatter_scv_vs_dyn(ax, scv_values, dyn_values, title):
    """Plot scvelo values (x axis) against dynamo values (y axis) on `ax`."""
    plot_scatter_sparse(scv_values, dyn_values, ax=ax)
    ax.set_title(title)
    ax.set_xlabel("scv")
    ax.set_ylabel("dyn")


fig, axes = plt.subplots(4, figsize=(10, 30))

# Panel 0: flattened PCA embeddings of the two pipelines against each other.
key = "X_pca"
axes[0].scatter(adata.obsm[key].flatten(), dyn_adata.obsm[key].flatten())
axes[0].set_title(key)

# Panels 1-3: restrict both objects to the genes they share, then compare X
# and the spliced/unspliced layers (dynamo stores these as "X_spliced" and
# "X_unspliced").
use_for_pca = list(set(adata.var_names).intersection(dyn_pca_genes))
_scatter_scv_vs_dyn(
    axes[1],
    adata[:, use_for_pca].X,
    dyn_adata[:, use_for_pca].X,
    "adata.X comparison",
)
_scatter_scv_vs_dyn(
    axes[2],
    adata[:, use_for_pca].layers["spliced"],
    dyn_adata[:, use_for_pca].layers["X_spliced"],
    "adata spliced comparison",
)
_scatter_scv_vs_dyn(
    axes[3],
    adata[:, use_for_pca].layers["unspliced"],
    dyn_adata[:, use_for_pca].layers["X_unspliced"],
    "adata unspliced comparison",
)

# Summary statistics of both embeddings; the final expression is left bare so
# the notebook renders it as the cell output.
print("scvelo seurat X_pca stats:")
scv_pca_values = pd.Series(adata.obsm["X_pca"].flatten())
print(scv_pca_values.describe())
print("dynamo seurat X_pca stats:")
pd.Series(dyn_adata.obsm["X_pca"].flatten()).describe()

In [None]:
# Compare scvelo's "initial_size" with dynamo's "Size_Factor" per cell.
# x/y are passed by keyword: seaborn >= 0.12 rejects positional data vectors.
sns.scatterplot(x=adata.obs["initial_size"].to_numpy(), y=dyn_adata.obs["Size_Factor"].to_numpy())

In [None]:
# Compare scvelo's "initial_size_unspliced" with dynamo's "unspliced_Size_Factor".
# x/y are passed by keyword: seaborn >= 0.12 rejects positional data vectors.
sns.scatterplot(x=adata.obs["initial_size_unspliced"].to_numpy(), y=dyn_adata.obs["unspliced_Size_Factor"].to_numpy())

In [None]:
# Compare scvelo's "initial_size_spliced" with dynamo's "spliced_Size_Factor".
# x/y are passed by keyword: seaborn >= 0.12 rejects positional data vectors.
sns.scatterplot(x=adata.obs["initial_size_spliced"].to_numpy(), y=dyn_adata.obs["spliced_Size_Factor"].to_numpy())

In [None]:
# Run dynamo's dimension reduction on the dynamo-preprocessed object, then
# plot its UMAP colored by `celltype_key`.
dyn.tl.reduceDimension(dyn_adata)
dyn.pl.umap(dyn_adata, color=celltype_key)

In [None]:
# Compute and plot the UMAP of the scvelo-preprocessed object, colored by the
# same `celltype_key`, for side-by-side comparison with the dynamo plot.
scv.tl.umap(adata)
scv.pl.umap(adata, color=celltype_key)