# RNA velocity

[Bergen et al., 2021](https://www.embopress.org/doi/full/10.15252/msb.202110282) write (emphasis added):

> Beyond the scope of computational modeling, the statistical power of the methods depends on the curvature in the phase portrait since a lack of curvature challenges current models to distinguish whether an up- or down-regulation is occurring. The overall curvature of deviation from the steady-state line in the phase portrait is mostly impacted by the ratios of splicing to degradation rates (Box 1), **indicating that statistical inference is limited to genes where splicing is faster or comparable to degradation, while a small ratio would yield straight lines rather than an interpretable curvature.**

In [None]:
import scvelo as scv
import scanpy as sc
import numpy as np
import pandas as pd

import os

In [None]:
# rpy2: expose the R installation of this project's conda environment through
# the R_HOME environment variable.
os.environ.update(
    {'R_HOME': '/home/fdeckert/bin/miniconda3/envs/p.3.8.12-FD20200109SPLENO/lib/R'}
)

In [None]:
# Global plotting configuration for scanpy and scVelo.
# NOTE(review): vector_friendly=False presumably turns off rasterisation of
# plot elements in vector-format exports - confirm against the scanpy docs.
sc.settings.vector_friendly = False
# Default figure size for scVelo plots (set again before each stream plot below).
scv.set_figure_params(figsize=(2, 5))

In [None]:
# Work from the project root so that the relative paths used below
# ('plotting_global.R', 'data/...', 'result/...') resolve.
os.chdir('/research/peer/fdeckert/FD20200109SPLENO')

In [None]:
# Plotting
# Source the project's R plotting script and mirror its colour palettes as a
# nested dict: color[<palette name>][<level>] -> colour value.
# NOTE(review): rpy2 is imported here rather than with the other imports,
# presumably so that R_HOME (set in an earlier cell) is in place first - confirm.
import rpy2.robjects as robjects
color_load = robjects.r.source('plotting_global.R')

# Element 0 of the source() result is used as the named list of palettes;
# fetch it once instead of re-indexing color_load on every access.
color_palettes = color_load[0]
color = {
    palette_name: {
        key: color_palettes[i].rx2(key)[0]  # first element of each palette entry
        for key in color_palettes[i].names
    }
    for i, palette_name in enumerate(color_palettes.names)
}

# Parameter 

In [None]:
# Treatment group analysed in this run; used below to subset the metadata.
treatment = 'NaCl'

# Output locations, relative to the working directory.
result_latent_time = 'result/scvelo/latent_time_nacl.csv'
result_h5ad = 'data/object/scvelo_nacl.h5ad'

# Import data 

In [None]:
# Velocity count object (velocyto output, judging by the file name).
adata = sc.read_h5ad('data/object/velocyto.h5ad')

# Cell metadata and UMAP coordinates exported by the previous analysis; the
# first CSV column is used as the row index in both tables.
obs = pd.read_csv(
    'data/object/adata_sct_hvg2000/meta/meta.csv',
    index_col=0,
)
obsm = pd.read_csv(
    'data/object/adata_sct_hvg2000/reductions/X_umap_paga/reduction.csv',
    index_col=0,
)

# Filter velocity matrix and combine with obs and obsm from previous analysis. 

In [None]:
# Values of obs['leiden_annotation'] that are kept for the velocity analysis.
leiden_annotation = [
    'MPP',
    'T lymphocyte',
    'Meg',
    'MEP',
    'Ery (1)',
    'Ery (2)',
    'Ery (3)',
    'Ery (4)',
]

In [None]:
# Keep only the cells whose annotation is in the selected list AND that belong
# to the chosen treatment group.
obs = obs[
    obs['leiden_annotation'].isin(leiden_annotation)
    & (obs['treatment'] == treatment)
]

# Restrict the embedding table to the cells that survived the filter above.
obsm = obsm.loc[obsm.index.isin(obs.index)]

In [None]:
# Filter velocity adata by obs: keep only the cells present in the filtered
# metadata table.
# Subsetting an AnnData object yields a view; .copy() turns it into an
# independent object so that the later assignments to .obs / .obsm and the
# in-place scVelo steps operate on real data instead of triggering an implicit
# copy of the view.
adata = adata[adata.obs.index.isin(obs.index)].copy()

In [None]:
# Bring the metadata and the embedding into the cell order of the velocity
# object. reindex() inserts NaN rows for any cell missing from a table.
obs = obs.reindex(adata.obs_names)
obsm = obsm.reindex(adata.obs_names)

# Attach the metadata and use the embedding coordinates as 'X_umap'.
adata.obs = obs
adata.obsm['X_umap'] = obsm.to_numpy()

In [None]:
def set_color(categories, data=None, palette=None):
    """Apply the palette's level order and colours to categorical obs columns.

    For every name in ``categories`` that is a column of ``data.obs``, the
    column is converted to a pandas categorical, its categories are reordered
    to follow the key order of ``palette[name]`` (restricted to the values
    that actually occur in the column), and the matching colours are stored
    in ``data.uns[name + '_colors']``.

    Parameters
    ----------
    categories
        Iterable of palette / obs column names. Names that are not columns
        of ``data.obs`` are skipped.
    data
        Object exposing ``obs`` (a DataFrame) and ``uns`` (a mapping).
        Defaults to the module-level ``adata``.
    palette
        Nested mapping ``palette[name][level] -> colour``. Defaults to the
        module-level ``color``.

    Raises
    ------
    KeyError
        If a name is a column of ``data.obs`` but has no entry in ``palette``.
    ValueError
        Raised by ``reorder_categories`` when a column contains a value that
        has no entry in its palette.
    """
    # Fall back to the notebook-level objects so existing calls keep working.
    if data is None:
        data = adata
    if palette is None:
        palette = color

    obs_columns = set(data.obs.columns)

    for category in categories:

        if category not in obs_columns:
            continue

        data.obs[category] = data.obs[category].astype('category')

        # Collect the observed values once; testing each palette key against
        # a freshly built list of all cells costs O(keys * cells).
        observed = set(data.obs[category].dropna().unique())
        keys = [key for key in palette[category] if key in observed]

        data.obs[category] = data.obs[category].cat.reorder_categories(keys)
        # Colour array stored in the same order as the reordered categories.
        data.uns[category + '_colors'] = np.array(
            [palette[category][key] for key in keys], dtype=object
        )
        
# Apply every palette loaded from the R script; palettes without a matching
# obs column are skipped inside set_color.
set_color(list(color))

In [None]:
# Overview UMAPs of the imported metadata columns, five panels per row.
sc.pl.umap(
    adata,
    color=[
        'leiden',
        'tissue',
        'treatment',
        'leiden_annotation',
        'label_fine_haemosphere',
        'sample_rep',
        'cc_phase_class',
        'pMt_RNA',
        'pHb_RNA',
        'pRb_RNA',
    ],
    wspace=1,
    ncols=5,
)

In [None]:
# Keep an independent copy of the object as it is before pre-processing.
# NOTE(review): adata_temp is not referenced again in the cells visible here -
# drop it if it is not needed, since it holds a second full copy in memory.
adata_temp = adata.copy()

# Pre-process

In [None]:
# scVelo gene filtering and normalisation with a shared-count threshold of 20
# and 2000 top genes; modifies adata in place.
scv.pp.filter_and_normalize(
    adata,
    min_shared_counts=20,
    n_top_genes=2000,
)

# Scvelo

In [None]:
# Compute moments with scVelo's default settings, then fit the dynamical model.
# NOTE(review): recover_dynamics is presumably what fills
# adata.var['fit_likelihood'], which is read further below - confirm.
scv.pp.moments(adata)
scv.tl.recover_dynamics(adata)

In [None]:
# Estimate velocities in 'dynamical' mode, then build the velocity graph from
# them. The order matters: the graph call follows the velocity estimation.
scv.tl.velocity(adata, mode='dynamical')
scv.tl.velocity_graph(adata)

In [None]:
# Latent time with a likelihood threshold of 0.1. After this step adata.obs
# holds the columns plotted and exported below ('root_cells', 'end_points',
# 'velocity_pseudotime', 'latent_time').
scv.tl.latent_time(adata, min_likelihood=0.1)

In [None]:
# Velocity stream plot on the imported UMAP, coloured by cell annotation.
scv.set_figure_params(figsize=(2, 5))
scv.pl.velocity_embedding_stream(
    adata,
    basis='X_umap',
    color=['leiden_annotation'],
)

In [None]:
# UMAPs of the root / end-point assignments and the two time estimates.
sc.pl.umap(
    adata,
    color=[
        'root_cells',
        'end_points',
        'velocity_pseudotime',
        'latent_time',
    ],
)

In [None]:
# Genes with a fit likelihood, ordered from highest to lowest likelihood;
# genes without a value are removed.
top_genes = (
    adata.var['fit_likelihood']
    .sort_values(ascending=False)
    .dropna()
    .index
)

# Test those genes for differential kinetics between the annotated groups.
scv.tl.differential_kinetic_test(
    adata,
    var_names=top_genes,
    groupby='leiden_annotation',
)

In [None]:
# Export the root / end-point assignments and time estimates per cell.
# NOTE(review): this export runs before the differential-kinetics correction
# below, so the file holds the uncorrected values - confirm that is intended.
# to_csv does not create missing directories, so make sure the target exists.
os.makedirs(os.path.dirname(result_latent_time) or '.', exist_ok=True)
adata.obs[['root_cells', 'end_points', 'velocity_pseudotime', 'latent_time']].to_csv(result_latent_time)

## Correct for differential kinetics genes (DKG)

In [None]:
# Re-estimate the velocities with diff_kinetics=True, i.e. using the outcome
# of the differential kinetic test run above, and rebuild the velocity graph
# from the corrected velocities.
scv.tl.velocity(adata, mode='dynamical', diff_kinetics=True)
scv.tl.velocity_graph(adata)

In [None]:
# Remove the time-related columns of the first pass before recomputing them.
# NOTE(review): presumably needed so that latent_time does not reuse the stale
# root / end-point assignments - confirm against the scVelo implementation.
for stale_column in ('root_cells', 'end_points', 'velocity_pseudotime', 'latent_time'):
    del adata.obs[stale_column]

scv.tl.latent_time(adata, min_likelihood=0.1)

In [None]:
# Stream plot of the velocities recomputed with diff_kinetics=True.
scv.set_figure_params(figsize=(2, 5))
scv.pl.velocity_embedding_stream(
    adata,
    basis='X_umap',
    color=['leiden_annotation'],
)

In [None]:
# UMAPs of the recomputed root / end-point assignments and time estimates.
sc.pl.umap(
    adata,
    color=[
        'root_cells',
        'end_points',
        'velocity_pseudotime',
        'latent_time',
    ],
)