# RNA velocity

[Bergen et al., 2021](https://www.embopress.org/doi/full/10.15252/msb.202110282):

> Beyond the scope of computational modeling, the statistical power of the methods depends on the curvature in the phase portrait since a lack of curvature challenges current models to distinguish whether an up- or down-regulation is occurring. The overall curvature of deviation from the steady-state line in the phase portrait is mostly impacted by the ratios of splicing to degradation rates (Box 1), **indicating that statistical inference is limited to genes where splicing is faster or comparable to degradation, while a small ratio would yield straight lines rather than an interpretable curvature.**

In [None]:
import scvelo as scv
import numpy as np
import pandas as pd
import scipy
import scanpy as sc

import os

In [None]:
# Global plotting configuration for the notebook.
# Scanpy: switch the vector_friendly setting off.
sc.settings.vector_friendly = False
# scVelo: default figure size for subsequent plots.
# NOTE(review): presumably (width, height) in inches — confirm.
scv.set_figure_params(figsize=(2, 5))

In [None]:
# Every relative path used below ('data/...', 'result/...') resolves against
# this project directory.
# NOTE(review): absolute, machine-specific path — the notebook only runs where
# this directory exists.
os.chdir('/research/peer/fdeckert/FD20200109SPLENO')

# Import data 

In [None]:
# AnnData object used as the velocity input
# (file name suggests velocyto output — TODO confirm).
adata = sc.read_h5ad('data/object/velocyto.h5ad')

# Cell-level metadata and UMAP coordinates from the previous analysis.
# In both tables the first CSV column becomes the row index.
obs = pd.read_csv(
    filepath_or_buffer='data/object/int/meta/meta.csv',
    index_col=0,
)
obsm = pd.read_csv(
    filepath_or_buffer='data/object/int/reductions/X_umap/reduction.csv',
    index_col=0,
)

# Filter velocity matrix and combine with obs and obsm from previous analysis. 

In [None]:
# Populations kept for the velocity analysis, each paired with the colour it is
# drawn in. The two lists derived below share this order, so position i of
# celltype_colours belongs to position i of population_names.
_population_palette = (
    ('MPP (1)', '#FA9FB5'),
    ('Ery (1)', '#FC9272'),
    ('Ery (2)', '#FB6A4A'),
    ('Ery (3)', '#EF3B2C'),
    ('Ery (4)', '#CB181D'),
    ('Ery (5)', '#A50F15'),
    ('Ery (6)', '#67000D'),
)
population_names = [label for label, _ in _population_palette]
celltype_colours = [colour for _, colour in _population_palette]

In [None]:
# Keep only the cells whose annotation is one of the selected populations.
# (Only 'leiden_annotation' is filtered here; no treatment filter is applied.)
obs = obs.loc[obs['leiden_annotation'].isin(population_names)]

# Restrict the embedding table to the cells that remain in obs.
obsm = obsm.loc[obsm.index.isin(obs.index)]

In [None]:
# Keep only the cells that are still present in the filtered metadata.
# Take a real copy instead of a view: obs and obsm are assigned on this object
# afterwards, and writing to an AnnData view triggers an implicit copy together
# with an ImplicitModificationWarning.
adata = adata[adata.obs_names.isin(obs.index)].copy()

In [None]:
# Bring both tables into the cell order of adata. reindex() inserts NaN rows
# for any cell missing from a table rather than raising.
obs, obsm = (table.reindex(adata.obs_names) for table in (obs, obsm))

# Replace the metadata and attach the precomputed UMAP coordinates.
adata.obs = obs
adata.obsm['X_umap'] = obsm.to_numpy()

In [None]:
# Colours are matched to categories by position, so the category order set
# below has to follow population_names, the order celltype_colours was written in.
adata.uns['leiden_annotation_colors'] = celltype_colours

# Cast to categorical and fix the category order. The result is assigned back
# because the `inplace=True` form of reorder_categories was removed in
# pandas 2.0. reorder_categories raises ValueError if the categories present
# differ from population_names.
adata.obs['leiden_annotation'] = (
    adata.obs['leiden_annotation']
    .astype('category')
    .cat.reorder_categories(population_names)
)

In [None]:
# Overview UMAPs, one panel per metadata column, five panels per row.
sc.pl.umap(
    adata,
    color=[
        'leiden',
        'tissue',
        'treatment',
        'leiden_annotation',
        'sample_rep',
        'cc_phase_class',
        'pMt_RNA',
        'pHb_RNA',
        'pRb_RNA',
    ],
    ncols=5,
    wspace=1,
)

In [None]:
# Snapshot of the prepared object; hvg_select() reads from it for each
# treatment group, and the commented-out scVelo run restarts from it.
adata_temp = adata.copy()

# HVG

In [None]:
def hvg_select(subset):
    """Return the genes kept by scVelo's gene filtering for one treatment group.

    The cells of the module-level ``adata_temp`` whose ``obs['treatment']``
    equals ``subset`` are copied and passed through
    ``scv.pp.filter_and_normalize`` (``min_shared_counts=20``,
    ``n_top_genes=2000``). ``adata_temp`` itself is left untouched.

    Parameters
    ----------
    subset
        Value compared against ``obs['treatment']`` (e.g. ``"NaCl"``).

    Returns
    -------
    pandas.Index
        ``var_names`` of the filtered copy.
    """
    in_group = adata_temp.obs['treatment'] == subset

    # Subset first and copy only the selected cells: this avoids duplicating
    # the whole object and gives filter_and_normalize a real AnnData rather
    # than a view to modify in place.
    adata_subset = adata_temp[in_group].copy()
    scv.pp.filter_and_normalize(adata_subset, min_shared_counts=20, n_top_genes=2000)

    return adata_subset.var_names

In [None]:
# Gene selection per treatment group, evaluated in the order NaCl, then CpG.
hvg_nacl, hvg_cpg = (hvg_select(treatment) for treatment in ("NaCl", "CpG"))

In [None]:
# Genes selected in both treatment groups. Sorted so that the gene order is
# reproducible: the iteration order of a set of strings changes between
# Python sessions.
hvg = sorted(set(hvg_nacl) & set(hvg_cpg))

# Scvelo

In [None]:
# adata = adata_temp.copy()
# scv.pp.filter_and_normalize(adata, min_shared_counts=20, subset_highly_variable=False)
# adata = adata[:, hvg]

In [None]:
# scv.pp.moments(adata)

In [None]:
# scv.tl.recover_dynamics(adata)

In [None]:
# scv.tl.velocity(adata, mode='dynamical')
# scv.tl.velocity_graph(adata)

In [None]:
# Load the stored scVelo object instead of re-running the commented-out
# preprocessing and velocity steps above.
adata = sc.read_h5ad('data/object/scvelo.h5ad')

In [None]:
# Remove the trajectory columns stored in the loaded file before calling
# latent_time (presumably so they are recomputed rather than reused — confirm
# against the scVelo documentation). A missing column raises KeyError.
for column in ('root_cells', 'end_points', 'velocity_pseudotime', 'latent_time'):
    del adata.obs[column]

scv.tl.latent_time(adata, min_likelihood=0.01)

In [None]:
# Re-apply the figure size, then draw the velocity stream plot on the UMAP
# embedding, coloured by population annotation.
scv.set_figure_params(figsize=(2, 5))
scv.pl.velocity_embedding_stream(adata, basis='X_umap', color=['leiden_annotation'])

In [None]:
# UMAPs of the four columns that were deleted above and are expected to be
# repopulated by scv.tl.latent_time.
sc.pl.umap(adata, color=['root_cells', 'end_points', 'velocity_pseudotime', 'latent_time'])

In [None]:
# top_genes = adata.var['fit_likelihood'].sort_values(ascending=False).dropna().index
# scv.tl.differential_kinetic_test(adata, var_names=top_genes, groupby='leiden_annotation')

In [None]:
# adata.write_h5ad('data/object/scvelo.h5ad')

## Correct for DKG (Differential kinetics genes) 

In [None]:
# adata = sc.read_h5ad('data/object/scvelo.h5ad')

In [None]:
# scv.tl.velocity(adata, mode='dynamical', diff_kinetics=True)
# scv.tl.velocity_graph(adata)

In [None]:
# Load the stored object for the differential-kinetics run; the velocity
# steps that correspond to it are commented out above.
adata = sc.read_h5ad('data/object/scvelo_dkg.h5ad')

In [None]:
# Remove the trajectory columns stored in the loaded file before calling
# latent_time (presumably so they are recomputed rather than reused — confirm
# against the scVelo documentation). A missing column raises KeyError.
for column in ('root_cells', 'end_points', 'velocity_pseudotime', 'latent_time'):
    del adata.obs[column]

scv.tl.latent_time(adata, min_likelihood=0.01)

In [None]:
# Re-apply the figure size, then draw the velocity stream plot on the UMAP
# embedding, coloured by population annotation.
scv.set_figure_params(figsize=(2, 5))
scv.pl.velocity_embedding_stream(adata, basis='X_umap', color=['leiden_annotation'])

In [None]:
# UMAPs of the four columns that were deleted above and are expected to be
# repopulated by scv.tl.latent_time.
sc.pl.umap(adata, color=['root_cells', 'end_points', 'velocity_pseudotime', 'latent_time'])

In [None]:
# Save the object after the latent_time call.
# NOTE: this overwrites the same file that was loaded at the start of this section.
adata.write_h5ad('data/object/scvelo_dkg.h5ad')

## Model velocity by treatment groups

In [None]:
# adata = sc.read_h5ad('data/object/scvelo.h5ad')

In [None]:
# scv.tl.velocity(adata, mode='dynamical', groupby="treatment", groups=['NaCl', 'CpG'])
# scv.tl.velocity_graph(adata)

In [None]:
# Load the stored object for the treatment-grouped run; the velocity steps
# that correspond to it are commented out above.
adata = sc.read_h5ad('data/object/scvelo_grp.h5ad')

In [None]:
# Remove the trajectory columns stored in the loaded file before calling
# latent_time (presumably so they are recomputed rather than reused — confirm
# against the scVelo documentation). A missing column raises KeyError.
for column in ('root_cells', 'end_points', 'velocity_pseudotime', 'latent_time'):
    del adata.obs[column]

scv.tl.latent_time(adata, min_likelihood=0.01)

In [None]:
# Re-apply the figure size, then draw the velocity stream plot on the UMAP
# embedding, coloured by population annotation.
scv.set_figure_params(figsize=(2, 5))
scv.pl.velocity_embedding_stream(adata, basis='X_umap', color=['leiden_annotation'])

In [None]:
# UMAPs of the four columns that were deleted above and are expected to be
# repopulated by scv.tl.latent_time.
sc.pl.umap(adata, color=['root_cells', 'end_points', 'velocity_pseudotime', 'latent_time'])

In [None]:
# Save the object after the latent_time call.
# NOTE: this overwrites the same file that was loaded at the start of this section.
adata.write_h5ad('data/object/scvelo_grp.h5ad')

In [None]:
# Export the latent time of the treatment-grouped object as a one-column CSV;
# the obs index is written as the first column (pandas default).
adata.obs[['latent_time']].to_csv('result/scvelo/latent_time_grp.csv')

## DKG and treatment groups

In [None]:
# adata = sc.read_h5ad('data/object/scvelo.h5ad')

In [None]:
# scv.tl.velocity(adata, mode='dynamical', diff_kinetics=True, groupby="treatment", groups=['NaCl', 'CpG'])
# scv.tl.velocity_graph(adata)

In [None]:
# Load the stored object for the run that combines differential kinetics and
# treatment grouping; the matching velocity steps are commented out above.
adata = sc.read_h5ad('data/object/scvelo_grp_dkg.h5ad')

In [None]:
# Remove the trajectory columns stored in the loaded file before calling
# latent_time (presumably so they are recomputed rather than reused — confirm
# against the scVelo documentation). A missing column raises KeyError.
for column in ('root_cells', 'end_points', 'velocity_pseudotime', 'latent_time'):
    del adata.obs[column]

scv.tl.latent_time(adata, min_likelihood=0.01)

In [None]:
# Re-apply the figure size, then draw the velocity stream plot on the UMAP
# embedding, coloured by population annotation.
scv.set_figure_params(figsize=(2, 5))
scv.pl.velocity_embedding_stream(adata, basis='X_umap', color=['leiden_annotation'])

In [None]:
# UMAPs of the four columns that were deleted above and are expected to be
# repopulated by scv.tl.latent_time.
sc.pl.umap(adata, color=['root_cells', 'end_points', 'velocity_pseudotime', 'latent_time'])

In [None]:
# Save the object after the latent_time call.
# NOTE: this overwrites the same file that was loaded at the start of this section.
adata.write_h5ad('data/object/scvelo_grp_dkg.h5ad')