# CellRank 

[Lange et al., 2022](https://www.nature.com/articles/s41592-021-01346-6)

In [None]:
import scvelo as scv
import cellrank as cr
import scanpy as sc

import scipy

import numpy as np
import pandas as pd

import os

In [None]:
import warnings
# Install a blanket "ignore" filter so warnings raised by the cells below are not shown.
warnings.simplefilter("ignore")

In [None]:
# Apply the same figure defaults to scanpy and scVelo:
# 5x5 figures, 1200 dpi on save, font size 10, no frame, 'magma' color map.
sc.set_figure_params(
    figsize=(5, 5),
    dpi_save=1200,
    fontsize=10,
    frameon=False,
    color_map='magma',
)
scv.set_figure_params(
    figsize=(5, 5),
    dpi_save=1200,
    fontsize=10,
    frameon=False,
    color_map='magma',
)

In [None]:
# Switch the working directory; the relative 'data/...' and 'result/...' paths
# used in the cells below are resolved against it.
os.chdir('/research/peer/fdeckert/FD20200109SPLENO')

# Import data 

In [None]:
# Load the AnnData object from disk; it provides the background UMAP layers
# in the plotting cells further down.
adata = sc.read_h5ad('data/scRNAseq/object/pp.h5ad')
# Disabled: optional restriction to the MegP / MEP / Proerythroblast / Erythroblast cell types.
# adata = adata[adata.obs.celltype_low.isin(['MegP', 'MEP', 'Proerythroblast', 'Erythroblast'])]

In [None]:
from cellrank.kernels import ConnectivityKernel, PseudotimeKernel, VelocityKernel

# Previously pickled kernels: one velocity (vk_), pseudotime (pk_) and
# connectivity (ck_) kernel for each of the four conditions.

# --- Velocity kernels ---
vk_ery_wt_baseline_d6 = VelocityKernel.read(
    'data/scRNAseq/object/cellrank/kernel/vk_ery_wt_baseline_d6.pickle'
)
vk_ery_wt_cpg_d6 = VelocityKernel.read(
    'data/scRNAseq/object/cellrank/kernel/vk_ery_wt_cpg_d6.pickle'
)
vk_ery_ifnar_fl = VelocityKernel.read(
    'data/scRNAseq/object/cellrank/kernel/vk_ery_ifnar_fl.pickle'
)
vk_ery_ifnar_fl_lysm_cre = VelocityKernel.read(
    'data/scRNAseq/object/cellrank/kernel/vk_ery_ifnar_fl_lysm_cre.pickle'
)

# --- Pseudotime kernels ---
pk_ery_wt_baseline_d6 = PseudotimeKernel.read(
    'data/scRNAseq/object/cellrank/kernel/pk_ery_wt_baseline_d6.pickle'
)
pk_ery_wt_cpg_d6 = PseudotimeKernel.read(
    'data/scRNAseq/object/cellrank/kernel/pk_ery_wt_cpg_d6.pickle'
)
pk_ery_ifnar_fl = PseudotimeKernel.read(
    'data/scRNAseq/object/cellrank/kernel/pk_ery_ifnar_fl.pickle'
)
pk_ery_ifnar_fl_lysm_cre = PseudotimeKernel.read(
    'data/scRNAseq/object/cellrank/kernel/pk_ery_ifnar_fl_lysm_cre.pickle'
)

# --- Connectivity kernels ---
ck_ery_wt_baseline_d6 = ConnectivityKernel.read(
    'data/scRNAseq/object/cellrank/kernel/ck_ery_wt_baseline_d6.pickle'
)
ck_ery_wt_cpg_d6 = ConnectivityKernel.read(
    'data/scRNAseq/object/cellrank/kernel/ck_ery_wt_cpg_d6.pickle'
)
ck_ery_ifnar_fl = ConnectivityKernel.read(
    'data/scRNAseq/object/cellrank/kernel/ck_ery_ifnar_fl.pickle'
)
ck_ery_ifnar_fl_lysm_cre = ConnectivityKernel.read(
    'data/scRNAseq/object/cellrank/kernel/ck_ery_ifnar_fl_lysm_cre.pickle'
)

# Workflow functions 

In [None]:
##########################
### Recompute velocity ###
##########################
def re_compute(adata):
    """Rebuild the neighbor graph and velocity-derived quantities for ``adata``.

    Steps, in order:

    1. Select the genes flagged in ``adata.var.velocity_genes`` whose
       ``adata.var.fit_likelihood`` is at least 0.10.
    2. Recompute neighbors (30 neighbors) on the ``'latent'`` representation.
    3. Recompute the velocity graph using only the selected genes.
    4. Compute velocity pseudotime, then latent time.

    The scanpy / scVelo calls are made without capturing a return value, so
    they are presumably applied to ``adata`` in place.

    Returns:
        The same ``adata`` object that was passed in.
    """
    # Gene selection: velocity genes first, then the likelihood cut-off
    velocity_var = adata.var[adata.var.velocity_genes]
    well_fit_var = velocity_var[velocity_var.fit_likelihood >= 0.10]
    gene_subset = well_fit_var.index

    # Neighbor graph on the latent representation
    sc.pp.neighbors(adata, n_neighbors=30, n_pcs=None, use_rep='latent')

    # Velocity graph restricted to the selected genes
    scv.tl.velocity_graph(adata, gene_subset=gene_subset)

    # Velocity pseudotime
    scv.tl.velocity_pseudotime(adata)

    # Latent time
    scv.tl.recover_latent_time(adata)

    return adata

In [None]:
##############
### Kernel ###
##############
def kernel_workflow(k_1, k_2, kr_1=1.0, kr_2=1.0):
    """Combine two kernels into one weighted kernel.

    Both kernels are modified: each kernel's ``adata`` is replaced by the
    object returned from ``adata.raw.to_adata()``, and that new object is
    then stored as its own ``raw``. Afterwards each kernel's transition
    matrix is computed.

    Args:
        k_1: First kernel.
        k_2: Second kernel.
        kr_1: Weight applied to the first kernel (default 1.0).
        kr_2: Weight applied to the second kernel (default 1.0).

    Returns:
        ``kr_1 * k_1 + kr_2 * k_2``, evaluated on the objects returned by
        ``compute_transition_matrix()``.
    """
    kernels = (k_1, k_2)

    # Swap in the AnnData rebuilt from ``raw`` (count matrix) for both kernels
    for kernel in kernels:
        kernel.adata = kernel.adata.raw.to_adata()

    # Register each rebuilt AnnData as its own ``raw``
    for kernel in kernels:
        kernel.adata.raw = kernel.adata

    # Transition matrices, first kernel before second
    first, second = (kernel.compute_transition_matrix() for kernel in kernels)

    return kr_1 * first + kr_2 * second

In [None]:
#############
### GPCCA ###
#############
def gpcca_workflow(k, n_components):
    """Create a GPCCA estimator for kernel ``k`` and run its Schur decomposition.

    Args:
        k: Kernel handed to ``cr.estimators.GPCCA``.
        n_components: Number of components passed to ``compute_schur``.

    Returns:
        The GPCCA estimator after ``compute_schur`` (method ``'brandts'``)
        has been called on it.
    """
    estimator = cr.estimators.GPCCA(k)
    estimator.compute_schur(n_components=n_components, method='brandts')
    return estimator

## CellRank Erythroid (WT Baseline) 

In [None]:
# Toggle for the next cell: True rebuilds and saves the combined kernel and
# GPCCA estimator; False loads the previously saved pickles instead.
compute_ery_wt_baseline_d6 = False

In [None]:
# Obtain the combined kernel and GPCCA estimator for this condition:
# load both from their pickles unless a rebuild was requested above.
if not compute_ery_wt_baseline_d6:

    k_ery_wt_baseline_d6 = cr.kernels.PrecomputedKernel.read(
        'data/scRNAseq/object/cellrank/k_ery_wt_baseline_d6.pickle'
    )
    g_ery_wt_baseline_d6 = cr.estimators.GPCCA.read(
        'data/scRNAseq/object/cellrank/g_ery_wt_baseline_d6.pickle'
    )

else:

    # Fresh velocity kernel built on the re-computed AnnData
    vk_ery_wt_baseline_d6 = VelocityKernel(
        re_compute(vk_ery_wt_baseline_d6.adata)
    )

    # Velocity and connectivity kernels combined with weights 0.5 / 0.5, then saved
    k_ery_wt_baseline_d6 = kernel_workflow(
        vk_ery_wt_baseline_d6, ck_ery_wt_baseline_d6, 0.5, 0.5
    )
    k_ery_wt_baseline_d6.write(
        'data/scRNAseq/object/cellrank/k_ery_wt_baseline_d6.pickle',
        write_adata=True,
    )

    # GPCCA estimator: Schur decomposition with 20 components, then fit with 15 states
    g_ery_wt_baseline_d6 = gpcca_workflow(k_ery_wt_baseline_d6, n_components=20)
    g_ery_wt_baseline_d6.fit(n_states=15, n_cells=20, cluster_key="celltype_low")

    # Terminal states selected by name
    g_ery_wt_baseline_d6.set_terminal_states(['MegP', 'Erythroblast_2'])

    # Fate probabilities, using the 'gmres' solver
    g_ery_wt_baseline_d6.compute_fate_probabilities(solver='gmres')

    # Save the estimator together with its AnnData
    g_ery_wt_baseline_d6.write(
        'data/scRNAseq/object/cellrank/g_ery_wt_baseline_d6.pickle',
        write_adata=True,
    )

In [None]:
# Layer 1: full dataset with marker size 0; the returned axes are reused below.
ax = sc.pl.umap(adata, alpha=1, edgecolor='none', size=0, show=False)

# Layer 2: the four erythroid-related cell types, plotted without a color key
# (presumably rendered in the grey na_color).
sc.pl.umap(
    adata[adata.obs.celltype_low.isin(['MegP', 'MEP', 'Proerythroblast', 'Erythroblast'])],
    na_color='#7f7f7f',
    alpha=1,
    size=20,
    edgecolor='none',
    legend_loc="none",
    show=False,
    ax=ax,
)

# Layer 3: cells of this condition's kernel, colored by cell type.
sc.pl.umap(
    k_ery_wt_baseline_d6.adata,
    color='celltype_low',
    alpha=1,
    size=20,
    edgecolor='none',
    legend_loc="none",
    show=False,
    ax=ax,
)

# Layer 4: kernel projection drawn on the same axes and saved as SVG.
k_ery_wt_baseline_d6.plot_projection(
    color='celltype_low',
    recompute=True,
    save="result/cellrank/umap_stream_ery_wt_nacl_d6.svg",
    size=0,
    legend_loc="none",
    density=1.5,
    max_length=10,
    integration_direction='both',
    ax=ax,
)

In [None]:
# All macrostates, discrete display, labels on the data.
g_ery_wt_baseline_d6.plot_macrostates(
    which="all",
    discrete=True,
    legend_loc="on data",
    figsize=(3, 6),
    size=100,
)

In [None]:
# Terminal macrostates only, non-discrete display, labels on the data.
g_ery_wt_baseline_d6.plot_macrostates(
    which="terminal",
    legend_loc="on data",
    discrete=False,
    figsize=(3, 6),
    size=100,
)

In [None]:
# Fate probabilities drawn in a single plot with the 'magma' color map.
g_ery_wt_baseline_d6.plot_fate_probabilities(
    same_plot=True,
    ncols=5,
    legend_loc='none',
    cmap='magma',
    figsize=(3, 6),
    size=100,
)

In [None]:
# Normalize total counts, then log1p-transform, the AnnData attached to the estimator.
# NOTE(review): no return value is captured, so both calls presumably modify the
# object in place; re-running this cell would then apply the transforms twice — confirm.
sc.pp.normalize_total(g_ery_wt_baseline_d6.adata)
sc.pp.log1p(g_ery_wt_baseline_d6.adata)

In [None]:
# Lineage drivers for 'MegP': keep the returned result, then plot 5 genes.
drivers_2 = g_ery_wt_baseline_d6.compute_lineage_drivers(
    lineages='MegP',
    cluster_key='celltype_low',
    return_drivers=True,
)
g_ery_wt_baseline_d6.plot_lineage_drivers(
    'MegP',
    n_genes=5,
    ncols=5,
    figsize=(15, 6),
    size=100,
)

In [None]:
# Lineage drivers for 'Erythroblast_2': keep the returned result, then plot 5 genes.
drivers_3 = g_ery_wt_baseline_d6.compute_lineage_drivers(
    lineages='Erythroblast_2',
    cluster_key='celltype_low',
    return_drivers=True,
)
g_ery_wt_baseline_d6.plot_lineage_drivers(
    'Erythroblast_2',
    n_genes=5,
    ncols=5,
    figsize=(15, 6),
    size=100,
)

## CellRank Erythroid (WT CpG D6) 

In [None]:
# Toggle for the next cell: True rebuilds and saves the combined kernel and
# GPCCA estimator; False loads the previously saved pickles instead.
compute_ery_wt_cpg_d6 = False

In [None]:
# Obtain the combined kernel and GPCCA estimator for this condition:
# load both from their pickles unless a rebuild was requested above.
if not compute_ery_wt_cpg_d6:

    k_ery_wt_cpg_d6 = cr.kernels.PrecomputedKernel.read(
        'data/scRNAseq/object/cellrank/k_ery_wt_cpg_d6.pickle'
    )
    g_ery_wt_cpg_d6 = cr.estimators.GPCCA.read(
        'data/scRNAseq/object/cellrank/g_ery_wt_cpg_d6.pickle'
    )

else:

    # Fresh velocity kernel built on the re-computed AnnData
    vk_ery_wt_cpg_d6 = VelocityKernel(
        re_compute(vk_ery_wt_cpg_d6.adata)
    )

    # Velocity and connectivity kernels combined with weights 0.5 / 0.5, then saved
    k_ery_wt_cpg_d6 = kernel_workflow(
        vk_ery_wt_cpg_d6, ck_ery_wt_cpg_d6, 0.5, 0.5
    )
    k_ery_wt_cpg_d6.write(
        'data/scRNAseq/object/cellrank/k_ery_wt_cpg_d6.pickle',
        write_adata=True,
    )

    # GPCCA estimator: Schur decomposition with 20 components, then fit with 15 states
    g_ery_wt_cpg_d6 = gpcca_workflow(k_ery_wt_cpg_d6, n_components=20)
    g_ery_wt_cpg_d6.fit(n_states=15, n_cells=20, cluster_key="celltype_low")

    # Terminal states selected by name
    g_ery_wt_cpg_d6.set_terminal_states(['MegP', 'Erythroblast_5'])

    # Fate probabilities, using the 'gmres' solver
    g_ery_wt_cpg_d6.compute_fate_probabilities(solver='gmres')

    # Save the estimator together with its AnnData
    g_ery_wt_cpg_d6.write(
        'data/scRNAseq/object/cellrank/g_ery_wt_cpg_d6.pickle',
        write_adata=True,
    )

In [None]:
# Layer 1: full dataset with marker size 0; the returned axes are reused below.
ax = sc.pl.umap(adata, alpha=1, edgecolor='none', size=0, show=False)

# Layer 2: the four erythroid-related cell types, plotted without a color key
# (presumably rendered in the grey na_color).
sc.pl.umap(
    adata[adata.obs.celltype_low.isin(['MegP', 'MEP', 'Proerythroblast', 'Erythroblast'])],
    na_color='#7f7f7f',
    alpha=1,
    size=20,
    edgecolor='none',
    legend_loc="none",
    show=False,
    ax=ax,
)

# Layer 3: cells of this condition's kernel, colored by cell type.
sc.pl.umap(
    k_ery_wt_cpg_d6.adata,
    color='celltype_low',
    alpha=1,
    size=20,
    edgecolor='none',
    legend_loc="none",
    show=False,
    ax=ax,
)

# Layer 4: kernel projection drawn on the same axes and saved as SVG.
k_ery_wt_cpg_d6.plot_projection(
    color='celltype_low',
    recompute=True,
    save="result/cellrank/umap_stream_ery_wt_cpg_d6.svg",
    size=0,
    legend_loc="none",
    density=1.5,
    max_length=10,
    integration_direction='both',
    ax=ax,
)

In [None]:
# All macrostates, discrete display, labels on the data.
g_ery_wt_cpg_d6.plot_macrostates(
    which="all",
    discrete=True,
    legend_loc="on data",
    figsize=(3, 6),
    size=100,
)

In [None]:
# Terminal macrostates only, non-discrete display, labels on the data.
g_ery_wt_cpg_d6.plot_macrostates(
    which="terminal",
    legend_loc="on data",
    discrete=False,
    figsize=(3, 6),
    size=100,
)

In [None]:
# Fate probabilities drawn in a single plot with the 'magma' color map.
g_ery_wt_cpg_d6.plot_fate_probabilities(
    same_plot=True,
    ncols=5,
    legend_loc='none',
    cmap='magma',
    figsize=(3, 6),
    size=100,
)

In [None]:
# Normalize total counts, then log1p-transform, the AnnData attached to the estimator.
# NOTE(review): no return value is captured, so both calls presumably modify the
# object in place; re-running this cell would then apply the transforms twice — confirm.
sc.pp.normalize_total(g_ery_wt_cpg_d6.adata)
sc.pp.log1p(g_ery_wt_cpg_d6.adata)

In [None]:
# Lineage drivers for 'MegP': keep the returned result, then plot 5 genes.
drivers_2 = g_ery_wt_cpg_d6.compute_lineage_drivers(
    lineages='MegP',
    cluster_key='celltype_low',
    return_drivers=True,
)
g_ery_wt_cpg_d6.plot_lineage_drivers(
    'MegP',
    n_genes=5,
    ncols=5,
    figsize=(15, 6),
    size=100,
)

In [None]:
# Lineage drivers for 'Erythroblast_5': keep the returned result, then plot 5 genes.
drivers_3 = g_ery_wt_cpg_d6.compute_lineage_drivers(
    lineages='Erythroblast_5',
    cluster_key='celltype_low',
    return_drivers=True,
)
g_ery_wt_cpg_d6.plot_lineage_drivers(
    'Erythroblast_5',
    n_genes=5,
    ncols=5,
    figsize=(15, 6),
    size=100,
)

## CellRank Erythroid (IFNAR fl/fl) 

In [None]:
# Toggle for the next cell: True rebuilds and saves the combined kernel and
# GPCCA estimator; False loads the previously saved pickles instead.
compute_ery_ifnar_fl = False

In [None]:
# Obtain the combined kernel and GPCCA estimator for this condition:
# load both from their pickles unless a rebuild was requested above.
if not compute_ery_ifnar_fl:

    k_ery_ifnar_fl = cr.kernels.PrecomputedKernel.read(
        'data/scRNAseq/object/cellrank/k_ery_ifnar_fl.pickle'
    )
    g_ery_ifnar_fl = cr.estimators.GPCCA.read(
        'data/scRNAseq/object/cellrank/g_ery_ifnar_fl.pickle'
    )

else:

    # Fresh velocity kernel built on the re-computed AnnData
    vk_ery_ifnar_fl = VelocityKernel(
        re_compute(vk_ery_ifnar_fl.adata)
    )

    # Velocity and connectivity kernels combined with weights 0.5 / 0.5, then saved
    k_ery_ifnar_fl = kernel_workflow(
        vk_ery_ifnar_fl, ck_ery_ifnar_fl, 0.5, 0.5
    )
    k_ery_ifnar_fl.write(
        'data/scRNAseq/object/cellrank/k_ery_ifnar_fl.pickle',
        write_adata=True,
    )

    # GPCCA estimator: Schur decomposition with 20 components, then fit with 15 states
    g_ery_ifnar_fl = gpcca_workflow(k_ery_ifnar_fl, n_components=20)
    g_ery_ifnar_fl.fit(n_states=15, n_cells=20, cluster_key="celltype_low")

    # Terminal states selected by name
    g_ery_ifnar_fl.set_terminal_states(['MegP', 'Erythroblast_1'])

    # Fate probabilities, using the 'gmres' solver
    g_ery_ifnar_fl.compute_fate_probabilities(solver='gmres')

    # Save the estimator together with its AnnData
    g_ery_ifnar_fl.write(
        'data/scRNAseq/object/cellrank/g_ery_ifnar_fl.pickle',
        write_adata=True,
    )

In [None]:
# Layer 1: full dataset with marker size 0; the returned axes are reused below.
ax = sc.pl.umap(adata, alpha=1, edgecolor='none', size=0, show=False)

# Layer 2: the four erythroid-related cell types, plotted without a color key
# (presumably rendered in the grey na_color).
sc.pl.umap(
    adata[adata.obs.celltype_low.isin(['MegP', 'MEP', 'Proerythroblast', 'Erythroblast'])],
    na_color='#7f7f7f',
    alpha=1,
    size=20,
    edgecolor='none',
    legend_loc="none",
    show=False,
    ax=ax,
)

# Layer 3: cells of this condition's kernel, colored by cell type.
sc.pl.umap(
    k_ery_ifnar_fl.adata,
    color='celltype_low',
    alpha=1,
    size=20,
    edgecolor='none',
    legend_loc="none",
    show=False,
    ax=ax,
)

# Layer 4: kernel projection drawn on the same axes and saved as SVG.
k_ery_ifnar_fl.plot_projection(
    color='celltype_low',
    recompute=True,
    save="result/cellrank/umap_stream_ery_ifnar_fl.svg",
    size=0,
    legend_loc="none",
    density=1.5,
    max_length=10,
    integration_direction='both',
    ax=ax,
)

In [None]:
# All macrostates, discrete display, labels on the data.
g_ery_ifnar_fl.plot_macrostates(
    which="all",
    discrete=True,
    legend_loc="on data",
    figsize=(3, 6),
    size=100,
)

In [None]:
# Terminal macrostates only, non-discrete display, labels on the data.
g_ery_ifnar_fl.plot_macrostates(
    which="terminal",
    legend_loc="on data",
    discrete=False,
    figsize=(3, 6),
    size=100,
)

In [None]:
# Fate probabilities drawn in a single plot with the 'magma' color map.
g_ery_ifnar_fl.plot_fate_probabilities(
    same_plot=True,
    ncols=5,
    legend_loc='none',
    cmap='magma',
    figsize=(3, 6),
    size=100,
)

In [None]:
# Normalize total counts, then log1p-transform, the AnnData attached to the estimator.
# NOTE(review): no return value is captured, so both calls presumably modify the
# object in place; re-running this cell would then apply the transforms twice — confirm.
sc.pp.normalize_total(g_ery_ifnar_fl.adata)
sc.pp.log1p(g_ery_ifnar_fl.adata)

In [None]:
# Lineage drivers for 'MegP': keep the returned result, then plot 5 genes.
drivers_2 = g_ery_ifnar_fl.compute_lineage_drivers(
    lineages='MegP',
    cluster_key='celltype_low',
    return_drivers=True,
)
g_ery_ifnar_fl.plot_lineage_drivers(
    'MegP',
    n_genes=5,
    ncols=5,
    figsize=(15, 6),
    size=100,
)

In [None]:
# Lineage drivers for 'Erythroblast_1': keep the returned result, then plot 5 genes.
drivers_3 = g_ery_ifnar_fl.compute_lineage_drivers(
    lineages='Erythroblast_1',
    cluster_key='celltype_low',
    return_drivers=True,
)
g_ery_ifnar_fl.plot_lineage_drivers(
    'Erythroblast_1',
    n_genes=5,
    ncols=5,
    figsize=(15, 6),
    size=100,
)

## CellRank Erythroid (IFNAR fl/fl LysM Cre) 

In [None]:
# Toggle for the next cell: True rebuilds and saves the combined kernel and
# GPCCA estimator; False loads the previously saved pickles instead.
compute_ery_ifnar_fl_lysm_cre = False

In [None]:
# Obtain the combined kernel and GPCCA estimator for this condition:
# load both from their pickles unless a rebuild was requested above.
if not compute_ery_ifnar_fl_lysm_cre:

    k_ery_ifnar_fl_lysm_cre = cr.kernels.PrecomputedKernel.read(
        'data/scRNAseq/object/cellrank/k_ery_ifnar_fl_lysm_cre.pickle'
    )
    g_ery_ifnar_fl_lysm_cre = cr.estimators.GPCCA.read(
        'data/scRNAseq/object/cellrank/g_ery_ifnar_fl_lysm_cre.pickle'
    )

else:

    # Fresh velocity kernel built on the re-computed AnnData
    vk_ery_ifnar_fl_lysm_cre = VelocityKernel(
        re_compute(vk_ery_ifnar_fl_lysm_cre.adata)
    )

    # Velocity and connectivity kernels combined with weights 0.5 / 0.5, then saved
    k_ery_ifnar_fl_lysm_cre = kernel_workflow(
        vk_ery_ifnar_fl_lysm_cre, ck_ery_ifnar_fl_lysm_cre, 0.5, 0.5
    )
    k_ery_ifnar_fl_lysm_cre.write(
        'data/scRNAseq/object/cellrank/k_ery_ifnar_fl_lysm_cre.pickle',
        write_adata=True,
    )

    # GPCCA estimator: Schur decomposition with 20 components, then fit with 15 states
    g_ery_ifnar_fl_lysm_cre = gpcca_workflow(k_ery_ifnar_fl_lysm_cre, n_components=20)
    g_ery_ifnar_fl_lysm_cre.fit(n_states=15, n_cells=20, cluster_key="celltype_low")

    # Terminal states selected by name
    g_ery_ifnar_fl_lysm_cre.set_terminal_states(['MegP', 'Erythroblast_1'])

    # Fate probabilities, using the 'gmres' solver
    g_ery_ifnar_fl_lysm_cre.compute_fate_probabilities(solver='gmres')

    # Save the estimator together with its AnnData
    g_ery_ifnar_fl_lysm_cre.write(
        'data/scRNAseq/object/cellrank/g_ery_ifnar_fl_lysm_cre.pickle',
        write_adata=True,
    )

In [None]:
# Layer 1: full dataset with marker size 0; the returned axes are reused below.
ax = sc.pl.umap(adata, alpha=1, edgecolor='none', size=0, show=False)

# Layer 2: the four erythroid-related cell types, plotted without a color key
# (presumably rendered in the grey na_color).
sc.pl.umap(
    adata[adata.obs.celltype_low.isin(['MegP', 'MEP', 'Proerythroblast', 'Erythroblast'])],
    na_color='#7f7f7f',
    alpha=1,
    size=20,
    edgecolor='none',
    legend_loc="none",
    show=False,
    ax=ax,
)

# Layer 3: cells of this condition's kernel, colored by cell type.
sc.pl.umap(
    k_ery_ifnar_fl_lysm_cre.adata,
    color='celltype_low',
    alpha=1,
    size=20,
    edgecolor='none',
    legend_loc="none",
    show=False,
    ax=ax,
)

# Layer 4: kernel projection drawn on the same axes and saved as SVG.
# The output name uses 'lysm_cre' (was misspelled 'lym_cre') so it matches the
# kernel / estimator pickle names of this condition.
# NOTE(review): anything that consumed the old 'umap_stream_ery_ifnar_fl_lym_cre.svg'
# name must be pointed at the corrected file.
k_ery_ifnar_fl_lysm_cre.plot_projection(
    color='celltype_low',
    recompute=True,
    save="result/cellrank/umap_stream_ery_ifnar_fl_lysm_cre.svg",
    size=0,
    legend_loc="none",
    density=1.5,
    max_length=10,
    integration_direction='both',
    ax=ax,
)

In [None]:
# All macrostates, discrete display, labels on the data.
g_ery_ifnar_fl_lysm_cre.plot_macrostates(
    which="all",
    discrete=True,
    legend_loc="on data",
    figsize=(3, 6),
    size=100,
)

In [None]:
# Terminal macrostates only, non-discrete display, labels on the data.
g_ery_ifnar_fl_lysm_cre.plot_macrostates(
    which="terminal",
    legend_loc="on data",
    discrete=False,
    figsize=(3, 6),
    size=100,
)

In [None]:
# Fate probabilities drawn in a single plot with the 'magma' color map.
g_ery_ifnar_fl_lysm_cre.plot_fate_probabilities(
    same_plot=True,
    ncols=5,
    legend_loc='none',
    cmap='magma',
    figsize=(3, 6),
    size=100,
)

In [None]:
# Normalize total counts, then log1p-transform, the AnnData attached to the estimator.
# NOTE(review): no return value is captured, so both calls presumably modify the
# object in place; re-running this cell would then apply the transforms twice — confirm.
sc.pp.normalize_total(g_ery_ifnar_fl_lysm_cre.adata)
sc.pp.log1p(g_ery_ifnar_fl_lysm_cre.adata)

In [None]:
# Lineage drivers for 'MegP': keep the returned result, then plot 5 genes.
drivers_2 = g_ery_ifnar_fl_lysm_cre.compute_lineage_drivers(
    lineages='MegP',
    cluster_key='celltype_low',
    return_drivers=True,
)
g_ery_ifnar_fl_lysm_cre.plot_lineage_drivers(
    'MegP',
    n_genes=5,
    ncols=5,
    figsize=(15, 6),
    size=100,
)

In [None]:
# Lineage drivers for 'Erythroblast_1': keep the returned result, then plot 5 genes.
drivers_3 = g_ery_ifnar_fl_lysm_cre.compute_lineage_drivers(
    lineages='Erythroblast_1',
    cluster_key='celltype_low',
    return_drivers=True,
)
g_ery_ifnar_fl_lysm_cre.plot_lineage_drivers(
    'Erythroblast_1',
    n_genes=5,
    ncols=5,
    figsize=(15, 6),
    size=100,
)