# CellRank 

[Lange et al., 2022](https://www.nature.com/articles/s41592-021-01346-6)

In [None]:
import scvelo as scv
import cellrank as cr
import scanpy as sc

import scanorama

import scipy

import numpy as np
import pandas as pd

import os

In [None]:
import warnings

# Suppress all warning categories from here on to keep the cell output compact.
warnings.simplefilter(action="ignore")

In [None]:
# Use identical figure defaults for scanpy and scVelo plots.
sc.set_figure_params(
    figsize=(2.5, 5),
    dpi_save=1200,
    fontsize=10,
    frameon=False,
    color_map='magma',
)
scv.set_figure_params(
    figsize=(2.5, 5),
    dpi_save=1200,
    fontsize=10,
    frameon=False,
    color_map='magma',
)

In [None]:
# Switch to the project directory; the relative 'data/...' paths used by the
# later cells are resolved against it.
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
os.chdir('/research/peer/fdeckert/FD20200109SPLENO')

# Import pre-computed kernels

In [None]:
from cellrank.kernels import ConnectivityKernel, PseudotimeKernel, VelocityKernel

# Velocity kernels saved earlier, one per condition (baseline / cpg)
vk_prog_baseline = VelocityKernel.read('data/BSA_0355_SM01_10x_SPLENO/ANALYSIS/cellrank/kernel/vk_prog_baseline.pickle')
vk_prog_cpg = VelocityKernel.read('data/BSA_0355_SM01_10x_SPLENO/ANALYSIS/cellrank/kernel/vk_prog_cpg.pickle')

# Pseudotime kernels (loaded here; not referenced by the cells shown below)
pk_prog_baseline = PseudotimeKernel.read('data/BSA_0355_SM01_10x_SPLENO/ANALYSIS/cellrank/kernel/pk_prog_baseline.pickle')
pk_prog_cpg = PseudotimeKernel.read('data/BSA_0355_SM01_10x_SPLENO/ANALYSIS/cellrank/kernel/pk_prog_cpg.pickle')

# Connectivity kernels, later mixed with the velocity kernels
ck_prog_baseline = ConnectivityKernel.read('data/BSA_0355_SM01_10x_SPLENO/ANALYSIS/cellrank/kernel/ck_prog_baseline.pickle')
ck_prog_cpg = ConnectivityKernel.read('data/BSA_0355_SM01_10x_SPLENO/ANALYSIS/cellrank/kernel/ck_prog_cpg.pickle')

# Workflow functions 

In [None]:
##########################
### Recompute velocity ###
##########################
def re_compute(adata, *, min_likelihood=0.1, n_neighbors=30, use_rep='latent'):
    """Re-derive neighbors, velocity graph and time estimates on ``adata``.

    The velocity graph is restricted to genes flagged in
    ``adata.var['velocity_genes']`` whose ``adata.var['fit_likelihood']`` is at
    least ``min_likelihood`` (genes with a missing likelihood fail the
    comparison and are dropped). The scanpy/scVelo calls are used for their
    in-place effect on ``adata``; the same object is returned.

    Parameters
    ----------
    adata
        Annotated data object providing the ``velocity_genes`` and
        ``fit_likelihood`` columns in ``.var`` and the representation named by
        ``use_rep``.
        NOTE(review): presumably velocities and a dynamical-model fit are
        already stored on the object - confirm against the upstream notebook.
    min_likelihood
        Inclusive lower bound on ``fit_likelihood`` for a gene to be used.
    n_neighbors
        Neighborhood size handed to ``sc.pp.neighbors``.
    use_rep
        Representation handed to ``sc.pp.neighbors`` (called with ``n_pcs=None``).

    Returns
    -------
    The ``adata`` object that was passed in.
    """
    # Velocity genes with a sufficiently high likelihood fit
    var = adata.var
    keep = var['velocity_genes'] & (var['fit_likelihood'] >= min_likelihood)
    v_genes = var.loc[keep].index

    # Re-compute neighbors on the chosen representation
    sc.pp.neighbors(adata, n_neighbors=n_neighbors, n_pcs=None, use_rep=use_rep)

    # Velocity graph (re-compute), limited to the selected genes
    scv.tl.velocity_graph(adata, gene_subset=v_genes)

    # Velocity pseudotime
    scv.tl.velocity_pseudotime(adata)

    # Latent time
    scv.tl.recover_latent_time(adata)

    return adata

In [None]:
##############
### Kernel ###
##############
def kernel_workflow(k_1, k_2, kr_1=1.0, kr_2=1.0):
    """Return the weighted sum of two kernels after computing their transition matrices.

    ``compute_transition_matrix()`` is called on ``k_1`` and then on ``k_2``;
    the objects returned by those calls are combined as
    ``kr_1 * k_1 + kr_2 * k_2``.

    Parameters
    ----------
    k_1, k_2
        Kernel objects exposing ``compute_transition_matrix()`` and supporting
        scalar multiplication and addition.
    kr_1, kr_2
        Scalar weights applied to ``k_1`` and ``k_2`` respectively.

    Returns
    -------
    The combined kernel expression.
    """
    # Transition matrices
    first = k_1.compute_transition_matrix()
    second = k_2.compute_transition_matrix()

    # Weighted combination
    weighted_first = kr_1 * first
    weighted_second = kr_2 * second
    return weighted_first + weighted_second

In [None]:
#############
### GPCCA ###
#############
def gpcca_workflow(k, n_components):
    """Create a GPCCA estimator for kernel ``k`` and run its Schur decomposition.

    Parameters
    ----------
    k
        Kernel handed to ``cr.estimators.GPCCA``.
    n_components
        Forwarded to ``compute_schur`` as ``n_components``.

    Returns
    -------
    The GPCCA estimator after ``compute_schur`` has been called on it.
    """
    estimator = cr.estimators.GPCCA(k)

    # Schur decomposition; macrostates are fitted later by the caller
    estimator.compute_schur(n_components=n_components)

    return estimator

## Steady state (Baseline)

In [None]:
# Toggle for the next cell: True recomputes and saves the baseline kernel and
# GPCCA estimator, False loads the previously saved pickles instead.
compute_prog_baseline = True

In [None]:
if not compute_prog_baseline:

    # Reuse the combined kernel and estimator saved by an earlier run
    k_prog_baseline = cr.kernels.PrecomputedKernel.read('data/BSA_0355_SM01_10x_SPLENO/ANALYSIS/cellrank/k_prog_baseline.pickle')
    g_prog_baseline = cr.estimators.GPCCA.read('data/BSA_0355_SM01_10x_SPLENO/ANALYSIS/cellrank/g_prog_baseline.pickle')

else:

    # Velocity kernel rebuilt on the re-computed velocity graph
    vk_prog_baseline = VelocityKernel(re_compute(vk_prog_baseline.adata))

    # Equal-weight mix of velocity and connectivity kernels, saved together with its data
    k_prog_baseline = kernel_workflow(vk_prog_baseline, ck_prog_baseline, 0.5, 0.5)
    k_prog_baseline.write(
        'data/BSA_0355_SM01_10x_SPLENO/ANALYSIS/cellrank/k_prog_baseline.pickle',
        write_adata=True,
    )

    # GPCCA: Schur decomposition, then macrostates labelled by cluster annotation
    g_prog_baseline = gpcca_workflow(k_prog_baseline, n_components=20)
    g_prog_baseline.fit(
        n_states=10,
        n_cells=20,
        cluster_key="leiden_cell_type_main",
    )

    # Terminal states are chosen manually
    g_prog_baseline.set_terminal_states(['Meg', 'EB III'])

    # Fate probabilities towards the terminal states
    g_prog_baseline.compute_fate_probabilities(solver='gmres')

    # Persist the fitted estimator
    g_prog_baseline.write(
        'data/BSA_0355_SM01_10x_SPLENO/ANALYSIS/cellrank/g_prog_baseline.pickle',
        write_adata=True,
    )

In [None]:
# Projection of the combined baseline kernel, colored by cluster annotation;
# recompute=True requests a fresh projection.
k_prog_baseline.plot_projection(
    color='leiden_cell_type_main',
    recompute=True,
)

In [None]:
# All baseline macrostates, drawn discretely with labels placed on the data
g_prog_baseline.plot_macrostates(
    which="all",
    discrete=True,
    legend_loc="on data",
    figsize=(3, 6),
    size=100,
)

In [None]:
# Terminal baseline states only, non-discrete coloring
g_prog_baseline.plot_macrostates(
    which="terminal",
    legend_loc="on data",
    discrete=False,
    figsize=(3, 6),
    size=100,
)

In [None]:
# Baseline fate probabilities in a single panel, legend hidden
g_prog_baseline.plot_fate_probabilities(
    same_plot=True,
    ncols=5,
    legend_loc='none',
    cmap='magma',
    figsize=(3, 6),
    size=100,
)

In [None]:
# Driver genes of the 'Meg' lineage (baseline).
# NOTE: `drivers_2` is assigned again by the CpG cell further down, so it holds
# whichever result was computed last.
drivers_2 = g_prog_baseline.compute_lineage_drivers(
    lineages='Meg',
    cluster_key='leiden_cell_type_main',
    return_drivers=True,
)

# Top 5 driver genes
g_prog_baseline.plot_lineage_drivers(
    'Meg',
    n_genes=5,
    ncols=5,
    figsize=(15, 6),
    size=100,
)

In [None]:
# Driver genes of the 'EB III' lineage (baseline).
# NOTE: `drivers_3` is assigned again by the CpG cell further down, so it holds
# whichever result was computed last.
drivers_3 = g_prog_baseline.compute_lineage_drivers(
    lineages='EB III',
    cluster_key='leiden_cell_type_main',
    return_drivers=True,
)

# Top 5 driver genes
g_prog_baseline.plot_lineage_drivers(
    'EB III',
    n_genes=5,
    ncols=5,
    figsize=(15, 6),
    size=100,
)

## Steady state (CpG)

In [None]:
# Toggle for the next cell: True recomputes and saves the CpG kernel and GPCCA
# estimator, False loads the previously saved pickles instead.
compute_prog_cpg = True

In [None]:
if not compute_prog_cpg:

    # Reuse the combined kernel and estimator saved by an earlier run
    k_prog_cpg = cr.kernels.PrecomputedKernel.read('data/BSA_0355_SM01_10x_SPLENO/ANALYSIS/cellrank/k_prog_cpg.pickle')
    g_prog_cpg = cr.estimators.GPCCA.read('data/BSA_0355_SM01_10x_SPLENO/ANALYSIS/cellrank/g_prog_cpg.pickle')

else:

    # Velocity kernel rebuilt on the re-computed velocity graph
    vk_prog_cpg = VelocityKernel(re_compute(vk_prog_cpg.adata))

    # Equal-weight mix of velocity and connectivity kernels, saved together with its data
    k_prog_cpg = kernel_workflow(vk_prog_cpg, ck_prog_cpg, 0.5, 0.5)
    k_prog_cpg.write(
        'data/BSA_0355_SM01_10x_SPLENO/ANALYSIS/cellrank/k_prog_cpg.pickle',
        write_adata=True,
    )

    # GPCCA: Schur decomposition, then macrostates labelled by cluster annotation
    g_prog_cpg = gpcca_workflow(k_prog_cpg, n_components=20)
    g_prog_cpg.fit(
        n_states=10,
        n_cells=20,
        cluster_key="leiden_cell_type_main",
    )

    # Terminal states are chosen manually
    g_prog_cpg.set_terminal_states(['Meg', 'EB III'])

    # Fate probabilities towards the terminal states
    g_prog_cpg.compute_fate_probabilities(solver='gmres')

    # Persist the fitted estimator
    g_prog_cpg.write(
        'data/BSA_0355_SM01_10x_SPLENO/ANALYSIS/cellrank/g_prog_cpg.pickle',
        write_adata=True,
    )

In [None]:
# Projection of the combined CpG kernel, colored by cluster annotation;
# recompute=True requests a fresh projection.
k_prog_cpg.plot_projection(
    color='leiden_cell_type_main',
    recompute=True,
)

In [None]:
# All CpG macrostates, drawn discretely with labels placed on the data
g_prog_cpg.plot_macrostates(
    which="all",
    discrete=True,
    legend_loc="on data",
    figsize=(3, 6),
    size=100,
)

In [None]:
# Terminal CpG states only, non-discrete coloring
g_prog_cpg.plot_macrostates(
    which="terminal",
    legend_loc="on data",
    discrete=False,
    figsize=(3, 6),
    size=100,
)

In [None]:
# CpG fate probabilities in a single panel, legend hidden
g_prog_cpg.plot_fate_probabilities(
    same_plot=True,
    ncols=5,
    legend_loc='none',
    cmap='magma',
    figsize=(3, 6),
    size=100,
)

In [None]:
# Driver genes of the 'Meg' lineage (CpG).
# NOTE: this overwrites the `drivers_2` assigned in the baseline section.
drivers_2 = g_prog_cpg.compute_lineage_drivers(
    lineages='Meg',
    cluster_key='leiden_cell_type_main',
    return_drivers=True,
)

# Top 5 driver genes
g_prog_cpg.plot_lineage_drivers(
    'Meg',
    n_genes=5,
    ncols=5,
    figsize=(15, 6),
    size=100,
)

In [None]:
# Driver genes of the 'EB III' lineage (CpG).
# NOTE: this overwrites the `drivers_3` assigned in the baseline section.
drivers_3 = g_prog_cpg.compute_lineage_drivers(
    lineages='EB III',
    cluster_key='leiden_cell_type_main',
    return_drivers=True,
)

# Top 5 driver genes
g_prog_cpg.plot_lineage_drivers(
    'EB III',
    n_genes=5,
    ncols=5,
    figsize=(15, 6),
    size=100,
)