# CytoTRACE

[Gulati et al., 2020](https://www.science.org/doi/10.1126/science.aax0249?url_ver=Z39.88-2003&rfr_id=ori:rid:crossref.org&rfr_dat=cr_pub%20%200pubmed)  
[CellRank tutorial](https://cellrank.readthedocs.io/en/stable/beyond_rna_velocity.html)

In [None]:
import scvelo as scv
import scanpy as sc
import cellrank as cr
import numpy as np
import pandas as pd

import os

In [None]:
# rpy2: point R_HOME at the R installation inside the project's conda environment.
# This is set before `rpy2.robjects` is imported further down in the notebook.
# NOTE(review): the path is machine-specific — adjust it when running elsewhere.
os.environ['R_HOME'] = '/home/fdeckert/bin/miniconda3/envs/p.3.8.12-FD20200109SPLENO/lib/R'

In [None]:
# Scanpy plotting defaults.
# `sc.set_figure_params` has its own `vector_friendly` argument (default True) and
# stores it in the settings on every call, so a `vector_friendly = False` assigned
# before the call does not survive it. Pass the flag explicitly instead.
sc.set_figure_params(
    figsize=(2, 3),
    dpi_save=1200,
    fontsize=8,
    frameon=False,
    vector_friendly=False,
)

# Directory scanpy saves figures to (relative to the working directory).
sc.settings.figdir = 'result/figures/'

In [None]:
# Work from the project root so that the relative paths used in this notebook
# ('plotting_global.R', 'data/...', 'result/...') resolve against it.
os.chdir('/research/peer/fdeckert/FD20200109SPLENO')

In [None]:
# Plotting: read the colour palettes defined in the project's R script
import rpy2.robjects as robjects

color_load = robjects.r.source('plotting_global.R')
palettes = color_load[0]  # first element of the object returned by R's `source`

# Plain nested dict: palette name -> {entry name: first element stored under that name}
color = {
    palettes.names[idx]: {
        label: palettes[idx].rx2(label)[0] for label in palettes[idx].names
    }
    for idx in range(len(palettes))
}

# Import data 

In [None]:
# Load the AnnData object and the tables stored next to it.
adata = sc.read_h5ad('data/object/pp.h5ad')
# Gene table; its first column is later used to select genes by name.
genes = pd.read_csv('data/object/genes.csv')
# Per-cell tables; the first CSV column becomes the index, which is later
# matched against adata.obs_names.
obs = pd.read_csv('data/object/components/meta.csv', index_col=0)
umap = pd.read_csv('data/object/components/umap.csv', index_col=0)

In [None]:
# Replace adata.obs with the external metadata table
cells_with_meta = adata.obs_names.isin(obs.index)
adata = adata[cells_with_meta]  # drop cells that have no metadata row
obs = obs.reindex(adata.obs_names)  # same cells, same order as adata
adata.obs = obs

In [None]:
# Keep only the genes named in the first column of the gene table
selected_genes = genes.iloc[:, 0]
gene_mask = adata.var_names.isin(selected_genes)
adata = adata[:, gene_mask]

In [None]:
# Store the precomputed UMAP coordinates as the 'X_umap' embedding
has_meta = umap.index.isin(obs.index)
umap = umap.loc[has_meta]
umap = umap.reindex(adata.obs_names)  # align rows with adata's cells
adata.obsm['X_umap'] = umap.to_numpy()

# Set colors

In [None]:
def set_color(adata, categories):
    """Order categorical obs columns by palette and store the matching colours.

    For every name in ``categories`` that is also a column of ``adata.obs``,
    the column is converted to a pandas categorical whose categories follow
    the key order of ``color[name]`` (the module-level palette dict), limited
    to the labels that actually occur in the column. The colours of those
    labels are written, in the same order, to ``adata.uns[name + '_colors']``.

    Parameters
    ----------
    adata
        Object exposing ``obs`` (a DataFrame) and ``uns`` (a mapping); both
        are modified in place.
    categories
        Candidate column names. Names that are not columns of ``adata.obs``
        are skipped.

    Raises
    ------
    KeyError
        If a name is a column of ``adata.obs`` but has no entry in ``color``.
    ValueError
        If a column contains a label that its palette does not define.
    """
    for category in categories:
        if category not in adata.obs.columns:
            continue

        palette = color[category]
        column = adata.obs[category].astype('category')

        # Build the set of observed labels once instead of scanning the whole
        # column again for every palette key.
        observed = set(column.dropna())
        keys = [key for key in palette if key in observed]

        # reorder_categories would reject this case anyway; fail with a
        # message that names the offending labels.
        missing = observed.difference(keys)
        if missing:
            raise ValueError(
                f"No colour defined for {sorted(map(str, missing))} "
                f"in palette '{category}'"
            )

        adata.obs[category] = column.cat.reorder_categories(keys)
        adata.uns[category + '_colors'] = np.array(
            [palette[key] for key in keys], dtype=object
        )
        
# Apply every loaded palette to its matching obs column
set_color(adata, list(color))

In [None]:
# Overview UMAP panels, one per obs column, five per row
overview_features = ['cell_type_fine', 'cc_phase_class', 'pMt_RNA', 'pHb_RNA', 'pRb_RNA']
sc.pl.umap(adata, color=overview_features, wspace=0.8, ncols=5)

# Store copy of adata

In [None]:
# Keep a full copy; the treatment-specific subsets further down are taken from
# `adata_tmp`, while `adata` itself gets reassigned.
adata_tmp = adata.copy()

# CytoTRACE workflow

In [30]:
def ct_workflow(adata, suffix='', compute=False):
    """Attach a CytoTRACE pseudotime to ``adata.obs['ct_pseudotime']``.

    The pseudotime is either computed and cached as a CSV file
    (``compute=True``) or read back from a previously written CSV file
    (``compute=False``).

    Parameters
    ----------
    adata
        Annotated data object; it is modified in place. With ``compute=True``
        its ``.X`` is normalised and log-transformed and the ``spliced`` /
        ``unspliced`` layers are overwritten with ``.X``.
    suffix
        Text inserted into the cache file name,
        ``result/cytotrace/ct_pseudotime<suffix>.csv`` (relative to the
        current working directory).
    compute
        If true, run the preprocessing and the CytoTRACE kernel, then write
        the cache file. If false (default), only load the cache file.

    Returns
    -------
    The same ``adata`` object that was passed in.
    """
    csv_path = 'result/cytotrace/ct_pseudotime' + suffix + '.csv'

    if not compute:
        # Load the cached values and align them with this object's cells;
        # cells absent from the file end up as NaN.
        cached = pd.read_csv(csv_path, index_col=0).reindex(adata.obs_names)
        adata.obs['ct_pseudotime'] = cached['ct_pseudotime']
        return adata

    # Normalize total counts per cell and log-transform.
    # NOTE: highly-variable-gene annotation is not run here (it was already
    # disabled in the original workflow).
    scv.pp.normalize_per_cell(adata)
    sc.pp.log1p(adata)

    # Use scVelo's `moments` function for imputation. Hack: `.X` is copied into
    # the layers because that is where `scv.pp.moments` expects to find counts.
    adata.layers["spliced"] = adata.X
    adata.layers["unspliced"] = adata.X
    scv.pp.moments(adata, n_pcs=30, n_neighbors=30)

    # Nothing else in this function writes obs['ct_pseudotime'], so the column
    # read below has to come from constructing the kernel.
    # NOTE(review): presumably CellRank 1.x behaviour — confirm for the
    # installed version.
    from cellrank.tl.kernels import CytoTRACEKernel
    CytoTRACEKernel(adata)

    # Make sure the cache directory exists before writing.
    os.makedirs(os.path.dirname(csv_path), exist_ok=True)
    ct_pseudotime = adata.obs['ct_pseudotime'].reindex(adata.obs_names)
    ct_pseudotime.to_csv(csv_path)

    return adata

# CytoTRACE (NaCl)

In [37]:
# NaCl-treated cells, restricted to the listed main cell types
cell_types = ['MLP', 'GMP', 'MDP', 'MegP', 'MEP', 'ProEB', 'EB']
in_treatment = adata_tmp.obs['treatment'] == 'NaCl'
in_cell_types = adata_tmp.obs['cell_type_main'].isin(cell_types)
adata = adata_tmp[in_treatment & in_cell_types].copy()

In [None]:
# compute=True recomputes the pseudotime and (over)writes
# result/cytotrace/ct_pseudotime_prog_nacl.csv; use compute=False to load that file instead.
adata = ct_workflow(adata, suffix='_prog_nacl', compute=True)

Normalized count data: X.


In [None]:
# Fine cell types next to the pseudotime stored in obs['ct_pseudotime']
sc.pl.umap(
    adata,
    color=['cell_type_fine', 'ct_pseudotime'],
    wspace=0.8,
    ncols=2,
)

# CytoTRACE (CpG)

In [None]:
# CpG-treated cells, restricted to the listed main cell types
cell_types = ['MLP', 'GMP', 'MDP', 'MegP', 'MEP', 'ProEB', 'EB']
in_treatment = adata_tmp.obs['treatment'] == 'CpG'
in_cell_types = adata_tmp.obs['cell_type_main'].isin(cell_types)
adata = adata_tmp[in_treatment & in_cell_types].copy()

In [None]:
# compute=True recomputes the pseudotime and (over)writes
# result/cytotrace/ct_pseudotime_prog_cpg.csv; use compute=False to load that file instead.
adata = ct_workflow(adata, suffix='_prog_cpg', compute=True)

In [None]:
# Fine cell types next to the pseudotime stored in obs['ct_pseudotime']
sc.pl.umap(
    adata,
    color=['cell_type_fine', 'ct_pseudotime'],
    wspace=0.8,
    ncols=2,
)