# Erythroid DPT

In [None]:
import cellrank as cr
import scanpy as sc

import scanorama

import scipy

import numpy as np
import pandas as pd

import os

In [None]:
import warnings
# Install a blanket "ignore" filter so warnings raised by the cells below are not shown.
warnings.simplefilter("ignore")

In [None]:
# Global scanpy figure defaults for the plots in this notebook
# (figure size, save resolution, font size, no frame, 'magma' color map).
sc.set_figure_params(figsize=(2.5, 5), dpi_save=1200, fontsize=12, frameon=False, color_map='magma')

In [None]:
# Make the project directory the working directory; the relative paths used below
# ('plotting_global.R', 'data/...', 'result/...') resolve against it.
os.chdir('/research/peer/fdeckert/FD20200109SPLENO')

# Set up rpy2

In [None]:
# Point R_HOME at the conda R 4.1.0 installation. This is set ahead of the rpy2
# imports that follow — presumably so rpy2 binds to this R; confirm if the env changes.
os.environ['R_HOME'] = '/nobackup/peer/fdeckert/miniconda3/envs/r.4.1.0/lib/R'

In [None]:
import rpy2.rinterface_lib.callbacks
import logging

from rpy2.robjects import pandas2ri
import anndata2ri

# Color palettes

In [None]:
# Plotting: build the nested palette dict {category name -> {level -> color}}
# from the value returned by sourcing the shared R plotting definitions.
import rpy2.robjects as robjects

color_load = robjects.r.source('plotting_global.R')
color = dict()
for i, palette_name in enumerate(color_load[0].names):
    # Each entry is a named R vector/list; take the first element stored under every name.
    palette = color_load[0][i]
    color[palette_name] = {key: palette.rx2(key)[0] for key in palette.names}

In [None]:
def set_color(adata, categories=color.keys()): 
    """Order categorical ``obs`` columns and attach their colors from ``color``.

    For every name in ``categories`` that is also a column of ``adata.obs``:
    the column is cast to categorical, its categories are reordered to follow
    the key order of ``color[name]`` (restricted to the values that occur in
    the column), and the matching colors are written to
    ``adata.uns[name + '_colors']`` as an object array.

    Parameters
    ----------
    adata
        Object exposing ``.obs`` (a DataFrame) and ``.uns`` (a mapping).
        It is modified in place.
    categories
        Iterable of column names to process. Defaults to all keys of the
        file-level ``color`` dict.

    Returns
    -------
    None

    Notes
    -----
    ``reorder_categories`` is called with only the palette keys found in the
    column, so pandas is expected to raise if the column holds a category
    that has no entry in ``color[name]``.
    """

    # Build the lookup once instead of once per candidate name.
    obs_columns = set(adata.obs.columns)

    for category in categories:

        if category not in obs_columns:
            continue

        adata.obs[category] = pd.Series(adata.obs[category], dtype='category')

        # Values present in the column, collected once; the palette key order is kept.
        present = set(adata.obs[category])
        keys = [key for key in color[category] if key in present]

        adata.obs[category] = adata.obs[category].cat.reorder_categories(keys)
        adata.uns[category+'_colors'] = np.array([color[category][key] for key in keys], dtype=object)

# Load data 

In [None]:
# Load the pre-processed AnnData object (path is relative to the working directory).
adata = sc.read_h5ad('data/BSA_0355_SM01_10x_SPLENO/ANALYSIS/pp.h5ad')
# Apply the palette-defined category order and colors to adata.obs / adata.uns.
set_color(adata)

In [None]:
# GPCCA: load the two previously saved CellRank estimators, one per condition
# (baseline and cpg, going by the file names).
# NOTE(review): the files are '.pickle' — only load estimator files from a trusted source.
g_prog_baseline = cr.estimators.GPCCA.read('data/BSA_0355_SM01_10x_SPLENO/ANALYSIS/cellrank/g_prog_baseline.pickle')
g_prog_cpg = cr.estimators.GPCCA.read('data/BSA_0355_SM01_10x_SPLENO/ANALYSIS/cellrank/g_prog_cpg.pickle')

In [None]:
# Fate probabilities: stack the tables of both estimators row-wise.
# Rows are indexed by each estimator's obs names; columns are named after
# the entries of `fate_probabilities.names`.
fate_probabilities = pd.concat([
    pd.DataFrame(
        estimator.fate_probabilities,
        columns=estimator.fate_probabilities.names.tolist(),
        index=estimator.adata.obs_names,
    )
    for estimator in (g_prog_baseline, g_prog_cpg)
])

In [None]:
# Attach the fate probabilities to the cell metadata by index (obs names).
# how='left' keeps every cell of adata.obs; cells without a row in
# fate_probabilities get NaN in the added columns.
adata.obs = adata.obs.merge(fate_probabilities, left_index=True, right_index=True, how='left')

# Subset by erythroid lineage 

In [None]:
# Labels of `leiden_cell_type_main` that make up the erythroid lineage.
cell_type_ery = ['MEP', 'ProEB', 'EB I', 'EB II', 'EB III']

In [None]:
# Keep only cells whose `leiden_cell_type_main` label is in cell_type_ery;
# .copy() turns the subset into an independent object.
adata = adata[adata.obs.leiden_cell_type_main.isin(cell_type_ery)].copy()

# Compute scanorama embedding 

In [None]:
def scanorama_workflow(adata):
    """Return the Scanorama embedding of ``adata.raw``, rows in raw cell order.

    The raw data is split by ``obs['sample_group']``, the per-group objects
    are passed to ``scanorama.integrate_scanpy`` (100 dimensions), and the
    groups are concatenated again. The ``obsm['X_scanorama']`` entry of the
    concatenated object is returned after restoring the original cell order.
    The input ``adata`` itself is not assigned to.
    """

    # Work on the raw layer of the input
    raw_adata = adata.raw.to_adata()

    # Remember the cell order so it can be restored after concatenation
    cell_order = raw_adata.obs_names

    # One AnnData subset per sample group
    batches = []
    for group in raw_adata.obs['sample_group'].unique():
        batches.append(raw_adata[raw_adata.obs['sample_group'] == group])

    # Scanorama integration, then stitch the groups back together
    scanorama.integrate_scanpy(batches, dimred=100, verbose=False)
    merged = sc.concat(batches, join='inner')

    # Back to the original cell order before extracting the embedding
    return merged[cell_order].obsm['X_scanorama']

In [None]:
# Store the Scanorama embedding, computed from adata.raw, on the erythroid subset.
adata.obsm['X_scanorama'] = scanorama_workflow(adata)

In [None]:
# Neighbor graph (30 neighbors) and UMAP for the erythroid subset.
# NOTE(review): the graph is built on obsm['latent'], not on the 'X_scanorama'
# embedding stored just before this cell. 'latent' is not created anywhere in
# this notebook, so it presumably comes from pp.h5ad — confirm which
# representation is intended; as written, 'X_scanorama' is never used.
sc.pp.neighbors(adata, n_neighbors=30, use_rep='latent')
sc.tl.umap(adata, min_dist=1)

# Diffusion pseudotime (DPT)

## Set root cell

In [None]:
# The DPT root is the cell with the lowest 'EB III' fate probability.
# The minimum is computed once with Series.min(), which skips NaN. Cells that
# had no match in the fate-probability merge can therefore not turn the
# minimum into NaN, which would leave no root cell at all.
eb3_fate = adata.obs['EB III']
eb3_min = eb3_fate.min()
is_root = eb3_fate == eb3_min

# Label the root cell(s) for plotting; all other cells get None.
adata.obs['dpt_s_cell'] = is_root.apply(lambda flag: 'ery_s_cell' if flag else None)
# Positional index of the first cell at the minimum, stored in adata.uns['iroot'].
adata.uns['iroot'] = np.flatnonzero(is_root)[0]

## Compute DPT

In [None]:
# Diffusion map with 15 components, then diffusion pseudotime on those 15 components.
# The result is read from adata.obs['dpt_pseudotime'] in the cells below.
sc.tl.diffmap(adata, n_comps=15)
sc.tl.dpt(adata, n_dcs=15)

In [None]:
# UMAP overview of the erythroid subset. It is drawn from the object returned
# by normalize_total(..., copy=True), so `adata` is not reassigned here.
sc.pl.umap(
    sc.pp.normalize_total(adata, copy=True),
    color=[
        'leiden_cell_type_main',
        'dpt_s_cell',
        'dpt_pseudotime',
        'pHb_RNA',
        'EB III',
        'Meg',
    ],
    legend_loc='on data',
    ncols=7,
    size=100,
)

# Save results 

In [None]:
# Export the pseudotime of every cell (index = obs names) as CSV.
# NOTE(review): the 'result/lineage' directory is assumed to exist already — confirm.
adata.obs[['dpt_pseudotime']].to_csv('result/lineage/ery_dpt.csv')