In [None]:
import scvelo as scv
import scanpy as sc
import cellrank as cr
import loompy as lp

import numpy as np
import pandas as pd
import re

import os
import sys

# Logging verbosity for scVelo (level 3).
scv.settings.verbosity = 3
# Apply scVelo's 'scvelo' figure-parameter preset to the plots produced below.
scv.settings.set_figure_params('scvelo')
# Logging verbosity for CellRank (level 2).
cr.settings.verbosity = 2

In [None]:
import warnings
warnings.simplefilter('ignore', category=UserWarning)
warnings.simplefilter('ignore', category=FutureWarning)
warnings.simplefilter('ignore', category=DeprecationWarning)

In [None]:
# Switch to the project root so that the relative paths used below
# ('bin/', 'data/object/...') resolve.
# NOTE(review): absolute, machine-specific path - the notebook only runs where
# this directory exists.
os.chdir('/research/peer/fdeckert/FD20200109SPLENO')

In [None]:
# Put the project-local 'bin/' directory on the module search path. The path is
# relative, so it resolves against the current working directory, and it must be
# appended before the import below.
sys.path.append('bin/')
# NOTE(review): presumably SeuratFacility is a helper module living in bin/ - confirm.
import SeuratFacility

# PAGA embeddings

## Import Seurat as adata

In [None]:
# Both conditions are imported with the same assay/slot selection.
# NOTE(review): presumably dir2adata builds an AnnData object from an exported
# Seurat directory - confirm against SeuratFacility.
seurat_import_kwargs = dict(assay='SCT', slot='scale.data')

# adata_1: NaCl object, adata_2: CpG object.
adata_1 = SeuratFacility.dir2adata('data/object/seurat_sct_nacl/', **seurat_import_kwargs)
adata_2 = SeuratFacility.dir2adata('data/object/seurat_sct_cpg/', **seurat_import_kwargs)

## Pre-process data

In [None]:
# Cast each data matrix to double precision (NaCl first, then CpG).
for condition_adata in (adata_1, adata_2):
    condition_adata.X = condition_adata.X.astype('float64')

## PCA

In [None]:
# 90-component PCA per condition using the ARPACK solver.
# zero_center=False skips centering.
# NOTE(review): presumably acceptable because the SCT 'scale.data' slot is
# already centered - confirm.
pca_kwargs = dict(n_comps=90, svd_solver='arpack', zero_center=False)
for condition_adata in (adata_1, adata_2):
    sc.pp.pca(condition_adata, **pca_kwargs)

## Neighbor graph

In [None]:
# Build the kNN graph (5 neighbours) on all 90 principal components,
# NaCl first, then CpG.
for condition_adata in (adata_1, adata_2):
    sc.pp.neighbors(condition_adata, n_neighbors=5, n_pcs=90)

## Denoise neighbor graph

In [None]:
# Denoise the neighbour graph: compute a diffusion map per condition and
# rebuild the kNN graph (10 neighbours) on the 'X_diffmap' representation.
for condition_adata in (adata_1, adata_2):
    sc.tl.diffmap(condition_adata)
    sc.pp.neighbors(condition_adata, n_neighbors=10, use_rep='X_diffmap')

## PAGA on leiden graph 

In [None]:
# Leiden clustering at resolution 1.0 for each condition; the resulting
# 'leiden' labels are the groups used for PAGA.
for condition_adata in (adata_1, adata_2):
    sc.tl.leiden(condition_adata, resolution=1.0)

In [None]:
# PAGA connectivity between the Leiden clusters of each condition.
for condition_adata in (adata_1, adata_2):
    sc.tl.paga(condition_adata, groups='leiden')

### Cluster markers
HSCs: Procr  
Erythroids: Gata1, Klf1, Epor, Gypa, Hba-a2, Hba-1, Spi1  
Neutrophils: Elane, Cebpe, Ctsg, Mpo, Gfi1  
Monocytes: Irf8, Csf1r, Ctsg, Mpo  
Megakaryocytes: Itga2b (encodes protein CD41), Pbx1, Sdpr, Vwf  
Basophils: Mcpt8, Prss34  
B cells: Cd19, Vpreb2, Cd79a  
Mast cells: Cma1, Gzmb, c-kit   
Mast cells & Basophils: Ms4a2, Fcer1a, Cpa3  

In [None]:
# Candidate marker genes to overlay on the PAGA graphs and embeddings.
marker_genes = ['Gata2', 'Gata1', 'Hba-a2', 'Elane', 'Irf8', 'Itga2b', 'Prss34', 'Gzmb', 'Kit']

# Per-condition subsets: keep only genes present in that object's var_names,
# in the order of the candidate list.
marker_genes_1 = [x for x in marker_genes if x in adata_1.var_names]
marker_genes_2 = [x for x in marker_genes if x in adata_2.var_names]

# Genes available in BOTH conditions, used for the side-by-side embeddings.
# Built with an order-preserving filter instead of list(set(...) & set(...)):
# set iteration order is arbitrary and can change between kernel sessions,
# which reshuffled the plot panels from run to run.
marker_genes = [gene for gene in marker_genes_1 if gene in marker_genes_2]

### PAGA graph (NaCl)

In [None]:
# PAGA graph of the NaCl object: first figure shows three metadata columns plus
# the first three available marker genes, second figure the remaining markers.
nacl_obs_keys = ['tissue', 'cc_phase_class', 'fine_labels']
nacl_first_markers = marker_genes_1[0:3]
nacl_other_markers = marker_genes_1[3:10]

sc.pl.paga(adata_1, color=nacl_obs_keys + nacl_first_markers)
sc.pl.paga(adata_1, color=nacl_other_markers)

### PAGA graph (CpG)

In [None]:
# PAGA graph of the CpG object: first figure shows three metadata columns plus
# the first three available marker genes, second figure the remaining markers.
cpg_obs_keys = ['tissue', 'cc_phase_class', 'fine_labels']
cpg_first_markers = marker_genes_2[0:3]
cpg_other_markers = marker_genes_2[3:10]

sc.pl.paga(adata_2, color=cpg_obs_keys + cpg_first_markers)
sc.pl.paga(adata_2, color=cpg_other_markers)

## PAGA cell embeddings

In [None]:
# Recompute the force-directed embedding using PAGA initialization.
# 'paga' is the documented init_pos value for this. The previous value 'leiden'
# is a cluster label column, not a 2-D obsm key, and appears to have reached the
# PAGA code path only because it is a non-empty string.
# PAGA initialization needs the node positions stored by an earlier sc.pl.paga
# call on the same object, so the PAGA plotting cells must run first.
sc.tl.draw_graph(adata_1, init_pos='paga')
sc.tl.draw_graph(adata_2, init_pos='paga')

### Embeddings (NaCl)

In [None]:
# Force-directed embedding of the NaCl object, coloured by three metadata
# columns followed by the marker genes shared by both conditions.
nacl_embedding_colors = ['tissue', 'cc_phase_class', 'fine_labels'] + marker_genes
sc.pl.draw_graph(
    adata_1,
    color=nacl_embedding_colors,
    legend_loc='on data',
    ncols=6,
)

### Embeddings (CpG)

In [None]:
# Force-directed embedding of the CpG object, coloured by three metadata
# columns followed by the marker genes shared by both conditions.
cpg_embedding_colors = ['tissue', 'cc_phase_class', 'fine_labels'] + marker_genes
sc.pl.draw_graph(
    adata_2,
    color=cpg_embedding_colors,
    legend_loc='on data',
    ncols=6,
)

In [None]:
# Show the cell embedding and the PAGA graph of the NaCl object side by side.
sc.pl.paga_compare(adata_1)

In [None]:
# Show the cell embedding and the PAGA graph of the CpG object side by side.
sc.pl.paga_compare(adata_2)

# RNA velocity 

## Import velocyto loom 

In [None]:
# Load the velocyto output; gene names are de-duplicated only when the loom
# file actually contains repeated var_names.
velocyto_loom_path = 'data/object/velocyto.loom'
adata_v = scv.read_loom(velocyto_loom_path)

if not adata_v.var_names.is_unique:
    adata_v.var_names_make_unique()

## Subset data 

In [None]:
# Restrict the velocyto object to the cells of each condition, selecting rows
# by the cell names of the Seurat-derived objects, and take independent copies.
# NOTE(review): this assumes the loom cell names use the same barcode format as
# adata_1 / adata_2 - confirm.
adata_v_1 = adata_v[adata_1.obs_names, :].copy()
adata_v_2 = adata_v[adata_2.obs_names, :].copy()

In [None]:
# Copy the cell metadata of the Seurat-derived objects onto the velocyto
# objects, aligned on the obs index (left join keeps every velocyto cell).
#
# Only columns not already present are merged. The previous
# suffixes=('', '') gave pandas no way to disambiguate overlapping column
# names, so any shared column - and every re-run of this cell - ended in
# "ValueError: columns overlap but no suffix specified". Existing columns are
# kept untouched, which makes the cell safe to run repeatedly.
new_obs_columns_1 = [col for col in adata_1.obs.columns if col not in adata_v_1.obs.columns]
adata_v_1.obs = adata_v_1.obs.merge(
    adata_1.obs[new_obs_columns_1], how='left', left_index=True, right_index=True
)

new_obs_columns_2 = [col for col in adata_2.obs.columns if col not in adata_v_2.obs.columns]
adata_v_2.obs = adata_v_2.obs.merge(
    adata_2.obs[new_obs_columns_2], how='left', left_index=True, right_index=True
)

In [None]:
# Spliced/unspliced count proportions per sample. 'sample_name' is converted to
# a categorical column first, then used as the grouping key (NaCl, then CpG).
for velocity_adata in (adata_v_1, adata_v_2):
    velocity_adata.obs['sample_name'] = velocity_adata.obs['sample_name'].astype('category')
    scv.pl.proportions(velocity_adata, groupby='sample_name')

## Filter and normalize 

In [None]:
# Per condition: gene filtering/normalisation (min. 20 shared counts, top 2000
# genes), PCA, then a kNN graph with 30 neighbours on 30 principal components.
for velocity_adata in (adata_v_1, adata_v_2):
    scv.pp.filter_and_normalize(velocity_adata, min_shared_counts=20, n_top_genes=2000)
    sc.tl.pca(velocity_adata)
    sc.pp.neighbors(velocity_adata, n_pcs=30, n_neighbors=30)

## Compute RNA velocity

In [None]:
# Moments for velocity estimation, requested with 30 PCs and 20 neighbours.
moments_kwargs = dict(n_pcs=30, n_neighbors=20)
for velocity_adata in (adata_v_1, adata_v_2):
    scv.pp.moments(velocity_adata, **moments_kwargs)

In [None]:
# Fit the splicing dynamics per condition, using 8 parallel jobs.
for velocity_adata in (adata_v_1, adata_v_2):
    scv.tl.recover_dynamics(velocity_adata, n_jobs=8)

In [None]:
# Estimate velocities in 'dynamical' mode for each condition.
for velocity_adata in (adata_v_1, adata_v_2):
    scv.tl.velocity(velocity_adata, mode='dynamical')

In [None]:
# Build the velocity graph for each condition.
for velocity_adata in (adata_v_1, adata_v_2):
    scv.tl.velocity_graph(velocity_adata)

## Save/load results 

In [None]:
# Cache of the velocity results, one h5ad file per condition.
nacl_velocity_path = 'data/object/seurat_sct_nacl/scvelo.h5ad'
cpg_velocity_path = 'data/object/seurat_sct_cpg/scvelo.h5ad'

# Previously the writes were commented out and the reads unconditional, so a
# fresh "Restart & Run All" failed whenever the cache files were missing.
# Now the freshly computed objects are written if no cache exists yet.
# As before, the objects used downstream are always the ones read from disk;
# an existing cache is never overwritten (delete the file to refresh it).
if not os.path.exists(nacl_velocity_path):
    adata_v_1.write(nacl_velocity_path)
if not os.path.exists(cpg_velocity_path):
    adata_v_2.write(cpg_velocity_path)

adata_v_1 = sc.read_h5ad(nacl_velocity_path)
adata_v_2 = sc.read_h5ad(cpg_velocity_path)

In [None]:
# Velocity stream plots on the UMAP basis, coloured by the 'SCT_snn_res.0.8'
# column, with identical settings for both conditions.
# NOTE(review): no visible cell computes a UMAP on the velocity objects, so
# presumably the loaded h5ad files already carry it - confirm.
stream_kwargs = dict(
    basis="umap",
    legend_fontsize=12,
    title="",
    smooth=0.8,
    min_mass=4,
    color="SCT_snn_res.0.8",
)
for velocity_adata in (adata_v_1, adata_v_2):
    scv.pl.velocity_embedding_stream(velocity_adata, **stream_kwargs)