# Loading and preparing data 

In [None]:
import scanpy as sc

import pandas as pd
import subprocess
import os

import matplotlib as mpl

In [None]:
# Plot defaults for the whole notebook: scanpy figures with figsize (10, 10) and a
# non-transparent background, plus a white matplotlib figure face colour.
sc.set_figure_params(transparent=False, figsize=(10, 10))
mpl.rcParams.update({'figure.facecolor': 'white'})

In [None]:
# Work from the project root so that the relative 'data/...' paths used in this
# notebook resolve. The location can be overridden with the SPLENO_PROJECT_DIR
# environment variable (e.g. on another machine); the original path stays the default.
os.chdir(os.environ.get('SPLENO_PROJECT_DIR', '/research/peer/fdeckert/FD20200109SPLENO'))

In [None]:
# Python warnings: install a blanket 'ignore' filter so no warning is shown
# for the rest of the session.
import warnings
warnings.simplefilter('ignore')

## Set up rpy2

In [None]:
# R_HOME for the rpy2 setup in this section: the R library folder of the r.4.1.0 conda environment.
os.environ.update(R_HOME='/nobackup/peer/fdeckert/miniconda3/envs/r.4.1.0/lib/R')

In [None]:
import rpy2.rinterface_lib.callbacks
import logging

from rpy2.robjects import pandas2ri
import anndata2ri

In [None]:
# Load the rpy2 IPython extension; the Seurat export cell of this notebook relies on its %%R cell magic.
%load_ext rpy2.ipython

# Import AnnData

In [None]:
# Read the AnnData object stored in the scvi/run_2 analysis folder
# (path is relative to the working directory set at the top of the notebook).
scvi_adata_path = 'data/BSA_0355_SM01_10x_SPLENO/ANALYSIS/scvi/run_2/adata.h5ad'
adata = sc.read_h5ad(scvi_adata_path)

In [None]:
# Re-set raw: rebuild the working object from the contents of `.raw`,
# then register that rebuilt object as its own `.raw`.
raw_adata = adata.raw.to_adata()
raw_adata.raw = raw_adata
adata = raw_adata

## Cell type marker genes

In [None]:
# Marker genes grouped by cell type / programme. The keys are the group labels
# drawn above the gene brackets when this dictionary is passed to sc.pl.dotplot,
# so they must be spelled the way they should appear in the figure.
# A gene may be listed under several groups (e.g. 'Ccr2', 'Cx3cr1', 'Csf1r').
cell_type_genes_dict = {

    'Cycling': ['Mki67'],
    'Immune': ['Ptprc'],  # label was misspelled 'Immmune'
    'Migration': ['Cx3cr1', 'Ccr2', 'Ccr5', 'Cxcr2', 'Ccr7', 'Ccr1'], # Cx3cr1:RP Mo, Ccr2:RP Mo, Ccr5:RP DC, Cxcr2:RP Neu, Ccr7:WP cDC, MG

    'MHC': ['H2-D1', 'H2-Ab1'],

    'CD4': ['Cd4'],
    'CD8': ['Cd8a', 'Cd8b1'],

    'Progenitor': ['Kit', 'Cebpa', 'Irf8', 'Csf1r', 'Flt3'],

    'Neutrophil': ['Elane', 'Gfi1'],
    'Eo': ['Prg2', 'Prg3'],
    'Basophil': ['Mcpt8', 'Prss34', 'Fcer1a', 'Cd200r3'],
    'Mast': ['Gzmb', 'Cma1'],
    'Meg': ['Pf4', 'Itga2b'],

    'Erythroid': ['Gata2', 'Gata1', 'Klf1', 'Epor', 'Hbb-bt', 'Hba-x'],

    'Mo': ['Csf1r', 'Ccr2', 'Ly6c1', 'Ly6c2', 'Cx3cr1'],
    'RPM': ['Adgre1', 'Spic', 'Siglec1'],

    'cDC': ['Zbtb46', 'Flt3', 'Itgax'],
    'moDC': ['Ly6c2', 'Ccr2', 'Fcgr1', 'Cx3cr1', 'H2-Ab1'],
    'cDC1': ['Xcr1', 'Clec9a', 'Batf3', 'Irf8', 'Tlr3'],
    'cDC2': ['Itgam', 'Sirpa', 'Clec4a1', 'Irf4', 'Cd24a'],
    'pDC': ['Bst2', 'Siglech', 'Irf7', 'Tcf4', 'Ly6d'],

    'B cell': ['Ms4a1', 'Ighm', 'Ighd', 'Igha', 'Ly6k'],

    'T synapse': ['Trac', 'Trbc1', 'Trbc2', 'Trdc', 'Cd3e', 'Cd3d', 'Cd3g', 'Cd247'],

}

# Sub-cluster

In [None]:
# Sub-cluster selected Leiden clusters one after another. The first step starts
# from the original 'leiden' labels; every later step refines the labels already
# written to 'leiden_restrict'. Each entry is (source key, cluster id, resolution).
subclustering_steps = [
    ('leiden', '3', 0.3),
    ('leiden_restrict', '9', 0.2),
    ('leiden_restrict', '0', 0.3),
    ('leiden_restrict', '10', 0.3),
]
for source_key, cluster_id, resolution in subclustering_steps:
    sc.tl.leiden(
        adata,
        resolution=resolution,
        restrict_to=(source_key, [cluster_id]),
        key_added='leiden_restrict',
        flavor='igraph',
        n_iterations=2,
    )

# Fine leiden annotation 

In [None]:
# Fine annotation: every (sub-)cluster id of 'leiden_restrict' gets the label
# "<cell type> (<cluster id>)", so the cluster id stays visible in plots.
leiden_cell_type_fine = {
    cluster_id: f'{cell_type} ({cluster_id})'
    for cluster_id, cell_type in [
        ('0,0', 'EB II'),
        ('0,1', 'EB III'),
        ('1', 'EB I'),
        ('2', 'ProEB'),
        ('3,0', 'MEP'),
        ('3,1', 'MEP'),
        ('3,2', 'Meg'),
        ('4', 'cDC2'),
        ('5', 'ncMo'),
        ('6', 'cMo'),
        ('7', 'cDC1 CD8+'),
        ('8', 'RPM'),
        ('9,0', 'Neu'),
        ('9,1', 'MPP'),
        ('9,2', 'Mast'),
        ('10,0', 'Baso act.'),
        ('10,1', 'Baso'),
        ('11', 'moDC'),
        ('12', 'Lymphocyte'),
        ('13', 'cDC migratory'),
        ('14', 'B cell'),
    ]
}

In [None]:
# Mapper
# Translate every 'leiden_restrict' cluster id into its fine label and store the
# result as a categorical obs column. Ids without an entry in the dictionary keep
# their raw id, so unannotated clusters stay visible instead of becoming NaN.
# The column is assigned directly (the Series shares the obs index), which keeps
# the `leiden_cell_type_fine` dictionary intact and makes the cell safe to re-run:
# rebinding the name to a DataFrame and merging would break the lookup and create
# '_x'/'_y' duplicate columns on a second execution.
adata.obs['leiden_cell_type_fine'] = (
    adata.obs['leiden_restrict']
    .map(lambda cluster_id: leiden_cell_type_fine.get(cluster_id, cluster_id))
    .astype('category')
)

In [None]:
# Display order of the fine labels (used wherever the categorical is plotted).
# reorder_categories requires this list to contain exactly the existing categories.
fine_label_order = [
    'MPP (9,1)',
    'Neu (9,0)',
    'Baso (10,1)',
    'Baso act. (10,0)',
    'Mast (9,2)',
    'Meg (3,2)',
    'MEP (3,1)',
    'MEP (3,0)',
    'ProEB (2)',
    'EB I (1)',
    'EB II (0,0)',
    'EB III (0,1)',
    'cMo (6)',
    'ncMo (5)',
    'RPM (8)',
    'moDC (11)',
    'cDC2 (4)',
    'cDC migratory (13)',
    'cDC1 CD8+ (7)',
    'B cell (14)',
    'Lymphocyte (12)',
]
fine_labels = adata.obs['leiden_cell_type_fine']
adata.obs['leiden_cell_type_fine'] = fine_labels.cat.reorder_categories(fine_label_order)

In [None]:
# Marker-gene dot plot per fine label; expression is scaled per gene
# (standard_scale='var') and read from adata.X rather than .raw.
dp = sc.pl.dotplot(
    adata,
    cell_type_genes_dict,
    groupby='leiden_cell_type_fine',
    standard_scale='var',
    use_raw=False,
    dendrogram=False,
    return_fig=True,
)
# Add the totals panel, apply the dot styling and render the figure.
(
    dp.add_totals()
    .style(dot_edge_color='black', dot_edge_lw=0.5, cmap='Reds')
    .show()
)

# Main leiden annotation 

In [None]:
# Main annotation: 'leiden_restrict' cluster id -> cell type name without the
# cluster id suffix. Sub-clusters '3,0' and '3,1' share the label 'MEP'.
leiden_cell_type_main = dict([
    ('0,0', 'EB II'),
    ('0,1', 'EB III'),
    ('1', 'EB I'),
    ('2', 'ProEB'),
    ('3,0', 'MEP'),
    ('3,1', 'MEP'),
    ('3,2', 'Meg'),
    ('4', 'cDC2'),
    ('5', 'ncMo'),
    ('6', 'cMo'),
    ('7', 'cDC1 CD8+'),
    ('8', 'RPM'),
    ('9,0', 'Neu'),
    ('9,1', 'MPP'),
    ('9,2', 'Mast'),
    ('10,0', 'Baso act.'),
    ('10,1', 'Baso'),
    ('11', 'moDC'),
    ('12', 'Lymphocyte'),
    ('13', 'cDC migratory'),
    ('14', 'B cell'),
])

In [None]:
# Mapper
# Translate every 'leiden_restrict' cluster id into its main label and store the
# result as a categorical obs column. Ids without an entry in the dictionary keep
# their raw id, so unannotated clusters stay visible instead of becoming NaN.
# The column is assigned directly (the Series shares the obs index), which keeps
# the `leiden_cell_type_main` dictionary intact and makes the cell safe to re-run:
# rebinding the name to a DataFrame and merging would break the lookup and create
# '_x'/'_y' duplicate columns on a second execution.
adata.obs['leiden_cell_type_main'] = (
    adata.obs['leiden_restrict']
    .map(lambda cluster_id: leiden_cell_type_main.get(cluster_id, cluster_id))
    .astype('category')
)

In [None]:
# Display order of the main labels (used wherever the categorical is plotted).
# reorder_categories requires this list to contain exactly the existing categories.
main_label_order = [
    'MPP',
    'Neu',
    'Baso',
    'Baso act.',
    'Mast',
    'Meg',
    'MEP',
    'ProEB',
    'EB I',
    'EB II',
    'EB III',
    'cMo',
    'ncMo',
    'RPM',
    'moDC',
    'cDC2',
    'cDC migratory',
    'cDC1 CD8+',
    'B cell',
    'Lymphocyte',
]
main_labels = adata.obs['leiden_cell_type_main']
adata.obs['leiden_cell_type_main'] = main_labels.cat.reorder_categories(main_label_order)

In [None]:
# Marker-gene dot plot per main label; expression is scaled per gene
# (standard_scale='var') and read from adata.X rather than .raw.
dp = sc.pl.dotplot(
    adata,
    cell_type_genes_dict,
    groupby='leiden_cell_type_main',
    standard_scale='var',
    use_raw=False,
    dendrogram=False,
    return_fig=True,
)
# Add the totals panel, apply the dot styling and render the figure.
(
    dp.add_totals()
    .style(dot_edge_color='black', dot_edge_lw=0.5, cmap='Reds')
    .show()
)

# UMAP

In [None]:
# Two UMAP panels side by side, coloured by the fine and the main annotation,
# with the labels drawn on the embedding instead of in a legend.
annotation_keys = ['leiden_cell_type_fine', 'leiden_cell_type_main']
sc.pl.umap(
    adata,
    color=annotation_keys,
    legend_loc='on data',
    frameon=False,
    ncols=2,
    wspace=0.5,
    size=100,
)

# Save AnnData object

In [None]:
# Persist the annotated AnnData object (path is relative to the working directory).
annotated_adata_path = 'data/BSA_0355_SM01_10x_SPLENO/ANALYSIS/pp.h5ad'
adata.write_h5ad(annotated_adata_path)

# Save Seurat object

In [None]:
# Collect the pieces that the following %%R cell receives through its -i arguments.
# Identifiers and per-cell metadata
cell_id, gene_id = adata.obs_names, adata.var_names
meta = adata.obs
# Embeddings stored in obsm
umap, latent = adata.obsm['X_umap'], adata.obsm['latent']
# Expression matrix: X transposed and converted to a dense matrix
# (the R cell labels its rows with gene_id and its columns with cell_id)
cnt = adata.X.T.todense()

In [None]:
%%R -i cnt -i cell_id -i gene_id -i meta -i umap -i latent

# Build a Seurat object from the objects passed in from Python (expression matrix,
# cell/gene identifiers, metadata, UMAP and latent embeddings) and save it as RDS.

# Label the expression matrix: rows are genes, columns are cells
colnames(cnt) <- cell_id
rownames(cnt) <- gene_id

so <- Seurat::CreateSeuratObject(cnt, meta.data=meta, project='FD20200109SPLENO', assay='RNA')

# UMAP embedding. The assay is named explicitly: when it is omitted,
# CreateDimReducObject emits a warning and falls back to a default assay.
colnames(umap) <- c('UMAP_1', 'UMAP_2')
rownames(umap) <- cell_id
so[['umap']] <- Seurat::CreateDimReducObject(embeddings=as.matrix(umap), key='UMAP_', assay='RNA')

# Latent embedding. seq_len() is used instead of 1:ncol() so that a matrix
# without columns yields no names rather than the sequence c(1, 0).
colnames(latent) <- paste0("LATENT_", seq_len(ncol(latent)))
rownames(latent) <- cell_id
so[['latent']] <- Seurat::CreateDimReducObject(embeddings=as.matrix(latent), key='LATENT_', assay='RNA')

saveRDS(so, 'data/BSA_0355_SM01_10x_SPLENO/ANALYSIS/pp.rds')