# Annotation 

References for splenic [dendritic cell](https://www.sciencedirect.com/science/article/abs/pii/S0008874918302259) and [macrophage](https://www.sciencedirect.com/science/article/pii/S1074761311003591) subsets, and for [progenitor](https://www.sciencedirect.com/science/article/pii/S1074761318304461) populations.

[Lewis et al., 2011](https://www.sciencedirect.com/science/article/pii/S1074761311003591#dfig1)

In [None]:
# Suppress warning output for the rest of the session (a negative `warn` makes R ignore warnings).
# NOTE(review): this also hides warnings raised by the analysis calls below — consider scoping it.
options(warn=-1)

In [None]:
library_load <- suppressMessages(
    
    list(
        
        # Seurat 
        library(Seurat), 
        
        # Data 
        library(tidyverse), 
        
        # Plot 
        library(ggplot2), 
        library(ComplexHeatmap), 
        library(ggplotify), 
        library(grid), 
        library(circlize), 
        
        # Python 
        library(reticulate)
        
    )
)

In [None]:
# Fix the seed of R's random number generator; the value is kept in `random_seed` for re-use
random_seed <- 42
set.seed(random_seed)

In [None]:
# Configure reticulate: point it at the project's conda environment (using the given conda binary), 
# then print the Python configuration that reticulate resolved so the binding can be checked.
use_condaenv(condaenv='p.3.8.12-FD20200109SPLENO', conda="/nobackup/peer/fdeckert/miniconda3/bin/conda", required=NULL)
py_config()

In [None]:
# Turn off the informational messages printed by ComplexHeatmap
ht_opt$message=FALSE # ComplexHeatmap 

In [None]:
# Set working directory to project root; all relative paths below (source files, data/object/...) 
# are resolved against this directory.
# NOTE(review): absolute, machine-specific path — the notebook will not run elsewhere without editing it.
setwd("/research/peer/fdeckert/FD20200109SPLENO")

In [None]:
# Load the project helper scripts in a fixed order (paths are relative to the working directory)
project_source_files <- c(
    "plotting_global.R", 
    "bin/cell_type.R", 
    "bin/seurat_qc.R", 
    "bin/pbDEA.R", 
    "bin/raceid.R", 
    "bin/gene_modules.R"
)

for (project_source_file in project_source_files) {
    source(project_source_file)
}

In [None]:
# Plotting Theme: make the project theme the ggplot2 default for every plot created below
ggplot2::theme_set(theme_global_set()) # From project global source()

# Parameter settings

In [None]:
# Input / output locations, relative to the project root
object_dir <- file.path("data", "object")

so_file <- file.path(object_dir, "sct", "so_sct_int_hvg8000.rds") # Seurat object read at the start
so_pp_file <- file.path(object_dir, "pp.rds")                     # Target for the annotated Seurat object
h5ad_pp_file <- file.path(object_dir, "pp.h5ad")                  # Target for the AnnData export

# Import Seurat object

In [None]:
# Load the serialized Seurat object from `so_file`
so <- readRDS(so_file)

# Seurat dimensional reduction and clustering 

In [None]:
# Work on the integrated assay for neighbour search and clustering
DefaultAssay(so) <- "integrated"

# Cluster all cells: nearest-neighbour graph on the first 10 dimensions (k=20) ...
so <- FindNeighbors(
    so, 
    dims=1:10, 
    k.param=20, 
    verbose=FALSE
)

# ... followed by community detection (algorithm=1, resolution=1); singletons are merged into clusters
so <- FindClusters(
    so, 
    resolution=1, 
    algorithm=1, 
    group.singletons=TRUE, 
    verbose=FALSE
)

In [None]:
# UMAP embedding computed with the umap-learn backend.
# NOTE(review): the embedding uses dims 1:15 whereas the neighbour graph for clustering uses dims 1:10 — confirm this is intended.
so <- RunUMAP(
    so, 
    dims=1:15, 
    n.neighbors=100, 
    min.dist=1, 
    spread=1, 
    umap.method="umap-learn", 
    verbose=FALSE
)

In [None]:
# Canvas size for the combined overview figure
options(repr.plot.width=30, repr.plot.height=10)

# UMAP coloured by categorical metadata: clusters, treatment and cell-cycle class.
# `dplot`, `fplot` and the `color` palettes are not defined in this notebook (presumably from the sourced project files).
dplot_1 <- dplot(so, reduction="umap", group_by="seurat_clusters", alpha=0.5)
dplot_2 <- dplot(so, reduction="umap", group_by="treatment", alpha=0.5) + scale_color_manual(values=color$treatment)
dplot_3 <- dplot(so, reduction="umap", group_by="cc_phase_class", alpha=0.5) + scale_color_manual(values=color$cc_phase_class)
# UMAP coloured by continuous values: two QC percentages and three marker genes (expression from the "data" slot)
fplot_1 <- fplot(so, reduction="umap", features="pRb_RNA") + ggtitle("Percentage Rb") + scale_color_viridis(option="G")
fplot_2 <- fplot(so, reduction="umap", features="pMt_RNA") + ggtitle("Percentage Mt") + scale_color_viridis(option="G")
fplot_3 <- fplot(so, reduction="umap", features="Ccr2", slot="data") + ggtitle("Ccr2") + scale_color_viridis(option="G")
fplot_4 <- fplot(so, reduction="umap", features="Ccr7", slot="data") + ggtitle("Ccr7") + scale_color_viridis(option="G")
fplot_5 <- fplot(so, reduction="umap", features="Ly6c2", slot="data") + ggtitle("Ly6c2") + scale_color_viridis(option="G")

# Compose all panels into one figure.
# NOTE(review): 8 panels are placed in a 2 x 6 layout (12 slots) — confirm the grid size is intended.
dplot_1 + dplot_2 + dplot_3 + fplot_1 + fplot_2 + fplot_3 + fplot_4 + fplot_5 + plot_layout(nrow=2, ncol=6)

In [None]:
# Number of cells per Seurat cluster (columns) for every treatment x replicate combination (rows), 
# rendered as an HTML table. Combinations without cells show up as NA.
# `.groups="drop"` returns an ungrouped result (and avoids the regrouping message); 
# `pivot_wider()` replaces the superseded `spread()`, with columns and rows sorted explicitly
# to keep the layout `spread()` produced.
so@meta.data %>% 
    dplyr::group_by(seurat_clusters, treatment, sample_rep) %>% 
    dplyr::summarise(n=dplyr::n(), .groups="drop") %>% 
    tidyr::pivot_wider(names_from=seurat_clusters, values_from=n, names_sort=TRUE) %>% 
    dplyr::arrange(treatment, sample_rep) %>% 
    as.data.frame() %>% 
    kableExtra::kable("html") %>% as.character() %>% IRdisplay::display_html()

In [None]:
# Remove the cluster which has no evidence across replicates (cluster "21") 
so <- subset(so, subset=seurat_clusters!="21")

In [None]:
# Switch the default assay from "integrated" to "RNA" for the steps that follow
DefaultAssay(so) <- "RNA"

# RaceID

In [None]:
# Run the project's RaceID helper on all cells and copy its results into the Seurat object.
# The plots below read a "umap_varid" reduction and a `varid_clusters` column from `so` afterwards.
# NOTE(review): compute=FALSE presumably loads a cached result instead of recomputing — confirm in bin/raceid.R.
raceid <- raceid_pp(so=so, suffix="", compute=FALSE)
so <- raceid_to_seurat(so, raceid)

In [None]:
# One row of six square panels
options(repr.plot.width=6*6, repr.plot.height=6)

# VarID UMAP coloured by Seurat clusters, VarID clusters, treatment, cell-cycle class and the label_fine_haemosphere labels
dplot_1 <- dplot(so, reduction="umap_varid", group_by="seurat_clusters", alpha=0.5)
dplot_2 <- dplot(so, reduction="umap_varid", group_by="varid_clusters", alpha=0.5)
dplot_3 <- dplot(so, reduction="umap_varid", group_by="treatment", alpha=0.5) + scale_color_manual(values=color$treatment)
dplot_4 <- dplot(so, reduction="umap_varid", group_by="cc_phase_class", alpha=0.5) + scale_color_manual(values=color$cc_phase_class)
dplot_5 <- dplot(so, reduction="umap_varid", group_by="label_fine_haemosphere", alpha=0.5) + scale_color_manual(values=color$label_fine_haemosphere)
# VarID UMAP coloured by the pRb_RNA value
fplot_1 <- fplot(so, reduction="umap_varid", features="pRb_RNA") + ggtitle("Percentage Rb") + scale_color_viridis(option="G")

# Compose the six panels in a single row
dplot_1 + dplot_2 + dplot_3 + dplot_4 + dplot_5 + fplot_1 + plot_layout(nrow=1, ncol=6)

In [None]:
# Number of cells per VarID cluster (columns) for every treatment x replicate combination (rows), 
# rendered as an HTML table. Combinations without cells show up as NA.
# `.groups="drop"` returns an ungrouped result (and avoids the regrouping message); 
# `pivot_wider()` replaces the superseded `spread()`, with columns and rows sorted explicitly
# to keep the layout `spread()` produced.
so@meta.data %>% 
    dplyr::group_by(varid_clusters, treatment, sample_rep) %>% 
    dplyr::summarise(n=dplyr::n(), .groups="drop") %>% 
    tidyr::pivot_wider(names_from=varid_clusters, values_from=n, names_sort=TRUE) %>% 
    dplyr::arrange(treatment, sample_rep) %>% 
    as.data.frame() %>% 
    kableExtra::kable("html") %>% as.character() %>% IRdisplay::display_html()

# Progenitor cell annotation

## Progenitor gene modules 

In [None]:
# Seurat clusters treated as progenitors; the vector order is re-used later as the column order of the marker heat map
seurat_clusters_prog <- c("17", "15", "2", "13", "5", "12", "11", "9", "7", "3", "0", "1")
# Keep only the cells belonging to those clusters
so_prog <- subset(so, subset=seurat_clusters %in% seurat_clusters_prog)

In [None]:
# Lineage gene sets keyed by the score name they are stored under.
# AddModuleScore() appends an index to `name`, so the metadata columns are msHSC1, msLy1, ...
module_gene_sets <- list(
    msHSC=genes_hsc, 
    msLy=genes_ly, 
    msMeg=genes_meg, 
    msEry=genes_ery, 
    msMo=genes_mo, 
    msNeu=genes_neu, 
    msEo=genes_eo, 
    msBaso=genes_baso, 
    msMast=genes_mast
)

# Score one gene set at a time on the RNA assay, in the order listed above
for (module_name in names(module_gene_sets)) {
    so_prog <- AddModuleScore(so_prog, list(module_gene_sets[[module_name]]), assay="RNA", name=module_name)
}

## RaceID (Progenitors)

In [None]:
# Run the project's RaceID helper on the progenitor subset (file suffix "_prog") and copy the results into `so_prog`.
# NOTE(review): compute=FALSE presumably loads a cached result instead of recomputing — confirm in bin/raceid.R.
raceid_prog <- raceid_pp(so=so_prog, suffix="_prog", compute=FALSE)
so_prog <- raceid_to_seurat(so_prog, raceid_prog)

In [None]:
# One row of six square panels
options(repr.plot.width=6*6, repr.plot.height=6)

# Progenitor VarID UMAP coloured by Seurat clusters, VarID clusters, treatment, cell-cycle class and the label_fine_haemosphere labels
dplot_1 <- dplot(so_prog, reduction="umap_varid", group_by="seurat_clusters", alpha=0.5)
dplot_2 <- dplot(so_prog, reduction="umap_varid", group_by="varid_clusters", alpha=0.5)
dplot_3 <- dplot(so_prog, reduction="umap_varid", group_by="treatment", alpha=0.5) + scale_color_manual(values=color$treatment)
dplot_4 <- dplot(so_prog, reduction="umap_varid", group_by="cc_phase_class", alpha=0.5) + scale_color_manual(values=color$cc_phase_class)
dplot_5 <- dplot(so_prog, reduction="umap_varid", group_by="label_fine_haemosphere", alpha=0.5) + scale_color_manual(values=color$label_fine_haemosphere)
# Progenitor VarID UMAP coloured by the pRb_RNA value
fplot_1 <- fplot(so_prog, reduction="umap_varid", features="pRb_RNA") + ggtitle("Percentage Rb") + scale_color_viridis(option="G")

# Compose the six panels in a single row
dplot_1 + dplot_2 + dplot_3 + dplot_4 + dplot_5 + fplot_1 + plot_layout(nrow=1, ncol=6)

## Marker gene heat map (Progenitors)

In [None]:
options(repr.plot.width=7.5, repr.plot.height=15)

# Average expression (RNA assay, "data" slot) of the marker genes per Seurat cluster
mat <- AverageExpression(so_prog, assay="RNA", slot="data", features=genes_marker$genes, group.by="seurat_clusters")[["RNA"]]

# Put the cluster columns in the curated progenitor order, then z-score every gene across clusters
mat <- mat[, seurat_clusters_prog]
mat <- t(scale(t(mat)))

# Genes with zero variance across clusters scale to NaN; drop them, otherwise max() below 
# is not finite and seq() fails (same handling as in the MPP heat map)
mat <- na.omit(mat)

# Look up the cell-type group of every row that is actually present in the matrix, so the 
# split always has one entry per row even if features were dropped
row_split_prog <- genes_marker$cell_type[match(rownames(mat), genes_marker$genes)]

# Symmetric colour breaks around zero in steps of 0.1
breaks <- seq(-max(abs(mat)), +max(abs(mat)), by=0.1)

hm_1 <- ComplexHeatmap::Heatmap(
    
    matrix=mat,    
    name="z-score", 
    column_title="Progenitor marker expression",
    col=colorRamp2(breaks, mako(length(breaks))), 
    
    column_title_gp=gpar(fontsize=18, fontface="bold"), 
    column_names_gp =grid::gpar(fontsize=16), 
    row_title_gp=gpar(fontsize=16, fontface="bold"),
    row_names_gp=grid::gpar(fontsize=16), 
    
    # Rows are grouped by marker cell type and clustered within each group
    row_split=row_split_prog,
    
    cluster_rows=TRUE, 
    cluster_columns=FALSE, # Keep the curated column order
    show_row_names=TRUE,
    show_column_names=TRUE, 
    
    # Fixed cell size: 5 mm per column, 8 mm per row
    row_dend_width=unit(1.5, "cm"), 
    width=ncol(mat)*unit(5, "mm"), 
    height=nrow(mat)*unit(8, "mm"), 
    rect_gp=gpar(col="white", lwd=2), 
    
    heatmap_legend_param=list(title_gp=gpar(fontsize=16, fontface="bold"), labels_gp=gpar(fontsize=16))

) %>% as.ggplot()

hm_1

## Annotate erythroblasts 

In [None]:
# Seurat clusters annotated as (pro-)erythroblasts; these are labelled per Seurat cluster in the table below
seurat_clusters_eb <- c("13", "5", "12", "11", "9", "7", "3", "0", "1")
# Keep only the cells belonging to those clusters (taken from the full object `so`)
so_eb <- subset(so, subset=seurat_clusters %in% seurat_clusters_eb)

In [None]:
# Annotation table for the erythroblast clusters: one row per Seurat cluster (`ident`).
# The first four clusters are ProEB stages 1-4, the remaining five are EB stages 1-5; 
# the fine and fine-detail labels are identical for this lineage.
labels_eb <- c(paste0("ProEB (", 1:4, ")"), paste0("EB (", 1:5, ")"))

cell_type_eb <- data.frame(
    ident=c("13", "5", "12", "11", "9", "7", "3", "0", "1"), 
    cell_type_fine_detail=labels_eb, 
    cell_type_fine=labels_eb, 
    cell_type_main=rep(c("ProEB", "EB"), times=c(4, 5))
)

In [None]:
# Expand the cluster-level erythroblast labels to one row per cell.
# Cells are matched through their Seurat cluster; row names become the cell ids.
meta_eb <- dplyr::select(so_eb@meta.data, seurat_clusters, cell_id)

cell_type_eb <- meta_eb %>% 
    dplyr::left_join(cell_type_eb, by=c("seurat_clusters"="ident")) %>% 
    column_to_rownames("cell_id") %>% 
    dplyr::mutate(
        # Erythroblasts are annotated per Seurat cluster, so the VarID cluster is a placeholder
        varid_clusters="varid_clusters_eb_NA", 
        ident=paste0("seurat_clusters_eb_", seurat_clusters)
    )

## RaceID (MPP) 

In [None]:
# Seurat clusters treated as multipotent progenitors (the progenitor clusters not assigned to erythroblasts)
seurat_clusters_mpp <- c("17", "15", "2")
# Subset from `so_prog` so the module scores computed on the progenitors are carried along
so_mpp <- subset(so_prog, subset=seurat_clusters %in% seurat_clusters_mpp)

In [None]:
# Run the project's RaceID helper on the MPP subset (file suffix "_mpp") and copy the results into `so_mpp`.
# NOTE(review): compute=FALSE presumably loads a cached result instead of recomputing — confirm in bin/raceid.R.
raceid_mpp <- raceid_pp(so=so_mpp, suffix="_mpp", compute=FALSE)
so_mpp <- raceid_to_seurat(so_mpp, raceid_mpp)

In [None]:
# Three rows of six square panels
options(repr.plot.width=6*6, repr.plot.height=3*6)

# Categorical overlays on the MPP VarID UMAP
dplot_1 <- dplot(so_mpp, reduction="umap_varid", group_by="seurat_clusters", alpha=0.5)
dplot_2 <- dplot(so_mpp, reduction="umap_varid", group_by="varid_clusters", alpha=0.5)
dplot_3 <- dplot(so_mpp, reduction="umap_varid", group_by="treatment", alpha=0.5) + 
    scale_color_manual(values=color$treatment)
dplot_4 <- dplot(so_mpp, reduction="umap_varid", group_by="cc_phase_class", alpha=0.5) + 
    scale_color_manual(values=color$cc_phase_class)

# Continuous overlays, listed as feature name -> panel title (QC percentages, then the lineage module scores)
fplot_titles_mpp <- c(
    pRb_RNA="Percentage Rb", 
    pMt_RNA="Percentage Mt", 
    msHSC1="msHSC1", 
    msLy1="msLy1", 
    msMeg1="msMeg1", 
    msEry1="msEry1", 
    msMo1="msMo1", 
    msNeu1="msNeu1", 
    msEo1="msEo1", 
    msBaso1="msBaso1", 
    msMast1="msMast1"
)

fplots_mpp <- lapply(names(fplot_titles_mpp), function(feature) {
    fplot(so_mpp, reduction="umap_varid", features=feature) + 
        ggtitle(fplot_titles_mpp[[feature]]) + 
        scale_color_viridis(option="G")
})

# Add the panels left to right in the listed order, six per row
Reduce(`+`, c(list(dplot_1, dplot_2, dplot_3, dplot_4), fplots_mpp)) + plot_layout(ncol=6)

In [None]:
options(repr.plot.width=7.5, repr.plot.height=15)

# Re-run the gene module script before using `genes_marker`.
# NOTE(review): this file is already sourced during setup; presumably kept to pick up edits — confirm.
source("bin/gene_modules.R")

# Average expression (RNA assay, "data" slot) of the marker genes per VarID cluster
mat <- AverageExpression(so_mpp, assay="RNA", slot="data", features=genes_marker$genes, group.by="varid_clusters")[["RNA"]]

# z-score every gene across clusters; rows containing NA/NaN after scaling are dropped
mat <- t(scale(t(mat)))
mat <- na.omit(mat) 

# Cell-type group of every remaining row, looked up by gene name so the split stays aligned 
# with the matrix rows regardless of their order. Kept under its own name because 
# `cell_type_mpp` is used for the MPP annotation table further down.
row_split_mpp <- genes_marker$cell_type[match(rownames(mat), genes_marker$genes)]

# Symmetric colour breaks around zero in steps of 0.1
breaks <- seq(-max(abs(mat)), +max(abs(mat)), by=0.1)

hm_1 <- ComplexHeatmap::Heatmap(
    
    matrix=mat,    
    name="z-score", 
    column_title="Progenitor marker expression",
    col=colorRamp2(breaks, mako(length(breaks))), 
    
    column_title_gp=gpar(fontsize=18, fontface="bold"), 
    column_names_gp =grid::gpar(fontsize=16), 
    row_title_gp=gpar(fontsize=16, fontface="bold"),
    row_names_gp=grid::gpar(fontsize=16), 
    
    # Rows are grouped by marker cell type and clustered within each group
    row_split=row_split_mpp,
    cluster_rows=TRUE, 
    cluster_columns=FALSE,
    show_row_names=TRUE,
    show_column_names=TRUE, 
    
    # Fixed cell size: 5 mm per column, 8 mm per row
    row_dend_width=unit(1.5, "cm"), 
    width=ncol(mat)*unit(5, "mm"), 
    height=nrow(mat)*unit(8, "mm"), 
    
    rect_gp=gpar(col="white", lwd=2), 
    heatmap_legend_param=list(title_gp=gpar(fontsize=16, fontface="bold"), labels_gp=gpar(fontsize=16))

) %>% as.ggplot()

hm_1

## Annotate MPP 

In [None]:
# Annotation table for the MPP subset: one row per VarID cluster (`ident`, numeric 1-10).
# The fine and fine-detail labels are identical for these populations.
labels_mpp <- c("MEP (2)", "MEP (4)", "MEP (3)", "GMP", "MegP", "BasoP", "MLP", "MDP", "MEP (1)", "MastP")

cell_type_mpp <- data.frame(
    ident=as.numeric(1:10), 
    cell_type_fine_detail=labels_mpp, 
    cell_type_fine=labels_mpp, 
    # BasoP and MastP are grouped under GMP at the main level
    cell_type_main=c("MEP", "MEP", "MEP", "GMP", "MegP", "GMP", "MLP", "MDP", "MEP", "GMP")
)

In [None]:
# Expand the cluster-level MPP labels to one row per cell.
# Cells are matched through their VarID cluster; row names become the cell ids.
# NOTE(review): the join key `ident` is numeric, so `varid_clusters` is assumed to have a compatible type here — confirm.
meta_mpp <- dplyr::select(so_mpp@meta.data, seurat_clusters, varid_clusters, cell_id)

cell_type_mpp <- meta_mpp %>% 
    dplyr::left_join(cell_type_mpp, by=c("varid_clusters"="ident")) %>% 
    column_to_rownames("cell_id") %>% 
    dplyr::mutate(
        # Prefix the cluster id with the subset name; `ident` carries the same prefixed value
        varid_clusters=paste0("varid_clusters_mpp_", varid_clusters), 
        ident=varid_clusters
    )

# Myeloid cell annotation 

In [None]:
# Seurat clusters treated as myeloid cells
seurat_clusters_m <- c("10", "6", "18", "20", "19", "4", "8", "14", "16")
# Keep only the cells belonging to those clusters (taken from the full object `so`)
so_m <- subset(so, subset=seurat_clusters %in% seurat_clusters_m)

## RaceID  (Myeloid)

In [None]:
# Run the project's RaceID helper on the myeloid subset (file suffix "_m") and copy the results into `so_m`.
# NOTE(review): compute=FALSE presumably loads a cached result instead of recomputing — confirm in bin/raceid.R.
raceid_m <- raceid_pp(so=so_m, suffix="_m", compute=FALSE)
so_m <- raceid_to_seurat(so_m, raceid_m)

In [None]:
# One row of six square panels
options(repr.plot.width=6*6, repr.plot.height=6)

# Myeloid VarID UMAP coloured by Seurat clusters, VarID clusters, treatment and cell-cycle class
dplot_1 <- dplot(so_m, reduction="umap_varid", group_by="seurat_clusters", alpha=0.5)
dplot_2 <- dplot(so_m, reduction="umap_varid", group_by="varid_clusters", alpha=0.5)
dplot_3 <- dplot(so_m, reduction="umap_varid", group_by="treatment", alpha=0.5) + scale_color_manual(values=color$treatment)
dplot_4 <- dplot(so_m, reduction="umap_varid", group_by="cc_phase_class", alpha=0.5) + scale_color_manual(values=color$cc_phase_class)
# Myeloid VarID UMAP coloured by the pRb_RNA and pMt_RNA values
fplot_1 <- fplot(so_m, reduction="umap_varid", features="pRb_RNA") + ggtitle("Percentage Rb") + scale_color_viridis(option="G")
fplot_2 <- fplot(so_m, reduction="umap_varid", features="pMt_RNA") + ggtitle("Percentage Mt") + scale_color_viridis(option="G")

# Compose the six panels in a single row
dplot_1 + dplot_2 + dplot_3 + dplot_4 + fplot_1 + fplot_2 + plot_layout(nrow=1, ncol=6)

In [None]:
# Same six-slot row as the previous figure
options(repr.plot.width=6*6, repr.plot.height=6)

# Chemokine receptor expression ("data" slot) on the myeloid VarID UMAP; the location tag of each receptor is given in the panel title
fplot_1 <- fplot(so_m, reduction="umap_varid", features="Ccr7", slot="data") + ggtitle("Ccr7 (TCZ)") + scale_color_viridis(option="G")
fplot_2 <- fplot(so_m, reduction="umap_varid", features="Cxcr5", slot="data") + ggtitle("Cxcr5 (BCZ)") + scale_color_viridis(option="G")
fplot_3 <- fplot(so_m, reduction="umap_varid", features="Cxcr4", slot="data") + ggtitle("Cxcr4 (RP)") + scale_color_viridis(option="G")
fplot_4 <- fplot(so_m, reduction="umap_varid", features="Ccr2", slot="data") + ggtitle("Ccr2 (Blood)") + scale_color_viridis(option="G")

# Four panels plus two empty spacers, so the panels keep the size of the six-panel rows
fplot_1 + fplot_2 + fplot_3 + fplot_4 + plot_spacer() + plot_spacer() + plot_layout(nrow=1, ncol=6)

## Set varid_clusters to re-use the DEA results

In [None]:
# Prefix the myeloid VarID cluster ids with the subset name (e.g. "varid_clusters_m_3").
# An already present prefix is stripped first, so re-running this cell does not stack 
# the prefix ("varid_clusters_m_varid_clusters_m_3") and break the cluster look-ups below.
varid_prefix_m <- "varid_clusters_m_"
so_m$varid_clusters <- paste0(varid_prefix_m, sub(paste0("^", varid_prefix_m), "", as.character(so_m$varid_clusters)))

## Canonical marker genes (Myeloid)

In [None]:
# Canonical myeloid marker genes (gene symbol -> common protein name):
#   Adgre1 -> F4/80
#   Itgax  -> Cd11c
#   Itgam  -> Cd11b
#   Ly6c2  -> Ly6c
#   Csf1r  -> Cd115
#   Cd8a   -> Cd8
#   Cd4    -> Cd4
#   Napsa  -> Napsa
#   Lsp1   -> Lsp1
features_m <- c("Adgre1", "Itgax", "Itgam", "Ly6c2", "Csf1r", "Cd8a", "Cd4", "Napsa", "Lsp1")

In [None]:
# Figure width grows with the number of marker genes
options(repr.plot.width=3*(2.5+length(features_m)/2.5), repr.plot.height=5)

# Same dot plot per VarID cluster twice: once with scale=FALSE and once with scale=TRUE (`dp_feature` is a project helper)
dp_1 <- dp_feature(so_m, features_m, group_by="varid_clusters", scale=FALSE, title="Myeloid marker", range_max=8) + theme_global_set(1)
dp_2 <- dp_feature(so_m, features_m, group_by="varid_clusters", scale=TRUE, title="Myeloid marker", range_max=8) + theme_global_set(1)

# Show both variants side by side
dp_1 + dp_2

In [None]:
options(repr.plot.width=20, repr.plot.height=20)

# Marker analysis across all myeloid VarID clusters with the project helper, followed by its heat map.
# NOTE(review): judging by the name, `voomlmfit_marker` fits a limma-voom model on pseudobulk per `sample_group` — confirm in bin/pbDEA.R.
results <- voomlmfit_marker(so_m, grouping_var="varid_clusters", pseudobatch_var="sample_group") 
hm_marker(results, so_m, grouping_var="varid_clusters", width=0.05, height=1)

## Macrophage marker genes

In [None]:
# Macrophage marker genes, grouped by the subset they mark
marker_groups_mac <- list(
    
    RPM=c(
        "Spic", # Spic | RPM marker
        "Vcam1", # Vcam1 | RPM marker
        "Slc40a1" # Slc40a1 | RPM marker | Iron transport
    ), 
    
    MMM=c(
        "Siglec1", # Cd169 | MMM marker. Binds various cell types. 
        "Nr1h3" # Lxr alpha | MMM marker. A transcription factor activated by oxysterols
    ), 
    
    MZM=c(
        "Marco", # Marco | MZM marker
        "Cd209b" # SIGNR1 | MZM marker 
    ), 
    
    TBM=c(
        "Gas6", # Gas6 | TBM
        "Mfge8"
    )
    
)

# Flat gene vector and the matching group label per gene, both derived from the grouped list
features_mac <- unlist(marker_groups_mac, use.names=FALSE)
split_mac <- rep(names(marker_groups_mac), times=lengths(marker_groups_mac))

In [None]:
# Figure width grows with the number of marker genes
options(repr.plot.width=3*(2.5+length(features_mac)/2.5), repr.plot.height=5)

# Same dot plot per VarID cluster twice (scale=FALSE / scale=TRUE); `split_mac` supplies the marker group of each gene
dp_1 <- dp_feature(so_m, features_mac, split_mac, group_by="varid_clusters", scale=FALSE, title="Macrophage marker", range_max=8) + theme_global_set(1)
dp_2 <- dp_feature(so_m, features_mac, split_mac, group_by="varid_clusters", scale=TRUE, title="Macrophage marker", range_max=8) + theme_global_set(1)

# Show both variants side by side
dp_1 + dp_2

In [None]:
options(repr.plot.width=10, repr.plot.height=12)

# VarID clusters compared in the macrophage marker analysis (annotated below as PreRPM and RPM).
# The cluster list and the subset are created once and shared by both calls, so the 
# fitted model and the heat map are guaranteed to use the same cells.
clusters_mac <- c("varid_clusters_m_10", "varid_clusters_m_6")
so_m_mac <- subset(so_m, subset=varid_clusters %in% clusters_mac)

results <- voomlmfit_marker(so_m_mac, grouping_var="varid_clusters", pseudobatch_var="sample_group") 
hm_marker(results, so_m_mac, grouping_var="varid_clusters")

## Dendritic cells marker genes

In [None]:
# Dendritic cell marker genes, grouped by the label shown in the dot plot
marker_groups_dc <- list(
    
    CDP=c(
        "Zbtb46" # Zbtb46 | Common DC progenitor (CDP) | TF
    ), 
    
    cDC1=c(
        "Xcr1", # Xcr1 | cDC1 of which most express CD8aa
        "Ly75", # Dec205 (Lectin receptor) | cDC1 subset in the WP | After immunization cDC1 colocalize with and preferentially activate Cd8+ T cells in the WP
        "Itgae", "Cd207" # Cd103 Langerin | cDC1 in the MZ and RP at steady-state
    ), 
    
    cDC2=c(
        "Sirpa", "Gpr183", "Mbtps1", # Sirpa Ebi2 (recognizes oxysterol ligands) S1P | cDC2 in the bridging channel (BC) at steady-state condition and also express Cd11b. Ebi2 and S1p maintain BC localization. 
        "Esam", "Notch2", "Rbpj", "Clec4a4", "Irf4", "Klf4", # Esam (endothelial cell adhesion molecule) Notch2 Rbpj Dcir2 | cDC2 Esam hi subset | Notch2 and Rbpj signaling is required for Esam expression. The subset also expresses Cd11b, Cd4 and Dcir2
        "Cx3cr1", "Il12a", "Il12b", "Tnf" # Cx3cr1 Il12 Il12 Tnfalpha | cDC2 Esam low subset | Independent of Notch2 and Irf4 signaling and producing inflammatory cytokines such as TNFa and Il12. Also positive for Cd11b, Dcir2 but less Cd4 or double negative. 
    ), 
    
    CD=c("Cd4", "Cd8a"), 
    
    Chemokines=c("Ccr7", "Cxcr5", "Cxcr4", "Ccr2"), 
    
    CC=c("Pcna", "Uhrf1")
    
)

# Flat gene vector and the matching group label per gene, both derived from the grouped list
features_dc <- unlist(marker_groups_dc, use.names=FALSE)
split_dc <- rep(names(marker_groups_dc), times=lengths(marker_groups_dc))

In [None]:
# Figure width grows with the number of marker genes
options(repr.plot.width=2*(2.5+length(features_dc)/2.5), repr.plot.height=5)

# Same dot plot per VarID cluster twice (scale=FALSE / scale=TRUE); `split_dc` supplies the marker group of each gene
dp_1 <- dp_feature(so_m, features_dc, split_dc, group_by="varid_clusters", scale=FALSE, range_max=8) + theme_global_set(1)
dp_2 <- dp_feature(so_m, features_dc, split_dc, group_by="varid_clusters", scale=TRUE, range_max=8) + theme_global_set(1)

# Show both variants side by side
dp_1 + dp_2

In [None]:
options(repr.plot.width=10, repr.plot.height=12)

# VarID clusters compared in the dendritic cell marker analysis (annotated below as cDC1 / cDC2).
# The cluster list and the subset are created once and shared by both calls, so the 
# fitted model and the heat map are guaranteed to use the same cells.
clusters_dc <- c("varid_clusters_m_4", "varid_clusters_m_5", "varid_clusters_m_2", "varid_clusters_m_8", "varid_clusters_m_9")
so_m_dc <- subset(so_m, subset=varid_clusters %in% clusters_dc)

results <- voomlmfit_marker(so_m_dc, grouping_var="varid_clusters", pseudobatch_var="sample_group") 
hm_marker(results, so_m_dc, grouping_var="varid_clusters")

## Monocyte marker genes

In [None]:
# Monocyte marker genes, grouped by the label shown in the dot plot
marker_groups_mo <- list(
    
    Canonical=c(
        "Ly6c2", "Csf1r", "Spi1", "Spn" # Ly6c Csf1r Pu1 Cd43 | Canonical marker genes | Two major monocyte populations in the blood which are Ly6c lo and Ly6c high. Cd43 is low in classical and high in int and non-classical. 
    ), 
    
    cMo=c(
        "Jun", "Fos", "Irf8", "Klf4", "Vcan", "Cd163", "Cd63", "S100a8" # Ccr2 | Classical monocyte | Recruitment to blood by Ccr2
    ), 
    
    ncMo=c(
        "Treml4", "Cx3cr1", "Nr4a1", "Klf2", "Fcgr3", "Ifitm1", "Ifitm2", "Ifitm3", "Cdkn1c", "Mtss1" # Treml4 Cx3cr1 | Non-classical monocyte | Migrate in response to Cx3cr1 
    ), 
    
    moDC=c("H2-Ab1", "Cd74", "Ccr5", "Cxcl9", "Cxcl10"), 
    
    CD=c("Cd4", "Cd8a"), 
    
    Chemokines=c("Ccr7", "Cxcr5", "Cxcr4", "Ccr2"), 
    
    CC=c("Pcna", "Uhrf1")
    
)

# Flat gene vector and the matching group label per gene, both derived from the grouped list
features_mo <- unlist(marker_groups_mo, use.names=FALSE)
split_mo <- rep(names(marker_groups_mo), times=lengths(marker_groups_mo))

In [None]:
# Figure width grows with the number of marker genes
options(repr.plot.width=2*(2.5+length(features_mo)/2.5), repr.plot.height=5)

# Same dot plot per VarID cluster twice (scale=FALSE / scale=TRUE); `split_mo` supplies the marker group of each gene
dp_1 <- dp_feature(so_m, features_mo, split_mo, group_by="varid_clusters", scale=FALSE, title="Monocyte marker", range_max=8) + theme_global_set(1)
dp_2 <- dp_feature(so_m, features_mo, split_mo, group_by="varid_clusters", scale=TRUE, title="Monocyte marker", range_max=8) + theme_global_set(1)

# Show both variants side by side
dp_1 + dp_2

In [None]:
options(repr.plot.width=10, repr.plot.height=12)

# VarID clusters compared in the monocyte marker analysis (annotated below as cMo / ncMo).
# The cluster list and the subset are created once and shared by both calls, so the 
# fitted model and the heat map are guaranteed to use the same cells.
clusters_mo <- c("varid_clusters_m_3", "varid_clusters_m_1", "varid_clusters_m_11", "varid_clusters_m_7")
so_m_mo <- subset(so_m, subset=varid_clusters %in% clusters_mo)

results <- voomlmfit_marker(so_m_mo, grouping_var="varid_clusters", pseudobatch_var="sample_group") 
hm_marker(results, so_m_mo, grouping_var="varid_clusters")

In [None]:
# Annotation table for the myeloid subset: one row per VarID cluster (`ident`, numeric 1-11), 
# with labels at three levels of detail. Entries at the same position belong to the same cluster.
cell_type_m <- data.frame(
    
    ident=as.numeric(1:11), 
    
    cell_type_fine_detail=c(
        "ncMo Cd4- (1)", "cDC2 (1)", "cMo Ly6c lo (2)", "cDC1 Cd8+ prolif. (1)", 
        "cDC1 Cd8+ (2)", "RPM", "ncMo Cd4+ (2)", "cDC2 Ccr7+ (3)", 
        "cDC2 prolif. (2)", "PreRPM", "cMo Ly6c hi (1)"
    ), 
    
    cell_type_fine=c(
        "ncMo (1)", "cDC2 (1)", "cMo (2)", "cDC1 (1)", 
        "cDC1 (2)", "RPM", "ncMo (2)", "cDC2 (3)", 
        "cDC2 (2)", "PreRPM", "cMo (1)"
    ), 
    
    cell_type_main=c(
        "ncMo", "cDC2", "cMo", "cDC1", 
        "cDC1", "RPM", "ncMo", "cDC2", 
        "cDC2", "PreRPM", "cMo"
    )
    
)

In [None]:
# Expand the cluster-level myeloid labels to one row per cell.
# The numeric cluster ids of the table are prefixed first so they match the prefixed 
# `varid_clusters` values stored in `so_m`; row names become the cell ids.
labels_m <- dplyr::mutate(cell_type_m, ident=paste0("varid_clusters_m_", ident))
meta_m <- dplyr::select(so_m@meta.data, seurat_clusters, varid_clusters, cell_id)

cell_type_m <- dplyr::left_join(meta_m, labels_m, by=c("varid_clusters"="ident")) %>% 
    column_to_rownames("cell_id") %>% 
    dplyr::mutate(ident=varid_clusters)

# Combine annotation results

In [None]:
# Stack the per-cell annotation of the three subsets (rows are named by cell id) and 
# keep only the annotation columns, in a fixed order
annotation_cols <- c("cell_type_main", "cell_type_fine", "cell_type_fine_detail", "varid_clusters", "ident")
cell_type <- rbind(cell_type_eb, cell_type_mpp, cell_type_m)[, annotation_cols, drop=FALSE]

# Write the annotation columns into the metadata of the full object
so <- AddMetaData(so, cell_type)

In [None]:
# Export the distinct label combinations (one row per annotated cluster identity).
# NOTE(review): "test.csv" is written to the working directory and looks like a scratch file — confirm the intended target.
cell_type_key <- unique(dplyr::select(cell_type, cell_type_main, cell_type_fine, cell_type_fine_detail, ident))
write.csv(cell_type_key, "test.csv")

In [None]:
options(repr.plot.width=2*10, repr.plot.height=6)

# Both panels hide the legend title and the plot title
theme_no_titles <- theme(
    legend.title=element_blank(), 
    plot.title=element_blank()
)

# UMAP of all cells coloured by the detailed annotation ...
dplot_1 <- dplot(so, reduction="umap", group_by="cell_type_fine_detail", alpha=1, pt_size=1) + 
    scale_color_manual(values=color[["cell_type_fine_detail"]]) + 
    theme_no_titles

# ... and by the main cell type
dplot_2 <- dplot(so, reduction="umap", group_by="cell_type_main", alpha=1, pt_size=1) + 
    scale_color_manual(values=color[["cell_type_main"]]) + 
    theme_no_titles

dplot_1 + dplot_2

# Save results 

In [None]:
# saveRDS(so, so_pp_file)

In [None]:
# write.csv(so@meta.data, "data/object/components/meta.csv")
# write.csv(rownames(so), "data/object/genes.csv", row.names=FALSE)
# write.csv(colnames(so), "data/object/cells.csv", row.names=FALSE)
# write.csv(so@reductions$umap@cell.embeddings, "data/object/components/umap.csv")

In [None]:
# # Store data as h5ad 
# adata <- import("anndata", as="adata", convert=FALSE)
# pd <- import("pandas", as="pd", convert=FALSE)
# np <- import("numpy", as="np", convert=FALSE)
    
# # Transform the dgCMatrix of counts to a dense cells x genes matrix
# X <- GetAssayData(so, assay="RNA", slot="counts") %>% as.matrix() %>% t()
# X <- np$array(X, dtype=np$int32)
    
# adata <- adata$AnnData(X=X, obs=so@meta.data)
# adata$var_names <- rownames(GetAssayData(so, assay="RNA", slot="counts"))

# adata$raw <- adata
# adata$write_h5ad(h5ad_pp_file)

# Session info

In [None]:
# Print the R version and the attached/loaded package versions of this session
sessionInfo()