# Annotation 

[Dendritic](https://www.sciencedirect.com/science/article/abs/pii/S0008874918302259) and [macrophage](https://www.sciencedirect.com/science/article/pii/S1074761311003591) splenic subsets. [Progenitor](https://www.sciencedirect.com/science/article/pii/S1074761318304461) 

[Lewis et al., 2011](https://www.sciencedirect.com/science/article/pii/S1074761311003591#dfig1)

In [None]:
# Silence all warnings for the rest of the session (a negative `warn` makes R ignore them)
options(warn=-1)

In [None]:
# Attach every package used below in one place. suppressMessages() hides the
# package start-up messages; the list of library() return values is stored in
# `library_load` but is not read again in the visible code.
library_load <- suppressMessages(
    
    list(
        
        # Seurat objects and Wilcoxon marker testing (presto::wilcoxauc)
        library(Seurat), 
        library(presto), 
        
        # Data wrangling (dplyr, tidyr, tibble, pipes)
        library(tidyverse), 
        
        # Plotting
        library(ggplot2), 
        library(ComplexHeatmap), 
        library(ggplotify), 
        library(grid), 
        library(circlize), 
        
        # Python bridge (conda environment, anndata export)
        library(reticulate)
        
    )
)

In [None]:
# Fix the R random number generator so stochastic steps are repeatable
random_seed <- 42
set.seed(random_seed)

In [None]:
# Configure reticulate: bind to the project's conda environment and print the
# Python configuration that was picked up
use_condaenv(condaenv='p.3.8.12-FD20200109SPLENO', conda="/nobackup/peer/fdeckert/miniconda3/bin/conda", required=NULL)
py_config()

In [None]:
ht_opt$message=FALSE # ComplexHeatmap: turn off its informational messages

In [None]:
# Set working directory to project root; every relative path below resolves against it
setwd("/research/peer/fdeckert/FD20200109SPLENO")

In [None]:
# Source project helper files
# NOTE(review): dplot, fplot, dp_feature, theme_global_set and the `color`
# palettes used below are not defined in this file and presumably come from
# these two scripts - confirm
source("bin/pl.R")
source("plotting_global.R")

In [None]:
# Plotting Theme: make the project theme the ggplot2 default
ggplot2::theme_set(theme_global_set(1)) # From project global source()

# Parameter settings

In [None]:
# Input: Seurat object read below. Outputs: annotated object (.rds) and AnnData export (.h5ad)
so_file <- "data/BSA_0355_SM01_10x_SPLENO/ANALYSIS/sct/so_sct_int_hvg8000.rds"
so_pp_file <- "data/BSA_0355_SM01_10x_SPLENO/ANALYSIS/pp.rds"
h5ad_pp_file <- "data/BSA_0355_SM01_10x_SPLENO/ANALYSIS/pp.h5ad"

# Import Seurat object

In [None]:
so <- readRDS(so_file)

# Seurat dimensional reduction and clustering 

In [None]:
# Neighbor graph and clustering are computed on the "integrated" assay
DefaultAssay(so) <- "integrated"

# Cluster all cells 
# NOTE(review): the neighbor graph uses dims 1:10 while RunUMAP below uses dims 1:15 - confirm this is intended
so <- FindNeighbors(so, dims=1:10, k.param=20, verbose=FALSE)
so <- FindClusters(so, verbose=FALSE, resolution=1, algorithm=1, group.singletons=TRUE)

In [None]:
# Subcluster cluster "2" on the integrated SNN graph. The result is written to
# `seurat_clusters` itself (subcluster.name), so that column is overwritten;
# the annotation table further down refers to the sub-clusters as "2_0".."2_3".
so <- FindSubCluster(so, "2", graph.name="integrated_snn", subcluster.name="seurat_clusters", resolution=0.5, algorithm=1)

In [None]:
# Umap dimensional reduction; umap.method="umap-learn" selects the Python implementation
so <- RunUMAP(so, dims=1:15, n.neighbors=100, min.dist=1, spread=1, verbose=FALSE, umap.method="umap-learn")

In [None]:
# Notebook figure size for the six-panel overview
options(repr.plot.width=30, repr.plot.height=10)

# UMAP panels coloured by cluster, treatment and cell cycle class
dplot_1 <- dplot(so, reduction="umap", group_by="seurat_clusters", alpha=0.5, shuffle=TRUE)
dplot_2 <- dplot(so, reduction="umap", group_by="treatment", alpha=0.5, shuffle=TRUE) + scale_color_manual(values=color$treatment)
dplot_3 <- dplot(so, reduction="umap", group_by="cc_phase_class", alpha=0.5) + scale_color_manual(values=color$cc_phase_class)

# UMAP panels coloured by the pRb_RNA and pMt_RNA metadata columns and by Ccr2 expression
# NOTE(review): scale_color_viridis is not provided by any package attached in
# this file; presumably it is made available by the sourced project scripts - confirm
fplot_1 <- fplot(so, reduction="umap", features="pRb_RNA") + ggtitle("Percentage Rb") + scale_color_viridis(option="G")
fplot_2 <- fplot(so, reduction="umap", features="pMt_RNA") + ggtitle("Percentage Mt") + scale_color_viridis(option="G")
fplot_3 <- fplot(so, reduction="umap", features="Ccr2", slot="data") + ggtitle("Ccr2") + scale_color_viridis(option="G")

# plot_layout is namespace-qualified because patchwork is not attached by the
# library() calls in this file
dplot_1 + dplot_2 + dplot_3 + fplot_1 + fplot_2 + fplot_3 + patchwork::plot_layout(nrow=1, ncol=6)

In [None]:
# Number of cells per cluster, treatment and replicate
cluster_counts <- dplyr::group_by(so@meta.data, seurat_clusters, treatment, sample_rep)
cluster_counts <- data.frame(dplyr::summarise(cluster_counts, n=n()))

# Wide layout: one column per cluster, one row per treatment / replicate
cluster_table <- tidyr::spread(cluster_counts, seurat_clusters, n)

# Render the table as HTML in the notebook output
cluster_html <- as.character(kableExtra::kable(cluster_table, "html"))
IRdisplay::display_html(cluster_html)

In [None]:
# Remove the cluster which has no evidence in replicates
# NOTE(review): cluster "21" was presumably picked from the per-replicate table above - confirm after re-clustering
so <- subset(so, subset=seurat_clusters!="21")

In [None]:
# Set default assay back to RNA for everything that follows
DefaultAssay(so) <- "RNA"

In [None]:
# UMAP with cluster labels drawn on the plot
dplot(so, reduction="umap", group_by="seurat_clusters", alpha=0.5, shuffle=TRUE, label=TRUE)

In [None]:
# Cluster annotation lookup. Each row of `cluster_labels` pairs a (sub)cluster
# label from `seurat_clusters` with its fine cell type; the row order defines
# the factor level order used for plotting later on.
cluster_labels <- matrix(
    c(
        "10",  "cDC1",
        "6",   "cDC2 I",
        "18",  "cDC2 II",
        "16",  "ncMo I",
        "4",   "ncMo II",
        "14",  "ncMo III",
        "8",   "cMo",
        "20",  "PreRPM",
        "19",  "RPM",

        "17",  "MP",
        "15",  "Granulocyte",

        "2_3", "MegP",
        "2_2", "MEP I",
        "2_1", "MEP II",
        "2_0", "MEP III",

        "13",  "ProEB I",
        "5",   "ProEB II",
        "12",  "ProEB III",
        "11",  "ProEB IV",
        "9",   "EB I",
        "7",   "EB II",
        "3",   "EB III",
        "0",   "EB IV",
        "1",   "EB V"
    ),
    ncol=2, byrow=TRUE
)

annotation <- data.frame(

    seurat_clusters=cluster_labels[, 1],
    cell_type_fine=cluster_labels[, 2],

    # The main cell type is the fine label without its trailing roman numeral
    cell_type_main=sub(" [IV]+$", "", cluster_labels[, 2])

)

In [None]:
# Attach the cell type labels to every cell: look up each cell's cluster in
# `annotation`, then restore cell_id as row names so AddMetaData can match cells
cell_clusters <- so@meta.data[, c("cell_id", "seurat_clusters"), drop=FALSE]
cell_labels <- dplyr::left_join(cell_clusters, annotation, by=join_by(seurat_clusters))
cell_labels <- tibble::column_to_rownames(cell_labels, "cell_id")
so <- AddMetaData(so, cell_labels)

In [None]:
# Turn both annotation columns into factors whose level order follows the row
# order of `annotation` (main types are de-duplicated, keeping first occurrence)
so$cell_type_fine <- factor(so$cell_type_fine, levels=annotation$cell_type_fine)
so$cell_type_main <- factor(so$cell_type_main, levels=unique(annotation$cell_type_main))

# Annotate cell types

In [None]:
# Notebook figure size for the three-panel comparison
options(repr.plot.width=30, repr.plot.height=10)
# NOTE(review): plotting_global.R was already sourced during setup; presumably it is
# re-sourced here to pick up edits to the colour palettes - confirm it is still needed
source("plotting_global.R")
# UMAP coloured by cluster, fine cell type and main cell type
dplot_1 <- dplot(so, reduction="umap", group_by="seurat_clusters", alpha=0.5, label=FALSE, shuffle=TRUE)
dplot_2 <- dplot(so, reduction="umap", group_by="cell_type_fine", alpha=0.5, label=FALSE, shuffle=TRUE) + scale_color_manual(values=color$cell_type_fine)
dplot_3 <- dplot(so, reduction="umap", group_by="cell_type_main", alpha=0.5, label=FALSE, shuffle=TRUE) + scale_color_manual(values=color$cell_type_main)

dplot_1 + dplot_2 + dplot_3 + patchwork::plot_layout(ncol=3)

# Full count matrix

In [None]:
# Sample sheet; the import code below reads its sample_path, sample_name,
# sample_rep, tissue, treatment and sample_dir columns
meta <- read.csv("data/BSA_0355_SM01_10x_SPLENO/BSA_0355_SM01_10x_SPLENO.csv")

In [None]:
# Read one sample's 10x count matrix into a Seurat object and copy the sample
# annotation onto every cell.
#
# Args:
#   sample_meta: sample sheet row(s) for one sample; the columns sample_path,
#                sample_name, sample_rep, tissue, treatment and sample_dir are read.
#   cnt_type:    name of the matrix directory below sample_path.
#
# Returns: a Seurat object with unfiltered counts and identities set from sample_name.
import_cnt <- function(sample_meta, cnt_type){

    # Directory that holds the 10x matrix for this sample
    cnt_dir <- paste0(sample_meta$sample_path, "/", cnt_type)
    print(paste("import:", cnt_dir))

    # Sparse count matrix -> Seurat object, keeping every feature and every cell
    counts_10x <- Read10X(data.dir=cnt_dir)
    so_10x <- CreateSeuratObject(counts=counts_10x, min.cells=0, min.features=0)

    # Sample-level annotation, repeated for each cell
    so_10x$sample_name <- sample_meta$sample_name
    so_10x$sample_rep <- sample_meta$sample_rep
    so_10x$tissue <- sample_meta$tissue
    so_10x$treatment <- sample_meta$treatment
    so_10x$sample_group <- paste0(so_10x$treatment, "_", so_10x$sample_rep)
    so_10x$sample_path <- sample_meta$sample_path
    so_10x$sample_dir <- sample_meta$sample_dir

    # Drop the default identity column and use the sample name as identity instead
    so_10x$orig.ident <- NULL
    so_10x <- SetIdent(so_10x, value="sample_name")

    return(so_10x)

}

# Import the filtered count matrix of every sample (one group of sample sheet rows per sample_path)
so_cnt <- lapply(split(meta, f=meta$sample_path), import_cnt, cnt_type="filtered_feature_bc_matrix")

# Merge all samples into one object. With a single sample `2:length(so_cnt)`
# would evaluate to c(2, 1) and index past the list, so merge only when there
# is something to merge and drop the first element with a negative index.
so_cnt <- if (length(so_cnt) > 1) merge(x=so_cnt[[1]], y=so_cnt[-1]) else so_cnt[[1]]

# Keep only the cells that are present in the annotated object
# NOTE(review): assumes the merged column names use the same format as so$cell_id - confirm
so_cnt$cell_id <- colnames(so_cnt)
so_cnt <- subset(so_cnt, subset=cell_id %in% so$cell_id)

In [None]:
# Get count matrix: raw counts of the merged samples, columns put in the cell
# order of `so`, then features without a single non-zero count are dropped
cnt <- GetAssayData(so_cnt, assay="RNA", slot="counts")
cnt <- cnt[, colnames(so)]
cnt <- cnt[rowSums(cnt>0)>=1, ]

In [None]:
# Create new Seurat Object for downstream analysis: keep the cell metadata and
# the PCA / UMAP embeddings of the current object (note that `meta` no longer
# holds the sample sheet after this line)
meta <- so@meta.data
pca <- so@reductions$pca@cell.embeddings
umap <- so@reductions$umap@cell.embeddings

In [None]:
# Fresh object from the count matrix, with metadata and embeddings re-attached
so <- CreateSeuratObject(counts=cnt, meta.data=meta, assay="RNA", project="FD20200109SPLENO", min.cells=0, min.features=0)
so[["pca"]] <- CreateDimReducObject(embeddings=as.matrix(pca), key="PCA_")
so[["umap"]] <- CreateDimReducObject(embeddings=as.matrix(umap), key="UMAP_")

In [None]:
# Normalize the RNA counts with Seurat's default settings
so <- NormalizeData(so)

# Marker genes

In [None]:
# Keep only features that reach at least `cnt_min` counts in at least
# `cell_min` cells.
#
# The object is subset in place of being rebuilt from the count matrix: a
# rebuilt object carries no normalized `data` layer, although the marker test
# that consumes the result reads `data`.
#
# Args:
#   so:       Seurat object with an "RNA" assay holding raw counts.
#   cnt_min:  minimum count a feature needs in a cell for that cell to be counted.
#   cell_min: minimum number of such cells for a feature to be kept.
#
# Returns: the Seurat object restricted to the selected features; cells,
#   metadata and existing layers are kept.
feature_select <- function(so, cnt_min=3, cell_min=3) {
    
    cnt <- GetAssayData(so, assay="RNA", layer="counts")
    keep <- rownames(cnt)[rowSums(cnt>=cnt_min)>=cell_min]

    so <- subset(so, features=keep)
    
    return(so)
    
}

In [None]:
# Run presto's Wilcoxon test for every level of a metadata column and return
# the results with Seurat-style column names.
#
# Args:
#   so:    Seurat object.
#   ident: name of the metadata column that defines the groups.
#   assay: assay to read the expression matrix from.
#   slot:  slot of that assay to test.
#
# Returns: a list with one result data.frame per group, named by group.
wilcox_marker <- function(so, ident, assay="RNA", slot="data") {

    # Expression matrix and the group label of every cell
    expr <- GetAssayData(so, assay=assay, slot=slot)
    groups <- so[[ident, drop=TRUE]]

    # Test each group against the rest, then map presto's column names onto
    # the names used by the plotting code
    markers <- rename(
        presto::wilcoxauc(expr, groups),
        gene=feature,
        avg_log2FC=logFC,
        p_val=pval,
        p_val_adj=padj,
        pct.1=pct_in,
        pct.2=pct_out
    )

    # Rescale both percentage columns by 1/100
    markers$pct.1 <- markers$pct.1/100
    markers$pct.2 <- markers$pct.2/100

    # One table per group
    markers <- split(markers, f=markers$group)

    return(markers)
}

In [None]:
# Labelled scatter plot of one differential expression table: pct.1 on the x
# axis (limited to 0-1) against avg_log2FC on the y axis, points filled by
# significance and direction.
#
# Args:
#   dea:          data.frame with the columns avg_log2FC, p_val_adj and pct.1;
#                 an optional `gene` column is used as row names and point labels.
#   log2_thr:     minimum |avg_log2FC| for an entry to count as significant.
#   adj_pval_thr: maximum adjusted p-value for an entry to count as significant.
#   top_label:    number of genes labelled per direction and per ranking
#                 (by avg_log2FC and by p_val_adj); 0 draws no labels.
#   title:        plot title.
#   conserved:    not used; kept so existing calls keep working.
#   color_pos:    named colour for significant positive entries (name = legend entry).
#   color_neg:    named colour for significant negative entries (name = legend entry).
#
# Returns: a ggplot object.
vp <- function(dea, log2_thr=1, adj_pval_thr=0.05, top_label=10, title=NULL, conserved=FALSE, color_pos=c("pos"="#0000ffff"), color_neg=c("neg"="#fd8008ff")) {
    
    # Set rownames to genes
    if("gene" %in% colnames(dea)) {rownames(dea) <- dea$gene}
    
    # Replace adjusted p-values of exactly 0 by the smallest non-zero value.
    # The minimum has to skip the zeros (and NAs), otherwise the replacement is 0 again.
    p_nonzero <- dea$p_val_adj[!is.na(dea$p_val_adj) & dea$p_val_adj > 0]
    if(length(p_nonzero) > 0) {
        dea$p_val_adj <- ifelse(dea$p_val_adj == 0, min(p_nonzero), dea$p_val_adj)
    }
    
    # Annotate entries significance by log2_thr and adj_pval_thr
    dea$sig <- ifelse((abs(dea$avg_log2FC) >= log2_thr) & (dea$p_val_adj <= adj_pval_thr), "s", "ns")
    
    # Set color class based on significance and direction of dea e.g. positive and negative 
    dea$color <- ifelse(dea$sig == "s" & dea$avg_log2FC > 0, names(color_pos), "n.s.")
    dea$color <- ifelse(dea$sig == "s" & dea$avg_log2FC < 0, names(color_neg), dea$color)
    
    # Fill colour for every colour class
    fill_values <- c(color_pos, "gray", "black", color_neg)
    names(fill_values) <- c(names(color_pos), "n.s.", "black", names(color_neg))
    
    # Significant entries per direction; which() drops rows whose test is NA
    dea_pos <- dea[which(dea$avg_log2FC > 0 & dea$sig == "s"), ]
    dea_neg <- dea[which(dea$avg_log2FC < 0 & dea$sig == "s"), ]

    # Row names of the first `top_label` rows in the given order. head() never
    # pads with NA rows and returns nothing for top_label=0, unlike 1:top_label.
    top_genes <- function(tbl, row_order) rownames(tbl)[head(row_order, top_label)]
    
    # Label the top entries by effect size and by adjusted p-value
    pos_labels <- c(top_genes(dea_pos, rev(order(dea_pos$avg_log2FC))), top_genes(dea_pos, order(dea_pos$p_val_adj)))
    neg_labels <- c(top_genes(dea_neg, order(dea_neg$avg_log2FC)), top_genes(dea_neg, order(dea_neg$p_val_adj)))
    
    # Set labels 
    dea$label <- ifelse(rownames(dea) %in% c(pos_labels, neg_labels), rownames(dea), NA)

    # Symmetric y axis around 0
    fc_max <- max(abs(dea$avg_log2FC), na.rm=TRUE)

    # Plot; aesthetics refer to columns of `dea` by name
    volcano_plot <- ggplot(dea, aes(x=pct.1, y=avg_log2FC, fill=color, label=label)) + 
    
        geom_point(size=4, shape=21, color="white") + 
        geom_hline(aes(yintercept=0), linetype="dotted", colour="black") +
        ggrepel::geom_text_repel(segment.color="black", force=20, force_pull=1, max.overlaps=getOption("ggrepel.max.overlaps", default=100), size=5, alpha=1, segment.size=0.1, color='black') + 
        ylim(-fc_max, fc_max) +  
        xlim(0, 1) + 
        ggtitle(title) + xlab("expression reference [ratio]") + ylab("average log2FC") + 
        scale_fill_manual(values=fill_values, name="DEA") + 
    
        guides(
            
            color=guide_legend(order=1, title="Group", size=2, keywidth=0.75, keyheight=0.75), 
            alpha="none"
            
        ) + 
    
    theme(
        
        legend.position="right", 
        aspect.ratio=1
        
    )
    
    return(volcano_plot)
    
}

In [None]:
# Marker test per fine cell type on the feature-filtered object; the result
# list is then put in the level order of cell_type_fine
res <- wilcox_marker(feature_select(so), "cell_type_fine", assay="RNA", slot="data")
res <- res[levels(so$cell_type_fine)]

In [None]:
# Notebook figure size for the plot grid
options(repr.plot.width=4*6, repr.plot.height=6)

# One plot per cell type (no fold change cut-off, adjusted p-value <= 0.01),
# arranged four per row with a shared legend at the bottom
vp_1 <- lapply(names(res), function(i) vp(res[[i]], title=i, log2_thr=0, adj_pval_thr=0.01) + theme_global_set(size_select=1))
ggpubr::ggarrange(plotlist=vp_1, ncol=4, common.legend=TRUE, legend="bottom") %>% print()

# Marker expression 

In [None]:
# Marker genes for the dot plot, keyed by the panel label they are shown under.
# The order of the list and of the genes within each entry is the plotting order.
marker_sets <- list(

    "Prog."=c("Kit", "Procr"),
    "MHC"=c("H2-D1", "H2-Ab1"),
    "DC"=c("Cd8a", "Clec9a", "Flt3", "Zbtb46", "Cd4", "Ccr7"),
    "Mono"=c("Ccr2", "Ly6c2", "Itgam", "Cx3cr1", "Itgal"), # Cd11b:Itgam, Cx3cr1:ncMo, Fcgr3:nMo in red pulp
    "RPM"=c("Siglec1", "Spic", "Adgre1"), # Cd169:Siglec1
    "Neu"=c("Elane", "Gfi1"),
    "Eo"=c("Prg2", "Prg3"),
    "Baso"=c("Prss34", "Mcpt8"),
    "Mast"=c("Gzmb", "Cma1"),
    "Meg"=c("Pf4", "Itga2b"),
    "Ery"=c("Gata2", "Gata1", "Klf1", "Epor")

)

# Long format: one row per gene with the label of the set it belongs to
cell_type_marker <- data.frame(

    gene=unlist(marker_sets, use.names=FALSE),
    cell_type=rep(names(marker_sets), times=lengths(marker_sets))

)

In [None]:
# Notebook figure size for the dot plot
options(repr.plot.width=13, repr.plot.height=10)

# Dot plot of the marker genes per fine cell type; cell types are shown in
# reversed level order and genes are split into panels by their marker set
dp_feature(so, cell_type_marker$gene, group_by="cell_type_fine", group_by_order=rev(levels(so$cell_type_fine)), split=cell_type_marker$cell_type, split_order=unique(cell_type_marker$cell_type), range_max=6, scale=TRUE)

# Save results 

In [None]:
# Annotated Seurat object
saveRDS(so, so_pp_file)

In [None]:
# UMAP embedding as a plain matrix, used for the h5ad export below
umap <- so@reductions$umap@cell.embeddings %>% as.matrix()

In [None]:
# Store data as h5ad 
# Python modules are imported with convert=FALSE so results stay Python objects
# (pd and np are not referenced again in the visible code)
ad <- import("anndata", as="ad", convert=FALSE)
pd <- import("pandas", as="pd", convert=FALSE)
np <- import("numpy", as="np", convert=FALSE)
    
# Transform dgCMatrix to sparse sc_sparse matrix
# The count matrix is passed through a temporary AnnData object and its
# transpose ($T) is taken
# NOTE(review): presumably this turns the features x cells matrix into the
# cells x features layout that matches obs=meta.data below - confirm
X <- GetAssayData(so, assay="RNA", slot="counts")    
X <- ad$AnnData(X=X)$X$T

# AnnData object with the cell metadata as obs and the feature names as var_names
adata <- ad$AnnData(X=X, obs=so@meta.data)
adata$var_names <- rownames(GetAssayData(so, assay="RNA", slot="counts"))

# UMAP embedding under the key X_umap
adata$obsm <- list(X_umap=umap)

# Keep a copy of the data in .raw, then write the file
adata$raw <- adata
adata$write_h5ad(h5ad_pp_file)

# Session info

In [None]:
# Record the R version and the attached / loaded package versions for reproducibility
sessionInfo()