# Figure 1 

In [4]:
options(warn=-1)

In [5]:
library_load <- suppressMessages(
    
    suppressWarnings(
        
        list(
        
            # Seurat 
            library(Seurat), 

            # Data 
            library(tidyverse), 
            library(data.table), 

            # miloR
            library(miloR), 
            library(ggbeeswarm), 

            # Plotting 
            library(ggplot2), 
            library(patchwork), 
            library(cowplot), 
            library(ComplexHeatmap), 

            # Pyhton compatibility
            library(reticulate)

        )
    )
)

In [6]:
# Configure reticulate 
# use_condaenv(condaenv="p.3.10.16-FD20200109SPLENO", conda="/nobackup/peer/fdeckert/miniconda3/bin/conda", required=NULL)
# py_config()

In [7]:
random_seed <- 42
set.seed(random_seed)

In [8]:
# Set working directory to project root
setwd("/research/peer/fdeckert/FD20200109SPLENO")

In [9]:
# Plotting Theme
source("plotting_global.R")
ggplot2::theme_set(theme_global_set(size_select=4)) # From project global source()

In [10]:
# Helper functions
source("bin/so_pl.R")

source("bin/dea_pp.R")
source("bin/dea_pl.R")

source("bin/gsea_pp.R")
source("bin/gsea_pl.R")

source("bin/dp_feature.R")
source("bin/grn_pl.R")

# Import data 

In [11]:
so <- readRDS("data/scRNAseq/object/pp_0.rds")
so <- NormalizeData(so)

Normalizing layer: counts



In [12]:
so$sample_group <- factor(so$sample_group, levels=c("Bl6_NaCl_D6", "Bl6_CpG_D6"))

# Compute percentage primitive and definitive hemoglobin 

In [13]:
so <- PercentageFeatureSet(
    so,
    features = c("Hba-x", "Hbb-y", "Hbb-bh1"),
    col.name="pct_counts_primitive_HB", 
    assay = "RNA"
)

In [14]:
so <- PercentageFeatureSet(
    so,
    features = c("Hba-a1", "Hba-a2", "Hbb-bt", "Hbb-bs"),
    col.name="pct_counts_definitive_HB", 
    assay = "RNA"
)

# UMAP leiden celltype low

In [15]:
dp_1 <- dplot(so, group_by="celltype_low", alpha=1.0, pt_size=0.25, shuffle=TRUE, size_select=4, label=TRUE, label_box=TRUE, label_size=6) + 

    scale_color_manual(values=color$celltype_low) + 
    scale_fill_manual(values=color$celltype_low) + 
    guides(color=guide_legend(ncol=1, override.aes=list(alpha=1, size=1.5), keywidth=0, keyheight=0.25, default.unit="cm")) + 
    theme(
    
        legend.text.align=0
    
    ) 

[1m[22mScale for [32mfill[39m is already present.
Adding another scale for [32mfill[39m, which will replace the existing scale.


In [16]:
pdf("result/figures/figure_1/umap_celltype_low.pdf", width=3.5, height=2.5)

dp_1

dev.off()

# UMAP FACS

In [17]:
so$facs <- factor(so$facs, levels=c("Progenitor", "Myeloid"))

In [18]:
dp_1 <- dplot(so, group_by="facs", alpha=1.0, pt_size=0.25, shuffle=FALSE, size_select=4, label=FALSE, label_box=FALSE, label_size=6) + 

    scale_color_manual(values=color$facs) + 
    scale_fill_manual(values=color$facs) + 
    guides(color=guide_legend(ncol=1, override.aes=list(alpha=1, size=1.5), keywidth=0, keyheight=0.25, default.unit="cm")) + 
    theme(
    
        legend.text.align=0
    
    ) 

In [19]:
pdf("result/figures/figure_1/umap_facs.pdf", width=3.5, height=2.5)

gridExtra::grid.arrange(
    
    dp_1 %>% egg::set_panel_size(., width=unit(2.5, "cm"), height=unit(2.5, "cm"))

)

dev.off()

# UMAP Ctr vs CpG

In [20]:
so_tmp <- so

In [21]:
color_tmp <- color$sample_group
color_tmp["Bl6_CpG_D6"] <- "#7F7F7F"

In [22]:
data <- cbind(so_tmp[["umap"]]@cell.embeddings, so_tmp@meta.data)
data <- data[data$sample_group=="Bl6_NaCl_D6", ]

In [23]:
dp_1 <- dplot(so_tmp, group_by="sample_group", alpha=1, pt_size=0.25, shuffle=FALSE, size_select=4, order=c("Bl6_NaCl_D6", "Bl6_CpG_D6")) + 
    scale_color_manual(values=color_tmp) + 
    guides(color=guide_legend(ncol=1, override.aes=list(alpha=1, size=1.5), keywidth=0, keyheight=0.25, default.unit="cm")) + 
    stat_density_2d(data=data, aes(x=UMAP_1, y=UMAP_2), color="black", size=0.2, adjust=0.5, n=100, contour=TRUE) + 
    theme(legend.text.align=0) 

In [24]:
pdf("result/figures/figure_1/umap_wt_nacl_d6.pdf", width=2.5, height=2.5)

gridExtra::grid.arrange(
    
    dp_1 %>% egg::set_panel_size(., width=unit(2.5, "cm"), height=unit(2.5, "cm"))

)

dev.off()

In [25]:
so_tmp <- so

In [26]:
color_tmp <- color$sample_group
color_tmp["Bl6_NaCl_D6"] <- "#7F7F7F"

In [27]:
data <- cbind(so_tmp[["umap"]]@cell.embeddings, so_tmp@meta.data)
data <- data[data$sample_group=="Bl6_CpG_D6", ]

In [28]:
dp_1 <- dplot(so_tmp, group_by="sample_group", alpha=1, pt_size=0.25, shuffle=FALSE, size_select=4, order=c("Bl6_CpG_D6", "Bl6_NaCl_D6")) + 
    scale_color_manual(values=color_tmp) + 
    guides(color=guide_legend(ncol=1, override.aes=list(alpha=1, size=1.5), keywidth=0, keyheight=0.25, default.unit="cm")) + 
    stat_density_2d(data=data, aes(x=UMAP_1, y=UMAP_2), color="black", size=0.2, adjust=0.5, n=100, contour=TRUE) + 
    theme(legend.text.align=0) 

In [29]:
pdf("result/figures/figure_1/umap_wt_cpg_d6.pdf", width=2.5, height=2.5)

gridExtra::grid.arrange(
    
    dp_1 %>% egg::set_panel_size(., width=unit(2.5, "cm"), height=unit(2.5, "cm"))

)

dev.off()

# Barplot FACS ratio per celltype

In [30]:
bp_1 <- ggplot(so@meta.data %>% dplyr::mutate(celltype_low=factor(celltype_low, levels=rev(levels(celltype_low)))), aes(x=celltype_low, fill=facs)) + 
    geom_bar(position="fill", width=0.8, color="black", size=0.1) + 
    scale_fill_manual(values=color$facs) + 
    ggtitle("") + xlab("") + ylab("Celltype [ratio]") + 
    coord_flip() +
    theme(axis.text.x=element_text(angle=90, vjust=0.5, hjust=1)) + 
    guides(fill=guide_legend(ncol=1, override.aes=list(alpha=1, size=1.5), keywidth=0.25, keyheight=0.25, default.unit="cm"))

In [31]:
pdf("result/figures/figure_1/bp_facs_celltype_low.pdf", width=2.5, height=2.5)

bp_1

dev.off()

# Barplot celltype ratio  

In [32]:
so$celltype_high <- ifelse(so$celltype_low %in% c("MEP", "Proerythroblast", "Erythroblast"), "E", ifelse(so$celltype_low %in% c("MegP", "GMP", "NeuP", "BasoP", "MastP"), "P", "M"))

In [33]:
so$sample_group_rep <- factor(so$sample_group_rep, levels=c("Bl6_NaCl_D6_Rep1", "Bl6_NaCl_D6_Rep2", "Bl6_CpG_D6_Rep1",  "Bl6_CpG_D6_Rep2" ))

In [34]:
bp_1 <- ggplot(so@meta.data[so$celltype_high=="E", ], aes(x=sample_group_rep, fill=celltype_low)) + 
    geom_bar(position="fill", width=0.8, color="black", size=0.1) + 
    scale_fill_manual(values=color$celltype_low) + 
    ggtitle("") + xlab("") + ylab("Celltype [ratio]") + 
    facet_wrap(~celltype_high, scales="free", ncol=1) + 
    scale_x_discrete(labels=c("Rep1", "Rep2", "Rep1", "Rep2")) + 
    theme(axis.text.x=element_text(angle=90, vjust=0.5, hjust=1)) + 
    guides(fill=guide_legend(ncol=1, override.aes=list(alpha=1, size=1.5), keywidth=0.25, keyheight=0.25, default.unit="cm"))

bp_2 <- ggplot(so@meta.data[so$celltype_high=="P", ], aes(x=sample_group_rep, fill=celltype_low)) + 
    geom_bar(position="fill", width=0.8, color="black", size=0.1) + 
    scale_fill_manual(values=color$celltype_low) + 
    ggtitle("") + xlab("") + ylab("Celltype [ratio]") + 
    facet_wrap(~celltype_high, scales="free", ncol=1) + 
    scale_x_discrete(labels=c("Rep1", "Rep2", "Rep1", "Rep2")) +
    theme(axis.text.x=element_text(angle=90, vjust=0.5, hjust=1)) + 
    guides(fill=guide_legend(ncol=1, override.aes=list(alpha=1, size=1.5), keywidth=0.25, keyheight=0.25, default.unit="cm"))

bp_3 <- ggplot(so@meta.data[so$celltype_high=="M", ], aes(x=sample_group_rep, fill=celltype_low)) + 
    geom_bar(position="fill", width=0.8, color="black", size=0.1) + 
    scale_fill_manual(values=color$celltype_low) + 
    ggtitle("") + xlab("") + ylab("Celltype [ratio]") + 
    facet_wrap(~celltype_high, scales="free", ncol=1) + 
    scale_x_discrete(labels=c("Rep1", "Rep2", "Rep1", "Rep2")) +
    theme(axis.text.x=element_text(angle=90, vjust=0.5, hjust=1)) + 
    guides(fill=guide_legend(ncol=1, override.aes=list(alpha=1, size=1.5), keywidth=0.25, keyheight=0.25, default.unit="cm"))

In [35]:
pdf("result/figures/figure_1/bp_celltype_low.pdf", width=2, height=4)

gridExtra::grid.arrange(

    bp_1 %>% egg::set_panel_size(., width=unit(4*0.3, "cm"), height=unit(2.0, "cm")),
    bp_2 %>% egg::set_panel_size(., width=unit(4*0.3, "cm"), height=unit(2.0, "cm")),
    bp_3 %>% egg::set_panel_size(., width=unit(4*0.3, "cm"), height=unit(2.0, "cm")), ncol=1
        
)

dev.off()

# Celltype marker genes

In [36]:
marker_genes=data.frame(

    celltype=c(
        
        "Prog.", "Prog.", "Prog.", "Prog.", "Prog.",

        "NeuP", 
               
        "Baso.", "Baso.", "Baso.", "Baso.", 
               
        "Mast", "Mast", 

        "MegP", "MegP", 
               
        "Erythroid", "Erythroid", "Erythroid", "Erythroid", "Erythroid", "Erythroid", "Erythroid", "Erythroid", "Erythroid", 
               
        "Mono.", "Mono.", "Mono.", "Mono.", 
               
        "RPM", "RPM", "RPM", 
               
        "cDC", "cDC", "cDC", "cDC", "cDC", "cDC", "cDC"
               
    ), 
    
    genes=c(
        
        "Hlf", "Cd34", "Kit", "Meis1", "Spi1",  

        "Elane", 
            
        "Cebpa", "Mcpt8", "Clec12a", "Itga2", 
            
        "Mcpt4", "Cma1", 

        "Pf4", "Itga2b",
            
        "Gata2", "Gata1", "Klf1", "Icam4", "Epor", "Sox6", "Tfrc", "Alas2", "Slc4a1",
            
        "Ly6c2", "Ccr2", "Cx3cr1", "Itgam",
            
        "Adgre1", "Spic", "Cd163",  
            
        "Sirpa", "Flt3", "Xcr1", "Cd8a", "Cd4", "Cd209a", "Ccr7"
    
    )
    
)

In [37]:
dp_1 <- dp_feature(so, marker_genes$genes, group_by="celltype_low", group_by_order=rev(levels(so$celltype_low)), split=marker_genes$celltype, split_order=unique(marker_genes$celltype), range_max=2.5) + 
    scale_y_discrete(labels=rev(levels(so$celltype_low))) + theme_global_set(4) + 

        theme(
            legend.position="bottom", 
            panel.spacing=unit(0.1, "lines"), 
            axis.text.x=element_text(angle=90, vjust=0.5, hjust=1, face="italic"),
            axis.text.y=element_text(angle=0, vjust=0.5, hjust=1), 
            strip.text.x=element_text(angle=90, vjust=0.5, hjust=0), 
            strip.text=element_text(margin=margin(0.1, 0.1, 0.1, 0.1, "lines")), 
            strip.background=element_rect(fill="transparent", color=NA), 
            legend.key.size=unit(0.25, "cm"), 
            legend.key.height=unit(0.25, "cm"), 
            legend.key.width=unit(0.25, "cm")
            
        )

[1m[22m`summarise()` has grouped output by 'celltype_low'. You can override using the
`.groups` argument.
[1m[22mScale for [32mfill[39m is already present.
Adding another scale for [32mfill[39m, which will replace the existing scale.


In [38]:
pdf("result/figures/figure_1/dp_celltype_low_marker_genes.pdf", width=4.0, height=3.2)

dp_1

dev.off()

# Celltype abundance test with milo (NaCl D6 vs CpG D6 WT)

In [39]:
# Import Milo object
milo <- readRDS(paste0("data/scRNAseq/milo/milo_k30.rds"))
results <- readRDS("data/scRNAseq/milo/resl.rds")
result <- results[[2]][["res_da"]][[1]]

In [40]:
alpha <- 0.05
group_by <- "celltype_low"

In [41]:
# Beeswarm 
data <- suppressMessages(plotDAbeeswarm(result, group.by=group_by, alpha=alpha))

In [42]:
data <- data$data %>%
    mutate(is_signif=ifelse(SpatialFDR < alpha, 1, 0)) %>%
    mutate(logFC_color=ifelse(is_signif==1, logFC, NA)) %>%
    arrange(group_by) %>%
    mutate(Nhood=factor(Nhood, levels=unique(Nhood))) %>%
    mutate(pos_x=pos_x, pos_y=pos_y) 

In [43]:
y_breaks <- c(-floor(max(abs(data$logFC))), -floor(max(abs(data$logFC)))/2, 0, floor(max(abs(data$logFC)))/2, floor(max(abs(data$logFC))))
n_groups <- length(unique(data$group_by))

In [44]:
options(repr.plot.width=4, repr.plot.height=10)

dabp <- ggplot(data, aes(pos_x, pos_y, color=logFC_color)) +
    scale_color_gradient2(low=color$sample_group["Bl6_NaCl_D6"], high=color$sample_group["Bl6_CpG_D6"]) + 
    guides(color="none") +
    xlab("") + ylab("Log Fold Change") + ggtitle("Differential abundance") + 
    scale_x_discrete(labels=setNames(levels(data$group_by), seq(1 ,n_groups))) + 
    scale_y_continuous(breaks=y_breaks, labels=y_breaks) +
    geom_hline(yintercept=0, size=0.1) + 
    geom_point(size=0.25) + 
    stat_summary(aes(x=group_by, y=logFC), fun.y=median, geom="point", size=0.5, color="black") +
    coord_flip() +
    ylim(-max(abs(data$logFC)), max(abs(data$logFC))) + 
    theme(axis.text.y=element_text(vjust=0.5, hjust=1))

[1m[22mScale for [32my[39m is already present.
Adding another scale for [32my[39m, which will replace the existing scale.


In [45]:
pdf("result/figures/figure_1/dp_erythroid_milo.pdf", width=1.5, height=2.5)

dabp + theme_global_set(size_select=4)

dev.off()

# DEA 

# DEA overview 

In [46]:
adj_pval_thr <- 0.05
log2_thr <- 0.25

In [47]:
contrasts_vec <- c("Bl6_CpG_D6", "Bl6_NaCl_D6")
dea_res_1 <- readRDS(paste0("result/dea/scRNAseq/wilcox/sample_group/", contrasts_vec[1], "_vs_", contrasts_vec[2], ".rds"))

In [48]:
mat_1 <- lapply(dea_res_1, function(x) {x %>% dplyr::filter(p_val_adj <= adj_pval_thr & abs(avg_log2FC)>=log2_thr) %>% dplyr::pull(gene) %>% length}) %>% data.frame() %>% t()

In [49]:
pdf("result/figures/figure_1/hm_dea.pdf", width=1.5, height=3)

draw(dea_res_hm(mat_1, fontsize_select=2))

dev.off()

# Gene set enrichment analysis (GSEA)

## Gene Set M7

In [50]:
compute_gsea <- FALSE
celltype_select <- names(dea_res_1)

In [51]:
if(compute_gsea) {

    # Collapse gene set
    gene_set <- msigdbr::msigdbr(species="mouse", db_species="MM", category="M7", subcategory=NULL)
    gene_set <- gene_set[grep("_UP", gene_set$gs_name), ]
    gene_set$gs_name <- sapply(strsplit(gene_set$gs_name, "_"), function(x) {paste(x[max(1, length(x)-3):(length(x)-2)], collapse="_")})
    gene_set$gs_name <- gsub("CELL_|CD4_|CD8_|MONOCYTE_|MACROPHAGE_|TREG_|LANGERHANS_|NEUTROPHIL_|CDC1_|CDC2_|MIGDC_|ETAC_|GD_|ILC_|PDC_", "", gene_set$gs_name)
    gene_set <- gene_set %>% dplyr::select(gene_symbol, gs_name) %>% dplyr::distinct()

    gsea_res <- lapply(celltype_select, function(i) {
    
        gsea_res <- gsea(dea_res_1[[i]], gene_set=gene_set) %>% dplyr::mutate(col_split="Ctl vs D6", col_label=i)
    
        return(gsea_res)
        
    }
          )

    saveRDS(gsea_res, "result/gsea/scRNAseq/gsea_d6_vs_ctl.rds")
    
} else {

    gsea_res <- readRDS("result/gsea/scRNAseq/gsea_d6_vs_ctl.rds")
    
}

In [52]:
gsea_res <- do.call(rbind, gsea_res)

In [53]:
gs_name_select <- gsea_res %>% dplyr::filter(padj<=0.05 & abs(NES)>=1) %>% dplyr::pull(pathway)

In [54]:
mat_1 <- gsea_res %>% dplyr::filter(pathway %in% gs_name_select) %>% 

    tidyr::pivot_wider(
        
        id_cols=c(col_label, col_split),
        names_from=pathway,
        values_from=padj,
        values_fill=1
    
    )

In [55]:
mat_2 <- gsea_res %>% dplyr::filter(pathway %in% gs_name_select) %>% 

    tidyr::pivot_wider(
        
        id_cols=c(col_label, col_split),
        names_from=pathway,
        values_from=NES,
        values_fill=0
    
    )

In [56]:
row_split <- read.csv("data/reference/immune_dictionary/immune_dictionary.csv") %>% tibble::column_to_rownames("gs_name")
col_label <- mat_1$col_label
col_split <- mat_1$col_split

In [57]:
mat_1 <- mat_1 %>% dplyr::select(-col_label, -col_split) %>% t()
mat_2 <- mat_2 %>% dplyr::select(-col_label, -col_split) %>% t()

In [58]:
row_split <- row_split[rownames(mat_1), ]

In [59]:
rownames(mat_1) <- row_split$norm
rownames(mat_2) <- row_split$norm

In [60]:
row_split <- row_split %>% dplyr::select(norm, family) %>% dplyr::rename(row_split_label=family) %>% dplyr::distinct()
rownames(row_split) <- row_split$norm

In [61]:
col_label[col_label=="cDC.mig."] <- "cDC mig."

In [62]:
pdf("result/figures/figure_1/hm_gsea_d6_vs_ctl_m7.pdf", width=8, height=12)

gsea_hm(mat_1, mat_2, col_label=col_label, col_split=col_split, row_split=row_split, color_neg=color$sample_group["Bl6_NaCl_D6"], color_pos=color$sample_group["Bl6_CpG_D6"], fontsize_select=2)

dev.off()

## genotype comparison - HM

In [109]:
compute_gsea <- FALSE
celltype_select <- names(dea_res_1)

In [110]:
if(compute_gsea) {

    gsea_res <- lapply(celltype_select, function(i) {
    
        gsea_res <- gsea(dea_res_1[[i]], category="MH", subcategory=NULL, gene_set=NULL) %>% dplyr::mutate(col_split="Ctl vs D6", col_label=i)
    
        return(gsea_res)
        
    }
          )

    saveRDS(gsea_res, "result/gsea/scRNAseq/gsea_d6_vs_ctl_mh.rds")
    
} else {

    gsea_res <- readRDS("result/gsea/scRNAseq/gsea_d6_vs_ctl_mh.rds")
    
}

In [111]:
gsea_res <- do.call(rbind, gsea_res)

In [112]:
gs_name_select <- gsea_res %>% dplyr::filter(padj<=0.05 & abs(NES)>=1) %>% dplyr::pull(pathway)

In [113]:
gs_name_select <- c(
    
    "HALLMARK_E2F_TARGETS",
    "HALLMARK_MYC_TARGETS_V1",
    "HALLMARK_MYC_TARGETS_V2",
    "HALLMARK_HEME_METABOLISM",
    
    "HALLMARK_INTERFERON_ALPHA_RESPONSE",
    "HALLMARK_TNFA_SIGNALING_VIA_NFKB",
    
    "HALLMARK_OXIDATIVE_PHOSPHORYLATION"

) 

In [114]:
gsea_res$pathway <- factor(gsea_res$pathway, levels=gs_name_select)

In [115]:
mat_1 <- gsea_res %>% dplyr::filter(pathway %in% gs_name_select) %>% 

    tidyr::pivot_wider(
        
        id_cols=c(col_label, col_split),
        names_from=pathway,
        values_from=padj,
        values_fill=1
    
    )

In [116]:
mat_2 <- gsea_res %>% dplyr::filter(pathway %in% gs_name_select) %>% 

    tidyr::pivot_wider(
        
        id_cols=c(col_label, col_split),
        names_from=pathway,
        values_from=NES,
        values_fill=0
    
    )

In [119]:
mat_1 <- mat_1[c("col_label", "col_split", gs_name_select), ]
mat_2 <- mat_2[c("col_label", "col_split", gs_name_select), ]

In [120]:
col_label <- mat_1$col_label
col_split <- mat_1$col_split

In [121]:
mat_1 <- mat_1 %>% dplyr::select(-col_label, -col_split) %>% t()
mat_2 <- mat_2 %>% dplyr::select(-col_label, -col_split) %>% t()

In [122]:
col_label[col_label=="cDC.mig."] <- "cDC mig."

In [123]:
pdf("result/figures/figure_1/hm_gsea_d6_vs_ctl_mh.pdf", width=8, height=10)

gsea_hm(mat_1, mat_2, col_label=col_label, col_split=col_split, row_split=NULL, pathway_suffix="HALLMARK", color_neg=color$sample_group["Bl6_NaCl_D6"], color_pos=color$sample_group["Bl6_CpG_D6"], fontsize_select=2)

dev.off()

## genotype comparison - GO

In [None]:
compute_gsea <- FALSE
celltype_select <- names(dea_res_1)

In [None]:
if(compute_gsea) {

    gsea_res <- lapply(celltype_select, function(i) {
    
        gsea_res <- gsea_cp(dea_res_1[[i]]) %>% dplyr::mutate(col_split="Ctl vs D6", col_label=i)
    
        return(gsea_res)
        
    }
          )

    saveRDS(gsea_res, "result/gsea/scRNAseq/gsea_d6_vs_ctl_gobp.rds")
    
} else {

    gsea_res <- readRDS("result/gsea/scRNAseq/gsea_d6_vs_ctl_gobp.rds")
    
}

In [None]:
gsea_res <- do.call(rbind, gsea_res)

In [None]:
gs_name_select <- gsea_res %>% dplyr::filter(p.adjust<=0.05 & abs(NES)>=1) %>% dplyr::pull(ID)
gsea_res <- gsea_res %>% dplyr::filter(ID %in% gs_name_select)

In [None]:
gsea_res <- gsea_res[!gsea_res$Description %in% grep("T cell|B cell|CD4|CD8|lymphocyte|endothelial|epithelial", gsea_res$Description, value=TRUE), ]

In [None]:
if(compute_gsea) {

    gs_reduce <- gsea_cp_reduce(gsea_res)

    saveRDS(gs_reduce, "result/gsea/scRNAseq/gsea_reduce_d6_vs_ctl_gobp.rds")
    
} else {

    gs_reduce <- readRDS("result/gsea/scRNAseq/gsea_reduce_d6_vs_ctl_gobp.rds")
    
}

In [None]:
gsea_res <- gsea_res %>% dplyr::rename(pathway=Description, padj=p.adjust) %>% dplyr::filter(ID %in% gs_reduce$go)

In [None]:
gs_name_select <- gsea_res %>% dplyr::filter(padj<=0.05 & abs(NES)>=2) %>% dplyr::pull(ID)
gsea_res <- gsea_res %>% dplyr::filter(ID %in% gs_name_select)

In [None]:
mat_1 <- gsea_res %>% 

    tidyr::pivot_wider(
        
        id_cols=c(col_label, col_split),
        names_from=pathway,
        values_from=padj,
        values_fill=1
    
    )

In [None]:
mat_2 <- gsea_res %>% 

    tidyr::pivot_wider(
        
        id_cols=c(col_label, col_split),
        names_from=pathway,
        values_from=NES,
        values_fill=0
    
    )

In [None]:
col_label <- mat_1$col_label
col_split <- mat_1$col_split

In [None]:
mat_1 <- mat_1 %>% dplyr::select(-col_label, -col_split) %>% t()
mat_2 <- mat_2 %>% dplyr::select(-col_label, -col_split) %>% t()

In [None]:
row_split <- gs_reduce %>% dplyr::select(term, parentTerm) %>% dplyr::rename(row_split_label=parentTerm) %>% dplyr::distinct()
rownames(row_split) <- gs_reduce$term

In [None]:
col_label[col_label=="cDC.mig."] <- "cDC mig."

In [None]:
pdf("result/figures/figure_1/hm_gsea_d6_vs_ctl_gobp.pdf", width=10, height=20)

gsea_hm(mat_1, mat_2, col_label=col_label, col_split=col_split, row_split=row_split, pathway_suffix="WP", fontsize_select=2)

dev.off()

## Erythroid volcano plot

In [136]:
label_ery <- c(

    # DEV
    "Spi1", 

    # CpG group labels 
    "Hbb-a1", "Hba-a1", "Hbb-bt", "Hbb-bs", # Hemoglobin genes 
    "Alas2", "Trim10", # Hemoglobin biosynthesis and oxidative stress
    "Gbp2", # Hypoxia response gene but also inflammation 
    "H2-Q7", "H2-K1", "H2-D1", "Tap1", # MHC I, Tap1 is transporting peptides
    "Iigp1", # Essential for cell-autonomous resistance against intracellular pathogens, strongly IFN‑γ‑induced
    "Ifi47", # Critical for IFN‑γ‑mediated resistance to intracellular pathogens. Knockouts show enhanced susceptibility to infections, In hematopoiesis, Irgm1 (a related p47 GTPase) protects HSCs by dampening IFN-driven activation; Ifi47 may similarly modulate IFN signaling in the bone marrow
    "Isg20", # Antiviral, distinguishes non-self RNA
    "Ifi27", # Pro-apoptotic 
    "Irf7", 
    "Irf9", 
    "Ifi35", 
    "Isg15", 
    "Stat1", 
    "Ifitm2", "Ifitm3", # Membrane proteins induced by type I IFN, Block entry/fusion of enveloped viruses, IFITM3 also limits IFN‑β overproduction by promoting IRF3 degradation, acting in feedback regulation
    "Irf2bp2", # Transcriptional co-regulator binding IRF‑2’s repression domain; also IRF‑2 independent roles. Part of the LDB1 complex 
    "Oasl1", # Binds dsRNA (RNA/DNA), modulates IL-27 signaling. OASL1 inhibits translation of the type I interferon–regulating transcription factor IRF7 https://www.nature.com/articles/ni.2535
    "Mxd3", "Myc", # Generally, MXD proteins are functional antagonists of MYC, acting as transcriptional repressors to promote cell differentiation; however, MXD3 is an atypical member that has roles in cell cycle progression and cell proliferation rather than differentiation
    "E2f2", "E2f6", "E2f4", 
    'Igf1r', 'Akt3', 'Foxo3', "Eif4ebp1",  "Tsc22d1", "Pten", "Pik3cb", "Tsc22d2", # PI3K/Akt/mTOR
    "Gpc4", # Gpc4 modulates signaling of FGF, Wnt, and BMP pathways by stabilizing ligand–receptor interactions at the membrane.
    "Nfkb1", 
    "Apol11b", "Apol11a", # Plasmodium resistance
    "Cd24a", # Erythroid commitment 
    "Bcl2l1", # Survival 
    "Birc5", # Survival but missing evidence 
    "Car2", "Car1", "Ldb1", "Lmo2", # Erythroid differentation 
    "Mki67", "Top2a", # Proliferation 
    "Tfrc", # Transferrin Receptor 1 (TFRC) plays a crucial role in iron uptake and regulation

    # Ctr group labels 
    "Sox4", "Myb",  # MPP survival, might supress myeoloid differentiation. Myb also retains MPP state
    "Cd36", # Quiescence, self-renewal, and myeloid differentiation
    "Angpt1", # Helps maintain hematopoietic niche integrity
    "Lyl1", # Promotes progenitor proliferation, supports erythroid differentiation; blocks megakaryocytic and monocytic differentiation when overexpressed. 
    "Gfib", # Gfi-1B controls human erythroid and megakaryocytic differentiation by regulating TGF-β signaling at the bipotent erythro-megakaryocytic progenitor stage
    "Gzmb", "Mdga1", "Hes1", # Mast cell markers https://doi.org/10.1182/blood-2006-10-053124
    "Smo", # Is repressed by Hedhog signaling so that Gli1-3 get activate 
    "Vim"

)

In [137]:
options(repr.plot.width=20, repr.plot.height=10)

v_pl_1 <- v_pl(dea_res_1[["MEP"]], title="MEP", log2_thr=log2_thr, adj_pval_thr=adj_pval_thr, point_size=0.5, color_pos=unlist(color$sample_group["Bl6_CpG_D6"]), color_neg=unlist(color$sample_group["Bl6_NaCl_D6"]), label=label_ery, label_size=2, y_limit=3, aspect_ratio=0.8, top_label=100) + theme(legend.position="none") +  theme_global_set(size_select=4) 
v_pl_2 <- v_pl(dea_res_1[["Proerythroblast"]], title="ProEB", log2_thr=log2_thr, adj_pval_thr=adj_pval_thr, point_size=0.5, color_pos=unlist(color$sample_group["Bl6_CpG_D6"]), color_neg=unlist(color$sample_group["Bl6_NaCl_D6"]), label=label_ery, label_size=2, y_limit=3, aspect_ratio=0.8, top_label=100) + theme(legend.position="none") + theme_global_set(size_select=4) 
v_pl_3 <- v_pl(dea_res_1[["Erythroblast"]], title="EB", log2_thr=log2_thr, adj_pval_thr=adj_pval_thr, point_size=0.5, color_pos=unlist(color$sample_group["Bl6_CpG_D6"]), color_neg=unlist(color$sample_group["Bl6_NaCl_D6"]), label=label_ery, label_size=2, y_limit=3, aspect_ratio=0.8, top_label=50) + theme(legend.position="none") + theme_global_set(size_select=4) 

## Myeloid volcano plot

In [138]:
contrasts_vec <- c("Bl6_CpG_D6", "Bl6_NaCl_D6")
dea_res_1 <- readRDS(paste0("result/dea/scRNAseq/wilcox/sample_group/", contrasts_vec[1], "_vs_", contrasts_vec[2], ".rds"))

In [142]:
label_mac <- c(

    # RPM
    "Spic", "Zeb2", "Pparg", "Vcam", 
    "Itgax", "Zbtb46", "Batf", "Batf3", "Ciita", "H2-Aa", "H2-Ab1", "H2-Eb1", "Tap1", "Tap2", "Psmb8", "Psmb9", "Psmb10", "Ifi30", "Cd74", 
    "Slc40a1", 
    "Hmox1", 
    "Mertk", 
    "Abca1", 
    "Cd44", "Lrp1", "Igf1"

)

label_dc <- c(

    # cDC2
    "Ifitm1", "Ifi27", # Type I interferon stimulation and ativiral response
    "Psmb8", #  
    "Fcgr4", # Enhances immune complex uptake
    "Ccnb2", "Ube2c", "Prc1", "Birc5", # Proliferation and cDC maturation 
    "Zbtb46", "Sirpa", "Irf4", "cd209", "H2-M2", # cDC2 marker 
    "Spon1", # Extracellular matrix protein for cell adhesion and migration 
    "Htr7", # Serotonin receptor for cytokine (Il10, Il-12) production and migratory behavior 
    "H2-Q7", # MHC-I
    "Psmb8", # Antigen processing and MHC-I peptide loading
    "Ppp1r14a", # Migration 
    "Plac8", "Tmsb10", "Ly6a", # Generic activation marker shared with Mo and Mac
    "Cd52", # Mature DC marker 
    "Zfp521", "Zfp608", "Zbtb20", "Pid1", "Fbrsl1", # Transcriptional or epigenetic factors that could support development or maintenance of cDC2 identity
    "Fos", "Fosb", "Jun", "Egr1", "Stat4", "Il6st", # Classic immediate‑early transcription factors or signaling mediators. Their baseline upregulation suggests that cDC2 are in a pre‑activated or “alert” state, poised to respond quickly to CpG stimulation.
    "Zfp521" # MoDC Marker 
    
)

In [143]:
v_pl_4 <- v_pl(dea_res_1[["intMo"]], title="intMo", log2_thr=log2_thr, adj_pval_thr=adj_pval_thr, point_size=0.5, color_pos=unlist(color$sample_group["Bl6_CpG_D6"]), color_neg=unlist(color$sample_group["Bl6_NaCl_D6"]), label=label_mac, label_size=2, y_limit=3, aspect_ratio=0.8, top_label=5) + theme(legend.position="none") + theme_global_set(size_select=4)
v_pl_5 <- v_pl(dea_res_1[["RPM"]], title="RPM", log2_thr=log2_thr, adj_pval_thr=adj_pval_thr, point_size=0.5, color_pos=unlist(color$sample_group["Bl6_CpG_D6"]), color_neg=unlist(color$sample_group["Bl6_NaCl_D6"]), label=label_mac, label_size=2, y_limit=3, aspect_ratio=0.8, top_label=5) + theme(legend.position="none") + theme_global_set(size_select=4)
v_pl_6 <- v_pl(dea_res_1[["cDC2"]], title="cDC2", log2_thr=log2_thr, adj_pval_thr=adj_pval_thr, point_size=0.5, color_pos=unlist(color$sample_group["Bl6_CpG_D6"]), color_neg=unlist(color$sample_group["Bl6_NaCl_D6"]), label=label_dc, label_size=2, y_limit=3, aspect_ratio=0.8, top_label=5) + theme(legend.position="none") +  theme_global_set(size_select=4) 

In [144]:
pdf("result/figures/figure_1/v_pl.pdf", width=15, height=2)

gridExtra::grid.arrange(
    
    v_pl_1 %>% egg::set_panel_size(., width=unit(3, "cm"), height=unit(2.5, "cm")), 
    v_pl_2 %>% egg::set_panel_size(., width=unit(3, "cm"), height=unit(2.5, "cm")), 
    v_pl_3 %>% egg::set_panel_size(., width=unit(3, "cm"), height=unit(2.5, "cm")), 
    v_pl_4 %>% egg::set_panel_size(., width=unit(3, "cm"), height=unit(2.5, "cm")), 
    v_pl_5 %>% egg::set_panel_size(., width=unit(3, "cm"), height=unit(2.5, "cm")), 
    v_pl_6 %>% egg::set_panel_size(., width=unit(3, "cm"), height=unit(2.5, "cm")), ncol=6

)

dev.off()

# Expression UMAP

In [None]:
so <- NormalizeData(so, normalization.method="LogNormalize", scale.factor=1e4)

In [None]:
cell_id_thr <- so@reductions$umap@cell.embeddings %>% as.data.frame() %>% dplyr::filter(UMAP_1==min(UMAP_1) | UMAP_1==max(UMAP_1) | UMAP_2==min(UMAP_2) | UMAP_2==max(UMAP_2)) %>% rownames()

In [None]:
so_tmp <- so[, (so$celltype_low %in% c("GMP", "NeuP", "BasoP", "Basophil", "MastP", "MegP", "MEP", "Proerythroblast", "Erythroblast")) | so$cell_id %in% cell_id_thr]

In [None]:
pdf("result/figures/figure_1/umap_marker_genes.pdf", width=10, height=10)

    gridExtra::grid.arrange(

        fplot(so_tmp, features="pct_counts_definitive_HB", assay="RNA", slot="data", pt_size=0.25, size_select=4, max_cutoff=30) %>% egg::set_panel_size(., width=unit(2.5, "cm"), height=unit(2.5, "cm")), 
        fplot(so_tmp, features="pct_counts_primitive_HB", assay="RNA", slot="data", pt_size=0.25, size_select=4, max_cutoff=30) %>% egg::set_panel_size(., width=unit(2.5, "cm"), height=unit(2.5, "cm")), 
        fplot(so_tmp, features="Cd34", assay="RNA", slot="data", pt_size=0.25, size_select=4) %>% egg::set_panel_size(., width=unit(2.5, "cm"), height=unit(2.5, "cm")), 
        fplot(so_tmp, features="Meis1", assay="RNA", slot="data", pt_size=0.25, size_select=4) %>% egg::set_panel_size(., width=unit(2.5, "cm"), height=unit(2.5, "cm")), 
        fplot(so_tmp, features="Gata1", assay="RNA", slot="data", pt_size=0.25, size_select=4) %>% egg::set_panel_size(., width=unit(2.5, "cm"), height=unit(2.5, "cm")),
        fplot(so_tmp, features="Gata2", assay="RNA", slot="data", pt_size=0.25, size_select=4) %>% egg::set_panel_size(., width=unit(2.5, "cm"), height=unit(2.5, "cm")),
        fplot(so_tmp, features="Klf1", assay="RNA", slot="data", pt_size=0.25, size_select=4) %>% egg::set_panel_size(., width=unit(2.5, "cm"), height=unit(2.5, "cm")),
        fplot(so_tmp, features="Pf4", assay="RNA", slot="data", pt_size=0.25, size_select=4) %>% egg::set_panel_size(., width=unit(2.5, "cm"), height=unit(2.5, "cm")),
        fplot(so_tmp, features="Sox6", assay="RNA", slot="data", pt_size=0.25, size_select=4) %>% egg::set_panel_size(., width=unit(2.5, "cm"), height=unit(2.5, "cm")),
        fplot(so_tmp, features="Elane", assay="RNA", slot="data", pt_size=0.25, size_select=4) %>% egg::set_panel_size(., width=unit(2.5, "cm"), height=unit(2.5, "cm")),
        fplot(so_tmp, features="Prss34", assay="RNA", slot="data", pt_size=0.25, size_select=4) %>% egg::set_panel_size(., width=unit(2.5, "cm"), height=unit(2.5, "cm")),
        fplot(so_tmp, features="Cma1", assay="RNA", slot="data", pt_size=0.25, size_select=4) %>% egg::set_panel_size(., width=unit(2.5, "cm"), height=unit(2.5, "cm")), 
        fplot(so_tmp, features="Spi1", assay="RNA", slot="data", pt_size=0.25, size_select=4) %>% egg::set_panel_size(., width=unit(2.5, "cm"), height=unit(2.5, "cm")), 
            
        fplot(so_tmp, restrict=c("sample_group", "Bl6_NaCl_D6"), features="Gzmb", assay="RNA", slot="data", pt_size=0.25, size_select=4) %>% egg::set_panel_size(., width=unit(2.5, "cm"), height=unit(2.5, "cm")), 
        fplot(so_tmp, restrict=c("sample_group", "Bl6_CpG_D6"), features="Gzmb", assay="RNA", slot="data", pt_size=0.25, size_select=4) %>% egg::set_panel_size(., width=unit(2.5, "cm"), height=unit(2.5, "cm")),
            
        fplot(so_tmp, restrict=c("sample_group", "Bl6_NaCl_D6"), features="Hes1", assay="RNA", slot="data", pt_size=0.25, size_select=4) %>% egg::set_panel_size(., width=unit(2.5, "cm"), height=unit(2.5, "cm")), 
        fplot(so_tmp, restrict=c("sample_group", "Bl6_CpG_D6"), features="Hes1", assay="RNA", slot="data", pt_size=0.25, size_select=4) %>% egg::set_panel_size(., width=unit(2.5, "cm"), height=unit(2.5, "cm")),
            
        fplot(so_tmp, restrict=c("sample_group", "Bl6_NaCl_D6"), features="Egr1", assay="RNA", slot="data", pt_size=0.25, size_select=4) %>% egg::set_panel_size(., width=unit(2.5, "cm"), height=unit(2.5, "cm")), 
        fplot(so_tmp, restrict=c("sample_group", "Bl6_CpG_D6"), features="Egr1", assay="RNA", slot="data", pt_size=0.25, size_select=4, max_set=5) %>% egg::set_panel_size(., width=unit(2.5, "cm"), height=unit(2.5, "cm")), 
        
        ncol=4
    
    )

dev.off()