# Settings

In [1]:
# Load Reticulate function
Sys.setenv(RETICULATE_PYTHON="/home/luca/anaconda3/envs/reticulate/bin/python")
library(reticulate)
reticulate::use_python("/home/luca/anaconda3/envs/reticulate/bin/python")
reticulate::use_condaenv("/home/luca/anaconda3/envs/reticulate")
reticulate::py_module_available(module='anndata') #needs to be TRUE
reticulate::import('anndata') #good to make sure this doesn't error
reticulate::py_module_available(module='leidenalg') #needs to be TRUE
reticulate::import('leidenalg') #good to make sure this doesn't error

Module(anndata)

Module(leidenalg)

In [2]:
## Patch for annotations in R4.1
# BiocManager::install("Bioconductor/GenomeInfoDb",lib = "/home/luca/R/x86_64-pc-linux-gnu-library/4.1",force = TRUE)
# library(GenomeInfoDb,lib.loc="/home/luca/R/x86_64-pc-linux-gnu-library/4.1")

In [3]:
# Load packages
pacman::p_load(dplyr, stringr, data.table, tidyr, data.table, Matrix, tidyverse,
               hdf5r, Seurat, Signac,harmony, knitr, SoupX, fgsea,
               logr, parallel, DESeq2,
               ggplot2, ggpubr, ggrepel, ggbreak, gridExtra, patchwork, grid, ggh4x)

In [5]:
# Set options
options(stringsAsFactors = FALSE)
warnLevel <- getOption('warn')
options(warn = -1)
opts_chunk$set(tidy=TRUE)

In [6]:
# Set directories
assets.dir = "/nfs/lab/projects/COVID_mouse/assets/"
reference.dir = "/nfs/lab/projects/COVID_mouse/assets/BoneMarrowReference.rds"
snRNAseq.dir = "/nfs/lab/projects/COVID_mouse/seurat/5_clean.map/snRNAseq_mrg.clean.scTyped.rds"

reference.map.dir = "/nfs/lab/projects/COVID_mouse/5_clean.map/"
counts.dir = paste0(reference.map.dir, "Downstream_Files/RNA/perdonor/COUNTS/")
DESEQ.dir = paste0(reference.map.dir, "DESEQ2/")
GSEA.res.dir = paste0(reference.map.dir, "DESEQ2/GSEA/")


# DESeq2

In [7]:
# create metadata
samples = c("1_GFP1", "2_GFP2", "3_GFP3",
           "4_RBD1", "5_RBD2", "6_RBD3",
           "10_G1C1", "11_G1C2", "12_G1C3")

# R doesn't like columns that have a number, and will add an "X", will do the same here
samples = paste0("X", samples)
conditions = c("GFP", "GFP", "GFP",
             "RBD", "RBD", "RBD",
             "G1C", "G1C", "G1C")
meta = data.frame(donor = samples, condition = conditions)
meta

donor,condition
<chr>,<chr>
X1_GFP1,GFP
X2_GFP2,GFP
X3_GFP3,GFP
X4_RBD1,RBD
X5_RBD2,RBD
X6_RBD3,RBD
X10_G1C1,G1C
X11_G1C2,G1C
X12_G1C3,G1C


In [8]:
#Pseudobulk matrices directory
dir <- counts.dir
#Create outdir for results
outdir <- DESEQ.dir
# dir.create(outdir)
#Get list of pseudobulk files
files <- list.files(dir, pattern='_perdonor.gex_SoupX.RNA.counts')
#Cut off file suffices to get celltype names
cells <- gsub('_perdonor.gex_SoupX.RNA.counts','', files) 

In [9]:
files
cells

In [10]:
# List your contrasts  
conditions.1 = c("G1C", "RBD", "G1C") # disease
conditions.2 = c("GFP", "GFP", "RBD") # control
contrasts = c(paste0(conditions.1, "_vs_", conditions.2))

# Write your formual
deseq.formula <- as.formula('~  condition') # last variable is the testing variable

In [37]:
deseq.stats = NULL

for (i in seq_along(files)) {
    file.use = files[i]
    cell.use = cells[i]
    print(paste0("Analyzing: ", cell.use))
    raw_counts <- read.table(paste0(dir, file.use), header = TRUE, row.names = 1)
    # Filter for only samples with any counts in the cell type
    raw_counts = raw_counts[, (colSums(raw_counts != 0) > 0)]
    message("Donors detectected with celltype: ", ncol(raw_counts))
    meta_cell <- subset(meta, donor %in% colnames(raw_counts))
    rownames(meta_cell) = meta_cell$donor
    message("subsetting metadata accordingly - Check that value is equal to above: ", nrow(meta_cell))
    # Ensure that the column names of raw_counts are in the same order as the library identifiers in meta_cell
    meta_cell <- meta_cell[match(colnames(raw_counts), rownames(meta_cell)), ]
    
    for (c.i in seq_along(contrasts)) {
        contrast.use = contrasts[c.i]
        condition.1 = conditions.1[c.i]
        condition.2 = conditions.2[c.i]
        message("  - ", contrast.use)

        # Create vectors of samples for each condition
        samples.condition.1 <- meta_cell %>% 
                               dplyr::filter(condition %in% condition.1) %>% 
                               pull(donor)
        samples.condition.2 <- meta_cell %>% 
                               dplyr::filter(condition %in% condition.2) %>% 
                               pull(donor)
        
        # Subset meta_cell for the current contrast
        meta_cell.use <- meta_cell %>% 
                     dplyr::filter(donor %in% c(samples.condition.1, samples.condition.2)) 
        rownames(meta_cell.use) <- meta_cell.use$donor
        
        # Subset raw_counts for the donors in meta_cell
        raw_counts_subset <- raw_counts[, colnames(raw_counts) %in% rownames(meta_cell.use)]

        # Ensure that the column names of raw_counts are in the same order as the library identifiers in meta_cell
        meta_cell.use <- meta_cell.use[match(colnames(raw_counts_subset), rownames(meta_cell.use)), ]
        
        # Check if enough samples are present for both conditions
        if (length(unique(meta_cell$condition)) >= 2) {
            deseq.stats.tmp <- data.frame()
            
            # Light pre-filtering
            counts_filtered <- raw_counts_subset[rowSums(raw_counts_subset) >= 10,]

            # Run DESeq2
            dds <- DESeqDataSetFromMatrix(countData = round(counts_filtered),
                                          colData = meta_cell.use, 
                                          design = deseq.formula)
            dds$condition <- relevel(dds$condition, ref = condition.2)
            dds <- DESeq(dds)
            dds.results <- results(dds)
            dds.results <- na.omit(dds.results[order(dds.results$padj), ])

            # Write DESeq results to file
            write.table(dds.results, 
                        file = paste0(outdir, cell.use, "_", contrast.use, ".dds.res"), 
                        sep = '\t', quote = FALSE)
            
            # Collect summary statistics
            deseq.stats.tmp <- data.frame(
                celltype = cell.use,
                contrast = contrast.use,
                up_pval = sum(dds.results$log2FoldChange > 0 & dds.results$pvalue < 0.05),
                down_pval = sum(dds.results$log2FoldChange < 0 & dds.results$pvalue < 0.05),
                up_padj = sum(dds.results$log2FoldChange > 0 & dds.results$padj < 0.1),
                down_padj = sum(dds.results$log2FoldChange < 0 & dds.results$padj < 0.1))
                deseq.stats <- rbind(deseq.stats, deseq.stats.tmp)

                           # Make Volcano plot - Padj
                            res = as.data.frame(dds.results)
                            res$SYMBOL = rownames(res)
                            res = res %>%
                            dplyr::filter(!is.na(SYMBOL)) %>%
                            dplyr::filter(padj < 0.1) %>%
                            dplyr::group_by(sign(log2FoldChange)) %>%
                            dplyr::mutate(rank = abs(log2FoldChange) * -log10(padj)) %>%
                            dplyr::top_n(50, rank) %>%
                            dplyr::mutate(delabel = SYMBOL) %>%
                            ungroup() %>%
                            dplyr::select(delabel, SYMBOL) %>%
                            right_join(res) %>%
                            dplyr::mutate(delabel = if_else(is.na(delabel), "", as.character(delabel))) 

                            # Add a new column for color based on significance and direction
                            res$changes <- ifelse(res$log2FoldChange > 0 & res$padj < 0.1, "Up-regulated",
                                                ifelse(res$log2FoldChange < 0 & res$padj < 0.1, "Down-regulated", "NS"))
                            # Get the number of upregulated and downregulated genes
                            n_up <- sum(res$changes == "Up-regulated")
                            n_down <- sum(res$changes == "Down-regulated")

                            # Modify log2FoldChange values to be max x
                            res$log2FoldChange[res$log2FoldChange > 10] <- 10
                            res$log2FoldChange[res$log2FoldChange < -10] <- -10
                            # Modify padj values to be max x 
                            res$padj[res$padj < 10^-200] <- 10^-200
                    
                            plot = ggplot(res, aes(x=log2FoldChange, y=-log10(padj), label=delabel, color=changes)) +
                                theme_bw() +
                                labs(y="-log10(padj)", x="log2FoldChange", 
                                   title=paste(cell.use, " - ",
                                               condition.1, "Vs", condition.2, "\n",
                                              "Up: ", n_up, "-",
                                               "Down: ", n_down)) +
                                geom_point() +
                                scale_color_manual(values=c("blue", "grey", "red")) + 
                                theme(axis.text=element_text(size=16), axis.title=element_text(size=16, face="bold"),
                                    axis.text.x=element_text(), plot.title=element_text(size=18, face="bold", hjust=0.5)) +
                                scale_x_continuous(breaks=c(-10, -5, 0, 5, 10),
                                            labels=c("< -10", "-5", "0", "5", "> 10"), lim = c(-10, 10))+
                                scale_y_continuous(breaks=c(0, 20, 40, 60, 80, 100, 200),
                                            labels=c("0", "20", "40", "60", "80", "100", ">200"))+
                                geom_text_repel(label=res$delabel, size=3.5, max.overlaps=10,
                                              point.padding=0, min.segment.length=0,
                                              max.time=20, max.iter=1e5, box.padding=0.3)
                    
                            ggsave(filename = paste0(outdir, cell.use, "_", contrast.use, ".volcanoPlot.pdf"),
                                   height = 10, width = 10,
                                   plot = plot, device = "pdf")
        } else {
            message("Not enough samples for contrast: ", contrast.use)
        }
    }
}

[1] "Analyzing: B_cell"


Donors detectected with celltype: 9

subsetting metadata accordingly - Check that value is equal to above: 9

  - G1C_vs_GFP

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`
  - RBD_vs_GFP

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`
  - G1C_vs_RBD

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`


[1] "Analyzing: Dendritic_cells"


Donors detectected with celltype: 9

subsetting metadata accordingly - Check that value is equal to above: 9

  - G1C_vs_GFP

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`
  - RBD_vs_GFP

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`
  - G1C_vs_RBD

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`


[1] "Analyzing: Eo_Baso_prog."


Donors detectected with celltype: 9

subsetting metadata accordingly - Check that value is equal to above: 9

  - G1C_vs_GFP

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`
  - RBD_vs_GFP

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`
  - G1C_vs_RBD

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`


[1] "Analyzing: Ery_Mk_prog."


Donors detectected with celltype: 8

subsetting metadata accordingly - Check that value is equal to above: 8

  - G1C_vs_GFP

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`
  - RBD_vs_GFP

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`
  - G1C_vs_RBD

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`


[1] "Analyzing: Erythroblasts"


Donors detectected with celltype: 7

subsetting metadata accordingly - Check that value is equal to above: 7

  - G1C_vs_GFP

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`
  - RBD_vs_GFP

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

-- note: fitType='parametric', but the dispersion trend was not well captured by the
   function: y = a/x + b, and a local regression fit was automatically substituted.
   specify fitType='local' or 'mean' to avoid this message next time.

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`
  - G1C_vs_RBD

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estim

[1] "Analyzing: Gran_Mono_prog."


Donors detectected with celltype: 9

subsetting metadata accordingly - Check that value is equal to above: 9

  - G1C_vs_GFP

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`
  - RBD_vs_GFP

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`
  - G1C_vs_RBD

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`


[1] "Analyzing: large_pre-B."


Donors detectected with celltype: 9

subsetting metadata accordingly - Check that value is equal to above: 9

  - G1C_vs_GFP

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`
  - RBD_vs_GFP

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`
  - G1C_vs_RBD

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`


[1] "Analyzing: LMPPs"


Donors detectected with celltype: 9

subsetting metadata accordingly - Check that value is equal to above: 9

  - G1C_vs_GFP

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`
  - RBD_vs_GFP

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`
  - G1C_vs_RBD

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`


[1] "Analyzing: Mk_prog."


Donors detectected with celltype: 7

subsetting metadata accordingly - Check that value is equal to above: 7

  - G1C_vs_GFP

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`
  - RBD_vs_GFP

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`
  - G1C_vs_RBD

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`


[1] "Analyzing: Mono_prog."


Donors detectected with celltype: 9

subsetting metadata accordingly - Check that value is equal to above: 9

  - G1C_vs_GFP

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`
  - RBD_vs_GFP

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`
  - G1C_vs_RBD

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`


[1] "Analyzing: Monocytes"


Donors detectected with celltype: 9

subsetting metadata accordingly - Check that value is equal to above: 9

  - G1C_vs_GFP

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`
  - RBD_vs_GFP

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`
  - G1C_vs_RBD

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`


[1] "Analyzing: Neutro_prog."


Donors detectected with celltype: 9

subsetting metadata accordingly - Check that value is equal to above: 9

  - G1C_vs_GFP

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`
  - RBD_vs_GFP

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`
  - G1C_vs_RBD

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`


[1] "Analyzing: Neutrophils"


Donors detectected with celltype: 9

subsetting metadata accordingly - Check that value is equal to above: 9

  - G1C_vs_GFP

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`
  - RBD_vs_GFP

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`
  - G1C_vs_RBD

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`


[1] "Analyzing: NK_cells"


Donors detectected with celltype: 9

subsetting metadata accordingly - Check that value is equal to above: 9

  - G1C_vs_GFP

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`
  - RBD_vs_GFP

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`
  - G1C_vs_RBD

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`


[1] "Analyzing: pro-B"


Donors detectected with celltype: 9

subsetting metadata accordingly - Check that value is equal to above: 9

  - G1C_vs_GFP

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`
  - RBD_vs_GFP

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`
  - G1C_vs_RBD

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`


[1] "Analyzing: small_pre-B."


Donors detectected with celltype: 9

subsetting metadata accordingly - Check that value is equal to above: 9

  - G1C_vs_GFP

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`
  - RBD_vs_GFP

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`
  - G1C_vs_RBD

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`


[1] "Analyzing: T_cells"


Donors detectected with celltype: 9

subsetting metadata accordingly - Check that value is equal to above: 9

  - G1C_vs_GFP

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`
  - RBD_vs_GFP

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`
  - G1C_vs_RBD

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

[1m[22mJoining with `by = join_by(SYMBOL)`


In [38]:
deseq.stats

celltype,contrast,up_pval,down_pval,up_padj,down_padj
<chr>,<chr>,<int>,<int>,<int>,<int>
B_cell,G1C_vs_GFP,247,348,18,39
B_cell,RBD_vs_GFP,84,199,0,7
B_cell,G1C_vs_RBD,339,172,35,8
Dendritic_cells,G1C_vs_GFP,459,320,66,30
Dendritic_cells,RBD_vs_GFP,644,812,166,304
Dendritic_cells,G1C_vs_RBD,1321,1177,945,694
Eo_Baso_prog.,G1C_vs_GFP,101,111,3,3
Eo_Baso_prog.,RBD_vs_GFP,30,23,0,0
Eo_Baso_prog.,G1C_vs_RBD,75,78,4,1
Ery_Mk_prog.,G1C_vs_GFP,33,4,10,0


In [46]:
write.table(deseq.stats, 
            file = paste0(outdir, "DESEQ_summary.txt"), 
            sep = '\t', quote = FALSE)

# FGSEA

In [19]:
GSEA.dir = "/nfs/lab/Luca/Assets/Gene.Sets/mouse_categories/"

In [35]:
setwd(GSEA.dir)
gene.sets.ls = dir()

gene.set.db = NULL
for (i in seq_along(gene.sets.ls)){
    gene.set = gene.sets.ls[i]
    message("Processing ", gene.set)
    # Read in the GMT as a table, to get the name of the term
    gene.set.use = read.table(gene.set, sep = "\t", fill = TRUE)
    # Subset to just the first column
    gene.set.use = as.data.frame(gene.set.use[,1])
    colnames(gene.set.use) = "pathway"
    gene.set.use$term = as.factor(str_split_fixed(gene.set.use$pathway, pattern = "_", n = 2)[,2])
    gene.set.use$set = as.factor(str_split_fixed(gene.set.use$pathway, pattern = "_", n = 2)[,1])
            tmp = str_split_fixed(gene.set, pattern = "_", n = 2)[,2]
    gene.set.use$set.family = as.factor(str_split_fixed(tmp, pattern = "\\.", n = 2)[,1])
    gene.set.use$set.superfamily = as.factor(str_split_fixed(gene.set, pattern = "_", n = 2)[,1])

    gene.set.db = rbind(gene.set.db, gene.set.use)
}

gene.set.db$set.superfamily = factor(gene.set.db$set.superfamily, 
                                     levels = c("GO"))
gene.set.db$set.family = factor(gene.set.db$set.family, 
                                     levels = c("BP", "CC", "MF",
                                                "CP"))

Processing GO_BP.v2023.2.Mm.symbols.gmt.txt

Processing GO_CC.v2023.2.Mm.symbols.gmt.txt

Processing GO_CP.v2023.2.Mm.symbols.gmt.txt

Processing GO_MF.v2023.2.Mm.symbols.gmt.txt



In [40]:
# Load the pathways into a named list
GMT <- gmtPathways("/nfs/lab/Luca/Assets/Gene.Sets/msigdb.v2023.2.Mm.symbols.gmt")

In [41]:
#Pseudobulk matrices directory
dir <- DESEQ.dir
#Create outdir for results
outdir <- GSEA.res.dir
# dir.create(outdir)
#Get list of pseudobulk files
files <- list.files(dir, pattern='.dds.res')
files

In [45]:
# setup
tresh = 0.1
set.family.Keep = c("CP")

In [47]:
set.seed(999)

for (c.i in seq_along(contrasts)){
    contrast.use = contrasts[c.i]
        for (i in seq_along(files)){
            # load table
            file.use = files[i]
            cell.use = str_split_fixed(file.use, paste0("_", contrast.use), n = 2)[,1]
            De.analysis = paste0(cell.use, "_", contrast.use)
            print(paste0("Analyzing: ", De.analysis))
            res = read.table(paste0(dir,file.use), sep = '\t')

            # Formatting for FGSEA
            res$rank = res$stat
            res = data.frame("SYMBOL" = rownames(res),
                             "stat" = res$rank)
            res = res[!grepl(pattern = "NA", x = res$SYMBOL),]
            ranks <- deframe(res)
            ranks = sort(ranks, decreasing=TRUE)

            tryCatch({
            message("Running FGSEA")
            fgseaRes <- fgseaMultilevel(pathways=GMT,
                                        stats=ranks,
                                        minSize  = 10, 
                                        maxSize  = 500)
            message("Number of total enriched terms: ", nrow(fgseaRes))
            fgseaRes.tresh = fgseaRes[fgseaRes$padj < tresh,]
            message("Number of significant terms: ", nrow(fgseaRes.tresh))
            # Add categories
            fgseaRes.tresh = merge(fgseaRes.tresh, gene.set.db, by = "pathway")
            message("Double check that it's the same number of terms: ", nrow(fgseaRes.tresh))
            # Subset for GO families of interest
            fgseaRes.tresh.use = fgseaRes.tresh[fgseaRes.tresh$set.family %in% set.family.Keep, ]
            message("Number of significant terms: ", nrow(fgseaRes.tresh.use))
            # Collapse terms 
            collapsedPathways <- collapsePathways(fgseaRes.tresh.use[order(pval)][padj < tresh], 
                                                  GMT, ranks)
            mainPathways <- fgseaRes.tresh.use[pathway %in% collapsedPathways$mainPathways][
                                     order(-NES), pathway]
            fgseaResMain <- fgseaRes.tresh.use[match(mainPathways, pathway)]
            # Save them
            file = paste(outdir, "GSEA.CP_", De.analysis, ".res", sep = "")
            fwrite(fgseaRes.tresh.use, file = file, sep = "\t")
            # print plot
            plot = ggplot(fgseaResMain, aes(x = reorder(term, NES), y = NES)) +
              geom_col(aes(fill= NES>0)) +
              theme_bw()+
              labs(x="", y="Normalized Enrichment Score",
                   title= paste(De.analysis,
                                " \n Gene Ontologies")) + 
                theme(axis.text=element_text(size=10), axis.title=element_text(size=16, face="bold"),
                axis.text.x=element_text(), plot.title=element_text(size=16, face="bold", hjust=0.5)) +
              coord_flip() +
            facet_nested(rows = vars(set.family, set), drop = TRUE, scales = "free", space = "free")
            ggsave(filename = paste(outdir, "GSEA.CP_", De.analysis, ".pdf", sep = ""),
                                    height = 16, width = 20,
                                    plot = plot, device = "pdf")
                }, error = function(e) {
            message("Error or no results found for ", De.analysis, ": ", e$message)
        })
    }
}

[1] "Analyzing: B_cell_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9783

Number of significant terms: 1733

Double check that it's the same number of terms: 1025

Number of significant terms: 194



[1] "Analyzing: B_cell_G1C_vs_RBD.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9577

Number of significant terms: 1923

Double check that it's the same number of terms: 1155

Number of significant terms: 215



[1] "Analyzing: B_cell_RBD_vs_GFP.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9689

Number of significant terms: 703

Double check that it's the same number of terms: 467

Number of significant terms: 84



[1] "Analyzing: Dendritic_cells_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9736

Number of significant terms: 1959

Double check that it's the same number of terms: 1272

Number of significant terms: 230



[1] "Analyzing: Dendritic_cells_G1C_vs_RBD.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9585

Number of significant terms: 1258

Double check that it's the same number of terms: 918

Number of significant terms: 202



[1] "Analyzing: Dendritic_cells_RBD_vs_GFP.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9487

Number of significant terms: 1122

Double check that it's the same number of terms: 725

Number of significant terms: 189



[1] "Analyzing: Eo_Baso_prog._G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9464

Number of significant terms: 142

Double check that it's the same number of terms: 72

Number of significant terms: 3



[1] "Analyzing: Eo_Baso_prog._G1C_vs_RBD.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9455

Number of significant terms: 198

Double check that it's the same number of terms: 115

Number of significant terms: 19



[1] "Analyzing: Eo_Baso_prog._RBD_vs_GFP.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9192

Number of significant terms: 57

Double check that it's the same number of terms: 22

Number of significant terms: 7



[1] "Analyzing: Ery_Mk_prog._G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 3747

Number of significant terms: 70

Double check that it's the same number of terms: 39

Number of significant terms: 1



[1] "Analyzing: Ery_Mk_prog._G1C_vs_RBD.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 2258

Number of significant terms: 78

Double check that it's the same number of terms: 14

Number of significant terms: 1



[1] "Analyzing: Ery_Mk_prog._RBD_vs_GFP.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 6531

Number of significant terms: 325

Double check that it's the same number of terms: 196

Number of significant terms: 102



[1] "Analyzing: Erythroblasts_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 2530

Number of significant terms: 612

Double check that it's the same number of terms: 293

Number of significant terms: 33



[1] "Analyzing: Erythroblasts_G1C_vs_RBD.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 6101

Number of significant terms: 764

Double check that it's the same number of terms: 421

Number of significant terms: 117



[1] "Analyzing: Gran_Mono_prog._RBD_vs_GFP.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 8776

Number of significant terms: 268

Double check that it's the same number of terms: 190

Number of significant terms: 21



[1] "Analyzing: large_pre-B._G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9415

Number of significant terms: 784

Double check that it's the same number of terms: 541

Number of significant terms: 141



[1] "Analyzing: large_pre-B._G1C_vs_RBD.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 7445

Number of significant terms: 706

Double check that it's the same number of terms: 532

Number of significant terms: 101



[1] "Analyzing: large_pre-B._RBD_vs_GFP.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 4216

Number of significant terms: 626

Double check that it's the same number of terms: 434

Number of significant terms: 54



[1] "Analyzing: LMPPs_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9778

Number of significant terms: 339

Double check that it's the same number of terms: 192

Number of significant terms: 24



[1] "Analyzing: LMPPs_G1C_vs_RBD.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9758

Number of significant terms: 1694

Double check that it's the same number of terms: 1130

Number of significant terms: 209



[1] "Analyzing: LMPPs_RBD_vs_GFP.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9567

Number of significant terms: 1299

Double check that it's the same number of terms: 914

Number of significant terms: 168



[1] "Analyzing: Mk_prog._G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 7009

Number of significant terms: 361

Double check that it's the same number of terms: 250

Number of significant terms: 50



[1] "Analyzing: Mk_prog._G1C_vs_RBD.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 4276

Number of significant terms: 153

Double check that it's the same number of terms: 84

Number of significant terms: 13



[1] "Analyzing: Mk_prog._RBD_vs_GFP.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 5817

Number of significant terms: 69

Double check that it's the same number of terms: 38

Number of significant terms: 13



[1] "Analyzing: Mono_prog._G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 6513

Number of significant terms: 505

Double check that it's the same number of terms: 262

Number of significant terms: 49



[1] "Analyzing: Mono_prog._G1C_vs_RBD.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 6415

Number of significant terms: 255

Double check that it's the same number of terms: 144

Number of significant terms: 27



[1] "Analyzing: Mono_prog._RBD_vs_GFP.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 8694

Number of significant terms: 267

Double check that it's the same number of terms: 163

Number of significant terms: 53



[1] "Analyzing: Monocytes_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9968

Number of significant terms: 1661

Double check that it's the same number of terms: 1169

Number of significant terms: 212



[1] "Analyzing: Monocytes_G1C_vs_RBD.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9999

Number of significant terms: 1452

Double check that it's the same number of terms: 1008

Number of significant terms: 220



[1] "Analyzing: Monocytes_RBD_vs_GFP.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9909

Number of significant terms: 1595

Double check that it's the same number of terms: 935

Number of significant terms: 216



[1] "Analyzing: Neutro_prog._G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 1995

Number of significant terms: 141

Double check that it's the same number of terms: 71

Number of significant terms: 4



[1] "Analyzing: Neutro_prog._G1C_vs_RBD.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9314

Number of significant terms: 623

Double check that it's the same number of terms: 350

Number of significant terms: 75



[1] "Analyzing: Neutro_prog._RBD_vs_GFP.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 8544

Number of significant terms: 263

Double check that it's the same number of terms: 196

Number of significant terms: 30



[1] "Analyzing: Neutrophils_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9719

Number of significant terms: 3225

Double check that it's the same number of terms: 2507

Number of significant terms: 416



[1] "Analyzing: Neutrophils_G1C_vs_RBD.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9911

Number of significant terms: 3701

Double check that it's the same number of terms: 2675

Number of significant terms: 474



[1] "Analyzing: Neutrophils_RBD_vs_GFP.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 10392

Number of significant terms: 1253

Double check that it's the same number of terms: 650

Number of significant terms: 138



[1] "Analyzing: NK_cells_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9715

Number of significant terms: 812

Double check that it's the same number of terms: 483

Number of significant terms: 82



[1] "Analyzing: NK_cells_G1C_vs_RBD.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9724

Number of significant terms: 706

Double check that it's the same number of terms: 414

Number of significant terms: 89



[1] "Analyzing: NK_cells_RBD_vs_GFP.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9749

Number of significant terms: 179

Double check that it's the same number of terms: 105

Number of significant terms: 34



[1] "Analyzing: pro-B_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 8046

Number of significant terms: 1120

Double check that it's the same number of terms: 797

Number of significant terms: 207



[1] "Analyzing: pro-B_G1C_vs_RBD.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9338

Number of significant terms: 1357

Double check that it's the same number of terms: 981

Number of significant terms: 269



[1] "Analyzing: pro-B_RBD_vs_GFP.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 5191

Number of significant terms: 98

Double check that it's the same number of terms: 52

Number of significant terms: 15



[1] "Analyzing: small_pre-B._G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9076

Number of significant terms: 1951

Double check that it's the same number of terms: 1159

Number of significant terms: 234



[1] "Analyzing: small_pre-B._G1C_vs_RBD.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 8684

Number of significant terms: 1269

Double check that it's the same number of terms: 848

Number of significant terms: 115



[1] "Analyzing: small_pre-B._RBD_vs_GFP.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 8809

Number of significant terms: 893

Double check that it's the same number of terms: 679

Number of significant terms: 133



[1] "Analyzing: T_cells_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 6961

Number of significant terms: 483

Double check that it's the same number of terms: 306

Number of significant terms: 50



[1] "Analyzing: T_cells_G1C_vs_RBD.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 7425

Number of significant terms: 673

Double check that it's the same number of terms: 419

Number of significant terms: 93



[1] "Analyzing: T_cells_RBD_vs_GFP.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 8721

Number of significant terms: 1043

Double check that it's the same number of terms: 786

Number of significant terms: 130



[1] "Analyzing: B_cell_G1C_vs_GFP.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9783

Number of significant terms: 1732

Double check that it's the same number of terms: 1012

Number of significant terms: 196



[1] "Analyzing: B_cell_G1C_vs_RBD.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9577

Number of significant terms: 1871

Double check that it's the same number of terms: 1116

Number of significant terms: 207



[1] "Analyzing: B_cell_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9689

Number of significant terms: 759

Double check that it's the same number of terms: 508

Number of significant terms: 94



[1] "Analyzing: Dendritic_cells_G1C_vs_GFP.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9736

Number of significant terms: 1891

Double check that it's the same number of terms: 1240

Number of significant terms: 216



[1] "Analyzing: Dendritic_cells_G1C_vs_RBD.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9585

Number of significant terms: 1265

Double check that it's the same number of terms: 922

Number of significant terms: 200



[1] "Analyzing: Dendritic_cells_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9487

Number of significant terms: 1116

Double check that it's the same number of terms: 722

Number of significant terms: 185



[1] "Analyzing: Eo_Baso_prog._G1C_vs_GFP.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9464

Number of significant terms: 171

Double check that it's the same number of terms: 94

Number of significant terms: 8



[1] "Analyzing: Eo_Baso_prog._G1C_vs_RBD.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9455

Number of significant terms: 196

Double check that it's the same number of terms: 116

Number of significant terms: 16



[1] "Analyzing: Eo_Baso_prog._RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9192

Number of significant terms: 57

Double check that it's the same number of terms: 22

Number of significant terms: 7



[1] "Analyzing: Ery_Mk_prog._G1C_vs_GFP.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 3747

Number of significant terms: 89

Double check that it's the same number of terms: 47

Number of significant terms: 2



[1] "Analyzing: Ery_Mk_prog._G1C_vs_RBD.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 2258

Number of significant terms: 80

Double check that it's the same number of terms: 13

Number of significant terms: 1



[1] "Analyzing: Ery_Mk_prog._RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 6531

Number of significant terms: 347

Double check that it's the same number of terms: 204

Number of significant terms: 102



[1] "Analyzing: Erythroblasts_G1C_vs_GFP.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 2530

Number of significant terms: 660

Double check that it's the same number of terms: 331

Number of significant terms: 47



[1] "Analyzing: Erythroblasts_G1C_vs_RBD.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 7109

Number of significant terms: 808

Double check that it's the same number of terms: 466

Number of significant terms: 93



[1] "Analyzing: Erythroblasts_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 7606

Number of significant terms: 994

Double check that it's the same number of terms: 640

Number of significant terms: 180



[1] "Analyzing: Gran_Mono_prog._G1C_vs_GFP.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9368

Number of significant terms: 727

Double check that it's the same number of terms: 399

Number of significant terms: 125



[1] "Analyzing: Gran_Mono_prog._G1C_vs_RBD.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 6101

Number of significant terms: 769

Double check that it's the same number of terms: 427

Number of significant terms: 120



[1] "Analyzing: Gran_Mono_prog._RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 8776

Number of significant terms: 251

Double check that it's the same number of terms: 182

Number of significant terms: 22



[1] "Analyzing: large_pre-B._G1C_vs_GFP.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9415

Number of significant terms: 685

Double check that it's the same number of terms: 474

Number of significant terms: 130



[1] "Analyzing: large_pre-B._G1C_vs_RBD.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 7445

Number of significant terms: 669

Double check that it's the same number of terms: 502

Number of significant terms: 98



[1] "Analyzing: large_pre-B._RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 4216

Number of significant terms: 673

Double check that it's the same number of terms: 462

Number of significant terms: 55



[1] "Analyzing: LMPPs_G1C_vs_GFP.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9778

Number of significant terms: 337

Double check that it's the same number of terms: 189

Number of significant terms: 23



[1] "Analyzing: LMPPs_G1C_vs_RBD.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9758

Number of significant terms: 1593

Double check that it's the same number of terms: 1062

Number of significant terms: 195



[1] "Analyzing: LMPPs_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9567

Number of significant terms: 1331

Double check that it's the same number of terms: 939

Number of significant terms: 175



[1] "Analyzing: Mk_prog._G1C_vs_GFP.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 7009

Number of significant terms: 378

Double check that it's the same number of terms: 256

Number of significant terms: 51



[1] "Analyzing: Mk_prog._G1C_vs_RBD.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 4276

Number of significant terms: 161

Double check that it's the same number of terms: 89

Number of significant terms: 15



[1] "Analyzing: Mk_prog._RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 5817

Number of significant terms: 93

Double check that it's the same number of terms: 50

Number of significant terms: 21



[1] "Analyzing: Mono_prog._G1C_vs_GFP.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 6513

Number of significant terms: 406

Double check that it's the same number of terms: 207

Number of significant terms: 38

Number of total enriched terms: 6415

Number of significant terms: 256

Double check that it's the same number of terms: 145

Number of significant terms: 28



[1] "Analyzing: Mono_prog._RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 8694

Number of significant terms: 238

Double check that it's the same number of terms: 146

Number of significant terms: 48



[1] "Analyzing: Monocytes_G1C_vs_GFP.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9968

Number of significant terms: 1765

Double check that it's the same number of terms: 1230

Number of significant terms: 221



[1] "Analyzing: Monocytes_G1C_vs_RBD.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9999

Number of significant terms: 1513

Double check that it's the same number of terms: 1055

Number of significant terms: 221



[1] "Analyzing: Monocytes_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9909

Number of significant terms: 1551

Double check that it's the same number of terms: 913

Number of significant terms: 212



[1] "Analyzing: Neutro_prog._G1C_vs_GFP.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 1995

Number of significant terms: 132

Double check that it's the same number of terms: 65

Number of significant terms: 4



[1] "Analyzing: Neutro_prog._G1C_vs_RBD.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9314

Number of significant terms: 661

Double check that it's the same number of terms: 371

Number of significant terms: 78



[1] "Analyzing: Neutro_prog._RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 8544

Number of significant terms: 246

Double check that it's the same number of terms: 186

Number of significant terms: 25



[1] "Analyzing: Neutrophils_G1C_vs_GFP.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9719

Number of significant terms: 3207

Double check that it's the same number of terms: 2496

Number of significant terms: 414



[1] "Analyzing: Neutrophils_G1C_vs_RBD.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9911

Number of significant terms: 3452

Double check that it's the same number of terms: 2525

Number of significant terms: 452



[1] "Analyzing: Neutrophils_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 10392

Number of significant terms: 1101

Double check that it's the same number of terms: 562

Number of significant terms: 118



[1] "Analyzing: NK_cells_G1C_vs_GFP.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9715

Number of significant terms: 825

Double check that it's the same number of terms: 492

Number of significant terms: 84



[1] "Analyzing: pro-B_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 5191

Number of significant terms: 100

Double check that it's the same number of terms: 52

Number of significant terms: 14



[1] "Analyzing: small_pre-B._G1C_vs_GFP.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9076

Number of significant terms: 1909

Double check that it's the same number of terms: 1131

Number of significant terms: 231



[1] "Analyzing: small_pre-B._G1C_vs_RBD.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 8684

Number of significant terms: 1114

Double check that it's the same number of terms: 740

Number of significant terms: 98



[1] "Analyzing: small_pre-B._RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 8809

Number of significant terms: 863

Double check that it's the same number of terms: 652

Number of significant terms: 132



[1] "Analyzing: T_cells_G1C_vs_GFP.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 6961

Number of significant terms: 462

Double check that it's the same number of terms: 283

Number of significant terms: 45



[1] "Analyzing: T_cells_G1C_vs_RBD.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 7425

Number of significant terms: 771

Double check that it's the same number of terms: 477

Number of significant terms: 103



[1] "Analyzing: T_cells_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 8721

Number of significant terms: 995

Double check that it's the same number of terms: 754

Number of significant terms: 128



[1] "Analyzing: B_cell_G1C_vs_GFP.dds.res_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 9783

Number of significant terms: 1567

Double check that it's the same number of terms: 917

Number of significant terms: 179



[1] "Analyzing: B_cell_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 9577

Number of significant terms: 1723

Double check that it's the same number of terms: 1020

Number of significant terms: 190



[1] "Analyzing: B_cell_RBD_vs_GFP.dds.res_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 9689

Number of significant terms: 758

Double check that it's the same number of terms: 508

Number of significant terms: 91



[1] "Analyzing: Dendritic_cells_G1C_vs_GFP.dds.res_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 9736

Number of significant terms: 1870

Double check that it's the same number of terms: 1209

Number of significant terms: 215



[1] "Analyzing: Dendritic_cells_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 9585

Number of significant terms: 1280

Double check that it's the same number of terms: 925

Number of significant terms: 205



[1] "Analyzing: Dendritic_cells_RBD_vs_GFP.dds.res_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 9487

Number of significant terms: 1089

Double check that it's the same number of terms: 707

Number of significant terms: 183



[1] "Analyzing: Eo_Baso_prog._G1C_vs_GFP.dds.res_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 9464

Number of significant terms: 147

Double check that it's the same number of terms: 76

Number of significant terms: 4



[1] "Analyzing: Eo_Baso_prog._G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 9455

Number of significant terms: 162

Double check that it's the same number of terms: 95

Number of significant terms: 14



[1] "Analyzing: Eo_Baso_prog._RBD_vs_GFP.dds.res_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 9192

Number of significant terms: 59

Double check that it's the same number of terms: 24

Number of significant terms: 7



[1] "Analyzing: Ery_Mk_prog._G1C_vs_GFP.dds.res_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 3747

Number of significant terms: 84

Double check that it's the same number of terms: 46

Number of significant terms: 2



[1] "Analyzing: Ery_Mk_prog._G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 2258

Number of significant terms: 75

Double check that it's the same number of terms: 12

Number of significant terms: 1



[1] "Analyzing: Ery_Mk_prog._RBD_vs_GFP.dds.res_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 6531

Number of significant terms: 370

Double check that it's the same number of terms: 215

Number of significant terms: 107



[1] "Analyzing: Erythroblasts_G1C_vs_GFP.dds.res_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 2530

Number of significant terms: 634

Double check that it's the same number of terms: 310

Number of significant terms: 36



[1] "Analyzing: Erythroblasts_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 7109

Number of significant terms: 852

Double check that it's the same number of terms: 499

Number of significant terms: 100



[1] "Analyzing: Erythroblasts_RBD_vs_GFP.dds.res_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 7606

Number of significant terms: 916

Double check that it's the same number of terms: 602

Number of significant terms: 173



[1] "Analyzing: Gran_Mono_prog._G1C_vs_GFP.dds.res_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 9368

Number of significant terms: 700

Double check that it's the same number of terms: 387

Number of significant terms: 121



[1] "Analyzing: Gran_Mono_prog._G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 6101

Number of significant terms: 740

Double check that it's the same number of terms: 406

Number of significant terms: 118



[1] "Analyzing: Gran_Mono_prog._RBD_vs_GFP.dds.res_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 8776

Number of significant terms: 285

Double check that it's the same number of terms: 205

Number of significant terms: 26



[1] "Analyzing: large_pre-B._G1C_vs_GFP.dds.res_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 9415

Number of significant terms: 778

Double check that it's the same number of terms: 529

Number of significant terms: 141



[1] "Analyzing: large_pre-B._G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 7445

Number of significant terms: 685

Double check that it's the same number of terms: 521

Number of significant terms: 98



[1] "Analyzing: large_pre-B._RBD_vs_GFP.dds.res_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 4216

Number of significant terms: 652

Double check that it's the same number of terms: 450

Number of significant terms: 52



[1] "Analyzing: LMPPs_G1C_vs_GFP.dds.res_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 9778

Number of significant terms: 293

Double check that it's the same number of terms: 162

Number of significant terms: 22



[1] "Analyzing: LMPPs_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 9758

Number of significant terms: 1635

Double check that it's the same number of terms: 1090

Number of significant terms: 197



[1] "Analyzing: LMPPs_RBD_vs_GFP.dds.res_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 9567

Number of significant terms: 1352

Double check that it's the same number of terms: 946

Number of significant terms: 177



[1] "Analyzing: Mk_prog._G1C_vs_GFP.dds.res_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 7009

Number of significant terms: 364

Double check that it's the same number of terms: 250

Number of significant terms: 49



[1] "Analyzing: Mk_prog._G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 4276

Number of significant terms: 149

Double check that it's the same number of terms: 83

Number of significant terms: 13



[1] "Analyzing: Mk_prog._RBD_vs_GFP.dds.res_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 5817

Number of significant terms: 81

Double check that it's the same number of terms: 42

Number of significant terms: 15



[1] "Analyzing: Mono_prog._G1C_vs_GFP.dds.res_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 6513

Number of significant terms: 545

Double check that it's the same number of terms: 286

Number of significant terms: 52



[1] "Analyzing: Mono_prog._G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 6415

Number of significant terms: 281

Double check that it's the same number of terms: 162

Number of significant terms: 29



[1] "Analyzing: Mono_prog._RBD_vs_GFP.dds.res_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 8694

Number of significant terms: 260

Double check that it's the same number of terms: 157

Number of significant terms: 51



[1] "Analyzing: Monocytes_G1C_vs_GFP.dds.res_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 9968

Number of significant terms: 1706

Double check that it's the same number of terms: 1189

Number of significant terms: 216



[1] "Analyzing: Monocytes_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 9999

Number of significant terms: 1389

Double check that it's the same number of terms: 966

Number of significant terms: 216



[1] "Analyzing: Monocytes_RBD_vs_GFP.dds.res_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 9909

Number of significant terms: 1447

Double check that it's the same number of terms: 839

Number of significant terms: 200



[1] "Analyzing: Neutro_prog._G1C_vs_GFP.dds.res_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 1995

Number of significant terms: 154

Double check that it's the same number of terms: 78

Number of significant terms: 4



[1] "Analyzing: Neutro_prog._G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 9314

Number of significant terms: 589

Double check that it's the same number of terms: 331

Number of significant terms: 72



[1] "Analyzing: Neutro_prog._RBD_vs_GFP.dds.res_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 8544

Number of significant terms: 240

Double check that it's the same number of terms: 177

Number of significant terms: 26



[1] "Analyzing: Neutrophils_G1C_vs_GFP.dds.res_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 9719

Number of significant terms: 3274

Double check that it's the same number of terms: 2536

Number of significant terms: 416



[1] "Analyzing: Neutrophils_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 9911

Number of significant terms: 3525

Double check that it's the same number of terms: 2560

Number of significant terms: 466



[1] "Analyzing: Neutrophils_RBD_vs_GFP.dds.res_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 10392

Number of significant terms: 1139

Double check that it's the same number of terms: 581

Number of significant terms: 125



[1] "Analyzing: NK_cells_G1C_vs_GFP.dds.res_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 9715

Number of significant terms: 839

Double check that it's the same number of terms: 504

Number of significant terms: 88



[1] "Analyzing: NK_cells_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 9724

Number of significant terms: 608

Double check that it's the same number of terms: 354

Number of significant terms: 81



[1] "Analyzing: NK_cells_RBD_vs_GFP.dds.res_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 9749

Number of significant terms: 175

Double check that it's the same number of terms: 100

Number of significant terms: 33



[1] "Analyzing: pro-B_G1C_vs_GFP.dds.res_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 8046

Number of significant terms: 1052

Double check that it's the same number of terms: 745

Number of significant terms: 195



[1] "Analyzing: pro-B_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 9338

Number of significant terms: 1354

Double check that it's the same number of terms: 977

Number of significant terms: 264



[1] "Analyzing: pro-B_RBD_vs_GFP.dds.res_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 5191

Number of significant terms: 104

Double check that it's the same number of terms: 54

Number of significant terms: 15



[1] "Analyzing: small_pre-B._G1C_vs_GFP.dds.res_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 9076

Number of significant terms: 1833

Double check that it's the same number of terms: 1097

Number of significant terms: 218



[1] "Analyzing: small_pre-B._G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 8684

Number of significant terms: 1186

Double check that it's the same number of terms: 789

Number of significant terms: 105



[1] "Analyzing: small_pre-B._RBD_vs_GFP.dds.res_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 8809

Number of significant terms: 909

Double check that it's the same number of terms: 688

Number of significant terms: 136



[1] "Analyzing: T_cells_G1C_vs_GFP.dds.res_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 6961

Number of significant terms: 448

Double check that it's the same number of terms: 276

Number of significant terms: 49



[1] "Analyzing: T_cells_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 7425

Number of significant terms: 692

Double check that it's the same number of terms: 433

Number of significant terms: 96



[1] "Analyzing: T_cells_RBD_vs_GFP.dds.res_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 8721

Number of significant terms: 985

Double check that it's the same number of terms: 750

Number of significant terms: 128



In [48]:
# setup
tresh = 0.1
set.family.Keep = c("BP")

In [None]:
set.seed(999)

for (c.i in seq_along(contrasts)){
    contrast.use = contrasts[c.i]
        for (i in seq_along(files)){
            # load table
            file.use = files[i]
            cell.use = str_split_fixed(file.use, paste0("_", contrast.use), n = 2)[,1]
            De.analysis = paste0(cell.use, "_", contrast.use)
            print(paste0("Analyzing: ", De.analysis))
            res = read.table(paste0(dir,file.use), sep = '\t')

            # Formatting for FGSEA
            res$rank = res$stat
            res = data.frame("SYMBOL" = rownames(res),
                             "stat" = res$rank)
            res = res[!grepl(pattern = "NA", x = res$SYMBOL),]
            ranks <- deframe(res)
            ranks = sort(ranks, decreasing=TRUE)

            tryCatch({
            message("Running FGSEA")
            fgseaRes <- fgseaMultilevel(pathways=GMT,
                                        stats=ranks,
                                        minSize  = 10, 
                                        maxSize  = 500)
            message("Number of total enriched terms: ", nrow(fgseaRes))
            fgseaRes.tresh = fgseaRes[fgseaRes$padj < tresh,]
            message("Number of significant terms: ", nrow(fgseaRes.tresh))
            # Add categories
            fgseaRes.tresh = merge(fgseaRes.tresh, gene.set.db, by = "pathway")
            message("Double check that it's the same number of terms: ", nrow(fgseaRes.tresh))
            # Subset for GO families of interest
            fgseaRes.tresh.use = fgseaRes.tresh[fgseaRes.tresh$set.family %in% set.family.Keep, ]
            message("Number of significant terms: ", nrow(fgseaRes.tresh.use))
            # Collapse terms 
            collapsedPathways <- collapsePathways(fgseaRes.tresh.use[order(pval)][padj < tresh], 
                                                  GMT, ranks)
            mainPathways <- fgseaRes.tresh.use[pathway %in% collapsedPathways$mainPathways][
                                     order(-NES), pathway]
            fgseaResMain <- fgseaRes.tresh.use[match(mainPathways, pathway)]
            # Save them
            file = paste(outdir, "GSEA.BP_", De.analysis, ".res", sep = "")
            fwrite(fgseaRes.tresh.use, file = file, sep = "\t")
            # print plot
            plot = ggplot(fgseaResMain, aes(x = reorder(term, NES), y = NES)) +
              geom_col(aes(fill= NES>0)) +
              theme_bw()+
              labs(x="", y="Normalized Enrichment Score",
                   title= paste(De.analysis,
                                " \n Gene Ontologies")) + 
                theme(axis.text=element_text(size=10), axis.title=element_text(size=16, face="bold"),
                axis.text.x=element_text(), plot.title=element_text(size=16, face="bold", hjust=0.5)) +
              coord_flip() +
            facet_nested(rows = vars(set.family, set), drop = TRUE, scales = "free", space = "free")
            ggsave(filename = paste(outdir, "GSEA.BP_", De.analysis, ".pdf", sep = ""),
                                    height = 16, width = 20,
                                    plot = plot, device = "pdf")
                }, error = function(e) {
            message("Error or no results found for ", De.analysis, ": ", e$message)
        })
    }
}

[1] "Analyzing: B_cell_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9783

Number of significant terms: 1733

Double check that it's the same number of terms: 1025

Number of significant terms: 634



[1] "Analyzing: B_cell_G1C_vs_RBD.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9577

Number of significant terms: 1844

Double check that it's the same number of terms: 1102

Number of significant terms: 673



[1] "Analyzing: B_cell_RBD_vs_GFP.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9689

Number of significant terms: 720

Double check that it's the same number of terms: 477

Number of significant terms: 308



[1] "Analyzing: Dendritic_cells_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9736

Number of significant terms: 1957

Double check that it's the same number of terms: 1272

Number of significant terms: 793



[1] "Analyzing: Dendritic_cells_G1C_vs_RBD.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9585

Number of significant terms: 1278

Double check that it's the same number of terms: 931

Number of significant terms: 499



[1] "Analyzing: Dendritic_cells_RBD_vs_GFP.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9487

Number of significant terms: 1177

Double check that it's the same number of terms: 769

Number of significant terms: 396



[1] "Analyzing: Eo_Baso_prog._G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9464

Number of significant terms: 166

Double check that it's the same number of terms: 92

Number of significant terms: 63



[1] "Analyzing: Eo_Baso_prog._G1C_vs_RBD.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9455

Number of significant terms: 167

Double check that it's the same number of terms: 97

Number of significant terms: 54



[1] "Analyzing: Eo_Baso_prog._RBD_vs_GFP.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9192

Number of significant terms: 58

Double check that it's the same number of terms: 23

Number of significant terms: 6



[1] "Analyzing: Ery_Mk_prog._G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 3747

Number of significant terms: 88

Double check that it's the same number of terms: 45

Number of significant terms: 29



[1] "Analyzing: Ery_Mk_prog._G1C_vs_RBD.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 2258

Number of significant terms: 79

Double check that it's the same number of terms: 15

Number of significant terms: 11



[1] "Analyzing: Ery_Mk_prog._RBD_vs_GFP.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 6531

Number of significant terms: 338

Double check that it's the same number of terms: 203

Number of significant terms: 57



[1] "Analyzing: Erythroblasts_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 2530

Number of significant terms: 615

Double check that it's the same number of terms: 299

Number of significant terms: 177



[1] "Analyzing: Erythroblasts_G1C_vs_RBD.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 7109

Number of significant terms: 791

Double check that it's the same number of terms: 456

Number of significant terms: 263



[1] "Analyzing: Erythroblasts_RBD_vs_GFP.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 7606

Number of significant terms: 969

Double check that it's the same number of terms: 617

Number of significant terms: 262



[1] "Analyzing: Gran_Mono_prog._G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9368

Number of significant terms: 737

Double check that it's the same number of terms: 402

Number of significant terms: 189



[1] "Analyzing: Gran_Mono_prog._G1C_vs_RBD.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 6101

Number of significant terms: 739

Double check that it's the same number of terms: 402

Number of significant terms: 175



[1] "Analyzing: Gran_Mono_prog._RBD_vs_GFP.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 8776

Number of significant terms: 260

Double check that it's the same number of terms: 184

Number of significant terms: 136



[1] "Analyzing: large_pre-B._G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9415

Number of significant terms: 714

Double check that it's the same number of terms: 493

Number of significant terms: 258



[1] "Analyzing: large_pre-B._G1C_vs_RBD.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 7445

Number of significant terms: 674

Double check that it's the same number of terms: 511

Number of significant terms: 294



[1] "Analyzing: large_pre-B._RBD_vs_GFP.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 4216

Number of significant terms: 673

Double check that it's the same number of terms: 466

Number of significant terms: 303



[1] "Analyzing: LMPPs_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9778

Number of significant terms: 280

Double check that it's the same number of terms: 151

Number of significant terms: 111



[1] "Analyzing: LMPPs_G1C_vs_RBD.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9758

Number of significant terms: 1716

Double check that it's the same number of terms: 1150

Number of significant terms: 718



[1] "Analyzing: LMPPs_RBD_vs_GFP.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9567

Number of significant terms: 1226

Double check that it's the same number of terms: 866

Number of significant terms: 526



[1] "Analyzing: Mk_prog._G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 7009

Number of significant terms: 394

Double check that it's the same number of terms: 273

Number of significant terms: 155



[1] "Analyzing: Mk_prog._G1C_vs_RBD.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 4276

Number of significant terms: 141

Double check that it's the same number of terms: 78

Number of significant terms: 41



[1] "Analyzing: Mk_prog._RBD_vs_GFP.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 5817

Number of significant terms: 63

Double check that it's the same number of terms: 33

Number of significant terms: 15



[1] "Analyzing: Mono_prog._G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 6513

Number of significant terms: 577

Double check that it's the same number of terms: 303

Number of significant terms: 172



[1] "Analyzing: Mono_prog._G1C_vs_RBD.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 6415

Number of significant terms: 258

Double check that it's the same number of terms: 144

Number of significant terms: 88



[1] "Analyzing: Mono_prog._RBD_vs_GFP.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 8694

Number of significant terms: 355

Double check that it's the same number of terms: 207

Number of significant terms: 84



[1] "Analyzing: Monocytes_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9968

Number of significant terms: 1555

Double check that it's the same number of terms: 1080

Number of significant terms: 686



[1] "Analyzing: Monocytes_G1C_vs_RBD.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9999

Number of significant terms: 1447

Double check that it's the same number of terms: 999

Number of significant terms: 578



[1] "Analyzing: Monocytes_RBD_vs_GFP.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9909

Number of significant terms: 1449

Double check that it's the same number of terms: 847

Number of significant terms: 464



[1] "Analyzing: Neutro_prog._G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 1995

Number of significant terms: 138

Double check that it's the same number of terms: 68

Number of significant terms: 35



[1] "Analyzing: Neutro_prog._G1C_vs_RBD.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9314

Number of significant terms: 679

Double check that it's the same number of terms: 388

Number of significant terms: 227



[1] "Analyzing: Neutro_prog._RBD_vs_GFP.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 8544

Number of significant terms: 269

Double check that it's the same number of terms: 200

Number of significant terms: 134



[1] "Analyzing: Neutrophils_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9719

Number of significant terms: 3369

Double check that it's the same number of terms: 2608

Number of significant terms: 1667



[1] "Analyzing: Neutrophils_G1C_vs_RBD.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9911

Number of significant terms: 3398

Double check that it's the same number of terms: 2473

Number of significant terms: 1554



[1] "Analyzing: Neutrophils_RBD_vs_GFP.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 10392

Number of significant terms: 1104

Double check that it's the same number of terms: 555

Number of significant terms: 325



[1] "Analyzing: NK_cells_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9715

Number of significant terms: 895

Double check that it's the same number of terms: 540

Number of significant terms: 338



[1] "Analyzing: NK_cells_G1C_vs_RBD.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9724

Number of significant terms: 629

Double check that it's the same number of terms: 366

Number of significant terms: 211



[1] "Analyzing: NK_cells_RBD_vs_GFP.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9749

Number of significant terms: 219

Double check that it's the same number of terms: 130

Number of significant terms: 74



[1] "Analyzing: pro-B_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 8046

Number of significant terms: 1107

Double check that it's the same number of terms: 778

Number of significant terms: 396



[1] "Analyzing: pro-B_G1C_vs_RBD.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9338

Number of significant terms: 1248

Double check that it's the same number of terms: 904

Number of significant terms: 431



[1] "Analyzing: pro-B_RBD_vs_GFP.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 5191

Number of significant terms: 106

Double check that it's the same number of terms: 57

Number of significant terms: 22



[1] "Analyzing: small_pre-B._G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 9076

Number of significant terms: 2057

Double check that it's the same number of terms: 1234

Number of significant terms: 781



[1] "Analyzing: small_pre-B._G1C_vs_RBD.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 8684

Number of significant terms: 1179

Double check that it's the same number of terms: 786

Number of significant terms: 522



[1] "Analyzing: small_pre-B._RBD_vs_GFP.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 8809

Number of significant terms: 863

Double check that it's the same number of terms: 655

Number of significant terms: 403



[1] "Analyzing: T_cells_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 6961

Number of significant terms: 467

Double check that it's the same number of terms: 288

Number of significant terms: 187



[1] "Analyzing: T_cells_G1C_vs_RBD.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 7425

Number of significant terms: 642

Double check that it's the same number of terms: 392

Number of significant terms: 251



[1] "Analyzing: T_cells_RBD_vs_GFP.dds.res_G1C_vs_GFP"


Running FGSEA

Number of total enriched terms: 8721

Number of significant terms: 976

Double check that it's the same number of terms: 746

Number of significant terms: 507



[1] "Analyzing: B_cell_G1C_vs_GFP.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9783

Number of significant terms: 1734

Double check that it's the same number of terms: 1026

Number of significant terms: 627



[1] "Analyzing: B_cell_G1C_vs_RBD.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9577

Number of significant terms: 1932

Double check that it's the same number of terms: 1158

Number of significant terms: 717



[1] "Analyzing: B_cell_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9689

Number of significant terms: 734

Double check that it's the same number of terms: 483

Number of significant terms: 309



[1] "Analyzing: Dendritic_cells_G1C_vs_GFP.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9736

Number of significant terms: 1862

Double check that it's the same number of terms: 1203

Number of significant terms: 743



[1] "Analyzing: Dendritic_cells_G1C_vs_RBD.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9585

Number of significant terms: 1236

Double check that it's the same number of terms: 897

Number of significant terms: 481



[1] "Analyzing: Dendritic_cells_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9487

Number of significant terms: 1102

Double check that it's the same number of terms: 719

Number of significant terms: 365



[1] "Analyzing: Eo_Baso_prog._G1C_vs_GFP.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9464

Number of significant terms: 178

Double check that it's the same number of terms: 98

Number of significant terms: 66



[1] "Analyzing: Eo_Baso_prog._G1C_vs_RBD.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9455

Number of significant terms: 177

Double check that it's the same number of terms: 104

Number of significant terms: 59



[1] "Analyzing: Eo_Baso_prog._RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9192

Number of significant terms: 55

Double check that it's the same number of terms: 22

Number of significant terms: 5



[1] "Analyzing: Ery_Mk_prog._G1C_vs_GFP.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 3747

Number of significant terms: 82

Double check that it's the same number of terms: 44

Number of significant terms: 28



[1] "Analyzing: Ery_Mk_prog._G1C_vs_RBD.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 2258

Number of significant terms: 67

Double check that it's the same number of terms: 11

Number of significant terms: 9



[1] "Analyzing: Ery_Mk_prog._RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 6531

Number of significant terms: 322

Double check that it's the same number of terms: 191

Number of significant terms: 51

Number of total enriched terms: 7606

Number of significant terms: 925

Double check that it's the same number of terms: 592

Number of significant terms: 247



[1] "Analyzing: Gran_Mono_prog._G1C_vs_GFP.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9368

Number of significant terms: 691

Double check that it's the same number of terms: 379

Number of significant terms: 174



[1] "Analyzing: Gran_Mono_prog._G1C_vs_RBD.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 6101

Number of significant terms: 792

Double check that it's the same number of terms: 436

Number of significant terms: 197



[1] "Analyzing: Gran_Mono_prog._RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 8776

Number of significant terms: 243

Double check that it's the same number of terms: 173

Number of significant terms: 128



[1] "Analyzing: large_pre-B._G1C_vs_GFP.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9415

Number of significant terms: 631

Double check that it's the same number of terms: 431

Number of significant terms: 225



[1] "Analyzing: large_pre-B._G1C_vs_RBD.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 7445

Number of significant terms: 675

Double check that it's the same number of terms: 506

Number of significant terms: 289



[1] "Analyzing: large_pre-B._RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 4216

Number of significant terms: 711

Double check that it's the same number of terms: 490

Number of significant terms: 314



[1] "Analyzing: LMPPs_G1C_vs_GFP.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9778

Number of significant terms: 297

Double check that it's the same number of terms: 162

Number of significant terms: 116



[1] "Analyzing: LMPPs_G1C_vs_RBD.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9758

Number of significant terms: 1607

Double check that it's the same number of terms: 1066

Number of significant terms: 672



[1] "Analyzing: LMPPs_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9567

Number of significant terms: 1260

Double check that it's the same number of terms: 886

Number of significant terms: 548



[1] "Analyzing: Mk_prog._G1C_vs_GFP.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 7009

Number of significant terms: 342

Double check that it's the same number of terms: 236

Number of significant terms: 134



[1] "Analyzing: Mk_prog._G1C_vs_RBD.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 4276

Number of significant terms: 157

Double check that it's the same number of terms: 86

Number of significant terms: 47



[1] "Analyzing: Mk_prog._RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 5817

Number of significant terms: 76

Double check that it's the same number of terms: 44

Number of significant terms: 20



[1] "Analyzing: Mono_prog._G1C_vs_GFP.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 6513

Number of significant terms: 540

Double check that it's the same number of terms: 282

Number of significant terms: 157



[1] "Analyzing: Mono_prog._G1C_vs_RBD.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 6415

Number of significant terms: 247

Double check that it's the same number of terms: 139

Number of significant terms: 87



[1] "Analyzing: Mono_prog._RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 8694

Number of significant terms: 254

Double check that it's the same number of terms: 155

Number of significant terms: 60



[1] "Analyzing: Monocytes_G1C_vs_GFP.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9968

Number of significant terms: 1695

Double check that it's the same number of terms: 1185

Number of significant terms: 752



[1] "Analyzing: Monocytes_G1C_vs_RBD.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9999

Number of significant terms: 1423

Double check that it's the same number of terms: 994

Number of significant terms: 572



[1] "Analyzing: Monocytes_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9909

Number of significant terms: 1541

Double check that it's the same number of terms: 911

Number of significant terms: 502



[1] "Analyzing: Neutro_prog._G1C_vs_GFP.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 1995

Number of significant terms: 132

Double check that it's the same number of terms: 65

Number of significant terms: 34



[1] "Analyzing: Neutro_prog._G1C_vs_RBD.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9314

Number of significant terms: 641

Double check that it's the same number of terms: 359

Number of significant terms: 208



[1] "Analyzing: Neutro_prog._RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 8544

Number of significant terms: 256

Double check that it's the same number of terms: 191

Number of significant terms: 122



[1] "Analyzing: Neutrophils_G1C_vs_GFP.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9719

Number of significant terms: 3237

Double check that it's the same number of terms: 2524

Number of significant terms: 1614



[1] "Analyzing: Neutrophils_G1C_vs_RBD.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9911

Number of significant terms: 3458

Double check that it's the same number of terms: 2517

Number of significant terms: 1584



[1] "Analyzing: Neutrophils_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 10392

Number of significant terms: 1084

Double check that it's the same number of terms: 555

Number of significant terms: 320



[1] "Analyzing: NK_cells_G1C_vs_GFP.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9715

Number of significant terms: 922

Double check that it's the same number of terms: 557

Number of significant terms: 346



[1] "Analyzing: NK_cells_G1C_vs_RBD.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9724

Number of significant terms: 654

Double check that it's the same number of terms: 384

Number of significant terms: 217



[1] "Analyzing: NK_cells_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9749

Number of significant terms: 189

Double check that it's the same number of terms: 110

Number of significant terms: 59



[1] "Analyzing: pro-B_G1C_vs_GFP.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 8046

Number of significant terms: 1080

Double check that it's the same number of terms: 764

Number of significant terms: 386



[1] "Analyzing: pro-B_G1C_vs_RBD.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9338

Number of significant terms: 1391

Double check that it's the same number of terms: 1003

Number of significant terms: 487



[1] "Analyzing: pro-B_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 5191

Number of significant terms: 94

Double check that it's the same number of terms: 48

Number of significant terms: 17



[1] "Analyzing: small_pre-B._G1C_vs_GFP.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 9076

Number of significant terms: 1988

Double check that it's the same number of terms: 1193

Number of significant terms: 760



[1] "Analyzing: small_pre-B._G1C_vs_RBD.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 8684

Number of significant terms: 1195

Double check that it's the same number of terms: 795

Number of significant terms: 527



[1] "Analyzing: small_pre-B._RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 8809

Number of significant terms: 878

Double check that it's the same number of terms: 665

Number of significant terms: 403



[1] "Analyzing: T_cells_G1C_vs_GFP.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 6961

Number of significant terms: 473

Double check that it's the same number of terms: 296

Number of significant terms: 189



[1] "Analyzing: T_cells_G1C_vs_RBD.dds.res_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 7425

Number of significant terms: 715

Double check that it's the same number of terms: 439

Number of significant terms: 277



[1] "Analyzing: T_cells_RBD_vs_GFP"


Running FGSEA

Number of total enriched terms: 8721

Number of significant terms: 969

Double check that it's the same number of terms: 738

Number of significant terms: 504



[1] "Analyzing: B_cell_G1C_vs_GFP.dds.res_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 9783

Number of significant terms: 1552

Double check that it's the same number of terms: 906

Number of significant terms: 556



[1] "Analyzing: B_cell_G1C_vs_RBD"


Running FGSEA

Number of total enriched terms: 9577

Number of significant terms: 1860

Double check that it's the same number of terms: 1111

Number of significant terms: 682



In [None]:
# setup
tresh = 0.1
set.family.Keep = c("CC")

In [None]:
set.seed(999)

for (c.i in seq_along(contrasts)){
    contrast.use = contrasts[c.i]
        for (i in seq_along(files)){
            # load table
            file.use = files[i]
            cell.use = str_split_fixed(file.use, paste0("_", contrast.use), n = 2)[,1]
            De.analysis = paste0(cell.use, "_", contrast.use)
            print(paste0("Analyzing: ", De.analysis))
            res = read.table(paste0(dir,file.use), sep = '\t')

            # Formatting for FGSEA
            res$rank = res$stat
            res = data.frame("SYMBOL" = rownames(res),
                             "stat" = res$rank)
            res = res[!grepl(pattern = "NA", x = res$SYMBOL),]
            ranks <- deframe(res)
            ranks = sort(ranks, decreasing=TRUE)

            tryCatch({
            message("Running FGSEA")
            fgseaRes <- fgseaMultilevel(pathways=GMT,
                                        stats=ranks,
                                        minSize  = 10, 
                                        maxSize  = 500)
            message("Number of total enriched terms: ", nrow(fgseaRes))
            fgseaRes.tresh = fgseaRes[fgseaRes$padj < tresh,]
            message("Number of significant terms: ", nrow(fgseaRes.tresh))
            # Add categories
            fgseaRes.tresh = merge(fgseaRes.tresh, gene.set.db, by = "pathway")
            message("Double check that it's the same number of terms: ", nrow(fgseaRes.tresh))
            # Subset for GO families of interest
            fgseaRes.tresh.use = fgseaRes.tresh[fgseaRes.tresh$set.family %in% set.family.Keep, ]
            message("Number of significant terms: ", nrow(fgseaRes.tresh.use))
            # Collapse terms 
            collapsedPathways <- collapsePathways(fgseaRes.tresh.use[order(pval)][padj < tresh], 
                                                  GMT, ranks)
            mainPathways <- fgseaRes.tresh.use[pathway %in% collapsedPathways$mainPathways][
                                     order(-NES), pathway]
            fgseaResMain <- fgseaRes.tresh.use[match(mainPathways, pathway)]
            # Save them
            file = paste(outdir, "GSEA.CC_", De.analysis, ".res", sep = "")
            fwrite(fgseaRes.tresh.use, file = file, sep = "\t")
            # print plot
            plot = ggplot(fgseaResMain, aes(x = reorder(term, NES), y = NES)) +
              geom_col(aes(fill= NES>0)) +
              theme_bw()+
              labs(x="", y="Normalized Enrichment Score",
                   title= paste(De.analysis,
                                " \n Gene Ontologies")) + 
                theme(axis.text=element_text(size=10), axis.title=element_text(size=16, face="bold"),
                axis.text.x=element_text(), plot.title=element_text(size=16, face="bold", hjust=0.5)) +
              coord_flip() +
            facet_nested(rows = vars(set.family, set), drop = TRUE, scales = "free", space = "free")
            ggsave(filename = paste(outdir, "GSEA.CC_", De.analysis, ".pdf", sep = ""),
                                    height = 16, width = 20,
                                    plot = plot, device = "pdf")
                }, error = function(e) {
            message("Error or no results found for ", De.analysis, ": ", e$message)
        })
    }
}