In [1]:
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(tidyr))
suppressPackageStartupMessages(library(tibble))
suppressPackageStartupMessages(library(stringr))
library(parallel)
library(fgsea)
library(data.table)
source('/home//jupyter/BRI_Figures_Final_V2/helper_function/helper_function_IHA.r')


Attaching package: ‘data.table’


The following objects are masked from ‘package:dplyr’:

    between, first, last




In [2]:
library(robustbase)

In [14]:
# SLEA - assigning pathway scores
doSLEA <- function(expressionSet, geneSet) {
  # scale expression
  exprsMat <- expressionSet
  # extract expression of leGenes of each geneset
  comm <- intersect(geneSet, rownames(exprsMat))
  gsDF <- exprsMat[comm, ]
  # calculate mean expression per sample
  gsM <- colMeans(gsDF, na.rm = TRUE)
  # extract random genes of size of the geneSet from full probeset and calculate mean
  # and perform this for 'n' permutations
  nperm <- lapply(1:1000, function(j) {
    # set seed for every permutation
    set.seed(j)
    rGSDF <- exprsMat[sample.int(nrow(exprsMat),length(comm)), ]
    rGSM <- colMeans(rGSDF)
    return(value = rGSM)
  })
  permDF <- do.call(rbind, nperm)
  zscore <- (gsM - colMeans(permDF, na.rm = TRUE)) / apply(permDF,2,sd, na.rm = TRUE)
  sleaDF <- zscore %>% as.data.frame()
  return(value = sleaDF)
}

In [4]:
meta_data=read.csv("/home/jupyter/BRI_Figures_Final_V2/Extended-Figure4//01_Frequency_Comparison/selected_samples_with_acutal_flu_year.csv")

In [5]:
file_list<-paste0("/home/jupyter/BRI_Figures_Final_V2/Dataset/scRNA/BRI/Average_LogNormalized_Expression/Average_LogNormalized_Expression_of_Celltypes_by_Sample_AIFI_L3/",meta_data$pbmc_sample_id,".csv")

df_list<-read_pseudobulk_expression(file_list)


[1] "Total reading time: 24.465 seconds"
[1] "The length of the list matches the length of the input path."


In [11]:
background_gene<-read.csv('filtered_gene_Y2020-2021_Y2021-2022_D7.csv') 

In [15]:
combined_df <- data.frame()

for (celltype_single in c('CD27+ effector B cell', 'Core memory B cell', 'Plasma cell')) {
  
  background_gene_sub <- background_gene %>% filter(AIFI_L3 == celltype_single)
  
  celltype_list <- lapply(df_list, function(df) {
    df <- df[background_gene_sub$gene, grep(celltype_single, names(df), fixed = TRUE), drop = FALSE]
  })
  
  exp_matrix <- do.call(cbind, celltype_list)
  
  colnames(exp_matrix) <- sub(":.*", "", colnames(exp_matrix))
  rownames(meta_data) <- meta_data$pbmc_sample_id
  
  gene_set <- read.csv('gsea_result.csv') %>% filter(celltype == celltype_single) %>% arrange(NES)
  
  for (pathway_single in gene_set$pathway) {
    gene_set_sub <- gene_set %>% filter(pathway == pathway_single)
    gene_vector <- eval(parse(text = gene_set_sub$leadingEdge))
    res <- doSLEA(exp_matrix, gene_vector)
    colnames(res) <- "pathway_score"
    res$pbmc_sample_id <- rownames(res)
    res <- left_join(res, meta_data)
    
    res_summary <- res %>% group_by(Flu_Year, Flu_Day, cohort.cohortGuid) %>% 
      summarise(mean_pathway_score = mean(pathway_score))
    res_summary$celltype <- celltype_single
    res_summary$pathway <- pathway_single
    
    combined_df <- rbind(combined_df, res_summary)
  }
}


[1m[22mJoining with `by = join_by(pbmc_sample_id)`
[1m[22m`summarise()` has grouped output by 'Flu_Year', 'Flu_Day'. You can override
using the `.groups` argument.
[1m[22mJoining with `by = join_by(pbmc_sample_id)`
[1m[22m`summarise()` has grouped output by 'Flu_Year', 'Flu_Day'. You can override
using the `.groups` argument.
[1m[22mJoining with `by = join_by(pbmc_sample_id)`
[1m[22m`summarise()` has grouped output by 'Flu_Year', 'Flu_Day'. You can override
using the `.groups` argument.
[1m[22mJoining with `by = join_by(pbmc_sample_id)`
[1m[22m`summarise()` has grouped output by 'Flu_Year', 'Flu_Day'. You can override
using the `.groups` argument.
[1m[22mJoining with `by = join_by(pbmc_sample_id)`
[1m[22m`summarise()` has grouped output by 'Flu_Year', 'Flu_Day'. You can override
using the `.groups` argument.
[1m[22mJoining with `by = join_by(pbmc_sample_id)`
[1m[22m`summarise()` has grouped output by 'Flu_Year', 'Flu_Day'. You can override
using the `.groups` ar

In [16]:
write.csv(combined_df,"SLEA_3_Bcells.csv")