In [1]:
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(tidyr))
suppressPackageStartupMessages(library(tibble))
suppressPackageStartupMessages(library(stringr))
library(parallel)
library(fgsea)
library(data.table)
source('/home//jupyter/BRI_Figures_Final_V2/helper_function/helper_function_IHA.r')


Attaching package: ‘data.table’


The following objects are masked from ‘package:dplyr’:

    between, first, last




In [2]:
# Load the DESeq2 result table from the working directory.
df <- read.csv(file = "Deseq2_Result_Y2020-2021_Y2021-2022_D7.csv")

In [3]:
# Cell types to keep; rows of `df` whose `celltype` is not listed here are
# dropped before ranking and enrichment.
keep_celltypes <- c(
  "Activated memory B cell",
  "CD27+ effector B cell",
  "CD27- effector B cell",
  "CD95 memory B cell",
  "Core memory B cell",
  "Core naive B cell",
  "Early memory B cell",
  "ISG+ naive B cell",
  "Plasma cell",
  "Transitional B cell",
  "Type 2 polarized memory B cell"
)

df_filtered <- dplyr::filter(df, celltype %in% keep_celltypes)

In [12]:
# Per-gene ranking statistic for GSEA:
#   rank_metric = -log10(pvalue) * sign(log2FoldChange)
# Rows without an adjusted p-value get NA and are removed, as are rows whose
# pvalue or log2FoldChange is itself NA (the product is then NA).
#
# The p-value is floored at the smallest normalised double before taking the
# log: a p-value of exactly 0 would otherwise give +/-Inf, which passes the
# is.na() filter below and makes fgsea() stop on non-finite stats.
df_filtered <- df_filtered %>%
  dplyr::mutate(
    rank_metric = ifelse(
      is.na(padj),
      NA,
      -log10(pmax(pvalue, .Machine$double.xmin)) * sign(log2FoldChange)
    )
  ) %>%
  filter(!is.na(rank_metric))



In [13]:
# Run fgsea once per cell type found in df_filtered.
#
# Results are collected in two lists keyed by cell type; a cell type only gets
# an entry when at least one pathway has padj < 0.05:
#   pathway_df       - significant pathways as a data.frame, sorted by
#                      decreasing NES
#   all_combined_dfs - significant pathways in the order fgsea returned them,
#                      with an extra `celltype` column
all_combined_dfs <- list()
pathway_df <- list()

# The gene-set file is the same for every cell type, so read it only once.
pathways <- gmtPathways("Hallmark_pathways.gmt")

for (cell_type in unique(df_filtered$celltype)) {
  print(cell_type)

  cell_stats <- df_filtered %>% filter(celltype == cell_type)

  # Gene names become the names of the stats vector, so they must be unique
  # within a cell type.
  if (anyDuplicated(cell_stats$gene) > 0) {
    stop("Duplicate gene names for cell type: ", cell_type, call. = FALSE)
  }
  ranked_genes <- setNames(cell_stats$rank_metric, cell_stats$gene)

  # Re-seed before every run so each cell type's result is reproducible on
  # its own, regardless of which cell types ran before it.
  set.seed(42)
  fgseaResults <- fgsea(pathways = pathways, stats = ranked_genes)
  significantPathways <- fgseaResults[fgseaResults$padj < 0.05, ]

  # Skip if no significant pathways
  if (nrow(significantPathways) == 0) next

  # Work on a data.frame copy so that sorting does not reorder
  # significantPathways itself. A dedicated name is used instead of `df`,
  # which holds the DESeq2 input table read earlier.
  sorted_pathways <- as.data.frame(significantPathways)
  sorted_pathways <- as.data.frame(setorder(sorted_pathways, -NES))
  # Progress marker: printed only for cell types with significant pathways.
  print(1)
  pathway_df[[cell_type]] <- sorted_pathways

  # Tag the rows with their cell type and store them for later row-binding.
  significantPathways$celltype <- cell_type
  all_combined_dfs[[cell_type]] <- significantPathways
}
                        

[1] "Activated memory B cell"
[1] 1
[1] "CD27+ effector B cell"
[1] 1
[1] "CD27- effector B cell"
[1] 1
[1] "CD95 memory B cell"
[1] "Core memory B cell"
[1] 1
[1] "Core naive B cell"
[1] 1
[1] "Early memory B cell"
[1] "ISG+ naive B cell"
[1] "Plasma cell"
[1] 1
[1] "Transitional B cell"
[1] 1
[1] "Type 2 polarized memory B cell"


In [14]:
# Stack the per-cell-type significant-pathway tables into one table.
df <- do.call(what = rbind, args = all_combined_dfs)


In [18]:
# Convert the leadingEdge column to plain character strings so the table can
# be written with write.csv().
df$leadingEdge <- as.character(x = df$leadingEdge)


In [19]:
# Save the combined GSEA table to the working directory.
write.csv(x = df, file = "gsea_result.csv")

In [23]:
# Ranked DESeq2 rows for the CD27+ effector B cell subset only.
df_filtered_cd27pos_effector_B <- dplyr::filter(
  df_filtered,
  celltype == "CD27+ effector B cell"
)

In [24]:
# Save the CD27+ effector B cell subset to the working directory.
write.csv(
  x = df_filtered_cd27pos_effector_B,
  file = "df_filtered_cd27pos_effector_B.csv"
)

In [2]:
# Print the R version, platform and attached/loaded package versions for
# this session.
sessionInfo()

R version 4.3.2 (2023-10-31)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Ubuntu 20.04.6 LTS

Matrix products: default
BLAS/LAPACK: /opt/conda/lib/libopenblasp-r0.3.25.so;  LAPACK version 3.11.0

locale:
 [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
 [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
 [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
[10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   

time zone: Etc/UTC
tzcode source: system (glibc)

attached base packages:
[1] parallel  stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
[1] data.table_1.15.4 fgsea_1.28.0      stringr_1.5.1     tibble_3.2.1     
[5] tidyr_1.3.1       dplyr_1.1.4      

loaded via a namespace (and not attached):
 [1] Matrix_1.6-3        gtable_0.3.5        jsonlite_1.8.8     
 [4] compiler_4.3.2      crayon_1.5.2        tidyselect_1.2.1   
 [7] Rcpp