In [75]:
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(tidyr))
suppressPackageStartupMessages(library(tibble))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(stringr))
library(ggrepel)
library(reshape2)
library(ggsci)
library(rstatix)
library(ggpubr)
library(data.table)
library(ggtern)
library(viridis)
library(scales)
library(RColorBrewer)
library(parallel)
library(purrr)
source('/home//jupyter/BRI_Figures_Final_V2/helper_function/helper_function_IHA.r')

In [76]:
# Sample metadata table; it is filtered by flu year / visit and joined on
# pbmc_sample_id further down.
meta_data <- read.csv(
  "/home/jupyter/BRI_Figures_Final_V2/Figure5/01_Frequency_Comparison/selected_samples_with_acutal_flu_year.csv"
)

# Get Pathway Scores

In [77]:
# SLEA - assigning pathway scores
#
# Scores one gene set per sample as a permutation z-score: the mean expression
# of the gene-set genes in each sample is compared with the means of `nperm`
# random gene sets of the same size drawn from the rows of `expressionSet`.
#
# Args:
#   expressionSet: numeric matrix or data.frame with genes in rows (gene ids as
#                  row names) and samples in columns.
#   geneSet:       character vector of gene ids; only ids present in
#                  rownames(expressionSet) are used.
#   nperm:         number of random gene sets (default 1000, the value that
#                  used to be hard-coded). Permutation j is drawn with seed j,
#                  so results are reproducible.
#
# Returns:
#   A one-column data.frame, column named ".", with one row per sample holding
#   the z-score. Row names are the sample (column) names of `expressionSet`
#   when those are unique.
doSLEA <- function(expressionSet, geneSet, nperm = 1000) {
  stopifnot(is.numeric(nperm), length(nperm) == 1L, nperm >= 2)

  # Each permutation reseeds the global RNG; restore the caller's RNG state on
  # exit so calling this function does not change later random draws.
  global_env <- globalenv()
  had_seed <- exists(".Random.seed", envir = global_env, inherits = FALSE)
  if (had_seed) {
    saved_seed <- get(".Random.seed", envir = global_env, inherits = FALSE)
  }
  on.exit({
    if (had_seed) {
      assign(".Random.seed", saved_seed, envir = global_env)
    } else if (exists(".Random.seed", envir = global_env, inherits = FALSE)) {
      rm(".Random.seed", envir = global_env)
    }
  }, add = TRUE)

  # expression is used as supplied (no per-gene scaling)
  exprsMat <- expressionSet
  #exprsMat <- t(scale(t(exprsMat)))

  # genes of the gene set that are present in the expression table
  comm <- intersect(geneSet, rownames(exprsMat))
  if (length(comm) == 0L) {
    warning("doSLEA: no gene of the gene set is present in the expression ",
            "table; all scores will be NaN", call. = FALSE)
  }

  # mean expression of the gene set per sample; drop = FALSE keeps the
  # two-dimensional shape for one-gene sets and one-sample tables
  gsM <- colMeans(exprsMat[comm, , drop = FALSE])

  # null distribution: per-sample means of random gene sets of the same size
  n_genes <- nrow(exprsMat)
  set_size <- length(comm)
  perm_means <- lapply(seq_len(nperm), function(j) {
    # set seed for every permutation
    set.seed(j)
    colMeans(exprsMat[sample.int(n_genes, set_size), , drop = FALSE])
  })
  permDF <- do.call(rbind, perm_means)

  zscore <- (gsM - colMeans(permDF)) / apply(permDF, 2, sd)

  # The score column is named "." because the calling code selects it by that
  # name.
  sleaDF <- as.data.frame(zscore)
  colnames(sleaDF) <- "."
  sleaDF
}

In [78]:
# Samples of flu year 2020-2021 at the "Flu Year 1 Day 7" visit.
# NOTE(review): the `_D0` suffix does not match the filter, which keeps the
# Day 7 visit; the name is kept because later cells refer to it.
meta_data_selected_RNA_subset_D0 <- meta_data %>%
  filter(
    Flu_Year == "2020-2021",
    sample.visitName %in% c("Flu Year 1 Day 7")
  )


In [79]:
# One expression CSV per selected sample, named <pbmc_sample_id>.csv
file_list <- paste0(
  "/home/jupyter/BRI_Figures_Final_V2/Dataset/scRNA/BRI/Average_LogNormalized_Expression/Average_LogNormalized_Expression_of_Celltypes_by_Sample_AIFI_L3/",
  meta_data_selected_RNA_subset_D0$pbmc_sample_id,
  ".csv"
)

# Read every file; the result is used below as a list of data frames
df_list <- read_pseudobulk_expression(file_list)


[1] "Total reading time: 5.47800000000001 seconds"
[1] "The length of the list matches the length of the input path."


In [80]:
# Gene table restricted to the Core memory B cell rows; its `gene` column is
# used below to pick the rows of the expression matrix.
background_gene <- read.csv(
  "/home/jupyter/BRI_Figures_Final_V2/Figure5/02_DEGs_and_EnrichmentAnalysis/filtered_gene_Y2020-2021_Y2021-2022_D7.csv"
) %>%
  filter(AIFI_L3 == "Core memory B cell")

In [81]:
# Pathways to score, in the order their columns appear in the output
Selected_Pathways <- c(
  "HALLMARK_TNFA_SIGNALING_VIA_NFKB",
  "HALLMARK_PI3K_AKT_MTOR_SIGNALING",
  "HALLMARK_MTORC1_SIGNALING",
  "HALLMARK_MYC_TARGETS_V1",
  "HALLMARK_MYC_TARGETS_V2",
  "HALLMARK_ALLOGRAFT_REJECTION"
)

In [82]:
# Score every selected pathway in Core memory B cells with doSLEA() and keep
# one two-column table (subject id, score) per pathway in `res_list`.
res_list <- list()

# Everything below up to the loop does not depend on the pathway, so it is
# prepared once instead of once per iteration.
gsea_results <- read.csv('/home/jupyter/BRI_Figures_Final_V2/Figure5/02_DEGs_and_EnrichmentAnalysis/gsea_result_all.csv')

# Per sample: rows = background genes, columns = the Core memory B cell column(s)
celltype_list <- lapply(df_list, function(df) {
  df[background_gene$gene,
     grep('Core memory B cell', names(df), fixed = TRUE),
     drop = FALSE]
})

exp_matrix <- do.call(cbind, celltype_list)
# Strip everything from the first ":" so the column names can be joined to
# meta_data$pbmc_sample_id (presumably columns are "<pbmc_sample_id>:<celltype>"
# -- TODO confirm against read_pseudobulk_expression)
colnames(exp_matrix) <- sub(":.*", "", colnames(exp_matrix))
rownames(meta_data) <- meta_data$pbmc_sample_id

for (pathway_single in Selected_Pathways) {
  gene_set <- gsea_results %>%
    filter(pathway == pathway_single,
           celltype == 'Core memory B cell')

  if (nrow(gene_set) != 1) {
    warning("Expected exactly one GSEA row for pathway '", pathway_single,
            "' in Core memory B cell, found ", nrow(gene_set),
            call. = FALSE)
  }

  # NOTE(review): eval(parse()) executes text read from the CSV. It is kept so
  # the leadingEdge column is interpreted exactly as before (presumably it
  # holds R code such as c("GENE1", "GENE2")); consider storing the genes in a
  # delimiter-separated column and using strsplit() instead.
  gene_vector <- eval(parse(text = gene_set$leadingEdge))

  res <- doSLEA(exp_matrix, gene_vector)

  # doSLEA() returns the samples as row names; turn them into a join key
  res$pbmc_sample_id <- rownames(res)
  res <- left_join(res, meta_data, by = "pbmc_sample_id")

  # "." is the score column produced by doSLEA()
  res <- res[c('subject.subjectGuid', '.')]
  colnames(res) <- c('subject.subjectGuid', paste0("CoreMemoryB_", pathway_single))
  res_list[[pathway_single]] <- res
}

[1m[22mJoining with `by = join_by(pbmc_sample_id)`
[1m[22mJoining with `by = join_by(pbmc_sample_id)`
[1m[22mJoining with `by = join_by(pbmc_sample_id)`
[1m[22mJoining with `by = join_by(pbmc_sample_id)`
[1m[22mJoining with `by = join_by(pbmc_sample_id)`
[1m[22mJoining with `by = join_by(pbmc_sample_id)`


In [83]:
# Merge the per-pathway score tables into one table keyed by subject
pathway_variable <- reduce(
  res_list,
  function(acc, nxt) full_join(acc, nxt, by = "subject.subjectGuid")
)

# HAI assay: Phuket fold change

In [84]:
# HAI table for Phuket; only subject id and fold_change are used below
df <- read.csv(
  "/home//jupyter/BRI_Figures_Final_V2/Extended_Figure5_1//05_HAI/FC_phuket.csv"
)

In [85]:
# Keep the subject id and the fold change only
df <- df[, c("subject.subjectGuid", "fold_change"), drop = FALSE]

In [86]:
# Give the value column an assay-specific name for the combined table
names(df) <- c("subject.subjectGuid", "HAI_fold_change")

In [87]:
# Keep the HAI table under its own name; `df` is reused by the next section
HAI_variable <- df

# MSD assay phuket Fold Changes

In [88]:
# MSD table for Phuket, flu year 2020-2021
df <- read.csv(
  "/home//jupyter/BRI_Figures_Final_V2/Figure5/04_MSD/MSD_Normalized_Y2020-2021_Phuket.csv"
)

In [89]:
# Keep the Day 7 visit only
df <- filter(df, Visit == "Flu Year 1 Day 7")

In [90]:
# Keep the subject id and the normalized concentration only
df <- df[, c("subject.subjectGuid", "Normalized_Concentration"), drop = FALSE]

In [91]:
# NOTE(review): the source column is Normalized_Concentration but it is
# exported as a "fold change" -- confirm the normalization makes that accurate.
names(df) <- c("subject.subjectGuid", "MSD_phuket_fold_change")

In [92]:
# Keep the MSD table under its own name; `df` is reused by the next section
MSD_variable <- df

# Frequency Changes

In [129]:
# Cell-type frequency table with Day 0 and Day 7 rows (Flu_Day column)
df <- read.csv(
  "/home//jupyter/BRI_Figures_Final_V2/Figure5/01_Frequency_Comparison/freq_actual_flu_year_1_D0_D7.csv"
)

In [134]:
# Per-subject Day 7 minus Day 0 difference of AIFI_L3_clr for one cell type in
# flu year 2020-2021.
#
# Args:
#   freq_df:  frequency table with columns Flu_Year, AIFI_L3,
#             subject.subjectGuid, AIFI_L3_clr and Flu_Day (values "Day 0" and
#             "Day 7" are required).
#   celltype: value of AIFI_L3 to keep.
#   out_col:  name of the output column holding the difference.
#
# Returns:
#   An ungrouped table with columns subject.subjectGuid and `out_col`.
compute_clr_change <- function(freq_df, celltype, out_col) {
  freq_df %>%
    filter(Flu_Year == '2020-2021', AIFI_L3 == !!celltype) %>%
    select(subject.subjectGuid, AIFI_L3_clr, Flu_Day) %>%
    # one row per subject, one column per Flu_Day value; spread() is kept
    # (rather than pivot_wider()) so the row order of the result is unchanged
    tidyr::spread(Flu_Day, AIFI_L3_clr) %>%
    dplyr::mutate(!!out_col := `Day 7` - `Day 0`) %>%
    select(subject.subjectGuid, all_of(out_col))
}

# The three tables previously came from three copies of the same pipeline and
# were returned still grouped by subject; they are now plain, ungrouped tables.
df_CoreMemoryB <- compute_clr_change(df, 'Core memory B cell',
                                     'CoreMemoryB_Freq_Fold_Changes')

df_Plasma <- compute_clr_change(df, 'Plasma cell',
                                'Plasma_Freq_Fold_Changes')

df_CD95memB <- compute_clr_change(df, 'CD95 memory B cell',
                                  'CD95memB_Freq_Fold_Changes')

In [135]:
# Plasma cell table first, then the two memory B cell tables; both joins use
# the columns the tables share as the key
scRNA_Fold_Change_variable <- df_Plasma %>%
  left_join(df_CoreMemoryB)
scRNA_Fold_Change_variable <- scRNA_Fold_Change_variable %>%
  left_join(df_CD95memB)

[1m[22mJoining with `by = join_by(subject.subjectGuid)`
[1m[22mJoining with `by = join_by(subject.subjectGuid)`


# Mean Expression CD27- effector B cell

In [136]:
# Samples of flu year 2020-2021 at the "Flu Year 1 Day 7" visit
meta_data_selected_RNA_subset_D7 <- meta_data %>%
  filter(
    Flu_Year == "2020-2021",
    sample.visitName %in% c("Flu Year 1 Day 7")
  )


In [137]:
# One expression CSV per Day 7 sample, named <pbmc_sample_id>.csv.
# The sample ids come from the Day 7 subset defined in this section; the
# `_D0`-named subset used here before is built with the same filter, so the
# paths are unchanged.
aggregated_count_file_list <- paste0(
  "/home/jupyter/BRI_Figures_Final_V2/Dataset/scRNA/BRI/Average_LogNormalized_Expression/Average_LogNormalized_Expression_of_Celltypes_by_Sample_AIFI_L3/",
  meta_data_selected_RNA_subset_D7$pbmc_sample_id,
  ".csv"
)
df_list <- read_pseudobulk_expression(aggregated_count_file_list)

[1] "Total reading time: 5.73400000000004 seconds"
[1] "The length of the list matches the length of the input path."


In [138]:
# Genes whose mean expression is exported for this cell type
selected_genes <- c("IGHG3", "CD19")


In [139]:
# Long-format expression of the selected genes in CD27- effector B cells
# (the cells below rely on Gene, Mean_Expression and pbmc_sample_id columns)
df <- filter_genes_and_celltype(
  df_list,
  selected_genes,
  c("CD27- effector B cell"),
  longformat = TRUE
)




In [140]:
# Attach the sample metadata (natural join on the shared columns)
df <- df %>%
  left_join(meta_data_selected_RNA_subset_D7)

[1m[22mJoining with `by = join_by(pbmc_sample_id)`


In [141]:
# Per-subject mean expression, one table per gene
expression_IGHG3 <- as.data.frame(
  select(filter(df, Gene == "IGHG3"), subject.subjectGuid, Mean_Expression)
)

expression_CD19 <- as.data.frame(
  select(filter(df, Gene == "CD19"), subject.subjectGuid, Mean_Expression)
)

In [142]:
# Encode gene and cell type in the value column names
names(expression_IGHG3) <- c(
  "subject.subjectGuid",
  "Mean_Expression_IGHG3_CD27negB"
)
names(expression_CD19) <- c(
  "subject.subjectGuid",
  "Mean_Expression_CD19_CD27negB"
)

In [143]:
# Both genes side by side, one row per subject
mean_expression_variable_CD27negB <- full_join(
  expression_IGHG3,
  expression_CD19,
  by = "subject.subjectGuid"
)

# Mean Expression CD95 memory

In [144]:
# Samples of flu year 2020-2021 at the "Flu Year 1 Day 7" visit
meta_data_selected_RNA_subset_D7 <- meta_data %>%
  filter(
    Flu_Year == "2020-2021",
    sample.visitName %in% c("Flu Year 1 Day 7")
  )


In [145]:
# One expression CSV per Day 7 sample, named <pbmc_sample_id>.csv.
# The sample ids come from the Day 7 subset defined in this section; the
# `_D0`-named subset used here before is built with the same filter, so the
# paths are unchanged.
aggregated_count_file_list <- paste0(
  "/home/jupyter/BRI_Figures_Final_V2/Dataset/scRNA/BRI/Average_LogNormalized_Expression/Average_LogNormalized_Expression_of_Celltypes_by_Sample_AIFI_L3/",
  meta_data_selected_RNA_subset_D7$pbmc_sample_id,
  ".csv"
)
df_list <- read_pseudobulk_expression(aggregated_count_file_list)

[1] "Total reading time: 5.82299999999998 seconds"
[1] "The length of the list matches the length of the input path."


In [146]:
# Gene whose mean expression is exported for this cell type
selected_genes <- "IGHG4"


In [147]:
# Long-format expression of the selected gene in CD95 memory B cells
# (the cells below rely on Gene, Mean_Expression and pbmc_sample_id columns)
df <- filter_genes_and_celltype(
  df_list,
  selected_genes,
  c("CD95 memory B cell"),
  longformat = TRUE
)




In [148]:
# Attach the sample metadata (natural join on the shared columns)
df <- df %>%
  left_join(meta_data_selected_RNA_subset_D7)

[1m[22mJoining with `by = join_by(pbmc_sample_id)`


In [149]:
# Per-subject mean IGHG4 expression
expression_IGHG4 <- as.data.frame(
  select(filter(df, Gene == "IGHG4"), subject.subjectGuid, Mean_Expression)
)


In [150]:
# Encode gene and cell type in the value column name
names(expression_IGHG4) <- c(
  "subject.subjectGuid",
  "Mean_Expression_IGHG4_CD95memB"
)


In [151]:
# Keep the CD95 memory B table under its own name; expression_IGHG4 is
# overwritten by the next section
mean_expression_variable_CD95memB <- expression_IGHG4

# Mean Expression Core Mem B

In [152]:
# Samples of flu year 2020-2021 at the "Flu Year 1 Day 7" visit
meta_data_selected_RNA_subset_D7 <- meta_data %>%
  filter(
    Flu_Year == "2020-2021",
    sample.visitName %in% c("Flu Year 1 Day 7")
  )


In [153]:
# One expression CSV per Day 7 sample, named <pbmc_sample_id>.csv.
# The sample ids come from the Day 7 subset defined in this section; the
# `_D0`-named subset used here before is built with the same filter, so the
# paths are unchanged.
aggregated_count_file_list <- paste0(
  "/home/jupyter/BRI_Figures_Final_V2/Dataset/scRNA/BRI/Average_LogNormalized_Expression/Average_LogNormalized_Expression_of_Celltypes_by_Sample_AIFI_L3/",
  meta_data_selected_RNA_subset_D7$pbmc_sample_id,
  ".csv"
)
df_list <- read_pseudobulk_expression(aggregated_count_file_list)

[1] "Total reading time: 5.59299999999996 seconds"
[1] "The length of the list matches the length of the input path."


In [154]:
# Gene whose mean expression is exported for this cell type
selected_genes <- "IGHG4"


In [155]:
# Long-format expression of the selected gene in Core memory B cells
# (the cells below rely on Gene, Mean_Expression and pbmc_sample_id columns)
df <- filter_genes_and_celltype(
  df_list,
  selected_genes,
  c("Core memory B cell"),
  longformat = TRUE
)




In [156]:
# Attach the sample metadata (natural join on the shared columns)
df <- df %>%
  left_join(meta_data_selected_RNA_subset_D7)

[1m[22mJoining with `by = join_by(pbmc_sample_id)`


In [157]:
# Per-subject mean IGHG4 expression
expression_IGHG4 <- as.data.frame(
  select(filter(df, Gene == "IGHG4"), subject.subjectGuid, Mean_Expression)
)


In [158]:
# Encode gene and cell type in the value column name
names(expression_IGHG4) <- c(
  "subject.subjectGuid",
  "Mean_Expression_IGHG4_CorememB"
)


In [159]:
# Keep the Core memory B table under its own name for the final merge
mean_expression_variable_CorememB <- expression_IGHG4

# Combine them together

In [160]:
# Full-join all per-subject tables, left to right, on the subject id.
# NOTE(review): a table with several rows per subject would multiply rows
# here -- confirm each input has one row per subject.
combined_df <- reduce(
  list(
    pathway_variable,
    MSD_variable,
    HAI_variable,
    scRNA_Fold_Change_variable,
    mean_expression_variable_CorememB,
    mean_expression_variable_CD95memB,
    mean_expression_variable_CD27negB
  ),
  function(acc, nxt) full_join(acc, nxt, by = "subject.subjectGuid")
)

In [161]:
# Written to the working directory; write.csv() also writes the row names as
# a leading column by default
write.csv(combined_df, file = "BRI_Bcell_data_forcorrelation.csv")