In [76]:
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(tidyr))
suppressPackageStartupMessages(library(tibble))
suppressPackageStartupMessages(library(stringr))
library(parallel)
library(DESeq2)
library(hise)
source('/home//jupyter/BRI_Figures_Final_V1/helper_function/helper_function_IHA.r')

# Read Meta Data

In [77]:
# Load the scRNA sample metadata table from the dataset folder.
meta_data <- read.csv(
  file = "/home/jupyter/BRI_Figures_Final_V1/Dataset/scRNA_meta_data-2024-05-09.csv"
)

# Perform Deseq2 on Young

In [78]:
# Keep only the rows for the "Flu Year 1 Day 0" visit of cohort BR1.
meta_data_subset <- meta_data %>%
  filter(
    sample.visitName == "Flu Year 1 Day 0",
    cohort.cohortGuid == "BR1"
  )


In [79]:
# Load the BR1 gene table from the working directory; its AIFI_L3 and gene
# columns are used when selecting genes per cell type.
filtered_gene_set <- read.csv(file = "fitlered_gene_BR1.csv")

In [80]:
# Build one CSV path per selected sample (named by pbmc_sample_id) and read
# them all with the project helper.
aggregated_count_file_list <- paste0(
  "/home/jupyter/BRI_Figures_Final_V1/Dataset/scRNA/BRI/Aggregated_Count/Aggregated_Raw_Expression_of_Celltypes_by_Sample_AIFI_L3/",
  meta_data_subset$pbmc_sample_id,
  ".csv"
)

df_list <- read_pseudobulk_expression(aggregated_count_file_list)

[1] "Total reading time: 4.36700000000019 seconds"
[1] "The length of the list matches the length of the input path."


In [81]:
# Collect every column name across the loaded tables, strip everything up to
# and including the last ":" and keep the distinct remainders.
celltypes <- df_list %>%
  lapply(names) %>%
  unlist() %>%
  unique() %>%
  sub(pattern = ".*:", replacement = "") %>%
  unique()

In [82]:
# Display the number of distinct labels collected in `celltypes`.
length(celltypes)

In [83]:
# Run deseq2_analysis() once per entry of `celltypes`, in parallel forked
# workers, comparing CMV "Positive" against "Negative" with
# subject.biologicalSex included in the design formula. The per-cell-type
# return values are collected, in the order of `celltypes`, in `res_list`.
res_list <- mclapply(celltypes, function(celltype) {
  # Select the columns whose label after the last ":" is exactly this cell
  # type (the same rule used to derive `celltypes`). A substring search would
  # also pick up columns of any other cell type whose name merely contains
  # this one.
  celltype_list <- lapply(df_list, function(df) {
    df[, sub(".*:", "", names(df)) == celltype, drop = FALSE]
  })
  exp_matrix <- do.call(cbind, celltype_list)

  # Drop the ":<cell type>" suffix so columns are named by the leading part
  # of the label; the metadata rows are keyed by pbmc_sample_id.
  colnames(exp_matrix) <- sub(":.*", "", colnames(exp_matrix))
  rownames(meta_data_subset) <- meta_data_subset$pbmc_sample_id

  # Genes listed for this cell type in the gene table.
  filtered_gene_set_filtered <- filtered_gene_set %>%
    filter(AIFI_L3 == celltype) %>%
    pull(gene)

  deseq2_analysis(exp_matrix,
    meta_data = meta_data_subset,
    filtered_gene_set = filtered_gene_set_filtered,
    formula = ~ CMV + subject.biologicalSex,
    comparisons = list(c("CMV", "Positive", "Negative")),
    celltype = celltype
  )
}, mc.cores = 60)

# mclapply() hands back a "try-error" object for a worker that failed instead
# of raising the error here; stop now so failures are not silently combined
# with the successful results.
worker_failed <- vapply(res_list, inherits, logical(1), what = "try-error")
if (any(worker_failed)) {
  stop(
    "deseq2_analysis failed for cell type(s): ",
    paste(celltypes[worker_failed], collapse = ", "),
    call. = FALSE
  )
}

In [84]:
# Row-bind every element of `res_list` into one combined object.
res_all <- do.call(what = rbind, args = res_list)

In [85]:
# Save the combined BR1 results as a CSV in the working directory.
write.csv(x = res_all, file = "Deseq2_Result_BR1_CMVpos_vs_CMVneg.csv")

# Perform Deseq2 on the BR2 cohort

In [86]:
# Keep only the rows for the "Flu Year 1 Day 0" visit of cohort BR2.
meta_data_subset <- meta_data %>%
  filter(
    sample.visitName == "Flu Year 1 Day 0",
    cohort.cohortGuid == "BR2"
  )


In [87]:
# Load the BR2 gene table from the working directory; its AIFI_L3 and gene
# columns are used when selecting genes per cell type.
filtered_gene_set <- read.csv(file = "fitlered_gene_BR2.csv")

In [88]:
# Build one CSV path per selected sample (named by pbmc_sample_id) and read
# them all with the project helper.
aggregated_count_file_list <- paste0(
  "/home/jupyter/BRI_Figures_Final_V1/Dataset/scRNA/BRI/Aggregated_Count/Aggregated_Raw_Expression_of_Celltypes_by_Sample_AIFI_L3/",
  meta_data_subset$pbmc_sample_id,
  ".csv"
)

df_list <- read_pseudobulk_expression(aggregated_count_file_list)

[1] "Total reading time: 4.03600000000006 seconds"
[1] "The length of the list matches the length of the input path."


In [89]:
# Collect every column name across the loaded tables, strip everything up to
# and including the last ":" and keep the distinct remainders.
celltypes <- df_list %>%
  lapply(names) %>%
  unlist() %>%
  unique() %>%
  sub(pattern = ".*:", replacement = "") %>%
  unique()

In [90]:
# Display the number of distinct labels collected in `celltypes`.
length(celltypes)

In [91]:
# Run deseq2_analysis() once per entry of `celltypes`, in parallel forked
# workers, comparing CMV "Positive" against "Negative" with
# subject.biologicalSex included in the design formula. The per-cell-type
# return values are collected, in the order of `celltypes`, in `res_list`.
res_list <- mclapply(celltypes, function(celltype) {
  # Select the columns whose label after the last ":" is exactly this cell
  # type (the same rule used to derive `celltypes`). A substring search would
  # also pick up columns of any other cell type whose name merely contains
  # this one.
  celltype_list <- lapply(df_list, function(df) {
    df[, sub(".*:", "", names(df)) == celltype, drop = FALSE]
  })
  exp_matrix <- do.call(cbind, celltype_list)

  # Drop the ":<cell type>" suffix so columns are named by the leading part
  # of the label; the metadata rows are keyed by pbmc_sample_id.
  colnames(exp_matrix) <- sub(":.*", "", colnames(exp_matrix))
  rownames(meta_data_subset) <- meta_data_subset$pbmc_sample_id

  # Genes listed for this cell type in the gene table.
  filtered_gene_set_filtered <- filtered_gene_set %>%
    filter(AIFI_L3 == celltype) %>%
    pull(gene)

  deseq2_analysis(exp_matrix,
    meta_data = meta_data_subset,
    filtered_gene_set = filtered_gene_set_filtered,
    formula = ~ CMV + subject.biologicalSex,
    comparisons = list(c("CMV", "Positive", "Negative")),
    celltype = celltype
  )
}, mc.cores = 60)

# mclapply() hands back a "try-error" object for a worker that failed instead
# of raising the error here; stop now so failures are not silently combined
# with the successful results.
worker_failed <- vapply(res_list, inherits, logical(1), what = "try-error")
if (any(worker_failed)) {
  stop(
    "deseq2_analysis failed for cell type(s): ",
    paste(celltypes[worker_failed], collapse = ", "),
    call. = FALSE
  )
}

In [92]:
# Row-bind every element of `res_list` into one combined object.
res_all <- do.call(what = rbind, args = res_list)

In [93]:
# Save the combined BR2 results as a CSV in the working directory.
write.csv(x = res_all, file = "Deseq2_Result_BR2_CMVpos_vs_CMVneg.csv")