In [53]:
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(tidyr))
suppressPackageStartupMessages(library(tibble))
suppressPackageStartupMessages(library(stringr))
library(parallel)
library(DESeq2)
library(hise)
source('/home//jupyter/BRI_Figures_Final_V1/helper_function/helper_function_IHA.r')

# Read Meta Data

In [54]:
# Load the scRNA metadata table; later cells filter it by visit, Covid
# exclusion flag and cohort, and use its pbmc_sample_id column.
meta_data <- read.csv(
    file = "/home/jupyter/BRI_Figures_Final_V1/Dataset/scRNA_meta_data-2024-05-09.csv"
)

# Perform Deseq2 on Young

In [55]:
# Keep BR1-cohort rows from the "Flu Year 2 Day 90" visit whose
# Covid_exclusion flag is 'no'.
meta_data_subset <- meta_data %>%
    filter(
        sample.visitName == "Flu Year 2 Day 90",
        Covid_exclusion == 'no',
        cohort.cohortGuid == 'BR1'
    )


In [56]:
# Read the gene list for BR1; its AIFI_L3 and gene columns are used to pick
# the genes passed to deseq2_analysis() for each cell type.
# NOTE(review): the file name is spelled "fitlered" - presumably matching the
# file on disk; confirm before renaming.
filtered_gene_set <- read.csv(file = "fitlered_gene_BR1_Y2D90.csv")

In [57]:
# One aggregated-count CSV path per selected sample: <directory><pbmc_sample_id>.csv
aggregated_count_file_list <- paste0(
    "/home/jupyter/BRI_Figures_Final_V1/Dataset/scRNA/BRI/Aggregated_Count/Aggregated_Raw_Expression_of_Celltypes_by_Sample_AIFI_L3/",
    meta_data_subset$pbmc_sample_id,
    ".csv"
)

# Load every file into a list (read_pseudobulk_expression() is presumably
# defined in the sourced helper_function_IHA.r - confirm).
df_list <- read_pseudobulk_expression(aggregated_count_file_list)

[1] "Total reading time: 3.435 seconds"
[1] "The length of the list matches the length of the input path."


In [58]:
# Collect the column names of every loaded table, strip everything up to and
# including the last ":" and keep the distinct remainders as cell-type labels.
celltypes <- df_list %>%
    lapply(names) %>%
    unlist() %>%
    sub(".*:", "", .) %>%
    unique()

In [59]:
# Display how many distinct cell-type labels were found.
length(celltypes)

In [60]:
# Run deseq2_analysis() once per cell type in forked parallel workers.
# For each cell type the matching column of every table in df_list is
# collected and column-bound into one expression table, whose columns are then
# renamed to the part of the name before the first ":" (the sample id part).
# The design is ~ CMV + subject.biologicalSex and the requested comparison is
# CMV "Positive" vs "Negative".
res_list <- mclapply(celltypes, function(celltype) {
    # Column names have the form "<sample>:<cell type>". Compare the part after
    # the last ":" for equality (the same transformation used to build
    # `celltypes`) rather than substring-matching the whole name, so a label
    # that is contained in another label or in a sample id cannot pull in
    # extra columns.
    celltype_list <- lapply(df_list, function(df) {
        df[, sub(".*:", "", names(df)) == celltype, drop = FALSE]
    })
    exp_matrix <- do.call(cbind, celltype_list)
    colnames(exp_matrix) <- sub(":.*", "", colnames(exp_matrix))

    # Only the worker's local copy of the metadata gets sample ids as row names.
    rownames(meta_data_subset) <- meta_data_subset$pbmc_sample_id

    # Genes listed for this cell type in the filtered gene table.
    filtered_gene_set_filtered <- filtered_gene_set %>%
        filter(AIFI_L3 == celltype) %>%
        pull(gene)

    deseq2_analysis(exp_matrix,
                    meta_data = meta_data_subset,
                    filtered_gene_set = filtered_gene_set_filtered,
                    formula = ~ CMV + subject.biologicalSex,
                    comparisons = list(c("CMV", "Positive", "Negative")),
                    celltype = celltype)
}, mc.cores = 60)

# mclapply() does not stop when a worker hits an error: the affected elements
# come back as "try-error" objects. Fail here with the cell types involved
# instead of letting those objects reach the rbind step.
failed <- vapply(res_list, inherits, logical(1), what = "try-error")
if (any(failed)) {
    stop("mclapply worker error affecting cell type(s): ",
         paste(celltypes[failed], collapse = ", "),
         "\nFirst error: ",
         trimws(as.character(res_list[[which(failed)[1]]])),
         call. = FALSE)
}

In [61]:
# Row-bind the per-cell-type results into a single object.
res_all <- do.call(what = rbind, args = res_list)

In [62]:
# Write the combined BR1 results to a CSV in the working directory.
# NOTE(review): the file name says "Y1D90", but the samples were filtered on
# "Flu Year 2 Day 90" and the gene list file is labelled "Y2D90" - confirm the
# intended label; downstream code may read this exact name, so it is left as is.
write.csv(res_all,"Deseq2_Result_BR1_Y1D90_CMVpos_vs_CMVneg.csv")

# Perform Deseq2 on BR2 Cohort

In [63]:
# Keep BR2-cohort rows from the "Flu Year 2 Day 90" visit whose
# Covid_exclusion flag is 'no'.
meta_data_subset <- meta_data %>%
    filter(
        sample.visitName == "Flu Year 2 Day 90",
        Covid_exclusion == 'no',
        cohort.cohortGuid == 'BR2'
    )

In [64]:
# Read the gene list for BR2; its AIFI_L3 and gene columns are used to pick
# the genes passed to deseq2_analysis() for each cell type.
# NOTE(review): the file name is spelled "fitlered" - presumably matching the
# file on disk; confirm before renaming.
filtered_gene_set <- read.csv(file = "fitlered_gene_BR2_Y2D90.csv")

In [65]:
# One aggregated-count CSV path per selected sample: <directory><pbmc_sample_id>.csv
aggregated_count_file_list <- paste0(
    "/home/jupyter/BRI_Figures_Final_V1/Dataset/scRNA/BRI/Aggregated_Count/Aggregated_Raw_Expression_of_Celltypes_by_Sample_AIFI_L3/",
    meta_data_subset$pbmc_sample_id,
    ".csv"
)

# Load every file into a list (read_pseudobulk_expression() is presumably
# defined in the sourced helper_function_IHA.r - confirm).
df_list <- read_pseudobulk_expression(aggregated_count_file_list)

[1] "Total reading time: 4.50700000000001 seconds"
[1] "The length of the list matches the length of the input path."


In [66]:
# Collect the column names of every loaded table, strip everything up to and
# including the last ":" and keep the distinct remainders as cell-type labels.
celltypes <- df_list %>%
    lapply(names) %>%
    unlist() %>%
    sub(".*:", "", .) %>%
    unique()

In [67]:
# Display how many distinct cell-type labels were found.
length(celltypes)

In [68]:
# Run deseq2_analysis() once per cell type in forked parallel workers.
# For each cell type the matching column of every table in df_list is
# collected and column-bound into one expression table, whose columns are then
# renamed to the part of the name before the first ":" (the sample id part).
# The design is ~ CMV + subject.biologicalSex and the requested comparison is
# CMV "Positive" vs "Negative".
res_list <- mclapply(celltypes, function(celltype) {
    # Column names have the form "<sample>:<cell type>". Compare the part after
    # the last ":" for equality (the same transformation used to build
    # `celltypes`) rather than substring-matching the whole name, so a label
    # that is contained in another label or in a sample id cannot pull in
    # extra columns.
    celltype_list <- lapply(df_list, function(df) {
        df[, sub(".*:", "", names(df)) == celltype, drop = FALSE]
    })
    exp_matrix <- do.call(cbind, celltype_list)
    colnames(exp_matrix) <- sub(":.*", "", colnames(exp_matrix))

    # Only the worker's local copy of the metadata gets sample ids as row names.
    rownames(meta_data_subset) <- meta_data_subset$pbmc_sample_id

    # Genes listed for this cell type in the filtered gene table.
    filtered_gene_set_filtered <- filtered_gene_set %>%
        filter(AIFI_L3 == celltype) %>%
        pull(gene)

    deseq2_analysis(exp_matrix,
                    meta_data = meta_data_subset,
                    filtered_gene_set = filtered_gene_set_filtered,
                    formula = ~ CMV + subject.biologicalSex,
                    comparisons = list(c("CMV", "Positive", "Negative")),
                    celltype = celltype)
}, mc.cores = 60)

# mclapply() does not stop when a worker hits an error: the affected elements
# come back as "try-error" objects. Fail here with the cell types involved
# instead of letting those objects reach the rbind step.
failed <- vapply(res_list, inherits, logical(1), what = "try-error")
if (any(failed)) {
    stop("mclapply worker error affecting cell type(s): ",
         paste(celltypes[failed], collapse = ", "),
         "\nFirst error: ",
         trimws(as.character(res_list[[which(failed)[1]]])),
         call. = FALSE)
}

In [69]:
# Row-bind the per-cell-type results into a single object.
res_all <- do.call(what = rbind, args = res_list)

In [70]:
# Write the combined BR2 results to a CSV in the working directory.
# NOTE(review): the file name says "Y1D90", but the samples were filtered on
# "Flu Year 2 Day 90" and the gene list file is labelled "Y2D90" - confirm the
# intended label; downstream code may read this exact name, so it is left as is.
write.csv(res_all,"Deseq2_Result_BR2_Y1D90_CMVpos_vs_CMVneg.csv")

In [72]:
# Print the R version, platform and attached packages of this session.
sessionInfo()

R version 4.3.2 (2023-10-31)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Ubuntu 20.04.6 LTS

Matrix products: default
BLAS/LAPACK: /opt/conda/lib/libopenblasp-r0.3.25.so;  LAPACK version 3.11.0

locale:
 [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
 [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
 [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
[10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   

time zone: Etc/UTC
tzcode source: system (glibc)

attached base packages:
[1] stats4    parallel  stats     graphics  grDevices utils     datasets 
[8] methods   base     

other attached packages:
 [1] hise_2.16.0                 DESeq2_1.42.0              
 [3] SummarizedExperiment_1.32.0 Biobase_2.62.0             
 [5] MatrixGenerics_1.14.0       matrixStats_1.2.0          
 [7] GenomicRanges_1.54.1        GenomeInfoDb_1.38.5        
 [9] IRanges_2.36.0              S4Vectors_0.