In [1]:
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(tidyr))
suppressPackageStartupMessages(library(tibble))
suppressPackageStartupMessages(library(stringr))
library(parallel)
library(DESeq2)
library(hise)
source('/home//jupyter/BRI_Figures_Final_V2/helper_function/helper_function_IHA.r')

Loading required package: S4Vectors

Loading required package: stats4

Loading required package: BiocGenerics


Attaching package: ‘BiocGenerics’


The following objects are masked from ‘package:dplyr’:

    combine, intersect, setdiff, union


The following objects are masked from ‘package:stats’:

    IQR, mad, sd, var, xtabs


The following objects are masked from ‘package:base’:

    anyDuplicated, aperm, append, as.data.frame, basename, cbind,
    colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
    get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
    match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
    Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
    table, tapply, union, unique, unsplit, which.max, which.min



Attaching package: ‘S4Vectors’


The following object is masked from ‘package:tidyr’:

    expand


The following objects are masked from ‘package:dplyr’:

    first, rename


The following object is masked from

# Read Meta Data

In [2]:
# Load the scRNA sample-level metadata table from CSV.
meta_data <- read.csv(
  file = "/home/jupyter/BRI_Figures_Final_V2/Dataset/scRNA_meta_data-2024-05-09.csv"
)

In [26]:
# Tabulate how many metadata rows each subject GUID has
# (columns: Var1 = subject GUID, Freq = row count).
df <- as.data.frame(table(meta_data[["subject.subjectGuid"]]))

In [25]:
# Copy the subject GUID column under the name `sub`.
df[["sub"]] <- df[["Var1"]]

In [31]:
# The cohort label is the first three characters of the subject GUID.
df[["cohort"]] <- substr(as.character(df[["Var1"]]), start = 1, stop = 3)

In [32]:
# Show the subject GUIDs as plain character strings.
as.character(df[["Var1"]])

In [33]:
# Display the current contents of `df` (auto-printed by the notebook).
df

Var1,Freq,cohort
<fct>,<int>,<chr>
BR1001,3,BR1
BR1002,10,BR1
BR1003,10,BR1
BR1004,10,BR1
BR1005,10,BR1
BR1006,10,BR1
BR1007,10,BR1
BR1008,6,BR1
BR1009,10,BR1
BR1010,10,BR1


In [34]:
# Count the rows of `df` per cohort prefix.
table(df[["cohort"]])


BR1 BR2 
 49  47 

# Perform DESeq2 on Year 1 Day 0 samples

In [3]:
# Load the gene filter table; the path is relative to the working directory.
filtered_gene_set <- read.csv(file = "filtered_gene_Y1D0.csv")

In [9]:
# Keep "Flu Year 1 Day 0" samples that are not flagged for Covid exclusion.
meta_data_subset <- filter(
  meta_data,
  sample.visitName == "Flu Year 1 Day 0" & Covid_exclusion == "no"
)


In [12]:
# Count the selected samples by CMV status.
table(meta_data_subset[["CMV"]])


Negative Positive 
      50       42 

In [7]:
# Keep only the "Flu Year 1 Day 0" visit.
# NOTE(review): this reassigns `meta_data_subset` without the
# `Covid_exclusion == 'no'` condition applied in an earlier cell of this
# notebook -- confirm which subset the DESeq2 run is meant to use.
meta_data_subset <- filter(meta_data, sample.visitName == "Flu Year 1 Day 0")

# One aggregated-count CSV path per selected sample, named by pbmc_sample_id.
aggregated_count_file_list <- paste0(
  "/home/jupyter/BRI_Figures_Final_V2/Dataset/scRNA/BRI/Aggregated_Count/Aggregated_Raw_Expression_of_Celltypes_by_Sample_AIFI_L3/",
  meta_data_subset$pbmc_sample_id,
  ".csv"
)

# Read every file into a list.
# NOTE(review): read_pseudobulk_expression() is not defined in this notebook;
# presumably it comes from the sourced helper file.
df_list <- read_pseudobulk_expression(aggregated_count_file_list)

[1] "Total reading time: 14.875 seconds"
[1] "The length of the list matches the length of the input path."


In [8]:
# Gather the column labels of every per-sample table, drop everything up to
# and including the last ":" in each label, and keep the distinct remainders
# in order of first appearance.
celltypes <- unique(
  sub(".*:", "", unlist(lapply(df_list, names)))
)

In [9]:
# Number of distinct cell-type labels collected in `celltypes`.
length(celltypes)

In [10]:
# Run the DESeq2 helper once per cell type, in parallel, and collect the
# per-cell-type results in `res_list` (one element per entry of `celltypes`).
#
# For each cell type:
#   1. take, from every per-sample table in `df_list`, the column(s) whose
#      label after the last ":" equals the cell type;
#   2. column-bind them into one table and reduce each column name to the part
#      before the first ":";
#   3. call deseq2_analysis() with the genes listed for that cell type in
#      `filtered_gene_set`, testing sex, cohort and CMV contrasts.
#
# NOTE(review): deseq2_analysis() is not defined in this notebook (presumably
# it comes from the sourced helper file); its return type is not visible here.
res_list <- mclapply(celltypes, function(celltype) {
  print(celltype)

  # Match the cell-type label exactly, using the same "after the last ':'"
  # rule that produced `celltypes`. A substring search would also pull in the
  # columns of any other cell type whose name merely contains this one.
  celltype_list <- lapply(df_list, function(sample_df) {
    is_celltype <- sub(".*:", "", names(sample_df)) == celltype
    sample_df[, is_celltype, drop = FALSE]
  })
  exp_matrix <- do.call(cbind, celltype_list)
  colnames(exp_matrix) <- sub(":.*", "", colnames(exp_matrix))

  # Local copy of the sample metadata with row names set to pbmc_sample_id.
  sample_meta <- meta_data_subset
  rownames(sample_meta) <- sample_meta$pbmc_sample_id

  # dplyr verbs are namespaced because Bioconductor packages attached in this
  # session mask several dplyr functions.
  celltype_genes <- filtered_gene_set %>%
    dplyr::filter(AIFI_L3 == celltype) %>%
    dplyr::pull(gene)

  deseq2_analysis(
    exp_matrix,
    meta_data = sample_meta,
    filtered_gene_set = celltype_genes,
    formula = ~ cohort.cohortGuid + subject.biologicalSex + CMV,
    comparisons = list(
      c("subject.biologicalSex", "Male", "Female"),
      c("cohort.cohortGuid", "BR2", "BR1"),
      c("CMV", "Positive", "Negative")
    ),
    celltype = celltype
  )
},
# Use at most 60 workers, but never more than the machine reports;
# detectCores() can return NA, hence na.rm and the floor of 1.
mc.cores = max(1L, min(60L, parallel::detectCores(), na.rm = TRUE)))

In [11]:
# Stack the per-cell-type results into one table.
#
# mclapply() does not stop when a worker fails: a job that raised an error
# comes back as a "try-error" object, and a job whose worker died comes back
# as NULL. rbind() would fold an error string into the table or silently drop
# the NULL, so both cases are checked first.
res_all <- local({
  failed <- vapply(res_list, inherits, logical(1), what = "try-error")
  if (any(failed)) {
    stop(
      "DESeq2 analysis failed for: ",
      paste(celltypes[failed], collapse = ", "),
      "\nFirst error: ",
      as.character(res_list[[which(failed)[1]]]),
      call. = FALSE
    )
  }

  empty <- vapply(res_list, is.null, logical(1))
  if (any(empty)) {
    warning(
      "No result returned for: ",
      paste(celltypes[empty], collapse = ", "),
      call. = FALSE
    )
  }

  do.call(rbind, res_list)
})

In [12]:
# Save the combined results table to the working directory.
write.csv(x = res_all, file = "Deseq2_Result_Y1D0.csv")