In [204]:
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(tidyr))
suppressPackageStartupMessages(library(tibble))
suppressPackageStartupMessages(library(stringr))
library(parallel)
library(DESeq2)
library(hise)
source('/home//jupyter/BRI_Figures/helper_function/helper_function_IHA.r')

# Read Meta Data

In [207]:
# Load the metadata table used throughout this notebook.
meta_data <- read.csv(
  file = "/home/jupyter/BRI_Figures/Dataset/SF4_meta_data-2024-05-05.csv"
)

In [208]:
# Entries recorded as the string "89+" are recoded to "90" so that the whole
# column can then be converted to numeric.
meta_data$ageAtEnrollment <- meta_data$ageAtEnrollment %>%
  replace(. == "89+", "90") %>%
  as.numeric()

In [209]:
# Assign every sample to a 10-year age bin between 40 and 90. Bins are
# left-closed ([40,50), [50,60), ...) and include.lowest = TRUE lets the last
# bin also contain 90. labels = FALSE returns the bin index, which is stored
# as a character string.
meta_data$AgeGroup <- meta_data$ageAtEnrollment %>%
  cut(
    breaks = seq(from = 40, to = 90, by = 10),
    labels = FALSE,
    right = FALSE,
    include.lowest = TRUE
  ) %>%
  as.character()

# Disabled: relabel the bin indices as a factor with readable range labels.
#meta_data$AgeGroup <- factor(meta_data$AgeGroup, labels = c("40-50", "50-60", "60-70", "70-80", "80-90"))


In [131]:
# Count samples in `num_bins` equal-width age bins spanning the observed
# minimum and maximum age. cut() uses its default right-closed intervals here,
# whereas the AgeGroup column is built with right = FALSE.
num_bins <- 5
sample_counts <- table(
  meta_data$ageAtEnrollment %>%
    cut(
      breaks = seq(min(.), max(.), length.out = num_bins + 1),
      include.lowest = TRUE
    )
)
sample_counts


[40,50] (50,60] (60,70] (70,80] (80,90] 
     60      65      33      46      30 

# Perform DESeq2 on Year 1 Day 0 samples

In [14]:
# Load the gene filter table; the path is relative to the working directory.
# Its `AIFI_L3` and `gene` columns are used when subsetting genes per cell type.
# NOTE(review): the file name is spelled "fitlered" - confirm it matches the
# file on disk before renaming anything.
filtered_gene_set <- read.csv(file = "fitlered_gene_SF4.csv")

In [11]:
# Build one aggregated-count CSV path per `pbmc_sample_id` in meta_data.
aggregated_count_dir <- "/home/jupyter/BRI_Figures/Dataset/scRNA/SF4/Aggregated_Count/Aggregated_Raw_Expression_of_Celltypes_by_Sample_AIFI_L3/"
aggregated_count_file_list <- paste0(
  aggregated_count_dir,
  meta_data$pbmc_sample_id,
  ".csv"
)

# Load all files into a list of tables (one element per file).
# read_pseudobulk_expression() is presumably defined in the sourced helper
# file - verify there for the exact return structure.
df_list <- read_pseudobulk_expression(aggregated_count_file_list)

[1] "Total reading time: 19.847 seconds"


In [12]:
# Collect the distinct cell-type labels: gather every column name across the
# loaded tables, then strip everything up to and including the last ":".
celltypes <- df_list %>%
  lapply(names) %>%
  unlist() %>%
  unique() %>%
  sub(pattern = ".*:", replacement = "", x = .) %>%
  unique()

In [15]:
# Display how many distinct cell-type labels were found.
length(celltypes)

In [23]:
# Run the DESeq2 helper once per cell type, in parallel across forked workers.
# For each cell type, the matching count columns from every loaded table are
# bound into one matrix, the columns are renamed to the text before the first
# ":" (presumably the sample id - it must line up with meta_data's row names),
# and deseq2_analysis() is called with the design
# ~ ageAtEnrollment + subject.biologicalSex + CMV and the CMV
# Positive-vs-Negative comparison.
res_list <- mclapply(celltypes, function(celltype) {
  # Keep only columns whose label after the last ":" is exactly this cell
  # type. An exact comparison (instead of a substring search) prevents picking
  # up columns of a different cell type whose name merely contains this one,
  # and mirrors how `celltypes` itself is derived from the column names.
  celltype_list <- lapply(df_list, function(df) {
    df[, sub(".*:", "", names(df)) == celltype, drop = FALSE]
  })
  exp_matrix <- do.call(cbind, celltype_list)
  colnames(exp_matrix) <- sub(":.*", "", colnames(exp_matrix))

  # This assignment changes a function-local copy of meta_data only; the
  # global meta_data keeps its original row names.
  rownames(meta_data) <- meta_data$pbmc_sample_id

  # Genes retained for this cell type according to the filter table.
  filtered_gene_set_filtered <- filtered_gene_set %>%
    filter(AIFI_L3 == celltype) %>%
    pull(gene)

  deseq2_analysis(
    exp_matrix,
    meta_data = meta_data,
    filtered_gene_set = filtered_gene_set_filtered,
    formula = ~ ageAtEnrollment + subject.biologicalSex + CMV,
    comparisons = list(c("CMV", "Positive", "Negative")),
    celltype = celltype
  )
}, mc.cores = 60)

# mclapply() does not stop when a worker errors: the failed element is
# returned as a "try-error" object. Report the affected cell types so they
# are not silently combined with the valid results downstream.
failed_celltypes <- celltypes[
  vapply(res_list, inherits, logical(1), what = "try-error")
]
if (length(failed_celltypes) > 0) {
  warning(
    "deseq2_analysis failed for cell type(s): ",
    paste(failed_celltypes, collapse = ", "),
    call. = FALSE
  )
}

In [24]:
# Stack the per-cell-type result tables into a single table.
res_all <- do.call(what = rbind, args = res_list)

In [25]:
# Save the combined results to the working directory.
write.csv(x = res_all, file = "Deseq2_Result_SF4.csv")

In [163]:
# Keep rows with adjusted p-value below 0.05 and an absolute log2 fold change
# above 0.1 (rows where either condition is NA are dropped by filter()).
res_all_signi <- res_all %>%
  filter(padj < 0.05 & abs(log2FoldChange) > 0.1)

In [164]:
# Display the number of rows passing the significance filter per cell type.
table(res_all_signi$celltype)


                    Adaptive NK cell                CD27- effector B cell 
                                 239                                   86 
                            CD4 MAIT                             CD8 MAIT 
                                   3                                    2 
                                cDC1            GZMB- CD27+ EM CD4 T cell 
                                   1                                    2 
               GZMK- CD56dim NK cell            GZMK+ CD27+ EM CD8 T cell 
                                  19                                   25 
               GZMK+ CD56dim NK cell                  IL1B+ CD14 monocyte 
                                   3                                    1 
                ISG+ CD56dim NK cell               KLRB1+ memory CD8 Treg 
                                   3                                    1 
             KLRF1- effector Vd1 gdT     KLRF1- GZMB+ CD27- EM CD8 T cell 
                        

In [165]:
# Select a single cell type for the manual step-through of the loop body.
celltype <- "KLRF1- GZMB+ CD27- memory CD4 T cell"

In [166]:
    # Manual replay of the per-cell-type preparation for the single `celltype`
    # chosen in the previous cell.
    # Keep only columns whose label after the last ":" is exactly `celltype`;
    # an exact comparison (instead of a substring search) avoids picking up
    # columns of another cell type whose name merely contains this one.
    celltype_list <- lapply(df_list, function(df) {
        df[, sub(".*:", "", names(df)) == celltype, drop = FALSE]
    })
    exp_matrix <- do.call(cbind, celltype_list)
    # Rename columns to the text before the first ":".
    colnames(exp_matrix) <- sub(":.*", "", colnames(exp_matrix))
    # At top level this changes the row names of the global meta_data.
    rownames(meta_data) <- meta_data$pbmc_sample_id
    # Genes retained for this cell type according to the filter table.
    filtered_gene_set_filtered <- filtered_gene_set %>%
        filter(AIFI_L3 == celltype) %>%
        pull(gene)

# Upload Files

In [8]:
# Load the two tables whose `id` columns are combined into the upload's
# input file ids.
input_uuid_1 <- read.csv(
  file = "/home//jupyter/BRI_Figures/Dataset/scRNA_BRI_Aggregated_Count_uuid.csv"
)
input_uuid_2 <- read.csv(
  file = "/home//jupyter/BRI_Figures/Dataset/scRNA_BRI_meta_data_uuid.csv"
)

In [9]:
# Concatenate the ids from both tables into one vector.
input_uuid <- c(
  input_uuid_1$id,
  input_uuid_2$id
)

In [16]:
# Upload the Y1D0 DESeq2 result CSV to the study space, passing the ids in
# `input_uuid` as inputFileIds. The call asks for interactive confirmation.
# NOTE(review): this path differs from the "Deseq2_Result_SF4.csv" file
# written earlier in this notebook - confirm the intended file is uploaded.
uploadFiles(
  list("/home/jupyter/BRI_Figures/Figure2/Deseq2_Result_Y1D0.csv"),
  studySpaceId = "de025812-5e73-4b3c-9c3b-6d0eac412f2a",
  title = "Cert_Pro_Deseq2_Result_Y1D0-2024-05-17-QG",
  fileTypes = list("csv"),
  inputFileIds = as.list(input_uuid)
)

You are trying to upload the following files:  /home/jupyter/BRI_Figures/Figure2/Deseq2_Result_Y1D0.csv



(y/n) y


In [17]:
# Upload the Y1D0/Y1D7 DESeq2 result CSV to the study space, passing the ids
# in `input_uuid` as inputFileIds. The call asks for interactive confirmation.
# NOTE(review): this path differs from the "Deseq2_Result_SF4.csv" file
# written earlier in this notebook - confirm the intended file is uploaded.
uploadFiles(
  list("/home/jupyter/BRI_Figures/Figure2/Deseq2_Result_Y1D0_Y1D7.csv"),
  studySpaceId = "de025812-5e73-4b3c-9c3b-6d0eac412f2a",
  title = "Cert_Pro_Deseq2_Result_Y1D0_Y1D7-2024-05-17-QG",
  fileTypes = list("csv"),
  inputFileIds = as.list(input_uuid)
)

You are trying to upload the following files:  /home/jupyter/BRI_Figures/Figure2/Deseq2_Result_Y1D0_Y1D7.csv



(y/n) Y
