In [1]:
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(tidyr))
suppressPackageStartupMessages(library(tibble))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(stringr))
library(parallel)
library(reshape2)
library(hise)
source('/home//jupyter/BRI_Figures_Final_V2/helper_function/helper_function_IHA.r')


Attaching package: ‘reshape2’


The following object is masked from ‘package:tidyr’:

    smiths




# Read Mean Expression Data

In [2]:
# Load the SF4 metadata table; later cells read its pbmc_sample_id and
# ageAtEnrollment columns.
meta_data <- read.csv(
  file = "/home/jupyter/BRI_Figures_Final_V2/Dataset/SF4_meta_data-2024-05-05.csv"
)

In [3]:
# Recode the top-coded age label '89+' as '90' so the column can be parsed
# as a number.
meta_data[["ageAtEnrollment"]][meta_data[["ageAtEnrollment"]] == '89+'] <- '90'
meta_data[["ageAtEnrollment"]] <- as.numeric(meta_data[["ageAtEnrollment"]])

# Drop rows whose age is missing or could not be parsed, then keep a copy of
# the numeric age under the column name `Ages`.
meta_data <- meta_data[!is.na(meta_data[["ageAtEnrollment"]]), ]
meta_data[["Ages"]] <- meta_data[["ageAtEnrollment"]]

In [4]:
# One CSV path per pbmc_sample_id remaining in meta_data (vectorised paste0).
file_list <- paste0(
  "/home/jupyter/BRI_Figures_Final_V2/Dataset/scRNA/SF4/Average_LogNormalized_Expression/Average_LogNormalized_Expression_of_Celltypes_by_Sample_AIFI_L3/",
  meta_data[["pbmc_sample_id"]],
  ".csv"
)

In [5]:
# Read every file in file_list into a list.
# NOTE(review): read_pseudobulk_expression() is not defined in this notebook
# (presumably it comes from the sourced helper file), and mc_cores is
# presumably its parallel worker count -- confirm against the helper.
df_list <- read_pseudobulk_expression(
  file_list,
  mc_cores = 60
)

[1] "Total reading time: 19.604 seconds"
[1] "The length of the list matches the length of the input path."


# Read DESeq2 Result

In [6]:
# Load the saved DESeq2 result table for Y1D0.
deseq2_result_Y1D0 <- read.csv(
  file = '/home/jupyter/BRI_Figures_Final_V2/Figure2/02_DEG/Deseq2_Result_Y1D0.csv'
)

In [7]:
# Keep significant genes for the "cohort.cohortGuid" contrast:
# adjusted p-value below 0.05 and absolute log2 fold change above 0.1.
# Rows where any condition evaluates to NA are dropped by filter().
deseq2_result_Y1D0_AgeGroup_sig <- filter(
  deseq2_result_Y1D0,
  contrast == "cohort.cohortGuid",
  padj < 0.05,
  abs(log2FoldChange) > 0.1
)


# Filter Genes and CellTypes

In [8]:
# Cross-tabulate significant genes by cell type and direction, then flatten
# the table into long form. Because the table is built from unnamed vectors,
# the resulting columns are Var1 (cell type), Var2 (direction) and Freq.
df_degs_counts <- table(
  deseq2_result_Y1D0_AgeGroup_sig[["celltype"]],
  deseq2_result_Y1D0_AgeGroup_sig[["Direction"]]
) %>%
  as.data.frame()

In [9]:
# Cell types with more than 20 significant "HigherInBR2" genes, ordered by
# cell type (Var1).
df_degs_counts_filtered <- df_degs_counts %>%
  filter(Var2 == "HigherInBR2", Freq > 20) %>%
  arrange(Var1)

# Construct Composite Score

In [10]:
# Build per-sample composite scores for every cell type that passed the
# DEG-count filter. For each cell type:
#   1. take up to 100 significant "HigherInBR2" genes, ranked by padj and then
#      by descending |log2FoldChange|;
#   2. fetch those genes' mean expression for that cell type from df_list and
#      z-score each gene across the samples that have a value;
#   3. sum the raw and the z-scored expression per pbmc_sample_id and attach
#      the sample metadata.
# The result is a list with one data frame per cell type.

# Var1 comes out of as.data.frame(table(...)) as a factor; iterate over plain
# character names so each worker receives a simple string rather than a
# length-one factor that still carries every level.
celltypes_to_score <- as.character(unique(df_degs_counts_filtered$Var1))

score_df_list <- mclapply(celltypes_to_score, function(celltype_single) {
    # Named n_top_genes (not top_n) to avoid shadowing dplyr::top_n().
    n_top_genes <- 100
    Direction_of_DEG <- "HigherInBR2"

    selected_genes <- deseq2_result_Y1D0_AgeGroup_sig %>%
        filter(celltype == celltype_single, Direction == Direction_of_DEG) %>%
        arrange(padj, desc(abs(log2FoldChange))) %>%
        slice_head(n = n_top_genes) %>%
        pull(gene)

    # NOTE(review): this join infers its keys from shared column names because
    # the columns returned by filter_genes_and_celltype() are not visible in
    # this notebook -- confirm them and pin the join with `by =`.
    long_format <- filter_genes_and_celltype(df_list, selected_genes, celltype_single, longformat = TRUE) %>%
        left_join(meta_data) %>%
        filter(!is.na(Mean_Expression)) %>%
        group_by(Gene) %>%
        # scale() returns a one-column matrix; flatten it so the new column is
        # an ordinary numeric vector instead of a matrix column.
        mutate(Z_score_Mean_Expression = as.numeric(scale(Mean_Expression))) %>%
        ungroup()

    # After summarise() the only column shared with meta_data is
    # pbmc_sample_id, so the join key is stated explicitly.
    # na.rm = TRUE also discards the NaN/NA z-scores that scale() yields for
    # genes with zero variance or a single observation.
    long_format %>%
        group_by(pbmc_sample_id) %>%
        summarise(
            composite_score_sum_mean = sum(Mean_Expression, na.rm = TRUE),
            composite_score_sum_scaled_mean = sum(Z_score_Mean_Expression, na.rm = TRUE)
        ) %>%
        left_join(meta_data, by = "pbmc_sample_id") %>%
        mutate(celltype = celltype_single) %>%
        as.data.frame()
}, mc.cores = 10)

# mclapply() does not raise worker errors in the parent process: a failed job
# comes back as a "try-error" object and a killed worker as NULL. Stop here
# rather than letting a later rbind() silently drop or mangle those entries.
score_job_failed <- vapply(
    score_df_list,
    function(res) !is.data.frame(res),
    logical(1)
)
if (any(score_job_failed)) {
    stop(
        "Composite score computation failed for cell type(s): ",
        paste(celltypes_to_score[score_job_failed], collapse = ", "),
        call. = FALSE
    )
}


In [11]:
# Stack the per-cell-type score tables row-wise into one data frame.
composite_score_df_all <- do.call(what = rbind, args = score_df_list)

In [12]:
# Save the combined scores to the current working directory (row names are
# written as the first column, which is write.csv()'s default).
write.csv(
  x = composite_score_df_all,
  file = 'SF4_CompositeScore.csv'
)