In [2]:
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(tidyr))
suppressPackageStartupMessages(library(tibble))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(stringr))
library(parallel)
library(reshape2)
library(hise)
source('/home//jupyter/BRI_Figures_Final_V1/helper_function/helper_function_IHA.r')


Attaching package: ‘reshape2’


The following object is masked from ‘package:tidyr’:

    smiths




# Read Mean Expression Data

In [35]:
# Load the SF4 sample-level metadata table from the shared dataset folder.
meta_data <- read.csv(
  file = "/home/jupyter/BRI_Figures_Final_V1/Dataset/SF4_meta_data-2024-05-05.csv"
)

In [36]:
# Make the enrollment age numeric: the top-coded label '89+' is recoded to
# '90' before conversion, so those samples are kept rather than turned into NA.
meta_data <- meta_data %>%
  mutate(
    ageAtEnrollment = as.numeric(
      replace(ageAtEnrollment, ageAtEnrollment == '89+', '90')
    )
  )

# Drop samples whose age is still missing after conversion.
has_age <- !is.na(meta_data$ageAtEnrollment)
meta_data <- meta_data[has_age, ]

# Keep a copy of the numeric age under the name `Ages`.
meta_data$Ages <- meta_data$ageAtEnrollment

In [37]:
# Build one CSV path per remaining sample: <directory>/<pbmc_sample_id>.csv.
file_list <- paste0(
  "/home/jupyter/BRI_Figures_Final_V1/Dataset/scRNA/SF4/Average_LogNormalized_Expression/Average_LogNormalized_Expression_of_Celltypes_by_Sample_AIFI_L3/",
  meta_data$pbmc_sample_id,
  ".csv"
)

In [38]:
# Read every per-sample expression file through the sourced helper.
# The helper reports that the returned list has one entry per input path.
# NOTE(review): mc_cores is presumably the number of parallel workers;
# confirm against helper_function_IHA.r.
df_list <- read_pseudobulk_expression(
  file_list,
  mc_cores = 60
)

[1] "Total reading time: 11.653 seconds"
[1] "The length of the list matches the length of the input path."


# Read DESeq2 Result

In [39]:
# Load the differential-expression results table produced in Figure2/02_DEG.
deseq2_result_Y1D0 <- read.csv(
  file = '/home/jupyter/BRI_Figures_Final_V1/Figure2/02_DEG/Deseq2_Result_Y1D0.csv'
)

In [40]:
# Keep only the cohort contrast rows that pass both significance cut-offs:
# adjusted p-value below 0.05 and absolute log2 fold change above 0.1.
# Rows where any condition evaluates to NA are dropped by filter().
deseq2_result_Y1D0_AgeGroup_sig <- filter(
  deseq2_result_Y1D0,
  contrast == "cohort.cohortGuid",
  padj < 0.05,
  abs(log2FoldChange) > 0.1
)


# Filter Genes and CellTypes

In [41]:
# Cross-tabulate significant genes by cell type and direction, then flatten
# the contingency table to long form. The table dimensions are unnamed, so the
# resulting columns are Var1 (cell type), Var2 (direction) and Freq (count).
df_degs_counts <- table(
  deseq2_result_Y1D0_AgeGroup_sig$celltype,
  deseq2_result_Y1D0_AgeGroup_sig$Direction
) %>%
  as.data.frame()

In [42]:
# Retain cell types with more than 20 significant "HigherInBR2" genes,
# ordered by cell type.
df_degs_counts_filtered <- df_degs_counts %>%
  filter(Var2 == "HigherInBR2", Freq > 20) %>%
  arrange(Var1)

# Construct Composite Score

In [52]:
# Build a per-sample composite expression score for every retained cell type.
#
# For each cell type (processed in parallel, 10 workers):
#   1. pick up to 100 significant "HigherInBR2" genes, ranked by adjusted
#      p-value and then by absolute log2 fold change;
#   2. pull the mean expression of those genes for that cell type from
#      df_list in long format via the sourced helper;
#   3. z-score each gene across samples;
#   4. sum raw and z-scored expression per sample and attach the metadata.
#
# Result: a list with one data.frame per cell type.
#
# Var1 is a factor (it comes from as.data.frame(table(...))). Iterating over a
# factor hands each worker a one-element factor, and R subscripts by a factor's
# integer codes rather than its labels, so the cell types are converted to
# plain character strings before dispatch.
score_df_list <- mclapply(as.character(unique(df_degs_counts_filtered$Var1)), function(celltype_single) {
    n_top_genes <- 100
    Direction_of_DEG <- "HigherInBR2"

    selected_genes <- deseq2_result_Y1D0_AgeGroup_sig %>%
        filter(celltype == celltype_single, Direction == Direction_of_DEG) %>%
        arrange(padj, desc(abs(log2FoldChange))) %>%
        slice_head(n = n_top_genes) %>%
        pull(gene)

    # NOTE(review): the columns returned by filter_genes_and_celltype() are not
    # visible here, so the first join keeps dplyr's inferred (shared-name) keys.
    long_format <- filter_genes_and_celltype(df_list, selected_genes, celltype_single, longformat = TRUE) %>%
        left_join(meta_data) %>%
        filter(!is.na(Mean_Expression)) %>%
        group_by(Gene) %>%
        # scale() returns a one-column matrix; as.numeric() keeps the new
        # column an ordinary numeric vector instead of a matrix column.
        mutate(Z_score_Mean_Expression = as.numeric(scale(Mean_Expression))) %>%
        ungroup()

    long_format %>%
        group_by(pbmc_sample_id) %>%
        summarise(
            composite_score_sum_mean = sum(Mean_Expression, na.rm = TRUE),
            composite_score_sum_scaled_mean = sum(Z_score_Mean_Expression, na.rm = TRUE)
        ) %>%
        # After summarise() the only column shared with meta_data is the
        # sample id, so the key is stated explicitly.
        left_join(meta_data, by = "pbmc_sample_id") %>%
        mutate(celltype = celltype_single) %>%
        as.data.frame()
}, mc.cores = 10)

# mclapply() does not stop when a worker fails: it returns a "try-error"
# object (or NULL for a killed worker) in that slot. Fail here instead of
# letting a broken element flow into the combined table.
failed_celltypes <- vapply(
    score_df_list,
    function(res) is.null(res) || inherits(res, "try-error"),
    logical(1)
)
if (any(failed_celltypes)) {
    stop(
        sum(failed_celltypes), " of ", length(failed_celltypes),
        " composite-score workers failed; inspect score_df_list for details.",
        call. = FALSE
    )
}


In [53]:
# Stack the per-cell-type score tables into one data frame.
# bind_rows() is used instead of do.call(rbind, ...) because it refuses
# elements that are not data frames. A failed mclapply() worker leaves a
# "try-error" character object in the list, which rbind() would silently
# recycle into a data row; bind_rows() raises an error instead.
composite_score_df_all <- bind_rows(score_df_list)

In [54]:
# Save the combined scores as CSV in the current working directory
# (row names are written as the first, unnamed column).
# NOTE(review): the upload step reads this file from an absolute path, so the
# working directory must be that notebook folder for the two to match.
write.csv(
  x = composite_score_df_all,
  file = 'SF4_CompositeScore.csv'
)

# Upload Files

In [3]:
# File ids of the inputs used by this notebook, read from the `id` column of
# the two uuid listing files; they are passed to the upload as input file ids.
input_uuid_meta_data <- read.csv(
  "/home/jupyter/BRI_Figures_Final_V1/Dataset/scRNA_BRI_meta_data_uuid.csv"
) %>%
  pull(id)

input_uuid_pesudobulk <- read.csv(
  "/home/jupyter/BRI_Figures_Final_V1/Dataset/scRNA_SF4_Average_LogNormalized_Expression_uuid.csv"
) %>%
  pull(id)


In [5]:
# Upload bookkeeping: target study space plus the pieces of the upload title.
study_space_uuid <- 'de025812-5e73-4b3c-9c3b-6d0eac412f2a'
date_today <- Sys.Date()
working_dir <- getwd()
# NOTE(review): the captured upload prompt for this notebook lists
# "02A_Aging_CompositeScore_SF4.ipynb", not the "01A_..." name below.
# Confirm which file name the title should carry.
notebook_name <- basename("BRI_Figures_Final_V1/Figure2/04_CompositeScore/01A_Aging_CompositeScore_SF4.ipynb")

# Title pattern: <prefix>_<date>_from_<working dir>/<notebook>, with every "/"
# turned into "-" and every "." into "_".
title <- str_replace_all(
  sprintf("Cert-Pro_IHA_Figures_Files_%s_from_%s/%s", date_today, working_dir, notebook_name),
  c("/" = "-", "\\." = "_")
)

title

In [7]:
# Upload the composite-score CSV to the study space, recording the metadata
# and pseudobulk files as its inputs. The call is interactive: it asks which
# notebook is running and for a y/n confirmation before uploading.
# The study space id comes from `study_space_uuid` (defined with the title)
# rather than a second copy of the literal, so the two cannot drift apart.
uploadFiles(
  list('/home/jupyter/BRI_Figures_Final_V1/Figure2/04_CompositeScore/SF4_CompositeScore.csv'),
  studySpaceId = study_space_uuid,
  title = title,
  fileTypes = list('csv'),
  destination = title,
  inputFileIds = as.list(c(input_uuid_meta_data, input_uuid_pesudobulk))
)

[1] "Cannot determine the current notebook."
[1] "1) /home/jupyter/BRI_Figures_Final_V1/Figure2/04_CompositeScore/02A_Aging_CompositeScore_SF4.ipynb"
[1] "2) /home/jupyter/BRI_Figures_Final_V1/Figure2/04_CompositeScore/01A_Aging_CompositeScore_BRI.ipynb"
[1] "3) /home/jupyter/BRI_Figures_Final_V1/Figure2/02_DEG/02B_DEG_analysis.ipynb"


Please select (1-3)  1


You are trying to upload the following files:  /home/jupyter/BRI_Figures_Final_V1/Figure2/04_CompositeScore/SF4_CompositeScore.csv



(y/n) Y
