In [2]:
suppressPackageStartupMessages(library(readxl))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(tidyr))
suppressPackageStartupMessages(library(tibble))
suppressPackageStartupMessages(library(patchwork))
suppressPackageStartupMessages(library(pheatmap))
suppressPackageStartupMessages(library(stringr))
library(hise)
library(plyr)
library(purrr)
library(vegan)
#library(arrow)
library(rstatix)
library(parallel)
library(ggpubr)
library(ComplexHeatmap)
library(compositions)
library(ggsci)
library(ggpubr)

ERROR: Error in library(compositions): there is no package called ‘compositions’


## Helper functions

In [3]:
cache_uuid_path <- function(uuid) {
    if(!dir.exists(paste0("cache/", uuid))) {
        hise_res <- cacheFiles(list(uuid))
    }
    
    cache_path <- paste0("cache/",uuid)
    cache_file <- list.files(cache_path, full.names = TRUE)
    
    cache_file
}

In [4]:
stored_file_df <- function(store_name) {
    ps_files <- listFilesInProjectStores(list(store_name))
    ps_files <- map(
        ps_files$files, 
        function(l) {
            l <- l[c("id", "name")]
            as.data.frame(l)
        }) %>%
      list_rbind()
    
    ps_files
}

## Identify files for use in HISE

In [5]:
search_id <- "fumur-jufir-vuzag"

Retrieve the list of files stored in our HISE project store

In [6]:
ps_files <- stored_file_df("cohorts")

Filter for files from the previous notebook using our search_id and the .tar extension

In [7]:
freq_files <- ps_files %>%
  filter(grepl(search_id, name))

In [8]:
freq_files

id,name
<chr>,<chr>
b04e3ccd-e437-41ae-97fe-63a0c0a24995,fumur-jufir-vuzag/diha_AIFI_L1_frequencies_2024-05-05.csv
6c82ee51-1898-4699-b382-59e619a05b82,fumur-jufir-vuzag/diha_AIFI_L2_frequencies_2024-05-05.csv
577902fe-094d-499e-86a1-e5fe67131fb1,fumur-jufir-vuzag/diha_AIFI_L3_frequencies_2024-05-05.csv


## Download files from HISE

In [9]:
file_paths <- map(freq_files$id, cache_uuid_path)
file_paths

# Load data

In [10]:
freq_data <- map(file_paths, read.csv)
names(freq_data) <- sub(".+diha_(.+)_freq.+", "\\1", file_paths)

Now we have a list of frequency data at each level:

In [11]:
# l1 <- freq_data[["AIFI_L1"]]
# head(l1)
# nrow(l1)

In [12]:
# l2 <- freq_data[["AIFI_L2"]]
# head(l2)
# nrow(l2)

In [13]:
l3 <- freq_data[["AIFI_L3"]]
head(l3)
nrow(l3)
dim(l3)

Unnamed: 0_level_0,cohort.cohortGuid,subject.subjectGuid,subject.biologicalSex,subject.cmv,subject.bmi,subject.race,subject.ethnicity,subject.birthYear,subject.ageAtFirstDraw,sample.sampleKitGuid,⋯,AIFI_L2,AIFI_L3,AIFI_L3_count,total_cells,scrna.lymphocyte_count,bc.lymphocyte_count,alc_ratio,AIFI_L3_frac_total,AIFI_L3_alc,AIFI_L3_clr
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<int>,<chr>,<chr>,<int>,<int>,<chr>,⋯,<chr>,<chr>,<int>,<int>,<int>,<int>,<dbl>,<dbl>,<dbl>,<dbl>
1,BR1,BR1001,Female,Negative,23,Caucasian,Non-Hispanic origin,1987,32,KT00001,⋯,Effector B cell,CD27+ effector B cell,71,18231,13903,1337,0.0961663,0.0038944655,6.8278069,0.3324226
2,BR1,BR1001,Female,Negative,23,Caucasian,Non-Hispanic origin,1987,32,KT00001,⋯,Effector B cell,CD27- effector B cell,26,18231,13903,1337,0.0961663,0.0014261423,2.5003237,-0.6721608
3,BR1,BR1001,Female,Negative,23,Caucasian,Non-Hispanic origin,1987,32,KT00001,⋯,Memory B cell,Activated memory B cell,3,18231,13903,1337,0.0961663,0.0001645549,0.2884989,-2.831645
4,BR1,BR1001,Female,Negative,23,Caucasian,Non-Hispanic origin,1987,32,KT00001,⋯,Memory B cell,CD95 memory B cell,15,18231,13903,1337,0.0961663,0.0008227744,1.4424944,-1.2222071
5,BR1,BR1001,Female,Negative,23,Caucasian,Non-Hispanic origin,1987,32,KT00001,⋯,Memory B cell,Core memory B cell,329,18231,13903,1337,0.0961663,0.0180461851,31.6387111,1.8658004
6,BR1,BR1001,Female,Negative,23,Caucasian,Non-Hispanic origin,1987,32,KT00001,⋯,Memory B cell,Early memory B cell,10,18231,13903,1337,0.0961663,0.0005485163,0.961663,-1.6276722


In [14]:
all_df <- l3

In [15]:
rownames(all_df)<-all_df$cell_uuid

In [16]:
head(all_df)
colnames(all_df)

Unnamed: 0_level_0,cohort.cohortGuid,subject.subjectGuid,subject.biologicalSex,subject.cmv,subject.bmi,subject.race,subject.ethnicity,subject.birthYear,subject.ageAtFirstDraw,sample.sampleKitGuid,⋯,AIFI_L2,AIFI_L3,AIFI_L3_count,total_cells,scrna.lymphocyte_count,bc.lymphocyte_count,alc_ratio,AIFI_L3_frac_total,AIFI_L3_alc,AIFI_L3_clr
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<int>,<chr>,<chr>,<int>,<int>,<chr>,⋯,<chr>,<chr>,<int>,<int>,<int>,<int>,<dbl>,<dbl>,<dbl>,<dbl>
1,BR1,BR1001,Female,Negative,23,Caucasian,Non-Hispanic origin,1987,32,KT00001,⋯,Effector B cell,CD27+ effector B cell,71,18231,13903,1337,0.0961663,0.0038944655,6.8278069,0.3324226
2,BR1,BR1001,Female,Negative,23,Caucasian,Non-Hispanic origin,1987,32,KT00001,⋯,Effector B cell,CD27- effector B cell,26,18231,13903,1337,0.0961663,0.0014261423,2.5003237,-0.6721608
3,BR1,BR1001,Female,Negative,23,Caucasian,Non-Hispanic origin,1987,32,KT00001,⋯,Memory B cell,Activated memory B cell,3,18231,13903,1337,0.0961663,0.0001645549,0.2884989,-2.831645
4,BR1,BR1001,Female,Negative,23,Caucasian,Non-Hispanic origin,1987,32,KT00001,⋯,Memory B cell,CD95 memory B cell,15,18231,13903,1337,0.0961663,0.0008227744,1.4424944,-1.2222071
5,BR1,BR1001,Female,Negative,23,Caucasian,Non-Hispanic origin,1987,32,KT00001,⋯,Memory B cell,Core memory B cell,329,18231,13903,1337,0.0961663,0.0180461851,31.6387111,1.8658004
6,BR1,BR1001,Female,Negative,23,Caucasian,Non-Hispanic origin,1987,32,KT00001,⋯,Memory B cell,Early memory B cell,10,18231,13903,1337,0.0961663,0.0005485163,0.961663,-1.6276722


In [17]:
all_df$facet_combined <- paste( all_df$subject.biologicalSex,all_df$cohort.cohortGuid, sep = "_")
all_df$facet_combined<- factor(all_df$facet_combined)
unique(all_df$facet_combined)

In [18]:
all_df <- all_df %>% filter(sample.visitName == "Flu Year 1 Day 0")

# Final Loop code

## AGE and SEX grouping

In [19]:
# BR1_Female, BR1_Male, BR2_Female, BR2_Male
#my_comparisons <- list(c("BR1_Female", "BR2_Female"), c("BR1_Female", "BR1_Male"), c("BR1_Male", "BR2_Male"), c("BR2_Female", "BR2_Male") )
my_comparisons <- list(c("Female_BR1", "Female_BR2"), c("Male_BR1", "Male_BR2"), c("Female_BR1", "Male_BR1"), c("Female_BR2", "Male_BR2"))

In [20]:
all_df %>% head()

Unnamed: 0_level_0,cohort.cohortGuid,subject.subjectGuid,subject.biologicalSex,subject.cmv,subject.bmi,subject.race,subject.ethnicity,subject.birthYear,subject.ageAtFirstDraw,sample.sampleKitGuid,⋯,AIFI_L3,AIFI_L3_count,total_cells,scrna.lymphocyte_count,bc.lymphocyte_count,alc_ratio,AIFI_L3_frac_total,AIFI_L3_alc,AIFI_L3_clr,facet_combined
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<int>,<chr>,<chr>,<int>,<int>,<chr>,⋯,<chr>,<int>,<int>,<int>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<fct>
1,BR1,BR1001,Female,Negative,23,Caucasian,Non-Hispanic origin,1987,32,KT00001,⋯,CD27+ effector B cell,71,18231,13903,1337,0.0961663,0.0038944655,6.8278069,0.3324226,Female_BR1
2,BR1,BR1001,Female,Negative,23,Caucasian,Non-Hispanic origin,1987,32,KT00001,⋯,CD27- effector B cell,26,18231,13903,1337,0.0961663,0.0014261423,2.5003237,-0.6721608,Female_BR1
3,BR1,BR1001,Female,Negative,23,Caucasian,Non-Hispanic origin,1987,32,KT00001,⋯,Activated memory B cell,3,18231,13903,1337,0.0961663,0.0001645549,0.2884989,-2.831645,Female_BR1
4,BR1,BR1001,Female,Negative,23,Caucasian,Non-Hispanic origin,1987,32,KT00001,⋯,CD95 memory B cell,15,18231,13903,1337,0.0961663,0.0008227744,1.4424944,-1.2222071,Female_BR1
5,BR1,BR1001,Female,Negative,23,Caucasian,Non-Hispanic origin,1987,32,KT00001,⋯,Core memory B cell,329,18231,13903,1337,0.0961663,0.0180461851,31.6387111,1.8658004,Female_BR1
6,BR1,BR1001,Female,Negative,23,Caucasian,Non-Hispanic origin,1987,32,KT00001,⋯,Early memory B cell,10,18231,13903,1337,0.0961663,0.0005485163,0.961663,-1.6276722,Female_BR1


In [22]:
##For CLR USE GGPLOt2

my_comparisons <- list(c("Female_BR1", "Female_BR2"), c("Male_BR1", "Male_BR2"), c("Female_BR1", "Male_BR1"), c("Female_BR2", "Male_BR2"))

for(cell in unique(all_df$AIFI_L3)){
    
    print(cell)
    cell_df <- all_df %>% dplyr::filter(AIFI_L3 == cell)
    
    stat_test <- wilcox_test(AIFI_L3_clr ~ facet_combined, data = cell_df, comparisons = my_comparisons, paired= FALSE)%>% 
           adjust_pvalue(method = 'BH') %>% 
           add_xy_position(x = "facet_combined", dodge = 0.8)
    
    outfile <- paste("/home/jupyter/IH-A-Aging-Analysis-Notebooks_old/Mansi_Notebooks/scRNA/CertPro_analysis/scripts/Figure_Notebooks/Loop_plots/CLR_Comparisons_cohort_&_Sex/CLR_Comparisons_cohort_&_Sex_", cell, ".pdf", sep="")
    options(repr.plot.width = 18, repr.plot.height = 10)
    p3 <- ggplot(cell_df, aes(x = facet_combined, y = AIFI_L3_clr, color = subject.biologicalSex)) +
      geom_boxplot(width = 0.5, outlier.shape = NA) +
      geom_jitter(width =0.1) +
      ylab('CLR') + 
      xlab(cell) +
      scale_color_manual(values = c("#9970ab", "#5aae61")) +
      theme(axis.text.x = element_text(size = 12, angle = 90 ),     # Adjust font size of x-axis text
            axis.text.y = element_text(size = 12),     # Adjust font size of y-axis text
            axis.title = element_text(size = 14),
            panel.background = element_rect(fill = "white"),
            axis.line = element_line(color = "black"),  # Set axis line color to black,  # Set major grid line color to black
            panel.grid.minor = element_blank(),
            legend.position = "top",  # Set legend position to top
            #legend.title = element_blank()
           ) +
      geom_boxplot(position = position_dodge(width = 1))+
      stat_pvalue_manual(stat_test, label = "padj = {p.adj.signif}", tip.length = 0) +
      scale_y_continuous(expand = expansion(mult = c(0, 0.1)))
      pdf(file = outfile, width = 7, height = 11)
      print(p3)
      dev.off()

    
    }





[1] "CD27+ effector B cell"
[1] "CD27- effector B cell"
[1] "Activated memory B cell"
[1] "CD95 memory B cell"
[1] "Core memory B cell"
[1] "Early memory B cell"
[1] "Type 2 polarized memory B cell"
[1] "Core naive B cell"
[1] "ISG+ naive B cell"
[1] "Plasma cell"
[1] "Transitional B cell"
[1] "ASDC"
[1] "cDC1"
[1] "CD14+ cDC2"
[1] "HLA-DRhi cDC2"
[1] "ISG+ cDC2"
[1] "pDC"
[1] "Erythrocyte"
[1] "ILC"
[1] "Core CD14 monocyte"
[1] "IL1B+ CD14 monocyte"
[1] "ISG+ CD14 monocyte"
[1] "C1Q+ CD16 monocyte"
[1] "Core CD16 monocyte"
[1] "ISG+ CD16 monocyte"
[1] "Intermediate monocyte"
[1] "CD56bright NK cell"
[1] "Adaptive NK cell"
[1] "GZMK+ CD56dim NK cell"
[1] "GZMK- CD56dim NK cell"
[1] "ISG+ CD56dim NK cell"
[1] "Proliferating NK cell"
[1] "Platelet"
[1] "BaEoMaP cell"
[1] "CLP cell"
[1] "CMP cell"
[1] "CD8aa"
[1] "DN T cell"
[1] "CD4 MAIT"
[1] "CD8 MAIT"
[1] "ISG+ MAIT"
[1] "CM CD4 T cell"
[1] "GZMB- CD27+ EM CD4 T cell"
[1] "GZMB- CD27- EM CD4 T cell"
[1] "ISG+ memory CD4 T cell"
[1] "KL

In [26]:
# Output stat table

my_comparisons <- list(c("Female_BR1", "Female_BR2"), c("Male_BR1", "Male_BR2"), c("Female_BR1", "Male_BR1"), c("Female_BR2", "Male_BR2"))

for(cell in unique(all_df$AIFI_L3)){
    
    print(cell)
    cell_df <- all_df %>% dplyr::filter(AIFI_L3 == cell)
    
    # stat_test <- wilcox_test(AIFI_L3_clr ~ facet_combined, data = cell_df, comparisons = my_comparisons, paired= FALSE)%>% 
    #        adjust_pvalue(method = 'BH') %>% 
    #        add_xy_position(x = "facet_combined", dodge = 0.8)
    
     stat_test <- wilcox_test(AIFI_L3_clr ~ facet_combined, data = cell_df, comparisons = my_comparisons, paired= FALSE)%>% 
           adjust_pvalue(method = 'BH') 
  
    
    # Define the output file path for the CSV
  outfile_csv <- paste("/home/jupyter/IH-A-Aging-Analysis-Notebooks_old/Mansi_Notebooks/scRNA/CertPro_analysis/scripts/Figure_Notebooks/Loop_plots/CLR_Comparisons_cohort_&_Sex_stat_tables/CLR_Comparisons_cohort_&_Sex_stat_table_", cell, ".csv", sep="")
  
  # Write the data frame to a CSV file
  write.csv(stat_test, outfile_csv, row.names = FALSE)
    
    }



[1] "CD27+ effector B cell"
[1] "CD27- effector B cell"
[1] "Activated memory B cell"
[1] "CD95 memory B cell"
[1] "Core memory B cell"
[1] "Early memory B cell"
[1] "Type 2 polarized memory B cell"
[1] "Core naive B cell"
[1] "ISG+ naive B cell"
[1] "Plasma cell"
[1] "Transitional B cell"
[1] "ASDC"
[1] "cDC1"
[1] "CD14+ cDC2"
[1] "HLA-DRhi cDC2"
[1] "ISG+ cDC2"
[1] "pDC"
[1] "Erythrocyte"
[1] "ILC"
[1] "Core CD14 monocyte"
[1] "IL1B+ CD14 monocyte"
[1] "ISG+ CD14 monocyte"
[1] "C1Q+ CD16 monocyte"
[1] "Core CD16 monocyte"
[1] "ISG+ CD16 monocyte"
[1] "Intermediate monocyte"
[1] "CD56bright NK cell"
[1] "Adaptive NK cell"
[1] "GZMK+ CD56dim NK cell"
[1] "GZMK- CD56dim NK cell"
[1] "ISG+ CD56dim NK cell"
[1] "Proliferating NK cell"
[1] "Platelet"
[1] "BaEoMaP cell"
[1] "CLP cell"
[1] "CMP cell"
[1] "CD8aa"
[1] "DN T cell"
[1] "CD4 MAIT"
[1] "CD8 MAIT"
[1] "ISG+ MAIT"
[1] "CM CD4 T cell"
[1] "GZMB- CD27+ EM CD4 T cell"
[1] "GZMB- CD27- EM CD4 T cell"
[1] "ISG+ memory CD4 T cell"
[1] "KL

In [25]:
##For percentage USE GGPLOt2

my_comparisons <- list(c("BR1_Female", "BR2_Female"), c("BR1_Female", "BR1_Male"), c("BR1_Male", "BR2_Male"), c("BR2_Female", "BR2_Male") )

for(cell in unique(result$AIFI_L3)){
    
    print(cell)
    cell_df <- result %>% dplyr::filter(AIFI_L3 == cell)
    
    stat_test <- wilcox_test(percentage ~ facet_combined, data = cell_df, comparisons = my_comparisons, paired= FALSE)%>% 
           adjust_pvalue(method = 'BH') %>% 
           add_xy_position(x = "facet_combined", dodge = 0.8)
        
    outfile <- paste("/home/jupyter/Myeloid_cells/Plots/Percentage_Comparisons_cohort_&_Sex/Percentage_Comparisons_cohort_&_Sex_", cell, ".pdf", sep="")
    options(repr.plot.width = 18, repr.plot.height = 10)
    p3 <- ggplot(cell_df, aes(x = facet_combined, y = percentage, color = subject.biologicalSex)) +
      geom_boxplot(width = 0.5, outlier.shape = NA) +
      geom_jitter(width =0.1) +
      ylab('percentage') + 
      xlab(cell) +
      scale_color_manual(values = c( "red","blue")) +
      theme(axis.text.x = element_text(size = 12, angle = 90 ),     # Adjust font size of x-axis text
            axis.text.y = element_text(size = 12),     # Adjust font size of y-axis text
            axis.title = element_text(size = 14),
            panel.background = element_rect(fill = "white"),
            axis.line = element_line(color = "black"),  # Set axis line color to black,  # Set major grid line color to black
            panel.grid.minor = element_blank(),
            legend.position = "top",  # Set legend position to top
            #legend.title = element_blank()
           ) +
      geom_boxplot(position = position_dodge(width = 1))+
      stat_pvalue_manual(stat_test, label = "p.adj.signif", tip.length = 0) +
      scale_y_continuous(expand = expansion(mult = c(0, 0.1)))
      pdf(file = outfile, width = 7, height = 11)
      print(p3)
      dev.off()

    
    }





ERROR: Error in eval(expr, envir, enclos): object 'result' not found
