In [None]:
suppressPackageStartupMessages(library(readxl))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(tidyr))
suppressPackageStartupMessages(library(tibble))
suppressPackageStartupMessages(library(patchwork))
suppressPackageStartupMessages(library(pheatmap))
suppressPackageStartupMessages(library(stringr))
library(hise)
library(plyr)
library(purrr)
library(vegan)
#library(arrow)
library(rstatix)
library(parallel)
library(ggpubr)
library(ComplexHeatmap)
library(compositions)
library(ggsci)

In [None]:
# Hex colour palette with 71 entries. A few codes occur more than once
# (e.g. "#666666", "#FFFF99"), so the entries are not all distinct.
colors <- c(
  "#FDC086", "#FFFF99", "#F0027F", "#BF5B17", "#666666",
  "#1B9E77", "#D95F02", "#7570B3", "#E7298A", "#66A61E", "#E6AB02",
  "#A6761D", "#666666",
  "#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C", "#FB9A99", "#E31A1C",
  "#FDBF6F", "#FF7F00",
  "#CAB2D6", "#6A3D9A", "#FFFF99", "#B15928", "#FBB4AE", "#B3CDE3",
  "#CCEBC5", "#DECBE4",
  "#FED9A6", "#FFFFCC", "#E5D8BD", "#FDDAEC", "#F2F2F2", "#B3E2CD",
  "#FDCDAC", "#CBD5E8",
  "#F4CAE4", "#E6F5C9", "#FFF2AE", "#F1E2CC", "#CCCCCC", "#E41A1C",
  "#377EB8", "#4DAF4A",
  "#984EA3", "#FF7F00", "#FFFF33", "#A65628", "#F781BF", "#999999",
  "#66C2A5", "#FC8D62",
  "#8DA0CB", "#E78AC3", "#A6D854", "#FFD92F", "#E5C494", "#B3B3B3",
  "#8DD3C7", "#FFFFB3",
  "#BEBADA", "#FB8072", "#80B1D3", "#FDB462", "#B3DE69", "#FCCDE5",
  "#D9D9D9", "#BC80BD",
  "#CCEBC5", "#FFED6F"
)

# Reading the file

In [None]:
# Read the flu Year 1 / Year 2 CSV (per the file name) into a data frame.
# Original author note: "No UP1" (meaning not shown in this notebook; TODO confirm).
flu_df <- read.csv('/home/jupyter/Myeloid_cells/files/AIFI_fluYr1Yr2_raw.csv')


In [None]:
# Load the AIFI reference dictionary; further down it is joined to the
# per-population table on the AIFI_L3 column.
dict <- read.csv('/home/jupyter/Myeloid_cells/files/Dictionary_AIFI_Reference.csv')

In [None]:
# Load the sample sheet whose subject.subjectGuid values define which subjects
# are kept for the paired analysis.
select_samples <- read.csv("/home/jupyter/Myeloid_cells/files/formansi_filtered_samples_fluspecific_bcells.csv")

In [None]:
# Preview the first rows of the sample sheet and list its column names.
head(select_samples)
colnames(select_samples)


In [None]:
# Distinct subject IDs present in the sample sheet, followed by their count.
samps <- unique(select_samples[["subject.subjectGuid"]])
length(samps)

In [None]:
# Restrict flu_df to the rows whose subject ID is listed in `samps`.
filtered_flu_df <- subset(flu_df, subject.subjectGuid %in% samps)

In [None]:
# Preview the filtered table and count how many distinct subjects remain.
head(filtered_flu_df)
length(unique(filtered_flu_df$subject.subjectGuid))

In [None]:
# List the cohort identifiers present after filtering.
unique(filtered_flu_df$cohort.cohortGuid)
# Disabled: list the label values of other annotation levels in the full table.
#unique(flu_df$AIFI_L3.5)
#unique(flu_df$AIFI_L1)

In [None]:
# Number of rows kept after the subject filter.
nrow(filtered_flu_df)

In [None]:
# Use cell_uuid as the row names. Data-frame row names must be unique, so this
# assignment fails if any cell_uuid is duplicated.
rownames(filtered_flu_df)<-filtered_flu_df$cell_uuid

In [None]:
#length(unique(myeloid_df$subject.subjectGuid))

In [None]:
#unique(myeloid_df$AIFI_L1)

In [None]:
# List the columns available in the full (unfiltered) table.
colnames(flu_df)

# CLR Transform


In [None]:
#' Centred log-ratio (CLR) transform of one composition.
#'
#' Every entry is expressed as the log of its ratio to the geometric mean of
#' the whole vector, so the returned values sum to zero.
#'
#' @param x Numeric vector of strictly positive parts (e.g. percentages).
#' @return Numeric vector of the same length as `x`, or a single `NA` when `x`
#'   is empty. Missing or non-positive entries are not imputed: the geometric
#'   mean is undefined for them, so a warning is raised and the result
#'   contains `NA`/`NaN`/infinite values.
clr_transform <- function(x) {
  if (length(x) == 0) {
    return(NA_real_) # nothing to transform
  }
  if (anyNA(x) || any(x <= 0)) {
    warning(
      "clr_transform(): input contains missing or non-positive values; ",
      "the CLR is undefined for them",
      call. = FALSE
    )
  }
  log_x <- log(x)
  # log(x / geometric_mean) equals log(x) - mean(log(x)); the subtraction form
  # skips the exp() round trip, which can underflow or overflow.
  log_x - mean(log_x)
}

# Comparison within total PBMCs (Percentage and CLR)

In [None]:
## Percentage of total

# Frequency of every AIFI_L3 population within each sample (one sample =
# one subject at one visit), plus the CLR transform of those frequencies.
result <- flu_df %>%
  # Count cells per population, subject and visit.
  dplyr::group_by(AIFI_L3, subject.subjectGuid, sample.visitName) %>%
  dplyr::summarise(count = dplyr::n(), .groups = "drop") %>%
  # Normalise within subject AND visit. Grouping by subject alone would pool
  # all visits of a subject into one denominator, so Day 0 and Day 7 values
  # would depend on how many cells each visit happened to contribute.
  dplyr::group_by(subject.subjectGuid, sample.visitName) %>%
  dplyr::mutate(
    percentage = (count / sum(count)) * 100,
    # CLR is computed over the same composition the percentages sum over.
    percentage_clr = clr_transform(percentage)
  ) %>%
  dplyr::ungroup()

head(result)
#sum(result$subject.subjectGuid)

In [None]:
# Sanity check: total of the percentage column over all rows of subject BR1001.
result %>%
  filter(subject.subjectGuid == "BR1001") %>%
  pull(percentage) %>%
  sum()

In [None]:
# Attach the subject-level columns (cohort, biological sex, CMV serology
# interpretation) to every row, matching on the subject ID.
result <- result %>%
  left_join(
    unique(flu_df[c(
      "cohort.cohortGuid",
      "subject.subjectGuid",
      "subject.biologicalSex",
      "CMV.IgG.Serology.Result.Interpretation"
    )]),
    by = "subject.subjectGuid"
  )

In [None]:
# Preview the table after the metadata join.
head(result)


# Global Plot

In [None]:
# Keep the two Year 1 visits (Day 0 and Day 7).
result_yr1 <- result %>%
  filter(sample.visitName %in% c("Flu Year 1 Day 0", "Flu Year 1 Day 7"))

In [None]:
# Keep the two Year 2 visits (Day 0 and Day 7).
result_yr2 <- result %>%
  filter(sample.visitName %in% c("Flu Year 2 Day 0", "Flu Year 2 Day 7"))

In [None]:
# Dimensions and first rows of the Year 1 subset.
dim(result_yr1)
head(result_yr1)

In [None]:
# Number of rows in the Year 1 subset (R has no len(); length() is the
# base function for vector length).
length(result_yr1$sample.visitName)

In [None]:
# Year 1, all subjects pooled (no cohort or CMV split): compare Day 0 with
# Day 7 for every AIFI_L3 population.

# Disabled alternatives that would restrict `result` to one CMV serostatus:
#result_cohort <- result[result$CMV.IgG.Serology.Result.Interpretation == 'Negative', ]
#result_cohort <- result[result$CMV.IgG.Serology.Result.Interpretation == 'Positive', ]

# Unpaired Wilcoxon test on the CLR values, one test per population, with
# Benjamini-Hochberg adjustment and significance symbols added.
stat_test <- result_yr1 %>%
  select(
    percentage, percentage_clr, AIFI_L3, cohort.cohortGuid,
    CMV.IgG.Serology.Result.Interpretation, subject.biologicalSex,
    sample.visitName
  ) %>%
  unique() %>%
  group_by(AIFI_L3) %>%
  wilcox_test(percentage_clr ~ sample.visitName) %>%
  adjust_pvalue(method = "BH") %>%
  add_significance()
#stat_test

# Populations whose adjusted p-value is below 0.05.
significant_cell_types <- stat_test %>%
  filter(p.adj < 0.05) %>%
  select(AIFI_L3, .y., p.adj, p.adj.signif) %>%
  unique()

print(significant_cell_types)

# Raw percentages: boxplots per population, filled by visit. reorder() with
# desc() places populations with the highest mean percentage first.
#options(repr.plot.width = 35, repr.plot.height = 12)
p4 <- ggplot(
  result_yr1,
  aes(
    x = reorder(AIFI_L3, desc(percentage)),
    y = percentage,
    fill = sample.visitName
  )
) +
  geom_boxplot(width = 0.5, outlier.shape = ".") +
  scale_x_discrete(name = "AIFI_L3", guide = guide_axis(angle = 90)) +
  theme_bw(base_size = 12) +
  # Pairwise comparison of the two visits at each x position, shown as
  # BH-adjusted significance symbols (non-significant ones included).
  geom_pwc(
    aes(group = sample.visitName),
    method = "wilcox_test",
    p.adjust.method = "BH",
    label = "{p.adj.signif}",
    label.size = 10,
    tip.length = 0,
    angle = 0,
    hide.ns = FALSE
  ) +
  # Optional: facet_grid(cohort.cohortGuid ~ ., scales = "free_y", space = "free_y")
  # Extra head-room at the top of the axis for the significance labels.
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
  theme(text = element_text(size = 30))

# Display the plot
#print(p4)

# Same layout for the CLR-transformed percentages.
p5 <- ggplot(
  result_yr1,
  aes(
    x = reorder(AIFI_L3, desc(percentage_clr)),
    y = percentage_clr,
    fill = sample.visitName
  )
) +
  geom_boxplot(width = 0.5, outlier.shape = ".") +
  scale_x_discrete(name = "AIFI_L3", guide = guide_axis(angle = 90)) +
  theme_bw(base_size = 12) +
  geom_pwc(
    aes(group = sample.visitName),
    method = "wilcox_test",
    p.adjust.method = "BH",
    label = "{p.adj.signif}",
    label.size = 10,
    tip.length = 0,
    angle = 0,
    hide.ns = FALSE
  ) +
  # Optional: facet_grid(cohort.cohortGuid ~ ., scales = "free_y", space = "free_y")
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
  theme(text = element_text(size = 30, color = "black"))

#print(p5)

# Stack the two panels (percentage on top, CLR below) in a single column.
library(cowplot)

options(repr.plot.width = 50, repr.plot.height = 30)
combined_plot <- plot_grid(
  p4, p5,
  labels = c("Frequency_Percentage", "Frequency_Percentage_CLR"),
  ncol = 1
)

combined_plot


In [None]:
# Year 2, all subjects pooled (no cohort or CMV split): compare Day 0 with
# Day 7 for every AIFI_L3 population.

# Disabled alternatives that would restrict `result` to one CMV serostatus:
#result_cohort <- result[result$CMV.IgG.Serology.Result.Interpretation == 'Negative', ]
#result_cohort <- result[result$CMV.IgG.Serology.Result.Interpretation == 'Positive', ]

# Unpaired Wilcoxon test on the CLR values, one test per population, with
# Benjamini-Hochberg adjustment and significance symbols added.
stat_test <- result_yr2 %>%
  select(
    percentage, percentage_clr, AIFI_L3, cohort.cohortGuid,
    CMV.IgG.Serology.Result.Interpretation, subject.biologicalSex,
    sample.visitName
  ) %>%
  unique() %>%
  group_by(AIFI_L3) %>%
  wilcox_test(percentage_clr ~ sample.visitName) %>%
  adjust_pvalue(method = "BH") %>%
  add_significance()
#stat_test

# Populations whose adjusted p-value is below 0.05.
significant_cell_types <- stat_test %>%
  filter(p.adj < 0.05) %>%
  select(AIFI_L3, .y., p.adj, p.adj.signif) %>%
  unique()

print(significant_cell_types)

# Raw percentages: boxplots per population, filled by visit. reorder() with
# desc() places populations with the highest mean percentage first.
#options(repr.plot.width = 35, repr.plot.height = 12)
p4 <- ggplot(
  result_yr2,
  aes(
    x = reorder(AIFI_L3, desc(percentage)),
    y = percentage,
    fill = sample.visitName
  )
) +
  geom_boxplot(width = 0.5, outlier.shape = ".") +
  scale_x_discrete(name = "AIFI_L3", guide = guide_axis(angle = 90)) +
  theme_bw(base_size = 12) +
  # Pairwise comparison of the two visits at each x position, shown as
  # BH-adjusted significance symbols (non-significant ones included).
  geom_pwc(
    aes(group = sample.visitName),
    method = "wilcox_test",
    p.adjust.method = "BH",
    label = "{p.adj.signif}",
    label.size = 10,
    tip.length = 0,
    angle = 0,
    hide.ns = FALSE
  ) +
  # Optional: facet_grid(cohort.cohortGuid ~ ., scales = "free_y", space = "free_y")
  # Extra head-room at the top of the axis for the significance labels.
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
  theme(text = element_text(size = 30))

# Display the plot
#print(p4)

# Same layout for the CLR-transformed percentages.
p5 <- ggplot(
  result_yr2,
  aes(
    x = reorder(AIFI_L3, desc(percentage_clr)),
    y = percentage_clr,
    fill = sample.visitName
  )
) +
  geom_boxplot(width = 0.5, outlier.shape = ".") +
  scale_x_discrete(name = "AIFI_L3", guide = guide_axis(angle = 90)) +
  theme_bw(base_size = 12) +
  geom_pwc(
    aes(group = sample.visitName),
    method = "wilcox_test",
    p.adjust.method = "BH",
    label = "{p.adj.signif}",
    label.size = 10,
    tip.length = 0,
    angle = 0,
    hide.ns = FALSE
  ) +
  # Optional: facet_grid(cohort.cohortGuid ~ ., scales = "free_y", space = "free_y")
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
  theme(text = element_text(size = 30, color = "black"))

#print(p5)

# Stack the two panels (percentage on top, CLR below) in a single column.
library(cowplot)

options(repr.plot.width = 50, repr.plot.height = 30)
combined_plot <- plot_grid(
  p4, p5,
  labels = c("Frequency_Percentage", "Frequency_Percentage_CLR"),
  ncol = 1
)

combined_plot


## Wilcoxon signed rank test - Fix for Flu and Null Vax comparisons

In [None]:

# Per-sample composition for the selected subjects only: count cells per
# AIFI_L3 population, subject and visit, then convert the counts to
# percentages and CLR values within each subject/visit pair.
result <- filtered_flu_df %>%
  dplyr::group_by(AIFI_L3, subject.subjectGuid, sample.visitName) %>%
  dplyr::summarise(count = n(), .groups = "drop_last") %>%
  # Both derived columns share the same subject/visit grouping, so they are
  # computed in a single mutate().
  dplyr::group_by(subject.subjectGuid, sample.visitName) %>%
  dplyr::mutate(
    percentage = (count / sum(count)) * 100,
    percentage_clr = clr_transform((percentage))
  ) %>%
  dplyr::ungroup()

head(result)



In [None]:
# Number of rows in `result` (one per population / subject / visit combination).
length(result$AIFI_L3)

In [None]:
# Attach the subject-level columns (cohort, biological sex, CMV serology
# interpretation) from the filtered table, matching on the subject ID.
result <- result %>%
  left_join(
    unique(filtered_flu_df[c(
      "cohort.cohortGuid",
      "subject.subjectGuid",
      "subject.biologicalSex",
      "CMV.IgG.Serology.Result.Interpretation"
    )]),
    by = "subject.subjectGuid"
  )

In [None]:
# Display the joined table.
result

In [None]:
# Keep the two Year 1 visits (Day 0 and Day 7) of the selected subjects.
result_yr1 <- result %>%
  filter(sample.visitName %in% c("Flu Year 1 Day 0", "Flu Year 1 Day 7"))

In [None]:
# Display the Year 1 subset.
result_yr1
# Disabled: count the distinct subjects in the Year 1 subset.
#length(unique(result_yr1$subject.subjectGuid))

In [None]:
# Check the structure of the data frame
str(result_yr1)

# Number of percentage values and visit labels within each AIFI_L3 group.
# dplyr:: is spelled out on purpose: this notebook attaches plyr after dplyr,
# so a bare summarize() resolves to plyr::summarize(), which ignores the
# group_by() and collapses the whole table into a single row.
result_yr1 %>%
  dplyr::group_by(AIFI_L3) %>%
  dplyr::summarise(
    percentage_length = length(percentage),
    visitName_length = length(sample.visitName)
  )


In [None]:
# For each population, keep only the subjects that have BOTH Year 1 visits,
# so every remaining subject contributes a Day 0 and a Day 7 value.
# The result stays grouped by AIFI_L3 and subject.
filtered_data <- result_yr1 %>%
  group_by(AIFI_L3, subject.subjectGuid) %>%
  filter(
    is.element("Flu Year 1 Day 0", sample.visitName),
    is.element("Flu Year 1 Day 7", sample.visitName)
  )
filtered_data

In [None]:
# Samir sanity check (code review): paired Wilcoxon signed-rank test per
# AIFI_L3 population on the CLR values, followed by a manual re-computation
# for plasma cells with base R.

stat_test <- filtered_data %>%
  # subject.subjectGuid is selected explicitly so that unique() can never
  # merge two different subjects, even when this cell is re-run after
  # filtered_data has lost its grouping.
  select(
    percentage, percentage_clr, AIFI_L3, cohort.cohortGuid,
    CMV.IgG.Serology.Result.Interpretation, subject.biologicalSex,
    sample.visitName, subject.subjectGuid
  ) %>%
  unique() %>%
  # The paired test matches Day 0 and Day 7 values by row order, so the
  # subjects are put in the same order within every population and visit.
  dplyr::arrange(AIFI_L3, sample.visitName, subject.subjectGuid) %>%
  group_by(AIFI_L3) %>%
  wilcox_test(percentage_clr ~ sample.visitName, paired = TRUE) %>%
  adjust_pvalue(method = "BH") %>%
  add_significance()

# Populations whose adjusted p-value is below 0.05.
significant_cell_types <- stat_test %>%
  filter(p.adj < 0.05) %>%
  select(AIFI_L3, .y., p, p.adj, p.adj.signif) %>%
  unique()

print(significant_cell_types)

# From here on filtered_data is a data.table (the grouping is dropped).
filtered_data <- data.table::data.table(filtered_data)
plasma <- filtered_data[AIFI_L3 == "Plasma cell"]

# Build the two paired vectors explicitly instead of passing `paired` to the
# formula interface: the formula method pairs by row position only, and
# current R versions reject `paired` there altogether.
plasma_d0 <- plasma[sample.visitName == "Flu Year 1 Day 0"][order(subject.subjectGuid)]
plasma_d7 <- plasma[sample.visitName == "Flu Year 1 Day 7"][order(subject.subjectGuid)]
# Both vectors must list the same subjects in the same order.
stopifnot(identical(plasma_d0$subject.subjectGuid, plasma_d7$subject.subjectGuid))

wilcox.test(plasma_d0$percentage_clr, plasma_d7$percentage_clr, paired = TRUE)

In [None]:
# Display the per-population paired test results.
stat_test

In [None]:
# Row count for every AIFI_L3 / visit combination. In paired data the two
# visits of a population should show the same count.
filtered_data %>%
  dplyr::group_by(AIFI_L3, sample.visitName) %>%
  dplyr::summarise(n = dplyr::n(), .groups = "drop_last")


In [None]:
# Display the per-population paired test results again.
stat_test

### Global Plot for Wilcox signed rank test (paired)

In [None]:
# Preview the paired Year 1 table.
head(filtered_data)

In [None]:
# Paired Wilcoxon signed-rank test on the raw percentages, one test per
# population, BH-adjusted; bracket coordinates are then added with the visit
# as the x variable and a dodge of 0.8.
stat_test_1 <- filtered_data %>%
  select(
    percentage, percentage_clr, AIFI_L3, cohort.cohortGuid,
    CMV.IgG.Serology.Result.Interpretation, subject.biologicalSex,
    sample.visitName, subject.subjectGuid
  ) %>%
  unique() %>%
  group_by(AIFI_L3) %>%
  wilcox_test(percentage ~ sample.visitName, paired = TRUE) %>%
  adjust_pvalue(method = "BH") %>%
  add_significance() %>%
  add_xy_position(x = "sample.visitName", dodge = 0.8)

In [None]:
# Preview the test table, including the added bracket coordinates.
head(stat_test_1)

In [None]:
# Recompute the bracket coordinates with "percentage" given as the x variable
# and keep the result under a separate name.
# NOTE(review): "percentage" is the tested outcome, not a grouping variable;
# this looks exploratory. Confirm whether it is still needed.
stat_test_1_test <- stat_test_1 %>% add_xy_position(x = "percentage")
head(stat_test_1_test)

In [None]:
# Preview the paired Year 1 table.
filtered_data %>% head()


In [None]:
# Convert sample.visitName to a factor. The level order is the order in which
# the visit names first appear in filtered_data, so it follows the current row
# order of the table rather than a fixed list.
filtered_data$sample.visitName <- factor(filtered_data$sample.visitName, levels = unique(filtered_data$sample.visitName))


In [None]:
# Year 1 overview of all AIFI_L3 populations: paired Wilcoxon signed-rank
# results (Day 0 vs Day 7) drawn above dodged boxplots, once for the raw
# percentages (log10 axis) and once for the CLR values, then stacked.

# --- Percentage: test table ---
stat_test_1 <- filtered_data %>%
  select(
    percentage, percentage_clr, AIFI_L3, cohort.cohortGuid,
    CMV.IgG.Serology.Result.Interpretation, subject.biologicalSex,
    sample.visitName, subject.subjectGuid
  ) %>%
  unique() %>%
  group_by(AIFI_L3) %>%
  wilcox_test(percentage ~ sample.visitName, paired = TRUE) %>%
  adjust_pvalue(method = "BH") %>%
  add_significance() %>%
  add_xy_position(
    x = "AIFI_L3",
    fun = "max",
    comparisons = list(c("Flu Year 1 Day 0", "Flu Year 1 Day 7"))
  )

# Disabled: list the populations with p.adj < 0.05.
# significant_cell_types_1 <- stat_test_1 %>%
#   filter(p.adj < 0.05) %>%
#   select(AIFI_L3, .y., p.adj, p.adj.signif) %>%
#   unique()
# print(significant_cell_types_1)

# A single label height shared by all populations: log10 of the largest
# percentage plus a small offset.
# NOTE(review): the height is computed on the log10 scale, presumably because
# the y axis below is log10. Confirm the labels land where intended.
max_y <- max(filtered_data$percentage, na.rm = TRUE)
transformed_max_y <- log10(max_y)
buffer <- 0.2 # Adjust the buffer as needed

# --- Percentage: plot ---
#options(repr.plot.width = 35, repr.plot.height = 12)
p4 <- ggplot(
  filtered_data,
  aes(x = AIFI_L3, y = percentage, color = sample.visitName)
) +
  geom_boxplot(
    width = 0.5,
    outlier.shape = ".",
    position = position_dodge(width = 0.75),
    size = 1
  ) +
  scale_x_discrete(name = "AIFI_L3", guide = guide_axis(angle = 90)) +
  theme_bw(base_size = 12) +
  stat_pvalue_manual(
    stat_test_1,
    label = "p.adj.signif",
    tip.length = 0,
    y.position = transformed_max_y + buffer,
    label.size = 12
  ) +
  # Alternative axis: scale_y_continuous(trans = 'log10', labels = scales::math_format(10^.x))
  scale_y_log10(breaks = c(0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100)) +
  scale_color_manual(
    values = c("Flu Year 1 Day 0" = "#1b9e77", "Flu Year 1 Day 7" = "#d95f02")
  ) +
  # Optional: facet_grid(cohort.cohortGuid ~ ., scales = "free_y", space = "free_y")
  theme(
    text = element_text(size = 45, color = "black"),
    axis.text.x = element_text(color = "black"),
    axis.text.y = element_text(color = "black"),
    axis.title.x = element_text(color = "black"),
    axis.title.y = element_text(color = "black"),
    legend.text = element_text(color = "black"),
    legend.title = element_text(color = "black")
  ) +
  ggtitle("Percentage") +
  theme(plot.title = element_text(size = 35, hjust = 0.5, face = "bold"))

# Display the plot
#print(p4)

# --- CLR: test table ---
stat_test_2 <- filtered_data %>%
  select(
    percentage, percentage_clr, AIFI_L3, cohort.cohortGuid,
    CMV.IgG.Serology.Result.Interpretation, subject.biologicalSex,
    sample.visitName, subject.subjectGuid
  ) %>%
  unique() %>%
  group_by(AIFI_L3) %>%
  wilcox_test(percentage_clr ~ sample.visitName, paired = TRUE) %>%
  adjust_pvalue(method = "BH") %>%
  add_significance() %>%
  add_xy_position(
    x = "AIFI_L3",
    fun = "max",
    comparisons = list(c("Flu Year 1 Day 0", "Flu Year 1 Day 7"))
  )

# Disabled: list the populations with p.adj < 0.05.
# significant_cell_types_2 <- stat_test_2 %>%
#   filter(p.adj < 0.05) %>%
#   select(AIFI_L3, .y., p.adj, p.adj.signif) %>%
#   unique()
#print(significant_cell_types_2)

# --- CLR: plot (labels are placed at a fixed height of 10) ---
#options(repr.plot.width = 35, repr.plot.height = 12)
p5 <- ggplot(
  filtered_data,
  aes(x = AIFI_L3, y = percentage_clr, color = sample.visitName)
) +
  geom_boxplot(
    width = 0.5,
    outlier.shape = ".",
    position = position_dodge(width = 0.75),
    size = 1
  ) +
  scale_x_discrete(name = "AIFI_L3", guide = guide_axis(angle = 90)) +
  theme_bw(base_size = 12) +
  stat_pvalue_manual(
    stat_test_2,
    label = "p.adj.signif",
    tip.length = 0,
    y.position = 10,
    label.size = 12
  ) +
  scale_color_manual(
    values = c("Flu Year 1 Day 0" = "#1b9e77", "Flu Year 1 Day 7" = "#d95f02")
  ) +
  # Optional: facet_grid(cohort.cohortGuid ~ ., scales = "free_y", space = "free_y")
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
  theme(
    text = element_text(size = 45, color = "black"),
    axis.text.x = element_text(color = "black"),
    axis.text.y = element_text(color = "black"),
    axis.title.x = element_text(color = "black"),
    axis.title.y = element_text(color = "black"),
    legend.text = element_text(color = "black"),
    legend.title = element_text(color = "black")
  ) +
  ggtitle("CLR") +
  theme(plot.title = element_text(size = 35, hjust = 0.5, face = "bold"))

# Display the plot
#print(p5)

# --- Stack the two panels: percentage on top, CLR below, equal heights ---
library(cowplot)
options(repr.plot.width = 60, repr.plot.height = 40)

stacked_plots <- plot_grid(
  p4 + theme(plot.margin = margin(b = 20)),
  p5,
  nrow = 2,
  rel_heights = c(1, 1)
)

# Print the stacked plots
stacked_plots

# Disabled earlier layout (labelled single-column grid):
# options(repr.plot.width = 50, repr.plot.height = 30)
# combined_plot <- plot_grid(p4, p5, labels = c("Frequency_Percentage", "Frequency_Percentage_CLR"), ncol =1)
# combined_plot


# # Display the plot
# print(p4)


# #Combine Plot
# library(cowplot)

# options(repr.plot.width = 50, repr.plot.height = 30)
# # Combine two plots vertically
# combined_plot <- plot_grid(p4, p5, labels = c("Frequency_Percentage", "Frequency_Percentage_CLR"), ncol =1)

# combined_plot


In [None]:
# Paired box plots of the percentages, one facet per AIFI_L3 population, with
# an extra line layer connecting the rows of each subject.
p <- ggpaired(
  filtered_data,
  x = "sample.visitName",
  y = "percentage",
  color = "sample.visitName",
  palette = "jco",
  line.color = "gray",
  line.size = 0.4,
  facet.by = "AIFI_L3",
  short.panel.labs = FALSE
) +
  geom_line(aes(group = subject.subjectGuid))

# Annotate each facet with the significance symbol of the paired comparison.
p + stat_compare_means(label = "p.signif", paired = TRUE)

In [None]:
# Preview the paired Year 1 table.
head(filtered_data)

### Global facet plots

In [None]:
# Facet plot, all populations: paired Wilcoxon signed-rank test on the raw
# percentages, shown on a log10 y axis.
stat_test_1 <- filtered_data %>%
  select(
    percentage, percentage_clr, AIFI_L3, cohort.cohortGuid,
    CMV.IgG.Serology.Result.Interpretation, subject.biologicalSex,
    sample.visitName, subject.subjectGuid
  ) %>%
  unique() %>%
  group_by(AIFI_L3) %>%
  wilcox_test(percentage ~ sample.visitName, paired = TRUE) %>%
  adjust_pvalue(method = "BH") %>%
  add_significance() %>%
  add_xy_position(x = "AIFI_L3", fun = "max")

# One panel per population: translucent boxes per visit, dotted lines joining
# the rows of each subject, and the individual points on top.
p <- ggplot(filtered_data, aes(x = sample.visitName, y = percentage)) +
  geom_boxplot(aes(fill = sample.visitName), alpha = 0.2) +
  geom_line(
    aes(group = subject.subjectGuid),
    color = "black",
    linetype = "dotted"
  ) +
  geom_point(size = 2) +
  facet_wrap(~ AIFI_L3, scales = "free", drop = FALSE) +
  scale_y_log10(breaks = c(0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100)) +
  theme(
    text = element_text(size = 35),         # base size for all text
    axis.title = element_text(size = 20),   # axis titles
    axis.text.x = element_text(size = 20),  # x tick labels
    legend.title = element_text(size = 20), # legend title
    legend.text = element_text(size = 18)   # legend entries
  ) +
  labs(y = "Frequency (%)")

# Write the adjusted significance symbol at the top of each panel.
options(repr.plot.width = 70, repr.plot.height = 60)
p + geom_text(
  data = stat_test_1,
  aes(label = paste("Wilcoxon_signed_rank_test_p.adj =", p.adj.signif)),
  x = 1, y = Inf, hjust = 0.29, vjust = 1, size = 7, color = "black"
)


In [None]:
# Facet plot, all populations: paired Wilcoxon signed-rank test on the raw
# percentages, shown on a linear y axis.
stat_test_1 <- filtered_data %>%
  select(
    percentage, percentage_clr, AIFI_L3, cohort.cohortGuid,
    CMV.IgG.Serology.Result.Interpretation, subject.biologicalSex,
    sample.visitName, subject.subjectGuid
  ) %>%
  unique() %>%
  group_by(AIFI_L3) %>%
  wilcox_test(percentage ~ sample.visitName, paired = TRUE) %>%
  adjust_pvalue(method = "BH") %>%
  add_significance() %>%
  add_xy_position(x = "AIFI_L3", fun = "max")

# One panel per population: translucent boxes per visit, dotted lines joining
# the rows of each subject, and the individual points on top.
p <- ggplot(filtered_data, aes(x = sample.visitName, y = percentage)) +
  geom_boxplot(aes(fill = sample.visitName), alpha = 0.2) +
  geom_line(
    aes(group = subject.subjectGuid),
    color = "black",
    linetype = "dotted"
  ) +
  geom_point(size = 2) +
  facet_wrap(~ AIFI_L3, scales = "free", drop = FALSE) +
  theme(
    text = element_text(size = 35),         # base size for all text
    axis.title = element_text(size = 20),   # axis titles
    axis.text.x = element_text(size = 20),  # x tick labels
    legend.title = element_text(size = 20), # legend title
    legend.text = element_text(size = 18)   # legend entries
  ) +
  labs(y = "Percentage")

# Write the adjusted significance symbol at the top of each panel.
options(repr.plot.width = 70, repr.plot.height = 60)
p + geom_text(
  data = stat_test_1,
  aes(label = paste("Wilcoxon_signed_rank_test_p.adj =", p.adj.signif)),
  x = 1, y = Inf, hjust = 0.29, vjust = 1, size = 7, color = "black"
)


In [None]:
# Facet plot, all populations: paired Wilcoxon signed-rank test on the CLR
# values, with CLR on the y axis.
stat_test_1 <- filtered_data %>%
  select(
    percentage, percentage_clr, AIFI_L3, cohort.cohortGuid,
    CMV.IgG.Serology.Result.Interpretation, subject.biologicalSex,
    sample.visitName, subject.subjectGuid
  ) %>%
  unique() %>%
  group_by(AIFI_L3) %>%
  wilcox_test(percentage_clr ~ sample.visitName, paired = TRUE) %>%
  adjust_pvalue(method = "BH") %>%
  add_significance() %>%
  add_xy_position(x = "AIFI_L3", fun = "max")

# One panel per population: translucent boxes per visit, dotted lines joining
# the rows of each subject, and the individual points on top.
p <- ggplot(filtered_data, aes(x = sample.visitName, y = percentage_clr)) +
  geom_boxplot(aes(fill = sample.visitName), alpha = 0.2) +
  geom_line(
    aes(group = subject.subjectGuid),
    color = "black",
    linetype = "dotted"
  ) +
  geom_point(size = 2) +
  facet_wrap(~ AIFI_L3, scales = "free", drop = FALSE) +
  theme(
    text = element_text(size = 35),         # base size for all text
    axis.title = element_text(size = 35),   # axis titles
    axis.text.x = element_text(size = 20),  # x tick labels
    legend.title = element_text(size = 25), # legend title
    legend.text = element_text(size = 25)   # legend entries
  ) +
  labs(y = "CLR")

# Write the adjusted significance symbol at the top of each panel.
options(repr.plot.width = 70, repr.plot.height = 60)
p + geom_text(
  data = stat_test_1,
  aes(label = paste("Wilcoxon_signed_rank_test_p.adj =", p.adj.signif)),
  x = 1, y = Inf, hjust = 0.29, vjust = 1, size = 7, color = "black"
)

### Global facet plots - B cells and Monocytes

In [None]:
# Add the dictionary columns to the paired table; populations without a
# matching AIFI_L3 entry in `dict` are dropped by the inner join.
merged_df <- filtered_data %>%
  inner_join(dict, by = "AIFI_L3")

In [None]:
# Preview the table after joining the dictionary.
head(merged_df)

In [None]:
# B cell populations only: paired Wilcoxon signed-rank test on the raw
# percentages, shown on a log10 y axis.
# NOTE(review): the section heading mentions monocytes as well, but this cell
# keeps AIFI_L1 == "B cell" only. Confirm which is intended.
subset_data <- merged_df %>%
  filter(AIFI_L1 %in% "B cell")

stat_test_1 <- subset_data %>%
  select(
    percentage, percentage_clr, AIFI_L3, cohort.cohortGuid,
    CMV.IgG.Serology.Result.Interpretation, subject.biologicalSex,
    sample.visitName, subject.subjectGuid
  ) %>%
  unique() %>%
  group_by(AIFI_L3) %>%
  wilcox_test(percentage ~ sample.visitName, paired = TRUE) %>%
  adjust_pvalue(method = "BH") %>%
  add_significance() %>%
  add_xy_position(x = "AIFI_L3", fun = "max")

# One panel per population: translucent boxes per visit, dotted lines joining
# the rows of each subject, and the individual points on top.
p <- ggplot(subset_data, aes(x = sample.visitName, y = percentage)) +
  geom_boxplot(aes(fill = sample.visitName), alpha = 0.2) +
  geom_line(
    aes(group = subject.subjectGuid),
    color = "black",
    linetype = "dotted"
  ) +
  geom_point(size = 2) +
  facet_wrap(~ AIFI_L3, scales = "free", drop = FALSE) +
  # Alternative breaks/labels kept for reference:
  #   breaks = scales::trans_breaks('log10', function(x) 10^x),
  #   labels = scales::trans_format('log10', scales::math_format(10^.x))
  scale_y_log10(breaks = c(0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100)) +
  theme(
    text = element_text(size = 40, color = "black"), # base size for all text
    axis.title = element_text(size = 40),            # axis titles
    legend.title = element_text(size = 50),          # legend title
    legend.text = element_text(size = 50)            # legend entries
  ) +
  labs(y = "Frequency(%)")

# Write the significance symbol at the top of each panel. The label text says
# "p =", but the value shown is the BH-adjusted significance symbol.
p + geom_text(
  data = stat_test_1,
  aes(label = paste("Wilcoxon paired test p =", p.adj.signif)),
  x = 1, y = Inf, hjust = 0.1, vjust = 1, size = 10, color = "black"
)


In [None]:
# B cell populations only: paired Wilcoxon signed-rank test on the raw
# percentages, shown on a linear y axis.
# NOTE(review): the section heading mentions monocytes as well, but this cell
# keeps AIFI_L1 == "B cell" only. Confirm which is intended.
subset_data <- merged_df %>%
  filter(AIFI_L1 %in% "B cell")

stat_test_1 <- subset_data %>%
  select(
    percentage, percentage_clr, AIFI_L3, cohort.cohortGuid,
    CMV.IgG.Serology.Result.Interpretation, subject.biologicalSex,
    sample.visitName, subject.subjectGuid
  ) %>%
  unique() %>%
  group_by(AIFI_L3) %>%
  wilcox_test(percentage ~ sample.visitName, paired = TRUE) %>%
  adjust_pvalue(method = "BH") %>%
  add_significance() %>%
  add_xy_position(x = "AIFI_L3", fun = "max")

# One panel per population: translucent boxes per visit, dotted lines joining
# the rows of each subject, and the individual points on top.
p <- ggplot(subset_data, aes(x = sample.visitName, y = percentage)) +
  geom_boxplot(aes(fill = sample.visitName), alpha = 0.2) +
  geom_line(
    aes(group = subject.subjectGuid),
    color = "black",
    linetype = "dotted"
  ) +
  geom_point(size = 2) +
  facet_wrap(~ AIFI_L3, scales = "free", drop = FALSE) +
  theme(
    text = element_text(size = 40, color = "black"), # base size for all text
    axis.title = element_text(size = 40),            # axis titles
    legend.title = element_text(size = 50),          # legend title
    legend.text = element_text(size = 50)            # legend entries
  ) +
  labs(y = "percentage")

# Write the adjusted significance symbol at the top of each panel.
p + geom_text(
  data = stat_test_1,
  aes(label = paste("Wilcoxon signed rank test p.adj =", p.adj.signif)),
  x = 1, y = Inf, hjust = 0.1, vjust = 1, size = 8, color = "black"
)


In [None]:
# B cell and monocyte populations: paired Wilcoxon signed-rank test on the CLR
# values, with CLR on the y axis.
subset_data <- merged_df %>%
  filter(AIFI_L1 %in% c("B cell", "Monocyte"))

stat_test_1 <- subset_data %>%
  select(
    percentage, percentage_clr, AIFI_L3, cohort.cohortGuid,
    CMV.IgG.Serology.Result.Interpretation, subject.biologicalSex,
    sample.visitName, subject.subjectGuid
  ) %>%
  unique() %>%
  group_by(AIFI_L3) %>%
  wilcox_test(percentage_clr ~ sample.visitName, paired = TRUE) %>%
  adjust_pvalue(method = "BH") %>%
  add_significance() %>%
  add_xy_position(x = "AIFI_L3", fun = "max")

# One panel per population: translucent boxes per visit, dotted lines joining
# the rows of each subject, and the individual points on top.
p <- ggplot(subset_data, aes(x = sample.visitName, y = percentage_clr)) +
  geom_boxplot(aes(fill = sample.visitName), alpha = 0.2) +
  geom_line(
    aes(group = subject.subjectGuid),
    color = "black",
    linetype = "dotted"
  ) +
  geom_point(size = 2) +
  facet_wrap(~ AIFI_L3, scales = "free", drop = FALSE) +
  theme(
    text = element_text(size = 40, color = "black"), # base size for all text
    axis.title = element_text(size = 40),            # axis titles
    legend.title = element_text(size = 50),          # legend title
    legend.text = element_text(size = 50)            # legend entries
  ) +
  labs(y = "CLR")

# Write the adjusted significance symbol at the top of each panel.
p + geom_text(
  data = stat_test_1,
  aes(label = paste("Wilcoxon signed rank test p.adj =", p.adj.signif)),
  x = 1, y = Inf, hjust = 0.1, vjust = 1, size = 8, color = "black"
)


In [None]:


# Minimal facet plot, all populations: paired Wilcoxon signed-rank test on the
# raw percentages, default theme, log10 y axis.
stat_test_1 <- filtered_data %>%
  select(
    percentage, percentage_clr, AIFI_L3, cohort.cohortGuid,
    CMV.IgG.Serology.Result.Interpretation, subject.biologicalSex,
    sample.visitName, subject.subjectGuid
  ) %>%
  unique() %>%
  group_by(AIFI_L3) %>%
  wilcox_test(percentage ~ sample.visitName, paired = TRUE) %>%
  adjust_pvalue(method = "BH") %>%
  add_significance() %>%
  # Disabled option: comparisons = list(c("Flu Year 1 Day 0", "Flu Year 1 Day 7"))
  add_xy_position(x = "AIFI_L3", fun = "max")
#stat_test_1

# One panel per population; panels share their axes (facet_wrap defaults).
p <- ggplot(filtered_data, aes(x = sample.visitName, y = percentage)) +
  geom_boxplot(aes(fill = sample.visitName), alpha = 0.2) +
  geom_line(aes(group = subject.subjectGuid)) +
  geom_point(size = 2) +
  facet_wrap(~ AIFI_L3) +
  scale_y_log10()

# Write the adjusted significance symbol at the top of each panel.
p + geom_text(
  data = stat_test_1,
  aes(label = p.adj.signif),
  x = 1, y = Inf, hjust = 1, vjust = 1, size = 5, color = "black"
)



In [None]:
# Preview the most recent paired test table.
stat_test_1 %>% head()

In [None]:
# Preview the earlier table whose positions were computed with x = "percentage".
stat_test_1_test %>% head()

In [None]:
# Preview the paired Year 1 table.
filtered_data %>% head()

# Split by Cohort

In [None]:
# Preview the Year 1 table and list the cohorts it contains.
head(result_yr1)
unique(result_yr1$cohort.cohortGuid)

In [None]:
# Year 1 Day 0 vs Day 7 percentages per AIFI_L3 population, one cohort at a
# time (the comparison is between visits, not between sexes).

#################### BR1 ####################
result_cohort <- result_yr1[result_yr1$cohort.cohortGuid == "BR1", ]

# Disabled alternative: restrict to CMV-positive subjects instead.
#result_cohort <- result[result$CMV.IgG.Serology.Result.Interpretation == 'Positive', ]

# Unpaired Wilcoxon test on the raw percentages, one test per population,
# with Benjamini-Hochberg adjustment.
stat_test <- result_cohort %>%
  select(
    percentage, AIFI_L3, cohort.cohortGuid,
    CMV.IgG.Serology.Result.Interpretation, subject.biologicalSex,
    sample.visitName
  ) %>%
  unique() %>%
  group_by(AIFI_L3) %>%
  wilcox_test(percentage ~ sample.visitName) %>%
  adjust_pvalue(method = "BH") %>%
  add_significance()

# Boxplots per population, filled by visit. reorder() with desc() places
# populations with the highest mean percentage first.
#options(repr.plot.width = 35, repr.plot.height = 12)
p1 <- ggplot(
  result_cohort,
  aes(
    x = reorder(AIFI_L3, desc(percentage)),
    y = percentage,
    fill = sample.visitName
  )
) +
  geom_boxplot(width = 0.5, outlier.shape = ".") +
  scale_x_discrete(name = "AIFI_L3", guide = guide_axis(angle = 90)) +
  theme_bw(base_size = 12) +
  geom_pwc(
    aes(group = sample.visitName),
    method = "wilcox_test",
    p.adjust.method = "BH",
    label = "{p.adj.signif}",
    label.size = 7,
    tip.length = 0,
    angle = 0,
    hide.ns = FALSE
  ) +
  # Optional: facet_grid(cohort.cohortGuid ~ ., scales = "free_y", space = "free_y")
  # The y scale is added once only: adding the same scale a second time just
  # replaces the first one and makes ggplot2 emit a "scale already present"
  # message.
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
  theme(
    axis.text.x = element_text(
      size = 18, color = "black", angle = 90, hjust = 0.5, vjust = 0.5
    ),
    axis.text.y = element_text(size = 18, color = "black"),
    axis.title.x = element_text(size = 18, color = "black"),
    axis.title.y = element_text(size = 18, color = "black"),
    legend.text = element_text(size = 13, color = "black"),
    legend.title = element_text(size = 15, color = "black")
  ) +
  ggtitle("BR1") +
  theme(plot.title = element_text(size = 25, hjust = 0.5, face = "bold"))

# Display the plot
#print(p1)


####################.BR2 ####################
result_cohort <- result_yr1[result_yr1$cohort.cohortGuid == "BR2", ]


stat_test<- result_cohort %>%
  select(percentage, AIFI_L3, cohort.cohortGuid, CMV.IgG.Serology.Result.Interpretation, subject.biologicalSex, sample.visitName) %>%
  unique(.) %>%
  group_by(AIFI_L3) %>%
  wilcox_test(percentage ~ sample.visitName) %>%
  adjust_pvalue(method = "BH") %>%
  add_significance()


# Create the plot
#options(repr.plot.width = 35, repr.plot.height = 12)
p2 <- ggplot(result_cohort, aes(x = reorder(AIFI_L3, desc(percentage)) , y = percentage, fill = sample.visitName)) +
  geom_boxplot(width = .5, outlier.shape = '.') +
  scale_x_discrete(name ="AIFI_L3", guide = guide_axis(angle = 90)) +
  theme_bw(base_size = 12) +
  geom_pwc(
    aes(group = sample.visitName),
    tip.length = 0, p.adjust.method = "BH",
    method = "wilcox_test", label ="{p.adj.signif}", angle = 0, hide.ns = FALSE, label.size =7  # or "panel"
  ) +
  #facet_grid(cohort.cohortGuid ~ ., scales = "free_y", space = "free_y") +
   scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
  theme(axis.text.x = element_text(size = 18, color = "black", angle = 90, hjust = 0.5, vjust = 0.5),
        axis.text.y = element_text(size = 18, color = "black"),
        axis.title.x = element_text(size = 18, color = "black"),
        axis.title.y = element_text(size = 18, color = "black"),
       legend.text = element_text(size = 13, color = "black"),  # Adjust the font size of legend text
        legend.title = element_text(size = 15, color = "black"))+
  ggtitle("BR2") + theme(plot.title = element_text(size = 25, hjust = 0.5, face = "bold"))

# Display the plot
#print(p2)

library(cowplot)
options(repr.plot.width = 40, repr.plot.height = 25)
# Combine two plots vertically
#combined_plot <- plot_grid(p1, p2, labels = c("CMV_neg", "CMV_pos"), ncol =1, label_size = 20, vjust = 0)
#combined_plot <- plot_grid(p1, p2, ncol=1,  nrow = 2, rel_heights = c(1, 2))

stacked_plots <- plot_grid(p1 + theme(plot.margin = margin(b = 20)), p2, nrow = 2, rel_heights = c(1, 1))

# Print the stacked plots
stacked_plots


#combined_plot



In [None]:
# stat test - percentage
# Paired visit comparison (Day 0 vs Day 7) per AIFI_L3 cell type, separately
# for cohorts BR1 and BR2, drawn as dodged boxplots on a log10 y axis.

# Paired Wilcoxon signed-rank test between visits for every AIFI_L3 cell type,
# BH-adjusted, with bracket coordinates added for plotting.
# The paired test matches observations by row position within each visit, so
# the rows are sorted by subject first; otherwise the pairing depends on the
# incoming row order.
# NOTE(review): assumes each subject has exactly one row per visit and cell
# type after de-duplication - confirm.
paired_visit_test_pct <- function(cohort_data) {
  cohort_data %>%
    select(percentage, percentage_clr, AIFI_L3, cohort.cohortGuid,
           CMV.IgG.Serology.Result.Interpretation, subject.biologicalSex,
           sample.visitName, subject.subjectGuid) %>%
    unique() %>%
    # dplyr:: is spelled out because plyr is attached after dplyr
    dplyr::arrange(AIFI_L3, sample.visitName, subject.subjectGuid) %>%
    group_by(AIFI_L3) %>%
    wilcox_test(percentage ~ sample.visitName, paired = TRUE) %>%
    adjust_pvalue(method = "BH") %>%
    add_significance() %>%
    add_xy_position(
      x = "AIFI_L3", fun = "max",
      comparisons = list(c("Flu Year 1 Day 0", "Flu Year 1 Day 7"))
    )
}

# Rows of a test table with adjusted p below 0.05, reduced to the id columns.
significant_rows <- function(stat_table) {
  stat_table %>%
    filter(p.adj < 0.05) %>%
    select(AIFI_L3, .y., p.adj, p.adj.signif) %>%
    unique()
}

# Dodged boxplots of percentage per cell type, coloured by visit, annotated
# with the significance symbols from stat_table at height label_y.
# label_y is passed in log10 units, as in the original cell, which placed the
# labels at log10(max) + buffer on this log10 axis.
plot_paired_percentage <- function(cohort_data, stat_table, label_y,
                                   cohort_label) {
  ggplot(cohort_data,
         aes(x = AIFI_L3, y = percentage, color = sample.visitName)) +
    geom_boxplot(width = 0.5, outlier.shape = ".",
                 position = position_dodge(width = 0.75), size = 1) +
    scale_x_discrete(name = "AIFI_L3", guide = guide_axis(angle = 90)) +
    theme_bw(base_size = 12) +
    stat_pvalue_manual(stat_table, label = "p.adj.signif", tip.length = 0,
                       y.position = label_y, label.size = 12) +
    #scale_y_continuous(trans = 'log10', labels = scales::math_format(10^.x)) +
    scale_y_log10() +
    scale_color_manual(values = c("Flu Year 1 Day 0" = "#1b9e77",
                                  "Flu Year 1 Day 7" = "#d95f02")) +
    #facet_grid(cohort.cohortGuid ~ ., scales = "free_y", space = "free_y") +
    theme(text = element_text(size = 45, color = "black"),
          axis.text.x = element_text(color = "black"),
          axis.text.y = element_text(color = "black"),
          axis.title.x = element_text(color = "black"),
          axis.title.y = element_text(color = "black"),
          legend.text = element_text(color = "black"),
          legend.title = element_text(color = "black")) +
    ggtitle(cohort_label) +
    theme(plot.title = element_text(size = 35, hjust = 0.5, face = "bold"))
}

# Label height shared by both panels: log10 of the overall maximum percentage
# plus a small buffer.
max_y <- max(filtered_data$percentage, na.rm = TRUE)
transformed_max_y <- log10(max_y)
buffer <- 0.2  # Adjust the buffer as needed

#################### BR1 ####################
result_cohort <- filtered_data[filtered_data$cohort.cohortGuid == "BR1", ]

stat_test_1 <- paired_visit_test_pct(result_cohort)
significant_cell_types_1 <- significant_rows(stat_test_1)
print(significant_cell_types_1)

# Plot the BR1 rows only. The original passed filtered_data (all cohorts) here,
# so the "BR1" and "BR2" panels showed identical boxes under cohort-specific
# significance labels.
p4 <- plot_paired_percentage(result_cohort, stat_test_1,
                             transformed_max_y + buffer, "BR1")

# Display the plot
#print(p4)

#################### BR2 ####################
result_cohort <- filtered_data[filtered_data$cohort.cohortGuid == "BR2", ]

stat_test_2 <- paired_visit_test_pct(result_cohort)
significant_cell_types_2 <- significant_rows(stat_test_2)
print(significant_cell_types_2)

p5 <- plot_paired_percentage(result_cohort, stat_test_2,
                             transformed_max_y + buffer, "BR2")

# Display the plot
#print(p5)

library(cowplot)
options(repr.plot.width = 60, repr.plot.height = 40)
# Combine two plots vertically
#combined_plot <- plot_grid(p1, p2, labels = c("CMV_neg", "CMV_pos"), ncol =1, label_size = 20, vjust = 0)
#combined_plot <- plot_grid(p1, p2, ncol=1,  nrow = 2, rel_heights = c(1, 2))

# BR1 on top (with extra bottom margin), BR2 below, equal heights
stacked_plots <- plot_grid(
  p4 + theme(plot.margin = margin(b = 20)), p5,
  nrow = 2, rel_heights = c(1, 1)
)

# Print the stacked plots
stacked_plots


# # Display the plot
# print(p4)


# #Combine Plot
# library(cowplot)

# options(repr.plot.width = 50, repr.plot.height = 30)
# # Combine two plots vertically
# combined_plot <- plot_grid(p4, p5, labels = c("Frequency_Percentage", "Frequency_Percentage_CLR"), ncol =1)

# combined_plot


In [None]:
# stat test - CLR
# Paired visit comparison (Day 0 vs Day 7) of percentage_clr per AIFI_L3 cell
# type, separately for cohorts BR1 and BR2, drawn as dodged boxplots.

# Paired Wilcoxon signed-rank test between visits for every AIFI_L3 cell type,
# BH-adjusted, with bracket coordinates added for plotting.
# The paired test matches observations by row position within each visit, so
# the rows are sorted by subject first; otherwise the pairing depends on the
# incoming row order.
# NOTE(review): assumes each subject has exactly one row per visit and cell
# type after de-duplication - confirm.
paired_visit_test_clr <- function(cohort_data) {
  cohort_data %>%
    select(percentage, percentage_clr, AIFI_L3, cohort.cohortGuid,
           CMV.IgG.Serology.Result.Interpretation, subject.biologicalSex,
           sample.visitName, subject.subjectGuid) %>%
    unique() %>%
    # dplyr:: is spelled out because plyr is attached after dplyr
    dplyr::arrange(AIFI_L3, sample.visitName, subject.subjectGuid) %>%
    group_by(AIFI_L3) %>%
    wilcox_test(percentage_clr ~ sample.visitName, paired = TRUE) %>%
    adjust_pvalue(method = "BH") %>%
    add_significance() %>%
    add_xy_position(
      x = "AIFI_L3", fun = "max",
      comparisons = list(c("Flu Year 1 Day 0", "Flu Year 1 Day 7"))
    )
}

# Rows of a test table with adjusted p below 0.05, reduced to the id columns.
significant_rows <- function(stat_table) {
  stat_table %>%
    filter(p.adj < 0.05) %>%
    select(AIFI_L3, .y., p.adj, p.adj.signif) %>%
    unique()
}

# Dodged boxplots of percentage_clr per cell type, coloured by visit, annotated
# with the significance symbols from stat_table at height label_y (data units;
# this plot has no axis transformation).
plot_paired_clr <- function(cohort_data, stat_table, label_y, cohort_label) {
  ggplot(cohort_data,
         aes(x = AIFI_L3, y = percentage_clr, color = sample.visitName)) +
    geom_boxplot(width = 0.5, outlier.shape = ".",
                 position = position_dodge(width = 0.75), size = 1) +
    scale_x_discrete(name = "AIFI_L3", guide = guide_axis(angle = 90)) +
    theme_bw(base_size = 12) +
    stat_pvalue_manual(stat_table, label = "p.adj.signif", tip.length = 0,
                       y.position = label_y, label.size = 12) +
    scale_color_manual(values = c("Flu Year 1 Day 0" = "#1b9e77",
                                  "Flu Year 1 Day 7" = "#d95f02")) +
    #facet_grid(cohort.cohortGuid ~ ., scales = "free_y", space = "free_y") +
    theme(text = element_text(size = 45, color = "black"),
          axis.text.x = element_text(color = "black"),
          axis.text.y = element_text(color = "black"),
          axis.title.x = element_text(color = "black"),
          axis.title.y = element_text(color = "black"),
          legend.text = element_text(color = "black"),
          legend.title = element_text(color = "black")) +
    ggtitle(cohort_label) +
    theme(plot.title = element_text(size = 35, hjust = 0.5, face = "bold"))
}

# Label height shared by both panels: overall maximum CLR value plus a buffer.
# (The log10 of this maximum that the original also computed was never used
# in this cell and has been dropped.)
max_y <- max(filtered_data$percentage_clr, na.rm = TRUE)
buffer <- 0.2  # Adjust the buffer as needed

#################### BR1 ####################
result_cohort <- filtered_data[filtered_data$cohort.cohortGuid == "BR1", ]

stat_test_1 <- paired_visit_test_clr(result_cohort)
significant_cell_types_1 <- significant_rows(stat_test_1)
print(significant_cell_types_1)

# Plot the BR1 rows only. The original passed filtered_data (all cohorts) here,
# so the "BR1" and "BR2" panels showed identical boxes under cohort-specific
# significance labels.
p4 <- plot_paired_clr(result_cohort, stat_test_1, max_y + buffer, "BR1")

# Display the plot
#print(p4)

#################### BR2 ####################
result_cohort <- filtered_data[filtered_data$cohort.cohortGuid == "BR2", ]

stat_test_2 <- paired_visit_test_clr(result_cohort)
significant_cell_types_2 <- significant_rows(stat_test_2)
print(significant_cell_types_2)

p5 <- plot_paired_clr(result_cohort, stat_test_2, max_y + buffer, "BR2")

# Display the plot
#print(p5)

library(cowplot)
options(repr.plot.width = 60, repr.plot.height = 40)
# Combine two plots vertically
#combined_plot <- plot_grid(p1, p2, labels = c("CMV_neg", "CMV_pos"), ncol =1, label_size = 20, vjust = 0)
#combined_plot <- plot_grid(p1, p2, ncol=1,  nrow = 2, rel_heights = c(1, 2))

# BR1 on top (with extra bottom margin), BR2 below, equal heights
stacked_plots <- plot_grid(
  p4 + theme(plot.margin = margin(b = 20)), p5,
  nrow = 2, rel_heights = c(1, 1)
)

# Print the stacked plots
stacked_plots


# # Display the plot
# print(p4)


# #Combine Plot
# library(cowplot)

# options(repr.plot.width = 50, repr.plot.height = 30)
# # Combine two plots vertically
# combined_plot <- plot_grid(p4, p5, labels = c("Frequency_Percentage", "Frequency_Percentage_CLR"), ncol =1)

# combined_plot


In [None]:
# Wilcoxon PAIRED test (exploratory) - BR1 only
library(ggpubr)
#################### BR1 ####################
result_cohort <- result_yr1[result_yr1$cohort.cohortGuid == "BR1", ]

#result_cohort <- result[result$CMV.IgG.Serology.Result.Interpretation == 'Positive', ]

# Convert sample.visitName to a factor if it is not one already
result_cohort$sample.visitName <- as.factor(result_cohort$sample.visitName)

# Ensure percentage is numeric. The original coerced AIFI_L3 here, which
# replaced every measurement with a coerced cell-type label.
result_cohort$percentage <- as.numeric(result_cohort$percentage)

# Paired tests match observations by row position within each visit, so sort
# by subject before testing and plotting.
# NOTE(review): assumes result_yr1 carries subject.subjectGuid (as
# filtered_data does) and that every subject has one row per visit and cell
# type - confirm; incomplete pairs will make the paired test fail.
result_cohort <- result_cohort %>%
  dplyr::arrange(AIFI_L3, sample.visitName, subject.subjectGuid)

# Paired Wilcoxon signed-rank test between visits for every AIFI_L3 cell type,
# BH-adjusted across cell types. This replaces pairwise.wilcox.test(), which
# was run unpaired and pooled all cell types together.
paired_test_results <- result_cohort %>%
  select(percentage, AIFI_L3, sample.visitName, subject.subjectGuid) %>%
  unique() %>%
  group_by(AIFI_L3) %>%
  wilcox_test(percentage ~ sample.visitName, paired = TRUE) %>%
  adjust_pvalue(method = "BH") %>%
  add_significance()

print(paired_test_results)

# Create the plot.
# The annotation layer is geom_pwc(), as in the other cohort cells, with the
# paired option forwarded to the test. The original handed the
# pairwise.wilcox.test() result to geom_signif(data = ...), which is not a
# data frame and cannot serve as layer data.
# NOTE(review): geom_pwc() recomputes the test from the plotted rows, so its
# labels match paired_test_results only if result_cohort has no duplicate rows.
options(repr.plot.width = 35, repr.plot.height = 12)
p1 <- ggplot(
  result_cohort,
  aes(x = reorder(AIFI_L3, desc(percentage)), y = percentage,
      fill = sample.visitName)
) +
  geom_boxplot(width = .5, outlier.shape = ".") +
  scale_x_discrete(name = "AIFI_L3", guide = guide_axis(angle = 90)) +
  theme_bw(base_size = 12) +
  geom_pwc(
    aes(group = sample.visitName),
    tip.length = 0, p.adjust.method = "BH",
    method = "wilcox_test", method.args = list(paired = TRUE),
    label = "{p.adj.signif}", angle = 0, hide.ns = TRUE, label.size = 7
  ) +
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
  theme(axis.text.x = element_text(size = 18, color = "black", angle = 90,
                                   hjust = 0.5, vjust = 0.5),
        axis.text.y = element_text(size = 18, color = "black"),
        axis.title.x = element_text(size = 18, color = "black"),
        axis.title.y = element_text(size = 18, color = "black"),
        legend.text = element_text(size = 13, color = "black"),
        legend.title = element_text(size = 15, color = "black")) +
  ggtitle("BR1") +
  theme(plot.title = element_text(size = 25, hjust = 0.5, face = "bold"))

# Display the plot
print(p1)







# library(ggpubr)

# p1 <- ggplot(result_cohort, aes(x = reorder(AIFI_L3, desc(percentage)), y = percentage, fill = sample.visitName)) +
#   geom_boxplot(width = .5, outlier.shape = '.') +
#   scale_x_discrete(name = "AIFI_L3", guide = guide_axis(angle = 90)) +
#   theme_bw(base_size = 12) +
#   geom_signif(test="wilcox.test", comparisons = list(c("Flu Year 1 Day 0", "Flu Year 1 Day 7")), 
#               textsize = 7, hide.ns = FALSE) +  # Perform paired comparison annotations
#   scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
#   theme(axis.text.x = element_text(size = 18, color = "black", angle = 90, hjust = 0.5, vjust = 0.5),
#         axis.text.y = element_text(size = 18, color = "black"),
#         axis.title.x = element_text(size = 18, color = "black"),
#         axis.title.y = element_text(size = 18, color = "black"),
#         legend.text = element_text(size = 13, color = "black"),
#         legend.title = element_text(size = 15, color = "black")) +
#   ggtitle("BR1") + 
#   theme(plot.title = element_text(size = 25, hjust = 0.5, face = "bold"))
# # Display the plot
# print(p1)



####################.BR2 ####################
# result_cohort <- result_yr1[result_yr1$cohort.cohortGuid == "BR2", ]


# stat_test<- result_cohort %>%
#   select(percentage, AIFI_L3, cohort.cohortGuid, CMV.IgG.Serology.Result.Interpretation, subject.biologicalSex, sample.visitName) %>%
#   unique(.) %>%
#   group_by(AIFI_L3) %>%
#   wilcox_test(percentage ~ sample.visitName) %>%
#   adjust_pvalue(method = "BH") %>%
#   add_significance()


# # Create the plot
# #options(repr.plot.width = 35, repr.plot.height = 12)
# p2 <- ggplot(result_cohort, aes(x = reorder(AIFI_L3, desc(percentage)) , y = percentage, fill = sample.visitName)) +
#   geom_boxplot(width = .5, outlier.shape = '.') +
#   scale_x_discrete(name ="AIFI_L3", guide = guide_axis(angle = 90)) +
#   theme_bw(base_size = 12) +
#   geom_pwc(
#     aes(group = sample.visitName),
#     tip.length = 0, p.adjust.method = "BH",
#     method = "wilcox_test", label ="{p.adj.signif}", angle = 0, hide.ns = FALSE, label.size =7  # or "panel"
#   ) +
#   #facet_grid(cohort.cohortGuid ~ ., scales = "free_y", space = "free_y") +
#    scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
#   theme(axis.text.x = element_text(size = 18, color = "black", angle = 90, hjust = 0.5, vjust = 0.5),
#         axis.text.y = element_text(size = 18, color = "black"),
#         axis.title.x = element_text(size = 18, color = "black"),
#         axis.title.y = element_text(size = 18, color = "black"),
#        legend.text = element_text(size = 13, color = "black"),  # Adjust the font size of legend text
#         legend.title = element_text(size = 15, color = "black"))+
#   ggtitle("BR2") + theme(plot.title = element_text(size = 25, hjust = 0.5, face = "bold"))

# # Display the plot
# #print(p2)

# library(cowplot)
# options(repr.plot.width = 40, repr.plot.height = 25)
# # Combine two plots vertically
# #combined_plot <- plot_grid(p1, p2, labels = c("CMV_neg", "CMV_pos"), ncol =1, label_size = 20, vjust = 0)
# #combined_plot <- plot_grid(p1, p2, ncol=1,  nrow = 2, rel_heights = c(1, 2))

# stacked_plots <- plot_grid(p1 + theme(plot.margin = margin(b = 20)), p2, nrow = 2, rel_heights = c(1, 1))

# # Print the stacked plots
# stacked_plots


#combined_plot



In [None]:
# CLR-transformed frequency (percentage_clr) of each AIFI_L3 cell type compared
# between visits (sample.visitName), drawn separately for cohorts BR1 (top)
# and BR2 (bottom).

# Unpaired Wilcoxon test between visits for every AIFI_L3 cell type, with
# BH adjustment across cell types. Exact duplicate rows are dropped first.
test_visit_clr <- function(cohort_data) {
  cohort_data %>%
    select(percentage_clr, AIFI_L3, cohort.cohortGuid,
           CMV.IgG.Serology.Result.Interpretation, subject.biologicalSex,
           sample.visitName) %>%
    unique() %>%
    group_by(AIFI_L3) %>%
    wilcox_test(percentage_clr ~ sample.visitName) %>%
    adjust_pvalue(method = "BH") %>%
    add_significance()
}

# Boxplots of percentage_clr per cell type, filled by visit, with the pairwise
# visit comparison computed and drawn by geom_pwc(). Cell types are ordered on
# the x axis via reorder(AIFI_L3, desc(percentage_clr)).
# The y scale is added once; the original added the identical scale twice for
# BR1, which only triggered a "scale already present" replacement.
plot_visit_clr <- function(cohort_data, cohort_label) {
  ggplot(
    cohort_data,
    aes(x = reorder(AIFI_L3, desc(percentage_clr)), y = percentage_clr,
        fill = sample.visitName)
  ) +
    geom_boxplot(width = .5, outlier.shape = ".") +
    scale_x_discrete(name = "AIFI_L3", guide = guide_axis(angle = 90)) +
    theme_bw(base_size = 12) +
    geom_pwc(
      aes(group = sample.visitName),
      tip.length = 0, p.adjust.method = "BH",
      method = "wilcox_test", label = "{p.adj.signif}", angle = 0,
      hide.ns = FALSE, label.size = 7
    ) +
    #facet_grid(cohort.cohortGuid ~ ., scales = "free_y", space = "free_y") +
    scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
    theme(
      axis.text.x = element_text(size = 18, color = "black", angle = 90,
                                 hjust = 0.5, vjust = 0.5),
      axis.text.y = element_text(size = 18, color = "black"),
      axis.title.x = element_text(size = 18, color = "black"),
      axis.title.y = element_text(size = 18, color = "black"),
      legend.text = element_text(size = 13, color = "black"),
      legend.title = element_text(size = 15, color = "black")
    ) +
    ggtitle(cohort_label) +
    theme(plot.title = element_text(size = 25, hjust = 0.5, face = "bold"))
}

#################### BR1 ####################
result_cohort <- result_yr1[result_yr1$cohort.cohortGuid == "BR1", ]

#result_cohort <- result[result$CMV.IgG.Serology.Result.Interpretation == 'Positive', ]

# Table form of the comparison; the plot annotations are computed
# independently by geom_pwc(), so this table is for inspection only.
stat_test <- test_visit_clr(result_cohort)
p1 <- plot_visit_clr(result_cohort, "BR1")

# Display the plot
#print(p1)


#################### BR2 ####################
result_cohort <- result_yr1[result_yr1$cohort.cohortGuid == "BR2", ]

stat_test <- test_visit_clr(result_cohort)
p2 <- plot_visit_clr(result_cohort, "BR2")

# Display the plot
#print(p2)

library(cowplot)
options(repr.plot.width = 40, repr.plot.height = 25)
# Combine two plots vertically
#combined_plot <- plot_grid(p1, p2, labels = c("CMV_neg", "CMV_pos"), ncol =1, label_size = 20, vjust = 0)
#combined_plot <- plot_grid(p1, p2, ncol=1,  nrow = 2, rel_heights = c(1, 2))

# BR1 on top (with extra bottom margin), BR2 below, equal heights
stacked_plots <- plot_grid(
  p1 + theme(plot.margin = margin(b = 20)), p2,
  nrow = 2, rel_heights = c(1, 1)
)

# Print the stacked plots
stacked_plots


#combined_plot



## Whole PBMC- split by cohort for B cells (Facet_plots)

In [None]:
# filtered_data holds only the samples selected for the B cell analysis
# (92 expected, per the original note); show it and count its distinct subjects
filtered_data
filtered_data$subject.subjectGuid %>% unique() %>% length()

In [None]:
# Attach the reference dictionary columns to every row, matching on AIFI_L3;
# rows without a match in dict are dropped (inner join)
merged_df <- filtered_data %>%
  inner_join(dict, by = "AIFI_L3")

In [None]:
# First rows of the merged table, then its number of distinct subjects
merged_df %>% head()
merged_df$subject.subjectGuid %>% unique() %>% length()

In [None]:
# Keep the B cell rows of each cohort and save each subset as CSV
subset_data1 <- merged_df %>%
  filter(AIFI_L1 == "B cell", cohort.cohortGuid == "BR1")
subset_data2 <- merged_df %>%
  filter(AIFI_L1 == "B cell", cohort.cohortGuid == "BR2")

write.csv(
  subset_data1,
  file = "/home/jupyter/IH-A-Aging-Analysis-Notebooks_old/Mansi_Notebooks/scRNA/Frequency_Plot/subset_BR1_Bcells_wholepbmc.csv"
)
write.csv(
  subset_data2,
  file = "/home/jupyter/IH-A-Aging-Analysis-Notebooks_old/Mansi_Notebooks/scRNA/Frequency_Plot/subset_BR2_Bcells_wholepbmc.csv"
)

In [None]:
# B cell subsets (AIFI_L1 == "B cell"): paired Wilcoxon signed-rank test
# between visits, one facet per AIFI_L3 cell type, separately for BR1 and BR2.
# Y-scale: percentage

# Paired Wilcoxon signed-rank test between visits for every AIFI_L3 cell type,
# BH-adjusted across cell types.
# The paired test matches observations by row position within each visit, so
# the rows are sorted by subject first; otherwise the pairing depends on the
# incoming row order.
# NOTE(review): assumes each subject has exactly one row per visit and cell
# type after de-duplication - confirm.
paired_facet_test_pct <- function(subset_data) {
  subset_data %>%
    select(percentage, percentage_clr, AIFI_L3, cohort.cohortGuid,
           CMV.IgG.Serology.Result.Interpretation, subject.biologicalSex,
           sample.visitName, subject.subjectGuid) %>%
    unique() %>%
    # dplyr:: is spelled out because plyr is attached after dplyr
    dplyr::arrange(AIFI_L3, sample.visitName, subject.subjectGuid) %>%
    group_by(AIFI_L3) %>%
    wilcox_test(percentage ~ sample.visitName, paired = TRUE) %>%
    adjust_pvalue(method = "BH") %>%
    add_significance() %>%
    add_xy_position(x = "AIFI_L3", fun = "max")
}

# Base plot: one facet per cell type (free scales), boxplot per visit, dotted
# black lines joining each subject's visits, enlarged text throughout.
facet_plot_pct <- function(subset_data) {
  ggplot(subset_data, aes(x = sample.visitName, y = percentage)) +
    geom_boxplot(aes(fill = sample.visitName), alpha = .2) +
    geom_line(aes(group = subject.subjectGuid), color = "black",
              linetype = "dotted") +
    geom_point(size = 2) +
    facet_wrap(~ AIFI_L3, scales = "free", drop = FALSE) +
    theme(
      text = element_text(size = 40, color = "black"),  # all text elements
      axis.title = element_text(size = 40),
      legend.title = element_text(size = 50),
      legend.text = element_text(size = 50)
    ) +
    labs(y = "percentage")
}

# Write the adjusted-significance symbol at the top of each facet and add a
# centred bold title.
add_padj_label <- function(base_plot, stat_table, plot_title) {
  base_plot +
    geom_text(
      data = stat_table,
      aes(label = paste("Wilcoxon signed rank test p.adj =", p.adj.signif)),
      x = 1, y = Inf, hjust = 0.1, vjust = 1, size = 8, color = "black"
    ) +
    ggtitle(plot_title) +
    theme(plot.title = element_text(size = 35, hjust = 0.5, face = "bold"))
}

#subset BR1
subset_data1 <- merged_df %>%
  filter(AIFI_L1 == "B cell" & cohort.cohortGuid == "BR1")

stat_test_1 <- paired_facet_test_pct(subset_data1)
p1 <- facet_plot_pct(subset_data1)

# Add p-values to the plot
options(repr.plot.width = 50, repr.plot.height = 50)
add_padj_label(p1, stat_test_1, "BR1- Whole PBMC (Percentage)")

#subset BR2
subset_data2 <- merged_df %>%
  filter(AIFI_L1 == "B cell" & cohort.cohortGuid == "BR2")

stat_test_2 <- paired_facet_test_pct(subset_data2)
# Stored as p2 (the original reused p1 and overwrote the BR1 plot), matching
# the naming in the CLR version of this cell.
p2 <- facet_plot_pct(subset_data2)

# Add p-values to the plot
options(repr.plot.width = 50, repr.plot.height = 50)
add_padj_label(p2, stat_test_2, "BR2- Whole PBMC (Percentage)")


In [None]:
# B cell subsets (AIFI_L1 == "B cell"): paired Wilcoxon signed-rank test
# between visits, one facet per AIFI_L3 cell type, separately for BR1 and BR2.
# Y-scale: percentage_clr

library(dplyr)

# Paired Wilcoxon signed-rank test between visits for every AIFI_L3 cell type,
# BH-adjusted across cell types.
# The paired test matches observations by row position within each visit, so
# the rows are sorted by subject first; otherwise the pairing depends on the
# incoming row order.
# NOTE(review): assumes each subject has exactly one row per visit and cell
# type after de-duplication - confirm.
paired_facet_test_clr <- function(subset_data) {
  subset_data %>%
    select(percentage, percentage_clr, AIFI_L3, cohort.cohortGuid,
           CMV.IgG.Serology.Result.Interpretation, subject.biologicalSex,
           sample.visitName, subject.subjectGuid) %>%
    unique() %>%
    # dplyr:: is spelled out because plyr is attached after dplyr
    dplyr::arrange(AIFI_L3, sample.visitName, subject.subjectGuid) %>%
    group_by(AIFI_L3) %>%
    wilcox_test(percentage_clr ~ sample.visitName, paired = TRUE) %>%
    adjust_pvalue(method = "BH") %>%
    add_significance() %>%
    add_xy_position(x = "AIFI_L3", fun = "max")
}

# Base plot: one facet per cell type (free scales), boxplot per visit, dotted
# black lines joining each subject's visits, enlarged text throughout.
facet_plot_clr <- function(subset_data) {
  ggplot(subset_data, aes(x = sample.visitName, y = percentage_clr)) +
    geom_boxplot(aes(fill = sample.visitName), alpha = .2) +
    geom_line(aes(group = subject.subjectGuid), color = "black",
              linetype = "dotted") +
    geom_point(size = 2) +
    facet_wrap(~ AIFI_L3, scales = "free", drop = FALSE) +
    theme(
      text = element_text(size = 40, color = "black"),  # all text elements
      axis.title = element_text(size = 40),
      legend.title = element_text(size = 50),
      legend.text = element_text(size = 50)
    ) +
    labs(y = "CLR")
}

# Write the adjusted-significance symbol at the top of each facet and add a
# centred bold title.
add_padj_label <- function(base_plot, stat_table, plot_title) {
  base_plot +
    geom_text(
      data = stat_table,
      aes(label = paste("Wilcoxon signed rank test p.adj =", p.adj.signif)),
      x = 1, y = Inf, hjust = 0.1, vjust = 1, size = 8, color = "black"
    ) +
    ggtitle(plot_title) +
    theme(plot.title = element_text(size = 35, hjust = 0.5, face = "bold"))
}

#subset BR1
subset_data1 <- merged_df %>%
  filter(AIFI_L1 == "B cell" & cohort.cohortGuid == "BR1")

stat_test_1 <- paired_facet_test_clr(subset_data1)
p1 <- facet_plot_clr(subset_data1)

# Add p-values to the plot
add_padj_label(p1, stat_test_1, "BR1- Whole PBMC (CLR)")


#subset BR2
subset_data2 <- merged_df %>%
  filter(AIFI_L1 == "B cell" & cohort.cohortGuid == "BR2")

stat_test_2 <- paired_facet_test_clr(subset_data2)
p2 <- facet_plot_clr(subset_data2)

# Add p-values to the plot
add_padj_label(p2, stat_test_2, "BR2- Whole PBMC (CLR)")


In [None]:
# Number of distinct subjects in the BR1 B cell subset
subset_data1$subject.subjectGuid %>% unique() %>% length()

In [None]:
# Number of distinct subjects in the BR2 B cell subset
subset_data2$subject.subjectGuid %>% unique() %>% length()

In [None]:
# Sum of the two subject counts entered by hand
47 + 45

## Split by cohort- Global

In [None]:
# Paired Day 0 vs Day 7 comparison of AIFI_L3 percentages, tested and plotted
# separately for each cohort (BR1 on top, BR2 below) on a log10 y axis.
# Each panel draws only its own cohort's rows so the boxes match the
# per-cohort statistics shown above them.

buffer <- 0.2  # Gap between the largest value and the labels (log10 units)
visit_pair <- list(c("Flu Year 1 Day 0", "Flu Year 1 Day 7"))
visit_colors <- c("Flu Year 1 Day 0" = "#1b9e77", "Flu Year 1 Day 7" = "#d95f02")

# Black text everywhere plus a centred bold title; shared by both panels
cohort_theme <- theme(
  text = element_text(size = 45, color = "black"),
  axis.text.x = element_text(color = "black"),
  axis.text.y = element_text(color = "black"),
  axis.title.x = element_text(color = "black"),
  axis.title.y = element_text(color = "black"),
  legend.text = element_text(color = "black"),
  legend.title = element_text(color = "black"),
  plot.title = element_text(size = 35, hjust = 0.5, face = "bold")
)

#################### BR1 ####################
result_cohort <- filtered_data[filtered_data$cohort.cohortGuid == "BR1", ]

# Wilcoxon signed-rank test per AIFI_L3 subset, BH-adjusted
stat_test_1 <- result_cohort %>%
  select(percentage, percentage_clr, AIFI_L3, cohort.cohortGuid, CMV.IgG.Serology.Result.Interpretation, subject.biologicalSex, sample.visitName, subject.subjectGuid) %>%
  unique(.) %>%
  group_by(AIFI_L3) %>%
  wilcox_test(percentage ~ sample.visitName, paired = TRUE) %>%
  adjust_pvalue(method = "BH") %>%
  add_significance() %>%
  add_xy_position(x = "AIFI_L3", fun = "max", comparisons = visit_pair)

significant_cell_types_1 <- stat_test_1 %>%
  filter(p.adj < 0.05) %>%
  select(AIFI_L3, .y., p.adj, p.adj.signif) %>%
  unique()

print(significant_cell_types_1)

# Label height comes from this cohort's own maximum and is kept in log10
# units, as the original code does for the log10 y axis
max_y <- max(result_cohort$percentage, na.rm = TRUE)
transformed_max_y <- log10(max_y)

p4 <- ggplot(result_cohort, aes(x = AIFI_L3, y = percentage, color = sample.visitName)) +
  geom_boxplot(width = 0.5, outlier.shape = '.', position = position_dodge(width = 0.75), size = 1) +
  scale_x_discrete(name = "AIFI_L3", guide = guide_axis(angle = 90)) +
  theme_bw(base_size = 12) +
  stat_pvalue_manual(stat_test_1, label = "p.adj.signif", tip.length = 0, y.position = (transformed_max_y + buffer), label.size = 12) +
  scale_y_log10() +
  scale_color_manual(values = visit_colors) +
  cohort_theme +
  ggtitle("BR1")

#################### BR2 ####################
result_cohort <- filtered_data[filtered_data$cohort.cohortGuid == "BR2", ]

stat_test_2 <- result_cohort %>%
  select(percentage, percentage_clr, AIFI_L3, cohort.cohortGuid, CMV.IgG.Serology.Result.Interpretation, subject.biologicalSex, sample.visitName, subject.subjectGuid) %>%
  unique(.) %>%
  group_by(AIFI_L3) %>%
  wilcox_test(percentage ~ sample.visitName, paired = TRUE) %>%
  adjust_pvalue(method = "BH") %>%
  add_significance() %>%
  add_xy_position(x = "AIFI_L3", fun = "max", comparisons = visit_pair)

significant_cell_types_2 <- stat_test_2 %>%
  filter(p.adj < 0.05) %>%
  select(AIFI_L3, .y., p.adj, p.adj.signif) %>%
  unique()

print(significant_cell_types_2)

max_y <- max(result_cohort$percentage, na.rm = TRUE)
transformed_max_y <- log10(max_y)

p5 <- ggplot(result_cohort, aes(x = AIFI_L3, y = percentage, color = sample.visitName)) +
  geom_boxplot(width = 0.5, outlier.shape = '.', position = position_dodge(width = 0.75), size = 1) +
  scale_x_discrete(name = "AIFI_L3", guide = guide_axis(angle = 90)) +
  theme_bw(base_size = 12) +
  stat_pvalue_manual(stat_test_2, label = "p.adj.signif", tip.length = 0, y.position = (transformed_max_y + buffer), label.size = 12) +
  scale_color_manual(values = visit_colors) +
  scale_y_log10() +
  cohort_theme +
  ggtitle("BR2")

# Stack the two cohort panels vertically with equal heights
library(cowplot)
options(repr.plot.width = 60, repr.plot.height = 40)

stacked_plots <- plot_grid(p4 + theme(plot.margin = margin(b = 20)), p5, nrow = 2, rel_heights = c(1, 1))

# Print the stacked plots
stacked_plots


# # Display the plot
# print(p4)


# #Combine Plot
# library(cowplot)

# options(repr.plot.width = 50, repr.plot.height = 30)
# # Combine two plots vertically
# combined_plot <- plot_grid(p4, p5, labels = c("Frequency_Percentage", "Frequency_Percentage_CLR"), ncol =1)

# combined_plot


# Comparison within B cells (Percentage and Percentage CLR - AIFI Level 1 cell types)

In [None]:
# B cells from the two Flu Year 1 visits (Day 0 and Day 7)
flu_yr1_visits <- c("Flu Year 1 Day 0", "Flu Year 1 Day 7")
cell_df <- flu_df %>%
  filter(AIFI_L1 == "B cell", sample.visitName %in% flu_yr1_visits)

In [None]:
# Sanity check: list the visits and AIFI_L3 labels present in cell_df
unique(cell_df$sample.visitName)
unique(cell_df$AIFI_L3)

In [None]:
# Keep the B cell entries in their own object. Overwriting `dict` here would
# leave every later `dict %>% filter(AIFI_L1 == ...)` for another lineage empty.
dict_final <- dict %>% filter(AIFI_L1 == "B cell")
dict_final

In [None]:
# Perform inner join on the shared 'AIFI_L3' column
merged_df <- inner_join(cell_df, dict_final, by = 'AIFI_L3')

In [None]:
# AIFI_L3 labels that remain after the inner join
unique(merged_df$AIFI_L3)

In [None]:
# Overview of cell_df: labels, lineages and visits present, number of
# distinct subjects, and the available columns
unique(cell_df$AIFI_L3)
unique(cell_df$AIFI_L1)
unique(cell_df$sample.visitName)
length(unique(cell_df$subject.subjectGuid))
colnames(cell_df)

In [None]:
## Share of each AIFI_L3 subset within a subject's visit (percentage and CLR)

result_cell <- merged_df %>%
  # Rows per AIFI_L3 subset for every subject / visit combination
  dplyr::group_by(AIFI_L3, subject.subjectGuid, sample.visitName) %>%
  dplyr::summarise(count = n()) %>%
  # Both derived columns are computed within one subject / visit group
  dplyr::group_by(subject.subjectGuid, sample.visitName) %>%
  dplyr::mutate(
    percentage = (count / sum(count)) * 100,
    percentage_clr = clr_transform((percentage))
  ) %>%
  dplyr::ungroup()

head(result_cell)

#sum(result_Bcell$subject.subjectGuid)

In [None]:
# Spot-check one subject (BR1003): preview its rows and list its visits
w <- result_cell %>% filter(subject.subjectGuid == "BR1003")
head(w)
unique(w$sample.visitName)

In [None]:
# Attach cohort, sex and CMV serology columns to every frequency row,
# matched on the subject identifier
subject_meta_cols <- c("cohort.cohortGuid", "subject.subjectGuid",
                       "subject.biologicalSex", "CMV.IgG.Serology.Result.Interpretation")
subject_meta <- unique(cell_df[subject_meta_cols])
result_cell_final <- inner_join(result_cell, subject_meta, by = "subject.subjectGuid")

In [None]:
# Preview the joined table and count its distinct subjects
head(result_cell_final)
length(unique(result_cell_final$subject.subjectGuid))

In [None]:
# Compare row/column counts across the intermediate tables
dim(result_cell_final)
dim(result_cell)
dim(cell_df)
dim(merged_df)

In [None]:
# Sum of the percentage column over all rows of subject BR1002
result_cell_final %>%
  filter(subject.subjectGuid == "BR1002") %>%
  pull(percentage) %>%
  sum()

In [None]:
# Spot-check subject BR1003 in the joined table: preview rows and list visits
x <- result_cell_final %>% filter(subject.subjectGuid == "BR1003")
head(x)
unique(x$sample.visitName)

In [None]:
# result_cell <- result_cell %>%
#   filter( !(sample.visitName == "Flu Year 2 Day 0" | sample.visitName == "Flu Year 2 Day 7") )


In [None]:
# Sum of the percentage column over all rows of subject BR1002
result_cell %>%
  filter(subject.subjectGuid == "BR1002") %>%
  pull(percentage) %>%
  sum()

In [None]:
#############Individual Plots##################

In [None]:
# Wilcoxon test of percentage between visits, run separately for each AIFI_L3
# subset (no `paired` argument is passed), with BH-adjusted p-values.
# The stray empty argument that sat inside select() has been removed.
stat_test <- result_cell_final %>%
  select(percentage, percentage_clr, AIFI_L3, subject.biologicalSex, sample.visitName, subject.subjectGuid) %>%
  unique(.) %>%
  group_by(AIFI_L3) %>%
  wilcox_test(percentage ~ sample.visitName) %>%
  adjust_pvalue(method = "BH") %>%
  add_significance()
#stat_test

In [None]:
# Display the per-subset test results
stat_test

In [None]:
# Display the joined frequency table
result_cell_final

In [None]:


# Day 0 vs Day 7 comparison for every AIFI_L3 subset: percentage panel on top,
# CLR panel below.

# Wilcoxon test on the CLR values per AIFI_L3 subset, BH-adjusted.
# subject.subjectGuid is kept in the selection so unique() cannot merge rows
# from different subjects that happen to share identical values.
stat_test <- result_cell_final %>%
  select(percentage, percentage_clr, AIFI_L3, cohort.cohortGuid, CMV.IgG.Serology.Result.Interpretation, subject.biologicalSex, sample.visitName, subject.subjectGuid) %>%
  unique(.) %>%
  group_by(AIFI_L3) %>%
  wilcox_test(percentage_clr ~ sample.visitName) %>%
  adjust_pvalue(method = "BH") %>%
  add_significance()

# Axis / legend text settings and title styling shared by both panels
axis_text_theme <- theme(
  axis.text.x = element_text(size = 18, color = "black", angle = 90, hjust = 0.5, vjust = 0.5),
  axis.text.y = element_text(size = 18, color = "black"),
  axis.title.x = element_text(size = 18, color = "black"),
  axis.title.y = element_text(size = 18, color = "black"),
  legend.text = element_text(size = 13, color = "black"),
  legend.title = element_text(size = 15, color = "black")
)
panel_title_theme <- theme(plot.title = element_text(size = 25, hjust = 0.5, face = "bold"))

# Percentage panel; the y scale is added once (it was previously added twice)
#options(repr.plot.width = 35, repr.plot.height = 12)
p1 <- ggplot(result_cell_final, aes(x = reorder(AIFI_L3, desc(percentage)), y = percentage, fill = sample.visitName)) +
  geom_boxplot(width = .5, outlier.shape = '.') +
  scale_x_discrete(name = "AIFI_L3", guide = guide_axis(angle = 90)) +
  theme_bw(base_size = 12) +
  geom_pwc(
    aes(group = sample.visitName),
    tip.length = 0, p.adjust.method = "BH",
    method = "wilcox_test", label = "{p.adj.signif}", angle = 0, hide.ns = FALSE, label.size = 7
  ) +
  #facet_grid(cohort.cohortGuid ~ ., scales = "free_y", space = "free_y") +
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
  axis_text_theme +
  ggtitle("Percentage") +
  panel_title_theme

# CLR panel
#options(repr.plot.width = 35, repr.plot.height = 12)
p2 <- ggplot(result_cell_final, aes(x = reorder(AIFI_L3, desc(percentage_clr)), y = percentage_clr, fill = sample.visitName)) +
  geom_boxplot(width = .5, outlier.shape = '.') +
  scale_x_discrete(name = "AIFI_L3", guide = guide_axis(angle = 90)) +
  theme_bw(base_size = 12) +
  geom_pwc(
    aes(group = sample.visitName),
    tip.length = 0, p.adjust.method = "BH",
    method = "wilcox_test", label = "{p.adj.signif}", angle = 0, hide.ns = FALSE, label.size = 7
  ) +
  #facet_grid(cohort.cohortGuid ~ ., scales = "free_y", space = "free_y") +
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
  axis_text_theme +
  ggtitle("CLR") +
  panel_title_theme

# Stack the two panels vertically with equal heights
library(cowplot)
options(repr.plot.width = 40, repr.plot.height = 25)

stacked_plots <- plot_grid(p1 + theme(plot.margin = margin(b = 20)), p2, nrow = 2, rel_heights = c(1, 1))

# Print the stacked plots
stacked_plots


#combined_plot



# Comparison within B cells - Lv1 (Facet plot)

In [None]:
# Preview the subject-filtered input table
head(filtered_flu_df)

In [None]:
# B cells from the two Flu Year 1 visits, taken from the subject-filtered table
flu_yr1_visits <- c("Flu Year 1 Day 0", "Flu Year 1 Day 7")
cell_df <- filtered_flu_df %>%
  filter(AIFI_L1 == "B cell", sample.visitName %in% flu_yr1_visits)

# Visits and AIFI_L3 labels that remain
unique(cell_df$sample.visitName)
unique(cell_df$AIFI_L3)

In [None]:
# Dictionary rows belonging to the B cell lineage
dict_final <- filter(dict, AIFI_L1 == "B cell")
dict_final

In [None]:
# Keep only the cells whose AIFI_L3 label appears in the lineage dictionary
merged_df <- cell_df %>%
  inner_join(dict_final, by = "AIFI_L3")
unique(merged_df$AIFI_L3)

In [None]:
## Share of each AIFI_L3 subset within a subject's visit (percentage and CLR)

result_cell <- merged_df %>%
  # Rows per AIFI_L3 subset for every subject / visit combination
  dplyr::group_by(AIFI_L3, subject.subjectGuid, sample.visitName) %>%
  dplyr::summarise(count = n()) %>%
  # Both derived columns are computed within one subject / visit group
  dplyr::group_by(subject.subjectGuid, sample.visitName) %>%
  dplyr::mutate(
    percentage = (count / sum(count)) * 100,
    percentage_clr = clr_transform((percentage))
  ) %>%
  dplyr::ungroup()

#head(result_cell)

#sum(result_Bcell$subject.subjectGuid)

In [None]:
# Attach cohort, sex and CMV serology columns to every frequency row,
# matched on the subject identifier
subject_meta_cols <- c("cohort.cohortGuid", "subject.subjectGuid",
                       "subject.biologicalSex", "CMV.IgG.Serology.Result.Interpretation")
subject_meta <- unique(cell_df[subject_meta_cols])
result_cell_final <- inner_join(result_cell, subject_meta, by = "subject.subjectGuid")

In [None]:
# Preview the joined table and count its distinct subjects
head(result_cell_final)
length(unique(result_cell_final$subject.subjectGuid))

In [None]:
# Count the rows available for each AIFI_L3 / visit combination
# (before restricting to subjects seen at both visits)
result_cell_final %>%
  group_by(AIFI_L3, sample.visitName) %>%
  dplyr::summarize(n = n())


In [None]:
# Keep only subject / AIFI_L3 combinations that have rows for both visits,
# so every remaining Day 0 value has a matching Day 7 value.
filtered_data <- result_cell_final %>%
  group_by(AIFI_L3, subject.subjectGuid) %>%
  filter(all(c("Flu Year 1 Day 0", "Flu Year 1 Day 7") %in% sample.visitName)) %>%
  ungroup()  # the grouping is only needed for the filter above
filtered_data

In [None]:
# Count the rows per AIFI_L3 / visit combination after the both-visits filter
filtered_data %>%
  group_by(AIFI_L3, sample.visitName) %>%
  dplyr::summarize(n = n())

In [None]:
# Paired Wilcoxon signed-rank test (Day 0 vs Day 7) on percentage for every
# AIFI_L3 subset, followed by a facet plot with one panel per subset.
# Y-scale: percentage

stat_test_1 <- filtered_data %>%
  select(
    percentage, percentage_clr, AIFI_L3, cohort.cohortGuid,
    CMV.IgG.Serology.Result.Interpretation, subject.biologicalSex,
    sample.visitName, subject.subjectGuid
  ) %>%
  unique() %>%
  group_by(AIFI_L3) %>%
  wilcox_test(percentage ~ sample.visitName, paired = TRUE) %>%
  adjust_pvalue(method = "BH") %>%
  add_significance() %>%
  add_xy_position(x = "AIFI_L3", fun = "max")

# Large text sizes for every text element, axis titles and the legend
facet_text_theme <- theme(
  text = element_text(size = 40, color = "black"),
  axis.title = element_text(size = 40),
  legend.title = element_text(size = 50),
  legend.text = element_text(size = 50)
)

# Boxes per visit, dotted lines joining each subject's two values, raw points
p <- ggplot(filtered_data, aes(x = sample.visitName, y = percentage)) +
  geom_boxplot(aes(fill = sample.visitName), alpha = .2) +
  geom_line(aes(group = subject.subjectGuid), color = "black", linetype = "dotted") +
  geom_point(size = 2) +
  facet_wrap(~ AIFI_L3, scales = "free", drop = FALSE) +
  facet_text_theme +
  labs(y = "percentage")

# Write the adjusted significance at the top of each panel
p + geom_text(
  data = stat_test_1,
  aes(label = paste("Wilcoxon signed rank test p.adj =", p.adj.signif)),
  x = 1, y = Inf, hjust = 0.1, vjust = 1, size = 8, color = "black"
)


### Split by Cohort- facet plot

In [None]:
## "filtered_data" was created in the "Comparison within B cells- Lv1 (Facet_plot)"

In [None]:
# Split the both-visits table by cohort (BR1 / BR2)
result_cohort1 <- filtered_data[filtered_data$cohort.cohortGuid == "BR1", ]
result_cohort2 <- filtered_data[filtered_data$cohort.cohortGuid == "BR2", ]

# Export each cohort subset as CSV; write.csv() is called with its defaults,
# so row names are written as the first column
write.csv(result_cohort1, "/home/jupyter/IH-A-Aging-Analysis-Notebooks_old/Mansi_Notebooks/scRNA/Frequency_Plot/subset_BR1_Bcells_parent_CLR_percentage.csv")
write.csv(result_cohort2, "/home/jupyter/IH-A-Aging-Analysis-Notebooks_old/Mansi_Notebooks/scRNA/Frequency_Plot/subset_BR2_Bcells_parent_CLR_percentage.csv")

In [None]:
# Per-cohort paired comparison (Day 0 vs Day 7) of the percentage of each
# AIFI_L3 subset, drawn as one facet per subset. BR1 first, then BR2.

# Text sizes and title styling shared by both cohort plots
facet_text_theme <- theme(
  text = element_text(size = 40, color = "black"),
  axis.title = element_text(size = 40),
  legend.title = element_text(size = 50),
  legend.text = element_text(size = 50)
)
facet_title_theme <- theme(plot.title = element_text(size = 60, hjust = 0.5, face = "bold"))

############## BR1 = percentage
result_cohort <- filtered_data[filtered_data$cohort.cohortGuid == "BR1", ]

# Wilcoxon signed-rank test per AIFI_L3 subset, BH-adjusted
stat_test_1 <- result_cohort %>%
  select(
    percentage, percentage_clr, AIFI_L3, cohort.cohortGuid,
    CMV.IgG.Serology.Result.Interpretation, subject.biologicalSex,
    sample.visitName, subject.subjectGuid
  ) %>%
  unique() %>%
  group_by(AIFI_L3) %>%
  wilcox_test(percentage ~ sample.visitName, paired = TRUE) %>%
  adjust_pvalue(method = "BH") %>%
  add_significance() %>%
  add_xy_position(x = "AIFI_L3", fun = "max")

# Boxes per visit, dotted lines joining each subject's two values, raw points
p <- ggplot(result_cohort, aes(x = sample.visitName, y = percentage)) +
  geom_boxplot(aes(fill = sample.visitName), alpha = .2) +
  geom_line(aes(group = subject.subjectGuid), color = "black", linetype = "dotted") +
  geom_point(size = 2) +
  facet_wrap(~ AIFI_L3, scales = "free", drop = FALSE) +
  facet_text_theme +
  labs(y = "Percentage") +
  ggtitle("BR1- ParentLv1 (Percentage)") +
  facet_title_theme

# Write the adjusted significance at the top of each panel
p + geom_text(
  data = stat_test_1,
  aes(label = paste("Wilcoxon signed rank test p.adj =", p.adj.signif)),
  x = 1, y = Inf, hjust = 0.1, vjust = 1, size = 8, color = "black"
)

############## BR2 = percentage
result_cohort <- filtered_data[filtered_data$cohort.cohortGuid == "BR2", ]

stat_test_1 <- result_cohort %>%
  select(
    percentage, percentage_clr, AIFI_L3, cohort.cohortGuid,
    CMV.IgG.Serology.Result.Interpretation, subject.biologicalSex,
    sample.visitName, subject.subjectGuid
  ) %>%
  unique() %>%
  group_by(AIFI_L3) %>%
  wilcox_test(percentage ~ sample.visitName, paired = TRUE) %>%
  adjust_pvalue(method = "BH") %>%
  add_significance() %>%
  add_xy_position(x = "AIFI_L3", fun = "max")

p <- ggplot(result_cohort, aes(x = sample.visitName, y = percentage)) +
  geom_boxplot(aes(fill = sample.visitName), alpha = .2) +
  geom_line(aes(group = subject.subjectGuid), color = "black", linetype = "dotted") +
  geom_point(size = 2) +
  facet_wrap(~ AIFI_L3, scales = "free", drop = FALSE) +
  facet_text_theme +
  labs(y = "Percentage") +
  ggtitle("BR2- ParentLv1 (Percentage)") +
  facet_title_theme

p + geom_text(
  data = stat_test_1,
  aes(label = paste("Wilcoxon signed rank test p.adj =", p.adj.signif)),
  x = 1, y = Inf, hjust = 0.1, vjust = 1, size = 8, color = "black"
)

In [None]:
# Per-cohort paired comparison (Day 0 vs Day 7) of the percentage of each
# AIFI_L3 subset, drawn on a log10 y axis. BR1 first, then BR2.

# Text sizes, title styling and y-axis breaks shared by both cohort plots
facet_text_theme <- theme(
  text = element_text(size = 40, color = "black"),
  axis.title = element_text(size = 40),
  legend.title = element_text(size = 50),
  legend.text = element_text(size = 50)
)
facet_title_theme <- theme(plot.title = element_text(size = 60, hjust = 0.5, face = "bold"))
log10_breaks <- c(0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100)

############## BR1 = percentage (log 10 scale)
result_cohort <- filtered_data[filtered_data$cohort.cohortGuid == "BR1", ]

# Wilcoxon signed-rank test per AIFI_L3 subset, BH-adjusted
stat_test_1 <- result_cohort %>%
  select(
    percentage, percentage_clr, AIFI_L3, cohort.cohortGuid,
    CMV.IgG.Serology.Result.Interpretation, subject.biologicalSex,
    sample.visitName, subject.subjectGuid
  ) %>%
  unique() %>%
  group_by(AIFI_L3) %>%
  wilcox_test(percentage ~ sample.visitName, paired = TRUE) %>%
  adjust_pvalue(method = "BH") %>%
  add_significance() %>%
  add_xy_position(x = "AIFI_L3", fun = "max")

# Boxes per visit, dotted lines joining each subject's two values, raw points
p <- ggplot(result_cohort, aes(x = sample.visitName, y = percentage)) +
  geom_boxplot(aes(fill = sample.visitName), alpha = .2) +
  geom_line(aes(group = subject.subjectGuid), color = "black", linetype = "dotted") +
  geom_point(size = 2) +
  scale_y_log10(breaks = log10_breaks) +
  facet_wrap(~ AIFI_L3, scales = "free", drop = FALSE) +
  facet_text_theme +
  labs(y = "Percentage (log10 scale)") +
  ggtitle("BR1") +
  facet_title_theme

# Write the adjusted significance at the top of each panel
p + geom_text(
  data = stat_test_1,
  aes(label = paste("Wilcoxon signed rank test p.adj =", p.adj.signif)),
  x = 1, y = Inf, hjust = 0.1, vjust = 1, size = 8, color = "black"
)

############## BR2 = percentage (log 10 scale)
result_cohort <- filtered_data[filtered_data$cohort.cohortGuid == "BR2", ]

stat_test_1 <- result_cohort %>%
  select(
    percentage, percentage_clr, AIFI_L3, cohort.cohortGuid,
    CMV.IgG.Serology.Result.Interpretation, subject.biologicalSex,
    sample.visitName, subject.subjectGuid
  ) %>%
  unique() %>%
  group_by(AIFI_L3) %>%
  wilcox_test(percentage ~ sample.visitName, paired = TRUE) %>%
  adjust_pvalue(method = "BH") %>%
  add_significance() %>%
  add_xy_position(x = "AIFI_L3", fun = "max")

p <- ggplot(result_cohort, aes(x = sample.visitName, y = percentage)) +
  geom_boxplot(aes(fill = sample.visitName), alpha = .2) +
  geom_line(aes(group = subject.subjectGuid), color = "black", linetype = "dotted") +
  geom_point(size = 2) +
  scale_y_log10(breaks = log10_breaks) +
  facet_wrap(~ AIFI_L3, scales = "free", drop = FALSE) +
  facet_text_theme +
  labs(y = "Percentage (log10 scale)") +
  ggtitle("BR2") +
  facet_title_theme

p + geom_text(
  data = stat_test_1,
  aes(label = paste("Wilcoxon signed rank test p.adj =", p.adj.signif)),
  x = 1, y = Inf, hjust = 0.1, vjust = 1, size = 8, color = "black"
)

In [None]:
# Per-cohort paired comparison (Day 0 vs Day 7) of the CLR value of each
# AIFI_L3 subset, drawn as one facet per subset. BR1 first, then BR2.

# Text sizes and title styling shared by both cohort plots
facet_text_theme <- theme(
  text = element_text(size = 40, color = "black"),
  axis.title = element_text(size = 40),
  legend.title = element_text(size = 50),
  legend.text = element_text(size = 50)
)
facet_title_theme <- theme(plot.title = element_text(size = 60, hjust = 0.5, face = "bold"))

############## BR1 = CLR
result_cohort <- filtered_data[filtered_data$cohort.cohortGuid == "BR1", ]

# Wilcoxon signed-rank test per AIFI_L3 subset, BH-adjusted
stat_test_1 <- result_cohort %>%
  select(
    percentage, percentage_clr, AIFI_L3, cohort.cohortGuid,
    CMV.IgG.Serology.Result.Interpretation, subject.biologicalSex,
    sample.visitName, subject.subjectGuid
  ) %>%
  unique() %>%
  group_by(AIFI_L3) %>%
  wilcox_test(percentage_clr ~ sample.visitName, paired = TRUE) %>%
  adjust_pvalue(method = "BH") %>%
  add_significance() %>%
  add_xy_position(x = "AIFI_L3", fun = "max")

# Boxes per visit, dotted lines joining each subject's two values, raw points
p <- ggplot(result_cohort, aes(x = sample.visitName, y = percentage_clr)) +
  geom_boxplot(aes(fill = sample.visitName), alpha = .2) +
  geom_line(aes(group = subject.subjectGuid), color = "black", linetype = "dotted") +
  geom_point(size = 2) +
  facet_wrap(~ AIFI_L3, scales = "free", drop = FALSE) +
  facet_text_theme +
  labs(y = "CLR") +
  ggtitle("BR1- ParentLv1 (CLR)") +
  facet_title_theme

# Write the adjusted significance at the top of each panel
p + geom_text(
  data = stat_test_1,
  aes(label = paste("Wilcoxon signed rank test p.adj =", p.adj.signif)),
  x = 1, y = Inf, hjust = 0.1, vjust = 1, size = 8, color = "black"
)

############## BR2 = CLR
result_cohort <- filtered_data[filtered_data$cohort.cohortGuid == "BR2", ]

stat_test_1 <- result_cohort %>%
  select(
    percentage, percentage_clr, AIFI_L3, cohort.cohortGuid,
    CMV.IgG.Serology.Result.Interpretation, subject.biologicalSex,
    sample.visitName, subject.subjectGuid
  ) %>%
  unique() %>%
  group_by(AIFI_L3) %>%
  wilcox_test(percentage_clr ~ sample.visitName, paired = TRUE) %>%
  adjust_pvalue(method = "BH") %>%
  add_significance() %>%
  add_xy_position(x = "AIFI_L3", fun = "max")

p <- ggplot(result_cohort, aes(x = sample.visitName, y = percentage_clr)) +
  geom_boxplot(aes(fill = sample.visitName), alpha = .2) +
  geom_line(aes(group = subject.subjectGuid), color = "black", linetype = "dotted") +
  geom_point(size = 2) +
  facet_wrap(~ AIFI_L3, scales = "free", drop = FALSE) +
  facet_text_theme +
  labs(y = "CLR") +
  ggtitle("BR2- ParentLv1 (CLR)") +
  facet_title_theme

p + geom_text(
  data = stat_test_1,
  aes(label = paste("Wilcoxon signed rank test p.adj =", p.adj.signif)),
  x = 1, y = Inf, hjust = 0.1, vjust = 1, size = 8, color = "black"
)

In [None]:
# Paired Wilcoxon signed-rank test on the CLR values per AIFI_L3 subset,
# BH-adjusted, printed for each cohort in turn.

############## BR1
result_cohort <- filtered_data[filtered_data$cohort.cohortGuid == "BR1", ]

stat_test_1 <- result_cohort %>%
  select(
    percentage, percentage_clr, AIFI_L3, cohort.cohortGuid,
    CMV.IgG.Serology.Result.Interpretation, subject.biologicalSex,
    sample.visitName, subject.subjectGuid
  ) %>%
  unique() %>%
  group_by(AIFI_L3) %>%
  wilcox_test(percentage_clr ~ sample.visitName, paired = TRUE) %>%
  adjust_pvalue(method = "BH") %>%
  add_significance() %>%
  add_xy_position(x = "AIFI_L3", fun = "max")

print(stat_test_1)
# significant_cell_types <- stat_test %>%
#   filter(p.adj < 0.05) %>%
#   select(AIFI_L3, .y., p, p.adj, p.adj.signif) %>%
#   unique()

# print(significant_cell_types)

############## BR2
result_cohort <- filtered_data[filtered_data$cohort.cohortGuid == "BR2", ]

stat_test_2 <- result_cohort %>%
  select(
    percentage, percentage_clr, AIFI_L3, cohort.cohortGuid,
    CMV.IgG.Serology.Result.Interpretation, subject.biologicalSex,
    sample.visitName, subject.subjectGuid
  ) %>%
  unique() %>%
  group_by(AIFI_L3) %>%
  wilcox_test(percentage_clr ~ sample.visitName, paired = TRUE) %>%
  adjust_pvalue(method = "BH") %>%
  add_significance() %>%
  add_xy_position(x = "AIFI_L3", fun = "max")

print(stat_test_2)


# significant_cell_types_2 <- stat_test_2 %>%
#   filter(p.adj < 0.05) %>%
#   select(AIFI_L3, .y., p, p.adj, p.adj.signif) %>%
#   unique()

# print(significant_cell_types_2)

# Comparison within Monocyte cells (Percentage and Percentage CLR - AIFI Level 1 cell types)

In [None]:
# T cells from the two Flu Year 1 visits (Day 0 and Day 7)
# NOTE(review): the section heading names Monocyte cells but the filter keeps
# "T cell" - confirm which lineage is intended.
flu_yr1_visits <- c("Flu Year 1 Day 0", "Flu Year 1 Day 7")
cell_df <- flu_df %>%
  filter(AIFI_L1 == "T cell", sample.visitName %in% flu_yr1_visits)

# Visits and AIFI_L3 labels that remain
unique(cell_df$sample.visitName)
unique(cell_df$AIFI_L3)

In [None]:
# Dictionary rows belonging to the T cell lineage
dict_final <- filter(dict, AIFI_L1 == "T cell")
dict_final

In [None]:
# Keep only the cells whose AIFI_L3 label appears in the lineage dictionary
merged_df <- cell_df %>%
  inner_join(dict_final, by = "AIFI_L3")
unique(merged_df$AIFI_L3)

In [None]:
## Share of each AIFI_L3 subset within a subject's visit (percentage and CLR)

result_cell <- merged_df %>%
  # Rows per AIFI_L3 subset for every subject / visit combination
  dplyr::group_by(AIFI_L3, subject.subjectGuid, sample.visitName) %>%
  dplyr::summarise(count = n()) %>%
  # Both derived columns are computed within one subject / visit group
  dplyr::group_by(subject.subjectGuid, sample.visitName) %>%
  dplyr::mutate(
    percentage = (count / sum(count)) * 100,
    percentage_clr = clr_transform((percentage))
  ) %>%
  dplyr::ungroup()

#head(result_cell)

#sum(result_Bcell$subject.subjectGuid)

In [None]:
# Attach cohort, sex and CMV serology columns to every frequency row,
# matched on the subject identifier
subject_meta_cols <- c("cohort.cohortGuid", "subject.subjectGuid",
                       "subject.biologicalSex", "CMV.IgG.Serology.Result.Interpretation")
subject_meta <- unique(cell_df[subject_meta_cols])
result_cell_final <- inner_join(result_cell, subject_meta, by = "subject.subjectGuid")

In [None]:


# Day 0 vs Day 7 comparison for every AIFI_L3 subset: percentage panel on top,
# CLR panel below.

# Wilcoxon test on the CLR values per AIFI_L3 subset, BH-adjusted.
# subject.subjectGuid is kept in the selection so unique() cannot merge rows
# from different subjects that happen to share identical values.
stat_test <- result_cell_final %>%
  select(percentage, percentage_clr, AIFI_L3, cohort.cohortGuid, CMV.IgG.Serology.Result.Interpretation, subject.biologicalSex, sample.visitName, subject.subjectGuid) %>%
  unique(.) %>%
  group_by(AIFI_L3) %>%
  wilcox_test(percentage_clr ~ sample.visitName) %>%
  adjust_pvalue(method = "BH") %>%
  add_significance()

# Axis / legend text settings and title styling shared by both panels
axis_text_theme <- theme(
  axis.text.x = element_text(size = 18, color = "black", angle = 90, hjust = 0.5, vjust = 0.5),
  axis.text.y = element_text(size = 18, color = "black"),
  axis.title.x = element_text(size = 18, color = "black"),
  axis.title.y = element_text(size = 18, color = "black"),
  legend.text = element_text(size = 13, color = "black"),
  legend.title = element_text(size = 15, color = "black")
)
panel_title_theme <- theme(plot.title = element_text(size = 25, hjust = 0.5, face = "bold"))

# Percentage panel; the y scale is added once (it was previously added twice)
#options(repr.plot.width = 35, repr.plot.height = 12)
p1 <- ggplot(result_cell_final, aes(x = reorder(AIFI_L3, desc(percentage)), y = percentage, fill = sample.visitName)) +
  geom_boxplot(width = .5, outlier.shape = '.') +
  scale_x_discrete(name = "AIFI_L3", guide = guide_axis(angle = 90)) +
  theme_bw(base_size = 12) +
  geom_pwc(
    aes(group = sample.visitName),
    tip.length = 0, p.adjust.method = "BH",
    method = "wilcox_test", label = "{p.adj.signif}", angle = 0, hide.ns = FALSE, label.size = 7
  ) +
  #facet_grid(cohort.cohortGuid ~ ., scales = "free_y", space = "free_y") +
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
  axis_text_theme +
  ggtitle("Percentage") +
  panel_title_theme

# CLR panel
#options(repr.plot.width = 35, repr.plot.height = 12)
p2 <- ggplot(result_cell_final, aes(x = reorder(AIFI_L3, desc(percentage_clr)), y = percentage_clr, fill = sample.visitName)) +
  geom_boxplot(width = .5, outlier.shape = '.') +
  scale_x_discrete(name = "AIFI_L3", guide = guide_axis(angle = 90)) +
  theme_bw(base_size = 12) +
  geom_pwc(
    aes(group = sample.visitName),
    tip.length = 0, p.adjust.method = "BH",
    method = "wilcox_test", label = "{p.adj.signif}", angle = 0, hide.ns = FALSE, label.size = 7
  ) +
  #facet_grid(cohort.cohortGuid ~ ., scales = "free_y", space = "free_y") +
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
  axis_text_theme +
  ggtitle("CLR") +
  panel_title_theme

# Stack the two panels vertically with equal heights
library(cowplot)
options(repr.plot.width = 40, repr.plot.height = 25)

stacked_plots <- plot_grid(p1 + theme(plot.margin = margin(b = 20)), p2, nrow = 2, rel_heights = c(1, 1))

# Print the stacked plots
stacked_plots


#combined_plot



In [None]:
# Display the current contents of the cell-type dictionary
dict

# Loop code for parent celltypes

In [None]:
#, sample.visitName == "Flu Year 1 Day 0" | sample.visitName == "Flu Year 1 Day 7"

In [None]:
#This works but has graph visual errors- code review with Samir


# Day 0 vs Day 7 comparison (unpaired) for every AIFI_L1 parent cell type.
#
# For each lineage the loop:
#   1. keeps the cells of that lineage at the two Flu Year 1 visits,
#   2. counts cells per AIFI_L3 subtype / subject / visit and converts the
#      counts to a percentage within each subject-visit, plus its CLR
#      transform (clr_transform() is defined elsewhere in this notebook),
#   3. draws a Percentage panel and a CLR panel with BH-adjusted Wilcoxon
#      labels (geom_pwc) and writes both, stacked, to one PDF per lineage.
#
# Fixes relative to the previous version:
#   * removed a stray `v` after the axis.text.x argument of the CLR theme,
#     which made the whole cell a syntax error;
#   * lineages with no matching cells are skipped with a message instead of
#     aborting the loop;
#   * the PDF device is always closed, even if drawing fails.
unique_AIFI_L1 <- unique(flu_df$AIFI_L1)

# Visits compared in both panels.
yr1_visits <- c("Flu Year 1 Day 0", "Flu Year 1 Day 7")

# Text styling shared by the Percentage and CLR panels.
parent_panel_theme <- theme(
    axis.text.x = element_text(size = 11, color = "black", angle = 90, hjust = 0.5, vjust = 0.5),
    axis.text.y = element_text(size = 11, color = "black"),
    axis.title.x = element_text(size = 11, color = "black"),
    axis.title.y = element_text(size = 11, color = "black"),
    legend.text = element_text(size = 11, color = "black"),
    legend.title = element_text(size = 11, color = "black")
)

for (cell in unique_AIFI_L1) {

    cell_df <- flu_df %>%
        filter(AIFI_L1 == cell, sample.visitName %in% yr1_visits)

    dict_final <- dict %>% filter(AIFI_L1 == cell)

    # Keep only subtypes listed in the reference dictionary for this lineage
    merged_df <- inner_join(cell_df, dict_final, by = "AIFI_L3")

    # Nothing to plot (e.g. an NA lineage or one absent from the dictionary)
    if (nrow(merged_df) == 0) {
        message("Skipping AIFI_L1 = ", cell, ": no cells for the selected visits")
        next
    }

    result_cell <- merged_df %>%
        dplyr::group_by(AIFI_L3, subject.subjectGuid, sample.visitName) %>%
        dplyr::summarise(count = n()) %>%                  # cells per subtype
        dplyr::group_by(subject.subjectGuid, sample.visitName) %>%
        dplyr::mutate(
            # share of the lineage within each subject-visit
            percentage = (count / sum(count)) * 100,
            percentage_clr = clr_transform(percentage)
        ) %>%
        dplyr::ungroup()

    # Attach subject-level metadata (cohort, sex, CMV status)
    subject_meta <- unique(cell_df[c('cohort.cohortGuid', 'subject.subjectGuid', 'subject.biologicalSex', 'CMV.IgG.Serology.Result.Interpretation')])
    result_cell_final <- inner_join(result_cell, subject_meta, by = c('subject.subjectGuid'))

    outfile <- paste0("/home/jupyter/Myeloid_cells/Plots/FluYr1_D0vsD7_Percentage_&_CLR_Comparisons_Parent/FluYr1_D0vsD7_Percentage_&_CLR_Comparisons_", cell, ".pdf")
    options(repr.plot.width = 18, repr.plot.height = 10)

    # Percentage
    p1 <- ggplot(result_cell_final, aes(x = reorder(AIFI_L3, desc(percentage)), y = percentage, fill = sample.visitName)) +
        geom_boxplot(width = .5, outlier.shape = '.') +
        scale_x_discrete(name = "AIFI_L3", guide = guide_axis(angle = 90)) +
        theme_bw(base_size = 10) +
        geom_pwc(
            aes(group = sample.visitName),
            tip.length = 0, p.adjust.method = "BH",
            method = "wilcox_test", label = "{p.adj.signif}", angle = 0, hide.ns = FALSE, label.size = 5
        ) +
        # headroom above the boxes for the significance labels
        scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
        parent_panel_theme +
        ggtitle("Percentage") + theme(plot.title = element_text(size = 25, hjust = 0.5, face = "bold"))

    # CLR
    p2 <- ggplot(result_cell_final, aes(x = reorder(AIFI_L3, desc(percentage_clr)), y = percentage_clr, fill = sample.visitName)) +
        geom_boxplot(width = .5, outlier.shape = '.') +
        scale_x_discrete(name = "AIFI_L3", guide = guide_axis(angle = 90)) +
        theme_bw(base_size = 10) +
        geom_pwc(
            aes(group = sample.visitName),
            tip.length = 0, p.adjust.method = "BH",
            method = "wilcox_test", label = "{p.adj.signif}", angle = 0, hide.ns = FALSE, label.size = 5
        ) +
        scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
        parent_panel_theme +
        ggtitle("CLR") + theme(plot.title = element_text(size = 20, hjust = 0.5, face = "bold"))

    stacked_plots <- plot_grid(p1 + theme(plot.margin = margin(b = 20)), p2, nrow = 2, rel_heights = c(1, 1))

    # Make sure the target folder exists before opening the device
    dir.create(dirname(outfile), recursive = TRUE, showWarnings = FALSE)
    pdf(file = outfile, width = 16, height = 20)
    # `finally` closes the device even when rendering throws
    tryCatch(print(stacked_plots), finally = dev.off())
}

In [None]:
# Paired Day 0 vs Day 7 comparison on the full `result` table --------------
# `result` is built earlier in the notebook; it is expected to carry the
# columns selected below (percentage, percentage_clr, AIFI_L3, ...).

yr1_pair <- c("Flu Year 1 Day 0", "Flu Year 1 Day 7")
yr1_pair_colors <- c("Flu Year 1 Day 0" = "#1b9e77", "Flu Year 1 Day 7" = "#d95f02")

result_yr1 <- result %>% filter(sample.visitName %in% yr1_pair)

# Keep a subject for a given subtype only when both visits are present,
# so that every retained observation has a partner for the paired test.
filtered_data <- result_yr1 %>%
  group_by(AIFI_L3, subject.subjectGuid) %>%
  filter(all(yr1_pair %in% sample.visitName))
filtered_data

# De-duplicated input shared by both tests, grouped per subtype.
paired_input <- filtered_data %>%
  select(
    percentage, percentage_clr, AIFI_L3, cohort.cohortGuid,
    CMV.IgG.Serology.Result.Interpretation, subject.biologicalSex,
    sample.visitName, subject.subjectGuid
  ) %>%
  unique() %>%
  group_by(AIFI_L3)

# Text styling shared by the two panels.
paired_text_theme <- theme(
  text = element_text(size = 45, color = "black"),
  axis.text.x = element_text(color = "black"),
  axis.text.y = element_text(color = "black"),
  axis.title.x = element_text(color = "black"),
  axis.title.y = element_text(color = "black"),
  legend.text = element_text(color = "black"),
  legend.title = element_text(color = "black")
)

# --- Percentage: paired Wilcoxon per subtype, BH-adjusted ------------------
stat_test_1 <- paired_input %>%
  wilcox_test(percentage ~ sample.visitName, paired = TRUE) %>%
  adjust_pvalue(method = "BH") %>%
  add_significance() %>%
  add_xy_position(x = "AIFI_L3", fun = "max", comparisons = list(yr1_pair))

# To list the significant subtypes:
# stat_test_1 %>% filter(p.adj < 0.05) %>% select(AIFI_L3, .y., p.adj, p.adj.signif) %>% unique()

p4 <- ggplot(
    filtered_data,
    aes(x = AIFI_L3, y = percentage, color = sample.visitName)
  ) +
  geom_boxplot(
    width = 0.5, outlier.shape = ".", size = 1,
    position = position_dodge(width = 0.75)
  ) +
  scale_x_discrete(name = "AIFI_L3", guide = guide_axis(angle = 90)) +
  theme_bw(base_size = 12) +
  # significance labels are drawn at a fixed height of 42
  stat_pvalue_manual(
    stat_test_1,
    label = "p.adj.signif", tip.length = 0, y.position = 42, label.size = 12
  ) +
  scale_color_manual(values = yr1_pair_colors) +
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
  paired_text_theme +
  ggtitle("Percentage") +
  theme(plot.title = element_text(size = 35, hjust = 0.5, face = "bold"))

# --- CLR: same test on the CLR-transformed percentage ----------------------
stat_test_2 <- paired_input %>%
  wilcox_test(percentage_clr ~ sample.visitName, paired = TRUE) %>%
  adjust_pvalue(method = "BH") %>%
  add_significance() %>%
  add_xy_position(x = "AIFI_L3", fun = "max", comparisons = list(yr1_pair))

# To list the significant subtypes:
# stat_test_2 %>% filter(p.adj < 0.05) %>% select(AIFI_L3, .y., p.adj, p.adj.signif) %>% unique()

p5 <- ggplot(
    filtered_data,
    aes(x = AIFI_L3, y = percentage_clr, color = sample.visitName)
  ) +
  geom_boxplot(
    width = 0.5, outlier.shape = ".", size = 1,
    position = position_dodge(width = 0.75)
  ) +
  scale_x_discrete(name = "AIFI_L3", guide = guide_axis(angle = 90)) +
  theme_bw(base_size = 12) +
  # significance labels are drawn at a fixed height of 10
  stat_pvalue_manual(
    stat_test_2,
    label = "p.adj.signif", tip.length = 0, y.position = 10, label.size = 12
  ) +
  scale_color_manual(values = yr1_pair_colors) +
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
  paired_text_theme +
  ggtitle("CLR") +
  theme(plot.title = element_text(size = 35, hjust = 0.5, face = "bold"))

# --- Stack Percentage above CLR and display --------------------------------
library(cowplot)
options(repr.plot.width = 60, repr.plot.height = 40)

stacked_plots <- plot_grid(
  p4 + theme(plot.margin = margin(b = 20)),
  p5,
  nrow = 2,
  rel_heights = c(1, 1)
)

stacked_plots




In [None]:
#This works but has graph visual errors- code review with Samir


# Paired Day 0 vs Day 7 comparison for every AIFI_L1 parent cell type.
#
# For each lineage the loop:
#   1. computes, per subject and visit, the percentage of each AIFI_L3
#      subtype within the lineage and its CLR transform (clr_transform() is
#      defined elsewhere in this notebook),
#   2. keeps, per subtype, only subjects observed at BOTH visits,
#   3. runs a paired Wilcoxon test per subtype (BH-adjusted) on the
#      percentage and on the CLR value,
#   4. writes a stacked Percentage / CLR figure to one PDF per lineage.
#
# Changes relative to the previous version:
#   * the CLR panel now uses the paired data and the paired test; it used to
#     be a copy of the unpaired geom_pwc() panel on all subjects, so the two
#     panels of the "paired" PDF disagreed;
#   * significance labels use the per-subtype y positions computed by
#     add_xy_position() instead of a fixed y = 42 (percentages within a
#     lineage can be far above or below that);
#   * font sizes follow the other per-lineage PDFs (11 pt) rather than the
#     45 pt used for the much larger on-screen figure;
#   * rows are explicitly ordered by subject before the paired test;
#   * empty lineages are skipped and the PDF device is always closed.
unique_AIFI_L1 <- unique(flu_df$AIFI_L1)

# Visits forming each pair, and their colours in the plots.
paired_visits <- c("Flu Year 1 Day 0", "Flu Year 1 Day 7")
paired_visit_colors <- c("Flu Year 1 Day 0" = "#1b9e77", "Flu Year 1 Day 7" = "#d95f02")

# Dodge width shared by the boxes and by the bracket positions.
paired_dodge <- 0.75

# Text styling shared by both panels (same sizes as the unpaired loop).
paired_panel_theme <- theme(
    axis.text.x = element_text(size = 11, color = "black", angle = 90, hjust = 0.5, vjust = 0.5),
    axis.text.y = element_text(size = 11, color = "black"),
    axis.title.x = element_text(size = 11, color = "black"),
    axis.title.y = element_text(size = 11, color = "black"),
    legend.text = element_text(size = 11, color = "black"),
    legend.title = element_text(size = 11, color = "black")
)

for (cell in unique_AIFI_L1) {

    cell_df <- flu_df %>%
        filter(AIFI_L1 == cell, sample.visitName %in% paired_visits)

    dict_final <- dict %>% filter(AIFI_L1 == cell)

    # Keep only subtypes listed in the reference dictionary for this lineage
    merged_df <- inner_join(cell_df, dict_final, by = "AIFI_L3")

    result_cell <- merged_df %>%
        dplyr::group_by(AIFI_L3, subject.subjectGuid, sample.visitName) %>%
        dplyr::summarise(count = n()) %>%                  # cells per subtype
        dplyr::group_by(subject.subjectGuid, sample.visitName) %>%
        dplyr::mutate(
            # share of the lineage within each subject-visit
            percentage = (count / sum(count)) * 100,
            percentage_clr = clr_transform(percentage)
        ) %>%
        dplyr::ungroup()

    # Attach subject-level metadata (cohort, sex, CMV status)
    subject_meta <- unique(cell_df[c('cohort.cohortGuid', 'subject.subjectGuid', 'subject.biologicalSex', 'CMV.IgG.Serology.Result.Interpretation')])
    result_cell_final <- inner_join(result_cell, subject_meta, by = c('subject.subjectGuid'))

    # cell_df is already restricted to the two visits; kept as a safeguard
    result_yr1 <- result_cell_final %>% filter(sample.visitName %in% paired_visits)

    # Per subtype, keep only subjects that have both visits, and order the
    # rows by subject so the two visits line up for the paired test.
    filtered_data <- result_yr1 %>%
        group_by(AIFI_L3, subject.subjectGuid) %>%
        filter(all(paired_visits %in% sample.visitName)) %>%
        ungroup() %>%
        arrange(AIFI_L3, subject.subjectGuid, sample.visitName)

    if (nrow(filtered_data) == 0) {
        message("Skipping AIFI_L1 = ", cell, ": no subject has both visits")
        next
    }

    outfile <- paste0("/home/jupyter/Myeloid_cells/Plots/FluYr1_D0vsD7_Percentage_&_CLR_Comparisons_Parent_paired/FluYr1_D0vsD7_Percentage_&_CLR_Comparisons_Parent_paired_", cell, ".pdf")
    options(repr.plot.width = 18, repr.plot.height = 10)

    # De-duplicated test input, one group per subtype
    paired_input <- filtered_data %>%
        dplyr::select(percentage, percentage_clr, AIFI_L3, cohort.cohortGuid, CMV.IgG.Serology.Result.Interpretation, subject.biologicalSex, sample.visitName, subject.subjectGuid) %>%
        unique() %>%
        group_by(AIFI_L3)

    # Percentage: paired Wilcoxon per subtype, BH-adjusted
    stat_test_1 <- paired_input %>%
        wilcox_test(percentage ~ sample.visitName, paired = TRUE) %>%
        adjust_pvalue(method = "BH") %>%
        add_significance() %>%
        add_xy_position(x = "AIFI_L3", dodge = paired_dodge, fun = "max", comparisons = list(paired_visits))

    p1 <- ggplot(filtered_data, aes(x = AIFI_L3, y = percentage, color = sample.visitName)) +
        geom_boxplot(width = 0.5, outlier.shape = '.', position = position_dodge(width = paired_dodge), size = 1) +
        scale_x_discrete(name = "AIFI_L3", guide = guide_axis(angle = 90)) +
        theme_bw(base_size = 12) +
        # y positions come from the y.position column added by add_xy_position()
        stat_pvalue_manual(stat_test_1, label = "p.adj.signif", tip.length = 0, label.size = 5) +
        scale_color_manual(values = paired_visit_colors) +
        # headroom above the boxes for the significance labels
        scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
        paired_panel_theme +
        ggtitle("Percentage") + theme(plot.title = element_text(size = 25, hjust = 0.5, face = "bold"))

    # CLR: same paired test on the CLR-transformed percentage
    stat_test_2 <- paired_input %>%
        wilcox_test(percentage_clr ~ sample.visitName, paired = TRUE) %>%
        adjust_pvalue(method = "BH") %>%
        add_significance() %>%
        add_xy_position(x = "AIFI_L3", dodge = paired_dodge, fun = "max", comparisons = list(paired_visits))

    p2 <- ggplot(filtered_data, aes(x = AIFI_L3, y = percentage_clr, color = sample.visitName)) +
        geom_boxplot(width = 0.5, outlier.shape = '.', position = position_dodge(width = paired_dodge), size = 1) +
        scale_x_discrete(name = "AIFI_L3", guide = guide_axis(angle = 90)) +
        theme_bw(base_size = 12) +
        stat_pvalue_manual(stat_test_2, label = "p.adj.signif", tip.length = 0, label.size = 5) +
        scale_color_manual(values = paired_visit_colors) +
        scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
        paired_panel_theme +
        ggtitle("CLR") + theme(plot.title = element_text(size = 20, hjust = 0.5, face = "bold"))

    stacked_plots <- plot_grid(p1 + theme(plot.margin = margin(b = 20)), p2, nrow = 2, rel_heights = c(1, 1))

    # Make sure the target folder exists before opening the device
    dir.create(dirname(outfile), recursive = TRUE, showWarnings = FALSE)
    pdf(file = outfile, width = 16, height = 20)
    # `finally` closes the device even when rendering throws
    tryCatch(print(stacked_plots), finally = dev.off())
}

# Loop code for parent celltypes, split by cohort

In [None]:
#This works but has graph visual errors- code review with Samir

#Percentage code (BR1 vs BR2)
# Percentage by cohort: for each AIFI_L1 lineage, a BR1 panel stacked on a
# BR2 panel, each comparing Day 0 vs Day 7 per AIFI_L3 subtype.
unique_AIFI_L1 <- unique(flu_df$AIFI_L1)

# Text styling used by both cohort panels.
cohort_pct_theme <- theme(
    axis.text.x = element_text(size = 11, color = "black", angle = 90, hjust = 0.5, vjust = 0.5),
    axis.text.y = element_text(size = 11, color = "black"),
    axis.title.x = element_text(size = 11, color = "black"),
    axis.title.y = element_text(size = 11, color = "black"),
    legend.text = element_text(size = 11, color = "black"),
    legend.title = element_text(size = 11, color = "black")
)

for (cell in unique_AIFI_L1) {

    # Cells of this lineage at the two Flu Year 1 visits
    cell_df <- flu_df %>%
        filter(
            AIFI_L1 == cell,
            sample.visitName %in% c("Flu Year 1 Day 0", "Flu Year 1 Day 7")
        )

    dict_final <- dict %>% filter(AIFI_L1 == cell)

    # Restrict to subtypes present in the reference dictionary
    merged_df <- inner_join(cell_df, dict_final, by = "AIFI_L3")

    # Count cells per subtype / subject / visit, then express each count as a
    # percentage of the subject-visit total and CLR-transform it
    # (clr_transform() is defined elsewhere in this notebook).
    result_cell <- merged_df %>%
        dplyr::group_by(AIFI_L3, subject.subjectGuid, sample.visitName) %>%
        dplyr::summarise(count = n()) %>%
        dplyr::group_by(subject.subjectGuid, sample.visitName) %>%
        dplyr::mutate(
            percentage = (count / sum(count)) * 100,
            percentage_clr = clr_transform((percentage))
        ) %>%
        dplyr::ungroup()

    # Subject-level metadata (cohort, sex, CMV status) joined back on
    subject_meta <- unique(cell_df[c('cohort.cohortGuid', 'subject.subjectGuid', 'subject.biologicalSex', 'CMV.IgG.Serology.Result.Interpretation')])
    result_cell_final <- inner_join(result_cell, subject_meta, by = c('subject.subjectGuid'))

    outfile <- paste0("/home/jupyter/Myeloid_cells/Plots/FluYr1_D0vsD7_Percentage_By_Cohort_Comparisons_Parent/FluYr1_D0vsD7_Percentage_By_Cohort_Comparisons_Parent_", cell, ".pdf")
    options(repr.plot.width = 18, repr.plot.height = 10)

    # ---- BR1 panel ----
    result_cohort <- result_cell_final[result_cell_final$cohort.cohortGuid == "BR1", ]
    p1 <- ggplot(
            result_cohort,
            aes(x = reorder(AIFI_L3, desc(percentage)), y = percentage, fill = sample.visitName)
        ) +
        geom_boxplot(width = 0.5, outlier.shape = ".") +
        scale_x_discrete(name = "AIFI_L3", guide = guide_axis(angle = 90)) +
        theme_bw(base_size = 12) +
        # BH-adjusted Wilcoxon comparison between visits for each subtype
        geom_pwc(
            aes(group = sample.visitName),
            method = "wilcox_test",
            p.adjust.method = "BH",
            label = "{p.adj.signif}",
            label.size = 7,
            tip.length = 0,
            angle = 0,
            hide.ns = FALSE
        ) +
        scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
        cohort_pct_theme +
        ggtitle("BR1") +
        theme(plot.title = element_text(size = 25, hjust = 0.5, face = "bold"))

    # ---- BR2 panel ----
    result_cohort <- result_cell_final[result_cell_final$cohort.cohortGuid == "BR2", ]
    p2 <- ggplot(
            result_cohort,
            aes(x = reorder(AIFI_L3, desc(percentage)), y = percentage, fill = sample.visitName)
        ) +
        geom_boxplot(width = 0.5, outlier.shape = ".") +
        scale_x_discrete(name = "AIFI_L3", guide = guide_axis(angle = 90)) +
        theme_bw(base_size = 12) +
        geom_pwc(
            aes(group = sample.visitName),
            method = "wilcox_test",
            p.adjust.method = "BH",
            label = "{p.adj.signif}",
            label.size = 7,
            tip.length = 0,
            angle = 0,
            hide.ns = FALSE
        ) +
        scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
        cohort_pct_theme +
        ggtitle("BR2") +
        theme(plot.title = element_text(size = 20, hjust = 0.5, face = "bold"))

    # BR1 on top, BR2 below, written to one PDF per lineage
    stacked_plots <- plot_grid(
        p1 + theme(plot.margin = margin(b = 20)),
        p2,
        nrow = 2,
        rel_heights = c(1, 1)
    )
    pdf(file = outfile, width = 16, height = 20)
    print(stacked_plots)
    dev.off()
}

In [None]:
#CLR code (BR1 vs BR2)


# CLR by cohort: for each AIFI_L1 lineage, a BR1 panel stacked on a BR2
# panel, each comparing the CLR-transformed subtype percentage at Day 0 vs
# Day 7.
unique_AIFI_L1 <- unique(flu_df$AIFI_L1)

# Text styling used by both cohort panels.
cohort_clr_theme <- theme(
    axis.text.x = element_text(size = 11, color = "black", angle = 90, hjust = 0.5, vjust = 0.5),
    axis.text.y = element_text(size = 11, color = "black"),
    axis.title.x = element_text(size = 11, color = "black"),
    axis.title.y = element_text(size = 11, color = "black"),
    legend.text = element_text(size = 11, color = "black"),
    legend.title = element_text(size = 11, color = "black")
)

for (cell in unique_AIFI_L1) {

    # Cells of this lineage at the two Flu Year 1 visits
    cell_df <- flu_df %>%
        filter(
            AIFI_L1 == cell,
            sample.visitName %in% c("Flu Year 1 Day 0", "Flu Year 1 Day 7")
        )

    dict_final <- dict %>% filter(AIFI_L1 == cell)

    # Restrict to subtypes present in the reference dictionary
    merged_df <- inner_join(cell_df, dict_final, by = "AIFI_L3")

    # Count cells per subtype / subject / visit, then express each count as a
    # percentage of the subject-visit total and CLR-transform it
    # (clr_transform() is defined elsewhere in this notebook).
    result_cell <- merged_df %>%
        dplyr::group_by(AIFI_L3, subject.subjectGuid, sample.visitName) %>%
        dplyr::summarise(count = n()) %>%
        dplyr::group_by(subject.subjectGuid, sample.visitName) %>%
        dplyr::mutate(
            percentage = (count / sum(count)) * 100,
            percentage_clr = clr_transform((percentage))
        ) %>%
        dplyr::ungroup()

    # Subject-level metadata (cohort, sex, CMV status) joined back on
    subject_meta <- unique(cell_df[c('cohort.cohortGuid', 'subject.subjectGuid', 'subject.biologicalSex', 'CMV.IgG.Serology.Result.Interpretation')])
    result_cell_final <- inner_join(result_cell, subject_meta, by = c('subject.subjectGuid'))

    outfile <- paste0("/home/jupyter/Myeloid_cells/Plots/FluYr1_D0vsD7_CLR_By_Cohort_Comparisons_Parent/FluYr1_D0vsD7_CLR_By_Cohort_Comparisons_Parent_", cell, ".pdf")
    options(repr.plot.width = 18, repr.plot.height = 10)

    # ---- BR1 panel ----
    result_cohort <- result_cell_final[result_cell_final$cohort.cohortGuid == "BR1", ]
    p1 <- ggplot(
            result_cohort,
            aes(x = reorder(AIFI_L3, desc(percentage_clr)), y = percentage_clr, fill = sample.visitName)
        ) +
        geom_boxplot(width = 0.5, outlier.shape = ".") +
        scale_x_discrete(name = "AIFI_L3", guide = guide_axis(angle = 90)) +
        theme_bw(base_size = 12) +
        # BH-adjusted Wilcoxon comparison between visits for each subtype
        geom_pwc(
            aes(group = sample.visitName),
            method = "wilcox_test",
            p.adjust.method = "BH",
            label = "{p.adj.signif}",
            label.size = 7,
            tip.length = 0,
            angle = 0,
            hide.ns = FALSE
        ) +
        scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
        cohort_clr_theme +
        ggtitle("BR1") +
        theme(plot.title = element_text(size = 25, hjust = 0.5, face = "bold"))

    # ---- BR2 panel ----
    result_cohort <- result_cell_final[result_cell_final$cohort.cohortGuid == "BR2", ]
    p2 <- ggplot(
            result_cohort,
            aes(x = reorder(AIFI_L3, desc(percentage_clr)), y = percentage_clr, fill = sample.visitName)
        ) +
        geom_boxplot(width = 0.5, outlier.shape = ".") +
        scale_x_discrete(name = "AIFI_L3", guide = guide_axis(angle = 90)) +
        theme_bw(base_size = 12) +
        geom_pwc(
            aes(group = sample.visitName),
            method = "wilcox_test",
            p.adjust.method = "BH",
            label = "{p.adj.signif}",
            label.size = 7,
            tip.length = 0,
            angle = 0,
            hide.ns = FALSE
        ) +
        scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
        cohort_clr_theme +
        ggtitle("BR2") +
        theme(plot.title = element_text(size = 20, hjust = 0.5, face = "bold"))

    # BR1 on top, BR2 below, written to one PDF per lineage
    stacked_plots <- plot_grid(
        p1 + theme(plot.margin = margin(b = 20)),
        p2,
        nrow = 2,
        rel_heights = c(1, 1)
    )
    pdf(file = outfile, width = 16, height = 20)
    print(stacked_plots)
    dev.off()
}