In [2]:
suppressPackageStartupMessages(library(readxl))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(tidyr))
suppressPackageStartupMessages(library(tibble))
suppressPackageStartupMessages(library(patchwork))
suppressPackageStartupMessages(library(pheatmap))
suppressPackageStartupMessages(library(stringr))
library(hise)
library(plyr)
library(purrr)
library(vegan)
#library(arrow)
library(rstatix)
library(parallel)
library(ggpubr)
library(ComplexHeatmap)
library(compositions)
library(ggsci)

ERROR: Error in library(compositions): there is no package called ‘compositions’


In [3]:
colors = c('#FDC086','#FFFF99','#F0027F','#BF5B17','#666666',
          '#1B9E77','#D95F02','#7570B3','#E7298A','#66A61E','#E6AB02','#A6761D','#666666',
          '#A6CEE3','#1F78B4','#B2DF8A','#33A02C','#FB9A99','#E31A1C','#FDBF6F','#FF7F00',
          '#CAB2D6','#6A3D9A','#FFFF99','#B15928','#FBB4AE','#B3CDE3','#CCEBC5','#DECBE4',
          '#FED9A6','#FFFFCC','#E5D8BD','#FDDAEC','#F2F2F2','#B3E2CD','#FDCDAC','#CBD5E8',
          '#F4CAE4','#E6F5C9','#FFF2AE','#F1E2CC','#CCCCCC','#E41A1C','#377EB8','#4DAF4A',
          '#984EA3','#FF7F00','#FFFF33','#A65628','#F781BF','#999999','#66C2A5','#FC8D62',
          '#8DA0CB','#E78AC3','#A6D854','#FFD92F','#E5C494','#B3B3B3','#8DD3C7','#FFFFB3',
          '#BEBADA','#FB8072','#80B1D3','#FDB462','#B3DE69','#FCCDE5','#D9D9D9','#BC80BD',
          '#CCEBC5','#FFED6F')

# Reading the file

In [4]:
# Read the CSV file into a data frame (No UP1)
flu_df <- read.csv('/home/jupyter/Myeloid_cells/files/AIFI_fluYr1Yr2_raw.csv')


In [65]:
unique(flu_df$cohort.cohortGuid)
unique(flu_df$AIFI_L3.5)
unique(flu_df$AIFI_L1)

In [6]:
nrow(flu_df)

In [7]:
rownames(flu_df)<-flu_df$cell_uuid

In [None]:
#length(unique(myeloid_df$subject.subjectGuid))

In [None]:
#unique(myeloid_df$AIFI_L1)

In [8]:
colnames(flu_df)

# CLR Transform


In [9]:
clr_transform <- function(x) {
  if (length(x) == 0) {
    return(NA)  # return NA for empty vectors
  }
  geom_mean <- exp(mean(log(x)))
  return(log(x / geom_mean))
}

# Comparsion within total PBMCs (Percentage and  CLR)

In [None]:
## Percentage of total

result <- flu_df %>%
  dplyr::group_by(AIFI_L3, subject.subjectGuid, sample.visitName ) %>%               
  dplyr::summarise(count = n()) %>%                  # Count occurrences
  dplyr::group_by(subject.subjectGuid) %>%                       
  dplyr::mutate(percentage = (count / sum(count)) * 100) %>% 
  dplyr::group_by(subject.subjectGuid) %>%     
  dplyr::mutate(percentage_clr = clr_transform((percentage))) %>% # Group again by subjectGuid for CLR# Calculate percentage
  dplyr::ungroup()

head(result)
#sum(result$subject.subjectGuid)

In [None]:
result %>% filter(subject.subjectGuid == "BR1001") %>% 
      select(percentage) %>%
      pull() %>% 
      sum()

In [None]:
result<-left_join(result,unique(flu_df[c('cohort.cohortGuid','subject.subjectGuid','subject.biologicalSex', 'CMV.IgG.Serology.Result.Interpretation')]),by=c('subject.subjectGuid'))

In [None]:
head(result)


# Global Plot

In [None]:
result_yr1 <- result %>% filter(sample.visitName == "Flu Year 1 Day 0" | sample.visitName == "Flu Year 1 Day 7")

In [None]:
result_yr2 <- result %>% filter(sample.visitName == "Flu Year 2 Day 0" | sample.visitName == "Flu Year 2 Day 7")

In [None]:
dim(result_yr1)
head(result_yr2)

In [None]:
len(result_yr1$sample.visitName)

In [None]:
# Percentage comparing Age WITHOUT CMV seperation

#result_cohort <- result[result$CMV.IgG.Serology.Result.Interpretation == 'Negative', ]
#result_cohort <- result[result$CMV.IgG.Serology.Result.Interpretation == 'Positive', ]

stat_test<- result_yr1 %>%
  select(percentage,percentage_clr, AIFI_L3, cohort.cohortGuid, CMV.IgG.Serology.Result.Interpretation, subject.biologicalSex, sample.visitName) %>%
  unique(.) %>%
  group_by(AIFI_L3) %>%
  wilcox_test(percentage_clr ~ sample.visitName) %>%
  adjust_pvalue(method = "BH") %>%
  add_significance()
#stat_test

significant_cell_types <- stat_test %>%
  filter(p.adj < 0.05) %>%
  select(AIFI_L3, .y., p.adj, p.adj.signif) %>%
  unique()

print(significant_cell_types)

# Create the plot
#options(repr.plot.width = 35, repr.plot.height = 12)
p4 <- ggplot(result_yr1, aes(x = reorder(AIFI_L3, desc(percentage)) , y = percentage, fill = sample.visitName)) +
  geom_boxplot(width = .5, outlier.shape = '.') +
  scale_x_discrete(name ="AIFI_L3", guide = guide_axis(angle = 90)) +
  theme_bw(base_size = 12) +
  geom_pwc(
    aes(group = sample.visitName),
    tip.length = 0, p.adjust.method = "BH",
    method = "wilcox_test", label ="{p.adj.signif}", angle = 0, hide.ns = FALSE, label.size =10  # or "panel"
  ) +
  #facet_grid(cohort.cohortGuid ~ ., scales = "free_y", space = "free_y") +
   scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
  theme(text = element_text(size=30),)
     # axis.text.x = element_text(size = 18, color = "black", angle = 90, hjust = 0.5, vjust = 0.5),
        #axis.text.y = element_text(size = 18, color = "black"),
        #axis.title.x = element_text(size = 18, color = "black"),
        #axis.title.y = element_text(size = 18, color = "black"),
       #legend.text = element_text(size = 13, color = "black"),  # Adjust the font size of legend text
        #legend.title = element_text(size = 15, color = "black"))

# Display the plot
#print(p4)


#Percentage CLR comparing Age WITHOUT CMV seperation
p5 <- ggplot(result_yr1, aes(x = reorder(AIFI_L3, desc(percentage_clr)) , y = percentage_clr, fill = sample.visitName)) +
  geom_boxplot(width = .5, outlier.shape = '.') +
  scale_x_discrete(name ="AIFI_L3", guide = guide_axis(angle = 90)) +
  theme_bw(base_size = 12) +
  geom_pwc(
    aes(group = sample.visitName),
    tip.length = 0, p.adjust.method = "BH",
    method = "wilcox_test", label ="{p.adj.signif}", angle = 0, hide.ns = FALSE, label.size =10  # or "panel"
  ) +
  #facet_grid(cohort.cohortGuid ~ ., scales = "free_y", space = "free_y") +
   scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
  theme(text = element_text(size=30, color = "black"))
     # axis.text.x = element_text(size = 18, color = "black", angle = 90, hjust = 0.5, vjust = 0.5),
        #axis.text.y = element_text(size = 18, color = "black"),
        #axis.title.x = element_text(size = 18, color = "black"),
        #axis.title.y = element_text(size = 18, color = "black"),
       #legend.text = element_text(size = 13, color = "black"),  # Adjust the font size of legend text
        #legend.title = element_text(size = 15, color = "black"))


#print(p5)

#Combine Plot
library(cowplot)

options(repr.plot.width = 50, repr.plot.height = 30)
# Combine two plots vertically
combined_plot <- plot_grid(p4, p5, labels = c("Frequency_Percentage", "Frequency_Percentage_CLR"), ncol =1)

combined_plot


In [None]:
# Percentage comparing Age WITHOUT CMV seperation

#result_cohort <- result[result$CMV.IgG.Serology.Result.Interpretation == 'Negative', ]
#result_cohort <- result[result$CMV.IgG.Serology.Result.Interpretation == 'Positive', ]

stat_test<- result_yr2 %>%
  select(percentage,percentage_clr, AIFI_L3, cohort.cohortGuid, CMV.IgG.Serology.Result.Interpretation, subject.biologicalSex, sample.visitName) %>%
  unique(.) %>%
  group_by(AIFI_L3) %>%
  wilcox_test(percentage_clr ~ sample.visitName) %>%
  adjust_pvalue(method = "BH") %>%
  add_significance()
#stat_test

significant_cell_types <- stat_test %>%
  filter(p.adj < 0.05) %>%
  select(AIFI_L3, .y., p.adj, p.adj.signif) %>%
  unique()

print(significant_cell_types)

# Create the plot
#options(repr.plot.width = 35, repr.plot.height = 12)
p4 <- ggplot(result_yr2, aes(x = reorder(AIFI_L3, desc(percentage)) , y = percentage, fill = sample.visitName)) +
  geom_boxplot(width = .5, outlier.shape = '.') +
  scale_x_discrete(name ="AIFI_L3", guide = guide_axis(angle = 90)) +
  theme_bw(base_size = 12) +
  geom_pwc(
    aes(group = sample.visitName),
    tip.length = 0, p.adjust.method = "BH",
    method = "wilcox_test", label ="{p.adj.signif}", angle = 0, hide.ns = FALSE, label.size =10  # or "panel"
  ) +
  #facet_grid(cohort.cohortGuid ~ ., scales = "free_y", space = "free_y") +
   scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
  theme(text = element_text(size=30),)
     # axis.text.x = element_text(size = 18, color = "black", angle = 90, hjust = 0.5, vjust = 0.5),
        #axis.text.y = element_text(size = 18, color = "black"),
        #axis.title.x = element_text(size = 18, color = "black"),
        #axis.title.y = element_text(size = 18, color = "black"),
       #legend.text = element_text(size = 13, color = "black"),  # Adjust the font size of legend text
        #legend.title = element_text(size = 15, color = "black"))

# Display the plot
#print(p4)


#Percentage CLR comparing Age WITHOUT CMV seperation
p5 <- ggplot(result_yr2, aes(x = reorder(AIFI_L3, desc(percentage_clr)) , y = percentage_clr, fill = sample.visitName)) +
  geom_boxplot(width = .5, outlier.shape = '.') +
  scale_x_discrete(name ="AIFI_L3", guide = guide_axis(angle = 90)) +
  theme_bw(base_size = 12) +
  geom_pwc(
    aes(group = sample.visitName),
    tip.length = 0, p.adjust.method = "BH",
    method = "wilcox_test", label ="{p.adj.signif}", angle = 0, hide.ns = FALSE, label.size =10  # or "panel"
  ) +
  #facet_grid(cohort.cohortGuid ~ ., scales = "free_y", space = "free_y") +
   scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
  theme(text = element_text(size=30, color = "black"))
     # axis.text.x = element_text(size = 18, color = "black", angle = 90, hjust = 0.5, vjust = 0.5),
        #axis.text.y = element_text(size = 18, color = "black"),
        #axis.title.x = element_text(size = 18, color = "black"),
        #axis.title.y = element_text(size = 18, color = "black"),
       #legend.text = element_text(size = 13, color = "black"),  # Adjust the font size of legend text
        #legend.title = element_text(size = 15, color = "black"))


#print(p5)

#Combine Plot
library(cowplot)

options(repr.plot.width = 50, repr.plot.height = 30)
# Combine two plots vertically
combined_plot <- plot_grid(p4, p5, labels = c("Frequency_Percentage", "Frequency_Percentage_CLR"), ncol =1)

combined_plot


# Comparsion within B cells(Percentage and Percentage CLR - AIFI Level 1 celltypes)¶

In [10]:
Bcell_df <- flu_df %>% filter(AIFI_L1 == 'B cell' , sample.visitName == "Flu Year 1 Day 0" | sample.visitName == "Flu Year 1 Day 7" )

In [38]:
Bcell_df <- flu_df %>% filter(AIFI_L1 == 'B cell')

In [63]:
unique(Bcell_df$AIFI_L3)
unique(Bcell_df$AIFI_L1)
unique(Bcell_df$sample.visitName)
colnames(Bcell_df)

In [44]:
## Percentage of total

result_Bcell <- Bcell_df %>%
  dplyr::group_by(AIFI_L3, subject.subjectGuid, sample.visitName) %>%               
  dplyr::summarise(count = n()) %>%                  # Count occurrences
  dplyr::group_by(subject.subjectGuid, sample.visitName) %>%                       
  dplyr::mutate(percentage = (count / sum(count)) * 100) %>% 
  dplyr::group_by(subject.subjectGuid, sample.visitName) %>%     
  dplyr::mutate(percentage_clr = clr_transform((percentage))) %>% # Group again by subjectGuid for CLR# Calculate percentage
  dplyr::ungroup()

head(result_Bcell)

#sum(result_Bcell$subject.subjectGuid)

[1m[22m`summarise()` has grouped output by 'AIFI_L3', 'subject.subjectGuid'. You can
override using the `.groups` argument.


AIFI_L3,subject.subjectGuid,sample.visitName,count,percentage,percentage_clr
<chr>,<chr>,<chr>,<int>,<dbl>,<dbl>
ASDC,BR1059,Flu Year 2 Day 7,1,0.05931198,-3.364498
ASDC,BR2042,Flu Year 2 Day 0,1,0.0621118,-2.490027
ASDC,BR2047,Flu Year 1 Day 0,1,0.05537099,-3.191707
Activated memory B cell,BR1001,Flu Year 1 Day 0,4,0.22026432,-2.368099
Activated memory B cell,BR1001,Flu Year 1 Day 7,2,0.09324009,-3.187636
Activated memory B cell,BR1002,Flu Year 2 Day 0,7,0.66037736,-1.291274


In [45]:
result_Bcell<-left_join(result_Bcell,unique(flu_df[c('cohort.cohortGuid','subject.subjectGuid','subject.biologicalSex','CMV.IgG.Serology.Result.Interpretation')]),by=c('subject.subjectGuid'))

In [46]:
head(result_Bcell)

AIFI_L3,subject.subjectGuid,sample.visitName,count,percentage,percentage_clr,cohort.cohortGuid,subject.biologicalSex,CMV.IgG.Serology.Result.Interpretation
<chr>,<chr>,<chr>,<int>,<dbl>,<dbl>,<chr>,<chr>,<chr>
ASDC,BR1059,Flu Year 2 Day 7,1,0.05931198,-3.364498,BR1,Male,Negative
ASDC,BR2042,Flu Year 2 Day 0,1,0.0621118,-2.490027,BR2,Female,Negative
ASDC,BR2047,Flu Year 1 Day 0,1,0.05537099,-3.191707,BR2,Male,Positive
Activated memory B cell,BR1001,Flu Year 1 Day 0,4,0.22026432,-2.368099,BR1,Female,Negative
Activated memory B cell,BR1001,Flu Year 1 Day 7,2,0.09324009,-3.187636,BR1,Female,Negative
Activated memory B cell,BR1002,Flu Year 2 Day 0,7,0.66037736,-1.291274,BR1,Male,Negative


In [47]:
result_Bcell %>% filter(subject.subjectGuid == "BR1001") %>% 
      select(percentage) %>%
      pull() %>% 
      sum()

In [50]:
x <- result_Bcell %>% filter(subject.subjectGuid == "BR1002")
head(x)
unique(x$sample.visitName)

AIFI_L3,subject.subjectGuid,sample.visitName,count,percentage,percentage_clr,cohort.cohortGuid,subject.biologicalSex,CMV.IgG.Serology.Result.Interpretation
<chr>,<chr>,<chr>,<int>,<dbl>,<dbl>,<chr>,<chr>,<chr>
Activated memory B cell,BR1002,Flu Year 2 Day 0,7,0.6603774,-1.2912737,BR1,Male,Negative
Activated memory B cell,BR1002,Flu Year 2 Day 7,2,0.1881468,-2.5500634,BR1,Male,Negative
CD27+ effector B cell,BR1002,Flu Year 1 Day 0,29,3.1351351,0.5955402,BR1,Male,Negative
CD27+ effector B cell,BR1002,Flu Year 1 Day 7,39,3.6654135,0.4573892,BR1,Male,Negative
CD27+ effector B cell,BR1002,Flu Year 2 Day 0,41,3.8679245,0.4763882,BR1,Male,Negative
CD27+ effector B cell,BR1002,Flu Year 2 Day 7,71,6.6792098,1.0194693,BR1,Male,Negative


In [51]:
result_Bcell <- result_Bcell %>%
  filter( !(sample.visitName == "Flu Year 2 Day 0" | sample.visitName == "Flu Year 2 Day 7") )


In [None]:
#result_ind <- result_Bcell %>% filter(sample.visitName == "Flu Year 1 Day 0" | sample.visitName == "Flu Year 1 Day 7")

In [52]:
head(result_Bcell)

AIFI_L3,subject.subjectGuid,sample.visitName,count,percentage,percentage_clr,cohort.cohortGuid,subject.biologicalSex,CMV.IgG.Serology.Result.Interpretation
<chr>,<chr>,<chr>,<int>,<dbl>,<dbl>,<chr>,<chr>,<chr>
ASDC,BR2047,Flu Year 1 Day 0,1,0.05537099,-3.191707,BR2,Male,Positive
Activated memory B cell,BR1001,Flu Year 1 Day 0,4,0.22026432,-2.368099,BR1,Female,Negative
Activated memory B cell,BR1001,Flu Year 1 Day 7,2,0.09324009,-3.187636,BR1,Female,Negative
Activated memory B cell,BR1003,Flu Year 1 Day 0,5,0.28473804,-1.806394,BR1,Female,Negative
Activated memory B cell,BR1003,Flu Year 1 Day 7,1,0.06090134,-3.364823,BR1,Female,Negative
Activated memory B cell,BR1004,Flu Year 1 Day 0,1,0.11363636,-3.222762,BR1,Male,Negative


In [54]:
result_Bcell %>% filter(subject.subjectGuid == "BR1002") %>% 
      select(percentage) %>%
      pull() %>% 
      sum()

In [None]:
#############Individual Plots##################

In [60]:
stat_test<- result_Bcell %>%
  select(percentage,percentage_clr, AIFI_L3, cohort.cohortGuid, CMV.IgG.Serology.Result.Interpretation, subject.biologicalSex, sample.visitName,subject.subjectGuid ) %>%
  unique(.) %>%
  group_by(AIFI_L3 ) %>%
  wilcox_test(percentage~ sample.visitName) %>%
  adjust_pvalue(method = "BH") %>%
  add_significance()
#stat_test

ERROR: [1m[33mError[39m in `mutate()`:[22m
[1m[22m[36mℹ[39m In argument: `data = map(.data$data, .f, ...)`.
[1mCaused by error in `map()`:[22m
[1m[22m[36mℹ[39m In index: 1.
[1mCaused by error in `wilcox.test.default()`:[22m
[33m![39m not enough 'y' observations


In [None]:
stat_test

In [None]:

#########Percentage
stat_test<- result_ind %>%
  select(percentage, percentage_clr, AIFI_L3, cohort.cohortGuid, CMV.IgG.Serology.Result.Interpretation, subject.biologicalSex, sample.visitName, subject.subjectGuid) %>%
  unique(.) %>%
  group_by(AIFI_L3) %>%
  wilcox_test(percentage ~ sample.visitName) %>%
  adjust_pvalue(method = "BH") %>%
  add_significance()


# significant_cell_types <- stat_test %>%
#   filter(p.adj < 0.05) %>%
#   select(AIFI_L3, .y., p.adj, p.adj.signif) %>%
#   unique()

# print(significant_cell_types)


