In [1]:
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(tidyr))
suppressPackageStartupMessages(library(tibble))
suppressPackageStartupMessages(library(stringr))
library(reshape2)
library(rstatix)
library(ggpubr)
library(parallel)

“package ‘dplyr’ was built under R version 4.3.3”
“package ‘tidyr’ was built under R version 4.3.3”
“package ‘tibble’ was built under R version 4.3.3”
“package ‘stringr’ was built under R version 4.3.3”
“package ‘reshape2’ was built under R version 4.3.3”

Attaching package: ‘reshape2’


The following object is masked from ‘package:tidyr’:

    smiths


“package ‘rstatix’ was built under R version 4.3.3”

Attaching package: ‘rstatix’


The following object is masked from ‘package:stats’:

    filter


Loading required package: ggplot2



# Main Cohort

In [2]:
# Load the frequency table used for the main-cohort analysis.
BRI_Freq_Data <- read.csv(
  file = "../Dataset/diha_AIFI_L3_frequencies_2024-05-05.csv"
)


In [3]:

# Add `percentage`: each row's AIFI_L3_count as a share (in percent) of the
# summed AIFI_L3_count of its sample kit. The grouping is removed afterwards.
BRI_Freq_Data <- ungroup(
  dplyr::mutate(
    group_by(BRI_Freq_Data, sample.sampleKitGuid),
    percentage = 100 * (AIFI_L3_count / sum(AIFI_L3_count))
  )
)

In [4]:
# Restrict the analysis to rows recorded at the "Flu Year 1 Day 0" visit.
BRI_Freq_Data_subset <- filter(
  BRI_Freq_Data,
  sample.visitName == "Flu Year 1 Day 0"
)

In [5]:
# Observed statistic per cell type (AIFI_L3): median AIFI_L3_clr of the
# "Positive" CMV group minus the median of the "Negative" CMV group.
# Assumes subject.cmv contains the labels "Positive" and "Negative"; a cell
# type missing one of them gets NA for delta_change.
freq_changes <- BRI_Freq_Data_subset %>%
  group_by(AIFI_L3, subject.cmv) %>%
  # The summary is a median, so the column is named accordingly.
  # .groups = "drop" returns an ungrouped table and avoids the
  # "summarise() has grouped output" message.
  summarise(
    median_AIFI_L3_clr = median(AIFI_L3_clr, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  # One row per cell type, one column per CMV label.
  pivot_wider(names_from = subject.cmv, values_from = median_AIFI_L3_clr) %>%
  mutate(delta_change = Positive - Negative)

observed_delta_change <- freq_changes

[1m[22m`summarise()` has grouped output by 'AIFI_L3'. You can override using the
`.groups` argument.


In [6]:
# Number of label permutations used to build the null distribution.
n_perm <- 100000

#' One permutation of the CMV labels and the resulting per-cell-type deltas.
#'
#' The CMV labels are shuffled *within* each cell type, so every permutation
#' keeps the observed number of "Positive" and "Negative" rows per cell type.
#' Shuffling across the whole table instead lets those counts vary between
#' permutations and can leave a cell type with no rows for one label, which
#' yields an NA delta and therefore an NA p-value downstream.
#'
#' @param seed Value passed to `set.seed()`; also stored in `iteration`.
#' @param data Data frame with columns `AIFI_L3`, `subject.cmv` and
#'   `AIFI_L3_clr`. Defaults to the global `BRI_Freq_Data_subset`.
#' @return A data.frame with one row per `AIFI_L3`, one median column per CMV
#'   label, `delta_change` (Positive - Negative) and `iteration`.
permute_single_iteration <- function(seed, data = BRI_Freq_Data_subset) {
  set.seed(seed)
  permuted_data <- data %>%
    group_by(AIFI_L3) %>%
    # sample.int(n()) gives a permutation of the row positions of the group;
    # indexing avoids sample()'s special case for length-one numeric input.
    mutate(subject.cmv = subject.cmv[sample.int(n())]) %>%
    group_by(AIFI_L3, subject.cmv) %>%
    summarise(
      median_AIFI_L3_clr = median(AIFI_L3_clr, na.rm = TRUE),
      .groups = "drop"
    ) %>%
    pivot_wider(names_from = subject.cmv, values_from = median_AIFI_L3_clr) %>%
    mutate(delta_change = Positive - Negative) %>%
    as.data.frame()
  permuted_data$iteration <- seed
  permuted_data
}

In [7]:
# Run every permutation in parallel; iteration i uses seed i.
# suppressMessages() is kept from the original call. suppressWarnings() is
# removed on purpose: the warning is mclapply()'s only signal that a worker
# failed.
permutation_results <- suppressMessages(
  mclapply(seq_len(n_perm), permute_single_iteration, mc.cores = 30)
)

# mclapply() does not stop when a job fails: errors come back as "try-error"
# objects and jobs from a killed worker come back as NULL. Binding those
# silently would corrupt or shrink the null distribution, so fail loudly.
permutation_failed <- !vapply(permutation_results, is.data.frame, logical(1))
if (any(permutation_failed)) {
  stop(
    sum(permutation_failed), " of ", n_perm,
    " permutations did not return a data frame; first failure at iteration ",
    which(permutation_failed)[1],
    call. = FALSE
  )
}

# Stack all iterations into one long data frame in a single pass.
permuted_delta_change <- dplyr::bind_rows(permutation_results)
rm(permutation_results, permutation_failed)

In [8]:
# Lookup table of the observed statistic per cell type. The column is renamed
# so it can sit next to the permuted `delta_change` after a join.
observed_delta_map <- select(
  observed_delta_change,
  AIFI_L3,
  observed_delta_change = delta_change
)

In [9]:
# Two-sided permutation p-value per cell type: the share of permuted deltas
# whose magnitude is at least the observed magnitude. The +1 in numerator and
# denominator counts the observed statistic itself, so p is never zero.
p_values_main <- permuted_delta_change %>%
  left_join(observed_delta_map, by = "AIFI_L3") %>%
  mutate(as_extreme = abs(delta_change) >= abs(observed_delta_change)) %>%
  group_by(AIFI_L3) %>%
  summarise(p_value = (sum(as_extreme) + 1) / (n() + 1))

# Follow Up Cohort

In [10]:
# Load the frequency table used for the follow-up-cohort analysis.
SF4_Freq_Data <- read.csv(
  file = "../Figure2/03_Get_Counts/SF4_Freq.csv"
)


In [11]:
# Observed statistic per cell type (celltypist_l3): median percentage_clr of
# the "Positive" CMV group minus the median of the "Negative" CMV group.
# Assumes CMV contains the labels "Positive" and "Negative"; a cell type
# missing one of them gets NA for delta_change.
freq_changes <- SF4_Freq_Data %>%
  group_by(celltypist_l3, CMV) %>%
  # The summary is a median, so the column is named accordingly.
  # .groups = "drop" returns an ungrouped table and avoids the
  # "summarise() has grouped output" message.
  summarise(
    median_percentage_clr = median(percentage_clr, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  # One row per cell type, one column per CMV label.
  pivot_wider(names_from = CMV, values_from = median_percentage_clr) %>%
  mutate(delta_change = Positive - Negative)

observed_delta_change <- freq_changes

[1m[22m`summarise()` has grouped output by 'celltypist_l3'. You can override using the
`.groups` argument.


In [12]:
# Number of label permutations used to build the null distribution.
n_perm <- 100000

#' One permutation of the CMV labels and the resulting per-cell-type deltas.
#'
#' The CMV labels are shuffled *within* each cell type, so every permutation
#' keeps the observed number of "Positive" and "Negative" rows per cell type.
#' Shuffling across the whole table instead lets those counts vary between
#' permutations and can leave a cell type with no rows for one label, which
#' yields an NA delta and therefore an NA p-value downstream.
#'
#' @param seed Value passed to `set.seed()`; also stored in `iteration`.
#' @param data Data frame with columns `celltypist_l3`, `CMV` and
#'   `percentage_clr`. Defaults to the global `SF4_Freq_Data`.
#' @return A data.frame with one row per `celltypist_l3`, one median column
#'   per CMV label, `delta_change` (Positive - Negative) and `iteration`.
permute_single_iteration <- function(seed, data = SF4_Freq_Data) {
  set.seed(seed)
  permuted_data <- data %>%
    group_by(celltypist_l3) %>%
    # sample.int(n()) gives a permutation of the row positions of the group;
    # indexing avoids sample()'s special case for length-one numeric input.
    mutate(CMV = CMV[sample.int(n())]) %>%
    group_by(celltypist_l3, CMV) %>%
    summarise(
      median_percentage_clr = median(percentage_clr, na.rm = TRUE),
      .groups = "drop"
    ) %>%
    pivot_wider(names_from = CMV, values_from = median_percentage_clr) %>%
    mutate(delta_change = Positive - Negative) %>%
    as.data.frame()
  permuted_data$iteration <- seed
  permuted_data
}

In [13]:
# Run every permutation in parallel; iteration i uses seed i.
# suppressMessages() is kept from the original call. suppressWarnings() is
# removed on purpose: the warning is mclapply()'s only signal that a worker
# failed.
permutation_results <- suppressMessages(
  mclapply(seq_len(n_perm), permute_single_iteration, mc.cores = 30)
)

# mclapply() does not stop when a job fails: errors come back as "try-error"
# objects and jobs from a killed worker come back as NULL. Binding those
# silently would corrupt or shrink the null distribution, so fail loudly.
permutation_failed <- !vapply(permutation_results, is.data.frame, logical(1))
if (any(permutation_failed)) {
  stop(
    sum(permutation_failed), " of ", n_perm,
    " permutations did not return a data frame; first failure at iteration ",
    which(permutation_failed)[1],
    call. = FALSE
  )
}

# Stack all iterations into one long data frame in a single pass.
permuted_delta_change <- dplyr::bind_rows(permutation_results)
rm(permutation_results, permutation_failed)

In [14]:
# Lookup table of the observed statistic per cell type. The column is renamed
# so it can sit next to the permuted `delta_change` after a join.
observed_delta_map <- select(
  observed_delta_change,
  celltypist_l3,
  observed_delta_change = delta_change
)

In [15]:
# Two-sided permutation p-value per cell type: the share of permuted deltas
# whose magnitude is at least the observed magnitude. The +1 in numerator and
# denominator counts the observed statistic itself, so p is never zero.
p_values_follow_up <- permuted_delta_change %>%
  left_join(observed_delta_map, by = "celltypist_l3") %>%
  mutate(as_extreme = abs(delta_change) >= abs(observed_delta_change)) %>%
  group_by(celltypist_l3) %>%
  summarise(p_value = (sum(as_extreme) + 1) / (n() + 1))

# Combine

In [21]:
# Put both cohorts' p-values side by side, keyed on the main cohort's cell
# types (follow-up labels are matched via celltypist_l3).
df <- left_join(
  x = p_values_main,
  y = p_values_follow_up,
  by = c("AIFI_L3" = "celltypist_l3")
)

In [22]:
# Columns 2 and 3 are the joined p-value columns; label them by cohort.
names(df)[2:3] <- c("Main Cohort", "Follow Up Cohort")

In [23]:
# Save the combined p-value table to the working directory.
write.csv(x = df, file = "Permutation_P_values.csv")