In [5]:
library(performance)
library(ggplot2)
library(stats)
library(parallel)
library(dplyr)
library(readxl)
library(tidyverse)
library(rstatix)
library(ggpubr)
library(cluster)
library(factoextra)
library(lmerTest)
library(lme4)
library(splines)

“package ‘performance’ was built under R version 4.3.3”
“package ‘dplyr’ was built under R version 4.3.3”

Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


“package ‘readxl’ was built under R version 4.3.3”
“package ‘tidyverse’ was built under R version 4.3.3”
“package ‘tibble’ was built under R version 4.3.3”
“package ‘tidyr’ was built under R version 4.3.3”
“package ‘readr’ was built under R version 4.3.3”
“package ‘purrr’ was built under R version 4.3.3”
“package ‘stringr’ was built under R version 4.3.3”
“package ‘forcats’ was built under R version 4.3.3”
“package ‘lubridate’ was built under R version 4.3.3”
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats   1.0.0     ✔ stringr   1.5.1
[32m✔[39m [34mlubridate[39m 1.9.3     [32m✔[39m [34m

In [1]:
# Read HAI_Processed.csv from the Figure4/02_HAI_MSD_Assay directory into
# df_HAI; every later cell works on this data frame.
df_HAI <- read.csv(
    file = "/home/workspace/IHA_Figure_Revision//Figure4//02_HAI_MSD_Assay/HAI_Processed.csv"
)

In [6]:
# Shift Mean_Perc_Inhib within each Assay x Batch group by that group's
# minimum multiplied by 1.00001, and store the result as
# Adjusted_HAI.Mean_Perc_Inhib.
# NOTE(review): the 1.00001 factor looks intended to keep the group minimum
# from mapping to exactly zero. The shifted minimum is only positive when the
# group minimum is negative -- confirm this is the intended behavior.
df_HAI <- df_HAI %>%
    group_by(Assay, Batch) %>%
    mutate(
        Adjusted_HAI.Mean_Perc_Inhib =
            Mean_Perc_Inhib - min(Mean_Perc_Inhib) * 1.00001
    ) %>%
    # Drop the grouping so later cells do not silently operate per group.
    ungroup()
# Unused min-max scaling alternatives, kept for reference. The first one, as
# written, lacks parentheses around the numerator.
# mutate(Adjusted_HAI.Mean_Perc_Inhib=Mean_Perc_Inhib-min(Mean_Perc_Inhib)/(max(Mean_Perc_Inhib) - min(Mean_Perc_Inhib)))

# (df_HAI$Mean_Perc_Inhib - min(df_HAI$Mean_Perc_Inhib)) / (max(df_HAI$Mean_Perc_Inhib) - min(df_HAI$Mean_Perc_Inhib))

In [7]:
# Cross-tabulate row counts: one row per Assay, one column per Flu_Year.
table(df_HAI[["Assay"]], df_HAI[["Flu_Year"]])

              
               2019-2020 2020-2021 2021-2022
  A/Brisbane         117       484       267
  A/Cambodia         117       484       267
  A/Guangdong        117       484       267
  A/HongKong         117       484       267
  A/Kansas           117       484       267
  A/Shanghai         117       484       267
  A/Wisconsin        117       484       267
  B/Phuket           117       484       267
  B/Washington       117       484       267
  BSA                117       484       267

In [8]:
# Relabel five of the assay names with their longer "Flu ..." labels; any
# other Assay value is passed through unchanged.
df_HAI <- df_HAI %>%
    mutate(
        Assay = case_when(
            Assay == "A/Shanghai" ~ "Flu A/Shanghai H7",
            Assay == "A/Brisbane" ~ "Flu A/Brisbane (H1N1)",
            Assay == "B/Washington" ~ "Flu B/Washington HA",
            Assay == "B/Phuket" ~ "Flu B/Phuket HA",
            Assay == "A/HongKong" ~ "Flu A/Hong Kong H3",
            .default = Assay
        )
    )

In [9]:
# Re-tabulate Assay x Flu_Year counts to confirm the relabelled assay names.
table(df_HAI[["Assay"]], df_HAI[["Flu_Year"]])

                       
                        2019-2020 2020-2021 2021-2022
  A/Cambodia                  117       484       267
  A/Guangdong                 117       484       267
  A/Kansas                    117       484       267
  A/Wisconsin                 117       484       267
  BSA                         117       484       267
  Flu A/Brisbane (H1N1)       117       484       267
  Flu A/Hong Kong H3          117       484       267
  Flu A/Shanghai H7           117       484       267
  Flu B/Phuket HA             117       484       267
  Flu B/Washington HA         117       484       267

In [10]:
# Restrict df_HAI to the columns used downstream, in this exact order (the
# next cell relies on the first three positions).
df_HAI <- df_HAI[
    c(
        "sample.sampleKitGuid",
        "Assay",
        "Mean_Perc_Inhib",
        "Adjusted_HAI.Mean_Perc_Inhib",
        "cohort.cohortGuid",
        "subjectGuid",
        "CMV",
        "Flu_Year",
        "Flu_Day",
        "Visit_Type",
        "Year_N",
        "Visit_Flu_Year",
        "subject.biologicalSex"
    )
]

In [11]:
# Rename Mean_Perc_Inhib to HAI.Mean_Perc_Inhib by name instead of by column
# position, so a change in column order cannot silently mislabel a column.
# Fail loudly if the expected source column is missing.
stopifnot("Mean_Perc_Inhib" %in% colnames(df_HAI))
colnames(df_HAI)[colnames(df_HAI) == "Mean_Perc_Inhib"] <- "HAI.Mean_Perc_Inhib"

# Day 7 and Day 90 - HAI

In [12]:
# Keep flu-year visits (dropping "Stand-Alone" days), label the two cohorts
# as age groups, and express every visit relative to the second visit of the
# same subject / assay / season. Only subject-assay-season combinations with
# exactly three rows are retained.
df_HAI <- df_HAI %>%
  filter(Visit_Type == "FluYear", Flu_Day != "Stand-Alone") %>%
  mutate(
    `Age Group` = case_when(
      cohort.cohortGuid == "BR1" ~ "Young",
      cohort.cohortGuid == "BR2" ~ "Older",
      .default = NA_character_
    )
  ) %>%
  # Sort by Flu_Day so first()/nth(., 2) below pick rows in visit-day order.
  # NOTE(review): presumably these are the Day 0 and Day 7 visits -- this
  # relies on Flu_Day values sorting in that order; confirm.
  arrange(Flu_Day) %>%
  group_by(subjectGuid, Assay, Flu_Year) %>%
  mutate(
    # Difference between each row and the group's second row.
    across(
      starts_with("Adjusted_HAI.Mean_Perc_Inhib"),
      \(x) x - nth(x, 2),
      .names = "DeltaDiff_{col}"
    ),
    Day0_Inhibition = first(Adjusted_HAI.Mean_Perc_Inhib),
    Day7_Inhibition = nth(Adjusted_HAI.Mean_Perc_Inhib, 2),
    # Number of rows for this subject / assay / season.
    entry_count = n()
  ) %>%
  filter(entry_count == 3) %>%
  ungroup()

# All four HAI strains (A/Guangdong, A/Cambodia, B/Phuket, B/Washington)

In [13]:
# Classify subjects as low / middle / high responders per strain and season.
#
# For every (strain, season) pair a linear model of the Day_90 delta on
# Day7_Inhibition, sex and CMV status is fitted; subjects are then binned by
# the quartiles of the model residuals. Results are collected in
# `intercept_list`, keyed by "<strain>-<season>".
intercept_list <- list()

# Day_90 rows for the four strains of interest. A/Guangdong is dropped for
# 2021-2022 and A/Cambodia for 2020-2021. Standardization is done per
# Assay x Flu_Day *before* the season filter, so rows from seasons outside
# 2020-2021 / 2021-2022 still contribute to the scaling.
# NOTE(review): Day0_Inhibition is standardized here but the model below uses
# the unstandardized Day7_Inhibition -- confirm which baseline is intended.
# df_HAI_subset intentionally stays grouped by Assay and Flu_Day, as before.
df_HAI_subset <- df_HAI %>%
  filter(
    Flu_Day %in% c("Day_90"),
    Assay %in% c("A/Guangdong", "A/Cambodia", "Flu B/Phuket HA", "Flu B/Washington HA"),
    Visit_Type == "FluYear"
  ) %>%
  filter(
    !(Assay == "A/Guangdong" & Flu_Year == "2021-2022"),
    !(Assay == "A/Cambodia" & Flu_Year == "2020-2021")
  ) %>%
  group_by(Assay, Flu_Day) %>%
  mutate(
    Day0_Inhibition = as.numeric(datawizard::standardize(Day0_Inhibition)),
    DeltaDiff_Adjusted_HAI.Mean_Perc_Inhib = as.numeric(datawizard::standardize(DeltaDiff_Adjusted_HAI.Mean_Perc_Inhib))
  ) %>%
  filter(Flu_Year %in% c("2020-2021", "2021-2022"))

for (year in c("2020-2021", "2021-2022")) {
  # The A strain modelled differs by season; the two B strains are shared.
  strains <- if (year == "2020-2021") {
    c("A/Guangdong", "Flu B/Phuket HA", "Flu B/Washington HA")
  } else {
    c("A/Cambodia", "Flu B/Phuket HA", "Flu B/Washington HA")
  }

  for (STRAIN in strains) {
    df_HAI_subset_single_strain <- df_HAI_subset %>%
      filter(Assay == STRAIN, Flu_Year == year)

    # na.exclude keeps the residual vector aligned with the input rows: rows
    # with a missing response or covariate get an NA residual instead of
    # being dropped, which would otherwise make the assignment below fail on
    # a length mismatch.
    fit <- lm(
      DeltaDiff_Adjusted_HAI.Mean_Perc_Inhib ~
        Day7_Inhibition + subject.biologicalSex + CMV,
      data = df_HAI_subset_single_strain,
      na.action = na.exclude
    )

    df_HAI_subset_single_strain$residual <- resid(fit)

    # Lower and upper quartile of the residuals, ignoring padded NAs.
    residual_cutoffs <- quantile(
      df_HAI_subset_single_strain$residual,
      probs = c(0.25, 0.75),
      na.rm = TRUE
    )

    # Bottom quartile -> low, top quartile -> high, everything else middle.
    # Rows with an NA residual get an NA label.
    df_HAI_subset_single_strain$responder <- ifelse(
      df_HAI_subset_single_strain$residual <= residual_cutoffs[[1]],
      "low responder",
      ifelse(
        df_HAI_subset_single_strain$residual >= residual_cutoffs[[2]],
        "high responder",
        "middle responder"
      )
    )

    df_res <- df_HAI_subset_single_strain %>%
      ungroup() %>%
      select(subjectGuid, residual, responder, cohort.cohortGuid)
    df_res$Assay <- STRAIN
    df_res$Flu_Year <- year

    intercept_list[[paste0(STRAIN, '-', year)]] <- df_res
  }
}


In [14]:
# Stack the per-strain / per-season tables held in intercept_list into a
# single data frame.
df_res <- do.call(what = rbind, args = intercept_list)

In [15]:
# Write the combined residual / responder table to lm_res.csv in the working
# directory.
write.csv(x = df_res, file = "lm_res.csv")

In [16]:
# Four-way count table: cohort x responder class, one slice per assay and
# flu season.
table(
    df_res[["cohort.cohortGuid"]],
    df_res[["responder"]],
    df_res[["Assay"]],
    df_res[["Flu_Year"]]
)

, ,  = A/Cambodia,  = 2020-2021

     
      high responder low responder middle responder
  BR1              0             0                0
  BR2              0             0                0

, ,  = A/Guangdong,  = 2020-2021

     
      high responder low responder middle responder
  BR1             11            11               21
  BR2             11            11               23

, ,  = Flu B/Phuket HA,  = 2020-2021

     
      high responder low responder middle responder
  BR1             10            11               22
  BR2             12            11               22

, ,  = Flu B/Washington HA,  = 2020-2021

     
      high responder low responder middle responder
  BR1              8            14               21
  BR2             14             8               23

, ,  = A/Cambodia,  = 2021-2022

     
      high responder low responder middle responder
  BR1              6             6               12
  BR2             11            11               20

, ,  