In [16]:
library(hise)
library(dplyr)
library(ggplot2)
library(stats)
library(tidyverse)

In [17]:
# Read the MSD export into `df`.
df <- read.csv(
  file = "/home//jupyter/BRI_Figures_Final_V2/Dataset/MSD/MSD Recent_20231026_092142.csv"
)

In [18]:
# Keep only BR1/BR2 cohort rows whose mean calculated concentration is not
# missing, then strip comma separators from that column and coerce it to
# numeric.
BR1_BR2_subset <- df[
  df$Cohort %in% c("BR1", "BR2") & !is.na(df$Calc..Conc..Mean), ,
  drop = FALSE
]
BR1_BR2_subset$Calc..Conc..Mean <- as.numeric(
  gsub(",", "", BR1_BR2_subset$Calc..Conc..Mean)
)

In [19]:
# Read the flu antigen-specific sample metadata.
metadata <- read.csv(
  file = "/home//jupyter/BRI_Figures_Final_V2/Figure5/01_Frequency_Comparison/selected_samples_with_acutal_flu_year.csv"
)


In [20]:
# Retain only the serology rows whose Subject matches a subject.subjectGuid in
# the metadata; semi_join() filters rows without adding metadata columns.
BR1_BR2_subset <- semi_join(
  BR1_BR2_subset,
  metadata,
  by = c(Subject = "subject.subjectGuid")
)

In [21]:
# Show the distinct assay names present in the subset.
BR1_BR2_subset$Assay %>% unique()

In [22]:
# Restrict the serology table to the two Flu Year 1 visits (Day 0 and Day 7).
one_yearflu_only <- c("Flu Year 1 Day 0", "Flu Year 1 Day 7")
BR1_BR2_subset <- BR1_BR2_subset[
  is.element(BR1_BR2_subset$Visit, one_yearflu_only),
]

# Flu-year-specific assay. Hong Kong is left out because HAI only covers
# Washington and Phuket.
flu_year_specific <- "Flu B/Washington HA"
BR1_BR2_subset_flu_specific <- BR1_BR2_subset[
  is.element(BR1_BR2_subset$Assay, flu_year_specific),
]


In [23]:
# Only the mean is analysed, not the replicates, so keep a single row for each
# Subject / Visit / Assay / Cohort combination (the first one encountered).
BR1_BR2_subset_flu_specific <- distinct(
  BR1_BR2_subset_flu_specific,
  Subject, Visit, Assay, Cohort,
  .keep_all = TRUE
)

# Partition the rows by assay so each assay can be normalized separately later.
BR1_BR2_subset_flu_specific_split <- split(
  BR1_BR2_subset_flu_specific,
  BR1_BR2_subset_flu_specific$Assay
)

In [24]:
# Normalize concentrations within each assay-specific table.
#
# For every assay table: attach the sample metadata by kit barcode, then divide
# each subject's concentrations by that subject's first "2020-2021" flu-year
# value after the rows have been ordered by Visit. A subject with no
# "2020-2021" row has no baseline, so its Normalized_Concentration should come
# out as NA (dplyr::first() on an empty vector).
#
# NOTE(review): `df` is reused as the loop variable, so after this cell it no
# longer holds the raw MSD table read at the top of the notebook. Re-run the
# read.csv cell before re-running the cohort subsetting.
normalized_BR1_BR2_subset_flu_specific_split <- vector(
  "list",
  length(BR1_BR2_subset_flu_specific_split)
)

# seq_along() gives an empty sequence for an empty list, whereas
# 1:length(x) would iterate over c(1, 0) and fail on the first subscript.
for (i in seq_along(BR1_BR2_subset_flu_specific_split)) {
  df <- as.data.frame(BR1_BR2_subset_flu_specific_split[[i]])
  df <- left_join(
    df,
    metadata,
    by = c("Sample.Kit.Barcode" = "sample.sampleKitGuid")
  )

  # arrange() ignores the grouping by default, so the whole table is sorted by
  # Visit; within each Subject the rows therefore appear in Visit order.
  normalized_df <- df %>%
    group_by(Subject) %>%
    arrange(Visit) %>%
    mutate(
      Normalized_Concentration =
        Calc..Conc..Mean / first(Calc..Conc..Mean[Flu_Year == "2020-2021"])
    ) %>%
    ungroup()

  normalized_BR1_BR2_subset_flu_specific_split[[i]] <- normalized_df
}

In [25]:
# Take the first assay table from the normalized list
# (presumably the only one, since a single assay was selected -- TODO confirm).
df_normalized <- normalized_BR1_BR2_subset_flu_specific_split[[1]]

In [26]:
# Discard rows for which no normalized concentration could be computed.
df_normalized <- df_normalized %>%
  filter(!is.na(Normalized_Concentration))

In [27]:
# Save the normalized table to a CSV file in the working directory.
write.csv(
  x = df_normalized,
  file = "MSD_Normalized_Y2020-2021_Washington.csv"
)