In [14]:
library(hise)
library(dplyr)
library(ggplot2)
library(stats)
library(tidyverse)

In [15]:
# Read the raw MSD serology export into `df`.
df <- read.csv(
  file = "/home//jupyter/BRI_Figures_Final_V2/Dataset/MSD/MSD Recent_20231026_092142.csv"
)

In [16]:
# Keep only rows from the BR1 and BR2 cohorts whose mean calculated
# concentration is not NA, then strip commas from that column and convert it
# to numeric.
BR1_BR2_subset <- df[
  df$Cohort %in% c("BR1", "BR2") & !is.na(df$Calc..Conc..Mean),
  ,
  drop = FALSE
]
BR1_BR2_subset$Calc..Conc..Mean <- as.numeric(
  gsub(",", "", BR1_BR2_subset$Calc..Conc..Mean)
)

In [17]:
# Read the sample metadata table (selected samples annotated with flu year).
metadata <- read.csv(
  file = "/home//jupyter/BRI_Figures_Final_V2/Figure5/01_Frequency_Comparison/selected_samples_with_acutal_flu_year.csv"
)


In [18]:
# Keep only serology rows whose Subject appears in the metadata table.
# semi_join filters rows without adding any metadata columns.
BR1_BR2_subset <- semi_join(
  BR1_BR2_subset,
  metadata,
  by = c("Subject" = "subject.subjectGuid")
)

In [19]:
# Restrict the serology rows to the two Flu Year 1 visits (Day 0 and Day 7).
one_yearflu_only <- c("Flu Year 1 Day 0", "Flu Year 1 Day 7")
BR1_BR2_subset <- subset(BR1_BR2_subset, Visit %in% one_yearflu_only)

# Restrict to the flu-year-specific assay. Only B/Phuket is selected here; the
# original note says Hong Kong was left out because HAI only has Washington
# and Phuket.
flu_year_specific <- c("Flu B/Phuket HA")
BR1_BR2_subset_flu_specific <- subset(
  BR1_BR2_subset,
  Assay %in% flu_year_specific
)


In [20]:
# Collapse replicate rows: keep the first row for each
# Subject / Visit / Assay / Cohort combination (only the mean is used, not the
# individual replicates), retaining all other columns.
BR1_BR2_subset_flu_specific <- distinct(
  BR1_BR2_subset_flu_specific,
  Subject, Visit, Assay, Cohort,
  .keep_all = TRUE
)
# Split into one data frame per assay so each assay can be normalized
# separately later.
BR1_BR2_subset_flu_specific_split <- split(
  BR1_BR2_subset_flu_specific,
  BR1_BR2_subset_flu_specific$Assay
)

In [21]:
# Normalize concentrations within each assay.
# For every per-assay data frame: attach the sample metadata by kit barcode,
# order rows by Visit, and divide each subject's Calc..Conc..Mean by that
# subject's first value recorded for Flu_Year "2020-2021".
#
# The result list is preallocated and carries the assay names from the split,
# so elements can be retrieved by position (as before) or by assay name.
# NOTE(review): the loop reuses the name `df`, which replaces the raw MSD
# table loaded earlier in the session.
normalized_BR1_BR2_subset_flu_specific_split <- setNames(
  vector("list", length(BR1_BR2_subset_flu_specific_split)),
  names(BR1_BR2_subset_flu_specific_split)
)
# seq_along() gives an empty sequence for an empty list; 1:length() would
# iterate over c(1, 0) and fail with "subscript out of bounds".
for (i in seq_along(BR1_BR2_subset_flu_specific_split)) {
  df <- as.data.frame(BR1_BR2_subset_flu_specific_split[[i]])
  df <- left_join(
    df,
    metadata,
    by = c("Sample.Kit.Barcode" = "sample.sampleKitGuid")
  )
  # NOTE(review): rows without a metadata match have Flu_Year NA, and a subject
  # whose first candidate baseline value is NA gets NA for every row; those
  # rows are presumably meant to be dropped downstream -- confirm.
  normalized_df <- df %>%
    group_by(Subject) %>%
    arrange(Visit) %>%
    mutate(
      Normalized_Concentration =
        Calc..Conc..Mean / first(Calc..Conc..Mean[Flu_Year == "2020-2021"])
    ) %>%
    ungroup()

  normalized_BR1_BR2_subset_flu_specific_split[[i]] <- normalized_df
}

In [22]:
# Take the first (by position) per-assay normalized table from the list.
df_normalized <- normalized_BR1_BR2_subset_flu_specific_split[[1]]

In [23]:
# Drop rows whose normalized concentration is NA.
df_normalized <- subset(df_normalized, !is.na(Normalized_Concentration))

In [24]:
# Save the normalized table as a CSV in the current working directory.
write.csv(
  df_normalized,
  file = "MSD_Normalized_Y2020-2021_Phuket.csv"
)

In [25]:
# Display the normalized table as the cell output for a visual check.
df_normalized

Sample,Type,Subject,Sample.Kit.Barcode,Cohort,Visit,Batch.ID,well,Assay,Dilution,⋯,sample.diseaseStatesRecordedAtVisit,pbmc_sample_id,subject.covidVaxDose1.daysSinceFirstVisit,subject.covidVaxDose2.daysSinceFirstVisit,Covid_exclusion,subjectGuid,CMV,Flu_Year,Flu_Day,Normalized_Concentration
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,⋯,<lgl>,<chr>,<int>,<int>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>
PL00334-03,Plasma,BR2009,KT00334,BR2,Flu Year 1 Day 0,MSD-00008,,Flu B/Phuket HA,5000,⋯,,PB00334-01,330,351,no,BR2009,Negative,2020-2021,Day 0,1
PL00338-02,Plasma,BR1021,KT00338,BR1,Flu Year 1 Day 0,MSD-00010,,Flu B/Phuket HA,5000,⋯,,PB00338-01,345,366,no,BR1021,Negative,2020-2021,Day 0,1
PL00339-02,Plasma,BR2038,KT00339,BR2,Flu Year 1 Day 0,MSD-00009,,Flu B/Phuket HA,5000,⋯,,PB00339-01,118,139,no,BR2038,Negative,2020-2021,Day 0,1
PL00341-02,Plasma,BR1037,KT00341,BR1,Flu Year 1 Day 0,MSD-00017,,Flu B/Phuket HA,5000,⋯,,PB00341-01,288,309,no,BR1037,Positive,2020-2021,Day 0,1
PL00342-02,Plasma,BR2025,KT00342,BR2,Flu Year 1 Day 0,MSD-00008,,Flu B/Phuket HA,5000,⋯,,PB00342-01,223,244,no,BR2025,Negative,2020-2021,Day 0,1
PL00345-05,Plasma,BR1031,KT00345,BR1,Flu Year 1 Day 0,MSD-00010,,Flu B/Phuket HA,5000,⋯,,PB00345-01,310,366,no,BR1031,Negative,2020-2021,Day 0,1
PL00347-02,Plasma,BR1026,KT00347,BR1,Flu Year 1 Day 0,MSD-00039,G09,Flu B/Phuket HA,10000,⋯,,PB00347-01,349,370,no,BR1026,Negative,2020-2021,Day 0,1
PL00349-02,Plasma,BR1025,KT00349,BR1,Flu Year 1 Day 0,MSD-00011,,Flu B/Phuket HA,5000,⋯,,PB00349-01,342,363,no,BR1025,Positive,2020-2021,Day 0,1
PL00350-02,Plasma,BR1024,KT00350,BR1,Flu Year 1 Day 0,MSD-00017,,Flu B/Phuket HA,5000,⋯,,PB00350-01,349,370,no,BR1024,Negative,2020-2021,Day 0,1
PL00352-02,Plasma,BR2010,KT00352,BR2,Flu Year 1 Day 0,MSD-00008,,Flu B/Phuket HA,5000,⋯,,PB00352-04,321,342,no,BR2010,Positive,2020-2021,Day 0,1
