In [1]:
library(hise)
library(dplyr)
library(ggplot2)
library(stats)
library(tidyverse)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ lubridate 1.9.3     ✔ tibble    3.2.1
✔ purrr     1.0.2     ✔ tidyr     1.3.1
✔ readr     2.1.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors


In [2]:
# Read the MSD export CSV into `df`; later cells filter it by cohort, visit
# and assay.
df <- read.csv(
  file = "/home//jupyter/BRI_Figures_Final_V2/Dataset/MSD/MSD Recent_20231026_092142.csv"
)

In [3]:
# Keep only the BR1 and BR2 cohorts, drop rows whose `Calc..Conc..Mean` is
# already NA, then convert that column to numeric.
# NOTE(review): the NA filter runs before the conversion, so any entry that
# as.numeric() cannot parse becomes NA at this point and stays in the table
# (R reports this as "NAs introduced by coercion").
BR1_BR2_subset <- df %>%
  filter(
    Cohort %in% c("BR1", "BR2"),
    !is.na(`Calc..Conc..Mean`)
  ) %>%
  mutate(`Calc..Conc..Mean` = as.numeric(`Calc..Conc..Mean`))

[1m[22m[36mℹ[39m In argument: `Calc..Conc..Mean = as.numeric(Calc..Conc..Mean)`.
[33m![39m NAs introduced by coercion”


In [5]:
# Load the sample metadata (flu-antigen-specific metadata). It is used below
# to restrict the serology rows by subject and is joined in by kit barcode.
metadata <- read.csv(
  file = "/home//jupyter/BRI_Figures_Final_V2/Extended-Figure4//01_Frequency_Comparison/selected_samples_with_acutal_flu_year.csv"
)


In [6]:
# Keep only serology rows whose `Subject` appears in the metadata column
# `subject.subjectGuid`. semi_join() filters rows only; it adds no metadata
# columns and never duplicates rows.
BR1_BR2_subset <- semi_join(
  BR1_BR2_subset,
  metadata,
  by = c("Subject" = "subject.subjectGuid")
)

In [8]:
# Show the distinct assay names present in the current subset.
unique(BR1_BR2_subset[["Assay"]])

In [9]:
# Restrict the serology data to the two "Flu Year 1" visits (day 0 and day 7).
one_yearflu_only <- c("Flu Year 1 Day 0", "Flu Year 1 Day 7")
BR1_BR2_subset <- subset(BR1_BR2_subset, Visit %in% one_yearflu_only)

# Keep only the flu-year-specific assay. Per the original author's note,
# Hong Kong is left out because the HAI data only has Washington and Phuket;
# only the Washington assay is selected here.
flu_year_specific <- c("Flu B/Washington HA")
BR1_BR2_subset_flu_specific <- subset(
  BR1_BR2_subset,
  Assay %in% flu_year_specific
)


In [10]:
# Collapse replicate rows: keep the first row for each
# Subject/Visit/Assay/Cohort combination (only the mean concentration is
# analysed, not the individual replicates), retaining all other columns.
BR1_BR2_subset_flu_specific <- distinct(
  BR1_BR2_subset_flu_specific,
  Subject, Visit, Assay, Cohort,
  .keep_all = TRUE
)
# Split into one data frame per assay so each assay can be normalized
# separately later.
BR1_BR2_subset_flu_specific_split <- split(
  BR1_BR2_subset_flu_specific,
  f = BR1_BR2_subset_flu_specific[["Assay"]]
)

In [11]:
# Normalize the concentrations of every per-assay table.
#
# For each assay table:
#   1. attach the sample metadata by kit barcode
#      (`Sample.Kit.Barcode` matched against `sample.sampleKitGuid`);
#   2. within each subject, divide `Calc..Conc..Mean` by the first value, in
#      `Visit` order, among that subject's rows whose `Flu_Year` is
#      "2020-2021".
#
# `Flu_Year` is presumably supplied by the metadata join -- TODO confirm.
# A subject with no "2020-2021" row gets NA in `Normalized_Concentration`,
# because first() of an empty vector is NA.
#
# seq_along() is used so an empty split list is skipped cleanly, and the
# per-assay temporary is named `assay_df` so the raw MSD table held in `df`
# is not overwritten by this loop.
normalized_BR1_BR2_subset_flu_specific_split <- vector(
  "list",
  length(BR1_BR2_subset_flu_specific_split)
)
for (i in seq_along(BR1_BR2_subset_flu_specific_split)) {
  assay_df <- as.data.frame(BR1_BR2_subset_flu_specific_split[[i]])
  assay_df <- left_join(
    assay_df,
    metadata,
    by = c("Sample.Kit.Barcode" = "sample.sampleKitGuid")
  )

  # arrange() orders rows by Visit so that first() picks the earliest visit
  # (character sort: "... Day 0" sorts before "... Day 7").
  normalized_df <- assay_df %>%
    group_by(Subject) %>%
    arrange(Visit) %>%
    mutate(
      Normalized_Concentration =
        Calc..Conc..Mean / first(Calc..Conc..Mean[Flu_Year == "2020-2021"])
    ) %>%
    ungroup()

  normalized_BR1_BR2_subset_flu_specific_split[[i]] <- normalized_df
}

In [12]:
# Take the first per-assay normalized table from the list
# (presumably the only one, since a single assay is selected upstream).
df_normalized <- normalized_BR1_BR2_subset_flu_specific_split[[1L]]

In [13]:
# Drop rows for which no normalized concentration could be computed
# (is.na() is TRUE for both NA and NaN).
df_normalized <- df_normalized %>%
  filter(!is.na(Normalized_Concentration))

In [14]:
# Save the normalized table as a CSV in the current working directory.
# write.csv() also writes the row names as an unnamed first column by default.
write.csv(
  df_normalized,
  file = "MSD_Normalized_Y2020-2021_Washington.csv"
)