In [1]:
library(hise)
library(dplyr)
library(ggplot2)
library(stats)
library(tidyverse)
library(readxl)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ lubridate 1.9.3     ✔ tibble    3.2.1
✔ purrr     1.0.2     ✔ tidyr     1.3.1
✔ readr     2.1.5     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors


In [2]:
# Read the per-sample metadata table that is merged with the HAI results at
# the end of the script.
metadata <- read.csv(
  "/home//jupyter/BRI_Figures_Final_V2/Figure5/01_Frequency_Comparison/selected_samples_with_acutal_flu_year.csv"
)

# Cut every pbmc_sample_id off at its first hyphen (the hyphen and everything
# after it are removed); ids without a hyphen are left unchanged.
metadata$pbmc_sample_id <- sub(
  pattern = "-.*",
  replacement = "",
  x = metadata$pbmc_sample_id
)

In [3]:
 # function to load plate layout csv for the three plates and convert to long format to later merge with raw data
reshape_to_long_format <- function(file_path) {
  # Turn a plate-layout CSV (a headerless grid of sample ids) into a long
  # table with one record per well.
  #
  # file_path: path to the layout CSV; it is labelled below as 8 rows by
  #            12 columns, so any other shape makes the labelling fail.
  # Returns a data frame with the columns pbmc_sample_id and Well, where
  # Well is the row letter followed by the two-digit column (e.g. "A01").

  # Read the grid as-is; the file has no header row.
  plate_grid <- read.csv(file_path, header = FALSE)

  # Label columns "01".."12" and rows "A".."H".
  colnames(plate_grid) <- sprintf("%02d", 1:12)
  rownames(plate_grid) <- LETTERS[1:8]

  # Move the row letters into a regular column, then stack the twelve plate
  # columns so that each grid cell becomes its own record.
  with_row_letter <- rownames_to_column(plate_grid, var = "Row")
  stacked <- gather(with_row_letter, key = "Column", value = "pbmc_sample_id", -Row)

  # Compose the well label and keep only the two columns needed for joining.
  labelled <- mutate(stacked, Well = paste0(Row, Column))
  select(labelled, pbmc_sample_id, Well)
}

# Plate-layout worklists (one CSV per plate) and the name under which each
# reshaped layout is stored. The two vectors are matched by position.
file_paths <- c(
  "/home//jupyter/BRI_Figures_Final_V2/Dataset/HAI/EXP-01111 MSD Plate1 Plate_2BMACAI036 worklist.csv",
  "/home//jupyter/BRI_Figures_Final_V2/Dataset/HAI/EXP-01111 MSD Plate2 Plate_2BMACAI037 worklist.csv",
  "/home//jupyter/BRI_Figures_Final_V2/Dataset/HAI/EXP-01111 MSD Plate3 Plate_2BMACA7038 worklist.csv"
)
data_frame_names <- c("Plate_2BMACAI036", "Plate_2BMACAI037", "Plate_2BMACA7038")

# Fail early if the two parallel vectors ever get out of sync.
stopifnot(length(file_paths) == length(data_frame_names))

# Reshape every layout file to long format; the result is a list keyed by
# the names above. lapply() also copes with an empty vector, which the
# previous 1:length() loops did not.
data_frames <- setNames(
  lapply(file_paths, reshape_to_long_format),
  data_frame_names
)

# Also expose each layout as a top-level variable of the same name, because
# the join cell further down refers to them directly.
for (layout_name in names(data_frames)) {
  assign(layout_name, data_frames[[layout_name]])
}

In [4]:
# Load the raw MSD HAI readout. The first line of the file is skipped, so
# the second line supplies the column names.
MSD_HAI <- read.csv("/home//jupyter/BRI_Figures_Final_V2/Dataset/HAI/EXP-01111 MSD HAI Data.csv", skip = 1)

# Add a percent-inhibition column. Wells H01, G01, H02 and G02 are the blank
# wells; their average Mean, computed separately for every Plate.Name/Assay
# combination, is the calibration zero (calib_mean).
# perc_inhib = (1 - Mean / calib_mean) * 100.
MSD_HAI_good <- MSD_HAI %>%
  group_by(Plate.Name, Assay) %>%
  mutate(
    calib_mean = mean(Mean[Well %in% c("H01", "G01", "H02", "G02")], na.rm = TRUE),
    perc_inhib = (1 - (Mean / calib_mean)) * 100
  ) %>%
  # Drop the grouping once the per-group mean is in place, so later verbs on
  # MSD_HAI_good do not silently operate per plate/assay.
  ungroup()

In [6]:
# Distinct plate names, in the order in which they first occur in the table
plate_names <- unique(MSD_HAI_good$Plate.Name)

# One entry per plate, keyed by a syntactically valid version of the plate
# name (make.names() replaces spaces and other invalid characters)
plate_dfs <- list()
for (plate_id in plate_names) {
  plate_dfs[[make.names(plate_id)]] <- filter(MSD_HAI_good, Plate.Name == plate_id)
}

In [7]:
# Pull each plate's rows out of plate_dfs as a plain data frame and give it
# an explicit name.
# NOTE(review): the plate is selected by its POSITION in plate_names (order
# of first appearance in the HAI file: 1st -> 7037, 2nd -> 7036,
# 3rd -> 7038), not by its name. Confirm this order against the data file.
# Guard: with fewer than three plates the positional lookups below cannot all
# resolve to a plate, and the script would only fail later at the join.
stopifnot(length(plate_names) >= 3L)
Plate_2BMACA7038_data <- as.data.frame(plate_dfs[[make.names(plate_names[3])]])
Plate_2BMACA7037_data <- as.data.frame(plate_dfs[[make.names(plate_names[1])]])
Plate_2BMACA7036_data <- as.data.frame(plate_dfs[[make.names(plate_names[2])]])

In [8]:
# Attach the PBMC sample id from each plate layout to that plate's readings,
# matching on the well label. Every reading is kept (left join).
plate_7036 <- Plate_2BMACA7036_data %>%
  left_join(Plate_2BMACAI036, by = "Well")
plate_7037 <- Plate_2BMACA7037_data %>%
  left_join(Plate_2BMACAI037, by = "Well")
plate_7038 <- Plate_2BMACA7038_data %>%
  left_join(Plate_2BMACA7038, by = "Well")

In [9]:
## Pilot data (7 samples each cohort): raw MSD readout, first file line skipped
MSD_pilot_HAI <- read.csv(
  "/home//jupyter/BRI_Figures_Final_V2/Dataset/HAI/EXP-01072 MSD Raw Data_pilot2.csv",
  skip = 1
)

# Percent inhibition relative to the mean of wells H01 and H02, computed
# separately for every Plate.Name/Assay combination
MSD_pilot_HAI_good <- MSD_pilot_HAI %>%
  group_by(Plate.Name, Assay) %>%
  mutate(
    calib_mean = mean(Mean[Well %in% c("H01", "H02")], na.rm = TRUE),
    perc_inhib = (1 - (Mean / calib_mean)) * 100
  )

# Plate layout: second sheet of the sample-info workbook
sampleinfo <- "/home//jupyter/BRI_Figures_Final_V2/Dataset/HAI//HAI_pilot_Sample_Info_031924.xlsx"
data_sheet2 <- read_excel(sampleinfo, sheet = 2)

# Keep the first eight rows as a plain data frame, use the first column as
# row names, then drop that column so only the grid of sample ids remains
data_sheet2_top <- as.data.frame(head(data_sheet2, n = 8))
rownames(data_sheet2_top) <- data_sheet2_top[[1]]
data_sheet2_top <- data_sheet2_top[-1]

# Relabel the grid: columns "01".."12", rows "A".."H"
colnames(data_sheet2_top) <- sprintf("%02d", 1:12)
rownames(data_sheet2_top) <- LETTERS[1:8]

# Long format: one record per well, holding the sample id and the well label
long_format <- rownames_to_column(data_sheet2_top, var = "Row")
long_format <- gather(long_format, key = "Column", value = "pbmc_sample_id", -Row)
long_format <- mutate(long_format, Well = paste0(Row, Column))
long_format <- select(long_format, pbmc_sample_id, Well)

# Attach the sample ids by well, then keep only the three listed plates
MSD_pilot_HAI_PBMCs <- MSD_pilot_HAI_good %>%
  left_join(long_format, by = "Well") %>%
  filter(Plate.Name %in% c("Plate_2BMACAF015", "Plate_2BMACAP012", "Plate_2BMACAZ010"))

[1m[22mNew names:
[36m•[39m `` -> `...1`


In [10]:
# Stack the three main plates and the pilot plates into a single table
plates_combined <- rbind(plate_7036, plate_7037, plate_7038, MSD_pilot_HAI_PBMCs)

# Bring pbmc_sample_id into the form used for the later merge with the
# metadata: drop everything from the first hyphen onward, then replace a
# leading "PL" with "PB"
plates_combined$pbmc_sample_id <- gsub(
  "^PL", "PB",
  sub("-.*", "", plates_combined$pbmc_sample_id)
)

In [11]:
# Average perc_inhib over all rows that share an assay and a sample id, then
# reduce the table to one row per assay/sample pair. distinct() keeps the
# first row of each pair, so the remaining columns come from that row.
plates_combined <- plates_combined %>%
  group_by(Assay, pbmc_sample_id) %>%
  mutate(Mean_Perc_Inhib = mean(perc_inhib, na.rm = TRUE)) %>%
  ungroup() %>%
  distinct(Assay, pbmc_sample_id, .keep_all = TRUE)

In [12]:
# Join the per-sample HAI results with the sample metadata on pbmc_sample_id.
# merge() keeps only ids that occur in both tables.
plates_combined_merge <- merge(
  x = plates_combined,
  y = metadata,
  by = "pbmc_sample_id"
)


In [12]:
# Save the merged table as CSV in the current working directory
write.csv(plates_combined_merge, file = "HAI_Y2020-2021_Phuket.csv")