In [25]:
suppressPackageStartupMessages(library(readxl))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(tidyr))
suppressPackageStartupMessages(library(tibble))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(patchwork))
suppressPackageStartupMessages(library(pheatmap))
suppressPackageStartupMessages(library(stringr))
library(hise)
library(plyr)
library(purrr)

In [26]:
#' Flatten a wrapped list of file descriptors into a data frame (beta variant).
#'
#' The descriptor list is taken from the first element of `descriptors`. Each
#' descriptor is flattened with `unlist()`, filtered by field name, and turned
#' into a one-row data frame; rows are stacked with `plyr::rbind.fill()`, so
#' descriptors with different sets of fields are padded with `NA`.
#'
#' @param descriptors A list whose first element is the list of descriptors.
#' @param keep_labs Logical. When `TRUE`, fields whose name starts with "lab"
#'   are kept; with the default `FALSE` they are dropped.
#' @return A data frame with one row per descriptor.
fileDescToDataframe_beta <- function(descriptors, keep_labs = FALSE) {
  descriptors <- descriptors[[1]]
  assertthat::assert_that(typeof(descriptors) == "list")
  assertthat::assert_that(typeof(keep_labs) == "logical")

  # Field-name prefixes (after the "descriptors." prefix is stripped) that are
  # always removed. Dots are escaped so they only match a literal ".".
  always_drop <- "^(specimens|emr|survey|revision|file\\.userTags)"

  flatten_one <- function(desc) {
    desc <- unlist(desc)
    # "scheme" is matched anywhere in the name, before the prefix is stripped.
    desc <- desc[!grepl("scheme", names(desc))]
    names(desc) <- sub("^descriptors\\.", "", names(desc))
    desc <- desc[!grepl(always_drop, names(desc))]
    # Lab fields are dropped unless the caller explicitly asks to keep them.
    if (!isTRUE(keep_labs)) {
      desc <- desc[!grepl("^lab", names(desc))]
    }
    as.data.frame(as.list(desc))
  }

  # Namespaced so the function does not depend on plyr being attached.
  do.call(plyr::rbind.fill, lapply(descriptors, flatten_one))
}

#' Flatten a list of file descriptors into a data frame.
#'
#' Each descriptor is flattened with `unlist()`, filtered by field name, and
#' turned into a one-row data frame; rows are stacked with `rbind()`, so every
#' descriptor must yield the same set of columns.
#'
#' @param descriptors A list of descriptors.
#' @param keep_labs Logical. When `TRUE`, fields whose name starts with "lab"
#'   are kept; with the default `FALSE` they are dropped.
#' @return A data frame with one row per descriptor.
fileDescToDataframe <- function(descriptors,
                                keep_labs = FALSE) {

  assertthat::assert_that(typeof(descriptors) == "list")
  assertthat::assert_that(typeof(keep_labs) == "logical")

  # Field-name prefixes (after the "descriptors." prefix is stripped) that are
  # always removed. Dots are escaped so they only match a literal ".".
  always_drop <- "^(specimens|emr|survey|revision|file\\.userTags)"

  flatten_one <- function(desc) {
    desc <- unlist(desc)
    # "scheme" is matched anywhere in the name, before the prefix is stripped.
    desc <- desc[!grepl("scheme", names(desc))]
    names(desc) <- sub("^descriptors\\.", "", names(desc))
    desc <- desc[!grepl(always_drop, names(desc))]
    # Lab fields are dropped unless the caller explicitly asks to keep them.
    if (!isTRUE(keep_labs)) {
      desc <- desc[!grepl("^lab", names(desc))]
    }
    as.data.frame(as.list(desc))
  }

  do.call(rbind, lapply(descriptors, flatten_one))
}

In [27]:
# Request the "scRNA-seq-labeled" file descriptors for each cohort separately.
BR1_rna_desc <- getFileDescriptors(
  filter = list(cohort.cohortGuid = "BR1"),
  fileType = "scRNA-seq-labeled"
)
BR2_rna_desc <- getFileDescriptors(
  filter = list(cohort.cohortGuid = "BR2"),
  fileType = "scRNA-seq-labeled"
)

In [28]:
# Replace each raw descriptor response with its flattened data-frame form.
BR1_rna_desc <- BR1_rna_desc %>% fileDescToDataframe_beta()
BR2_rna_desc <- BR2_rna_desc %>% fileDescToDataframe_beta()

In [29]:
# Stack both cohorts; rbind.fill pads columns missing from either side with NA.
meta_data <- do.call(rbind.fill, list(BR1_rna_desc, BR2_rna_desc))

In [30]:
# Drop batch B004 and three specific subjects. Rows where either condition
# evaluates to NA are dropped as well (filter keeps only TRUE rows).
meta_data <- meta_data %>%
  filter(
    file.batchID != "B004",
    !(subject.subjectGuid %in% c("BR2007", "BR2049", "BR1034"))
  )

In [31]:
# Derive pbmc_sample_id from the file name: take up to 8 characters following
# the last "PB0" in file.name, put "PB0" back in front, and strip underscores.
meta_data$pbmc_sample_id <- gsub(
  "_", "",
  paste0("PB0", substr(sub(".*PB0", "", meta_data$file.name), 1, 8))
)

# Remove batches whose id contains "EXP", then order by the derived sample id.
meta_data <- meta_data %>%
  filter(!grepl("EXP", file.batchID)) %>%
  arrange(pbmc_sample_id)

# Keep one row per sample kit: with fromLast = TRUE the LAST row of each kit
# (in pbmc_sample_id order) survives. TRUE is spelled out because T can be
# reassigned.
is_earlier_duplicate <- duplicated(
  meta_data[["sample.sampleKitGuid"]],
  fromLast = TRUE
)
meta_data <- meta_data[!is_earlier_duplicate, ] %>%
  arrange(sample.sampleKitGuid)

# For visits labelled "Other - Non-Flu", use sample.visitDetails as the visit
# name instead.
meta_data <- meta_data %>%
  mutate(
    sample.visitName = ifelse(
      sample.visitName == "Other - Non-Flu",
      sample.visitDetails,
      sample.visitName
    )
  )

In [32]:
# Everything except the "Flu Year 1 Day 0" visit (rows with an NA visit name
# are dropped too), followed by a per-cohort row count.
meta_data_non_Y1D0 <- filter(meta_data, sample.visitName != "Flu Year 1 Day 0")
table(meta_data_non_Y1D0[["cohort.cohortGuid"]])


BR1 BR2 
371 405 

In [33]:
# Save the non-Y1D0 subset under a file name stamped with today's date.
write.csv(
  meta_data_non_Y1D0,
  file = paste0("hise_meta_data_", format(Sys.Date()), "_nonY1D0.csv")
)

In [83]:
# Reload the two previously exported metadata tables and stack them;
# rbind.fill pads columns missing from either file with NA.
meta_data_noY1D0 <- read.csv(
  "/home/jupyter/BRI_Analysis/scRNA/hise_meta_data_2024-01-22_nonY1D0.csv"
)
meta_data_Y1DO <- read.csv(
  "/home/jupyter/BRI_Analysis/scRNA/hise_meta_data_2023-11-19.csv"
)
meta_data <- rbind.fill(meta_data_Y1DO, meta_data_noY1D0)

# SLIMS export restricted to subjects whose id starts with "BR".
slims <- read.csv("Slims_20240116.csv") %>%
  filter(str_starts(Subject, "BR"))

# Keep only the text before the first space of First.Visit.Date and parse it
# as month/day/4-digit-year. sub() returns exactly one value per row, so blank
# entries become NA dates; splitting and unlisting would drop them and
# misalign the column with its rows.
slims$First.Visit.Date <- as.Date(
  sub(" .*$", "", slims$First.Visit.Date),
  format = "%m/%d/%Y"
)

# Draw date = first visit date + day offset of the sample.
slims$Sample_Draw_date <- slims$First.Visit.Date + slims$Days.Since.First.Visit

# Keep the barcode and draw date; rename the barcode to the metadata key name.
slims <- slims[c("AIFI.Barcode", "Sample_Draw_date")]
colnames(slims)[1] <- "sample.sampleKitGuid"

In [84]:
# Attach the SLIMS draw date. The key is stated explicitly so the join cannot
# silently change if another shared column name ever appears in either table.
meta_data <- left_join(meta_data, slims, by = "sample.sampleKitGuid")

[1m[22mJoining with `by = join_by(sample.sampleKitGuid)`


In [85]:
# COVID vaccination dates per subject, parsed as month/day/2-digit-year.
covid <- read.csv("Covid.csv")
covid$Covid_Dose_1 <- as.Date(covid$Dose.1.Date, format = "%m/%d/%y")
# NOTE(review): "Dase.2.Date" looks like a misspelling of "Dose.2.Date" but is
# presumably the actual column header in Covid.csv - confirm before renaming.
covid$Covid_Dose_2 <- as.Date(covid$Dase.2.Date, format = "%m/%d/%y")

# Keep the subject id and the two parsed dates; rename the id to the
# metadata key name.
covid <- select(covid, AllenID, Covid_Dose_1, Covid_Dose_2)
names(covid)[1] <- "subject.subjectGuid"


In [86]:
# Attach the COVID dose dates. The key is stated explicitly so the join cannot
# silently change if another shared column name ever appears in either table.
meta_data <- left_join(meta_data, covid, by = "subject.subjectGuid")

[1m[22mJoining with `by = join_by(subject.subjectGuid)`


In [87]:
# Flag samples drawn strictly after dose 1 and less than 45 days after either
# dose 2 or dose 1. Comparisons involving a missing date are never TRUE, so
# those samples are labelled "no".
meta_data$Covid_exlcusion <- with(meta_data, {
  drawn_after_dose_1 <- Covid_Dose_1 < Sample_Draw_date
  before_dose_2_cutoff <- (Covid_Dose_2 + 45) > Sample_Draw_date
  before_dose_1_cutoff <- (Covid_Dose_1 + 45) > Sample_Draw_date
  case_when(
    drawn_after_dose_1 & (before_dose_2_cutoff | before_dose_1_cutoff) ~ "yes",
    TRUE ~ "no"
  )
})


In [88]:
# Columns are picked by position: 2-3 of the CMV file, 1 and 7 of the BMI file.
CMV <- read.csv("CMV_20220718.csv")[, 2:3]
BMI <- read.csv("BR-BR-Clinical-BMI.csv")[, c(1, 7)]

In [89]:
# Attach the CMV and BMI columns per subject. Keys are stated explicitly so the
# joins cannot silently change if another shared column name ever appears.
meta_data <- left_join(meta_data, CMV, by = "subject.subjectGuid")
meta_data <- left_join(meta_data, BMI, by = "subject.subjectGuid")

[1m[22mJoining with `by = join_by(subject.subjectGuid)`
[1m[22mJoining with `by = join_by(subject.subjectGuid)`


In [90]:
# Keep only rows belonging to the BR1 and BR2 cohorts.
meta_data <- filter(meta_data, cohort.cohortGuid %in% c("BR1", "BR2"))

In [91]:
# Row count per visit name.
table(meta_data[["sample.visitName"]])


       Flu Year 1 Day 0        Flu Year 1 Day 7       Flu Year 1 Day 90 
                     92                      92                      89 
 Flu Year 1 Stand-Alone        Flu Year 2 Day 0        Flu Year 2 Day 7 
                     14                      84                      84 
      Flu Year 2 Day 90  Flu Year 2 Stand-Alone  Flu Year 3 Stand-Alone 
                     82                      22                      47 
 Immune Variation Day 0  Immune Variation Day 7 Immune Variation Day 90 
                     89                      89                      84 

In [92]:
# Directory names already present under cache/, listed once before the loop.
folder_names <- list.dirs(path = "cache", full.names = FALSE, recursive = TRUE)
# Initialised here but not filled anywhere in this cell.
meta_data_scRNA_list <- list()

# Call cacheFiles() for every file id that has no matching directory yet.
# NOTE(review): presumably cacheFiles() downloads into cache/<file.id>/ -
# confirm against the hise documentation.
# seq_len() yields an empty sequence for a zero-row table, whereas 1:0 would
# run the body twice with invalid indices.
for (i in seq_len(nrow(meta_data))) {
  if (meta_data$file.id[i] %in% folder_names) {
    next
  }
  meta_data_scRNA_single <- cacheFiles(list(meta_data$file.id[i]))
  print(i)
}

In [93]:
# Remove any existing path columns so they can be rebuilt from the cache below.
meta_data[["file.path"]] <- NULL
meta_data[["file.name.downloaded"]] <- NULL


In [94]:
# List every file under cache/ (recursively, hidden files included) whose name
# matches the regex 'h5', as paths relative to cache/, wrapped in a one-column
# data frame. The column name is derived from the call text at this point.
# NOTE(review): the pattern is an unanchored regex, so it matches any file name
# containing "h5" - confirm that this is intended.
files<-data.frame(list.files(path = "cache/", pattern = 'h5', all.files = TRUE,
           full.names = FALSE, recursive = TRUE,
           ignore.case = FALSE, include.dirs = FALSE, no.. = FALSE))

In [95]:
# Name the single column, then split every relative path on "/".
names(files) <- "file.path"
separated <- strsplit(files[["file.path"]], split = "/")

In [96]:
# Split each relative cache path into its leading directory (the file id) and
# its file name. Taking the first path component and the basename gives the
# same result as splitting on "/" for the expected "<file.id>/<file>" layout,
# but also yields a well-formed two-column table when the cache is empty or a
# path is nested more deeply.
separated_matrix <- cbind(
  sub("/.*$", "", files$file.path),
  basename(files$file.path)
)
df <- as.data.frame(separated_matrix, stringsAsFactors = FALSE)
colnames(df) <- c("file.id", "file.name.downloaded")

In [97]:
# Attach the downloaded file name and relative path to each row by file id.
meta_data <- meta_data %>%
  left_join(cbind(df, files), by = "file.id")

In [99]:
# Turn the relative cache path into an absolute one. Rows without a cached
# file keep NA; paste0() alone would turn them into the literal ".../cache/NA".
meta_data$file.path <- ifelse(
  is.na(meta_data$file.path),
  NA_character_,
  paste0("/home/jupyter/BRI_Analysis/scRNA/cache/", meta_data$file.path)
)

In [100]:
# Save the final metadata table under a file name stamped with today's date.
write.csv(
  meta_data,
  file = paste0("hise_meta_data_", format(Sys.Date()), "_fixed.csv")
)