In [8]:
library(sceasy)
suppressPackageStartupMessages(library(readxl))
suppressPackageStartupMessages(library(H5weaver))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(tidyr))
suppressPackageStartupMessages(library(tibble))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(Seurat))
suppressPackageStartupMessages(library(patchwork))
suppressPackageStartupMessages(library(pheatmap))
suppressPackageStartupMessages(library(lme4))
suppressPackageStartupMessages(library(MAST))
suppressPackageStartupMessages(library(stringr))
library(parallel)

In [9]:
# Load the sample metadata CSV into a data frame.
meta_data <- read.csv(
  file = '/home//jupyter/BRI_Analysis/scRNA/hise_meta_data_2024-01-23_fixed.csv'
)

In [10]:
# Keep only rows whose Covid_exlcusion column is 'no' and whose visit is one
# of the four flu visits (Year 1 / Year 2, Day 0 / Day 7).
meta_data <- meta_data %>%
  filter(Covid_exlcusion == 'no') %>%
  filter(
    sample.visitName %in% c(
      'Flu Year 1 Day 0', 'Flu Year 1 Day 7',
      'Flu Year 2 Day 0', 'Flu Year 2 Day 7'
    )
  )

In [11]:
# Session-wide default worker count for parallel functions that read
# getOption("mc.cores"). The mclapply() calls in this file pass mc.cores = 60
# explicitly, so this default does not apply to them.
options(mc.cores = 10)


# Y1

In [12]:
# Per-donor MAST differential expression: Flu Year 1 Day 7 vs Day 0.
#
# For every donor that has BOTH Year 1 visits, the two per-sample h5ad files
# are converted to Seurat objects, merged and log-normalized. Then, for each
# AIFI_L3 cell type observed at both visits, a MAST model (~Visit + CDR) is
# fitted and the summary table is written to
# 01B_MAST/<donor>_Y1_<celltype>.csv. Cell types whose output file already
# exists are skipped, so the cell can be re-run to resume.
#
# For serial debugging, replace mclapply(...) with
# lapply(unique(meta_data$subject.subjectGuid)[1], ...).
#
# NOTE(review): zlm()/summary() are called with parallel = TRUE inside an
# mclapply() that already requests 60 workers — confirm the nested
# parallelism is intended for this machine.

# Make sure the output directory exists; otherwise every write.csv() below
# would fail inside the tryCatch().
dir.create("01B_MAST", showWarnings = FALSE, recursive = TRUE)

mclapply(unique(meta_data$subject.subjectGuid), function(i) {
  meta_data_single_donor <- meta_data %>% filter(subject.subjectGuid == i)

  # Both visits are required. all() collapses the two membership tests into a
  # single TRUE/FALSE; the previous unique() produced a length-2 condition
  # (an error in R >= 4.2) whenever only one of the two visits was present.
  has_both_visits <- all(
    c('Flu Year 1 Day 0', 'Flu Year 1 Day 7') %in%
      meta_data_single_donor$sample.visitName
  )
  if (!has_both_visits) {
    return(NULL)
  }

  h5ad_dir <- '/home/jupyter/BRI_Analysis/scRNA/h5_cleaned_by_sample/'

  sample_id_Y1D0 <- meta_data_single_donor$pbmc_sample_id[
    meta_data_single_donor$sample.visitName == 'Flu Year 1 Day 0'
  ]
  pbmc_Y1D0 <- sceasy::convertFormat(
    paste0(h5ad_dir, sample_id_Y1D0, '.h5ad'),
    from = "anndata", to = "seurat"
  )

  sample_id_Y1D7 <- meta_data_single_donor$pbmc_sample_id[
    meta_data_single_donor$sample.visitName == 'Flu Year 1 Day 7'
  ]
  pbmc_Y1D7 <- sceasy::convertFormat(
    paste0(h5ad_dir, sample_id_Y1D7, '.h5ad'),
    from = "anndata", to = "seurat"
  )

  # Index the per-sample metadata by the 'barcodes' column before merging.
  rownames(pbmc_Y1D0@meta.data) <- pbmc_Y1D0[[]]$barcodes
  rownames(pbmc_Y1D7@meta.data) <- pbmc_Y1D7[[]]$barcodes
  pbmc_combined_Y1 <- merge(pbmc_Y1D0, y = pbmc_Y1D7, add.cell.ids = c("D0", "D7"))
  pbmc_combined_Y1 <- NormalizeData(pbmc_combined_Y1, normalization.method = "LogNormalize", scale.factor = 10000)

  # CDR covariate: centered and scaled per-cell feature count.
  # NOTE(review): 'nFeaturess_RNA' (double "s") is presumably the column name
  # produced by the sceasy conversion — confirm it exists in the object.
  pbmc_combined_Y1@meta.data$CDR <- scale(pbmc_combined_Y1@meta.data$nFeaturess_RNA, scale = TRUE, center = TRUE)
  pbmc_combined_Y1@meta.data$CMV <- pbmc_combined_Y1@meta.data$CMV.IgG.Serology.Result.Interpretation
  pbmc_combined_Y1@meta.data$Visit <- gsub(" ", "_", pbmc_combined_Y1@meta.data$sample.visitName)
  pbmc_combined_Y1@meta.data$Sex <- pbmc_combined_Y1@meta.data$subject.biologicalSex
  pbmc_combined_Y1@meta.data$Donor <- pbmc_combined_Y1@meta.data$subject.subjectGuid

  # Restore the original barcodes as cell names (merge() prefixed them with
  # the add.cell.ids values).
  rownames(pbmc_combined_Y1@meta.data) <- pbmc_combined_Y1@meta.data$barcodes
  pbmc_combined_Y1 <- RenameCells(pbmc_combined_Y1, new.names = rownames(pbmc_combined_Y1[[]]))

  for (celltype in unique(pbmc_combined_Y1[[]]$AIFI_L3)) {

    tryCatch({
      out_file <- paste0("01B_MAST/", i, '_Y1_', celltype, ".csv")
      if (file.exists(out_file)) {
        print("comparison have run before")
        next
      }

      # Base subsetting (instead of dplyr::filter) so that a metadata column
      # named 'celltype' can never shadow the loop variable; which() drops NA
      # labels just as filter() did.
      all_meta <- pbmc_combined_Y1@meta.data
      celltype_meta <- all_meta[which(all_meta$AIFI_L3 == celltype), , drop = FALSE]

      # The contrast needs cells of this type at both visits.
      if (length(unique(celltype_meta$Visit)) > 1) {
        selCells <- rownames(celltype_meta)
        esetNormsub2 <- pbmc_combined_Y1[, selCells]
        normCountssub <- esetNormsub2[["RNA"]]@data

        # min_expr serves both as the expression cutoff and as the minimum
        # fraction of cells (10%) that must exceed that cutoff.
        min_expr <- 0.1
        n_expressing <- rowSums(normCountssub > min_expr)
        selGenes <- names(n_expressing)[n_expressing >= min_expr * length(selCells)]

        # remove ribosomal, mitochondrial, LINCs, orfs
        # NOTE(review): "^RP" also matches non-ribosomal genes that merely
        # start with "RP" — confirm this breadth is intended.
        selGenes <- selGenes[!grepl("^RP|^MT-|^LINC|orf", selGenes)]
        # drop = FALSE keeps a matrix even if a single gene survives.
        normCountssub <- normCountssub[selGenes, , drop = FALSE]

        fdat <- data.frame(
          primerid = rownames(normCountssub),
          row.names = rownames(normCountssub),
          stringsAsFactors = FALSE
        )

        cdat <- esetNormsub2@meta.data %>%
          as.data.frame() %>%
          dplyr::select(pbmc_sample_id, Donor, cohort.cohortGuid, CMV, Sex, Visit, CDR)
        cdat <- cdat[colnames(normCountssub), , drop = FALSE]
        # Cell metadata must line up with the expression columns.
        stopifnot(identical(rownames(cdat), colnames(normCountssub)))
        cdat[] <- lapply(cdat, as.character)
        # Day 0 is the reference level, so the coefficient tested below is
        # Day 7 relative to Day 0.
        cdat$Visit <- relevel(factor(cdat$Visit), ref = "Flu_Year_1_Day_0")
        cdat$CDR <- as.numeric(cdat$CDR)

        sca <- FromMatrix(exprsArray = as.matrix(normCountssub),
                          cData = cdat,
                          fData = fdat)
        zlmCond <- zlm(formula = ~Visit + CDR,
                       sca = sca,
                       method = "bayesglm",
                       ebayes = TRUE,
                       parallel = TRUE)

        summaryCond <- summary(zlmCond, doLRT = 'VisitFlu_Year_1_Day_7', parallel = TRUE)

        summaryDt <- summaryCond$datatable

        summaryDt$AIFI_L3 <- celltype
        # Rows follow factor level order: Day 0 (reference) first, then Day 7.
        counts <- as.data.frame(table(cdat$Visit)) %>% arrange(Var1)
        summaryDt$cell_counts_D0 <- counts[1, 2]
        summaryDt$cell_counts_D7 <- counts[2, 2]

        write.csv(summaryDt, out_file)
      }
    }, error = function(e) {
      cat("Error processing cell type:", celltype, "- skipping to next.\n")
      # Report the donor and the actual failure so it can be diagnosed.
      cat("  donor:", i, "- reason:", conditionMessage(e), "\n")
    })
  }

  NULL
}, mc.cores = 60)

# Y2

In [15]:
# Per-donor MAST differential expression: Flu Year 2 Day 7 vs Day 0.
#
# For every donor that has BOTH Year 2 visits, the two per-sample h5ad files
# are converted to Seurat objects, merged and log-normalized. Then, for each
# AIFI_L3 cell type observed at both visits, a MAST model (~Visit + CDR) is
# fitted and the summary table is written to
# 01B_MAST/<donor>_Y2_<celltype>.csv. Cell types whose output file already
# exists are skipped, so the cell can be re-run to resume.
#
# For serial debugging, replace mclapply(...) with
# lapply(unique(meta_data$subject.subjectGuid)[1], ...).
#
# NOTE(review): zlm()/summary() are called with parallel = TRUE inside an
# mclapply() that already requests 60 workers — confirm the nested
# parallelism is intended for this machine.

# Make sure the output directory exists; otherwise every write.csv() below
# would fail inside the tryCatch().
dir.create("01B_MAST", showWarnings = FALSE, recursive = TRUE)

mclapply(unique(meta_data$subject.subjectGuid), function(i) {
  meta_data_single_donor <- meta_data %>% filter(subject.subjectGuid == i)

  # Both visits are required. all() collapses the two membership tests into a
  # single TRUE/FALSE; the previous unique() produced a length-2 condition
  # (an error in R >= 4.2) whenever only one of the two visits was present.
  has_both_visits <- all(
    c('Flu Year 2 Day 0', 'Flu Year 2 Day 7') %in%
      meta_data_single_donor$sample.visitName
  )
  if (!has_both_visits) {
    return(NULL)
  }

  h5ad_dir <- '/home/jupyter/BRI_Analysis/scRNA/h5_cleaned_by_sample/'

  sample_id_Y2D0 <- meta_data_single_donor$pbmc_sample_id[
    meta_data_single_donor$sample.visitName == 'Flu Year 2 Day 0'
  ]
  pbmc_Y2D0 <- sceasy::convertFormat(
    paste0(h5ad_dir, sample_id_Y2D0, '.h5ad'),
    from = "anndata", to = "seurat"
  )

  sample_id_Y2D7 <- meta_data_single_donor$pbmc_sample_id[
    meta_data_single_donor$sample.visitName == 'Flu Year 2 Day 7'
  ]
  pbmc_Y2D7 <- sceasy::convertFormat(
    paste0(h5ad_dir, sample_id_Y2D7, '.h5ad'),
    from = "anndata", to = "seurat"
  )

  # Index the per-sample metadata by the 'barcodes' column before merging.
  rownames(pbmc_Y2D0@meta.data) <- pbmc_Y2D0[[]]$barcodes
  rownames(pbmc_Y2D7@meta.data) <- pbmc_Y2D7[[]]$barcodes
  pbmc_combined_Y2 <- merge(pbmc_Y2D0, y = pbmc_Y2D7, add.cell.ids = c("D0", "D7"))
  pbmc_combined_Y2 <- NormalizeData(pbmc_combined_Y2, normalization.method = "LogNormalize", scale.factor = 10000)

  # CDR covariate: centered and scaled per-cell feature count.
  # NOTE(review): 'nFeaturess_RNA' (double "s") is presumably the column name
  # produced by the sceasy conversion — confirm it exists in the object.
  pbmc_combined_Y2@meta.data$CDR <- scale(pbmc_combined_Y2@meta.data$nFeaturess_RNA, scale = TRUE, center = TRUE)
  pbmc_combined_Y2@meta.data$CMV <- pbmc_combined_Y2@meta.data$CMV.IgG.Serology.Result.Interpretation
  pbmc_combined_Y2@meta.data$Visit <- gsub(" ", "_", pbmc_combined_Y2@meta.data$sample.visitName)
  pbmc_combined_Y2@meta.data$Sex <- pbmc_combined_Y2@meta.data$subject.biologicalSex
  pbmc_combined_Y2@meta.data$Donor <- pbmc_combined_Y2@meta.data$subject.subjectGuid

  # Restore the original barcodes as cell names (merge() prefixed them with
  # the add.cell.ids values).
  rownames(pbmc_combined_Y2@meta.data) <- pbmc_combined_Y2@meta.data$barcodes
  pbmc_combined_Y2 <- RenameCells(pbmc_combined_Y2, new.names = rownames(pbmc_combined_Y2[[]]))

  for (celltype in unique(pbmc_combined_Y2[[]]$AIFI_L3)) {

    tryCatch({
      out_file <- paste0("01B_MAST/", i, '_Y2_', celltype, ".csv")
      if (file.exists(out_file)) {
        print("comparison have run before")
        next
      }

      # Base subsetting (instead of dplyr::filter) so that a metadata column
      # named 'celltype' can never shadow the loop variable; which() drops NA
      # labels just as filter() did.
      all_meta <- pbmc_combined_Y2@meta.data
      celltype_meta <- all_meta[which(all_meta$AIFI_L3 == celltype), , drop = FALSE]

      # The contrast needs cells of this type at both visits.
      if (length(unique(celltype_meta$Visit)) > 1) {
        selCells <- rownames(celltype_meta)
        esetNormsub2 <- pbmc_combined_Y2[, selCells]
        normCountssub <- esetNormsub2[["RNA"]]@data

        # min_expr serves both as the expression cutoff and as the minimum
        # fraction of cells (10%) that must exceed that cutoff.
        min_expr <- 0.1
        n_expressing <- rowSums(normCountssub > min_expr)
        selGenes <- names(n_expressing)[n_expressing >= min_expr * length(selCells)]

        # remove ribosomal, mitochondrial, LINCs, orfs
        # NOTE(review): "^RP" also matches non-ribosomal genes that merely
        # start with "RP" — confirm this breadth is intended.
        selGenes <- selGenes[!grepl("^RP|^MT-|^LINC|orf", selGenes)]
        # drop = FALSE keeps a matrix even if a single gene survives.
        normCountssub <- normCountssub[selGenes, , drop = FALSE]

        fdat <- data.frame(
          primerid = rownames(normCountssub),
          row.names = rownames(normCountssub),
          stringsAsFactors = FALSE
        )

        cdat <- esetNormsub2@meta.data %>%
          as.data.frame() %>%
          dplyr::select(pbmc_sample_id, Donor, cohort.cohortGuid, CMV, Sex, Visit, CDR)
        cdat <- cdat[colnames(normCountssub), , drop = FALSE]
        # Cell metadata must line up with the expression columns.
        stopifnot(identical(rownames(cdat), colnames(normCountssub)))
        cdat[] <- lapply(cdat, as.character)
        # Day 0 is the reference level, so the coefficient tested below is
        # Day 7 relative to Day 0.
        cdat$Visit <- relevel(factor(cdat$Visit), ref = "Flu_Year_2_Day_0")
        cdat$CDR <- as.numeric(cdat$CDR)

        sca <- FromMatrix(exprsArray = as.matrix(normCountssub),
                          cData = cdat,
                          fData = fdat)
        zlmCond <- zlm(formula = ~Visit + CDR,
                       sca = sca,
                       method = "bayesglm",
                       ebayes = TRUE,
                       parallel = TRUE)

        summaryCond <- summary(zlmCond, doLRT = 'VisitFlu_Year_2_Day_7', parallel = TRUE)

        summaryDt <- summaryCond$datatable

        summaryDt$AIFI_L3 <- celltype
        # Rows follow factor level order: Day 0 (reference) first, then Day 7.
        counts <- as.data.frame(table(cdat$Visit)) %>% arrange(Var1)
        summaryDt$cell_counts_D0 <- counts[1, 2]
        summaryDt$cell_counts_D7 <- counts[2, 2]

        write.csv(summaryDt, out_file)
      }
    }, error = function(e) {
      cat("Error processing cell type:", celltype, "- skipping to next.\n")
      # Report the donor and the actual failure so it can be diagnosed.
      cat("  donor:", i, "- reason:", conditionMessage(e), "\n")
    })
  }

  NULL
}, mc.cores = 60)


In [14]:
i

ERROR: Error in eval(expr, envir, enclos): object 'i' not found
