In [1]:
library(Matrix)
library(sceasy)
library(anndata)
suppressPackageStartupMessages(library(readxl))
suppressPackageStartupMessages(library(H5weaver))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(tidyr))
suppressPackageStartupMessages(library(tibble))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(Seurat))
suppressPackageStartupMessages(library(patchwork))
suppressPackageStartupMessages(library(pheatmap))
suppressPackageStartupMessages(library(lme4))
suppressPackageStartupMessages(library(stringr))
library(hise)
library(parallel)

Loading required package: reticulate



In [91]:
calculate_stats <- function(df) {
  row_means <- rowMeans(df, na.rm = TRUE)
  row_vars <- apply(df, 1, var, na.rm = TRUE)
  row_sds <- sqrt(row_vars)
  row_cvs <- row_sds / row_means
  result <- data.frame(mean = row_means, variance = row_vars, sd = row_sds, cv = row_cvs)
  return(result)
}


# Make Mean expression

In [17]:
H5AD_FILES<-list.files(path = "h5ad_baselines_after_qc/", pattern = "h5ad", all.files = TRUE,
           full.names = TRUE, recursive = FALSE,
           ignore.case = TRUE, include.dirs = TRUE, no.. = FALSE)

In [21]:
mclapply(H5AD_FILES,function(x){
pbmc <- read_h5ad(x)
pbmc <- CreateSeuratObject(counts = t(pbmc$X), meta.data = pbmc$obs)
pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
df<-AverageExpression(pbmc,assays = 'RNA',group.by = "AIFI_L3",add.ident = NULL,slot = "data",verbose = TRUE,)$RNA
selected_celltypes<-names(table(pbmc[[]]$AIFI_L3)[table(pbmc[[]]$AIFI_L3)>10])
df=df[,selected_celltypes]    
write.csv(df,paste0('01A_Mean_Expression_by_Celltype_and_Sample/',pbmc[[]]$pbmc_sample_id[1],'_AIFI_L3.csv'))
},mc.cores=60)

# Calculate CV

In [22]:
Mean_Expression_FILES<-list.files(path = "01A_Mean_Expression_by_Celltype_and_Sample/", pattern = "csv", all.files = TRUE,
           full.names = TRUE, recursive = FALSE,
           ignore.case = TRUE, include.dirs = TRUE, no.. = FALSE)


In [24]:
meta_data<-read.csv('/home//jupyter/BRI_Analysis/scRNA/hise_meta_data_2024-01-09.csv')

In [27]:
meta_data<-meta_data %>% filter(sample.visitName %in% c('Flu Year 1 Day 0','Flu Year 1 Day 90','Flu Year 2 Day 0','Flu Year 2 Day 90'))

In [32]:
donors<-names(table(meta_data$subject.subjectGuid)[table(meta_data$subject.subjectGuid)==4])

In [36]:
meta_data<-meta_data %>% filter(subject.subjectGuid %in% donors)

In [None]:
for (i in donors){

print(i)
meta_data_donor<-meta_data %>% filter(subject.subjectGuid == i)
    
df_list<-mclapply(seq(from = 1, to = 4, by = 1),function(x){
df<-read.csv(paste0('01A_Mean_Expression_by_Celltype_and_Sample/',pull(meta_data_donor['pbmc_sample_id'])[x],'_AIFI_L3.csv'),row.names = 1,check.names = FALSE)
df<-as.data.frame(df)
colnames(df)<-paste0(pull(meta_data_donor['sample.visitName'])[x],':',colnames(df))   
return(df)
},mc.cores=4)

df_all<-do.call(cbind,df_list)
colnames(df_all)<-gsub(" ",'_',colnames(df_all))
col_df<-as.data.frame(t(as.data.frame(str_split(colnames(df_all),":"))))
selected_celltypes<-names(table(col_df$V2)[table(col_df$V2)==4])
mclapply(selected_celltypes,function(celltypes){
df_all_single_celltype <- df_all[, grepl(celltypes, names(df_all), fixed = TRUE)]
df_all_single_celltype <- df_all_single_celltype[rowSums(df_all_single_celltype != 0) > 0, ]    
stats <- calculate_stats(df_all_single_celltype)
stats$celltypes<-celltypes
stats$donor<-i
write.csv(stats,paste0('01A_CV_result/',i,'_',celltypes,'.csv'))
},mc.cores=60)

}

[1] "BR1002"
[1] "BR1003"
[1] "BR1004"
[1] "BR1005"
[1] "BR1006"
[1] "BR1007"
[1] "BR1008"
[1] "BR1009"
[1] "BR1010"
[1] "BR1011"
[1] "BR1012"
[1] "BR1013"
[1] "BR1014"
[1] "BR1016"
[1] "BR1017"
[1] "BR1018"
[1] "BR1021"
[1] "BR1023"
[1] "BR1024"
[1] "BR1025"
[1] "BR1026"
[1] "BR1028"
[1] "BR1030"
[1] "BR1031"
[1] "BR1032"
[1] "BR1037"
[1] "BR1040"
[1] "BR1041"
[1] "BR1043"
[1] "BR1047"
[1] "BR1049"
[1] "BR1050"
[1] "BR1051"
[1] "BR1054"
[1] "BR1056"
[1] "BR1057"
[1] "BR1058"
[1] "BR1059"
[1] "BR2002"
[1] "BR2003"
[1] "BR2005"
[1] "BR2008"
[1] "BR2009"
[1] "BR2010"
[1] "BR2012"
[1] "BR2013"
[1] "BR2014"
[1] "BR2015"
[1] "BR2016"
[1] "BR2018"
[1] "BR2019"
[1] "BR2021"
[1] "BR2022"
[1] "BR2023"
[1] "BR2024"
[1] "BR2025"
[1] "BR2026"
[1] "BR2028"
[1] "BR2029"
[1] "BR2030"
[1] "BR2031"
[1] "BR2032"
[1] "BR2033"
[1] "BR2034"
[1] "BR2035"
[1] "BR2036"
[1] "BR2037"
[1] "BR2038"
[1] "BR2039"
[1] "BR2040"


“scheduled core 1 encountered error in user code, all values of the job will be affected”
