In [1]:
library(Matrix)
library(sceasy)
library(anndata)
suppressPackageStartupMessages(library(readxl))
suppressPackageStartupMessages(library(plyr))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(tidyr))
suppressPackageStartupMessages(library(tibble))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(Seurat))
suppressPackageStartupMessages(library(patchwork))
suppressPackageStartupMessages(library(pheatmap))
suppressPackageStartupMessages(library(lme4))
suppressPackageStartupMessages(library(stringr))
library(hise)
library(parallel)

Loading required package: reticulate



In [2]:
# Row-wise summary statistics for a numeric table.
#
# Args:
#   df: data frame (or matrix) of numeric values; each row is summarised
#       across its columns, ignoring NA entries.
#
# Returns:
#   A data frame with one row per input row and the columns `mean`,
#   `variance`, `sd` (square root of the variance) and `cv` (sd / mean).
calculate_stats <- function(df) {
  mu <- rowMeans(df, na.rm = TRUE)
  sigma2 <- apply(df, 1, function(values) var(values, na.rm = TRUE))
  sigma <- sqrt(sigma2)
  data.frame(
    mean = mu,
    variance = sigma2,
    sd = sigma,
    cv = sigma / mu
  )
}


# Compute mean expression per cell type and sample

In [3]:
# Collect the per-sample AnnData inputs.
# The pattern is a regular expression, so it is anchored to the file extension;
# an unanchored "h5ad" would also match names that merely contain that text
# (e.g. "sample.h5ad.bak").
H5AD_FILES <- list.files(path = "h5ad_baselines_after_qc/", pattern = "\\.h5ad$",
                         all.files = TRUE, full.names = TRUE, recursive = FALSE,
                         ignore.case = TRUE, include.dirs = TRUE, no.. = FALSE)
# A non-recursive listing also returns matching sub-directories; keep files only
# so that nothing unreadable is handed to read_h5ad() later on.
H5AD_FILES <- H5AD_FILES[!dir.exists(H5AD_FILES)]

In [51]:
# For every sample file, write one CSV holding the mean log-normalised
# expression of each gene (rows) within each AIFI_L3 cell type (columns).
# Only cell types with more than 10 cells in the sample are exported.
mean_expr_dir <- '01A_Mean_Expression_by_Celltype_and_Sample/'
# Create the output directory up front; otherwise every worker fails on write.
dir.create(mean_expr_dir, showWarnings = FALSE, recursive = TRUE)

mean_expr_files <- mclapply(H5AD_FILES, function(x) {
  adata <- read_h5ad(x)
  # Seurat expects features in rows and cells in columns, hence the transpose.
  pbmc <- CreateSeuratObject(counts = t(adata$X), meta.data = adata$obs)
  rm(adata)
  pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)

  meta <- pbmc[[]]
  cell_types <- meta[["AIFI_L3"]]
  expr_data <- GetAssayData(pbmc, assay = "RNA", slot = "data")

  # Keep only cell types represented by more than 10 cells in this sample.
  type_counts <- table(cell_types)
  selected_celltypes <- names(type_counts)[type_counts > 10]

  avg_expression <- matrix(0, nrow = nrow(expr_data), ncol = length(selected_celltypes),
                           dimnames = list(rownames(expr_data), selected_celltypes))
  for (type in selected_celltypes) {
    # Select cells by barcode; which() drops cells whose label is NA.
    cells_in_type <- rownames(meta)[which(cell_types == type)]
    # drop = FALSE keeps a matrix even for a single cell, and rowMeans() works
    # on the (sparse) matrix directly instead of a dense data-frame copy.
    avg_expression[, type] <- rowMeans(expr_data[, cells_in_type, drop = FALSE])
  }

  # Built as a matrix and converted once, so a sample with a single qualifying
  # cell type still yields a one-column table with gene names.
  df <- as.data.frame(avg_expression)
  out_file <- paste0(mean_expr_dir, meta$pbmc_sample_id[1], '_AIFI_L3.csv')
  write.csv(df, out_file)
  out_file
}, mc.cores = 60)

# mclapply() does not raise worker errors: a failed job comes back as a
# "try-error" object (or NULL if the child process died). Surface those here.
mean_expr_failed <- vapply(
  mean_expr_files,
  function(res) is.null(res) || inherits(res, "try-error"),
  logical(1)
)
if (any(mean_expr_failed)) {
  warning("Mean-expression export failed for: ",
          paste(H5AD_FILES[mean_expr_failed], collapse = ", "),
          call. = FALSE)
}


# Calculate CV

In [52]:
# List the per-sample mean-expression tables.
# The pattern is a regular expression, so it is anchored to the ".csv" extension
# rather than matching any name that merely contains "csv".
Mean_Expression_FILES <- list.files(path = "01A_Mean_Expression_by_Celltype_and_Sample/",
                                    pattern = "\\.csv$",
                                    all.files = TRUE, full.names = TRUE, recursive = FALSE,
                                    ignore.case = TRUE, include.dirs = TRUE, no.. = FALSE)
# A non-recursive listing also returns matching sub-directories; keep files only.
Mean_Expression_FILES <- Mean_Expression_FILES[!dir.exists(Mean_Expression_FILES)]


In [53]:
# Load the sample metadata and restrict it to donors that have exactly four
# rows across the four flu-study visits, after removing COVID-excluded samples.
flu_visits <- c("Flu Year 1 Day 0", "Flu Year 1 Day 90",
                "Flu Year 2 Day 0", "Flu Year 2 Day 90")

meta_data <- read.csv("/home//jupyter/BRI_Analysis/scRNA/meta_data_2024-01-16_fixed.csv") %>%
  filter(Covid_exlcusion == "no") %>%   # column name is spelled this way in the file
  filter(sample.visitName %in% flu_visits)

# Donors with exactly four remaining metadata rows.
rows_per_donor <- table(meta_data$subject.subjectGuid)
donors <- names(rows_per_donor)[rows_per_donor == 4]

meta_data <- filter(meta_data, subject.subjectGuid %in% donors)

In [54]:
# For each donor, combine the per-visit mean-expression tables and, for every
# cell type present at all of the donor's visits, write per-gene mean,
# variance, sd and CV across visits to '01A_CV_result/<donor>_<celltype>.csv'.
# Create the output directory up front; otherwise every worker fails on write.
dir.create('01A_CV_result/', showWarnings = FALSE, recursive = TRUE)

for (i in donors) {
  print(i)
  meta_data_donor <- meta_data %>% filter(subject.subjectGuid == i)
  sample_ids <- meta_data_donor[["pbmc_sample_id"]]
  visit_names <- meta_data_donor[["sample.visitName"]]

  # One table per visit; columns are tagged "<visit>:<cell type>".
  df_list <- mclapply(seq_along(sample_ids), function(x) {
    df <- read.csv(paste0('01A_Mean_Expression_by_Celltype_and_Sample/', sample_ids[x], '_AIFI_L3.csv'),
                   row.names = 1, check.names = FALSE)
    colnames(df) <- paste0(visit_names[x], ':', colnames(df))
    df
  }, mc.cores = 4)

  # mclapply() returns failed jobs as "try-error" objects (or NULL) instead of
  # raising; stop here rather than cbind-ing garbage into the donor table.
  read_failed <- !vapply(df_list, is.data.frame, logical(1))
  if (any(read_failed)) {
    stop("Could not read mean-expression table(s) for donor ", i, ": ",
         paste(sample_ids[read_failed], collapse = ", "), call. = FALSE)
  }

  # cbind() pairs rows by position, so make sure every visit uses the same
  # gene order as the first one before combining.
  genes <- rownames(df_list[[1]])
  df_list <- lapply(df_list, function(d) {
    if (identical(rownames(d), genes)) {
      return(d)
    }
    if (!setequal(rownames(d), genes)) {
      stop("Gene sets differ between visits for donor ", i, call. = FALSE)
    }
    d[match(genes, rownames(d)), , drop = FALSE]
  })

  df_all <- do.call(cbind, df_list)
  colnames(df_all) <- gsub(" ", '_', colnames(df_all))

  # Cell-type label of each column: everything after the first ":".
  col_celltype <- sub("^[^:]*:", "", colnames(df_all))
  # Keep cell types that have a column for every visit of this donor.
  celltype_counts <- table(col_celltype)
  selected_celltypes <- names(celltype_counts)[celltype_counts == length(df_list)]

  cv_files <- mclapply(selected_celltypes, function(celltypes) {
    # Exact label match: a substring search would also pick up columns of any
    # other cell type whose name contains this one.
    df_all_single_celltype <- df_all[, col_celltype == celltypes, drop = FALSE]
    # Discard genes that are zero at every visit.
    df_all_single_celltype <- df_all_single_celltype[rowSums(df_all_single_celltype != 0) > 0, , drop = FALSE]
    stats <- calculate_stats(df_all_single_celltype)
    stats$celltypes <- celltypes
    stats$donor <- i
    out_file <- paste0('01A_CV_result/', i, '_', celltypes, '.csv')
    write.csv(stats, out_file)
    out_file
  }, mc.cores = 60)

  # Report cell types whose worker failed instead of silently skipping them.
  cv_failed <- vapply(
    cv_files,
    function(res) is.null(res) || inherits(res, "try-error"),
    logical(1)
  )
  if (any(cv_failed)) {
    warning("CV calculation failed for donor ", i, ", cell type(s): ",
            paste(selected_celltypes[cv_failed], collapse = ", "),
            call. = FALSE)
  }
}


[1] "BR1002"
[1] "BR1003"
[1] "BR1004"
[1] "BR1006"
[1] "BR1007"
[1] "BR1008"
[1] "BR1009"
[1] "BR1010"
[1] "BR1011"
[1] "BR1012"
[1] "BR1013"
[1] "BR1014"
[1] "BR1016"
[1] "BR1017"
[1] "BR1018"
[1] "BR1021"
[1] "BR1023"
[1] "BR1024"
[1] "BR1025"
[1] "BR1026"
[1] "BR1028"
[1] "BR1030"
[1] "BR1031"
[1] "BR1032"
[1] "BR1037"
[1] "BR1040"
[1] "BR1041"
[1] "BR1047"
[1] "BR1049"
[1] "BR1050"
[1] "BR1054"
[1] "BR1057"
[1] "BR2002"
[1] "BR2003"
[1] "BR2005"
[1] "BR2008"
[1] "BR2009"
[1] "BR2010"
[1] "BR2012"
[1] "BR2015"
[1] "BR2016"
[1] "BR2018"
[1] "BR2019"
[1] "BR2021"
[1] "BR2022"
[1] "BR2023"
[1] "BR2024"
[1] "BR2025"
[1] "BR2026"
[1] "BR2028"
[1] "BR2029"
[1] "BR2031"
[1] "BR2032"
[1] "BR2033"


“unable to terminate child: No such process”


[1] "BR2034"
[1] "BR2035"
[1] "BR2036"
[1] "BR2037"
[1] "BR2038"
[1] "BR2039"
[1] "BR2040"
[1] "BR2041"
[1] "BR2042"
[1] "BR2043"
[1] "BR2044"
[1] "BR2045"
[1] "BR2047"
[1] "BR2048"
[1] "BR2051"
[1] "BR2052"
[1] "BR2053"
