In [32]:
library(Matrix)
library(sceasy)
library(anndata)
suppressPackageStartupMessages(library(readxl))
suppressPackageStartupMessages(library(plyr))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(tidyr))
suppressPackageStartupMessages(library(tibble))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(Seurat))
suppressPackageStartupMessages(library(patchwork))
suppressPackageStartupMessages(library(pheatmap))
suppressPackageStartupMessages(library(lme4))
suppressPackageStartupMessages(library(stringr))
library(hise)
library(parallel)

In [33]:
# Load the SF4 metadata table (CSV) into a data frame; the later cells read
# its `file.name` column to derive sample IDs.
meta_data <- read.csv(
  file = '/home//jupyter/BRI_Analysis/scRNA/Analysis_SF4/SF4_meta_data-2024-05-05.csv'
)

In [34]:
# Derive `sampleID` from each file name: skip ahead to the first "PB", capture
# "PB" plus every following character up to (not including) the next "_", and
# replace the whole string with that capture.
# NOTE(review): sub() returns the input unchanged when the pattern does not
# match, so a file name without "PB" ends up as its own sampleID.
meta_data$sampleID <- sub(
  pattern = "^.*?(PB[^_]*).*",
  replacement = "\\1",
  x = meta_data$file.name
)

In [35]:
# Relative path of each sample's AnnData file: h5_by_sample/<sampleID>.h5ad
H5AD_FILES <- file.path('h5_by_sample', paste0(meta_data$sampleID, '.h5ad'))

# Mean of log-normalized expression per cell type, plus cell counts per cell type, for each sample

In [36]:
# For every per-sample .h5ad file (processed in parallel):
#   1. build a Seurat object from the AnnData matrix and log-normalize it,
#   2. average the normalized expression of every gene within each
#      `celltypist_l3` cell type,
#   3. write that gene x cell-type table to
#      Average_Expression_by_Celltype_and_Sample/<sampleID>_AIFI_L3.csv,
#   4. return the number of cells per cell type for the sample.
#
# Result: `cell_counts_list`, one data frame per file with the columns
# `celltypist_l3`, `Freq` and `pbmc_sample_id`.

# Without the output directory every worker would fail at write.csv().
dir.create(
  'Average_Expression_by_Celltype_and_Sample',
  showWarnings = FALSE,
  recursive = TRUE
)

cell_counts_list <- mclapply(H5AD_FILES, function(x) {
  adata <- read_h5ad(x)
  # AnnData's X is obs (cells) x var (genes); Seurat wants genes x cells.
  pbmc <- CreateSeuratObject(counts = t(adata$X), meta.data = adata$obs)
  pbmc <- NormalizeData(
    pbmc,
    normalization.method = "LogNormalize",
    scale.factor = 10000
  )

  cell_meta <- pbmc[[]]
  cell_types <- cell_meta[["celltypist_l3"]]
  type_names <- as.character(unique(cell_types))
  expr_data <- GetAssayData(pbmc, assay = "RNA", slot = "data")

  avg_expression <- matrix(
    0,
    nrow = nrow(expr_data),
    ncol = length(type_names),
    dimnames = list(rownames(expr_data), type_names)
  )
  for (type in type_names) {
    cells_in_type <- which(cell_types == type)
    # Stay sparse: drop = FALSE keeps a one-cell type as a matrix, and
    # Matrix::rowMeans() avoids materialising a dense genes x cells table.
    avg_expression[, type] <- Matrix::rowMeans(
      expr_data[, cells_in_type, drop = FALSE]
    )
  }

  # The sample ID is taken from the first cell's metadata row.
  sample_id <- as.character(cell_meta[["sampleID"]][1])
  write.csv(
    as.data.frame(avg_expression),
    paste0(
      'Average_Expression_by_Celltype_and_Sample/', sample_id, '_AIFI_L3.csv'
    )
  )

  cell_counts <- as.data.frame(table(cell_meta["celltypist_l3"]))
  cell_counts$pbmc_sample_id <- sample_id
  cell_counts
}, mc.cores = 60)

# mclapply() does not raise worker errors: a failed worker yields a
# "try-error" object (or NULL if the process died). Fail loudly here instead of
# letting those entries flow into the downstream rbind().
avg_expression_failed <- !vapply(cell_counts_list, is.data.frame, logical(1))
if (any(avg_expression_failed)) {
  stop(
    "Average-expression step failed for: ",
    paste(H5AD_FILES[avg_expression_failed], collapse = ", "),
    call. = FALSE
  )
}

In [39]:
# Stack the per-sample count tables into one long data frame
# (columns: celltypist_l3, Freq, pbmc_sample_id).
count_df <- do.call(what = rbind, args = cell_counts_list)

# Spread to one column per cell type; combinations that never occurred come
# out of pivot_wider() as NA and are set to 0 before the table is saved.
count_df_wide <- count_df %>%
  pivot_wider(names_from = celltypist_l3, values_from = Freq) %>%
  replace(is.na(.), 0)

write.csv(count_df_wide, file = 'cell_counts_SF4.csv')

# Aggregate (summed) raw counts per cell type for each sample

In [42]:
# For every per-sample .h5ad file (processed in parallel): sum the raw counts
# of every gene within each `celltypist_l3` cell type and write the resulting
# gene x cell-type table to
# Sum_Raw_Expression_by_Celltype_and_Sample/<sampleID>_AIFI_L3.csv.
#
# Result: `sum_expression_files`, the path written for each input file.

# Without the output directory every worker would fail at write.csv().
dir.create(
  'Sum_Raw_Expression_by_Celltype_and_Sample',
  showWarnings = FALSE,
  recursive = TRUE
)

sum_expression_files <- mclapply(H5AD_FILES, function(x) {
  adata <- read_h5ad(x)
  # AnnData's X is obs (cells) x var (genes); Seurat wants genes x cells.
  pbmc <- CreateSeuratObject(counts = t(adata$X), meta.data = adata$obs)

  cell_meta <- pbmc[[]]
  cell_types <- cell_meta[["celltypist_l3"]]
  type_names <- as.character(unique(cell_types))
  # "counts" spelled out in full; the previous "count" only worked through
  # partial matching of the slot name.
  expr_data <- GetAssayData(pbmc, assay = "RNA", slot = "counts")

  sum_expression <- matrix(
    0,
    nrow = nrow(expr_data),
    ncol = length(type_names),
    dimnames = list(rownames(expr_data), type_names)
  )
  for (type in type_names) {
    cells_in_type <- which(cell_types == type)
    # Stay sparse: drop = FALSE keeps a one-cell type as a matrix, and
    # Matrix::rowSums() avoids materialising a dense genes x cells table.
    sum_expression[, type] <- Matrix::rowSums(
      expr_data[, cells_in_type, drop = FALSE]
    )
  }

  # The sample ID is taken from the first cell's metadata row.
  out_path <- paste0(
    'Sum_Raw_Expression_by_Celltype_and_Sample/',
    as.character(cell_meta[["sampleID"]][1]),
    '_AIFI_L3.csv'
  )
  write.csv(as.data.frame(sum_expression), out_path)
  # Return the path so a dead worker (NULL) or an error ("try-error") is
  # distinguishable from success.
  out_path
}, mc.cores = 60)

# mclapply() does not raise worker errors; surface them explicitly.
sum_expression_failed <- !vapply(
  sum_expression_files,
  function(res) is.character(res) && !inherits(res, "try-error"),
  logical(1)
)
if (any(sum_expression_failed)) {
  stop(
    "Raw-count aggregation failed for: ",
    paste(H5AD_FILES[sum_expression_failed], collapse = ", "),
    call. = FALSE
  )
}