In [1]:
library(Matrix)
library(sceasy)
library(anndata)
suppressPackageStartupMessages(library(readxl))
suppressPackageStartupMessages(library(plyr))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(tidyr))
suppressPackageStartupMessages(library(tibble))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(Seurat))
suppressPackageStartupMessages(library(patchwork))
suppressPackageStartupMessages(library(pheatmap))
suppressPackageStartupMessages(library(lme4))
suppressPackageStartupMessages(library(stringr))
library(hise)
library(parallel)

Loading required package: reticulate



In [2]:
# Sample-level metadata table; its `pbmc_sample_id` column is used below to
# locate the per-sample .h5ad files.
meta_data <- read.csv(
  file = '/home//jupyter/BRI_Analysis/scRNA/hise_meta_data_2024-01-23_fixed.csv'
)

In [3]:
# One .h5ad path per row of `meta_data`, named after the sample id.
H5AD_FILES <- paste0(
  '/home//jupyter/BRI_Analysis/scRNA/h5_cleaned_by_sample/',
  meta_data$pbmc_sample_id,
  '.h5ad'
)

# Average Expression

In [4]:
# Per-sample average expression.
#
# For every file in H5AD_FILES this:
#   1. loads the AnnData object and builds a Seurat object from it,
#   2. log-normalizes the counts (scale factor 10000),
#   3. computes, for each AIFI_L3 label, the mean normalized expression of
#      every gene across the cells carrying that label,
#   4. writes the gene x cell-type table to
#      'Average_Expression_by_Celltype_and_Sample/<pbmc_sample_id>_AIFI_L3.csv',
#   5. returns a data frame of cell counts per AIFI_L3 label with the sample id
#      attached (columns AIFI_L3, Freq, pbmc_sample_id).
#
# NOTE(review): mclapply() hands back a "try-error" object (plus a warning)
# for any file whose worker fails; inspect `cell_counts_list` before binding
# the results together if warnings were raised.

# Make sure the output directory exists so the workers' write.csv() calls
# cannot fail on a missing folder; this is a no-op when it is already there.
dir.create('Average_Expression_by_Celltype_and_Sample', showWarnings = FALSE, recursive = TRUE)

cell_counts_list <- mclapply(H5AD_FILES, function(x) {
  pbmc <- read_h5ad(x)
  # X is transposed before being passed as `counts`
  # (presumably cells x genes in the .h5ad file; confirm).
  pbmc <- CreateSeuratObject(counts = t(pbmc$X), meta.data = pbmc$obs)
  pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)

  # Pull the cell-level metadata once instead of on every loop iteration.
  meta <- pbmc[[]]
  stopifnot(all(c("AIFI_L3", "pbmc_sample_id") %in% colnames(meta)))
  cell_types <- meta[["AIFI_L3"]]
  type_levels <- as.character(unique(cell_types))
  sample_id <- as.character(meta[["pbmc_sample_id"]][1])

  expr_data <- GetAssayData(pbmc, assay = "RNA", slot = "data")

  avg_expression <- matrix(
    0,
    nrow = nrow(expr_data),
    ncol = length(type_levels),
    dimnames = list(rownames(expr_data), type_levels)
  )

  for (type in type_levels) {
    # Cell barcodes (metadata row names) carrying this label.
    cells_in_type <- rownames(meta)[which(cell_types == type)]
    # drop = FALSE keeps a one-cell type as a one-column matrix, and
    # Matrix::rowMeans works on the sparse matrix directly, so no dense
    # data-frame copy of the expression slice is made.
    avg_expression[, type] <- Matrix::rowMeans(expr_data[, cells_in_type, drop = FALSE])
  }
  df <- as.data.frame(avg_expression)

  # table() on the one-column data frame keeps "AIFI_L3" as the column name
  # of the resulting counts table.
  cell_counts <- as.data.frame(table(meta["AIFI_L3"]))
  cell_counts$pbmc_sample_id <- sample_id

  write.csv(df, paste0('Average_Expression_by_Celltype_and_Sample/', sample_id, '_AIFI_L3.csv'))
  cell_counts
}, mc.cores = 60)

In [13]:
# Stack the per-sample count tables into one long table
# (one row per sample x AIFI_L3 label).
count_df <- do.call(what = rbind, args = cell_counts_list)

# Reshape to one row per sample with one column per AIFI_L3 label.
count_df_wide <- pivot_wider(
  data = count_df,
  names_from = AIFI_L3,
  values_from = Freq
)

# Labels that never occurred in a sample come out of the pivot as NA;
# record them as zero cells.
count_df_wide <- replace(count_df_wide, is.na(count_df_wide), 0)

In [14]:
# Persist the sample x cell-type count table.
write.csv(
  x = count_df_wide,
  file = 'cell_counts_BRI.csv'
)

# Aggregate Expression


In [19]:
# Per-sample aggregated (pseudobulk-style) raw expression.
#
# For every file in H5AD_FILES this loads the AnnData object, builds a Seurat
# object, sums the raw counts of every gene across the cells of each AIFI_L3
# label, and writes the gene x cell-type table to
# 'Sum_Raw_Expression_by_Celltype_and_Sample/<pbmc_sample_id>_AIFI_L3.csv'.
# Each worker returns the value of write.csv(), so the resulting list carries
# no data of its own.
#
# NOTE(review): mclapply() returns a "try-error" object (plus a warning) for
# any file whose worker fails; check for warnings after this cell runs.

# Make sure the output directory exists so the workers' write.csv() calls
# cannot fail on a missing folder; this is a no-op when it is already there.
dir.create('Sum_Raw_Expression_by_Celltype_and_Sample', showWarnings = FALSE, recursive = TRUE)

mclapply(H5AD_FILES, function(x) {
  pbmc <- read_h5ad(x)
  # X is transposed before being passed as `counts`
  # (presumably cells x genes in the .h5ad file; confirm).
  pbmc <- CreateSeuratObject(counts = t(pbmc$X), meta.data = pbmc$obs)

  # Pull the cell-level metadata once instead of on every loop iteration.
  meta <- pbmc[[]]
  stopifnot(all(c("AIFI_L3", "pbmc_sample_id") %in% colnames(meta)))
  cell_types <- meta[["AIFI_L3"]]
  type_levels <- as.character(unique(cell_types))

  # The slot name is spelled out in full ("counts"); the previous "count"
  # only resolved through partial matching.
  expr_data <- GetAssayData(pbmc, assay = "RNA", slot = "counts")

  sum_expression <- matrix(
    0,
    nrow = nrow(expr_data),
    ncol = length(type_levels),
    dimnames = list(rownames(expr_data), type_levels)
  )

  for (type in type_levels) {
    # Cell barcodes (metadata row names) carrying this label.
    cells_in_type <- rownames(meta)[which(cell_types == type)]
    # drop = FALSE keeps a one-cell type as a one-column matrix, and
    # Matrix::rowSums works on the sparse matrix directly, so no dense
    # data-frame copy of the count slice is made.
    sum_expression[, type] <- Matrix::rowSums(expr_data[, cells_in_type, drop = FALSE])
  }
  df <- as.data.frame(sum_expression)

  write.csv(df, paste0('Sum_Raw_Expression_by_Celltype_and_Sample/', meta$pbmc_sample_id[1], '_AIFI_L3.csv'))
}, mc.cores = 60)