# Create pseudobulk SummarizedExperiment objects for DEG analysis in the longitudinal study
## AIFI l3 Cert pro deep clean data - all 3 aims

In [1]:
# Helper for loading packages without their startup banners.
# Everything passed in `...` is forwarded unchanged to library(); only
# package startup messages are silenced (warnings still surface).
quiet_library <- function(...) {
    suppressPackageStartupMessages(
        expr = library(...)
    )
}
quiet_library('Seurat')
quiet_library('tidyverse')
quiet_library('ggplot2')
quiet_library('Matrix')
quiet_library('dplyr')
quiet_library('viridis')
quiet_library('MAST')
quiet_library('scran')
quiet_library('data.table')
quiet_library('SingleCellExperiment')
quiet_library('SeuratDisk')
quiet_library('DESeq2')
quiet_library('SummarizedExperiment')

“package ‘scran’ was built under R version 4.3.3”
“package ‘DESeq2’ was built under R version 4.3.3”


In [2]:
# report the installed SeuratDisk version
packageVersion('SeuratDisk')
# fix the random seed for the rest of the session
set.seed(1234)

[1] ‘0.0.0.9021’

In [3]:
# Working locations used throughout the notebook:
#   data_path   - root folder that is searched for pseudobulk count/metadata files
#   fig_path    - figure folder
#   meta_path   - metadata folder inside the repository checkout
#   output_path - folder the resulting RDS objects are written to
#   proj_name   - prefix prepended to every output file name
data_path <- "/home/jupyter/data/ra_longitudinal/scrna/certPro/counts/aifi_l3"
fig_path <- "/home/jupyter/data/ra_longitudinal/figures"
meta_path <- "/home/jupyter/github/ra-longitudinal/metadata"
output_path <- "/home/jupyter/data/ra_longitudinal/scrna/certPro"
proj_name <- "ALTRA_scRNA_AIFI_L3_Pseudobulk_certPro_"

In [4]:
# Colour palettes available to the plots in this notebook.
# The four *_color vectors carry an explicit alpha channel (trailing "FF").
npg_color <- c(
    "#E64B35FF", "#4DBBD5FF", "#00A087FF", "#3C5488FF", "#F39B7FFF",
    "#8491B4FF", "#91D1C2FF", "#DC0000FF", "#7E6148FF", "#B09C85FF"
)
nejm_color <- c(
    "#BC3C29FF", "#0072B5FF", "#E18727FF", "#20854EFF",
    "#7876B1FF", "#6F99ADFF", "#FFDC91FF", "#EE4C97FF"
)
jama_color <- c(
    "#374E55FF", "#DF8F44FF", "#00A1D5FF", "#B24745FF",
    "#79AF97FF", "#6A6599FF", "#80796BFF"
)
jco_color <- c(
    "#0073C2FF", "#EFC000FF", "#868686FF", "#CD534CFF",
    "#7AA6DCFF", "#003C67FF", "#8F7700FF"
)
# 30 base colours for cluster labels
cluster_colors <- c(
    "#DC050C", "#FB8072", "#1965B0", "#7BAFDE", "#882E72",
    "#B17BA6", "#FF7F00", "#FDB462", "#E7298A", "#E78AC3",
    "#33A02C", "#B2DF8A", "#55A1B1", "#8DD3C7", "#A6761D",
    "#E6AB02", "#7570B3", "#BEAED4", "#666666", "#999999",
    "#aa8282", "#d4b7b7", "#8600bf", "#ba5ce3", "#808000",
    "#aeae5c", "#1e90ff", "#00bfff", "#56ff0d", "#ffff00"
)
# interpolate the 30 base colours into a 36-colour ramp
cluster_ramp <- colorRampPalette(cluster_colors)
cluster_colors_ext <- cluster_ramp(36)
# default size of figures rendered by the notebook
options(repr.plot.width = 20, repr.plot.height = 15)

In [5]:
# Load the project's shared scRNA helper functions from the repository checkout.
# NOTE(review): makePseudoSE(), called further down, is not defined in this
# notebook - presumably it is provided by this helper file; confirm.
source('/home/jupyter/github/ra-longitudinal/scRNA/ALTRA_scRNA_R_helper_functions.r')

## Load pseudobulk counts and metadata for each aim and cell type

In [6]:
# Locate the pseudobulk count and metadata files for every aim / cell type.
# list.files() interprets `pattern` as a regular expression (not a glob), so
# the suffix is matched with an escaped dot and anchored at the end of the
# name. Returned paths are relative to data_path (e.g. "aim3/<file name>").
counts_files <- list.files(data_path, pattern = 'psbulk_counts\\.tsv$', recursive = TRUE)
meta_files <- list.files(data_path, pattern = 'psbulk_metadata\\.csv$', recursive = TRUE)

# Recover the cell type from each file name: drop the aim directory plus the
# shared prefix (regex), then the literal suffix (fixed, so "." is not a wildcard).
cell_types <- counts_files %>%
    str_remove('aim\\d/ALTRA_scRNA_AIFI_L3_certpro_pseudobulk__') %>%
    str_remove(fixed('_psbulk_counts.tsv'))
cell_type2 <- meta_files %>%
    str_remove('aim\\d/ALTRA_scRNA_AIFI_L3_certpro_pseudobulk__') %>%
    str_remove(fixed('_psbulk_metadata.csv'))

# The two listings are paired by position in the table below, so stop here if
# nothing was found or if counts and metadata do not line up one-to-one
# (same aim and same cell type at every position).
stopifnot(
    'no pseudobulk count files found under data_path' =
        length(counts_files) > 0,
    'count and metadata files differ in cell type' =
        identical(cell_types, cell_type2),
    'count and metadata files differ in aim' =
        identical(str_extract(counts_files, 'aim\\d'), str_extract(meta_files, 'aim\\d'))
)

# create a table to point to all data (one row per aim x cell type)
files_tb <- tibble(
    'Aim' = str_extract(counts_files, 'aim\\d'),
    'cell_type' = cell_types,
    'counts_file' = counts_files,
    'meta_file' = meta_files
)
files_tb %>% tail()

Aim,cell_type,counts_file,meta_file
<chr>,<chr>,<chr>,<chr>
aim3,Proliferating T cell,aim3/ALTRA_scRNA_AIFI_L3_certpro_pseudobulk__Proliferating T cell_psbulk_counts.tsv,aim3/ALTRA_scRNA_AIFI_L3_certpro_pseudobulk__Proliferating T cell_psbulk_metadata.csv
aim3,SOX4+ naive CD4 T cell,aim3/ALTRA_scRNA_AIFI_L3_certpro_pseudobulk__SOX4+ naive CD4 T cell_psbulk_counts.tsv,aim3/ALTRA_scRNA_AIFI_L3_certpro_pseudobulk__SOX4+ naive CD4 T cell_psbulk_metadata.csv
aim3,SOX4+ naive CD8 T cell,aim3/ALTRA_scRNA_AIFI_L3_certpro_pseudobulk__SOX4+ naive CD8 T cell_psbulk_counts.tsv,aim3/ALTRA_scRNA_AIFI_L3_certpro_pseudobulk__SOX4+ naive CD8 T cell_psbulk_metadata.csv
aim3,SOX4+ Vd1 gdT,aim3/ALTRA_scRNA_AIFI_L3_certpro_pseudobulk__SOX4+ Vd1 gdT_psbulk_counts.tsv,aim3/ALTRA_scRNA_AIFI_L3_certpro_pseudobulk__SOX4+ Vd1 gdT_psbulk_metadata.csv
aim3,Transitional B cell,aim3/ALTRA_scRNA_AIFI_L3_certpro_pseudobulk__Transitional B cell_psbulk_counts.tsv,aim3/ALTRA_scRNA_AIFI_L3_certpro_pseudobulk__Transitional B cell_psbulk_metadata.csv
aim3,Type 2 polarized memory B cell,aim3/ALTRA_scRNA_AIFI_L3_certpro_pseudobulk__Type 2 polarized memory B cell_psbulk_counts.tsv,aim3/ALTRA_scRNA_AIFI_L3_certpro_pseudobulk__Type 2 polarized memory B cell_psbulk_metadata.csv


In [7]:
# preview the last rows of the file table
tail(files_tb)

Aim,cell_type,counts_file,meta_file
<chr>,<chr>,<chr>,<chr>
aim3,Proliferating T cell,aim3/ALTRA_scRNA_AIFI_L3_certpro_pseudobulk__Proliferating T cell_psbulk_counts.tsv,aim3/ALTRA_scRNA_AIFI_L3_certpro_pseudobulk__Proliferating T cell_psbulk_metadata.csv
aim3,SOX4+ naive CD4 T cell,aim3/ALTRA_scRNA_AIFI_L3_certpro_pseudobulk__SOX4+ naive CD4 T cell_psbulk_counts.tsv,aim3/ALTRA_scRNA_AIFI_L3_certpro_pseudobulk__SOX4+ naive CD4 T cell_psbulk_metadata.csv
aim3,SOX4+ naive CD8 T cell,aim3/ALTRA_scRNA_AIFI_L3_certpro_pseudobulk__SOX4+ naive CD8 T cell_psbulk_counts.tsv,aim3/ALTRA_scRNA_AIFI_L3_certpro_pseudobulk__SOX4+ naive CD8 T cell_psbulk_metadata.csv
aim3,SOX4+ Vd1 gdT,aim3/ALTRA_scRNA_AIFI_L3_certpro_pseudobulk__SOX4+ Vd1 gdT_psbulk_counts.tsv,aim3/ALTRA_scRNA_AIFI_L3_certpro_pseudobulk__SOX4+ Vd1 gdT_psbulk_metadata.csv
aim3,Transitional B cell,aim3/ALTRA_scRNA_AIFI_L3_certpro_pseudobulk__Transitional B cell_psbulk_counts.tsv,aim3/ALTRA_scRNA_AIFI_L3_certpro_pseudobulk__Transitional B cell_psbulk_metadata.csv
aim3,Type 2 polarized memory B cell,aim3/ALTRA_scRNA_AIFI_L3_certpro_pseudobulk__Type 2 polarized memory B cell_psbulk_counts.tsv,aim3/ALTRA_scRNA_AIFI_L3_certpro_pseudobulk__Type 2 polarized memory B cell_psbulk_metadata.csv


In [8]:
# testing
# # load data for one cell type
# pb_counts <- fread(file.path(data_path, files_tb$counts_file[3]))%>% rename('V1'='index')
# # load the metadata
# pb_meta <- fread(file.path(data_path, files_tb$meta_file[3]))%>% rename('V1'='index')
# # check the index 
# stopifnot(all(pb_counts$index==pb_meta$index))
# # make count matrix
# pb_counts_mx <- pb_counts %>% select(-index)%>%
#     as.data.frame()%>% t()
# # make summarized experiment
# pb_se <- SummarizedExperiment(assays=list(counts=pb_counts_mx), colData=pb_meta)
# pb_se

# Construct SE objects for all cell types in all aims

In [9]:
# AIM1: keep only the aim1 rows of the file table and hand them to
# makePseudoSE() to build the objects for every cell type in this aim
aim1_files <- filter(files_tb, Aim == 'aim1')
pb_se_aim1 <- makePseudoSE(aim1_files)

In [10]:
# sanity check: number of elements makePseudoSE() returned for AIM1
length(pb_se_aim1)

In [11]:
# Write the AIM1 result to an RDS file under output_path.
# NOTE(review): "psedobulk" in the file name looks like a typo; it is left
# unchanged because other code may read the file by this exact name.
saveRDS(
    pb_se_aim1,
    file = file.path(output_path, paste0(proj_name, 'AIM1_psedobulk_object_list.rds'))
)

In [12]:
# AIM2: keep only the aim2 rows of the file table and hand them to
# makePseudoSE() to build the objects for every cell type in this aim
aim2_files <- filter(files_tb, Aim == 'aim2')
pb_se_aim2 <- makePseudoSE(aim2_files)

In [13]:
# sanity check: number of elements makePseudoSE() returned for AIM2
length(pb_se_aim2)

In [14]:
# Write the AIM2 result to an RDS file under output_path.
# NOTE(review): "psedobulk" in the file name looks like a typo; it is left
# unchanged because other code may read the file by this exact name.
saveRDS(
    pb_se_aim2,
    file = file.path(output_path, paste0(proj_name, 'AIM2_psedobulk_object_list.rds'))
)

In [15]:
# AIM3: keep only the aim3 rows of the file table and hand them to
# makePseudoSE() to build the objects for every cell type in this aim
aim3_files <- filter(files_tb, Aim == 'aim3')
pb_se_aim3 <- makePseudoSE(aim3_files)

In [16]:
# sanity check: number of elements makePseudoSE() returned for AIM3
length(pb_se_aim3)

In [17]:
# Write the AIM3 result to an RDS file under output_path.
# NOTE(review): "psedobulk" in the file name looks like a typo; it is left
# unchanged because other code may read the file by this exact name.
saveRDS(
    pb_se_aim3,
    file = file.path(output_path, paste0(proj_name, 'AIM3_psedobulk_object_list.rds'))
)