In [1]:
####~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
####  Mouse C3KO-RichLi snRNA
####  2023-05-16 by Yi Zhao (Texas Heart Institute, US)
####~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


####~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
####  Initiate  ####
####~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~----
# Pipeline identifiers: project name, analysis step, and code version.
Project <- '2023_neoc3ko_rli'
Step <- 'PART01_Data_Collection'
Ver <- '0'

# Versioned code directory for this project (trailing slash kept for paste0 joins).
Code_dir <- paste0('/Volumes/shire/project/', Project, '/code/mouse_v', Ver, '/')

# The shared libraries under src/ are read through Sys.readlink(), i.e. each
# path is treated as a symlink and its target is what gets sourced.
# source() evaluates in the global environment by default, so wrapping the
# calls in lapply() does not change where the definitions land.
invisible(lapply(
        c('bioinformatics.R', 'scRNAseq.R', 'scATACseq.R'),
        function(lib_file) source(Sys.readlink(paste0(Code_dir, 'src/', lib_file)))
))
# Version-specific helper functions are sourced directly (no symlink lookup).
source(paste0(Code_dir, 'mouse_v', Ver, '.helper_functions.R'))

# InitiateProject() is defined outside this notebook (presumably in the files
# sourced above -- TODO confirm).
InitiateProject('Rivendell', Ver, Step, 'mouse', Project, 'shire')
####~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


In [6]:
####~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


####~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
####  Global Functions  ####
####~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~----
# Build a Seurat object for one sample and attach sample-level metadata.
#
# Args:
#   mode:       Input format, one of:
#                 '10x'        - Read10X() on <matrix_dir>/outs/filtered_feature_bc_matrix/
#                 'cellbender' - ReadCB_h5() on <matrix_dir>/cellbender_filtered.h5
#                 'matrix'     - read.table() on <matrix_dir[1]>.matrix.csv.gz
#                                (comma-separated, with header)
#   matrix_dir: Path (prefix) locating the count matrix for this sample.
#   sample, study, method, platform, protocol, processed, tissue, enrichment,
#   genotype, condition, sex, age, replicate:
#               Values stored verbatim as per-cell metadata columns of the same
#               name; `study` is also used as the Seurat project name.
#
# Returns:
#   A Seurat object whose cells are renamed to 'study:sample:orig.name', with
#   the original barcode kept in `orig.name` and a `pct_mito` column added.
#
# Raises:
#   An error if `mode` is not a single supported value. Previously an unknown
#   mode fell through every branch and failed later with the cryptic
#   "object 'srt' not found".
MakeSrt <- function(mode, matrix_dir, sample, study, method, platform, protocol, processed, tissue, enrichment,
                    genotype, condition, sex, age, replicate) {
        supported_modes <- c('10x', 'cellbender', 'matrix')
        if (length(mode) != 1 || !(mode %in% supported_modes)) {
                stop("Unsupported mode '", paste(mode, collapse = ', '), "'; expected one of: ",
                     paste(supported_modes, collapse = ', '), call. = FALSE)
        }

        if (mode == '10x') {
                matrix <- paste0(matrix_dir, '/outs/filtered_feature_bc_matrix/')
                # Echo the resolved input directory so the run log shows what was read.
                print(matrix)
                srt <- CreateSeuratObject(counts = Read10X(data.dir = matrix),
                                          min.cells = 1, min.features = 1, project = study)
        } else if (mode == 'cellbender') {
                matrix <- paste0(matrix_dir, '/cellbender_filtered.h5')
                srt <- CreateSeuratObject(counts = ReadCB_h5(matrix), min.cells = 1, min.features = 1, project = study)
        } else {
                # mode == 'matrix' (guaranteed by the validation above)
                matrix <- read.table(gzfile(paste0(matrix_dir[1], '.matrix.csv.gz')), header = TRUE, sep = ',')
                srt <- CreateSeuratObject(counts = matrix, min.cells = 1, min.features = 1, project = study)
        }

        # Sample-level annotations, recycled across all cells of this object.
        srt$sample <- sample
        srt$orig.name <- Cells(srt)
        srt$study <- study
        srt$method <- method
        srt$platform <- platform
        srt$protocol <- protocol
        srt$processed <- processed
        srt$tissue <- tissue
        srt$enrichment <- enrichment
        srt$genotype <- genotype
        srt$condition <- condition
        srt$sex <- sex
        srt$age <- age
        srt$replicate <- replicate

        # Prefix barcodes with study and sample so names stay unique after merging.
        srt <- RenameCells(srt, new.names = paste(srt$study,
                                                  srt$sample,
                                                  srt$orig.name,
                                                  sep = ':'), for.merge = FALSE)

        # Percentage of counts from features whose names start with 'mt-'.
        srt <- PercentageFeatureSet(srt, pattern = '^mt-', col.name = 'pct_mito', assay = 'RNA')
        # NaN values (presumably 0/0 for cells without counts) are reported as 0.
        srt$pct_mito[is.nan(srt$pct_mito)] <- 0
        return(srt)
}
# Build one Seurat object per library of a study and merge them.
#
# Args:
#   study:           Study name; matched against sample_meta.df$study.
#   study_id:        Study identifier used as the sample-id prefix.
#   sample_name:     Character vector of library names, one per sample; matched
#                    against sample_meta.df$library.
#   mode:            Input format forwarded to MakeSrt().
#   matrix_dir:      Vector of matrix locations, parallel to `sample_name`.
#   starting_sample: Number of the first sample; sample ids are
#                    '<study_id>_S<NNN>' with NNN zero-padded to 3 digits.
#
# Reads the global `sample_meta.df` (loaded elsewhere in this notebook) and
# requires exactly one metadata row per (study, sample_id, library).
#
# Returns:
#   The single Seurat object when there is one sample, otherwise the merge of
#   all per-sample objects.
#
# Raises:
#   An error when `sample_name` is empty, when `matrix_dir` and `sample_name`
#   differ in length, or when the metadata lookup does not yield exactly one row.
MakeDataset <- function(study, study_id, sample_name, mode, matrix_dir, starting_sample = 1){
        # Base length() is used here instead of the external L() helper
        # (assumed to be a length alias) so the guards work on their own.
        n_samples <- length(sample_name)
        if (n_samples == 0) {
                # 1:0 would iterate over c(1, 0); fail early with a clear message instead.
                stop('sample_name is empty: nothing to process', call. = FALSE)
        }
        if (length(matrix_dir) != n_samples) {
                stop('matrix_dir has length ', length(matrix_dir), ' but sample_name has length ',
                     n_samples, '; they must be parallel vectors', call. = FALSE)
        }

        srt.list <- vector('list', n_samples)
        for (i in seq_len(n_samples)) {
                sample_id <- paste0(study_id, '_S', str_pad(starting_sample - 1 + i, 3, pad = '0'))
                message('Processing sample:', sample_id)

                # which() drops NA comparisons, so metadata rows with missing
                # study/sample_id/library do not show up as spurious all-NA matches.
                meta_rows <- which(sample_meta.df$study == study &
                                           sample_meta.df$sample_id == sample_id &
                                           sample_meta.df$library == sample_name[i])
                sample_meta_sub.df <- sample_meta.df[meta_rows, ]

                if (nrow(sample_meta_sub.df) != 1) {
                        stop('Sample metadata error: expected exactly 1 row for study=', study,
                             ', sample_id=', sample_id, ', library=', sample_name[i],
                             ' but found ', nrow(sample_meta_sub.df), call. = FALSE)
                }
                message('Sample metadata found')

                srt.list[[i]] <- MakeSrt(mode = mode,
                                         matrix_dir = matrix_dir[i],
                                         study = study,
                                         sample = sample_id,
                                         method = sample_meta_sub.df$method,
                                         platform = sample_meta_sub.df$platform,
                                         protocol = sample_meta_sub.df$protocol,
                                         processed = sample_meta_sub.df$processed,
                                         tissue = sample_meta_sub.df$tissue,
                                         enrichment = sample_meta_sub.df$enrichment,
                                         genotype = sample_meta_sub.df$genotype,
                                         condition = sample_meta_sub.df$condition,
                                         sex = sample_meta_sub.df$sex,
                                         age = sample_meta_sub.df$age,
                                         replicate = sample_meta_sub.df$replicate
                )
                print('Seurat generated...')
        }

        if (n_samples > 1) {
                merge.srt <- merge(srt.list[[1]], srt.list[2:n_samples])
        } else {
                merge.srt <- srt.list[[1]]
        }
        return(merge.srt)
}
# Collect the raw count matrices of one study into a merged Seurat object and
# save it as 'individual/<study_id>.<study>.raw.srt.rds'.
#
# Args:
#   study:           Study name; looked up in the global `studies` vector to
#                    obtain its study id (the element name).
#   sample_name:     Library names; each is read from
#                    /Volumes/shire/data/scrnaseq/<study>/matrix/<sample_name>.
#   raw_matrix_type: Input format passed to MakeDataset() as `mode`.
#   starting_sample: Number assigned to the first sample.
#
# Returns:
#   The value of gc(), called after the merged object has been removed.
MakeRawDataset <- function(study, sample_name, raw_matrix_type, starting_sample = 1){
        study_no <- names(studies[studies==study])
        message('Collecting Raw Data...')

        # One input directory per library; echoed so the log records what is read.
        sample_dirs <- paste0('/Volumes/shire/data/scrnaseq/', study, '/matrix/', sample_name)
        print(sample_dirs)

        raw.srt <- MakeDataset(study = study,
                               study_id = study_no,
                               sample_name = sample_name,
                               mode = raw_matrix_type,
                               matrix_dir = sample_dirs,
                               starting_sample = starting_sample)

        # The Process() step is currently disabled; only its progress message remains.
        message('Processing Raw Seurat Object...')
        # raw.srt <- Process(raw.srt, assay = 'RNA')

        message('Saving Raw Seurat Object...')
        out_rds <- paste0('individual/', study_no, '.', study, '.raw.srt.rds')
        saveRDS(raw.srt, out_rds)
        # H5ad export is disabled:
        # SaveH5ad(raw.srt, path = 'individual/', name = paste0(study_no, '.', study, '.raw'),
        #          assay = 'RNA', raw_count_only = F, verbose = F)

        # Drop the merged object before collecting garbage.
        rm(raw.srt)
        gc()
}
# Collect the CellBender-filtered matrices of one study into a merged Seurat
# object and save it as 'individual/<study_id>.<study>.cbn.srt.rds'.
#
# Args:
#   study:           Study name; looked up in the global `studies` vector to
#                    obtain its study id (the element name).
#   sample_name:     Library names; each is read from
#                    /Volumes/shire/data/scrnaseq/<study>/matrix/<cb_folder>/<sample_name>.
#   raw_matrix_type: Accepted but not used; the mode is always 'cellbender'.
#   starting_sample: Number assigned to the first sample.
#   cb_folder:       Sub-folder of the study's matrix directory holding the
#                    CellBender output.
#
# Returns:
#   The value of gc(), called after the merged object has been removed.
MakeCbnDataset <- function(study, sample_name, raw_matrix_type, starting_sample = 1, cb_folder = 'cellbender_v1'){
        study_no <- names(studies[studies==study])
        message('Collecting CellBender Data...')

        # One CellBender output location per library.
        cb_dirs <- paste0('/Volumes/shire/data/scrnaseq/',
                          study, '/matrix/', cb_folder, '/', sample_name)
        cbn.srt <- MakeDataset(study = study,
                               study_id = study_no,
                               sample_name = sample_name,
                               mode = 'cellbender',
                               matrix_dir = cb_dirs,
                               starting_sample = starting_sample)

        # The Process() step is currently disabled; only its progress message remains.
        message('Processing CellBender Seurat Object...')
        # cbn.srt <- Process(cbn.srt, assay = 'RNA')

        message('Saving CellBender Seurat Object...')
        out_rds <- paste0('individual/', study_no, '.', study, '.cbn.srt.rds')
        saveRDS(cbn.srt, out_rds)
        # H5ad export is disabled:
        # SaveH5ad(cbn.srt, path = 'individual/', name = paste0(study_no, '.', study, '.cbn'),
        #          assay = 'RNA', raw_count_only = F, verbose = F)

        # Drop the merged object before collecting garbage.
        rm(cbn.srt)
        gc()
}

# Build separate ATAC and RNA Seurat objects from one 10x H5 file that holds
# both a 'Peaks' and a 'Gene Expression' matrix.
#
# Args:
#   gex_filtered_h5_file: Path to the H5 file read with Read10X_h5().
#   atac_frag_file:       Fragment file passed to CreateChromatinAssay().
#
# Returns:
#   An unnamed list: element 1 is the ATAC Seurat object (assay 'ATAC'),
#   element 2 is the RNA Seurat object.
MakeScmultiSrt <- function(gex_filtered_h5_file, atac_frag_file){
        multiome <- Read10X_h5(gex_filtered_h5_file)

        # --- ATAC: keep only peaks located on standard chromosomes ---
        peak_counts <- multiome$Peaks
        peak_ranges <- StringToGRanges(rownames(peak_counts), sep = c(":", "-"))
        on_standard_chrom <- seqnames(peak_ranges) %in% standardChromosomes(peak_ranges)
        peak_counts <- peak_counts[as.vector(on_standard_chrom), ]

        # Gene annotation from EnsDb.Mmusculus.v79, relabelled to UCSC style / mm10.
        gene_annot <- GetGRangesFromEnsDb(ensdb = EnsDb.Mmusculus.v79)
        seqlevelsStyle(gene_annot) <- 'UCSC'
        genome(gene_annot) <- "mm10"

        atac_assay <- CreateChromatinAssay(
                counts = peak_counts,
                sep = c(":", "-"),
                genome = 'mm10',
                fragments = atac_frag_file,
                min.cells = 1,
                min.features = 1,
                annotation = gene_annot
        )
        atac.srt <- CreateSeuratObject(counts = atac_assay, assay = "ATAC")

        # --- RNA: gene expression matrix from the same file ---
        rna.srt <- CreateSeuratObject(counts = multiome$`Gene Expression`)

        return(list(atac.srt, rna.srt))
}
# Build an ATAC Seurat object from a 10x H5 file containing a 'Peaks' matrix.
#
# Fix: the original body contained the truncated statement
# `rna_counts <- inputdata.10x$`, which R parsed together with the following
# line as `inputdata.10x$grange.counts <- ...`. As a result `atac_counts` and
# `grange.counts` were never defined and the function failed at run time. The
# peak-matrix extraction that the rest of the body depends on is restored.
#
# NOTE(review): the function name suggests a GEX-only object may have been
# intended, but every remaining statement (and the returned object) builds the
# ATAC assay, so that behaviour is kept -- confirm the intended output.
#
# Args:
#   gex_filtered_h5_file: Path to the H5 file read with Read10X_h5().
#   atac_frag_file:       Fragment file passed to CreateChromatinAssay().
#
# Returns:
#   A Seurat object with a single 'ATAC' chromatin assay.
MakeScmultiGexSrt <- function(gex_filtered_h5_file, atac_frag_file){
        # load both modalities
        inputdata.10x <- Read10X_h5(gex_filtered_h5_file)
        # extract ATAC data
        atac_counts <- inputdata.10x$Peaks
        # only use peaks in standard chromosomes
        grange.counts <- StringToGRanges(rownames(atac_counts), sep = c(":", "-"))
        grange.use <- seqnames(grange.counts) %in% standardChromosomes(grange.counts)
        atac_counts <- atac_counts[as.vector(grange.use), ]
        # gene annotation from EnsDb.Mmusculus.v79, relabelled to UCSC style / mm10
        annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Mmusculus.v79)
        seqlevelsStyle(annotations) <- 'UCSC'
        genome(annotations) <- "mm10"
        chrom_assay <- CreateChromatinAssay(
                counts = atac_counts,
                sep = c(":", "-"),
                genome = 'mm10',
                fragments = atac_frag_file,
                min.cells = 1,
                min.features = 1,
                annotation = annotations
        )
        srt <- CreateSeuratObject(
                counts = chrom_assay,
                assay = "ATAC"
        )
        return(srt)
}
####~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~----



In [7]:
####~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
####  Load sample metadata  ####
####~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~----
# Sample sheet for all mouse datasets; Docu_dir is defined outside this cell.
sample_meta.df <- read.csv(paste0(Docu_dir, 'mouse_sample_meta.csv'))

# Lookup vector: values are the distinct study names, names are the distinct
# study ids. This pairs the two by position, so it assumes both columns
# produce their distinct values in the same order -- TODO confirm.
studies <- setNames(U(sample_meta.df$study), U(sample_meta.df$study_id))

# Subset of studies having at least one sample on a '10X' or 'Drop-seq' platform.
studies_cellbender <- studies[
        studies %in% with(sample_meta.df, study[platform %in% c('10X', 'Drop-seq')])
]
####~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

In [8]:
# Display the lookup vector: values are study names, names are study ids.
studies

In [9]:
####~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
####  Dataset P01: 2023_NeoC3KO_RLi (snRNA-seq)  ####
####~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~----
study <- '2023_NeoC3KO_RLi'
# Library names follow '<study>_<wt|c3>_p2<m1|m2|s1|s2>': the four 'wt'
# libraries come first, then the four 'c3' libraries.
sample_name <- paste0(study, '_',
                      rep(c('wt', 'c3'), each = 4),
                      '_p2',
                      rep(c('m1', 'm2', 's1', 's2'), times = 2))
matrix_type <- '10x'

# Build and save the raw dataset, then the CellBender-filtered one.
MakeRawDataset(study = study,
               sample_name = sample_name,
               raw_matrix_type = matrix_type,
               starting_sample = 1)
MakeCbnDataset(study = study,
               sample_name = sample_name,
               raw_matrix_type = matrix_type,
               starting_sample = 1)


Collecting Raw Data...



[1] "/Volumes/shire/data/scrnaseq/2023_NeoC3KO_RLi/matrix/2023_NeoC3KO_RLi_wt_p2m1"
[2] "/Volumes/shire/data/scrnaseq/2023_NeoC3KO_RLi/matrix/2023_NeoC3KO_RLi_wt_p2m2"
[3] "/Volumes/shire/data/scrnaseq/2023_NeoC3KO_RLi/matrix/2023_NeoC3KO_RLi_wt_p2s1"
[4] "/Volumes/shire/data/scrnaseq/2023_NeoC3KO_RLi/matrix/2023_NeoC3KO_RLi_wt_p2s2"
[5] "/Volumes/shire/data/scrnaseq/2023_NeoC3KO_RLi/matrix/2023_NeoC3KO_RLi_c3_p2m1"
[6] "/Volumes/shire/data/scrnaseq/2023_NeoC3KO_RLi/matrix/2023_NeoC3KO_RLi_c3_p2m2"
[7] "/Volumes/shire/data/scrnaseq/2023_NeoC3KO_RLi/matrix/2023_NeoC3KO_RLi_c3_p2s1"
[8] "/Volumes/shire/data/scrnaseq/2023_NeoC3KO_RLi/matrix/2023_NeoC3KO_RLi_c3_p2s2"


Processing sample:P01_S001

Sample metadata found



[1] "/Volumes/shire/data/scrnaseq/2023_NeoC3KO_RLi/matrix/2023_NeoC3KO_RLi_wt_p2m1/outs/filtered_feature_bc_matrix/"
[1] "Seurat generated..."


Processing sample:P01_S002

Sample metadata found



[1] "/Volumes/shire/data/scrnaseq/2023_NeoC3KO_RLi/matrix/2023_NeoC3KO_RLi_wt_p2m2/outs/filtered_feature_bc_matrix/"
[1] "Seurat generated..."


Processing sample:P01_S003

Sample metadata found



[1] "/Volumes/shire/data/scrnaseq/2023_NeoC3KO_RLi/matrix/2023_NeoC3KO_RLi_wt_p2s1/outs/filtered_feature_bc_matrix/"
[1] "Seurat generated..."


Processing sample:P01_S004

Sample metadata found



[1] "/Volumes/shire/data/scrnaseq/2023_NeoC3KO_RLi/matrix/2023_NeoC3KO_RLi_wt_p2s2/outs/filtered_feature_bc_matrix/"
[1] "Seurat generated..."


Processing sample:P01_S005

Sample metadata found



[1] "/Volumes/shire/data/scrnaseq/2023_NeoC3KO_RLi/matrix/2023_NeoC3KO_RLi_c3_p2m1/outs/filtered_feature_bc_matrix/"
[1] "Seurat generated..."


Processing sample:P01_S006

Sample metadata found



[1] "/Volumes/shire/data/scrnaseq/2023_NeoC3KO_RLi/matrix/2023_NeoC3KO_RLi_c3_p2m2/outs/filtered_feature_bc_matrix/"
[1] "Seurat generated..."


Processing sample:P01_S007

Sample metadata found



[1] "/Volumes/shire/data/scrnaseq/2023_NeoC3KO_RLi/matrix/2023_NeoC3KO_RLi_c3_p2s1/outs/filtered_feature_bc_matrix/"
[1] "Seurat generated..."


Processing sample:P01_S008

Sample metadata found



[1] "/Volumes/shire/data/scrnaseq/2023_NeoC3KO_RLi/matrix/2023_NeoC3KO_RLi_c3_p2s2/outs/filtered_feature_bc_matrix/"
[1] "Seurat generated..."


Processing Raw Seurat Object...

Saving Raw Seurat Object...



Unnamed: 0,used,(Mb),gc trigger,(Mb).1,limit (Mb),max used,(Mb).2
Ncells,14694953,784.8,27524648,1470.0,,18043232,963.7
Vcells,26932145,205.5,1628071717,12421.3,131072.0,1688861592,12885.0


Collecting CellBender Data...

Processing sample:P01_S001

Sample metadata found

CellRanger version 3+ format H5

"'giveCsparse' is deprecated; setting repr="T" for you"
as(<dgTMatrix>, "dgCMatrix") is deprecated since Matrix 1.5-0; do as(., "CsparseMatrix") instead



[1] "Seurat generated..."


Processing sample:P01_S002

Sample metadata found

CellRanger version 3+ format H5

"'giveCsparse' is deprecated; setting repr="T" for you"


[1] "Seurat generated..."


Processing sample:P01_S003

Sample metadata found

CellRanger version 3+ format H5

"'giveCsparse' is deprecated; setting repr="T" for you"


[1] "Seurat generated..."


Processing sample:P01_S004

Sample metadata found

CellRanger version 3+ format H5

"'giveCsparse' is deprecated; setting repr="T" for you"


[1] "Seurat generated..."


Processing sample:P01_S005

Sample metadata found

CellRanger version 3+ format H5

"'giveCsparse' is deprecated; setting repr="T" for you"


[1] "Seurat generated..."


Processing sample:P01_S006

Sample metadata found

CellRanger version 3+ format H5

"'giveCsparse' is deprecated; setting repr="T" for you"


[1] "Seurat generated..."


Processing sample:P01_S007

Sample metadata found

CellRanger version 3+ format H5

"'giveCsparse' is deprecated; setting repr="T" for you"


[1] "Seurat generated..."


Processing sample:P01_S008

Sample metadata found

CellRanger version 3+ format H5

"'giveCsparse' is deprecated; setting repr="T" for you"


[1] "Seurat generated..."


Processing CellBender Seurat Object...

Saving CellBender Seurat Object...



Unnamed: 0,used,(Mb),gc trigger,(Mb).1,limit (Mb),max used,(Mb).2
Ncells,14705635,785.4,27524648,1470.0,,18043232,963.7
Vcells,26952051,205.7,1530273722,11675.1,131072.0,1688861592,12885.0
