In [1]:
library(Signac)
library(Seurat)
library(GenomicRanges)
library(BSgenome.Mmusculus.UCSC.mm10)
library(EnsDb.Mmusculus.v79)
library(Matrix)

# Build gene annotations as a GRanges object from the EnsDb.Mmusculus.v79
# database, then switch the sequence naming style to UCSC and label the genome
# as mm10.
annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Mmusculus.v79)
seqlevelsStyle(annotations) <- "UCSC"
genome(annotations) <- "mm10"

The legacy packages maptools, rgdal, and rgeos, underpinning the sp package,
which was just loaded, will retire in October 2023.
Please refer to R-spatial evolution reports for details, especially
https://r-spatial.org/r/2023/05/15/evolution4.html.
It may be desirable to make the sf package available;
package maintainers should consider adding sf to Suggests:.
The sp package is now running under evolution status 2
     (status 2 uses the sf package in place of rgdal)

Attaching SeuratObject

Loading required package: stats4

Loading required package: BiocGenerics


Attaching package: ‘BiocGenerics’


The following objects are masked from ‘package:stats’:

    IQR, mad, sd, var, xtabs


The following objects are masked from ‘package:base’:

    anyDuplicated, aperm, append, as.data.frame, basename, cbind,
    colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
    get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
    match, mget, order, paste, pmax, pmax.int,

In [2]:
# # load metadata from original paper
# metadata <- read.table("/mnt/disk1/xiaojk/data/hippocampus/Supplementary_Table_2-Metatable_of_nuclei.tsv", sep="\t", skip=1)
# rownames(metadata) <- metadata$V1
# colnames(metadata) <- c("cell", "sample", "barcode", "logUM", "TSSe", "class", "MajorType", "SubType", "na")
# cells <- metadata$cell

In [3]:
# download from http://catlas.org/mousebrain/#!/cellBrowser
# metadata1 <- read.table("/mnt/disk1/xiaojk/data/hippocampus/NonN_meta.tsv", sep="\t", header=TRUE)  #, skip=1
# metadata2 <- read.table("/mnt/disk1/xiaojk/data/hippocampus/Glutamate_meta.tsv", sep="\t", header=TRUE)  #, skip=1
# metadata3 <- read.table("/mnt/disk1/xiaojk/data/hippocampus/GABA_meta.tsv", sep="\t", header=TRUE)  #, skip=1
# metadata <- rbind(metadata1, metadata2, metadata3)
# write.table(metadata, "/mnt/disk1/xiaojk/data/hippocampus/all_meta.tsv", sep="\t")

In [4]:
# Load the merged per-cell metadata table; the first column of the file is used
# as the row names.
metadata <- read.table(
  "/mnt/disk1/xiaojk/data/hippocampus/all_meta.tsv",
  sep = "\t",
  header = TRUE,
  row.names = 1
)
# Keep only cells from dissection regions 8J and 8E that belong to replicate 1.
metadata_sub <- metadata[metadata$DissectionRegion %in% c('8J','8E') & metadata$replicate %in% c(1), ]
# Cell identifiers of the selected subset, used downstream to restrict the
# fragment file.
cells <- metadata_sub$cellID
# Report how many cells were selected. length() on a data frame returns the
# number of columns, so nrow() is required to count rows (cells).
nrow(metadata_sub)

In [5]:
# Display the filtered metadata table for visual inspection.
metadata_sub

Unnamed: 0_level_0,cellID,sample,replicate,logUMI,tsse,DissectionRegion,RegionName,MajorRegion,SubRegion,Slice,SubClass
Unnamed: 0_level_1,<chr>,<chr>,<int>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
245363,CEMBA190711_8E.AAACTACCAGAAATTGAGGAGG,CEMBA190711_8E,1,3.201943,22.07792,8E,CA-1,HPF,CA,slice: 8,ASC
245364,CEMBA190711_8E.AAACTACCAGAACCCACTATCT,CEMBA190711_8E,1,3.595717,24.71591,8E,CA-1,HPF,CA,slice: 8,OPC
245365,CEMBA190711_8E.AAACTACCAGGAAGGGTGGTAT,CEMBA190711_8E,1,3.289143,23.52941,8E,CA-1,HPF,CA,slice: 8,ASC
245366,CEMBA190711_8E.AAACTACCAGGCGAACGTAGAC,CEMBA190711_8E,1,3.349472,26.34508,8E,CA-1,HPF,CA,slice: 8,ASC
245367,CEMBA190711_8E.AAACTACCAGGGCCTCTGCAAT,CEMBA190711_8E,1,3.385070,23.86364,8E,CA-1,HPF,CA,slice: 8,OGC
245368,CEMBA190711_8E.AACTGCGCCAAAGTGTCGTGGA,CEMBA190711_8E,1,3.073718,32.00000,8E,CA-1,HPF,CA,slice: 8,ASC
245369,CEMBA190711_8E.AACTGCGCCACTTGAGACTCCG,CEMBA190711_8E,1,3.080987,23.77622,8E,CA-1,HPF,CA,slice: 8,OGC
245370,CEMBA190711_8E.AACTTCTGCTGGGAACAAGTCA,CEMBA190711_8E,1,3.053463,21.59091,8E,CA-1,HPF,CA,slice: 8,ASC
245371,CEMBA190711_8E.AACTTCTGCTTGCGAAGCTCAC,CEMBA190711_8E,1,3.171726,20.29598,8E,CA-1,HPF,CA,slice: 8,ASC
245372,CEMBA190711_8E.AAGCTATACCAGGCTCTGCTGA,CEMBA190711_8E,1,3.258158,22.11302,8E,CA-1,HPF,CA,slice: 8,OPC


In [6]:
# Create a Signac Fragment object pointing at the sorted fragment file and
# associate it with the selected cell identifiers.
frags <- CreateFragmentObject(
  path = "/mnt/disk1/xiaojk/data/hippocampus/fragments.sort.bed.gz",
  cells = cells,
  # NOTE(review): validation is disabled here; presumably this skips the check
  # that the requested cells appear in the fragment file — confirm against the
  # Signac documentation before relying on the cell list being complete.
  validate.fragments = FALSE
)

Computing hash



In [7]:
# Inspect the internal structure (slots) of the Fragment object.
str(frags)

Formal class 'Fragment' [package "Signac"] with 3 slots
  ..@ path : chr "/mnt/disk1/xiaojk/data/hippocampus/fragments.sort.bed.gz"
  ..@ hash : chr [1:2] "c3e4d94988849c6ee45f24379bd4e1f7" "19e8bc2f519d7036867e77231c50e608"
  ..@ cells: Named chr [1:16270] "CEMBA190711_8E.AAACTACCAGAAATTGAGGAGG" "CEMBA190711_8E.AAACTACCAGAACCCACTATCT" "CEMBA190711_8E.AAACTACCAGGAAGGGTGGTAT" "CEMBA190711_8E.AAACTACCAGGCGAACGTAGAC" ...
  .. ..- attr(*, "names")= chr [1:16270] "CEMBA190711_8E.AAACTACCAGAAATTGAGGAGG" "CEMBA190711_8E.AAACTACCAGAACCCACTATCT" "CEMBA190711_8E.AAACTACCAGGAAGGGTGGTAT" "CEMBA190711_8E.AAACTACCAGGCGAACGTAGAC" ...


In [8]:
# Remove the ".match.hash" attribute from the cell vector stored in the
# Fragment object's `cells` slot.
# NOTE(review): the reason is not visible here — presumably a cached lookup
# attribute interferes with later steps; confirm this is still required.
attr(frags@cells, ".match.hash") <- NULL

In [9]:
# Read the unified peak set (tab-separated, with a header row) and convert the
# resulting data frame directly into a GRanges object.
unified.peaks <- makeGRangesFromDataFrame(
  read.table(
    file = "/mnt/disk1/xiaojk/data/hippocampus/unified_peaks.bed",
    header = TRUE,
    sep = "\t"
  )
)

In [10]:
# Print the peak ranges to check the number of peaks and their coordinates.
unified.peaks

GRanges object with 88333 ranges and 0 metadata columns:
          seqnames          ranges strand
             <Rle>       <IRanges>  <Rle>
      [1]     chr1 3012371-3012843      *
      [2]     chr1 3060652-3061121      *
      [3]     chr1 3094790-3095399      *
      [4]     chr1 3113044-3113920      *
      [5]     chr1 3119264-3120628      *
      ...      ...             ...    ...
  [88329]     chrY   872791-873259      *
  [88330]     chrY   896966-898147      *
  [88331]     chrY 1009367-1010739      *
  [88332]     chrY 1114059-1114362      *
  [88333]     chrY 1244820-1246026      *
  -------
  seqinfo: 21 sequences from an unspecified genome; no seqlengths

In [11]:
# Quantify: this is the key step — count fragments overlapping each unified
# peak to build the peak-by-cell matrix.
counts <- FeatureMatrix(
  fragments = frags,
  features = unified.peaks,
#   cells = cells,
  process_n = 2000
)
#   cells = cells

Extracting reads overlapping genomic regions



In [12]:
# Print the resulting sparse matrix to check its dimensions and class.
counts

  [[ suppressing 32 column names 'CEMBA190711_8E.AAACTACCAGAAATTGAGGAGG', 'CEMBA190711_8E.AAACTACCAGAACCCACTATCT', 'CEMBA190711_8E.AAACTACCAGGAAGGGTGGTAT' ... ]]

  [[ suppressing 32 column names 'CEMBA190711_8E.AAACTACCAGAAATTGAGGAGG', 'CEMBA190711_8E.AAACTACCAGAACCCACTATCT', 'CEMBA190711_8E.AAACTACCAGGAAGGGTGGTAT' ... ]]

  [[ suppressing 32 column names 'CEMBA190711_8E.AAACTACCAGAAATTGAGGAGG', 'CEMBA190711_8E.AAACTACCAGAACCCACTATCT', 'CEMBA190711_8E.AAACTACCAGGAAGGGTGGTAT' ... ]]



88333 x 16270 sparse Matrix of class "ngCMatrix"
                                                                                             
chr1-3012371-3012843   . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ......
chr1-3060652-3061121   . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ......
chr1-3094790-3095399   . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ......
chr1-3113044-3113920   . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ......
chr1-3119264-3120628   . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ......
chr1-3216886-3217550   . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ......
chr1-3235287-3235667   . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ......
chr1-3251606-3251980   . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ......
chr1-3292454-3293041   . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ......
chr1-329937