This notebook provides example code for converting a sparse counts matrix in R (e.g. from a Seurat object) into a gzip-compressed Matrix Market file (.mtx.gz, the default output format of Cell Ranger) that can be read by the *CAT Python code.

Acceptable *CAT inputs are .mtx.gz and .h5ad counts matrices.

In [1]:
suppressPackageStartupMessages({
    library(tidyverse)
    library(data.table)
    library(Matrix)
    library(Seurat)
    library(R.utils)
})

“package ‘tidyr’ was built under R version 4.1.2”
“package ‘readr’ was built under R version 4.1.2”
“package ‘Matrix’ was built under R version 4.1.2”


In [15]:
# Location of the serialized Seurat object to convert.
seu_path <- "/Sparks2023/GSE206265_covid_flu.CITEseq.Seurat.obj.RDS"
# Deserialize the object into memory.
seu <- readRDS(file = seu_path)

In [20]:
# Print the structure of the Seurat object, limited to two nesting levels.
str(seu, max.level = 2)

Formal class 'Seurat' [package "SeuratObject"] with 13 slots
  ..@ assays      :List of 4
  ..@ meta.data   :'data.frame':	632100 obs. of  26 variables:
  ..@ active.assay: chr "CITE"
  ..@ active.ident: Factor w/ 32 levels "Mono_Classical",..: 1 2 3 1 3 3 1 5 1 3 ...
  .. ..- attr(*, "names")= chr [1:632100] "Batch1_COVFLU_CITE_multi5P12_ACTGATGGTTCAGCGC-1" "Batch1_COVFLU_CITE_multi5P16_CCCAGTTTCGGCTTGG-1" "Batch1_COVFLU_CITE_multi5P09_GCATGCGCAGCCTATA-1" "Batch1_COVFLU_CITE_multi5P11_CATCCACAGCTCTCGG-1" ...
  ..@ graphs      : list()
  ..@ neighbors   : list()
  ..@ reductions  :List of 1
  ..@ images      : list()
  ..@ project.name: chr "SeuratProject"
  ..@ misc        : list()
  ..@ version     :Classes 'package_version', 'numeric_version'  hidden list of 1
  ..@ commands    :List of 5
  ..@ tools       : list()


In [22]:
# List the column names of the per-cell metadata table.
colnames(seu@meta.data)

In [67]:
# Pull the count matrices stored in the RNA and CITE assays.
rna <- seu@assays$RNA@counts
adt <- seu@assays$CITE@counts

# rbind() stacks rows by column position and does not compare column names,
# so verify that both assays list the same cells in the same order first;
# otherwise counts would be silently assigned to the wrong barcodes.
stopifnot(identical(colnames(rna), colnames(adt)))

# Stack both feature sets into a single features x cells sparse matrix.
counts <- rbind(rna, adt)

In [80]:
# Preview the first rows of the combined counts matrix.
head(counts)

   [[ suppressing 34 column names ‘Batch1_COVFLU_CITE_multi5P09_GCATGCGCAGCCTATA-1’, ‘Batch1_COVFLU_CITE_multi5P02_TTGTAGGCAGTATCTG-1’, ‘Batch1_COVFLU_CITE_multi5P02_GATGCTACAGCTTAAC-1’ ... ]]



6 x 336739 sparse Matrix of class "dgCMatrix"
                                                                               
RP11-34P13.7  . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
AL627309.1    . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
RP11-34P13.14 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
RP11-34P13.9  . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
AP006222.2    . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
RP4-669L17.10 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
                      
RP11-34P13.7  . ......
AL627309.1    . ......
RP11-34P13.14 . ......
RP11-34P13.9  . ......
AP006222.2    . ......
RP4-669L17.10 . ......

 .....suppressing 336705 columns in show(); maybe adjust 'options(max.print= *, width = *)'
 ..............................

In [89]:
# Directory that receives the cellranger-style triplet
# (matrix.mtx.gz, barcodes.tsv.gz, features.tsv.gz).
out_dir <- "/Sparks2023/T_fromSeurat_RNA_ADT_HTO"
# The writers below do not create missing directories, so make sure it exists.
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)

# Output counts matrix
mtx_path <- file.path(out_dir, "matrix.mtx")
writeMM(counts, mtx_path)
# overwrite = TRUE: gzip() otherwise stops when the .gz from an earlier run
# already exists, leaving the cell half-executed on re-runs.
gzip(mtx_path, overwrite = TRUE)

# Output cell barcodes (one per line, no header)
barcodes <- colnames(counts)
barcodes_path <- file.path(out_dir, "barcodes.tsv")
write_delim(data.frame(barcodes), barcodes_path,
            delim = "\t", col_names = FALSE)
gzip(barcodes_path, overwrite = TRUE)

# Output feature names (tab-separated: id, name, type; no header)
gene_names <- rownames(counts)
# NOTE(review): every row, including the features that came from the CITE
# assay, is labelled "Gene Expression". Kept as-is because a downstream
# loader may filter on this column -- confirm this is intended.
features <- data.frame(gene_id = gene_names,
                       gene_name = gene_names,
                       type = "Gene Expression")
features_path <- file.path(out_dir, "features.tsv")
write_delim(features, features_path,
            delim = "\t", col_names = FALSE)
gzip(features_path, overwrite = TRUE)

In [105]:
# Output cell metadata
# Tab-separated, unquoted text file. Row names are written as the first
# column; per write.table's documented behaviour the header line then has no
# entry for that column.
write.table(
  seu@meta.data,
  file = "/Sparks2023/T_fromSeurat_metadata.txt",
  sep = "\t",
  quote = FALSE,
  row.names = TRUE
)