In [2]:
args <- commandArgs(T) 

In [3]:
library(here)
library(rjson)
library(Matrix)
library(tidyverse)
library(dplyr)
library(DropletUtils) 

library(Seurat)
library(Signac)
library(SeuratDisk)
library(SeuratData)

library(rhdf5)
library(anndata)

In [4]:
# InstallData("stxBrain")

In [5]:
# brain <- LoadData("stxBrain", type = "anterior1")

# convert spatial h5ad to rds

## lymph node

In [6]:
input_path <- "/home/wsg/BM/data/SPATIAL/RNA+ADT/lymph_node"
output_path <- "/home/wsg/BM/data/SPATIAL/RNA+ADT/lymph_node"
# dataset <- unlist(fromJSON(file = "/home/wsg/BM/data/SPATIAL/RNA_ADT/lymph_node/lymph_node.json"))

In [12]:
h5ad <- read_h5ad(file = paste0(input_path, "/lymph_node-CITE_seq_RNA-counts.h5ad"))

In [13]:
h5ad$obs

Unnamed: 0_level_0,batch
Unnamed: 0_level_1,<fct>
AACACTTGGCAAGGAA-1_1,A1
AACAGGATTCATAGTT-1_1,A1
AACAGGTTATTGCACC-1_1,A1
AACAGGTTCACCGAAG-1_1,A1
AACAGTCAGGCTCCGC-1_1,A1
AACAGTCGTGTCGCGG-1_1,A1
AACATACTCATATGCG-1_1,A1
AACATAGTCTATCTAC-1_1,A1
AACATATTCTTGCGAA-1_1,A1
AACATCGCGTGACCAC-1_1,A1


In [14]:
metadata <- h5ad$obs
metadata['barcode'] <- rownames(metadata)

In [15]:
spatial_axis <- as.data.frame(h5ad$obsm$spatial)
colnames(spatial_axis) = c("X", "Y")
metadata <- cbind(metadata, spatial_axis)
head(metadata)

Unnamed: 0_level_0,batch,barcode,X,Y
Unnamed: 0_level_1,<fct>,<chr>,<dbl>,<dbl>
AACACTTGGCAAGGAA-1_1,A1,AACACTTGGCAAGGAA-1_1,47,71
AACAGGATTCATAGTT-1_1,A1,AACAGGATTCATAGTT-1_1,49,43
AACAGGTTATTGCACC-1_1,A1,AACAGGTTATTGCACC-1_1,28,86
AACAGGTTCACCGAAG-1_1,A1,AACAGGTTCACCGAAG-1_1,51,41
AACAGTCAGGCTCCGC-1_1,A1,AACAGTCAGGCTCCGC-1_1,24,6
AACAGTCGTGTCGCGG-1_1,A1,AACAGTCGTGTCGCGG-1_1,44,124


In [18]:
write_csv(metadata, here(output_path, "metadata.csv"))

In [16]:
process = "raw"
RNA_counts <- t(h5ad$X)
dim(RNA_counts)

In [17]:
RNA_counts <- as(RNA_counts, 'CsparseMatrix')
RNA_counts

  [[ suppressing 32 column names ‘AACACTTGGCAAGGAA-1_1’, ‘AACAGGATTCATAGTT-1_1’, ‘AACAGGTTATTGCACC-1_1’ ... ]]

  [[ suppressing 32 column names ‘AACACTTGGCAAGGAA-1_1’, ‘AACAGGATTCATAGTT-1_1’, ‘AACAGGTTATTGCACC-1_1’ ... ]]

  [[ suppressing 32 column names ‘AACACTTGGCAAGGAA-1_1’, ‘AACAGGATTCATAGTT-1_1’, ‘AACAGGTTATTGCACC-1_1’ ... ]]



18085 x 6843 sparse Matrix of class "dgCMatrix"
                                                                                         
ENSG00000187634 . . . .  . .  . . .  .  . . . .  . . . . . . . . .  1 .  .  .  . . .  . .
ENSG00000188976 . . . .  . .  . . .  .  . . . 1  . . . . . . . . .  1 .  2  .  . . .  . .
ENSG00000187961 . . . .  1 .  1 . .  .  . 1 . .  . . . . . . . . .  . .  .  .  . . 1  . .
ENSG00000187583 . . . .  1 .  . . .  .  . . . .  . . . . . . . . .  . .  .  .  1 . 1  . .
ENSG00000187642 . . . .  . .  . . .  .  . . . .  . . . . . . . . .  . .  .  .  . . .  . .
ENSG00000188290 . . . .  1 .  . . .  .  . . . .  . . . . . . . . 1  2 .  1  .  . . .  . .
ENSG00000187608 . . . .  . 1  . . .  .  . . . .  1 . . . . . . . .  . .  .  .  1 . .  . .
ENSG00000188157 . . . .  . .  . . .  .  . . . .  1 . 2 . . . . . 1  . .  .  .  . . 1  . .
ENSG00000237330 1 . . .  . .  . . .  .  . . . .  . . . . . . . . .  1 .  .  .  . . .  . .
ENSG00000131591 . . . .  . .  . . .  .  . . . .  . .

In [10]:
# Make Dir
if (!dir.exists(output_path)){
    dir.create(output_path)
}

# save raw rna to mtx
data_path <- here(output_path,
                 paste("lymph_node-CITE_seq", process,
                       "RNA", "counts.mtx", sep = "-"))
write10xCounts(x = RNA_counts, path = data_path, version = "3")
write_csv(metadata, here(output_path, "metadata.csv"))

# save raw rna to rds
saveRDS(RNA_counts, 
        file = here(output_path, 
                    paste("lymph_node-CITE_seq", process,
                          "RNA", "counts.rds", sep = "-")))
# Create Seurat Object
RNA_subset <- CreateSeuratObject(counts = RNA_counts, meta.data = metadata)

# save Seurat to h5Seurat
SaveH5Seurat(RNA_subset, overwrite = TRUE, 
             filename = here(output_path, 
                             paste("lymph_node-CITE_seq", process,
                                   "RNA", "counts.h5Seurat", sep = "-")))

# # Convert h5Seurat to h5ad
# setwd(output_path)
# Convert(paste("lymph_node-CITE_seq", process, "RNA", "counts.h5Seurat", sep = "-"), 
#         dest = "h5ad")

Creating h5Seurat file for version 3.1.5.9900

Adding counts for RNA

Adding data for RNA

No variable features found for RNA

No feature-level metadata found for RNA



In [11]:
h5ad <- read_h5ad(file = paste0(input_path, "/lymph_node-CITE_seq-raw-ADT-counts.h5ad"))

In [12]:
h5ad

AnnData object with n_obs × n_vars = 6843 × 29
    var: 'gene_ids', 'gene_ids_x', 'feature_types', 'genome'
    obsm: 'spatial'

In [13]:
metadata <- h5ad$obs
metadata['barcode'] <- rownames(metadata)

In [14]:
spatial_axis <- as.data.frame(h5ad$obsm$spatial)
colnames(spatial_axis) = c("X", "Y")
metadata <- cbind(metadata, spatial_axis)
head(metadata)

Unnamed: 0_level_0,barcode,X,Y
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>
AACACTTGGCAAGGAA-1_1,AACACTTGGCAAGGAA-1_1,47,71
AACAGGATTCATAGTT-1_1,AACAGGATTCATAGTT-1_1,49,43
AACAGGTTATTGCACC-1_1,AACAGGTTATTGCACC-1_1,28,86
AACAGGTTCACCGAAG-1_1,AACAGGTTCACCGAAG-1_1,51,41
AACAGTCAGGCTCCGC-1_1,AACAGTCAGGCTCCGC-1_1,24,6
AACAGTCGTGTCGCGG-1_1,AACAGTCGTGTCGCGG-1_1,44,124


In [15]:
process = "raw"
ADT_counts <- t(h5ad$X)
ADT_counts <- as(ADT_counts, "sparseMatrix")
dim(ADT_counts)

In [16]:
ADT_counts <- as(ADT_counts, 'CsparseMatrix')
ADT_counts

  [[ suppressing 33 column names ‘AACACTTGGCAAGGAA-1_1’, ‘AACAGGATTCATAGTT-1_1’, ‘AACAGGTTATTGCACC-1_1’ ... ]]



29 x 6843 sparse Matrix of class "dgCMatrix"
                                                                               
ENSG00000177575 25227 21833 19010 21842 18494  22857  23373 35757  27142  21875
ENSG00000117322 28409 23666 30989 26140 46666  11428 109390 30606  17142  20625
ENSG00000132646  7045  8833  3516  4824  7043   4285  15040 22424   8571   4375
ENSG00000026025 19318 22000 22417 21578 29086  65714  25853 67272  47142  63750
ENSG00000186081   681   333   439   438   268   1428    569   606      .    625
ENSG00000129226 12954 12500  9780  5438  8924   4285  18943 28484   2857   7500
ENSG00000124469  3636  1166  4615   438   913   4285    569  1212   5714  15000
ENSG00000204287  2727  5833  3516 21842 28978   1428  53455  7575      .   1875
ENSG00000196092  1590  2500  2197  1666  5376   1428   6341  5454      .      .
ENSG00000115884  1136  2500  1098  2105  2365   2857   4430  3333      .    625
ENSG00000153563 18863 12000 13956 28333 22043   1428  17073 20303  10000   

In [22]:
# ADT_counts
write_csv(metadata, here(output_path, "metadata.csv"))

In [17]:
# save raw adt to mtx
data_path <- here(output_path,
                 paste("lymph_node-CITE_seq", process,
                       "ADT", "counts.mtx", sep = "-"))
write10xCounts(x = ADT_counts, path = data_path, version = "3")
# write_csv(metadata, here(output_path, "metadata.csv"))

# save raw adt to rds
saveRDS(ADT_counts, 
        file = here(output_path, 
                    paste("lymph_node-CITE_seq", process,
                          "ADT", "counts.rds", sep = "-")))

ADT_subset <- CreateSeuratObject(counts = ADT_counts, assay = "ADT", meta.data = metadata)

# save raw rna to h5Seurat
SaveH5Seurat(ADT_subset, overwrite = TRUE, 
             filename = here(output_path, 
                             paste("lymph_node-CITE_seq", process,
                                   "ADT", "counts.h5Seurat", sep = "-")))

# # save raw adt to h5ad
# setwd(output_path)
# Convert(paste("lymph_node-CITE_seq", process, "ADT", "counts.h5Seurat", sep = "-"), 
#         dest = "h5ad")

Creating h5Seurat file for version 3.1.5.9900

Adding counts for ADT

Adding data for ADT

No variable features found for ADT

No feature-level metadata found for ADT



In [28]:
process = "p10"
# save raw rna to rds
RNA_subset_counts <- HSPC_RNA_p10$X
saveRDS(RNA_subset_counts, 
        file = here(output_path, 
                    paste(dataset["data_name"], 
                          dataset["task_type"], 
                          process,
                          "RNA", "counts.rds", sep = "-")))

# metadata
metadata <- HSPC_RNA_p10$obs
# Create Seurat Object
RNA_subset <- CreateSeuratObject(counts = RNA_subset_counts, meta.data = metadata)

# save Seurat to h5Seurat
SaveH5Seurat(RNA_subset, overwrite = TRUE, 
             filename = here(output_path, 
                             paste(dataset["data_name"], 
                                   dataset["task_type"], 
                                   process, 
                                   "RNA", "counts.h5Seurat", sep = "-")))

“Some cells in meta.data not present in provided counts matrix”
Creating h5Seurat file for version 3.1.5.9900

Adding counts for RNA

Adding data for RNA

No variable features found for RNA

No feature-level metadata found for RNA



Unnamed: 0_level_0,V1,V2
Unnamed: 0_level_1,<dbl>,<dbl>
AACACTTGGCAAGGAA-1_1,47,71
AACAGGATTCATAGTT-1_1,49,43
AACAGGTTATTGCACC-1_1,28,86
AACAGGTTCACCGAAG-1_1,51,41
AACAGTCAGGCTCCGC-1_1,24,6
AACAGTCGTGTCGCGG-1_1,44,124
AACATACTCATATGCG-1_1,50,6
AACATAGTCTATCTAC-1_1,58,26
AACATATTCTTGCGAA-1_1,41,123
AACATCGCGTGACCAC-1_1,46,124


In [None]:
# ATAC

In [31]:
input_path <- "/home/wsg/BM/data/HSPC/RNA+ATAC/p10"
output_path <- "/home/wsg/BM/data/HSPC/RNA+ATAC/p10"
dataset <- unlist(fromJSON(file = "/home/wsg/BM/data/HSPC/RNA+ATAC/p10/p10.json"))

In [122]:
h5Seurat@assays

$Spatial_RNA
Assay data with 18085 features for 6843 cells
First 10 features:
 ENSG00000187634, ENSG00000188976, ENSG00000187961, ENSG00000187583,
ENSG00000187642, ENSG00000188290, ENSG00000187608, ENSG00000188157,
ENSG00000237330, ENSG00000131591 


In [32]:
HSPC_ATAC_p10 <- read_h5ad(file = paste0(output_path, "/HSPC-multiome-p10-ATAC-peaks.h5ad"))
HSPC_ATAC_p10

AnnData object with n_obs × n_vars = 10594 × 228942
    obs: 'day', 'donor', 'cell_type', 'technology', 'barcode'

In [33]:
process = "p10"
# save raw rna to rds
ATAC_subset_counts <- HSPC_ATAC_p10$X
saveRDS(ATAC_subset_counts, 
        file = here(output_path, 
                    paste(dataset["data_name"], 
                          dataset["task_type"], 
                          process,
                          "ATAC", "peaks.rds", sep = "-")))

# metadata
metadata <- HSPC_ATAC_p10$obs
# Create Seurat Object
ATAC_subset <- CreateSeuratObject(counts = ATAC_subset_counts, meta.data = metadata)

# save Seurat to h5Seurat
SaveH5Seurat(ATAC_subset, overwrite = TRUE, 
             filename = here(output_path, 
                             paste(dataset["data_name"], 
                                   dataset["task_type"], 
                                   process, 
                                   "ATAC", "peaks.h5Seurat", sep = "-")))

“Some cells in meta.data not present in provided counts matrix”
Creating h5Seurat file for version 3.1.5.9900

Adding counts for RNA

Adding data for RNA

No variable features found for RNA

No feature-level metadata found for RNA



## thymus

In [19]:
input_path <- "/home/wsg/BM/data/SPATIAL/RNA+ADT/thymus"
output_path <- "/home/wsg/BM/data/SPATIAL/RNA+ADT/thymus"

In [20]:
h5ad <- read_h5ad(file = paste0(input_path, "/thymus-CITE_seq-raw-RNA-counts.h5ad"))

In [21]:
h5ad$obs

Unnamed: 0_level_0,orig.ident,x,y,batch
Unnamed: 0_level_1,<fct>,<dbl>,<dbl>,<fct>
0_22_1,sample,8919,13270,thymus_1
0_33_1,sample,8919,14370,thymus_1
0_40_1,sample,8919,15070,thymus_1
0_41_1,sample,8919,15170,thymus_1
0_42_1,sample,8919,15270,thymus_1
10_10_1,sample,9919,12070,thymus_1
10_11_1,sample,9919,12170,thymus_1
10_12_1,sample,9919,12270,thymus_1
10_13_1,sample,9919,12370,thymus_1
10_14_1,sample,9919,12470,thymus_1


In [22]:
metadata <- h5ad$obs
metadata['barcode'] <- rownames(metadata)

In [23]:
spatial_axis <- as.data.frame(h5ad$obsm$spatial)
colnames(spatial_axis) = c("X", "Y")
metadata <- cbind(metadata, spatial_axis)
head(metadata)

Unnamed: 0_level_0,orig.ident,x,y,batch,barcode,X,Y
Unnamed: 0_level_1,<fct>,<dbl>,<dbl>,<fct>,<chr>,<dbl>,<dbl>
0_22_1,sample,8919,13270,thymus_1,0_22_1,15319,17370
0_33_1,sample,8919,14370,thymus_1,0_33_1,13319,14170
0_40_1,sample,8919,15070,thymus_1,0_40_1,10419,12170
0_41_1,sample,8919,15170,thymus_1,0_41_1,14519,15470
0_42_1,sample,8919,15270,thymus_1,0_42_1,13519,15670
10_10_1,sample,9919,12070,thymus_1,10_10_1,11119,12370


In [24]:
write_csv(metadata, here(output_path, "metadata.csv"))

In [23]:
process = "raw"
RNA_counts <- t(h5ad$X)
dim(RNA_counts)

In [24]:
RNA_counts <- as(RNA_counts, 'CsparseMatrix')
RNA_counts

  [[ suppressing 32 column names ‘0_22_1’, ‘0_33_1’, ‘0_40_1’ ... ]]

  [[ suppressing 32 column names ‘0_22_1’, ‘0_33_1’, ‘0_40_1’ ... ]]

  [[ suppressing 32 column names ‘0_22_1’, ‘0_33_1’, ‘0_40_1’ ... ]]



26857 x 17824 sparse Matrix of class "dgCMatrix"
                                                                                    
Gm1992        . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ......
Gm37381       . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ......
Rp1           . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ......
Xkr4          . . . . . . . . . . . . . . . . . . 1 . . . . . . . . . . . . . ......
Sox17         . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ......
Mrpl15        . . . . 2 . . . . . 1 . . . . . . 1 . . . . 1 1 . . . . . . . . ......
Rgs20         . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ......
Tcea1         . . . . . . . . . . . . . . . . . . . . . . . 1 . . . . . . . . ......
Gm37988       . 1 . . . . . . . . 1 . . . . . . . . . . . . . . . . . . . . . ......
Oprk1         . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ......
4732440D04Rik . 

In [25]:
# Make Dir
if (!dir.exists(output_path)){
    dir.create(output_path)
}

# save raw rna to mtx
data_path <- here(output_path,
                 paste("thymus-CITE_seq", process,
                       "RNA", "counts.mtx", sep = "-"))
write10xCounts(x = RNA_counts, path = data_path, version = "3")
write_csv(metadata, here(output_path, "metadata.csv"))

# save raw rna to rds
saveRDS(RNA_counts, 
        file = here(output_path, 
                    paste("thymus-CITE_seq", process,
                          "RNA", "counts.rds", sep = "-")))
# Create Seurat Object
RNA_subset <- CreateSeuratObject(counts = RNA_counts, meta.data = metadata)

# save Seurat to h5Seurat
SaveH5Seurat(RNA_subset, overwrite = TRUE, 
             filename = here(output_path, 
                             paste("thymus-CITE_seq", process,
                                   "RNA", "counts.h5Seurat", sep = "-")))

# # Convert h5Seurat to h5ad
# setwd(output_path)
# Convert(paste("thymus-CITE_seq", process, "RNA", "counts.h5Seurat", sep = "-"), 
#         dest = "h5ad")

Creating h5Seurat file for version 3.1.5.9900

Adding counts for RNA

Adding data for RNA

No variable features found for RNA

No feature-level metadata found for RNA



In [27]:
h5ad <- read_h5ad(file = paste0(input_path, "/thymus-CITE_seq-raw-ADT-counts.h5ad"))

In [28]:
h5ad

AnnData object with n_obs × n_vars = 17824 × 30
    obs: 'orig.ident', 'x', 'y'
    var: 'adt_name', 'Symbol', 'ADT', 'ADT_Type'
    obsm: 'spatial'

In [29]:
metadata <- h5ad$obs
metadata['barcode'] <- rownames(metadata)

In [30]:
spatial_axis <- as.data.frame(h5ad$obsm$spatial)
colnames(spatial_axis) = c("X", "Y")
metadata <- cbind(metadata, spatial_axis)
head(metadata)

Unnamed: 0_level_0,orig.ident,x,y,barcode,X,Y
Unnamed: 0_level_1,<fct>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>
0_22_1,sample,8917,13284,0_22_1,15317,17384
0_33_1,sample,8917,14384,0_33_1,13317,14184
0_40_1,sample,8917,15084,0_40_1,10417,12184
0_41_1,sample,8917,15184,0_41_1,14517,15484
0_42_1,sample,8917,15284,0_42_1,13517,15684
10_10_1,sample,9917,12084,10_10_1,11117,12384


In [31]:
process = "raw"
ADT_counts <- t(h5ad$X)
ADT_counts <- as(ADT_counts, "sparseMatrix")
dim(ADT_counts)

In [32]:
ADT_counts <- as(ADT_counts, 'CsparseMatrix')
ADT_counts

  [[ suppressing 33 column names ‘0_22_1’, ‘0_33_1’, ‘0_40_1’ ... ]]



30 x 17824 sparse Matrix of class "dgCMatrix"
                                                                            
Cd4     705 8045 9120 5109 6495 3323 1787 10064 8124 5498 8834 8509 424 9501
Itgb1   248 1709 1348 1626 1357 1631  504  1883 1992  901 2754 1589 430 1228
Cd19     21   11   26   16   11   24   22    14   20   14   22   14  41   13
Pecam1   55  374  141  193  232  162   45   243  166   95  597  167  84  165
Cd38     38   78   46   42   52   53   29    54   32   25   89   41 107   57
Siglec1  31   21   46   88   38   56   50    37   35   46   74   37  78   24
Cd163     .    .    .    .    .    .    .     .    .    .    .    .   .    .
Nrp1      .    .    .    .    .    .    .     .    .    .    .    .   .    .
Cd226     .    .    1    .    .    .    .     .    .    .    .    .   .    .
Cd8a      .    .    .    .    .    .    .     .    .    .    1    .   .    .
Cd14      .    1    .    .    .    .    .     .    1    .    1    .   .    .
Ncr1      .    .    .    .    

In [33]:
# save raw adt to mtx
data_path <- here(output_path,
                 paste("thymus-CITE_seq", process,
                       "ADT", "counts.mtx", sep = "-"))
write10xCounts(x = ADT_counts, path = data_path, version = "3")
# write_csv(metadata, here(output_path, "metadata.csv"))

# save raw adt to rds
saveRDS(ADT_counts, 
        file = here(output_path, 
                    paste("thymus-CITE_seq", process,
                          "ADT", "counts.rds", sep = "-")))

ADT_subset <- CreateSeuratObject(counts = ADT_counts, assay = "ADT", meta.data = metadata)

# save raw rna to h5Seurat
SaveH5Seurat(ADT_subset, overwrite = TRUE, 
             filename = here(output_path, 
                             paste("thymus-CITE_seq", process,
                                   "ADT", "counts.h5Seurat", sep = "-")))

# # save raw adt to h5ad
# setwd(output_path)
# Convert(paste("thymus-CITE_seq", process, "ADT", "counts.h5Seurat", sep = "-"), 
#         dest = "h5ad")

“Non-unique features (rownames) present in the input matrix, making unique”
Creating h5Seurat file for version 3.1.5.9900

Adding counts for ADT

Adding data for ADT

No variable features found for ADT

No feature-level metadata found for ADT



## spleen

In [25]:
input_path <- "/home/wsg/BM/data/SPATIAL/RNA+ADT/spleen"
output_path <- "/home/wsg/BM/data/SPATIAL/RNA+ADT/spleen"

In [26]:
h5ad <- read_h5ad(file = paste0(input_path, "/spleen-CITE_seq-raw-RNA-counts.h5ad"))

In [27]:
metadata <- h5ad$obs
metadata['barcode'] <- rownames(metadata)

In [28]:
spatial_axis <- as.data.frame(h5ad$obsm$spatial)
colnames(spatial_axis) = c("X", "Y")
metadata <- cbind(metadata, spatial_axis)
head(metadata)

Unnamed: 0_level_0,batch,barcode,X,Y
Unnamed: 0_level_1,<fct>,<chr>,<dbl>,<dbl>
AAACACCAATAACTGC-1_1,spleen_1,AAACACCAATAACTGC-1_1,59,19
AAACAGAGCGACTCCT-1_1,spleen_1,AAACAGAGCGACTCCT-1_1,14,94
AAACAGCTTTCAGAAG-1_1,spleen_1,AAACAGCTTTCAGAAG-1_1,43,9
AAACAGGGTCTATATT-1_1,spleen_1,AAACAGGGTCTATATT-1_1,47,13
AAACCGGGTAGGTACC-1_1,spleen_1,AAACCGGGTAGGTACC-1_1,42,28
AAACCGTTCGTCCAGG-1_1,spleen_1,AAACCGTTCGTCCAGG-1_1,52,42


In [31]:
write_csv(metadata, here(output_path, "metadata.csv"))

In [29]:
process = "raw"
RNA_counts <- t(h5ad$X)
dim(RNA_counts)

In [30]:
RNA_counts <- as(RNA_counts, 'CsparseMatrix')
RNA_counts

  [[ suppressing 32 column names ‘AAACACCAATAACTGC-1_1’, ‘AAACAGAGCGACTCCT-1_1’, ‘AAACAGCTTTCAGAAG-1_1’ ... ]]

  [[ suppressing 32 column names ‘AAACACCAATAACTGC-1_1’, ‘AAACAGAGCGACTCCT-1_1’, ‘AAACAGCTTTCAGAAG-1_1’ ... ]]

  [[ suppressing 32 column names ‘AAACACCAATAACTGC-1_1’, ‘AAACAGAGCGACTCCT-1_1’, ‘AAACAGCTTTCAGAAG-1_1’ ... ]]



32285 x 5336 sparse Matrix of class "dgCMatrix"
                                                                                            
ENSMUSG00000051951  . .  .  .  .  .  .  . .  .  .  .  .  .  .  .  .  .  .  .  .  .  . . .  .
ENSMUSG00000089699  . .  .  .  .  .  .  . .  .  .  .  .  .  .  .  .  .  .  .  .  .  . . .  .
ENSMUSG00000102331  . .  .  .  .  .  .  . .  .  .  .  .  .  .  .  .  .  .  .  .  .  . . .  .
ENSMUSG00000102343  . .  .  .  .  .  .  . .  .  .  .  .  .  .  .  .  .  .  .  .  .  . . .  .
ENSMUSG00000025900  . .  .  .  .  .  .  . .  .  .  .  .  .  .  .  .  .  .  .  .  .  . . .  .
ENSMUSG00000025902  . .  .  .  .  .  .  . .  .  .  .  1  .  .  .  .  1  .  .  .  .  . . .  .
ENSMUSG00000104238  . .  .  .  .  .  .  . .  .  .  .  .  .  .  .  .  .  .  .  .  .  . . .  .
ENSMUSG00000104328  . .  .  .  .  .  .  . .  .  .  .  .  .  .  .  .  .  .  .  .  .  . . .  .
ENSMUSG00000033845  3 1  .  1  .  2  .  1 .  1  1  .  .  .  .  .  3  1  1  .  2  1  1 1 .  2
ENSMUSG00000025903  . 

In [47]:
# Make Dir
if (!dir.exists(output_path)){
    dir.create(output_path)
}

# save raw rna to mtx
data_path <- here(output_path,
                 paste("spleen-CITE_seq", process,
                       "RNA", "counts.mtx", sep = "-"))
write10xCounts(x = RNA_counts, path = data_path, version = "3")
write_csv(metadata, here(output_path, "metadata.csv"))

# save raw rna to rds
saveRDS(RNA_counts, 
        file = here(output_path, 
                    paste("spleen-CITE_seq", process,
                          "RNA", "counts.rds", sep = "-")))
# Create Seurat Object
RNA_subset <- CreateSeuratObject(counts = RNA_counts, meta.data = metadata)

# save Seurat to h5Seurat
SaveH5Seurat(RNA_subset, overwrite = TRUE, 
             filename = here(output_path, 
                             paste("spleen-CITE_seq", process,
                                   "RNA", "counts.h5Seurat", sep = "-")))

# # Convert h5Seurat to h5ad
# setwd(output_path)
# Convert(paste("spleen-CITE_seq", process, "RNA", "counts.h5Seurat", sep = "-"), 
#         dest = "h5ad")

Creating h5Seurat file for version 3.1.5.9900

Adding counts for RNA

Adding data for RNA

No variable features found for RNA

No feature-level metadata found for RNA



In [48]:
h5ad <- read_h5ad(file = paste0(input_path, "/spleen-CITE_seq-raw-ADT-counts.h5ad"))

In [49]:
metadata <- h5ad$obs
metadata['barcode'] <- rownames(metadata)

In [50]:
spatial_axis <- as.data.frame(h5ad$obsm$spatial)
colnames(spatial_axis) = c("X", "Y")
metadata <- cbind(metadata, spatial_axis)
head(metadata)

Unnamed: 0_level_0,barcode,X,Y
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>
AAACACCAATAACTGC-1_1,AAACACCAATAACTGC-1_1,59,19
AAACAGAGCGACTCCT-1_1,AAACAGAGCGACTCCT-1_1,14,94
AAACAGCTTTCAGAAG-1_1,AAACAGCTTTCAGAAG-1_1,43,9
AAACAGGGTCTATATT-1_1,AAACAGGGTCTATATT-1_1,47,13
AAACCGGGTAGGTACC-1_1,AAACCGGGTAGGTACC-1_1,42,28
AAACCGTTCGTCCAGG-1_1,AAACCGTTCGTCCAGG-1_1,52,42


In [51]:
process = "raw"
ADT_counts <- t(h5ad$X)
ADT_counts <- as(ADT_counts, "sparseMatrix")
dim(ADT_counts)

In [52]:
ADT_counts <- as(ADT_counts, 'CsparseMatrix')
ADT_counts

  [[ suppressing 5336 column names ‘AAACACCAATAACTGC-1_1’, ‘AAACAGAGCGACTCCT-1_1’, ‘AAACAGCTTTCAGAAG-1_1’ ... ]]



10 x 5336 sparse Matrix of class "dgCMatrix"
                                                                             
ENSMUSG00000025809 103 38 124  56  91  40  28  52  74  96  91 111  75  59  49
ENSMUSG00000023274  57 25  74  69  72 173  69 104  56  70  60  58  82  70  71
ENSMUSG00000030724  66 39  71 124 100  59 123 124  91  91  95  76  44 111 130
ENSMUSG00000026814  87 31 125  66  53  41  37  39  69  94  87  88  65  51  78
ENSMUSG00000053977 109 31 126  70 125  98  81 107  76  98  90 113  95  77  91
ENSMUSG00000020717  98 31  98  60  61  58  54  64  75 105  70  98  53  53  93
ENSMUSG00000008845 161 37 109  31  46  20  18  30  87  88  63 126  40  26  59
ENSMUSG00000029084 216 90 216 268 249 122 160 238 194 216 208 199 153 170 269
ENSMUSG00000024673  35 12  27  17  21  18  22  23  16  37  15  28  14  17  26
ENSMUSG00000030786  62 17  72  41  52  35  29  32  56  73  72  63  59  56  61
                                                                             
ENSMUSG00000025809 

In [53]:
# save raw adt to mtx
data_path <- here(output_path,
                 paste("spleen-CITE_seq", process,
                       "ADT", "counts.mtx", sep = "-"))
write10xCounts(x = ADT_counts, path = data_path, version = "3")
# write_csv(metadata, here(output_path, "metadata.csv"))

# save raw adt to rds
saveRDS(ADT_counts, 
        file = here(output_path, 
                    paste("spleen-CITE_seq", process,
                          "ADT", "counts.rds", sep = "-")))

ADT_subset <- CreateSeuratObject(counts = ADT_counts, assay = "ADT", meta.data = metadata)

# save raw rna to h5Seurat
SaveH5Seurat(ADT_subset, overwrite = TRUE, 
             filename = here(output_path, 
                             paste("spleen-CITE_seq", process,
                                   "ADT", "counts.h5Seurat", sep = "-")))

# # save raw adt to h5ad
# setwd(output_path)
# Convert(paste("spleen-CITE_seq", process, "ADT", "counts.h5Seurat", sep = "-"), 
#         dest = "h5ad")

Creating h5Seurat file for version 3.1.5.9900

Adding counts for ADT

Adding data for ADT

No variable features found for ADT

No feature-level metadata found for ADT



## CITE-seq

In [7]:
SPOTS_cite_RNA_path = "/home/wsg/BM/data/SPOTS/GEO/GSE198353_spleen_rep_1_filtered_feature_bc_matrix.h5"

In [13]:
library(rhdf5)

# 打开并读取HDF5文件
file_path <- SPOTS_cite_RNA_path
dataset <- h5read(file = file_path, name = "matrix")

In [16]:
str(dataset)

List of 6
 $ barcodes: chr [1:2653(1d)] "AAACACCAATAACTGC-1" "AAACAGAGCGACTCCT-1" "AAACAGCTTTCAGAAG-1" "AAACAGGGTCTATATT-1" ...
 $ data    : int [1:8956698(1d)] 3 2 1 1 2 1 10 2 1 1 ...
 $ features:List of 8
  ..$ _all_tag_keys: chr [1:4(1d)] "genome" "read" "pattern" "sequence"
  ..$ feature_type : chr [1:32306(1d)] "Gene Expression" "Gene Expression" "Gene Expression" "Gene Expression" ...
  ..$ genome       : chr [1:32306(1d)] "mm10" "mm10" "mm10" "mm10" ...
  ..$ id           : chr [1:32306(1d)] "ENSMUSG00000051951" "ENSMUSG00000089699" "ENSMUSG00000102331" "ENSMUSG00000102343" ...
  ..$ name         : chr [1:32306(1d)] "Xkr4" "Gm1992" "Gm19938" "Gm37381" ...
  ..$ pattern      : chr [1:32306(1d)] "" "" "" "" ...
  ..$ read         : chr [1:32306(1d)] "" "" "" "" ...
  ..$ sequence     : chr [1:32306(1d)] "" "" "" "" ...
 $ indices : int [1:8956698(1d)] 8 13 32 50 52 57 63 64 115 123 ...
 $ indptr  : int [1:2654(1d)] 0 3590 5796 8974 12387 15017 18036 22792 27080 29158 ...
 $ shape

In [17]:
as.matrix(dataset)

0,1
barcodes,AAACACCA....
data,"3, 2, 1,...."
features,"c(""genom...."
indices,"8, 13, 3...."
indptr,"0, 3590,...."
shape,"32306, 2653"


In [8]:
h5ls(SPOTS_cite_RNA_path)

Unnamed: 0_level_0,group,name,otype,dclass,dim
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>
0,/,matrix,H5I_GROUP,,
1,/matrix,barcodes,H5I_DATASET,STRING,2653.0
2,/matrix,data,H5I_DATASET,INTEGER,8956698.0
3,/matrix,features,H5I_GROUP,,
4,/matrix/features,_all_tag_keys,H5I_DATASET,STRING,4.0
5,/matrix/features,feature_type,H5I_DATASET,STRING,32306.0
6,/matrix/features,genome,H5I_DATASET,STRING,32306.0
7,/matrix/features,id,H5I_DATASET,STRING,32306.0
8,/matrix/features,name,H5I_DATASET,STRING,32306.0
9,/matrix/features,pattern,H5I_DATASET,STRING,32306.0


In [35]:
input_path <- "/home/wsg/BM/data/HSPC/RNA+ADT/p10"
output_path <- "/home/wsg/BM/data/HSPC/RNA+ADT/p10"
dataset <- unlist(fromJSON(file = "/home/wsg/BM/data/HSPC/RNA+ADT/p10/p10.json"))

In [36]:
HSPC_RNA_p10 <- read_h5ad(file = paste0(output_path, "/HSPC-CITE_seq-p10-RNA-counts.h5ad"))

In [37]:
HSPC_RNA_p10

AnnData object with n_obs × n_vars = 7099 × 22050
    obs: 'day', 'donor', 'cell_type', 'technology', 'barcode'

In [38]:
process = "p10"
# save raw rna to rds
RNA_subset_counts <- HSPC_RNA_p10$X
saveRDS(RNA_subset_counts, 
        file = here(output_path, 
                    paste(dataset["data_name"], 
                          dataset["task_type"], 
                          process,
                          "RNA", "counts.rds", sep = "-")))

# metadata
metadata <- HSPC_RNA_p10$obs
# Create Seurat Object
RNA_subset <- CreateSeuratObject(counts = RNA_subset_counts, meta.data = metadata)

# save Seurat to h5Seurat
SaveH5Seurat(RNA_subset, overwrite = TRUE, 
             filename = here(output_path, 
                             paste(dataset["data_name"], 
                                   dataset["task_type"], 
                                   process, 
                                   "RNA", "counts.h5Seurat", sep = "-")))

“Some cells in meta.data not present in provided counts matrix”
Creating h5Seurat file for version 3.1.5.9900

Adding counts for RNA

Adding data for RNA

No variable features found for RNA

No feature-level metadata found for RNA



In [41]:
HSPC_ADT_p10 <- read_h5ad(file = paste0(output_path, "/HSPC-CITE_seq-p10-ADT-counts.h5ad"))
HSPC_ADT_p10

AnnData object with n_obs × n_vars = 7099 × 140
    obs: 'day', 'donor', 'cell_type', 'technology', 'barcode'

In [42]:
process = "p10"
# save raw rna to rds
ADT_subset_counts <- HSPC_ADT_p10$X
saveRDS(ADT_subset_counts, 
        file = here(output_path, 
                    paste(dataset["data_name"], 
                          dataset["task_type"], 
                          process,
                          "ADT", "counts.rds", sep = "-")))

# metadata
metadata <- HSPC_ADT_p10$obs
# Create Seurat Object
ADT_subset <- CreateSeuratObject(counts = ADT_subset_counts, meta.data = metadata)

# save Seurat to h5Seurat
SaveH5Seurat(ADT_subset, overwrite = TRUE, 
             filename = here(output_path, 
                             paste(dataset["data_name"], 
                                   dataset["task_type"], 
                                   process, 
                                   "ADT", "counts.h5Seurat", sep = "-")))

“Some cells in meta.data not present in provided counts matrix”
Creating h5Seurat file for version 3.1.5.9900

Adding counts for RNA

Adding data for RNA

No variable features found for RNA

No feature-level metadata found for RNA



In [12]:
# save raw rna to h5ad
setwd(output_path)
Convert(paste(dataset["data_name"],"raw", dataset["task_type"], "RNA", "counts.h5Seurat", sep = "-"), 
        dest = "h5ad")

Validating h5Seurat file

Adding data from RNA as X

Adding counts from RNA as raw

Transfering meta.data to obs



In [3]:
# Load ATAC
BMMC_ATAC_Dir <- "/Data/wangsg/BM/pipeline/results/BMMC/data_preprocess/BMMC-raw-pair-ATAC-peaks.mtx/"
BMMC_ATAC_counts <- Read10X(data.dir = BMMC_ATAC_Dir, gene.column = 1)

as(<dgTMatrix>, "dgCMatrix") is deprecated since Matrix 1.5-0; do as(., "CsparseMatrix") instead



In [7]:
metadata <- read.csv(paste0(BMMC_ATAC_Dir, "/metadata.csv"), row.names = 1)
# 添加barcode到metadata
metadata['barcode'] <- rownames(metadata)

# set.seed(1234)
# random sample 500 cells of each donor
# bmmc_rna_500_meta <- metadata %>% group_by(batch) %>% slice_sample(n=500)

# random sample 10% cells of each donor
# bmmc_rna_10p_meta <- metadata %>% group_by(batch) %>% sample_frac(.1)

table(metadata$batch)
bmmc_atac_10p_meta <- metadata[rownames(metadata) %in% rownames(bmmc_rna_10p_meta), ]
table(bmmc_atac_10p_meta$batch)


 s1d1  s1d2  s1d3  s2d1  s2d4  s2d5 s3d10  s3d3  s3d6  s3d7  s4d1  s4d8  s4d9 
 6224  6740  4279  4220  6111  4895  6781  4325  1679  1771  8023  9876  4325 


 s1d1  s1d2  s1d3  s2d1  s2d4  s2d5 s3d10  s3d3  s3d6  s3d7  s4d1  s4d8  s4d9 
  622   674   428   422   611   490   678   432   168   177   802   988   432 

In [8]:
BMMC_ATAC_counts_10p <- BMMC_ATAC_counts[ , colnames(BMMC_ATAC_counts) %in% bmmc_atac_10p_meta$barcode]

In [9]:
# save raw atac to mtx
data_path <- here(output_path,
                 paste(dataset["data_name"], 
                       "raw",
                       dataset["task_type"], 
                       "ATAC", "peaks.mtx", sep = "-"))
write10xCounts(x = BMMC_ATAC_counts_10p, path = data_path, version = "3")
write_csv(bmmc_atac_10p_meta, here(data_path, "metadata.csv"))

In [10]:
# save raw atac to rds
saveRDS(BMMC_ATAC_counts_10p, 
        file = here(output_path, 
                    paste(dataset["data_name"], 
                          "raw",
                          dataset["task_type"], 
                          "ATAC", "peaks.rds", sep = "-")))

In [11]:
chrom_assay <- CreateChromatinAssay(
    counts = BMMC_ATAC_counts_10p,
    sep = c("-", "-")
)
bmmc_atac_10p <- CreateSeuratObject(counts = chrom_assay, assay = "ATAC", meta.data = bmmc_atac_10p_meta)

"Keys should be one or more alphanumeric characters followed by an underscore, setting key from atac to atac_"
