In [2]:
args <- commandArgs(T) 

In [10]:
library(here)
library(rjson)
library(Matrix)
library(tidyverse)
library(dplyr)
library(DropletUtils) 

library(Seurat)
library(Signac)
library(SeuratDisk)

library(rhdf5)
library(anndata)

# convert H5Seurat

## Multiome

In [13]:
input_path <- "/home/wsg/BM/data/10x_kidney_cancer/RawData"
output_path <- "/home/wsg/BM/data/10x_kidney_cancer/RNA+ADT/RawData"

In [15]:
kidney_multi <- Read10X_h5(file = here(input_path, "4plex_DTC_kidney_lung_breast_TotalSeqC_multiplex_Multiplex_count_raw_feature_bc_matrix.h5"))

Genome matrix has multiple modalities, returning a list of matrices for this genome



In [28]:
barcodes_json <- fromJSON(file = here(input_path, "4plex_DTC_kidney_lung_breast_TotalSeqC_multiplex_Multiplex_multiplexing_analysis_cells_per_tag.json"))
print(length(barcodes_json$BC001))
print(length(barcodes_json$BC002))
sapply(barcodes_json, length)

[1] 10043
[1] 10931


In [35]:
rep1 <- data.frame(barcode = barcodes_json$BC001,
                   batch = rep("kidney_rep1", length(barcodes_json$BC001)),
                   probe_tag = rep("BC001", length(barcodes_json$BC001)))

rep2 <- data.frame(barcode = barcodes_json$BC002,
                   batch = rep("kidney_rep2", length(barcodes_json$BC002)),
                   probe_tag = rep("BC002", length(barcodes_json$BC002)))

In [38]:
metadata <- rbind(rep1, rep2)

In [40]:
table(metadata$batch)


kidney_rep1 kidney_rep2 
      10043       10931 

In [41]:
# metadata
metadata
write_csv(metadata, here(output_path, "metadata.csv"))

barcode,batch,probe_tag
<chr>,<chr>,<chr>
AAACAAGCACACCCACACTTTAGG-1,kidney_rep1,BC001
AAACAAGCATAACCAGACTTTAGG-1,kidney_rep1,BC001
AAACCAATCACCCTGGACTTTAGG-1,kidney_rep1,BC001
AAACCAATCATAACCTACTTTAGG-1,kidney_rep1,BC001
AAACCAATCGCAGGACACTTTAGG-1,kidney_rep1,BC001
AAACCAATCTTTAGCGACTTTAGG-1,kidney_rep1,BC001
AAACCAGGTCAACATCACTTTAGG-1,kidney_rep1,BC001
AAACCAGGTGGTTAAGACTTTAGG-1,kidney_rep1,BC001
AAACCAGGTTAGCGCCACTTTAGG-1,kidney_rep1,BC001
AAACCAGGTTCACCAGACTTTAGG-1,kidney_rep1,BC001


In [43]:
RNA_counts <- kidney_multi$`Gene Expression`

In [44]:
RNA_counts

  [[ suppressing 32 column names ‘AAACAAGCAAACAAGAAACGGGAA-1’, ‘AAACAAGCAAACAAGAACTTTAGG-1’, ‘AAACAAGCAAACAAGAAGTAGGCT-1’ ... ]]

  [[ suppressing 32 column names ‘AAACAAGCAAACAAGAAACGGGAA-1’, ‘AAACAAGCAAACAAGAACTTTAGG-1’, ‘AAACAAGCAAACAAGAAGTAGGCT-1’ ... ]]

  [[ suppressing 32 column names ‘AAACAAGCAAACAAGAAACGGGAA-1’, ‘AAACAAGCAAACAAGAACTTTAGG-1’, ‘AAACAAGCAAACAAGAAGTAGGCT-1’ ... ]]



37143 x 3700623 sparse Matrix of class "dgCMatrix"
                                                                                   
MIR1302-2HG  . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ......
FAM138A      . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ......
OR4F5        . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ......
AL627309.1   . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ......
AL627309.3   . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ......
AL627309.2   . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ......
AL627309.5   . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ......
AL627309.4   . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ......
AP006222.2   . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ......
AL732372.1   . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ......
OR4F29       . . . . . . 

In [49]:
sum(metadata$barcode %in% colnames(RNA_counts))

In [50]:
RNA_subset_counts <- RNA_counts[, metadata$barcode]

In [51]:
process = "raw"

# save raw rna to mtx
data_path <- here(output_path, "kidney-CITE_seq-raw-RNA-counts.mtx")
write10xCounts(x = RNA_subset_counts, path = data_path, version = "3")

# save raw rna to rds
saveRDS(RNA_subset_counts, 
        file = here(output_path, "kidney-CITE_seq-raw-RNA-counts.rds"))

# Create Seurat Object
RNA_subset <- CreateSeuratObject(counts = RNA_subset_counts, meta.data = metadata)

# save Seurat to h5Seurat
SaveH5Seurat(RNA_subset, overwrite = TRUE, 
             filename = here(output_path, "kidney-CITE_seq-raw-RNA-counts.h5Seurat"))

# Convert h5Seurat to h5ad
setwd(output_path)
Convert(here(output_path, "kidney-CITE_seq-raw-RNA-counts.h5Seurat"), dest = "h5ad")

“Some cells in meta.data not present in provided counts matrix”
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
Creating h5Seurat file for version 3.1.5.9900

Adding counts for RNA

Adding data for RNA

No variable features found for RNA

No feature-level metadata found for RNA

Validating h5Seurat file

Adding data from RNA as X

Adding counts from RNA as raw

Transfering meta.data to obs



In [None]:
# ADT

In [54]:
ADT_counts <- kidney_multi$`Antibody Capture`

In [55]:
ADT_counts

  [[ suppressing 33 column names ‘AAACAAGCAAACAAGAAACGGGAA-1’, ‘AAACAAGCAAACAAGAACTTTAGG-1’, ‘AAACAAGCAAACAAGAAGTAGGCT-1’ ... ]]



32 x 3700623 sparse Matrix of class "dgCMatrix"
                                                                               
CD3_CD3E          . . . . . . . . . . . . . . . . 4 . 1 . . 1 1 . . . . . . . .
CD4_CD4           . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
CD8a_CD8A         . . . . . . . . . . . . . . . . 1 . . . . . . . . . . . . . 1
CD11c_ITGAX       . . . 1 . . . . . . . 1 . . 1 . 2 . . . . 1 . . . . . . . . .
CD14_CD14         . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
CD15_FUT4         1 . . . . 1 . . . . . 1 . . . . 2 . . . . . . . . . . . . . .
CD16_FCGR3A       . . . . . . . . . . . 1 . . . . . . . . . 1 . . . . . . . . .
CD19_CD19         . . . . . . . . . . . 3 . . . . 1 . . . . . . . . . . . . . .
CD127_IL7R        . . . . . . . . . . . . . . . . . . . . . 1 . . . . . . . . .
CD25_IL2RA        . . . . . . . . . . . . . . . . . . . . . . . . . . . 1 . . .
CD56_NCAM1        . . . . . . . . . . . . . . . . . . . . . . . . . . . 

In [74]:
ADT_subset_counts <- ADT_counts[, metadata$barcode]
rownames(ADT_subset_counts) <- sapply(strsplit(rownames(ADT_subset_counts),"_"), tail, 1)

In [75]:
ADT_subset_counts

  [[ suppressing 33 column names ‘AAACAAGCACACCCACACTTTAGG-1’, ‘AAACAAGCATAACCAGACTTTAGG-1’, ‘AAACCAATCACCCTGGACTTTAGG-1’ ... ]]



32 x 20974 sparse Matrix of class "dgCMatrix"
                                                                             
CD3E     32   37   70 50 254   29 754   43  35   6 1066  3 500   27  901   35
CD4      17   29   15  9   3   25 885   21   9   2    .  3   1   23    4  110
CD8A     27   27   38 29 250   34   7   42  19   4    2  1 249   37 1058    2
ITGAX   127  293   63 24   1   58  27   33  40 345   59  3   5  118   37 1764
CD14     20   25   17  6   .   10   4   12  14   1    .  1   2   18    4  219
FUT4    516   84  629 14   1   66   5 3510 453   2    3 74   2   92    2   16
FCGR3A    8   27   14  .   .    4   6    4   2   .   44  1   .   24    1    1
CD19     11   23    5  8   1    8   .   16   9   .    1  .   .    6    1    .
IL7R      9   19   19 10   1   23 217   18   6   .    8  1   .   25    4    8
IL2RA    13   42    7 20   1   16   3   37   8   2    .  .   .   18    .    2
NCAM1     3    4    1  4   .    2   2    3   3  13    6  .   .    .    1    .
CD45RO    7    4  

In [76]:
# save raw ADT to mtx
data_path <- here(output_path, "kidney-CITE_seq-raw-ADT-counts.mtx")
write10xCounts(x = ADT_subset_counts, path = data_path, version = "3")

# save raw ADT to rds
saveRDS(ADT_subset_counts, 
        file = here(output_path, "kidney-CITE_seq-raw-ADT-counts.rds"))

# Create Seurat Object
ADT_subset <- CreateSeuratObject(counts = ADT_subset_counts, meta.data = metadata)

# save Seurat to h5Seurat
SaveH5Seurat(ADT_subset, overwrite = TRUE, 
             filename = here(output_path, "kidney-CITE_seq-raw-ADT-counts.h5Seurat"))

# Convert h5Seurat to h5ad
setwd(output_path)
Convert(here(output_path, "kidney-CITE_seq-raw-ADT-counts.h5Seurat"), dest = "h5ad")

“Some cells in meta.data not present in provided counts matrix”
Creating h5Seurat file for version 3.1.5.9900

Adding counts for RNA

Adding data for RNA

No variable features found for RNA

No feature-level metadata found for RNA

Validating h5Seurat file

Adding data from RNA as X

Adding counts from RNA as raw

Transfering meta.data to obs

