In [2]:
args <- commandArgs(T) 

In [3]:
library(here)
library(rjson)
library(Matrix)
library(tidyverse)
library(tidyr)
library(dplyr)
library(DropletUtils) 

library(Seurat)
library(Signac)
library(SeuratDisk)

library(rhdf5)
library(anndata)

# convert H5Seurat

## metadata

In [4]:
input_path <- "/home/wsg/BM/data/10x_mouse_brain/RawData"
output_path <- "/home/wsg/BM/data/10x_mouse_brain/RNA+ATAC/2p5"

In [5]:
brain_multi <- Read10X_h5(file = here(input_path, "Multiome_RNA_ATAC_Mouse_Brain_Alzheimers_AppNote_filtered_feature_bc_matrix.h5"))

Genome matrix has multiple modalities, returning a list of matrices for this genome



In [9]:
column_names <- colnames(RNA_counts)
column_names_split <- strsplit(column_names, "-")

In [10]:
metadata <- data.frame(barcode = column_names, 
                       index = sapply(column_names_split, tail, 1))
match_list <- c('1' = 'AD_17p9_rep4', 
                '2' = 'AD_17p9_rep5', 
                '3' = 'AD_2p5_rep2',
                '4' = 'AD_2p5_rep3', 
                '5' = 'AD_5p7_rep2', 
                '6' = 'AD_5p7_rep6',
                '7' = 'WT_13p4_rep2', 
                '8' = 'WT_13p4_rep5', 
                '9' = 'WT_2p5_rep2',
                '10' = 'WT_2p5_rep7', 
                '11' = 'WT_5p7_rep2', 
                '12' = 'WT_5p7_rep3')
metadata$sample <- match_list[as.character(metadata$index)]

In [11]:
metadata$separate <- metadata$sample
metadata <- metadata %>%
    separate(col = separate, into = c("type", "time", "rep"), sep = "_")
metadata

barcode,index,sample,type,time,rep
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
AAACAGCCAAACTAAG-8,8,WT_13p4_rep5,WT,13p4,rep5
AAACAGCCAAGCGAGC-10,10,WT_2p5_rep7,WT,2p5,rep7
AAACAGCCAAGCGATG-9,9,WT_2p5_rep2,WT,2p5,rep2
AAACAGCCAATAACCT-11,11,WT_5p7_rep2,WT,5p7,rep2
AAACAGCCAATGCCCG-8,8,WT_13p4_rep5,WT,13p4,rep5
AAACAGCCAATTGAGA-9,9,WT_2p5_rep2,WT,2p5,rep2
AAACAGCCACAGGGAC-5,5,AD_5p7_rep2,AD,5p7,rep2
AAACAGCCACCTACGG-7,7,WT_13p4_rep2,WT,13p4,rep2
AAACAGCCAGCACGAA-8,8,WT_13p4_rep5,WT,13p4,rep5
AAACAGCCAGCACGTT-7,7,WT_13p4_rep2,WT,13p4,rep2


In [14]:
metadata_2p5 <- metadata[which(metadata$time == "2p5"),]
rownames(metadata_2p5) <- metadata_2p5$barcode
metadata_2p5

Unnamed: 0_level_0,barcode,index,sample,type,time,rep
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
AAACAGCCAAGCGAGC-10,AAACAGCCAAGCGAGC-10,10,WT_2p5_rep7,WT,2p5,rep7
AAACAGCCAAGCGATG-9,AAACAGCCAAGCGATG-9,9,WT_2p5_rep2,WT,2p5,rep2
AAACAGCCAATTGAGA-9,AAACAGCCAATTGAGA-9,9,WT_2p5_rep2,WT,2p5,rep2
AAACAGCCAGCATTAT-3,AAACAGCCAGCATTAT-3,3,AD_2p5_rep2,AD,2p5,rep2
AAACAGCCATTGACAT-3,AAACAGCCATTGACAT-3,3,AD_2p5_rep2,AD,2p5,rep2
AAACAGCCATTGCAGC-4,AAACAGCCATTGCAGC-4,4,AD_2p5_rep3,AD,2p5,rep3
AAACATGCAATAGCAA-9,AAACATGCAATAGCAA-9,9,WT_2p5_rep2,WT,2p5,rep2
AAACATGCAGAGGGAG-4,AAACATGCAGAGGGAG-4,4,AD_2p5_rep3,AD,2p5,rep3
AAACATGCATAATCCG-10,AAACATGCATAATCCG-10,10,WT_2p5_rep7,WT,2p5,rep7
AAACCAACAACCCTAA-10,AAACCAACAACCCTAA-10,10,WT_2p5_rep7,WT,2p5,rep7


In [14]:
# metadata
metadata_2p5
write_csv(metadata_2p5, here(output_path, "metadata.csv"))

Unnamed: 0_level_0,barcode,index,sample,type,time,rep
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
AAACAGCCAAGCGAGC-10,AAACAGCCAAGCGAGC-10,10,WT_2p5_rep7,WT,2p5,rep7
AAACAGCCAAGCGATG-9,AAACAGCCAAGCGATG-9,9,WT_2p5_rep2,WT,2p5,rep2
AAACAGCCAATTGAGA-9,AAACAGCCAATTGAGA-9,9,WT_2p5_rep2,WT,2p5,rep2
AAACAGCCAGCATTAT-3,AAACAGCCAGCATTAT-3,3,AD_2p5_rep2,AD,2p5,rep2
AAACAGCCATTGACAT-3,AAACAGCCATTGACAT-3,3,AD_2p5_rep2,AD,2p5,rep2
AAACAGCCATTGCAGC-4,AAACAGCCATTGCAGC-4,4,AD_2p5_rep3,AD,2p5,rep3
AAACATGCAATAGCAA-9,AAACATGCAATAGCAA-9,9,WT_2p5_rep2,WT,2p5,rep2
AAACATGCAGAGGGAG-4,AAACATGCAGAGGGAG-4,4,AD_2p5_rep3,AD,2p5,rep3
AAACATGCATAATCCG-10,AAACATGCATAATCCG-10,10,WT_2p5_rep7,WT,2p5,rep7
AAACCAACAACCCTAA-10,AAACCAACAACCCTAA-10,10,WT_2p5_rep7,WT,2p5,rep7


ERROR: Error: Cannot open file for writing:
* '/home/wsg/BM/data/10x_mouse_brain/RNA+ATAC/RawData/metadata.csv'


## RNA

In [7]:
RNA_counts <- brain_multi$`Gene Expression`

In [8]:
RNA_counts

  [[ suppressing 32 column names ‘AAACAGCCAAACTAAG-8’, ‘AAACAGCCAAGCGAGC-10’, ‘AAACAGCCAAGCGATG-9’ ... ]]

  [[ suppressing 32 column names ‘AAACAGCCAAACTAAG-8’, ‘AAACAGCCAAGCGAGC-10’, ‘AAACAGCCAAGCGATG-9’ ... ]]

  [[ suppressing 32 column names ‘AAACAGCCAAACTAAG-8’, ‘AAACAGCCAAGCGAGC-10’, ‘AAACAGCCAAGCGATG-9’ ... ]]



32286 x 33459 sparse Matrix of class "dgCMatrix"
                                                                                             
Xkr4          12 17  26 19  21 .  21  7 12  .  4 .   9 31  19  40 169  . 1  24 .  . . .  .  .
Gm1992         .  .   .  .   . .   1  .  .  .  . .   .  1   .   1   .  . .   . .  . . .  .  .
Gm19938        .  1   .  .   . .   1  .  .  .  . .   1  1   .   3   3  . .   . .  . . .  .  .
Gm37381        .  .   .  .   . .   .  .  .  .  . .   .  .   .   .   .  . .   . .  . . .  .  .
Rp1            .  .   .  .   . .   .  .  .  .  . .   .  .   .   .   .  . .   . .  . . .  .  .
Sox17          .  .   .  .   . .   .  .  .  .  . .   .  .   .   .   .  . .   . .  . . .  .  .
Gm37587        .  .   .  .   . .   .  .  .  .  . .   .  .   .   .   .  . .   . .  . . .  .  .
Gm37323        .  .   .  .   . .   .  .  .  .  . .   .  .   .   .   .  . .   . .  . . .  .  .
Mrpl15         1  .   .  .   2 .   1  .  .  .  . .   .  .   1   1   1  . .   . .  . . .  .  .
Lypla1     

In [15]:
sum(metadata_2p5$barcode %in% colnames(RNA_counts))

In [41]:
RNA_subset_counts <- RNA_counts[, metadata_2p5$barcode]

In [42]:
RNA_subset_counts

  [[ suppressing 32 column names ‘AAACAGCCAAGCGAGC-10’, ‘AAACAGCCAAGCGATG-9’, ‘AAACAGCCAATTGAGA-9’ ... ]]

  [[ suppressing 32 column names ‘AAACAGCCAAGCGAGC-10’, ‘AAACAGCCAAGCGATG-9’, ‘AAACAGCCAATTGAGA-9’ ... ]]

  [[ suppressing 32 column names ‘AAACAGCCAAGCGAGC-10’, ‘AAACAGCCAAGCGATG-9’, ‘AAACAGCCAATTGAGA-9’ ... ]]



32286 x 12138 sparse Matrix of class "dgCMatrix"
                                                                                             
Xkr4          17  26 . . 169  . . .  .  24  . . .  5 3  .  .  2 21  2 23  43  6  7  . 10 .  .
Gm1992         .   . . .   .  . . .  .   2  . . .  . .  .  .  .  .  .  .   .  .  .  .  . .  .
Gm19938        1   . . .   3  . . .  .   .  . . .  . .  .  .  .  1  .  1   1  .  .  .  . .  .
Gm37381        .   . . .   .  . . .  .   .  . . .  . .  .  .  .  .  .  .   .  .  .  .  . .  .
Rp1            .   . . .   .  . . .  .   .  . . .  . .  .  .  .  .  .  .   .  .  .  .  . .  .
Sox17          .   . . .   .  . . .  .   .  . . .  . .  .  .  .  .  .  .   .  .  .  .  . .  .
Gm37587        .   . . .   .  . . .  .   .  . . .  . .  .  .  .  .  .  .   .  .  .  .  . .  .
Gm37323        .   . . .   .  . . .  .   .  . . .  . .  .  .  .  .  .  .   .  .  .  .  . .  .
Mrpl15         .   . . .   1  . . .  .   .  . . .  . .  .  .  .  .  .  .   1  .  .  .  . .  .
Lypla1     

In [49]:
process = "2p5"

# save 2p5 rna to mtx
data_path <- here(output_path, "brain-multiome-2p5-RNA-counts.mtx")
write10xCounts(x = RNA_subset_counts, path = data_path, version = "3")

# save 2p5 rna to rds
saveRDS(RNA_subset_counts, 
        file = here(output_path, "brain-multiome-2p5-RNA-counts.rds"))

# Create Seurat Object
RNA_subset <- CreateSeuratObject(counts = RNA_subset_counts, meta.data = metadata_2p5)

# save Seurat to h5Seurat
SaveH5Seurat(RNA_subset, overwrite = TRUE, 
             filename = here(output_path, "brain-multiome-2p5-RNA-counts.h5Seurat"))

# Convert h5Seurat to h5ad
setwd(output_path)
Convert(here(output_path, "brain-multiome-2p5-RNA-counts.h5Seurat"), dest = "h5ad")

Creating h5Seurat file for version 3.1.5.9900

Adding counts for RNA

Adding data for RNA

No variable features found for RNA

No feature-level metadata found for RNA

Validating h5Seurat file

Adding data from RNA as X

Adding counts from RNA as raw

Transfering meta.data to obs



## ATAC

In [15]:
ATAC_counts <- brain_multi$Peaks

In [16]:
ATAC_counts_rename <- ATAC_counts
rownames(ATAC_counts_rename) <- gsub(rownames(ATAC_counts_rename), pattern = ":", replacement = "-")

In [17]:
ATAC_subset_counts <- ATAC_counts_rename[, metadata_2p5$barcode]

In [28]:
rows_to_keep <- !grepl("^GRCh38_chr21", rownames(ATAC_subset_counts))
ATAC_subset_counts <- ATAC_subset_counts[rows_to_keep, ]

In [32]:
ATAC_subset_counts
# tail(ATAC_subset_counts)

  [[ suppressing 32 column names ‘AAACAGCCAAGCGAGC-10’, ‘AAACAGCCAAGCGATG-9’, ‘AAACAGCCAATTGAGA-9’ ... ]]

  [[ suppressing 32 column names ‘AAACAGCCAAGCGAGC-10’, ‘AAACAGCCAAGCGATG-9’, ‘AAACAGCCAATTGAGA-9’ ... ]]

  [[ suppressing 32 column names ‘AAACAGCCAAGCGAGC-10’, ‘AAACAGCCAAGCGATG-9’, ‘AAACAGCCAATTGAGA-9’ ... ]]



66909 x 12138 sparse Matrix of class "dgCMatrix"
                                                                                             
chr1-3119557-3120226   . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ......
chr1-3120334-3120722   . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ......
chr1-3292408-3293249   . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ......
chr1-3309766-3310590   . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ......
chr1-3311241-3312081   . . . . . . . . . . . . . . . . . . . . . 1 . . . . . . . . . . ......
chr1-3391754-3392593   . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ......
chr1-3398654-3399381   . . . . . . . . . . . . . . . . . . . . . . 2 . . . . . . . . . ......
chr1-3399768-3400563   . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ......
chr1-3405700-3406587   . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ......
chr1-340711

In [33]:
# save 2p5 ATAC to mtx
data_path <- here(output_path, "brain-multiome-2p5-ATAC-peaks.mtx")
write10xCounts(x = ATAC_subset_counts, path = data_path, version = "3")

# save 2p5 ATAC to rds
saveRDS(ATAC_subset_counts, 
        file = here(output_path, "brain-multiome-2p5-ATAC-peaks.rds"))

# Create Seurat Object
ATAC_subset <- CreateSeuratObject(counts = ATAC_subset_counts, meta.data = metadata_2p5)

# save Seurat to h5Seurat
SaveH5Seurat(ATAC_subset, overwrite = TRUE, 
             filename = here(output_path, "brain-multiome-2p5-ATAC-peaks.h5Seurat"))

# Convert h5Seurat to h5ad
setwd(output_path)
Convert(here(output_path, "brain-multiome-2p5-ATAC-peaks.h5Seurat"), dest = "h5ad")

Creating h5Seurat file for version 3.1.5.9900

Adding counts for RNA

Adding data for RNA

No variable features found for RNA

No feature-level metadata found for RNA

Validating h5Seurat file

Adding data from RNA as X

Adding counts from RNA as raw

Transfering meta.data to obs

