# Create subsets of a Seurat object
**Authorship:** Adam Klie (last updated: 10/04/2023)<br>
***
**Description:**
Adapted from Mei's notebook for doing this

In [17]:
suppressMessages(library(Seurat))
suppressMessages(library(Signac))
source(here::here("/cellar/users/aklie/opt/igvf-ucsd/single_cell_utilities/data_wrangling", 'io.R'))

In [21]:
seurat_dir <- "/cellar/users/aklie/data/datasets/igvf_sc-islet_10X-Multiome/annotation/25Aug23/seurat"
seurat_file <- file.path(seurat_dir, "25Aug23_new_mpeak_all.cells.rds")

In [22]:
adata <- readRDS(seurat_file)
adata

An object of class Seurat 
598905 features across 83289 samples within 4 assays 
Active assay: mpeak (262611 features, 0 variable features)
 3 other assays present: RNA, SCT, mpeak_27Aug23
 6 dimensional reductions calculated: pca, harmony, lsi, atac.harmony, umap.wnn, sct.harmony

# DM041_control
DM0B, DM25A, DM35A, DM45A

## Seurat object

In [23]:
# Subset the Seurat object to include only the cells that have "sample" in DM0B, DM25A, DM35A, DM45A
adata_dm041_control <- subset(adata, subset = sample %in% c("DM0B", "DM25A", "DM35A", "DM45A"))

In [24]:
# Double check the remaining samples
table(adata_dm041_control$sample)


 DM0B DM25A DM35A DM45A 
 4000  5182  3156  3254 

In [25]:
# Save this subsetted Seurat object
saveRDS(adata_dm041_control, file.path(seurat_dir, "25Aug23_dm041_control.cells.rds"))

## RNA matrix, barcodes, and features

In [26]:
# Let's round the counts
soupx_cnts <- adata_dm041_control@assays$RNA@counts
rounded_soupx_cnts <- round(soupx_cnts)
rounded_soupx_cnts[1:15, 1:15]

  [[ suppressing 15 column names ‘DM00_AAACAGCCATGAGTTT-1’, ‘DM00_AAACAGCCATGTTGCA-1’, ‘DM00_AAACAGCCATTGTGAT-1’ ... ]]



15 x 15 sparse Matrix of class "dgCMatrix"
                                         
MIR1302-2HG . . . . . . . . . . . . . . .
FAM138A     . . . . . . . . . . . . . . .
OR4F5       . . . . . . . . . . . . . . .
AL627309.1  . . . . . . . . . . . . . . .
AL627309.3  . . . . . . . . . . . . . . .
AL627309.2  . . . . . . . . . . . . . . .
AL627309.5  . . . . . . . . . . . . . 0 .
AL627309.4  . . . . . . . . . . . . . . .
AP006222.2  . . . . . . . . . . . . . . .
AL732372.1  . . . . . . . . . . . . . . .
OR4F29      . . . . . . . . . . . . . . .
AC114498.1  . . . . . . . . . . . . . . .
OR4F16      . . . . . . . . . . . . . . .
AL669831.2  . . . . . . . . . . . . . . .
LINC01409   . . . . . . . . . . . . . 1 .

In [27]:
# Using v3 CellRanger as an output, create the inputs for platinum
write10xCounts(x=rounded_soupx_cnts, path=file.path(seurat_dir, "RNA_25Aug23_dm041_control"), version="3")

In [31]:
# Write the csv file out for the metadata
write.csv(x=adata_dm041_control@meta.data, file=file.path(seurat_dir, "RNA_25Aug23_dm041_control", "metadata.csv"), quote=FALSE)

## ATAC matrix, barcodes, and features

In [32]:
write10xCounts(x=adata_dm041_control@assays$mpeak_27Aug23@counts,  path=file.path(seurat_dir, "ATAC_25Aug23_dm041_control"), gene.type="Peaks", version="3")

## Fragment files

# DM017_control
MO1, MO3, MO14, MO22, MO29, MO38

In [33]:
# Subset the Seurat object to include only the cells that have "sample" in MO1, MO3, MO14, MO22, MO29, MO38
adata_dm017_control <- subset(adata, subset = sample %in% c("MO1", "MO3", "MO14", "MO22", "MO29", "MO38"))

In [34]:
# Double check the remaining samples
table(adata_dm017_control$sample)


 MO1 MO14 MO22 MO29  MO3 MO38 
1571 1198 2092  746 1163 3727 

In [35]:
# Save this subsetted Seurat object
saveRDS(adata_dm017_control, file.path(seurat_dir, "25Aug23_dm017_control.cells.rds"))

## RNA matrix, barcodes, and features

In [36]:
# Let's round the counts
soupx_cnts <- adata_dm017_control@assays$RNA@counts
rounded_soupx_cnts <- round(soupx_cnts)
rounded_soupx_cnts[1:15, 1:15]

  [[ suppressing 15 column names ‘MO1_AAACAGCCAGCAATAA-1’, ‘MO1_AAACCAACAACCGCCA-1’, ‘MO1_AAACCGCGTATTGTGG-1’ ... ]]



15 x 15 sparse Matrix of class "dgCMatrix"
                                         
MIR1302-2HG . . . . . . . . . . . . . . .
FAM138A     . . . . . . . . . . . . . . .
OR4F5       . . . . . . . . . . . . . . .
AL627309.1  . . . . . . . . . 1 . . . . .
AL627309.3  . . . . . . . . . . . . . . .
AL627309.2  . . . . . . . . . . . . . . .
AL627309.5  1 . . . . . . 1 . . . . . . .
AL627309.4  . . . . . . . . . . . . . . .
AP006222.2  . . . . . . . . . . . . . . .
AL732372.1  . . . . . . . . . . . . . . .
OR4F29      . . . . . . . . . . . . . . .
AC114498.1  . . . . . . . . . . . . . . .
OR4F16      . . . . . . . . . . . . . . .
AL669831.2  . . . . . . . . . . . . . . .
LINC01409   . . . 1 . . . . . . . . . . .

In [37]:
# Using v3 CellRanger as an output, create the inputs for platinum
write10xCounts(x=rounded_soupx_cnts, path=file.path(seurat_dir, "RNA_25Aug23_dm017_control"), version="3")

In [None]:
# Write the csv file out for the metadata
write.csv(x=adata_dm017_control@meta.data, file=file.path(seurat_dir, "RNA_25Aug23_dm017_control", "metadata.csv"), quote=FALSE)

## ATAC matrix, barcodes, and features

In [None]:
write10xCounts(x=adata_dm017_control@assays$mpeak_27Aug23@counts,  path=file.path(seurat_dir, "ATAC_25Aug23_dm017_control"), gene.type="Peaks", version="3")

## Fragment files