-
Notifications
You must be signed in to change notification settings - Fork 5
/
PBMC.rmd
50 lines (50 loc) · 2.71 KB
/
PBMC.rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
---
title: "Cell type identification improvement"
output: html_notebook
---
### Setup knitr and load utility functions
```{r setup}
knitr::opts_chunk$set(echo = TRUE)
knitr::opts_knit$set(root.dir="E:/DISC/reproducibility")
```
```{r}
utilities_path = "./source/utilities.r"
source(utilities_path)
```
### Filter cells as Seurat tutorial
Following <a href="https://satijalab.org/seurat/v3.0/pbmc3k_tutorial.html">Seurat tutorial</a>, we download the original PBMC_3k data and perform cell filtering.</br>
The original PBMC data can be found at https://support.10xgenomics.com/single-cell-gene-expression/datasets/1.1.0/frozen_pbmc_donor_a (the original page of 10X) and https://s3-us-west-2.amazonaws.com/10x.files/samples/cell/pbmc3k/pbmc3k_filtered_gene_bc_matrices.tar.gz (the link in Seurat tutorial).
We cached this dataset in our github, which can be found at https://github.com/iyhaoo/DISC_data_availability/tree/master/PBMC/filtered_gene_bc_matrices/hg19.
```{r}
if(!file.exists("./data/PBMC/raw.loom")){
temp <- tempfile()
download.file("http://cf.10xgenomics.com/samples/cell-exp/1.1.0/frozen_pbmc_donor_a/frozen_pbmc_donor_a_filtered_gene_bc_matrices.tar.gz", temp)
exdir = "./data/PBMC"
untar(temp, exdir = exdir)
unlink(temp)
original_data <- Read10X(data.dir = paste0(exdir, "/filtered_gene_bc_matrices/hg19/"))
save_h5("./data/PBMC/original.loom", as.matrix(t(original_data)))
seurat_obj <- CreateSeuratObject(counts = as.data.frame(original_data), min.cells = 3, min.features = 200)
seurat_obj[["percent.mt"]] <- PercentageFeatureSet(seurat_obj, pattern = "^MT-")
seurat_obj <- subset(seurat_obj, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
keep_cell_names = colnames(seurat_obj[["RNA"]]@counts)
keep_gene_names = rownames(seurat_obj[["RNA"]]@counts)
saveRDS(keep_gene_names, "./data/PBMC/used_gene_names.rds")
raw_data = as.matrix(original_data[, keep_cell_names])
save_h5("./data/PBMC/raw.loom", t(raw_data))
}else{
raw_data = readh5_loom("./data/PBMC/raw.loom")
keep_gene_names = readRDS("./data/PBMC/used_gene_names.rds")
}
```
### Identify cells as Seurat tutorial
Following <a href="https://satijalab.org/seurat/v3.0/pbmc3k_tutorial.html">Seurat tutorial</a>, we identify cells.
```{r fig.height=8, fig.width=11}
if(!file.exists("./data/PBMC/cell_type.rds")){
cell_type_identification = seurat_classification(gene_bc_mat = raw_data[gsub(pattern = "_", replacement = "-", rownames(raw_data)) %in% keep_gene_names, ], pca_dim = 10, res = 0.5, min_pct = 0.25, show_plots = T, cell_type_identification_fun = cell_type_identification_pbmc)
cell_type= cell_type_identification$assignment
saveRDS(cell_type, "./data/PBMC/cell_type.rds")
}else{
cell_type = readRDS("./data/PBMC/cell_type.rds")
}
```