Skip to content

Commit

Permalink
version 0.1.1
Browse files Browse the repository at this point in the history
  • Loading branch information
xiayh17 authored and cran-robot committed Sep 22, 2021
0 parents commit 5baba40
Show file tree
Hide file tree
Showing 22 changed files with 683 additions and 0 deletions.
36 changes: 36 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
Package: scRNAstat
Type: Package
Title: A Pipeline to Process Single Cell RNAseq Data
Version: 0.1.1
Authors@R: c(
person("Jianming","Zeng",
email = "jmzeng1314@163.com",
role = "aut"),
person(given = "Yonghe",
family = "Xia",
role = c("ctb", "cre"),
email = "xiayh17@gmail.com"),
person("Biotrainee group",role = c("cph","fnd"))
)
Maintainer: Yonghe Xia <xiayh17@gmail.com>
Description: A pipeline that can process single or multiple single-cell RNA-seq
samples, primarily specializing in clustering and dimensionality reduction.
It also uses common cell type marker genes for T cells, B cells, myeloid cells,
epithelial cells, and stromal cells (fibroblasts, endothelial cells, pericytes,
smooth muscle cells) to visualize the Seurat clusters and facilitate labeling
them with biological names. Once users have named each cluster, they can
re-evaluate cluster quality and also find de novo marker genes.
License: AGPL (>= 3)
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.1.2
Depends: R (>= 2.10)
Imports: Seurat, ggplot2, stringr, clustree, magrittr, Matrix, dplyr,
patchwork
NeedsCompilation: no
Packaged: 2021-09-21 02:39:07 UTC; yonghe
Author: Jianming Zeng [aut],
Yonghe Xia [ctb, cre],
Biotrainee group [cph, fnd]
Repository: CRAN
Date/Publication: 2021-09-22 08:10:02 UTC
21 changes: 21 additions & 0 deletions MD5
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
31de7f08a555dd57527ca0b08a1cd48b *DESCRIPTION
1c9f0566fd793b5739bcd92627aa91ef *NAMESPACE
af2aeb9cf224fb53d4feea3a132af876 *R/AJ064_small_last_sce.R
e687f56fa7689a4b88df3b7b2de6e52c *R/AJ064_small_sce.R
b9285b03752abf3bb29868ef423d4e39 *R/basic_filter.R
c27cd23af10863e3092192ad60afb1ca *R/basic_find_markers.R
d86e792691e003cff21e14e6c070e0b5 *R/basic_markers.R
b803c51e36a9da420703bfc0a2ac6b0e *R/basic_qc.R
7aae5c29be2039bf68dff9194953f311 *R/basic_workflow.R
4f84b6a4364d620edfb9ac3a1ec59597 *R/global.R
b56ff2b193ecba52f9ec7f3b10cca008 *R/utils-pipe.R
f81552edb9b7d8ef56948d01f5db6749 *data/AJ064_small_last_sce.rda
099afff1e9a10d71f7a5bfdd58358fa1 *data/AJ064_small_sce.rda
b981eb1be99da48b154349b7dfb44a7d *man/AJ064_small_last_sce.Rd
60f901b91a7ce544c2a4d96fc1962a9b *man/AJ064_small_sce.Rd
9432069f95ceb1a1c69b88460715cc4c *man/basic_filter.Rd
6970f9a2c2446b445d0b27c48536d733 *man/basic_find_markers.Rd
62ac8723b7bc2f1995969a9e55aa2216 *man/basic_markers.Rd
8f5a9658d03955eb0026bef6bf991971 *man/basic_qc.Rd
71c7c629e288ba3a769eca273050fff5 *man/basic_workflow.Rd
774d9de8e95aa151215efcd304549d41 *man/pipe.Rd
38 changes: 38 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Generated by roxygen2: do not edit by hand

export("%>%")
export(basic_filter)
export(basic_find_markers)
export(basic_markers)
export(basic_qc)
export(basic_workflow)
import(Seurat)
import(clustree)
import(ggplot2)
import(patchwork)
importFrom(Matrix,rowSums)
importFrom(Seurat,DimPlot)
importFrom(Seurat,FeatureScatter)
importFrom(Seurat,FindClusters)
importFrom(Seurat,FindNeighbors)
importFrom(Seurat,FindVariableFeatures)
importFrom(Seurat,GetAssay)
importFrom(Seurat,NoLegend)
importFrom(Seurat,NormalizeData)
importFrom(Seurat,RunPCA)
importFrom(Seurat,RunTSNE)
importFrom(Seurat,RunUMAP)
importFrom(Seurat,ScaleData)
importFrom(Seurat,VariableFeatures)
importFrom(Seurat,VlnPlot)
importFrom(clustree,clustree)
importFrom(dplyr,group_by)
importFrom(dplyr,top_n)
importFrom(ggplot2,coord_flip)
importFrom(ggplot2,ggsave)
importFrom(ggplot2,scale_y_continuous)
importFrom(grDevices,dev.off)
importFrom(grDevices,pdf)
importFrom(magrittr,"%>%")
importFrom(stringr,str_to_title)
importFrom(utils,write.csv)
4 changes: 4 additions & 0 deletions R/AJ064_small_last_sce.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#' Small `AJ064` Seurat Data Set After Processing
#'
#' An object of class Seurat
"AJ064_small_last_sce"
4 changes: 4 additions & 0 deletions R/AJ064_small_sce.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#' Small `AJ064` Seurat Data Set
#'
#' An object of class Seurat
"AJ064_small_sce"
32 changes: 32 additions & 0 deletions R/basic_filter.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#' basic_filter
#'
#' Applies three successive filters to a Seurat object:
#' \enumerate{
#'   \item keep only the genes with a non-zero count in more than 3 cells;
#'   \item keep only the cells with
#'     `percent_mito < 25 & percent_ribo > 3 & percent_hb < 10`;
#'   \item keep only the cells with
#'     `nFeature_RNA > 300 & nFeature_RNA < 8000`.
#' }
#' When the metadata has no `percent_mito` column, `basic_qc()` is run first
#' (writing its output to `tempdir()`).
#'
#' @param sce An object of class Seurat
#'
#' @return sce.all.filt An object of class Seurat
#' @import Seurat
#' @importFrom Matrix rowSums
#' @export
#'
#' @examples
#' basic_filter(AJ064_small_sce)
#'
basic_filter <- function(sce) {
  # The QC columns are looked up by name; when `percent_mito` is absent the
  # object is passed through basic_qc() first (presumably this is what adds
  # the percent_* columns used below -- verify against basic_qc()).
  has_qc_columns <- "percent_mito" %in% colnames(sce@meta.data)
  if (!has_qc_columns) {
    sce <- basic_qc(sce, dir = tempdir())
  }

  # Gene-level filter: number of cells with a non-zero raw count per gene.
  cells_per_gene <- Matrix::rowSums(sce@assays$RNA@counts > 0)
  kept_genes <- rownames(sce)[cells_per_gene > 3]
  filtered <- subset(sce, features = kept_genes)

  # Cell-level filter on the QC percentages.
  filtered <- subset(
    filtered,
    percent_mito < 25 & percent_ribo > 3 & percent_hb < 10
  )

  # Cell-level filter on the number of detected features.
  filtered <- subset(
    filtered,
    nFeature_RNA > 300 & nFeature_RNA < 8000
  )

  filtered
}
58 changes: 58 additions & 0 deletions R/basic_find_markers.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#' Basic Find Markers
#'
#' Finds de novo markers with `FindAllMarkers` from Seurat
#' (`only.pos = TRUE`, `min.pct = 0.25`, `logfc.threshold = 0.25`) and writes
#' the marker table plus four diagnostic figures into `dir`:
#' \itemize{
#'   \item `<group>_sce.markers.csv` and `<group>_sce.markers.Rdata`
#'   \item `<group>_sce.markers_check_top10_heatmap.pdf`
#'   \item `<group>_DotPlot_check_top10_markers_by_clusters.pdf`
#'   \item `<group>_DoHeatmap_check_top3_markers_by_clusters.pdf`
#'   \item `<group>_DotPlot_check_top3_markers_by_clusters.pdf`
#' }
#'
#' @param sce An object of class Seurat
#' @param group default:seurat_clusters, you can change it to celltype.
#'   Must be the name of a column of `sce@meta.data`.
#' @param dir path for saving results
#'
#' @return sce.markers a data.frame of markers.
#'
#' @import Seurat
#' @importFrom ggplot2 ggsave coord_flip
#' @importFrom utils write.csv
#' @importFrom dplyr group_by top_n
#' @export
#'
#' @examples
#' \donttest{
#' basic_find_markers(AJ064_small_last_sce,dir=tempdir())
#' }
basic_find_markers <- function(sce, group = "seurat_clusters", dir = ".") {
  # Fail early with a readable message instead of the generic
  # "undefined columns selected" raised by the metadata lookup below.
  if (!is.character(group) || length(group) != 1L ||
      !group %in% colnames(sce@meta.data)) {
    stop(
      "Column '", paste(group, collapse = ", "),
      "' not found in the meta.data of `sce`",
      call. = FALSE
    )
  }

  # All output files share the `<dir>/<group><suffix>` naming scheme.
  out_path <- function(suffix) file.path(dir, paste0(group, suffix))

  Seurat::Idents(sce) <- sce@meta.data[, group]

  # Report the group sizes as "label: count" lines; passing the table
  # straight to message() would paste the bare counts together.
  group_sizes <- table(Seurat::Idents(sce))
  message(
    paste0(names(group_sizes), ": ", as.vector(group_sizes), collapse = "\n")
  )

  # `logfc.threshold` is the current name of the fold-change cutoff argument.
  sce.markers <- Seurat::FindAllMarkers(
    object = sce,
    only.pos = TRUE,
    min.pct = 0.25,
    logfc.threshold = 0.25
  )
  utils::write.csv(sce.markers, file = out_path("_sce.markers.csv"))

  # Every plot is handed to ggsave() explicitly so that the file written never
  # depends on whatever ggplot2::last_plot() happens to hold.
  top10 <- sce.markers %>%
    dplyr::group_by(cluster) %>%
    dplyr::top_n(10, avg_log2FC)
  heatmap_top10 <- Seurat::DoHeatmap(sce, top10$gene, size = 3)
  ggplot2::ggsave(
    filename = out_path("_sce.markers_check_top10_heatmap.pdf"),
    plot = heatmap_top10,
    height = 15,
    width = 18
  )

  dotplot_top10 <- Seurat::DotPlot(
    sce,
    features = unique(top10$gene),
    assay = "RNA"
  ) + ggplot2::coord_flip()
  ggplot2::ggsave(
    filename = out_path("_DotPlot_check_top10_markers_by_clusters.pdf"),
    plot = dotplot_top10,
    height = 18
  )

  top3 <- sce.markers %>%
    dplyr::group_by(cluster) %>%
    dplyr::top_n(3, avg_log2FC)
  heatmap_top3 <- Seurat::DoHeatmap(sce, top3$gene, size = 3)
  ggplot2::ggsave(
    filename = out_path("_DoHeatmap_check_top3_markers_by_clusters.pdf"),
    plot = heatmap_top3
  )

  dotplot_top3 <- Seurat::DotPlot(
    sce,
    features = unique(top3$gene),
    assay = "RNA"
  ) + ggplot2::coord_flip()
  ggplot2::ggsave(
    filename = out_path("_DotPlot_check_top3_markers_by_clusters.pdf"),
    plot = dotplot_top3,
    height = 8
  )

  # The object keeps the name `sce.markers` so that load() of the .Rdata file
  # restores it under the same name as before.
  save(sce.markers, file = out_path("_sce.markers.Rdata"))

  sce.markers
}
164 changes: 164 additions & 0 deletions R/basic_markers.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
#' Basic Markers
#'
#' Draws one `DotPlot` per built-in panel of well-known marker genes
#' (all, T cells, B cells, myeloid, epithelial, stromal, breast cancer) and
#' writes them, one page per panel, to `markers_based_on_<group>.pdf` in `dir`.
#' For `org = "mouse"` the human symbols are converted with
#' `stringr::str_to_title()`.
#'
#' @param sce An object of class Seurat
#' @param org human or mouse, default: human
#' @param group default:`orig.ident`, you can change it to `seurat_clusters` or `celltype`
#' @param dir the path for saving the figures by `DotPlot` with known famous markers.
#'
#' @return a named list of figures by `DotPlot`, one element per marker panel
#' @import ggplot2
#' @import Seurat
#' @import clustree
#' @importFrom stringr str_to_title
#' @importFrom grDevices pdf dev.off
#' @export
#'
#' @examples
#' \donttest{
#' basic_markers(AJ064_small_last_sce,dir=tempdir())
#' }
basic_markers <- function(sce, org = "human", group = "orig.ident", dir = ".") {

  # T Cells (CD3D, CD3E, CD8A),
  # B cells (CD19, CD79A, MS4A1 [CD20]),
  # Plasma cells (IGHG1, MZB1, SDC1, CD79A),
  # Monocytes and macrophages (CD68, CD163, CD14),
  # NK Cells (FGFBP2, FCGR3A, CX3CR1),
  # Photoreceptor cells (RCVRN),
  # Fibroblasts (FGF7, MME),
  # Endothelial cells (PECAM1, VWF).
  # epi or tumor (EPCAM, KRT19, PROM1, ALDH1A1, CD24).
  # immune (CD45+,PTPRC), epithelial/cancer (EpCAM+,EPCAM),
  # stromal (CD10+,MME,fibo or CD31+,PECAM1,endo)
  all_markers <- c(
    "PTPRC", "CD3D", "CD3E", "CD4", "CD8A",
    "CD19", "CD79A", "MS4A1",
    "IGHG1", "MZB1", "SDC1",
    "CD68", "CD163", "CD14",
    "TPSAB1", "TPSB2", # mast cells
    "RCVRN", "FPR1", "ITGAM",
    "C1QA", "C1QB", # mac
    "S100A9", "S100A8", "MMP19", # monocyte
    "LAMP3", "IDO1", "IDO2", # DC3
    "CD1E", "CD1C", # DC2
    "KLRB1", "NCR1", # NK
    "FGF7", "MME", "ACTA2",
    "PECAM1", "VWF",
    "EPCAM", "KRT19", "PROM1", "ALDH1A1"
  )

  # TH17 cells (C-12, CCR6+ and RORC)
  # type 1 helper T cells (TH1; C-17, CXCR3+ and IFNG and TBX21)
  # heterogeneous continuum of intermediate TH1/TH17 states (C-13, C-16 and C-19)
  # with varying degrees of CXCR3, CCR6, CCR5 and CD161 surface protein
  # expression and RORC and TBX21 expression
  # CD161+ subset of type 2 helper T (TH2) cells (C-14),
  # described as pathogenic, with higher expression of allergy-associated
  # HPGDS and IL17RB
  # naive (LEF1, SELL, TCF7),
  # effector (IFNG),
  # cytotoxicity (GZMB, PRF1),
  # early and general exhaustion (PDCD1, CTLA4, ENTPD1).
  # antigen presentation (CD74, HLA-DRB1/5, HLA-DQA2)
  # FCGR3A (FCGR3A+ Monocyte), KLRF1 (NK), FCER1A (DC), and PF4 (MP/platelets).
  Tcells_markers <- c(
    "PTPRC", "CD3D", "CD3E", "CD4", "CD8A",
    "LEF1", "SELL", "TCF7", # naive
    "FOXP3",
    "CCR6", "RORC", # TH17 cells
    "TBX21", "CXCR3", "IFNG", # type 1 helper T cells
    "CCR3", "CCR4",
    "PDCD1", "CTLA4", "ENTPD1", # early and general exhaustion
    "GZMB", "GZMK", "PRF1", # cytotoxicity
    "IFNG", "CCL3", "CXCR6", "ITGA1", # effector
    "NKG7", "KLRF1", "MKI67", "PF4", "FCER1A", "FCGR3A"
  )

  # mast cells, TPSAB1 and TPSB2
  # B cell, CD79A and MS4A1 (CD20)
  # naive B cells, such as MS4A1 (CD20), CD19, CD22, TCL1A, and CD83,
  # plasma B cells, such as CD38, TNFRSF17 (BCMA), and IGHG1/IGHG4
  Bcells_markers <- c(
    "CD3D", "MS4A1", "CD79A",
    "CD19", "CD22", "TCL1A", "CD83", # naive B cells
    "CD38", "TNFRSF17", "IGHG1", "IGHG4", # plasma B cells
    "TPSAB1", "TPSB2", # mast cells
    "PTPRC"
  )

  Myeloid_markers <- c(
    "CD68", "CD163", "CD14", "CD86", "C1QA", "C1QB", # mac
    "S100A9", "S100A8", "MMP19", # monocyte
    "LAMP3", "IDO1", "IDO2", # DC3
    "MRC1", "MSR1", "ITGAE", "ITGAM", "ITGAX", "SIGLEC7",
    "CD1E", "CD1C", # DC2
    "XCR1", "CLEC9A", "FCER1A", # DC1
    "GZMB", "TCF4", "IRF7"
  )

  # epi or tumor (EPCAM, KRT19, PROM1, ALDH1A1, CD24).
  # - alveolar type I cell (AT1; AGER+)
  # - alveolar type II cell (AT2; SFTPA1)
  # - secretory club cell (Club; SCGB1A1+)
  # - basal airway epithelial cells (Basal; KRT17+)
  # - ciliated airway epithelial cells (Ciliated; TPPP3+)
  epi_markers <- c(
    "EPCAM", "KRT19", "PROM1", "ALDH1A1",
    "AGER", "SFTPA1", "SCGB1A1", "KRT17", "TPPP3",
    "KRT4", "KRT14", "KRT8", "KRT18",
    "CD3D", "PTPRC"
  )

  stromal_markers <- c(
    "TEK", "PTPRC", "EPCAM", "PDPN", "PECAM1", "PDGFRB",
    "CSPG4", "GJB2", "RGS5", "ITGA7",
    "ACTA2", "RBP1", "CD36", "ADGRE5", "COL11A1", "FGF7", "MME"
  )

  # basal (e.g., KRT5, ACTA2, MYLK, SNAI2),
  # luminal progenitor (TNFRSF11A (RANK), KIT),
  # mature luminal cells (ESR1, PGR, FOXA1)
  # The official symbol TNFRSF11A is used rather than the alias RANK, in line
  # with the other panels (e.g. TNFRSF17 rather than BCMA), so that the gene
  # can be matched against symbol-named features.
  brca_markers <- c(
    "EPCAM", "KRT19", "PROM1", "ALDH1A1",
    "KRT5", "ACTA2", "MYLK", "SNAI2", # basal
    "TNFRSF11A", "KIT", # luminal progenitor
    "ESR1", "PGR", "FOXA1", # mature luminal cells
    "KRT4", "KRT14", "KRT8", "KRT18",
    "CD3D", "PTPRC"
  )

  genes_to_check <- list(
    all_markers = all_markers,
    Tcells_markers = Tcells_markers,
    Bcells_markers = Bcells_markers,
    Myeloid_markers = Myeloid_markers,
    epi_markers = epi_markers,
    stromal_markers = stromal_markers,
    brca_markers = brca_markers
  )

  # Human symbols are used as-is; mouse symbols are approximated by
  # title-casing the human ones (e.g. CD3D -> Cd3d).
  if (org == "mouse") {
    genes_to_check <- lapply(genes_to_check, stringr::str_to_title)
  } else if (org != "human") {
    stop('So far, we only accept human and mouse')
  }

  # Some genes are listed twice within a panel (e.g. IFNG in the T cell
  # panel); each feature is passed to DotPlot only once.
  genes_to_check <- lapply(genes_to_check, unique)

  # dpl: dotplot, list (one plot per panel, names preserved by lapply)
  dpl <- lapply(genes_to_check, function(cg) {
    Seurat::DotPlot(
      sce,
      assay = "RNA",
      features = cg,
      group.by = group
    ) + ggplot2::coord_flip()
  })

  grDevices::pdf(file.path(dir, paste0("markers_based_on_", group, ".pdf")))
  # Close exactly the device opened above, even if printing a page fails, so
  # no graphics device is left open behind the caller's back.
  pdf_device <- grDevices::dev.cur()
  on.exit(grDevices::dev.off(pdf_device), add = TRUE)

  # One page per panel, titled with the panel name.
  for (panel in names(dpl)) {
    print(dpl[[panel]] + ggplot2::ggtitle(panel))
  }

  dpl
}


Loading

0 comments on commit 5baba40

Please sign in to comment.