Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Andysheh Mohajeri
authored and
Andysheh Mohajeri
committed
Oct 3, 2017
1 parent
4fab0d7
commit c4288a5
Showing
51 changed files
with
2,728 additions
and
935 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
library(monocle) | ||
library(HSMMSingleCell) | ||
context("detectGenes functions properly") | ||
|
||
data(HSMM_expr_matrix) | ||
data(HSMM_gene_annotation) | ||
data(HSMM_sample_sheet) | ||
|
||
pd <- new("AnnotatedDataFrame", data = HSMM_sample_sheet) | ||
fd <- new("AnnotatedDataFrame", data = HSMM_gene_annotation) | ||
HSMM <- newCellDataSet(as.matrix(HSMM_expr_matrix), | ||
phenoData = pd, | ||
featureData = fd, | ||
lowerDetectionLimit = 0.1, | ||
expressionFamily = tobit(Lower = 0.1)) | ||
|
||
HSMM <- estimateSizeFactors(HSMM) | ||
HSMM <- estimateDispersions(HSMM) | ||
HSMM <- detectGenes(HSMM, min_expr = 0.1) | ||
HSMM <- HSMM[,pData(HSMM)$Total_mRNAs < 1e6] | ||
upper_bound <- 10^(mean(log10(pData(HSMM)$Total_mRNAs)) + 2*sd(log10(pData(HSMM)$Total_mRNAs))) | ||
lower_bound <- 10^(mean(log10(pData(HSMM)$Total_mRNAs)) - 2*sd(log10(pData(HSMM)$Total_mRNAs))) | ||
HSMM <- HSMM[,pData(HSMM)$Total_mRNAs > lower_bound & pData(HSMM)$Total_mRNAs < upper_bound] | ||
cth <- newCellTypeHierarchy() | ||
|
||
MYF5_id <- row.names(subset(fData(HSMM), gene_short_name == "MYF5")) | ||
|
||
#write test code for this: | ||
|
||
test_that("test addCellType works properly", { | ||
expect_error(cth <- addCellType(cth, "Myoblast", classify_func = function(x) {x[MYF5_id,] >= 1}), NA) | ||
}) | ||
|
||
test_that("test addCellType throws error when same cell name is used", { | ||
cth <- addCellType(cth, "Myoblast1", classify_func = function(x) {x[MYF5_id,] >= 1}) | ||
expect_error(cth <- addCellType(cth, "Myoblast1", classify_func = function(x) {x[MYF5_id,] >= 1}), NA) | ||
}) | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
library(monocle) | ||
library(HSMMSingleCell) | ||
context("calculateMarkerSpecificity is functioning properly") | ||
|
||
pd <- new("AnnotatedDataFrame", data = HSMM_sample_sheet) | ||
fd <- new("AnnotatedDataFrame", data = HSMM_gene_annotation) | ||
HSMM <- newCellDataSet(as.matrix(HSMM_expr_matrix), phenoData = pd, featureData = fd) | ||
|
||
HSMM <- newCellDataSet(as(umi_matrix, "sparseMatrix"), | ||
phenoData = pd, | ||
featureData = fd, | ||
lowerDetectionLimit = 0.5, | ||
expressionFamily = negbinomial.size()) | ||
|
||
cellranger_pipestance_path <- "/path/to/your/pipeline/output/directory" | ||
gbm <- load_cellranger_matrix(cellranger_pipestance_path) | ||
gbm_cds <- newCellDataSet(exprs(gbm), | ||
phenoData = new("AnnotatedDataFrame", data = pData(gbm)), | ||
phenoData = new("AnnotatedDataFrame", data = fData(gbm)), | ||
lowerDetectionLimit = 0.5, | ||
expressionFamily = negbinomial.size()) | ||
|
||
HSMM <- detectGenes(HSMM, min_expr = 0.1) | ||
expressed_genes <- row.names(subset(fData(HSMM), num_cells_expressed >= 10)) | ||
valid_cells <- row.names(subset(pData(HSMM), | ||
Cells.in.Well == 1 & | ||
Control == FALSE & | ||
Clump == FALSE & | ||
Debris == FALSE & | ||
Mapped.Fragments > 1000000)) | ||
HSMM <- HSMM[,valid_cells] | ||
|
||
pData(HSMM)$Total_mRNAs <- Matrix::colSums(exprs(HSMM)) | ||
|
||
HSMM <- HSMM[,pData(HSMM)$Total_mRNAs < 1e6] | ||
|
||
upper_bound <- 10^(mean(log10(pData(HSMM)$Total_mRNAs)) + | ||
2*sd(log10(pData(HSMM)$Total_mRNAs))) | ||
lower_bound <- 10^(mean(log10(pData(HSMM)$Total_mRNAs)) - | ||
2*sd(log10(pData(HSMM)$Total_mRNAs))) | ||
|
||
HSMM <- HSMM[,pData(HSMM)$Total_mRNAs > lower_bound & | ||
pData(HSMM)$Total_mRNAs < upper_bound] | ||
HSMM <- detectGenes(HSMM, min_expr = 0.1) | ||
|
||
# Log-transform each value in the expression matrix. | ||
L <- log(exprs(HSMM[expressed_genes,])) | ||
|
||
# Standardize each gene, so that they are all on the same scale, | ||
# Then melt the data with plyr so we can plot it easily | ||
melted_dens_df <- melt(Matrix::t(scale(Matrix::t(L)))) | ||
|
||
# Plot the distribution of the standardized gene expression values. | ||
qplot(value, geom = "density", data = melted_dens_df) + | ||
stat_function(fun = dnorm, size = 0.5, color = 'red') + | ||
xlab("Standardized log(FPKM)") + | ||
ylab("Density") | ||
|
||
MYF5_id <- row.names(subset(fData(HSMM), gene_short_name == "MYF5")) | ||
ANPEP_id <- row.names(subset(fData(HSMM), gene_short_name == "ANPEP")) | ||
|
||
cth <- newCellTypeHierarchy() | ||
cth <- addCellType(cth, "Myoblast", classify_func = function(x) { x[MYF5_id,] >= 1 }) | ||
cth <- addCellType(cth, "Fibroblast", classify_func = function(x) | ||
{ x[MYF5_id,] < 1 & x[ANPEP_id,] > 1 }) | ||
|
||
HSMM <- classifyCells(HSMM, cth, 0.1) | ||
marker_diff <- markerDiffTable(HSMM[expressed_genes,], | ||
cth, | ||
residualModelFormulaStr = "~Media + num_genes_expressed", | ||
cores = 1) | ||
candidate_clustering_genes <- row.names(subset(marker_diff, qval < 0.01)) | ||
|
||
test_that("calculateMarkerSpecificity functions properly in vignette", | ||
expect_error(calculateMarkerSpecificity(HSMM[candidate_clustering_genes,], cth), NA) | ||
) | ||
|
||
test_that("calculateMarkerSpecificity throws error if cds is not of type CellDataSet", | ||
expect_error(calculateMarkerSpecificity(cth, cth), NA) | ||
) | ||
|
||
test_that("calculateMarkerSpecificity throws error if cth is not of type CellTypeHierarchy", | ||
expect_error(calculateMarkerSpecificity(HSMM[candidate_clustering_genes,], HSMM[candidate_clustering_genes,]), NA) | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
library(monocle) | ||
library(HSMMSingleCell) | ||
context("classifyCells functions properly") | ||
|
||
data(HSMM_expr_matrix) | ||
data(HSMM_gene_annotation) | ||
data(HSMM_sample_sheet) | ||
|
||
pd <- new("AnnotatedDataFrame", data = HSMM_sample_sheet) | ||
fd <- new("AnnotatedDataFrame", data = HSMM_gene_annotation) | ||
HSMM <- newCellDataSet(as.matrix(HSMM_expr_matrix), | ||
phenoData = pd, | ||
featureData = fd, | ||
lowerDetectionLimit=0.1, | ||
expressionFamily=tobit(Lower=0.1)) | ||
|
||
|
||
rpc_matrix <- relative2abs(HSMM, method = "num_genes") | ||
|
||
|
||
HSMM <- newCellDataSet(as(as.matrix(rpc_matrix), "sparseMatrix"), | ||
phenoData = pd, | ||
featureData = fd, | ||
lowerDetectionLimit=0.5, | ||
expressionFamily=negbinomial.size()) | ||
|
||
HSMM <- estimateSizeFactors(HSMM) | ||
HSMM <- estimateDispersions(HSMM) | ||
HSMM <- detectGenes(HSMM, min_expr = 0.1) | ||
HSMM <- HSMM[,pData(HSMM)$Total_mRNAs < 1e6] | ||
upper_bound <- 10^(mean(log10(pData(HSMM)$Total_mRNAs)) + 2*sd(log10(pData(HSMM)$Total_mRNAs))) | ||
lower_bound <- 10^(mean(log10(pData(HSMM)$Total_mRNAs)) - 2*sd(log10(pData(HSMM)$Total_mRNAs))) | ||
HSMM <- HSMM[,pData(HSMM)$Total_mRNAs > lower_bound & pData(HSMM)$Total_mRNAs < upper_bound] | ||
cth <- newCellTypeHierarchy() | ||
|
||
MYF5_id <- row.names(subset(fData(HSMM), gene_short_name == "MYF5")) | ||
ANPEP_id <- row.names(subset(fData(HSMM), gene_short_name == "ANPEP")) | ||
|
||
cth <- newCellTypeHierarchy() | ||
cth <- addCellType(cth, "Myoblast", classify_func=function(x) {x[MYF5_id,] >= 1}) | ||
cth <- addCellType(cth, "Fibroblast", classify_func=function(x) | ||
{x[MYF5_id,] < 1 & x[ANPEP_id,] > 1}) | ||
|
||
#write test code for this: | ||
|
||
test_that("test classifyCells throws error if frequency_thresh & enrichment_thresh are both NULL and character string is passed to method", { | ||
expect_error(expect_error(classifyCells(HSMM, cth, frequency_thresh = NULL, enrichment_thresh = NULL, test = c("t", "e", "s", "t")), "Error: to use classifyCells in grouped mode, you must also set frequency_thresh")) | ||
}) | ||
|
||
test_that("test classifyCells works properly 1", { | ||
expect_error(classifyCells(HSMM, cth, 0.1), NA) | ||
}) | ||
|
||
test_that("test classifyCells works when enrichment_thresh is passed and frequency_thresh is null", { | ||
expect_error(classifyCells(HSMM, cth, enrichment_thresh = 0.1), NA) | ||
}) | ||
|
||
test_that("test classifyCells works when frequency_thresh is passed and enrichment_thresh is null", { | ||
expect_error(classifyCells(HSMM, cth, frequency_thresh = 0.1)) | ||
}) | ||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
library(monocle) | ||
library(HSMMSingleCell) | ||
context("clusterCells is functioning properly") | ||
|
||
pd <- new("AnnotatedDataFrame", data = HSMM_sample_sheet) | ||
fd <- new("AnnotatedDataFrame", data = HSMM_gene_annotation) | ||
HSMM <- newCellDataSet(as.matrix(HSMM_expr_matrix), phenoData = pd, featureData = fd) | ||
|
||
HSMM <- newCellDataSet(as(umi_matrix, "sparseMatrix"), | ||
phenoData = pd, | ||
featureData = fd, | ||
lowerDetectionLimit = 0.5, | ||
expressionFamily = negbinomial.size()) | ||
|
||
cellranger_pipestance_path <- "/path/to/your/pipeline/output/directory" | ||
gbm <- load_cellranger_matrix(cellranger_pipestance_path) | ||
gbm_cds <- newCellDataSet(exprs(gbm), | ||
phenoData = new("AnnotatedDataFrame", data = pData(gbm)), | ||
phenoData = new("AnnotatedDataFrame", data = fData(gbm)), | ||
lowerDetectionLimit = 0.5, | ||
expressionFamily = negbinomial.size()) | ||
|
||
HSMM <- detectGenes(HSMM, min_expr = 0.1) | ||
expressed_genes <- row.names(subset(fData(HSMM), num_cells_expressed >= 10)) | ||
valid_cells <- row.names(subset(pData(HSMM), | ||
Cells.in.Well == 1 & | ||
Control == FALSE & | ||
Clump == FALSE & | ||
Debris == FALSE & | ||
Mapped.Fragments > 1000000)) | ||
HSMM <- HSMM[,valid_cells] | ||
|
||
pData(HSMM)$Total_mRNAs <- Matrix::colSums(exprs(HSMM)) | ||
|
||
HSMM <- HSMM[,pData(HSMM)$Total_mRNAs < 1e6] | ||
|
||
upper_bound <- 10^(mean(log10(pData(HSMM)$Total_mRNAs)) + | ||
2*sd(log10(pData(HSMM)$Total_mRNAs))) | ||
lower_bound <- 10^(mean(log10(pData(HSMM)$Total_mRNAs)) - | ||
2*sd(log10(pData(HSMM)$Total_mRNAs))) | ||
|
||
HSMM <- HSMM[,pData(HSMM)$Total_mRNAs > lower_bound & | ||
pData(HSMM)$Total_mRNAs < upper_bound] | ||
HSMM <- detectGenes(HSMM, min_expr = 0.1) | ||
|
||
# Log-transform each value in the expression matrix. | ||
L <- log(exprs(HSMM[expressed_genes,])) | ||
|
||
# Standardize each gene, so that they are all on the same scale, | ||
# Then melt the data with plyr so we can plot it easily | ||
melted_dens_df <- melt(Matrix::t(scale(Matrix::t(L)))) | ||
|
||
# Plot the distribution of the standardized gene expression values. | ||
qplot(value, geom = "density", data = melted_dens_df) + | ||
stat_function(fun = dnorm, size = 0.5, color = 'red') + | ||
xlab("Standardized log(FPKM)") + | ||
ylab("Density") | ||
|
||
MYF5_id <- row.names(subset(fData(HSMM), gene_short_name == "MYF5")) | ||
ANPEP_id <- row.names(subset(fData(HSMM), gene_short_name == "ANPEP")) | ||
|
||
cth <- newCellTypeHierarchy() | ||
cth <- addCellType(cth, "Myoblast", classify_func = function(x) { x[MYF5_id,] >= 1 }) | ||
cth <- addCellType(cth, "Fibroblast", classify_func = function(x) | ||
{ x[MYF5_id,] < 1 & x[ANPEP_id,] > 1 }) | ||
|
||
HSMM <- classifyCells(HSMM, cth, 0.1) | ||
marker_diff <- markerDiffTable(HSMM[expressed_genes,], | ||
cth, | ||
residualModelFormulaStr = "~Media + num_genes_expressed", | ||
cores = 1) | ||
candidate_clustering_genes <- row.names(subset(marker_diff, qval < 0.01)) | ||
marker_spec <- calculateMarkerSpecificity(HSMM[candidate_clustering_genes,], cth) | ||
semisup_clustering_genes <- unique(selectTopMarkers(marker_spec, 500)$gene_id) | ||
HSMM <- setOrderingFilter(HSMM, semisup_clustering_genes) | ||
HSMM <- reduceDimension(HSMM, max_components = 2, num_dim = 3, norm_method = 'log', | ||
reduction_method = 'tSNE', | ||
residualModelFormulaStr = "~Media + num_genes_expressed", | ||
verbose = T) | ||
|
||
test_that("clusterCells functions normally in vignette", | ||
expect_error(clusterCells(HSMM, num_clusters = 2), NA)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
library(monocle) | ||
library(HSMMSingleCell) | ||
context("detectGenes functions properly 1") | ||
|
||
|
||
#write test code for this: | ||
|
||
test_that("test detectGenes works properly", { | ||
data(HSMM_expr_matrix) | ||
data(HSMM_gene_annotation) | ||
data(HSMM_sample_sheet) | ||
pd <- new("AnnotatedDataFrame", data = HSMM_sample_sheet) | ||
fd <- new("AnnotatedDataFrame", data = HSMM_gene_annotation) | ||
HSMM <- newCellDataSet(as.matrix(HSMM_expr_matrix), phenoData = pd, featureData = fd) | ||
|
||
HSMM <- estimateSizeFactors(HSMM) | ||
HSMM <- estimateDispersions(HSMM) | ||
|
||
expect_error(detectGenes(HSMM), NA) | ||
}) | ||
|
||
|
Oops, something went wrong.