updated testing documentation

cole-trapnell-lab · Oct 3, 2017 · c4288a5 · c4288a5
1 parent 4fab0d7
commit c4288a5
Show file tree

Hide file tree

Showing 51 changed files with 2,728 additions and 935 deletions.
diff --git a/inst/tests/test.addCellType.R b/inst/tests/test.addCellType.R
@@ -0,0 +1,39 @@
+library(monocle)
+library(HSMMSingleCell)
+context("detectGenes functions properly")
+
+data(HSMM_expr_matrix)
+data(HSMM_gene_annotation)
+data(HSMM_sample_sheet)
+
+pd <- new("AnnotatedDataFrame", data = HSMM_sample_sheet)
+fd <- new("AnnotatedDataFrame", data = HSMM_gene_annotation)
+HSMM <- newCellDataSet(as.matrix(HSMM_expr_matrix),
+                       phenoData = pd,
+                       featureData = fd,
+                       lowerDetectionLimit = 0.1,
+                       expressionFamily = tobit(Lower = 0.1))
+
+HSMM <- estimateSizeFactors(HSMM)
+HSMM <- estimateDispersions(HSMM)
+HSMM <- detectGenes(HSMM, min_expr = 0.1)
+HSMM <- HSMM[,pData(HSMM)$Total_mRNAs < 1e6]
+upper_bound <- 10^(mean(log10(pData(HSMM)$Total_mRNAs)) + 2*sd(log10(pData(HSMM)$Total_mRNAs)))
+lower_bound <- 10^(mean(log10(pData(HSMM)$Total_mRNAs)) - 2*sd(log10(pData(HSMM)$Total_mRNAs)))
+HSMM <- HSMM[,pData(HSMM)$Total_mRNAs > lower_bound & pData(HSMM)$Total_mRNAs < upper_bound]
+cth <- newCellTypeHierarchy()
+
+MYF5_id <- row.names(subset(fData(HSMM), gene_short_name == "MYF5"))
+
+#write test code for this: 
+
+test_that("test addCellType works properly", {
+  expect_error(cth <- addCellType(cth, "Myoblast", classify_func = function(x) {x[MYF5_id,] >= 1}), NA)
+})
+
+test_that("test addCellType throws error when same cell name is used", {
+  cth <- addCellType(cth, "Myoblast1", classify_func = function(x) {x[MYF5_id,] >= 1})
+  expect_error(cth <- addCellType(cth, "Myoblast1", classify_func = function(x) {x[MYF5_id,] >= 1}), NA)
+})
+
+
diff --git a/inst/tests/test.calculateMarkerSpecificity.R b/inst/tests/test.calculateMarkerSpecificity.R
@@ -0,0 +1,84 @@
+library(monocle)
+library(HSMMSingleCell)
+context("calculateMarkerSpecificity is functioning properly")
+
+pd <- new("AnnotatedDataFrame", data = HSMM_sample_sheet)
+fd <- new("AnnotatedDataFrame", data = HSMM_gene_annotation)
+HSMM <- newCellDataSet(as.matrix(HSMM_expr_matrix), phenoData = pd, featureData = fd)
+
+HSMM <- newCellDataSet(as(umi_matrix, "sparseMatrix"),
+                       phenoData = pd,
+                       featureData = fd,
+                       lowerDetectionLimit = 0.5,
+                       expressionFamily = negbinomial.size())
+
+cellranger_pipestance_path <- "/path/to/your/pipeline/output/directory"
+gbm <- load_cellranger_matrix(cellranger_pipestance_path)
+gbm_cds <- newCellDataSet(exprs(gbm),
+                          phenoData = new("AnnotatedDataFrame", data = pData(gbm)),
+                          phenoData = new("AnnotatedDataFrame", data = fData(gbm)),
+                          lowerDetectionLimit = 0.5,
+                          expressionFamily = negbinomial.size())
+
+HSMM <- detectGenes(HSMM, min_expr = 0.1)
+expressed_genes <- row.names(subset(fData(HSMM), num_cells_expressed >= 10))
+valid_cells <- row.names(subset(pData(HSMM),
+                                Cells.in.Well == 1 &
+                                  Control == FALSE &
+                                  Clump == FALSE &
+                                  Debris == FALSE &
+                                  Mapped.Fragments > 1000000))
+HSMM <- HSMM[,valid_cells]
+
+pData(HSMM)$Total_mRNAs <- Matrix::colSums(exprs(HSMM))
+
+HSMM <- HSMM[,pData(HSMM)$Total_mRNAs < 1e6]
+
+upper_bound <- 10^(mean(log10(pData(HSMM)$Total_mRNAs)) +
+                     2*sd(log10(pData(HSMM)$Total_mRNAs)))
+lower_bound <- 10^(mean(log10(pData(HSMM)$Total_mRNAs)) -
+                     2*sd(log10(pData(HSMM)$Total_mRNAs)))
+
+HSMM <- HSMM[,pData(HSMM)$Total_mRNAs > lower_bound &
+               pData(HSMM)$Total_mRNAs < upper_bound]
+HSMM <- detectGenes(HSMM, min_expr = 0.1)
+
+# Log-transform each value in the expression matrix.
+L <- log(exprs(HSMM[expressed_genes,]))
+
+# Standardize each gene, so that they are all on the same scale,
+# Then melt the data with plyr so we can plot it easily
+melted_dens_df <- melt(Matrix::t(scale(Matrix::t(L))))
+
+# Plot the distribution of the standardized gene expression values.
+qplot(value, geom = "density", data = melted_dens_df) +
+  stat_function(fun = dnorm, size = 0.5, color = 'red') +
+  xlab("Standardized log(FPKM)") +
+  ylab("Density")
+
+MYF5_id <- row.names(subset(fData(HSMM), gene_short_name == "MYF5"))
+ANPEP_id <- row.names(subset(fData(HSMM), gene_short_name == "ANPEP"))
+
+cth <- newCellTypeHierarchy()
+cth <- addCellType(cth, "Myoblast", classify_func = function(x) { x[MYF5_id,] >= 1 })
+cth <- addCellType(cth, "Fibroblast", classify_func = function(x)
+{ x[MYF5_id,] < 1 & x[ANPEP_id,] > 1 })
+
+HSMM <- classifyCells(HSMM, cth, 0.1)
+marker_diff <- markerDiffTable(HSMM[expressed_genes,],
+                               cth,
+                               residualModelFormulaStr = "~Media + num_genes_expressed",
+                               cores = 1)
+candidate_clustering_genes <- row.names(subset(marker_diff, qval < 0.01))
+
+test_that("calculateMarkerSpecificity functions properly in vignette", 
+          expect_error(calculateMarkerSpecificity(HSMM[candidate_clustering_genes,], cth), NA)
+          )
+
+test_that("calculateMarkerSpecificity throws error if cds is not of type CellDataSet", 
+          expect_error(calculateMarkerSpecificity(cth, cth), NA)
+)
+
+test_that("calculateMarkerSpecificity throws error if cth is not of type CellTypeHierarchy", 
+          expect_error(calculateMarkerSpecificity(HSMM[candidate_clustering_genes,], HSMM[candidate_clustering_genes,]), NA)
+)
diff --git a/inst/tests/test.classifyCells.R b/inst/tests/test.classifyCells.R
@@ -0,0 +1,64 @@
+library(monocle)
+library(HSMMSingleCell)
+context("classifyCells functions properly")
+
+data(HSMM_expr_matrix)
+data(HSMM_gene_annotation)
+data(HSMM_sample_sheet)
+
+pd <- new("AnnotatedDataFrame", data = HSMM_sample_sheet)
+fd <- new("AnnotatedDataFrame", data = HSMM_gene_annotation)
+HSMM <- newCellDataSet(as.matrix(HSMM_expr_matrix),   
+                       phenoData = pd, 
+                       featureData = fd,
+                       lowerDetectionLimit=0.1,
+                       expressionFamily=tobit(Lower=0.1))
+
+
+rpc_matrix <- relative2abs(HSMM, method = "num_genes")
+
+
+HSMM <- newCellDataSet(as(as.matrix(rpc_matrix), "sparseMatrix"),
+                       phenoData = pd, 
+                       featureData = fd,
+                       lowerDetectionLimit=0.5,
+                       expressionFamily=negbinomial.size())
+
+HSMM <- estimateSizeFactors(HSMM)
+HSMM <- estimateDispersions(HSMM)
+HSMM <- detectGenes(HSMM, min_expr = 0.1)
+HSMM <- HSMM[,pData(HSMM)$Total_mRNAs < 1e6]
+upper_bound <- 10^(mean(log10(pData(HSMM)$Total_mRNAs)) + 2*sd(log10(pData(HSMM)$Total_mRNAs)))
+lower_bound <- 10^(mean(log10(pData(HSMM)$Total_mRNAs)) - 2*sd(log10(pData(HSMM)$Total_mRNAs)))
+HSMM <- HSMM[,pData(HSMM)$Total_mRNAs > lower_bound & pData(HSMM)$Total_mRNAs < upper_bound]
+cth <- newCellTypeHierarchy()
+
+MYF5_id <- row.names(subset(fData(HSMM), gene_short_name == "MYF5"))
+ANPEP_id <- row.names(subset(fData(HSMM), gene_short_name == "ANPEP"))
+
+cth <- newCellTypeHierarchy()
+cth <- addCellType(cth, "Myoblast", classify_func=function(x) {x[MYF5_id,] >= 1})
+cth <- addCellType(cth, "Fibroblast", classify_func=function(x)
+{x[MYF5_id,] < 1 & x[ANPEP_id,] > 1})
+
+#write test code for this: 
+
+test_that("test classifyCells throws error if frequency_thresh & enrichment_thresh are both NULL and character string is passed to method", {
+  expect_error(expect_error(classifyCells(HSMM, cth, frequency_thresh = NULL, enrichment_thresh = NULL, test = c("t", "e", "s", "t")), "Error: to use classifyCells in grouped mode, you must also set frequency_thresh"))
+})
+
+test_that("test classifyCells works properly 1", {
+  expect_error(classifyCells(HSMM, cth, 0.1), NA)
+})
+
+test_that("test classifyCells works when enrichment_thresh is passed and frequency_thresh is null", {
+  expect_error(classifyCells(HSMM, cth, enrichment_thresh = 0.1), NA)
+})
+
+test_that("test classifyCells works when frequency_thresh is passed and enrichment_thresh is null", {
+  expect_error(classifyCells(HSMM, cth, frequency_thresh = 0.1))
+})
+
+
+
+
diff --git a/inst/tests/test.clusterCells.R b/inst/tests/test.clusterCells.R
@@ -0,0 +1,82 @@
+library(monocle)
+library(HSMMSingleCell)
+context("clusterCells is functioning properly")
+
+pd <- new("AnnotatedDataFrame", data = HSMM_sample_sheet)
+fd <- new("AnnotatedDataFrame", data = HSMM_gene_annotation)
+HSMM <- newCellDataSet(as.matrix(HSMM_expr_matrix), phenoData = pd, featureData = fd)
+
+HSMM <- newCellDataSet(as(umi_matrix, "sparseMatrix"),
+                       phenoData = pd,
+                       featureData = fd,
+                       lowerDetectionLimit = 0.5,
+                       expressionFamily = negbinomial.size())
+
+cellranger_pipestance_path <- "/path/to/your/pipeline/output/directory"
+gbm <- load_cellranger_matrix(cellranger_pipestance_path)
+gbm_cds <- newCellDataSet(exprs(gbm),
+                          phenoData = new("AnnotatedDataFrame", data = pData(gbm)),
+                          phenoData = new("AnnotatedDataFrame", data = fData(gbm)),
+                          lowerDetectionLimit = 0.5,
+                          expressionFamily = negbinomial.size())
+
+HSMM <- detectGenes(HSMM, min_expr = 0.1)
+expressed_genes <- row.names(subset(fData(HSMM), num_cells_expressed >= 10))
+valid_cells <- row.names(subset(pData(HSMM),
+                                Cells.in.Well == 1 &
+                                  Control == FALSE &
+                                  Clump == FALSE &
+                                  Debris == FALSE &
+                                  Mapped.Fragments > 1000000))
+HSMM <- HSMM[,valid_cells]
+
+pData(HSMM)$Total_mRNAs <- Matrix::colSums(exprs(HSMM))
+
+HSMM <- HSMM[,pData(HSMM)$Total_mRNAs < 1e6]
+
+upper_bound <- 10^(mean(log10(pData(HSMM)$Total_mRNAs)) +
+                     2*sd(log10(pData(HSMM)$Total_mRNAs)))
+lower_bound <- 10^(mean(log10(pData(HSMM)$Total_mRNAs)) -
+                     2*sd(log10(pData(HSMM)$Total_mRNAs)))
+
+HSMM <- HSMM[,pData(HSMM)$Total_mRNAs > lower_bound &
+               pData(HSMM)$Total_mRNAs < upper_bound]
+HSMM <- detectGenes(HSMM, min_expr = 0.1)
+
+# Log-transform each value in the expression matrix.
+L <- log(exprs(HSMM[expressed_genes,]))
+
+# Standardize each gene, so that they are all on the same scale,
+# Then melt the data with plyr so we can plot it easily
+melted_dens_df <- melt(Matrix::t(scale(Matrix::t(L))))
+
+# Plot the distribution of the standardized gene expression values.
+qplot(value, geom = "density", data = melted_dens_df) +
+  stat_function(fun = dnorm, size = 0.5, color = 'red') +
+  xlab("Standardized log(FPKM)") +
+  ylab("Density")
+
+MYF5_id <- row.names(subset(fData(HSMM), gene_short_name == "MYF5"))
+ANPEP_id <- row.names(subset(fData(HSMM), gene_short_name == "ANPEP"))
+
+cth <- newCellTypeHierarchy()
+cth <- addCellType(cth, "Myoblast", classify_func = function(x) { x[MYF5_id,] >= 1 })
+cth <- addCellType(cth, "Fibroblast", classify_func = function(x)
+{ x[MYF5_id,] < 1 & x[ANPEP_id,] > 1 })
+
+HSMM <- classifyCells(HSMM, cth, 0.1)
+marker_diff <- markerDiffTable(HSMM[expressed_genes,],
+                               cth,
+                               residualModelFormulaStr = "~Media + num_genes_expressed",
+                               cores = 1)
+candidate_clustering_genes <- row.names(subset(marker_diff, qval < 0.01))
+marker_spec <- calculateMarkerSpecificity(HSMM[candidate_clustering_genes,], cth)
+semisup_clustering_genes <- unique(selectTopMarkers(marker_spec, 500)$gene_id)
+HSMM <- setOrderingFilter(HSMM, semisup_clustering_genes)
+HSMM <- reduceDimension(HSMM, max_components = 2, num_dim = 3, norm_method = 'log',
+                        reduction_method = 'tSNE',
+                        residualModelFormulaStr = "~Media + num_genes_expressed",
+                        verbose = T)
+
+test_that("clusterCells functions normally in vignette", 
+          expect_error(clusterCells(HSMM, num_clusters = 2), NA))
diff --git a/inst/tests/test.detectGenes.R b/inst/tests/test.detectGenes.R
@@ -0,0 +1,22 @@
+library(monocle)
+library(HSMMSingleCell)
+context("detectGenes functions properly 1")
+
+
+#write test code for this: 
+
+test_that("test detectGenes works properly", {
+  data(HSMM_expr_matrix)
+  data(HSMM_gene_annotation)
+  data(HSMM_sample_sheet)
+  pd <- new("AnnotatedDataFrame", data = HSMM_sample_sheet)
+  fd <- new("AnnotatedDataFrame", data = HSMM_gene_annotation)
+  HSMM <- newCellDataSet(as.matrix(HSMM_expr_matrix), phenoData = pd, featureData = fd)
+
+  HSMM <- estimateSizeFactors(HSMM)
+  HSMM <- estimateDispersions(HSMM)
+
+  expect_error(detectGenes(HSMM), NA)
+})
+
+