Skip to content

Commit

Permalink
version 0.2.0
Browse files Browse the repository at this point in the history
  • Loading branch information
jdanielnd authored and gaborcsardi committed Jul 29, 2015
1 parent 9308093 commit 5678fab
Show file tree
Hide file tree
Showing 23 changed files with 621 additions and 150 deletions.
22 changes: 16 additions & 6 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,16 +1,26 @@
Package: slfm
Type: Package
Title: Tools for fitting sparse latent factor model
Version: 0.1
Title: Tools for Fitting Sparse Latent Factor Model
Version: 0.2.0
Author: Joao Duarte and Vinicius Mayrink
Maintainer: Joao Duarte <jdanielnd@gmail.com>
Description: slfm is a set of tools to find coherent patterns in microarray data using a Bayesian sparse latent factor model. Considerable effort has been put into making slfm fast and memory efficient, turning it an interesting alternative to simpler methods in terms of execution time.
Description: Set of tools to find coherent patterns in microarray data
using a Bayesian sparse latent factor model (Duarte and Mayrink 2015 -
http://link.springer.com/chapter/10.1007%2F978-3-319-12454-4_15).
Considerable effort has been put into making slfm fast and memory efficient,
turning it an interesting alternative to simpler methods in terms
of execution time. It implements versions of the SLFM using both type
of mixtures: using a degenerate distribution or a very concentrated
normal distribution for the spike part of the mixture. It also implements
additional functions to help pre-process the data and fit the model
for a large number of arrays.
URL: https://github.com/jdanielnd/slfm
BugReports: https://github.com/jdanielnd/slfm/issues
Depends: R (>= 3.1.0)
Imports: Rcpp (>= 0.11.0), coda
Imports: Rcpp (>= 0.11.0), coda, lattice
LinkingTo: Rcpp, RcppArmadillo
License: GPL-2
Packaged: 2014-05-08 11:44:40 UTC; jdanielnd
NeedsCompilation: yes
Packaged: 2015-07-29 00:03:10 UTC; jdanielnd
Repository: CRAN
Date/Publication: 2014-05-08 15:00:06
Date/Publication: 2015-07-29 06:21:43
38 changes: 21 additions & 17 deletions MD5
Original file line number Diff line number Diff line change
@@ -1,17 +1,21 @@
67f632137f2c72344fd625e100a956f8 *DESCRIPTION
842e742ed56f4760c893e34cc569b8c9 *NAMESPACE
e9743678da83348d9a7e570d75f7e1cf *NEWS
014adecf1f180367189bb57bbca5195f *R/RcppExports.R
5f03ec1b26946fd9756df798882cb4d9 *R/class_interval.r
dce71247a3c7006c24a7e75259758a68 *R/process_matrix.r
e56aec246621d85de4cc226d3d5a3f96 *R/slfm-package.r
6295402ccac87eb26b013500e8a98633 *R/slfm.r
6df4e4bfbf4d4f524495699ba00dc1d1 *R/slfm_list.r
c693f6ae8f9d620381d2369d5a3852df *README.md
23a0867b7f440797a1cd5e50889435e6 *man/process_matrix.Rd
63a06fe206d9f2a1c266c5c44c9b6f4d *man/slfm-package.Rd
a5b8b0defcabdc9c48b9ac9a2108896c *man/slfm.Rd
677de0b916ca383f9b522735fd5c2fb4 *man/slfm_list.Rd
c818f4958497e27ca11f51adab2aa3d4 *src/Makevars.win
3bb4238fe46039b75ec894824dffd73d *src/RcppExports.cpp
3e25ef05cb2044a839656bfae7ad5510 *src/gibbs.cpp
09c6391016c6b753e69a7534e10b8727 *DESCRIPTION
87b17ef5c347ec4ca6d1641066700c30 *NAMESPACE
bd3d6f4d64ee1b4f4804ad427a2745d2 *NEWS
3e11a37d28ca0244100992886a82d3d5 *R/RcppExports.R
c2bee89222b5b1d2f8a3b4ee0d5c59a6 *R/class_interval.r
baabf72d4bc884b4c1a64efa0ab6929f *R/plot_matrix.r
87f7eeceee7273541541ba5c589103ec *R/process_matrix.r
43023f17cf4534c205f467c51594dff3 *R/slfm-package.r
75aa158541ee3aed3c5084841a138dbc *R/slfm.r
ccb2e56a2738d8fd42b70f0bf0adc061 *R/slfm_list.r
540f05f21a09f459cc33f9d244ac919d *man/plot_matrix.Rd
59958c4ef88f62f062a57ede17dd64c7 *man/process_matrix.Rd
50feb9e98613c1d6ee4e2ca0aa62a5fd *man/slfm-package.Rd
7ca8f4fcb3b7aab69b8abaabecf6c60c *man/slfm.Rd
c34c4e9de5e7ea3372dd64a3f187bc2f *man/slfm_list.Rd
bcc42aaeb0a69ec1b279b42021751d66 *src/Makevars
3ef757384e0c95c1492b069cf4548b01 *src/Makevars.win
a3d13222e4eea03a3a5673c2a529bfe0 *src/RcppExports.cpp
3bc74fd8d172c42465d00b496b4753de *src/gibbs.cpp
940a1289008060e66afcc95dda92d9a6 *src/slfm_MDN.cpp
42a422b6d0c3b3d44a779cad0174ecc0 *src/slfm_MNN.cpp
13 changes: 13 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,8 +1,21 @@
# Generated by roxygen2 (4.1.1): do not edit by hand

export(plot_matrix)
export(process_matrix)
export(slfm)
export(slfm_list)
importFrom(Rcpp,evalCpp)
importFrom(coda,HPDinterval)
importFrom(coda,as.mcmc)
importFrom(grDevices,colorRampPalette)
importFrom(lattice,levelplot)
importFrom(stats,cov)
importFrom(stats,median)
importFrom(stats,quantile)
importFrom(stats,window)
importFrom(tools,file_path_sans_ext)
importFrom(utils,read.table)
importFrom(utils,setTxtProgressBar)
importFrom(utils,txtProgressBar)
importFrom(utils,write.table)
useDynLib(slfm)
13 changes: 12 additions & 1 deletion NEWS
Original file line number Diff line number Diff line change
@@ -1,4 +1,15 @@
slfm 0.1
slfm 0.2.0
===========

* alpha inference now uses just the interested part of the chain

* user can now choose which type of mixture to use (degenerate TRUE or FALSE)

* user can now select a lag for MCMC chain

* new `plot_matrix` function

slfm 0.1.0
===========

* new `slfm` function which fits a model to a matrix of microarray data
Expand Down
12 changes: 10 additions & 2 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
# This file was generated by Rcpp::compileAttributes
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

gibbs <- function(x, ite, a = 2.1, b = 1.1, gamma_a = 1, gamma_b = 1, omega = 10, omega_1 = 0.01) {
.Call('slfm_gibbs', PACKAGE = 'slfm', x, ite, a, b, gamma_a, gamma_b, omega, omega_1)
gibbs <- function(x, ite, a = 2.1, b = 1.1, gamma_a = 1, gamma_b = 1, omega_0 = 0.01, omega_1 = 10, degenerate = FALSE) {
.Call('slfm_gibbs', PACKAGE = 'slfm', x, ite, a, b, gamma_a, gamma_b, omega_0, omega_1, degenerate)
}

slfm_MDN <- function(x, a = 2.1, b = 1.1, gamma_a = 1, gamma_b = 1, omega_1 = 10, burnin = 1000L, lag = 1L, npost = 500L) {
.Call('slfm_slfm_MDN', PACKAGE = 'slfm', x, a, b, gamma_a, gamma_b, omega_1, burnin, lag, npost)
}

slfm_MNN <- function(x, a = 2.1, b = 1.1, gamma_a = 1, gamma_b = 1, omega_0 = 0.01, omega_1 = 10, burnin = 1000L, lag = 1L, npost = 500L) {
.Call('slfm_slfm_MNN', PACKAGE = 'slfm', x, a, b, gamma_a, gamma_b, omega_0, omega_1, burnin, lag, npost)
}

6 changes: 5 additions & 1 deletion R/class_interval.r
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ class_interval_uni <- function(x, cv = 0.5) {
return(clas)
}

class_interval <- function(x, cv = 0.5) {
class_interval <- function(x) {
apply(x, 1, class_interval_uni)
}

Expand All @@ -21,4 +21,8 @@ format_classification <- function(x) {
lvls[lvls == "A"] <- "Absent"
levels(x) <- lvls
x
}

class_mean <- function(x, cv=0.5) {
x[,1] > cv
}
62 changes: 62 additions & 0 deletions R/plot_matrix.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#' Plot matrix
#'
#' This function plots a data matrix
#'
#' @param y matrix to be ploted
#' @param standardize.rows standardize matrix rows for plot
#' @param reorder.rows reorder matrix rows based on pattern
#' @param reorder.cols reorder matrix cols based on pattern
#' @param high.contrast apply transformation to matrix to increase contrast
#' @importFrom lattice levelplot
#' @importFrom grDevices colorRampPalette
#' @importFrom stats median
#' @importFrom stats quantile
#' @export

plot_matrix <- function(y, standardize.rows = TRUE, reorder.rows = TRUE, reorder.cols = TRUE, high.contrast = TRUE) {

y <- as.matrix(y)
xl <- "Microarrays"
yl <- "Probes"

if(standardize.rows) {
y <- t(apply(y,1,scale)) # standardize the rows of y
}

if(reorder.rows) {
medrow <- apply(y,1,median) # median of the rows of y
y <- y[order(medrow),] # order the rows of y
}

if(reorder.cols) {
medcol <- apply(y,2,median) # median of the columns of y
y <- y[,order(medcol)] # order the columns of y
}

if(high.contrast) {
y <- (abs(y)^(1/3))*sign(y)
} # higher contrast

mi <- as.numeric(quantile(y,0.001))
ma <- as.numeric(quantile(y,0.999))
nr <- nrow(y)
nc <- ncol(y)

if(nr==nc) { # Image of Correlation and Covariance matrices
xl <- ""
yl <- ""
mi <- ifelse(ma==1, -1, ma)
}

spr <- ifelse(nr<=5, 1, round(nr/10))
spc <- ifelse(nc<=5, 1, round(nc/10))
sc <- list(
x = list(at=c(seq(1,nc, spc),nc), labels=c(seq(1,nc,spc),nc)),
y = list(draw=FALSE)
)
col.l <- colorRampPalette(c('blue','white','red'))
cbar <- seq(mi,ma,length.out=100)

levelplot(t(y[nr:1,]),col.regions=col.l,xlab=xl,ylab=yl,scales=sc,at=cbar,aspect="fill")

}
3 changes: 3 additions & 0 deletions R/process_matrix.r
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
#' @param path path containing the set of matrices to be processed
#' @param output_path path to save the processed matrices
#' @param sample_size number of matrices to be used on the principal component analysis
#' @importFrom stats cov
#' @importFrom utils read.table
#' @importFrom utils write.table
#' @export

process_matrix <- function(path, output_path, sample_size) {
Expand Down
15 changes: 10 additions & 5 deletions R/slfm-package.r
Original file line number Diff line number Diff line change
@@ -1,15 +1,20 @@
#' slfm: the sparse latent factor model package for R.
#'
#' slfm is a set of tools to fit sparse latent factor models to microarray data. This includes
#' functions to:
#' Set of tools to find coherent patterns in microarray data
#' using a Bayesian sparse latent factor model (Duarte and Mayrink 2015 -
#' http://link.springer.com/chapter/10.1007%2F978-3-319-12454-4_15).
#' Considerable effort has been put into making slfm fast and memory efficient,
#' turning it an interesting alternative to simpler methods in terms
#' of execution time. It implements versions of the SLFM using both type
#' of mixtures: using a degenerate distribution or a very concentrated
#' normal distribution for the spike part of the mixture. It also implements
#' additional functions to help pre-process the data and fit the model
#' for a large number of arrays. Includes functions to:
#'
#' * pre-process a set of matrices
#' * fit models to a set of matrices
#' * detailed summary of model fit
#'
#' Considerable effort has been put into making slfm fast and memory efficient, so as slfm is an
#' attractive alternative to simpler methods in terms of execution time.
#'
#' @docType package
#' @name slfm-package
NULL
81 changes: 62 additions & 19 deletions R/slfm.r
Original file line number Diff line number Diff line change
Expand Up @@ -2,66 +2,91 @@
#'
#' This function is used to fit a Bayesian sparse
#' latent factor model.
#'
#' @references
#' 1. Duarte, J. D. N. and Mayrink, V. D. (2015). Factor analysis with mixture modeling to evaluate coherent patterns in microarray data. In Interdisciplinary Bayesian Statistics, volume 118 of Springer Proceedings in Mathematics & Statistics, pages 185-195. Springer International Publishing.
#'
#' @param x matrix with the pre-processed data
#' @param ite number of iterations of the MCMC algorithm
#' @param a prior shape parameter for Gamma distribution
#' @param b prior scale parameter for Gamma distribution
#' @param gamma_a prior parameter for Beta distribution
#' @param gamma_b prior parameter for Beta distribution
#' @param omega prior variance of the slab component
#' @param omega_1 prior variance of the spike component
#' @param omega_0 prior variance of the spike component
#' @param omega_1 prior variance of the slab component
#' @param sample sample size after burn-in
#' @param burnin burn-in size
#' @param lag lag for MCMC
#' @param degenerate use the degenerate version of mixture
#' @return x: data matrix
#' @return p_star: matrix of MCMC chains for p_star parameter
#' @return q_star: matrix of MCMC chains for q_star parameter
#' @return alpha: summary table of MCMC chains for alpha parameter
#' @return lambda: summary table of MCMC chains for lambda parameter
#' @return sigma: summary table of MCMC chains for sigma parameter
#' @return classification: classification of each alpha (`present`, `marginal`, `absent`)
#' @export
#' @importFrom coda as.mcmc
#' @importFrom stats window
#' @importFrom Rcpp evalCpp
#' @useDynLib slfm
#' @examples
#' mat <- matrix(rnorm(2000), nrow = 20)
#' slfm(mat, ite = 1000)
#' slfm(mat, sample = 1000)
slfm <- function(
x, ite, a = 2.1, b = 1.1, gamma_a = 1, gamma_b = 1,
omega = 10, omega_1 = 0.01, burnin = round(0.25*ite)) {
x, a = 2.1, b = 1.1, gamma_a = 1, gamma_b = 1,
omega_0 = 0.01, omega_1 = 10, sample = 1000, burnin = round(0.25*sample), lag = 1, degenerate = FALSE) {

ite <- sample + burnin

# Convert the x input to numeric matrix
x <- data.matrix(x)

res <- gibbs(x, ite, a, b, gamma_a, gamma_b, omega, omega_1)
p_star_matrix <- coda::as.mcmc(res[["p_star"]][burnin:ite,])
alpha_matrix <- coda::as.mcmc(res[["alpha"]][burnin:ite,])
lambda_matrix <- coda::as.mcmc(res[["lambda"]][burnin:ite,])
sigma_matrix <- coda::as.mcmc(res[["sigma"]][burnin:ite,])
if(degenerate) {
res <- slfm_MDN(x, a, b, gamma_a, gamma_b, omega_1, burnin, lag, sample)
} else {
res <- slfm_MNN(x, a, b, gamma_a, gamma_b, omega_0, omega_1, burnin, lag, sample)
}

after_burnin <- (burnin + 1):ite

q_star_matrix <- coda::as.mcmc(res[["qstar"]][after_burnin,])
q_star_matrix <- window(q_star_matrix, thin = lag)
hpds_q_star <- coda::HPDinterval(q_star_matrix)
stats_q_star <- summary(q_star_matrix)$statistics
alpha_clas <- format_classification(class_interval(hpds_q_star))
alpha_clas_mean <- class_mean(stats_q_star)

stats_alpha <- summary(alpha_matrix)$statistics
hpds_alpha <- coda::HPDinterval(alpha_matrix)
table_alpha <- cbind(stats_alpha, hpds_alpha)[,-4]
colnames(table_alpha)[4:5] = c("Upper HPD", "Lower HPD")
alpha_matrix <- coda::as.mcmc(res[["alpha"]][after_burnin,])
alpha_matrix <- window(alpha_matrix, thin = lag)

table_alpha <- alpha_estimation(res[["alpha"]][after_burnin,], alpha_clas_mean, res[["qstar"]][after_burnin,])

lambda_matrix <- coda::as.mcmc(res[["lambda"]][after_burnin,])
lambda_matrix <- window(lambda_matrix, thin = lag)
stats_lambda <- summary(lambda_matrix)$statistics
hpds_lambda <- coda::HPDinterval(lambda_matrix)
table_lambda <- cbind(stats_lambda, hpds_lambda)[,-4]
colnames(table_lambda)[4:5] = c("Upper HPD", "Lower HPD")

sigma_matrix <- coda::as.mcmc(res[["sigma2"]][after_burnin,])
sigma_matrix <- window(sigma_matrix, thin = lag)
stats_sigma <- summary(sigma_matrix)$statistics
hpds_sigma <- coda::HPDinterval(sigma_matrix)
table_sigma <- cbind(stats_sigma, hpds_sigma)[,-4]
colnames(table_sigma)[4:5] = c("Upper HPD", "Lower HPD")

hpds_p_star <- coda::HPDinterval(p_star_matrix)
alpha_clas <- format_classification(class_interval(hpds_p_star))
z_matrix <- coda::as.mcmc(res[["z"]][after_burnin,])
z_matrix <- window(z_matrix, thin = lag)

obj <- list(
x = x,
p_star = p_star_matrix,
alpha = table_alpha,
lambda = table_lambda,
sigma = table_sigma,
alpha_matrix = alpha_matrix,
lambda_matrix = lambda_matrix,
sigma_matrix = sigma_matrix,
q_star = q_star_matrix,
z_matrix = z_matrix,
classification = alpha_clas)
class(obj) <- "slfm"
obj
Expand All @@ -76,4 +101,22 @@ print.slfm <- function(x) {
cat("\n")
cat("Classification:","\n")
print(x$classification)
}

alpha_estimation <- function(x, alpha_clas, q_star_matrix) {
table_list <- lapply(1:length(alpha_clas), function(i) {
chain_indicator <- q_star_matrix[, i] > 0.5
if(alpha_clas[i]) {
chain <- x[chain_indicator, i]
} else {
chain <- x[!chain_indicator, i]
}
chain.mcmc <- coda::as.mcmc(chain)
stats <- summary(chain.mcmc)$statistics
hpds <- coda::HPDinterval(chain.mcmc)
table <- c(stats, hpds)[-4]
})
table <- do.call(rbind, table_list)
colnames(table)[4:5] = c("Upper HPD", "Lower HPD")
table
}

0 comments on commit 5678fab

Please sign in to comment.