diff --git a/DESCRIPTION b/DESCRIPTION index 57178c1..4c9ed8a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -31,6 +31,6 @@ Suggests: License: GPL (>= 2) Maintainer: Mihaljevic Bojan VignetteBuilder: knitr -RoxygenNote: 7.0.2 +RoxygenNote: 7.1.0 LinkingTo: Rcpp, BH SystemRequirements: C++11 diff --git a/NAMESPACE b/NAMESPACE index b6129d8..17dd57a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -11,6 +11,7 @@ S3method(logLik,bnc_bn) S3method(lp_implement,bnc_aode) S3method(lp_implement,bnc_dag) S3method(lp_implement,bnc_multinet) +S3method(plot,bnc_base) S3method(plot,bnc_dag) S3method(predict,bnc_fit) S3method(print,bnc_base) @@ -40,7 +41,7 @@ export(lp) export(makeRLearner.bnc) export(manb_arc_posterior) export(modelstring) -export(multinet_tan) +export(multinet_cl) export(narcs) export(nb) export(nparams) diff --git a/R/0bnclassify-doc.R b/R/0bnclassify-doc.R index 0d7b2a7..cebbd4f 100644 --- a/R/0bnclassify-doc.R +++ b/R/0bnclassify-doc.R @@ -16,6 +16,7 @@ #' \item \code{\link{tan_hc}}: Hill-climbing tree augmented naive Bayes (TAN-HC) (Keogh and Pazzani, 2002) #' \item \code{\link{tan_hcsp}}: Hill-climbing super-parent tree augmented naive Bayes (TAN-HCSP) (Keogh and Pazzani, 2002) #' \item \code{\link{aode}}: Averaged one-dependence estimators (AODE) (Webb et al., 2005) +#' \item \code{\link{multinet_cl}}: Create a multinet using Chow-Liu's algorithm (multinet_cl) (Friedman et al., 1997) #' } #' #' Parameter learning methods (\code{\link{lp}}): @@ -218,6 +219,32 @@ NULL #' bic <- tan_cl('class', car, score = 'bic') NULL +#' Create an ensemble of Bayesian networks using Chow-Liu's algorithm. +#' +#' Create an ensemble of Bayesian networks using Chow-Liu's algorithm, by +#' maximizing either log-likelihood, the AIC or BIC scores; maximizing +#' log-likelihood corresponds to the well-known tree augmented naive Bayes +#' (Friedman et al., 1997). +#' +#' @name multinet_cl +#' +#' @inheritParams nb +#' @param root the class column of the dataset. 
The class column will +#' be divided into different class levels to be used as root of +#' the different augmenting trees +#' @return A \code{\link{bnc_dag}} object. +#' +#' @references Friedman N, Geiger D and Goldszmidt M (1997). Bayesian network +#' classifiers. \emph{Machine Learning}, \bold{29}, pp. 131--163. +#' @examples +#' data(car) +#' ll <- multinet_cl('class', car) +#' ll <- multinet_cl('class', car, score = 'loglik') +#' \dontrun{plot(ll)} +#' aic <- multinet_cl('class', car, score = 'aic') +#' bic <- multinet_cl('class', car, score = 'bic') +NULL + #' Learn the parameters of a Bayesian network structure. #' #' Learn parameters with maximum likelihood or Bayesian estimation, the diff --git a/R/bnc-dag-operate.R b/R/bnc-dag-operate.R index 75dd27e..7f81c93 100644 --- a/R/bnc-dag-operate.R +++ b/R/bnc-dag-operate.R @@ -42,6 +42,20 @@ plot.bnc_dag <- function(x, y, layoutType='dot', fontsize = NULL, ...) { l <- Rgraphviz::layoutGraph(g, layoutType = layoutType) Rgraphviz::renderGraph(l, graph.pars = list(nodes = node_pars)) } + +#' plot a network +#' @export +#' @keywords internal +plot.bnc_base <- function(x, y, layoutType='dot', fontsize = NULL, ...) { + is_bnc_dag <- inherits(x, "bnc_dag") + if(is_bnc_dag){ + plot.bnc_dag(x,y) + } + else{ + print("An ensemble of Bayesian network classifiers cannot be plotted") + } +} + #' Print basic information about a classifier. #' @export #' @keywords internal @@ -69,6 +83,8 @@ print.bnc_base <- function(x, ...) { cat(" learning algorithm: ", as.character(x$.call_struct[[1]]), "\n") } } + + #' @export #' @describeIn inspect_bnc_dag Returns TRUE if \code{x} is a semi-naive Bayes. is_semi_naive <- function(x) { diff --git a/R/bncs.R b/R/bncs.R index d7c6f92..97fc93b 100644 --- a/R/bncs.R +++ b/R/bncs.R @@ -18,7 +18,7 @@ bnc_aode_bns <- function(x, fit_models) { } #' Returns a \code{c("bnc_multinet", "bnc")} object. 
#' @keywords internal -bnc_multinet_tan <- function(class, dataset, features) { +bnc_multinet_tan <- function(class, dataset, features,scores) { if (!is.null(dataset)) { features <- get_features(class = class, dataset = dataset) } @@ -29,7 +29,7 @@ bnc_multinet_tan <- function(class, dataset, features) { datasets <- split(dataset, dataset[[class]]) models <- vector("list") for (i in levels(dataset[[class]])){ - models[[i]]<-tan_cl("class", datasets[[i]])} + models[[i]]<-tan_cl("class", datasets[[i]],scores)} stopifnot(length(models) > 0) stopifnot(all(vapply(models, is_ode, FUN.VALUE = logical(1)))) bnc <- bnc_base(class = class, features = features) diff --git a/R/infer.R b/R/infer.R index 7af8276..37d3f3b 100644 --- a/R/infer.R +++ b/R/infer.R @@ -83,7 +83,6 @@ compute_log_joint_complete.bnc_multinet<-function(x, dataset){ for (i in classes){ m[, i]<-compute_log_joint_complete(models(x)[[i]], dataset)[,i] } - browser() prior <- rep(multinet_apriori(x), each = nrow(m)) m * prior } diff --git a/R/learn-params.R b/R/learn-params.R index dd14d1e..092acd4 100644 --- a/R/learn-params.R +++ b/R/learn-params.R @@ -52,9 +52,10 @@ lp_implement.bnc_aode <- function(x, dataset, smooth, awnb_trees = NULL, lp_implement.bnc_multinet <- function(x, dataset, smooth, awnb_trees = NULL, awnb_bootstrap = NULL, manb_prior = NULL, wanbia = NULL, .mem_cpts=NULL, ...) 
{ # TODO: we need to specify the class as parameter of the function - datasets <- split(dataset, dataset[["class"]]) + class=class_var(x) + datasets <- split(dataset, dataset[[class]]) models <- vector("list") - for (i in levels(dataset[["class"]])){ + for (i in levels(dataset[[class]])){ models[[i]]<-lp_implement(models(x)[[i]], datasets[[i]], smooth)} apriori <- extract_cpt(class_var(x), dataset = dataset, smooth = smooth) bnc_multinet_bns(x, models, apriori) diff --git a/R/learn-struct.R b/R/learn-struct.R index ee7cbbd..a3a3684 100644 --- a/R/learn-struct.R +++ b/R/learn-struct.R @@ -115,9 +115,9 @@ aode <- function(class, dataset, features = NULL) { #' #' @export #' @inheritParams nb -#' @return A \code{bnc_multinet_tan} or a \code{bnc_dag} (if returning a naive Bayes) -multinet_tan <- function(class, dataset, features=NULL) { - x <- bnc_multinet_tan(class=class, dataset=dataset, features=features) - add_dag_call_arg(x, fun_name = 'bnc_multinet_tan', call = match.call(), +#' @return A \code{multinet_cl} or a \code{bnc_dag} (if returning a naive Bayes) +multinet_cl <- function(class, dataset, features=NULL, score='loglik') { + x <- bnc_multinet_tan(class=class, dataset=dataset, features=features, score=score) + add_dag_call_arg(x, fun_name = 'multinet_cl', call = match.call(), env = parent.frame(), force = TRUE) } \ No newline at end of file diff --git a/man/bnc_multinet_bns.Rd b/man/bnc_multinet_bns.Rd index 8cab5fc..fbde0c9 100644 --- a/man/bnc_multinet_bns.Rd +++ b/man/bnc_multinet_bns.Rd @@ -4,7 +4,7 @@ \alias{bnc_multinet_bns} \title{Fits an multinet model.} \usage{ -bnc_multinet_bns(x, fit_models) +bnc_multinet_bns(x, fit_models, apriori) } \description{ Fits an multinet model. 
diff --git a/man/bnc_multinet_tan.Rd b/man/bnc_multinet_tan.Rd index dc2f97e..2fe359b 100644 --- a/man/bnc_multinet_tan.Rd +++ b/man/bnc_multinet_tan.Rd @@ -4,7 +4,7 @@ \alias{bnc_multinet_tan} \title{Returns a \code{c("bnc_multinet", "bnc")} object.} \usage{ -bnc_multinet_tan(class, dataset, features) +bnc_multinet_tan(class, dataset, features, scores) } \description{ Returns a \code{c("bnc_multinet", "bnc")} object. diff --git a/man/bnclassify.Rd b/man/bnclassify.Rd index beae43b..8e2c62f 100644 --- a/man/bnclassify.Rd +++ b/man/bnclassify.Rd @@ -22,6 +22,7 @@ Structure learning algorithms: \item \code{\link{tan_hc}}: Hill-climbing tree augmented naive Bayes (TAN-HC) (Keogh and Pazzani, 2002) \item \code{\link{tan_hcsp}}: Hill-climbing super-parent tree augmented naive Bayes (TAN-HCSP) (Keogh and Pazzani, 2002) \item \code{\link{aode}}: Averaged one-dependence estimators (AODE) (Webb et al., 2005) +\item \code{\link{multinet_cl}}: Create a multinet using Chow-Liu's algorithm (multinet_cl) (Friedman et al., 1997) } Parameter learning methods (\code{\link{lp}}): diff --git a/man/car.Rd b/man/car.Rd index 81ca615..8af9fe2 100644 --- a/man/car.Rd +++ b/man/car.Rd @@ -4,7 +4,9 @@ \name{car} \alias{car} \title{Car Evaluation Data Set.} -\format{A \code{data.frame} with 7 columns and 1728 rows.} +\format{ +A \code{data.frame} with 7 columns and 1728 rows. +} \source{ \url{http://goo.gl/GTXrCz} } diff --git a/man/is_ensemble.Rd b/man/is_ensemble.Rd new file mode 100644 index 0000000..cb51240 --- /dev/null +++ b/man/is_ensemble.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bncs.R +\name{is_ensemble} +\alias{is_ensemble} +\title{Is it an ensemble?} +\usage{ +is_ensemble(x) +} +\description{ +Is it an ensemble? 
+} +\keyword{internal} diff --git a/man/multinet_apriori.Rd b/man/multinet_apriori.Rd new file mode 100644 index 0000000..4227953 --- /dev/null +++ b/man/multinet_apriori.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bncs.R +\name{multinet_apriori} +\alias{multinet_apriori} +\title{Return a priori class probabilities} +\usage{ +multinet_apriori(x) +} +\description{ +Return a priori class probabilities +} +\keyword{internal} diff --git a/man/multinet_cl.Rd b/man/multinet_cl.Rd new file mode 100644 index 0000000..e6160fd --- /dev/null +++ b/man/multinet_cl.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/0bnclassify-doc.R, R/learn-struct.R +\name{multinet_cl} +\alias{multinet_cl} +\title{Create an ensemble of Bayesian networks using Chow-Liu's algorithm.} +\usage{ +multinet_cl(class, dataset, features = NULL, score = "loglik") +} +\arguments{ +\item{class}{A character. Name of the class variable.} + +\item{dataset}{The data frame from which to learn the classifier.} + +\item{features}{A character vector. The names of the features. This argument +is ignored if \code{dataset} is provided.} + +\item{root}{the class column of the dataset. The class column will +be divided into different class levels to be used as root of +the different augmenting trees} +} +\value{ +A \code{\link{bnc_dag}} object. + +A \code{multinet_cl} or a \code{bnc_dag} (if returning a naive Bayes) +} +\description{ +Create an ensemble of Bayesian networks using Chow-Liu's algorithm, by +maximizing either log-likelihood, the AIC or BIC scores; maximizing +log-likelihood corresponds to the well-known tree augmented naive Bayes +(Friedman et al., 1997). + +If there is a single predictor then returns a naive Bayes. 
+} +\examples{ +data(car) +ll <- multinet_cl('class', car) +ll <- multinet_cl('class', car, score = 'loglik') +\dontrun{plot(ll)} +aic <- multinet_cl('class', car, score = 'aic') +bic <- multinet_cl('class', car, score = 'bic') +} +\references{ +Friedman N, Geiger D and Goldszmidt M (1997). Bayesian network + classifiers. \emph{Machine Learning}, \bold{29}, pp. 131--163. +} diff --git a/man/multinet_tan.Rd b/man/multinet_tan.Rd deleted file mode 100644 index f8a1329..0000000 --- a/man/multinet_tan.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/learn-struct.R -\name{multinet_tan} -\alias{multinet_tan} -\title{Learn an TAN ensemble.} -\usage{ -multinet_tan(class, dataset, features = NULL) -} -\arguments{ -\item{class}{A character. Name of the class variable.} - -\item{dataset}{The data frame from which to learn the classifier.} - -\item{features}{A character vector. The names of the features. This argument -is ignored if \code{dataset} is provided.} -} -\value{ -A \code{bnc_multinet_tan} or a \code{bnc_dag} (if returning a naive Bayes) -} -\description{ -If there is a single predictor then returns a naive Bayes. -} diff --git a/man/plot.bnc_base.Rd b/man/plot.bnc_base.Rd new file mode 100644 index 0000000..1b299b8 --- /dev/null +++ b/man/plot.bnc_base.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bnc-dag-operate.R +\name{plot.bnc_base} +\alias{plot.bnc_base} +\title{plot a network} +\usage{ +\method{plot}{bnc_base}(x, y, layoutType = "dot", fontsize = NULL, ...) +} +\description{ +plot a network +} +\keyword{internal} diff --git a/man/voting.Rd b/man/voting.Rd index ba05523..96a6297 100644 --- a/man/voting.Rd +++ b/man/voting.Rd @@ -4,7 +4,9 @@ \name{voting} \alias{voting} \title{Congress Voting Data Set.} -\format{A \code{data.frame} with 17 columns and 435 rows.} +\format{ +A \code{data.frame} with 17 columns and 435 rows. 
+} \source{ \url{http://goo.gl/GTXrCz} } diff --git a/tests/testthat/test-multinet.R b/tests/testthat/test-multinet.R index 704e769..d467361 100644 --- a/tests/testthat/test-multinet.R +++ b/tests/testthat/test-multinet.R @@ -1,13 +1,13 @@ context("inference") test_that("plot", { - bn <- multinet_tan(class = 'class', dataset = car) + bn <- multinet_cl(class = 'class', dataset = car) # Should be a message instead of an error plot(bn) }) test_that("Predict", { - bn <- multinet_tan(class = 'class', dataset = car) + bn <- multinet_cl(class = 'class', dataset = car) bn <- lp(bn, car, smooth = 0.1) a <- compute_cp(x = bn, car) }) @@ -15,4 +15,4 @@ test_that("Predict", { test_that("bnc function", { nb <- bnc() plot(nb) -}) \ No newline at end of file +})