Skip to content

Commit

Permalink
Merge pull request #74 from fabrice-rossi/utf8
Browse files Browse the repository at this point in the history
Add UTF-8 support to draw functions.
Closes #66
  • Loading branch information
fabrice-rossi committed Mar 23, 2024
2 parents 23e4950 + 936495d commit 8d94060
Show file tree
Hide file tree
Showing 37 changed files with 2,002 additions and 736 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ LazyData: true
Imports:
assertthat,
butcher,
cli,
ggplot2,
methods,
nnet,
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,8 @@ S3method(trim,vlmc_cpp)
export(as_covlmc)
export(as_sequence)
export(as_vlmc)
export(charset_ascii)
export(charset_utf8)
export(children)
export(context_number)
export(contexts)
Expand Down
21 changes: 15 additions & 6 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# mixvlmc (development version)
## Breaking changes
* `draw()` has now a second parameter `format` with no default. This may break
codes that used the fact that the `control` parameter was previously the
second one.
* the `digits` parameter of `draw.covlmc()` has been removed and replaced by a
similar parameter in `draw_control()`
* defaults for `draw.covlmc()` have changed (issue #66):
* `p_value` is now `FALSE` by default
* when `p_value` is `TRUE` the p-values are represented between separators
Expand All @@ -14,18 +19,16 @@
`time_sep` parameter has been removed from `draw.covlmc()`
* the new parameter `constant_as_prob` is set to `TRUE` which modifies also
the default display
* the `digits` parameter of `draw.covlmc()` has been removed and replaced by a
similar parameter in `draw_control()`
* `draw()` has now a second parameter `format` with no default. This may break
codes that used the fact that the `control` parameter was previously the
second one.
* the interface of `draw_control()` has been significantly changed (issue #66)
which should break most customisation code at the level of characters used
by `draw()` to display a model

## New features
### Model representation (with `draw()`)
A major change of `draw()` is the support of multiple output formats. This is
done via a `format` parameter. It supports currently:

* the format of previous versions of `mixvlmc` with `format="ascii"`. This is
* the format of previous versions of `mixvlmc` with `format="text"`. This is
the default text based representation.
* a new LaTeX export with `format="latex"` based on the LaTeX package `forest`
(<https://ctan.org/pkg/forest>) as per issue #66. This can be used to include
Expand All @@ -34,6 +37,12 @@ done via a `format` parameter. It supports currently:
In addition, text based model representation has been improved (as per
issue #66) as follows:

* a new global option `mixvlmc.charset` can be used to select the characters
used by `draw()` when `format="text"` between pure ASCII and UTF-8
* when possible, the text based representation will default to UTF-8 symbols to
provide a cleaner display of the context tree
* full customisation of the character set has moved from `draw_control()` to
`charset_ascii()` and `charset_utf8()`
* the `draw.covlmc()` function uses arguably better default parameters
(described above)
* when a logistic model does not use the covariates, it is now represented
Expand Down
75 changes: 47 additions & 28 deletions R/covlmc_draw.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,16 @@ draw_covlmc_model <- function(coefficients, p_value, hsize, names, lev,
if (collapsed) {
stringr::str_c(p_value_str, coeffs, sep = " ")
} else {
stringr::str_c(p_value_str, "[", coeffs, "]", sep = " ")
stringr::str_c(p_value_str, control$open_model, coeffs,
control$close_model,
sep = " "
)
}
} else {
if (collapsed) {
coeffs
} else {
stringr::str_c("[", coeffs, "]", sep = " ")
stringr::str_c(control$open_model, coeffs, control$close_model, sep = " ")
}
}
} else {
Expand All @@ -66,16 +69,16 @@ draw_covlmc_model <- function(coefficients, p_value, hsize, names, lev,
control$close_p_value,
sep = ""
)
pad <- stringr::str_pad("", stringr::str_length(p_value_str) + 2)
coeffs[1] <- stringr::str_c(p_value_str, "[", coeffs[1], sep = " ")
pad <- stringr::str_pad("", cli::utf8_nchar(p_value_str, "width") + 2)
coeffs[1] <- stringr::str_c(p_value_str, control$open_model, coeffs[1], sep = " ")
} else {
coeffs[1] <- stringr::str_c("[", coeffs[1], sep = " ")
coeffs[1] <- stringr::str_c(control$open_model, coeffs[1], sep = " ")
pad <- " "
}
for (k in 2:length(coeffs)) {
coeffs[k] <- stringr::str_c(pad, coeffs[k], sep = " ")
}
coeffs[length(coeffs)] <- stringr::str_c(coeffs[length(coeffs)], "]", sep = " ")
coeffs[length(coeffs)] <- stringr::str_c(coeffs[length(coeffs)], control$close_model, sep = " ")
coeffs
}
} else {
Expand All @@ -100,10 +103,12 @@ rec_draw_covlmc <- function(label, prefix, ct, vals, control, node2txt) {
c_symbol <- control$first_node
idx <- 1
nst <- nb_sub_tree(ct)
nb_nodes <- sum(sapply(ct$children, \(x) length(x) > 0))
if (is.null(ct[["merged_model"]])) {
active_children <- seq_along(ct$children)
} else {
active_children <- setdiff(seq_along(ct$children), ct$merged)
nb_nodes <- nb_nodes + 1
}
for (v in active_children) {
child <- ct$children[[v]]
Expand All @@ -112,24 +117,28 @@ rec_draw_covlmc <- function(label, prefix, ct, vals, control, node2txt) {
if (idx < nst) {
c_prefix <- control$vbranch
} else {
c_prefix <- stringr::str_pad("", stringr::str_length(control$vbranch))
c_prefix <- stringr::str_pad("", cli::utf8_nchar(control$vbranch, "width"))
}
c_prefix <- stringr::str_pad(c_prefix, stringr::str_length(c_prelabel), side = "right")
c_prefix <- utf8_pad(c_prefix, cli::utf8_nchar(c_prelabel, "width"), "right")
## recursive call
rec_draw_covlmc(
stringr::str_c(prefix, c_prelabel, vals[v]),
stringr::str_c(prefix, c_prefix), child, vals, control, node2txt
)
## prepare for next child
c_symbol <- control$next_node
idx <- idx + 1
if (idx == nb_nodes) {
c_symbol <- control$final_node
} else {
c_symbol <- control$next_node
}
}
}
if (!is.null(ct[["merged_model"]])) {
the_merged_vals <- stringr::str_c(vals[ct$merged], collapse = ", ")
c_prelabel <- stringr::str_c(c_symbol, control$hbranch, " ")
c_prefix <- stringr::str_pad("", stringr::str_length(control$vbranch))
c_prefix <- stringr::str_pad(c_prefix, stringr::str_length(c_prelabel), side = "right")
c_prelabel <- stringr::str_c(control$final_node, control$hbranch, " ")
c_prefix <- stringr::str_pad("", cli::utf8_nchar(control$vbranch, "width"))
c_prefix <- utf8_pad(c_prefix, cli::utf8_nchar(c_prelabel, "width"), "right")
c_label <- stringr::str_c(prefix, c_prelabel, the_merged_vals)
c_prefix <- stringr::str_c(prefix, c_prefix)
cat(c_label)
Expand All @@ -155,9 +164,12 @@ covlmc_node2txt <- function(node, vals, control) {
model_levels <- vals
}
}
var_names <- node$model$var_names
## intercept
var_names[1] <- control$intercept
draw_covlmc_model(
node$model$coefficients, node$model$p_value, node$model$hsize,
node$model$var_names, model_levels, control,
var_names, model_levels, control,
node$model$model
)
} else if (!is.null(node$p_value) && isTRUE(control$p_value)) {
Expand All @@ -182,7 +194,7 @@ covlmc_node2txt <- function(node, vals, control) {
#'
#' Contrarily to [draw()] functions adapted to context trees [draw.ctx_tree()]
#' and VLMC [draw.vlmc()], the present function does not try to produce similar
#' results for the `"ascii"` format and the `"latex"` format as the `"ascii"`
#' results for the `"text"` format and the `"latex"` format as the `"text"`
#' format is intrinsically more limited in terms of model representations. This
#' is detailed below.
#'
Expand All @@ -197,16 +209,15 @@ covlmc_node2txt <- function(node, vals, control) {
#' @param with_state specifies whether to display the state associated to each
#' dimension of the logistic model (see details).
#' @param constant_as_prob specifies how to represent constant logistic models
#' for `format="ascii"` (defaults to `TRUE`, see details). Disregarded when
#' for `format="text"` (defaults to `TRUE`, see details). Disregarded when
#' `format="latex"`.
#'
#' @inheritParams draw
#' @section Format:
#'
#' The `format` parameter specifies the format used for the textual output.
#' With the default value `ascii` the output is produced in "ascii art" using
#' by default only ascii characters (notice that `draw_control()` can be used
#' to specified non ascii characters, but this is discouraged).
#' With the default value `"text"` the output is produced in "ascii art" using
#' the charset specified by the global option `mixvlmc.charset`.
#'
#' With the `latex` value, the output is produced in LaTeX, leveraging the
#' [forest](https://ctan.org/pkg/forest) Latex package (see
Expand All @@ -216,10 +227,10 @@ covlmc_node2txt <- function(node, vals, control) {
#' The LaTeX output is sanitized to avoid potential problems induced by
#' special characters in the names of the states of the context tree.
#'
#' @section `"ascii"` format:
#' @section `"text"` format:
#' ## Parameters
#'
#' When `format="ascii"` the parameters are interpreted as follows:
#' When `format="text"` the parameters are interpreted as follows:
#'
#' - `model`: the default `model="coef"` represents only the *coefficients*
#' of the logistic models associated to each context. `model="full"` includes
Expand All @@ -233,7 +244,10 @@ covlmc_node2txt <- function(node, vals, control) {
#' `constant_as_prob=TRUE`) or as normal models (when set to `FALSE`). This is
#' not taken into account when `model` is not set to `"coef"`.
#'
#' - fields of the `control` list:
#' - fields of the `control` list (including the charset):
#'
#' - `intercept` : character(s) used to represent the intercept when
#' `model="full"`
#'
#' - `intercept_sep`: character(s) used to separate the intercept from
#' the other coefficients in model representation.
Expand All @@ -248,6 +262,9 @@ covlmc_node2txt <- function(node, vals, control) {
#' - `open_p_value` and `close_p_value`: delimiters used around the p-values
#' when `p_value=TRUE`
#'
#' - `open_model` and `close_model`: delimiters around the model when `model`
#' is not `NULL`
#'
#' ## State representation
#'
#' When `model` is not `NULL`, the coefficients of the logistic models are
Expand Down Expand Up @@ -306,9 +323,11 @@ covlmc_node2txt <- function(node, vals, control) {
#' When the representation includes the names of the variables used by the
#' logistic models, they are the one generated by the underlying logistic model,
#' e.g. [stats::glm()]. Numerical variable names are used as is, while factors
#' have levels appended. The intercept is denoted `(I)` to save space.
#' have levels appended. The intercept is denoted by the `intercept` member
#' of the `control` list when`format="text"` (as part of the charset). It is
#' always represented by `(I)` when `format="latex"`.
#'
#' When `format="ascii"`, the time delays are represented by an underscore
#' When `format="text"`, the time delays are represented by an underscore
#' followed by the time delay. For instance if the model uses the numerical
#' covariate `y` with two delays, it will appear with two variables `y_1` and
#' `y_2`.
Expand All @@ -326,8 +345,8 @@ covlmc_node2txt <- function(node, vals, control) {
#' draw(m_cov, control = draw_control(digits = 3))
#' draw(m_cov, model = NULL)
#' draw(m_cov, p_value = TRUE)
#' draw(m_cov, p_value = FALSE, control = draw_control(time_sep = " ^ "))
#' draw(m_cov, model = "full", control = draw_control(time_sep = " ^ "))
#' draw(m_cov, p_value = FALSE, control = draw_control(digits = 2))
#' draw(m_cov, model = "full", control = draw_control(digits = 3))
#' draw(m_cov, format = "latex", control = draw_control(orientation = "h"))
#' @export
draw.covlmc <- function(ct, format,
Expand All @@ -336,9 +355,9 @@ draw.covlmc <- function(ct, format,
constant_as_prob = TRUE,
...) {
if (rlang::is_missing(format)) {
format <- "ascii"
format <- "text"
} else {
format <- match.arg(format, c("ascii", "latex"))
format <- match.arg(format, c("text", "latex"))
}
if (is.null(model)) {
model <- "none"
Expand All @@ -347,7 +366,7 @@ draw.covlmc <- function(ct, format,
}
dot_params <- list(...)
dot_params$with_state <- with_state
if (format == "ascii") {
if (format == "text") {
rec_draw_covlmc(
control$root, "", ct, ct$vals,
c(control, list(
Expand Down
4 changes: 2 additions & 2 deletions R/covlmc_metrics.R
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,8 @@ print.metrics.covlmc <- function(x, ...) {
cat(paste(" Confusion matrix:", "\n"))
pcm <- pp_mat(x$conf_mat, colnames = colnames(x$conf_mat))
rn <- rownames(x$conf_mat)
l_rn <- max(stringr::str_length(rn))
rn <- stringr::str_pad(c("", rn), l_rn, side = "right")
l_rn <- max(cli::utf8_nchar(rn, "width"))
rn <- utf8_pad(c("", rn), l_rn, "right")
for (k in seq_along(rn)) {
cat(paste(" ", rn[k], " ", pcm[k], sep = ""), "\n")
}
Expand Down
21 changes: 11 additions & 10 deletions R/ctx_tree_cpp_draw.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ rec_draw_cpp <- function(label, prefix, tree, ct, vals, control, node2txt) {
if (nst > 1) {
c_symbol <- control$first_node
} else {
c_symbol <- control$next_node
c_symbol <- control$final_node
}
idx <- 1
for (v in seq_along(ct$children)) {
Expand All @@ -23,21 +23,22 @@ rec_draw_cpp <- function(label, prefix, tree, ct, vals, control, node2txt) {
if (idx < nst) {
c_prefix <- control$vbranch
} else {
c_prefix <- stringr::str_pad("", stringr::str_length(control$vbranch))
c_prefix <- stringr::str_pad("", cli::utf8_nchar(control$vbranch, "width"))
}
c_prefix <-
stringr::str_pad(c_prefix, stringr::str_length(c_prelabel),
side = "right"
)
c_prefix <- utf8_pad(c_prefix, cli::utf8_nchar(c_prelabel, "width"), "right")
## recursive call
rec_draw_cpp(
stringr::str_c(prefix, c_prelabel, vals[v]),
stringr::str_c(prefix, c_prefix),
tree, child, vals, control, node2txt
)
## prepare for next child
c_symbol <- control$next_node
idx <- idx + 1
if (idx == nst) {
c_symbol <- control$final_node
} else {
c_symbol <- control$next_node
}
}
}
}
Expand All @@ -49,13 +50,13 @@ rec_draw_cpp <- function(label, prefix, tree, ct, vals, control, node2txt) {
draw.ctx_tree_cpp <- function(ct, format, control = draw_control(),
frequency = NULL, ...) {
if (rlang::is_missing(format)) {
format <- "ascii"
format <- "text"
} else {
format <- match.arg(format, c("ascii", "latex"))
format <- match.arg(format, c("text", "latex"))
}
restore_model(ct)
ct_r <- ct$root$representation()
if (format == "ascii") {
if (format == "text") {
if (is.null(frequency)) {
rec_draw_cpp(
control$root, "", ct_r, ct_r[[1]], ct$vals,
Expand Down
Loading

0 comments on commit 8d94060

Please sign in to comment.