Merge pull request #29 from UCLATALL/feature/mixed-models

Add support for mixed models
UCLATALL · Nov 5, 2019 · 003838e · 003838e
2 parents cfee237 + b55ffd2
commit 003838e
Show file tree

Hide file tree

Showing 12 changed files with 382 additions and 321 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: supernova
 Type: Package
 Title: Judd, McClelland, & Ryan Formatting for ANOVA Output
-Version: 2.2.0
-Date: 2019-09-20
+Version: 2.2.1
+Date: 2019-11-05
 Authors@R: c(person("Adam", "Blake", email = "theadamattack@gmail.com", role = "aut"),
              person("Jeff", "Chrabaszcz", role = "aut"),
              person("Ji", "Son", email = "json2@calstatela.edu", role = "aut"),

diff --git a/R/supernova.R b/R/supernova.R
@@ -123,9 +123,17 @@ supernova.lmerMod <- function(fit, type = 3, verbose = FALSE) {
          effects (e.g. repeated measures models).")
   }
 
+  if (verbose) {
+    warning("There is currently no verbose version of the supernova table for
+         lmer() models. Switching to non-verbose.")
+    verbose <- FALSE
+  }
+
   model_full <- fit
   model_data <- model_full@frame
   vars_all <- supernova::variables(model_full)
+  vars_within_simple <- grep("[:]", vars_all[["within"]], value = TRUE, invert = TRUE)
+  vars_between_simple <- grep("[:]", vars_all[["between"]], value = TRUE, invert = TRUE)
 
   # get formula with no random terms
   formula_complex <- formula(model_full)
@@ -144,57 +152,96 @@ supernova.lmerMod <- function(fit, type = 3, verbose = FALSE) {
   # TREATMENT ROWS
   anova_lm <- anova_tbl(model_lm) %>% dplyr::select(-dplyr::one_of("F"))
   anova_lmer <- anova_tbl(model_full) %>% dplyr::select(dplyr::one_of(c("term", "F")))
-  treatment_rows <- dplyr::right_join(anova_lm, anova_lmer, by = "term")
+  partial_rows <- dplyr::right_join(anova_lm, anova_lmer, by = "term")
+
+  # TREATMENT WITHIN
+  partial_within <- partial_rows[partial_rows[["term"]] %in% vars_all[["within"]],]
+  treatment_within <- if (length(vars_within_simple) == 0) {
+    # No within vars
+
+    data.frame()
+  } else if (length(vars_within_simple) == 1) {
+    # A single within var, only need one error term
+    df_error_within <- df_total_within - sum(partial_within[["df"]])
+    partial_within_error <- data.frame(
+      term = "Error within subjects",
+      df = df_error_within,
+      MS = partial_within[["MS"]][[1]] / partial_within[["F"]][[1]],
+      stringsAsFactors = FALSE
+    )
+    partial_within_error[["SS"]] <-
+      partial_within_error[["MS"]] * partial_within_error[["df"]]
+    partial_within[["PRE"]] <-
+      partial_within[["SS"]] / (partial_within_error[["SS"]] + partial_within[["SS"]])
+    partial_within[["p"]] <- pf(partial_within[["F"]], partial_within[["df"]],
+                                df_error_within, lower.tail = FALSE)
+
+    dplyr::bind_rows(partial_within, partial_within_error)
+  } else {
+    # Multiple within vars, need to compute separate error terms
+    df_error_within <- partial_within[["df"]] * df_total_between
+    partial_within_error <- data.frame(
+      match = vars_all[["within"]],
+      term = paste(vars_all[["within"]], "error"),
+      df = df_error_within,
+      MS = partial_within[["MS"]] / partial_within[["F"]],
+      stringsAsFactors = FALSE
+    )
+    partial_within_error[["SS"]] <-
+      partial_within_error[["MS"]] * partial_within_error[["df"]]
 
-  get_partials <- function(treatment_rows, type) {
-    treatment <- treatment_rows[treatment_rows[["term"]] %in% vars_all[[type]]]
-    if (ncol(treatment) == 0) {
-      error <- data.frame(match = character(0))
-    } else {
-      part_total_df <- if (type == "within") df_total_within else df_total_between
-      error_df <- if (type == "between") {
-        part_total_df - sum(treatment[["df"]])
-      } else {
-        treatment[["df"]] * df_total_between
-      }
-      error <- data.frame(
-        match = vars_all[[type]],
-        term = paste(vars_all[[type]], "error"),
-        df = error_df,
-        MS = treatment[["MS"]] / treatment[["F"]],
-        stringsAsFactors = FALSE
-      )
-      error[["SS"]] <- error[["MS"]] * error[["df"]]
-    }
-    purrr::map_dfr(vars_all[[type]], function(x) {
+    purrr::map_dfr(vars_all[["within"]], function(x) {
       part <- dplyr::bind_rows(
-        dplyr::filter_at(treatment, dplyr::vars("term"), ~ . == x),
-        dplyr::filter_at(error, dplyr::vars("match"), ~ . == x)
-      ) %>%
-        dplyr::select(-dplyr::one_of("match"))
+        dplyr::filter_at(partial_within, dplyr::vars("term"), ~ . == x),
+        dplyr::filter_at(partial_within_error, dplyr::vars("match"), ~ . == x)
+      ) %>% dplyr::select(-dplyr::one_of("match"))
 
       part[, c("PRE", "p")] <- NA_real_
       part[["PRE"]][[1]] <- part[["SS"]][[1]] / sum(part[["SS"]])
-      part[["p"]][[1]] <- pf(
-        part[["F"]][[1]],
-        part[["df"]][[1]],
-        part[["df"]][[2]],
-        lower.tail = FALSE
-      )
+      part[["p"]][[1]] <- pf(part[["F"]][[1]], part[["df"]][[1]], part[["df"]][[2]],
+                             lower.tail = FALSE)
       dplyr::select(part, dplyr::one_of("term", "SS", "df", "MS", "F", "PRE", "p"))
     })
   }
+
+  # TREATMENT BETWEEN
+  partial_between <- partial_rows[partial_rows[["term"]] %in% vars_all[["between"]],]
+  treatment_between <- if (length(vars_between_simple) == 0) {
+    # No between vars
+
+    data.frame()
+  } else {
+    # Between vars never need separate error terms
+    df_error_between <- df_total_between - sum(partial_between[["df"]])
+    partial_between_error <- data.frame(
+      term = "Error between subjects",
+      df = df_error_between,
+      MS = partial_between[["MS"]][[1]] / partial_between[["F"]][[1]],
+      stringsAsFactors = FALSE
+    )
+    partial_between_error[["SS"]] <-
+      partial_between_error[["MS"]] * partial_between_error[["df"]]
+    partial_between[["PRE"]] <-
+      partial_between[["SS"]] / (partial_between_error[["SS"]] + partial_between[["SS"]])
+    partial_between[["p"]] <- pf(partial_between[["F"]], partial_between[["df"]],
+                                 df_error_between, lower.tail = FALSE)
+
+    dplyr::bind_rows(partial_between, partial_between_error)
+  }
+
   partials <- list(
-    within = get_partials(treatment_rows, "within"),
-    between = get_partials(treatment_rows, "between")
+    within = treatment_within,
+    between = treatment_between
   )
 
   # PART TOTALS
   get_partial_total_ss <- function(partials, type) {
     other_type <- setdiff(c("between", "within"), type)
     if (length(vars_all[[type]]) == 0) {
+      # none of this type of variable, have to infer total
       total[["SS"]] - sum(partials[[other_type]][["SS"]])
     } else {
+      # total is the explicit sum of the partials
       sum(partials[[type]][["SS"]])
     }
   }
@@ -235,16 +282,10 @@ superanova <- supernova
 
 #' @export
 print.supernova <- function(x, pcut = 4, ...) {
-  verbose <- attr(x, "verbose")
+  is_verbose <- attr(x, "verbose") == TRUE
   is_lmer_model <- "lmerMod" %in% class(x$fit)
   is_null_model <- length(variables(x$fit)$predictor) == 0
 
-  if (is_lmer_model & verbose) {
-    warning("There is currently no verbose version of the supernova table for
-         lmer() models. Switching to non-verbose.")
-    attr(x, "verbose") <- FALSE
-  }
-
   # setup
   tbl <- x$tbl
 
@@ -277,8 +318,6 @@ print.supernova <- function(x, pcut = 4, ...) {
     tbl <- insert_row(tbl, 1, c("Between Subjects", rep("", 6)))
     tbl <- insert_row(tbl, grep("^Total between subjects", tbl$term) + 1,
                       c("Within Subjects", rep("", 6)))
-    tbl[tbl$term == "Total between subjects", ][["term"]] <- "  Total"
-    tbl[tbl$term == "Total within subjects", ][["term"]] <- "  Total"
 
     pred_terms <- variables(x$fit)$predictor
     error_terms <- paste(pred_terms, "error")
@@ -289,21 +328,23 @@ print.supernova <- function(x, pcut = 4, ...) {
       tbl[["term"]], paste0("(", paste0(pred_terms, collapse = "|"), ")"), "  \\1"
     )
     tbl <- insert_rule(tbl, 1)
-    tbl <- insert_rule(tbl, grep("^  Total", tbl$term)[[1]] + 1)
-    tbl <- insert_rule(tbl, grep("^  Total", tbl$term)[[2]] + 1)
+    tbl <- insert_rule(tbl, grep("Total between subjects", tbl$term) + 1)
+    tbl <- insert_rule(tbl, grep("Total within subjects", tbl$term) + 1)
+    tbl[["term"]] <- tbl[["term"]] %>%
+      stringr::str_replace("(Total|Error) (?:between|within) subjects", "\\1")
   }
 
   # add spaces and a vertical bar to separate the terms & desc from values
   barHelp <- function(x, y) paste0(x, y, " |")
-  bar_col <- if (verbose) "description" else "term"
+  bar_col <- if (!is_lmer_model & is_verbose) "description" else "term"
   spaces_to_add <- max(nchar(tbl[[bar_col]])) - nchar(tbl[[bar_col]])
   tbl[[bar_col]] <- mapply(barHelp, tbl[[bar_col]], strrep(" ", spaces_to_add))
 
   # remove unnecessary column names
   names(tbl)[names(tbl) %in% c("term", "description")] <- ""
 
   # remove unnecessary columns
-  if (!verbose && !is_lmer_model) tbl[[2]] <- NULL
+  if (!is_verbose && !is_lmer_model) tbl[[2]] <- NULL
 
   # printing
   cat_line(" Analysis of Variance Table (Type ", attr(x, "type"), " SS)")

diff --git a/R/utils.R b/R/utils.R
@@ -97,37 +97,7 @@ pad_len <- function(x, len, after = length(x), pad = NA) {
 #' @keywords internal
 resolve_type <- function(type) {
   clean_type <- trimws(tolower(type))
-  if (clean_type %in% c("1", "i", "sequential")) return (1)
-  if (clean_type %in% c("2", "ii", "hierarchical")) return (2)
-  if (clean_type %in% c("3", "iii", "orthogonal")) return (3)
-}
-
-#' Replace and subset only matched elements
-#'
-#' Search and subset on pattern matches, then make replacements.
-#'
-#' @param x a character vector where matches are sought, or an object which can
-#'   be coerced by as.character to a character vector. \link{Long vectors} are
-#'   supported.
-#' @param pattern character string containing a \link{regular expression} (or
-#'   character string for \code{fixed = TRUE}) to be matched in the given
-#'   character vector. Coerced by \code{\link{as.character}} to a character
-#'   string if possible. If a character vector of length 2 or more is supplied,
-#'   the first element is used with a warning.  Missing values are allowed
-#'   except for \code{regexpr}, \code{gregexpr} and \code{regexec}.
-#' @param replacement a replacement for matched pattern in \code{sub} and
-#'   \code{gsub}. Coerced to character if possible. For \code{fixed = FALSE}
-#'   this can include backreferences \code{"\\1"} to \code{"\\9"} to
-#'   parenthesized subexpressions of \code{pattern}.  For \code{perl = TRUE}
-#'   only, it can also contain \code{"\\U"} or \code{"\\L"} to convert the rest
-#'   of the replacement to upper or lower case and \code{"\\E"} to end case
-#'   conversion. If a character vector of length 2 or more is supplied, the
-#'   first element is used with a warning.  If \code{NA}, all elements in the
-#'   result corresponding to matches will be set to \code{NA}.
-#'
-#' @return a character vector containing only the modified elements.
-#' @keywords internal
-sub_matches <- function(x, pattern, replacement) {
-  matches <- grep(pattern, x, value = TRUE)
-  gsub(pattern, replacement, matches)
+  if (clean_type %in% c("1", "i", "sequential")) return(1)
+  if (clean_type %in% c("2", "ii", "hierarchical")) return(2)
+  if (clean_type %in% c("3", "iii", "orthogonal")) return(3)
 }
diff --git a/R/variables.R b/R/variables.R
@@ -25,15 +25,21 @@ variables <- function(object) {
     vars_between <- vars_pred
   } else if ("lmerMod" %in% class(object)) {
     # need to determine which are within vs. between
-    data <- if (class(object) %in% "lmerMod") object@frame else object$frame
+    data <- object@frame
     nrow_group <- dplyr::distinct_at(data, dplyr::vars(vars_group)) %>% nrow()
     vars_pred_non_int <- grep("^[^:]+$", vars_pred, value = TRUE)
-    is_pred_between <- purrr::map_lgl(vars_pred_non_int, function(var) {
+    is_pred_between_simple <- purrr::map_lgl(vars_pred_non_int, function(var) {
       nrow_var <- dplyr::distinct_at(data, dplyr::vars(vars_group, var)) %>% nrow()
       nrow_var == nrow_group
     })
-    vars_between <- vars_pred[is_pred_between]
-    vars_within <- vars_pred[!is_pred_between]
+    vars_within_simple <- vars_pred_non_int[!is_pred_between_simple]
+    is_pred_within <- if (length(vars_within_simple) > 0) {
+      grepl(paste0(vars_within_simple, collapse = "|"), vars_pred)
+    } else {
+      FALSE
+    }
+    vars_within <- vars_pred[is_pred_within]
+    vars_between <- vars_pred[!is_pred_within]
   } else {
     # cannot determine which are within or between without more info
     vars_within <- character(0)
@@ -48,25 +54,3 @@ variables <- function(object) {
     between = vars_between
   )
 }
-
-# variables.lmer <- function(model) {
-#   # get formula with no random terms
-#   formula_complex <- formula(model)
-#   formula_simple <- lme4::nobars(formula_complex)
-#
-#   # determine within and between variables
-#   bare_terms <- variables.default(formula_simple)
-#   outcome_term <- bare_terms$outcome
-#   pred_terms <- bare_terms$predictor
-  # rand_terms <- gsub(
-  #   "1 ?\\| ?", "",
-  #   as.character(lme4::findbars(formula_complex))
-  # )
-#   group_term <- rand_terms[which.min(nchar(rand_terms))]
-#   within_terms <- sub_matches(rand_terms, paste0("(.*):", group_term), "\\1")
-#   if (length(within_terms) > 0) {
-#     # add within interactions
-#     within_terms <- grep(paste0(within_terms, collapse = "|"), pred_terms, value = TRUE)
-#   }
-#   between_terms <- setdiff(pred_terms, within_terms)
-# }
diff --git a/README.Rmd b/README.Rmd
@@ -9,15 +9,15 @@ knitr::opts_chunk$set(
   collapse = TRUE,
   comment = "#>",
   fig.path = "man/figures/README-",
-  out.width = "100%"
+  out.width = "80%"
 )
 
 library(supernova)
 library(mosaic)
 set.seed(123)
 ```
 
-# supernova <img src="man/figures/logo.png" width="120" align="right" />
+# supernova <img src="man/figures/logo.png" width="40%" align="right" />
 
 [![Travis build status](https://travis-ci.org/UCLATALL/supernova.svg?branch=master)](https://travis-ci.org/UCLATALL/supernova)
 [![Codecov test coverage](https://codecov.io/gh/UCLATALL/supernova/branch/master/graph/badge.svg)](https://codecov.io/gh/UCLATALL/supernova?branch=master)
@@ -41,19 +41,18 @@ In addition to the ANOVA table provided by `supernova()`, the `supernova` packag
 
 ### Supported models
 
-The following models are explicitly tested and supported by `supernova()`, _for independent samples (between-subjects) data only_. For these models, there is also support for datasets with missing or unbalanced data. 
+The following models are explicitly tested and supported by `supernova()`, _for independent samples (between-subjects) data only_. For these models, there is also support for datasets with missing or unbalanced data.
 
 * empty models: `y ~ NULL`
 * simple regression: `y ~ a`
 * multiple regression: `y ~ a + b`
 * interactive regression: `y ~ a * b`
 
-Additionally, a subset of within-subjects designs are supported. Importantly, only fully crossed (participants are observed in every condition) and simply nested models (participants have multiple scores in the same condition) have been tested. To accomodate these models `supernova()` can accept models fit via `lmer()` as in the [Examples](#examples) below.
+Additionally, a subset of within-subjects designs are supported and explicitly tested. To accomodate these models `supernova()` can accept models fit via `lmer()` as in the [Examples](#examples) below. Only models like those included in those examples have been tested for within-subjects designs.
 
 Anything not included above is not (yet) explicitly tested and may yield errors or incorrect statistics. This includes, but is not limited to
 
 * one-sample _t_-tests
-* mixed designs
 
 
 ## Installing
@@ -180,6 +179,26 @@ simple_nested %>%
   supernova()
 ```
 
+#### A three-way design with two between-groups variables and a nested variable
+
+In this example, each person in heterosexual marriage generates a rating of satisfaction. Additionally, these couples were chosen such that they either have children or not, and have been married 15 vs. 30 years. If we fit the data with `lm()` that would ignore the non-independence due to the people being in the same `couple`. Compare this output with the foloowing output where the `group` is specified as a random factor.
+
+```{r}
+complex_nested <- JMRData::ex11.22 %>%
+  tidyr::gather(sex, rating, Male, Female) %>%
+  dplyr::mutate_at(dplyr::vars(couple, children, sex, yearsmarried), as.factor)
+
+# fitting this with lm would ignore the non-independence due to group
+complex_nested %>% 
+  lm(rating ~ sex * yearsmarried * children, data = .) %>% 
+  supernova(verbose = FALSE)
+
+# using lmer() we can specify the non-independence
+complex_nested %>% 
+  lmer(rating ~ sex * yearsmarried * children + (1|couple), data = .) %>% 
+  supernova()
+```
+
 
 ### Using Different SS Types