Skip to content

Commit

Permalink
Merge pull request #29 from UCLATALL/feature/mixed-models
Browse files Browse the repository at this point in the history
Add support for mixed models
  • Loading branch information
jimstigler committed Nov 5, 2019
2 parents cfee237 + b55ffd2 commit 003838e
Show file tree
Hide file tree
Showing 12 changed files with 382 additions and 321 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: supernova
Type: Package
Title: Judd, McClelland, & Ryan Formatting for ANOVA Output
Version: 2.2.0
Date: 2019-09-20
Version: 2.2.1
Date: 2019-11-05
Authors@R: c(person("Adam", "Blake", email = "theadamattack@gmail.com", role = "aut"),
person("Jeff", "Chrabaszcz", role = "aut"),
person("Ji", "Son", email = "json2@calstatela.edu", role = "aut"),
Expand Down
135 changes: 88 additions & 47 deletions R/supernova.R
Original file line number Diff line number Diff line change
Expand Up @@ -123,9 +123,17 @@ supernova.lmerMod <- function(fit, type = 3, verbose = FALSE) {
effects (e.g. repeated measures models).")
}

if (verbose) {
warning("There is currently no verbose version of the supernova table for
lmer() models. Switching to non-verbose.")
verbose <- FALSE
}

model_full <- fit
model_data <- model_full@frame
vars_all <- supernova::variables(model_full)
vars_within_simple <- grep("[:]", vars_all[["within"]], value = TRUE, invert = TRUE)
vars_between_simple <- grep("[:]", vars_all[["between"]], value = TRUE, invert = TRUE)

# get formula with no random terms
formula_complex <- formula(model_full)
Expand All @@ -144,57 +152,96 @@ supernova.lmerMod <- function(fit, type = 3, verbose = FALSE) {
# TREATMENT ROWS
anova_lm <- anova_tbl(model_lm) %>% dplyr::select(-dplyr::one_of("F"))
anova_lmer <- anova_tbl(model_full) %>% dplyr::select(dplyr::one_of(c("term", "F")))
treatment_rows <- dplyr::right_join(anova_lm, anova_lmer, by = "term")
partial_rows <- dplyr::right_join(anova_lm, anova_lmer, by = "term")

# TREATMENT WITHIN
partial_within <- partial_rows[partial_rows[["term"]] %in% vars_all[["within"]],]
treatment_within <- if (length(vars_within_simple) == 0) {
# No within vars

data.frame()
} else if (length(vars_within_simple) == 1) {
# A single within var, only need one error term
df_error_within <- df_total_within - sum(partial_within[["df"]])
partial_within_error <- data.frame(
term = "Error within subjects",
df = df_error_within,
MS = partial_within[["MS"]][[1]] / partial_within[["F"]][[1]],
stringsAsFactors = FALSE
)
partial_within_error[["SS"]] <-
partial_within_error[["MS"]] * partial_within_error[["df"]]
partial_within[["PRE"]] <-
partial_within[["SS"]] / (partial_within_error[["SS"]] + partial_within[["SS"]])
partial_within[["p"]] <- pf(partial_within[["F"]], partial_within[["df"]],
df_error_within, lower.tail = FALSE)

dplyr::bind_rows(partial_within, partial_within_error)
} else {
# Multiple within vars, need to compute separate error terms
df_error_within <- partial_within[["df"]] * df_total_between
partial_within_error <- data.frame(
match = vars_all[["within"]],
term = paste(vars_all[["within"]], "error"),
df = df_error_within,
MS = partial_within[["MS"]] / partial_within[["F"]],
stringsAsFactors = FALSE
)
partial_within_error[["SS"]] <-
partial_within_error[["MS"]] * partial_within_error[["df"]]

get_partials <- function(treatment_rows, type) {
treatment <- treatment_rows[treatment_rows[["term"]] %in% vars_all[[type]]]
if (ncol(treatment) == 0) {
error <- data.frame(match = character(0))
} else {
part_total_df <- if (type == "within") df_total_within else df_total_between
error_df <- if (type == "between") {
part_total_df - sum(treatment[["df"]])
} else {
treatment[["df"]] * df_total_between
}
error <- data.frame(
match = vars_all[[type]],
term = paste(vars_all[[type]], "error"),
df = error_df,
MS = treatment[["MS"]] / treatment[["F"]],
stringsAsFactors = FALSE
)
error[["SS"]] <- error[["MS"]] * error[["df"]]
}
purrr::map_dfr(vars_all[[type]], function(x) {
purrr::map_dfr(vars_all[["within"]], function(x) {
part <- dplyr::bind_rows(
dplyr::filter_at(treatment, dplyr::vars("term"), ~ . == x),
dplyr::filter_at(error, dplyr::vars("match"), ~ . == x)
) %>%
dplyr::select(-dplyr::one_of("match"))
dplyr::filter_at(partial_within, dplyr::vars("term"), ~ . == x),
dplyr::filter_at(partial_within_error, dplyr::vars("match"), ~ . == x)
) %>% dplyr::select(-dplyr::one_of("match"))

part[, c("PRE", "p")] <- NA_real_
part[["PRE"]][[1]] <- part[["SS"]][[1]] / sum(part[["SS"]])
part[["p"]][[1]] <- pf(
part[["F"]][[1]],
part[["df"]][[1]],
part[["df"]][[2]],
lower.tail = FALSE
)
part[["p"]][[1]] <- pf(part[["F"]][[1]], part[["df"]][[1]], part[["df"]][[2]],
lower.tail = FALSE)
dplyr::select(part, dplyr::one_of("term", "SS", "df", "MS", "F", "PRE", "p"))
})
}

# TREATMENT BETWEEN
partial_between <- partial_rows[partial_rows[["term"]] %in% vars_all[["between"]],]
treatment_between <- if (length(vars_between_simple) == 0) {
# No between vars

data.frame()
} else {
# Between vars never need separate error terms
df_error_between <- df_total_between - sum(partial_between[["df"]])
partial_between_error <- data.frame(
term = "Error between subjects",
df = df_error_between,
MS = partial_between[["MS"]][[1]] / partial_between[["F"]][[1]],
stringsAsFactors = FALSE
)
partial_between_error[["SS"]] <-
partial_between_error[["MS"]] * partial_between_error[["df"]]
partial_between[["PRE"]] <-
partial_between[["SS"]] / (partial_between_error[["SS"]] + partial_between[["SS"]])
partial_between[["p"]] <- pf(partial_between[["F"]], partial_between[["df"]],
df_error_between, lower.tail = FALSE)

dplyr::bind_rows(partial_between, partial_between_error)
}

partials <- list(
within = get_partials(treatment_rows, "within"),
between = get_partials(treatment_rows, "between")
within = treatment_within,
between = treatment_between
)

# PART TOTALS
get_partial_total_ss <- function(partials, type) {
other_type <- setdiff(c("between", "within"), type)
if (length(vars_all[[type]]) == 0) {
# none of this type of variable, have to infer total
total[["SS"]] - sum(partials[[other_type]][["SS"]])
} else {
# total is the explicit sum of the partials
sum(partials[[type]][["SS"]])
}
}
Expand Down Expand Up @@ -235,16 +282,10 @@ superanova <- supernova

#' @export
print.supernova <- function(x, pcut = 4, ...) {
verbose <- attr(x, "verbose")
is_verbose <- attr(x, "verbose") == TRUE
is_lmer_model <- "lmerMod" %in% class(x$fit)
is_null_model <- length(variables(x$fit)$predictor) == 0

if (is_lmer_model & verbose) {
warning("There is currently no verbose version of the supernova table for
lmer() models. Switching to non-verbose.")
attr(x, "verbose") <- FALSE
}

# setup
tbl <- x$tbl

Expand Down Expand Up @@ -277,8 +318,6 @@ print.supernova <- function(x, pcut = 4, ...) {
tbl <- insert_row(tbl, 1, c("Between Subjects", rep("", 6)))
tbl <- insert_row(tbl, grep("^Total between subjects", tbl$term) + 1,
c("Within Subjects", rep("", 6)))
tbl[tbl$term == "Total between subjects", ][["term"]] <- " Total"
tbl[tbl$term == "Total within subjects", ][["term"]] <- " Total"

pred_terms <- variables(x$fit)$predictor
error_terms <- paste(pred_terms, "error")
Expand All @@ -289,21 +328,23 @@ print.supernova <- function(x, pcut = 4, ...) {
tbl[["term"]], paste0("(", paste0(pred_terms, collapse = "|"), ")"), " \\1"
)
tbl <- insert_rule(tbl, 1)
tbl <- insert_rule(tbl, grep("^ Total", tbl$term)[[1]] + 1)
tbl <- insert_rule(tbl, grep("^ Total", tbl$term)[[2]] + 1)
tbl <- insert_rule(tbl, grep("Total between subjects", tbl$term) + 1)
tbl <- insert_rule(tbl, grep("Total within subjects", tbl$term) + 1)
tbl[["term"]] <- tbl[["term"]] %>%
stringr::str_replace("(Total|Error) (?:between|within) subjects", "\\1")
}

# add spaces and a vertical bar to separate the terms & desc from values
barHelp <- function(x, y) paste0(x, y, " |")
bar_col <- if (verbose) "description" else "term"
bar_col <- if (!is_lmer_model & is_verbose) "description" else "term"
spaces_to_add <- max(nchar(tbl[[bar_col]])) - nchar(tbl[[bar_col]])
tbl[[bar_col]] <- mapply(barHelp, tbl[[bar_col]], strrep(" ", spaces_to_add))

# remove unnecessary column names
names(tbl)[names(tbl) %in% c("term", "description")] <- ""

# remove unnecessary columns
if (!verbose && !is_lmer_model) tbl[[2]] <- NULL
if (!is_verbose && !is_lmer_model) tbl[[2]] <- NULL

# printing
cat_line(" Analysis of Variance Table (Type ", attr(x, "type"), " SS)")
Expand Down
36 changes: 3 additions & 33 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -97,37 +97,7 @@ pad_len <- function(x, len, after = length(x), pad = NA) {
#' @keywords internal
resolve_type <- function(type) {
clean_type <- trimws(tolower(type))
if (clean_type %in% c("1", "i", "sequential")) return (1)
if (clean_type %in% c("2", "ii", "hierarchical")) return (2)
if (clean_type %in% c("3", "iii", "orthogonal")) return (3)
}

#' Replace and subset only matched elements
#'
#' Search and subset on pattern matches, then make replacements.
#'
#' @param x a character vector where matches are sought, or an object which can
#' be coerced by as.character to a character vector. \link{Long vectors} are
#' supported.
#' @param pattern character string containing a \link{regular expression} (or
#' character string for \code{fixed = TRUE}) to be matched in the given
#' character vector. Coerced by \code{\link{as.character}} to a character
#' string if possible. If a character vector of length 2 or more is supplied,
#' the first element is used with a warning. Missing values are allowed
#' except for \code{regexpr}, \code{gregexpr} and \code{regexec}.
#' @param replacement a replacement for matched pattern in \code{sub} and
#' \code{gsub}. Coerced to character if possible. For \code{fixed = FALSE}
#' this can include backreferences \code{"\\1"} to \code{"\\9"} to
#' parenthesized subexpressions of \code{pattern}. For \code{perl = TRUE}
#' only, it can also contain \code{"\\U"} or \code{"\\L"} to convert the rest
#' of the replacement to upper or lower case and \code{"\\E"} to end case
#' conversion. If a character vector of length 2 or more is supplied, the
#' first element is used with a warning. If \code{NA}, all elements in the
#' result corresponding to matches will be set to \code{NA}.
#'
#' @return a character vector containing only the modified elements.
#' @keywords internal
sub_matches <- function(x, pattern, replacement) {
matches <- grep(pattern, x, value = TRUE)
gsub(pattern, replacement, matches)
if (clean_type %in% c("1", "i", "sequential")) return(1)
if (clean_type %in% c("2", "ii", "hierarchical")) return(2)
if (clean_type %in% c("3", "iii", "orthogonal")) return(3)
}
36 changes: 10 additions & 26 deletions R/variables.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,21 @@ variables <- function(object) {
vars_between <- vars_pred
} else if ("lmerMod" %in% class(object)) {
# need to determine which are within vs. between
data <- if (class(object) %in% "lmerMod") object@frame else object$frame
data <- object@frame
nrow_group <- dplyr::distinct_at(data, dplyr::vars(vars_group)) %>% nrow()
vars_pred_non_int <- grep("^[^:]+$", vars_pred, value = TRUE)
is_pred_between <- purrr::map_lgl(vars_pred_non_int, function(var) {
is_pred_between_simple <- purrr::map_lgl(vars_pred_non_int, function(var) {
nrow_var <- dplyr::distinct_at(data, dplyr::vars(vars_group, var)) %>% nrow()
nrow_var == nrow_group
})
vars_between <- vars_pred[is_pred_between]
vars_within <- vars_pred[!is_pred_between]
vars_within_simple <- vars_pred_non_int[!is_pred_between_simple]
is_pred_within <- if (length(vars_within_simple) > 0) {
grepl(paste0(vars_within_simple, collapse = "|"), vars_pred)
} else {
FALSE
}
vars_within <- vars_pred[is_pred_within]
vars_between <- vars_pred[!is_pred_within]
} else {
# cannot determine which are within or between without more info
vars_within <- character(0)
Expand All @@ -48,25 +54,3 @@ variables <- function(object) {
between = vars_between
)
}

# variables.lmer <- function(model) {
# # get formula with no random terms
# formula_complex <- formula(model)
# formula_simple <- lme4::nobars(formula_complex)
#
# # determine within and between variables
# bare_terms <- variables.default(formula_simple)
# outcome_term <- bare_terms$outcome
# pred_terms <- bare_terms$predictor
# rand_terms <- gsub(
# "1 ?\\| ?", "",
# as.character(lme4::findbars(formula_complex))
# )
# group_term <- rand_terms[which.min(nchar(rand_terms))]
# within_terms <- sub_matches(rand_terms, paste0("(.*):", group_term), "\\1")
# if (length(within_terms) > 0) {
# # add within interactions
# within_terms <- grep(paste0(within_terms, collapse = "|"), pred_terms, value = TRUE)
# }
# between_terms <- setdiff(pred_terms, within_terms)
# }
29 changes: 24 additions & 5 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,15 @@ knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
fig.path = "man/figures/README-",
out.width = "100%"
out.width = "80%"
)
library(supernova)
library(mosaic)
set.seed(123)
```

# supernova <img src="man/figures/logo.png" width="120" align="right" />
# supernova <img src="man/figures/logo.png" width="40%" align="right" />

[![Travis build status](https://travis-ci.org/UCLATALL/supernova.svg?branch=master)](https://travis-ci.org/UCLATALL/supernova)
[![Codecov test coverage](https://codecov.io/gh/UCLATALL/supernova/branch/master/graph/badge.svg)](https://codecov.io/gh/UCLATALL/supernova?branch=master)
Expand All @@ -41,19 +41,18 @@ In addition to the ANOVA table provided by `supernova()`, the `supernova` packag

### Supported models

The following models are explicitly tested and supported by `supernova()`, _for independent samples (between-subjects) data only_. For these models, there is also support for datasets with missing or unbalanced data.
The following models are explicitly tested and supported by `supernova()`, _for independent samples (between-subjects) data only_. For these models, there is also support for datasets with missing or unbalanced data.

* empty models: `y ~ NULL`
* simple regression: `y ~ a`
* multiple regression: `y ~ a + b`
* interactive regression: `y ~ a * b`

Additionally, a subset of within-subjects designs are supported. Importantly, only fully crossed (participants are observed in every condition) and simply nested models (participants have multiple scores in the same condition) have been tested. To accomodate these models `supernova()` can accept models fit via `lmer()` as in the [Examples](#examples) below.
Additionally, a subset of within-subjects designs are supported and explicitly tested. To accomodate these models `supernova()` can accept models fit via `lmer()` as in the [Examples](#examples) below. Only models like those included in those examples have been tested for within-subjects designs.

Anything not included above is not (yet) explicitly tested and may yield errors or incorrect statistics. This includes, but is not limited to

* one-sample _t_-tests
* mixed designs


## Installing
Expand Down Expand Up @@ -180,6 +179,26 @@ simple_nested %>%
supernova()
```

#### A three-way design with two between-groups variables and a nested variable

In this example, each person in heterosexual marriage generates a rating of satisfaction. Additionally, these couples were chosen such that they either have children or not, and have been married 15 vs. 30 years. If we fit the data with `lm()` that would ignore the non-independence due to the people being in the same `couple`. Compare this output with the foloowing output where the `group` is specified as a random factor.

```{r}
complex_nested <- JMRData::ex11.22 %>%
tidyr::gather(sex, rating, Male, Female) %>%
dplyr::mutate_at(dplyr::vars(couple, children, sex, yearsmarried), as.factor)
# fitting this with lm would ignore the non-independence due to group
complex_nested %>%
lm(rating ~ sex * yearsmarried * children, data = .) %>%
supernova(verbose = FALSE)
# using lmer() we can specify the non-independence
complex_nested %>%
lmer(rating ~ sex * yearsmarried * children + (1|couple), data = .) %>%
supernova()
```


### Using Different SS Types

Expand Down

0 comments on commit 003838e

Please sign in to comment.