Permalink
Branch: master
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
604 lines (499 sloc) 20.2 KB
#' @title Dynamic panel models fit with maximum likelihood
#'
#' @description Estimate dynamic panel models with fixed effects via
#' maximum likelihood estimation.
#'
#' @param formula Model formula. See details for instructions on specifying
#' parameters properly.
#' @param data Data frame in "long" format. Prefers a "panel_data" object.
#' @param id Name of the data column that identifies which individual the
#' observation is. Not needed if \code{data} is a "panel_data" object.
#' @param wave Name of the data column that identifies which wave the
#' observation is from. Not needed if \code{data} is a "panel_data" object.
#' @param error.inv Constrain the error variance to be equal across waves.
#' Default is FALSE.
#' @param const.inv Constrain the dependent variable's variance to be equal
#' across waves. This removes cross-sectional dependence. Default is FALSE.
#' @param alpha.free Estimate each wave of the dependent variable's loading on
#' the alpha latent variable. Default is FALSE, meaning each wave has a loading
#' of 1.
#' @param y.lag Which lag(s) of the dependent variable to include in the
#' regression. Default is 1, but any number of vector of numbers can be used.
#' @param y.free If TRUE, allows the regression coefficient(s) for the lagged
#' dependent variable to vary over time. Default is FALSE. You may alternately
#' provide a number or vector of numbers corresponding to which lags should
#' vary freely.
#' @param fixed.effects Fit a fixed effects model? Default is TRUE. If FALSE,
#' you get a random effects specification instead.
#' @param print.only Instead of estimating the model, print the \pkg{lavaan}
#' model string to the console instead.
#' @param err.inv Deprecated, same purpose as `error.inv`.
#' @param ... Extra parameters to pass to \code{\link[lavaan]{sem}}. Examples
#' could be \code{missing = "fiml"} for missing data or
#' \code{estimator = "MLM"} for robust estimation.
#'
#' @details
#'
#' The right-hand side of the formula has two parts, separated by a bar
#' (\code{|}). The first part should include the time-varying predictors.
#' The second part, then, is for the time-invariant variables. If you put
#' a time-varying variable in the second part of the formula, by default
#' the first wave's value of that variable is treated as the constant.
#'
#' You must include time-varying predictors. If you do not include a bar
#' in the formula, all variables are treated as time-varying.
#'
#' \emph{Predetermined variables}:
#'
#' To set a variable as predetermined, or weakly exogenous, surround the
#' variable with a \code{pre} function. For instance, if you want the variable
#' \code{union} to be predetermined, you could specify the formula like this:
#' \code{wks ~ pre(union) + lwage | ed}, where \code{wks} is the dependent
#' variable, \code{lwage} is a strictly exogenous time-varying predictor,
#' and \code{ed} is a strictly exogenous time-invariant predictor.
#'
#' To lag a predictor, surround the variable with a \code{lag} function in
#' the same way. Note that the lag function used is specific to this package,
#' so it does not work the same way as the built-in lag function.
#'
#'
#' @return An object of class `dpm` which has its own \code{summary} method.
#'
#' The `dpm` object is an extension of the `lavaan` class and has all
#' the capabilities of `lavaan` objects, with some extras.
#'
#' It contains extra slots for: \itemize{
#'
#' \item \code{mod_string}, the character object used to specify the model
#' to lavaan. This is helpful if you want to fit the model yourself or
#' wish to check that the specification is correct.
#' \item \code{wide_data}, the widened data frame necessary to fit the SEM.
#'
#' }
#'
#' @author Jacob A. Long, in consultation with Richard A. Williams. All errors
#' are Jacob's.
#'
#' @references
#'
#' Allison, P. D., Williams, R., & Moral-Benito, E. (2017). Maximum likelihood
#' for cross-lagged panel models with fixed effects. *Socius*, *3*, 1–17.
#' http://journals.sagepub.com/doi/10.1177/2378023117710578
#'
#' @export
#' @rdname dpm
#' @importFrom lavaan sem lavInspect
#' @importFrom methods as
#' @importFrom stats as.formula
#' @import rlang
#' @importFrom panelr is_panel
#'
#' @examples
#' # Load example data
#' data("WageData", package = "panelr")
#' # Convert data to panel_data format for ease of use
#' wages <- panel_data(WageData, id = id, wave = t)
#'
#' # Replicates Allison, Williams, & Moral-Benito (2017) analysis
#' fit <- dpm(wks ~ pre(lag(union)) + lag(lwage) | ed, data = wages,
#' error.inv = TRUE, information = "observed")
#' # Note: information = "observed" only needed to match Stata/SAS standard errors
#' summary(fit)
#'
#'
dpm <- function(formula, data, error.inv = FALSE, const.inv = FALSE,
alpha.free = FALSE, y.lag = 1, y.free = FALSE,
fixed.effects = TRUE, print.only = FALSE,
id = NULL, wave = NULL, err.inv = NULL, ...) {
# Check data integrity
if (!is_panel(data)) {
if (!is.data.frame(data)) {
stop("data argument must be a data frame.")
}
id <- expr_text(enexpr(id))
wave <- expr_text(enexpr(wave))
data <- panelr::panel_data(data, !! sym(id), !! sym(wave))
} else {
wave <- panelr::get_wave(data)
id <- panelr::get_id(data)
}
# Catch deprecated arg.
if (!is.null(err.inv)) {
error.inv <- err.inv
message("err.inv is deprecated. Please use error.inv instead.")
}
# Helper function to get info from model formula
pf <- cl_formula_parser(formula)
# Helper function gets info about lag specification
mf <- panel_model_frame(pf$allvars, data)
## Using model_frame to allow for variable transformations in formulae
# Requires two different strategies depending on presence/absence of lagged
# variables since panel_model_frame returns different type of object.
if ("vars_lags" %in% names(mf)) {
form_names <- names(mf$vars_lags)[names(mf$vars_lags) %nin% c(id, wave)]
mod_formula <- paste("~", paste(form_names, collapse = " + "))
mod_formula <- as.formula(mod_formula)
mf$data <- panelr::model_frame(mod_formula, data = mf$data)
} else {
mod_formula <- paste("~", paste(pf$allvars, collapse = " + "))
mod_formula <- as.formula(mod_formula)
mf <- panelr::model_frame(mod_formula, data = mf)
}
# Helper function to write the lavaan syntax
model <- model_builder(mf = mf, dv = pf$dv, endogs = pf$endogs,
exogs = pf$exogs,
constants = pf$constants, id = id, wave = wave,
err.inv = error.inv, const.inv = const.inv,
alpha.free = alpha.free, y.lag = y.lag,
y.free = y.free, fixed.effects = fixed.effects)
# If only printing is wanted, just print it and stop
if (print.only == TRUE) {
cat(model$model)
return(invisible(model$model))
}
# Fit the model with lavaan
s <- lavaan::sem(model = model$model, data = model$data, ...)
nobs_o <- length(unique(data[[id]]))
# nobs_used <- lavaan::lavInspect(s, what = "ntotal")
# Initialize the dpm object
out <- as(s, Class = "dpm")
out@mod_string <- model$model
out@wide_data <- model$data
out@call <- sys.call()
out@call_info <- list(out, dv = pf$dv, tot_obs = nobs_o,
complete_obs = model$complete_obs, endogs = pf$endogs,
exogs = pf$exogs, start = model$start, end = model$end,
y.lag = y.lag, var_coefs = model$var_coefs,
y.free = y.free, fixed.effects = fixed.effects,
alpha.free = alpha.free, const.inv = const.inv,
error.inv = error.inv)
return(out)
}
##### dpm summary #############################################################
#' @rdname summary.dpm
#' @title Summarize dpm objects
#' @description The summary method is designed to offer similar arguments to
#' `lavaan`'s summary, but with shorter and more domain-specific output.
#'
#' @param object A `dpm` object.
#' @param digits How many digits should be printed in the model summary?
#' Default is 3. You can set a default by setting the option `"dpm-digits"`.
#' @param standardized Use `lavaan`'s method for standardizing coefficients?
#' Default is FALSE.
#' @param ci Show confidence intervals? Default is FALSE.
#' @param se Show standard errors? Default is TRUE.
#' @param pvalue Show p values? Default is TRUE.
#' @param ci.level How wide should the confidence intervals be? Ignored if
#' `ci` is FALSE. Default is .95.
#' @param zstat Show the z statistic? Default is TRUE.
#' @param boot.ci.type If the model was fit with bootstrapped standard errors
#' and `ci` is TRUE, which method should be used for finding the intervals?
#' Default is `"perc"`.
#' @param ... Ignored.
#' @export
#' @importFrom crayon underline italic
#' @importFrom stats4 summary
setMethod("summary", "dpm",
function(object, standardized = FALSE, ci = FALSE, se = TRUE,
zstat = TRUE, pvalue = TRUE, ci.level = .95,
boot.ci.type = c("perc", "norm", "basic", "bca.simple"),
digits = getOption("dpm-digits", 3), ...) {
# Save coefficient vector
# TODO: give user more options here
coefs <- lavaan::parameterestimates(object, se = TRUE, zstat = TRUE,
pvalue = TRUE, ci = TRUE,
standardized = standardized,
boot.ci.type = boot.ci.type[1],
level = ci.level)
# Get attributes
a <- object@call_info
# Figure out where in the list the first and final fixed coefficient is
fixed_coefs <- coefs[coefs$label %in% unique(a$var_coefs$coef),]
which_coefs <- c(
"label",
"est",
if (!se) {NULL} else {"se"},
if (!ci) {NULL} else {c("ci.lower", "ci.upper")},
if (!zstat) {NULL} else {"z"},
if (!pvalue) {NULL} else {"pvalue"}
)
which_cols <- unlist(sapply(which_coefs[-1], function(x) {
switch(x,
"est" = "Est.",
"se" = "S.E.",
"ci.lower" = make_ci_labs(ci.level)[[1]],
"ci.upper" = make_ci_labs(ci.level)[[2]],
"z" = "z val.",
"pvalue" = "p")
}))
# if ("p" %in% which_cols) {which_cols <- c(which_cols, "")}
#
# Cut out some of the extraneous info
if (a$y.free == FALSE) {
fixed_coefs <- fixed_coefs[!duplicated(fixed_coefs[,"label"]), which_coefs]
} else {
fixed_coefs <- fixed_coefs[!duplicated(fixed_coefs[,"label"]), which_coefs]
}
pretty_names <- c(a$var_coefs$var)
a$var_coefs$pretty_name <- NA
ts <- 0
for (i in 1:(length(pretty_names))) {
if (a$var_coefs$lag[i] > 0) {
pretty_names[i] <- paste(pretty_names[i], " (t - ", a$var_coefs$lag[i],
")", sep = "")
a$var_coefs$pretty_name[i] <- pretty_names[i]
ts <- ts + 1
} else {
a$var_coefs$pretty_name[i] <- a$var_coefs$var[i]
}
}
# Get table of lagged y for each wave
if (a$y.free == TRUE) {
y_coefs <- coefs[coefs$label == "" & coefs$op == "~", c("lhs", which_coefs)]
fixed_coefs <- fixed_coefs[fixed_coefs$label != "", ]
}
coeft <- as.data.frame(fixed_coefs[,which_coefs])
if (a$y.free == TRUE) {
y_coeft <- as.data.frame(y_coefs[,c("lhs", which_coefs)])
}
coeft <- coeft[,colnames(coeft) %nin% "label"]
if (a$y.free == TRUE) {
y_coeft <- y_coeft[,colnames(y_coeft) %nin% "label"]
}
rownames(coeft) <-
a$var_coefs$pretty_name[sapply(fixed_coefs[,"label"], function(x) {
which(a$var_coefs$coef == x)
})]
colnames(coeft) <- which_cols
coeft <- as.data.frame(coeft)
# coeft <- round_df_char(coeft, digits)
if (a$y.free == TRUE) {
rns <- a$var_coefs$pretty_name[sapply(y_coefs[,"label"], function(x) {
which(a$var_coefs$coef == x)
})]
rns <- paste0(rns, " [t = ",
stringr::str_extract(y_coeft[,"lhs"], "(?<=_)[0-9]"), "]")
rownames(y_coeft) <- rns
y_coeft <- y_coeft[, colnames(y_coeft) %nin% "lhs"]
colnames(y_coeft) <- which_cols
y_coeft <- as.data.frame(y_coeft)
# y_coeft <- round_df_char(y_coeft, digits)
}
if (a$y.free == FALSE) {y_coeft <- NULL}
converged <- lavaan::lavInspect(object, what = "converged")
iters <- lavaan::lavInspect(object, what = "iterations")
fitms <- lavaan::fitmeasures(object)
fitms <- round(fitms, digits)
out <- list(coefficients = coeft, model = object, fitmeasures = fitms,
ylag_coefficients = y_coeft)
class(out) <- "summary.dpm"
out <- structure(out, dv = a$dv, tot_obs = a$tot_obs,
complete_obs = a$complete_obs, start = a$start, end = a$end,
converged = converged, iters = iters, y.free = a$y.free,
digits = digits, pvalue = pvalue)
return(out)
})
#' @export
print.summary.dpm <- function(x, ...) {
a <- attributes(x)
fitms <- x$fitmeasures
coeft <- x$coefficients
cat(underline("MODEL INFO:\n"))
cat(italic("Dependent variable:"), a$dv, "\n")
cat(italic("Total observations:"), a$tot_obs, "\n")
cat(italic("Complete observations:"), a$complete_obs, "\n")
cat(italic("Time periods:"), a$start, "-", a$end, "\n\n")
cat(underline("MODEL FIT:\n"))
cat("\U1D6D8\u00B2(", fitms["df"], ") = ", fitms["chisq"], "\n",
sep = "")
# cat("CFI =", fitms["cfi"],"\n") # Can't trust these yet
# cat("TLI =", fitms["tli"],"\n")
#
cat(italic("RMSEA")," = ", fitms["rmsea"], ", ",
"90% CI [", fitms["rmsea.ci.lower"],
", ", fitms["rmsea.ci.upper"],"]", italic("\np(RMSEA < .05)"),
" = ", fitms["rmsea.pvalue"], "\n", sep = "")
cat(italic("SRMR"), "=", fitms["srmr"], "\n\n")
if (!is.null(x$ylag_coefficients)) {
coeft <- rbind(coeft, x$ylag_coefficients)
}
coeft <- round_df_char(coeft, digits = a$digits)
if (a$pvalue == TRUE) {
pvals <- as.numeric(coeft[,"p"])
coeft[,ncol(coeft) + 1] <- NA
sigstars <- c()
for (y in 1:nrow(coeft)) {
sigstars[y] <- get_stars(pvals[y])
}
coeft[,ncol(coeft)] <- sigstars
names(coeft)[ncol(coeft)] <- ""
if (a$y.free) {
pvals <- ylag_coefficients[,"p"]
sigstars <- c()
for (y in 1:nrow(ylag_coefficients)) {
sigstars[y] <- get_stars(pvals[y])
}
ylag_coefficients[,ncol(ylag_coefficients) + 1] <- sigstars
names(ylag_coefficients)[ncol(ylag_coefficients)] <- ""
}
}
print(coeft)
cat("\n")
if (a$converged == TRUE) {
cat("Model converged after", a$iters, "iterations\n")
} else {
cat("WARNING: Model failed to converge after", a$iters, "iterations\n")
}
}
##### Other methods ###########################################################
#' @title Various methods for `dpm` objects
#' @description R likes it when these things have documentation.
#' @param object A `dpm` object
#' @param formula. An updated formula (optional)
#' @param evaluate If updating, should the updated model be updated or just
#' return the call? Default is TRUE, re-run the model.
#' @param ... Other arguments to update.
#' @export
#' @importFrom stats formula getCall update.formula update
#' @rdname dpm-methods
# Just a duplicate of the default update method, but I need it here since
# lavaan has implemented a different method
setMethod("update", signature(object = "dpm"),
function(object, formula., ..., evaluate = TRUE) {
call <- getCall(object)
extras <- match.call(expand.dots = FALSE)$...
if (!missing(formula.)) {
call$formula <- update.formula(formula(object), formula.)
}
if (length(extras)) {
existing <- !is.na(match(names(extras), names(call)))
for (a in names(extras)[existing]) call[[a]] <- extras[[a]]
if (any(!existing)) {
call <- c(as.list(call), extras[!existing])
call <- as.call(call)
}
}
if (evaluate) {eval(call, parent.frame())} else {call}
})
#' @export
#' @importFrom methods show
#' @rdname dpm-methods
setMethod("show", "dpm", function(object) {
x <- summary(object)
a <- attributes(x)
fitms <- x$fitmeasures
cat(crayon::cyan("Dynamic Panel Model [dpm]"), ":\n\n", sep = "")
cat(underline("MODEL INFO:\n"))
cat(italic("Dependent variable:"), a$dv, "\n")
cat(italic("Total observations:"), a$tot_obs, "\n")
cat(italic("Complete observations:"), a$complete_obs, "\n")
cat(italic("Time periods:"), a$start, "-", a$end, "\n\n")
cat(underline("MODEL FIT:\n"))
cat("\U1D6D8\u00B2(", fitms["df"], ") = ", fitms["chisq"], "\n",
sep = "")
cat(italic("RMSEA")," = ", fitms["rmsea"], ", ",
"90% CI [", fitms["rmsea.ci.lower"],
", ", fitms["rmsea.ci.upper"],"]", italic("\np(RMSEA < .05)"),
" = ", fitms["rmsea.pvalue"], "\n", sep = "")
cat(italic("SRMR"), "=", fitms["srmr"], "\n\n")
if (a$converged == TRUE) {
cat("Model converged after", a$iters, "iterations\n")
} else {
cat(crayon::red("WARNING: Model failed to converge after", a$iters,
"iterations\n"))
}
})
#' @export
#' @importFrom stats coef
#' @rdname dpm-methods
setMethod("coef", "dpm", function(object) {
out <- summary(object)$coefficients[,"Est."]
names(out) <- rownames(summary(object)$coefficients)
return(out)
})
##### Tidiers #################################################################
#' @title Tidy methods for dpm
#' @description `dpm` objects support the \pkg{broom} package's `tidy` method.
#' @param x A `dpm` object.
#' @param conf.int Logical indicating whether or not to include a confidence
#' interval in the tidy data frame.
#' @param conf.level The confidence level to use for the confidence interval
#' when `conf.int` is TRUE. Default is .95, corresponding to a 95% confidence
#' interval.
#' @param ... Other arguments passed to \link[dpm]{summary.dpm}.
#' @examples
#' if (requireNamespace("broom")) {
#' library(broom)
#' # Load example data
#' data("WageData", package = "panelr")
#' # Convert data to panel_data format for ease of use
#' wages <- panel_data(WageData, id = id, wave = t)
#'
#' fit <- dpm(wks ~ pre(lag(union)) + lag(lwage) | ed, data = wages)
#' tidy(fit)
#' }
#' @export tidy.dpm
#'
tidy.dpm <- function(x, conf.int = FALSE, conf.level = .95, ...) {
s <- summary(x, ci = conf.int, ci.level = conf.level, ...)
coefs <- s$coefficients
coefs$term <- rownames(coefs)
coefs$estimate <- coefs[,"Est."]
coefs$std.error <- coefs[,"S.E."]
coefs$statistic <- coefs[,"z val."]
coefs$p.value <- coefs[,"p"]
keep_cols <- c("term", "estimate", "std.error", "statistic", "p.value")
if (conf.int == TRUE) {
coefs$conf.low <- coefs[, stringr::str_detect(names(coefs), "%")][,1]
coefs$conf.high <- coefs[, stringr::str_detect(names(coefs), "%")][,2]
keep_cols <- c(keep_cols, "conf.low", "conf.high")
}
if (requireNamespace("tibble")) {
tibble::as_tibble(coefs[,keep_cols], rownames = NULL)
} else {
rownames(coefs) <- NULL
coefs[,keep_cols]
}
}
##### lavaaan summary methods #################################################
#' @title lavaan-style summary for dpm objects
#' @description This is just a quick way to get `lavaan`'s summary instead
#' the more terse summary designed for `dpm` objects.
#' @param x The \code{\link{dpm}} object
#' @param ... Other arguments to the the lavaan function.
#'
#' @examples
#'
#' # Load example data
#' data("WageData", package = "panelr")
#' # Convert data to panel_data format for ease of use
#' wages <- panel_data(WageData, id = id, wave = t)
#'
#' fit <- dpm(wks ~ pre(lag(union)) + lag(lwage) | ed, data = wages)
#' lav_summary(fit)
#'
#' @rdname lavaan-functions
#' @export
lav_summary <- function(x, ...) {
lavaan::summary(as(x, "lavaan"), ...)
}
#### extractors ##############################################################
#' @title Retrieve wide-format data from fitted dpm model
#' @description This helper function provides a simple way to retrieve the
#' widened data from a fitted [dpm::dpm()] object.
#' @param model A `dpm` object.
#' @export
get_wide_data <- function(model) {
model@wide_data
}
#' @title Retrieve lavaan model syntax from fitted dpm model
#' @description This helper function provides a simple way to retrieve the
#' lavaan model syntax from a fitted [dpm::dpm()] object.
#' @param model A `dpm` object.
#' @param print Print the syntax to the console so it is formatted properly?
#' Default is TRUE.
#' @export
get_syntax <- function(model, print = TRUE) {
if (print) {cat(model@mod_string)}
invisible(model@mod_string)
}