diff --git a/DESCRIPTION b/DESCRIPTION index 5db1962..f86befa 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,8 +2,8 @@ Package: vcrpart Type: Package Title: Tree-Based Varying Coefficient Regression for Generalized Linear and Ordinal Mixed Models -Version: 0.2-3 -Date: 2014-11-24 +Version: 0.3-1 +Date: 2015-01-04 Authors@R: c( person("Reto", "Buergin", role = c("aut", "cre", "cph"), email = "rbuergin@gmx.ch"), person("Gilbert", "Ritschard", role = c("ctb", "ths"), @@ -14,11 +14,12 @@ License: GPL (>= 2) Depends: R (>= 3.1.0), parallel, partykit Imports: stats, grid, graphics, methods, nlme, rpart, numDeriv, ucminf, zoo, sandwich, strucchange -URL: http://vcrpart.wordpress.com +URL: http://vcrpart.wordpress.com, + https://r-forge.r-project.org/projects/traminer/ LazyLoad: yes NeedsCompilation: yes -Packaged: 2014-11-25 07:26:37 UTC; reto +Packaged: 2015-01-04 10:34:57 UTC; reto Author: Reto Buergin [aut, cre, cph], Gilbert Ritschard [ctb, ths] Repository: CRAN -Date/Publication: 2014-11-25 09:24:41 +Date/Publication: 2015-01-04 12:46:23 diff --git a/MD5 b/MD5 index 119891c..0205f29 100644 --- a/MD5 +++ b/MD5 @@ -1,20 +1,21 @@ -d9422ace5f5ef22d494ef37fbf19f831 *DESCRIPTION +929438bed344b7ec2fc1864a751ff373 *DESCRIPTION adcba7e46ec21c27cfea94b0454d05f9 *NAMESPACE -dc7c790c64c073850f833738ae0a4f59 *NEWS +0b853ff955861ffa43fd11c9e4785172 *NEWS 8b40e51cf24df0eb99503a7070b84a93 *R/AAA.R b70bc37834d2f3b8c94d9d1c0020c614 *R/AllGeneric.R 91d50efd23854dea04fca307a8c9ce97 *R/fvcm.R 80b84163397dcda539b20d9ab51c56e7 *R/import.R -0801b64fedea58af67c400f8b0e15bd2 *R/olmm-methods.R -7e4d28175b9184bf3a0992401f277072 *R/olmm-utils.R +5658d937bde2aabee0e4493cdc76e85f *R/olmm-methods.R +8057fd21604296b718055bd3dbb4f370 *R/olmm-utils.R ecdcf046dcbb466c4cf29bc7e9efaf8a *R/olmm.R f4e45051571cc6994e06109412d540af *R/otsplot.R 2af1d39305eedf8fc4128b14307e1317 *R/tvcm-cv.R -32a6d478af156ed579d5909795d5cc98 *R/tvcm-methods.R +c1191337d41d4dc145e7ec221acd6ae0 *R/tvcm-methods.R 
d7de761ae5514253233d0540ddd85ec5 *R/tvcm-plot.R -cd08110234d8f5bbcb6833bab79abad8 *R/tvcm-utils.R -efa265167aaa866b67a1acd78df5a859 *R/tvcm.R +a19aaa41484d924fb4aeeed7588d851c *R/tvcm-utils.R +282d16856de0ec13cfbbbac8b57cf091 *R/tvcm.R 645eb28d9f7e8259dfb5edf34edbb34b *R/utils.R +dcc3606dae744cc31c166c3895363003 *ToDo 8562fccdfc9dc113b72f8eb8df66bf57 *data/PL.RData a2c9a87cf50549fd5802cbb6a88d281a *data/movie.RData 72890e1f368da6d42d9cf9414c5396e0 *data/poverty.RData @@ -23,31 +24,31 @@ ea62be4e5d10df5bc683725a7f7935f6 *data/schizo.RData 80e2db657049956161e19e3e3c2bdd32 *data/vcrpart_1.RData cb803decf95c39567f648e1e5e0ddb45 *data/vcrpart_2.RData 3ddf5ac69dc37253946b83c5bfd5fca7 *data/vcrpart_3.RData -3cd9ba9d0476859aa2f24592acd9629b *inst/CITATION -3bc5b4abcf89156967fbf807a9125ee6 *man/PL.Rd -7141810b94046f0dbfa66fd3e0477a6a *man/contr-wsum.Rd -02a3675b72ff9f699d45dab0183780b8 *man/fvcm-methods.Rd -9b47db74c740a7be156dce5564cf350f *man/fvcm.Rd -edb080c605c0dacb43ae037d6199abd4 *man/movie.Rd +c957bcc9e1c02457a3e2c0a3b3434c7c *inst/CITATION +2d7f2b0ce2324e387983211e19ac55cc *man/PL.Rd +79349138eb3456f71d2b2f9d1121d572 *man/contr-wsum.Rd +1e6b881c70bc9464f3f870ebd929be25 *man/fvcm-methods.Rd +a10e2af8c56ccd80e774c45b625fbbbb *man/fvcm.Rd +c5210f80a7291c066c5cb99d430c306f *man/movie.Rd 01062811412189130594c3983d1868ee *man/olmm-control.Rd -85658aad587f90ada41ff1808c725904 *man/olmm-gefp.Rd -82d6b519900b895e47ac8bb7fe83971f *man/olmm-methods.Rd -e58a118afbf3ff05498f9e5bbc73e893 *man/olmm-predict.Rd +dc2431d8c17e61515d75bd86c800cbda *man/olmm-gefp.Rd +3ab54e39ac59254729fe58ceac548a8b *man/olmm-methods.Rd +23e05ccd57e17ad0c231caad41210365 *man/olmm-predict.Rd c914ee5c769aacd190558f0b967f1072 *man/olmm-summary.Rd -99fd9612846185c8b253368ad62640bc *man/olmm.Rd -79da070b250ee6646ecbda2e4d33fcb3 *man/otsplot.Rd -b5d023f2e25d796fc013667de0bf0c2b *man/poverty.Rd -198b4f1a33689dc49700c02f14e42510 *man/schizo.Rd -830a40f120259f6c1e30cce2f06a224e *man/tvcglm.Rd 
-fc4d871d97b9315c31c720307d4b481c *man/tvcm-control.Rd -5b0157aea2925e83c7325814864fad14 *man/tvcm-cv.Rd -c2434053aa241bce6f70d969bf385e25 *man/tvcm-methods.Rd -0fa2459d031af80f1b4dbd7d3d1da797 *man/tvcm-plot.Rd -395961700e672d7ba7f7901564e83892 *man/tvcm.Rd -81b709a611fe8b1ac9d3d6ebb806bab2 *man/tvcolmm.Rd +5a0c35b431a537a976ff45eb197c7be9 *man/olmm.Rd +1d5c9b3c457d06bf33220267be476307 *man/otsplot.Rd +fe34790a447d3e5e8245fb0c0669c51c *man/poverty.Rd +bce49a4bfca2f33530e20dcb8adf800b *man/schizo.Rd +b1a8ea0cfa6a3e1e62435be8ce9f405e *man/tvcglm.Rd +7dfdeb548a05ed57530085f9718d6464 *man/tvcm-control.Rd +7d2b4dc5887bb05c129579f2a28021a3 *man/tvcm-cv.Rd +0320ab676aaf77e818adc0ff4fa92c37 *man/tvcm-methods.Rd +41720b3b0902b0340e55b55495d7311d *man/tvcm-plot.Rd +e7e5e2934a4d2214baa75e89f6002b48 *man/tvcm.Rd +c9e55e6137afe94e96cbf08c9eda6a69 *man/tvcolmm.Rd 158d7c7045232f33bd35bf5c39dcedcf *man/vcrpart-demo.Rd a14ef1287c994bf09c716cc21d50517f *man/vcrpart-formula.Rd -1e0af65b11fb043bd18f7982291d076f *src/Makevars +8d46d69896a1f1d31ed76035a0e49d67 *src/Makevars e0ee3aca34161fb42f8fffa717fc6c3e *src/init.c 62783432fffd5da456a3bb3e61c2be35 *src/olmm.c 1e5f560c59e4ea73b51fa72c057166ec *src/olmm.h diff --git a/NEWS b/NEWS index 58faedd..82bdf57 100644 --- a/NEWS +++ b/NEWS @@ -1,12 +1,33 @@ +Changes in Version 0.3-1 + + o Improvements for 'prunepath' method. + + o Moved documentation of 'prunepath' to 'tvcm-assessment'. + + o Added example for 'prune'. + + o Internal changes in 'estfun.olmm' and new argument 'center' for + the function 'predecor_control'. + + o New argument 'fast' for 'tvcm_control' to control whether an + approximative or the accurate model is used at exhaustive search. + + o Corrected bugs in 'tvcm_exsearch_nomToOrd' and 'tvcm_getNumSplits' + which appeared in in special cases. + + o Improved help manual for 'tvcglm' and 'tvcolmm'. + + o Revised references. + Changes in Version 0.2-3 - o Added contr.wsum function + o Added contr.wsum function. 
- o Added 'seed' argument to 'otsplot_control' + o Added 'seed' argument to 'otsplot_control'. - o Internal changes for exhaustive search of 'tvcm' + o Internal changes for exhaustive search of 'tvcm'. - o Improved documentation + o Improved documentation. Changes in Version 0.2-2 @@ -31,7 +52,7 @@ Changes in Version 0.2-2 o Added 'nimpute' argument for 'tvcm_control'. - o Added detail section to the help page of 'tvcm_control' + o Added detail section to the help page of 'tvcm_control'. o Removed AIC table from 'print.tvcm' (AIC and BIC seem not relevant measures for models fitted by 'tvcm'). @@ -49,7 +70,7 @@ Changes in Version 0.2-1 o First CRAN release. o 'tvcm' and 'fvcm' allow for multiple 'vc' terms, i.e. - coefficient-specific partitions + coefficient-specific partitions. o Complete revision of syntaxes, argument names and default parameters. R commands for the former version 0.1-14 are @@ -60,7 +81,7 @@ Changes in Version 0.2-1 (old) coefficient constancy tests combined with greedy loss minimization to (new) exhaustive greedy loss minimization. Splitting based on the (old) requires to set the argument 'sctest = TRUE' in - 'tvcm_control' + 'tvcm_control'. o The greedy loss reduction minimization stage of 'tvcm' does now fixate coefficients which are not splitted at the estimates of diff --git a/R/olmm-methods.R b/R/olmm-methods.R index 0f22c9f..68aed8f 100644 --- a/R/olmm-methods.R +++ b/R/olmm-methods.R @@ -1,6 +1,6 @@ ##' -------------------------------------------------------- # ##' Author: Reto Buergin, rbuergin@gmx.ch -##' Date: 2014-10-24 +##' Date: 2014-12-08 ##' ##' Description: ##' methods for olmm objects. @@ -40,6 +40,7 @@ ##' weights: Weights ##' ##' Modifications: +##' 2014-12-07: - add argument 'center' to 'predecor_control' ##' 2014-10-24: - improve simulate.olmm ##' - improved 'estfun.olmm' call in 'gefp.olmm' ##' 2014-10-23: - fix bug in predict.olmm @@ -157,20 +158,21 @@ deviance.olmm <- function(object, ...) 
return(-as.numeric(2.0 * logLik(object))) predecor_control <- function(impute = TRUE, seed = NULL, - symmetric = TRUE, reltol = 1e-6, - maxit = 250L, minsize = 1L, + symmetric = TRUE, center = FALSE, + reltol = 1e-6, maxit = 250L, minsize = 1L, verbose = FALSE, silent = FALSE) { stopifnot(is.logical(impute) && length(impute) == 1L) stopifnot(is.null(seed) | is.numeric(seed) && length(seed) == 1L) stopifnot(is.logical(symmetric) && length(symmetric) == 1L) + stopifnot(is.logical(center) && length(center) == 1L) stopifnot(is.numeric(reltol) && reltol > 0 && length(reltol) == 1L) stopifnot(is.numeric(maxit) && maxit > 0 && length(maxit) == 1L) stopifnot(is.numeric(minsize) && minsize > 0 && length(minsize) == 1L) stopifnot(is.logical(verbose) && length(verbose) == 1L) stopifnot(is.logical(silent) && length(silent) == 1L) return(structure(list(impute = impute, seed = seed, - symmetric = symmetric, reltol = reltol, - maxit = maxit, minsize = minsize, + symmetric = symmetric, center = center, + reltol = reltol, maxit = maxit, minsize = minsize, verbose = verbose, silent = silent), class = "predecor_control")) } @@ -214,12 +216,12 @@ estfun.olmm <- function(x, predecor = FALSE, control = predecor_control(), attr <- list() # default attributes scores <- x$score_obs - subsImp <- rep.int(FALSE, nrow(scores)) if (control$verbose) cat("OK") ## impute data - + + subsImp <- rep.int(FALSE, nrow(scores)) if (predecor && any(Ni != Nmax)) { Nimpute <- Nmax - Ni @@ -251,21 +253,21 @@ estfun.olmm <- function(x, predecor = FALSE, control = predecor_control(), x$eta <- rbind(x$eta, matrix(0.0, sum(Nimpute), x$dims["nEta"])) x$score_obs <- rbind(x$score_obs, matrix(0.0, sum(Nimpute), x$dims["nPar"])) - ## simulate responses - if (control$impute) { - - if (control$verbose) cat("\n* impute scores ... ") - - ## set seed - if (!is.null(control$seed)) set.seed(control$seed) + ## simulate responses + if (control$verbose) cat("\n* impute scores ... 
") + + ## set seed + if (!is.null(control$seed)) set.seed(control$seed) + if (control$impute) { + ## impute predictors times <- Nimpute[x$subject[!subsImp]] rows <- unlist(tapply(1:sum(Ni), x$subject[!subsImp], function(x) sample(x, times[x[1L]], replace = TRUE))) x$frame[subsImp,] <- x$frame[rows,,drop=FALSE] x$X[subsImp, ] <- x$X[rows,,drop=FALSE] x$W[subsImp, ] <- x$W[rows,,drop=FALSE] - + ## draw responses subsW <- c(rep(which(attr(xOld$W, "merge") == 1L), x$dims["nEta"]), which(attr(xOld$W, "merge") == 2L)) @@ -280,15 +282,16 @@ estfun.olmm <- function(x, predecor = FALSE, control = predecor_control(), ordered(apply(probs, 1L, function(x) sample(yLevs, 1L, prob = x)), yLevs) ## recompute scores - .Call("olmm_update_marg", x, x$coefficients, PACKAGE = "vcrpart") - - scores <- 0.0 * x$score_obs - scores[!subsImp, ] <- x$score_obs[!subsImp,,drop=FALSE] - - if (control$verbose) cat("OK") + .Call("olmm_update_marg", x, x$coefficients, PACKAGE = "vcrpart") } - } + + scores <- x$score_obs + if (control$center && max(abs(cSums <- colSums(scores))) > 1e-6) + scores <- scores - + matrix(cSums / nrow(scores), nrow(scores), ncol(scores), byrow = TRUE) + } + ## drop the nuisance coefficients scores <- scores[, parm, drop = FALSE] diff --git a/R/olmm-utils.R b/R/olmm-utils.R index 4da28b5..97731ff 100644 --- a/R/olmm-utils.R +++ b/R/olmm-utils.R @@ -543,9 +543,10 @@ olmm_rename <- function(x, levels, family, etalab = c("int", "char", "eta")) { ##' -------------------------------------------------------- # olmm_decormat <- function(scores, subject, control = predecor_control()) { - + stopifnot(inherits(control, "predecor_control")) Nmax <- max(table(subject)) + ## estimate variances and covariances sVar <- olmm_scoreVar(scores, subject) sCovWin <- olmm_scoreCovWin(scores, subject) diff --git a/R/tvcm-methods.R b/R/tvcm-methods.R index 79eef14..af16403 100644 --- a/R/tvcm-methods.R +++ b/R/tvcm-methods.R @@ -632,6 +632,7 @@ prune.tvcm <- function(tree, cp = NULL, alpha = 
NULL, maxstep = NULL, prunepath.tvcm <- function(tree, steps = 1L, ...) { + steps <- intersect(steps, seq_along(tree$info$prunepath)) rval <- tree$info$prunepath[steps] rval <- lapply(rval, function(x) { x$tab <- as.data.frame(x$tab) diff --git a/R/tvcm-utils.R b/R/tvcm-utils.R index 08c3575..dcb0569 100644 --- a/R/tvcm-utils.R +++ b/R/tvcm-utils.R @@ -49,6 +49,13 @@ ##' tvcm_grow_splitpath: creates a 'splitpath.tvcm' object ##' ##' Last modifications: +##' 2014-12-10: - added 'drop = FALSE' commands in 'tvcm_exsearch_nomToOrd' +##' which produced errors +##' - 'tvcm_getNumSplits' yielded sometimes more than 'maxnumsplit' +##' values. Now a random selection is applied for these cases +##' 2014-12-09: implemented accurate search model. Involves changes in +##' 'tvcm_formula', 'tvcm_grow_exsearch', 'tvcm_exsearch_dev' +##' and 'tvcm_control'. ##' 2014-11-11: modified transformation of nominal into ordinal variables ##' to accelerate exhaustive search. There is now a function ##' 'tvcm_exsearch_nomToOrd'. @@ -750,6 +757,10 @@ tvcm_getNumSplits <- function(z, w, minsize, maxnumsplit) { ## delete largest value rval <- rval[rval < maxz] + + ## sometimes the while loop yields too many values ... 
+ if (length(rval) > maxnumsplit) + rval <- sort(sample(rval, 9)) } else { @@ -1060,26 +1071,26 @@ tvcm_setsplits_splitnode <- function(splits, spart, snode, nodeid) { tvcm_setsplits_rselect <- function(splits, partid, nodeid, varid, control) { - ## get the node partitions + ## get the candidate node nodeidC <- nodeid for (pid in seq_along(partid)) for (nid in seq_along(nodeid[[pid]])) if (all(sapply(splits[[pid]][[nid]], function(x) length(x) == 0L))) nodeidC[[pid]] <- setdiff(nodeidC[[pid]], nid) - ## get the partition candidates + ## get the candidate partitions partidC <- partid for (pid in seq_along(partid)) - if (length(nodeidC[[pid]]) == 0L) setdiff(partidC, pid) + if (length(nodeidC[[pid]]) == 0L) + partidC <- setdiff(partidC, pid) - ## get variable candidates for each partition + ## get candidates variables for each partition varidC <- varid for (pid in seq_along(partid)) for (vid in seq_along(varid[[pid]])) if (all(sapply(splits[[pid]], length) == 0L)) varidC[[pid]] <- setdiff(varidC[[pid]], vid) - ## random selections spart <- sort(sample(partidC, min(length(partidC), control$ptry))) svar <- lapply(seq_along(varidC), function(pid) { @@ -1089,9 +1100,9 @@ tvcm_setsplits_rselect <- function(splits, partid, nodeid, varid, control) { snode <- lapply(seq_along(nodeidC), function(pid) { s <- sample(length(nodeidC[[pid]]), min(length(nodeidC[[pid]]),control$vtry[pid])) return(nodeidC[[pid]][sort(s)]) - }) + }) - ## delete not selected nodes from 'splits' + ## delete unselected nodes for (pid in seq_along(partid)) for (nid in seq_along(nodeid[[pid]])) for (vid in seq_along(varid[[pid]])) @@ -1321,11 +1332,11 @@ tvcm_exsearch_nomToOrd <- function(cp, pid, nid, vid, cp <- tvcm_getOrdSplits(z[subs], weights[subs], control$minsize[pid], control$maxordsplit) - cp <- cp[, levels(partData[, vid])] + cp <- cp[, levels(partData[, vid]),drop=FALSE] } else { ## avoid splitting (better solution?) 
- cp <- cp[-(1:nrow(cp)), ] + cp <- cp[-(1:nrow(cp)),,drop=FALSE] } attr(cp, "type") <- "coef" @@ -1335,7 +1346,9 @@ tvcm_exsearch_nomToOrd <- function(cp, pid, nid, vid, tvcm_exsearch_dev <- function(cutpoint, pid, nid, vid, - model, modelNuis, nuisance, + model, start, + modelNuis, startNuis, + nuisance, where, partData, control, loss0, mfName) { @@ -1347,28 +1360,37 @@ tvcm_exsearch_dev <- function(cutpoint, } else { zs <- z %in% levels(z)[cutpoint > 0L] } - model[[mfName]]$Left <- 1 * (subs & zs) - model[[mfName]]$Right <- 1 * (subs & !zs) - parm <- grep("Left", names(coef(model)), value = TRUE) - - ## fit the 'update' model + + if (control$fast) { + model[[mfName]]$Left <- 1 * (subs & zs) + model[[mfName]]$Right <- 1 * (subs & !zs) + } else { + model[[mfName]][subs & zs, paste("Node", LETTERS[pid], sep = "")] <- "Left" + model[[mfName]][subs & !zs, paste("Node", LETTERS[pid], sep = "")] <- "Right" + model[[mfName]][!subs, paste("Node", LETTERS[pid], sep = "")] <- + as.integer(droplevels(where[[pid]][!subs])) + } + model$coefficients <- vcrpart_copy(start) model <- tvcm_grow_update(model, control) + rval <- rep(NA, 2L) if (!inherits(model, "try-error")) { - rval <- c((loss0 - control$lossfun(model)), - length(coef(model)[grep("Left", names(coef(model)))]) - - length(nuisance)) + rval[1L] <- loss0 - control$lossfun(model) + rval[2L] <- length(coef(model)[grep("Right", names(coef(model)))]) if (is.null(modelNuis)) { return(rval) } else { modelNuis[[mfName]]$Left <- 1 * (subs & zs) modelNuis[[mfName]]$Right <- 1 * (subs & !zs) + modelNuis$coefficients <- vcrpart_copy(startNuis) modelNuis <- tvcm_grow_update(modelNuis, control) rval[1L] <- rval[1L] - (loss0 - control$lossfun(modelNuis)) + rval[2L] <- rval[2L] - + length(coef(modelNuis)[grep("Right", names(coef(modelNuis)))]) return(rval) - } + } } else { - return(c(NA, NA)) + return(rval) } } @@ -1385,40 +1407,66 @@ tvcm_grow_exsearch <- function(splits, partid, nodeid, varid, mcall$data <- eval(mcall$data, 
environment(mcall)) w <- weights(model) - mcall$offset <- predict(model, type = "link") + + if (control$fast) + mcall$offset <- predict(model, type = "link") + if (inherits(model, "glm")) { mcall$x <- TRUE mcall$y <- TRUE mcall$model <- TRUE } else if (inherits(model, "olmm")) { - mcall$restricted <- grep("ranefCholFac", names(coef(model)), value = TRUE) - mcall$start <- coef(model)[mcall$restricted] + if (control$fast) { + mcall$restricted <- grep("ranefCholFac", names(coef(model)), value = TRUE) + mcall$start <- coef(model)[mcall$restricted] + } } - ff <- tvcm_formula(formList, rep.int(FALSE, length(partid)), + if (control$fast) { + root <- rep.int(FALSE, length(partid)) + } else { + root <- sapply(nodes, width) == 1 + } + + ff <- tvcm_formula(formList, root, eval(mcall$family, environment(mcall)), - environment(mcall), full = FALSE, update = TRUE) + environment(mcall), full = FALSE, + update = TRUE, fast = control$fast) + Left <- sample(c(0, 1), nobs(model), replace = TRUE) Right <- Left - 1 + Node <- lapply(where, function(x) { + levs <- c("Left", "Right", seq(1, nlevels(x) - 1, length.out = nlevels(x) - 1)) + return(factor(rep(levs, length.out = length(x)), levels = levs)) + }) for (pid in seq_along(partid)) { if (length(unlist(splits[[pid]])) > 0L) { mcall$formula <- ff$update[[pid]][[1L]] + mcall$data$Left <- Left - mcall$data$Right <- Right + mcall$data$Right <- Right + mcall$data[, paste("Node", LETTERS[pid], sep = "")] <- Node[[pid]] + sModel <- tvcm_grow_fit(mcall, doFit = FALSE) - sModel$coefficients[grepl("Left", names(sModel$coefficients))] <- 0.0 - sModel$coefficients[grepl("Right", names(sModel$coefficients))] <- 0.0 + sStart <- vcrpart_copy(sModel$coefficients) + sStart[intersect(names(sStart), names(coef(model)))] <- + coef(model)[intersect(names(sStart), names(coef(model)))] + sModel$control <- model$control if (length(control$nuisance[[pid]]) == 0L) { sModelN <- NULL + sNStart <- NULL } else { mcallN <- mcall + mcallN$offset <- predict(model, 
type = "link") mcallN$formula <- ff$update[[pid]][[2L]] sModelN <- tvcm_grow_fit(mcallN, doFit = FALSE) - sModelN$coefficients[] <- 0.0 + sNStart <- sModelN$coefficients + sNStart[intersect(names(sNStart), names(coef(model)))] <- + coef(model)[intersect(names(sNStart), names(coef(model)))] sModelN$control <- model$control } @@ -1451,11 +1499,13 @@ tvcm_grow_exsearch <- function(splits, partid, nodeid, varid, pid = partid[pid], nid = nodeid[[partid[pid]]][nid], vid = varid[[partid[pid]]][vid], - model = sModel, modelNuis = sModelN, + model = sModel, start = sStart, + modelNuis = sModelN, startNuis = sNStart, nuisance = control$nuisance[[pid]], where = where, partData = partData, control = control, loss0 = loss0, mfName = mfName) + if (is.matrix(st)) st <- t(st) else st <- matrix(st, ncol = 1L) splits[[pid]][[nid]][[vid]][[2L]][subs] <- st[, 1L] splits[[pid]][[nid]][[vid]][[3L]][subs] <- st[, 2L] @@ -1649,8 +1699,9 @@ tvcm_grow_splitnode <- function(nodes, where, dev, partData, step, weights) { tvcm_formula <- function(formList, root, family = cumulative(), env = parent.frame(), - full = TRUE, update = FALSE) { + full = TRUE, update = FALSE, fast = TRUE) { + yName <- rownames(attr(terms(formList$original), "factors"))[1L] ## puts the predictors for fixed effects and varying effects @@ -1689,8 +1740,7 @@ tvcm_formula <- function(formList, root, family = cumulative(), rval <- paste(rval, paste(feTerms, collapse = "+"), sep = "") if (rval != "" && inherits(family, "family.olmm")) rval <- paste(effect, "(", rval, ")", sep = "") - - + return(c(vcTerms, feTerms)) } @@ -1774,6 +1824,10 @@ tvcm_formula <- function(formList, root, family = cumulative(), ## update formulas fUpdate <- NULL + + feCeTerms <- getTerms(formList, "ce", rep(FALSE, length(root)), family) + feGeTerms <- getTerms(formList, "ge", rep(FALSE, length(root)), family) + if (update) { ## get nuisance terms @@ -1786,25 +1840,39 @@ tvcm_formula <- function(formList, root, family = cumulative(), for (pid in 
seq_along(fUpdate)) { fUpdate[[pid]] <- vector("list", 2L) nLab <- paste("Node", LETTERS[pid], sep = "") - + + if (fast) { + feIntTmp <- "none" + feCeTmp <- feCeTerms[grep(nLab, feCeTerms)] + feGeTmp <- feGeTerms[grep(nLab, feGeTerms)] + feCeTmp <- c(gsub(nLab, "Left", feCeTmp), gsub(nLab, "Right", feCeTmp)) + feGeTmp <- c(gsub(nLab, "Left", feGeTmp), gsub(nLab, "Right", feGeTmp)) + } else { + rootTmp <- root + rootTmp[pid] <- FALSE + feIntTmp <- formList$fe$intercept + if (!is.null(vcInt) && any(direct) && rootTmp[direct]) + feIntTmp <- "ce" + feCeTmp <- getTerms(formList, "ce", rootTmp, family) + feGeTmp <- getTerms(formList, "ge", rootTmp, family) + } + ## full formula - feCeTmp <- feCeTerms[grep(nLab, feCeTerms)] - feCeTmp <- c(gsub(nLab, "Left", feCeTmp), gsub(nLab, "Right", feCeTmp)) - feGeTmp <- feGeTerms[grep(nLab, feGeTerms)] - feGeTmp <- c(gsub(nLab, "Left", feGeTmp), gsub(nLab, "Right", feGeTmp)) fUpdate[[pid]][[1L]] <- - getForm(yName,feCeTmp,feGeTmp,"none",reForm, family, env) + getForm(yName,feCeTmp,feGeTmp,feIntTmp,reForm, family, env) - ## null formula + ## null formula (always use approximative model, even if fast = FALSE) feCeTmp <- feCeTerms[grep(nLab, feCeTerms)] - feCeTmp <- feCeTmp[grep(nLab, feCeTmp)] feCeTmp <- intersect(feCeTmp, nuisance[[pid]]) - feCeTmp <- c(gsub(nLab, "Left", feCeTmp), gsub(nLab, "Right", feCeTmp)) + feGeTmp <- feGeTerms[grep(nLab, feGeTerms)] - feGeTmp <- feGeTmp[grep(nLab, feGeTmp)] feGeTmp <- intersect(feGeTmp, nuisance[[pid]]) + + feCeTmp <- c(gsub(nLab, "Left", feCeTmp), gsub(nLab, "Right", feCeTmp)) feGeTmp <- c(gsub(nLab, "Left", feGeTmp), gsub(nLab, "Right", feGeTmp)) - fUpdate[[pid]][[2L]] <- getForm(yName,feCeTmp,feGeTmp,"none",reForm,family, env) + + fUpdate[[pid]][[2L]] <- + getForm(yName,feCeTmp,feGeTmp,"none",reForm,family, env) } } return(list(full = fFull, update = fUpdate)) diff --git a/R/tvcm.R b/R/tvcm.R index f42e0db..c302978 100644 --- a/R/tvcm.R +++ b/R/tvcm.R @@ -1,7 +1,7 @@ ##' 
-------------------------------------------------------- # ##' Author: Reto Buergin ##' E-Mail: reto.buergin@unige.ch, rbuergin@gmx.ch -##' Date: 2014-10-23 +##' Date: 2015-01-04 ##' ##' Description: ##' The 'tvcm' function @@ -13,9 +13,11 @@ ##' tvcm the main fitting function ##' tvcm_control control function for 'tvcm' ##' -##' all functions are documented as *.Rd files -##' ##' Last modifications: +##' 2014-12-08: - enable 'sctest = FALSE' in 'tvcolmm_control' +##' - remove checks on length of argument list, which is +##' not necessary because R assigns the argument names +##' automatically ##' 2014-11-05: - set seed at start of 'tvcm' and re-establish old seed ##' at the end ##' 2014-10-23: - improved extraction of fitting arguments (see 'fitargs') @@ -72,15 +74,17 @@ tvcolmm <- function(formula, data, family = cumulative(), tvcolmm_control <- function(alpha = 0.05, bonferroni = TRUE, minsize = 50, - maxnomsplit = 5, maxordsplit = 9, maxnumsplit = 9, + maxnomsplit = 5, maxordsplit = 9, + maxnumsplit = 9, fast = TRUE, trim = 0.1, estfun.args = list(), nimpute = 5, seed = NULL, ...) { mc <- match.call() mc[[1L]] <- as.name("tvcm_control") - if (!"minsize" %in% names(mc) & length(mc) < 7L) + if (!"minsize" %in% names(mc)) mc$minsize <- formals(tvcolmm_control)$minsize - mc$sctest <- TRUE + if (!"sctest" %in% names(mc)) + mc$sctest <- TRUE return(eval.parent(mc)) } @@ -90,13 +94,11 @@ tvcglm <- function(formula, data, family, control = tvcglm_control(), ...) 
{ mc <- match.call() mc[[1L]] <- as.name("tvcm") - if (!"control" %in% names(mc) & - (length(mc) < 9L | - length(mc) >= 9L && !inherits(eval.parent(mc[[4L]]), "tvcm_control"))) - mc$control <- formals(tvcglm)$control + if (!"control" %in% names(mc)) + mc$control <- formals(tvcglm)$control mc$fit <- "glm" return(eval.parent(mc)) -} + } tvcglm_control <- function(minsize = 30, mindev = 2.0, @@ -339,11 +341,11 @@ tvcm_control <- function(minsize = 30, mindev = ifelse(sctest, 0.0, 2.0), trim = 0.1, estfun.args = list(), nimpute = 5, maxnomsplit = 5, maxordsplit = 9, maxnumsplit = 9, maxstep = 1e3, maxwidth = 1e9, maxdepth = 1e9, - lossfun = neglogLik2, ooblossfun = NULL, + lossfun = neglogLik2, ooblossfun = NULL, fast = TRUE, cp = 0.0, dfpar = 0.0, dfsplit = 1.0, cv = !sctest, folds = folds_control("kfold", 5), prune = cv, papply = mclapply, papply.args = list(), - center = TRUE, seed = NULL, verbose = FALSE, ...) { + center = fast, seed = NULL, verbose = FALSE, ...) { mc <- match.call() ## check available arguments @@ -370,6 +372,8 @@ tvcm_control <- function(minsize = 30, mindev = ifelse(sctest, 0.0, 2.0), stopifnot(is.function(lossfun)) stopifnot(is.null(ooblossfun) | is.function(ooblossfun)) + stopifnot(is.logical(fast) && length(fast) == 1L) + stopifnot(is.numeric(cp) && length(cp) == 1L) stopifnot(is.numeric(dfpar) && length(dfpar) == 1L) stopifnot(is.numeric(dfsplit) && length(dfsplit) == 1L) @@ -439,6 +443,7 @@ tvcm_control <- function(minsize = 30, mindev = ifelse(sctest, 0.0, 2.0), maxdepth = as.integer(maxdepth), lossfun = lossfun, ooblossfun = ooblossfun, + fast = fast, cp = cp, dfpar = dfpar, dfsplit = dfsplit, diff --git a/ToDo b/ToDo new file mode 100644 index 0000000..5780b28 --- /dev/null +++ b/ToDo @@ -0,0 +1,4 @@ +- Check Gauss-Hermite integration points of the 'lme4' package +- Check if 'ucminf' works again +- Adopt prediction function for 'olmm' for prediction for in-sample individuals +- Check nloptr package \ No newline at end of file diff --git 
a/inst/CITATION b/inst/CITATION index b7b52e6..c465eed 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -5,29 +5,29 @@ p2 <- person("Gilbert", "Ritschard", email = "gilbert.ritschard@unige.ch") citEntry(entry="unpublished", author = personList(p1, p2), - title = "Coefficient-wise tree-based varying coefficient regression with vcrpart", + title = "Coefficient-Wise Tree-Based Varying Coefficient Regression with vcrpart", year = "2014", note = "Article in progress", - textVersion = "B\u00FCrgin R. and Ritschard G. (2014b), Coefficient-wise tree-based varying coefficient regression with vcrpart. Article in progress." + textVersion = "B\u00FCrgin, R. and G. Ritschard (2014c), Coefficient-Wise Tree-Based Varying Coefficient Regression with vcrpart. Article in progress." ) citEntry(entry="unpublished", author = personList(p1, p2), - title = "Tree-based varying-coefficient regression for longitudinal ordinal responses", + title = "Tree-Based Varying-Coefficient Regression for Longitudinal Ordinal Responses", year = "2014", note = "Article in progress", - textVersion = "B\u00FCrgin R. and Ritschard G. (2014a), Tree-based varying coefficient regression for longitudinal ordinal responses. Article in progress." + textVersion = "B\u00FCrgin, R. and G. Ritschard (2014b), Tree-Based Varying Coefficient Regression for Longitudinal Ordinal Responses. Article in progress." ) citEntry(entry="article", author = personList(p1, p2), - title = "A decorated parallel coordinate plot for categorical longitudinal data", + title = "A Decorated Parallel Coordinate Plot for Categorical Longitudinal Data", journal = "The American Statistician", year = "2014", volume = "68", - pages = "98-103", + pages = "98--103", number = "2", - textVersion="B\u00FCrgin, R. & Ritschard, G. (2014), A decorated parallel coordinate plot for categorical longitudinal data, The American Statistician 68(2), 98-103.", + textVersion="B\u00FCrgin, R. and G. 
Ritschard (2014a), A Decorated Parallel Coordinate Plot for Categorical Longitudinal Data, The American Statistician 68(2), 98-103.", ) citFooter("See",sQuote("citation()"),"for citing R itself.") diff --git a/man/PL.Rd b/man/PL.Rd index f786124..5363b6e 100644 --- a/man/PL.Rd +++ b/man/PL.Rd @@ -58,9 +58,10 @@ \source{Austrian Social Security Database (ASSD). The data set is also available from \url{https://sites.google.com/site/rafaellalive/research}} -\references{Lalive, R. and Zweimueller, J. (2009), How does parental leave affect - fertility and return to work? Evidence from two natural experiments. - \emph{The Quarterly Journal of Economics}. +\references{Lalive, R. and J. Zweimueller (2009), Does Parental Leave + Affect Fertility and Return-to-Work? Evidence from Two Natural + Experiments. \emph{The Quarterly Journal of Economics} \bold{124}(3), + 1363--1402. } \keyword{datasets} diff --git a/man/contr-wsum.Rd b/man/contr-wsum.Rd index 2c30535..5cd23ae 100644 --- a/man/contr-wsum.Rd +++ b/man/contr-wsum.Rd @@ -19,8 +19,8 @@ contr.wsum(x, weights = rep.int(1.0, length(x))) \details{ Computes a contrast matrix similar to \command{\link{contr.sum}}. The - contrast for the last category are however weighted by the sum of - weights of each category. + reference category is however weighted by the sum of weights of the + other categories. } \value{A matrix with \code{nlevels(x)} rows and \code{nlevels(x)- 1} diff --git a/man/fvcm-methods.Rd b/man/fvcm-methods.Rd index e07da3a..5bd53b3 100644 --- a/man/fvcm-methods.Rd +++ b/man/fvcm-methods.Rd @@ -72,14 +72,14 @@ \author{Reto Buergin} \references{ - Leo Breiman (1996). Bagging Predictors. \emph{Machine Learning}, - 123--140 + Breiman, L. (1996). Bagging Predictors. \emph{Machine Learning}, + \bold{24}(2), 123--140. - Leo Breiman (2001). Random Forests. \emph{Machine Learning}, + Breiman, L. (2001). Random Forests. \emph{Machine Learning}, \bold{45}(1), 5--32. - - T. Hastie, R. Tibshirani, J. 
Friedman (2001), The elements of - statistical learning, Springer. + + Hastie, T., R. Tibshirani and J. Friedman (2001), The Elements of + Statistical Learning (2 ed.), Springer-Verlag. } \seealso{\command{\link{fvcm}}, \command{\link{tvcm-methods}}} diff --git a/man/fvcm.Rd b/man/fvcm.Rd index 54477ba..d53f215 100644 --- a/man/fvcm.Rd +++ b/man/fvcm.Rd @@ -7,10 +7,10 @@ \alias{fvcglm} \alias{fvcglm_control} -\title{Bagging and random forests based on \command{\link{tvcm}}} +\title{Bagging and Random Forests based on \command{\link{tvcm}}} \description{ - Bagging (Breiman, 1996) and random forest (Breiman, 2001) ensemble + Bagging (Breiman, 1996) and Random Forest (Breiman, 2001) ensemble algorithms for \command{\link{tvcm}}. } @@ -70,8 +70,8 @@ fvcglm_control(maxstep = 10, folds = folds_control("subsampling", 5), } \details{ - Implements the \emph{bagging} (Breiman, 1996) and \emph{random - forests} (Breiman, 2001) ensemble algorithms for + Implements the \emph{Bagging} (Breiman, 1996) and \emph{Random + Forests} (Breiman, 2001) ensemble algorithms for \command{\link{tvcm}}. The method consist in growing multiple trees by using \command{\link{tvcm}} and aggregating the fitted coefficient functions in the scale of the predictor function. To enable bagging, @@ -102,14 +102,14 @@ fvcglm_control(maxstep = 10, folds = folds_control("subsampling", 5), \author{Reto Buergin} \references{ - Leo Breiman (1996). Bagging Predictors. \emph{Machine Learning}, - 123--140 + Breiman, L. (1996). Bagging Predictors. \emph{Machine Learning}, + \bold{24}(2), 123--140. - Leo Breiman (2001). Random Forests. \emph{Machine Learning}, + Breiman, L. (2001). Random Forests. \emph{Machine Learning}, \bold{45}(1), 5--32. - T. Hastie, R. Tibshirani, J. Friedman (2001), The elements of - statistical learning, Springer. + Hastie, T., R. Tibshirani and J. Friedman (2001), The Elements of + Statistical Learning (2 ed.), Springer-Verlag. 
} \seealso{\command{\link{fvcm-methods}}, \command{\link{tvcm}}, diff --git a/man/movie.Rd b/man/movie.Rd index bfdef77..9d5d453 100644 --- a/man/movie.Rd +++ b/man/movie.Rd @@ -26,8 +26,9 @@ \source{The data are tabulated in Hartzel et al. (2001).} -\references{Hartzel, J., Agresti A. and Caffo, B. (2001). Multinomial - Logit Random Effect Models, \emph{Statistical Modelling} \bold{1}: - 81--102} +\references{ + Hartzel, J., A. Agresti and B. Caffo (2001). Multinomial Logit Random + Effect Models, \emph{Statistical Modelling} \bold{1}(2), 81--102. +} \keyword{datasets} diff --git a/man/olmm-gefp.Rd b/man/olmm-gefp.Rd index 6e59ce8..3e91041 100644 --- a/man/olmm-gefp.Rd +++ b/man/olmm-gefp.Rd @@ -18,7 +18,8 @@ estfun.olmm(x, predecor = FALSE, control = predecor_control(), nuisance = NULL, ...) predecor_control(impute = TRUE, seed = NULL, - symmetric = TRUE, reltol = 1e-6, + symmetric = TRUE, center = FALSE, + reltol = 1e-6, maxit = 250L, minsize = 1L, verbose = FALSE, silent = FALSE) @@ -111,11 +112,12 @@ gefp.olmm(object, scores = NULL, order.by = NULL, subset = NULL, } \references{ -Zeileis A., Hornik K. (2007), Generalized M-Fluctuation Tests for Parameter -Instability, \emph{Statistica Neerlandica}, \bold{61}, 488--508. - -Buergin R. and Ritschard G. (2014a), Tree-based varying coefficient -regression for longitudinal ordinal responses. Article in progress. + Zeileis A., Hornik K. (2007), Generalized M-Fluctuation Tests for + Parameter Instability, \emph{Statistica Neerlandica}, \bold{61}(4), + 488--508. + + Buergin R. and Ritschard G. (2014b), Tree-Based Varying Coefficient + Regression for Longitudinal Ordinal Responses. Article in progress. } \author{Reto Buergin} diff --git a/man/olmm-methods.Rd b/man/olmm-methods.Rd index 9a945f7..1743c52 100644 --- a/man/olmm-methods.Rd +++ b/man/olmm-methods.Rd @@ -125,18 +125,18 @@ \author{Reto Buergin} \references{ - Agresti, A. (2010). \emph{Analysis of Ordinal Categorical Data}, 10 - edn, Wiley. + Agresti, A. (2010). 
\emph{Analysis of Ordinal Categorical Data} (2 + ed.), Wiley. Tutz, G. (2012). \emph{Regression for Categorical Data}, Cambridge Series in Statistical and Probabilistic Mathematics. - Li, C. and Sheperd, B. E. (2012). A new residual for ordinal - outcomes, \emph{Biometrika} \bold{99} (2): 437-480 + Li, C. and B. E. Sheperd (2012). A New Residual for Ordinal + Outcomes, \emph{Biometrika}, \bold{99}(2), 437-480. - Bates D, Maechler M, Bolker BM and Walker S (2014). lme4: - Linear mixed-effects models using Eigen and S4. Submitted to - \emph{Journal of Statistical Software} + Bates, D., M. Maechler, B. M. Bolker and S. Walker (2014). lme4: + Linear Mixed-Effects Models Using Eigen and S4. Submitted to + \emph{Journal of Statistical Software}. } \seealso{\command{\link{olmm}}, \command{\link{predict.olmm}}, diff --git a/man/olmm-predict.Rd b/man/olmm-predict.Rd index 443e299..e72e2d2 100644 --- a/man/olmm-predict.Rd +++ b/man/olmm-predict.Rd @@ -61,6 +61,12 @@ \author{Reto Buergin} +\references{ + Skrondal, A., S. Rabe-Hesketh (2009). Prediction in multilevel + generalized linear models. \emph{Journal of the Royal Statistical + Society: Series A (Statistics in Society)}, \bold{172}(3), 659--687. +} + \seealso{\command{\link{olmm}}, \command{\link{olmm-methods}}} \examples{ diff --git a/man/olmm.Rd b/man/olmm.Rd index d81c28b..9ca7819 100644 --- a/man/olmm.Rd +++ b/man/olmm.Rd @@ -89,12 +89,10 @@ olmm(formula, data, family = cumulative(), For alternative fitting functions, see for example the functions \code{clmm} of \pkg{ordinal}, - \code{cumlogitRE} of package \pkg{glmmAK}, \code{nplmt} of package \pkg{mixcat}, \code{DPolmm} of package \pkg{DPpackage}, \code{lcmm} of package \pkg{lcmm}, - \code{MCMCglmm} of package \pkg{MCMCglmm}, - \code{sabre} of package \pkg{sabreR} or + \code{MCMCglmm} of package \pkg{MCMCglmm} or \code{OrdinalBoost} of package \pkg{GMMBoost}. 
The implementation adopts functions of the packages \pkg{statmod} @@ -178,30 +176,30 @@ olmm(formula, data, family = cumulative(), \author{Reto Buergin} \references{ - Agresti, A. (2010). \emph{Analysis of Ordinal Categorical Data}, 10 - edn, Wiley. + Agresti, A. (2010). \emph{Analysis of Ordinal Categorical Data} (2 + ed.), Wiley. - Hartzel, J., Agresti A. and Caffo, B. (2001). Multinomial Logit Random - Effect Models, \emph{Statistical Modelling} \bold{1}: 81--102 + Hartzel, J., A. Agresti and B. Caffo (2001). Multinomial Logit Random + Effect Models, \emph{Statistical Modelling} \bold{1}(2), 81--102. - Hedeker, D. and Gibbons, R. (1994). A random-effects ordinal - regression model for multilevel analysis, \emph{Biometrics} \bold{20} - (4): 933--944 + Hedeker, D. and R. Gibbons (1994). A Random-Effects Ordinal + Regression Model for Multilevel Analysis, \emph{Biometrics} + \bold{20}(4), 933--944. - Tutz, G. and Hennevogl W. (1996). Random effects in ordinal regression - models, \emph{Computational Statistics & Data Analysis} \bold{22} (5): - 537--557 + Tutz, G. and W. Hennevogl (1996). Random Effects in Ordinal Regression + Models, \emph{Computational Statistics & Data Analysis} \bold{22}(5), + 537--557. Tutz, G. (2012). \emph{Regression for Categorical Data}, Cambridge Series in Statistical and Probabilistic Mathematics. - Frederick Novomestky (2012). matrixcalc: Collection of functions for - matrix calculations. R package version 1.0-3. URL + Novomestky, F. (2012). matrixcalc: Collection of Functions for + Matrix Calculations. R package version 1.0-3. URL \url{http://CRAN.R-project.org/package=matrixcalc} - Gordon Smyth, Yifang Hu, Peter Dunn, Belinda Phipson and Yunshun Chen - (2014). statmod: Statistical Modeling. R package version 1.4.20. - URL \url{http://CRAN.R-project.org/package=statmod} + Smyth, G., Y. Hu, P. Dunn, B. Phipson and Y. Chen (2014). statmod: + Statistical Modeling. R package version 1.4.20. 
URL + \url{http://CRAN.R-project.org/package=statmod} } \seealso{\command{\link{olmm-methods}}, \command{\link{olmm_control}}, diff --git a/man/otsplot.Rd b/man/otsplot.Rd index 2c72bd0..bc1fabe 100644 --- a/man/otsplot.Rd +++ b/man/otsplot.Rd @@ -116,13 +116,13 @@ otsplot_filter(method = c("minfreq", "cumfreq", "linear"), level = NULL) \author{Reto Buergin and Gilbert Ritschard} \references{ - Reto Buergin and Gilbert Ritschard, G. (2014). A decorated parallel - coordinate plot for categorical longitudinal data, \emph{The American - Statistician} \bold{68}: 98-103 + Buergin, R. and G. Ritschard (2014a). A Decorated Parallel Coordinate + Plot for Categorical Longitudinal Data, \emph{The American + Statistician} \bold{68}(2): 98--103. - Ross Ihaka, Paul Murrell, Kurt Hornik, Jason C. Fisher, Achim Zeileis (2013). + Ihaka, R., P. Murrell, K. Hornik, J. C. Fisher and A. Zeileis (2013). colorspace: Color Space Manipulation. R package version 1.2-4. URL - \url{http://CRAN.R-project.org/package=colorspace} + \url{http://CRAN.R-project.org/package=colorspace}. } \examples{ diff --git a/man/poverty.Rd b/man/poverty.Rd index 0022bde..4886a05 100644 --- a/man/poverty.Rd +++ b/man/poverty.Rd @@ -72,13 +72,13 @@ \source{VLV survey, see also \url{http://cigev.unige.ch/recherches/vlv.html}} -\references{Ludwig, C., Cavalli, S. and Oris, - M. \sQuote{Vivre/Leben/Vivere}: An interdisciplinary survey addressing +\references{Ludwig, C., S. Cavalli and M. Oris + \sQuote{Vivre/Leben/Vivere}: An interdisciplinary survey addressing progress and inequalities of ageing over the past 30 years in - Switzerland. \emph{Archives of Gerontology and Geriatrics}. + Switzerland. \emph{Archives of Gerontology and Geriatrics}. - Gabriel, R., Oris, M. Studer, M. and Baeriswyl, M. The persistance of - social stratification? Submitted. + Gabriel, R., M. Oris, M. Studer and M. Baeriswyl (2014). The + Persistance of Social Stratification? Submitted. 
} \keyword{datasets} diff --git a/man/schizo.Rd b/man/schizo.Rd index 0fb9de2..b342428 100644 --- a/man/schizo.Rd +++ b/man/schizo.Rd @@ -34,7 +34,7 @@ \source{\url{http://tigger.uic.edu/~hedeker/ml.html}} -\references{Hedeker, D. and Gibbons, R. (2006). Longitudinal Data +\references{Hedeker, D. and R. Gibbons (2006). Longitudinal Data Analysis. Wiley, Palo Alto, CA.} \keyword{datasets} diff --git a/man/tvcglm.Rd b/man/tvcglm.Rd index 3d2f1a3..c02224f 100644 --- a/man/tvcglm.Rd +++ b/man/tvcglm.Rd @@ -8,14 +8,14 @@ \description{The \command{\link{tvcglm}} function implements the tree-based varying coefficient regression algorithm for generalized - linear models introduced by Buergin and Ritschard (2014b). The + linear models introduced by Buergin and Ritschard (2014c). The algorithm approximates varying coefficients by piecewise constant functions using recursive partitioning, i.e., it estimates the - coefficients of the model separately for strata of the value space of - partitioning variables. The special feature of the algorithm is to - assign each varying coefficient a partition, which enhances the - possibilities for model specification and to select moderator - variables individually by coefficient + selected coefficients individually by strata of the value space of + partitioning variables. The special feature of the provided algorithm + is that it allows building for each varying coefficient an individual + partition, which enhances the possibilities for model specification + and to select partitioning variables individually by coefficient. } \usage{ @@ -32,11 +32,19 @@ tvcglm_control(minsize = 30, mindev = 2.0, \arguments{ \item{formula}{a symbolic description of the model to fit, e.g., - \code{y ~ vc(z1, \ldots, zL, by = x1 + \ldots + xP) + re(1|id)} + \code{y ~ vc(z1, z2, z3) + vc(z1, z2, by = x1) + vc(z2, z3, by = x2)} - where \code{vc} term specifies the varying fixed coefficients. Only - one such \code{vc} term is allowed. 
For details, - see \command{\link{olmm}} and \command{\link{vcrpart-formula}}.} + where the \code{vc} terms specify the varying fixed + coefficients. The unnamed arguments within \code{vc} terms are + interpreted as partitioning variables (i.e., moderators). The + \code{by} argument specifies the associated predictor variable. If + no such predictor variable is specified (e.g., see the first term in + the above example formula), the \code{vc} term is interpreted as a + varying intercept, i.e., an nonparametric estimate of the direct + effect of the partitioning variables. For details, see + \command{\link{vcrpart-formula}}. Note that the global intercept may + be removed by a \code{-1} term, according to the desired + interpretation of the model.} \item{family}{the model family. An object of class \command{\link{family.olmm}}.} \item{data}{a data frame containing the variables in the model.} @@ -75,29 +83,30 @@ tvcglm_control(minsize = 30, mindev = 2.0, } \details{ - The TVCGLM algorithm uses two stages. The first stage (partitioning) - builds too overly fine partitions and the second stage (pruning) - selects the best-sized partitions by collapsing inner nodes. For the - second stage, which is automatically processed, we refer to - \command{\link{tvcm-assessment}}. The partitioning stage iterates the - following steps: + \command{\link{tvcglm}} processes two stages. The first stage, called + partitioning stage, builds overly fine partitions for each \code{vc} + term; the second stage, called pruning stage, selects the best-sized + partitions by collapsing inner nodes. For details on the pruning + stage, see \command{\link{tvcm-assessment}}. The partitioning stage + iterates the following steps: \enumerate{ \item Fit the current generalized linear model \code{y ~ NodeA:x1 + \ldots + NodeK:xK} - with \command{\link{glm}}, where \code{NodeK} is a categorical - variable with terminal node labels \code{1}, \ldots for the - \eqn{K}-th varying coefficient. 
+ with \command{\link{glm}}, where \code{Nodek} is a categorical + variable with terminal node labels for the \eqn{k}-th varying + coefficient. - \item Search and globally optimal split among the candidate - splits by exhaustive -2 likelihood training error grid search, - by cycling through all partitions, nodes and moderator variables. + \item Search the globally best split among the candidate splits by + an exhaustive -2 likelihood training error search that cycles + through all possible splits. - \item If the -2 likelihood training error reduction through the best + \item If the -2 likelihood training error reduction of the best split is smaller than \code{mindev} or there is no candidate split - satisfying the minimum node size \code{minsize}, stop the algorithm. + satisfying the minimum node size \code{minsize}, stop the + algorithm. \item Else incorporate the best split and repeat the procedure. } @@ -108,6 +117,10 @@ tvcglm_control(minsize = 30, mindev = 2.0, (a minimum node size of 30) and \code{mindev = 2} (the training error reduction of the best split must be larger than two to continue). + The algorithm implements a number of split point reduction methods to + decrease the computational complexity. See the arguments + \code{maxnomsplit}, \code{maxordsplit} and \code{maxnumsplit}. + The algorithm can be seen as an extension of CART (Breiman et. al., 1984) and PartReg (Wang and Hastie, 2014), with the new feature that partitioning can be processed coefficient-wise. @@ -117,20 +130,22 @@ tvcglm_control(minsize = 30, mindev = 2.0, } \references{ - Breiman, L., Friedman, J.H., Olshen, R.A. and Stone, C.J. (1984) - \emph{Classification and Regression Trees}. Wadsworth. + Breiman, L., J. H. Friedman, R. A. Olshen and C.J. Stone (1984) + \emph{Classification and Regression Trees}. Volume 19. The Wadsworth + Statistics / Probability Series. Wang, J. C., Hastie, T. 
(2014), Boosted Varying-Coefficient Regression Models for Product Demand Prediction, \emph{Journal of - Computational and Graphical Statistics}, \bold{23}, 361--382. + Computational and Graphical Statistics}, \bold{23}(2), 361--382. - Buergin R. and Ritschard G. (2014b). Coefficient-wise tree-based - varying coefficient regression with vcrpart. Article in progress. + Buergin R. and G. Ritschard (2014c). Coefficient-Wise Tree-Based + Varying Coefficient Regression with vcrpart. Article in progress. } \seealso{\command{\link{tvcm_control}}, \command{\link{tvcm-methods}}, \command{\link{tvcm-plot}}, \command{\link{tvcm-plot}}, - \command{\link{tvcm-assessment}}, \command{\link{glm}}} + \command{\link{tvcm-assessment}}, \command{\link{fvcglm}}, + \command{\link{glm}}} \examples{ ## ------------------------------------------------------------------- # diff --git a/man/tvcm-control.Rd b/man/tvcm-control.Rd index 8bf6af5..dc81caa 100644 --- a/man/tvcm-control.Rd +++ b/man/tvcm-control.Rd @@ -14,11 +14,11 @@ tvcm_control(minsize = 30, mindev = ifelse(sctest, 0.0, 2.0), trim = 0.1, estfun.args = list(), nimpute = 5, maxnomsplit = 5, maxordsplit = 9, maxnumsplit = 9, maxstep = 1e3, maxwidth = 1e9, maxdepth = 1e9, - lossfun = neglogLik2, ooblossfun = NULL, + lossfun = neglogLik2, ooblossfun = NULL, fast = TRUE, cp = 0.0, dfpar = 0.0, dfsplit = 1.0, cv = !sctest, folds = folds_control("kfold", 5), prune = cv, papply = mclapply, papply.args = list(), - center = TRUE, seed = NULL, verbose = FALSE, ...) + center = fast, seed = NULL, verbose = FALSE, ...) } \arguments{ @@ -48,6 +48,11 @@ tvcm_control(minsize = 30, mindev = ifelse(sctest, 0.0, 2.0), \item{ooblossfun}{a loss function that defines how to compute the validation error during cross-validation. The function will be assigned to the \code{fun} argument of \command{\link{oobloss}}.} + \item{fast}{logical scalar. Whether the approximative model should be + used to search for the next split. 
The approximative search model + uses the fitted values of the current model as offsets and estimates + only the coefficients of the added split. If \code{FALSE}, the + accurate search model is used.} \item{cp}{numeric scalar. The penalty to be multiplied with the complexity of the model during partitioning. The complexity of the model is defined as the number of coefficients times \code{dfpar} diff --git a/man/tvcm-cv.Rd b/man/tvcm-cv.Rd index 7887ec7..36d15f9 100644 --- a/man/tvcm-cv.Rd +++ b/man/tvcm-cv.Rd @@ -3,14 +3,13 @@ \alias{tvcm-assessment} \alias{prune} \alias{prune.tvcm} -\alias{folds_control} +\alias{prunepath} +\alias{prunepath.tvcm} \alias{cvloss} \alias{cvloss.tvcm} -\alias{loss} -\alias{loss.tvcm} +\alias{folds_control} \alias{oobloss} \alias{oobloss.tvcm} -\alias{print.cvloss.tvcm} \alias{plot.cvloss.tvcm} \title{Model selection utility functions for \command{\link{tvcm}} objects.} @@ -25,15 +24,15 @@ \method{prune}{tvcm}(tree, cp = NULL, alpha = NULL, maxstep = NULL, terminal = NULL, original = FALSE, ...) +\method{prunepath}{tvcm}(tree, steps = 1L, ...) + +\method{cvloss}{tvcm}(object, folds = folds_control(), ...) + folds_control(type = c("kfold", "subsampling", "bootstrap"), K = ifelse(type == "kfold", 5, 30), prob = 0.5, weights = c("case", "freq"), seed = NULL) -\method{cvloss}{tvcm}(object, folds = folds_control(), ...) - -\method{print}{cvloss.tvcm}(x, ...) - \method{plot}{cvloss.tvcm}(x, legend = TRUE, details = TRUE, ...) \method{oobloss}{tvcm}(object, newdata = NULL, weights = NULL, @@ -42,29 +41,6 @@ folds_control(type = c("kfold", "subsampling", "bootstrap"), \arguments{ \item{object, tree}{an object of class \command{\link{tvcm}}.} - \item{x}{an object of class \code{cvloss.tvcm} as produced by - \command{\link{cvloss}}.} - \item{type}{character string. The type of sampling scheme to be used - to divide the data of the input model in a learning and a validation - set.} - \item{K}{integer scalar. 
The number of folds.} - \item{prob}{numeric between 0 and 1. The probability for the - \code{"subsampling"} cross-validation scheme.} - \item{weights}{for \command{\link{folds_control}}, a character that - defines whether the weights of \code{object} are case weights or - frequencies of cases; for \command{\link{oobloss}}, a numeric vector - of weights corresponding to the rows of \code{newdata}.} - \item{seed}{an numeric scalar that defines the seed.} - \item{folds}{a list with control arguments as produced by - \command{\link{folds_control}}.} - \item{legend}{logical scalar. Whether a legend should be added.} - \item{details}{logical scalar. Whether the foldwise validation errors - should be shown.} - \item{fun}{the loss function for the validation sets. By default, the - (possibly weighted) mean of the deviance residuals as defined by the - \command{\link{family}} of the fitted \code{object} is applied.} - \item{newdata}{a data.frame of out-of-bag data (including the response - variable). See also \command{\link{predict.tvcm}}.} \item{cp}{numeric scalar. The complexity parameter to be cross-validated resp. the penalty with which the model should be pruned.} \item{alpha}{numeric significance level. Represents the stopping @@ -78,30 +54,55 @@ folds_control(type = c("kfold", "subsampling", "bootstrap"), be equal the number of partitions.} \item{original}{logical scalar. Whether pruning should be based on the trees from partitioning rather than on the current trees.} + \item{steps}{integer vector. The iteration steps from which + information should be extracted.} + \item{folds}{a list with control arguments as produced by + \command{\link{folds_control}}.} + \item{type}{character string. The type of sampling scheme to be used + to divide the data of the input model in a learning and a validation + set.} + \item{K}{integer scalar. 
The number of folds.} + \item{weights}{for \command{\link{folds_control}}, a character that + defines whether the weights of \code{object} are case weights or + frequencies of cases; for \command{\link{oobloss}}, a numeric vector + of weights corresponding to the rows of \code{newdata}.} + \item{prob}{numeric between 0 and 1. The probability for the + \code{"subsampling"} cross-validation scheme.} + \item{seed}{an numeric scalar that defines the seed.} + \item{x}{an object of class \code{cvloss.tvcm} as produced by + \command{\link{cvloss}}.} + \item{legend}{logical scalar. Whether a legend should be added.} + \item{details}{logical scalar. Whether the foldwise validation errors + should be shown.} + \item{newdata}{a data.frame of out-of-bag data (including the response + variable). See also \command{\link{predict.tvcm}}.} + \item{fun}{the loss function for the validation sets. By default, the + (possibly weighted) mean of the deviance residuals as defined by the + \command{\link{family}} of the fitted \code{object} is applied.} \item{...}{other arguments to be passed.} } -\details{By default, \command{\link{tvcm}} is a two stage procedure that - first grows overly large trees and second selects the best-sized trees by - pruning. The here presented functions may be interesting for advanced users - who want to process the model selection stage separately. +\details{\command{\link{tvcglm}} and \command{\link{tvcm}} processe + tree-size selection by default. The functions could be interesting for + advanced users. - In normal practice, the \command{\link{prune}} function is used to collapse - inner nodes of the tree structures by the tuning parameter \code{cp}. The - aim of pruning by \code{cp} is to collapse inner nodes to minimize the - cost-complexity criterion + The \command{\link{prune}} function is used to collapse inner nodes of + the tree structures by the tuning parameter \code{cp}. 
The aim of + pruning by \code{cp} is to collapse inner nodes to minimize the + cost-complexity criterion \deqn{error(cp) = error(tree) + cp * complexity(tree)} - whereby, the training error \eqn{error(tree)} is defined by \code{lossfun} - and \eqn{complexity(tree)} is defined as the total number of coefficients times - \code{dfpar} plus the total number of splits times \code{dfsplit}. The function - \code{lossfun} and the parameters \code{dfpar} and \code{dfsplit} are defined - by the \code{control} argument of \command{\link{tvcm}}, see also - \command{\link{tvcm_control}}. By default, \eqn{error(tree)} is minus two - times the total likelihood of the model and \eqn{complexity(tree)} the number - of splits. The minimization of \eqn{error(cp)} is implemented by the - following iterative backward-stepwise algorithm + where the training error \eqn{error(tree)} is defined by + \code{lossfun} and \eqn{complexity(tree)} is defined as the total + number of coefficients times \code{dfpar} plus the total number of + splits times \code{dfsplit}. The function \code{lossfun} and the + parameters \code{dfpar} and \code{dfsplit} are defined by the + \code{control} argument of \command{\link{tvcm}}, see also + \command{\link{tvcm_control}}. By default, \eqn{error(tree)} is minus + two times the total likelihood of the model and \eqn{complexity(tree)} + the number of splits. The minimization of \eqn{error(cp)} is + implemented by the following iterative backward-stepwise algorithm \enumerate{ \item fit all \code{subtree} models that collapse one inner node of the @@ -116,7 +117,7 @@ folds_control(type = c("kfold", "subsampling", "bootstrap"), } The penalty \code{cp} is generally unknown and is estimated adaptively from - the data. \command{\link{cvloss}} implements the cross-validation + the data. The \command{\link{cvloss}} function implements the cross-validation method to do this. 
\command{\link{cvloss}} repeats for each fold the following steps @@ -127,18 +128,26 @@ folds_control(type = c("kfold", "subsampling", "bootstrap"), \code{cp} the average validation error. } - Doing so yields for each fold a sequence of values for \code{cp} and a - sequence of average validation errors. The obtained sequences for \code{cp} - are combined to a fine grid and the average validation error is averaged - correspondingly. From these two sequences we choose the \code{cp} that - minimizes the validation error. Notice that the average validation error - is computed as the total prediction error of the validation set divided - by the sum of validation set weights. See also the argument \code{ooblossfun} in - \command{\link{tvcm_control}} and the function \command{\link{oobloss}}. - + Doing so yields for each fold a sequence of values for \code{cp} and + a sequence of average validation errors. These sequences are then + combined to a finer grid and the average validation error is averaged + correspondingly. From these two sequences we choose the \code{cp} + value that minimizes the validation error. Notice that the average + validation error is computed as the total prediction error of the + validation set divided by the sum of validation set weights. See also + the argument \code{ooblossfun} in \command{\link{tvcm_control}} and + the function \command{\link{oobloss}}. + + The \command{\link{prunepath}} function can be used to backtrack the + pruning algorithm. By default, it shows the results from collapsing + inner nodes in the first iteration. The interesting iteration(s) can + be selected by the \code{steps} argument. The output shows several + information on the performances when collapsing inner nodes. The node + labels shown in the output refer to the initial tree. + The function \command{\link{folds_control}} is used to specify the cross-validation scheme, where a random 5-fold cross-validation scheme - is set as the default. 
Alternatives are \code{type = "subsampling"} + is used by default. Alternatives are \code{type = "subsampling"} (random draws without replacement) and \code{type = "bootstrap"} (random draws with replacement). For 2-stage models (with random-effects) fitted by \command{\link{olmm}}, the subsets are based on subject-wise @@ -183,11 +192,12 @@ folds_control(type = c("kfold", "subsampling", "bootstrap"), } \references{ - Breiman, L., Friedman, J.H., Olshen, R.A. and Stone, C.J. (1984) - \emph{Classification and Regression Trees}. Wadsworth. + Breiman, L., J. H. Friedman, R. A. Olshen and C.J. Stone (1984) + \emph{Classification and Regression Trees}. Volume 19. The Wadsworth + Statistics / Probability Series. - T. Hastie, R. Tibshirani, J. Friedman (2001), The elements of - statistical learning, Springer. + Hastie, T., R. Tibshirani and J. Friedman (2001), The Elements of + Statistical Learning (2 ed.), Springer-Verlag. } \seealso{ @@ -216,6 +226,13 @@ cv <- cvloss(model, folds = folds_control(type = "kfold", K = 2, seed = 1)) cv plot(cv) +## prune model with estimated 'cp' +model.p <- prune(model, cp = cv$cp.hat) + + +## backtrack pruning +prunepath(model.p, steps = 1:3) + ## out-of-bag error oobloss(model, newdata = vcrpart_2[76:100,]) diff --git a/man/tvcm-methods.Rd b/man/tvcm-methods.Rd index ed7df9d..5ce3242 100644 --- a/man/tvcm-methods.Rd +++ b/man/tvcm-methods.Rd @@ -14,8 +14,6 @@ \alias{neglogLik2.tvcm} \alias{nobs.tvcm} \alias{predict.tvcm} -\alias{prunepath} -\alias{prunepath.tvcm} \alias{print.tvcm} \alias{ranef.tvcm} \alias{resid.tvcm} @@ -53,8 +51,6 @@ \method{splitpath}{tvcm}(tree, steps = 1L, details = FALSE, ...) -\method{prunepath}{tvcm}(tree, steps = 1L, ...) - \method{summary}{tvcm}(object, ...) \method{width}{tvcm}(x, ...) @@ -91,11 +87,12 @@ observation. In cases of multiple \command{\link{vc}} terms for the same predictor, the coefficients are summed up. 
- \command{\link{splitpath}} and \command{\link{prunepath}} are new - methods to trace the splitting resp. pruning procedures. They shows - several information, such as the loss reduction of new splits during - partitioning or the loss reduction of collapsing an inner node when - pruning. + The \command{\link{splitpath}} function allows to backtrack the + partitioning procedure. By default, it shows which split was chosen in + the first iteration. The interesting iteration(s) can be selected by + the \code{steps} argument. With \code{details = TRUE} it is also + possible to backtrack the coefficient constancy tests and/or the loss + reduction statistics. \command{\link{summary}} computes summary statistics of the fitted model, including the estimated coefficients. The varying coefficient are printed diff --git a/man/tvcm-plot.Rd b/man/tvcm-plot.Rd index b7cd474..c66e0d2 100644 --- a/man/tvcm-plot.Rd +++ b/man/tvcm-plot.Rd @@ -125,8 +125,8 @@ panel_coef(object, parm = NULL, } \references{ - T. Hastie, R. Tibshirani, J. Friedman (2001), The elements of - statistical learning, Springer. + Hastie, T., R. Tibshirani and J. Friedman (2001), The Elements of + Statistical Learning (2 ed.), Springer-Verlag. } \examples{ diff --git a/man/tvcm.Rd b/man/tvcm.Rd index 29daf83..f7cecea 100644 --- a/man/tvcm.Rd +++ b/man/tvcm.Rd @@ -81,25 +81,24 @@ tvcm(formula, data, fit, family, } \references{ - Zeileis, A., Hothorn, T., and Hornik, K. (2008). Model-Based + Zeileis, A., T. Hothorn, and K. Hornik (2008). Model-Based Recursive Partitioning. \emph{Journal of Computational and Graphical Statistics}, \bold{17}(2), 492--514. - Wang, J. C., Hastie, T. (2014), Boosted Varying-Coefficient + Wang, J. C. and T. Hastie (2014), Boosted Varying-Coefficient Regression Models for Product Demand Prediction, \emph{Journal of - Computational and Graphical Statistics}, \bold{23}, 361--382. + Computational and Graphical Statistics}, \bold{23}(2), 361--382. - Torsten Hothorn, Achim Zeileis (2014). 
partykit: A Modular Toolkit - for Recursive Partytioning in R. Working Paper 2014-10. \emph{Working - Papers in Economics and Statistics, Research Platform Empirical and - Experimental Economics, Universitaet Innsbruck}. URL - \url{http://EconPapers.RePEc.org/RePEc:inn:wpaper:2014-10} + Hothorn, T. and A. Zeileis (2014). partykit: A Modular Toolkit + for Recursive Partytioning in R. In \emph{Working Papers in Economics + and Statistics, Research Platform Empirical and Experimental + Economics}, Number 2014-10. Universitaet Innsbruck. - Buergin R. and Ritschard G. (2014a), Tree-based varying coefficient - regression for longitudinal ordinal responses. Article in progress. + Buergin R. and G. Ritschard (2014b), Tree-Based Varying Coefficient + Regression for Longitudinal Ordinal Responses. Article in progress. - Buergin R. and Ritschard G. (2014b), Coefficient-wise tree-based - varying coefficient regression with vcrpart. Article in progress. + Buergin R. and G. Ritschard (2014c), Coefficient-Wise Tree-Based + Varying Coefficient Regression with vcrpart. Article in progress. } \seealso{\command{\link{tvcolmm}}, \command{\link{tvcglm}}, diff --git a/man/tvcolmm.Rd b/man/tvcolmm.Rd index b4d4b7b..7512fad 100644 --- a/man/tvcolmm.Rd +++ b/man/tvcolmm.Rd @@ -8,9 +8,9 @@ \description{The \command{\link{tvcolmm}} function implements the tree-based longitudinal varying coefficient regression algorithm - proposed in Buergin and Ritschard (2014a). The algorithm approximates + proposed in Buergin and Ritschard (2014b). The algorithm approximates varying fixed coefficients in the cumulative logit mixed model by a - (multivariate) piecewise constant functions using recursive + (multivariate) piecewise constant function using recursive partitioning, i.e., it estimates the fixed effect component of the model separately for strata of the value space of partitioning variables. @@ -22,7 +22,8 @@ tvcolmm(formula, data, family = cumulative(), control = tvcolmm_control(), ...) 
tvcolmm_control(alpha = 0.05, bonferroni = TRUE, minsize = 50, - maxnomsplit = 5, maxordsplit = 9, maxnumsplit = 9, + maxnomsplit = 5, maxordsplit = 9, + maxnumsplit = 9, fast = TRUE, trim = 0.1, estfun.args = list(), nimpute = 5, seed = NULL, ...) } @@ -30,15 +31,18 @@ tvcolmm_control(alpha = 0.05, bonferroni = TRUE, minsize = 50, \arguments{ \item{formula}{a symbolic description of the model to fit, e.g., - \code{y ~ -1 + vc(z1, \ldots, zL, by = x1 + \ldots + xP, intercept = - TRUE) + re(1|id)} + \code{y ~ -1 + vc(z1, \ldots, zL, by = x1 + \ldots + xP, intercept = TRUE) + re(1|id)} where \code{vc} term specifies the varying fixed coefficients. Only one such \code{vc} term is allowed with - \command{\link{tvcolmm}}. The example formula removes the global - intercept and adds a locally varying intercept by setting - \code{intercept = TRUE} in the \code{vc} term. For details, see - \command{\link{olmm}} and \command{\link{vcrpart-formula}}.} + \command{\link{tvcolmm}} (in contrast to command{\link{tvcglm}} + where multiple \code{vc} terms can be specified). The above example + formula removes the global intercepts and adds locally varying + intercepts, by adding a \code{-1} term and specfiying \code{intercept + = TRUE} in the \code{vc} term. If varying intercepts are desired, we + recommend to always remove the global intercepts. For more details on + the formula specification, see \command{\link{olmm}} and + \command{\link{vcrpart-formula}}.} \item{family}{the model family. An object of class \command{\link{family.olmm}}.} \item{data}{a data frame containing the variables in the model.} @@ -62,7 +66,10 @@ tvcolmm_control(alpha = 0.05, bonferroni = TRUE, minsize = 50, \item{minsize}{numeric scalar. The minimum sum of weights in terminal nodes.} \item{maxnomsplit, maxordsplit, maxnumsplit}{integer scalars for split - candidate reduction. See \command{\link{tvcm_control}}} + candidate reduction. See \command{\link{tvcm_control}}.} + \item{fast}{logical scalar. 
Whether the approximative model should be + used to search for the next split. See + \command{\link{tvcm_control}}.} \item{trim}{numeric between 0 and 1. Specifies the trimming parameter in coefficient constancy tests for continuous partitioning variables. See also the argument \code{from} of function @@ -79,7 +86,7 @@ tvcolmm_control(alpha = 0.05, bonferroni = TRUE, minsize = 50, } \details{ - The TVCOLMM algorithm iterates the following steps: + The \command{\link{tvcolmm}} function iterates the following steps: \enumerate{ \item Fit the current mixed model @@ -89,8 +96,8 @@ tvcolmm_control(alpha = 0.05, bonferroni = TRUE, minsize = 50, with \command{\link{olmm}}, where \code{Node} is a categorical variable with terminal node labels \code{1}, \ldots, \code{M}. - \item Test for the constancy of the fixed effects \code{Node:x1, - \ldots} separately for each moderator \code{z1}, \ldots, \code{zL} + \item Test the constancy of the fixed effects \code{Node:x1, + \ldots}, separately for each moderator \code{z1}, \ldots, \code{zL} in each node \code{1}, \ldots, \code{M}. This yields \code{L} times \code{M} (possibly Bonferroni corrected) \eqn{p}-values for rejecting coefficient constancy. @@ -99,7 +106,7 @@ tvcolmm_control(alpha = 0.05, bonferroni = TRUE, minsize = 50, then select the node and the variable corresponding to the minimum \eqn{p}-value. Search and incorporate the optimal among the candidate splits in the selected node and variable by - exhaustive likelihood maximization grid search. + exhaustive likelihood search. \item Else if minimum \eqn{p}-value is larger than \code{alpha}, stop the algorithm and return the current model. @@ -108,7 +115,7 @@ tvcolmm_control(alpha = 0.05, bonferroni = TRUE, minsize = 50, The implemented coefficient constancy tests used for node and variable selection (step 2) are based on the M-fluctuation tests of Zeileis and Hornik (2007), using the observation scores of the fitted mixed - model. 
These observation scores can be extracted by + model. The observation scores can be extracted by \command{\link{estfun.olmm}} for models fitted with \command{\link{olmm}}. To deal with intra-individual correlations between such observation scores, the \command{\link{estfun.olmm}} @@ -122,42 +129,52 @@ tvcolmm_control(alpha = 0.05, bonferroni = TRUE, minsize = 50, the technique of Hajjem et. al (2011) and Sela and Simonoff (2012) to incorporate regression trees into mixed models. + For the exhaustive search, the algorithm implements a number of split + point reduction methods to decrease the computational complexity. See + the arguments \code{maxnomsplit}, \code{maxordsplit} and + \code{maxnumsplit}. By default, the algorithm also uses the + approximative search model approach proposed in Buergin and Ritschard + (2014c). To disable this option and use the original algorithm, set + \code{fast = FALSE} in \command{\link{tvcolmm_control}}. + Special attention is given to varying intercepts, i.e. the terms that account for the direct effects of the moderators. A common specification is - \code{y ~ -1 + vc(z1, \ldots, zL, by = x1 + \ldots + xP, intercept = - TRUE) + re(1 + w1 + \ldots |id)} + \code{y ~ -1 + vc(z1, \ldots, zL, by = x1 + \ldots + xP, intercept = TRUE) + re(1 + w1 + \ldots |id)} - Doing so replaces the globale intercept by local intercepts. + Doing so replaces the global intercept by local intercepts. As + mentioned, if varying intercepts are desired, we recommend always + removing the global intercept. } \value{An object of class \command{\link{tvcm}} } \references{ - Zeileis, A., Hothorn, T., and Hornik, K. (2008). Model-Based - Recursive Partitioning. \emph{Journal of Computational and Graphical - Statistics}, \bold{17}(2), 492--514. - - Zeileis, A., Hornik, K. (2007), Generalized M-Fluctuation Tests for - Parameter Instability, \emph{Statistica Neerlandica}, \bold{61}, - 488--508. - - Buergin R. and Ritschard G.
(2014a), Tree-based varying coefficient - regression for longitudinal ordinal responses. Article in progress. + Zeileis, A., T. Hothorn, and K. Hornik (2008). Model-Based + Recursive Partitioning. \emph{Journal of Computational and Graphical + Statistics}, \bold{17}(2), 492--514. + + Zeileis A., Hornik K. (2007), Generalized M-Fluctuation Tests for + Parameter Instability, \emph{Statistica Neerlandica}, \bold{61}(4), + 488--508. + + Buergin R. and Ritschard G. (2014b), Tree-Based Varying Coefficient + Regression for Longitudinal Ordinal Responses. Article in progress. - R. Sela and J. S. Simonoff (2012). RE-EM trees: a data mining - approach for longitudinal and clustered data, \emph{Machine Learning} - \bold{86}, 169--207. - - A. Hajjem, F. Bellavance and D. Larocque (2011), Mixed effects - regression trees for clustered data, \emph{Statistics & Probability - Letters} \bold{81}, 451--459. + Sela R. and J. S. Simonoff (2012). RE-EM trees: A Data Mining + Approach for Longitudinal and Clustered data, \emph{Machine Learning} + \bold{86}(2), 169--207. + + A. Hajjem, F. Bellavance and D. Larocque (2011), Mixed Effects + Regression Trees for Clustered Data, \emph{Statistics & Probability + Letters} \bold{81}(4), 451--459. } \seealso{\command{\link{tvcm_control}}, \command{\link{tvcm-methods}}, - \command{\link{tvcm-plot}}, \command{\link{glm}}} + \command{\link{tvcm-plot}}, \command{\link{fvcolmm}}, + \command{\link{olmm}}} \examples{ ## ------------------------------------------------------------------- # diff --git a/src/Makevars b/src/Makevars index 8d28f91..9542baf 100644 --- a/src/Makevars +++ b/src/Makevars @@ -1 +1,2 @@ -PKG_LIBS = ${LAPACK_LIBS} ${BLAS_LIBS} $(FLIBS) \ No newline at end of file + +PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS)