-
Notifications
You must be signed in to change notification settings - Fork 1
/
cost-functions.R
88 lines (83 loc) · 3.09 KB
/
cost-functions.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#' Cost Functions for Fitted Regression Models
#'
#' Compute cost functions (cross-validation criteria) for fitted
#' regression models.
#'
#' @aliases costFunctions
#'
#' @param y response
#' @param yhat fitted value
#'
#' @seealso \code{\link{cv}}, \code{\link{cv.merMod}},
#' \code{\link{cv.function}}.
#'
#' @details
#' Cost functions (cross-validation criteria) are meant to measure lack-of-fit. Several cost functions are provided:
#' 1. \code{mse()} returns the mean-squared error of prediction for
#' a numeric response variable \code{y} and predictions \code{yhat}; and
#' \code{rmse()} returns the root-mean-squared error and is just the
#' square-root of \code{mse()}.
#' 2. \code{medAbsErr()} returns the median absolute error of prediction for a numeric
#' response \code{y} and predictions \code{yhat}.
#' 3. \code{BayesRule()} and \code{BayesRule2()} report the proportion
#' of incorrect predictions for a dichotomous response variable \code{y}, assumed
#' coded (or coercible to) \code{0} and \code{1}. The \code{yhat} values are
#' predicted probabilities and are rounded to 0 or 1. The distinction
#' between \code{BayesRule()} and \code{BayesRule2()} is that the former
#' checks that the \code{y} values are all either \code{0} or \code{1}
#' and that the \code{yhat} values are all between 0 and 1, while
#' the latter doesn't and is therefore faster.
#'
#' @returns In general, cost functions should return a single numeric
#' value measuring lack-of-fit. \code{mse()} returns the mean-squared error;
#' \code{rmse()} returns the root-mean-squared error;
#' \code{medAbsErr()} returns the median absolute error;
#' and \code{BayesRule()} and
#' \code{BayesRule2()} return the proportion of misclassified cases.
#' @examples
#' data("Duncan", package="carData")
#' m.lm <- lm(prestige ~ income + education, data=Duncan)
#' mse(Duncan$prestige, fitted(m.lm))
#'
#' data("Mroz", package="carData")
#' m.glm <- glm(lfp ~ ., data=Mroz, family=binomial)
#' BayesRule(Mroz$lfp == "yes", fitted(m.glm))
#' @describeIn cost-functions Mean-square error.
#' @export
mse <- function(y, yhat) {
  # Mean of the squared prediction errors for a numeric response.
  squared_errors <- (y - yhat) ^ 2
  out <- mean(squared_errors)
  # Record the casewise-loss formula so callers can compute per-case losses.
  attr(out, "casewise loss") <- "(y - yhat)^2"
  out
}
#' @describeIn cost-functions Root-mean-square error.
#' @export
rmse <- function(y, yhat) {
  # Square root of the mean squared prediction error.
  # No "casewise loss" attribute: RMSE is not a mean of per-case losses.
  errors <- y - yhat
  sqrt(mean(errors ^ 2))
}
#' @describeIn cost-functions Median absolute error.
#' @importFrom stats median
#' @export
medAbsErr <- function(y, yhat) {
  # Median of the absolute prediction errors (robust to outlying cases).
  abs_errors <- abs(y - yhat)
  median(abs_errors)
}
#' @describeIn cost-functions Bayes Rule for a binary response.
#' @export
BayesRule <- function(y, yhat) {
  # Validate that the response is 0/1 coded (logical y coerces cleanly via %in%).
  if (!all(y %in% c(0, 1)))
    stop("response values not all 0 or 1")
  # Single vectorized range check on the predicted probabilities.
  # na.rm = TRUE avoids the cryptic "missing value where TRUE/FALSE needed"
  # error the previous scalar `any(...) || any(...)` form produced when
  # yhat contained NAs; the bounds test is decided by the non-missing values.
  if (any(yhat < 0 | yhat > 1, na.rm = TRUE))
    stop("fitted values outside of interval [0, 1]")
  yhat <- round(yhat) # classify at the 0.5 probability threshold
  result <- mean(y != yhat) # proportion in error
  attr(result, "casewise loss") <- "y != round(yhat)"
  result
}
#' @describeIn cost-functions Bayes rule for a binary response (without bounds checking).
#' @export
BayesRule2 <- function(y, yhat) {
  # Fast variant of BayesRule(): skips all input validation.
  predicted <- round(yhat) # classify at the 0.5 probability threshold
  out <- mean(y != predicted) # proportion in error
  attr(out, "casewise loss") <- "y != round(yhat)"
  out
}