-
Notifications
You must be signed in to change notification settings - Fork 1
/
influencePlot.mlm.R
171 lines (165 loc) · 7.43 KB
/
influencePlot.mlm.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
#' Influence Plots for Multivariate Linear Models
#'
#' This function creates various types of \dQuote{bubble} plots of influence
#' measures with the areas of the circles representing the observations
#' proportional to generalized Cook's distances.
#'
#' \code{type="stres"} plots squared (internally) Studentized residuals against
#' hat values;
#' \code{type="cookd"} plots Cook's distance against hat values;
#' \code{type="LR"} plots residual components against leverage components, with
#' the attractive property that contours of constant Cook's distance fall on diagonal
#' lines with slope = -1. Adjacent reference lines represent multiples of influence.
#'
#' The \code{id.method="noteworthy"} setting also requires setting
#' \code{id.n>0} to have any effect. Using \code{id.method="noteworthy"}, and
#' \code{id.n>0}, the number of points labeled is the union of the largest
#' \code{id.n} values on each of L, R, and CookD.
#'
#' @param model An \code{mlm} object, as returned by \code{\link[stats]{lm}}
#' with a multivariate response.
#' @param scale a factor to adjust the radii of the circles, in relation to
#' \code{sqrt(CookD)}
#' @param type Type of plot: one of \code{c("stres", "cookd", "LR")}. See Details.
#' @param infl influence measure structure as returned by
#' \code{\link{mlm.influence}}
#' @param FUN For \code{m>1}, the function to be applied to the \eqn{H} and
#' \eqn{Q} matrices returning a scalar value. \code{FUN=det} and \code{FUN=tr}
#' are possible choices, returning the \eqn{|H|} and \eqn{tr(H)} respectively.
#' @param labels,id.method,id.n,id.cex,id.col settings for labeling points;
#' see \code{\link{showLabels}} for details. To omit point labeling, set
#' \code{id.n=0}, the default. The default \code{id.method="noteworthy"} is
#' used in this function to indicate setting labels for points with large
#' Studentized residuals, hat-values or Cook's distances. See Details below.
#' Set \code{id.method="identify"} for interactive point identification.
#' @param fill,fill.col,fill.alpha.max \code{fill}: logical, specifying whether
#' the circles should be filled. When \code{fill=TRUE}, \code{fill.col} gives
#' the base fill color to which transparency specified by \code{fill.alpha.max}
#' is applied.
#' @param ref.col,ref.lty,ref.lab arguments for reference lines. Incompletely
#' implemented in this version
#' @param \dots other arguments passed down
#'
#' @return If points are identified, returns a data frame with the hat values,
#' Studentized residuals and Cook's distance of the identified points. If no
#' points are identified, nothing is returned. This function is primarily used
#' for its side-effect of drawing a plot.
#' @author Michael Friendly
#' @seealso \code{\link{mlm.influence}}, \code{\link{lrPlot}}
#'
#' \code{\link[car]{influencePlot}} in the car package
#'
#' @method influencePlot mlm
#' @importFrom car influencePlot
#' @importFrom grDevices palette
#' @importFrom stats model.response qbeta qf residuals rstudent
#'
#' @references
#' Barrett, B. E. and Ling, R. F. (1992). General Classes of
#' Influence Measures for Multivariate Regression. \emph{Journal of the
#' American Statistical Association}, \bold{87}(417), 184-191.
#'
#' Barrett, B. E. (2003). Understanding Influence in Multivariate Regression
#' \emph{Communications in Statistics - Theory and Methods}, \bold{32},
#' 667-680.
#'
#' McCulloch, C. E. & Meeter, D. (1983). Discussion of "Outliers..." by R. J.
#' Beckman and R. D. Cook. \emph{Technometrics}, 25, 152-155
#' @keywords models regression multivariate
#' @examples
#'
#' data(Rohwer, package="heplots")
#' Rohwer2 <- subset(Rohwer, subset=group==2)
#' Rohwer.mod <- lm(cbind(SAT, PPVT, Raven) ~ n+s+ns+na+ss, data=Rohwer2)
#'
#' influencePlot(Rohwer.mod, id.n=4, type="stres")
#' influencePlot(Rohwer.mod, id.n=4, type="LR")
#' influencePlot(Rohwer.mod, id.n=4, type="cookd")
#'
#' # Sake data
#' data(Sake, package="heplots")
#' Sake.mod <- lm(cbind(taste,smell) ~ ., data=Sake)
#' influencePlot(Sake.mod, id.n=3, type="stres")
#' influencePlot(Sake.mod, id.n=3, type="LR")
#' influencePlot(Sake.mod, id.n=3, type="cookd")
#'
#' # Adopted data
#' data(Adopted, package="heplots")
#' Adopted.mod <- lm(cbind(Age2IQ, Age4IQ, Age8IQ, Age13IQ) ~ AMED + BMIQ, data=Adopted)
#' influencePlot(Adopted.mod, id.n=3)
#' influencePlot(Adopted.mod, id.n=3, type="LR", ylim=c(-4,-1.5))
#'
#'
#' @export
#'
influencePlot.mlm <-
function(model, scale=12, type=c("stres", "LR", "cookd"),
infl=mlm.influence(model, do.coef = FALSE), FUN=det,
fill=TRUE, fill.col="red",
fill.alpha.max=0.5,
labels,
id.method = "noteworthy",
id.n = if(id.method[1]=="identify") Inf else 0,
id.cex=1, id.col=palette()[1],
ref.col="gray", ref.lty=2, ref.lab=TRUE, # args for reference lines
...)
{
m <- infl$m
df <- as.data.frame(infl, FUN=FUN, funnames=FALSE)
CookD <- df$CookD
H <- df$H
Q <- df$Q
L <- df$L
R <- df$R
if(missing(labels)) labels <- rownames(df)
p <- nrow(coef(model))
q <- ncol(coef(model))
n <- length(infl$H)
scale=scale/max(sqrt(CookD))
# TODO: replace with grDevices::adjustcolor to avoid CRAN warning
if (fill) cols <- heplots::trans.colors(fill.col, alpha=fill.alpha.max * CookD/(max(CookD)))
if(id.method != "identify"){
which.rstud <- order(R, decreasing=TRUE)[1:id.n]
which.cook <- order(CookD, decreasing=TRUE)[1:id.n]
which.hat <- order(H, decreasing=TRUE)[1:id.n]
id.method <- inf <- Reduce(union, list(which.rstud,which.cook, which.hat))
}
type <- match.arg(type)
if (type=='cookd') {
plot(H, CookD, xlab="Hat value", ylab="Cook's D", cex=scale*CookD, ...)
if (fill) points(H, CookD, cex=scale*CookD, pch=16, col=cols)
# abline(h=qf(.95, 1, n-p), lty=ref.lty, col=ref.col)
abline(h=4/(n-p), lty=ref.lty, col=ref.col)
abline(v=c(2, 3)*p/n, lty=ref.lty, col=ref.col)
noteworthy <- car::showLabels(H, CookD, labels=labels, method=id.method,
n=id.n, cex=id.cex, col=id.col)
}
else if (type=='stres') {
plot(H, Q, xlab="Hat value", ylab="Squared Studentized Residual", cex=scale*CookD, ...)
if (fill) points(H, Q, cex=scale*CookD, pch=16, col=cols)
abline(v=c(2, 3)*p/n, lty=ref.lty, col=ref.col)
abline(h=qbeta(.95, q/2, (n-p-q)/2), lty=ref.lty, col=ref.col)
noteworthy <- car::showLabels(H, Q, labels=labels, method=id.method,
n=id.n, cex=id.cex, col=id.col)
}
else if (type=='LR') {
logL <- log(L)
logR <- log(R)
plot(logL, logR, xlab="log Leverage", ylab="log Residual", cex=scale*CookD, ...)
if (fill) points(logL, logR, cex=scale*CookD, pch=16, col=cols)
xmin <- floor(par("usr")[1])
xmax <- ceiling(par("usr")[2])
# FIXME: bit of a kludge in calculating intercepts of diagonal lines
for(a in (2*xmin):xmax) abline(a=a, b=-1, col=ref.col, lty=ref.lty)
noteworthy <- car::showLabels(logL, logR, labels=labels, method=id.method,
n=id.n, cex=id.cex, col=id.col)
}
#browser()
if (length(noteworthy > 0)) {
## FIXME: m>1 car::showLabels returns the labels, not the indices
if (m>1) noteworthy <- inf
res <- data.frame(H, Q, CookD, L, R)[noteworthy,]
rownames(res) <- labels[noteworthy]
return(res)
}
}