/
xpose.gam.R
334 lines (300 loc) · 10.2 KB
/
xpose.gam.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
#' Stepwise GAM search for covariates on a parameter (Xpose 4)
#'
#' Function takes an Xpose object and performs a generalized additive model
#' (GAM) stepwise search for influential covariates on a single model parameter.
#'
#'
#' @param object An xpose.data object.
#' @param parnam ONE (and only one) model parameter name.
#' @param covnams Covariate names to test on parameter.
#' @param trace TRUE if you want GAM output to screen.
#' @param scope Scope of the GAM search.
#' @param disp If dispersion should be used in the GAM object.
#' @param start.mod Starting model.
#' @param family Assumption for the parameter distribution.
#' @param wts.data Weights on the least squares fitting of parameter vs.
#' covariate. Often one can use the variances of the individual parameter
#' values as weights. This data frame must have column with name ID and any
#' subset variable as well as the variable defined by the \code{wts.col}.
#' @param wts.col Which column in the \code{wts.data} to use.
#' @param steppit TRUE for stepwise search, false for no search.
#' @param subset Subset on data.
#' @param onlyfirst TRUE if only the first row of each individual's data is to
#' be used.
#' @param medianNorm Normalize to the median of parameter and covariates.
#' @param nmods Number of models to examine.
#' @param smoother1 Smoother for each model.
#' @param smoother2 Smoother for each model.
#' @param smoother3 Smoother for each model.
#' @param smoother4 Smoother for each model.
#' @param arg1 Argument for model 1.
#' @param arg2 Argument for model 2.
#' @param arg3 Argument for model 3.
#' @param arg4 Argument for model 4.
#' @param excl1 Covariate exclusion from model 1.
#' @param excl2 Covariate exclusion from model 2.
#' @param excl3 Covariate exclusion from model 3.
#' @param excl4 Covariate exclusion from model 4.
#' @param extra Extra exclusion criteria.
#' @param \dots Used to pass arguments to more basic functions.
#' @return Returned is a \code{\link[gam]{step.Gam}} object. In this object
#' the step-wise-selected model is returned, with up to two additional
#' components. There is an "anova" component
#' corresponding to the steps taken in the search, as well as a
#' "keep" component if the "keep=" argument was supplied in the call.
#' @author E. Niclas Jonsson, Mats Karlsson, Andrew Hooker & Justin Wilkins
#' @seealso \code{\link[gam]{step.gam}}
#' @examples
#' ## Run a GAM using the example xpose database
#' gam_ka <- xpose.gam(simpraz.xpdb, parnam="KA")
#'
#' ## Summarize GAM
#' xp.summary(gam_ka)
#'
#' ## GAM residuals of base model vs. covariates
#' xp.plot(gam_ka)
#'
#' ## An Akaike plot of the results
#' xp.akaike.plot(gam_ka)
#'
#' ## Studentized residuals
#' xp.ind.stud.res(gam_ka)
#'
#' ## Individual influence on GAM fit
#' xp.ind.inf.fit(gam_ka)
#'
#' ## Individual influence on GAM terms
#' xp.ind.inf.terms(gam_ka)
#'
#' ## Individual parameters to GAM fit
#' xp.cook(gam_ka)
#'
#' @export xpose.gam
#' @family GAM functions
xpose.gam <-
function(object,
parnam = xvardef("parms", object)[1],
covnams = xvardef("covariates", object),
trace = TRUE,
scope = NULL,
disp = object@Prefs@Gam.prefs$disp,
start.mod=object@Prefs@Gam.prefs$start.mod,
family="gaussian",
wts.data =object@Data.firstonly,
wts.col= NULL,#object@Prefs@Gam.prefs$wts,
## must have ID and any subset variable
## well as a variable defined by wts.col
steppit=object@Prefs@Gam.prefs$steppit,
subset=xsubset(object),
onlyfirst=object@Prefs@Gam.prefs$onlyfirst,
medianNorm=object@Prefs@Gam.prefs$medianNorm,
## for the scope function
nmods=object@Prefs@Gam.prefs$nmods,
smoother1=object@Prefs@Gam.prefs$smoother1,
smoother2=object@Prefs@Gam.prefs$smoother2,
smoother3=object@Prefs@Gam.prefs$smoother3,
smoother4=object@Prefs@Gam.prefs$smoother4,
arg1=object@Prefs@Gam.prefs$arg1,
arg2=object@Prefs@Gam.prefs$arg2,
arg3=object@Prefs@Gam.prefs$arg3,
arg4=object@Prefs@Gam.prefs$arg4,
excl1=object@Prefs@Gam.prefs$excl1,
excl2=object@Prefs@Gam.prefs$excl2,
excl3=object@Prefs@Gam.prefs$excl3,
excl4=object@Prefs@Gam.prefs$excl4,
extra=object@Prefs@Gam.prefs$extra,
...) {
##
## Check the data
##
for (i in xvardef("parms", object)) {
if(is.null(i)) {
cat("Parameters are not properly set in the database!\n")
return()
}
}
for (i in xvardef("covariates", object)) {
if(is.null(i)) {
cat("Covariates are not properly set in the database!\n")
return()
}
}
if(length(parnam)>1) {
cat(
"You have specified more than on parameter but you can only\n"
)
cat(
"run the GAM on one parameter at a time.\n"
)
return()
}
##Get data
gamdata <- Data(object,subset=subset,onlyfirst=onlyfirst,...)
if(any(is.null(gamdata))) return("The subset expression is invalid.")
## subset weights
if(!is.null(wts.col)){
if(!is.null(subset)){
wts.data <- subset(wts.data,eval(parse(text=subset)))
}
##check that ids are equal
if(!all(gamdata$ID==wts.data$ID)){
cat("Weights and data set do not have same ID values.\n")
return()
}
## add other weighting options
#browser()
#str(wts.data)
#se*wpop/(wpop-se)
## assign weight column
wts <- wts.data[,wts.col]
} else {
wts <- NULL
}
##
## Normalize to median if requested
##
if(medianNorm==TRUE) {
for(i in covnams) {
if(is.factor(gamdata[,i])) {
next
} else {
gamdata[,i] <- gamdata[,i] - median(gamdata[, i])
}
}
for(i in parnam) {
if(is.factor(gamdata[,i])) {
next
} else {
gamdata[,i] <- gamdata[,i] - median(gamdata[, i])
}
}
}
## Check the length of the wts
if(!is.null(wts)) {
if(length(wts) != length(gamdata[[1]])) {
cat("Weights and data have unequal length!\n")
return()
} else {
## looks good
}
}
c1 <- call("assign","wts",wts,pos=1)
eval(c1)
c2 <- call("assign","gamdata",gamdata,pos=1)
eval(c2)
c3 <- call("assign","covnams",covnams,pos=1)
eval(c3)
##
## Set starting model
##
if(is.null(start.mod)) {
form <- as.formula(paste(parnam,"~1"))
} else {
form <- start.mod
}
##
## Check to see if the dispersion should be estimated
##
if(!is.null(disp)) {
disp1 <- xp.get.disp(gamdata=gamdata,parnam=parnam,covnams=covnams,family=family)
disp2 <- disp1$dispersion
}
##
## Nonice way to get the proportional error model (doesn't work right now)
##
if(!any(is.null(wts))) {
##if(family=="quasi")
## bam.start <- gam(form,data=gamdata,weights=wts,
## family=quasi(link=identity,var="mu^2"))
#else
bam.start <- gam(form,weights=wts,data=gamdata)
} else {
##if(family=="quasi")
##bam.start <- gam(form,data=gamdata,family=quasi(link=identity,var="mu^2"))
##else
bam.start <- gam(form,data=gamdata)
}
##
## Set the keep function
##
"bam.keep.old.gam" <-
function(object, AIC){
list(df.resid = object$df.resid,
deviance = object$deviance,
term = as.character(object$formula)[3],
labs = labels(object),
AIC = AIC)
}
"bam.keep" <-
function(object){
list(df.resid = object$df.resid,
deviance = object$deviance,
term = as.character(object$formula)[3],
labs = labels(object),
AIC = object$aic)
}
##
## Set the scope
##
if(any(is.null(scope))){
#scope <- xp.check.scope(object,covnams=covnams)
scope <-xp.scope3(object,
covnam=covnams,
nmods=nmods,
smoother1=smoother1,
smoother2=smoother2,
smoother3=smoother3,
smoother4=smoother4,
arg1=arg1,
arg2=arg2,
arg3=arg3,
arg4=arg4,
excl1=excl1,
excl2=excl2,
excl3=excl3,
excl4=excl4,
extra=extra)
}
##
## Run the GAM
##
if(!is.null(steppit)){
if(is.null(disp)){
if(packageVersion("gam") >= "1.15"){
nose1.parm <- step.Gam(bam.start, trace = trace, scope, keep = bam.keep)
} else if(packageVersion("gam") >= "1.9.1"){
nose1.parm <- do.call("step.gam", list(bam.start, trace = trace, scope, keep = bam.keep))
} else {
nose1.parm <- do.call("step.gam", list(bam.start, trace = trace, scope, keep = bam.keep.old.gam))
}
} else {
if(packageVersion("gam") >= "1.15"){
nose1.parm <- step.Gam(bam.start, trace = trace, scope, keep = bam.keep,scale=disp2)
} else if(packageVersion("gam") >= "1.9.1"){
nose1.parm <- do.call("step.gam",list(bam.start, trace = trace, scope, keep = bam.keep,scale=disp2))
} else {
nose1.parm <- do.call("step.gam",list(bam.start, trace = trace, scope, keep = bam.keep.old.gam,scale=disp2))
}
}
} else {
nose1.parm <- bam.start
}
## add to gam object
if(!is.null(disp)) {
nose1.parm$dispersion <- disp2
} else {
nose1.parm$dispersion <- disp
}
if(!is.null(steppit)){
nose1.parm$steppit <- steppit
}
nose1.parm$subset <- subset
nose1.parm$onlyfirst <- onlyfirst
nose1.parm$medianNorm <- medianNorm
nose1.parm$pars <- parnam
nose1.parm$runno <- object@Runno
remove("gamdata",pos=1)
remove("wts",pos=1)
remove("covnams",pos=1)
return(nose1.parm)
}