/
coxph.R
728 lines (668 loc) · 31.3 KB
/
coxph.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
# Automatically generated from the noweb directory
#tt <- function(x) x
coxph <- function(formula, data, weights, subset, na.action,
init, control, ties= c("efron", "breslow", "exact"),
singular.ok =TRUE, robust,
model=FALSE, x=FALSE, y=TRUE, tt, method=ties,
id, cluster, istate, statedata, nocenter=c(-1, 0, 1), ...) {
missing.ties <- missing(ties) & missing(method) #see later multistate sect
ties <- match.arg(ties)
Call <- match.call()
## We want to pass any ... args to coxph.control, but not pass things
## like "dats=mydata" where someone just made a typo. The use of ...
## is simply to allow things like "eps=1e6" with easier typing
extraArgs <- list(...)
if (length(extraArgs)) {
controlargs <- names(formals(coxph.control)) #legal arg names
indx <- pmatch(names(extraArgs), controlargs, nomatch=0L)
if (any(indx==0L))
stop(gettextf("Argument %s not matched",
names(extraArgs)[indx==0L]), domain = NA)
}
if (missing(control)) control <- coxph.control(...)
# Move any cluster() term out of the formula, and make it an argument
# instead. This makes everything easier. But, I can only do that with
# a local copy, doing otherwise messes up future use of update() on
# the model object for a user stuck in "+ cluster()" mode.
if (missing(formula)) stop("a formula argument is required")
ss <- "cluster"
if (is.list(formula))
Terms <- if (missing(data)) terms(formula[[1]], specials=ss) else
terms(formula[[1]], specials=ss, data=data)
else Terms <- if (missing(data)) terms(formula, specials=ss) else
terms(formula, specials=ss, data=data)
tcl <- attr(Terms, 'specials')$cluster
if (length(tcl) > 1) stop("a formula cannot have multiple cluster terms")
if (length(tcl) > 0) { # there is one
factors <- attr(Terms, 'factors')
if (any(factors[tcl,] >1)) stop("cluster() cannot be in an interaction")
if (attr(Terms, "response") ==0)
stop("formula must have a Surv response")
if (is.null(Call$cluster))
Call$cluster <- attr(Terms, "variables")[[1+tcl]][[2]]
else warning("cluster appears both in a formula and as an argument, formula term ignored")
# [.terms is broken at least through R 4.1; use our
# local drop.special() function instead.
Terms <- drop.special(Terms, tcl)
formula <- Call$formula <- formula(Terms)
}
# create a call to model.frame() that contains the formula (required)
# and any other of the relevant optional arguments
# but don't evaluate it just yet
indx <- match(c("formula", "data", "weights", "subset", "na.action",
"cluster", "id", "istate"),
names(Call), nomatch=0)
if (indx[1] ==0) stop("A formula argument is required")
tform <- Call[c(1,indx)] # only keep the arguments we wanted
tform[[1L]] <- quote(stats::model.frame) # change the function called
# if the formula is a list, do the first level of processing on it.
if (is.list(formula)) {
multiform <- TRUE
dformula <- formula[[1]] # the default formula for transitions
if (missing(statedata)) covlist <- parsecovar1(formula[-1])
else {
if (!inherits(statedata, "data.frame"))
stop("statedata must be a data frame")
if (is.null(statedata$state))
stop("statedata data frame must contain a 'state' variable")
covlist <- parsecovar1(formula[-1], names(statedata))
}
# create the master formula, used for model.frame
# the term.labels + reformulate + environment trio is used in [.terms;
# if it's good enough for base R it's good enough for me
tlab <- unlist(lapply(covlist$rhs, function(x)
attr(terms.formula(x$formula), "term.labels")))
tlab <- c(attr(terms.formula(dformula), "term.labels"), tlab)
newform <- reformulate(tlab, dformula[[2]])
environment(newform) <- environment(dformula)
formula <- newform
tform$na.action <- na.pass # defer any missing value work to later
}
else {
multiform <- FALSE # formula is not a list of expressions
covlist <- NULL
dformula <- formula
}
# add specials to the formula
special <- c("strata", "tt", "frailty", "ridge", "pspline")
tform$formula <- if(missing(data)) terms(formula, special) else
terms(formula, special, data=data)
# Make "tt" visible for coxph formulas, without making it visible elsewhere
if (!is.null(attr(tform$formula, "specials")$tt)) {
coxenv <- new.env(parent= environment(formula))
assign("tt", function(x) x, envir=coxenv)
environment(tform$formula) <- coxenv
}
# okay, now evaluate the formula
mf <- eval(tform, parent.frame())
Terms <- terms(mf)
# Grab the response variable, and deal with Surv2 objects
n <- nrow(mf)
Y <- model.response(mf)
isSurv2 <- inherits(Y, "Surv2")
if (isSurv2) {
# this is Surv2 style data
# if there were any obs removed due to missing, remake the model frame
if (length(attr(mf, "na.action"))) {
tform$na.action <- na.pass
mf <- eval.parent(tform)
}
if (!is.null(attr(Terms, "specials")$cluster))
stop("cluster() cannot appear in the model statement")
new <- surv2data(mf)
mf <- new$mf
istate <- new$istate
id <- new$id
Y <- new$y
n <- nrow(mf)
}
else {
if (!is.Surv(Y)) stop("Response must be a survival object")
id <- model.extract(mf, "id")
istate <- model.extract(mf, "istate")
}
if (n==0) stop("No (non-missing) observations")
type <- attr(Y, "type")
multi <- FALSE
if (type=="mright" || type == "mcounting") multi <- TRUE
else if (type!='right' && type!='counting')
stop(paste("Cox model doesn't support \"", type,
"\" survival data", sep=''))
data.n <- nrow(Y) #remember this before any time transforms
if (!multi && multiform)
stop("formula is a list but the response is not multi-state")
if (multi) {
if (length(attr(Terms, "specials")$frailty) >0)
stop("multi-state models do not currently support frailty terms")
if (length(attr(Terms, "specials")$pspline) >0)
stop("multi-state models do not currently support pspline terms")
if (length(attr(Terms, "specials")$ridge) >0)
stop("multi-state models do not currently support ridge penalties")
if (!missing.ties) method <- ties <- "breslow"
}
if (control$timefix) Y <- aeqSurv(Y)
if (length(attr(Terms, 'variables')) > 2) { # a ~1 formula has length 2
ytemp <- innerterms(formula[1:2])
suppressWarnings(z <- as.numeric(ytemp)) # are any of the elements numeric?
ytemp <- ytemp[is.na(z)] # toss numerics, e.g. Surv(t, 1-s)
xtemp <- innerterms(formula[-2])
if (any(!is.na(match(xtemp, ytemp))))
warning("a variable appears on both the left and right sides of the formula")
}
# The time transform will expand the data frame mf. To do this
# it needs Y and the strata. Everything else (cluster, offset, weights)
# should be extracted after the transform
#
strats <- attr(Terms, "specials")$strata
hasinteractions <- FALSE
dropterms <- NULL
if (length(strats)) {
stemp <- untangle.specials(Terms, 'strata', 1)
if (length(stemp$vars)==1) strata.keep <- mf[[stemp$vars]]
else strata.keep <- strata(mf[,stemp$vars], shortlabel=TRUE)
istrat <- as.integer(strata.keep)
for (i in stemp$vars) { #multiple strata terms are allowed
# The factors attr has one row for each variable in the frame, one
# col for each term in the model. Pick rows for each strata
# var, and find if it participates in any interactions.
if (any(attr(Terms, 'order')[attr(Terms, "factors")[i,] >0] >1))
hasinteractions <- TRUE
}
if (!hasinteractions) dropterms <- stemp$terms
} else istrat <- NULL
if (hasinteractions && multi)
stop("multi-state coxph does not support strata*covariate interactions")
timetrans <- attr(Terms, "specials")$tt
if (missing(tt)) tt <- NULL
if (length(timetrans)) {
if (multi || isSurv2) stop("the tt() transform is not implemented for multi-state or Surv2 models")
# begin tt() preprocessing
timetrans <- untangle.specials(Terms, 'tt')
ntrans <- length(timetrans$terms)
if (is.null(tt)) {
tt <- function(x, time, riskset, weights){ #default to O'Brien's logit rank
obrien <- function(x) {
r <- rank(x)
(r-.5)/(.5+length(r)-r)
}
unlist(tapply(x, riskset, obrien))
}
}
if (is.function(tt)) tt <- list(tt) #single function becomes a list
if (is.list(tt)) {
if (any(!sapply(tt, is.function)))
stop("The tt argument must contain function or list of functions")
if (length(tt) != ntrans) {
if (length(tt) ==1) {
temp <- vector("list", ntrans)
for (i in 1:ntrans) temp[[i]] <- tt[[1]]
tt <- temp
}
else stop("Wrong length for tt argument")
}
}
else stop("The tt argument must contain a function or list of functions")
if (ncol(Y)==2) {
if (length(strats)==0) {
sorted <- order(-Y[,1], Y[,2])
newstrat <- rep.int(0L, nrow(Y))
newstrat[1] <- 1L
}
else {
sorted <- order(istrat, -Y[,1], Y[,2])
#newstrat marks the first obs of each strata
newstrat <- as.integer(c(1, 1*(diff(istrat[sorted])!=0)))
}
if (storage.mode(Y) != "double") storage.mode(Y) <- "double"
counts <- .Call(Ccoxcount1, Y[sorted,],
as.integer(newstrat))
tindex <- sorted[counts$index]
}
else {
if (length(strats)==0) {
sort.end <- order(-Y[,2], Y[,3])
sort.start<- order(-Y[,1])
newstrat <- c(1L, rep(0, nrow(Y) -1))
}
else {
sort.end <- order(istrat, -Y[,2], Y[,3])
sort.start<- order(istrat, -Y[,1])
newstrat <- c(1L, as.integer(diff(istrat[sort.end])!=0))
}
if (storage.mode(Y) != "double") storage.mode(Y) <- "double"
counts <- .Call(Ccoxcount2, Y,
as.integer(sort.start -1L),
as.integer(sort.end -1L),
as.integer(newstrat))
tindex <- counts$index
}
Y <- Surv(rep(counts$time, counts$nrisk), counts$status)
type <- 'right' # new Y is right censored, even if the old was (start, stop]
mf <- mf[tindex,]
istrat <- rep(1:length(counts$nrisk), counts$nrisk)
weights <- model.weights(mf)
if (!is.null(weights) && any(!is.finite(weights)))
stop("weights must be finite")
id <- model.extract(mf, "id") # update the id and/or cluster, if present
cluster <- model.extract(mf, "cluster")
tcall <- attr(Terms, 'variables')[timetrans$terms+2]
pvars <- attr(Terms, 'predvars')
pmethod <- sub("makepredictcall.", "", as.vector(methods("makepredictcall")))
for (i in 1:ntrans) {
newtt <- (tt[[i]])(mf[[timetrans$var[i]]], Y[,1], istrat, weights)
mf[[timetrans$var[i]]] <- newtt
nclass <- class(newtt)
if (any(nclass %in% pmethod)) { # It has a makepredictcall method
dummy <- as.call(list(as.name(class(newtt)[1]), tcall[[i]][[2]]))
ptemp <- makepredictcall(newtt, dummy)
pvars[[timetrans$terms[i]+2]] <- ptemp
}
}
attr(Terms, "predvars") <- pvars
# end tt() preprocessing
}
xlevels <- .getXlevels(Terms, mf)
# grab the cluster, if present. Using cluster() in a formula is no
# longer encouraged
cluster <- model.extract(mf, "cluster")
weights <- model.weights(mf)
# The user can call with cluster, id, robust, or any combination
# Default for robust: if cluster or any id with > 1 event or
# any weights that are not 0 or 1, then TRUE
# If only id, treat it as the cluster too
has.cluster <- !(missing(cluster) || length(cluster)==0)
has.id <- !(missing(id) || length(id)==0)
has.rwt<- (!is.null(weights) && any(weights != floor(weights)))
#has.rwt<- FALSE # we are rethinking this
has.robust <- (!missing(robust) && !is.null(robust)) # arg present
if (has.id) id <- as.factor(id)
if (missing(robust) || is.null(robust)) {
if (has.cluster || has.rwt ||
(has.id && (multi || anyDuplicated(id[Y[,ncol(Y)]==1]))))
robust <- TRUE else robust <- FALSE
}
if (!is.logical(robust)) stop("robust must be TRUE/FALSE")
if (has.cluster) {
if (!robust) {
warning("cluster specified with robust=FALSE, cluster ignored")
ncluster <- 0
clname <- NULL
}
else {
if (is.factor(cluster)) {
clname <- levels(cluster)
cluster <- as.integer(cluster)
} else {
clname <- sort(unique(cluster))
cluster <- match(cluster, clname)
}
ncluster <- length(clname)
}
} else {
if (robust && has.id) {
# treat the id as both identifier and clustering
clname <- levels(id)
cluster <- as.integer(id)
ncluster <- length(clname)
}
else {
ncluster <- 0 # has neither
}
}
# if the user said "robust", (time1,time2) data, and no cluster or
# id, complain about it
if (robust && is.null(cluster)) {
if (ncol(Y) ==2 || !has.robust) cluster <- seq.int(1, nrow(mf))
else stop("one of cluster or id is needed")
}
contrast.arg <- NULL #due to shared code with model.matrix.coxph
attr(Terms, "intercept") <- 1 # always have a baseline hazard
if (multi) {
# check for consistency of the states, and create a transition
# matrix
if (length(id)==0)
stop("an id statement is required for multi-state models")
mcheck <- survcheck2(Y, id, istate)
# error messages here
if (mcheck$flag["overlap"] > 0)
stop("data set has overlapping intervals for one or more subjects")
transitions <- mcheck$transitions
istate <- mcheck$istate
states <- mcheck$states
# build tmap, which has one row per term, one column per transition
if (missing(statedata))
covlist2 <- parsecovar2(covlist, NULL, dformula= dformula,
Terms, transitions, states)
else covlist2 <- parsecovar2(covlist, statedata, dformula= dformula,
Terms, transitions, states)
tmap <- covlist2$tmap
if (!is.null(covlist)) {
# first vector will be true if there is at least 1 transition for which all
# covariates are present, second if there is at least 1 for which some are not
good.tran <- bad.tran <- rep(FALSE, nrow(Y))
# We don't need to check interaction terms
termname <- rownames(attr(Terms, 'factors'))
trow <- (!is.na(match(rownames(tmap), termname)))
# create a missing indicator for each term
termiss <- matrix(0L, nrow(mf), ncol(mf))
for (i in 1:ncol(mf)) {
xx <- is.na(mf[[i]])
if (is.matrix(xx)) termiss[,i] <- apply(xx, 1, any)
else termiss[,i] <- xx
}
for (i in levels(istate)) {
rindex <- which(istate ==i)
j <- which(covlist2$mapid[,1] == match(i, states)) #possible transitions
for (jcol in j) {
k <- which(trow & tmap[,jcol] > 0) # the terms involved in that
bad.tran[rindex] <- (bad.tran[rindex] |
apply(termiss[rindex, k, drop=FALSE], 1, any))
good.tran[rindex] <- (good.tran[rindex] |
apply(!termiss[rindex, k, drop=FALSE], 1, all))
}
}
n.partially.used <- sum(good.tran & bad.tran & !is.na(Y))
omit <- (!good.tran & bad.tran) | is.na(Y)
if (all(omit)) stop("all observations deleted due to missing values")
temp <- setNames(seq(omit)[omit], attr(mf, "row.names")[omit])
attr(temp, "class") <- "omit"
mf <- mf[!omit,, drop=FALSE]
attr(mf, "na.action") <- temp
Y <- Y[!omit]
id <- id[!omit]
if (length(istate)) istate <- istate[!omit] # istate can be NULL
}
}
if (length(dropterms)) {
Terms2 <- Terms[-dropterms]
X <- model.matrix(Terms2, mf, constrasts.arg=contrast.arg)
# we want to number the terms wrt the original model matrix
temp <- attr(X, "assign")
shift <- sort(dropterms)
for (i in seq(along.with=shift))
temp <- temp + 1*(shift[i] <= temp)
attr(X, "assign") <- temp
}
else X <- model.matrix(Terms, mf, contrasts.arg=contrast.arg)
# drop the intercept after the fact, and also drop strata if necessary
Xatt <- attributes(X)
if (hasinteractions) adrop <- c(0, untangle.specials(Terms, "strata")$terms)
else adrop <- 0
xdrop <- Xatt$assign %in% adrop #columns to drop (always the intercept)
X <- X[, !xdrop, drop=FALSE]
attr(X, "assign") <- Xatt$assign[!xdrop]
attr(X, "contrasts") <- Xatt$contrasts
offset <- model.offset(mf)
if (is.null(offset) || all(offset==0)) {
offset <- rep(0., nrow(mf))
meanoffset <- 0
} else if (any(!is.finite(offset) | !is.finite(exp(offset))))
stop("offsets must lead to a finite risk score")
else {
meanoffset <- mean(offset)
offset <- offset - meanoffset # this can help stability of exp()
}
weights <- model.weights(mf)
if (!is.null(weights) && any(!is.finite(weights)))
stop("weights must be finite")
assign <- attrassign(X, Terms)
contr.save <- attr(X, "contrasts")
if (sum(Y[, ncol(Y)]) == 0) {
# No events in the data!
ncoef <- ncol(X)
ctemp <- rep(NA, ncoef)
names(ctemp) <- colnames(X)
concordance= c(concordant=0, discordant=0, tied.x=0, tied.y=0, tied.xy=0,
concordance=NA, std=NA, timefix=FALSE)
rval <- list(coefficients= ctemp,
var = matrix(0.0, ncoef, ncoef),
loglik=c(0,0),
score =0,
iter =0,
linear.predictors = offset,
residuals = rep(0.0, data.n),
means = colMeans(X), method=method,
n = data.n, nevent=0, terms=Terms, assign=assign,
concordance=concordance, wald.test=0.0,
y = Y, call=Call)
class(rval) <- "coxph"
return(rval)
}
if (multi) {
if (length(strats) >0) {
# tmap starts with a "(Baseline)" row, which we want
# strats is indexed off the data frame, which includes the response, so
# turns out to be correct for the remaining rows of tmap
smap <- tmap[c(1L, strats),]
smap[-1,] <- ifelse(smap[-1,] >0, 1L, 0L)
if (nrow(smap) > 2) {
# multi state with more than 1 strata statement -- really unusual
temp <- smap[-1,]
if (!all(apply(temp, 2, function(x) all(x==0) || all(x==1)))) {
# the hard case: some transitions use one strata variable, some
# transitions use another. We need to keep them separate
strata.keep <- mf[,strats] # this will be a data frame
istrat <- sapply(strata.keep, as.numeric)
}
}
}
else smap <- tmap[1,,drop=FALSE]
cmap <- parsecovar3(tmap, colnames(X), attr(X, "assign"), covlist2$phbaseline)
xstack <- stacker(cmap, smap, as.integer(istate), X, Y, strata=istrat,
states=states)
rkeep <- unique(xstack$rindex)
transitions <- survcheck2(Y[rkeep,], id[rkeep], istate[rkeep])$transitions
Xsave <- X # the originals may be needed later
Ysave <- Y
X <- xstack$X
Y <- xstack$Y
istrat <- xstack$strata
if (length(offset)) offset <- offset[xstack$rindex]
if (length(weights)) weights <- weights[xstack$rindex]
if (length(cluster)) cluster <- cluster[xstack$rindex]
t2 <- tmap[-c(1, strats),,drop=FALSE] # remove the intercept row and strata rows
r2 <- row(t2)[!duplicated(as.vector(t2)) & t2 !=0]
c2 <- col(t2)[!duplicated(as.vector(t2)) & t2 !=0]
a2 <- lapply(seq(along.with=r2), function(i) {cmap[assign[[r2[i]]], c2[i]]})
# which elements are unique?
tab <- table(r2)
count <- tab[r2]
names(a2) <- ifelse(count==1, row.names(t2)[r2],
paste(row.names(t2)[r2], colnames(cmap)[c2], sep="_"))
assign <- a2
}
# infinite covariates are not screened out by the na.omit routines
# But this needs to be done after the multi-X part
if (!all(is.finite(X)))
stop("data contains an infinite predictor")
# init is checked after the final X matrix has been made
if (missing(init)) init <- NULL
else {
if (length(init) != ncol(X)) stop("wrong length for init argument")
temp <- X %*% init - sum(colMeans(X) * init) + offset
# it's okay to have a few underflows, but if all of them are too
# small we get all zeros
if (any(exp(temp) > .Machine$double.xmax) || all(exp(temp)==0))
stop("initial values lead to overflow or underflow of the exp function")
}
pterms <- sapply(mf, inherits, 'coxph.penalty')
if (any(pterms)) {
pattr <- lapply(mf[pterms], attributes)
pname <- names(pterms)[pterms]
#
# Check the order of any penalty terms
ord <- attr(Terms, "order")[match(pname, attr(Terms, 'term.labels'))]
if (any(ord>1)) stop ('Penalty terms cannot be in an interaction')
pcols <- assign[match(pname, names(assign))]
fit <- coxpenal.fit(X, Y, istrat, offset, init=init,
control,
weights=weights, method=method,
row.names(mf), pcols, pattr, assign,
nocenter= nocenter)
}
else {
rname <- row.names(mf)
if (multi) rname <- rname[xstack$rindex]
if( method=="breslow" || method =="efron") {
if (grepl('right', type))
fit <- coxph.fit(X, Y, istrat, offset, init, control,
weights=weights, method=method,
rname, nocenter=nocenter)
else fit <- agreg.fit(X, Y, istrat, offset, init, control,
weights=weights, method=method,
rname, nocenter=nocenter)
}
else if (method=='exact') {
if (type== "right")
fit <- coxexact.fit(X, Y, istrat, offset, init, control,
weights=weights, method=method,
rname, nocenter=nocenter)
else fit <- agexact.fit(X, Y, istrat, offset, init, control,
weights=weights, method=method,
rname, nocenter=nocenter)
}
else stop(paste ("Unknown method to ties", method))
}
if (is.character(fit)) {
fit <- list(fail=fit)
class(fit) <- 'coxph'
}
else {
if (!is.null(fit$coefficients) && any(is.na(fit$coefficients))) {
vars <- (1:length(fit$coefficients))[is.na(fit$coefficients)]
msg <-paste("X matrix deemed to be singular; variable",
paste(vars, collapse=" "))
if (!singular.ok) stop(msg)
# else warning(msg) # stop being chatty
}
fit$n <- data.n
fit$nevent <- sum(Y[,ncol(Y)])
fit$terms <- Terms
fit$assign <- assign
class(fit) <- fit$class
fit$class <- NULL
# don't compute a robust variance if there are no coefficients
if (robust && !is.null(fit$coefficients) && !all(is.na(fit$coefficients))) {
fit$naive.var <- fit$var
# a little sneaky here: by calling resid before adding the
# na.action method, I avoid having missings re-inserted
# I also make sure that it doesn't have to reconstruct X and Y
fit2 <- c(fit, list(x=X, y=Y, weights=weights))
if (length(istrat)) fit2$strata <- istrat
if (length(cluster)) {
temp <- residuals.coxph(fit2, type='dfbeta', collapse=cluster,
weighted=TRUE)
# get score for null model
if (is.null(init))
fit2$linear.predictors <- 0*fit$linear.predictors
else fit2$linear.predictors <- c(X %*% init)
temp0 <- residuals.coxph(fit2, type='score', collapse=cluster,
weighted=TRUE)
}
else {
temp <- residuals.coxph(fit2, type='dfbeta', weighted=TRUE)
fit2$linear.predictors <- 0*fit$linear.predictors
temp0 <- residuals.coxph(fit2, type='score', weighted=TRUE)
}
fit$var <- t(temp) %*% temp
u <- apply(as.matrix(temp0), 2, sum)
fit$rscore <- coxph.wtest(t(temp0)%*%temp0, u, control$toler.chol)$test
}
#Wald test
if (length(fit$coefficients) && is.null(fit$wald.test)) {
#not for intercept only models, or if test is already done
nabeta <- !is.na(fit$coefficients)
# The init vector might be longer than the betas, for a sparse term
if (is.null(init)) temp <- fit$coefficients[nabeta]
else temp <- (fit$coefficients -
init[1:length(fit$coefficients)])[nabeta]
fit$wald.test <- coxph.wtest(fit$var[nabeta,nabeta], temp,
control$toler.chol)$test
}
# Concordance. Done here so that we can use cluster if it is present
# The returned value is a subset of the full result, partly because it
# is all we need, but more for backward compatability with survConcordance.fit
if (length(cluster))
temp <- concordancefit(Y, fit$linear.predictors, istrat, weights,
cluster=cluster, reverse=TRUE,
timefix= FALSE)
else temp <- concordancefit(Y, fit$linear.predictors, istrat, weights,
reverse=TRUE, timefix= FALSE)
if (is.matrix(temp$count))
fit$concordance <- c(colSums(temp$count), concordance=temp$concordance,
std=sqrt(temp$var))
else fit$concordance <- c(temp$count, concordance=temp$concordance,
std=sqrt(temp$var))
na.action <- attr(mf, "na.action")
if (length(na.action)) fit$na.action <- na.action
if (model) {
if (length(timetrans)) {
stop("'model=TRUE' not supported for models with tt terms")
}
fit$model <- mf
}
if (x) {
if (multi) fit$x <- Xsave else fit$x <- X
if (length(timetrans)) fit$strata <- istrat
else if (length(strats)) fit$strata <- strata.keep
}
if (y) {
if (multi) fit$y <- Ysave else fit$y <- Y
}
fit$timefix <- control$timefix # remember this option
}
if (!is.null(weights) && any(weights!=1)) fit$weights <- weights
if (multi) {
fit$transitions <- transitions
fit$states <- states
fit$cmap <- cmap
fit$smap <- smap # why not 'stratamap'? Confusion with fit$strata
nonzero <- which(colSums(cmap)!=0)
fit$rmap <- cbind(row=xstack$rindex, transition= nonzero[xstack$transition])
# add a suffix to each coefficent name. Those that map to multiple transitions
# get the first transition they map to
single <- apply(cmap, 1, function(x) all(x %in% c(0, max(x)))) #only 1 coef
cindx <- col(cmap)[match(1:length(fit$coefficients), cmap)]
rindx <- row(cmap)[match(1:length(fit$coefficients), cmap)]
suffix <- ifelse(single[rindx], "", paste0("_", colnames(cmap)[cindx]))
newname <- paste0(names(fit$coefficients), suffix)
if (any(covlist2$phbaseline > 0)) {
# for proportional baselines, use a better name
base <- colnames(tmap)[covlist2$phbaseline]
child <- colnames(tmap)[which(covlist2$phbaseline >0)]
indx <- 1 + length(newname) - length(base):1 # coefs are the last ones
newname[indx] <- paste0("ph(", child, "/", base, ")")
phrow <- apply(cmap, 1, function(x) all(x[x>0] %in% indx))
matcoef <- cmap[!phrow,,drop=FALSE ] # ph() terms exluded
}
else matcoef <- cmap
names(fit$coefficients) <- newname
if (FALSE) {
# an idea that was tried, then paused: make the linear predictors
# and residuals into matrices with one column per transition
matcoef[matcoef>0] <- fit$coefficients[matcoef]
temp <- Xsave %*% matcoef
colnames(temp) <- colnames(cmap)
fit$linear.predictors <- temp
temp <- matrix(0., nrow=nrow(Xsave), ncol=ncol(fit$cmap))
temp[cbind(xstack$rindex, xstack$transition)] <- fit$residuals
# if there are any transitions with no covariates, residuals have not
# yet been calculated for those.
if (any(colSums(cmap) ==0)) {
from.state <- as.numeric(sub(":.*$", "", colnames(cmap)))
to.state <- as.numeric(sub("^.*:", "", colnames(cmap)))
# warning("no covariate residuals not filled in")
}
fit$residuals <- temp
}
class(fit) <- c("coxphms", class(fit))
}
names(fit$means) <- names(fit$coefficients)
fit$formula <- formula(Terms)
if (length(xlevels) >0) fit$xlevels <- xlevels
fit$contrasts <- contr.save
if (meanoffset !=0) fit$linear.predictors <- fit$linear.predictors + meanoffset
if (x & any(offset !=0)) fit$offset <- offset
fit$call <- Call
fit
}