/
com_likelihoodutilities.R
241 lines (223 loc) · 9.33 KB
/
com_likelihoodutilities.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
## Utilities and likelihood for 2D line transect distance analysis
## Mostly extracted from LT2D package with very limited typo and readability edits
## Designed to be used within the following protocol: 1) choose relevant combinations of functions h and pi from the available list provided in com_hfunctions and com_pifunctions 2) fit to data using fityx ; 3) select best model using AIC or other ; 4) plot along the perpendicular axis using plotfit.x ; 5) optional not implemented: compute SE using a parametric bootstrap within MNorm(fit$par,fit$vcov)
## "Waiting function"
# Calculates the pdf of the 'waiting distance' \eqn{f(y,x)=h(y,x)*\exp(-\int_y^{ystart} h(t,x) dt)}.
fyx=function(y,x,b,hr,ystart,nint=100,eps=1e-12)
{
if(length(y)!=length(x)) stop("Lengths of x and y must be the same.")
n=length(x)
f=intval=rep(NA,n)
hr=match.fun(hr)
for(i in 1:n) {
y0=max(y[i],eps)
dy=(ystart-y0)/nint/2 # for crude integration
yy=seq(y0,ystart,length=(nint+1))[-(nint+1)]+dy # for crude integration
h=hr(yy,rep(x[i],nint),b)
int=sum(hr(yy,rep(x[i],nint),b)*dy*2) # crude integration
intval[i]=exp(-int)
# int=integrate(f=hr,lower=max(y[i],ylo),upper=ystart,x=x[i],b=b)
# intval[i]=exp(-int$value)
}
hrval=hr(y,x,b)
bads=which(hrval>=.Machine$double.xmax) # identify infinite hazards
if(length(bads)>0) { # infinite hazard so p(detect)=0
f[bads]=.Machine$double.xmax
f[-bads]=hr(y[-bads],x[-bads],b)*intval[-bads]
}else{
f=hr(y,x,b)*intval
}
return(f)
}
## "Survival function"
# Calculates probability to "survive undetected" until position y, for a given perpendicular distance x and given forward distance range ymax.
# y is a vector
# hfun is a function from "com_hfunctions.R" (got rid of original option to give a char string)
# Numerical integration (if using h.HBDF there is an analytical solution worked out but not used here)
Sy=function(x,y,ymax,b,hfun) {
n=length(x)
if(length(y)!=n) stop("Lengths of x and y must be the same.")
pS=rep(NA,n)
h=match.fun(hfun)
for(i in 1:n)
pS[i]=exp(-integrate(h,y[i],ymax,x=x[i],b=b,subdivisions = 1000L)$value)
return(pS)
}
## Perpendicular detection function
# Calculates the perpendicular detection function, \eqn{p(x)}, for a given hazard.
px=function(x,b,hr,ystart,nint=100) 1-Sy(x,rep(0.0001,length(x)),ystart,b,hr)
## Product of p and pi
p.pi.x=function(x,b,hr,ystart,pi.x,logphi,w) px(x,b,hr,ystart)*pi.x(x,logphi,w)
## Negative log-likelihood ####################################################
#'@param y scalar or vector; forward distance observations
#'@param x scale or vector; perp. distance observations
#'@param pars c(b,logphi); hazard rate and density log-parameters in a vector (see details).
#'@param hr hazard rate function
#'@param ystart max forward distance at which could possibly detect animal /!\ Must to ensure the hazard function has decayed to almost zero by ystart.
#'@param pi.x perpendicular distance density distribution
#'@param w perpendicular truncation distance.
#'@param length.b length of the hazard rate parameter vector
negloglik.yx=function(pars,y,x,hr,ystart,pi.x,w,length.b=2,debug=FALSE)
{
if(length(y)!=length(x)) stop("Lengths of x and y must be the same.")
if(debug) print(pars)
n=length(y)
hr=match.fun(hr)
pi.x=match.fun(pi.x)
# unpack parameters /!\ pars must be passed as c(b,logphi), typically a length-4 vector
b=pars[1:length.b]
if(attributes(pi.x)$fName=="pi.const") logphi=NULL else logphi=pars[(1+length.b):length(pars)]
llik=rep(NA,n)
# caluclate numerator:
num=sum(log(fyx(y,x,b,hr,ystart)) + log(pi.x(x,logphi,w)))
# calculate denominator:
int=integrate(f=p.pi.x,lower=0,upper=w,b=b,hr=hr,ystart=ystart,pi.x=pi.x,logphi=logphi,w=w)
denom=log(int$value)
# likelihood:
llik=num-n*denom
##llik[is.nan(llik)]=-9e37
if(debug) print(llik)
return(-llik)
}
fityx=function(y,x,b,hr,ystart,pi.x,logphi,w,control=list(),hessian=FALSE,corrFlag=0.7,debug=FALSE,...)
{
if(length(y)!=length(x)) stop("Lengths of x and y must be the same.")
if(debug) print(pars)
n=length(y)
hr=match.fun(hr)
pi.x=match.fun(pi.x)
if(attributes(pi.x)$fName=="pi.const") pars=b else pars=c(b,logphi)
length.b=length(b)
fit=optim(par=pars,fn=negloglik.yx,y=y,x=x,hr=hr,ystart=ystart,pi.x=pi.x,w=w,
length.b=length.b,
hessian=hessian,debug=debug,control=control,...)
fit$error=FALSE
if(fit$convergence!=0){
warning('Convergence issue (code = ', fit$convergence,') . Check optim() help.')
fit$error=TRUE
}
fit$hr=hr
fit$pi.x=pi.x
fit$ystart=ystart
fit$w=w
fit$b=fit$par[1:length.b] # ***
if(length.b!=length(pars)){
fit$logphi=fit$par[(1+length.b):length(pars)]
parnames=c(rep("b",length.b), rep("logphi",length(pars)-length.b))
}else{
fit$logphi=NA
parnames=c(rep("b",length.b))
} # ***
fit$AIC=2*fit$value+2*length(fit$par)
fit$dat=data.frame(x=x,y=y) # attach data to fitted object
## Standard errors
if(hessian){
mNames=paste('b',1:length.b,sep='')
if(!all(is.na(fit$logphi))) mNames=c(mNames,paste('logphi',1:length(fit$logphi),sep=''))
fit$vcov=solve(fit$hessian)
if(any(diag(fit$vcov)<=0)) fit$vcov=MASS::ginv(fit$hessian + diag(length(fit$par))*1e-6)
if(any(diag(fit$vcov)<=0)) {
warning('Failed to invert hessian. Model covergance problem in fityx?')
fit$error=TRUE
fit$CVpar=rep(NA,length(fit$par))
} else fit$SE=sqrt(diag(fit$vcov))
fit$corr=cov2cor(fit$vcov)
row.names(fit$corr)=parnames
colnames(fit$corr)=parnames
corr=fit$corr
corr[upper.tri(corr,diag=TRUE)]=NA
corrIND=which(abs(corr)>corrFlag,arr.ind=T)
if(nrow(corrIND)){
warning('absolute correlation exceeds ',corrFlag,' in parameter estimates: ',
paste(paste(mNames[corrIND[,1]],mNames[corrIND[,2]],sep=' to '),collapse='; '))
fit$error=TRUE}
}
return(fit)
}
## Plot results along the x axis
plotfit.x=function(x,est,nclass=10,nint=100,
plot=TRUE,title="",ymx=NULL,
...)
{
Nhat.yx=bias=NULL
b=est$b; hr=match.fun(est$hr); ystart=est$ystart; pi.x=match.fun(est$pi.x)
logphi=est$logphi; w=est$w
x=x[x>=0 & x<=w]
f.x=p.x.std=adbnTRUE=0
gridx=seq(1e-10,w,length=100)
p.xpifit=p.pi.x(gridx,b,hr,ystart,pi.x,logphi,w)
mufit=integrate(f=p.pi.x,lower=0,upper=w,b=b,hr=hr
,ystart=ystart,pi.x=pi.x,logphi=logphi,w=w)$value
f.xfit=p.xpifit/mufit
p.xfit=px(gridx,b,hr,ystart,nint=nint)
ptot=integrate(f=px,lower=0,upper=w,b=b,hr=hr,ystart=ystart)$value
p.xfit.std=p.xfit/ptot
adbn=pi.x(gridx,logphi,w)
if(plot){
breaks=seq(0,w,length=(nclass+1))
hx=hist(x,breaks=breaks,plot=FALSE) # get hist bar heights
ymax=max(f.xfit,p.xfit.std,adbn,f.x,p.x.std,adbnTRUE,hx$density,ymx)
## Plot empirical distribution of sightings along x axis
hx=hist(x,breaks=breaks,freq=FALSE,ylim=c(0,ymax),
main=title,xlab="perpendicular distance (x)",ylab="pdf", ...)
## Overlay predicted distribution of sightings along x axis, f(x)
lines(gridx,f.xfit,col="grey",lwd=2)
# overlay fitted detection function p(x), scaled to have area=1
lines(gridx,p.xfit.std,lty=2,col="black",lwd=2)
# overlay fitted animal density function pi(x), scaled to have area=1
lines(gridx,adbn,col="black",lwd=2)
legend("topright",legend=c("f(x)","p(x)",expression(pi(x))),
col=c("grey","black","black"),lwd=c(2,2,2),lty=c(1,2,1))
}
invisible(list(gridx=gridx,p.xpifit=p.xpifit,mufit=mufit,
f.xfit=f.xfit,p.xfit=p.xfit,ptot=ptot,p.xfit.std=p.xfit.std,adbn=adbn
))
}
## Parametric bootstrap
boot <- function(fit, Nboot=1000) {
pHat = N = NULL
length.b = length(fit$b)
hr=match.fun(fit$hr); ystart=fit$ystart;
pi.x=match.fun(fit$pi.x); w=fit$w
for (k in 1:Nboot) {
theta = rmvnorm(1, fit$par, fit$vcov)
b = theta[1:length.b]
logphi = theta[(length.b+1):length(theta)]
phat = integrate(f=p.pi.x,lower=0,upper=w,b=b,hr=hr,
ystart=ystart,pi.x=pi.x,logphi=logphi,w=w)$value
pHat = c(pHat, phat)
N = c(N, length(x)/phat)
}
phat.est = integrate(f=p.pi.x,lower=0,upper=w,b=fit$b,hr=hr,
ystart=ystart,pi.x=pi.x,logphi=fit$logphi,w=w)$value
tabVU = data.frame(rbind( c(phat.est, quantile(pHat, c(0.025,0.985))),
c(length(x)/phat.est, quantile(N, c(0.025,0.985)))
))
colnames(tabVU) = c("Est","CI.low","CI.upp")
rownames(tabVU) = c("p-hat","N")
round(tabVU, 3)
return(tabVU)
}
## Effective strip width p-hat (do parametric bootstrap to compute SE)
phat=function(fit=NULL,w=NULL,hr=NULL,b=NULL,ystart=NULL,pi.x=NULL,logphi=NULL)
{
int=NULL
if(!is.null(fit)){
upper=fit$w; b=fit$b; hr=match.fun(fit$hr)
ystart=fit$ystart;pi.x=match.fun(fit$pi.x);logphi=fit$logphi;w=fit$w
int=integrate(f=p.pi.x,lower=0,upper=w,b=b,hr=hr,
ystart=ystart,pi.x=pi.x,logphi=logphi,w=w)$value
}
return(int)
}
tryCatch.W.E <- function(expr)
{
W <- NULL
w.handler <- function(w){ # warning handler
W <<- w
invokeRestart("muffleWarning")
}
list(value = withCallingHandlers(tryCatch(expr, error = function(e) e),
warning = w.handler),
warning = W)
}