/
psdCalc.R
246 lines (239 loc) · 13.9 KB
/
psdCalc.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
#' @title Convenience function for calculating PSD-X and PSD X-Y values.
#'
#' @description Convenience function for calculating (traditional) PSD-X and (incremental) PSD X-Y values for all Gabelhouse lengths and increments thereof.
#'
#' @details Computes the (traditional) PSD-X and (incremental) PSD X-Y values, with associated confidence intervals, for each Gabelhouse length. All PSD-X and PSD X-Y values are printed if \code{what="all"} (DEFAULT), only PSD-X values are printed if \code{what="traditional"}, only PSD X-Y values are printed if \code{what="incremental"}, and nothing is printed (but the matrix is still returned) if \code{what="none"}.
#'
#' Confidence intervals can be computed with either the multinomial (Default) or binomial distribution as set in \code{method}See details in \code{\link{psdCI}} for more information.
#' This function may be used for species for which Gabelhouse length categories are not defined. In this case do not include a name in \code{species}, but define at least two lengths in \code{addLens} where the first category MUST be called \dQuote{stock}.
#'
#' @param formula A formula of the form \code{~length} where \dQuote{length} generically represents a variable in \code{data} that contains the observed lengths. Note that this formula may only contain one variable and it must be numeric.
#' @param data A data.frame that minimally contains the observed lengths given in the variable in \code{formula}.
#' @param species A string that contains the species name for which Gabelhouse lengths exist. See \code{\link{psdVal}} for details. See details for how to use this function for species for which Gabelhouse lengths are not defined.
#' @param units A string that indicates the type of units used for the lengths. Choices are \code{mm} for millimeters (DEFAULT), \code{cm} for centimeters, and \code{in} for inches.
#' @param what A string that indicates the type of PSD values that will be printed. See details.
#' @param drop0Est A logical that indicates whether the PSD values that are zero should be dropped from the output.
#' @param method A character that identifies the confidence interval method to use. See details in \code{\link{psdCI}}.
#' @param addLens A numeric vector that contains minimum lengths for additional categories. See \code{\link{psdVal}} for details.
#' @param addNames A string vector that contains names for the additional lengths added with \code{addLens}. See \code{\link{psdVal}} for details.
#' @param justAdds A logical that indicates whether just the values related to the length sin \code{addLens} should be returned.
#' @param conf.level A number that indicates the level of confidence to use for constructing confidence intervals (default is \code{0.95}).
#' @param showIntermediate A logical that indicates whether the number of fish in the category and the number of stock fish (i.e., \dQuote{intermediate} values) should be included in the returned matrix. Default is to not include these values.
#' @param digits A numeric that indicates the number of decimals to round the result to. Default is zero digits following the recommendation of Neumann and Allen (2007).
#'
#' @return A matrix with columns that contain the computed PSD-X or PSD X-Y values and associated confidence intervals. If \code{showIntermediate=TRUE} then the number of fish in the category and the number of stock fish will also be shown.
#'
#' @section Testing: Point estimate calculations match those constructed "by hand."
#'
#' @author Derek H. Ogle, \email{DerekOgle51@gmail.com}
#'
#' @section IFAR Chapter: 6-Size Structure.
#'
#' @seealso See \code{\link{psdVal}}, \code{\link{psdPlot}}, \code{\link{psdAdd}}, \code{\link{PSDlit}}, \code{\link{tictactoe}}, \code{\link{lencat}}, and \code{\link{rcumsum}} for related functionality.
#'
#' @references Ogle, D.H. 2016. \href{https://fishr-core-team.github.io/fishR/pages/books.html#introductory-fisheries-analyses-with-r}{Introductory Fisheries Analyses with R}Chapman & Hall/CRC, Boca Raton, FL.
#'
#' Guy, C.S., R.M. Neumann, and D.W. Willis. 2006. New terminology for proportional stock density (PSD) and relative stock density (RSD): proportional size structure (PSS). Fisheries 31:86-87. [Was (is?) from http://pubstorage.sdstate.edu/wfs/415-F.pdf.]
#'
#' Guy, C.S., R.M. Neumann, D.W. Willis, and R.O. Anderson2006Proportional size distribution (PSD): A further refinement of population size structure index terminology. Fisheries. 32:348. [Was (is?) from http://pubstorage.sdstate.edu/wfs/450-F.pdf.]
#'
#' Neumann, R.M. and Allen, M.S. 2007. Size structure. In Guy, C.S. and Brown, M.L., editors, Analysis and Interpretation of Freshwater Fisheries Data, Chapter 9, pages 375-421. American Fisheries Society, Bethesda, MD.
#'
#' Willis, D.W., B.R. Murphy, and C.S. Guy. 1993. Stock density indices: development, use, and limitations. Reviews in Fisheries Science 1:203-222. [Was (is?) from http://web1.cnre.vt.edu/murphybr/web/Readings/Willis\%20et\%20al.pdf.]
#'
#' @keywords hplot
#'
#' @examples
#' ## Random length data
#' # suppose this is yellow perch to the nearest mm
#' yepdf <- data.frame(yepmm=round(c(rnorm(100,mean=125,sd=15),
#' rnorm(50,mean=200,sd=25),
#' rnorm(20,mean=300,sd=40)),0),
#' species=rep("Yellow Perch",170))
#' psdCalc(~yepmm,data=yepdf,species="Yellow perch",digits=1)
#' psdCalc(~yepmm,data=yepdf,species="Yellow perch",digits=1,drop0Est=TRUE)
#'
#' ## add a length
#' psdCalc(~yepmm,data=yepdf,species="Yellow perch",addLens=150)
#'
#' ## add lengths with names
#' psdCalc(~yepmm,data=yepdf,species="Yellow perch",addLens=150,addNames="minLen")
#' psdCalc(~yepmm,data=yepdf,species="Yellow perch",addLens=c("minLen"=150))
#' psdCalc(~yepmm,data=yepdf,species="Yellow perch",addLens=c(150,275),addNames=c("minSlot","maxSlot"))
#' psdCalc(~yepmm,data=yepdf,species="Yellow perch",addLens=c("minLen"=150,"maxslot"=275))
#'
#' ## add lengths with names, return just those values that use those lengths
#' psdCalc(~yepmm,data=yepdf,species="Yellow perch",addLens=c("minLen"=150),justAdds=TRUE)
#' psdCalc(~yepmm,data=yepdf,species="Yellow perch",addLens=c("minLen"=150),justAdds=TRUE,
#' what="traditional")
#' psdCalc(~yepmm,data=yepdf,species="Yellow perch",addLens=c(150,275),
#' addNames=c("minSlot","maxSlot"),justAdds=TRUE)
#' psdCalc(~yepmm,data=yepdf,species="Yellow perch",addLens=c(150,275),
#' addNames=c("minSlot","maxSlot"),justAdds=TRUE,what="traditional")
#'
#' ## different output types
#' psdCalc(~yepmm,data=yepdf,species="Yellow perch",addLens=150,what="traditional")
#' psdCalc(~yepmm,data=yepdf,species="Yellow perch",addLens=150,what="incremental")
#' psdCalc(~yepmm,data=yepdf,species="Yellow perch",addLens=150,what="none")
#'
#' ## Show intermediate values
#' psdCalc(~yepmm,data=yepdf,species="Yellow perch",showInterm=TRUE)
#' psdCalc(~yepmm,data=yepdf,species="Yellow perch",what="traditional",showInterm=TRUE)
#' psdCalc(~yepmm,data=yepdf,species="Yellow perch",what="incremental",showInterm=TRUE)
#'
#' ## Control the digits
#' psdCalc(~yepmm,data=yepdf,species="Yellow perch",digits=1)
#'
#' ## Working with a species not in PSDlit ... same data, but don't give species
#' psdCalc(~yepmm,data=yepdf,addLens=c("stock"=130,"quality"=200,"preferred"=250,
#' "memorable"=300,"trophy"=380))
#' psdCalc(~yepmm,data=yepdf,addLens=c("stock"=130,"quality"=200,
#' "preferred"=250,"name1"=220))
#'
#' @export psdCalc
psdCalc <- function(formula,data,species,units=c("mm","cm","in"),
method=c("multinomial","binomial"),conf.level=0.95,
addLens=NULL,addNames=NULL,justAdds=FALSE,
what=c("all","traditional","incremental","none"),
drop0Est=TRUE,showIntermediate=FALSE,digits=0) {
method <- match.arg(method)
what <- match.arg(what)
units <- match.arg(units)
## Check on conf.level
iCheckConfLevel(conf.level)
## make sure species is not missing, or if it is that addLens have been given
if (!missing(species)) {
brks <- psdVal(species,units=units,incl.zero=FALSE,
addLens=addLens,addNames=addNames)
} else {
## species is missing so must have and addLens
if (is.null(addLens)) STOP("Must include name in 'species' or lengths in 'addLens'.")
## ... and addLens must have at least two values
if (length(addLens)<2) STOP("'addLens' must contain at least two length categories.")
## ... and those lengths must be named ...
if (is.null(names(addLens))) {
if (is.null(addNames))
STOP("Category names must be defined in 'addLens' or given in 'addNames'.")
if (length(addLens)!=length(addNames))
STOP("'addLens' and 'addNames' are different lengths.")
names(addLens) <- addNames
}
## first category must be "stock"
if (names(addLens)[1]!="stock") STOP("First category name must be 'stock'.")
## another category must be "quality"
# if (!("quality" %in% names(addLens)))
# STOP("One length category must be called 'quality'")
## looks good so set brks to addLens (but make sure they are ordered)
brks <- addLens[order(addLens)]
}
## find psd lengths for this species
## perform checks and initial preparation of the data.frame
dftemp <- iPrepData4PSD(formula,data,brks[1],units)
## add the length categorization variable, don't drop unused levels
dftemp <- lencat(formula,data=dftemp,breaks=brks,vname="lcatr",
use.names=TRUE,droplevels=FALSE)
## get sample size (number of stock-length fish)
n <- nrow(dftemp)
## make the proportions table
ptbl <- prop.table(table(dftemp$lcatr))
## check to see if some fish are more than quality-sized
if (!cumsum(ptbl)[[2]]<1) WARN("No fish in larger than 'stock' categories.")
## compute all traditional and interval PSD values
res <- iGetAllPSD(ptbl,n=n,method=method,conf.level=conf.level,digits=digits)
## decide to keep intermediate calculation columns or not (in first two columns)
if (!showIntermediate) res <- res[,-c(1:2)]
## return result
k <- length(ptbl)
switch(what,
all= { },
traditional= { res <- res[1:(k-1),] },
incremental= { res <- res[k:nrow(res),] }
)
## Drop estimates that are zero if requested to do so
if (drop0Est) res <- res[res[,"Estimate"]>0,,drop=FALSE]
## Return just the additional lengths if requested to do so
if (justAdds & !is.null(addLens)) {
# add names to the additional lengths
addLens <- iHndlAddNames(addLens,addNames)
# find which rows in the result vector contain the
# additional lengths that the user is asking for
tmp <- NULL
for (i in names(addLens)) tmp <- c(tmp,grep(i,rownames(res)))
res <- res[sort(unique(tmp)),]
}
## Invisibly return the matrix if requested to do so
if (what=="none") invisible(res)
else round(res,digits)
}
# ==============================================================================
# Internal function to prepare the data.frame for ease of computing the PSD
# values. Performs some checks and deletes the sub-stock fish.
# ==============================================================================
iPrepData4PSD <- function(formula,data,stock.len,units) {
## check if the data.frame has data
if (nrow(data)==0) STOP("'data' does not contain any rows.")
## remove "tibble" if it was (tibbles cause problems below)
if (inherits(data,"tbl_df")) data <- as.data.frame(data)
## get name of length variable from the formula
cl <- iGetVarFromFormula(formula,data,expNumVars=1)
if (!is.numeric(data[,cl])) STOP("Variable in 'formula' must be numeric.")
## restrict data to above the stock length
data <- data[data[,cl]>=stock.len,]
## assure that NA values in the length variable are removed
data <- data[!is.na(data[,cl]),]
# if nothing in data.frame then send error
if (nrow(data)==0) {
msg <- "There are no stock-length fish in the sample.\n"
msg <- paste0(msg,"Note that units='",units,"' was used.\n")
STOP(msg)
}
# return new data.frame
data
}
# ==============================================================================
# INTERNAL functions that will compute all available traditional and incremental
# PSD valuesA matrix of PSD values and confidence intervals will be returned.
# ==============================================================================
iMakePSDLabels <- function(nms) {
# check if any names are not Gabelhouse names
tmp <- which(!(nms %in% c("stock","quality","preferred","memorable","trophy")))
# convert breaks names to one letter
abb <- toupper(substring(nms,1,1))
# but put non-Gabelhouse names back in
abb[tmp] <- nms[tmp]
# return abbreviations
abb
}
iMakePSDIV <- function(ptbl) {
## Get number of categories
k <- length(ptbl)
## Get category name abbreviations
abb <- iMakePSDLabels(names(ptbl))
## make matrix for traditional PSDs
tmp1 <- matrix(0,nrow=k-1,ncol=k)
tmp1[upper.tri(tmp1)] <- 1
rownames(tmp1) <- paste("PSD",abb[-1],sep="-")
## make identify matrix for incremental PSD
tmp2 <- matrix(0,nrow=(k-1),ncol=k)
diag(tmp2) <- 1
rownames(tmp2) <- paste0("PSD ",abb[1:(k-1)],"-",abb[2:k])
## put together and return
rbind(tmp1,tmp2)
}
iGetAllPSD <- function(ptbl,n,method,conf.level=0.95,digits) {
## Make a matrix of indicator variables for all PSDs
id1 <- iMakePSDIV(ptbl)
## check if sample size is >20 (see Brenden et al. 2008), warn if not
# do this here and suppress warnings for psdCI so that there is only one warning
ns <- n*ptbl
if (any(ns>0 & ns<20))
WARN("Some category sample size <20, some CI coverage may be\n lower than ",
100*conf.level,"%.")
## Compute all PSDs
suppressWarnings(res <- t(apply(id1,MARGIN=1,FUN=psdCI,ptbl=ptbl,n=n,
method=method,conf.level=conf.level,digits=digits)))
## Add the numerator (number in category) and denominator (stock) columns
res <- cbind(res[,1]*n/100,n,res)
colnames(res) <- c("num","stock","Estimate",iCILabel(conf.level))
res
}