-
Notifications
You must be signed in to change notification settings - Fork 0
/
sample_dove_routes.R
115 lines (87 loc) · 3.24 KB
/
sample_dove_routes.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#' Select a random subset of Breeding Bird Survey (BBS) routes
#'
#' Randomly samples routes, without replacement, from the unique routes found
#' in `dat` and assigns an equal number of the sampled routes to each of the
#' requested `years`, so that no route is used for more than one year. The
#' sampled route-year combinations are then merged with `dat`.
#'
#' @details
#' The number of routes assigned to each year is the number of unique routes
#' divided by the number of years (rounded down), capped at `max.sample`.
#' Sampled route-year combinations that have no matching record in `dat` are
#' kept in the output with `tot.seen` set to 0. Records in `dat` that do not
#' correspond to a sampled route-year combination are dropped.
#'
#' The counts of routes assigned to each year are printed with `cat()`.
#'
#' @param dat Data frame containing BBS data. Must contain the columns
#'   `Route`, `Year` and `tot.seen`; any other columns (e.g. the Aou species
#'   code) are carried through to the output.
#' @param years Years for which you want to generate data.
#' @param max.sample Maximum number of routes per year.
#'
#' @return A data frame with one row per merged record for the sampled
#'   route-year combinations. In addition to the columns of `dat` it contains
#'   `route.index` (position of the route in the random draw) and `cuts`
#'   (factor identifying the block of `route.index` values assigned to each
#'   year).
#'
#' @examples
#'
#' ## create data
#' #dat <- sample_dove_routes(dat = doves_PA,
#' #                          years = c(2005,2007,2010))
#'
#' ## Make year a factor
#' #dat$Year <- factor(dat$Year)
#'
#' ## Plot means with 95% confidence intervals
#'
#' library(ggplot2)
#' library(ggpubr)
#'
#' #ggerrorplot(dat,
#' #            x = "Year",
#' #            y = "tot.seen",
#' #            desc_stat = "mean_ci",
#' #            add = "mean")
#'
#' @export
sample_dove_routes <- function(dat = doves_PA,
                               years = c(1990,2000,2005),
                               max.sample = 50){

  # Fail early with a clear message rather than an obscure downstream error
  required.cols <- c("Route", "Year", "tot.seen")
  missing.cols <- setdiff(required.cols, names(dat))
  if (length(missing.cols) > 0) {
    stop("`dat` is missing required column(s): ",
         paste(missing.cols, collapse = ", "),
         call. = FALSE)
  }
  if (length(years) < 1) {
    stop("`years` must contain at least one year.", call. = FALSE)
  }

  # IDs of unique routes in dataset, and how many there are
  unique.routes <- unique(dat$Route)
  n.unique.routes <- length(unique.routes)

  # number of years
  n.years <- length(years)

  # number of routes per year, rounding down and capped at max.sample
  n.routes.per.year <- min(trunc(n.unique.routes / n.years), max.sample)
  if (n.routes.per.year < 1) {
    stop("Cannot assign at least one route to each year: ",
         n.unique.routes, " unique route(s), ",
         n.years, " year(s), max.sample = ", max.sample, ".",
         call. = FALSE)
  }

  # total number of routes actually sampled
  n.routes.tot <- n.routes.per.year * n.years

  # Sample n.routes.tot routes without replacement; the result is a random
  # subset of the routes in random (not sequential) order.
  # Positions are sampled rather than the IDs themselves because
  # sample(x, ...) treats a single numeric x as 1:x, which would return
  # routes that do not exist when there is only one unique route.
  sampled.routes <- unique.routes[sample.int(n.unique.routes,
                                             size = n.routes.tot,
                                             replace = FALSE)]

  # data frame of sampled routes with a numeric index giving the draw order
  route.df <- data.frame(route.index = seq_len(n.routes.tot),
                         Route = sampled.routes)

  # Divide the index into n.years consecutive blocks of n.routes.per.year
  # routes each. Base cut() with exact breaks guarantees one block per year
  # and needs no extra package.
  route.df$cuts <- cut(route.df$route.index,
                       breaks = seq(0, n.routes.tot, by = n.routes.per.year))

  # label each block with its year (first block -> first year, and so on)
  year.factor <- factor(route.df$cuts,
                        labels = years)
  cat(summary(year.factor))
  route.df$Year <- as.numeric(as.character(year.factor))

  # Full outer merge of the sampled route-years with the original data (on
  # the columns the two data frames share)
  df.merged <- merge(route.df, dat, all = TRUE)

  # sampled route-years without a record in dat count as zero birds seen
  df.merged$tot.seen[is.na(df.merged$tot.seen)] <- 0

  # keep only the requested years
  df.years <- df.merged[df.merged$Year %in% years, ]

  # Drop records that are not part of the sample (no block assigned).
  # A logical mask is used because df[-which(cond), ] returns zero rows
  # when no row satisfies cond.
  df.years[!is.na(df.years$cuts), ]
}