-
Notifications
You must be signed in to change notification settings - Fork 0
/
sample_BBS_routes.R
133 lines (104 loc) · 4.08 KB
/
sample_BBS_routes.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#' Select random subset of Breeding Bird Survey (BBS) routes
#'
#' This function randomly samples routes from within a stage.
#'
#' @param dat Dataframe containing BBS data. Must contain columns for Year, Aou species code and Route
#' @param years Years for for which you want to generate data for
#' @param aou.code American Ornithological Union (AOU) numeric species code
#' @param max.sample Maximum number routes per year.
#'
#' @examples
#'
#' ## Setting up dataframe to examine change over time
#' ## Focal species: European starling (Aou code = 4940)
#' ### Start with the raw BBS_PA dataframe
#' ### The function sample_BBS_routes()
#' ### randomly samples routes for years your specify.
#' ### It selects routes from the entire state.
#' ### Note: sample_BBS_routes() requires the Hmisc package to work
#'
#' ## Subset European starling data
#' ### See help file for AOU_species_codes for further details
#' ### on subsetting data based on a species numeric code
#' \dontrun{
#' BBS_PA_EUST <- BBS_PA %>% filter(Aou == 4940)
#'
#' ## select the years 1990, 2000, and 2010
#' BBS_PA_EUST_2 <- sample_BBS_routes(dat = BBS_PA_EUST,
#' years = c(1990,2000,2010),
#' aou.code = 4940)
#'
#' ## Look at data subset that has been created
#' head(BBS_PA_EUST_2)
#' summary(BBS_PA_EUST_2)
#'
#' ##Plot the results with a boxplot
#' ggboxplot(data = BBS_PA_EUST_2,
#' y = "SpeciesTotal",
#' x = "Year")
#'
#' ## Save output
#' ### data can be saved with write.csv(), eg:
#' write.csv(BBS_PA_EUST_2, file = "starling_change_over_time.csv")
#' }
#'
#' @export
sample_BBS_routes <- function(dat = BBS_PA,
years = c(1990,2000,2005),
aou.code = 7610,
max.sample = 50){
#IDs of unique routes in dataset
unique.routes <- unique(dat$Route)
#Number of unique routes
n.unique.routes <- length(unique.routes)
#number of years
n.years <- length(years)
#number of routes per year, rounding down
n.routes.per.year <- trunc(n.unique.routes/n.years)
#set total number of routes
##can be less that all possible if allowed by user
n.routes.per.year <- ifelse(n.routes.per.year > max.sample,
max.sample,
n.routes.per.year)
#cat(n.unique.routes," unique routes in dataset. \n ")
#cat(n.routes.per.year," routes per year will be randomly sampled, without replacement \n")
#total number of routes actually sampled
n.routes.tot <- n.routes.per.year*n.years
#sample routes#
##sample w/o replacement n.routes.total from vector of
##all unique routes
##This creates a new vector of route numbers
##that i)is a sample of all possible routes, and
## ii)is in a random order (not sequential)
sampled.routes <- sample(x = unique.routes,
size = n.routes.tot,
replace = F)
#assign a numeric index to each sampled and re-ordered route order
route.index <- 1:n.routes.tot
#create dataframe of sampled routes
route.df <- data.frame(Aou = aou.code,
route.index,
Route = sampled.routes)
#load Hmisc
requireNamespace("Hmisc")
#use cut2() to divide route index number into even number of groups
route.df$cuts <- Hmisc::cut2(route.df$route.index,
m = n.routes.per.year)
#set as factor
route.df$Year <- factor(route.df$cuts,
labels = years)
cat(summary(route.df$Year))
route.df$Year <- as.numeric(as.character(route.df$Year))
any(duplicated(route.df) == TRUE)
# merge sampled routes with original BBS data to generate new dataframe
df.out2 <- merge(route.df, dat, all = TRUE)
df.out2$SpeciesTotal[is.na(df.out2$SpeciesTotal)] <- 0
df.out3 <- df.out2[which(df.out2$Year %in% years),]
df.out4 <- df.out3[-which(is.na(df.out3$cuts) == TRUE),]
# cat("Original BBS dataframe was", dim(dat), "\n")
# cat("New resampled dataframe is", dim(df.out2),"\n")
#
df.out4 <- df.out4[,c("Aou","Route","Year","countrynum","statenum",
"SpeciesTotal")]
return(df.out4)
}