/
filters.R
154 lines (143 loc) · 3.6 KB
/
filters.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#' Exclude records with \code{species} "corolla-corolla".
#'
#' \lifecycle{stable}
#'
#' Rows are dropped when the column \code{species} and/or the column
#' \code{encounter_species} equals "corolla-corolla". A column that is absent
#' from \code{x} is skipped, so a table with neither column is returned
#' unchanged. Rows with a missing (\code{NA}) value in these columns are kept.
#'
#' @param x A tibble with a column "species" and/or "encounter_species".
#' @return \code{x} without the rows whose \code{species} or
#'   \code{encounter_species} is "corolla-corolla".
#' @export
#' @family helpers
exclude_training_species <- function(x) {
  # `%in%` never yields NA, so rows with a missing species are retained.
  # `species != "corolla-corolla"` evaluates to NA for such rows and
  # dplyr::filter() drops rows where the condition is NA.
  if ("species" %in% names(x)) {
    x <- dplyr::filter(x, !(species %in% "corolla-corolla"))
  }
  if ("encounter_species" %in% names(x)) {
    x <- dplyr::filter(x, !(encounter_species %in% "corolla-corolla"))
  }
  x
}
#' @export
#' @rdname exclude_training_species
filter_realspecies <- function(x) {
  # Drops rows whose `species` and/or `encounter_species` is
  # "corolla-corolla"; a column that is not present in `x` is skipped.
  # `%in%` is used instead of `!=` because `!=` yields NA for missing values
  # and dplyr::filter() would then silently drop those rows as well.
  training_species <- "corolla-corolla"
  if ("species" %in% names(x)) {
    x <- dplyr::filter(x, !(species %in% training_species))
  }
  if ("encounter_species" %in% names(x)) {
    x <- dplyr::filter(x, !(encounter_species %in% training_species))
  }
  x
}
#' Exclude ODKC records with \code{species} "corolla-corolla".
#'
#' \lifecycle{stable}
#'
#' Rows whose \code{details_species} equals "corolla-corolla" are dropped.
#' Rows with a missing (\code{NA}) \code{details_species} are kept.
#'
#' @param x A tibble with a column "details_species".
#' @return \code{x} without the rows whose \code{details_species} is
#'   "corolla-corolla".
#' @export
#' @family odkc
exclude_training_species_odkc <- function(x) {
  # `%in%` never yields NA, whereas `!=` does for missing values, which
  # dplyr::filter() would treat as "drop this row".
  dplyr::filter(x, !(details_species %in% "corolla-corolla"))
}
#' Filter records with missing \code{survey_id}
#'
#' \lifecycle{stable}
#'
#' Keeps only the rows of \code{x} whose \code{survey_id} is \code{NA}.
#'
#' @param x The output of \code{\link{parse_surveys}}
#' @return The rows of \code{x} where \code{survey_id} is missing.
#' @export
#' @family helpers
filter_missing_survey <- function(x) {
  dplyr::filter(x, is.na(survey_id))
}
#' @export
#' @rdname filter_missing_survey
filter_nosurvey <- function(x) {
  # Keeps only the rows whose `survey_pk` is NA.
  # NOTE(review): filter_missing_survey() tests `survey_id` rather than
  # `survey_pk` - confirm which column callers of this alias expect.
  dplyr::filter(x, is.na(survey_pk))
}
#' Filter records with missing \code{site_pk}
#'
#' Keeps only the rows of \code{x} whose \code{site_pk} is \code{NA}.
#'
#' @param x The output of \code{parse_turtle_nest_encounters}
#' @return The rows of \code{x} where \code{site_pk} is missing.
#' @export
#' @family helpers
filter_missing_site <- function(x) {
  dplyr::filter(x, is.na(site_pk))
}
#' @export
#' @rdname filter_missing_site
filter_nosite <- function(x) {
  # Keeps only the rows whose `site_pk` is NA.
  dplyr::filter(x, is.na(site_pk))
}
#' Exclude training surveys.
#'
#' \lifecycle{stable}
#'
#' Keeps only the rows of \code{x} whose \code{is_production} equals
#' \code{TRUE}.
#'
#' @param x The output of \code{parse_surveys}
#' @return The rows of \code{x} where \code{is_production} is \code{TRUE}.
#' @export
#' @family helpers
exclude_training_surveys <- function(x) {
  dplyr::filter(x, is_production == TRUE)
}
#' @export
#' @rdname exclude_training_surveys
filter_realsurveys <- function(x) {
  # Keeps only the rows whose `is_production` equals TRUE.
  dplyr::filter(x, is_production == TRUE)
}
#' Filter surveys with "NEEDS QA" in \code{start_comments} or
#' \code{end_comments}.
#'
#' \lifecycle{stable}
#'
#' Keeps the rows where either comment column matches the pattern "NEEDS QA"
#' and returns them reduced to the columns \code{change_url},
#' \code{turtle_date}, \code{site_name}, \code{reporter},
#' \code{reporter_username}, \code{start_comments} and \code{end_comments}.
#'
#' @param x The output of \code{parse_surveys}
#' @return The matching rows of \code{x}, restricted to the columns listed
#'   above.
#' @export
#' @family helpers
filter_surveys_requiring_qa <- function(x) {
  # Step 1: rows flagged in the start or the end comments.
  flagged <- dplyr::filter(
    x,
    grepl("NEEDS QA", start_comments) | grepl("NEEDS QA", end_comments)
  )
  # Step 2: only the columns needed to review the flagged surveys.
  dplyr::select(
    flagged,
    change_url, turtle_date, site_name, reporter, reporter_username,
    start_comments, end_comments
  )
}
#' Filter surveys with a missing \code{end_source_id}.
#'
#' \lifecycle{stable}
#'
#' Keeps the rows whose \code{end_source_id} is \code{NA} and returns them
#' reduced to the columns \code{change_url}, \code{turtle_date},
#' \code{site_name}, \code{reporter}, \code{season}, \code{start_time},
#' \code{end_time}, \code{start_comments} and \code{end_comments}.
#'
#' @param x The output of \code{parse_surveys}
#' @return The matching rows of \code{x}, restricted to the columns listed
#'   above.
#' @export
#' @family helpers
filter_surveys_missing_end <- function(x) {
  # Step 1: surveys without an end_source_id.
  without_end <- dplyr::filter(x, is.na(end_source_id))
  # Step 2: only the columns needed to review those surveys.
  dplyr::select(
    without_end,
    change_url, turtle_date, site_name, reporter, season,
    start_time, end_time, start_comments, end_comments
  )
}
#' Filter a dataframe of tracks, disturbance, incidents, or surveys to season
#'
#' \lifecycle{stable}
#'
#' @param data A dataframe of tracks, disturbance, incidents, or surveys
#' containing a column "season" (int) with the season start year, e.g. 2019.
#' @param season_start_year The desired season's start year, e.g. 2019.
#' @return The dataframe filtered to rows from the desired season.
#' @export
#' @family helpers
filter_wastd_season <- function(data, season_start_year) {
  # `!!` inlines the value of the argument. Without it, a column of `data`
  # that happens to be named "season_start_year" would take precedence over
  # the function argument inside filter() (data masking).
  dplyr::filter(data, season == !!season_start_year)
}
# usethis::use_test("filters")