/
plot_monthly_cumulative_stats.R
251 lines (216 loc) 路 12.6 KB
/
plot_monthly_cumulative_stats.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
# Copyright 2019 Province of British Columbia
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
#' @title Plot cumulative monthly flow statistics
#'
#' @description Plot the monthly cumulative mean, median, maximum, minimum, and 5, 25, 75, 95th percentiles for each month of the year
#' from a daily streamflow data set. Calculates statistics from all values from complete years, unless specified.
#' Data calculated using \code{calc_monthly_cumulative_stats()} function. Can plot individual years for comparison using the
#' add_year argument. Defaults to volumetric cumulative flows, can use \code{use_yield} and \code{basin_area} to convert to
#' water yield. Returns a list of plots.
#'
#' @inheritParams calc_monthly_cumulative_stats
#' @inheritParams plot_daily_cumulative_stats
#'
#' @return A list of ggplot2 objects with the following for each station provided:
#' \item{Monthly_Cumulative_Stats}{a plot that contains monthly cumulative flow statistics}
#' Default plots on each object:
#' \item{Mean}{monthly cumulative mean}
#' \item{Median}{monthly cumulative median}
#' \item{Min-5 Percentile Range}{a ribbon showing the range of data between the monthly cumulative minimum and 5th percentile}
#' \item{5-25 Percentiles Range}{a ribbon showing the range of data between the monthly cumulative 5th and 25th percentiles}
#' \item{25-75 Percentiles Range}{a ribbon showing the range of data between the monthly cumulative 25th and 75th percentiles}
#' \item{75-95 Percentiles Range}{a ribbon showing the range of data between the monthly cumulative 75th and 95th percentiles}
#' \item{95 Percentile-Max Range}{a ribbon showing the range of data between the monthly cumulative 95th percentile and the maximum}
#' \item{'Year' Flows}{(optional) the monthly cumulative flows for the designated year}
#'
#' @seealso \code{\link{calc_monthly_cumulative_stats}}
#'
#' @examples
#' # Run if HYDAT database has been downloaded (using tidyhydat::download_hydat())
#' if (file.exists(tidyhydat::hy_downloaded_db())) {
#'
#' # Plot annual cumulative volume statistics
#' plot_monthly_cumulative_stats(station_number = "08NM116")
#'
#' # Plot annual cumulative yield statistics with default HYDAT basin area
#' plot_monthly_cumulative_stats(station_number = "08NM116",
#' use_yield = TRUE)
#'
#' # Plot annual cumulative yield statistics with custom basin area
#' plot_monthly_cumulative_stats(station_number = "08NM116",
#' use_yield = TRUE,
#' basin_area = 800)
#'
#' }
#' @export
plot_monthly_cumulative_stats <- function(data,
dates = Date,
values = Value,
groups = STATION_NUMBER,
station_number,
use_yield = FALSE,
basin_area,
water_year_start = 1,
start_year,
end_year,
exclude_years,
months = 1:12,
log_discharge = FALSE,
log_ticks = ifelse(log_discharge, TRUE, FALSE),
include_title = FALSE,
add_year){
## ARGUMENT CHECKS
## others will be check in calc_ function
## ---------------
if (missing(data)) {
data <- NULL
}
if (missing(station_number)) {
station_number <- NULL
}
if (missing(start_year)) {
start_year <- 0
}
if (missing(end_year)) {
end_year <- 9999
}
if (missing(exclude_years)) {
exclude_years <- NULL
}
if (missing(basin_area)) {
basin_area <- NA
}
if (missing(add_year)) {
add_year <- NULL
}
logical_arg_check(log_discharge)
log_ticks_checks(log_ticks, log_discharge)
add_year_checks(add_year)
logical_arg_check(include_title)
## FLOW DATA CHECKS AND FORMATTING
## -------------------------------
# Check if data is provided and import it
flow_data <- flowdata_import(data = data, station_number = station_number)
# Check and rename columns
flow_data <- format_all_cols(data = flow_data,
dates = as.character(substitute(dates)),
values = as.character(substitute(values)),
groups = as.character(substitute(groups)),
rm_other_cols = TRUE)
## CALC STATS
## ----------
monthly_stats <- calc_monthly_cumulative_stats(data = flow_data,
percentiles = c(5,25,75,95),
use_yield = use_yield,
basin_area = basin_area,
water_year_start = water_year_start,
start_year = start_year,
end_year = end_year,
exclude_years = exclude_years,
months = months)
## ADD YEAR IF SELECTED
## --------------------
if(!is.null(add_year)){
year_data <- fill_missing_dates(data = flow_data, water_year_start = water_year_start)
year_data <- add_date_variables(data = year_data, water_year_start = water_year_start)
# Add cumulative flows
if (use_yield){
year_data <- add_cumulative_yield(data = year_data, water_year_start = water_year_start, basin_area = basin_area,
months = months)
year_data$Cumul_Flow <- year_data$Cumul_Yield_mm
} else {
year_data <- add_cumulative_volume(data = year_data, water_year_start = water_year_start,
months = months)
year_data$Cumul_Flow <- year_data$Cumul_Volume_m3
}
year_data <- dplyr::filter(year_data, WaterYear >= start_year & WaterYear <= end_year)
year_data <- dplyr::filter(year_data, !(WaterYear %in% exclude_years))
year_data <- dplyr::filter(year_data, WaterYear == add_year)
year_data <- dplyr::summarize(dplyr::group_by(year_data, STATION_NUMBER, WaterYear, MonthName),
Monthly_Total = max(Cumul_Flow, na.rm = FALSE))
year_data <- dplyr::rename(year_data, "Month" = MonthName)
# Add the daily data from add_year to the daily stats
monthly_stats <- dplyr::left_join(monthly_stats, year_data, by = c("STATION_NUMBER", "Month"))
# Warning if all daily values are NA from the add_year
for (stn in unique(monthly_stats$STATION_NUMBER)) {
year_test <- dplyr::filter(monthly_stats, STATION_NUMBER == stn)
if(all(is.na(monthly_stats$Monthly_Total)))
warning("Daily data does not exist for the year listed in add_year and was not plotted.", call. = FALSE)
}
}
monthly_stats[is.na(monthly_stats)] <- 0
## PLOT STATS
## ----------
# Create the daily stats plots
monthly_plots <- dplyr::group_by(monthly_stats, STATION_NUMBER)
monthly_plots <- tidyr::nest(monthly_plots)
monthly_plots <- dplyr::mutate(
monthly_plots,
plot = purrr::map2(
data, STATION_NUMBER,
~ggplot2::ggplot(data = ., ggplot2::aes(x = Month, group = 1)) +
ggplot2::geom_ribbon(ggplot2::aes(ymin = Minimum, ymax = P5, fill = "Min-5th Percentile")) +
ggplot2::geom_ribbon(ggplot2::aes(ymin = P5, ymax = P25, fill = "5th-25th Percentile")) +
ggplot2::geom_ribbon(ggplot2::aes(ymin = P25, ymax = P75, fill = "25th-75th Percentile")) +
ggplot2::geom_ribbon(ggplot2::aes(ymin = P75, ymax = P95, fill = "75th-95th Percentile")) +
ggplot2::geom_ribbon(ggplot2::aes(ymin = P95, ymax = Maximum, fill = "95th Percentile-Max")) +
ggplot2::geom_line(ggplot2::aes(y = Median, colour = "Median"), size = 0.7) +
ggplot2::geom_line(ggplot2::aes(y = Mean, colour = "Mean"), size = 0.7) +
ggplot2::scale_fill_manual(values = c("Min-5th Percentile" = "orange" , "5th-25th Percentile" = "yellow",
"25th-75th Percentile" = "skyblue1", "75th-95th Percentile" = "dodgerblue2",
"95th Percentile-Max" = "royalblue4"),
breaks = c("95th Percentile-Max", "75th-95th Percentile", "25th-75th Percentile",
"5th-25th Percentile", "Min-5th Percentile")) +
ggplot2::scale_color_manual(values = c("Median" = "purple3", "Mean" = "springgreen4")) +
{if (!log_discharge) ggplot2::scale_y_continuous(expand = c(0, 0), breaks = scales::pretty_breaks(n = 7),
labels = scales::label_number(scale_cut = append(scales::cut_short_scale(),1,1)))}+
{if (log_discharge) ggplot2::scale_y_log10(expand = c(0, 0), breaks = scales::log_breaks(n = 8, base = 10) ,
labels = scales::label_number(scale_cut = append(scales::cut_short_scale(),1,1)))}+
{if (log_discharge & log_ticks) ggplot2::annotation_logticks(base= 10, sides = "l", colour = "grey25", size = 0.3,
short = ggplot2::unit(.07, "cm"), mid = ggplot2::unit(.15, "cm"),
long = ggplot2::unit(.2, "cm"))} +
ggplot2::xlab("Month")+
ggplot2::scale_x_discrete(expand = c(0.01,0.01)) +
{if (!use_yield) ggplot2::ylab("Cumulative Volume (cubic metres)")} +
{if(use_yield) ggplot2::ylab("Cumulative Yield (mm)")} +
ggplot2::theme_bw() +
ggplot2::labs(color = 'Monthly Statistics') +
{if (include_title & .y != "XXXXXXX") ggplot2::labs(color = paste0(.y,'\n \nMonthly Statistics')) } +
ggplot2::theme(axis.text=ggplot2::element_text(size = 10, colour = "grey25"),
axis.title=ggplot2::element_text(size = 12, colour = "grey25"),
axis.title.y=ggplot2::element_text(margin = ggplot2::margin(0,0,0,0)),
axis.ticks = ggplot2::element_line(size = .1, colour = "grey25"),
axis.ticks.length=ggplot2::unit(0.05, "cm"),
panel.border = ggplot2::element_rect(colour = "black", fill = NA, size = 1),
panel.grid.minor = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_line(size = .1),
panel.background = ggplot2::element_rect(fill = "grey94"),
legend.text = ggplot2::element_text(size = 9, colour = "grey25"),
legend.box = "vertical",
legend.justification = "right",
legend.key.size = ggplot2::unit(0.4, "cm"),
legend.spacing = ggplot2::unit(-0.4, "cm"),
legend.background = ggplot2::element_blank()) +
ggplot2::guides(colour = ggplot2::guide_legend(order = 1), fill = ggplot2::guide_legend(order = 2, title = NULL)) +
{if (is.numeric(add_year)) ggplot2::geom_line(ggplot2::aes(y = Monthly_Total, colour = "yr.colour"), size = 0.7)} +
{if (is.numeric(add_year)) ggplot2::scale_color_manual(values = c("Mean" = "paleturquoise", "Median" = "dodgerblue4", "yr.colour" = "red"),
labels = c("Mean", "Median", paste0(add_year, " Flows")))}
))
# Create a list of named plots extracted from the tibble
plots <- monthly_plots$plot
if (nrow(monthly_plots) == 1) {
names(plots) <- paste0(ifelse(use_yield, "Monthly_Cumulative_Yield_Stats", "Monthly_Cumulative_Volumetric_Stats"))
} else {
names(plots) <- paste0(monthly_plots$STATION_NUMBER, ifelse(use_yield, "_Monthly_Cumulative_Yield_Stats", "_Monthly_Cumulative_Volumetric_Stats"))
}
plots
}