Skip to content

Commit

Permalink
added prep functions
Browse files Browse the repository at this point in the history
  • Loading branch information
jongoetz committed Nov 5, 2017
1 parent f40b86c commit ad2faf8
Show file tree
Hide file tree
Showing 9 changed files with 515 additions and 0 deletions.
3 changes: 3 additions & 0 deletions NAMESPACE
@@ -1,6 +1,9 @@
# Generated by roxygen2: do not edit by hand

export(annual_stats)
export(fasstr_add_date_vars)
export(fasstr_add_rolling_means)
export(fasstr_fill_missing_dates)
export(longterm.stats)
import(ggplot2)
import(scales)
Expand Down
118 changes: 118 additions & 0 deletions R/fasstr_add_date_vars.R
@@ -0,0 +1,118 @@
# Copyright 2017 Province of British Columbia
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.

#' @title Add calendar and water year date variables.
#'
#' @description Adds multiple date variables to a dataframe from a column of dates: year, month
#'    (numeric and abbreviated text), day of year, water year, and day of water year.
#'
#' @param flowdata Dataframe. A dataframe of daily data. A 'Date' column of class Date is required; all other
#'    columns are returned untouched. \code{flowdata} not required if \code{HYDAT} is used.
#' @param HYDAT Character. A HYDAT station number (e.g. "08NM116") of which to extract daily streamflow data from
#'    the HYDAT database. tidyhydat package and a downloaded SQLite HYDAT required.
#' @param water_year_start Numeric. Month to start water year (1 to 12 for Jan to Dec). Default 10 (Oct).
#'
#' @return The \code{flowdata} dataframe (or the HYDAT daily flows) with the following columns added:
#'   \item{Year}{calendar year}
#'   \item{Month}{calendar month, 1 to 12}
#'   \item{MonthName}{abbreviated month name, "Jan" to "Dec"}
#'   \item{DayofYear}{day of the calendar year, 1 to 366}
#'   \item{WaterYear}{water year, labelled by the calendar year in which it ends}
#'   \item{WaterDayofYear}{day of the water year, 1 to 366}
#'
#' @examples
#' \dontrun{
#'
#' flowdata <- data.frame(Date = seq(as.Date("2015-10-01"), as.Date("2016-09-30"), by = "day"))
#' fasstr_add_date_vars(flowdata = flowdata, water_year_start = 10)
#' }
#' @export

#--------------------------------------------------------------
# Add calendar and water year date variables to daily data

fasstr_add_date_vars <- function(flowdata = NULL,
                                 HYDAT = NULL,
                                 water_year_start = 10) {

  #############################################################
  # Some basic error checking on the input parameters
  # (&& / || are used because every condition here is a scalar test)

  if (is.null(flowdata) && is.null(HYDAT)) {
    stop("flowdata or HYDAT parameters must be set")
  }
  if (!is.null(HYDAT) && !is.null(flowdata)) {
    stop("Must select either flowdata or HYDAT parameters, not both.")
  }
  if (is.null(HYDAT)) {
    if (!is.data.frame(flowdata)) {
      stop("flowdata parameter is not a data frame.")
    }
    if (!("Date" %in% names(flowdata))) {
      stop("flowdata dataframe doesn't contain a Date variable.")
    }
    if (!inherits(flowdata$Date, "Date")) {
      stop("Date column in flowdata dataframe is not a date.")
    }
  }
  # A single whole month number is required. The previous range test
  # (x < 1 & x > 12) could never be TRUE, so bad values slipped through.
  if (!is.numeric(water_year_start) || length(water_year_start) != 1 ||
      !(water_year_start %in% 1:12)) {
    stop("water_year_start parameter must be numeric between 1 and 12 (Jan-Dec)")
  }

  # If HYDAT station is listed, check if it exists and make it the flowdata
  if (!is.null(HYDAT)) {
    if (!HYDAT %in% tidyhydat::allstations$STATION_NUMBER) {
      stop("Station in 'HYDAT' parameter does not exist.")
    }
    flowdata <- tidyhydat::DLY_FLOWS(STATION_NUMBER = HYDAT)
  }

  # Calendar variables. as.POSIXlt() stores years since 1900 and zero-based
  # month / day-of-year, hence the offsets.
  date_parts <- as.POSIXlt(flowdata$Date)
  flowdata$Year <- date_parts$year + 1900
  flowdata$Month <- date_parts$mon + 1
  flowdata$MonthName <- month.abb[flowdata$Month]
  flowdata$DayofYear <- date_parts$yday + 1

  # Water year variables
  if (water_year_start == 1) {
    flowdata$WaterYear <- flowdata$Year
    flowdata$WaterDayofYear <- flowdata$DayofYear
  } else {
    # A water year is labelled by the calendar year in which it ends
    flowdata$WaterYear <- as.numeric(ifelse(flowdata$Month >= water_year_start,
                                            flowdata$Year + 1,
                                            flowdata$Year))

    # Count days from the first day of each row's water year. Working from
    # real dates handles leap years for every start month, instead of relying
    # on a hand-maintained table of day-of-year offsets.
    water_year_first_day <- as.Date(ISOdate(flowdata$WaterYear - 1, water_year_start, 1))
    flowdata$WaterDayofYear <- as.numeric(flowdata$Date - water_year_first_day) + 1
  }

  # ADD SEASONS?

  return(flowdata)
} # end of function

104 changes: 104 additions & 0 deletions R/fasstr_add_rolling_means.R
@@ -0,0 +1,104 @@
# Copyright 2017 Province of British Columbia
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.

#' @title Add rolling means.
#'
#' @description Adds rolling means of daily flow to a dataframe, one column per requested window length.
#'    Missing dates are temporarily filled with NA so that every window spans consecutive days; the returned
#'    dataframe contains only the dates originally supplied.
#'
#' @param flowdata Dataframe. A dataframe of daily mean streamflow data.
#'    Two columns are required: a 'Date' column with dates formatted YYYY-MM-DD and a 'Q' (or 'Value') column with
#'    the daily mean streamflow values in units of cubic metres per second. \code{flowdata} not required if
#'    \code{HYDAT} is used.
#' @param HYDAT Character. A HYDAT station number (e.g. "08NM116") of which to extract daily streamflow data from
#'    the HYDAT database. tidyhydat package and a downloaded SQLite HYDAT required.
#' @param rolling_nday Numeric. Vector of window lengths in days (positive whole numbers). Default c(3,7,30).
#' @param align Character. Specifies whether the index of the result should be "right" (default), "left" or
#'    "center" aligned compared to the rolling window of observations.
#'
#' @return The \code{flowdata} dataframe (or the HYDAT daily flows) with one added column per value of
#'    \code{rolling_nday}, named 'Q', the zero-padded number of days, then 'DAvg' (e.g. Q03DAvg, Q07DAvg, Q30DAvg).
#'
#' @examples
#' \dontrun{
#'
#' fasstr_add_rolling_means(HYDAT = "08NM116", rolling_nday = c(3, 7, 30), align = "right")
#' }
#' @export

#--------------------------------------------------------------
# Add rolling means of daily flow over consecutive days

fasstr_add_rolling_means <- function(flowdata = NULL,
                                     HYDAT = NULL,
                                     rolling_nday = c(3, 7, 30),
                                     align = "right") {

  # Some basic error checking on the input parameters
  # (&& / || are used because every condition here is a scalar test)

  # TRUE if flow is in a column called Value, not Q; renamed back before returning
  value_is_q <- "Value" %in% names(flowdata)
  if (value_is_q) {
    flowdata <- dplyr::rename(flowdata, Q = Value)
  }
  if (is.null(flowdata) && is.null(HYDAT)) {
    stop("flowdata or HYDAT parameters must be set")
  }
  if (!is.null(HYDAT) && !is.null(flowdata)) {
    stop("Must select either flowdata or HYDAT parameters, not both.")
  }
  if (is.null(HYDAT)) {
    if (!is.data.frame(flowdata)) {
      stop("flowdata parameter is not a data frame.")
    }
    if (!all(c("Date", "Q") %in% names(flowdata))) {
      stop("flowdata dataframe doesn't contain date or flow columns (labeled Q or Value)")
    }
    if (!is.numeric(flowdata$Q)) {
      stop("Flow data (Q or Value) column in flowdata dataframe is not numeric.")
    }
    if (any(flowdata$Q < 0, na.rm = TRUE)) {
      stop('flowdata cannot have negative values - check your data')
    }
    if (!inherits(flowdata$Date, "Date")) {
      stop("Date column in flowdata dataframe is not a date.")
    }
  }
  if (!is.numeric(rolling_nday) || length(rolling_nday) == 0 || anyNA(rolling_nday) ||
      any(rolling_nday < 1) || any(rolling_nday %% 1 != 0)) {
    stop("rolling_nday parameter must be a numeric vector of positive whole numbers (days)")
  }
  # Fail early with the list of valid choices rather than deep inside zoo
  align <- match.arg(align, c("right", "left", "center"))

  # If HYDAT station is listed, check if it exists and make it the flowdata
  if (!is.null(HYDAT)) {
    if (!HYDAT %in% tidyhydat::allstations$STATION_NUMBER) {
      stop("Station in 'HYDAT' parameter does not exist.")
    }
    flowdata <- tidyhydat::DLY_FLOWS(STATION_NUMBER = HYDAT)
    flowdata <- dplyr::rename(flowdata, Q = Value)
  }

  # Remember the supplied dates so the temporary fill rows can be removed again
  dates.list <- c(flowdata$Date)

  # Fill in missing dates to ensure means roll over consecutive days
  flowdata <- fasstr_fill_missing_dates(flowdata = flowdata)

  # Add one rolling-mean column per requested window. With the default
  # windows this yields Q03DAvg, Q07DAvg and Q30DAvg.
  for (nday in unique(rolling_nday)) {
    column_name <- sprintf("Q%02dDAvg", as.integer(nday))
    flowdata[[column_name]] <- zoo::rollapply(flowdata$Q, nday, mean, fill = NA, align = align)
  }

  # Return flowdata to original dates
  flowdata <- dplyr::filter(flowdata, Date %in% dates.list)

  # Fill in STATION_NUMBER and Parameter if HYDAT selected
  if (!is.null(HYDAT)) {
    flowdata$STATION_NUMBER <- HYDAT
    flowdata$Parameter <- "FLOW"
    flowdata <- dplyr::rename(flowdata, Value = Q)
  }

  if (value_is_q) {
    flowdata <- dplyr::rename(flowdata, Value = Q)
  }

  return(flowdata)
} # end of function

153 changes: 153 additions & 0 deletions R/fasstr_fill_missing_dates.R
@@ -0,0 +1,153 @@
# Copyright 2017 Province of British Columbia
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.


#' @title Fill missing dates with NA.
#'
#' @description Adds a row for every missing date so that the data cover complete calendar years (or complete
#'    water years), from the start of the first year of data to the end of the last. Values in the added rows
#'    are NA.
#'
#' @param flowdata Dataframe. A dataframe of daily mean streamflow data.
#'    Two columns are required: a 'Date' column with dates formatted YYYY-MM-DD and a 'Q' (or 'Value') column with
#'    the daily mean streamflow values in units of cubic metres per second. \code{flowdata} not required if
#'    \code{HYDAT} is used.
#' @param HYDAT Character. A HYDAT station number (e.g. "08NM116") of which to extract daily streamflow data from
#'    the HYDAT database. tidyhydat package and a downloaded SQLite HYDAT required.
#' @param water_year Logical (TRUE/FALSE). Choose to fill to the start of the first/last water years.
#' @param water_year_start Numeric. Month to start water year (1 to 12 for Jan to Dec). Default 10 (Oct).
#'
#' @return The \code{flowdata} dataframe (or the HYDAT daily flows) sorted by Date, with one row per day.
#'    If 'STATION_NUMBER' or 'Parameter' columns are present they are filled with their first value. If the
#'    date variables from \code{fasstr_add_date_vars} are all present they are recomputed for every row
#'    using \code{water_year_start}.
#'
#' @examples
#' \dontrun{
#'
#' fasstr_fill_missing_dates(HYDAT = "08NM116", water_year = TRUE, water_year_start = 10)
#' }
#' @export

#--------------------------------------------------------------
# Fill gaps in daily data so it spans complete calendar or water years

fasstr_fill_missing_dates <- function(flowdata = NULL,
                                      HYDAT = NULL,
                                      water_year = FALSE,
                                      water_year_start = 10) {

  # Some basic error checking on the input parameters
  # (&& / || are used because every condition here is a scalar test)

  # TRUE if flow is in a column called Value, not Q; renamed back before returning
  value_is_q <- "Value" %in% names(flowdata)
  if (value_is_q) {
    if ("Q" %in% names(flowdata)) {
      stop("flowdata dataframe contains both Q and Value columns.")
    }
    names(flowdata)[names(flowdata) == "Value"] <- "Q"
  }
  if (is.null(flowdata) && is.null(HYDAT)) {
    stop("flowdata or HYDAT parameters must be set")
  }
  if (!is.null(HYDAT) && !is.null(flowdata)) {
    stop("Must select either flowdata or HYDAT parameters, not both.")
  }
  if (is.null(HYDAT)) {
    if (!is.data.frame(flowdata)) {
      stop("flowdata parameter is not a data frame.")
    }
    if (!all(c("Date", "Q") %in% names(flowdata))) {
      stop("flowdata dataframe doesn't contain date or flow columns (labeled Q or Value)")
    }
    if (!is.numeric(flowdata$Q)) {
      stop("Flow data (Q or Value) column in flowdata dataframe is not numeric.")
    }
    if (any(flowdata$Q < 0, na.rm = TRUE)) {
      stop('flowdata cannot have negative values - check your data')
    }
    if (!inherits(flowdata$Date, "Date")) {
      stop("Date column in flowdata dataframe is not a date.")
    }
  }
  # A single whole month number is required. The previous range test
  # (x < 1 & x > 12) could never be TRUE, so bad values slipped through.
  if (!is.numeric(water_year_start) || length(water_year_start) != 1 ||
      !(water_year_start %in% 1:12)) {
    stop("water_year_start parameter must be numeric between 1 and 12 (Jan-Dec)")
  }

  # If HYDAT station is listed, check if it exists and make it the flowdata
  if (!is.null(HYDAT)) {
    if (!HYDAT %in% tidyhydat::allstations$STATION_NUMBER) {
      stop("Station in 'HYDAT' parameter does not exist.")
    }
    flowdata <- tidyhydat::DLY_FLOWS(STATION_NUMBER = HYDAT)
  }

  if (all(is.na(flowdata$Date))) {
    stop("flowdata dataframe doesn't contain any dates.")
  }

  # Get the STATION_NUMBER and Parameter from flowdata if HYDAT was used in a previous fasstr function
  has_station <- "STATION_NUMBER" %in% names(flowdata)
  has_parameter <- "Parameter" %in% names(flowdata)
  if (has_station) {
    station_number <- flowdata$STATION_NUMBER[1]
  }
  if (has_parameter) {
    parameter <- flowdata$Parameter[1]
  }

  # Note whether fasstr_add_date_vars() was used previously. Checked before
  # any filling so the date variables are refreshed in both branches below.
  has_date_vars <- all(c("Year", "Month", "MonthName", "WaterYear", "DayofYear", "WaterDayofYear")
                       %in% names(flowdata))

  # Work out the first and last day to fill to. as.POSIXlt() stores years
  # since 1900 and zero-based months, hence the offsets.
  date_parts <- as.POSIXlt(flowdata$Date)
  calendar_year <- date_parts$year + 1900

  if (water_year && water_year_start > 1) {
    # A water year is labelled by the calendar year in which it ends, so it
    # runs from the start month of the previous calendar year to the day
    # before the start month of its own year.
    water_years <- ifelse(date_parts$mon + 1 >= water_year_start,
                          calendar_year + 1,
                          calendar_year)
    first_day <- as.Date(ISOdate(min(water_years, na.rm = TRUE) - 1, water_year_start, 1))
    last_day <- as.Date(ISOdate(max(water_years, na.rm = TRUE), water_year_start, 1)) - 1
  } else {
    # If not water year, or January is chosen as water year start
    first_day <- as.Date(ISOdate(min(calendar_year, na.rm = TRUE), 1, 1))
    last_day <- as.Date(ISOdate(max(calendar_year, na.rm = TRUE), 12, 31))
  }

  # all.y = TRUE keeps every day in the range; days absent from flowdata get NA
  flowdata <- merge(flowdata,
                    data.frame(Date = seq(first_day, last_day, by = "day")),
                    all.y = TRUE)

  # Fill in STATION_NUMBER and Parameter if HYDAT selected
  if (!is.null(HYDAT)) {
    flowdata$STATION_NUMBER <- HYDAT
    flowdata$Parameter <- "FLOW"
  }

  # If flowdata was from HYDAT in a previous function
  if (has_station) {
    flowdata$STATION_NUMBER <- station_number
  }
  if (has_parameter) {
    flowdata$Parameter <- parameter
  }
  if (value_is_q) {
    names(flowdata)[names(flowdata) == "Q"] <- "Value"
  }

  # If fasstr_add_date_vars() was used previously, add the date variables to
  # the new dates, using the same water year start as this call
  if (has_date_vars) {
    flowdata <- fasstr_add_date_vars(flowdata = flowdata, water_year_start = water_year_start)
  }

  return(flowdata)
} # end of function

0 comments on commit ad2faf8

Please sign in to comment.