Skip to content

Commit

Permalink
Merge pull request #44 from dafenner/develop
Browse files Browse the repository at this point in the history
Include data description, examples with example data, updated README, DESCRIPTION, documentation
  • Loading branch information
dafenner committed Jul 19, 2023
2 parents 33327e3 + 873ecf7 commit ca364e1
Show file tree
Hide file tree
Showing 21 changed files with 240 additions and 10 deletions.
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Package: CrowdQCplus
Type: Package
Title: Enhanced quality control for crowdsourced data from citizen weather stations
Version: 1.1.0
Date: 2023-07-11
Date: 2023-07-19
Author: Daniel Fenner, Tom Grassmann, Benjamin Bechtel, Matthias Demuzere, Jonas Kittner, Fred Meier
Authors@R: c(
person("Daniel", "Fenner", email = "daniel.fenner@meteo.uni-freiburg.de", role = c("aut", "cre")),
Expand All @@ -22,5 +22,6 @@ LazyData: true
RoxygenNote: 7.2.3
Depends: R (>= 3.5.0)
Imports: data.table, methods, stats, robustbase, lubridate, terra, geodata
Suggests: testthat
NeedsCompilation: no
Packaged: 2023-07-11 8:00 UTC; dafenner
Packaged: 2023-07-19 19:00 UTC; dafenner
79 changes: 79 additions & 0 deletions R/cqcp_filter.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@
#'
#' @return data.table
#' @export
#'
#' @examples
#' data(cqcp_cws_data)
#' data <- cqcp_padding(cqcp_cws_data)
#' data_qc <- cqcp_m1(data)
cqcp_m1 <- function(data, cutOff = 1, quiet = FALSE){
val <- data[!is.na(ta), .(a = 1), by = .(p_id,lon,lat)]
bad_s <- val[,.(anz = sum(lon == val$lon & lat == val$lat)), by = p_id]
Expand Down Expand Up @@ -86,6 +91,12 @@ cqcp_getZ <- function(x){
#'
#' @return data.table
#' @export
#'
#' @examples
#' data(cqcp_cws_data)
#' data <- cqcp_padding(cqcp_cws_data)
#' data_qc <- cqcp_m1(data)
#' data_qc <- cqcp_m2(data_qc)
cqcp_m2 <- function(data, low = 0.01, high = 0.95, heightCorrection = TRUE,
debug = FALSE, lapse_rate = 0.0065, t_distribution = FALSE,
quiet = FALSE){
Expand Down Expand Up @@ -190,6 +201,13 @@ cqcp_cor_timespan <- function(x, y, t, cutOff, timespan = "month"){
#'
#' @return data.table
#' @export
#'
#' @examples
#' data(cqcp_cws_data)
#' data <- cqcp_padding(cqcp_cws_data)
#' data_qc <- cqcp_m1(data)
#' data_qc <- cqcp_m2(data_qc)
#' data_qc <- cqcp_m3(data_qc)
cqcp_m3 <- function(data, cutOff = 0.2, complete = FALSE, duration = NULL,
quiet = FALSE){

Expand Down Expand Up @@ -240,6 +258,14 @@ cqcp_m3 <- function(data, cutOff = 0.2, complete = FALSE, duration = NULL,
#'
#' @return data.table
#' @export
#'
#' @examples
#' data(cqcp_cws_data)
#' data <- cqcp_padding(cqcp_cws_data)
#' data_qc <- cqcp_m1(data)
#' data_qc <- cqcp_m2(data_qc)
#' data_qc <- cqcp_m3(data_qc)
#' data_qc <- cqcp_m4(data_qc)
cqcp_m4 <- function(data, cutOff = 0.9, complete = FALSE, duration = NULL,
quiet = FALSE){

Expand Down Expand Up @@ -338,6 +364,16 @@ cqcp_m4 <- function(data, cutOff = 0.9, complete = FALSE, duration = NULL,
#'
#' @return data.table
#' @export
#'
#' @examples
#' data(cqcp_cws_data)
#' data <- cqcp_padding(cqcp_cws_data)
#' data_qc <- cqcp_m1(data)
#' data_qc <- cqcp_m2(data_qc)
#' data_qc <- cqcp_m3(data_qc)
#' data_qc <- cqcp_m4(data_qc)
#' data_qc <- cqcp_m5(data_qc)
#' data_qc_keep <- cqcp_m5(data_qc, keep_isolated = TRUE) # keep isolated CWS
cqcp_m5 <- function(data, radius = 3000, n_buddies = 5, alpha = 0.1,
heightCorrection = TRUE, lapse_rate = 0.0065,
check_elevation = TRUE, max_elev_diff = 100,
Expand Down Expand Up @@ -475,6 +511,16 @@ cqcp_interpol <- function(x, maxLength = 1){
#'
#' @return data.table
#' @export
#'
#' @examples
#' data(cqcp_cws_data)
#' data <- cqcp_padding(cqcp_cws_data)
#' data_qc <- cqcp_m1(data)
#' data_qc <- cqcp_m2(data_qc)
#' data_qc <- cqcp_m3(data_qc)
#' data_qc <- cqcp_m4(data_qc)
#' data_qc <- cqcp_m5(data_qc)
#' data_qc <- cqcp_o1(data_qc)
cqcp_o1 <- function(data, fun = cqcp_interpol, quiet = FALSE, ...){
data[, ta_int := ta]
data[!m5, "ta_int"] <- NA
Expand Down Expand Up @@ -503,6 +549,17 @@ cqcp_o1 <- function(data, fun = cqcp_interpol, quiet = FALSE, ...){
#'
#' @return data.table
#' @export
#'
#' @examples
#' data(cqcp_cws_data)
#' data <- cqcp_padding(cqcp_cws_data)
#' data_qc <- cqcp_m1(data)
#' data_qc <- cqcp_m2(data_qc)
#' data_qc <- cqcp_m3(data_qc)
#' data_qc <- cqcp_m4(data_qc)
#' data_qc <- cqcp_m5(data_qc)
#' data_qc <- cqcp_o1(data_qc)
#' data_qc <- cqcp_o2(data_qc)
cqcp_o2 <- function(data, cutOff = 0.8, quiet = FALSE){
has_d <- cqcp_has_column(data, column = "day")
if(!has_d){
Expand Down Expand Up @@ -533,6 +590,18 @@ cqcp_o2 <- function(data, cutOff = 0.8, quiet = FALSE){
#'
#' @return data.table
#' @export
#'
#' @examples
#' data(cqcp_cws_data)
#' data <- cqcp_padding(cqcp_cws_data)
#' data_qc <- cqcp_m1(data)
#' data_qc <- cqcp_m2(data_qc)
#' data_qc <- cqcp_m3(data_qc)
#' data_qc <- cqcp_m4(data_qc)
#' data_qc <- cqcp_m5(data_qc)
#' data_qc <- cqcp_o1(data_qc)
#' data_qc <- cqcp_o2(data_qc)
#' data_qc <- cqcp_o3(data_qc)
cqcp_o3 <- function(data, cutOff = 0.8, complete = FALSE, duration = NULL,
quiet = FALSE){

Expand Down Expand Up @@ -596,6 +665,11 @@ cqcp_o3 <- function(data, cutOff = 0.8, complete = FALSE, duration = NULL,
#'
#' @return data.table
#' @export
#'
#' @examples
#' data(cqcp_cws_data)
#' data <- cqcp_padding(cqcp_cws_data)
#' data_corr <- cqcp_o4(data, 762)
cqcp_o4 <- function(data, time_constant, quiet = FALSE) {

if(is.null(time_constant) | missing(time_constant)) return(data)
Expand Down Expand Up @@ -674,6 +748,11 @@ cqcp_has_column <- function(data, column = "month"){
#'
#' @return data.table
#' @export
#'
#' @examples
#' data(cqcp_cws_data)
#' data <- cqcp_padding(cqcp_cws_data)
#' data_qc <- cqcp_qcCWS(data)
cqcp_qcCWS <- function(data,
m1_cutOff = 1,
m2_low = 0.01, m2_high = 0.95,
Expand Down
22 changes: 18 additions & 4 deletions R/cqcp_helper.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ cqcp_colourise <- function(str, colour) {
#'
#' @return logical (or list), TRUE if data passed all checks.
#' @export
#'
#' @examples
#' data(cqcp_cws_data)
#' cqcp_check_input(cqcp_cws_data)
cqcp_check_input <- function(data, print = TRUE, file = NULL, as_list = FALSE){

ok <- TRUE
Expand Down Expand Up @@ -442,6 +446,10 @@ cqcp_add_dem_height <- function(data, file = NULL, raster = NULL,
#'
#' @return data.table with regular time series for all stations.
#' @export
#'
#' @examples
#' data(cqcp_cws_data)
#' cqcp_padding(cqcp_cws_data)
cqcp_padding <- function(data, resolution = "1 hour", rounding_method = "nearest",
quiet = FALSE) {

Expand Down Expand Up @@ -510,6 +518,10 @@ cqcp_padding <- function(data, resolution = "1 hour", rounding_method = "nearest
#'
#' @return data.table with output statistics
#' @export
#'
#' @examples
#' data(cqcp_cws_data)
#' n_data <- cqcp_output_statistics(cqcp_cws_data)
cqcp_output_statistics <- function(data, print = TRUE, file = NULL) {

levels <- c("m1", "m2", "m3", "m4", "m5", "o1", "o2", "o3")
Expand All @@ -532,10 +544,12 @@ cqcp_output_statistics <- function(data, print = TRUE, file = NULL) {
cat("+ CrowdQC+ output statistics +\n")
cat("++++++++++++++++++++++++++++++\n")
cat(paste0("Raw data: ",n_data$n_raw," values, ",n_stat$n_raw," stations\n"))
for(j in 2:length(columns)) {
cat(paste0("QC level ",columns[j],": ",n_data[,as.character(get(columns[j]))],
" values (= ",sprintf("%.2f",n_data[,(get(columns[j]))]/n_data$n_raw*100),
" % of raw data), ",n_stat[,as.character(get(columns[j]))]," stations\n"))
if (length(columns) > 1) {
for(j in 2:length(columns)) {
cat(paste0("QC level ",columns[j],": ",n_data[,as.character(get(columns[j]))],
" values (= ",sprintf("%.2f",n_data[,(get(columns[j]))]/n_data$n_raw*100),
" % of raw data), ",n_stat[,as.character(get(columns[j]))]," stations\n"))
}
}
}
# File?
Expand Down
15 changes: 15 additions & 0 deletions R/data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#' Example CWS data
#'
#' The example data set contains one month of randomly generated data for 624 'crowd weather stations' (CWS).
#' The air-temperature data is based on air temperature measured at one location, from which
#' a somewhat meaningful, spatially distributed data set was randomly generated.
#'
#' @format A data table with 298706 rows and 6 variables:
#' \describe{
#' \item{p_id}{Unique identifier for each CWS}
#' \item{time}{Time stamp to which the air-temperature data corresponds, in UTC}
#' \item{ta}{Air temperature in degree C}
#' \item{lon}{Longitude of the CWS (WGS-84)}
#' \item{lat}{Latitude of the CWS (WGS-84)}
#' \item{z}{Altitude (m) of the CWS}}
"cqcp_cws_data"
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,12 @@ Data should be represented as a [data.table](https://CRAN.R-project.org/package=
Optionally, the user can provide elevation information per station (column `z`) in order to perform a height correction in some of the QC levels.
Any other column can be present, but is quietly ignored by CrowdQC+.

Please have a look at the [example data](https://github.com/dafenner/CrowdQCplus/blob/master/data/cqcp_cws_data.rda) to see what the data should look like:
```R
data(cqcp_cws_data)
head(cqcp_cws_data)
```

This is how an input data table with hourly data of a month should be organised (the values are completely made up and meaningless):
| p_id | time | ta | lon | lat | z |
| -----| ---- | -- | --- | --- | - |
Expand Down
4 changes: 4 additions & 0 deletions man/cqcp_check_input.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 25 additions & 0 deletions man/cqcp_cws_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions man/cqcp_m1.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions man/cqcp_m2.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions man/cqcp_m3.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions man/cqcp_m4.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 10 additions & 0 deletions man/cqcp_m5.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 10 additions & 0 deletions man/cqcp_o1.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions man/cqcp_o2.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions man/cqcp_o3.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions man/cqcp_o4.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions man/cqcp_output_statistics.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit ca364e1

Please sign in to comment.