Skip to content

Commit

Permalink
Add function to drop rows
Browse files Browse the repository at this point in the history
fixes #70
  • Loading branch information
joelnitta committed May 8, 2023
1 parent 46ca1d4 commit 54ba041
Show file tree
Hide file tree
Showing 7 changed files with 297 additions and 0 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ export(dct_check_mapping)
export(dct_check_sci_name)
export(dct_check_tax_status)
export(dct_check_taxon_id)
export(dct_drop_row)
export(dct_fill_col)
export(dct_modify_row)
export(dct_options)
Expand Down
96 changes: 96 additions & 0 deletions R/dct_drop_row.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#' Drop rows by taxonID
#'
#' Helper function for dct_drop_row
#'
#' @inheritParams dct_drop_row
#'
#' @return Dataframe
#' @noRd
#' @autoglobal
drop_row_by_taxon_id <- function(tax_dat, taxon_id) {
# - taxonID of tax_dat must be non-missing and unique
tryCatch(
dct_check_taxon_id(tax_dat, on_fail = "error", on_success = "logical"),
error = function(x) {
stop(
paste(
"tax_dat must include column taxonID, which must be a character or",
"numeric vector with unique, non-missing values"
),
call. = FALSE
)
}
)
tax_dat[!tax_dat$taxonID %in% taxon_id, ]
}

#' Drop rows by scientificName
#'
#' Helper function for dct_drop_row
#'
#' @inheritParams dct_drop_row
#'
#' @return Dataframe
#' @noRd
#' @autoglobal
drop_row_by_sci_name <- function(tax_dat, sci_name) {
# - scientificName of tax_dat must be non-missing and unique
tryCatch(
dct_check_sci_name(tax_dat, on_fail = "error", on_success = "logical"),
error = function(x) {
stop(
paste(
"tax_dat must include column scientificName, which must be a",
"character vector with unique, non-missing values"
),
call. = FALSE
)
}
)
tax_dat[!tax_dat$scientificName %in% sci_name, ]
}

#' Drop row(s) of a taxonomic database
#'
#' Drop one or more rows from a taxonomic database in Darwin Core (DwC) format
#' by taxonID or scientificName.
#'
#' Only works if values of taxonID or scientificName are unique and non-missing
#' in the taxonomic database (tax_dat).
#'
#' Either taxonID or scientificName should be provided, but not both.
#'
#' @param tax_dat Dataframe; taxonomic database in DwC format.
#' @param taxonID Character or numeric vector; taxonID of the row(s)
#' to be dropped.
#' @param scientificName Character vector; scientificName of the row(s)
#' to be dropped.
#'
#' @return Dataframe; taxonomic database in DwC format
#' @autoglobal
#' @export
#' @example inst/examples/dct_drop_row.R
dct_drop_row <- function(tax_dat,
taxonID = NULL, # nolint
scientificName = NULL # nolint
) {
# Check input
# - tax_dat must be dataframe
assertthat::assert_that(
inherits(tax_dat, "data.frame"),
msg = "tax_dat must be of class data.frame"
)
# - must provide taxonID or scientificName
assertthat::assert_that(
sum(is.null(taxonID), is.null(scientificName)) == 1,
msg = "Either taxonID or scientificName must be provided, but not both"
)
# Drop rows
if (!is.null(taxonID)) {
res <- drop_row_by_taxon_id(tax_dat, taxonID)
}
if (!is.null(scientificName)) {
res <- drop_row_by_sci_name(tax_dat, scientificName)
}
return(res)
}
1 change: 1 addition & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ reference:
- title: Modify taxonomic names
- contents:
- dct_add_row
- dct_drop_row
- dct_modify_row
- dct_fill_col
- title: Validate taxonomic names
Expand Down
23 changes: 23 additions & 0 deletions inst/examples/dct_drop_row.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Can drop rows by scientificName or taxonID
dct_filmies |>
dct_drop_row(scientificName = "Cephalomanes atrovirens Presl")

dct_filmies |>
dct_drop_row(taxonID = "54133783")

# Can drop multiple rows at once by providing multiple values for
# scientificName or taxonID
dct_filmies |>
dct_drop_row(
scientificName = c(
"Cephalomanes atrovirens Presl",
"Trichomanes crassum Copel."
)
)

dct_filmies |>
dct_drop_row(
taxonID = c(
"54133783", "54133783"
)
)
55 changes: 55 additions & 0 deletions man/dct_drop_row.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

103 changes: 103 additions & 0 deletions tests/testthat/test-dct_drop_row.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
test_dat <- data.frame(
taxonID = 1:5,
scientificName = letters[1:5]
)

test_that("Drop by taxonID works", {
expect_equal(
dct_drop_row(test_dat, taxonID = c(1, 2)),
data.frame(
taxonID = 1:5,
scientificName = letters[1:5]
)[3:5, ]
)
expect_equal(
drop_row_by_taxon_id(test_dat, taxon_id = c(1, 2)),
data.frame(
taxonID = 1:5,
scientificName = letters[1:5]
)[3:5, ]
)
})

test_that("Drop by scientificName works", {
expect_equal(
dct_drop_row(test_dat, scientificName = c("a", "b")),
data.frame(
taxonID = 1:5,
scientificName = letters[1:5]
)[3:5, ]
)
expect_equal(
drop_row_by_sci_name(test_dat, sci_name = c("a", "b")),
data.frame(
taxonID = 1:5,
scientificName = letters[1:5]
)[3:5, ]
)
})

test_that("Checks work", {
expect_error(
dct_drop_row(test_dat, scientificName = "a", taxonID = 1),
"Either taxonID or scientificName must be provided, but not both"
)
expect_error(
dct_drop_row(test_dat),
"Either taxonID or scientificName must be provided, but not both"
)
expect_error(
dct_drop_row(data.frame(taxonID = "a"), scientificName = "b"),
paste(
"tax_dat must include column scientificName,",
"which must be a character vector with unique, non-missing values"
)
)
expect_error(
dct_drop_row(
data.frame(taxonID = 1:3, scientificName = c("a", "b", "b")),
scientificName = "b"
),
paste(
"tax_dat must include column scientificName,",
"which must be a character vector with unique, non-missing values"
)
)
expect_error(
dct_drop_row(
data.frame(taxonID = 1:3, scientificName = c("a", "b", NA)),
scientificName = "b"
),
paste(
"tax_dat must include column scientificName,",
"which must be a character vector with unique, non-missing values"
)
)
expect_error(
dct_drop_row(data.frame(scientificName = "a"), taxonID = "b"),
paste(
"tax_dat must include column taxonID, which must be a character or",
"numeric vector with unique, non-missing values"
)
)
expect_error(
dct_drop_row(
data.frame(taxonID = c("a", "b", "b"), scientificName = c("a", "b", "c")),
taxonID = "b"
),
paste(
"tax_dat must include column taxonID, which must be a character or",
"numeric vector with unique, non-missing values"
)
)
expect_error(
dct_drop_row(
data.frame(taxonID = c("a", "b", NA), scientificName = c("a", "b", "c")),
taxonID = "b"
),
paste(
"tax_dat must include column taxonID, which must be a character or",
"numeric vector with unique, non-missing values"
)
)
})
18 changes: 18 additions & 0 deletions vignettes/editing.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,24 @@ filmies_small |>

Note that in this case the `taxonID` already existed in the data to add, so it is not generated automatically.

## Deleting rows

`dct_drop_row()` drops one or more rows by `taxonID` or `scientificName`.

For example, we can exclude the row for *Cephalomanes atrovirens* Presl by either using its `scientificName` (`Cephalomanes atrovirens Presl`) or its `taxonID` (`54115096`):

```{r drop-row}
filmies_small |>
dct_drop_row(scientificName = "Cephalomanes atrovirens Presl")
filmies_small |>
dct_drop_row(taxonID = "54115096")
```

Since it looks up values by `taxonID` or `scientificName`, `dct_drop_row()` requires these to be unique and non-missing in the taxonomic database.

Of course, since the taxonomic database is a dataframe, you could also use other subsetting techniques like brackets in base R or `dplyr::filter()` from the tidyverse to delete rows.

## Modifying rows

### Identifying rows to modify
Expand Down

0 comments on commit 54ba041

Please sign in to comment.