Skip to content

Commit

Permalink
feat: added the ability to group parameter frames by a taxa ID column
Browse files Browse the repository at this point in the history
  • Loading branch information
brycefrank committed Jan 15, 2024
1 parent 0445628 commit c3f4121
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 8 deletions.
46 changes: 39 additions & 7 deletions R/load.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,31 @@ my_function <- function(family, genus, species) {
)
}

combine_taxa <- function(data, key) {
taxon_list <- list()

for(i in 1:nrow(data)) {
data_i <- data[i,]
taxon_list[[i]] <- Taxon(
family = data_i$family, genus = data_i$genus, species = data_i$species
)
}

taxa <- do.call(Taxa, taxon_list)
distinct_cols <- colnames(data)[!colnames(data) %in% c("family", "genus", "species")]

distinct_data <- data %>%
dplyr::distinct_at(.vars = distinct_cols)

if(nrow(distinct_data) != 1) {
browser()
stop("Could not generate a distinct taxonomic row for taxa ID:", key$taxa_id)
}

distinct_data$taxa <- list(taxa)
distinct_data
}

#' Aggregate family, genus, and species columns of `tbl_df`` into taxa data
#' structure
#'
Expand All @@ -62,20 +87,27 @@ my_function <- function(family, genus, species) {
#' "taxons". A taxon is a list containing family, genus, and species fields.
#'
#' @param table The table for which the taxa will be aggregated
#' @param remove_taxa_cols Whether or not to remove the family, genus, and
#' species columns after aggregation
#' @param grouping_col An optional column to group on when creating taxa. Rows
#' with the same grouping_col value will be stored into the same taxa.
#' @return A tibble with family, genus, and species columns added
aggregate_taxa <- function(table, remove_taxa_cols = TRUE) {
aggregate_taxa <- function(table, grouping_col = NULL)
{
default_taxon_fields <- c("family", "genus", "species")
taxon_fields <- colnames(table)[colnames(table) %in% default_taxon_fields]
missing_taxon_fields <- default_taxon_fields[!default_taxon_fields %in% taxon_fields]

if(is.null(grouping_col)) {
taxa_fill <- 1:nrow(table)
} else {
taxa_fill <- tibble::deframe(table[,grouping_col])
}

table %>%
dplyr::mutate(!!!stats::setNames(rep(list(NA), length(missing_taxon_fields)), missing_taxon_fields)) %>%
dplyr::rowwise() %>%
dplyr::mutate(taxa = list(Taxa(Taxon(family = .data$family, genus = .data$genus, species = .data$species)))) %>%
dplyr::ungroup() %>%
dplyr::select(-dplyr::all_of(default_taxon_fields))
dplyr::mutate(taxa_id = taxa_fill) %>%
dplyr::group_by(taxa_id) %>%
dplyr::group_map(combine_taxa) %>%
dplyr::bind_rows()
}

#' Load a locally installed table of allometric models
Expand Down
2 changes: 1 addition & 1 deletion inst/variable_defs/t.csv
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
measure,component,modifier,description
t,l,t,mean annual temperature
t,l,t,mean annual temperature
13 changes: 13 additions & 0 deletions tests/testthat/test-taxa.R
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,19 @@ test_that("aggregate_taxa creates correct Taxa and Taxon grouping", {
expect_s4_class(agg_table_12$taxa[[1]][[1]], "Taxon")
})

test_that("aggregate_taxa groups by a taxa ID column", {
test_table_1 <- tibble::tibble(
family = c("Pinaceae", "Pinaceae", "Betulaceae"),
genus = c("Pinus", "Pinus", "Alnus"),
species = c("ponderosa", "jeffreyii", "rubra"),
taxa_id = c(1, 1, 2),
other_values = c("a", "a", "b")
)

aggregated <- aggregate_taxa(test_table_1, grouping_col = "taxa_id")
expect_equal(nrow(aggregated), 2)
})

test_that("Non-unique Taxon objects throw error", {
expect_error({
Taxa(Taxon(family = "a"), Taxon(family = "a"))
Expand Down

0 comments on commit c3f4121

Please sign in to comment.