Skip to content

Commit

Permalink
Merge pull request #29 from ecohealthalliance/feature/uid_in_spreadsh…
Browse files Browse the repository at this point in the history
…eets

Feature/uid in spreadsheets
  • Loading branch information
collinschwantes committed Jul 10, 2024
2 parents cff3dc4 + 01656ed commit 91c2e62
Show file tree
Hide file tree
Showing 7 changed files with 129 additions and 16 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
.Ruserdata
inst/doc
docs
auth
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: ohcleandat
Type: Package
Title: One Health Data Cleaning and Quality Checking Package
Version: 0.2.3
Version: 0.2.4
Authors@R: c(
person("Collin", "Schwantes", email = "schwantes@ecohealthalliance.org", role = c("cre", "aut"), comment = c(ORCID = "0000-0003-4014-4896")),
person("Johana", "Teigen", email = "teigen@ecohealthalliance.org", role = "aut", comment = c(ORCID = "0000-0002-6209-2321")),
Expand Down
55 changes: 52 additions & 3 deletions R/read_excel_all_sheets.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,60 @@
#' For a given excel file, this will detect all sheets, and iteratively read
#' all sheets and place them in a list.
#'
#' @param file character File path to an excel file
#' If primary keys are added, the primary key is the triplet of the file name,
#' sheet name, and row number, e.g. "file_xlsx_sheet1_1". Row numbering is based
#' on the data as read into R. `readxl` automatically skips empty rows at the
#' beginning of the spreadsheet, so id 1 in the primary key belongs to the first
#' row of data that is read in.
#'
#' @note The primary key method is possible because Excel forces sheet names
#' to be unique.
#'
#' @param add_primary_key_field Logical. Should a primary key field be added?
#' @param primary_key character. The column name for the unique identifier to be added to the data.
#' @param file character. File path to an excel file
#'
#' @return list
#' @export
read_excel_all_sheets <- function(file){
#'
#' @examples
#' \dontrun{
#' # Adding primary key field
#' read_excel_all_sheets(file = "test_pk.xlsx", add_primary_key_field = TRUE)
#'
#' # Don't add primary key field
#' read_excel_all_sheets(file = "test_pk.xlsx")
#'
#' }
#'
read_excel_all_sheets <- function(file, add_primary_key_field = FALSE, primary_key = "primary_key"){
  # Read every sheet of the workbook at `file` and return them as a list with
  # one data frame per sheet, in the order given by readxl::excel_sheets().
  # When `add_primary_key_field` is TRUE, each data frame gains a column named
  # by `primary_key` that holds "<file>_<sheet>_<row>" identifiers.
  sheets <- readxl::excel_sheets(file)

  # Plain read: each sheet is read exactly once and returned untouched.
  if (!add_primary_key_field) {
    out <- purrr::map(sheets, function(sheet) {
      readxl::read_excel(file, sheet = sheet)
    })
    return(out)
  }

  # The file part of the id is the same for every sheet, so compute it once.
  # Dots are replaced so the id reads e.g. "file_xlsx_sheet1_1".
  file_name <- gsub("\\.", "_", basename(file))

  purrr::map(sheets, function(sheet) {
    df <- readxl::read_excel(file, sheet = sheet)

    # Refuse to overwrite a column that already exists in the sheet.
    if (primary_key %in% names(df)) {
      msg <- sprintf(
        "primary_key - %s - is already a column in the dataframe.\n\nPlease choose a column name that isn't present in the data.",
        primary_key
      )
      rlang::abort(msg)
    }

    # seq_len() instead of 1:nrow(): a sheet with no data rows must yield zero
    # ids. paste() would still return one element for zero-length input, so the
    # empty case is handled explicitly.
    n_rows <- nrow(df)
    row_ids <- if (n_rows > 0) {
      paste(file_name, sheet, seq_len(n_rows), sep = "_")
    } else {
      character(0)
    }

    # Append the id column as the last column; the check above guarantees the
    # name is new, so nothing is overwritten.
    df[[primary_key]] <- row_ids
    df
  })
}

39 changes: 31 additions & 8 deletions R/read_googlesheets.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,36 +6,51 @@
#' @param sheet Sheet to read, in the sense of "worksheet" or "tab".
#' @param ss Something that identifies a Google Sheet such as drive id or URL
#' @param ... other arguments passed to `googlesheets4::range_read()`
#' @param add_primary_key_field Logical. Should a primary key field be added?
#' @param primary_key character. The column name for the unique identifier to be added to the data.
#'
#' @return tibble
#' @export
#' @seealso [googlesheets4::range_read()]
#' @examples
#' \dontrun{
#' read_googlesheets(ss = kzn_animal_ship_sheets, sheet = "all")
#' read_googlesheets(ss = kzn_animal_ship_sheets, sheet = "all", add_primary_key_field = TRUE)
#' }
#'
read_googlesheets <-
function(key_path,
sheet = "all",
ss,
add_primary_key_field = FALSE,
primary_key = "primary_key",
...) {
# Handle authentication
googledrive::drive_auth(path = key_path)
googlesheets4::gs4_auth(path = key_path)

# decide of all sheets are read and combined, or just one.
# decide if all sheets are read and combined, or just one.
if (sheet == "all") {
sheet_names <- googlesheets4::sheet_names(ss = ss)
sheet_names <- rlang::set_names(sheet_names, sheet_names)
dat <-
purrr::map(sheet_names,
\(x) googlesheets4::range_read(
ss = ss,
sheet = x,
na = c("", "NA", "NULL", "-"),
...
))
function(x){
df <- googlesheets4::range_read(
ss = ss,
sheet = x,
na = c("", "NA", "NULL", "-"),
...
)

if(add_primary_key_field){
#ss_sheet_rownum
drive_id <- googledrive::as_id(ss)
row_ids <- paste(drive_id,x,1:nrow(df),sep = "_")
df <- df %>%
dplyr::mutate({{primary_key}} := {{row_ids}})
}
return(df)
} )

} else{
dat <-
Expand All @@ -46,6 +61,14 @@ read_googlesheets <-
...
)

if(add_primary_key_field){
#ss_sheet_rownum
drive_id <- googledrive::as_id(ss)
row_ids <- paste(drive_id,sheet,1:nrow(dat),sep = "_")
dat <- dat %>%
dplyr::mutate({{primary_key}} := {{row_ids}})
}

}

return(dat)
Expand Down
Binary file added inst/test_pk.xlsx
Binary file not shown.
33 changes: 31 additions & 2 deletions man/read_excel_all_sheets.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 13 additions & 2 deletions man/read_googlesheets.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 91c2e62

Please sign in to comment.