-
Notifications
You must be signed in to change notification settings - Fork 2
/
atlasDownloader.R
130 lines (122 loc) · 6.04 KB
/
atlasDownloader.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# Additional inputs were made by mjwestgate to allow continued support with galah V2
##### 1. atlasDownloader ####
#' Download occurrence data from the Atlas of Living Australia (ALA)
#'
#'
#' Downloads ALA data and creates a new folder in the path to put those data. This function can also
#' request downloads from other atlases (see: http://galah.ala.org.au/articles/choosing_an_atlas.html).
#' However, it will only send the download to your email and you must do the rest yourself at this point.
#'
#' The function works in four steps: (1) it validates `userEmail`; (2) it creates
#' `<path>/<atlas>_galah_path` and points the galah configuration at it; (3) it requests the
#' occurrence download as `galah_download_<date>.zip` and unzips it into
#' `<path>/<atlas>_galah_path/galah_download_folder`; and (4) it writes a small
#' `galah_DL_info.csv` file describing the download into that same folder.
#'
#' @param path A character directory. The path to a folder where the download will be stored.
#' @param userEmail A character string. The email associated with the user's ALA account;
#' user must make an ALA account to download data. Must be a single, non-`NULL` string.
#' @param ALA_taxon A character string. The taxon to download from ALA. Uses [galah::galah_identify()]
#' @param DL_reason Numeric. The reason for data download according to [galah::galah_config()]
#' @param atlas Character. The atlas to download occurrence data from - see here https://galah.ala.org.au/R/articles/choosing_an_atlas.html for details.
#' Note: the default is "ALA" and is probably the only atlas which will work seamlessly with the rest
#' of the workflow. However, different atlases can still be downloaded and a doi will be sent to
#' your email.
#'
#' @return Completes an ALA data download and saves those data to the path provided.
#' Called for its side effects; returns `NULL` invisibly.
#'
#' @importFrom dplyr %>%
#' @importFrom utils unzip
#' @export
#'
#' @examples
#' \dontrun{
#' atlasDownloader(path = DataPath,
#' userEmail = "InsertYourEmail",
#' ALA_taxon = "Apiformes",
#' DL_reason = 4)
#' }
atlasDownloader <- function(path, userEmail = NULL, ALA_taxon, DL_reason = 4, atlas = "ALA"){
  #### Intro checks ####
  writeLines(paste0("1.","\n",
                    " - Note: galah has a 50 million record download limit.", "\n",
                    "You may call atlas_counts() to check.", "\n",
                    " - Additionally, you must register your email with your ", atlas, " otherwise you will get an ",
                    "error message.", "\n",
                    "See here - https://www.ala.org.au - or your relevant atlas","\n",
                    " - Valid donwload reasons include can be found by running show_all_reasons()"))
  # Check that a single userEmail string was supplied and halt otherwise.
  # Note: `exists("userEmail")` is always TRUE for a formal argument, so the default NULL
  # must be detected explicitly.
  if (is.null(userEmail) || !is.character(userEmail) || length(userEmail) != 1L) {
    stop("You must provide a userEmail for the ",atlas," download.")
  }
  # Check for a userEmail format and halt if FALSE (an NA email also fails this check)
  if (!grepl(".[^@]+@{1}.+\\..+", userEmail)) {
    stop("The email you entered might be incorrect, please double-check the format.")
  }
  # galah is required for everything below; fail early with a clear message if it is missing
  if (!requireNamespace("galah", quietly = TRUE)) {
    stop("The 'galah' package is required for atlasDownloader(); please install it.")
  }
  # Define ColsToKeep
  ColsToKeep <- BeeBDC::ColTypeR()[[1]] %>% names()
  # Create a new working directory for the atlas data in the path provided
  atlas_galah_path <- file.path(path, paste0(atlas, "_galah_path"))
  dir.create(atlas_galah_path, showWarnings = FALSE)
  # Set up the ALA download configuration
  writeLines(" - Setting galah configuration.")
  galah::galah_config(directory = atlas_galah_path,
                      download_reason_id = DL_reason,
                      verbose = TRUE,
                      email = userEmail,
                      send_email = TRUE,
                      atlas = atlas)
  #### ALA download ####
  # Choose ALA columns to download
  # Thankfully, ALA has a fantastic r package, galah, that allows easy download of occurrence data.
  # Thank you, ALA <3
  # DOWNLOAD ALA data here
  # Apiformes is an informal name that is helpful to select the bee families out of the superfamily Apoidea.
  writeLines(paste0("2.","\n",
                    " - Beginning atlas download via galah.", "\n",
                    "A progress bar of your download should appear shortly. You will also receive an email ",
                    "when your download is complete."))
  # Capture the date once so that the zip name, the unzip step and the saved metadata all agree,
  # even if the download runs past midnight.
  # Use of `Sys.Date()` comes with the risk that consecutive downloads on the same day will
  # overwrite each other, even if they are for different queries
  # Note: `file_name` given below is chosen for consistency with previous version of BeeBDC
  download_date <- Sys.Date()
  file_name <- paste0("galah_download_", download_date, ".zip")
  ALA_Occurence_download <- galah::galah_call() %>%
    galah::galah_identify(ALA_taxon) %>%
    galah::galah_select(tidyselect::any_of(ColsToKeep)) %>%
    galah::atlas_occurrences(mint_doi = FALSE, file = file_name)
  writeLines(paste0("3.","\n"," - atlas download is complete.", "\n",
                    "The script will now unzip all of the data and metadata to ",
                    atlas_galah_path, ". This may take a short while."))
  # unzip the file that was just requested (same name as passed to atlas_occurrences())
  download_folder <- file.path(atlas_galah_path, "galah_download_folder")
  utils::unzip(
    # File to unzip
    zipfile = file.path(atlas_galah_path, file_name),
    # Where to put the extracted file
    exdir = download_folder,
    overwrite = TRUE)
  #### Save data ####
  # Save some download information; the doi and search_url are read from the attributes of the
  # downloaded object (data_type and data_request are not supported post galah v.2)
  dplyr::tibble(
    downloaders_email = userEmail,
    taxon = ALA_taxon,
    doi = attr(ALA_Occurence_download, "doi"),
    search_url = attr(ALA_Occurence_download, "search_url"),
    ALA_download_reason = DL_reason,
    download_date = download_date) %>%
    readr::write_excel_csv(file = file.path(download_folder, "galah_DL_info.csv"))
  # Write user instructions
  writeLines(paste0("4.","\n"," - Fin."))
  invisible(NULL)
}
##### Current end ALA ####