/
dataDWD.R
238 lines (235 loc) · 10.7 KB
/
dataDWD.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
# DWD Daten mit R runterladen, Wetter und Klimadaten in R
# Deutscher Wetterdienst R Daten download Klimastationen
# Weather Data Germany download with R, Climate Data Germany
#
#' @title Download data from the DWD CDC FTP Server
#' @description Get climate data from the German Weather Service (DWD) FTP-server.
#' The desired dataset is downloaded into `dir`.
#' If `read=TRUE`, it is also read and processed.\cr
#' `dataDWD` handles vectors of URLs,
#' displays progress bars (if the package `pbapply` is available)
#' and by default does not re-download data already in `dir`
#' (but see argument `force` to update files).\cr
#' To solve "errors in download.file: cannot open URL", see
#' <https://bookdown.org/brry/rdwd/fileindex.html>.\cr
#'
#' @return Presuming downloading and processing were successful:
#' if `read=TRUE`, the desired dataset
#' (as returned by [readDWD()]),
#' otherwise the filename as saved on disc
#' (may have "_n" appended in name, see [newFilename()]).\cr
#' If length(file)>1, the output is a list of outputs / vector of filenames.\cr
#' The output is always invisible.
#' @author Berry Boessenkool, \email{berry-b@@gmx.de}, Jun-Oct 2016
#' @seealso [selectDWD()]. [readDWD()], [download.file()].\cr
#' <https://bookdown.org/brry/rdwd>\cr
#' Helpful for plotting: [berryFunctions::monthAxis()],
#' see also [berryFunctions::climateGraph()]
#' @keywords data file
#' @importFrom utils tail download.file browseURL
#' @importFrom berryFunctions newFilename owa tmessage twarning tstop truncMessage
#' @importFrom pbapply pblapply
#' @importFrom stats runif
#' @export
#' @examples
#' \dontrun{ ## requires internet connection
#' # find FTP files for a given station name and file path:
#' link <- selectDWD("Fuerstenzell", res="hourly", var="wind", per="recent")
#' # download file:
#' fname <- dataDWD(link, dir=locdir(), read=FALSE) ; fname
#' # dir="DWDdata" is the default directory to store files
#' # unless force=TRUE, already obtained files will not be downloaded again
#'
#' # read and plot file:
#' wind <- readDWD(fname, varnames=TRUE) ; head(wind)
#' metafiles <- readMeta(fname) ; str(metafiles, max.level=1)
#' column_names <- readVars(fname) ; head(column_names)
#'
#' plot(wind$MESS_DATUM, wind$F, main="DWD hourly wind Fuerstenzell", col="blue",
#' xaxt="n", las=1, type="l", xlab="Date", ylab="Hourly Wind speed [m/s]")
#' berryFunctions::monthAxis(1)
#'
#'
#' # current and historical files, keep historical in the overlap time period:
#' link <- selectDWD("Potsdam", res="daily", var="kl", per="hr"); link
#' potsdam <- dataDWD(link, dir=locdir(), hr=4)
#' plot(TMK~MESS_DATUM, data=tail(potsdam,1500), type="l")
#'
#'
#' # With many files (>>50), use sleep to avoid getting kicked off the FTP server
#' #links <- selectDWD(res="daily", var="solar")
#' #sol <- dataDWD(links, sleep=20) # random waiting time after download (0 to 20 secs)
#'
#' # Real life examples can be found in the use cases section of the vignette:
#' # browseURL("https://bookdown.org/brry/rdwd")
#' }
#'
#' @param url Char (vector): complete file URL(s) (including base and filename.zip)
#' as returned by [selectDWD()]. Can be a vector with several FTP URLs.
#' @param base Single char: base URL that will be removed from output file names.
#' DEFAULT: [`dwdbase`]
#' @param joinbf Logical: paste `base` and `file url` together?
#' Needed mostly for data at [`gridbase`].
#' DEFAULT: FALSE (selectDWD returns complete URLs already)
#' @param dir Char: Writeable directory name where to save the downloaded file.
#' Created if not existent. DEFAULT: [locdir()]
#' @param force Logical (vector): always download, even if the file already exists in `dir`?
#' Use NA to force re-downloading files older than 24 hours.
#' Use a numerical value to force after that amount of hours.
#' Use something like `c(Inf, 24)` or `force=c(24*365, 6)`, for per="hr".
#' Note: if `force` is not FALSE, the `overwrite` default is TRUE.
#' DEFAULT: FALSE
#' @param overwrite Logical (vector): if force=TRUE, overwrite the existing file
#' rather than append "_1"/"_2" etc to the filename?
#' DEFAULT: `!isFALSE(force)`, i.e. true when `force` is specified.
#' @param read Logical: read the file(s) with [readDWD()]? If FALSE,
#' only download is performed and the filename(s) returned. DEFAULT: TRUE
#' @param dbin Logical: Download binary file, i.e. add `mode="wb"` to the
#' [download.file()] call?
#' See [Website](https://bookdown.org/brry/rdwd/raster-data.html#binary-file-errors)
#' for details.
#' DEFAULT: TRUE
#' @param method [download.file] `method`. Introduced in version 1.5.25 (2022-05-12)
#' as triggered by <https://github.com/brry/rdwd/issues/34>.
#' DEFAULT: `getOption("download.file.method")`
#' @param dfargs Named list of additional arguments passed to [download.file()]
#' Note that mode="wb" is already passed if `dbin=TRUE`
#' @param sleep Number. If not 0, a random number of seconds between 0 and
#' `sleep` is passed to [Sys.sleep()] after each download
#' to avoid getting kicked off the FTP-Server,
#' see note in [indexFTP()]. DEFAULT: 0
#' @param progbar Logical: present a progress bar with estimated remaining time?
#' If missing and length(file)==1, progbar is internally set to FALSE.
#' Only works if the R package `pbapply` is available. DEFAULT: TRUE (!quiet)
#' @param browse Logical: open repository via [browseURL()] and
#' return URL folder path? If TRUE, no data is downloaded.
#' If file has several values, only unique folders will be opened.
#' DEFAULT: FALSE
#' @param ntrunc Single integer: number of filenames printed in messages
#' before they get truncated with message "(and xx more)". DEFAULT: 2
#' @param file Deprecated since rdwd version 1.3.34, 2020-07-28.
#' @param quiet Logical: suppress message about directory / filenames?
#' DEFAULT: FALSE through [rdwdquiet()]
#' @param \dots Further arguments passed to [readDWD()],
#' like `fread`, `varnames`, `hr`, etc.
#
dataDWD <- function(
url,
base=dwdbase,
joinbf=FALSE,
dir=locdir(),
force=FALSE,
overwrite=!isFALSE(force),
read=TRUE,
dbin=TRUE,
method=getOption("download.file.method"),
dfargs=NULL,
sleep=0,
progbar=!quiet,
browse=FALSE,
ntrunc=2,
file=NULL,
quiet=rdwdquiet(),
...
)
{
if(!is.null(file)) tstop("The argument 'file' has been renamed to 'url' with rdwd version 1.3.34, 2020-07-28")
if(!is.atomic(url)) tstop("url must be a vector, not a ", class(url))
if(!is.character(url)) tstop("url must be char, not ", class(url))
base <- sub("/$","",base) # remove accidental trailing slash
url <- sub("^/","",url) # remove accidental leading slash
if(joinbf) url <- paste0(base,"/",url)
if(missing(progbar) & length(url)==1) progbar <- FALSE
if(any(url==""))
{
tmessage("Removing ", sum(url==""), " empty element(s) from url vector.")
url <- url[url!=""]
}
if(length(url)<1) tstop("The vector of urls to be downloaded is empty.")
# be safe from accidental vector input:
dir <- dir[1]
progbar <- progbar[1]
sleep <- sleep[1]
quiet <- quiet[1]
read <- read[1]
browse <- browse[1]
#
# open URL path(s) in internet browser:
if(browse)
{
folders <- unique(dirname(url))
sapply(folders, browseURL)
return(folders)
}
# create directory to store downloaded data
owd <- dirDWD(dir, quiet=quiet)
on.exit(setwd(owd))
# output file name(s)
hbas <- sub("^ftp://","https://", base) # for https base
outfile <- gsub(paste0(base,"/"), "", url)
outfile <- gsub(paste0(hbas,"/"), "", outfile)
outfile <- gsub("/", "_", outfile)
# force=NA management
if(is.null(force)) tstop("'force' cannot be NULL. Must be TRUE, FALSE, NA or a number.")
force <- rep(force, length=length(outfile)) # recycle vector
fT <- sapply(force, isTRUE)
fF <- sapply(force, isFALSE)
if(any(fT)) force[fT] <- 0
if(any(fF)) force[fF] <- Inf
force[is.na(force)] <- 24
force <- difftime(Sys.time(), file.mtime(outfile), units="h") > force
dontdownload <- file.exists(outfile) & !force
if( any(dontdownload) & !quiet )
{
tmessage(sum(dontdownload), " file", if(sum(dontdownload)>1)"s",
" already existing and not downloaded again: ",
berryFunctions::truncMessage(outfile[dontdownload], ntrunc=ntrunc, prefix=""),
"\nNow downloading ",sum(!dontdownload)," files...")
}
outfile <- newFilename(outfile, quiet=quiet, ignore=dontdownload,
overwrite=overwrite, ntrunc=ntrunc, tellignore=FALSE)
# since berryFunctions 1.15.9 (2017-06-14), outfile is now an absolute path
# Optional progress bar:
if(progbar) lapply <- pbapply::pblapply
# ------------------------------------------------------------------------------
# loop over each filename
dl_results <- lapply(seq_along(url), function(i)
if(!dontdownload[i])
{
# Actual file download:
dfdefaults <- list(url=url[i], destfile=outfile[i], method=method, quiet=TRUE)
if(dbin) dfdefaults <- c(dfdefaults, mode="wb")
e <- try(suppressWarnings(do.call(download.file,
berryFunctions::owa(dfdefaults, dfargs))), silent=TRUE)
# wait some time to avoid FTP bot recognition:
if(sleep!=0) Sys.sleep(runif(n=1, min=0, max=sleep))
return(e)
})
# check for download errors:
iserror <- sapply(dl_results, inherits, "try-error")
if(any(iserror))
{
ne <- sum(iserror)
msg <- paste0(ne, " Download", if(ne>1) "s have" else " has",
" failed (out of ",length(iserror),").",
if(read)" Setting read=FALSE.")
read <- FALSE
msg <- paste0(msg, " download.file error",if(ne>1) "s",":\n")
msg2 <- sapply(dl_results[iserror], function(e)attr(e,"condition")$message)
msg2 <- berryFunctions::truncMessage(msg2, ntrunc=15, prefix="", midfix="", altnix="", sep="\n")
if(any(!substr(url[iserror], 1, 4) %in% c("ftp:","http")))
msg2 <- paste0(msg2, "\n- dataDWD needs urls starting with 'ftp://' or 'https://'. ",
"You can use joinbf=TRUE for relative links.")
if(grepl("cannot open URL", msg2) || grepl("Kann URL .* nicht", msg2))
msg2 <- paste0(msg2, "\n- If files have been renamed on the DWD server, ",
"see https://bookdown.org/brry/rdwd/fileindex.html")
msg <- paste0(msg, msg2)
warning(msg, call.=FALSE)
}
# ------------------------------------------------------------------------------
# Output: Read the file or outfile name:
output <- outfile
if(read) output <- readDWD(file=outfile, quiet=quiet, progbar=progbar, ...)
# output:
return(invisible(output))
}