/
parseFilename.R
184 lines (179 loc) · 5.78 KB
/
parseFilename.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
#' Parse a filename
#'
#' Attempts to extract meaningful information from a filename, typically the
#' date and time a recording started.
#'
#' @details
#' ## Determining the format
#' It is sometimes impossible to accurately determine the format of
#' a filename, e.g. when an eight-digit 'AudioMoth HEX' name only contains
#' digits it could be confused with a YYYYMMDD format. If a list of filenames is
#' given and the "match" format is specified then an effort will be made to
#' determine the most likely format that applies to all filenames. An error is
#' raised if no single format fits every file.
#'
#' ## Supported formats
#' * **AudioMoth** - The newer format for AudioMoth devices consists of a
#' standard YYYYMMDD_HHMMSS.wav format. Specifying 'AudioMoth' forces a call
#' to the `audiomoth()` function from the `seewave` package
#' \insertCite{seewave2008}{sonicscrewdriver}.
#' * **AudioMoth HEX** - Older format for AudioMoth devices consisting of eight
#' hexadecimal characters. Conversion is handled by a call to
#' `seewave::audiomoth()`.
#' * **timestamp** - A ten-digit number of seconds counted from the R standard
#' origin of 1970-01-01 00:00:00 UTC.
#' * **Wildlife Acoustics SM2** - Can also be used for Wildlife Acoustics SM4
#' devices. Conversion is handled by a call to `seewave::songmeter()`.
#' * **Wildlife Acoustics SM3** - Conversion is handled by a call to
#' `seewave::songmeter()`.
#' * **YYYYMMDD** - A date without a time of day.
#' * **YYYYMMDD_HHMMSS** - A standard date-time format.
#'
#' @param file A filename (or list of filenames).
#' @param format Optionally force a given format (see Details). If NULL (default)
#' an attempt is made to automatically detect the format for each file. If "match"
#' and a list of filenames is given then an attempt will be made to find a format
#' that matches all files. This may give incorrect results if the filename is
#' ambiguous (see Details). Any other value is applied to every file.
#' @param timezone Optionally set a timezone. If NULL, the YYYYMMDD,
#' YYYYMMDD_HHMMSS and timestamp formats use "UTC", and the AudioMoth formats
#' pass an empty timezone to `seewave::audiomoth()`.
#' @return For a single filename, a list with elements `filename`, `match`
#' (the format used) and `datetime`. For a list of filenames, a list holding
#' one such result per file.
#' \cr\cr
#' It is possible to determine additional properties from some files, these will
#' be added to the list.
#' @references
#' \insertAllCited{}
#' @export
#' @examples
#' parseFilename("5E90A4D4.wav")
#'
parseFilename <- function(file, format = NULL, timezone = NULL) {
  if (is.list(file)) {
    if (identical(format, "match")) {
      if (length(file) == 0L) {
        return(list())
      }
      # The single format detected for every file, or NULL when detection
      # fails for any file or the detected formats disagree.
      commonFormat <- function(alternative) {
        detected <- lapply(file, .detectFormat, alternative = alternative)
        if (any(vapply(detected, is.null, logical(1)))) {
          return(NULL)
        }
        detected <- unique(vapply(detected, as.character, character(1)))
        if (length(detected) == 1L) detected else NULL
      }
      format <- commonFormat(0)
      if (is.null(format)) {
        # Retry with the alternative reading of ambiguous names
        # (eight digits interpreted as AudioMoth HEX rather than YYYYMMDD).
        format <- commonFormat(1)
      }
      if (is.null(format)) {
        stop("Could not determine a format matching all files")
      }
    }
    # NULL (detect per file) or an explicit format: apply to every element.
    return(lapply(file, parseFilename, format = format, timezone = timezone))
  }

  # For a single file "match" is equivalent to automatic detection.
  if (is.null(format) || identical(format, "match")) {
    format <- .detectFormat(file)
    if (is.null(format)) {
      stop("Could not determine format of ", file)
    }
  }
  if (!format %in% .knownFileFormats()) {
    stop(paste("Unknown format:", format))
  }

  if (format %in% c("AudioMoth HEX", "AudioMoth")) {
    tz <- if (is.null(timezone)) "" else timezone
    data <- seewave::audiomoth(file, tz = tz)
    # Label the times with the requested timezone when none is set;
    # a missing attribute is treated the same as an empty one.
    tzone <- attr(data[1, "time"], "tzone")
    if (is.null(tzone) || identical(tzone, "")) {
      attr(data[, "time"], "tzone") <- tz
    }
    return(list(
      filename = file,
      match = format,
      datetime = data[, "time"]
    ))
  }

  if (format %in% c("Wildlife Acoustics SM2", "Wildlife Acoustics SM3")) {
    data <- seewave::songmeter(file)
    return(list(
      filename = file,
      match = format,
      datetime = as.POSIXct(data[, "time"], tz = timezone),
      model = data[, "model"],
      prefix = data[, "prefix"],
      mic = data[, "mic"],
      geo = data[, "geo"]
    ))
  }

  # Remaining formats are decoded directly from the extension-less basename.
  if (is.null(timezone)) {
    timezone <- "UTC"
  }
  bn <- tools::file_path_sans_ext(basename(file))
  datetime <- switch(format,
    # Parse in the target timezone so that the local timezone (and its
    # daylight-saving flag) cannot influence the result.
    "YYYYMMDD" = as.POSIXct(
      strptime(bn, "%Y%m%d", tz = timezone),
      tz = timezone
    ),
    "YYYYMMDD_HHMMSS" = as.POSIXct(
      strptime(bn, "%Y%m%d_%H%M%S", tz = timezone),
      tz = timezone
    ),
    # A character origin is interpreted as UTC, unlike
    # as.POSIXct("1970-01-01") which is midnight in the local timezone.
    "timestamp" = as.POSIXct(
      as.numeric(bn),
      origin = "1970-01-01",
      tz = timezone
    )
  )
  list(
    filename = file,
    match = format,
    datetime = datetime
  )
}
# Character vector of the format names that parseFilename() accepts; any
# other format value is rejected there as unknown.
.knownFileFormats <- function() {
  audiomoth <- c("AudioMoth HEX", "AudioMoth")
  wildlife <- c("Wildlife Acoustics SM2", "Wildlife Acoustics SM3")
  dated <- c("YYYYMMDD", "YYYYMMDD_HHMMSS")
  c(audiomoth, "timestamp", wildlife, dated)
}
# Guess the naming format of a single filename.
#
# file        A single filename; only its basename and extension are examined.
# alternative When 1, an all-digit eight-character name is reported as
#             "AudioMoth HEX" instead of the default "YYYYMMDD".
#
# Returns the format name as a string, or NULL when no pattern matches. For
# the AudioMoth HEX and Wildlife Acoustics formats the string carries an
# "extension_match" attribute set to FALSE when the file extension is not one
# of those expected for that format (compared case-insensitively).
.detectFormat <- function(file, alternative = 0) {
  bn <- tools::file_path_sans_ext(basename(file))
  # Lower-cased so that e.g. ".WAV" is treated the same as ".wav".
  ext <- tolower(tools::file_ext(file))

  # Mark the format when the extension is not among the expected ones.
  flagExtension <- function(format, expected) {
    if (!ext %in% expected) {
      attr(format, "extension_match") <- FALSE
    }
    format
  }

  # Check for timestamp
  if (grepl("^[0-9]{10}$", bn)) {
    return("timestamp")
  }
  # Check for YYYYMMDD (ambiguous with an all-digit AudioMoth HEX name)
  if (grepl("^[0-9]{8}$", bn)) {
    if (alternative == 1) {
      return("AudioMoth HEX")
    }
    return("YYYYMMDD")
  }
  # Check for AudioMoth old hexadecimal
  if (grepl("^[0-9A-Fa-f]{8}$", bn)) {
    return(flagExtension("AudioMoth HEX", "wav"))
  }
  # Check for YYYYMMDD_HHMMSS
  if (grepl("^[0-9]{8}_[0-9]{6}$", bn)) {
    return("YYYYMMDD_HHMMSS")
  }
  # Check for Wildlife Acoustics format PREFIX_YYYYMMDD_HHMMSS
  if (grepl("^[^_]*_[0-9]{8}_[0-9]{6}$", bn)) {
    return(flagExtension("Wildlife Acoustics SM2", c("wav", "wac")))
  }
  # Check for Wildlife Acoustics SM3 format; inside the bracket expression
  # "$" is a literal character, so date and time may be joined by "_" or "$".
  if (grepl("^[^_]*_[01+-]{3}_[0-9]{8}[_$][0-9]{6}$", bn)) {
    return(flagExtension("Wildlife Acoustics SM3", c("wav", "wac")))
  }
  NULL
}