-
Notifications
You must be signed in to change notification settings - Fork 14
/
__init__.py
423 lines (334 loc) · 12.4 KB
/
__init__.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
from __future__ import print_function
from wq.io import BaseIO, TupleMapper, TimeSeriesMapper
from wq.io.parsers.base import BaseParser
from wq.io.exceptions import NoData
from wq.io.util import flattened
from suds.client import Client
from suds.sudsobject import asdict, Object as SudsObject
from climata.base import WebserviceLoader, FilterOpt, DateOpt
from climata.base import fill_date_range, as_list
# WSDL document describing the AWDB SOAP web service.
url = 'https://wcc.sc.egov.usda.gov/awdbWebService/services?WSDL'

# Module-wide suds service proxy; stays None until get_server() is called.
_server = None


def get_server():
    """
    Return the shared suds service proxy, building it from ``url`` the first
    time it is requested and reusing it on every later call.
    """
    global _server
    if _server is not None:
        return _server
    _server = Client(url).service
    return _server
class SnotelIO(WebserviceLoader, BaseParser, TupleMapper, BaseIO):
    """
    Shared base for the IO classes that read from the SNOTEL AWDB SOAP web
    services.

    Subclasses name the remote operation to call via ``data_function`` and
    re-declare whichever filter options that operation accepts.
    """

    webservice_name = "awdbWebService"
    data_function = None

    # The default WebserviceLoader options are all ignored here; subclasses
    # override the ones that apply to them.
    start_date = DateOpt(ignored=True)
    end_date = DateOpt(ignored=True)
    state = FilterOpt(ignored=True)
    basin = FilterOpt(ignored=True)
    county = FilterOpt(ignored=True)
    station = FilterOpt(ignored=True)
    parameter = FilterOpt(ignored=True)

    def load(self):
        """
        Invoke ``data_function`` on the SOAP service with ``self.params`` and
        store the response in ``self.data`` as a list.

        An empty response is stored as ``[]``.  Otherwise the response is
        passed through ``as_list`` and each row is converted with ``asdict``
        when the first row is a suds object, or with ``str`` when it is not.
        """
        if self.debug:
            self.print_debug()

        call_args = self.params
        remote_call = getattr(get_server(), self.data_function)
        self.data = remote_call(**call_args)

        if len(self.data) == 0:
            self.data = []
            return

        self.data = as_list(self.data)
        # The type of the first row decides the conversion for every row.
        convert = asdict if isinstance(self.data[0], SudsObject) else str
        self.data = [convert(row) for row in self.data]

    # Records are not guaranteed to share one set of fields, so every record
    # is scanned for field names.  (Left to itself, BaseIO would infer the
    # field names from the first record alone.)
    scan_fields = True

    def print_debug(self):
        """
        Print the web service call about to be made, formatted as
        ``webservice_name.data_function(key=val,key=val,...)``.
        """
        arg_list = ','.join(
            '%s=%s' % pair
            for pair in self.params.items()
        )
        print('%s.%s(%s)' % (
            self.webservice_name,
            self.data_function,
            arg_list,
        ))
class StationIO(SnotelIO):
    """
    Metadata for every station in a region.  getStations() supplies the list
    of matching stations and getStationMetadata() is then called once per
    station to fetch its details.
    """

    data_function = 'getStations'

    # Default WebserviceLoader options that apply to getStations()
    state = FilterOpt(url_param='stateCds', multi=True)
    county = FilterOpt(url_param='countyNames', multi=True)
    basin = FilterOpt(url_param='hucs', multi=True)
    parameter = FilterOpt(url_param='elementCds', multi=True)

    # Options specific to getStations()
    min_latitude = FilterOpt(url_param='minLatitude')
    max_latitude = FilterOpt(url_param='maxLatitude')
    min_elevation = FilterOpt(url_param='minElevation')
    max_elevation = FilterOpt(url_param='maxElevation')
    ordinals = FilterOpt(url_param='ordinals')

    # Not to be confused with station (which is the stationTriplet)
    station_ids = FilterOpt(url_param='stationIds', multi=True)

    # heightDepths = FilterOpt(url_param='heightDepths')
    # The service expects this parameter in the form
    # <heightDepths><value>value</value><unitCd>unit</unitCd></heightDepths>
    # It is omitted: it doesn't seem important and isn't well-documented.

    default_params = {
        'logicalAnd': 'true',
    }

    def load(self):
        """
        Load the matching stations, then replace each entry in ``self.data``
        with the first record StationMetaIO returns for that station.
        """
        super(StationIO, self).load()
        detailed = []
        for triplet in self.data:
            meta = StationMetaIO(station=triplet, debug=self.debug)
            detailed.append(meta.data[0])
        self.data = detailed
class StationMetaIO(SnotelIO):
    """
    Thin wrapper around getStationMetadata().  StationIO uses it internally
    to look up the details of each station it finds.
    """

    data_function = 'getStationMetadata'

    # The station to describe is mandatory and is sent as stationTriplet.
    station = FilterOpt(required=True, url_param='stationTriplet')
class StationElementIO(SnotelIO):
    """
    Wrapper around getStationElements() that also attaches the element name
    (looked up through getElements()) to each record.
    """

    data_function = 'getStationElements'

    # Default WebserviceLoader options that apply to getStationElements()
    start_date = DateOpt(url_param='beginDate')
    end_date = DateOpt(url_param='endDate')
    station = FilterOpt(required=True, url_param='stationTriplet')

    def load(self):
        """
        Load the station's elements and add an ``element_name`` entry to each
        one, keyed off its ``elementCd``.
        """
        super(StationElementIO, self).load()
        name_by_code = ElementIO.get_names()
        for record in self.data:
            code = record['elementCd']
            record['element_name'] = name_by_code[code]
class StationDataIO(StationElementIO):
    """
    Common base of StationDailyDataIO and StationHourlyDataIO.  For each of
    the station's elements whose duration equals ``duration``, the actual
    data is fetched by instantiating ``inner_io_class``.
    """

    nested = True

    # Default WebserviceLoader options that apply here
    start_date = DateOpt(url_param='beginDate', required=True)
    end_date = DateOpt(url_param='endDate', required=True)
    parameter = FilterOpt()

    # Supplied by subclasses: the IO class used for the inner request and
    # the duration value that element records must carry.
    inner_io_class = None
    duration = None

    @property
    def params(self):
        """
        Request parameters for the outer call, with ``parameter`` removed.
        """
        request_params = super(StationDataIO, self).params
        # The parameter filter (when given) is not sent with the initial
        # request; load() applies it to the results afterwards.
        request_params.pop('parameter', None)
        return request_params

    def load(self):
        """
        Load the station's elements, keep those matching ``duration`` (and
        the ``parameter`` filter, when set), and store an ``inner_io_class``
        instance under ``data`` on each element that is kept.
        """
        super(StationDataIO, self).load()

        matched = []
        for row in self.data:
            # Skip records for any other duration ...
            if row['duration'] != self.duration:
                continue

            # ... and, when a parameter filter is set, any other element.
            wanted = self.getvalue('parameter')
            if wanted and row['elementCd'] != wanted:
                continue

            # getStationElements() occasionally lists a parameter that has
            # no data in the requested timeframe.  The resulting NoData is
            # swallowed and the parameter is left out of the results.
            try:
                series = self.inner_io_class(
                    station=row['stationTriplet'],
                    parameter=row['elementCd'],
                    start_date=self.getvalue('start_date'),
                    end_date=self.getvalue('end_date'),
                    debug=self.debug,
                )
            except NoData:
                continue
            else:
                row['data'] = series
                matched.append(row)

        self.data = matched
class ElementIO(SnotelIO):
    """
    Every SNOTEL element, with its name, code and units.
    """

    data_function = 'getElements'

    @classmethod
    def get_elements(cls):
        """
        Return a single instance that is created on the first call and kept
        on the class, so later calls skip the retrieval.
        """
        try:
            return cls._cache
        except AttributeError:
            cls._cache = cls()
            return cls._cache

    @classmethod
    def get_names(cls):
        """
        Return a dict mapping each element code to its element name.
        """
        return dict(
            (element.elementcd, element.name)
            for element in cls.get_elements()
        )
class DailyDataIO(SnotelIO):
    """
    Wrapper for getData(), used internally by StationDailyDataIO
    """

    data_function = 'getData'

    # Applicable WebserviceLoader default options
    station = FilterOpt(required=True, url_param='stationTriplets')
    parameter = FilterOpt(required=True, url_param='elementCd')
    start_date = DateOpt(required=True, url_param='beginDate')
    end_date = DateOpt(required=True, url_param='endDate')

    # HeightDepth parameters don't seem to be necessary.
    default_params = {
        'ordinal': 1,
        'duration': 'DAILY',
        'getFlags': 'true',
        'alwaysReturnDailyFeb29': 'false',
    }

    def parse(self):
        """
        Expand the first record of the response into one row per date.

        The record's ``beginDate`` and ``endDate`` are turned into a date
        range, which is zipped with the ``values`` and ``flags`` lists so
        that ``self.data`` becomes a list of dicts with ``date``, ``value``
        and ``flag`` keys.

        Raises NoData when the response is empty, when its first record is
        empty, or when that record has no ``values`` entry.
        """
        # load() stores [] for an empty response.  Indexing that would raise
        # IndexError, which StationDataIO.load() does not catch, so report
        # it as NoData (the same check HourlyDataIO.load() performs).
        if not self.data:
            raise NoData

        data = self.data[0]
        if not data or 'values' not in data:
            raise NoData

        bd = data['beginDate']
        ed = data['endDate']
        dates = fill_date_range(bd, ed, date_format='%Y-%m-%d %H:%M:%S')
        vals = as_list(data['values'])
        flags = as_list(data['flags'])

        self.data = [{
            'date': date,
            'value': val,
            'flag': flag
        } for date, val, flag in zip(dates, vals, flags)]
class StationDailyDataIO(StationDataIO):
    """
    All daily data for one station, optionally narrowed to a single
    parameter.  The outer IO lists the available parameters/elements, and
    each item in that list carries a nested IO holding the actual data.

    Usage:

        params = StationDailyDataIO(
            station='302:OR:SNTL',
            start_date='2014-07-01',
            end_date='2014-07-31'
        )
        for param in params:
            print(param.element_name)
            for row in param.data:
                print("   ", row.date, row.value, param.storedunitcd)
    """

    inner_io_class = DailyDataIO
    duration = "DAILY"
class RegionDailyDataIO(StationIO):
    """
    One-stop IO that loads both site metadata and daily data for a region
    (a state, county, or basin).  Behind the scenes it calls:

     - getStations()
     - getStationMetadata()
     - getStationElements()
     - getData()

    The outer IO is the list of sites in the region, as produced by
    StationIO, except that every station gains a "data" property pointing to
    an inner time series IO for that site.  The inner IO is based on
    StationDailyDataIO but flattened to avoid multiple levels of nesting.
    parameter is optional but recommended (without it, all available data
    for all sites is returned).

    Usage:

        sites = RegionDailyDataIO(
            basin='17060105',
            start_date='2014-07-01',
            end_date='2014-07-31',
            parameter='TAVG',
        )
        for site in sites:
            print(site.name)
            for row in site.data:
                print("   ", row.date, row.value, row.storedunitcd)
    """

    nested = True

    # Default WebserviceLoader options that apply here
    start_date = DateOpt(required=True)
    end_date = DateOpt(required=True)
    parameter = FilterOpt(url_param='elementCds')

    @property
    def params(self):
        """
        Request parameters for the outer call, without the date range.
        """
        region_params = super(RegionDailyDataIO, self).params
        # Only the inner IO makes use of the start and end dates.
        for inner_only in ('start_date', 'end_date'):
            del region_params[inner_only]
        return region_params

    def load(self):
        """
        Load the region's stations, then store a flattened
        StationDailyDataIO under ``data`` on each station.
        """
        super(RegionDailyDataIO, self).load()
        for station in self.data:
            inner_options = dict(
                station=station['stationTriplet'],
                start_date=self.getvalue('start_date'),
                end_date=self.getvalue('end_date'),
                parameter=self.getvalue('parameter'),
                debug=self.debug,
            )
            station['data'] = flattened(StationDailyDataIO, **inner_options)
class HourlyDataIO(TimeSeriesMapper, SnotelIO):
    """
    Thin wrapper around getHourlyData().  StationHourlyDataIO uses it
    internally to fetch the data for each element.
    """

    data_function = 'getHourlyData'

    # Configuration for TimeSeriesMapper
    date_formats = [
        '%Y-%m-%d %H:%M:%S',
        '%Y-%m-%d %H:%M'
    ]

    # Default WebserviceLoader options that apply to getHourlyData()
    station = FilterOpt(required=True, url_param='stationTriplets')
    parameter = FilterOpt(required=True, url_param='elementCd')
    start_date = DateOpt(required=True, url_param='beginDate')
    end_date = DateOpt(required=True, url_param='endDate')

    # Options specific to getHourlyData()
    begin_hour = FilterOpt(url_param='beginHour')
    end_hour = FilterOpt(url_param='endHour')

    # HeightDepth parameters don't seem to be necessary.
    default_params = {
        'ordinal': 1,
    }

    def load(self):
        """
        Load the response and replace ``self.data`` with the rows found
        under ``values`` in its first record, each converted with ``asdict``.

        Raises NoData when the response is empty or its first record has no
        ``values`` entry.
        """
        super(HourlyDataIO, self).load()

        if not self.data or 'values' not in self.data[0]:
            raise NoData

        hourly_rows = as_list(self.data[0]['values'])
        self.data = [asdict(row) for row in hourly_rows]
class StationHourlyDataIO(StationDataIO):
    """
    All hourly data for one station, optionally narrowed to a single
    parameter.  The outer IO lists the available parameters/elements, and
    each item in that list carries a nested IO holding the actual data.

    Usage:

        params = StationHourlyDataIO(
            station='302:OR:SNTL',
            start_date='2014-07-01',
            end_date='2014-07-02',
        )
        for param in params:
            print(param.element_name)
            for row in param.data:
                print("   ", row.datetime, row.value, param.storedunitcd)
    """

    inner_io_class = HourlyDataIO
    duration = "HOURLY"
class ForecastPeriodIO(SnotelIO):
    """
    Wrapper for getForecastPeriods().  Declares no filter options of its own.
    """

    data_function = 'getForecastPeriods'
class ForecastDataIO(SnotelIO):
    """
    Wrapper for getForecast().  Station, parameter, forecast period and
    publication date are all required.
    """

    data_function = 'getForecast'

    station = FilterOpt(required=True, url_param='stationTriplets')
    parameter = FilterOpt(required=True, url_param='elementCd')
    forecast_period = FilterOpt(required=True, url_param='forecastPeriod')
    publication_date = DateOpt(required=True, url_param='publicationDate')
class ForecastIO(SnotelIO):
    """
    Wrapper for getForecasts().  Station, parameter and forecast period are
    all required.
    """

    data_function = 'getForecasts'

    station = FilterOpt(required=True, url_param='stationTriplet')
    parameter = FilterOpt(required=True, url_param='elementCd')
    forecast_period = FilterOpt(required=True, url_param='forecastPeriod')