Skip to content

Commit

Permalink
Abandon 'recent' observation type in favour of 'historical'
Browse files Browse the repository at this point in the history
  • Loading branch information
jdemaeyer committed Mar 24, 2021
1 parent 39f7d1a commit 8ba7302
Show file tree
Hide file tree
Showing 8 changed files with 18 additions and 80 deletions.
23 changes: 1 addition & 22 deletions brightsky/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,11 +447,10 @@ def parse(self):
with zipfile.ZipFile(self.path) as zf:
dwd_station_id = self.parse_station_id(zf)
wmo_station_id = dwd_id_to_wmo(dwd_station_id)
observation_type = self.parse_observation_type()
lat_lon_history = self.parse_lat_lon_history(zf, dwd_station_id)
for record in self.parse_records(zf, lat_lon_history):
yield {
'observation_type': observation_type,
'observation_type': 'historical',
'dwd_station_id': dwd_station_id,
'wmo_station_id': wmo_station_id,
**record
Expand All @@ -463,15 +462,6 @@ def parse_station_id(self, zf):
return m.group(1)
raise ValueError(f"Unable to parse station ID for {self.path}")

def parse_observation_type(self):
    """Derive the observation type from the archive's file name.

    Only the final component of ``self.path`` is inspected, so directory
    names that happen to contain one of the suffixes are ignored.

    Returns:
        ``'recent'`` if the file name ends with ``_akt.zip``,
        ``'historical'`` if it ends with ``_hist.zip``.

    Raises:
        ValueError: If the file name ends with neither suffix.
    """
    filename = os.path.basename(self.path)
    if filename.endswith('_akt.zip'):
        return 'recent'
    if filename.endswith('_hist.zip'):
        return 'historical'
    raise ValueError(
        f'Unable to determine observation type from path "{self.path}"')

def parse_lat_lon_history(self, zf, dwd_station_id):
with zf.open(f'Metadaten_Geographie_{dwd_station_id}.txt') as f:
reader = csv.DictReader(
Expand Down Expand Up @@ -671,9 +661,6 @@ def parse_lat_lon_history(self, zf, dwd_station_id):

def parse_reader(self, filename, reader, lat_lon_history):
hour_values = []
# First row is at :00, which we will already have filled up with
# the last :50 entry of another file (see below)
next(reader)
for row in reader:
timestamp = datetime.datetime.strptime(
row['MESS_DATUM'], '%Y%m%d%H%M').replace(tzinfo=tzutc())
Expand All @@ -688,14 +675,6 @@ def parse_reader(self, filename, reader, lat_lon_history):
yield self._make_record(
timestamp, hour_values, filename, lat_lon_history)
hour_values.clear()
observation_type = self.parse_observation_type()
if observation_type == 'historical' and timestamp.minute == 50:
# Not 100 % accurate but better than taking only the :00 value of
# another file. For observation_type 'recent', we'll get a proper
# midnight value from the 'current' observation
yield self._make_record(
timestamp + datetime.timedelta(minutes=10),
hour_values, filename, lat_lon_history)

def _make_record(self, timestamp, hour_values, filename, lat_lon_history):
lat, lon, height, station_name = self._station_params(
Expand Down
14 changes: 7 additions & 7 deletions docs/brightsky.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@ info:
## Data Origin
Data with the observation types `current`, `recent`, and `historical` is published by [DWD's extensive network of observation stations](https://www.dwd.de/DE/leistungen/klimadatendeutschland/stationsliste.html). The difference between these three types lies only in the level of quality assurance that the DWD has provided (see the [DWD's Climate Data Center README](https://opendata.dwd.de/climate_environment/CDC/Readme_intro_CDC_ftp.txt)). Data with observation type `forecast` comes from [MOSMIX](https://www.dwd.de/EN/research/weatherforecasting/met_applications/nwp_applications/mosmix_application.html).
Data with the observation types `current` and `historical` is published by [DWD's extensive network of observation stations](https://www.dwd.de/DE/leistungen/klimadatendeutschland/stationsliste.html). The difference between these two types lies only in the level of quality assurance that the DWD has provided (see the [DWD's Climate Data Center README](https://opendata.dwd.de/climate_environment/CDC/Readme_intro_CDC_ftp.txt)). Data with observation type `forecast` comes from [MOSMIX](https://www.dwd.de/EN/research/weatherforecasting/met_applications/nwp_applications/mosmix_application.html).
All source files can be found on DWD's [Open Data Server](https://www.dwd.de/EN/ourservices/opendata/opendata.html):
<table>
<tr><th>Observation Type</th><th>Location</th></tr>
<tr><td><code>historical</code> / <code>recent</code></td><td><a href="https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/hourly/">https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/hourly/</a></td></tr>
<tr><td><code>historical</code></td><td><a href="https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/hourly/">https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/hourly/</a></td></tr>
<tr><td><code>current</code></td><td><a href="https://opendata.dwd.de/weather/weather_reports/poi/">https://opendata.dwd.de/weather/weather_reports/poi/</a></td></tr>
<tr><td><code>forecast</code></td><td><a href="https://opendata.dwd.de/weather/local_forecasts/mos/MOSMIX_S/all_stations/kml/">https://opendata.dwd.de/weather/local_forecasts/mos/MOSMIX_S/all_stations/kml/</a></td></tr>
</table>
Expand All @@ -35,7 +35,7 @@ info:
The DWD provides data from an extensive network of its own and third-party meteorological observation stations, located mainly but not exclusively in Germany. Each of these is identified by a station ID. In an ideal world, these stations would provide an endless stream of one record of "historical weather" and one record of "forecasted weather" per hour. Sadly (and, I am sure, much to the annoyance of the DWD), in the real world, stations have outages and measurement errors, and sometimes even move. Moreover, the weather forecasts come from a model that is not tied to any physical observation station location, and predicts the meteorological parameters at a whole bunch of additional locations (which each get their own station ID).
To accommodate these real-world complications, Bright Sky internally uses a concept of _sources_. Each source is a unique combination of location and observation type. Hence, each physical observation station (with a single station ID) will typically correspond to five sources in Bright Sky: one for each of the five observation types `forecast`, `synop`, `current`, `recent`, and `historical`. If the station has moved during its lifetime, you may even find more than one source for the same station ID and observation type.
To accommodate these real-world complications, Bright Sky internally uses a concept of _sources_. Each source is a unique combination of location and observation type. Hence, each physical observation station (with a single station ID) will typically correspond to four sources in Bright Sky: one for each of the four observation types `forecast`, `synop`, `current`, and `historical`. If the station has moved during its lifetime, you may even find more than one source for the same station ID and observation type.
Bright Sky's _source IDs_ have no meaning in the real world. They should be treated as the technical artifact they are, and not even relied on to stay constant (although they likely will within each major release of Bright Sky).
Expand Down Expand Up @@ -123,7 +123,7 @@ paths:
dwd_station_id: '01766'
wmo_station_id: '10315'
station_name: Münster/Osnabrück
observation_type: recent
observation_type: historical
lat: 52.1344
lon: 7.6969
height: 47.8
Expand Down Expand Up @@ -338,7 +338,7 @@ paths:
dwd_station_id: '01766'
wmo_station_id: '10315'
station_name: Münster/Osnabrück
observation_type: recent
observation_type: historical
lat: 52.1344
lon: 7.6969
height: 47.8
Expand Down Expand Up @@ -606,8 +606,8 @@ components:
observation_type:
description: Source type
type: string
example: 'recent'
enum: ['forecast', 'synop', 'current', 'recent', 'historical']
example: 'historical'
enum: ['forecast', 'synop', 'current', 'historical']
lat:
description: Station latitude, in decimal degrees
type: number
Expand Down
2 changes: 1 addition & 1 deletion docs/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ <h5 class="subtitle">JSON API for DWD's open weather data.</h5>
<em>"dwd_station_id"</em>: "01766",
<em>"wmo_station_id"</em>: "10315",
<em>"station_name"</em>: "Münster/Osnabrück",
<em>"observation_type"</em>: "recent",
<em>"observation_type"</em>: "historical",
<em>"first_record"</em>: "2020-01-01T00:00:00+00:00",
<em>"last_record"</em>: "2020-08-13T23:00:00+00:00",
<em>"lat"</em>: 52.1344,
Expand Down
2 changes: 1 addition & 1 deletion scripts/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ def query_():
FROM sources
WHERE observation_type = %s
""",
('recent',))
('historical',))
rows = random.choices(cur.fetchall(), k=100)
station_kwargs = [
{'dwd_station_id': row['dwd_station_id']} for row in rows]
Expand Down
6 changes: 3 additions & 3 deletions tests/test_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

SOURCES = [
{
'observation_type': 'recent',
'observation_type': 'historical',
'lat': 10.1,
'lon': 20.2,
'height': 30.3,
Expand All @@ -17,7 +17,7 @@
'station_name': 'Münster',
},
{
'observation_type': 'recent',
'observation_type': 'historical',
'lat': 40.4,
'lon': 50.5,
'height': 60.6,
Expand All @@ -26,7 +26,7 @@
'station_name': 'Aurich',
},
{
'observation_type': 'recent',
'observation_type': 'historical',
'lat': 60.6,
'lon': 70.7,
'height': 80.8,
Expand Down
14 changes: 3 additions & 11 deletions tests/test_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ def test_observations_parser_parses_metadata(data_dir):
p = WindObservationsParser(
path=data_dir / 'observations_recent_FF_akt.zip')
metadata = {
'observation_type': 'recent',
'observation_type': 'historical',
'source': (
'Observations:Recent:produkt_ff_stunde_20180915_20200317_04911.txt'
),
Expand All @@ -197,13 +197,6 @@ def test_observations_parser_parses_metadata(data_dir):
assert is_subset(metadata, record)


def test_observations_parser_parses_historical_observation_type(data_dir):
    """Every record parsed from a ``*_hist.zip`` archive is 'historical'."""
    archive = data_dir / 'observations_recent_P0_hist.zip'
    parser = PressureObservationsParser(path=archive)
    # Collect the distinct observation types; none other than
    # 'historical' may appear.
    seen_types = {rec['observation_type'] for rec in parser.parse()}
    assert seen_types <= {'historical'}


def test_observations_parser_handles_missing_values(data_dir):
p = WindObservationsParser(
path=data_dir / 'observations_recent_FF_akt.zip')
Expand Down Expand Up @@ -339,11 +332,10 @@ def test_wind_gusts_observations_parser(data_dir):
_test_parser(
WindGustsObservationsParser,
data_dir / 'observations_recent_extrema_wind_akt.zip',
{'timestamp': '2018-12-03 01:00',
'wind_gust_speed': 6.9, 'wind_gust_direction': 210},
{'timestamp': '2018-12-03 00:00',
'wind_gust_speed': 6.3, 'wind_gust_direction': 210},
{'timestamp': '2020-06-04 23:00',
'wind_gust_speed': 6.2, 'wind_gust_direction': 270},
count=9,
meta_path=data_dir / 'observations_recent_extrema_wind_akt_meta.zip'
)

Expand Down
33 changes: 0 additions & 33 deletions tests/test_tasks.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import datetime

import pytest
from dateutil.tz import tzutc

from brightsky.export import DBExporter, SYNOPExporter
Expand Down Expand Up @@ -107,35 +106,3 @@ def test_clean_deletes_expired_forecast_current_synop_records(db):
assert [r['temperature'] for r in rows] == [10., 30., 40.]
rows = db.fetch('SELECT temperature FROM synop ORDER BY temperature')
assert [r['temperature'] for r in rows] == [60., 70.]


@pytest.mark.skip("Temporarily disabled until issue #108 is resolved")
def test_clean_deletes_expired_recent_records(db):
    """Check that ``clean()`` removes only the expired 'recent' record.

    Three weather records are exported: one 'historical' record six hours
    old, and two 'recent' records seven and six hours old. After
    ``clean()`` the test expects two rows to remain, with temperatures
    10 and 30, i.e. the seven-hour-old 'recent' record is gone.
    """
    # Build an aware UTC timestamp directly, truncated to the full hour
    # (datetime.utcnow() is deprecated and returns a naive datetime).
    now = datetime.datetime.now(tzutc()).replace(
        minute=0, second=0, microsecond=0)
    records = [
        {
            'observation_type': 'historical',
            'timestamp': now - datetime.timedelta(hours=6),
            **PLACE,
            'temperature': 10.,
        },
        {
            'observation_type': 'recent',
            'timestamp': now - datetime.timedelta(hours=7),
            **PLACE,
            'temperature': 20.,
        },
        {
            'observation_type': 'recent',
            'timestamp': now - datetime.timedelta(hours=6),
            **PLACE,
            'temperature': 30.,
        },
    ]
    DBExporter().export(records)
    assert len(db.table('weather')) == 3
    clean()
    assert len(db.table('weather')) == 2
    rows = db.fetch('SELECT temperature FROM weather ORDER BY temperature')
    assert [r['temperature'] for r in rows] == [10., 30.]
4 changes: 2 additions & 2 deletions tests/test_web.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
'wmo_station_id': '10315',
},
{
'observation_type': 'recent',
'observation_type': 'historical',
'lat': 52.1344,
'lon': 7.6969,
'height': 47.8,
Expand Down Expand Up @@ -303,7 +303,7 @@ def test_weather_source_selection(data, api):
observation_types = {
s['id']: s['observation_type'] for s in resp.json['sources']}
for w in resp.json['weather'][:28]:
assert observation_types[w['source_id']] == 'recent'
assert observation_types[w['source_id']] == 'historical'
for w in resp.json['weather'][28:36]:
assert observation_types[w['source_id']] == 'current'
for w in resp.json['weather'][36:]:
Expand Down

0 comments on commit 8ba7302

Please sign in to comment.