From 35b43a644c56d2fbc2e426b25b98a50309dc08fc Mon Sep 17 00:00:00 2001 From: benjamin Date: Mon, 26 Apr 2021 23:59:40 +0200 Subject: [PATCH] Add baseline test for ECCC observations Fix method of data acquisition from ECCC --- CHANGELOG.rst | 6 +- tests/provider/eccc/test_api.py | 117 ++++++++++++++++++ wetterdienst/provider/eccc/observation/api.py | 33 +++-- 3 files changed, 142 insertions(+), 14 deletions(-) create mode 100644 tests/provider/eccc/test_api.py diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 7a08d1322..2967bccff 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,11 +4,11 @@ Changelog Development *********** -- Add origin and metric unit mappings to services -- Use argument "metric" in request classes to convert origin units to metric, set to default +- Add origin and si unit mappings to services +- Use argument "si_units" in request classes to convert origin units to si, set to default - Improve caching behaviour by introducing optional ``WD_CACHE_DIR`` and ``WD_CACHE_DISABLE`` environment variables. Thanks, @meteoDaniel! - +- Add baseline test for ECCC observations 0.17.0 (08.04.2021) ******************* diff --git a/tests/provider/eccc/test_api.py b/tests/provider/eccc/test_api.py new file mode 100644 index 000000000..81f4e5a19 --- /dev/null +++ b/tests/provider/eccc/test_api.py @@ -0,0 +1,117 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2018-2021, earthobservations developers. +# Distributed under the MIT License. See LICENSE for more info. 
+import numpy as np +import pandas as pd +import pytz +from pandas.testing import assert_frame_equal + +from wetterdienst.provider.eccc.observation import EcccObservationRequest + + +def test_eccc_api_stations(): + request = EcccObservationRequest( + parameter="DAILY", + resolution="DAILY", + start_date="1990-01-01", + end_date="1990-01-02", + humanize=True, + tidy=True, + si_units=False, + ).filter_by_station_id(station_id=(14,)) + + expected = pd.DataFrame( + { + "station_id": ["14"], + "from_date": [pd.Timestamp("1984-01-01", tz=pytz.UTC)], + "to_date": [pd.Timestamp("1996-01-01", tz=pytz.UTC)], + "height": [4.0], + "latitude": [48.87], + "longitude": [-123.28], + "name": ["ACTIVE PASS"], + "state": ["BRITISH COLUMBIA"], + } + ) + + assert_frame_equal(request.df, expected) + + +def test_eccc_api_values(): + request = EcccObservationRequest( + parameter="DAILY", + resolution="DAILY", + start_date="1980-01-01", + end_date="1980-01-02", + humanize=True, + tidy=True, + si_units=False, + ).filter_by_station_id(station_id=(1652,)) + + values = request.values.all().df + + expected_df = pd.DataFrame( + { + "date": [ + pd.Timestamp("1980-01-01", tz=pytz.UTC), + pd.Timestamp("1980-01-02", tz=pytz.UTC), + ] + * 11, + "parameter": pd.Categorical( + [ + "temperature_air_max_200", + "temperature_air_max_200", + "temperature_air_min_200", + "temperature_air_min_200", + "temperature_air_200", + "temperature_air_200", + "heating_degree_days", + "heating_degree_days", + "cooling_degree_days", + "cooling_degree_days", + "precipitation_height_rain", + "precipitation_height_rain", + "snow_depth_new", + "snow_depth_new", + "precipitation_height", + "precipitation_height", + "snow_depth", + "snow_depth", + "wind_direction_max_velocity", + "wind_direction_max_velocity", + "wind_gust_max", + "wind_gust_max", + ] + ), + "value": [ + -16.3, + -16.4, + -29.1, + -28.3, + -22.7, + -22.4, + 40.7, + 40.4, + 0.0, + 0.0, + 0.0, + 0.0, + 1.8, + 0.0, + 0.8, + 0.0, + 19.0, + 20.0, + np.nan, +
np.nan, + np.nan, + np.nan, + ], + "quality": pd.Categorical([np.nan] * 22), + "station_id": pd.Categorical(["1652"] * 22), + "dataset": pd.Categorical(["daily"] * 22), + } + ) + + assert_frame_equal( + values.reset_index(drop=True), expected_df, check_categorical=False + ) diff --git a/wetterdienst/provider/eccc/observation/api.py b/wetterdienst/provider/eccc/observation/api.py index 1a383fd56..aed080f58 100644 --- a/wetterdienst/provider/eccc/observation/api.py +++ b/wetterdienst/provider/eccc/observation/api.py @@ -199,6 +199,8 @@ def _collect_station_parameter( df = df.reset_index(drop=True) + df = df.drop(columns=["data quality"], errors="ignore") + if self.stations.stations.tidy: df = self._tidy_up_dataframe(df) @@ -213,21 +215,30 @@ def _create_file_urls( self, station_id: str, start_year: int, end_year: int ) -> Generator[str, None, None]: # TODO: make faster, requests per month take too long! - if self.stations.stations.resolution != Resolution.HOURLY: + # Build one request URL per period covered by the requested years: + # - hourly resolution: one URL per month (Year and Month parameters) + # - any other resolution: one URL per year (Year parameter only) + # This replaces the former single URL without date parameters that was + # used for non-hourly resolutions. + resolution = self.stations.stations.resolution + + freq = "Y" + if resolution == Resolution.HOURLY: + freq = "M" + + # Request only the periods that are needed to reduce the amount of data being + # downloaded and parsed + for date in pd.date_range( + f"{start_year}-01-01", f"{end_year + 1}-01-01", freq=freq + ): url = self._base_url.format(int(station_id), self._timeframe) - yield url - else: - # For hourly data request only necessary data to reduce amount of data being - # downloaded and parsed - for date in pd.date_range( - f"{start_year}-01-01", f"{end_year + 1}-01-01", freq="M", closed=None - ): - url = self._base_url.format(int(station_id), self._timeframe) + url += f"&Year={date.year}" - url += f"&Year={date.year}&Month={date.month}" + if resolution == Resolution.HOURLY: + url += f"&Month={date.month}" - yield url + yield url class
EcccObservationRequest(ScalarRequestCore):