Skip to content

Commit

Permalink
Add baseline test for ECCC observations
Browse files Browse the repository at this point in the history
Fix method of data acquisition from ECCC
  • Loading branch information
gutzbenj committed Apr 26, 2021
1 parent d7e2115 commit 35b43a6
Show file tree
Hide file tree
Showing 3 changed files with 142 additions and 14 deletions.
6 changes: 3 additions & 3 deletions CHANGELOG.rst
Expand Up @@ -4,11 +4,11 @@ Changelog
Development
***********

- Add origin and metric unit mappings to services
- Use argument "metric" in request classes to convert origin units to metric, set to default
- Add origin and SI unit mappings to services
- Use argument "si_units" in request classes to convert origin units to SI, set to default
- Improve caching behaviour by introducing optional ``WD_CACHE_DIR`` and
``WD_CACHE_DISABLE`` environment variables. Thanks, @meteoDaniel!

- Add baseline test for ECCC observations

0.17.0 (08.04.2021)
*******************
Expand Down
117 changes: 117 additions & 0 deletions tests/provider/eccc/test_api.py
@@ -0,0 +1,117 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2018-2021, earthobservations developers.
# Distributed under the MIT License. See LICENSE for more info.
import numpy as np
import pandas as pd
import pytz
from pandas._testing import assert_frame_equal

from wetterdienst.provider.eccc.observation import EcccObservationRequest


def test_eccc_api_stations():
    """Check the station metadata returned for ECCC station id 14.

    Builds a daily request for 1990-01-01 to 1990-01-02, restricts it to a
    single station and compares the resulting frame with a fixed baseline row.
    """
    request_kwargs = dict(
        parameter="DAILY",
        resolution="DAILY",
        start_date="1990-01-01",
        end_date="1990-01-02",
        humanize=True,
        tidy=True,
        si_units=False,
    )
    stations = EcccObservationRequest(**request_kwargs).filter_by_station_id(
        station_id=(14,)
    )

    # One baseline row; every value is wrapped in a list below so the frame
    # is built column-wise with a single record.
    baseline_row = {
        "station_id": "14",
        "from_date": pd.Timestamp("1984-01-01", tz=pytz.UTC),
        "to_date": pd.Timestamp("1996-01-01", tz=pytz.UTC),
        "height": 4.0,
        "latitude": 48.87,
        "longitude": -123.28,
        "name": "ACTIVE PASS",
        "state": "BRITISH COLUMBIA",
    }
    expected = pd.DataFrame(
        {column: [value] for column, value in baseline_row.items()}
    )

    assert_frame_equal(stations.df, expected)


def test_eccc_api_values():
    """Check the tidy daily values returned for ECCC station id 1652.

    Requests 1980-01-01 and 1980-01-02 with ``si_units=False`` and compares
    the collected values against a fixed baseline of eleven parameters with
    two observations each (22 rows in total).
    """
    request = EcccObservationRequest(
        parameter="DAILY",
        resolution="DAILY",
        start_date="1980-01-01",
        end_date="1980-01-02",
        humanize=True,
        tidy=True,
        si_units=False,
    ).filter_by_station_id(station_id=(1652,))

    values = request.values.all().df

    expected_df = pd.DataFrame(
        {
            # The two requested days repeat once per parameter.
            "date": [
                pd.Timestamp("1980-01-01", tz=pytz.UTC),
                pd.Timestamp("1980-01-02", tz=pytz.UTC),
            ]
            * 11,
            "parameter": pd.Categorical(
                [
                    "temperature_air_max_200",
                    "temperature_air_max_200",
                    "temperature_air_min_200",
                    "temperature_air_min_200",
                    "temperature_air_200",
                    "temperature_air_200",
                    "heating_degree_days",
                    "heating_degree_days",
                    "cooling_degree_days",
                    "cooling_degree_days",
                    "precipitation_height_rain",
                    "precipitation_height_rain",
                    "snow_depth_new",
                    "snow_depth_new",
                    "precipitation_height",
                    "precipitation_height",
                    "snow_depth",
                    "snow_depth",
                    "wind_direction_max_velocity",
                    "wind_direction_max_velocity",
                    "wind_gust_max",
                    "wind_gust_max",
                ]
            ),
            # ``np.nan`` is used instead of the ``np.NaN`` alias, which was
            # removed in NumPy 2.0.
            "value": [
                -16.3,
                -16.4,
                -29.1,
                -28.3,
                -22.7,
                -22.4,
                40.7,
                40.4,
                0.0,
                0.0,
                0.0,
                0.0,
                1.8,
                0.0,
                0.8,
                0.0,
                19.0,
                20.0,
                np.nan,
                np.nan,
                np.nan,
                np.nan,
            ],
            "quality": pd.Categorical([np.nan] * 22),
            "station_id": pd.Categorical(["1652"] * 22),
            "dataset": pd.Categorical(["daily"] * 22),
        }
    )

    # The collected frame's index is reset so it lines up with the default
    # RangeIndex of the expected frame.
    assert_frame_equal(
        values.reset_index(drop=True), expected_df, check_categorical=False
    )
33 changes: 22 additions & 11 deletions wetterdienst/provider/eccc/observation/api.py
Expand Up @@ -199,6 +199,8 @@ def _collect_station_parameter(

df = df.reset_index(drop=True)

df = df.drop(columns=["data quality"], errors="ignore")

if self.stations.stations.tidy:
df = self._tidy_up_dataframe(df)

Expand All @@ -213,21 +215,30 @@ def _create_file_urls(
self, station_id: str, start_year: int, end_year: int
) -> Generator[str, None, None]:
# TODO: make faster, requests per month take too long!
if self.stations.stations.resolution != Resolution.HOURLY:
# if self.stations.stations.resolution != Resolution.HOURLY:
# url = self._base_url.format(int(station_id), self._timeframe)
#
# yield url
# else:
resolution = self.stations.stations.resolution

freq = "Y"
if resolution == Resolution.HOURLY:
freq = "M"

# For hourly data request only necessary data to reduce amount of data being
# downloaded and parsed
for date in pd.date_range(
f"{start_year}-01-01", f"{end_year + 1}-01-01", freq=freq, closed=None
):
url = self._base_url.format(int(station_id), self._timeframe)

yield url
else:
# For hourly data request only necessary data to reduce amount of data being
# downloaded and parsed
for date in pd.date_range(
f"{start_year}-01-01", f"{end_year + 1}-01-01", freq="M", closed=None
):
url = self._base_url.format(int(station_id), self._timeframe)
url += f"&Year={date.year}"

url += f"&Year={date.year}&Month={date.month}"
if resolution == Resolution.HOURLY:
    # The f-string prefix is required: without it the literal text
    # "{date.month}" is appended to the URL instead of the month number.
    url += f"&Month={date.month}"

yield url
yield url


class EcccObservationRequest(ScalarRequestCore):
Expand Down

0 comments on commit 35b43a6

Please sign in to comment.