Skip to content

Commit

Permalink
Add logging in most places where data is acquired
Browse files Browse the repository at this point in the history
  • Loading branch information
gutzbenj committed Mar 3, 2024
1 parent 47d7d98 commit 4b87b18
Show file tree
Hide file tree
Showing 12 changed files with 44 additions and 1 deletion.
1 change: 1 addition & 0 deletions wetterdienst/provider/dwd/dmo/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -1537,6 +1537,7 @@ def _all(self) -> pl.LazyFrame:
:return:
"""
log.info(f"Downloading file {self._url}.")
payload = download_file(self._url, self.settings, CacheExpiry.METAINDEX)
text = StringIO(payload.read().decode(encoding="latin-1"))
lines = text.readlines()
Expand Down
1 change: 1 addition & 0 deletions wetterdienst/provider/dwd/mosmix/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -1407,6 +1407,7 @@ def _all(self) -> pl.LazyFrame:
:return:
"""
log.info(f"Downloading file {self._url}.")
payload = download_file(self._url, self.settings, CacheExpiry.METAINDEX)
text = StringIO(payload.read().decode(encoding="latin-1"))
lines = text.readlines()
Expand Down
4 changes: 4 additions & 0 deletions wetterdienst/provider/dwd/observation/download.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2018-2021, earthobservations developers.
# Distributed under the MIT License. See LICENSE for more info.
import logging
from concurrent.futures import ThreadPoolExecutor
from io import BytesIO
from typing import List, Tuple
Expand All @@ -14,6 +15,8 @@
from wetterdienst.util.cache import CacheExpiry
from wetterdienst.util.network import download_file

log = logging.getLogger(__name__)


def download_climate_observations_data_parallel(
remote_files: pl.Series, settings: Settings
Expand Down Expand Up @@ -51,6 +54,7 @@ def _download_climate_observations_data(remote_file: str, settings: Settings) ->


def __download_climate_observations_data(remote_file: str, settings: Settings) -> bytes:
log.info(f"Downloading file {remote_file}.")
file = download_file(remote_file, settings=settings, ttl=CacheExpiry.FIVE_MINUTES)

try:
Expand Down
7 changes: 7 additions & 0 deletions wetterdienst/provider/dwd/observation/metaindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Copyright (C) 2018-2021, earthobservations developers.
# Distributed under the MIT License. See LICENSE for more info.
import datetime as dt
import logging
import re
from concurrent.futures import ThreadPoolExecutor
from io import BytesIO, StringIO
Expand All @@ -24,6 +25,8 @@
from wetterdienst.util.network import download_file, list_remote_files_fsspec
from wetterdienst.util.polars_util import read_fwf_from_df

log = logging.getLogger(__name__)

DWD_COLUMN_NAMES_MAPPING = {
"column_0": Columns.STATION_ID.value,
"column_1": Columns.START_DATE.value,
Expand Down Expand Up @@ -131,6 +134,7 @@ def _create_meta_index_for_climate_observations(
remote_files = list_remote_files_fsspec(url, settings=settings, ttl=CacheExpiry.METAINDEX)
# Find the one meta file from the files listed on the server
meta_file = _find_meta_file(remote_files, url, ["beschreibung", "txt"])
log.info(f"Downloading file {meta_file}.")
payload = download_file(meta_file, settings=settings, ttl=CacheExpiry.METAINDEX)
return _read_meta_df(payload)

Expand Down Expand Up @@ -194,7 +198,9 @@ def _create_meta_index_for_subdaily_extreme_wind(period: Period, settings: Setti
# Find the one meta file from the files listed on the server
meta_file_fx3 = _find_meta_file(remote_files, url, ["fx3", "beschreibung", "txt"])
meta_file_fx6 = _find_meta_file(remote_files, url, ["fx6", "beschreibung", "txt"])
log.info(f"Downloading file {meta_file_fx3}.")
payload_fx3 = download_file(meta_file_fx3, settings=settings, ttl=CacheExpiry.METAINDEX)
log.info(f"Downloading file {meta_file_fx6}.")
payload_fx6 = download_file(meta_file_fx6, settings=settings, ttl=CacheExpiry.METAINDEX)
df_fx3 = _read_meta_df(payload_fx3)
df_fx6 = _read_meta_df(payload_fx6)
Expand All @@ -217,6 +223,7 @@ def _create_meta_index_for_1minute_historical_precipitation(settings: Settings)
metadata_file_paths = list_remote_files_fsspec(url, settings=settings, ttl=CacheExpiry.METAINDEX)
station_ids = [re.findall(STATION_ID_REGEX, file).pop(0) for file in metadata_file_paths]

log.info(f"Downloading {len(metadata_file_paths)} files for 1minute precipitation historical metadata.")
with ThreadPoolExecutor() as executor:
metadata_files = executor.map(
lambda file: download_file(url=file, settings=settings, ttl=CacheExpiry.NO_CACHE), metadata_file_paths
Expand Down
3 changes: 2 additions & 1 deletion wetterdienst/provider/dwd/radar/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,7 @@ def _download_generic_data(self, url: str) -> Generator[RadarResult, None, None]
ttl = CacheExpiry.FIVE_MINUTES
if not self._should_cache_download(url):
ttl = CacheExpiry.NO_CACHE

log.info(f"Downloading file {url}.")
data = download_file(url=url, ttl=ttl, settings=self.settings)

# RadarParameter.FX_REFLECTIVITY
Expand Down Expand Up @@ -523,6 +523,7 @@ def __download_radolan_data(url: str, settings: Settings) -> BytesIO:
the file in binary, either an archive of one file or an archive of multiple
files
"""
log.info(f"Downloading file {url}.")
return download_file(url=url, ttl=CacheExpiry.TWELVE_HOURS, settings=settings)

@staticmethod
Expand Down
2 changes: 2 additions & 0 deletions wetterdienst/provider/dwd/road/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ def _download_road_weather_observations(remote_files: List[str], settings) -> Li
:param remote_files: List of requested files
:return: List of downloaded files
"""
log.info(f"Downloading {len(remote_files)} files from DWD Road Weather.")
with ThreadPoolExecutor() as p:
files_in_bytes = p.map(
lambda file: download_file(url=file, settings=settings, ttl=CacheExpiry.TWELVE_HOURS), remote_files
Expand Down Expand Up @@ -406,6 +407,7 @@ def __init__(
)

def _all(self) -> pl.LazyFrame:
log.info(f"Downloading file {self._endpoint}.")
payload = download_file(self._endpoint, self.settings, CacheExpiry.METAINDEX)
df = pl.read_excel(source=payload, sheet_name="Tabelle1", read_options={"infer_schema_length": 0})
df = df.rename(mapping=self._column_mapping)
Expand Down
2 changes: 2 additions & 0 deletions wetterdienst/provider/ea/hydrology/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ class EaHydrologyValues(TimeseriesValues):

def _collect_station_parameter(self, station_id: str, parameter: Enum, dataset: Enum) -> pl.DataFrame:
endpoint = self._base_url.format(station_id=station_id)
log.info(f"Downloading file {endpoint}.")
payload = download_file(endpoint, self.sr.stations.settings, CacheExpiry.NO_CACHE)
measures_list = json.load(payload)["items"][0]["measures"]
measures_list = pl.Series(name="measure", values=measures_list).to_frame()
Expand All @@ -105,6 +106,7 @@ def _collect_station_parameter(self, station_id: str, parameter: Enum, dataset:
except IndexError:
return pl.DataFrame()
values_endpoint = f"{measure_dict['@id']}/readings.json"
log.info(f"Downloading file {values_endpoint}.")
payload = download_file(values_endpoint, CacheExpiry.FIVE_MINUTES)
readings = json.loads(payload.read())["items"]
df = pl.from_dicts(readings)
Expand Down
6 changes: 6 additions & 0 deletions wetterdienst/provider/eaufrance/hubeau/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# Distributed under the MIT License. See LICENSE for more info.
import datetime as dt
import json
import logging
import math
from enum import Enum
from typing import Iterator, List, Literal, Optional, Tuple, Union
Expand All @@ -26,6 +27,8 @@
from wetterdienst.util.network import download_file
from wetterdienst.util.parameter import DatasetTreeCore

log = logging.getLogger(__name__)

REQUIRED_ENTRIES = [
"code_station",
"libelle_station",
Expand Down Expand Up @@ -97,6 +100,7 @@ def _get_hubeau_dates(self, station_id, parameter, dataset) -> Iterator[Tuple[dt

def _get_dynamic_frequency(self, station_id, parameter, dataset) -> Tuple[int, Literal["min", "H"]]:
url = self._endpoint_freq.format(station_id=station_id, grandeur_hydro=parameter.value)
log.info(f"Downloading file {url}.")
response = download_file(url=url, settings=self.sr.stations.settings, ttl=CacheExpiry.METAINDEX)
values_dict = json.load(response)["data"]
try:
Expand Down Expand Up @@ -130,6 +134,7 @@ def _collect_station_parameter(self, station_id: str, parameter: Enum, dataset:
start_date=start_date.isoformat(),
end_date=end_date.isoformat(),
)
log.info(f"Downloading file {url}.")
response = download_file(url=url, settings=self.sr.stations.settings)
data_dict = json.load(response)["data"]
df = pl.DataFrame(data_dict)
Expand Down Expand Up @@ -207,6 +212,7 @@ def _all(self) -> pl.LazyFrame:
:return:
"""
log.info(f"Downloading file {self._endpoint}.")
response = download_file(url=self._endpoint, settings=self.settings, ttl=CacheExpiry.METAINDEX)
data = json.load(response)["data"]
for entry in data:
Expand Down
2 changes: 2 additions & 0 deletions wetterdienst/provider/eccc/observation/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,7 @@ def _download_stations(self) -> Tuple[BytesIO, int]:
payload = None
source = None
try:
log.info(f"Downloading file {gdrive_url}.")
payload = download_file(gdrive_url, self.settings, CacheExpiry.METAINDEX)
source = 0
except Exception as e:
Expand All @@ -355,6 +356,7 @@ def _download_stations(self) -> Tuple[BytesIO, int]:

# Fall back to different source.
try:
log.info(f"Downloading file {http_url}.")
response = download_file(http_url, self.settings, CacheExpiry.METAINDEX)
with gzip.open(response, mode="rb") as f:
payload = BytesIO(f.read())
Expand Down
5 changes: 5 additions & 0 deletions wetterdienst/provider/geosphere/observation/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# Distributed under the MIT License. See LICENSE for more info.
import datetime as dt
import json
import logging
from datetime import datetime, timedelta
from enum import Enum
from typing import List, Optional, Union
Expand All @@ -25,6 +26,8 @@
from wetterdienst.util.network import download_file
from wetterdienst.util.parameter import DatasetTreeCore

log = logging.getLogger(__name__)


class GeosphereObservationResolution(Enum):
MINUTE_10 = Resolution.MINUTE_10.value
Expand Down Expand Up @@ -670,6 +673,7 @@ def _collect_station_parameter(self, station_id: str, parameter: Enum, dataset:
start_date=start_date.astimezone(dt.timezone.utc).strftime("%Y-%m-%dT%H:%m"),
end_date=end_date.astimezone(dt.timezone.utc).strftime("%Y-%m-%dT%H:%m"),
)
log.info(f"Downloading file {url}.")
response = download_file(url=url, settings=self.sr.stations.settings, ttl=CacheExpiry.FIVE_MINUTES)
data_raw = json.loads(response.read())
timestamps = data_raw.pop("timestamps")
Expand Down Expand Up @@ -744,6 +748,7 @@ def __init__(
def _all(self) -> pl.LazyFrame:
dataset = self._dataset_base[self.resolution.name].value
url = self._endpoint.format(dataset=dataset)
log.info(f"Downloading file {url}.")
response = download_file(url=url, settings=self.settings, ttl=CacheExpiry.METAINDEX)
df = pl.read_csv(response).lazy()
df = df.drop("Sonnenschein", "Globalstrahlung")
Expand Down
4 changes: 4 additions & 0 deletions wetterdienst/provider/imgw/hydrology/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Copyright (C) 2018-2023, earthobservations developers.
# Distributed under the MIT License. See LICENSE for more info.
import datetime as dt
import logging
import re
from concurrent.futures import ThreadPoolExecutor
from enum import Enum
Expand All @@ -26,6 +27,8 @@
from wetterdienst.util.network import download_file, list_remote_files_fsspec
from wetterdienst.util.parameter import DatasetTreeCore

log = logging.getLogger(__name__)


class ImgwHydrologyParameter(DatasetTreeCore):
class DAILY(DatasetTreeCore):
Expand Down Expand Up @@ -350,6 +353,7 @@ def _all(self) -> pl.LazyFrame:
:return:
"""
log.info(f"Downloading file {self._endpoint}.")
payload = download_file(self._endpoint, settings=self.settings, ttl=CacheExpiry.METAINDEX)
df = pl.read_csv(
payload, encoding="latin-1", has_header=False, separator=";", skip_rows=1, infer_schema_length=0
Expand Down
8 changes: 8 additions & 0 deletions wetterdienst/provider/noaa/ghcn/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Copyright (C) 2018-2021, earthobservations developers.
# Distributed under the MIT License. See LICENSE for more info.
import datetime as dt
import logging
from enum import Enum
from typing import List, Optional, Union

Expand All @@ -28,6 +29,8 @@
from wetterdienst.util.network import download_file
from wetterdienst.util.polars_util import read_fwf_from_df

log = logging.getLogger(__name__)


class NoaaGhcnDataset(Enum):
HOURLY = "hourly"
Expand Down Expand Up @@ -55,6 +58,7 @@ def _collect_station_parameter(self, station_id: str, parameter, dataset) -> pl.
def _collect_station_parameter_for_hourly(self, station_id: str, parameter, dataset) -> pl.DataFrame:
url = f"https://www.ncei.noaa.gov/oa/global-historical-climatology-network/hourly/access/by-station/GHCNh_{station_id}_por.psv"
file = url.format(station_id=station_id)
log.info(f"Downloading file {file}.")
try:
payload = download_file(file, settings=self.sr.stations.settings, ttl=CacheExpiry.FIVE_MINUTES)
except FileNotFoundError:
Expand Down Expand Up @@ -337,6 +341,7 @@ def _collect_station_parameter_for_daily(self, station_id: str, parameter, datas
"""
url = "http://noaa-ghcn-pds.s3.amazonaws.com/csv.gz/by_station/{station_id}.csv.gz"
file = url.format(station_id=station_id)
log.info(f"Downloading file {file}.")
payload = download_file(file, settings=self.sr.stations.settings, ttl=CacheExpiry.FIVE_MINUTES)
df = pl.read_csv(
source=payload,
Expand Down Expand Up @@ -447,6 +452,7 @@ def _all(self) -> pl.LazyFrame:

def _create_metaindex_for_ghcn_hourly(self) -> pl.LazyFrame:
file = "https://www.ncei.noaa.gov/oa/global-historical-climatology-network/hourly/doc/ghcnh-station-list.csv"
log.info(f"Downloading file {file}.")
payload = download_file(file, settings=self.settings, ttl=CacheExpiry.METAINDEX)
df = pl.read_csv(
payload,
Expand Down Expand Up @@ -500,6 +506,7 @@ def _create_metaindex_for_ghcn_daily(self) -> pl.LazyFrame:
:return: DataFrame with all stations_result
"""
listings_url = "http://noaa-ghcn-pds.s3.amazonaws.com/ghcnd-stations.txt"
log.info(f"Downloading file {listings_url}.")
listings_file = download_file(listings_url, settings=self.settings, ttl=CacheExpiry.TWELVE_HOURS)
df = pl.read_csv(listings_file, has_header=False, truncate_ragged_lines=True)
column_specs = ((0, 10), (12, 19), (21, 29), (31, 36), (38, 39), (41, 70), (80, 84))
Expand All @@ -515,6 +522,7 @@ def _create_metaindex_for_ghcn_daily(self) -> pl.LazyFrame:
]

inventory_url = "http://noaa-ghcn-pds.s3.amazonaws.com/ghcnd-inventory.txt"
log.info(f"Downloading file {inventory_url}.")
inventory_file = download_file(inventory_url, settings=self.settings, ttl=CacheExpiry.TWELVE_HOURS)
inventory_df = pl.read_csv(inventory_file, has_header=False, truncate_ragged_lines=True)
column_specs = ((0, 10), (36, 39), (41, 44))
Expand Down

0 comments on commit 4b87b18

Please sign in to comment.