Skip to content

Commit

Permalink
Add NOAA GHCN implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
gutzbenj committed Oct 10, 2021
1 parent bf02609 commit e97aac3
Show file tree
Hide file tree
Showing 16 changed files with 2,193 additions and 207 deletions.
16 changes: 10 additions & 6 deletions tests/core/scalar/test_parameter.py
Expand Up @@ -16,6 +16,8 @@
EcccObservationParameter,
)
from wetterdienst.provider.eccc.observation.metadata.unit import EcccObservationUnit
from wetterdienst.provider.noaa.ghcn import NoaaGhcnParameter
from wetterdienst.provider.noaa.ghcn.unit import NoaaGhcnUnit

ORIGIN_UNITS = [unit.value for unit in OriginUnit]
SI_UNITS = [unit.value for unit in SIUnit]
Expand All @@ -31,29 +33,31 @@
(DwdMosmixUnit, False),
(EcccObservationParameter, False),
(EcccObservationUnit, False),
(NoaaGhcnParameter, False),
(NoaaGhcnUnit, False),
),
)
def test_parameter_names(parameter_enum, is_ds_tree):
"""Test parameter and dataset tree enums for consistent parameter naming following the
core Parameter enum. Due to equal structure units are also tested here"""

def _check_quality_flags(param):
return param.startswith("QUALITY") or param.startswith("QN")

parameters = []

for res in parameter_enum:
if is_ds_tree:
for dataset in parameter_enum[res]:
for parameter in parameter_enum[res][dataset]:
parameter_name = parameter.name
if not parameter_name.startswith(
"QUALITY"
) and not parameter_name.startswith("QN"):
if not _check_quality_flags(parameter_name):
if parameter_name not in Parameter._member_names_:
parameters.append(parameter_name)
else:
for parameter in parameter_enum[res]:
parameter_name = parameter.name
if not parameter_name.startswith(
"QUALITY"
) and not parameter_name.startswith("QN"):
if not _check_quality_flags(parameter_name):
if parameter_name not in Parameter._member_names_:
parameters.append(parameter_name)

Expand Down
19 changes: 13 additions & 6 deletions wetterdienst/core/scalar/request.py
Expand Up @@ -78,15 +78,14 @@ def _period_base(self) -> Optional[Period]:
pass

@property
def _parameter_base(self) -> Enum:
    """Parameter base enumeration from which parameters can be parsed e.g.
    DWDObservationParameter.

    Services that store all parameters of one resolution in a single unique
    dataset get ``None`` as a default; services with multiple datasets per
    resolution must provide their own enumeration.

    :return: parameter enumeration, or None for unique-dataset services
    :raises NotImplementedError: if the service has multiple (non-unique)
        datasets and therefore must define its own parameter base
    """
    # Short-circuit keeps the original behavior: _unique_dataset is only
    # consulted when the service actually has datasets.
    if self._has_datasets and not self._unique_dataset:
        raise NotImplementedError(
            "implement _parameter_base enumeration that has "
            "all parameters of one resolution stored in one place"
        )
    # Explicit None instead of a bare `return` — the default for services
    # whose parameters are resolved through their unique dataset.
    return None

Expand All @@ -101,7 +100,7 @@ def _data_range(self) -> DataRange:
@abstractmethod
def _has_datasets(self) -> bool:
    """Boolean if the weather service has datasets, i.e. multiple parameters
    are stored together in one table/file."""
    pass

@property
Expand Down Expand Up @@ -134,9 +133,17 @@ def _unique_dataset(self) -> bool:
@property
def _dataset_accessor(self) -> str:
    """Name under which a service's datasets are looked up.

    By default the resolution name is used, as datasets are usually divided
    by resolution; services that divide datasets differently (e.g. DWD Mosmix
    with SMALL/LARGE) override this accessor.
    """
    resolution = self.resolution
    return resolution.name

@property
@abstractmethod
def _has_tidy_data(self) -> bool:
    """Whether the service already provides data in tidy (long) format.

    If True, data should not be tidied again but rather tabulated (widened)
    when the user requests non-tidy data.
    """
    pass

@property
def _parameter_to_dataset_mapping(self) -> dict:
""" Mapping to go from a (flat) parameter to dataset """
Expand Down Expand Up @@ -266,7 +273,7 @@ def _parse_parameter(
if self._unique_dataset:
# If unique dataset the dataset is given by the accessor
# and the parameter is not a subset of a dataset
dataset_ = self._dataset_tree[self._dataset_accessor]
dataset_ = self._dataset_base[self._dataset_accessor]
elif not dataset_:
# If there's multiple datasets the mapping defines which one
# is taken for the given parameter
Expand Down
59 changes: 52 additions & 7 deletions wetterdienst/core/scalar/values.py
Expand Up @@ -3,7 +3,7 @@
# Distributed under the MIT License. See LICENSE for more info.
import logging
import operator
from abc import abstractmethod
from abc import abstractmethod, ABCMeta
from enum import Enum
from typing import Dict, Generator, List, Tuple, Union

Expand All @@ -24,7 +24,7 @@
log = logging.getLogger(__name__)


class ScalarValuesCore:
class ScalarValuesCore(metaclass=ABCMeta):
""" Core for sources of point data where data is related to a station """

# Fields for type coercion, needed for separation from fields with actual data
Expand Down Expand Up @@ -441,13 +441,16 @@ def query(self) -> Generator[ValuesResult, None, None]:
parameter_df = self.convert_values_to_si(parameter_df, dataset)

if self.stations.stations.tidy:
parameter_df = self.tidy_up_df(parameter_df, dataset)
if not self.stations.stations._has_tidy_data:
parameter_df = self.tidy_up_df(parameter_df, dataset)

if parameter != dataset:
parameter_df = parameter_df[
parameter_df[Columns.PARAMETER.value]
== parameter.value.lower()
]
elif self.stations.stations._has_tidy_data:
parameter_df = self.tabulate_df(parameter_df)

parameter_df = self._build_complete_df(
parameter_df, station_id, parameter, dataset
Expand Down Expand Up @@ -530,15 +533,50 @@ def tidy_up_df(self, df: pd.DataFrame, dataset: Enum) -> pd.DataFrame:

return df

@abstractmethod
def _tidy_up_df(self, df: pd.DataFrame, dataset) -> pd.DataFrame:
    """Tidy a service-specific DataFrame into long format.

    Services whose raw data is not already tidy must override this method;
    for services that provide tidy data by default this base implementation
    is a no-op.

    :param df: DataFrame to tidy
    :param dataset: dataset the DataFrame belongs to
    :return: tidied DataFrame
    """
    tidy_by_default = self.stations.stations._has_tidy_data
    if tidy_by_default:
        return None
    raise NotImplementedError("implement _tidy_up_df method to tidy data")

@staticmethod
def tabulate_df(df: pd.DataFrame) -> pd.DataFrame:
    """
    Method to tabulate a dataframe with each row having one timestamp and
    all parameter values and corresponding quality levels.
    Example:
        date        parameter                   value   quality
        1971-01-01  precipitation_height        0       0
        1971-01-01  temperature_air_mean_200    10      0
    becomes
        date        precipitation_height  qn_precipitation_height  temperature_air_mean_200 ...
        1971-01-01  0                      0                        10 ...
    :param df: pandas.DataFrame with tidy data
    :returns pandas.DataFrame with tabulated data e.g. pairwise columns of values
    and quality flags
    """
    # Seed the result with the UNIQUE timestamps and index by them. The tidy
    # frame repeats each date once per parameter, so copying the raw DATE
    # column and assigning group values by the original integer index (as a
    # plain column assignment would) yields duplicated date rows with
    # NaN-padded parameter columns instead of one row per timestamp.
    df_tabulated = pd.DataFrame(
        {Columns.DATE.value: df[Columns.DATE.value].drop_duplicates()}
    ).set_index(Columns.DATE.value)

    # Group by the parameter Series directly (not wrapped in a list): a
    # length-1 list of groupers makes pandas emit tuple keys, which would
    # corrupt the generated column names below.
    for parameter, parameter_df in df.groupby(df[Columns.PARAMETER.value]):
        # Build quality column name
        parameter_quality = f"{Columns.QUALITY_PREFIX.value}_{parameter}"

        # Align each parameter's rows on the date so values land on the
        # matching timestamp row.
        group = parameter_df.set_index(Columns.DATE.value)

        # Add values
        df_tabulated[parameter] = group[Columns.VALUE.value]
        # Add quality levels
        df_tabulated[parameter_quality] = group[Columns.QUALITY.value]

    return df_tabulated.reset_index()

def _coerce_date_fields(self, df: pd.DataFrame) -> pd.DataFrame:
"""
Expand Down Expand Up @@ -735,9 +773,16 @@ def _create_humanized_parameters_mapping(self) -> Dict[str, str]:
:return:
"""
# hcnm = {
# parameter.value: parameter.name.lower()
# for parameter in self.stations.stations._parameter_base
# }

hcnm = {
parameter.value: parameter.name.lower()
for parameter in self.stations.stations._parameter_base
for parameter in self.stations.stations._parameter_base[
self.stations.stations.resolution.name
]
}

return hcnm

0 comments on commit e97aac3

Please sign in to comment.