From 46105da0b508b4c5a04e555bbe1a3deb1da47652 Mon Sep 17 00:00:00 2001 From: Robin TROESCH <38283096+unitrium@users.noreply.github.com> Date: Tue, 23 Apr 2024 16:33:51 +0200 Subject: [PATCH] remove required (#6692) --- parsers/ELEXON.py | 7 +-- parsers/ENTSOE.py | 72 +---------------------------- parsers/FO.py | 8 ++-- parsers/TR.py | 5 +- parsers/archived/GB_NIR.py | 2 +- parsers/lib/validation.py | 17 +------ parsers/test/test_entsoe_quality.py | 8 ---- 7 files changed, 10 insertions(+), 109 deletions(-) diff --git a/parsers/ELEXON.py b/parsers/ELEXON.py index 4cfb20de29..4607bccc0d 100644 --- a/parsers/ELEXON.py +++ b/parsers/ELEXON.py @@ -514,7 +514,6 @@ def fetch_production( else: entry["production"]["wind"] = None - required = ["coal", "gas", "nuclear", "wind"] expected_range = { # Historical data might be above the current capacity for coal "coal": (0, 20000), @@ -522,11 +521,7 @@ def fetch_production( "nuclear": (100, 56000), "wind": (0, 600000), } - data = [ - x - for x in data - if validate(x, logger, required=required, expected_range=expected_range) - ] + data = [x for x in data if validate(x, logger, expected_range=expected_range)] return data diff --git a/parsers/ENTSOE.py b/parsers/ENTSOE.py index 70385c1f41..3bed66327b 100644 --- a/parsers/ENTSOE.py +++ b/parsers/ENTSOE.py @@ -363,25 +363,18 @@ class WindAndSolarProductionForecastTypes(Enum): # It will still work if data is present but 0. # "expected_range" and "floor" only count production and storage # - not exchanges! - "AT": { - "required": ["hydro"], - }, - "BA": {"required": ["coal", "hydro", "wind"], "expected_range": (500, 6500)}, + "BA": {"expected_range": (500, 6500)}, "BE": { - "required": ["gas", "nuclear"], "expected_range": (3000, 25000), }, "BG": { - "required": ["coal", "nuclear", "hydro"], "expected_range": (2000, 20000), }, "CH": { - "required": ["hydro", "nuclear"], "expected_range": (2000, 25000), }, "CZ": { # usual load is in 7-12 GW range - "required": ["coal", "nuclear"], "expected_range": (3000, 25000), }, "DE": { @@ -390,73 +383,37 @@ class WindAndSolarProductionForecastTypes(Enum): # and when those are missing this can indicate that others are missing as well. # We have also never seen unknown being 0. # Usual load is in 30 to 80 GW range. - "required": [ - "coal", - "gas", - "wind", - "biomass", - "hydro", - "unknown", - "solar", - ], "expected_range": (20000, 100000), }, - "EE": { - "required": ["coal"], - }, "ES": { - "required": ["coal", "nuclear"], "expected_range": (10000, 80000), }, "FI": { - "required": ["coal", "nuclear", "hydro", "biomass"], "expected_range": (2000, 20000), }, "GB": { - # usual load is in 15 to 50 GW range - "required": ["coal", "gas", "nuclear"], "expected_range": (10000, 80000), }, "GR": { - "required": ["coal", "gas"], "expected_range": (2000, 20000), }, - "HR": { - "required": [ - "coal", - "gas", - "wind", - "biomass", - "oil", - "solar", - ], - }, - "HU": { - "required": ["coal", "nuclear"], - }, "IE": { - "required": ["coal"], "expected_range": (1000, 15000), }, "IT": { - "required": ["coal"], "expected_range": (5000, 50000), }, "PL": { - # usual load is in 10-20 GW range and coal is always present - "required": ["coal"], + # usual load is in 10-20 GW range "expected_range": (5000, 35000), }, "PT": { - "required": ["coal", "gas"], "expected_range": (1000, 20000), }, "RO": { - "required": ["coal", "nuclear", "hydro"], "expected_range": (2000, 25000), }, "RS": { - "required": ["biomass", "coal", "gas", "hydro", "unknown"], "expected_range": { "coal": ( 800, @@ -465,27 +422,10 @@ class WindAndSolarProductionForecastTypes(Enum): "hydro": (0, 5000), # 5 GW is double the production capacity of Serbia. }, }, - "SE": { - "required": ["hydro", "nuclear", "wind", "unknown"], - }, - "SE-SE1": { - "required": ["hydro", "wind", "unknown", "solar"], - }, - "SE-SE2": { - "required": ["gas", "hydro", "wind", "unknown", "solar"], - }, - "SE-SE3": { - "required": ["gas", "hydro", "nuclear", "wind", "unknown", "solar"], - }, - "SE-SE4": { - "required": ["gas", "hydro", "wind", "unknown", "solar"], - }, "SI": { # own total generation capacity is around 4 GW - "required": ["nuclear"], "expected_range": (140, 5000), }, - "SK": {"required": ["nuclear"]}, } @@ -1001,14 +941,6 @@ def validate_production( if validation_criteria: return validate(datapoint, logger=logger, **validation_criteria) - - # NOTE: Why are there sepcial checks for these zones? - if zone_key.startswith("DK-"): - return validate(datapoint, logger=logger, required=["coal", "solar", "wind"]) - - if zone_key.startswith("NO-"): - return validate(datapoint, logger=logger, required=["hydro"]) - return True diff --git a/parsers/FO.py b/parsers/FO.py index 7b20d68dd5..89f9f73910 100644 --- a/parsers/FO.py +++ b/parsers/FO.py @@ -26,7 +26,6 @@ class ValidationObject(TypedDict): - required: list[str] floor: int @@ -36,9 +35,9 @@ class ZoneData(TypedDict): ZONE_MAP: dict[VALID_ZONE_KEYS, ZoneData] = { - "FO": {"data_key": "Sev_E", "validation": {"required": ["hydro"], "floor": 10}}, - "FO-MI": {"data_key": "H_E", "validation": {"required": ["hydro"], "floor": 9}}, - "FO-SI": {"data_key": "S_E", "validation": {"required": ["hydro"], "floor": 1}}, + "FO": {"data_key": "Sev_E", "validation": {"floor": 10}}, + "FO-MI": {"data_key": "H_E", "validation": {"floor": 9}}, + "FO-SI": {"data_key": "S_E", "validation": {"floor": 1}}, } @@ -103,7 +102,6 @@ def fetch_production( data = validate( data, logger, - required=ZONE_MAP[zone_key]["validation"]["required"], floor=ZONE_MAP[zone_key]["validation"]["floor"], ) if isinstance(data, dict): diff --git a/parsers/TR.py b/parsers/TR.py index 19bbeed11b..2a29bbd63d 100755 --- a/parsers/TR.py +++ b/parsers/TR.py @@ -88,13 +88,10 @@ def validate_production_data( logger: Logger = getLogger(__name__), ) -> list: """detects outliers: for real-time data the latest data point can be completely out of the expected range and needs to be excluded""" - required = list(PRODUCTION_MAPPING) floor = ( 17000 # as seen during the Covid-19 pandemic, the minimum production was 17 GW ) - all_data_points_validated = [ - x for x in data if validate(x, logger, required=required, floor=floor) - ] + all_data_points_validated = [x for x in data if validate(x, logger, floor=floor)] if exclude_last_data_point: all_data_points_validated = all_data_points_validated[:-1] return all_data_points_validated diff --git a/parsers/archived/GB_NIR.py b/parsers/archived/GB_NIR.py index 4e400b36e2..4961b55b89 100644 --- a/parsers/archived/GB_NIR.py +++ b/parsers/archived/GB_NIR.py @@ -212,7 +212,7 @@ def fetch_production( "source": "soni.ltd.uk", } production_mix_by_quarter_hour.append( - validate(production_mix, logger=logger, required=["gas", "coal"], floor=1.0) + validate(production_mix, logger=logger, floor=1.0) ) return production_mix_by_quarter_hour diff --git a/parsers/lib/validation.py b/parsers/lib/validation.py index 538803e4b4..58fe309fb5 100644 --- a/parsers/lib/validation.py +++ b/parsers/lib/validation.py @@ -158,11 +158,6 @@ def validate(datapoint: dict, logger: Logger | None, **kwargs) -> dict[str, Any] remove_negative: bool Changes negative production values to None. Defaults to False. - required: list - Generation types that must be present. - For example ['gas', 'hydro'] - If any of these types are None the datapoint will be invalidated. - Defaults to an empty list. floor: float | int Checks production sum is above floor value. If this is not the case the datapoint is invalidated. @@ -203,11 +198,9 @@ def validate(datapoint: dict, logger: Logger | None, **kwargs) -> dict[str, Any] >>> }, >>> 'source': 'mysource.com' >>> } - >>> validate(datapoint, None, required=['gas'], expected_range=(100, 2000)) + >>> validate(datapoint, None, expected_range=(100, 2000)) datapoint - >>> validate(datapoint, None, required=['not_a_production_type']) - None - >>> validate(datapoint, None, required=['gas'], + >>> validate(datapoint, None, >>> expected_range={'solar': (0, 1000), 'wind': (100, 2000)}) datapoint """ @@ -215,7 +208,6 @@ def validate(datapoint: dict, logger: Logger | None, **kwargs) -> dict[str, Any] logger = getLogger(__name__) remove_negative: bool = kwargs.pop("remove_negative", False) - required: list[Any] = kwargs.pop("required", []) floor: float | int | None = kwargs.pop("floor", None) expected_range: tuple | dict | None = kwargs.pop("expected_range", None) fake_zeros: bool = kwargs.pop("fake_zeros", False) @@ -235,11 +227,6 @@ def validate(datapoint: dict, logger: Logger | None, **kwargs) -> dict[str, Any] ) generation[key] = None - if required: - for item in required: - if not has_value_for_key(datapoint, item, logger): - return - if floor: # when adding power to the system, storage key is negative total = sum(v for k, v in generation.items() if v is not None) - sum( diff --git a/parsers/test/test_entsoe_quality.py b/parsers/test/test_entsoe_quality.py index c9e1b9efde..5f23fe9df5 100644 --- a/parsers/test/test_entsoe_quality.py +++ b/parsers/test/test_entsoe_quality.py @@ -15,10 +15,6 @@ class ProductionTestCase(unittest.TestCase): test_logger = logging.getLogger() test_logger.setLevel(logging.ERROR) - def test_missing_required_biomass_in_DE(self): - validated = validate_production(p10, self.test_logger) # noqa: F405 - self.assertEqual(validated, None) - def test_production_too_low_in_PL(self): validated = validate_production(p11, self.test_logger) # noqa: F405 self.assertEqual(validated, None) @@ -27,10 +23,6 @@ def test_production_too_high_in_SI(self): validated = validate_production(p12, self.test_logger) # noqa: F405 self.assertEqual(validated, None) - def test_missing_solar_in_DK1(self): - validated = validate_production(p13, self.test_logger) # noqa: F405 - self.assertEqual(validated, None) - def test_valid_production_in_FI(self): validated = validate_production(p14, self.test_logger) # noqa: F405 if isinstance(validated, dict):