Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(parser validation): Remove required mode rule #6692

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 1 addition & 6 deletions parsers/ELEXON.py
Original file line number Diff line number Diff line change
Expand Up @@ -514,19 +514,14 @@ def fetch_production(
else:
entry["production"]["wind"] = None

required = ["coal", "gas", "nuclear", "wind"]
expected_range = {
# Historical data might be above the current capacity for coal
"coal": (0, 20000),
"gas": (100, 60000),
"nuclear": (100, 56000),
"wind": (0, 600000),
}
data = [
x
for x in data
if validate(x, logger, required=required, expected_range=expected_range)
]
data = [x for x in data if validate(x, logger, expected_range=expected_range)]

return data

Expand Down
72 changes: 2 additions & 70 deletions parsers/ENTSOE.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,25 +363,18 @@ class WindAndSolarProductionForecastTypes(Enum):
# It will still work if data is present but 0.
# "expected_range" and "floor" only count production and storage
# - not exchanges!
"AT": {
"required": ["hydro"],
},
"BA": {"required": ["coal", "hydro", "wind"], "expected_range": (500, 6500)},
"BA": {"expected_range": (500, 6500)},
"BE": {
"required": ["gas", "nuclear"],
"expected_range": (3000, 25000),
},
"BG": {
"required": ["coal", "nuclear", "hydro"],
"expected_range": (2000, 20000),
},
"CH": {
"required": ["hydro", "nuclear"],
"expected_range": (2000, 25000),
},
"CZ": {
# usual load is in 7-12 GW range
"required": ["coal", "nuclear"],
"expected_range": (3000, 25000),
},
"DE": {
Expand All @@ -390,73 +383,37 @@ class WindAndSolarProductionForecastTypes(Enum):
# and when those are missing this can indicate that others are missing as well.
# We have also never seen unknown being 0.
# Usual load is in 30 to 80 GW range.
"required": [
"coal",
"gas",
"wind",
"biomass",
"hydro",
"unknown",
"solar",
],
"expected_range": (20000, 100000),
},
"EE": {
"required": ["coal"],
},
"ES": {
"required": ["coal", "nuclear"],
"expected_range": (10000, 80000),
},
"FI": {
"required": ["coal", "nuclear", "hydro", "biomass"],
"expected_range": (2000, 20000),
},
"GB": {
# usual load is in 15 to 50 GW range
"required": ["coal", "gas", "nuclear"],
"expected_range": (10000, 80000),
},
"GR": {
"required": ["coal", "gas"],
"expected_range": (2000, 20000),
},
"HR": {
"required": [
"coal",
"gas",
"wind",
"biomass",
"oil",
"solar",
],
},
"HU": {
"required": ["coal", "nuclear"],
},
"IE": {
"required": ["coal"],
"expected_range": (1000, 15000),
},
"IT": {
"required": ["coal"],
"expected_range": (5000, 50000),
},
"PL": {
# usual load is in 10-20 GW range and coal is always present
"required": ["coal"],
# usual load is in 10-20 GW range
"expected_range": (5000, 35000),
},
"PT": {
"required": ["coal", "gas"],
"expected_range": (1000, 20000),
},
"RO": {
"required": ["coal", "nuclear", "hydro"],
"expected_range": (2000, 25000),
},
"RS": {
"required": ["biomass", "coal", "gas", "hydro", "unknown"],
"expected_range": {
"coal": (
800,
Expand All @@ -465,27 +422,10 @@ class WindAndSolarProductionForecastTypes(Enum):
"hydro": (0, 5000), # 5 GW is double the production capacity of Serbia.
},
},
"SE": {
"required": ["hydro", "nuclear", "wind", "unknown"],
},
"SE-SE1": {
"required": ["hydro", "wind", "unknown", "solar"],
},
"SE-SE2": {
"required": ["gas", "hydro", "wind", "unknown", "solar"],
},
"SE-SE3": {
"required": ["gas", "hydro", "nuclear", "wind", "unknown", "solar"],
},
"SE-SE4": {
"required": ["gas", "hydro", "wind", "unknown", "solar"],
},
"SI": {
# own total generation capacity is around 4 GW
"required": ["nuclear"],
"expected_range": (140, 5000),
},
"SK": {"required": ["nuclear"]},
}


Expand Down Expand Up @@ -1001,14 +941,6 @@ def validate_production(

if validation_criteria:
return validate(datapoint, logger=logger, **validation_criteria)

# NOTE: Why are there special checks for these zones?
if zone_key.startswith("DK-"):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Aren't we removing all validation for these zones?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They don't have any other validation going on so it shouldn't be a problem.

return validate(datapoint, logger=logger, required=["coal", "solar", "wind"])

if zone_key.startswith("NO-"):
return validate(datapoint, logger=logger, required=["hydro"])

return True


Expand Down
8 changes: 3 additions & 5 deletions parsers/FO.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@


class ValidationObject(TypedDict):
required: list[str]
floor: int


Expand All @@ -36,9 +35,9 @@ class ZoneData(TypedDict):


ZONE_MAP: dict[VALID_ZONE_KEYS, ZoneData] = {
"FO": {"data_key": "Sev_E", "validation": {"required": ["hydro"], "floor": 10}},
"FO-MI": {"data_key": "H_E", "validation": {"required": ["hydro"], "floor": 9}},
"FO-SI": {"data_key": "S_E", "validation": {"required": ["hydro"], "floor": 1}},
"FO": {"data_key": "Sev_E", "validation": {"floor": 10}},
"FO-MI": {"data_key": "H_E", "validation": {"floor": 9}},
"FO-SI": {"data_key": "S_E", "validation": {"floor": 1}},
}


Expand Down Expand Up @@ -103,7 +102,6 @@ def fetch_production(
data = validate(
data,
logger,
required=ZONE_MAP[zone_key]["validation"]["required"],
floor=ZONE_MAP[zone_key]["validation"]["floor"],
)
if isinstance(data, dict):
Expand Down
5 changes: 1 addition & 4 deletions parsers/TR.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,13 +88,10 @@ def validate_production_data(
logger: Logger = getLogger(__name__),
) -> list:
"""detects outliers: for real-time data the latest data point can be completely out of the expected range and needs to be excluded"""
required = list(PRODUCTION_MAPPING)
floor = (
17000 # as seen during the Covid-19 pandemic, the minimum production was 17 GW
)
all_data_points_validated = [
x for x in data if validate(x, logger, required=required, floor=floor)
]
all_data_points_validated = [x for x in data if validate(x, logger, floor=floor)]
if exclude_last_data_point:
all_data_points_validated = all_data_points_validated[:-1]
return all_data_points_validated
Expand Down
2 changes: 1 addition & 1 deletion parsers/archived/GB_NIR.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ def fetch_production(
"source": "soni.ltd.uk",
}
production_mix_by_quarter_hour.append(
validate(production_mix, logger=logger, required=["gas", "coal"], floor=1.0)
validate(production_mix, logger=logger, floor=1.0)
)

return production_mix_by_quarter_hour
Expand Down
17 changes: 2 additions & 15 deletions parsers/lib/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,11 +158,6 @@ def validate(datapoint: dict, logger: Logger | None, **kwargs) -> dict[str, Any]
remove_negative: bool
Changes negative production values to None.
Defaults to False.
required: list
Generation types that must be present.
For example ['gas', 'hydro']
If any of these types are None the datapoint will be invalidated.
Defaults to an empty list.
floor: float | int
Checks production sum is above floor value.
If this is not the case the datapoint is invalidated.
Expand Down Expand Up @@ -203,19 +198,16 @@ def validate(datapoint: dict, logger: Logger | None, **kwargs) -> dict[str, Any]
>>> },
>>> 'source': 'mysource.com'
>>> }
>>> validate(datapoint, None, required=['gas'], expected_range=(100, 2000))
>>> validate(datapoint, None, expected_range=(100, 2000))
datapoint
>>> validate(datapoint, None, required=['not_a_production_type'])
None
>>> validate(datapoint, None, required=['gas'],
>>> validate(datapoint, None,
>>> expected_range={'solar': (0, 1000), 'wind': (100, 2000)})
datapoint
"""
if logger is None:
logger = getLogger(__name__)

remove_negative: bool = kwargs.pop("remove_negative", False)
required: list[Any] = kwargs.pop("required", [])
floor: float | int | None = kwargs.pop("floor", None)
expected_range: tuple | dict | None = kwargs.pop("expected_range", None)
fake_zeros: bool = kwargs.pop("fake_zeros", False)
Expand All @@ -235,11 +227,6 @@ def validate(datapoint: dict, logger: Logger | None, **kwargs) -> dict[str, Any]
)
generation[key] = None

if required:
for item in required:
if not has_value_for_key(datapoint, item, logger):
return

if floor:
# when adding power to the system, storage key is negative
total = sum(v for k, v in generation.items() if v is not None) - sum(
Expand Down
8 changes: 0 additions & 8 deletions parsers/test/test_entsoe_quality.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,6 @@ class ProductionTestCase(unittest.TestCase):
test_logger = logging.getLogger()
test_logger.setLevel(logging.ERROR)

def test_missing_required_biomass_in_DE(self):
validated = validate_production(p10, self.test_logger) # noqa: F405
self.assertEqual(validated, None)

def test_production_too_low_in_PL(self):
validated = validate_production(p11, self.test_logger) # noqa: F405
self.assertEqual(validated, None)
Expand All @@ -27,10 +23,6 @@ def test_production_too_high_in_SI(self):
validated = validate_production(p12, self.test_logger) # noqa: F405
self.assertEqual(validated, None)

def test_missing_solar_in_DK1(self):
validated = validate_production(p13, self.test_logger) # noqa: F405
self.assertEqual(validated, None)

def test_valid_production_in_FI(self):
validated = validate_production(p14, self.test_logger) # noqa: F405
if isinstance(validated, dict):
Expand Down
Loading