Skip to content

Commit

Permalink
remove required (#6692)
Browse files Browse the repository at this point in the history
  • Loading branch information
unitrium committed Apr 23, 2024
1 parent d555e59 commit 46105da
Show file tree
Hide file tree
Showing 7 changed files with 10 additions and 109 deletions.
7 changes: 1 addition & 6 deletions parsers/ELEXON.py
Original file line number Diff line number Diff line change
Expand Up @@ -514,19 +514,14 @@ def fetch_production(
else:
entry["production"]["wind"] = None

required = ["coal", "gas", "nuclear", "wind"]
expected_range = {
# Historical data might be above the current capacity for coal
"coal": (0, 20000),
"gas": (100, 60000),
"nuclear": (100, 56000),
"wind": (0, 600000),
}
data = [
x
for x in data
if validate(x, logger, required=required, expected_range=expected_range)
]
data = [x for x in data if validate(x, logger, expected_range=expected_range)]

return data

Expand Down
72 changes: 2 additions & 70 deletions parsers/ENTSOE.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,25 +363,18 @@ class WindAndSolarProductionForecastTypes(Enum):
# It will still work if data is present but 0.
# "expected_range" and "floor" only count production and storage
# - not exchanges!
"AT": {
"required": ["hydro"],
},
"BA": {"required": ["coal", "hydro", "wind"], "expected_range": (500, 6500)},
"BA": {"expected_range": (500, 6500)},
"BE": {
"required": ["gas", "nuclear"],
"expected_range": (3000, 25000),
},
"BG": {
"required": ["coal", "nuclear", "hydro"],
"expected_range": (2000, 20000),
},
"CH": {
"required": ["hydro", "nuclear"],
"expected_range": (2000, 25000),
},
"CZ": {
# usual load is in 7-12 GW range
"required": ["coal", "nuclear"],
"expected_range": (3000, 25000),
},
"DE": {
Expand All @@ -390,73 +383,37 @@ class WindAndSolarProductionForecastTypes(Enum):
# and when those are missing this can indicate that others are missing as well.
# We have also never seen unknown being 0.
# Usual load is in 30 to 80 GW range.
"required": [
"coal",
"gas",
"wind",
"biomass",
"hydro",
"unknown",
"solar",
],
"expected_range": (20000, 100000),
},
"EE": {
"required": ["coal"],
},
"ES": {
"required": ["coal", "nuclear"],
"expected_range": (10000, 80000),
},
"FI": {
"required": ["coal", "nuclear", "hydro", "biomass"],
"expected_range": (2000, 20000),
},
"GB": {
# usual load is in 15 to 50 GW range
"required": ["coal", "gas", "nuclear"],
"expected_range": (10000, 80000),
},
"GR": {
"required": ["coal", "gas"],
"expected_range": (2000, 20000),
},
"HR": {
"required": [
"coal",
"gas",
"wind",
"biomass",
"oil",
"solar",
],
},
"HU": {
"required": ["coal", "nuclear"],
},
"IE": {
"required": ["coal"],
"expected_range": (1000, 15000),
},
"IT": {
"required": ["coal"],
"expected_range": (5000, 50000),
},
"PL": {
# usual load is in 10-20 GW range and coal is always present
"required": ["coal"],
# usual load is in 10-20 GW range
"expected_range": (5000, 35000),
},
"PT": {
"required": ["coal", "gas"],
"expected_range": (1000, 20000),
},
"RO": {
"required": ["coal", "nuclear", "hydro"],
"expected_range": (2000, 25000),
},
"RS": {
"required": ["biomass", "coal", "gas", "hydro", "unknown"],
"expected_range": {
"coal": (
800,
Expand All @@ -465,27 +422,10 @@ class WindAndSolarProductionForecastTypes(Enum):
"hydro": (0, 5000), # 5 GW is double the production capacity of Serbia.
},
},
"SE": {
"required": ["hydro", "nuclear", "wind", "unknown"],
},
"SE-SE1": {
"required": ["hydro", "wind", "unknown", "solar"],
},
"SE-SE2": {
"required": ["gas", "hydro", "wind", "unknown", "solar"],
},
"SE-SE3": {
"required": ["gas", "hydro", "nuclear", "wind", "unknown", "solar"],
},
"SE-SE4": {
"required": ["gas", "hydro", "wind", "unknown", "solar"],
},
"SI": {
# own total generation capacity is around 4 GW
"required": ["nuclear"],
"expected_range": (140, 5000),
},
"SK": {"required": ["nuclear"]},
}


Expand Down Expand Up @@ -1001,14 +941,6 @@ def validate_production(

if validation_criteria:
return validate(datapoint, logger=logger, **validation_criteria)

# NOTE: Why are there special checks for these zones?
if zone_key.startswith("DK-"):
return validate(datapoint, logger=logger, required=["coal", "solar", "wind"])

if zone_key.startswith("NO-"):
return validate(datapoint, logger=logger, required=["hydro"])

return True


Expand Down
8 changes: 3 additions & 5 deletions parsers/FO.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@


class ValidationObject(TypedDict):
required: list[str]
floor: int


Expand All @@ -36,9 +35,9 @@ class ZoneData(TypedDict):


ZONE_MAP: dict[VALID_ZONE_KEYS, ZoneData] = {
"FO": {"data_key": "Sev_E", "validation": {"required": ["hydro"], "floor": 10}},
"FO-MI": {"data_key": "H_E", "validation": {"required": ["hydro"], "floor": 9}},
"FO-SI": {"data_key": "S_E", "validation": {"required": ["hydro"], "floor": 1}},
"FO": {"data_key": "Sev_E", "validation": {"floor": 10}},
"FO-MI": {"data_key": "H_E", "validation": {"floor": 9}},
"FO-SI": {"data_key": "S_E", "validation": {"floor": 1}},
}


Expand Down Expand Up @@ -103,7 +102,6 @@ def fetch_production(
data = validate(
data,
logger,
required=ZONE_MAP[zone_key]["validation"]["required"],
floor=ZONE_MAP[zone_key]["validation"]["floor"],
)
if isinstance(data, dict):
Expand Down
5 changes: 1 addition & 4 deletions parsers/TR.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,13 +88,10 @@ def validate_production_data(
logger: Logger = getLogger(__name__),
) -> list:
"""Detects outliers: for real-time data, the latest data point can be completely out of the expected range and needs to be excluded."""
required = list(PRODUCTION_MAPPING)
floor = (
17000 # as seen during the Covid-19 pandemic, the minimum production was 17 GW
)
all_data_points_validated = [
x for x in data if validate(x, logger, required=required, floor=floor)
]
all_data_points_validated = [x for x in data if validate(x, logger, floor=floor)]
if exclude_last_data_point:
all_data_points_validated = all_data_points_validated[:-1]
return all_data_points_validated
Expand Down
2 changes: 1 addition & 1 deletion parsers/archived/GB_NIR.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ def fetch_production(
"source": "soni.ltd.uk",
}
production_mix_by_quarter_hour.append(
validate(production_mix, logger=logger, required=["gas", "coal"], floor=1.0)
validate(production_mix, logger=logger, floor=1.0)
)

return production_mix_by_quarter_hour
Expand Down
17 changes: 2 additions & 15 deletions parsers/lib/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,11 +158,6 @@ def validate(datapoint: dict, logger: Logger | None, **kwargs) -> dict[str, Any]
remove_negative: bool
Changes negative production values to None.
Defaults to False.
required: list
Generation types that must be present.
For example ['gas', 'hydro']
If any of these types are None the datapoint will be invalidated.
Defaults to an empty list.
floor: float | int
Checks production sum is above floor value.
If this is not the case the datapoint is invalidated.
Expand Down Expand Up @@ -203,19 +198,16 @@ def validate(datapoint: dict, logger: Logger | None, **kwargs) -> dict[str, Any]
>>> },
>>> 'source': 'mysource.com'
>>> }
>>> validate(datapoint, None, required=['gas'], expected_range=(100, 2000))
>>> validate(datapoint, None, expected_range=(100, 2000))
datapoint
>>> validate(datapoint, None, required=['not_a_production_type'])
None
>>> validate(datapoint, None, required=['gas'],
>>> validate(datapoint, None,
>>> expected_range={'solar': (0, 1000), 'wind': (100, 2000)})
datapoint
"""
if logger is None:
logger = getLogger(__name__)

remove_negative: bool = kwargs.pop("remove_negative", False)
required: list[Any] = kwargs.pop("required", [])
floor: float | int | None = kwargs.pop("floor", None)
expected_range: tuple | dict | None = kwargs.pop("expected_range", None)
fake_zeros: bool = kwargs.pop("fake_zeros", False)
Expand All @@ -235,11 +227,6 @@ def validate(datapoint: dict, logger: Logger | None, **kwargs) -> dict[str, Any]
)
generation[key] = None

if required:
for item in required:
if not has_value_for_key(datapoint, item, logger):
return

if floor:
# when adding power to the system, storage key is negative
total = sum(v for k, v in generation.items() if v is not None) - sum(
Expand Down
8 changes: 0 additions & 8 deletions parsers/test/test_entsoe_quality.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,6 @@ class ProductionTestCase(unittest.TestCase):
test_logger = logging.getLogger()
test_logger.setLevel(logging.ERROR)

def test_missing_required_biomass_in_DE(self):
validated = validate_production(p10, self.test_logger) # noqa: F405
self.assertEqual(validated, None)

def test_production_too_low_in_PL(self):
validated = validate_production(p11, self.test_logger) # noqa: F405
self.assertEqual(validated, None)
Expand All @@ -27,10 +23,6 @@ def test_production_too_high_in_SI(self):
validated = validate_production(p12, self.test_logger) # noqa: F405
self.assertEqual(validated, None)

def test_missing_solar_in_DK1(self):
validated = validate_production(p13, self.test_logger) # noqa: F405
self.assertEqual(validated, None)

def test_valid_production_in_FI(self):
validated = validate_production(p14, self.test_logger) # noqa: F405
if isinstance(validated, dict):
Expand Down

0 comments on commit 46105da

Please sign in to comment.