Skip to content

Commit

Permalink
refactor(pa): upgrade PA parser with event classes (#6610)
Browse files Browse the repository at this point in the history
* refactor(pa): upgrade PA parser with event classes

* refactor(pa): drop usage of arrow

* refactor: tests to use snapshots
  • Loading branch information
amv213 committed Apr 2, 2024
1 parent 4559ba4 commit 41d3c8a
Show file tree
Hide file tree
Showing 3 changed files with 207 additions and 169 deletions.
283 changes: 157 additions & 126 deletions parsers/PA.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,34 @@
#!/usr/bin/env python3

import json
import re
from datetime import datetime
from logging import Logger, getLogger
from zoneinfo import ZoneInfo

import arrow
import pandas as pd
from bs4 import BeautifulSoup
from requests import Session

from electricitymap.contrib.lib.models.event_lists import (
ExchangeList,
ProductionBreakdownList,
TotalConsumptionList,
)
from electricitymap.contrib.lib.models.events import ProductionMix
from electricitymap.contrib.lib.types import ZoneKey
from parsers.lib.exceptions import ParserException

# Parser identifier passed as the first argument of every ParserException raised in this module.
PARSER = "PA.py"
# Timezone used to make both parsed and locally generated timestamps timezone-aware.
TIMEZONE = ZoneInfo("America/Panama")
ZONE_KEY = ZoneKey("PA")

# Each kind of data is scraped from its own page; the *_SOURCE label is attached to the emitted events.
CONSUMPTION_URL = "https://sitr.cnd.com.pa/m/pub/sin.html"
CONSUMPTION_SOURCE = "sitr.cnd.com.pa"

EXCHANGE_URL = "https://sitr.cnd.com.pa/m/pub/int.html"
EXCHANGE_SOURCE = "sitr.cnd.com.pa"

PRODUCTION_URL = "https://sitr.cnd.com.pa/m/pub/gen.html"
PRODUCTION_SOURCE = "sitr.cnd.com.pa"

# Sources:
# 1. https://www.celsia.com/Portals/0/contenidos-celsia/accionistas-e-inversionistas/perfil-corporativo-US/presentaciones-US/2014/presentacion-morgan-ingles-v2.pdf
Expand Down Expand Up @@ -97,33 +110,67 @@
"Tropitérmica 3": "oil", # [6]:162[7] spelled "Tropitermica" in both
}

# Maps the Spanish category label that starts each pie chart entry name
# (e.g. "Hídrica 1342.54 (80.14%)") to the production mode it is reported under.
# NOTE: fetch_production skips "Térmica" chart entries and derives thermal
# generation from the per-unit tables instead.
PRODUCTION_TYPE_TO_PRODUCTION_MODE = {
"Hídrica": "hydro",
"Eólica": "wind",
"Solar": "solar",
"Biogás": "biomass",
"Térmica": "unknown",
}

_SPANISH_CALENDAR = {
"enero": "01",
"febrero": "02",
"marzo": "03",
"abril": "04",
"mayo": "05",
"junio": "06",
"julio": "07",
"agosto": "08",
"septiembre": "09",
"octubre": "10",
"noviembre": "11",
"diciembre": "12",
}


def _localise_spanish_date(date: str) -> str:
"""Localises a date containing full name (lowercase) spanish months, replacing them with zero-padded decimal numbers.
This avoids having to mess up the global locale to be able to parse the date.
"""
return re.sub(
"|".join(_SPANISH_CALENDAR.keys()),
lambda m: _SPANISH_CALENDAR[m.group(0)],
date,
)


# Matches either a complete double-quoted string literal or a bare identifier
# that is immediately followed by a colon (i.e. an un-quoted object key).
# String literals are matched first so that words inside values are never
# mistaken for keys.
_JS_STRING_OR_BARE_KEY = re.compile(r'"(?:\\.|[^"\\])*"|([A-Za-z_$][\w$]*)(?=\s*:)')


def extract_pie_chart_data(html):
    """Extract the generation breakdown pie chart data from the source code of the page.

    The page embeds the chart as a JavaScript array literal assigned to
    ``localPie``, e.g.
    ``[{name:"Hídrica 1342.54 (80.14%)",value:1342.54,color:"#99ccee"}, ...]``.

    Args:
        html: Source code of the page, as text.

    Returns:
        The chart entries as a list of dicts.

    Raises:
        ValueError: If no ``localPie`` array is found in ``html``
            (``json.JSONDecodeError``, a ``ValueError`` subclass, if it cannot be decoded).
    """
    match = re.search(r"var localPie = (\[\{.+\}\]);", html)
    if match is None:
        raise ValueError("Could not find the `localPie` chart data in the page source")

    # Un-quoted keys ({key:"value"}) are valid JavaScript but not valid JSON
    # (which requires {"key":"value"}). Quote every bare key, and leave string
    # literals untouched so that values containing words such as "value" or a
    # colon are not corrupted.
    data_source = _JS_STRING_OR_BARE_KEY.sub(
        lambda m: m.group(0) if m.group(1) is None else f'"{m.group(1)}"',
        match.group(1),
    )
    return json.loads(data_source)


def sum_thermal_units(soup) -> float:
"""
Sums thermal units of the generation mix to prevent using slightly outdated chart data.
"""Sums thermal units of the generation mix to prevent using slightly outdated chart data.
Thermal total from the graph and the total one would get from summing output of all generators deviates a bit,
presumably because they aren't updated at the exact same moment.
"""

# Sum thermal units from table Térmicas (MW)
thermal_h3 = soup.find("h3", string=re.compile(r"\s*Térmicas\s*"))
thermal_tables = thermal_h3.find_next_sibling().find_all(
"table", {"class": "table table-hover table-striped table-sm sitr-gen-group"}
)

thermal_units = 0

for thermal_table in thermal_tables:
thermal_units += sum(
[
Expand All @@ -142,65 +189,50 @@ def sum_thermal_units(soup) -> float:


def fetch_production(
zone_key: str = "PA",
zone_key: ZoneKey = ZONE_KEY,
session: Session | None = None,
target_datetime: datetime | None = None,
logger: Logger = getLogger(__name__),
) -> dict:
) -> list[dict]:
"""Requests the last known production mix (in MW) of a given country."""
if target_datetime:
raise NotImplementedError("This parser is not yet able to parse past dates")

if target_datetime is not None:
raise ParserException(
PARSER, "This parser is not yet able to parse historical data", zone_key
)

# Fetch page and load into BeautifulSoup
r = session or Session()
url = PRODUCTION_URL
response = r.get(url)
response = r.get(PRODUCTION_URL)
if not response.ok:
raise ParserException(
PARSER,
f"Exception when fetching production error code: {response.status_code}: {response.text}",
zone_key,
)

response.encoding = "utf-8"
html_doc = response.text
soup = BeautifulSoup(html_doc, "html.parser")

# Parse production from pie chart
productions = extract_pie_chart_data(
html_doc
) # [{name:"Hídrica 1342.54 (80.14%)",value:1342.54,color:"#99ccee"}, ...]
# Parse the datetime and return a python datetime object
spanish_date = soup.find("h3", {"class": "sitr-update"}).string
english_date = _localise_spanish_date(spanish_date)
date = datetime.strptime(english_date, "%d-%m-%Y %H:%M:%S").replace(tzinfo=TIMEZONE)

# Sum thermal units from table Térmicas (MW)
thermal_sum = sum_thermal_units(soup)

map_generation = {
"Hídrica": "hydro",
"Eólica": "wind",
"Solar": "solar",
"Biogás": "biomass",
"Térmica": "unknown",
}
data = {
"zoneKey": "PA",
"production": {
# Setting default values here so we can do += when parsing the thermal generation breakdown
"biomass": 0.0,
"coal": 0.0,
"gas": 0.0,
"hydro": 0.0,
"nuclear": 0.0,
"oil": 0.0,
"solar": 0.0,
"wind": 0.0,
"geothermal": 0.0,
"unknown": 0.0,
},
"storage": {},
"source": "https://www.cnd.com.pa/",
}
production_mix = ProductionMix()
productions = extract_pie_chart_data(html_doc)
# [{name:"Hídrica 1342.54 (80.14%)",value:1342.54,color:"#99ccee"}, ...]
for prod in productions: # {name:"Hídrica 1342.54 (80.14%)", ...}
prod_data = prod["name"].split(" ") # "Hídrica 1342.54 (80.14%)"
production_type = map_generation[prod_data[0]] # Hídrica
production_value = float(prod_data[1]) # 1342.54
data["production"][production_type] = production_value

# Replacing chart termica data with manually calculated thermal generation to avoid using outdated chart data
data["production"]["unknown"] = thermal_sum

production_type, production_value, _percentage = prod["name"].split(" ", 2)
# ignore termica data to avoid using outdated chart data
if production_type == "Térmica":
continue
production_mode = PRODUCTION_TYPE_TO_PRODUCTION_MODE[production_type] # hydro
production_mix.add_value(production_mode, float(production_value))

# Calculate manually thermal generation to avoid using outdated chart data
thermal_generation = sum_thermal_units(soup)
# Known fossil plants: parse, subtract from "unknown", add to "coal"/"oil"/"gas"
thermal_production_breakdown = soup.find_all("table", {"class": "sitr-table-gen"})[
1
Expand Down Expand Up @@ -228,58 +260,58 @@ def fetch_production(
unit_fuel_type = MAP_THERMAL_GENERATION_UNIT_NAME_TO_FUEL_TYPE[
unit_name
]
data["production"][unit_fuel_type] += unit_generation
data["production"]["unknown"] -= unit_generation
production_mix.add_value(unit_fuel_type, unit_generation)
thermal_generation -= unit_generation
else:
logger.warning(
f"{unit_name} is not mapped to generation type",
extra={"key": zone_key},
)

if 0 > data["production"]["unknown"] > -10:
logger.info(
f"Ignoring small amount of negative thermal generation ({data['production']['unknown']}MW)",
extra={"key": zone_key},
)
data["production"]["unknown"] = 0.0

# Round remaining "unknown" output to 13 decimal places to get rid of floating point errors
data["production"]["unknown"] = round(data["production"]["unknown"], 13)

if 0 < data["production"]["unknown"] < 1e-3:
data["production"]["unknown"] = 0.0

# Parse the datetime and return a python datetime object
spanish_date = soup.find("h3", {"class": "sitr-update"}).string
date = arrow.get(
spanish_date, "DD-MMMM-YYYY H:mm:ss", locale="es", tzinfo="America/Panama"
thermal_generation = round(thermal_generation, 13)
if 0 < thermal_generation < 1e-3:
thermal_generation = 0.0
# assign anything remaining to 'unknown'
production_mix.add_value("unknown", max(thermal_generation, 0.0))

production_breakdown_list = ProductionBreakdownList(logger)
production_breakdown_list.append(
zoneKey=zone_key,
datetime=date,
source=PRODUCTION_SOURCE,
production=production_mix,
)
data["datetime"] = date.datetime

return data
return production_breakdown_list.to_list()


def fetch_exchange(
zone_key1: str = "CR",
zone_key2: str = "PA",
zone_key1: ZoneKey = ZONE_KEY,
zone_key2: ZoneKey = ZoneKey("CR"),
session: Session | None = None,
target_datetime: datetime | None = None,
logger: Logger = getLogger(__name__),
) -> dict:
"""
Requests the last known power exchange (in MW) between two countries.
"""
) -> list[dict]:
"""Requests the last known power exchange (in MW) between two countries."""

if target_datetime:
raise NotImplementedError("This parser is not yet able to parse past dates")
sorted_zone_keys = ZoneKey("->".join(sorted([zone_key1, zone_key2])))

sorted_zone_keys = "->".join(sorted([zone_key1, zone_key2]))

r = session or Session()
url = EXCHANGE_URL
if target_datetime is not None:
raise ParserException(
PARSER,
"This parser is not yet able to parse historical data",
sorted_zone_keys,
)

response = r.get(url)
assert response.status_code == 200
session = session or Session()
timestamp = datetime.now(tz=TIMEZONE)
response = session.get(EXCHANGE_URL)
if not response.ok:
raise ParserException(
PARSER,
f"Exception when fetching production error code: {response.status_code}: {response.text}",
sorted_zone_keys,
)

df = pd.read_html(response.text)[0]

Expand Down Expand Up @@ -320,52 +352,51 @@ def fetch_exchange(
"PA->SV": net_flow_sv, # Panama to El Salvador
}

if sorted_zone_keys not in net_flows:
raise NotImplementedError(
f"This exchange pair is not implemented: {sorted_zone_keys}"
)

data = {
"datetime": datetime.now(tz=TIMEZONE),
"netFlow": net_flows[sorted_zone_keys],
"sortedZoneKeys": sorted_zone_keys,
"source": url,
}

return data
exchange_list = ExchangeList(logger)
exchange_list.append(
zoneKey=sorted_zone_keys,
datetime=timestamp,
netFlow=net_flows[sorted_zone_keys],
source=EXCHANGE_SOURCE,
)
return exchange_list.to_list()


def fetch_consumption(
    zone_key: ZoneKey = ZONE_KEY,
    session: Session | None = None,
    target_datetime: datetime | None = None,
    logger: Logger = getLogger(__name__),
) -> list[dict]:
    """Fetch the last known total consumption of Panama.

    Args:
        zone_key: Zone the consumption is reported for.
        session: Optional session to reuse for the HTTP request; a new one is created if omitted.
        target_datetime: Must be None; historical data is not supported.
        logger: Logger handed to the consumption event list.

    Returns:
        The consumption events, as produced by ``TotalConsumptionList.to_list()``.

    Raises:
        ParserException: If ``target_datetime`` is given, the request fails,
            or the "Demanda Total" value cannot be found in the page.
    """
    if target_datetime is not None:
        raise ParserException(
            PARSER, "This parser is not yet able to parse historical data", zone_key
        )

    r = session or Session()
    # The timestamp is the moment of the request; no datetime is parsed from this page.
    timestamp = datetime.now(tz=TIMEZONE)
    response = r.get(CONSUMPTION_URL)
    if not response.ok:
        raise ParserException(
            PARSER,
            f"Exception when fetching consumption error code: {response.status_code}: {response.text}",
            zone_key,
        )

    soup = BeautifulSoup(response.text, "html.parser")
    # The value is the first whitespace-separated token of the element that follows the
    # "Demanda Total" heading.
    consumption_title = soup.find("h5", string=re.compile(r"\s*Demanda Total\s*"))
    consumption_element = (
        consumption_title.find_next_sibling() if consumption_title is not None else None
    )
    if consumption_element is None:
        # Fail with a parser error rather than an AttributeError if the page layout changes.
        raise ParserException(
            PARSER,
            "Could not find the 'Demanda Total' value in the consumption page",
            zone_key,
        )
    consumption_val = float(consumption_element.text.split()[0])

    consumption_list = TotalConsumptionList(logger)
    consumption_list.append(
        zoneKey=zone_key,
        datetime=timestamp,
        consumption=consumption_val,
        source=CONSUMPTION_SOURCE,
    )
    return consumption_list.to_list()


if __name__ == "__main__":
Expand Down
Loading

0 comments on commit 41d3c8a

Please sign in to comment.