Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: upgrade CA-NS with event classes #6050

Merged
merged 15 commits into from
Feb 10, 2024
Merged
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
291 changes: 130 additions & 161 deletions parsers/CA_NS.py
Original file line number Diff line number Diff line change
@@ -1,213 +1,182 @@
#!/usr/bin/env python3

# The datetime library is used to handle datetimes
from datetime import datetime, timezone
from logging import Logger, getLogger
from typing import Any

from requests import Session

from electricitymap.contrib.config import ZoneKey
from electricitymap.contrib.lib.models.event_lists import (
ExchangeList,
ProductionBreakdownList,
)
from electricitymap.contrib.lib.models.events import ProductionMix
from parsers.lib.exceptions import ParserException

def _get_ns_info(requests_obj, logger: Logger):
zone_key = "CA-NS"

# This is based on validation logic in https://www.nspower.ca/site/renewables/assets/js/site.js
# In practical terms, I've seen hydro production go way too high (>70%) which is way more
# than reported capacity.
valid_percent = {
# The validation JS reports error when Solid Fuel (coal) is over 85%,
# but as far as I can tell, that can actually be a valid result, I've seen it a few times.
# Use 98% instead.
"coal": (0, 0.98),
"gas": (0, 0.5),
"biomass": (0, 0.15),
"hydro": (0, 0.60),
"wind": (0, 0.55),
"imports": (0, 0.50),
}
LOAD_URL = "https://www.nspower.ca/library/CurrentLoad/CurrentLoad.json"
MIX_URL = "https://www.nspower.ca/library/CurrentLoad/CurrentMix.json"
PARSER = "CA_NS.py"
SOURCE = "nspower.ca"
ZONE_KEY = ZoneKey("CA-NS")

# Sanity checks: verify that reported production doesn't exceed listed capacity by a lot.
# In particular, we've seen error cases where hydro production ends up calculated as 900 MW
# which greatly exceeds known capacity of 418 MW.
valid_absolute = {
"coal": 1300,
"gas": 700,
"biomass": 100,
"hydro": 500,
"wind": 700,
}

mix_url = "https://www.nspower.ca/library/CurrentLoad/CurrentMix.json"
mix_data = requests_obj.get(mix_url).json()

load_url = "https://www.nspower.ca/library/CurrentLoad/CurrentLoad.json"
load_data = requests_obj.get(load_url).json()

production = []
imports = []
for mix in mix_data:
percent_mix = {
"coal": mix["Solid Fuel"] / 100.0,
"gas": (mix["HFO/Natural Gas"] + mix["CT's"] + mix["LM 6000's"]) / 100.0,
"biomass": mix["Biomass"] / 100.0,
"hydro": mix["Hydro"] / 100.0,
"wind": mix["Wind"] / 100.0,
"imports": mix["Imports"] / 100.0,
}

# datetime is in format '/Date(1493924400000)/'
# get the timestamp 1493924400 (cutting out last three zeros as well)
data_timestamp = int(mix["datetime"][6:-5])
data_date = datetime.fromtimestamp(data_timestamp, tz=timezone.utc)

# validate
valid = True
for gen_type, value in percent_mix.items():
percent_bounds = valid_percent[gen_type]
if not (percent_bounds[0] <= value <= percent_bounds[1]):
# skip this datapoint in the loop
valid = False
logger.warning(
"discarding datapoint at {dt} due to {fuel} percentage "
"out of bounds: {value}".format(
dt=data_date, fuel=gen_type, value=value
),
extra={"key": zone_key},
)
if not valid:
# continue the outer loop, not the inner
def _parse_timestamp(timestamp: str) -> datetime:
"""
Construct a datetime object from a date string formatted as, e.g.,
"/Date(1493924400000)/" by extracting the Unix timestamp 1493924400. Note
that the three trailing zeros are cut out as well).
"""
return datetime.fromtimestamp(int(timestamp[6:-5]), tz=timezone.utc)


def _get_ns_info(
session: Session, logger: Logger
) -> tuple[ExchangeList, ProductionBreakdownList]:
# Request data from the source. Skip the first element of each JSON array
# because the reported base load is always 0 MW.
loads = { # Lookup table mapping timestamps to base loads (in MW)
_parse_timestamp(load["datetime"]): load["Base Load"]
for load in session.get(LOAD_URL).json()[1:]
}
mixes = session.get(MIX_URL).json()[1:] # Electricity mix breakdowns in %

exchanges = ExchangeList(logger)
production_breakdowns = ProductionBreakdownList(logger)
for mix in mixes:
timestamp = _parse_timestamp(mix["datetime"])

# Ensure the provided percentages are within bounds, similarly to the
# logic in https://www.nspower.ca/site/renewables/assets/js/site.js. In
# practical terms, I've seen hydro production go higher than 70%, which
# is way more than reported capacity.
if (
15 < mix["Biomass"]
or 60 < mix["Hydro"]
or 50 < mix["Imports"]
# The validation JS reports an error when Solid Fuel (coal) is over
# 85%, but as far as I can tell, that can actually be a valid
# result, I've seen it a few times. Use 98% instead.
or 98 < mix["Solid Fuel"]
or 55 < mix["Wind"]
# Gas
or 50 < mix["HFO/Natural Gas"] + mix["CT's"] + mix["LM 6000's"]
):
kruschk marked this conversation as resolved.
Show resolved Hide resolved
logger.warning(
f"discarding datapoint at {timestamp} because some mode's "
f"share of the mix is infeasible: {mix}",
extra={"key": ZONE_KEY},
)
continue

# in mix_data, the values are expressed as percentages,
# and have to be multiplied by load to find the actual MW value.
corresponding_load = [
load_period
for load_period in load_data
if load_period["datetime"] == mix["datetime"]
]
if corresponding_load:
load = corresponding_load[0]["Base Load"]
if timestamp in loads:
load = loads[timestamp]
else:
# if not found, assume 1244 MW, based on average yearly electricity available for use
# in 2014 and 2015 (Statistics Canada table Table 127-0008 for Nova Scotia)
# If a base load corresponding with this timestamp is not found,
# assume 1244 MW based on the average yearly electricity available
# for use in 2014 and 2015 (Statistics Canada table 127-0008 for
# Nova Scotia).
load = 1244
kruschk marked this conversation as resolved.
Show resolved Hide resolved
logger.warning(
f"unable to find load for {data_date}, assuming 1244 MW",
extra={"key": zone_key},
f"unable to find load for {timestamp}; assuming 1244 MW",
extra={"key": ZONE_KEY},
)

electricity_mix = {
gen_type: percent_value * load
for gen_type, percent_value in percent_mix.items()
}

# validate again
valid = True
for gen_type, value in electricity_mix.items():
absolute_bound = valid_absolute.get(
gen_type
) # imports are not in valid_absolute
if absolute_bound and value > absolute_bound:
valid = False
logger.warning(
"discarding datapoint at {dt} due to {fuel} "
"too high: {value} MW".format(
dt=data_date, fuel=gen_type, value=value
),
extra={"key": zone_key},
)
if not valid:
# continue the outer loop, not the inner
continue

production.append(
{
"zoneKey": zone_key,
"datetime": data_date,
"production": {
key: value
for key, value in electricity_mix.items()
if key != "imports"
},
"source": "nspower.ca",
}
# In this source, imports are positive. In the expected result for
# CA-NB->CA-NS, "net" represents a flow from NB to NS, i.e., an import
# to NS, so the value can be used directly. Note that this API only
# specifies imports; when NS is exporting energy, the API returns 0.
exchanges.append(
datetime=timestamp,
netFlow=load * mix["Imports"] / 100,
source=SOURCE,
zoneKey=ZoneKey("CA-NB->CA-NS"),
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not strictly related to this refactor, but worth mentioning: the assumption that all imports are from NB is now problematic because Nova Scotia's interconnection to Newfoundland is live. I've never seen live data on its operation, though; the closest thing I've found is that it amounted to 14% of NS supply in 2023 H1, per the "Our Energy Stats" chart at https://www.nspower.ca/cleanandgreen/clean-energy.

Copy link
Contributor

@q-- q-- Jan 5, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For reference related to your comment: the daily generation reports for Newfoundland might be usable as a source for the interconnector data, see #2541 (comment)

Copy link
Contributor

@q-- q-- Jan 6, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hey, look: I found a page on the NSPower website that has the information you need (the Maritime Link interconnector) and more. It is updated roughly every 5 minutes and is more granular than the current source. The page is called Daily Report; under the heading "Current System Conditions" you'll find "10 minute averages" of, e.g., load, wind, and imports and exports through the various interconnectors, and those figures are at most five minutes old.

Edit: well, turns out you found it at some point as well: #3206 (comment)

Copy link
Contributor

@q-- q-- Jan 6, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I dug deeper into this, and it turns out we aren't actually using this exchange data since we get the data on the NS-NB interconnection from New Brunswick Power, not from NSPower. So it might be best to just delete the exchanges-related part, or, alternatively, switch to obtaining exchange info from the source in #6317 (and perhaps implement the NS-Newfoundland interconnector as well). But that could be done in a separate issue.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hey @VIKTORVAV99, any thoughts on this? Should we simply delete the exchange parser if it's not used?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To clarify, my idea was to delete the exchange parser now so we are more certain we don't accidentally fetch incorrect data, and then re-add an exchange parser based on the new source from #6317 in a separate pull request.

But doing both in this PR is also acceptable, if you've got the time for it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, gotcha! It's not entirely clear to me what needs to be done to implement the new exchange parser (there are a lot of links to follow and I'm a bit lost), so I'll just delete it here and we can address it separately. Would those changes have any impact on the production parser?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The exchange stuff shouldn't. The "Current System Conditions" also has wind generation, but as the page has data updated roughly every 5 minutes I think it'll be hard to integrate with the hourly data provided by the current source – and probably not that useful either, if the current source stays online.

As for implementing the exchange parser, the short summary is: from https://www.nspower.ca/oasis/system-reports-messages/daily-report, under "Current System Conditions", grab:

  1. The timestamp, e.g. "Last Updated: 10-Jan-24 20:38:10";
  2. "NS Export" for (most likely) the New Brunswick interconnector (optional, already provided by the New Brunswick parser);
  3. "Maritime Link Import" for the Newfoundland interconnector.

The rest of the stuff in the issue is useful for exchange configuration and historical data.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Awesome, thanks for clarifying! That doesn't sound as arduous as I thought; maybe I can roll it into this PR. I'll try to carve out some time to work on it this weekend.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alright, that wasn't so bad! A few notes:

  • I created the CA-NL-NF->CA-NS exchange, but this is my first time doing so, so I very well could have done something wrong. In particular, I haven't tested how it will look because I don't have the front-end set up on my machine (I used Google Maps for the coordinates and a protractor for the rotation 😁). Further, I wasn't sure whether to use CA-NL-LB or CA-NL-NF for the other zone (I chose the latter based on the physical location of the link).
  • I also implemented the CA-NB->CA-NS exchange, but the back-end is still configured to use the NB parser for this pair.

)

# In this source, imports are positive. In the expected result for CA-NB->CA-NS,
# "net" represents a flow from NB to NS, that is, an import to NS.
# So the value can be used directly.
# Note that this API only specifies imports. When NS is exporting energy, the API returns 0.
imports.append(
{
"datetime": data_date,
"netFlow": electricity_mix["imports"],
"sortedZoneKeys": "CA-NB->CA-NS",
"source": "nspower.ca",
}
production_mix = ProductionMix()
production_mix.add_value("biomass", load * mix["Biomass"] / 100)
production_mix.add_value("coal", load * mix["Solid Fuel"] / 100)
production_mix.add_value("gas", load * mix["CT's"] / 100)
production_mix.add_value("gas", load * mix["HFO/Natural Gas"] / 100)
production_mix.add_value("gas", load * mix["LM 6000's"] / 100)
production_mix.add_value("hydro", load * mix["Hydro"] / 100)
production_mix.add_value("wind", load * mix["Wind"] / 100)
# Sanity checks: verify that reported production doesn't exceed listed
# capacity by a lot. In particular, we've seen error cases where hydro
# production ends up calculated as 900 MW which greatly exceeds known
# capacity of 418 MW.
if (
100 < production_mix.biomass
or 1300 < production_mix.coal
or 700 < production_mix.gas
or 500 < production_mix.hydro
or 700 < production_mix.wind
kruschk marked this conversation as resolved.
Show resolved Hide resolved
):
logger.warning(
f"discarding datapoint at {timestamp} because some mode's "
f"production is infeasible: {production_mix}",
extra={"key": ZONE_KEY},
)
continue
production_breakdowns.append(
datetime=timestamp,
production=production_mix,
source=SOURCE,
zoneKey=ZONE_KEY,
)

return production, imports
return exchanges, production_breakdowns


def fetch_production(
zone_key: str = "CA-NS",
zone_key: ZoneKey = ZONE_KEY,
session: Session | None = None,
target_datetime: datetime | None = None,
logger: Logger = getLogger(__name__),
) -> list[dict]:
) -> list[dict[str, Any]]:
"""Requests the last known production mix (in MW) of a given country."""
if target_datetime:
raise NotImplementedError(
"This parser is unable to give information more than 24 hours in the past"
)

r = session or Session()
raise ParserException(PARSER, "Unable to fetch historical data", zone_key)

production, imports = _get_ns_info(r, logger)
if zone_key != ZONE_KEY:
raise ParserException(PARSER, f"Cannot parse zone '{zone_key}'", zone_key)

return production
_, production_breakdowns = _get_ns_info(session or Session(), logger)
return production_breakdowns.to_list()


def fetch_exchange(
zone_key1: str,
zone_key2: str,
zone_key1: ZoneKey,
zone_key2: ZoneKey,
session: Session | None = None,
target_datetime: datetime | None = None,
logger: Logger = getLogger(__name__),
) -> list[dict]:
) -> list[dict[str, Any]]:
"""
Requests the last known power exchange (in MW) between two regions.

Note: As of early 2017, Nova Scotia only has an exchange with New Brunswick (CA-NB).
(An exchange with Newfoundland, "Maritime Link", is scheduled to open in "late 2017").
Note: As of early 2017, Nova Scotia only has an exchange with New Brunswick
(CA-NB). (An exchange with Newfoundland, "Maritime Link", is scheduled to
open in "late 2017").

The API for Nova Scotia only specifies imports.
When NS is exporting energy, the API returns 0.
The API for Nova Scotia only specifies imports. When NS is exporting
energy, the API returns 0.
"""
if target_datetime:
raise NotImplementedError(
"This parser is unable to give information more than 24 hours in the past"
)

sorted_zone_keys = "->".join(sorted([zone_key1, zone_key2]))
raise ParserException(PARSER, "Unable to fetch historical data", ZONE_KEY)

sorted_zone_keys = "->".join(sorted((zone_key1, zone_key2)))
if sorted_zone_keys != "CA-NB->CA-NS":
raise NotImplementedError("This exchange pair is not implemented")

requests_obj = session or Session()
_, imports = _get_ns_info(requests_obj, logger)
raise ParserException(PARSER, "Unimplemented exchange pair", sorted_zone_keys)

return imports
exchanges, _ = _get_ns_info(session or Session(), logger)
return exchanges.to_list()


if __name__ == "__main__":
"""Main method, never used by the Electricity Map backend, but handy for testing."""

# Never used by the Electricity Map backend, but handy for testing.
from pprint import pprint

test_logger = getLogger()

print("fetch_production() ->")
pprint(fetch_production(logger=test_logger))

pprint(fetch_production())
print('fetch_exchange("CA-NS", "CA-NB") ->')
pprint(fetch_exchange("CA-NS", "CA-NB", logger=test_logger))
pprint(fetch_exchange("CA-NS", "CA-NB"))