-
Notifications
You must be signed in to change notification settings - Fork 916
/
US_PREPA.py
273 lines (220 loc) · 10.2 KB
/
US_PREPA.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
#!/usr/bin/env python3
"""
Real-time parser for Puerto Rico.
Fetches data from various pages embedded as an iframe at https://aeepr.com/en-us/Pages/Generaci%C3%B3n.aspx
The electricity authority is known in English as PREPA (Puerto Rico Electric Power Authority) and in Spanish as AEEPR (Autoridad de Energía Eléctrica Puerto Rico)
"""
import json
import re
from datetime import datetime
from logging import Logger, getLogger
from zoneinfo import ZoneInfo
from requests import Session
TIMEZONE = ZoneInfo("America/Puerto_Rico")
US_PROXY = "https://us-ca-proxy-jfnx5klx2a-uw.a.run.app"
HOST_PARAMETER = "?host=https://aeepr.com"
GENERATION_BREAKDOWN_URL = (
f"{US_PROXY}/es-pr/generacion/Documents/combustibles.aspx{HOST_PARAMETER}"
)
RENEWABLES_BREAKDOWN_URL = (
f"{US_PROXY}/es-pr/generacion/Documents/Unidades_renovables.aspx{HOST_PARAMETER}"
)
TIMESTAMP_URL = (
f"{US_PROXY}/es-pr/generacion/Documents/CostosCombustible.aspx{HOST_PARAMETER}"
)
def extract_data(html):
"""Extracts data from the source code of an HTML page with a FusionCharts chart"""
dataSource = re.search(r"dataSource: (\{.+\}\]\})\}\);", html).group(
1
) # Extract object with data
dataSource = re.sub(
r",\s*\}", "}", dataSource
) # ,} is valid JavaScript but not valid JSON
dataSource = json.loads(dataSource)
sourceData = dataSource[
"data"
] # The rest of the dataSource object contains unnecessary stuff like chart theme, label, axis names etc.
return sourceData
def convert_timestamp(
zone_key: str, timestamp_string: str, logger: Logger = getLogger(__name__)
):
"""Converts timestamp fetched from website into timezone-aware datetime object.
Arguments:
----------
timestamp_string: timestamp in the format 06/01/2020 08:40:00 AM
"""
timestamp_string = re.sub(
r"\s+", " ", timestamp_string
) # Replace double spaces with one
timestamp = datetime.strptime(timestamp_string, "%m/%d/%Y %I:%M:%S %p").replace(
tzinfo=TIMEZONE
)
logger.debug(f"PARSED TIMESTAMP {timestamp}", extra={"key": zone_key})
return timestamp
def fetch_production(
zone_key: str = "US-PR",
session: Session | None = None,
target_datetime: datetime | None = None,
logger: Logger = getLogger(__name__),
) -> dict:
"""Requests the last known production mix (in MW) of a given region."""
global renewable_output
if target_datetime is not None:
raise NotImplementedError(
"The datasource currently implemented is only real time"
)
r = session or Session()
data = { # To be returned as response data
"zoneKey": zone_key,
#'datetime': '2017-01-01T00:00:00Z',
"production": {
"biomass": 0.0,
"coal": 0.0,
"gas": 0.0,
"hydro": 0.0,
"nuclear": 0.0,
"oil": 0.0,
"solar": 0.0,
"wind": 0.0,
"geothermal": 0.0,
"unknown": 0.0,
},
# 'storage': {
# 'hydro': -10.0,
# },
"source": "aeepr.com",
}
renewable_output = 0.0 # Temporarily stored here. We'll subtract solar, wind and biomass (landfill gas) from it and assume the remainder, if any, is hydro
# Step 1: fetch production by generation type
# Note: seems to be rounded down (to an integer)
# Total at the top of the page fetched in step 3 isn't rounded down, but seems to be lagging behind sometimes.
# Difference is only minor, so for now we will IGNORE that total (instead of trying to parse the total and addding the difference to "unknown")
res = r.get(GENERATION_BREAKDOWN_URL)
assert res.status_code == 200, (
"Exception when fetching production for "
f"{zone_key}: error when calling url={GENERATION_BREAKDOWN_URL}"
)
sourceData = extract_data(res.text)
logger.debug(f"Raw generation breakdown: {sourceData}", extra={"key": zone_key})
for item in sourceData: # Item has a label with fuel type + generation in MW, and a value with a percentage
if item["label"] == " MW": # There's one empty item for some reason. Skip it.
continue
logger.debug(item["label"], extra={"key": zone_key})
parsedLabel = re.search(r"^(.+?)\s+(\d+)\s+MW$", item["label"])
category = parsedLabel.group(1) # E.g. GAS NATURAL
outputInMW = float(parsedLabel.group(2))
if category == "BUNKER C" or category == "DIESEL CC" or category == "DIESEL GT":
data["production"]["oil"] += outputInMW
elif category == "GAS NATURAL":
data["production"]["gas"] += outputInMW
elif category == "CARBON":
data["production"]["coal"] += outputInMW
elif category == "RENOVABLES":
renewable_output += outputInMW # Temporarily store aggregate renewable output. We'll subtract solar, wind and biomass (landfill gas) from it and assume the remainder, if any, is hydro
else:
logger.warn(
f'Unknown energy type "{category}" is present for Puerto Rico',
extra={"key": zone_key},
)
logger.info(
f'Category "{category}" produces {outputInMW}MW', extra={"key": zone_key}
)
# Step 2: fetch renewable production breakdown
# Data from this source isn't rounded. Assume renewable production not accounted for is hydro
res = r.get(RENEWABLES_BREAKDOWN_URL)
assert res.status_code == 200, (
"Exception when fetching renewable production for "
f"{zone_key}: error when calling url={RENEWABLES_BREAKDOWN_URL}"
)
sourceData = extract_data(res.text)
logger.debug(
f"Raw renewable generation breakdown: {sourceData}", extra={"key": zone_key}
)
original_renewable_output = renewable_output # If nothing gets subtracted renewable_output, there probably was no data on the renewables breakdown page
logger.debug(
f"Total (unspecified) renewable output from total generation breakdown: {original_renewable_output}MW",
extra={"key": zone_key},
)
for item in sourceData: # Somewhat different from above, the item's label has the generation type and the item's value has generation in MW
if item["label"] == " ": # There's one empty item for some reason. Skip it.
continue
if item["label"] == "Solar":
data["production"]["solar"] += float(item["value"])
elif item["label"] == "Eolica":
data["production"]["wind"] += float(item["value"])
elif item["label"] == "Landfill Gas":
data["production"]["biomass"] += float(item["value"])
else:
logger.warn(
f"Unknown renewable type \"{item['label']}\" is present for Puerto Rico",
extra={"key": zone_key},
)
renewable_output -= float(
item["value"]
) # Subtract production accounted for from the renewable output total
logger.info(
f"Renewable \"{item['label']}\" produces {item['value']}MW",
extra={"key": zone_key},
)
logger.debug(
f"Renewable output yet to be accounted for: {renewable_output}MW",
extra={"key": zone_key},
)
logger.debug(
"Rounding remaining renewable output to 14 decimal places to get rid of floating point errors"
)
renewable_output = round(renewable_output, 14)
logger.info(
f"Remaining renewable output not accounted for: {renewable_output}MW",
extra={"key": zone_key},
)
# Assume renewable generation not accounted for is hydro - if we could fetch the other renewable generation data
if renewable_output >= 0.0:
if (
original_renewable_output == renewable_output
): # Nothing got subtracted for Solar, Wind or Landfill gas - so the page probably didn't contain any data. Renewable type=unknown
logger.warning(
f"Renewable generation breakdown page was empty, reporting unspecified renewable output ({renewable_output}MW) as 'unknown'",
extra={"key": zone_key},
)
data["production"]["unknown"] += renewable_output
else: # Otherwise, any remaining renewable output is probably hydro
logger.info(
f"Assuming remaining renewable output of {renewable_output}MW is hydro",
extra={"key": zone_key},
)
data["production"]["hydro"] += renewable_output
else:
logger.warn(
f"Renewable generation breakdown page total is greater than total renewable output, a difference of {renewable_output}MW",
extra={"key": zone_key},
)
# Step 3: fetch the timestamp, which is at the bottom of a different iframe
# Note: there's a race condition here when requesting data very close to <hour>:10 and <hour>:40, which is when the data gets updated
# Sometimes it's some seconds later, so we grab the timestamp from here to know the exact moment
res = r.get(
TIMESTAMP_URL
) # TODO do we know for sure the timestamp on this page gets updated *every time* the generation breakdown gets updated?
assert (
res.status_code == 200
), f"Exception when fetching timestamp for {zone_key}: error when calling url={TIMESTAMP_URL}"
raw_timestamp_match = re.search(
r"Ultima Actualizaci�n: ((?:0[1-9]|1[0-2])/(?:[0-2][0-9]|3[0-2])/2[01][0-9]{2} [0-2][0-9]:[0-5][0-9]:[0-5][0-9] [AP]M)",
res.text,
)
if raw_timestamp_match is None:
raise Exception(f"Could not find timestamp in {res.text}")
raw_timestamp = raw_timestamp_match.group(1)
logger.debug(f"RAW TIMESTAMP: {raw_timestamp}", extra={"key": zone_key})
data["datetime"] = convert_timestamp(zone_key, raw_timestamp)
assert (
data["production"]["oil"] > 0.0
), f"{zone_key} is missing required generation type: oil"
return data
if __name__ == "__main__":
"""Main method, never used by the Electricity Map backend, but handy for testing."""
print("fetch_production() ->")
print(fetch_production())
# TODO add forecast parser
# print('fetch_generation_forecast() ->')
# print(fetch_generation_forecast())