From de72d363f1cc1f8f43b23d8804d12c6f8f4e0737 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jarek=20Pi=C3=B3rkowski?= Date: Sat, 31 Mar 2018 18:12:04 +0200 Subject: [PATCH 1/3] SG: don't fail when unable to OCR Solar production is a very small part of Singapore electricity, so when it can't be retrieved, just log a warning and move on. Add the OCR'd text to the log message to possibly aid debugging. Currently OCR is stuck on parsing the datetime "2018-03-31 11:44" which it reads as "2018-03-31 1 1 :44". Maybe the regexp could be improved to handle that, but in the meantime, log better. --- parsers/SG.py | 57 +++++++++++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 27 deletions(-) diff --git a/parsers/SG.py b/parsers/SG.py index a14f972fb8..6737173212 100644 --- a/parsers/SG.py +++ b/parsers/SG.py @@ -1,11 +1,12 @@ #!/usr/bin/env python3 from collections import defaultdict +import logging +import re import arrow from PIL import Image from pytesseract import image_to_string -import re import requests TIMEZONE = 'Asia/Singapore' @@ -40,18 +41,10 @@ For Electricity Map, we map CCGT and GT to gas, and ST to "unknown". -There appears to be no real-time data for solar production. -Installed solar has been rising rapidly, but from a very low base. -Per Singapore Energy Statistics 2016 pg 96, total installed solar PV capacity at end of 2015 was 45.8 MWac. -Per https://www.ema.gov.sg/cmsmedia/Publications_and_Statistics/Statistics/47RSU.pdf -total installed solar capacity in 2017Q1 was 129.8 MWp / 99.9 MWac, so capacity doubled during 2016. -However, when producing at max capacity this would only be about 2% of summer mid-night demand of around 5 GW. -So for now this won't introduce a big inaccuracy. - -There exists an interconnection to Malaysia. Its capacity is apparently 2x 200 MW, -which is potentially 5-10% of normal use. I was unable to find data on its use -(not even historical, let along real-time).
The Singapore Energy Statistics 2016 document -does not note any electricity exports or imports. +The Energy Market Authority estimates current solar production and publishes it at +https://www.ema.gov.sg/solarmap.aspx + +There exists an interconnection to Malaysia, it is implemented in MY_WM.py. """ TYPE_MAPPINGS = { @@ -61,28 +54,33 @@ } -def get_solar(session=None): +def get_solar(session, logger): """ Fetches a graphic showing estimated solar production data. Uses OCR (tesseract) to extract MW value. Returns a float or None. """ - s = session or requests.Session() url = 'https://www.ema.gov.sg/cmsmedia/irradiance/plot.png' - solar_image = Image.open(s.get(url, stream=True).raw) + solar_image = Image.open(session.get(url, stream=True).raw) gray = solar_image.convert('L') - threshold_filter = lambda x: 0 if x<77 else 255 + threshold_filter = lambda x: 0 if x < 77 else 255 black_white = gray.point(threshold_filter, '1') text = image_to_string(black_white, lang='eng') - pattern = r'Est. PV Output: (.*)MWac' - val = re.search(pattern, text, re.MULTILINE).group(1) + try: + pattern = r'Est. 
PV Output: (.*)MWac' + val = re.search(pattern, text, re.MULTILINE).group(1) + + time_pattern = r'\d+-\d+-\d+\s+\d+:\d+' + time_string = re.search(time_pattern, text, re.MULTILINE).group(0) + except AttributeError: + msg = 'Unable to get values for SG solar from OCR text: {}'.format(text) + logger.warning(msg, extra={'key': 'SG'}) + return None - time_pattern = r'\d+-\d+-\d+\s+\d+:\d+' - time_string = re.search(time_pattern, text, re.MULTILINE).group(0) solar_dt = arrow.get(time_string).replace(tzinfo='Asia/Singapore') singapore_dt = arrow.now('Asia/Singapore') diff = singapore_dt - solar_dt @@ -159,7 +157,8 @@ def sg_data_to_datetime(data): return data_datetime -def fetch_production(zone_key='SG', session=None, target_datetime=None, logger=None): +def fetch_production(zone_key='SG', session=None, target_datetime=None, + logger=logging.getLogger(__name__)): """Requests the last known production mix (in MW) of Singapore. Arguments: @@ -202,12 +201,12 @@ def fetch_production(zone_key='SG', session=None, target_datetime=None, logger=N else: # unrecognized - log it, then add into unknown - print( - 'Singapore has unrecognized generation type "{}" with production share {}%'.format( - gen_type, gen_percent)) + msg = ('Singapore has unrecognized generation type "{}" ' + 'with production share {}%').format(gen_type, gen_percent) + logger.warning(msg) generation_by_type['unknown'] += gen_mw - generation_by_type['solar'] = get_solar(session=None) + generation_by_type['solar'] = get_solar(requests_obj, logger) # some generation methods that are not used in Singapore generation_by_type.update({ @@ -216,12 +215,16 @@ def fetch_production(zone_key='SG', session=None, target_datetime=None, logger=N 'hydro': 0 }) + source = 'emcsg.com' + if generation_by_type['solar']: + source += ', ema.gov.sg' + return { 'datetime': sg_data_to_datetime(data), 'zoneKey': zone_key, 'production': generation_by_type, 'storage': {}, # there is no known electricity storage in Singapore - 'source': 
'emcsg.com' + 'source': source } From 2469f636d8ff12c21c03de25bbd698b7a6054482 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jarek=20Pi=C3=B3rkowski?= Date: Sat, 31 Mar 2018 18:29:29 +0200 Subject: [PATCH 2/3] use logger more --- parsers/SG.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/parsers/SG.py b/parsers/SG.py index 6737173212..1c779327fa 100644 --- a/parsers/SG.py +++ b/parsers/SG.py @@ -87,7 +87,9 @@ def get_solar(session, logger): # Need to be sure we don't get old data if image stops updating. if diff.seconds > 3600: - print('Singapore solar data is too old to use.') + msg = ('Singapore solar data is too old to use, ' + 'parsed data timestamp was {}.').format(solar_dt) + logger.warning(msg, extra={'key': 'SG'}) return None # At night format changes from 0.00 to 0 @@ -95,16 +97,18 @@ def get_solar(session, logger): # This try/except will make sure no invalid data is returned. try: solar = float(val) - except ValueError as err: + except ValueError: if len(val) == 1 and 'O' in val: solar = 0.0 else: - print("Singapore solar data is unreadable - got {}.".format(val)) - solar = None + msg = "Singapore solar data is unreadable - got {}.".format(val) + logger.warning(msg, extra={'key': 'SG'}) + return None else: if solar > 200.0: - print("Singapore solar generation is way over capacity - got {}".format(val)) - solar = None + msg = "Solar generation is way over capacity - got {}".format(val) + logger.warning(msg, extra={'key': 'SG'}) + return None return solar From d6e9be1d96d714c01498a3476139ce3ffdad32a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jarek=20Pi=C3=B3rkowski?= Date: Sun, 1 Apr 2018 10:34:19 +0200 Subject: [PATCH 3/3] EMA solar is a source even when it returns 0 --- parsers/SG.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/parsers/SG.py b/parsers/SG.py index 1c779327fa..771c699245 100644 --- a/parsers/SG.py +++ b/parsers/SG.py @@ -219,16 +219,12 @@ def fetch_production(zone_key='SG', 
session=None, target_datetime=None, 'hydro': 0 }) - source = 'emcsg.com' - if generation_by_type['solar']: - source += ', ema.gov.sg' - return { 'datetime': sg_data_to_datetime(data), 'zoneKey': zone_key, 'production': generation_by_type, 'storage': {}, # there is no known electricity storage in Singapore - 'source': source + 'source': 'emcsg.com, ema.gov.sg' }