From 59aa5f5d10d5b7cd42a8b5181d7bd3734dba7694 Mon Sep 17 00:00:00 2001 From: Chris Caron Date: Sat, 15 May 2021 16:08:53 -0400 Subject: [PATCH] Telegram escaping completely refactored (#386) --- apprise/plugins/NotifyTelegram.py | 86 +++++++++++++++++++++---------- test/test_telegram.py | 18 ++++++- 2 files changed, 77 insertions(+), 27 deletions(-) diff --git a/apprise/plugins/NotifyTelegram.py b/apprise/plugins/NotifyTelegram.py index ea3c600a75..0e06a209cb 100644 --- a/apprise/plugins/NotifyTelegram.py +++ b/apprise/plugins/NotifyTelegram.py @@ -524,39 +524,73 @@ def send(self, body, title='', notify_type=NotifyType.INFO, attach=None, body, ) - elif self.notify_format == NotifyFormat.HTML: - payload['parse_mode'] = 'HTML' - - # HTML Spaces ( ) and tabs ( ) aren't supported - # See https://core.telegram.org/bots/api#html-style - body = re.sub(' ?', ' ', body, re.I) + else: # HTML or TEXT - # Tabs become 3 spaces - body = re.sub(' ?', ' ', body, re.I) + # Use Telegram's HTML mode + payload['parse_mode'] = 'HTML' - if title: + # Telegram's HTML support doesn't like having HTML escaped + # characters passed into it. to handle this situation, we need to + # search the body for these sequences and convert them to the + # output the user expected + telegram_escape_html_dict = { # HTML Spaces ( ) and tabs ( ) aren't supported # See https://core.telegram.org/bots/api#html-style - title = re.sub(' ?', ' ', title, re.I) + r'nbsp': ' ', # Tabs become 3 spaces - title = re.sub(' ?', ' ', title, re.I) - - payload['text'] = '{}{}'.format( - '{}\r\n'.format(title) if title else '', - body, - ) + r'emsp': ' ', + + # Some characters get re-escaped by the Telegram upstream + # service so we need to convert these back, + r'apos': '\'', + r'quot': '"', + } + + # Create a regular expression from the dictionary keys + html_regex = re.compile("&(%s);?" 
% "|".join( + map(re.escape, telegram_escape_html_dict.keys())).lower(), + re.I) + + # For each match, look-up corresponding value in dictionary + # we look +1 to ignore the & that does not appear in the index + # we only look at the first 4 characters because we don't want to + # fail on &apos; as it's accepted (along with &apos - no + # semi-colon) + body = html_regex.sub( # pragma: no branch + lambda mo: telegram_escape_html_dict[ + mo.string[mo.start():mo.end()][1:5]], body) - else: # pass directly as is... - payload['parse_mode'] = 'HTML' - - # Telegram strangely escapes all HTML characters for us already - # but to avoid causing issues with HTML, we escape the < and > - # characters - title = re.sub('>', '>', title, re.I) - title = re.sub('<', '<', title, re.I) - body = re.sub('>', '>', body, re.I) - body = re.sub('<', '<', body, re.I) + if title: + # For each match, look-up corresponding value in dictionary + # Indexing is explained above (for how the body is parsed) + title = html_regex.sub( # pragma: no branch + lambda mo: telegram_escape_html_dict[ + mo.string[mo.start():mo.end()][1:5]], title) + + if self.notify_format == NotifyFormat.TEXT: + telegram_escape_text_dict = { + # We need to escape characters that conflict with html + # entity blocks (< and >) when displaying text + r'>': '>', + r'<': '<', + } + + # Create a regular expression from the dictionary keys + text_regex = re.compile("(%s)" % "|".join( + map(re.escape, telegram_escape_text_dict.keys())).lower(), + re.I) + + # For each match, look-up corresponding value in dictionary + body = text_regex.sub( # pragma: no branch + lambda mo: telegram_escape_text_dict[ + mo.string[mo.start():mo.end()]], body) + + if title: + # For each match, look-up corresponding value in dictionary + title = text_regex.sub( # pragma: no branch + lambda mo: telegram_escape_text_dict[ + mo.string[mo.start():mo.end()]], title) payload['text'] = '{}{}'.format( '{}\r\n'.format(title) if title else '', diff --git 
a/test/test_telegram.py b/test/test_telegram.py index a3dee6e1c8..8df78f61e8 100644 --- a/test/test_telegram.py +++ b/test/test_telegram.py @@ -29,6 +29,7 @@ import mock import requests from json import dumps +from json import loads from apprise import Apprise from apprise import AppriseAttachment from apprise import AppriseAsset @@ -202,11 +203,26 @@ def test_notify_telegram_plugin(mock_post, mock_get): }) mock_post.return_value.status_code = requests.codes.ok - # Test sending attachments obj = plugins.NotifyTelegram(bot_token=bot_token, targets='12345') assert len(obj.targets) == 1 assert obj.targets[0] == '12345' + # Test the escaping of special characters; Telegram escapes some of + # them for us, which we need to take into account + mock_post.reset_mock() + body = "

\'\"This can't\t\r\nfail us\"\'

" + assert obj.notify( + body=body, title='special characters', + notify_type=NotifyType.INFO) is True + assert mock_post.call_count == 1 + payload = loads(mock_post.call_args_list[0][1]['data']) + + # Our special characters are escaped properly + assert payload['text'] == \ + 'special characters\r\n<p>'\ + '\'"This can\'t\t\r\nfail us"\'</p>' + + # Test sending attachments attach = AppriseAttachment(os.path.join(TEST_VAR_DIR, 'apprise-test.gif')) assert obj.notify( body='body', title='title', notify_type=NotifyType.INFO,