Skip to content

Commit

Permalink
Telegram escaping completely refactored (#386)
Browse files Browse the repository at this point in the history
  • Loading branch information
caronc committed May 15, 2021
1 parent 7f7ee04 commit 59aa5f5
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 27 deletions.
86 changes: 60 additions & 26 deletions apprise/plugins/NotifyTelegram.py
Original file line number Diff line number Diff line change
Expand Up @@ -524,39 +524,73 @@ def send(self, body, title='', notify_type=NotifyType.INFO, attach=None,
body,
)

elif self.notify_format == NotifyFormat.HTML:
payload['parse_mode'] = 'HTML'

# HTML Spaces (&nbsp;) and tabs (&emsp;) aren't supported
# See https://core.telegram.org/bots/api#html-style
body = re.sub(' ?', ' ', body, re.I)
else: # HTML or TEXT

# Tabs become 3 spaces
body = re.sub(' ?', ' ', body, re.I)
# Use Telegram's HTML mode
payload['parse_mode'] = 'HTML'

if title:
# Telegram's HTML support doesn't like having HTML-escaped
# characters passed into it. To handle this situation, we need to
# search the body for these sequences and convert them to the
# output the user expected
telegram_escape_html_dict = {
# HTML Spaces (&nbsp;) and tabs (&emsp;) aren't supported
# See https://core.telegram.org/bots/api#html-style
title = re.sub(' ?', ' ', title, re.I)
r'nbsp': ' ',

# Tabs become 3 spaces
title = re.sub(' ?', ' ', title, re.I)

payload['text'] = '{}{}'.format(
'<b>{}</b>\r\n'.format(title) if title else '',
body,
)
r'emsp': ' ',

# Some characters get re-escaped by the Telegram upstream
# service so we need to convert these back,
r'apos': '\'',
r'quot': '"',
}

# Create a regular expression from the dictionary keys
html_regex = re.compile("&(%s);?" % "|".join(
map(re.escape, telegram_escape_html_dict.keys())).lower(),
re.I)

# For each match, look-up corresponding value in dictionary
# we look +1 to ignore the & that does not appear in the index
# we only look at the first 4 characters because we don't want to
# fail on &apos; as it's accepted (along with &apos - no
# semi-colon)
body = html_regex.sub( # pragma: no branch
lambda mo: telegram_escape_html_dict[
mo.string[mo.start():mo.end()][1:5]], body)

else: # pass directly as is...
payload['parse_mode'] = 'HTML'

# Telegram strangely escapes all HTML characters for us already
# but to avoid causing issues with HTML, we escape the < and >
# characters
title = re.sub('>', '&gt;', title, re.I)
title = re.sub('<', '&lt;', title, re.I)
body = re.sub('>', '&gt;', body, re.I)
body = re.sub('<', '&lt;', body, re.I)
if title:
# For each match, look-up corresponding value in dictionary
# Indexing is explained above (for how the body is parsed)
title = html_regex.sub( # pragma: no branch
lambda mo: telegram_escape_html_dict[
mo.string[mo.start():mo.end()][1:5]], title)

if self.notify_format == NotifyFormat.TEXT:
telegram_escape_text_dict = {
# We need to escape characters that conflict with html
# entity blocks (< and >) when displaying text
r'>': '&gt;',
r'<': '&lt;',
}

# Create a regular expression from the dictionary keys
text_regex = re.compile("(%s)" % "|".join(
map(re.escape, telegram_escape_text_dict.keys())).lower(),
re.I)

# For each match, look-up corresponding value in dictionary
body = text_regex.sub( # pragma: no branch
lambda mo: telegram_escape_text_dict[
mo.string[mo.start():mo.end()]], body)

if title:
# For each match, look-up corresponding value in dictionary
title = text_regex.sub( # pragma: no branch
lambda mo: telegram_escape_text_dict[
mo.string[mo.start():mo.end()]], title)

payload['text'] = '{}{}'.format(
'<b>{}</b>\r\n'.format(title) if title else '',
Expand Down
18 changes: 17 additions & 1 deletion test/test_telegram.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import mock
import requests
from json import dumps
from json import loads
from apprise import Apprise
from apprise import AppriseAttachment
from apprise import AppriseAsset
Expand Down Expand Up @@ -202,11 +203,26 @@ def test_notify_telegram_plugin(mock_post, mock_get):
})
mock_post.return_value.status_code = requests.codes.ok

# Test sending attachments
obj = plugins.NotifyTelegram(bot_token=bot_token, targets='12345')
assert len(obj.targets) == 1
assert obj.targets[0] == '12345'

# Test the escaping of characters; Telegram already escapes some characters
# for us, which we need to take into account
mock_post.reset_mock()
body = "<p>\'\"This can't\t\r\nfail&nbsp;us\"\'</p>"
assert obj.notify(
body=body, title='special characters',
notify_type=NotifyType.INFO) is True
assert mock_post.call_count == 1
payload = loads(mock_post.call_args_list[0][1]['data'])

# Our special characters are escaped properly
assert payload['text'] == \
'<b>special characters</b>\r\n&lt;p&gt;'\
'\'"This can\'t\t\r\nfail us"\'&lt;/p&gt;'

# Test sending attachments
attach = AppriseAttachment(os.path.join(TEST_VAR_DIR, 'apprise-test.gif'))
assert obj.notify(
body='body', title='title', notify_type=NotifyType.INFO,
Expand Down

0 comments on commit 59aa5f5

Please sign in to comment.