From b16cf7a55317987a98d5ad22308890006a7eecba Mon Sep 17 00:00:00 2001 From: Dom Amato Date: Wed, 8 Apr 2020 17:46:57 -0500 Subject: [PATCH 01/28] initial python 3 changes --- .github/workflows/pythontest.yml | 40 ++++++++++++++++++ messaging/mms/iterator.py | 5 ++- messaging/mms/message.py | 2 +- messaging/mms/mms_pdu.py | 20 ++++----- messaging/mms/wsp_pdu.py | 32 +++++++------- messaging/sms/base.py | 2 +- messaging/sms/deliver.py | 2 +- messaging/sms/gsm0338.py | 5 ++- messaging/sms/pdu.py | 2 +- messaging/sms/submit.py | 8 ++-- messaging/sms/udh.py | 6 +-- messaging/utils.py | 4 +- setup.py | 6 --- {messaging/test => tests}/__init__.py | 0 .../27d0a048cd79555de05283a22372b0eb.mms | Bin {messaging/test => tests}/mms-data/BTMMS.MMS | Bin {messaging/test => tests}/mms-data/NOWMMS.MMS | Bin .../test => tests}/mms-data/SEC-SGHS300M.mms | Bin {messaging/test => tests}/mms-data/SIMPLE.MMS | Bin .../mms-data/SonyEricssonT310-R201.mms | Bin .../test => tests}/mms-data/TOMSLOT.MMS | Bin .../test => tests}/mms-data/gallery2test.mms | Bin {messaging/test => tests}/mms-data/iPhone.mms | Bin .../mms-data/images_are_cut_off_debug.mms | Bin {messaging/test => tests}/mms-data/m.mms | Bin .../test => tests}/mms-data/openwave.mms | Bin .../mms-data/projekt_exempel.mms | Bin .../test => tests}/test_gsm_encoding.py | 26 ++++++------ {messaging/test => tests}/test_mms.py | 4 +- {messaging/test => tests}/test_sms.py | 13 +++--- {messaging/test => tests}/test_udh.py | 4 +- {messaging/test => tests}/test_wap.py | 4 +- 32 files changed, 109 insertions(+), 76 deletions(-) create mode 100644 .github/workflows/pythontest.yml rename {messaging/test => tests}/__init__.py (100%) rename {messaging/test => tests}/mms-data/27d0a048cd79555de05283a22372b0eb.mms (100%) rename {messaging/test => tests}/mms-data/BTMMS.MMS (100%) rename {messaging/test => tests}/mms-data/NOWMMS.MMS (100%) rename {messaging/test => tests}/mms-data/SEC-SGHS300M.mms (100%) rename {messaging/test => 
tests}/mms-data/SIMPLE.MMS (100%) rename {messaging/test => tests}/mms-data/SonyEricssonT310-R201.mms (100%) rename {messaging/test => tests}/mms-data/TOMSLOT.MMS (100%) rename {messaging/test => tests}/mms-data/gallery2test.mms (100%) rename {messaging/test => tests}/mms-data/iPhone.mms (100%) rename {messaging/test => tests}/mms-data/images_are_cut_off_debug.mms (100%) rename {messaging/test => tests}/mms-data/m.mms (100%) rename {messaging/test => tests}/mms-data/openwave.mms (100%) rename {messaging/test => tests}/mms-data/projekt_exempel.mms (100%) rename {messaging/test => tests}/test_gsm_encoding.py (92%) rename {messaging/test => tests}/test_mms.py (99%) rename {messaging/test => tests}/test_sms.py (98%) rename {messaging/test => tests}/test_udh.py (91%) rename {messaging/test => tests}/test_wap.py (99%) diff --git a/.github/workflows/pythontest.yml b/.github/workflows/pythontest.yml new file mode 100644 index 0000000..4be696c --- /dev/null +++ b/.github/workflows/pythontest.yml @@ -0,0 +1,40 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: Python Test + +on: + push: + branches: + - '**' + pull_request: + branches: [ master ] + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.5, 3.6, 3.7, 3.8] + + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + - name: Lint + run: | + pip install pylint + pylint -rn --errors-only ./smpp + - name: Test + env: + COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }} + run: | + pip install coveralls pytest-cov + pytest --cov=messaging tests/ + coveralls diff --git a/messaging/mms/iterator.py 
b/messaging/mms/iterator.py index cd49505..21e21e4 100644 --- a/messaging/mms/iterator.py +++ b/messaging/mms/iterator.py @@ -15,11 +15,11 @@ """Iterator with "value preview" capability.""" -class PreviewIterator(object): +class PreviewIterator: """An ``iter`` wrapper class providing a "previewable" iterator. This "preview" functionality allows the iterator to return successive - values from its ``iterable`` object, without actually mvoving forward + values from its ``iterable`` object, without actually moving forward itself. This is very usefuly if the next item(s) in an iterator must be used for something, after which the iterator should "undo" those read operations, so that they can be read again by another function. @@ -32,6 +32,7 @@ def __init__(self, data): self._cached_values = [] self._preview_pos = 0 + #pylint: disable=non-iterator-returned def __iter__(self): return self diff --git a/messaging/mms/message.py b/messaging/mms/message.py index 6cff8a0..0b5b23b 100644 --- a/messaging/mms/message.py +++ b/messaging/mms/message.py @@ -430,7 +430,7 @@ def set_duration(self, duration): self.duration = duration -class DataPart(object): +class DataPart: """ I am a data entry in the MMS body diff --git a/messaging/mms/mms_pdu.py b/messaging/mms/mms_pdu.py index ed44c46..be874e2 100644 --- a/messaging/mms/mms_pdu.py +++ b/messaging/mms/mms_pdu.py @@ -25,7 +25,7 @@ def flatten_list(x): """Flattens ``x`` into a single list""" result = [] for el in x: - if hasattr(el, "__iter__") and not isinstance(el, basestring): + if hasattr(el, "__iter__") and not isinstance(el, str): result.extend(flatten_list(el)) else: result.append(el) @@ -175,14 +175,14 @@ def decode_message_body(self, data_iter): # , # , # - for part_num in xrange(num_entries): + for part_num in range(num_entries): #print '\nPart %d:\n------' % part_num headers_len = self.decode_uint_var(data_iter) data_len = self.decode_uint_var(data_iter) # Prepare to read content-type + other possible headers ct_field_bytes 
= [] - for i in xrange(headers_len): + for i in range(headers_len): ct_field_bytes.append(data_iter.next()) ct_iter = PreviewIterator(ct_field_bytes) @@ -201,7 +201,7 @@ def decode_message_body(self, data_iter): # Data (note: this is not null-terminated) data = array.array('B') - for i in xrange(data_len): + for i in range(data_len): data.append(data_iter.next()) part = message.DataPart() @@ -281,9 +281,9 @@ def decode_mms_header(byte_iter): try: name = mms_field_names[byte][1] mms_value = getattr(MMSDecoder, 'decode_%s' % name)(byte_iter) - except wsp_pdu.DecodeError, msg: + except wsp_pdu.DecodeError as e: raise wsp_pdu.DecodeError('Invalid MMS Header: Could ' - 'not decode MMS-value: %s' % msg) + 'not decode MMS-value: %s' % e) except: raise RuntimeError('A fatal error occurred, probably due to an ' 'unimplemented decoding operation. Tried to ' @@ -316,9 +316,9 @@ def decode_encoded_string_value(byte_iter): # TODO: add proper support for charsets... try: charset = wsp_pdu.Decoder.decode_well_known_charset(byte_iter) - except wsp_pdu.DecodeError, msg: + except wsp_pdu.DecodeError as e: raise Exception('encoded_string_value decoding error - ' - 'Could not decode Charset value: %s' % msg) + 'Could not decode Charset value: %s' % e) return wsp_pdu.Decoder.decode_text_string(byte_iter) except wsp_pdu.DecodeError: @@ -844,9 +844,9 @@ def encode_header(header_field_name, header_value): ret = getattr(MMSEncoder, 'encode_%s' % expected_type)(header_value) encoded_header.extend(ret) - except wsp_pdu.EncodeError, msg: + except wsp_pdu.EncodeError as e: raise wsp_pdu.EncodeError('Error encoding parameter ' - 'value: %s' % msg) + 'value: %s' % e) except: debug('A fatal error occurred, probably due to an ' 'unimplemented encoding operation') diff --git a/messaging/mms/wsp_pdu.py b/messaging/mms/wsp_pdu.py index 7aa1eae..c6e8be5 100644 --- a/messaging/mms/wsp_pdu.py +++ b/messaging/mms/wsp_pdu.py @@ -447,7 +447,7 @@ def decode_long_integer(byte_iter): longInt = 0 # Decode the 
Multi-octect-integer - for i in xrange(shortLength): + for i in range(shortLength): longInt = longInt << 8 longInt |= byte_iter.next() @@ -804,8 +804,8 @@ def decode_constrained_media(byte_iter): """ try: media_value = Decoder.decode_constrained_encoding(byte_iter) - except DecodeError, msg: - raise DecodeError('Invalid Constrained-media: %s' % msg) + except DecodeError as e: + raise DecodeError('Invalid Constrained-media: %s' % e) if isinstance(media_value, int): try: @@ -843,7 +843,7 @@ def decode_content_general_form(byte_iter): # Read parameters, etc, until is reached ct_field_bytes = array.array('B') - for i in xrange(value_length): + for i in range(value_length): ct_field_bytes.append(byte_iter.next()) ct_iter = PreviewIterator(ct_field_bytes) @@ -898,8 +898,8 @@ def decode_typed_parameter(byte_iter): typed_value = '' try: typed_value = getattr(Decoder, 'decode_%s' % value_type)(byte_iter) - except DecodeError, msg: - raise DecodeError('Could not decode Typed-parameter: %s' % msg) + except DecodeError as e: + raise DecodeError('Could not decode Typed-parameter: %s' % e) except: debug('A fatal error occurred, probably due to an ' 'unimplemented decoding operation') @@ -1319,7 +1319,7 @@ def decode_well_known_header(byte_iter): hdr_fields = get_header_field_names() # TODO: *technically* this can fail, but then we have already # read a byte... should fix? - if field_value not in xrange(len(hdr_fields)): + if field_value not in list(range(len(hdr_fields))): raise DecodeError('Invalid Header Field value: %d' % field_value) field_name = hdr_fields[field_value] @@ -1333,8 +1333,8 @@ def decode_well_known_header(byte_iter): try: decoded_value = getattr(Decoder, 'decode_%s' % wap_value_type)(byte_iter) - except DecodeError, msg: - raise DecodeError('Could not decode Wap-value: %s' % msg) + except DecodeError as e: + raise DecodeError('Could not decode Wap-value: %s' % e) except: debug('An error occurred, probably due to an ' 'unimplemented decoding operation. 
Tried to ' @@ -1664,8 +1664,8 @@ def encode_parameter(parameter_name, parameter_value, version='1.2'): ret = getattr(Encoder, 'encode_%s' % expected_type)(parameter_value) encoded_parameter.extend(ret) - except EncodeError, msg: - raise EncodeError('Error encoding param value: %s' % msg) + except EncodeError as e: + raise EncodeError('Error encoding param value: %s' % e) except: debug('A fatal error occurred, probably due to an ' 'unimplemented encoding operation') @@ -1799,8 +1799,8 @@ def encode_header(field_name, value): try: ret = getattr(Encoder, 'encode_%s' % wap_value_type)(value) encoded_header.extend(ret) - except EncodeError, msg: - raise EncodeError('Error encoding Wap-value: %s' % msg) + except EncodeError as e: + raise EncodeError('Error encoding Wap-value: %s' % e) except: debug('A fatal error occurred, probably due to an ' 'unimplemented encoding operation') @@ -1920,7 +1920,7 @@ def encode_extension_media(media_value): :return: The encoded media type value, as a sequence of bytes :rtype: str """ - if not isinstance(media_value, basestring): + if not isinstance(media_value, str): try: media_value = str(media_value) except: @@ -2045,8 +2045,8 @@ def encode_accept_value(accept_value): # ...now try Accept-general-form try: encoded_media_range = Encoder.encode_media_type(accept_value) - except EncodeError, msg: - raise EncodeError('Cannot encode Accept-value: %s' % msg) + except EncodeError as e: + raise EncodeError('Cannot encode Accept-value: %s' % e) value_length = Encoder.encode_value_length(len(encoded_media_range)) encoded_accept_value = value_length diff --git a/messaging/sms/base.py b/messaging/sms/base.py index b7b1d59..ba09cf6 100644 --- a/messaging/sms/base.py +++ b/messaging/sms/base.py @@ -1,7 +1,7 @@ # see LICENSE -class SmsBase(object): +class SmsBase: def __init__(self): self.udh = None diff --git a/messaging/sms/deliver.py b/messaging/sms/deliver.py index 5a9ba6b..0cb7a81 100644 --- a/messaging/sms/deliver.py +++ 
b/messaging/sms/deliver.py @@ -184,7 +184,7 @@ def _process_message(self, data): data = data[ud_len:].tolist() _bytes = [int("%02X%02X" % (data[i], data[i + 1]), 16) for i in range(0, len(data), 2)] - self.text = u''.join(list(map(unichr, _bytes))) + self.text = u''.join(list(map(chr, _bytes))) pdu = property(lambda self: self._pdu, _set_pdu) diff --git a/messaging/sms/gsm0338.py b/messaging/sms/gsm0338.py index 32a50b5..d717069 100644 --- a/messaging/sms/gsm0338.py +++ b/messaging/sms/gsm0338.py @@ -195,11 +195,11 @@ # unicode -> default GSM 03.38 def_regular_encode_dict = \ - dict((u, g) for g, u in def_regular_decode_dict.iteritems()) + dict((u, g) for g, u in def_regular_decode_dict.items()) # unicode -> default escaped GSM 03.38 characters def_escape_encode_dict = \ - dict((u, g) for g, u in def_escape_decode_dict.iteritems()) + dict((u, g) for g, u in def_escape_decode_dict.items()) def encode(input_, errors='strict'): @@ -218,6 +218,7 @@ def encode(input_, errors='strict'): result.append('\x1b') result.append(def_escape_encode_dict[c]) else: + print(repr(c)) if errors == 'strict': raise UnicodeError("Invalid GSM character") elif errors == 'replace': diff --git a/messaging/sms/pdu.py b/messaging/sms/pdu.py index 9d680d5..b517bea 100644 --- a/messaging/sms/pdu.py +++ b/messaging/sms/pdu.py @@ -1,7 +1,7 @@ # see LICENSE -class Pdu(object): +class Pdu: def __init__(self, pdu, len_smsc, cnt=1, seq=1): self.pdu = pdu.upper() diff --git a/messaging/sms/submit.py b/messaging/sms/submit.py index e426f13..71764fe 100644 --- a/messaging/sms/submit.py +++ b/messaging/sms/submit.py @@ -314,9 +314,9 @@ def _split_sms_message(self, text): chr(sms_ref) + chr(total_parts) + chr(i)) padding = " " else: - udh = (unichr(int("%04x" % ((udh_len << 8) | mid), 16)) + - unichr(int("%04x" % ((data_len << 8) | sms_ref), 16)) + - unichr(int("%04x" % ((total_parts << 8) | i), 16))) + udh = (chr(int("%04x" % ((udh_len << 8) | mid), 16)) + + chr(int("%04x" % ((data_len << 8) | sms_ref), 
16)) + + chr(int("%04x" % ((total_parts << 8) | i), 16))) padding = "" pdu_msgs.append(packing_func(padding + msg, udh)) @@ -325,6 +325,6 @@ def _split_sms_message(self, text): def _get_rand_id(self): if not self.id_list: - self.id_list = range(0, 255) + self.id_list = list(range(0, 255)) return self.id_list.pop(0) diff --git a/messaging/sms/udh.py b/messaging/sms/udh.py index eecfa23..7087f74 100644 --- a/messaging/sms/udh.py +++ b/messaging/sms/udh.py @@ -1,7 +1,7 @@ # See LICENSE -class PortAddress(object): +class PortAddress: def __init__(self, dest_port, orig_port, eight_bits): self.dest_port = dest_port @@ -13,7 +13,7 @@ def __repr__(self): return "" % args -class ConcatReference(object): +class ConcatReference: def __init__(self, ref, cnt, seq, eight_bits): self.ref = ref @@ -26,7 +26,7 @@ def __repr__(self): return "" % args -class UserDataHeader(object): +class UserDataHeader: def __init__(self): self.concat = None diff --git a/messaging/utils.py b/messaging/utils.py index 1456d79..9544a4d 100644 --- a/messaging/utils.py +++ b/messaging/utils.py @@ -59,13 +59,13 @@ def to_bytes(s): if sys.version_info >= (3,): return bytes(s) - return ''.join(map(unichr, s)) + return ''.join(map(chr, s)) def debug(s): # set this to True if you want to poke at PDU encoding/decoding if False: - print s + print(s) def swap(s): diff --git a/setup.py b/setup.py index d826050..0ad047f 100644 --- a/setup.py +++ b/setup.py @@ -2,16 +2,11 @@ import sys from messaging import VERSION -extra = {} -if sys.version_info >= (3,): - extra['use_2to3'] = True - setup(name="python-messaging", version='%s.%s.%s' % VERSION, description='SMS/MMS encoder/decoder', license=open('COPYING').read(), packages=find_packages(), - install_requires=['nose'], zip_safe=True, test_suite='nose.collector', classifiers=[ @@ -28,5 +23,4 @@ 'Programming Language :: Python :: 3.2', 'Topic :: Communications :: Telephony', ], - **extra ) diff --git a/messaging/test/__init__.py b/tests/__init__.py similarity index 
100% rename from messaging/test/__init__.py rename to tests/__init__.py diff --git a/messaging/test/mms-data/27d0a048cd79555de05283a22372b0eb.mms b/tests/mms-data/27d0a048cd79555de05283a22372b0eb.mms similarity index 100% rename from messaging/test/mms-data/27d0a048cd79555de05283a22372b0eb.mms rename to tests/mms-data/27d0a048cd79555de05283a22372b0eb.mms diff --git a/messaging/test/mms-data/BTMMS.MMS b/tests/mms-data/BTMMS.MMS similarity index 100% rename from messaging/test/mms-data/BTMMS.MMS rename to tests/mms-data/BTMMS.MMS diff --git a/messaging/test/mms-data/NOWMMS.MMS b/tests/mms-data/NOWMMS.MMS similarity index 100% rename from messaging/test/mms-data/NOWMMS.MMS rename to tests/mms-data/NOWMMS.MMS diff --git a/messaging/test/mms-data/SEC-SGHS300M.mms b/tests/mms-data/SEC-SGHS300M.mms similarity index 100% rename from messaging/test/mms-data/SEC-SGHS300M.mms rename to tests/mms-data/SEC-SGHS300M.mms diff --git a/messaging/test/mms-data/SIMPLE.MMS b/tests/mms-data/SIMPLE.MMS similarity index 100% rename from messaging/test/mms-data/SIMPLE.MMS rename to tests/mms-data/SIMPLE.MMS diff --git a/messaging/test/mms-data/SonyEricssonT310-R201.mms b/tests/mms-data/SonyEricssonT310-R201.mms similarity index 100% rename from messaging/test/mms-data/SonyEricssonT310-R201.mms rename to tests/mms-data/SonyEricssonT310-R201.mms diff --git a/messaging/test/mms-data/TOMSLOT.MMS b/tests/mms-data/TOMSLOT.MMS similarity index 100% rename from messaging/test/mms-data/TOMSLOT.MMS rename to tests/mms-data/TOMSLOT.MMS diff --git a/messaging/test/mms-data/gallery2test.mms b/tests/mms-data/gallery2test.mms similarity index 100% rename from messaging/test/mms-data/gallery2test.mms rename to tests/mms-data/gallery2test.mms diff --git a/messaging/test/mms-data/iPhone.mms b/tests/mms-data/iPhone.mms similarity index 100% rename from messaging/test/mms-data/iPhone.mms rename to tests/mms-data/iPhone.mms diff --git a/messaging/test/mms-data/images_are_cut_off_debug.mms 
b/tests/mms-data/images_are_cut_off_debug.mms similarity index 100% rename from messaging/test/mms-data/images_are_cut_off_debug.mms rename to tests/mms-data/images_are_cut_off_debug.mms diff --git a/messaging/test/mms-data/m.mms b/tests/mms-data/m.mms similarity index 100% rename from messaging/test/mms-data/m.mms rename to tests/mms-data/m.mms diff --git a/messaging/test/mms-data/openwave.mms b/tests/mms-data/openwave.mms similarity index 100% rename from messaging/test/mms-data/openwave.mms rename to tests/mms-data/openwave.mms diff --git a/messaging/test/mms-data/projekt_exempel.mms b/tests/mms-data/projekt_exempel.mms similarity index 100% rename from messaging/test/mms-data/projekt_exempel.mms rename to tests/mms-data/projekt_exempel.mms diff --git a/messaging/test/test_gsm_encoding.py b/tests/test_gsm_encoding.py similarity index 92% rename from messaging/test/test_gsm_encoding.py rename to tests/test_gsm_encoding.py index 364cb56..37f10e1 100644 --- a/messaging/test/test_gsm_encoding.py +++ b/tests/test_gsm_encoding.py @@ -17,12 +17,12 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
"""Unittests for the gsm encoding/decoding module""" -import unittest +from unittest import TestCase import messaging.sms.gsm0338 # imports GSM7 codec # Reversed from: ftp://ftp.unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT MAP = { -# unichr(0x0000): (0x0000, 0x00), # Null +# chr(0x0000): (0x0000, 0x00), # Null u'@': (0x0040, 0x00), u'£': (0x00a3, 0x01), u'$': (0x0024, 0x02), @@ -33,10 +33,10 @@ u'ì': (0x00ec, 0x07), u'ò': (0x00f2, 0x08), u'Ç': (0x00c7, 0x09), # LATIN CAPITAL LETTER C WITH CEDILLA - unichr(0x000a): (0x000a, 0x0a), # Linefeed + chr(0x000a): (0x000a, 0x0a), # Linefeed u'Ø': (0x00d8, 0x0b), u'ø': (0x00f8, 0x0c), - unichr(0x000d): (0x000d, 0x0d), # Carriage return + chr(0x000d): (0x000d, 0x0d), # Carriage return u'Å': (0x00c5, 0x0e), u'å': (0x00e5, 0x0f), u'Δ': (0x0394, 0x10), @@ -50,7 +50,7 @@ u'Σ': (0x03a3, 0x18), u'Θ': (0x0398, 0x19), u'Ξ': (0x039e, 0x1a), - unichr(0x00a0): (0x00a0, 0x1b), # Escape to extension table (displayed + chr(0x00a0): (0x00a0, 0x1b), # Escape to extension table (displayed # as NBSP, on decode of invalid escape # sequence) u'Æ': (0x00c6, 0x1c), @@ -86,7 +86,7 @@ u':': (0x003a, 0x3a), u';': (0x003b, 0x3b), u'[': (0x005b, 0x1b3c), - unichr(0x000c): (0x000c, 0x1b0a), # Formfeed + chr(0x000c): (0x000c, 0x1b0a), # Formfeed u']': (0x005d, 0x1b3e), u'?': (0x003f, 0x3f), u'|': (0x007c, 0x1b40), @@ -189,7 +189,7 @@ BAD = -1 -class TestEncodingFunctions(unittest.TestCase): +class TestEncodingFunctions(TestCase): def test_encoding_supported_unicode_gsm(self): @@ -205,7 +205,7 @@ def test_encoding_supported_unicode_gsm(self): i_gsm = BAD # so we see the comparison, not an exception # We shouldn't generate an invalid escape sequence - if key == unichr(0x00a0): + if key == chr(0x00a0): self.assertEqual(BAD, i_gsm) else: self.assertEqual(MAP[key][1], i_gsm) @@ -252,16 +252,16 @@ def test_decoding_supported_unicode_gsm(self): def test_is_gsm_text_true(self): for key in MAP.keys(): - if key == unichr(0x00a0): + if key == chr(0x00a0): continue 
self.assertEqual(messaging.sms.gsm0338.is_gsm_text(key), True) def test_is_gsm_text_false(self): self.assertEqual( - messaging.sms.gsm0338.is_gsm_text(unichr(0x00a0)), False) + messaging.sms.gsm0338.is_gsm_text(chr(0x00a0)), False) - for i in xrange(1, 0xffff + 1): - if unichr(i) not in MAP: + for i in range(1, 0xffff + 1): + if chr(i) not in MAP: # Note: it's a little odd, but on error we want to see values - if messaging.sms.gsm0338.is_gsm_text(unichr(i)) is not False: + if messaging.sms.gsm0338.is_gsm_text(chr(i)) is not False: self.assertEqual(BAD, i) diff --git a/messaging/test/test_mms.py b/tests/test_mms.py similarity index 99% rename from messaging/test/test_mms.py rename to tests/test_mms.py index 2cf6bf1..fa53b7d 100644 --- a/messaging/test/test_mms.py +++ b/tests/test_mms.py @@ -2,7 +2,7 @@ from array import array import datetime import os -import unittest +from unittest import TestCase from messaging.mms.message import MMSMessage @@ -11,7 +11,7 @@ DATA_DIR = os.path.join(os.path.dirname(__file__), 'mms-data') -class TestMmsDecoding(unittest.TestCase): +class TestMmsDecoding(TestCase): def test_decoding_from_data(self): path = os.path.join(DATA_DIR, 'iPhone.mms') diff --git a/messaging/test/test_sms.py b/tests/test_sms.py similarity index 98% rename from messaging/test/test_sms.py rename to tests/test_sms.py index c3ff7b5..9c3ec74 100644 --- a/messaging/test/test_sms.py +++ b/tests/test_sms.py @@ -1,10 +1,7 @@ # -*- coding: utf-8 -*- from datetime import datetime, timedelta -try: - import unittest2 as unittest -except ImportError: - import unittest +from unittest import TestCase from messaging.sms import SmsSubmit, SmsDeliver from messaging.utils import (timedelta_to_relative_validity as to_relative, @@ -12,7 +9,7 @@ FixedOffset) -class TestEncodingFunctions(unittest.TestCase): +class TestEncodingFunctions(TestCase): def test_converting_timedelta_to_validity(self): self.assertRaises(ValueError, to_relative, timedelta(minutes=4)) @@ -52,7 +49,7 @@ def 
test_converting_datetime_to_validity(self): self.assertEqual(to_absolute(when, "GMT-3"), expected) -class TestSmsSubmit(unittest.TestCase): +class TestSmsSubmit(TestCase): def test_encoding_validity(self): # no validity @@ -241,7 +238,7 @@ def test_encoding_bad_csca_raises_error(self): self.assertRaises(ValueError, setattr, sms, 'csca', "1badcsca") -class TestSubmitPduCounts(unittest.TestCase): +class TestSubmitPduCounts(TestCase): DEST = "+3530000000" GSM_CHAR = "x" @@ -317,7 +314,7 @@ def test_unicode_6(self): self.assertEqual(len(sms.to_pdu()), 4) -class TestSmsDeliver(unittest.TestCase): +class TestSmsDeliver(TestCase): def test_decoding_7bit_pdu(self): pdu = "07911326040000F0040B911346610089F60000208062917314080CC8F71D14969741F977FD07" diff --git a/messaging/test/test_udh.py b/tests/test_udh.py similarity index 91% rename from messaging/test/test_udh.py rename to tests/test_udh.py index 9496ff6..92ec9f5 100644 --- a/messaging/test/test_udh.py +++ b/tests/test_udh.py @@ -1,10 +1,10 @@ -import unittest +from unittest import TestCase from messaging.sms.udh import UserDataHeader from messaging.utils import to_array -class TestUserDataHeader(unittest.TestCase): +class TestUserDataHeader(TestCase): def test_user_data_header(self): data = to_array("08049f8e020105040b8423f0") diff --git a/messaging/test/test_wap.py b/tests/test_wap.py similarity index 99% rename from messaging/test/test_wap.py rename to tests/test_wap.py index 84224a1..26de322 100644 --- a/messaging/test/test_wap.py +++ b/tests/test_wap.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- from array import array -import unittest +from unittest import TestCase from messaging.sms import SmsDeliver from messaging.sms.wap import (is_a_wap_push_notification as is_push, @@ -13,7 +13,7 @@ def list_to_str(l): return a.tostring() -class TestSmsWapPush(unittest.TestCase): +class TestSmsWapPush(TestCase): data = [1, 6, 34, 97, 112, 112, 108, 105, 99, 97, 116, 105, 111, 110, 47, 118, 110, 100, 46, 119, 97, 112, 46, 109, 
109, 115, 45, From d74ebf4dd2e4428edf6188d6b7f3048ba642757b Mon Sep 17 00:00:00 2001 From: Dom Amato Date: Wed, 8 Apr 2020 17:49:14 -0500 Subject: [PATCH 02/28] lint proper folder --- .github/workflows/pythontest.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pythontest.yml b/.github/workflows/pythontest.yml index 4be696c..7d2db5e 100644 --- a/.github/workflows/pythontest.yml +++ b/.github/workflows/pythontest.yml @@ -30,7 +30,7 @@ jobs: - name: Lint run: | pip install pylint - pylint -rn --errors-only ./smpp + pylint -rn --errors-only ./messaging - name: Test env: COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }} From f30d0d69ad9ae63096cd3fb9fadc8e21f43e973e Mon Sep 17 00:00:00 2001 From: Dom Amato Date: Wed, 8 Apr 2020 17:57:08 -0500 Subject: [PATCH 03/28] more python 3 updates --- messaging/mms/iterator.py | 4 ++-- messaging/mms/mms_pdu.py | 26 +++++++++++----------- messaging/mms/wsp_pdu.py | 44 +++++++++++++++++++------------------- tests/test_gsm_encoding.py | 10 ++++----- 4 files changed, 42 insertions(+), 42 deletions(-) diff --git a/messaging/mms/iterator.py b/messaging/mms/iterator.py index 21e21e4..d69df68 100644 --- a/messaging/mms/iterator.py +++ b/messaging/mms/iterator.py @@ -41,7 +41,7 @@ def next(self): if len(self._cached_values) > 0: return self._cached_values.pop(0) else: - return self._it.next() + return next(self._it) def preview(self): """ @@ -62,7 +62,7 @@ def preview(self): if self._preview_pos < len(self._cached_values): value = self._cached_values[self._preview_pos] else: - value = self._it.next() + value = next(self._it) self._cached_values.append(value) self._preview_pos += 1 diff --git a/messaging/mms/mms_pdu.py b/messaging/mms/mms_pdu.py index be874e2..b004c02 100644 --- a/messaging/mms/mms_pdu.py +++ b/messaging/mms/mms_pdu.py @@ -183,7 +183,7 @@ def decode_message_body(self, data_iter): # Prepare to read content-type + other possible headers ct_field_bytes = [] for i in 
range(headers_len): - ct_field_bytes.append(data_iter.next()) + ct_field_bytes.append(next(data_iter)) ct_iter = PreviewIterator(ct_field_bytes) # Get content type @@ -202,7 +202,7 @@ def decode_message_body(self, data_iter): # Data (note: this is not null-terminated) data = array.array('B') for i in range(data_len): - data.append(data_iter.next()) + data.append(next(data_iter)) part = message.DataPart() part.set_data(data, ctype) @@ -269,7 +269,7 @@ def decode_mms_header(byte_iter): byte = wsp_pdu.Decoder.decode_short_integer_from_byte(preview) if byte in mms_field_names: - byte_iter.next() + next(byte_iter) mms_field_name = mms_field_names[byte][0] else: byte_iter.reset_preview() @@ -350,13 +350,13 @@ def decode_boolean_value(byte_iter): byte_iter.reset_preview() raise wsp_pdu.DecodeError('Error parsing boolean value ' 'for byte: %s' % hex(byte)) - byte = byte_iter.next() + byte = next(byte_iter) return byte == 128 @staticmethod def decode_delivery_time_value(byte_iter): value_length = wsp_pdu.Decoder.decode_value_length(byte_iter) - token = byte_iter.next() + token = next(byte_iter) value = wsp_pdu.Decoder.decode_long_integer(byte_iter) if token == 128: token_type = 'absolute' @@ -383,7 +383,7 @@ def decode_from_value(byte_iter): """ value_length = wsp_pdu.Decoder.decode_value_length(byte_iter) # See what token we have - byte = byte_iter.next() + byte = next(byte_iter) if byte == 129: # Insert-address-token return '' @@ -416,7 +416,7 @@ def decode_message_class_value(byte_iter): } byte = byte_iter.preview() if byte in class_identifiers: - byte_iter.next() + next(byte_iter) return class_identifiers[byte] byte_iter.reset_preview() @@ -444,7 +444,7 @@ def decode_message_type_value(byte_iter): byte = byte_iter.preview() if byte in message_types: - byte_iter.next() + next(byte_iter) return message_types[byte] byte_iter.reset_preview() @@ -467,7 +467,7 @@ def decode_priority_value(byte_iter): byte = byte_iter.preview() if byte in priorities: - byte = byte_iter.next() 
+ byte = next(byte_iter) return priorities[byte] byte_iter.reset_preview() @@ -498,7 +498,7 @@ def decode_sender_visibility_value(byte_iter): raise wsp_pdu.DecodeError('Error parsing sender visibility ' 'value for byte: %s' % hex(byte)) - byte = byte_iter.next() + byte = next(byte_iter) value = 'Hide' if byte == 128 else 'Show' return value @@ -529,7 +529,7 @@ def decode_response_status_value(byte_iter): 0x88: 'Error-unsupported-message', } byte = byte_iter.preview() - byte_iter.next() + next(byte_iter) # Return error unspecified if it couldn't be decoded return response_status_values.get(byte, 0x81) @@ -555,7 +555,7 @@ def decode_status_value(byte_iter): 0x84: 'Unrecognised', } - byte = byte_iter.next() + byte = next(byte_iter) # Return an unrecognised state if it couldn't be decoded return status_values.get(byte, 0x84) @@ -576,7 +576,7 @@ def decode_expiry_value(byte_iter): :rtype: str or int """ value_length = MMSDecoder.decode_value_length(byte_iter) - token = byte_iter.next() + token = next(byte_iter) if token == 0x80: # Absolute-token return MMSDecoder.decode_date_value(byte_iter) diff --git a/messaging/mms/wsp_pdu.py b/messaging/mms/wsp_pdu.py index c6e8be5..6b75440 100644 --- a/messaging/mms/wsp_pdu.py +++ b/messaging/mms/wsp_pdu.py @@ -331,7 +331,7 @@ def decode_uint_8(byte_iter): :rtype: int """ # Make the byte unsigned - return byte_iter.next() & 0xf + return next(byte_iter) & 0xf @staticmethod def decode_uint_var(byte_iter): @@ -353,11 +353,11 @@ def decode_uint_var(byte_iter): :rtype: int """ uint = 0 - byte = byte_iter.next() + byte = next(byte_iter) while (byte >> 7) == 0x01: uint = uint << 7 uint |= byte & 0x7f - byte = byte_iter.next() + byte = next(byte_iter) uint = uint << 7 uint |= byte & 0x7f @@ -388,7 +388,7 @@ def decode_short_integer(byte_iter): byte_iter.reset_preview() raise DecodeError('Not a valid short-integer: MSB not set') - byte = byte_iter.next() + byte = next(byte_iter) return byte & 0x7f @staticmethod @@ -449,7 +449,7 @@ def 
decode_long_integer(byte_iter): # Decode the Multi-octect-integer for i in range(shortLength): longInt = longInt << 8 - longInt |= byte_iter.next() + longInt |= next(byte_iter) return longInt @@ -474,14 +474,14 @@ def decode_text_string(byte_iter): :rtype: str """ decoded_string = '' - byte = byte_iter.next() + byte = next(byte_iter) # Remove Quote character (octet 127), if present if byte == 127: - byte = byte_iter.next() + byte = next(byte_iter) while byte != 0x00: decoded_string += chr(byte) - byte = byte_iter.next() + byte = next(byte_iter) return decoded_string @@ -505,7 +505,7 @@ def decode_quoted_string(byte_iter): raise DecodeError('Invalid quoted string: must ' 'start with ') - byte_iter.next() + next(byte_iter) # CHECK: should the quotation chars be pre- and appended before # returning *technically* we should not check for quote characters. return Decoder.decode_text_string(byte_iter) @@ -528,10 +528,10 @@ def decode_token_text(byte_iter): byte_iter.reset_preview() raise DecodeError('Invalid token') - byte = byte_iter.next() + byte = next(byte_iter) while byte > 31 and byte not in separators: token += chr(byte) - byte = byte_iter.next() + byte = next(byte_iter) return token @@ -561,10 +561,10 @@ def decode_extension_media(byte_iter): raise DecodeError('Invalid Extension-media: TEXT ' 'starts with invalid character: %d' % byte) - byte = byte_iter.next() + byte = next(byte_iter) while byte != 0x00: media_value += chr(byte) - byte = byte_iter.next() + byte = next(byte_iter) return media_value @@ -614,7 +614,7 @@ def decode_short_length(byte_iter): raise DecodeError('Not a valid short-length: ' 'should be in octet range 0-30') - return byte_iter.next() + return next(byte_iter) @staticmethod def decode_value_length(byte_iter): @@ -646,7 +646,7 @@ def decode_value_length(byte_iter): byte = byte_iter.preview() # CHECK: this strictness MAY cause issues, but it is correct if byte == 31: - byte_iter.next() # skip past the length-quote + next(byte_iter) # skip past 
the length-quote length_value = Decoder.decode_uint_var(byte_iter) else: byte_iter.reset_preview() @@ -844,7 +844,7 @@ def decode_content_general_form(byte_iter): # Read parameters, etc, until is reached ct_field_bytes = array.array('B') for i in range(value_length): - ct_field_bytes.append(byte_iter.next()) + ct_field_bytes.append(next(byte_iter)) ct_iter = PreviewIterator(ct_field_bytes) # Now, decode all the bytes read @@ -1186,11 +1186,11 @@ def decode_no_value(byte_iter): :return: No-value, which is 0x00 :rtype: int """ - byte_iter, local_iter = byte_iter.next() - if local_iter.next() != 0x00: + byte_iter, local_iter = next(byte_iter) + if next(local_iter) != 0x00: raise DecodeError('Expected No-value') - byte_iter.next() + next(byte_iter) return 0x00 @staticmethod @@ -1226,7 +1226,7 @@ def decode_accept_value(byte_iter): # Check for the Q-Token (to see if there are Accept-parameters) if byte_iter.preview() == 128: - byte_iter.next() + next(byte_iter) q_value = Decoder.decode_q_value(byte_iter) try: accept_extension = Decoder.decode_parameter(byte_iter) @@ -1260,7 +1260,7 @@ def decode_pragma_value(byte_iter): """ byte = byte_iter.preview() if byte == 0x80: # No-cache - byte_iter.next() + next(byte_iter) # TODO: Not sure if this parameter name (or even usage) is correct name, value = 'Cache-control', 'No-cache' else: @@ -1286,7 +1286,7 @@ def decode_well_known_charset(byte_iter): byte = byte_iter.preview() byte_iter.reset_preview() if byte == 127: - byte_iter.next() + next(byte_iter) decoded_charset = '*' else: charset_value = Decoder.decode_integer_value(byte_iter) diff --git a/tests/test_gsm_encoding.py b/tests/test_gsm_encoding.py index 37f10e1..94d968a 100644 --- a/tests/test_gsm_encoding.py +++ b/tests/test_gsm_encoding.py @@ -193,7 +193,7 @@ class TestEncodingFunctions(TestCase): def test_encoding_supported_unicode_gsm(self): - for key in MAP.keys(): + for key in list(MAP.keys()): # Use 'ignore' so that we see the code tested, not an exception s_gsm = 
key.encode('gsm0338', 'ignore') @@ -213,7 +213,7 @@ def test_encoding_supported_unicode_gsm(self): def test_encoding_supported_greek_unicode_gsm(self): # Note: Conversion is one way, hence no corresponding decode test - for key in GREEK_MAP.keys(): + for key in list(GREEK_MAP.keys()): # Use 'replace' so that we trigger the mapping s_gsm = key.encode('gsm0338', 'replace') @@ -227,7 +227,7 @@ def test_encoding_supported_greek_unicode_gsm(self): def test_encoding_supported_quirk_unicode_gsm(self): # Note: Conversion is one way, hence no corresponding decode test - for key in QUIRK_MAP.keys(): + for key in list(QUIRK_MAP.keys()): # Use 'replace' so that we trigger the mapping s_gsm = key.encode('gsm0338', 'replace') @@ -239,7 +239,7 @@ def test_encoding_supported_quirk_unicode_gsm(self): self.assertEqual(QUIRK_MAP[key][1], i_gsm) def test_decoding_supported_unicode_gsm(self): - for key in MAP.keys(): + for key in list(MAP.keys()): i_gsm = MAP[key][1] if i_gsm <= 0xff: s_gsm = chr(i_gsm) @@ -251,7 +251,7 @@ def test_decoding_supported_unicode_gsm(self): self.assertEqual(MAP[key][0], ord(s_unicode)) def test_is_gsm_text_true(self): - for key in MAP.keys(): + for key in list(MAP.keys()): if key == chr(0x00a0): continue self.assertEqual(messaging.sms.gsm0338.is_gsm_text(key), True) From c67ba491f56eb668cee5c1b546cc9239c25fa765 Mon Sep 17 00:00:00 2001 From: Dom Amato Date: Wed, 8 Apr 2020 18:13:50 -0500 Subject: [PATCH 04/28] make into python 3 iterator --- messaging/mms/iterator.py | 2 +- tests/test_sms.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/messaging/mms/iterator.py b/messaging/mms/iterator.py index d69df68..b4a77eb 100644 --- a/messaging/mms/iterator.py +++ b/messaging/mms/iterator.py @@ -36,7 +36,7 @@ def __init__(self, data): def __iter__(self): return self - def next(self): + def __next__(self): self.reset_preview() if len(self._cached_values) > 0: return self._cached_values.pop(0) diff --git a/tests/test_sms.py b/tests/test_sms.py 
index 9c3ec74..5f06c78 100644 --- a/tests/test_sms.py +++ b/tests/test_sms.py @@ -461,7 +461,7 @@ def test_decoding_delivery_status_report_without_smsc_address(self): } sms = SmsDeliver(pdu) - self.assertEqual(sms.csca, None) + self.assertIsNone(sms.csca) data = sms.data self.assertEqual(data['ref'], 5) self.assertEqual(sms.sr, sr) From 0ecaa61d843a93bf6bc696ee55252b9b153a209c Mon Sep 17 00:00:00 2001 From: Dom Amato Date: Wed, 8 Apr 2020 20:15:26 -0500 Subject: [PATCH 05/28] encoding/decoding --- doc/conf.py | 12 +- doc/tutorial/sms.rst | 2 +- messaging/sms/deliver.py | 2 +- messaging/sms/gsm0338.py | 310 ++++++++++++++++++------------------- tests/test_gsm_encoding.py | 298 +++++++++++++++++------------------ tests/test_mms.py | 13 +- tests/test_sms.py | 38 ++--- tests/test_wap.py | 9 +- 8 files changed, 343 insertions(+), 341 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 84f79bb..aa0ee98 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -20,8 +20,8 @@ master_doc = 'index' # General information about the project. -project = u'python-messaging' -copyright = u'2010, Pablo Martí' +project = 'python-messaging' +copyright = '2010, Pablo Martí' # The short X.Y version. version = '0.5.9' @@ -150,8 +150,8 @@ # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ - ('index', 'python-messaging.tex', u'python-messaging Documentation', - u'Pablo Martí', 'manual'), + ('index', 'python-messaging.tex', 'python-messaging Documentation', + 'Pablo Martí', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of @@ -183,6 +183,6 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). 
man_pages = [ - ('index', 'python-messaging', u'python-messaging Documentation', - [u'Pablo Martí'], 1) + ('index', 'python-messaging', 'python-messaging Documentation', + ['Pablo Martí'], 1) ] diff --git a/doc/tutorial/sms.rst b/doc/tutorial/sms.rst index 7277224..39a49bd 100644 --- a/doc/tutorial/sms.rst +++ b/doc/tutorial/sms.rst @@ -106,7 +106,7 @@ term:`PDU` decoding is really simple with :class:`~messaging.sms.SmsDeliver`:: print sms.data # {'csca': '+447785016005', 'type': None, # 'date': datetime.datetime(2009, 9, 1, 16, 41, 32), - # 'text': u' 1741 bst', 'fmt': 0, 'pid': 0, + # 'text': ' 1741 bst', 'fmt': 0, 'pid': 0, # 'dcs': 0, 'number': '+447927267410'} Apart from the pdu, the :py:meth:`messaging.sms.SmsDeliver.__init__` accepts a diff --git a/messaging/sms/deliver.py b/messaging/sms/deliver.py index 0cb7a81..37714e6 100644 --- a/messaging/sms/deliver.py +++ b/messaging/sms/deliver.py @@ -184,7 +184,7 @@ def _process_message(self, data): data = data[ud_len:].tolist() _bytes = [int("%02X%02X" % (data[i], data[i + 1]), 16) for i in range(0, len(data), 2)] - self.text = u''.join(list(map(chr, _bytes))) + self.text = ''.join(list(map(chr, _bytes))) pdu = property(lambda self: self._pdu, _set_pdu) diff --git a/messaging/sms/gsm0338.py b/messaging/sms/gsm0338.py index d717069..5b90950 100644 --- a/messaging/sms/gsm0338.py +++ b/messaging/sms/gsm0338.py @@ -21,151 +21,151 @@ # default GSM 03.38 -> unicode def_regular_decode_dict = { - '\x00': u'\u0040', # COMMERCIAL AT - '\x01': u'\u00A3', # POUND SIGN - '\x02': u'\u0024', # DOLLAR SIGN - '\x03': u'\u00A5', # YEN SIGN - '\x04': u'\u00E8', # LATIN SMALL LETTER E WITH GRAVE - '\x05': u'\u00E9', # LATIN SMALL LETTER E WITH ACUTE - '\x06': u'\u00F9', # LATIN SMALL LETTER U WITH GRAVE - '\x07': u'\u00EC', # LATIN SMALL LETTER I WITH GRAVE - '\x08': u'\u00F2', # LATIN SMALL LETTER O WITH GRAVE - '\x09': u'\u00C7', # LATIN CAPITAL LETTER C WITH CEDILLA + '\x00': '\u0040', # COMMERCIAL AT + '\x01': '\u00A3', # POUND 
SIGN + '\x02': '\u0024', # DOLLAR SIGN + '\x03': '\u00A5', # YEN SIGN + '\x04': '\u00E8', # LATIN SMALL LETTER E WITH GRAVE + '\x05': '\u00E9', # LATIN SMALL LETTER E WITH ACUTE + '\x06': '\u00F9', # LATIN SMALL LETTER U WITH GRAVE + '\x07': '\u00EC', # LATIN SMALL LETTER I WITH GRAVE + '\x08': '\u00F2', # LATIN SMALL LETTER O WITH GRAVE + '\x09': '\u00C7', # LATIN CAPITAL LETTER C WITH CEDILLA # The Unicode page suggests this is a mistake: but # it's still in the latest version of the spec and # our implementation has to be exact. - '\x0A': u'\u000A', # LINE FEED - '\x0B': u'\u00D8', # LATIN CAPITAL LETTER O WITH STROKE - '\x0C': u'\u00F8', # LATIN SMALL LETTER O WITH STROKE - '\x0D': u'\u000D', # CARRIAGE RETURN - '\x0E': u'\u00C5', # LATIN CAPITAL LETTER A WITH RING ABOVE - '\x0F': u'\u00E5', # LATIN SMALL LETTER A WITH RING ABOVE - '\x10': u'\u0394', # GREEK CAPITAL LETTER DELTA - '\x11': u'\u005F', # LOW LINE - '\x12': u'\u03A6', # GREEK CAPITAL LETTER PHI - '\x13': u'\u0393', # GREEK CAPITAL LETTER GAMMA - '\x14': u'\u039B', # GREEK CAPITAL LETTER LAMDA - '\x15': u'\u03A9', # GREEK CAPITAL LETTER OMEGA - '\x16': u'\u03A0', # GREEK CAPITAL LETTER PI - '\x17': u'\u03A8', # GREEK CAPITAL LETTER PSI - '\x18': u'\u03A3', # GREEK CAPITAL LETTER SIGMA - '\x19': u'\u0398', # GREEK CAPITAL LETTER THETA - '\x1A': u'\u039E', # GREEK CAPITAL LETTER XI - '\x1C': u'\u00C6', # LATIN CAPITAL LETTER AE - '\x1D': u'\u00E6', # LATIN SMALL LETTER AE - '\x1E': u'\u00DF', # LATIN SMALL LETTER SHARP S (German) - '\x1F': u'\u00C9', # LATIN CAPITAL LETTER E WITH ACUTE - '\x20': u'\u0020', # SPACE - '\x21': u'\u0021', # EXCLAMATION MARK - '\x22': u'\u0022', # QUOTATION MARK - '\x23': u'\u0023', # NUMBER SIGN - '\x24': u'\u00A4', # CURRENCY SIGN - '\x25': u'\u0025', # PERCENT SIGN - '\x26': u'\u0026', # AMPERSAND - '\x27': u'\u0027', # APOSTROPHE - '\x28': u'\u0028', # LEFT PARENTHESIS - '\x29': u'\u0029', # RIGHT PARENTHESIS - '\x2A': u'\u002A', # ASTERISK - '\x2B': u'\u002B', # PLUS 
SIGN - '\x2C': u'\u002C', # COMMA - '\x2D': u'\u002D', # HYPHEN-MINUS - '\x2E': u'\u002E', # FULL STOP - '\x2F': u'\u002F', # SOLIDUS - '\x30': u'\u0030', # DIGIT ZERO - '\x31': u'\u0031', # DIGIT ONE - '\x32': u'\u0032', # DIGIT TWO - '\x33': u'\u0033', # DIGIT THREE - '\x34': u'\u0034', # DIGIT FOUR - '\x35': u'\u0035', # DIGIT FIVE - '\x36': u'\u0036', # DIGIT SIX - '\x37': u'\u0037', # DIGIT SEVEN - '\x38': u'\u0038', # DIGIT EIGHT - '\x39': u'\u0039', # DIGIT NINE - '\x3A': u'\u003A', # COLON - '\x3B': u'\u003B', # SEMICOLON - '\x3C': u'\u003C', # LESS-THAN SIGN - '\x3D': u'\u003D', # EQUALS SIGN - '\x3E': u'\u003E', # GREATER-THAN SIGN - '\x3F': u'\u003F', # QUESTION MARK - '\x40': u'\u00A1', # INVERTED EXCLAMATION MARK - '\x41': u'\u0041', # LATIN CAPITAL LETTER A - '\x42': u'\u0042', # LATIN CAPITAL LETTER B - '\x43': u'\u0043', # LATIN CAPITAL LETTER C - '\x44': u'\u0044', # LATIN CAPITAL LETTER D - '\x45': u'\u0045', # LATIN CAPITAL LETTER E - '\x46': u'\u0046', # LATIN CAPITAL LETTER F - '\x47': u'\u0047', # LATIN CAPITAL LETTER G - '\x48': u'\u0048', # LATIN CAPITAL LETTER H - '\x49': u'\u0049', # LATIN CAPITAL LETTER I - '\x4A': u'\u004A', # LATIN CAPITAL LETTER J - '\x4B': u'\u004B', # LATIN CAPITAL LETTER K - '\x4C': u'\u004C', # LATIN CAPITAL LETTER L - '\x4D': u'\u004D', # LATIN CAPITAL LETTER M - '\x4E': u'\u004E', # LATIN CAPITAL LETTER N - '\x4F': u'\u004F', # LATIN CAPITAL LETTER O - '\x50': u'\u0050', # LATIN CAPITAL LETTER P - '\x51': u'\u0051', # LATIN CAPITAL LETTER Q - '\x52': u'\u0052', # LATIN CAPITAL LETTER R - '\x53': u'\u0053', # LATIN CAPITAL LETTER S - '\x54': u'\u0054', # LATIN CAPITAL LETTER T - '\x55': u'\u0055', # LATIN CAPITAL LETTER U - '\x56': u'\u0056', # LATIN CAPITAL LETTER V - '\x57': u'\u0057', # LATIN CAPITAL LETTER W - '\x58': u'\u0058', # LATIN CAPITAL LETTER X - '\x59': u'\u0059', # LATIN CAPITAL LETTER Y - '\x5A': u'\u005A', # LATIN CAPITAL LETTER Z - '\x5B': u'\u00C4', # LATIN CAPITAL LETTER A WITH DIAERESIS - 
'\x5C': u'\u00D6', # LATIN CAPITAL LETTER O WITH DIAERESIS - '\x5D': u'\u00D1', # LATIN CAPITAL LETTER N WITH TILDE - '\x5E': u'\u00DC', # LATIN CAPITAL LETTER U WITH DIAERESIS - '\x5F': u'\u00A7', # SECTION SIGN - '\x60': u'\u00BF', # INVERTED QUESTION MARK - '\x61': u'\u0061', # LATIN SMALL LETTER A - '\x62': u'\u0062', # LATIN SMALL LETTER B - '\x63': u'\u0063', # LATIN SMALL LETTER C - '\x64': u'\u0064', # LATIN SMALL LETTER D - '\x65': u'\u0065', # LATIN SMALL LETTER E - '\x66': u'\u0066', # LATIN SMALL LETTER F - '\x67': u'\u0067', # LATIN SMALL LETTER G - '\x68': u'\u0068', # LATIN SMALL LETTER H - '\x69': u'\u0069', # LATIN SMALL LETTER I - '\x6A': u'\u006A', # LATIN SMALL LETTER J - '\x6B': u'\u006B', # LATIN SMALL LETTER K - '\x6C': u'\u006C', # LATIN SMALL LETTER L - '\x6D': u'\u006D', # LATIN SMALL LETTER M - '\x6E': u'\u006E', # LATIN SMALL LETTER N - '\x6F': u'\u006F', # LATIN SMALL LETTER O - '\x70': u'\u0070', # LATIN SMALL LETTER P - '\x71': u'\u0071', # LATIN SMALL LETTER Q - '\x72': u'\u0072', # LATIN SMALL LETTER R - '\x73': u'\u0073', # LATIN SMALL LETTER S - '\x74': u'\u0074', # LATIN SMALL LETTER T - '\x75': u'\u0075', # LATIN SMALL LETTER U - '\x76': u'\u0076', # LATIN SMALL LETTER V - '\x77': u'\u0077', # LATIN SMALL LETTER W - '\x78': u'\u0078', # LATIN SMALL LETTER X - '\x79': u'\u0079', # LATIN SMALL LETTER Y - '\x7A': u'\u007A', # LATIN SMALL LETTER Z - '\x7B': u'\u00E4', # LATIN SMALL LETTER A WITH DIAERESIS - '\x7C': u'\u00F6', # LATIN SMALL LETTER O WITH DIAERESIS - '\x7D': u'\u00F1', # LATIN SMALL LETTER N WITH TILDE - '\x7E': u'\u00FC', # LATIN SMALL LETTER U WITH DIAERESIS - '\x7F': u'\u00E0', # LATIN SMALL LETTER A WITH GRAVE + '\x0A': '\u000A', # LINE FEED + '\x0B': '\u00D8', # LATIN CAPITAL LETTER O WITH STROKE + '\x0C': '\u00F8', # LATIN SMALL LETTER O WITH STROKE + '\x0D': '\u000D', # CARRIAGE RETURN + '\x0E': '\u00C5', # LATIN CAPITAL LETTER A WITH RING ABOVE + '\x0F': '\u00E5', # LATIN SMALL LETTER A WITH RING ABOVE + 
'\x10': '\u0394', # GREEK CAPITAL LETTER DELTA + '\x11': '\u005F', # LOW LINE + '\x12': '\u03A6', # GREEK CAPITAL LETTER PHI + '\x13': '\u0393', # GREEK CAPITAL LETTER GAMMA + '\x14': '\u039B', # GREEK CAPITAL LETTER LAMDA + '\x15': '\u03A9', # GREEK CAPITAL LETTER OMEGA + '\x16': '\u03A0', # GREEK CAPITAL LETTER PI + '\x17': '\u03A8', # GREEK CAPITAL LETTER PSI + '\x18': '\u03A3', # GREEK CAPITAL LETTER SIGMA + '\x19': '\u0398', # GREEK CAPITAL LETTER THETA + '\x1A': '\u039E', # GREEK CAPITAL LETTER XI + '\x1C': '\u00C6', # LATIN CAPITAL LETTER AE + '\x1D': '\u00E6', # LATIN SMALL LETTER AE + '\x1E': '\u00DF', # LATIN SMALL LETTER SHARP S (German) + '\x1F': '\u00C9', # LATIN CAPITAL LETTER E WITH ACUTE + '\x20': '\u0020', # SPACE + '\x21': '\u0021', # EXCLAMATION MARK + '\x22': '\u0022', # QUOTATION MARK + '\x23': '\u0023', # NUMBER SIGN + '\x24': '\u00A4', # CURRENCY SIGN + '\x25': '\u0025', # PERCENT SIGN + '\x26': '\u0026', # AMPERSAND + '\x27': '\u0027', # APOSTROPHE + '\x28': '\u0028', # LEFT PARENTHESIS + '\x29': '\u0029', # RIGHT PARENTHESIS + '\x2A': '\u002A', # ASTERISK + '\x2B': '\u002B', # PLUS SIGN + '\x2C': '\u002C', # COMMA + '\x2D': '\u002D', # HYPHEN-MINUS + '\x2E': '\u002E', # FULL STOP + '\x2F': '\u002F', # SOLIDUS + '\x30': '\u0030', # DIGIT ZERO + '\x31': '\u0031', # DIGIT ONE + '\x32': '\u0032', # DIGIT TWO + '\x33': '\u0033', # DIGIT THREE + '\x34': '\u0034', # DIGIT FOUR + '\x35': '\u0035', # DIGIT FIVE + '\x36': '\u0036', # DIGIT SIX + '\x37': '\u0037', # DIGIT SEVEN + '\x38': '\u0038', # DIGIT EIGHT + '\x39': '\u0039', # DIGIT NINE + '\x3A': '\u003A', # COLON + '\x3B': '\u003B', # SEMICOLON + '\x3C': '\u003C', # LESS-THAN SIGN + '\x3D': '\u003D', # EQUALS SIGN + '\x3E': '\u003E', # GREATER-THAN SIGN + '\x3F': '\u003F', # QUESTION MARK + '\x40': '\u00A1', # INVERTED EXCLAMATION MARK + '\x41': '\u0041', # LATIN CAPITAL LETTER A + '\x42': '\u0042', # LATIN CAPITAL LETTER B + '\x43': '\u0043', # LATIN CAPITAL LETTER C + '\x44': '\u0044', # 
LATIN CAPITAL LETTER D + '\x45': '\u0045', # LATIN CAPITAL LETTER E + '\x46': '\u0046', # LATIN CAPITAL LETTER F + '\x47': '\u0047', # LATIN CAPITAL LETTER G + '\x48': '\u0048', # LATIN CAPITAL LETTER H + '\x49': '\u0049', # LATIN CAPITAL LETTER I + '\x4A': '\u004A', # LATIN CAPITAL LETTER J + '\x4B': '\u004B', # LATIN CAPITAL LETTER K + '\x4C': '\u004C', # LATIN CAPITAL LETTER L + '\x4D': '\u004D', # LATIN CAPITAL LETTER M + '\x4E': '\u004E', # LATIN CAPITAL LETTER N + '\x4F': '\u004F', # LATIN CAPITAL LETTER O + '\x50': '\u0050', # LATIN CAPITAL LETTER P + '\x51': '\u0051', # LATIN CAPITAL LETTER Q + '\x52': '\u0052', # LATIN CAPITAL LETTER R + '\x53': '\u0053', # LATIN CAPITAL LETTER S + '\x54': '\u0054', # LATIN CAPITAL LETTER T + '\x55': '\u0055', # LATIN CAPITAL LETTER U + '\x56': '\u0056', # LATIN CAPITAL LETTER V + '\x57': '\u0057', # LATIN CAPITAL LETTER W + '\x58': '\u0058', # LATIN CAPITAL LETTER X + '\x59': '\u0059', # LATIN CAPITAL LETTER Y + '\x5A': '\u005A', # LATIN CAPITAL LETTER Z + '\x5B': '\u00C4', # LATIN CAPITAL LETTER A WITH DIAERESIS + '\x5C': '\u00D6', # LATIN CAPITAL LETTER O WITH DIAERESIS + '\x5D': '\u00D1', # LATIN CAPITAL LETTER N WITH TILDE + '\x5E': '\u00DC', # LATIN CAPITAL LETTER U WITH DIAERESIS + '\x5F': '\u00A7', # SECTION SIGN + '\x60': '\u00BF', # INVERTED QUESTION MARK + '\x61': '\u0061', # LATIN SMALL LETTER A + '\x62': '\u0062', # LATIN SMALL LETTER B + '\x63': '\u0063', # LATIN SMALL LETTER C + '\x64': '\u0064', # LATIN SMALL LETTER D + '\x65': '\u0065', # LATIN SMALL LETTER E + '\x66': '\u0066', # LATIN SMALL LETTER F + '\x67': '\u0067', # LATIN SMALL LETTER G + '\x68': '\u0068', # LATIN SMALL LETTER H + '\x69': '\u0069', # LATIN SMALL LETTER I + '\x6A': '\u006A', # LATIN SMALL LETTER J + '\x6B': '\u006B', # LATIN SMALL LETTER K + '\x6C': '\u006C', # LATIN SMALL LETTER L + '\x6D': '\u006D', # LATIN SMALL LETTER M + '\x6E': '\u006E', # LATIN SMALL LETTER N + '\x6F': '\u006F', # LATIN SMALL LETTER O + '\x70': '\u0070', # 
LATIN SMALL LETTER P + '\x71': '\u0071', # LATIN SMALL LETTER Q + '\x72': '\u0072', # LATIN SMALL LETTER R + '\x73': '\u0073', # LATIN SMALL LETTER S + '\x74': '\u0074', # LATIN SMALL LETTER T + '\x75': '\u0075', # LATIN SMALL LETTER U + '\x76': '\u0076', # LATIN SMALL LETTER V + '\x77': '\u0077', # LATIN SMALL LETTER W + '\x78': '\u0078', # LATIN SMALL LETTER X + '\x79': '\u0079', # LATIN SMALL LETTER Y + '\x7A': '\u007A', # LATIN SMALL LETTER Z + '\x7B': '\u00E4', # LATIN SMALL LETTER A WITH DIAERESIS + '\x7C': '\u00F6', # LATIN SMALL LETTER O WITH DIAERESIS + '\x7D': '\u00F1', # LATIN SMALL LETTER N WITH TILDE + '\x7E': '\u00FC', # LATIN SMALL LETTER U WITH DIAERESIS + '\x7F': '\u00E0', # LATIN SMALL LETTER A WITH GRAVE } # default GSM 03.38 escaped characters -> unicode def_escape_decode_dict = { - '\x0A': u'\u000C', # FORM FEED - '\x14': u'\u005E', # CIRCUMFLEX ACCENT - '\x28': u'\u007B', # LEFT CURLY BRACKET - '\x29': u'\u007D', # RIGHT CURLY BRACKET - '\x2F': u'\u005C', # REVERSE SOLIDUS - '\x3C': u'\u005B', # LEFT SQUARE BRACKET - '\x3D': u'\u007E', # TILDE - '\x3E': u'\u005D', # RIGHT SQUARE BRACKET - '\x40': u'\u007C', # VERTICAL LINE - '\x65': u'\u20AC', # EURO SIGN + '\x0A': '\u000C', # FORM FEED + '\x14': '\u005E', # CIRCUMFLEX ACCENT + '\x28': '\u007B', # LEFT CURLY BRACKET + '\x29': '\u007D', # RIGHT CURLY BRACKET + '\x2F': '\u005C', # REVERSE SOLIDUS + '\x3C': '\u005B', # LEFT SQUARE BRACKET + '\x3D': '\u007E', # TILDE + '\x3E': '\u005D', # RIGHT SQUARE BRACKET + '\x40': '\u007C', # VERTICAL LINE + '\x65': '\u20AC', # EURO SIGN } # Replacement characters, default is question mark. Used when it is not too @@ -173,22 +173,22 @@ # humans read and write SMS. But for USSD and other M2M applications it's # important to ensure the conversion is exact. 
def_replace_encode_dict = { - u'\u00E7': '\x09', # LATIN SMALL LETTER C WITH CEDILLA + '\u00E7': '\x09', # LATIN SMALL LETTER C WITH CEDILLA - u'\u0391': '\x41', # GREEK CAPITAL LETTER ALPHA - u'\u0392': '\x42', # GREEK CAPITAL LETTER BETA - u'\u0395': '\x45', # GREEK CAPITAL LETTER EPSILON - u'\u0397': '\x48', # GREEK CAPITAL LETTER ETA - u'\u0399': '\x49', # GREEK CAPITAL LETTER IOTA - u'\u039A': '\x4B', # GREEK CAPITAL LETTER KAPPA - u'\u039C': '\x4D', # GREEK CAPITAL LETTER MU - u'\u039D': '\x4E', # GREEK CAPITAL LETTER NU - u'\u039F': '\x4F', # GREEK CAPITAL LETTER OMICRON - u'\u03A1': '\x50', # GREEK CAPITAL LETTER RHO - u'\u03A4': '\x54', # GREEK CAPITAL LETTER TAU - u'\u03A7': '\x58', # GREEK CAPITAL LETTER CHI - u'\u03A5': '\x59', # GREEK CAPITAL LETTER UPSILON - u'\u0396': '\x5A', # GREEK CAPITAL LETTER ZETA + '\u0391': '\x41', # GREEK CAPITAL LETTER ALPHA + '\u0392': '\x42', # GREEK CAPITAL LETTER BETA + '\u0395': '\x45', # GREEK CAPITAL LETTER EPSILON + '\u0397': '\x48', # GREEK CAPITAL LETTER ETA + '\u0399': '\x49', # GREEK CAPITAL LETTER IOTA + '\u039A': '\x4B', # GREEK CAPITAL LETTER KAPPA + '\u039C': '\x4D', # GREEK CAPITAL LETTER MU + '\u039D': '\x4E', # GREEK CAPITAL LETTER NU + '\u039F': '\x4F', # GREEK CAPITAL LETTER OMICRON + '\u03A1': '\x50', # GREEK CAPITAL LETTER RHO + '\u03A4': '\x54', # GREEK CAPITAL LETTER TAU + '\u03A7': '\x58', # GREEK CAPITAL LETTER CHI + '\u03A5': '\x59', # GREEK CAPITAL LETTER UPSILON + '\u0396': '\x5A', # GREEK CAPITAL LETTER ZETA } QUESTION_MARK = chr(0x3f) @@ -248,9 +248,9 @@ def decode(input_, errors='strict'): if index < len(input_): c = input_[index] index += 1 - result.append(def_escape_decode_dict.get(c, u'\xa0')) + result.append(def_escape_decode_dict.get(c, '\xa0')) else: - result.append(u'\xa0') + result.append('\xa0') else: try: result.append(def_regular_decode_dict[c]) @@ -265,7 +265,7 @@ def decode(input_, errors='strict'): else: raise UnicodeError("Unknown error handling") - ret = u''.join(result) + 
ret = ''.join(result) return ret, len(ret) diff --git a/tests/test_gsm_encoding.py b/tests/test_gsm_encoding.py index 94d968a..00e908b 100644 --- a/tests/test_gsm_encoding.py +++ b/tests/test_gsm_encoding.py @@ -23,167 +23,167 @@ # Reversed from: ftp://ftp.unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT MAP = { # chr(0x0000): (0x0000, 0x00), # Null - u'@': (0x0040, 0x00), - u'£': (0x00a3, 0x01), - u'$': (0x0024, 0x02), - u'¥': (0x00a5, 0x03), - u'è': (0x00e8, 0x04), - u'é': (0x00e9, 0x05), - u'ù': (0x00f9, 0x06), - u'ì': (0x00ec, 0x07), - u'ò': (0x00f2, 0x08), - u'Ç': (0x00c7, 0x09), # LATIN CAPITAL LETTER C WITH CEDILLA + '@': (0x0040, 0x00), + '£': (0x00a3, 0x01), + '$': (0x0024, 0x02), + '¥': (0x00a5, 0x03), + 'è': (0x00e8, 0x04), + 'é': (0x00e9, 0x05), + 'ù': (0x00f9, 0x06), + 'ì': (0x00ec, 0x07), + 'ò': (0x00f2, 0x08), + 'Ç': (0x00c7, 0x09), # LATIN CAPITAL LETTER C WITH CEDILLA chr(0x000a): (0x000a, 0x0a), # Linefeed - u'Ø': (0x00d8, 0x0b), - u'ø': (0x00f8, 0x0c), + 'Ø': (0x00d8, 0x0b), + 'ø': (0x00f8, 0x0c), chr(0x000d): (0x000d, 0x0d), # Carriage return - u'Å': (0x00c5, 0x0e), - u'å': (0x00e5, 0x0f), - u'Δ': (0x0394, 0x10), - u'_': (0x005f, 0x11), - u'Φ': (0x03a6, 0x12), - u'Γ': (0x0393, 0x13), - u'Λ': (0x039b, 0x14), - u'Ω': (0x03a9, 0x15), - u'Π': (0x03a0, 0x16), - u'Ψ': (0x03a8, 0x17), - u'Σ': (0x03a3, 0x18), - u'Θ': (0x0398, 0x19), - u'Ξ': (0x039e, 0x1a), + 'Å': (0x00c5, 0x0e), + 'å': (0x00e5, 0x0f), + 'Δ': (0x0394, 0x10), + '_': (0x005f, 0x11), + 'Φ': (0x03a6, 0x12), + 'Γ': (0x0393, 0x13), + 'Λ': (0x039b, 0x14), + 'Ω': (0x03a9, 0x15), + 'Π': (0x03a0, 0x16), + 'Ψ': (0x03a8, 0x17), + 'Σ': (0x03a3, 0x18), + 'Θ': (0x0398, 0x19), + 'Ξ': (0x039e, 0x1a), chr(0x00a0): (0x00a0, 0x1b), # Escape to extension table (displayed # as NBSP, on decode of invalid escape # sequence) - u'Æ': (0x00c6, 0x1c), - u'æ': (0x00e6, 0x1d), - u'ß': (0x00df, 0x1e), - u'É': (0x00c9, 0x1f), - u' ': (0x0020, 0x20), - u'!': (0x0021, 0x21), - u'"': (0x0022, 0x22), - u'#': (0x0023, 0x23), - 
u'¤': (0x00a4, 0x24), - u'%': (0x0025, 0x25), - u'&': (0x0026, 0x26), - u'\'': (0x0027, 0x27), - u'{': (0x007b, 0x1b28), - u'}': (0x007d, 0x1b29), - u'*': (0x002a, 0x2a), - u'+': (0x002b, 0x2b), - u',': (0x002c, 0x2c), - u'-': (0x002d, 0x2d), - u'.': (0x002e, 0x2e), - u'\\': (0x005c, 0x1b2f), - u'0': (0x0030, 0x30), - u'1': (0x0031, 0x31), - u'2': (0x0032, 0x32), - u'3': (0x0033, 0x33), - u'4': (0x0034, 0x34), - u'5': (0x0035, 0x35), - u'6': (0x0036, 0x36), - u'7': (0x0037, 0x37), - u'8': (0x0038, 0x38), - u'9': (0x0039, 0x39), - u':': (0x003a, 0x3a), - u';': (0x003b, 0x3b), - u'[': (0x005b, 0x1b3c), + 'Æ': (0x00c6, 0x1c), + 'æ': (0x00e6, 0x1d), + 'ß': (0x00df, 0x1e), + 'É': (0x00c9, 0x1f), + ' ': (0x0020, 0x20), + '!': (0x0021, 0x21), + '"': (0x0022, 0x22), + '#': (0x0023, 0x23), + '¤': (0x00a4, 0x24), + '%': (0x0025, 0x25), + '&': (0x0026, 0x26), + '\'': (0x0027, 0x27), + '{': (0x007b, 0x1b28), + '}': (0x007d, 0x1b29), + '*': (0x002a, 0x2a), + '+': (0x002b, 0x2b), + ',': (0x002c, 0x2c), + '-': (0x002d, 0x2d), + '.': (0x002e, 0x2e), + '\\': (0x005c, 0x1b2f), + '0': (0x0030, 0x30), + '1': (0x0031, 0x31), + '2': (0x0032, 0x32), + '3': (0x0033, 0x33), + '4': (0x0034, 0x34), + '5': (0x0035, 0x35), + '6': (0x0036, 0x36), + '7': (0x0037, 0x37), + '8': (0x0038, 0x38), + '9': (0x0039, 0x39), + ':': (0x003a, 0x3a), + ';': (0x003b, 0x3b), + '[': (0x005b, 0x1b3c), chr(0x000c): (0x000c, 0x1b0a), # Formfeed - u']': (0x005d, 0x1b3e), - u'?': (0x003f, 0x3f), - u'|': (0x007c, 0x1b40), - u'A': (0x0041, 0x41), - u'B': (0x0042, 0x42), - u'C': (0x0043, 0x43), - u'D': (0x0044, 0x44), - u'E': (0x0045, 0x45), - u'F': (0x0046, 0x46), - u'G': (0x0047, 0x47), - u'H': (0x0048, 0x48), - u'I': (0x0049, 0x49), - u'J': (0x004a, 0x4a), - u'K': (0x004b, 0x4b), - u'L': (0x004c, 0x4c), - u'M': (0x004d, 0x4d), - u'N': (0x004e, 0x4e), - u'O': (0x004f, 0x4f), - u'P': (0x0050, 0x50), - u'Q': (0x0051, 0x51), - u'R': (0x0052, 0x52), - u'S': (0x0053, 0x53), - u'T': (0x0054, 0x54), - u'U': (0x0055, 0x55), 
- u'V': (0x0056, 0x56), - u'W': (0x0057, 0x57), - u'X': (0x0058, 0x58), - u'Y': (0x0059, 0x59), - u'Z': (0x005a, 0x5a), - u'Ä': (0x00c4, 0x5b), - u'Ö': (0x00d6, 0x5c), - u'Ñ': (0x00d1, 0x5d), - u'Ü': (0x00dc, 0x5e), - u'§': (0x00a7, 0x5f), - u'¿': (0x00bf, 0x60), - u'a': (0x0061, 0x61), - u'b': (0x0062, 0x62), - u'c': (0x0063, 0x63), - u'd': (0x0064, 0x64), - u'€': (0x20ac, 0x1b65), - u'f': (0x0066, 0x66), - u'g': (0x0067, 0x67), - u'h': (0x0068, 0x68), - u'<': (0x003c, 0x3c), - u'j': (0x006a, 0x6a), - u'k': (0x006b, 0x6b), - u'l': (0x006c, 0x6c), - u'm': (0x006d, 0x6d), - u'n': (0x006e, 0x6e), - u'~': (0x007e, 0x1b3d), - u'p': (0x0070, 0x70), - u'q': (0x0071, 0x71), - u'r': (0x0072, 0x72), - u's': (0x0073, 0x73), - u't': (0x0074, 0x74), - u'>': (0x003e, 0x3e), - u'v': (0x0076, 0x76), - u'i': (0x0069, 0x69), - u'x': (0x0078, 0x78), - u'^': (0x005e, 0x1b14), - u'z': (0x007a, 0x7a), - u'ä': (0x00e4, 0x7b), - u'ö': (0x00f6, 0x7c), - u'ñ': (0x00f1, 0x7d), - u'ü': (0x00fc, 0x7e), - u'à': (0x00e0, 0x7f), - u'¡': (0x00a1, 0x40), - u'/': (0x002f, 0x2f), - u'o': (0x006f, 0x6f), - u'u': (0x0075, 0x75), - u'w': (0x0077, 0x77), - u'y': (0x0079, 0x79), - u'e': (0x0065, 0x65), - u'=': (0x003d, 0x3d), - u'(': (0x0028, 0x28), - u')': (0x0029, 0x29), + ']': (0x005d, 0x1b3e), + '?': (0x003f, 0x3f), + '|': (0x007c, 0x1b40), + 'A': (0x0041, 0x41), + 'B': (0x0042, 0x42), + 'C': (0x0043, 0x43), + 'D': (0x0044, 0x44), + 'E': (0x0045, 0x45), + 'F': (0x0046, 0x46), + 'G': (0x0047, 0x47), + 'H': (0x0048, 0x48), + 'I': (0x0049, 0x49), + 'J': (0x004a, 0x4a), + 'K': (0x004b, 0x4b), + 'L': (0x004c, 0x4c), + 'M': (0x004d, 0x4d), + 'N': (0x004e, 0x4e), + 'O': (0x004f, 0x4f), + 'P': (0x0050, 0x50), + 'Q': (0x0051, 0x51), + 'R': (0x0052, 0x52), + 'S': (0x0053, 0x53), + 'T': (0x0054, 0x54), + 'U': (0x0055, 0x55), + 'V': (0x0056, 0x56), + 'W': (0x0057, 0x57), + 'X': (0x0058, 0x58), + 'Y': (0x0059, 0x59), + 'Z': (0x005a, 0x5a), + 'Ä': (0x00c4, 0x5b), + 'Ö': (0x00d6, 0x5c), + 'Ñ': (0x00d1, 0x5d), + 
'Ü': (0x00dc, 0x5e), + '§': (0x00a7, 0x5f), + '¿': (0x00bf, 0x60), + 'a': (0x0061, 0x61), + 'b': (0x0062, 0x62), + 'c': (0x0063, 0x63), + 'd': (0x0064, 0x64), + '€': (0x20ac, 0x1b65), + 'f': (0x0066, 0x66), + 'g': (0x0067, 0x67), + 'h': (0x0068, 0x68), + '<': (0x003c, 0x3c), + 'j': (0x006a, 0x6a), + 'k': (0x006b, 0x6b), + 'l': (0x006c, 0x6c), + 'm': (0x006d, 0x6d), + 'n': (0x006e, 0x6e), + '~': (0x007e, 0x1b3d), + 'p': (0x0070, 0x70), + 'q': (0x0071, 0x71), + 'r': (0x0072, 0x72), + 's': (0x0073, 0x73), + 't': (0x0074, 0x74), + '>': (0x003e, 0x3e), + 'v': (0x0076, 0x76), + 'i': (0x0069, 0x69), + 'x': (0x0078, 0x78), + '^': (0x005e, 0x1b14), + 'z': (0x007a, 0x7a), + 'ä': (0x00e4, 0x7b), + 'ö': (0x00f6, 0x7c), + 'ñ': (0x00f1, 0x7d), + 'ü': (0x00fc, 0x7e), + 'à': (0x00e0, 0x7f), + '¡': (0x00a1, 0x40), + '/': (0x002f, 0x2f), + 'o': (0x006f, 0x6f), + 'u': (0x0075, 0x75), + 'w': (0x0077, 0x77), + 'y': (0x0079, 0x79), + 'e': (0x0065, 0x65), + '=': (0x003d, 0x3d), + '(': (0x0028, 0x28), + ')': (0x0029, 0x29), } GREEK_MAP = { # Note: these might look like Latin uppercase, but they aren't - u'Α': (0x0391, 0x41), - u'Β': (0x0392, 0x42), - u'Ε': (0x0395, 0x45), - u'Η': (0x0397, 0x48), - u'Ι': (0x0399, 0x49), - u'Κ': (0x039a, 0x4b), - u'Μ': (0x039c, 0x4d), - u'Ν': (0x039d, 0x4e), - u'Ο': (0x039f, 0x4f), - u'Ρ': (0x03a1, 0x50), - u'Τ': (0x03a4, 0x54), - u'Χ': (0x03a7, 0x58), - u'Υ': (0x03a5, 0x59), - u'Ζ': (0x0396, 0x5a), + 'Α': (0x0391, 0x41), + 'Β': (0x0392, 0x42), + 'Ε': (0x0395, 0x45), + 'Η': (0x0397, 0x48), + 'Ι': (0x0399, 0x49), + 'Κ': (0x039a, 0x4b), + 'Μ': (0x039c, 0x4d), + 'Ν': (0x039d, 0x4e), + 'Ο': (0x039f, 0x4f), + 'Ρ': (0x03a1, 0x50), + 'Τ': (0x03a4, 0x54), + 'Χ': (0x03a7, 0x58), + 'Υ': (0x03a5, 0x59), + 'Ζ': (0x0396, 0x5a), } QUIRK_MAP = { - u'ç': (0x00e7, 0x09), + 'ç': (0x00e7, 0x09), } BAD = -1 diff --git a/tests/test_mms.py b/tests/test_mms.py index fa53b7d..5701c94 100644 --- a/tests/test_mms.py +++ b/tests/test_mms.py @@ -2,6 +2,7 @@ from array import array
import datetime import os +import binascii from unittest import TestCase from messaging.mms.message import MMSMessage @@ -94,7 +95,7 @@ def test_decoding_TOMSLOT_mms(self): mms = MMSMessage.from_file(path) self.assertTrue(isinstance(mms, MMSMessage)) headers = { - 'From': '616c6c616e40746f6d736c6f742e636f6d'.decode('hex'), + 'From': binascii.unhexlify(b'616c6c616e40746f6d736c6f742e636f6d'), 'Transaction-Id': '1234', 'MMS-Version': '1.0', 'Message-Type': 'm-retrieve-conf', 'Date': datetime.datetime(2003, 2, 16, 3, 48, 33), @@ -125,7 +126,7 @@ def test_decoding_images_are_cut_off_debug_mms(self): headers = { 'From': '', 'Read-Reply': False, 'Transaction-Id': '2112410527', 'MMS-Version': '1.0', - 'To': '7464707440616a616a672e63646d'.decode('hex'), + 'To': binascii.unhexlify(b'7464707440616a616a672e63646d'), 'Delivery-Report': False, 'Message-Type': 'm-send-req', 'Content-Type': ('application/vnd.wap.multipart.related', {'Start': '', 'Type': 'application/smil'}), @@ -147,7 +148,7 @@ def test_decoding_openwave_mms(self): mms = MMSMessage.from_file(path) self.assertTrue(isinstance(mms, MMSMessage)) headers = { - 'From': '2b31363530353535303030302f545950453d504c4d4e'.decode('hex'), + 'From': binascii.unhexlify(b'2b31363530353535303030302f545950453d504c4d4e'), 'Message-Class': 'Personal', 'Transaction-Id': '1067263672', 'MMS-Version': '1.0', 'Priority': 'Normal', 'To': '112/TYPE=PLMN', @@ -200,7 +201,7 @@ def test_decoding_gallery2test_mms(self): mms = MMSMessage.from_file(path) self.assertTrue(isinstance(mms, MMSMessage)) headers = { - 'From': '2b31363530353535303030302f545950453d504c4d4e'.decode('hex'), + 'From': binascii.unhexlify(b'2b31363530353535303030302f545950453d504c4d4e'), 'Message-Class': 'Personal', 'Transaction-Id': '1118775337', 'MMS-Version': '1.0', 'Priority': 'Normal', 'To': 'Jg', 'Delivery-Report': False, @@ -256,7 +257,7 @@ def test_decoding_m_mms(self): mms = MMSMessage.from_file(path) self.assertTrue(isinstance(mms, MMSMessage)) headers = { - 'From': 
'676f6c64706f737440686f746d61696c2e636f6d'.decode('hex'), + 'From': binascii.unhexlify(b'676f6c64706f737440686f746d61696c2e636f6d'), 'Transaction-Id': '0000000001', 'MMS-Version': '1.0', 'Message-Type': 'm-retrieve-conf', 'Date': datetime.datetime(2002, 8, 9, 13, 8, 2), @@ -349,7 +350,7 @@ def test_decoding_SEC_SGHS300M(self): 'Sender-Visibility': 'Show', 'From': '', 'Read-Reply': False, 'Message-Class': 'Personal', 'Transaction-Id': '31887', 'MMS-Version': '1.0', - 'To': '303733383334353636342f545950453d504c4d4e'.decode('hex'), + 'To': binascii.unhexlify(b'303733383334353636342f545950453d504c4d4e'), 'Delivery-Report': False, 'Message-Type': 'm-send-req', 'Subject': 'IL', 'Content-Type': ('application/vnd.wap.multipart.mixed', {}), diff --git a/tests/test_sms.py b/tests/test_sms.py index 5f06c78..0b4151d 100644 --- a/tests/test_sms.py +++ b/tests/test_sms.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- from datetime import datetime, timedelta - +import binascii from unittest import TestCase from messaging.sms import SmsSubmit, SmsDeliver @@ -53,7 +53,7 @@ class TestSmsSubmit(TestCase): def test_encoding_validity(self): # no validity - number = '2b3334363136353835313139'.decode('hex') + number = binascii.unhexlify(b'2b3334363136353835313139') text = "hola" expected = "0001000B914316565811F9000004E8373B0C" @@ -64,7 +64,7 @@ def test_encoding_validity(self): self.assertEqual(pdu.pdu, expected) # absolute validity - number = '2b3334363136353835313139'.decode('hex') + number = binascii.unhexlify(b'2b3334363136353835313139') text = "hola" expected = "0019000B914316565811F900000170520251930004E8373B0C" @@ -76,7 +76,7 @@ def test_encoding_validity(self): self.assertEqual(pdu.pdu, expected) # relative validity - number = '2b3334363136353835313139'.decode('hex') + number = binascii.unhexlify(b'2b3334363136353835313139') text = "hola" expected = "0011000B914316565811F90000AA04E8373B0C" expected_len = 18 @@ -90,7 +90,7 @@ def test_encoding_validity(self): 
self.assertEqual(pdu.length, expected_len) def test_encoding_csca(self): - number = '2b3334363136353835313139'.decode('hex') + number = binascii.unhexlify(b'2b3334363136353835313139') text = "hola" csca = "+34646456456" expected = "07914346466554F601000B914316565811F9000004E8373B0C" @@ -107,7 +107,7 @@ def test_encoding_csca(self): self.assertEqual(pdu.seq, 1) def test_encoding_class(self): - number = '2b3334363534313233343536'.decode('hex') + number = binascii.unhexlify(b'2b3334363534313233343536') text = "hey yo" expected_0 = "0001000B914356143254F6001006E8721E947F03" expected_1 = "0001000B914356143254F6001106E8721E947F03" @@ -135,7 +135,7 @@ def test_encoding_class(self): def test_encoding_request_status(self): # tested with pduspy.exe and http://www.rednaxela.net/pdu.php - number = '2b3334363534313233343536'.decode('hex') + number = binascii.unhexlify(b'2b3334363534313233343536') text = "hey yo" expected = "0021000B914356143254F6000006E8721E947F03" @@ -148,7 +148,7 @@ def test_encoding_request_status(self): def test_encoding_message_with_latin1_chars(self): # tested with pduspy.exe - number = '2b3334363534313233343536'.decode('hex') + number = binascii.unhexlify(b'2b3334363534313233343536') text = u"Hölä" expected = "0011000B914356143254F60000AA04483E7B0F" @@ -160,7 +160,7 @@ def test_encoding_message_with_latin1_chars(self): self.assertEqual(pdu.pdu, expected) # tested with pduspy.exe - number = '2b3334363534313233343536'.decode('hex') + number = binascii.unhexlify(b'2b3334363534313233343536') text = u"BÄRÇA äñ@" expected = "0001000B914356143254F6000009C2AD341104EDFB00" @@ -185,8 +185,8 @@ def test_encoding_8bit_message(self): self.assertEqual(pdu.pdu, expected) def test_encoding_ucs2_message(self): - number = '2b3334363136353835313139'.decode('hex') - text = u'あ叶葉' + number = binascii.unhexlify(b'2b3334363136353835313139') + text = 'あ叶葉' csca = '+34646456456' expected = "07914346466554F601000B914316565811F9000806304253F68449" @@ -198,7 +198,7 @@ def 
test_encoding_ucs2_message(self): self.assertEqual(pdu.pdu, expected) text = u"Русский" - number = '363535333435363738'.decode('hex') + number = binascii.unhexlify(b'363535333435363738') expected = "001100098156355476F80008AA0E0420044304410441043A04380439" sms = SmsSubmit(number, text) @@ -211,7 +211,7 @@ def test_encoding_ucs2_message(self): def test_encoding_multipart_7bit(self): # text encoded with umts-tools text = "Or walk with Kings - nor lose the common touch, if neither foes nor loving friends can hurt you, If all men count with you, but none too much; If you can fill the unforgiving minute With sixty seconds' worth of distance run, Yours is the Earth and everything thats in it, And - which is more - you will be a Man, my son" - number = '363535333435363738'.decode('hex') + number = binascii.unhexlify(b'363535333435363738') expected = [ "005100098156355476F80000AAA00500038803019E72D03DCC5E83EE693A1AB44CBBCF73500BE47ECB41ECF7BC0CA2A3CBA0F1BBDD7EBB41F4777D8C6681D26690BB9CA6A3CB7290F95D9E83DC6F3988FDB6A7DD6790599E2EBBC973D038EC06A1EB723A28FFAEB340493328CC6683DA653768FCAEBBE9A07B9A8E06E5DF7516485CA783DC6F7719447FBF41EDFA18BD0325CDA0FCBB0E1A87DD", "005100098156355476F80000AAA005000388030240E6349B0DA2A3CBA0BADBFC969FD3F6B4FB0C6AA7DD757A19744DD3D1A0791A4FCF83E6E5F1DB4D9E9F40F7B79C8E06BDCD20727A4E0FBBC76590BCEE6681B2EFBA7C0E4ACF41747419540CCBE96850D84D0695ED65799E8E4EBBCF203A3A4C9F83D26E509ACE0205DD64500B7447A7C768507A0E6ABFE565500B947FD741F7349B0D129741", @@ -242,8 +242,8 @@ class TestSubmitPduCounts(TestCase): DEST = "+3530000000" GSM_CHAR = "x" - EGSM_CHAR = u"€" - UNICODE_CHAR = u"ő" + EGSM_CHAR = "€" + UNICODE_CHAR = "ő" def test_gsm_1(self): sms = SmsSubmit(self.DEST, self.GSM_CHAR * 160) @@ -320,7 +320,7 @@ def test_decoding_7bit_pdu(self): pdu = "07911326040000F0040B911346610089F60000208062917314080CC8F71D14969741F977FD07" text = "How are you?" 
csca = "+31624000000" - number = '2b3331363431363030393836'.decode('hex') + number = binascii.unhexlify(b'2b3331363431363030393836') sms = SmsDeliver(pdu) self.assertEqual(sms.text, text) @@ -331,7 +331,7 @@ def test_decoding_ucs2_pdu(self): pdu = "07914306073011F0040B914316709807F2000880604290224080084E2D5174901A8BAF" text = u"中兴通讯" csca = "+34607003110" - number = '2b3334363130373839373032'.decode('hex') + number = binascii.unhexlify(b'2b3334363130373839373032') sms = SmsDeliver(pdu) self.assertEqual(sms.text, text) @@ -342,7 +342,7 @@ def test_decoding_7bit_pdu_data(self): pdu = "07911326040000F0040B911346610089F60000208062917314080CC8F71D14969741F977FD07" text = "How are you?" csca = "+31624000000" - number = '2b3331363431363030393836'.decode('hex') + number = binascii.unhexlify(b'2b3331363431363030393836') data = SmsDeliver(pdu).data self.assertEqual(data['text'], text) @@ -355,7 +355,7 @@ def test_decoding_7bit_pdu_data(self): def test_decoding_datetime_gmtplusone(self): pdu = "0791447758100650040C914497716247010000909010711423400A2050EC468B81C4733A" text = " 1741 bst" - number = '2b343437393137323637343130'.decode('hex') + number = binascii.unhexlify(b'2b343437393137323637343130') date = datetime(2009, 9, 1, 16, 41, 32) sms = SmsDeliver(pdu) diff --git a/tests/test_wap.py b/tests/test_wap.py index 26de322..95d7dee 100644 --- a/tests/test_wap.py +++ b/tests/test_wap.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- from array import array from unittest import TestCase +import binascii from messaging.sms import SmsDeliver from messaging.sms.wap import (is_a_wap_push_notification as is_push, @@ -39,7 +40,7 @@ def test_decoding_m_notification_ind(self): 
"0791447758100650400E80885810000000810004016082415464408C0C08049F8E020105040B8423F00106226170706C69636174696F6E2F766E642E7761702E6D6D732D6D65737361676500AF848C82984E4F4B3543694B636F544D595347344D4253774141734B7631344655484141414141414141008D908919802B3434373738353334323734392F545950453D504C4D4E008A808E0274008805810301194083687474703A2F", "0791447758100650440E8088581000000081000401608241547440440C08049F8E020205040B8423F02F70726F6D6D732F736572766C6574732F4E4F4B3543694B636F544D595347344D4253774141734B763134465548414141414141414100", ] - number = '3838383530313030303030303138'.decode('hex') + number = binascii.unhexlify(b'3838383530313030303030303138') csca = "+447785016005" data = "" @@ -66,7 +67,7 @@ def test_decoding_m_notification_ind(self): 'NOK5CiKcoTMYSG4MBSwAAsKv14FUHAAAAAAAA') self.assertEqual(mms.headers['MMS-Version'], '1.0') self.assertEqual(mms.headers['From'], - '2b3434373738353334323734392f545950453d504c4d4e'.decode('hex')) + binascii.unhexlify(b'2b3434373738353334323734392f545950453d504c4d4e')) self.assertEqual(mms.headers['Message-Class'], 'Personal') self.assertEqual(mms.headers['Message-Size'], 29696) self.assertEqual(mms.headers['Expiry'], 72000) @@ -103,7 +104,7 @@ def test_decoding_m_notification_ind(self): 'NOK5A1ZdFTMYSG4O3VQAAsJv94GoNAAAAAAAA') self.assertEqual(mms.headers['MMS-Version'], '1.0') self.assertEqual(mms.headers['From'], - '2b3434373731373237353034392f545950453d504c4d4e'.decode('hex')) + binascii.unhexlify(b'2b3434373731373237353034392f545950453d504c4d4e')) self.assertEqual(mms.headers['Message-Class'], 'Personal') self.assertEqual(mms.headers['Message-Size'], 29696) self.assertEqual(mms.headers['Expiry'], 259199) @@ -115,7 +116,7 @@ def test_decoding_generic_wap_push(self): 
"0791947122725014440C8500947122921105F5112042519582408C0B05040B8423F0000396020101060B03AE81EAC3958D01A2B48403056A0A20566F6461666F6E650045C60C037761702E6D65696E63616C6C79612E64652F000801035A756D206B6F7374656E6C6F73656E20506F7274616C20224D65696E0083000322202D2065696E66616368206175662064656E20666F6C67656E64656E204C696E6B206B6C69636B656E", "0791947122725014440C8500947122921105F5112042519592403C0B05040B8423F00003960202206F6465722064696520536569746520646972656B7420617566727566656E2E2049687200830003205465616D000101", ] - number = '303034393137323232393131'.decode('hex') + number = binascii.unhexlify(b'303034393137323232393131') csca = "+491722270541" data = "" From 45cac685f9bc6b852e4be4ca6594bae2674814fa Mon Sep 17 00:00:00 2001 From: Dom Amato Date: Wed, 8 Apr 2020 20:24:20 -0500 Subject: [PATCH 06/28] more encode decode --- messaging/sms/wap.py | 2 +- tests/test_gsm_encoding.py | 14 +++++++------- tests/test_mms.py | 12 ++++++------ tests/test_sms.py | 30 +++++++++++++++--------------- 4 files changed, 29 insertions(+), 29 deletions(-) diff --git a/messaging/sms/wap.py b/messaging/sms/wap.py index 46611ab..7f5ff90 100644 --- a/messaging/sms/wap.py +++ b/messaging/sms/wap.py @@ -6,7 +6,7 @@ def is_a_wap_push_notification(s): - if not isinstance(s, str): + if not isinstance(s, bytes): raise TypeError("data must be an array.array serialised to string") data = array("B", s) diff --git a/tests/test_gsm_encoding.py b/tests/test_gsm_encoding.py index 00e908b..9bbd0cb 100644 --- a/tests/test_gsm_encoding.py +++ b/tests/test_gsm_encoding.py @@ -18,6 +18,7 @@ """Unittests for the gsm encoding/decoding module""" from unittest import TestCase +import codecs import messaging.sms.gsm0338 # imports GSM7 codec # Reversed from: ftp://ftp.unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT @@ -195,7 +196,7 @@ def test_encoding_supported_unicode_gsm(self): for key in list(MAP.keys()): # Use 'ignore' so that we see the code tested, not an exception - s_gsm = key.encode('gsm0338', 'ignore') + 
s_gsm = codecs.encode(key, 'gsm0338', 'ignore') if len(s_gsm) == 1: i_gsm = ord(s_gsm) @@ -215,7 +216,7 @@ def test_encoding_supported_greek_unicode_gsm(self): for key in list(GREEK_MAP.keys()): # Use 'replace' so that we trigger the mapping - s_gsm = key.encode('gsm0338', 'replace') + s_gsm = codecs.encode(key, 'gsm0338', 'replace') if len(s_gsm) == 1: i_gsm = ord(s_gsm) @@ -229,7 +230,7 @@ def test_encoding_supported_quirk_unicode_gsm(self): for key in list(QUIRK_MAP.keys()): # Use 'replace' so that we trigger the mapping - s_gsm = key.encode('gsm0338', 'replace') + s_gsm = codecs.encode(key, 'gsm0338', 'replace') if len(s_gsm) == 1: i_gsm = ord(s_gsm) @@ -247,18 +248,17 @@ def test_decoding_supported_unicode_gsm(self): s_gsm = chr((i_gsm & 0xff00) >> 8) s_gsm += chr(i_gsm & 0x00ff) - s_unicode = s_gsm.decode('gsm0338', 'strict') + s_unicode = codecs.encode(s_gsm, 'gsm0338', 'strict') self.assertEqual(MAP[key][0], ord(s_unicode)) def test_is_gsm_text_true(self): for key in list(MAP.keys()): if key == chr(0x00a0): continue - self.assertEqual(messaging.sms.gsm0338.is_gsm_text(key), True) + self.assertTrue(messaging.sms.gsm0338.is_gsm_text(key)) def test_is_gsm_text_false(self): - self.assertEqual( - messaging.sms.gsm0338.is_gsm_text(chr(0x00a0)), False) + self.assertFalse(messaging.sms.gsm0338.is_gsm_text(chr(0x00a0))) for i in range(1, 0xffff + 1): if chr(i) not in MAP: diff --git a/tests/test_mms.py b/tests/test_mms.py index 5701c94..c45dbb7 100644 --- a/tests/test_mms.py +++ b/tests/test_mms.py @@ -95,7 +95,7 @@ def test_decoding_TOMSLOT_mms(self): mms = MMSMessage.from_file(path) self.assertTrue(isinstance(mms, MMSMessage)) headers = { - 'From': binascii.unhexlify(b'616c6c616e40746f6d736c6f742e636f6d'), + 'From': binascii.unhexlify(b'616c6c616e40746f6d736c6f742e636f6d').decode(), 'Transaction-Id': '1234', 'MMS-Version': '1.0', 'Message-Type': 'm-retrieve-conf', 'Date': datetime.datetime(2003, 2, 16, 3, 48, 33), @@ -126,7 +126,7 @@ def 
test_decoding_images_are_cut_off_debug_mms(self): headers = { 'From': '', 'Read-Reply': False, 'Transaction-Id': '2112410527', 'MMS-Version': '1.0', - 'To': binascii.unhexlify(b'7464707440616a616a672e63646d'), + 'To': binascii.unhexlify(b'7464707440616a616a672e63646d').decode(), 'Delivery-Report': False, 'Message-Type': 'm-send-req', 'Content-Type': ('application/vnd.wap.multipart.related', {'Start': '', 'Type': 'application/smil'}), @@ -148,7 +148,7 @@ def test_decoding_openwave_mms(self): mms = MMSMessage.from_file(path) self.assertTrue(isinstance(mms, MMSMessage)) headers = { - 'From': binascii.unhexlify(b'2b31363530353535303030302f545950453d504c4d4e'), + 'From': binascii.unhexlify(b'2b31363530353535303030302f545950453d504c4d4e').decode(), 'Message-Class': 'Personal', 'Transaction-Id': '1067263672', 'MMS-Version': '1.0', 'Priority': 'Normal', 'To': '112/TYPE=PLMN', @@ -201,7 +201,7 @@ def test_decoding_gallery2test_mms(self): mms = MMSMessage.from_file(path) self.assertTrue(isinstance(mms, MMSMessage)) headers = { - 'From': binascii.unhexlify(b'2b31363530353535303030302f545950453d504c4d4e'), + 'From': binascii.unhexlify(b'2b31363530353535303030302f545950453d504c4d4e').decode(), 'Message-Class': 'Personal', 'Transaction-Id': '1118775337', 'MMS-Version': '1.0', 'Priority': 'Normal', 'To': 'Jg', 'Delivery-Report': False, @@ -257,7 +257,7 @@ def test_decoding_m_mms(self): mms = MMSMessage.from_file(path) self.assertTrue(isinstance(mms, MMSMessage)) headers = { - 'From': binascii.unhexlify(b'676f6c64706f737440686f746d61696c2e636f6d'), + 'From': binascii.unhexlify(b'676f6c64706f737440686f746d61696c2e636f6d').decode(), 'Transaction-Id': '0000000001', 'MMS-Version': '1.0', 'Message-Type': 'm-retrieve-conf', 'Date': datetime.datetime(2002, 8, 9, 13, 8, 2), @@ -350,7 +350,7 @@ def test_decoding_SEC_SGHS300M(self): 'Sender-Visibility': 'Show', 'From': '', 'Read-Reply': False, 'Message-Class': 'Personal', 'Transaction-Id': '31887', 'MMS-Version': '1.0', - 'To': 
binascii.unhexlify(b'303733383334353636342f545950453d504c4d4e'), + 'To': binascii.unhexlify(b'303733383334353636342f545950453d504c4d4e').decode(), 'Delivery-Report': False, 'Message-Type': 'm-send-req', 'Subject': 'IL', 'Content-Type': ('application/vnd.wap.multipart.mixed', {}), diff --git a/tests/test_sms.py b/tests/test_sms.py index 0b4151d..302fa95 100644 --- a/tests/test_sms.py +++ b/tests/test_sms.py @@ -53,7 +53,7 @@ class TestSmsSubmit(TestCase): def test_encoding_validity(self): # no validity - number = binascii.unhexlify(b'2b3334363136353835313139') + number = binascii.unhexlify(b'2b3334363136353835313139').decode() text = "hola" expected = "0001000B914316565811F9000004E8373B0C" @@ -64,7 +64,7 @@ def test_encoding_validity(self): self.assertEqual(pdu.pdu, expected) # absolute validity - number = binascii.unhexlify(b'2b3334363136353835313139') + number = binascii.unhexlify(b'2b3334363136353835313139').decode() text = "hola" expected = "0019000B914316565811F900000170520251930004E8373B0C" @@ -76,7 +76,7 @@ def test_encoding_validity(self): self.assertEqual(pdu.pdu, expected) # relative validity - number = binascii.unhexlify(b'2b3334363136353835313139') + number = binascii.unhexlify(b'2b3334363136353835313139').decode() text = "hola" expected = "0011000B914316565811F90000AA04E8373B0C" expected_len = 18 @@ -90,7 +90,7 @@ def test_encoding_validity(self): self.assertEqual(pdu.length, expected_len) def test_encoding_csca(self): - number = binascii.unhexlify(b'2b3334363136353835313139') + number = binascii.unhexlify(b'2b3334363136353835313139').decode() text = "hola" csca = "+34646456456" expected = "07914346466554F601000B914316565811F9000004E8373B0C" @@ -107,7 +107,7 @@ def test_encoding_csca(self): self.assertEqual(pdu.seq, 1) def test_encoding_class(self): - number = binascii.unhexlify(b'2b3334363534313233343536') + number = binascii.unhexlify(b'2b3334363534313233343536').decode() text = "hey yo" expected_0 = "0001000B914356143254F6001006E8721E947F03" 
expected_1 = "0001000B914356143254F6001106E8721E947F03" @@ -135,7 +135,7 @@ def test_encoding_class(self): def test_encoding_request_status(self): # tested with pduspy.exe and http://www.rednaxela.net/pdu.php - number = binascii.unhexlify(b'2b3334363534313233343536') + number = binascii.unhexlify(b'2b3334363534313233343536').decode() text = "hey yo" expected = "0021000B914356143254F6000006E8721E947F03" @@ -148,7 +148,7 @@ def test_encoding_request_status(self): def test_encoding_message_with_latin1_chars(self): # tested with pduspy.exe - number = binascii.unhexlify(b'2b3334363534313233343536') + number = binascii.unhexlify(b'2b3334363534313233343536').decode() text = u"Hölä" expected = "0011000B914356143254F60000AA04483E7B0F" @@ -160,7 +160,7 @@ def test_encoding_message_with_latin1_chars(self): self.assertEqual(pdu.pdu, expected) # tested with pduspy.exe - number = binascii.unhexlify(b'2b3334363534313233343536') + number = binascii.unhexlify(b'2b3334363534313233343536').decode() text = u"BÄRÇA äñ@" expected = "0001000B914356143254F6000009C2AD341104EDFB00" @@ -185,7 +185,7 @@ def test_encoding_8bit_message(self): self.assertEqual(pdu.pdu, expected) def test_encoding_ucs2_message(self): - number = binascii.unhexlify(b'2b3334363136353835313139') + number = binascii.unhexlify(b'2b3334363136353835313139').decode() text = 'あ叶葉' csca = '+34646456456' expected = "07914346466554F601000B914316565811F9000806304253F68449" @@ -198,7 +198,7 @@ def test_encoding_ucs2_message(self): self.assertEqual(pdu.pdu, expected) text = u"Русский" - number = binascii.unhexlify(b'363535333435363738') + number = binascii.unhexlify(b'363535333435363738').decode() expected = "001100098156355476F80008AA0E0420044304410441043A04380439" sms = SmsSubmit(number, text) @@ -211,7 +211,7 @@ def test_encoding_ucs2_message(self): def test_encoding_multipart_7bit(self): # text encoded with umts-tools text = "Or walk with Kings - nor lose the common touch, if neither foes nor loving friends can hurt you, If 
all men count with you, but none too much; If you can fill the unforgiving minute With sixty seconds' worth of distance run, Yours is the Earth and everything thats in it, And - which is more - you will be a Man, my son" - number = binascii.unhexlify(b'363535333435363738') + number = binascii.unhexlify(b'363535333435363738').decode() expected = [ "005100098156355476F80000AAA00500038803019E72D03DCC5E83EE693A1AB44CBBCF73500BE47ECB41ECF7BC0CA2A3CBA0F1BBDD7EBB41F4777D8C6681D26690BB9CA6A3CB7290F95D9E83DC6F3988FDB6A7DD6790599E2EBBC973D038EC06A1EB723A28FFAEB340493328CC6683DA653768FCAEBBE9A07B9A8E06E5DF7516485CA783DC6F7719447FBF41EDFA18BD0325CDA0FCBB0E1A87DD", "005100098156355476F80000AAA005000388030240E6349B0DA2A3CBA0BADBFC969FD3F6B4FB0C6AA7DD757A19744DD3D1A0791A4FCF83E6E5F1DB4D9E9F40F7B79C8E06BDCD20727A4E0FBBC76590BCEE6681B2EFBA7C0E4ACF41747419540CCBE96850D84D0695ED65799E8E4EBBCF203A3A4C9F83D26E509ACE0205DD64500B7447A7C768507A0E6ABFE565500B947FD741F7349B0D129741", @@ -320,7 +320,7 @@ def test_decoding_7bit_pdu(self): pdu = "07911326040000F0040B911346610089F60000208062917314080CC8F71D14969741F977FD07" text = "How are you?" csca = "+31624000000" - number = binascii.unhexlify(b'2b3331363431363030393836') + number = binascii.unhexlify(b'2b3331363431363030393836').decode() sms = SmsDeliver(pdu) self.assertEqual(sms.text, text) @@ -331,7 +331,7 @@ def test_decoding_ucs2_pdu(self): pdu = "07914306073011F0040B914316709807F2000880604290224080084E2D5174901A8BAF" text = u"中兴通讯" csca = "+34607003110" - number = binascii.unhexlify(b'2b3334363130373839373032') + number = binascii.unhexlify(b'2b3334363130373839373032').decode() sms = SmsDeliver(pdu) self.assertEqual(sms.text, text) @@ -342,7 +342,7 @@ def test_decoding_7bit_pdu_data(self): pdu = "07911326040000F0040B911346610089F60000208062917314080CC8F71D14969741F977FD07" text = "How are you?" 
csca = "+31624000000" - number = binascii.unhexlify(b'2b3331363431363030393836') + number = binascii.unhexlify(b'2b3331363431363030393836').decode() data = SmsDeliver(pdu).data self.assertEqual(data['text'], text) @@ -355,7 +355,7 @@ def test_decoding_7bit_pdu_data(self): def test_decoding_datetime_gmtplusone(self): pdu = "0791447758100650040C914497716247010000909010711423400A2050EC468B81C4733A" text = " 1741 bst" - number = binascii.unhexlify(b'2b343437393137323637343130') + number = binascii.unhexlify(b'2b343437393137323637343130').decode() date = datetime(2009, 9, 1, 16, 41, 32) sms = SmsDeliver(pdu) From 03a1a7e3f4c3c64ed1e6c4f82b82ea5caa981f65 Mon Sep 17 00:00:00 2001 From: Dom Amato Date: Wed, 8 Apr 2020 20:53:25 -0500 Subject: [PATCH 07/28] try other ways --- messaging/sms/gsm0338.py | 2 +- tests/test_gsm_encoding.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/messaging/sms/gsm0338.py b/messaging/sms/gsm0338.py index 5b90950..c8035cf 100644 --- a/messaging/sms/gsm0338.py +++ b/messaging/sms/gsm0338.py @@ -283,7 +283,7 @@ def getregentry(encoding): def is_gsm_text(text): """Returns True if ``text`` can be encoded as gsm text""" try: - text.encode("gsm0338") + codec.encode(text, "gsm0338") except UnicodeError: return False except: diff --git a/tests/test_gsm_encoding.py b/tests/test_gsm_encoding.py index 9bbd0cb..d781968 100644 --- a/tests/test_gsm_encoding.py +++ b/tests/test_gsm_encoding.py @@ -248,7 +248,7 @@ def test_decoding_supported_unicode_gsm(self): s_gsm = chr((i_gsm & 0xff00) >> 8) s_gsm += chr(i_gsm & 0x00ff) - s_unicode = codecs.encode(s_gsm, 'gsm0338', 'strict') + s_unicode = codecs.decode(s_gsm, 'gsm0338', 'strict') self.assertEqual(MAP[key][0], ord(s_unicode)) def test_is_gsm_text_true(self): From 2f4a85f081ca39c9d2094a00fb885115e3ea633f Mon Sep 17 00:00:00 2001 From: Dom Amato Date: Thu, 9 Apr 2020 11:06:53 -0500 Subject: [PATCH 08/28] try to fix is gsm text --- messaging/sms/gsm0338.py | 4 ++-- 
tests/test_gsm_encoding.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/messaging/sms/gsm0338.py b/messaging/sms/gsm0338.py index c8035cf..76081c7 100644 --- a/messaging/sms/gsm0338.py +++ b/messaging/sms/gsm0338.py @@ -271,7 +271,7 @@ def decode(input_, errors='strict'): # encodings module API def getregentry(encoding): - if encoding == 'gsm0338': + if encoding in ('gsm0338', 'gsm7'): return codecs.CodecInfo(name='gsm0338', encode=encode, decode=decode) @@ -283,7 +283,7 @@ def getregentry(encoding): def is_gsm_text(text): """Returns True if ``text`` can be encoded as gsm text""" try: - codec.encode(text, "gsm0338") + codec.encode(text, 'gsm0338') except UnicodeError: return False except: diff --git a/tests/test_gsm_encoding.py b/tests/test_gsm_encoding.py index d781968..47b3719 100644 --- a/tests/test_gsm_encoding.py +++ b/tests/test_gsm_encoding.py @@ -19,7 +19,7 @@ from unittest import TestCase import codecs -import messaging.sms.gsm0338 # imports GSM7 codec +from messaging.sms.gsm0338 import is_gsm_text # imports GSM7 codec # Reversed from: ftp://ftp.unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT MAP = { @@ -255,13 +255,13 @@ def test_is_gsm_text_true(self): for key in list(MAP.keys()): if key == chr(0x00a0): continue - self.assertTrue(messaging.sms.gsm0338.is_gsm_text(key)) + self.assertTrue(is_gsm_text(key)) def test_is_gsm_text_false(self): - self.assertFalse(messaging.sms.gsm0338.is_gsm_text(chr(0x00a0))) + self.assertFalse(is_gsm_text(chr(0x00a0))) for i in range(1, 0xffff + 1): if chr(i) not in MAP: # Note: it's a little odd, but on error we want to see values - if messaging.sms.gsm0338.is_gsm_text(chr(i)) is not False: + if is_gsm_text(chr(i)) is not False: self.assertEqual(BAD, i) From 4dcdbfec82c5ca190f5c2addd48933070c3eed05 Mon Sep 17 00:00:00 2001 From: Dom Amato Date: Thu, 9 Apr 2020 11:22:27 -0500 Subject: [PATCH 09/28] log and spelling error --- messaging/sms/gsm0338.py | 2 +- tests/test_gsm_encoding.py | 5 ++++- 2 
files changed, 5 insertions(+), 2 deletions(-) diff --git a/messaging/sms/gsm0338.py b/messaging/sms/gsm0338.py index 76081c7..208857c 100644 --- a/messaging/sms/gsm0338.py +++ b/messaging/sms/gsm0338.py @@ -283,7 +283,7 @@ def getregentry(encoding): def is_gsm_text(text): """Returns True if ``text`` can be encoded as gsm text""" try: - codec.encode(text, 'gsm0338') + codecs.encode(text, 'gsm0338') except UnicodeError: return False except: diff --git a/tests/test_gsm_encoding.py b/tests/test_gsm_encoding.py index 47b3719..b53197a 100644 --- a/tests/test_gsm_encoding.py +++ b/tests/test_gsm_encoding.py @@ -206,6 +206,9 @@ def test_encoding_supported_unicode_gsm(self): i_gsm = BAD # so we see the comparison, not an exception # We shouldn't generate an invalid escape sequence + print(i_gsm) + print(MAP) + print(MAP[key]) if key == chr(0x00a0): self.assertEqual(BAD, i_gsm) else: @@ -248,7 +251,7 @@ def test_decoding_supported_unicode_gsm(self): s_gsm = chr((i_gsm & 0xff00) >> 8) s_gsm += chr(i_gsm & 0x00ff) - s_unicode = codecs.decode(s_gsm, 'gsm0338', 'strict') + s_unicode = codecs.decode(s_gsm, 'gsm0338') self.assertEqual(MAP[key][0], ord(s_unicode)) def test_is_gsm_text_true(self): From 0e04c28b8beff4a631d49116b52f9cfed57e6b8c Mon Sep 17 00:00:00 2001 From: Dom Amato Date: Thu, 9 Apr 2020 11:30:17 -0500 Subject: [PATCH 10/28] fix dumb deletion --- tests/test_gsm_encoding.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/test_gsm_encoding.py b/tests/test_gsm_encoding.py index b53197a..a5fda42 100644 --- a/tests/test_gsm_encoding.py +++ b/tests/test_gsm_encoding.py @@ -157,7 +157,7 @@ '¡': (0x00a1, 0x40), '/': (0x002f, 0x2f), 'o': (0x006f, 0x6f), - '': (0x0075, 0x75), + 'u': (0x0075, 0x75), 'w': (0x0077, 0x77), 'y': (0x0079, 0x79), 'e': (0x0065, 0x65), @@ -206,9 +206,6 @@ def test_encoding_supported_unicode_gsm(self): i_gsm = BAD # so we see the comparison, not an exception # We shouldn't generate an invalid escape sequence - print(i_gsm) 
- print(MAP) - print(MAP[key]) if key == chr(0x00a0): self.assertEqual(BAD, i_gsm) else: From 82c460f2f08ecad2f64968887d61621d741a4460 Mon Sep 17 00:00:00 2001 From: Dom Amato Date: Thu, 9 Apr 2020 11:40:30 -0500 Subject: [PATCH 11/28] byte string and codec encode --- messaging/sms/submit.py | 3 ++- tests/test_mms.py | 38 +++++++++++++++++++------------------- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/messaging/sms/submit.py b/messaging/sms/submit.py index 71764fe..528de7e 100644 --- a/messaging/sms/submit.py +++ b/messaging/sms/submit.py @@ -3,6 +3,7 @@ from datetime import datetime, timedelta import re +import codecs from messaging.sms import consts from messaging.utils import (debug, encode_str, clean_number, @@ -243,7 +244,7 @@ def _get_msg_pdu(self): message_pdu = "" if self.fmt == 0x00: - self.text_gsm = self.text.encode("gsm0338") + self.text_gsm = codecs.encode(self.text, "gsm0338") if len(self.text_gsm) <= consts.SEVENBIT_SIZE: message_pdu = [pack_8bits_to_7bits(self.text_gsm)] else: diff --git a/tests/test_mms.py b/tests/test_mms.py index c45dbb7..aff6382 100644 --- a/tests/test_mms.py +++ b/tests/test_mms.py @@ -36,7 +36,7 @@ def test_decoding_iPhone_mms(self): 'Message-Type': 'm-send-req', 'Content-Type': ('application/vnd.wap.multipart.related', {'Start': '0.smil', 'Type': 'application/smil'}), } - smil_data = '\n\n\n \n\n\n\n\n\n\n\n\n\n\n' + smil_data = b'\n\n\n \n\n\n\n\n\n\n\n\n\n\n' self.assertEqual(mms.headers, headers) self.assertEqual(mms.content_type, 'application/vnd.wap.multipart.related') @@ -58,7 +58,7 @@ def test_decoding_SIMPLE_mms(self): 'Content-Type': ('application/vnd.wap.multipart.related', {}), 'Subject': 'Simple message', } - text_data = "This is a simple MMS message with a single text body part." + text_data = b"This is a simple MMS message with a single text body part." 
self.assertEqual(mms.headers, headers) self.assertEqual(mms.content_type, 'application/vnd.wap.multipart.related') @@ -77,8 +77,8 @@ def test_decoding_BTMMS_mms(self): 'Content-Type': ('application/vnd.wap.multipart.related', {'Start': '', 'Type': 'application/smil'}), 'Subject': 'BT Ignite MMS', } - smil_data = '\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n' - text_data = 'BT Ignite\r\n\r\nMMS Services' + smil_data = b'\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n' + text_data = b'BT Ignite\r\n\r\nMMS Services' self.assertEqual(mms.headers, headers) self.assertEqual(mms.content_type, 'application/vnd.wap.multipart.related') @@ -102,8 +102,8 @@ def test_decoding_TOMSLOT_mms(self): 'Content-Type': ('application/vnd.wap.multipart.related', {'Start': '', 'Type': 'application/smil'}), 'Subject': 'Tom Slot Band', } - smil_data = '\r\n\t\r\n\t\t\r\n\t\t\r\n\t\t\r\n\t\t\t\r\n\t\t\t\r\n\t\t\t\r\n\t\t\r\n\t\r\n\t\r\n\t\t\r\n\t\t\t\r\n\t\t\r\n\t\t\r\n\t\t\t\r\n\t\t\r\n\t\t\r\n\t\t\t\r\n\t\t\r\n\t\t\r\n\t\t\t\r\n\t\t\r\n\t\t\r\n\t\t\t\r\n\t\t\t\r\n\t\t\t\t\r\n\t\t\t\r\n\t\t\t\r\n\t\r\n\r\n' - text_data = 'Presented by NowMMS\r\n' + smil_data = b'\r\n\t\r\n\t\t\r\n\t\t\r\n\t\t\r\n\t\t\t\r\n\t\t\t\r\n\t\t\t\r\n\t\t\r\n\t\r\n\t\r\n\t\t\r\n\t\t\t\r\n\t\t\r\n\t\t\r\n\t\t\t\r\n\t\t\r\n\t\t\r\n\t\t\t\r\n\t\t\r\n\t\t\r\n\t\t\t\r\n\t\t\r\n\t\t\r\n\t\t\t\r\n\t\t\t\r\n\t\t\t\t\r\n\t\t\t\r\n\t\t\t\r\n\t\r\n\r\n' + text_data = b'Presented by NowMMS\r\n' self.assertEqual(mms.headers, headers) self.assertEqual(mms.content_type, 'application/vnd.wap.multipart.related') @@ -132,7 +132,7 @@ def test_decoding_images_are_cut_off_debug_mms(self): 'Content-Type': ('application/vnd.wap.multipart.related', {'Start': '', 'Type': 'application/smil'}), 'Subject': 'Picture3', } - smil_data = '' + smil_data = b'' self.assertEqual(mms.headers, headers) self.assertEqual(len(mms.data_parts), 2) self.assertEqual(mms.content_type, @@ -156,8 +156,8 @@ def test_decoding_openwave_mms(self): 
'Content-Type': ('application/vnd.wap.multipart.related', {'Start': '', 'Type': 'application/smil'}), 'Subject': 'rubrik', } - smil_data = '\n \n \n \n \n \n \n \n \n \n \n \n \n\n' - text_data = 'rubrik' + smil_data = b'\n \n \n \n \n \n \n \n \n \n \n \n \n\n' + text_data = b'rubrik' self.assertEqual(mms.headers, headers) self.assertEqual(len(mms.data_parts), 2) self.assertEqual(mms.content_type, @@ -179,8 +179,8 @@ def test_decoding_SonyEricssonT310_R201_mms(self): 'Date': datetime.datetime(2004, 3, 18, 7, 30, 34), 'Content-Type': ('application/vnd.wap.multipart.related', {'Start': '', 'Type': 'application/smil'}), } - text_data = 'Hej hopp' - smil_data = '\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n' + text_data = b'Hej hopp' + smil_data = b'\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n' self.assertEqual(mms.headers, headers) self.assertEqual(len(mms.data_parts), 4) self.assertEqual(mms.content_type, @@ -209,8 +209,8 @@ def test_decoding_gallery2test_mms(self): 'Content-Type': ('application/vnd.wap.multipart.related', {'Start': '', 'Type': 'application/smil'}), 'Subject': 'Jgj', } - text_data = 'Jgj' - smil_data = '\n \n \n \n \n \n \n \n \n \n gnu-head\n \n \n \n\n' + text_data = b'Jgj' + smil_data = b'\n \n \n \n \n \n \n \n \n \n gnu-head\n \n \n \n\n' self.assertEqual(mms.headers, headers) self.assertEqual(len(mms.data_parts), 3) self.assertEqual(mms.content_type, @@ -238,8 +238,8 @@ def test_decoding_projekt_exempel_mms(self): 'Content-Type': ('application/vnd.wap.multipart.related', {'Start': '', 'Type': 'application/smil'}), 'Subject': 'Hej', } - smil_data = '\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n' - text_data = 'Jonatan \xc3\xa4r en GNU' + smil_data = b'\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n' + text_data = b'Jonatan \xc3\xa4r en GNU' self.assertEqual(mms.headers, headers) self.assertEqual(len(mms.data_parts), 3) self.assertEqual(mms.content_type, @@ -271,7 +271,7 @@ def test_decoding_m_mms(self): text_data5 = 'KLONE\r\nKLONE\r\n' text_data6 = 
'pr\xe4sentiert..' text_data7 = 'GOLD' - smil_data = '\r\n\r\n\r\n\r\n\r\n\r\n\r\n \r\n \r\n\r\n\r\n\r\n\r\n \r\n \r\n\r\n\r\n\r\n\r\n \r\n \r\n\r\n\r\n\r\n\r\n \r\n \r\n\r\n\r\n\r\n\r\n\r\n \r\n \r\n\r\n\r\n\r\n\r\n \r\n \r\n\r\n\r\n\r\n\r\n\r\n \r\n \r\n\r\n\r\n' + smil_data = b'\r\n\r\n\r\n\r\n\r\n\r\n\r\n \r\n \r\n\r\n\r\n\r\n\r\n \r\n \r\n\r\n\r\n\r\n\r\n \r\n \r\n\r\n\r\n\r\n\r\n \r\n \r\n\r\n\r\n\r\n\r\n\r\n \r\n \r\n\r\n\r\n\r\n\r\n \r\n \r\n\r\n\r\n\r\n\r\n\r\n \r\n \r\n\r\n\r\n' self.assertEqual(mms.headers, headers) self.assertEqual(len(mms.data_parts), 9) self.assertEqual(mms.content_type, 'application/vnd.wap.multipart.related') @@ -324,8 +324,8 @@ def test_decoding_27d0a048cd79555de05283a22372b0eb_mms(self): 'Subject': 'Angående art-tillhörighet', #'Subject': 'Ang\xc3\xa5ende art-tillh\xc3\xb6righet', } - smil_data = '\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n' - text_data = 'Jonatan \xc3\xa4r en gnu.' + smil_data = b'\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n' + text_data = b'Jonatan \xc3\xa4r en gnu.' 
self.assertEqual(mms.headers, headers) self.assertEqual(len(mms.data_parts), 3) self.assertEqual(mms.content_type, @@ -355,7 +355,7 @@ def test_decoding_SEC_SGHS300M(self): 'Message-Type': 'm-send-req', 'Subject': 'IL', 'Content-Type': ('application/vnd.wap.multipart.mixed', {}), } - text_data = 'HV' + text_data = b'HV' self.assertEqual(mms.headers, headers) self.assertEqual(len(mms.data_parts), 1) self.assertEqual(mms.content_type, From 0026a63bcfb1f5ee8c140bee0dc3a052245d1ffc Mon Sep 17 00:00:00 2001 From: Dom Amato Date: Thu, 9 Apr 2020 11:49:00 -0500 Subject: [PATCH 12/28] byte strings and silence depreciation --- messaging/utils.py | 3 ++- tests/test_mms.py | 14 +++++++------- tests/test_wap.py | 2 +- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/messaging/utils.py b/messaging/utils.py index 9544a4d..ffc35cb 100644 --- a/messaging/utils.py +++ b/messaging/utils.py @@ -2,6 +2,7 @@ from datetime import timedelta, tzinfo from math import floor import sys +import codecs class FixedOffset(tzinfo): @@ -46,7 +47,7 @@ def dst(self, dt): def bytes_to_str(b): if sys.version_info >= (3,): - return b.decode('latin1') + return codecs.decode(b, 'latin1') return b diff --git a/tests/test_mms.py b/tests/test_mms.py index aff6382..c93770b 100644 --- a/tests/test_mms.py +++ b/tests/test_mms.py @@ -264,13 +264,13 @@ def test_decoding_m_mms(self): 'Content-Type': ('application/vnd.wap.multipart.related', {'Start': '', 'Type': 'application/smil'}), 'Subject': 'GOLD', } - text_data1 = 'Audio' - text_data2 = 'Text +' - text_data3 = 'tagtag.com/gold\r\n' - text_data4 = 'globalisierunglobalisierunglobalisierunglobalisierunglobalisierunglobalisierunglobalisierungnureisilabolg' - text_data5 = 'KLONE\r\nKLONE\r\n' - text_data6 = 'pr\xe4sentiert..' 
- text_data7 = 'GOLD' + text_data1 = b'Audio' + text_data2 = b'Text +' + text_data3 = b'tagtag.com/gold\r\n' + text_data4 = b'globalisierunglobalisierunglobalisierunglobalisierunglobalisierunglobalisierunglobalisierungnureisilabolg' + text_data5 = b'KLONE\r\nKLONE\r\n' + text_data6 = b'pr\xe4sentiert..' + text_data7 = b'GOLD' smil_data = b'\r\n\r\n\r\n\r\n\r\n\r\n\r\n \r\n \r\n\r\n\r\n\r\n\r\n \r\n \r\n\r\n\r\n\r\n\r\n \r\n \r\n\r\n\r\n\r\n\r\n \r\n \r\n\r\n\r\n\r\n\r\n\r\n \r\n \r\n\r\n\r\n\r\n\r\n \r\n \r\n\r\n\r\n\r\n\r\n\r\n \r\n \r\n\r\n\r\n' self.assertEqual(mms.headers, headers) self.assertEqual(len(mms.data_parts), 9) diff --git a/tests/test_wap.py b/tests/test_wap.py index 95d7dee..ef38ce6 100644 --- a/tests/test_wap.py +++ b/tests/test_wap.py @@ -11,7 +11,7 @@ def list_to_str(l): a = array("B", l) - return a.tostring() + return a.tobytes() class TestSmsWapPush(TestCase): From 7c2b44a8d18be9f63db567f02788be3f18461faa Mon Sep 17 00:00:00 2001 From: Dom Amato Date: Thu, 9 Apr 2020 12:00:18 -0500 Subject: [PATCH 13/28] try encoding to latin1 --- messaging/utils.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/messaging/utils.py b/messaging/utils.py index ffc35cb..e3c9f86 100644 --- a/messaging/utils.py +++ b/messaging/utils.py @@ -46,10 +46,7 @@ def dst(self, dt): def bytes_to_str(b): - if sys.version_info >= (3,): - return codecs.decode(b, 'latin1') - - return b + return codecs.encode(b, 'latin1') def to_array(pdu): @@ -57,10 +54,7 @@ def to_array(pdu): def to_bytes(s): - if sys.version_info >= (3,): - return bytes(s) - - return ''.join(map(chr, s)) + return bytes(s) def debug(s): From fad0b72c0b9117440a57124a239d31c207da1de7 Mon Sep 17 00:00:00 2001 From: Dom Amato Date: Thu, 9 Apr 2020 12:05:58 -0500 Subject: [PATCH 14/28] bytes string --- messaging/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/messaging/utils.py b/messaging/utils.py index e3c9f86..1f07a4d 100644 --- a/messaging/utils.py +++ 
b/messaging/utils.py @@ -96,7 +96,7 @@ def pack_8bits_to_7bits(message, udh=None): if udh is None: tl = len(txt) - txt += '\x00' + txt += b'\x00' msgl = int(len(txt) * 7 / 8) op = [-1] * msgl c = shift = 0 @@ -113,10 +113,10 @@ def pack_8bits_to_7bits(message, udh=None): pdu = chr(tl) + ''.join(map(chr, op)) else: - txt = "\x00\x00\x00\x00\x00\x00" + txt + txt = b"\x00\x00\x00\x00\x00\x00" + txt tl = len(txt) - txt += '\x00' + txt += b'\x00' msgl = int(len(txt) * 7 / 8) op = [-1] * msgl c = shift = 0 From 26b1bb68564191f37909c29a007b060ca2ba3f60 Mon Sep 17 00:00:00 2001 From: Dom Amato Date: Thu, 9 Apr 2020 12:11:59 -0500 Subject: [PATCH 15/28] dont need to use ordinal on byte string --- messaging/utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/messaging/utils.py b/messaging/utils.py index 1f07a4d..4bf24aa 100644 --- a/messaging/utils.py +++ b/messaging/utils.py @@ -106,8 +106,8 @@ def pack_8bits_to_7bits(message, udh=None): c += 1 shift = n % 7 - lb = ord(txt[c]) >> shift - hb = (ord(txt[c + 1]) << (7 - shift) & 255) + lb = txt[c] >> shift + hb = (txt[c + 1] << (7 - shift) & 255) op[n] = lb + hb c += 1 @@ -126,8 +126,8 @@ def pack_8bits_to_7bits(message, udh=None): c += 1 shift = n % 7 - lb = ord(txt[c]) >> shift - hb = (ord(txt[c + 1]) << (7 - shift) & 255) + lb = txt[c] >> shift + hb = (txt[c + 1] << (7 - shift) & 255) op[n] = lb + hb c += 1 From 30ee755f76bafbf5180514ea189d2612bb0da790 Mon Sep 17 00:00:00 2001 From: Dom Amato Date: Thu, 9 Apr 2020 12:20:07 -0500 Subject: [PATCH 16/28] convert to list --- messaging/sms/submit.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/messaging/sms/submit.py b/messaging/sms/submit.py index 528de7e..bbcda34 100644 --- a/messaging/sms/submit.py +++ b/messaging/sms/submit.py @@ -326,6 +326,6 @@ def _split_sms_message(self, text): def _get_rand_id(self): if not self.id_list: - self.id_list = list(range(0, 255)) + self.id_list = range(0, 255) - return 
self.id_list.pop(0) + return list(self.id_list).pop(0) From b679235c3ecbb09299dab7ff6dc35174afe7dcba Mon Sep 17 00:00:00 2001 From: Dom Amato Date: Thu, 9 Apr 2020 12:41:50 -0500 Subject: [PATCH 17/28] log more --- messaging/sms/deliver.py | 6 ++++++ tests/test_sms.py | 12 ++++++------ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/messaging/sms/deliver.py b/messaging/sms/deliver.py index 37714e6..c0e9255 100644 --- a/messaging/sms/deliver.py +++ b/messaging/sms/deliver.py @@ -173,7 +173,13 @@ def _process_message(self, data): if self.fmt == 0x00: # XXX: Use unpack_msg2 + print(data) data = data[ud_len:].tolist() + print(data) + print(msg) + print(unpack_msg(msg)) + print(unpack_msg(msg)[headlen:msgl]) + print(unpack_msg2(data)) #self.text = unpack_msg2(data).decode("gsm0338") self.text = unpack_msg(msg)[headlen:msgl].decode("gsm0338") diff --git a/tests/test_sms.py b/tests/test_sms.py index 302fa95..d6fe65a 100644 --- a/tests/test_sms.py +++ b/tests/test_sms.py @@ -149,7 +149,7 @@ def test_encoding_request_status(self): def test_encoding_message_with_latin1_chars(self): # tested with pduspy.exe number = binascii.unhexlify(b'2b3334363534313233343536').decode() - text = u"Hölä" + text = "Hölä" expected = "0011000B914356143254F60000AA04483E7B0F" sms = SmsSubmit(number, text) @@ -161,7 +161,7 @@ def test_encoding_message_with_latin1_chars(self): # tested with pduspy.exe number = binascii.unhexlify(b'2b3334363534313233343536').decode() - text = u"BÄRÇA äñ@" + text = "BÄRÇA äñ@" expected = "0001000B914356143254F6000009C2AD341104EDFB00" sms = SmsSubmit(number, text) @@ -197,7 +197,7 @@ def test_encoding_ucs2_message(self): pdu = sms.to_pdu()[0] self.assertEqual(pdu.pdu, expected) - text = u"Русский" + text = "Русский" number = binascii.unhexlify(b'363535333435363738').decode() expected = "001100098156355476F80008AA0E0420044304410441043A04380439" @@ -329,7 +329,7 @@ def test_decoding_7bit_pdu(self): def test_decoding_ucs2_pdu(self): pdu = 
"07914306073011F0040B914316709807F2000880604290224080084E2D5174901A8BAF" - text = u"中兴通讯" + text = "中兴通讯" csca = "+34607003110" number = binascii.unhexlify(b'2b3334363130373839373032').decode() @@ -413,8 +413,8 @@ def test_decode_weird_multipart_german_pdu(self): "07919471227210244405852122F039F1015062712181804F050003190202E4E8309B5E7683DAFC319A5E76B340F73D9A5D7683A6E93268FD9ED3CB6EF67B0E5AD172B19B2C2693C9602E90355D6683A6F0B007946E8382F5393BEC26BB00", ] texts = [ - u"Lieber Vodafone-Kunde, mit Ihrer nationalen Tarifoption zahlen Sie in diesem Netz 3,45 € pro MB plus 59 Ct pro Session. Wenn Sie diese Info nicht mehr e", - u"rhalten möchten, wählen Sie kostenlos +4917212220. Viel Spaß im Ausland.", + "Lieber Vodafone-Kunde, mit Ihrer nationalen Tarifoption zahlen Sie in diesem Netz 3,45 € pro MB plus 59 Ct pro Session. Wenn Sie diese Info nicht mehr e", + "rhalten möchten, wählen Sie kostenlos +4917212220. Viel Spaß im Ausland.", ] for i, sms in enumerate(map(SmsDeliver, pdus)): From f299c7009be17e3df6a64afd29b40bf0715f0e31 Mon Sep 17 00:00:00 2001 From: Dom Amato Date: Thu, 9 Apr 2020 12:52:26 -0500 Subject: [PATCH 18/28] try other method and say which character failed to encode --- messaging/sms/deliver.py | 5 ++++- messaging/sms/gsm0338.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/messaging/sms/deliver.py b/messaging/sms/deliver.py index c0e9255..c74cb2e 100644 --- a/messaging/sms/deliver.py +++ b/messaging/sms/deliver.py @@ -180,7 +180,10 @@ def _process_message(self, data): print(unpack_msg(msg)) print(unpack_msg(msg)[headlen:msgl]) print(unpack_msg2(data)) - #self.text = unpack_msg2(data).decode("gsm0338") + try: + self.text = unpack_msg2(data).decode("gsm0338") + except UnicodeError: + print('Unable To decode msg2') self.text = unpack_msg(msg)[headlen:msgl].decode("gsm0338") elif self.fmt == 0x04: diff --git a/messaging/sms/gsm0338.py b/messaging/sms/gsm0338.py index 208857c..7fc4053 100644 --- a/messaging/sms/gsm0338.py +++ 
b/messaging/sms/gsm0338.py @@ -257,7 +257,7 @@ def decode(input_, errors='strict'): except KeyError: # error handling: unassigned byte, must be > 0x7f if errors == 'strict': - raise UnicodeError("Unrecognized GSM character") + raise UnicodeError("Unrecognized GSM character %s" % c) elif errors == 'replace': result.append('?') elif errors == 'ignore': From fabf96ab8f3ca341b01cfa19fd745457e8e3595f Mon Sep 17 00:00:00 2001 From: Dom Amato Date: Thu, 9 Apr 2020 13:02:49 -0500 Subject: [PATCH 19/28] more explicit logging --- messaging/sms/gsm0338.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/messaging/sms/gsm0338.py b/messaging/sms/gsm0338.py index 7fc4053..e2b5da4 100644 --- a/messaging/sms/gsm0338.py +++ b/messaging/sms/gsm0338.py @@ -257,7 +257,7 @@ def decode(input_, errors='strict'): except KeyError: # error handling: unassigned byte, must be > 0x7f if errors == 'strict': - raise UnicodeError("Unrecognized GSM character %s" % c) + raise UnicodeError("Unrecognized GSM character %s at index %i of input %s" % (hex(c), index, input_)) elif errors == 'replace': result.append('?') elif errors == 'ignore': From 37ba153c18e076bb6966af44a22fa633d300b37e Mon Sep 17 00:00:00 2001 From: Dom Amato Date: Thu, 9 Apr 2020 13:11:32 -0500 Subject: [PATCH 20/28] use newer gsm parser --- messaging/sms/__init__.py | 4 +- messaging/sms/gsm0338.py | 353 ++++++++---------------------------- messaging/sms/gsm0338old.py | 293 ++++++++++++++++++++++++++++++ messaging/sms/submit.py | 4 +- tests/test_gsm_encoding.py | 12 +- 5 files changed, 382 insertions(+), 284 deletions(-) create mode 100644 messaging/sms/gsm0338old.py diff --git a/messaging/sms/__init__.py b/messaging/sms/__init__.py index f00ad10..d619a85 100644 --- a/messaging/sms/__init__.py +++ b/messaging/sms/__init__.py @@ -2,6 +2,6 @@ from messaging.sms.submit import SmsSubmit from messaging.sms.deliver import SmsDeliver -from messaging.sms.gsm0338 import is_gsm_text +from messaging.sms.gsm0338 import 
is_valid_gsm -__all__ = ["SmsSubmit", "SmsDeliver", "is_gsm_text"] +__all__ = ["SmsSubmit", "SmsDeliver", "is_valid_gsm"] diff --git a/messaging/sms/gsm0338.py b/messaging/sms/gsm0338.py index e2b5da4..2af677f 100644 --- a/messaging/sms/gsm0338.py +++ b/messaging/sms/gsm0338.py @@ -1,293 +1,98 @@ -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - +# https://github.com/jezeniel/smsutil/blob/master/smsutil/codecs.py import codecs -import sys -import traceback - -# data from -# http://snoops.roy202.org/testerman/browser/trunk/plugins/codecs/gsm0338.py - -# default GSM 03.38 -> unicode -def_regular_decode_dict = { - '\x00': '\u0040', # COMMERCIAL AT - '\x01': '\u00A3', # POUND SIGN - '\x02': '\u0024', # DOLLAR SIGN - '\x03': '\u00A5', # YEN SIGN - '\x04': '\u00E8', # LATIN SMALL LETTER E WITH GRAVE - '\x05': '\u00E9', # LATIN SMALL LETTER E WITH ACUTE - '\x06': '\u00F9', # LATIN SMALL LETTER U WITH GRAVE - '\x07': '\u00EC', # LATIN SMALL LETTER I WITH GRAVE - '\x08': '\u00F2', # LATIN SMALL LETTER O WITH GRAVE - '\x09': '\u00C7', # LATIN CAPITAL LETTER C WITH CEDILLA - # The Unicode page suggests this is a mistake: but - # it's still in the latest version of the spec and - # our implementation has to be exact. 
- - '\x0A': '\u000A', # LINE FEED - '\x0B': '\u00D8', # LATIN CAPITAL LETTER O WITH STROKE - '\x0C': '\u00F8', # LATIN SMALL LETTER O WITH STROKE - '\x0D': '\u000D', # CARRIAGE RETURN - '\x0E': '\u00C5', # LATIN CAPITAL LETTER A WITH RING ABOVE - '\x0F': '\u00E5', # LATIN SMALL LETTER A WITH RING ABOVE - '\x10': '\u0394', # GREEK CAPITAL LETTER DELTA - '\x11': '\u005F', # LOW LINE - '\x12': '\u03A6', # GREEK CAPITAL LETTER PHI - '\x13': '\u0393', # GREEK CAPITAL LETTER GAMMA - '\x14': '\u039B', # GREEK CAPITAL LETTER LAMDA - '\x15': '\u03A9', # GREEK CAPITAL LETTER OMEGA - '\x16': '\u03A0', # GREEK CAPITAL LETTER PI - '\x17': '\u03A8', # GREEK CAPITAL LETTER PSI - '\x18': '\u03A3', # GREEK CAPITAL LETTER SIGMA - '\x19': '\u0398', # GREEK CAPITAL LETTER THETA - '\x1A': '\u039E', # GREEK CAPITAL LETTER XI - '\x1C': '\u00C6', # LATIN CAPITAL LETTER AE - '\x1D': '\u00E6', # LATIN SMALL LETTER AE - '\x1E': '\u00DF', # LATIN SMALL LETTER SHARP S (German) - '\x1F': '\u00C9', # LATIN CAPITAL LETTER E WITH ACUTE - '\x20': '\u0020', # SPACE - '\x21': '\u0021', # EXCLAMATION MARK - '\x22': '\u0022', # QUOTATION MARK - '\x23': '\u0023', # NUMBER SIGN - '\x24': '\u00A4', # CURRENCY SIGN - '\x25': '\u0025', # PERCENT SIGN - '\x26': '\u0026', # AMPERSAND - '\x27': '\u0027', # APOSTROPHE - '\x28': '\u0028', # LEFT PARENTHESIS - '\x29': '\u0029', # RIGHT PARENTHESIS - '\x2A': '\u002A', # ASTERISK - '\x2B': '\u002B', # PLUS SIGN - '\x2C': '\u002C', # COMMA - '\x2D': '\u002D', # HYPHEN-MINUS - '\x2E': '\u002E', # FULL STOP - '\x2F': '\u002F', # SOLIDUS - '\x30': '\u0030', # DIGIT ZERO - '\x31': '\u0031', # DIGIT ONE - '\x32': '\u0032', # DIGIT TWO - '\x33': '\u0033', # DIGIT THREE - '\x34': '\u0034', # DIGIT FOUR - '\x35': '\u0035', # DIGIT FIVE - '\x36': '\u0036', # DIGIT SIX - '\x37': '\u0037', # DIGIT SEVEN - '\x38': '\u0038', # DIGIT EIGHT - '\x39': '\u0039', # DIGIT NINE - '\x3A': '\u003A', # COLON - '\x3B': '\u003B', # SEMICOLON - '\x3C': '\u003C', # LESS-THAN SIGN - '\x3D': 
'\u003D', # EQUALS SIGN - '\x3E': '\u003E', # GREATER-THAN SIGN - '\x3F': '\u003F', # QUESTION MARK - '\x40': '\u00A1', # INVERTED EXCLAMATION MARK - '\x41': '\u0041', # LATIN CAPITAL LETTER A - '\x42': '\u0042', # LATIN CAPITAL LETTER B - '\x43': '\u0043', # LATIN CAPITAL LETTER C - '\x44': '\u0044', # LATIN CAPITAL LETTER D - '\x45': '\u0045', # LATIN CAPITAL LETTER E - '\x46': '\u0046', # LATIN CAPITAL LETTER F - '\x47': '\u0047', # LATIN CAPITAL LETTER G - '\x48': '\u0048', # LATIN CAPITAL LETTER H - '\x49': '\u0049', # LATIN CAPITAL LETTER I - '\x4A': '\u004A', # LATIN CAPITAL LETTER J - '\x4B': '\u004B', # LATIN CAPITAL LETTER K - '\x4C': '\u004C', # LATIN CAPITAL LETTER L - '\x4D': '\u004D', # LATIN CAPITAL LETTER M - '\x4E': '\u004E', # LATIN CAPITAL LETTER N - '\x4F': '\u004F', # LATIN CAPITAL LETTER O - '\x50': '\u0050', # LATIN CAPITAL LETTER P - '\x51': '\u0051', # LATIN CAPITAL LETTER Q - '\x52': '\u0052', # LATIN CAPITAL LETTER R - '\x53': '\u0053', # LATIN CAPITAL LETTER S - '\x54': '\u0054', # LATIN CAPITAL LETTER T - '\x55': '\u0055', # LATIN CAPITAL LETTER U - '\x56': '\u0056', # LATIN CAPITAL LETTER V - '\x57': '\u0057', # LATIN CAPITAL LETTER W - '\x58': '\u0058', # LATIN CAPITAL LETTER X - '\x59': '\u0059', # LATIN CAPITAL LETTER Y - '\x5A': '\u005A', # LATIN CAPITAL LETTER Z - '\x5B': '\u00C4', # LATIN CAPITAL LETTER A WITH DIAERESIS - '\x5C': '\u00D6', # LATIN CAPITAL LETTER O WITH DIAERESIS - '\x5D': '\u00D1', # LATIN CAPITAL LETTER N WITH TILDE - '\x5E': '\u00DC', # LATIN CAPITAL LETTER U WITH DIAERESIS - '\x5F': '\u00A7', # SECTION SIGN - '\x60': '\u00BF', # INVERTED QUESTION MARK - '\x61': '\u0061', # LATIN SMALL LETTER A - '\x62': '\u0062', # LATIN SMALL LETTER B - '\x63': '\u0063', # LATIN SMALL LETTER C - '\x64': '\u0064', # LATIN SMALL LETTER D - '\x65': '\u0065', # LATIN SMALL LETTER E - '\x66': '\u0066', # LATIN SMALL LETTER F - '\x67': '\u0067', # LATIN SMALL LETTER G - '\x68': '\u0068', # LATIN SMALL LETTER H - '\x69': '\u0069', # 
LATIN SMALL LETTER I - '\x6A': '\u006A', # LATIN SMALL LETTER J - '\x6B': '\u006B', # LATIN SMALL LETTER K - '\x6C': '\u006C', # LATIN SMALL LETTER L - '\x6D': '\u006D', # LATIN SMALL LETTER M - '\x6E': '\u006E', # LATIN SMALL LETTER N - '\x6F': '\u006F', # LATIN SMALL LETTER O - '\x70': '\u0070', # LATIN SMALL LETTER P - '\x71': '\u0071', # LATIN SMALL LETTER Q - '\x72': '\u0072', # LATIN SMALL LETTER R - '\x73': '\u0073', # LATIN SMALL LETTER S - '\x74': '\u0074', # LATIN SMALL LETTER T - '\x75': '\u0075', # LATIN SMALL LETTER U - '\x76': '\u0076', # LATIN SMALL LETTER V - '\x77': '\u0077', # LATIN SMALL LETTER W - '\x78': '\u0078', # LATIN SMALL LETTER X - '\x79': '\u0079', # LATIN SMALL LETTER Y - '\x7A': '\u007A', # LATIN SMALL LETTER Z - '\x7B': '\u00E4', # LATIN SMALL LETTER A WITH DIAERESIS - '\x7C': '\u00F6', # LATIN SMALL LETTER O WITH DIAERESIS - '\x7D': '\u00F1', # LATIN SMALL LETTER N WITH TILDE - '\x7E': '\u00FC', # LATIN SMALL LETTER U WITH DIAERESIS - '\x7F': '\u00E0', # LATIN SMALL LETTER A WITH GRAVE -} - -# default GSM 03.38 escaped characters -> unicode -def_escape_decode_dict = { - '\x0A': '\u000C', # FORM FEED - '\x14': '\u005E', # CIRCUMFLEX ACCENT - '\x28': '\u007B', # LEFT CURLY BRACKET - '\x29': '\u007D', # RIGHT CURLY BRACKET - '\x2F': '\u005C', # REVERSE SOLIDUS - '\x3C': '\u005B', # LEFT SQUARE BRACKET - '\x3D': '\u007E', # TILDE - '\x3E': '\u005D', # RIGHT SQUARE BRACKET - '\x40': '\u007C', # VERTICAL LINE - '\x65': '\u20AC', # EURO SIGN -} - -# Replacement characters, default is question mark. Used when it is not too -# important to ensure exact UTF-8 -> GSM -> UTF-8 equivilence, such as when -# humans read and write SMS. But for USSD and other M2M applications it's -# important to ensure the conversion is exact. 
-def_replace_encode_dict = { - '\u00E7': '\x09', # LATIN SMALL LETTER C WITH CEDILLA - - '\u0391': '\x41', # GREEK CAPITAL LETTER ALPHA - '\u0392': '\x42', # GREEK CAPITAL LETTER BETA - '\u0395': '\x45', # GREEK CAPITAL LETTER EPSILON - '\u0397': '\x48', # GREEK CAPITAL LETTER ETA - '\u0399': '\x49', # GREEK CAPITAL LETTER IOTA - '\u039A': '\x4B', # GREEK CAPITAL LETTER KAPPA - '\u039C': '\x4D', # GREEK CAPITAL LETTER MU - '\u039D': '\x4E', # GREEK CAPITAL LETTER NU - '\u039F': '\x4F', # GREEK CAPITAL LETTER OMICRON - '\u03A1': '\x50', # GREEK CAPITAL LETTER RHO - '\u03A4': '\x54', # GREEK CAPITAL LETTER TAU - '\u03A7': '\x58', # GREEK CAPITAL LETTER CHI - '\u03A5': '\x59', # GREEK CAPITAL LETTER UPSILON - '\u0396': '\x5A', # GREEK CAPITAL LETTER ZETA -} +import re + +GSM_BASIC_CHARSET = ( + '@£$¥èéùìòÇ\nØø\rÅåΔ_ΦΓΛΩΠΨΣΘΞ\x1bÆæßÉ !"#¤%&\'()*+,-./0123456789:;<=>?¡' + 'ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÑܧ¿abcdefghijklmnopqrstuvwxyzäöñüà') + +GSM_EXT_CHARSET = '\f^{}\\[~]|€' + +GSM_CHARSET = GSM_BASIC_CHARSET + GSM_EXT_CHARSET + +basic_pairs = dict(zip( + [i for i in range(len(GSM_BASIC_CHARSET))], + [ord(c) for c in GSM_BASIC_CHARSET], +)) + +ext_pairs = dict(zip( + [bytes([ord('\x1b'), ord(c)]) + for c in '\x0a\x14\x28\x29\x2f\x3c\x3d\x3e\x40\x65'], + [ord(c) for c in GSM_EXT_CHARSET] +)) + +decoding_map = basic_pairs +decoding_map.update(ext_pairs) + +encoding_map = codecs.make_encoding_map(decoding_map) + + +def decode_gsm0338(text, decoding_map): + ESCAPE = ord('\x1b') + SPACE = ord(' ') + decoded = '' + skip = None + for index, char in enumerate(bytes(text)): + next = index + 1 + if skip == index: + continue + if char != ESCAPE: + d = decoding_map.get(char) + elif char == ESCAPE and next < len(text): + ext_char = bytes([ESCAPE, text[next]]) + d = decoding_map.get(ext_char, SPACE) + if d != SPACE: + skip = next + else: + d = SPACE + decoded += chr(d) + return decoded, len(decoded) -QUESTION_MARK = chr(0x3f) -# unicode -> default GSM 03.38 -def_regular_encode_dict = \ - 
dict((u, g) for g, u in def_regular_decode_dict.items()) +class GSM0338Codec(codecs.Codec): + def encode(self, input, errors='strict'): + return codecs.charmap_encode(input, errors, encoding_map) -# unicode -> default escaped GSM 03.38 characters -def_escape_encode_dict = \ - dict((u, g) for g, u in def_escape_decode_dict.items()) + def decode(self, input, errors='strict'): + return decode_gsm0338(input, decoding_map) -def encode(input_, errors='strict'): - """ - :type input_: unicode +class GSM0338IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input, self.errors, encoding_map)[0] - :return: string - """ - result = [] - for c in input_: - try: - result.append(def_regular_encode_dict[c]) - except KeyError: - if c in def_escape_encode_dict: - # OK, let's encode it as an escaped characters - result.append('\x1b') - result.append(def_escape_encode_dict[c]) - else: - print(repr(c)) - if errors == 'strict': - raise UnicodeError("Invalid GSM character") - elif errors == 'replace': - result.append( - def_replace_encode_dict.get(c, QUESTION_MARK)) - elif errors == 'ignore': - pass - else: - raise UnicodeError("Unknown error handling") - ret = ''.join(result) - return ret, len(ret) +class GSM0338IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return decode_gsm0338(input, decoding_map)[0] -def decode(input_, errors='strict'): - """ - :type input_: str +class GSM0338StreamReader(GSM0338Codec, codecs.StreamReader): + pass - :return: unicode - """ - result = [] - index = 0 - while index < len(input_): - c = input_[index] - index += 1 - if c == '\x1b': - if index < len(input_): - c = input_[index] - index += 1 - result.append(def_escape_decode_dict.get(c, '\xa0')) - else: - result.append('\xa0') - else: - try: - result.append(def_regular_decode_dict[c]) - except KeyError: - # error handling: unassigned byte, must be > 0x7f - if errors == 'strict': - raise 
UnicodeError("Unrecognized GSM character %s at index %i of input %s" % (hex(c), index, input_)) - elif errors == 'replace': - result.append('?') - elif errors == 'ignore': - pass - else: - raise UnicodeError("Unknown error handling") - ret = ''.join(result) - return ret, len(ret) +class GSM0338StreamWriter(GSM0338Codec, codecs.StreamWriter): + pass -# encodings module API -def getregentry(encoding): +def search_gsm0338(encoding): if encoding in ('gsm0338', 'gsm7'): - return codecs.CodecInfo(name='gsm0338', - encode=encode, - decode=decode) + return codecs.CodecInfo( + name='gsm0338', + encode=GSM0338Codec().encode, + decode=GSM0338Codec().decode, + incrementalencoder=GSM0338IncrementalEncoder, + incrementaldecoder=GSM0338IncrementalDecoder, + streamwriter=GSM0338StreamWriter, + streamreader=GSM0338StreamReader + ) + return None -# Codec registration -codecs.register(getregentry) +def is_valid_gsm(text): + ''' Validate if `text` is a valid gsm 03.338. ''' + r = '^[' + re.escape(GSM_CHARSET) + ']+$' + return re.match(r, text, re.UNICODE) is not None -def is_gsm_text(text): - """Returns True if ``text`` can be encoded as gsm text""" - try: - codecs.encode(text, 'gsm0338') - except UnicodeError: - return False - except: - traceback.print_exc(file=sys.stdout) - return False - return True +codecs.register(search_gsm0338) \ No newline at end of file diff --git a/messaging/sms/gsm0338old.py b/messaging/sms/gsm0338old.py new file mode 100644 index 0000000..e2b5da4 --- /dev/null +++ b/messaging/sms/gsm0338old.py @@ -0,0 +1,293 @@ +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +import codecs +import sys +import traceback + +# data from +# http://snoops.roy202.org/testerman/browser/trunk/plugins/codecs/gsm0338.py + +# default GSM 03.38 -> unicode +def_regular_decode_dict = { + '\x00': '\u0040', # COMMERCIAL AT + '\x01': '\u00A3', # POUND SIGN + '\x02': '\u0024', # DOLLAR SIGN + '\x03': '\u00A5', # YEN SIGN + '\x04': '\u00E8', # LATIN SMALL LETTER E WITH GRAVE + '\x05': '\u00E9', # LATIN SMALL LETTER E WITH ACUTE + '\x06': '\u00F9', # LATIN SMALL LETTER U WITH GRAVE + '\x07': '\u00EC', # LATIN SMALL LETTER I WITH GRAVE + '\x08': '\u00F2', # LATIN SMALL LETTER O WITH GRAVE + '\x09': '\u00C7', # LATIN CAPITAL LETTER C WITH CEDILLA + # The Unicode page suggests this is a mistake: but + # it's still in the latest version of the spec and + # our implementation has to be exact. 
+ + '\x0A': '\u000A', # LINE FEED + '\x0B': '\u00D8', # LATIN CAPITAL LETTER O WITH STROKE + '\x0C': '\u00F8', # LATIN SMALL LETTER O WITH STROKE + '\x0D': '\u000D', # CARRIAGE RETURN + '\x0E': '\u00C5', # LATIN CAPITAL LETTER A WITH RING ABOVE + '\x0F': '\u00E5', # LATIN SMALL LETTER A WITH RING ABOVE + '\x10': '\u0394', # GREEK CAPITAL LETTER DELTA + '\x11': '\u005F', # LOW LINE + '\x12': '\u03A6', # GREEK CAPITAL LETTER PHI + '\x13': '\u0393', # GREEK CAPITAL LETTER GAMMA + '\x14': '\u039B', # GREEK CAPITAL LETTER LAMDA + '\x15': '\u03A9', # GREEK CAPITAL LETTER OMEGA + '\x16': '\u03A0', # GREEK CAPITAL LETTER PI + '\x17': '\u03A8', # GREEK CAPITAL LETTER PSI + '\x18': '\u03A3', # GREEK CAPITAL LETTER SIGMA + '\x19': '\u0398', # GREEK CAPITAL LETTER THETA + '\x1A': '\u039E', # GREEK CAPITAL LETTER XI + '\x1C': '\u00C6', # LATIN CAPITAL LETTER AE + '\x1D': '\u00E6', # LATIN SMALL LETTER AE + '\x1E': '\u00DF', # LATIN SMALL LETTER SHARP S (German) + '\x1F': '\u00C9', # LATIN CAPITAL LETTER E WITH ACUTE + '\x20': '\u0020', # SPACE + '\x21': '\u0021', # EXCLAMATION MARK + '\x22': '\u0022', # QUOTATION MARK + '\x23': '\u0023', # NUMBER SIGN + '\x24': '\u00A4', # CURRENCY SIGN + '\x25': '\u0025', # PERCENT SIGN + '\x26': '\u0026', # AMPERSAND + '\x27': '\u0027', # APOSTROPHE + '\x28': '\u0028', # LEFT PARENTHESIS + '\x29': '\u0029', # RIGHT PARENTHESIS + '\x2A': '\u002A', # ASTERISK + '\x2B': '\u002B', # PLUS SIGN + '\x2C': '\u002C', # COMMA + '\x2D': '\u002D', # HYPHEN-MINUS + '\x2E': '\u002E', # FULL STOP + '\x2F': '\u002F', # SOLIDUS + '\x30': '\u0030', # DIGIT ZERO + '\x31': '\u0031', # DIGIT ONE + '\x32': '\u0032', # DIGIT TWO + '\x33': '\u0033', # DIGIT THREE + '\x34': '\u0034', # DIGIT FOUR + '\x35': '\u0035', # DIGIT FIVE + '\x36': '\u0036', # DIGIT SIX + '\x37': '\u0037', # DIGIT SEVEN + '\x38': '\u0038', # DIGIT EIGHT + '\x39': '\u0039', # DIGIT NINE + '\x3A': '\u003A', # COLON + '\x3B': '\u003B', # SEMICOLON + '\x3C': '\u003C', # LESS-THAN SIGN + '\x3D': 
'\u003D', # EQUALS SIGN + '\x3E': '\u003E', # GREATER-THAN SIGN + '\x3F': '\u003F', # QUESTION MARK + '\x40': '\u00A1', # INVERTED EXCLAMATION MARK + '\x41': '\u0041', # LATIN CAPITAL LETTER A + '\x42': '\u0042', # LATIN CAPITAL LETTER B + '\x43': '\u0043', # LATIN CAPITAL LETTER C + '\x44': '\u0044', # LATIN CAPITAL LETTER D + '\x45': '\u0045', # LATIN CAPITAL LETTER E + '\x46': '\u0046', # LATIN CAPITAL LETTER F + '\x47': '\u0047', # LATIN CAPITAL LETTER G + '\x48': '\u0048', # LATIN CAPITAL LETTER H + '\x49': '\u0049', # LATIN CAPITAL LETTER I + '\x4A': '\u004A', # LATIN CAPITAL LETTER J + '\x4B': '\u004B', # LATIN CAPITAL LETTER K + '\x4C': '\u004C', # LATIN CAPITAL LETTER L + '\x4D': '\u004D', # LATIN CAPITAL LETTER M + '\x4E': '\u004E', # LATIN CAPITAL LETTER N + '\x4F': '\u004F', # LATIN CAPITAL LETTER O + '\x50': '\u0050', # LATIN CAPITAL LETTER P + '\x51': '\u0051', # LATIN CAPITAL LETTER Q + '\x52': '\u0052', # LATIN CAPITAL LETTER R + '\x53': '\u0053', # LATIN CAPITAL LETTER S + '\x54': '\u0054', # LATIN CAPITAL LETTER T + '\x55': '\u0055', # LATIN CAPITAL LETTER U + '\x56': '\u0056', # LATIN CAPITAL LETTER V + '\x57': '\u0057', # LATIN CAPITAL LETTER W + '\x58': '\u0058', # LATIN CAPITAL LETTER X + '\x59': '\u0059', # LATIN CAPITAL LETTER Y + '\x5A': '\u005A', # LATIN CAPITAL LETTER Z + '\x5B': '\u00C4', # LATIN CAPITAL LETTER A WITH DIAERESIS + '\x5C': '\u00D6', # LATIN CAPITAL LETTER O WITH DIAERESIS + '\x5D': '\u00D1', # LATIN CAPITAL LETTER N WITH TILDE + '\x5E': '\u00DC', # LATIN CAPITAL LETTER U WITH DIAERESIS + '\x5F': '\u00A7', # SECTION SIGN + '\x60': '\u00BF', # INVERTED QUESTION MARK + '\x61': '\u0061', # LATIN SMALL LETTER A + '\x62': '\u0062', # LATIN SMALL LETTER B + '\x63': '\u0063', # LATIN SMALL LETTER C + '\x64': '\u0064', # LATIN SMALL LETTER D + '\x65': '\u0065', # LATIN SMALL LETTER E + '\x66': '\u0066', # LATIN SMALL LETTER F + '\x67': '\u0067', # LATIN SMALL LETTER G + '\x68': '\u0068', # LATIN SMALL LETTER H + '\x69': '\u0069', # 
LATIN SMALL LETTER I + '\x6A': '\u006A', # LATIN SMALL LETTER J + '\x6B': '\u006B', # LATIN SMALL LETTER K + '\x6C': '\u006C', # LATIN SMALL LETTER L + '\x6D': '\u006D', # LATIN SMALL LETTER M + '\x6E': '\u006E', # LATIN SMALL LETTER N + '\x6F': '\u006F', # LATIN SMALL LETTER O + '\x70': '\u0070', # LATIN SMALL LETTER P + '\x71': '\u0071', # LATIN SMALL LETTER Q + '\x72': '\u0072', # LATIN SMALL LETTER R + '\x73': '\u0073', # LATIN SMALL LETTER S + '\x74': '\u0074', # LATIN SMALL LETTER T + '\x75': '\u0075', # LATIN SMALL LETTER U + '\x76': '\u0076', # LATIN SMALL LETTER V + '\x77': '\u0077', # LATIN SMALL LETTER W + '\x78': '\u0078', # LATIN SMALL LETTER X + '\x79': '\u0079', # LATIN SMALL LETTER Y + '\x7A': '\u007A', # LATIN SMALL LETTER Z + '\x7B': '\u00E4', # LATIN SMALL LETTER A WITH DIAERESIS + '\x7C': '\u00F6', # LATIN SMALL LETTER O WITH DIAERESIS + '\x7D': '\u00F1', # LATIN SMALL LETTER N WITH TILDE + '\x7E': '\u00FC', # LATIN SMALL LETTER U WITH DIAERESIS + '\x7F': '\u00E0', # LATIN SMALL LETTER A WITH GRAVE +} + +# default GSM 03.38 escaped characters -> unicode +def_escape_decode_dict = { + '\x0A': '\u000C', # FORM FEED + '\x14': '\u005E', # CIRCUMFLEX ACCENT + '\x28': '\u007B', # LEFT CURLY BRACKET + '\x29': '\u007D', # RIGHT CURLY BRACKET + '\x2F': '\u005C', # REVERSE SOLIDUS + '\x3C': '\u005B', # LEFT SQUARE BRACKET + '\x3D': '\u007E', # TILDE + '\x3E': '\u005D', # RIGHT SQUARE BRACKET + '\x40': '\u007C', # VERTICAL LINE + '\x65': '\u20AC', # EURO SIGN +} + +# Replacement characters, default is question mark. Used when it is not too +# important to ensure exact UTF-8 -> GSM -> UTF-8 equivilence, such as when +# humans read and write SMS. But for USSD and other M2M applications it's +# important to ensure the conversion is exact. 
+def_replace_encode_dict = { + '\u00E7': '\x09', # LATIN SMALL LETTER C WITH CEDILLA + + '\u0391': '\x41', # GREEK CAPITAL LETTER ALPHA + '\u0392': '\x42', # GREEK CAPITAL LETTER BETA + '\u0395': '\x45', # GREEK CAPITAL LETTER EPSILON + '\u0397': '\x48', # GREEK CAPITAL LETTER ETA + '\u0399': '\x49', # GREEK CAPITAL LETTER IOTA + '\u039A': '\x4B', # GREEK CAPITAL LETTER KAPPA + '\u039C': '\x4D', # GREEK CAPITAL LETTER MU + '\u039D': '\x4E', # GREEK CAPITAL LETTER NU + '\u039F': '\x4F', # GREEK CAPITAL LETTER OMICRON + '\u03A1': '\x50', # GREEK CAPITAL LETTER RHO + '\u03A4': '\x54', # GREEK CAPITAL LETTER TAU + '\u03A7': '\x58', # GREEK CAPITAL LETTER CHI + '\u03A5': '\x59', # GREEK CAPITAL LETTER UPSILON + '\u0396': '\x5A', # GREEK CAPITAL LETTER ZETA +} + +QUESTION_MARK = chr(0x3f) + +# unicode -> default GSM 03.38 +def_regular_encode_dict = \ + dict((u, g) for g, u in def_regular_decode_dict.items()) + +# unicode -> default escaped GSM 03.38 characters +def_escape_encode_dict = \ + dict((u, g) for g, u in def_escape_decode_dict.items()) + + +def encode(input_, errors='strict'): + """ + :type input_: unicode + + :return: string + """ + result = [] + for c in input_: + try: + result.append(def_regular_encode_dict[c]) + except KeyError: + if c in def_escape_encode_dict: + # OK, let's encode it as an escaped characters + result.append('\x1b') + result.append(def_escape_encode_dict[c]) + else: + print(repr(c)) + if errors == 'strict': + raise UnicodeError("Invalid GSM character") + elif errors == 'replace': + result.append( + def_replace_encode_dict.get(c, QUESTION_MARK)) + elif errors == 'ignore': + pass + else: + raise UnicodeError("Unknown error handling") + + ret = ''.join(result) + return ret, len(ret) + + +def decode(input_, errors='strict'): + """ + :type input_: str + + :return: unicode + """ + result = [] + index = 0 + while index < len(input_): + c = input_[index] + index += 1 + if c == '\x1b': + if index < len(input_): + c = input_[index] + index += 1 + 
result.append(def_escape_decode_dict.get(c, '\xa0')) + else: + result.append('\xa0') + else: + try: + result.append(def_regular_decode_dict[c]) + except KeyError: + # error handling: unassigned byte, must be > 0x7f + if errors == 'strict': + raise UnicodeError("Unrecognized GSM character %s at index %i of input %s" % (hex(c), index, input_)) + elif errors == 'replace': + result.append('?') + elif errors == 'ignore': + pass + else: + raise UnicodeError("Unknown error handling") + + ret = ''.join(result) + return ret, len(ret) + + +# encodings module API +def getregentry(encoding): + if encoding in ('gsm0338', 'gsm7'): + return codecs.CodecInfo(name='gsm0338', + encode=encode, + decode=decode) + +# Codec registration +codecs.register(getregentry) + + +def is_gsm_text(text): + """Returns True if ``text`` can be encoded as gsm text""" + try: + codecs.encode(text, 'gsm0338') + except UnicodeError: + return False + except: + traceback.print_exc(file=sys.stdout) + return False + + return True diff --git a/messaging/sms/submit.py b/messaging/sms/submit.py index bbcda34..3a9cf57 100644 --- a/messaging/sms/submit.py +++ b/messaging/sms/submit.py @@ -12,7 +12,7 @@ timedelta_to_relative_validity, datetime_to_absolute_validity) from messaging.sms.base import SmsBase -from messaging.sms.gsm0338 import is_gsm_text +from messaging.sms.gsm0338 import is_valid_gsm from messaging.sms.pdu import Pdu VALID_NUMBER = re.compile("^\+?\d{3,20}$") @@ -205,7 +205,7 @@ def _get_sms_submit_pdu(self, udh=False): def _get_msg_pdu(self): # Data coding scheme if self.fmt is None: - if is_gsm_text(self.text): + if is_valid_gsm(self.text): self.fmt = 0x00 else: self.fmt = 0x08 diff --git a/tests/test_gsm_encoding.py b/tests/test_gsm_encoding.py index a5fda42..df76350 100644 --- a/tests/test_gsm_encoding.py +++ b/tests/test_gsm_encoding.py @@ -19,7 +19,7 @@ from unittest import TestCase import codecs -from messaging.sms.gsm0338 import is_gsm_text # imports GSM7 codec +from messaging.sms.gsm0338 
import is_valid_gsm # imports GSM7 codec # Reversed from: ftp://ftp.unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT MAP = { @@ -251,17 +251,17 @@ def test_decoding_supported_unicode_gsm(self): s_unicode = codecs.decode(s_gsm, 'gsm0338') self.assertEqual(MAP[key][0], ord(s_unicode)) - def test_is_gsm_text_true(self): + def test_is_valid_gsm_true(self): for key in list(MAP.keys()): if key == chr(0x00a0): continue - self.assertTrue(is_gsm_text(key)) + self.assertTrue(is_valid_gsm(key)) - def test_is_gsm_text_false(self): - self.assertFalse(is_gsm_text(chr(0x00a0))) + def test_is_valid_gsm_false(self): + self.assertFalse(is_valid_gsm(chr(0x00a0))) for i in range(1, 0xffff + 1): if chr(i) not in MAP: # Note: it's a little odd, but on error we want to see values - if is_gsm_text(chr(i)) is not False: + if is_valid_gsm(chr(i)) is not False: self.assertEqual(BAD, i) From a49bae480c3331ac910b3b8268abf4ef6e152ee3 Mon Sep 17 00:00:00 2001 From: Dom Amato Date: Thu, 9 Apr 2020 13:17:48 -0500 Subject: [PATCH 21/28] try newer encoding method --- messaging/sms/gsm0338old.py | 4 ++-- messaging/sms/submit.py | 3 +-- messaging/utils.py | 3 +-- tests/test_gsm_encoding.py | 9 ++++----- 4 files changed, 8 insertions(+), 11 deletions(-) diff --git a/messaging/sms/gsm0338old.py b/messaging/sms/gsm0338old.py index e2b5da4..b57668b 100644 --- a/messaging/sms/gsm0338old.py +++ b/messaging/sms/gsm0338old.py @@ -271,8 +271,8 @@ def decode(input_, errors='strict'): # encodings module API def getregentry(encoding): - if encoding in ('gsm0338', 'gsm7'): - return codecs.CodecInfo(name='gsm0338', + if encoding in ('gsm0338o'): + return codecs.CodecInfo(name='gsm0338o', encode=encode, decode=decode) diff --git a/messaging/sms/submit.py b/messaging/sms/submit.py index 3a9cf57..073541f 100644 --- a/messaging/sms/submit.py +++ b/messaging/sms/submit.py @@ -3,7 +3,6 @@ from datetime import datetime, timedelta import re -import codecs from messaging.sms import consts from messaging.utils import (debug, 
encode_str, clean_number, @@ -244,7 +243,7 @@ def _get_msg_pdu(self): message_pdu = "" if self.fmt == 0x00: - self.text_gsm = codecs.encode(self.text, "gsm0338") + self.text_gsm = self.text.encode("gsm0338") if len(self.text_gsm) <= consts.SEVENBIT_SIZE: message_pdu = [pack_8bits_to_7bits(self.text_gsm)] else: diff --git a/messaging/utils.py b/messaging/utils.py index 4bf24aa..13731d3 100644 --- a/messaging/utils.py +++ b/messaging/utils.py @@ -2,7 +2,6 @@ from datetime import timedelta, tzinfo from math import floor import sys -import codecs class FixedOffset(tzinfo): @@ -46,7 +45,7 @@ def dst(self, dt): def bytes_to_str(b): - return codecs.encode(b, 'latin1') + return b.encode('latin1') def to_array(pdu): diff --git a/tests/test_gsm_encoding.py b/tests/test_gsm_encoding.py index df76350..1d9ab2c 100644 --- a/tests/test_gsm_encoding.py +++ b/tests/test_gsm_encoding.py @@ -18,7 +18,6 @@ """Unittests for the gsm encoding/decoding module""" from unittest import TestCase -import codecs from messaging.sms.gsm0338 import is_valid_gsm # imports GSM7 codec # Reversed from: ftp://ftp.unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT @@ -196,7 +195,7 @@ def test_encoding_supported_unicode_gsm(self): for key in list(MAP.keys()): # Use 'ignore' so that we see the code tested, not an exception - s_gsm = codecs.encode(key, 'gsm0338', 'ignore') + s_gsm = key.encode('gsm0338', 'ignore') if len(s_gsm) == 1: i_gsm = ord(s_gsm) @@ -216,7 +215,7 @@ def test_encoding_supported_greek_unicode_gsm(self): for key in list(GREEK_MAP.keys()): # Use 'replace' so that we trigger the mapping - s_gsm = codecs.encode(key, 'gsm0338', 'replace') + s_gsm = key.encode('gsm0338', 'replace') if len(s_gsm) == 1: i_gsm = ord(s_gsm) @@ -230,7 +229,7 @@ def test_encoding_supported_quirk_unicode_gsm(self): for key in list(QUIRK_MAP.keys()): # Use 'replace' so that we trigger the mapping - s_gsm = codecs.encode(key, 'gsm0338', 'replace') + s_gsm = key.encode('gsm0338', 'replace') if len(s_gsm) == 1: i_gsm = 
ord(s_gsm) @@ -248,7 +247,7 @@ def test_decoding_supported_unicode_gsm(self): s_gsm = chr((i_gsm & 0xff00) >> 8) s_gsm += chr(i_gsm & 0x00ff) - s_unicode = codecs.decode(s_gsm, 'gsm0338') + s_unicode = s_gsm.decode('gsm0338') self.assertEqual(MAP[key][0], ord(s_unicode)) def test_is_valid_gsm_true(self): From a378cb9aeabb0a8e2228101177318792b117f7ef Mon Sep 17 00:00:00 2001 From: Dom Amato Date: Thu, 9 Apr 2020 13:23:42 -0500 Subject: [PATCH 22/28] assert dicts are the same --- tests/test_gsm_encoding.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/test_gsm_encoding.py b/tests/test_gsm_encoding.py index 1d9ab2c..23c47f0 100644 --- a/tests/test_gsm_encoding.py +++ b/tests/test_gsm_encoding.py @@ -18,8 +18,8 @@ """Unittests for the gsm encoding/decoding module""" from unittest import TestCase -from messaging.sms.gsm0338 import is_valid_gsm # imports GSM7 codec - +from messaging.sms.gsm0338 import is_valid_gsm, decoding_map # imports GSM7 codec +from messaging.sms.gsm0338old import def_regular_decode_dict # Reversed from: ftp://ftp.unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT MAP = { # chr(0x0000): (0x0000, 0x00), # Null @@ -191,6 +191,9 @@ class TestEncodingFunctions(TestCase): + def test_mappings_are_same(self): + self.assertDictEqual(decoding_map, def_regular_decode_dict) + def test_encoding_supported_unicode_gsm(self): for key in list(MAP.keys()): @@ -247,7 +250,7 @@ def test_decoding_supported_unicode_gsm(self): s_gsm = chr((i_gsm & 0xff00) >> 8) s_gsm += chr(i_gsm & 0x00ff) - s_unicode = s_gsm.decode('gsm0338') + s_unicode = s_gsm.encode('gsm0338') self.assertEqual(MAP[key][0], ord(s_unicode)) def test_is_valid_gsm_true(self): From f1a93e29746ac22abcb6b85de883f945d4106623 Mon Sep 17 00:00:00 2001 From: Dom Amato Date: Thu, 9 Apr 2020 13:36:28 -0500 Subject: [PATCH 23/28] try translating --- tests/test_gsm_encoding.py | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git 
a/tests/test_gsm_encoding.py b/tests/test_gsm_encoding.py index 23c47f0..46d4cf5 100644 --- a/tests/test_gsm_encoding.py +++ b/tests/test_gsm_encoding.py @@ -192,7 +192,8 @@ class TestEncodingFunctions(TestCase): def test_mappings_are_same(self): - self.assertDictEqual(decoding_map, def_regular_decode_dict) + translated_dict = dict((hex(k), ord(v)) for k, v in def_regular_decode_dict.items()) + self.assertDictEqual(decoding_map, translated_dict) def test_encoding_supported_unicode_gsm(self): @@ -201,9 +202,9 @@ def test_encoding_supported_unicode_gsm(self): s_gsm = key.encode('gsm0338', 'ignore') if len(s_gsm) == 1: - i_gsm = ord(s_gsm) + i_gsm = s_gsm elif len(s_gsm) == 2: - i_gsm = (ord(s_gsm[0]) << 8) + ord(s_gsm[1]) + i_gsm = (s_gsm[0] << 8) + s_gsm[1] else: i_gsm = BAD # so we see the comparison, not an exception @@ -220,12 +221,10 @@ def test_encoding_supported_greek_unicode_gsm(self): # Use 'replace' so that we trigger the mapping s_gsm = key.encode('gsm0338', 'replace') - if len(s_gsm) == 1: - i_gsm = ord(s_gsm) - else: - i_gsm = BAD # so we see the comparison, not an exception + if len(s_gsm) != 1: + s_gsm = BAD # so we see the comparison, not an exception - self.assertEqual(GREEK_MAP[key][1], i_gsm) + self.assertEqual(GREEK_MAP[key][1], s_gsm) def test_encoding_supported_quirk_unicode_gsm(self): # Note: Conversion is one way, hence no corresponding decode test @@ -234,12 +233,10 @@ def test_encoding_supported_quirk_unicode_gsm(self): # Use 'replace' so that we trigger the mapping s_gsm = key.encode('gsm0338', 'replace') - if len(s_gsm) == 1: - i_gsm = ord(s_gsm) - else: - i_gsm = BAD # so we see the comparison, not an exception + if len(s_gsm) != 1: + s_gsm = BAD # so we see the comparison, not an exception - self.assertEqual(QUIRK_MAP[key][1], i_gsm) + self.assertEqual(QUIRK_MAP[key][1], s_gsm) def test_decoding_supported_unicode_gsm(self): for key in list(MAP.keys()): @@ -265,5 +262,5 @@ def test_is_valid_gsm_false(self): for i in range(1, 0xffff + 1): 
if chr(i) not in MAP: # Note: it's a little odd, but on error we want to see values - if is_valid_gsm(chr(i)) is not False: + if is_valid_gsm(chr(i)): self.assertEqual(BAD, i) From aae7c7bb3f7a7132c105d043fdad2083e1ad03a0 Mon Sep 17 00:00:00 2001 From: Dom Amato Date: Thu, 9 Apr 2020 12:11:59 -0500 Subject: [PATCH 24/28] Revert "dont need to use ordinal on byte string" This reverts commit 26b1bb68564191f37909c29a007b060ca2ba3f60. --- messaging/utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/messaging/utils.py b/messaging/utils.py index 13731d3..b89998e 100644 --- a/messaging/utils.py +++ b/messaging/utils.py @@ -105,8 +105,8 @@ def pack_8bits_to_7bits(message, udh=None): c += 1 shift = n % 7 - lb = txt[c] >> shift - hb = (txt[c + 1] << (7 - shift) & 255) + lb = ord(txt[c]) >> shift + hb = (ord(txt[c + 1]) << (7 - shift) & 255) op[n] = lb + hb c += 1 @@ -125,8 +125,8 @@ def pack_8bits_to_7bits(message, udh=None): c += 1 shift = n % 7 - lb = txt[c] >> shift - hb = (txt[c + 1] << (7 - shift) & 255) + lb = ord(txt[c]) >> shift + hb = (ord(txt[c + 1]) << (7 - shift) & 255) op[n] = lb + hb c += 1 From 14d2856671cd6252d8749755c2f5f97f443fe0bb Mon Sep 17 00:00:00 2001 From: Dom Amato Date: Thu, 9 Apr 2020 14:54:27 -0500 Subject: [PATCH 25/28] Auto stash before revert of "dont need to use ordinal on byte string" --- Dockerfile | 9 ++ doc/tutorial/mms.rst | 16 ++-- doc/tutorial/sms.rst | 14 +-- docker-compose.yml | 9 ++ messaging/mms/message.py | 2 +- messaging/mms/mms_pdu.py | 4 +- messaging/sms/deliver.py | 16 +--- messaging/sms/gsm0338.py | 176 ++++++++++++++++++++++++++++++++----- messaging/utils.py | 10 ++- resources/pydump.py | 2 +- test.log | Bin 0 -> 107596 bytes tests/test_gsm_encoding.py | 7 +- tests/test_wap.py | 16 ++-- 13 files changed, 210 insertions(+), 71 deletions(-) create mode 100644 Dockerfile create mode 100644 docker-compose.yml create mode 100644 test.log diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 
0000000..dfbfce8 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,9 @@ +from python:3-alpine + +WORKDIR /usr/src/app + +COPY . . + +RUN pip install pytest pylint + +CMD ["tail", "-f", "/dev/null"] \ No newline at end of file diff --git a/doc/tutorial/mms.rst b/doc/tutorial/mms.rst index 65df390..db31614 100644 --- a/doc/tutorial/mms.rst +++ b/doc/tutorial/mms.rst @@ -75,7 +75,7 @@ for a plain HTTP POST:: data = buf.getvalue() buf.close() - print "PROXY RESPONSE", data + print("PROXY RESPONSE", data) Encoding a m-notifyresp-ind PDU @@ -110,8 +110,8 @@ MMS, you just need to:: # data is an array.array("B") instance mms = MMSMessage.from_data(data) - print mms.headers['Message-Type'] # m-send-req - print mms.headers['To'] # '+34231342234/TYPE=PLMN' + print(mms.headers['Message-Type'] # m-send-req) + print(mms.headers['To'] # '+34231342234/TYPE=PLMN') Decoding from a file @@ -125,8 +125,8 @@ need the path to the file and:: path = '/tmp/binary-mms.bin' mms = MMSMessage.from_file(path) - print mms.headers['Message-Type'] # m-send-req - print mms.headers['To'] # '+34231342234/TYPE=PLMN' + print(mms.headers['Message-Type'] # m-send-req) + print(mms.headers['To'] # '+34231342234/TYPE=PLMN') Obtaining a MMS from a WAP push notification @@ -144,7 +144,7 @@ headers:: "0791447758100650400E80885810000000810004016082415464408C0C08049F8E020105040B8423F00106226170706C69636174696F6E2F766E642E7761702E6D6D732D6D65737361676500AF848C82984E4F4B3543694B636F544D595347344D4253774141734B7631344655484141414141414141008D908919802B3434373738353334323734392F545950453D504C4D4E008A808E0274008805810301194083687474703A2F", "0791447758100650440E8088581000000081000401608241547440440C08049F8E020205040B8423F02F70726F6D6D732F736572766C6574732F4E4F4B3543694B636F544D595347344D4253774141734B763134465548414141414141414100", ] - data = "" + data = b"" sms = SmsDeliver(pdus[0]) data += sms.text @@ -154,7 +154,7 @@ headers:: mms = extract_push_notification(data) url = mms.headers['Content-Location'] - print url + 
print(url) Once you have the content location, you need to do a HTTP GET to retrieve @@ -184,4 +184,4 @@ the MMS payload:: buf.close() mms = MMSMessage.from_data(data) - print mms + print(mms) diff --git a/doc/tutorial/sms.rst b/doc/tutorial/sms.rst index 39a49bd..8aae92a 100644 --- a/doc/tutorial/sms.rst +++ b/doc/tutorial/sms.rst @@ -36,7 +36,7 @@ How to encode a single part SMS ready to be sent:: sms = SmsSubmit("+44123231231", "hey how's it going?") pdu = sms.to_pdu()[0] - print pdu.length, pdu.pdu + print(pdu.length, pdu.pdu) How to encode a concatenated SMS ready to be sent:: @@ -45,7 +45,7 @@ How to encode a concatenated SMS ready to be sent:: sms = SmsSubmit("+44123231231", "hey " * 50) for pdu in sms.to_pdu(): - print pdu.length, pdu.pdu + print(pdu.length, pdu.pdu) Setting class @@ -59,7 +59,7 @@ Setting the SMS class (0-3) is a no brainer:: sms.class = 0 pdu = sms.to_pdu()[0] - print pdu.length, pdu.pdu + print(pdu.length, pdu.pdu) Setting validity @@ -78,7 +78,7 @@ Setting absolute validity:: sms.validity = datetime(2010, 12, 31, 23, 59, 59) pdu = sms.to_pdu()[0] - print pdu.length, pdu.pdu + print(pdu.length, pdu.pdu) Setting relative validity:: @@ -90,7 +90,7 @@ Setting relative validity:: sms.validity = timedelta(hours=5) pdu = sms.to_pdu()[0] - print pdu.length, pdu.pdu + print(pdu.length, pdu.pdu) Decoding @@ -103,7 +103,7 @@ term:`PDU` decoding is really simple with :class:`~messaging.sms.SmsDeliver`:: pdu = "0791447758100650040C914497726247010000909010711423400A2050EC468B81C4733A" sms = SmsDeliver(pdu) - print sms.data + print(sms.data) # {'csca': '+447785016005', 'type': None, # 'date': datetime.datetime(2009, 9, 1, 16, 41, 32), # 'text': ' 1741 bst', 'fmt': 0, 'pid': 0, @@ -135,7 +135,7 @@ registered:: # prompt appears (a more robust implementation # would wait till the prompt appeared) ser.write('AT+CMGS=%d\r' % pdu.length) - print ser.readlines() + print(ser.readlines()) # write the PDU and send a Ctrl+z escape ser.write('%s\x1a' % pdu.pdu) 
ser.close() diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..5570fba --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,9 @@ +version: "3" +services: + messaging: + build: + context: ./ + dockerfile: ./Dockerfile + volumes: + - ./messaging:/usr/src/app/messaging + - ./tests:/usr/src/app/tests diff --git a/messaging/mms/message.py b/messaging/mms/message.py index 0b5b23b..1c1d7dc 100644 --- a/messaging/mms/message.py +++ b/messaging/mms/message.py @@ -544,7 +544,7 @@ def data(self): """A buffer containing the binary data of this part""" if self._data is not None: if type(self._data) == array.array: - self._data = self._data.tostring() + self._data = self._data.tobytes() return self._data elif self._filename is not None: diff --git a/messaging/mms/mms_pdu.py b/messaging/mms/mms_pdu.py index b004c02..62dd9d6 100644 --- a/messaging/mms/mms_pdu.py +++ b/messaging/mms/mms_pdu.py @@ -167,7 +167,7 @@ def decode_message_body(self, data_iter): except StopIteration: return - #print 'Number of data entries (parts) in MMS body:', num_entries + #print('Number of data entries (parts) in MMS body:', num_entries) ########## MMS body: entries ########## # For every data "part", we have to read the following sequence: @@ -176,7 +176,7 @@ def decode_message_body(self, data_iter): # , # for part_num in range(num_entries): - #print '\nPart %d:\n------' % part_num + #print('\nPart %d:\n------' % part_num) headers_len = self.decode_uint_var(data_iter) data_len = self.decode_uint_var(data_iter) diff --git a/messaging/sms/deliver.py b/messaging/sms/deliver.py index c74cb2e..7696c5d 100644 --- a/messaging/sms/deliver.py +++ b/messaging/sms/deliver.py @@ -173,21 +173,13 @@ def _process_message(self, data): if self.fmt == 0x00: # XXX: Use unpack_msg2 - print(data) + data = data[ud_len:].tolist() - print(data) - print(msg) - print(unpack_msg(msg)) - print(unpack_msg(msg)[headlen:msgl]) - print(unpack_msg2(data)) - try: - self.text = 
unpack_msg2(data).decode("gsm0338") - except UnicodeError: - print('Unable To decode msg2') - self.text = unpack_msg(msg)[headlen:msgl].decode("gsm0338") + + self.text = unpack_msg2(data).decode("gsm0338") elif self.fmt == 0x04: - self.text = data[ud_len:].tostring() + self.text = data[ud_len:].tobytes() elif self.fmt == 0x08: data = data[ud_len:].tolist() diff --git a/messaging/sms/gsm0338.py b/messaging/sms/gsm0338.py index 2af677f..b2ab2e6 100644 --- a/messaging/sms/gsm0338.py +++ b/messaging/sms/gsm0338.py @@ -1,28 +1,160 @@ # https://github.com/jezeniel/smsutil/blob/master/smsutil/codecs.py import codecs +from array import array import re -GSM_BASIC_CHARSET = ( - '@£$¥èéùìòÇ\nØø\rÅåΔ_ΦΓΛΩΠΨΣΘΞ\x1bÆæßÉ !"#¤%&\'()*+,-./0123456789:;<=>?¡' - 'ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÑܧ¿abcdefghijklmnopqrstuvwxyzäöñüà') - -GSM_EXT_CHARSET = '\f^{}\\[~]|€' - -GSM_CHARSET = GSM_BASIC_CHARSET + GSM_EXT_CHARSET - -basic_pairs = dict(zip( - [i for i in range(len(GSM_BASIC_CHARSET))], - [ord(c) for c in GSM_BASIC_CHARSET], -)) - -ext_pairs = dict(zip( - [bytes([ord('\x1b'), ord(c)]) - for c in '\x0a\x14\x28\x29\x2f\x3c\x3d\x3e\x40\x65'], - [ord(c) for c in GSM_EXT_CHARSET] -)) - -decoding_map = basic_pairs -decoding_map.update(ext_pairs) +# default GSM 03.38 -> unicode +GSM_BASIC_CHARSET = { + '\x00': '\u0040', # COMMERCIAL AT + '\x01': '\u00A3', # POUND SIGN + '\x02': '\u0024', # DOLLAR SIGN + '\x03': '\u00A5', # YEN SIGN + '\x04': '\u00E8', # LATIN SMALL LETTER E WITH GRAVE + '\x05': '\u00E9', # LATIN SMALL LETTER E WITH ACUTE + '\x06': '\u00F9', # LATIN SMALL LETTER U WITH GRAVE + '\x07': '\u00EC', # LATIN SMALL LETTER I WITH GRAVE + '\x08': '\u00F2', # LATIN SMALL LETTER O WITH GRAVE + '\x09': '\u00C7', # LATIN CAPITAL LETTER C WITH CEDILLA + # The Unicode page suggests this is a mistake: but + # it's still in the latest version of the spec and + # our implementation has to be exact. 
+ + '\x0A': '\u000A', # LINE FEED + '\x0B': '\u00D8', # LATIN CAPITAL LETTER O WITH STROKE + '\x0C': '\u00F8', # LATIN SMALL LETTER O WITH STROKE + '\x0D': '\u000D', # CARRIAGE RETURN + '\x0E': '\u00C5', # LATIN CAPITAL LETTER A WITH RING ABOVE + '\x0F': '\u00E5', # LATIN SMALL LETTER A WITH RING ABOVE + '\x10': '\u0394', # GREEK CAPITAL LETTER DELTA + '\x11': '\u005F', # LOW LINE + '\x12': '\u03A6', # GREEK CAPITAL LETTER PHI + '\x13': '\u0393', # GREEK CAPITAL LETTER GAMMA + '\x14': '\u039B', # GREEK CAPITAL LETTER LAMDA + '\x15': '\u03A9', # GREEK CAPITAL LETTER OMEGA + '\x16': '\u03A0', # GREEK CAPITAL LETTER PI + '\x17': '\u03A8', # GREEK CAPITAL LETTER PSI + '\x18': '\u03A3', # GREEK CAPITAL LETTER SIGMA + '\x19': '\u0398', # GREEK CAPITAL LETTER THETA + '\x1A': '\u039E', # GREEK CAPITAL LETTER XI + '\x1C': '\u00C6', # LATIN CAPITAL LETTER AE + '\x1D': '\u00E6', # LATIN SMALL LETTER AE + '\x1E': '\u00DF', # LATIN SMALL LETTER SHARP S (German) + '\x1F': '\u00C9', # LATIN CAPITAL LETTER E WITH ACUTE + '\x20': '\u0020', # SPACE + '\x21': '\u0021', # EXCLAMATION MARK + '\x22': '\u0022', # QUOTATION MARK + '\x23': '\u0023', # NUMBER SIGN + '\x24': '\u00A4', # CURRENCY SIGN + '\x25': '\u0025', # PERCENT SIGN + '\x26': '\u0026', # AMPERSAND + '\x27': '\u0027', # APOSTROPHE + '\x28': '\u0028', # LEFT PARENTHESIS + '\x29': '\u0029', # RIGHT PARENTHESIS + '\x2A': '\u002A', # ASTERISK + '\x2B': '\u002B', # PLUS SIGN + '\x2C': '\u002C', # COMMA + '\x2D': '\u002D', # HYPHEN-MINUS + '\x2E': '\u002E', # FULL STOP + '\x2F': '\u002F', # SOLIDUS + '\x30': '\u0030', # DIGIT ZERO + '\x31': '\u0031', # DIGIT ONE + '\x32': '\u0032', # DIGIT TWO + '\x33': '\u0033', # DIGIT THREE + '\x34': '\u0034', # DIGIT FOUR + '\x35': '\u0035', # DIGIT FIVE + '\x36': '\u0036', # DIGIT SIX + '\x37': '\u0037', # DIGIT SEVEN + '\x38': '\u0038', # DIGIT EIGHT + '\x39': '\u0039', # DIGIT NINE + '\x3A': '\u003A', # COLON + '\x3B': '\u003B', # SEMICOLON + '\x3C': '\u003C', # LESS-THAN SIGN + '\x3D': 
'\u003D', # EQUALS SIGN + '\x3E': '\u003E', # GREATER-THAN SIGN + '\x3F': '\u003F', # QUESTION MARK + '\x40': '\u00A1', # INVERTED EXCLAMATION MARK + '\x41': '\u0041', # LATIN CAPITAL LETTER A + '\x42': '\u0042', # LATIN CAPITAL LETTER B + '\x43': '\u0043', # LATIN CAPITAL LETTER C + '\x44': '\u0044', # LATIN CAPITAL LETTER D + '\x45': '\u0045', # LATIN CAPITAL LETTER E + '\x46': '\u0046', # LATIN CAPITAL LETTER F + '\x47': '\u0047', # LATIN CAPITAL LETTER G + '\x48': '\u0048', # LATIN CAPITAL LETTER H + '\x49': '\u0049', # LATIN CAPITAL LETTER I + '\x4A': '\u004A', # LATIN CAPITAL LETTER J + '\x4B': '\u004B', # LATIN CAPITAL LETTER K + '\x4C': '\u004C', # LATIN CAPITAL LETTER L + '\x4D': '\u004D', # LATIN CAPITAL LETTER M + '\x4E': '\u004E', # LATIN CAPITAL LETTER N + '\x4F': '\u004F', # LATIN CAPITAL LETTER O + '\x50': '\u0050', # LATIN CAPITAL LETTER P + '\x51': '\u0051', # LATIN CAPITAL LETTER Q + '\x52': '\u0052', # LATIN CAPITAL LETTER R + '\x53': '\u0053', # LATIN CAPITAL LETTER S + '\x54': '\u0054', # LATIN CAPITAL LETTER T + '\x55': '\u0055', # LATIN CAPITAL LETTER U + '\x56': '\u0056', # LATIN CAPITAL LETTER V + '\x57': '\u0057', # LATIN CAPITAL LETTER W + '\x58': '\u0058', # LATIN CAPITAL LETTER X + '\x59': '\u0059', # LATIN CAPITAL LETTER Y + '\x5A': '\u005A', # LATIN CAPITAL LETTER Z + '\x5B': '\u00C4', # LATIN CAPITAL LETTER A WITH DIAERESIS + '\x5C': '\u00D6', # LATIN CAPITAL LETTER O WITH DIAERESIS + '\x5D': '\u00D1', # LATIN CAPITAL LETTER N WITH TILDE + '\x5E': '\u00DC', # LATIN CAPITAL LETTER U WITH DIAERESIS + '\x5F': '\u00A7', # SECTION SIGN + '\x60': '\u00BF', # INVERTED QUESTION MARK + '\x61': '\u0061', # LATIN SMALL LETTER A + '\x62': '\u0062', # LATIN SMALL LETTER B + '\x63': '\u0063', # LATIN SMALL LETTER C + '\x64': '\u0064', # LATIN SMALL LETTER D + '\x65': '\u0065', # LATIN SMALL LETTER E + '\x66': '\u0066', # LATIN SMALL LETTER F + '\x67': '\u0067', # LATIN SMALL LETTER G + '\x68': '\u0068', # LATIN SMALL LETTER H + '\x69': '\u0069', # 
LATIN SMALL LETTER I + '\x6A': '\u006A', # LATIN SMALL LETTER J + '\x6B': '\u006B', # LATIN SMALL LETTER K + '\x6C': '\u006C', # LATIN SMALL LETTER L + '\x6D': '\u006D', # LATIN SMALL LETTER M + '\x6E': '\u006E', # LATIN SMALL LETTER N + '\x6F': '\u006F', # LATIN SMALL LETTER O + '\x70': '\u0070', # LATIN SMALL LETTER P + '\x71': '\u0071', # LATIN SMALL LETTER Q + '\x72': '\u0072', # LATIN SMALL LETTER R + '\x73': '\u0073', # LATIN SMALL LETTER S + '\x74': '\u0074', # LATIN SMALL LETTER T + '\x75': '\u0075', # LATIN SMALL LETTER U + '\x76': '\u0076', # LATIN SMALL LETTER V + '\x77': '\u0077', # LATIN SMALL LETTER W + '\x78': '\u0078', # LATIN SMALL LETTER X + '\x79': '\u0079', # LATIN SMALL LETTER Y + '\x7A': '\u007A', # LATIN SMALL LETTER Z + '\x7B': '\u00E4', # LATIN SMALL LETTER A WITH DIAERESIS + '\x7C': '\u00F6', # LATIN SMALL LETTER O WITH DIAERESIS + '\x7D': '\u00F1', # LATIN SMALL LETTER N WITH TILDE + '\x7E': '\u00FC', # LATIN SMALL LETTER U WITH DIAERESIS + '\x7F': '\u00E0', # LATIN SMALL LETTER A WITH GRAVE +} + +# default GSM 03.38 escaped characters -> unicode +GSM_EXT_CHARSET = { + '\x1B\x0A': '\u000C', # FORM FEED + '\x1B\x14': '\u005E', # CIRCUMFLEX ACCENT + '\x1B\x28': '\u007B', # LEFT CURLY BRACKET + '\x1B\x29': '\u007D', # RIGHT CURLY BRACKET + '\x1B\x2F': '\u005C', # REVERSE SOLIDUS + '\x1B\x3C': '\u005B', # LEFT SQUARE BRACKET + '\x1B\x3D': '\u007E', # TILDE + '\x1B\x3E': '\u005D', # RIGHT SQUARE BRACKET + '\x1B\x40': '\u007C', # VERTICAL LINE + '\x1B\x65': '\u20AC', # EURO SIGN +} + +GSM_CHARSET = {**GSM_BASIC_CHARSET, **GSM_EXT_CHARSET} + +decoding_map = dict((ord(k), ord(v)) if len(k) == 1 else (bytes([ord(k[0]), ord(k[1])]), ord(v)) for k, v in GSM_CHARSET.items()) encoding_map = codecs.make_encoding_map(decoding_map) @@ -91,7 +223,7 @@ def search_gsm0338(encoding): def is_valid_gsm(text): ''' Validate if `text` is a valid gsm 03.338. 
''' - r = '^[' + re.escape(GSM_CHARSET) + ']+$' + r = '^[' + re.escape(''.join(list(GSM_CHARSET.values()))) + ']+$' return re.match(r, text, re.UNICODE) is not None diff --git a/messaging/utils.py b/messaging/utils.py index b89998e..5080d16 100644 --- a/messaging/utils.py +++ b/messaging/utils.py @@ -45,7 +45,9 @@ def dst(self, dt): def bytes_to_str(b): - return b.encode('latin1') + if isinstance(b, str): + return b.encode('latin1') + return b.decode('latin1') def to_array(pdu): @@ -95,7 +97,7 @@ def pack_8bits_to_7bits(message, udh=None): if udh is None: tl = len(txt) - txt += b'\x00' + txt += '\x00' msgl = int(len(txt) * 7 / 8) op = [-1] * msgl c = shift = 0 @@ -112,10 +114,10 @@ def pack_8bits_to_7bits(message, udh=None): pdu = chr(tl) + ''.join(map(chr, op)) else: - txt = b"\x00\x00\x00\x00\x00\x00" + txt + txt = "\x00\x00\x00\x00\x00\x00" + txt tl = len(txt) - txt += b'\x00' + txt += '\x00' msgl = int(len(txt) * 7 / 8) op = [-1] * msgl c = shift = 0 diff --git a/resources/pydump.py b/resources/pydump.py index 9daa570..d577264 100644 --- a/resources/pydump.py +++ b/resources/pydump.py @@ -105,6 +105,6 @@ s += " %02x" % unpack('B', c) # 000000 00 e0 1e a7 05 6f 00 10 - print "%06x%s" % (offset, s) + print("%06x%s" % (offset, s)) offset += perline diff --git a/test.log b/test.log new file mode 100644 index 0000000000000000000000000000000000000000..aedf17e0ff5b426dfb07d2a840db0b386bb5a8ae GIT binary patch literal 107596 zcmeI5>vkN+amV*_&dC$RA2!sH0YxT=J3xh!!wYCBHXTKjos-Bi4oH9`B3@vEl*D8E zA@d4(gS)xsU<(Kj406bTP=id(gW^giiE~sw?PgKfl zT^s8EvC4Zp_)+KA^%)Mn9K6#L&qjD`3|0oa`mPUtp?fFcp5c6bu%+vp`mUYbyRLJ4 z-mg6u?5Fy?6YSOSED+ZMHm_C2E0uL999|DT5&Ro^`c&sP!tY}p4^-w!c-raxSnoX7 z=VE_ee-_R-&)T9>t~QKujB~SSf$`m;q}}bo z7lYpnemD4Pus3)p4vBYXIRi(u8Phpj1KXF@QvVDGUk82njmod;Gxwzdas|x}O_&iO zYC(rV>w6V6Mzo)O-G3W2MD$snJJdBG9FMf5BmFnsX*h<3VO7_^(8QlhLxw8D!L75> ztC&pI$GX=n>i-W^rs>>=Ds?{f{VZtBw2Ajcs7Jl#1AkexH%fgZ-90VG%d=U=aPXa; zd#7^ISZ&6^oM(CPKz#V6+Sl%%>dHs@G{bV3$4PtAJ_h5P5sYucob201Y$60`kixkY 
zT6$G;peOpYm+w$cYj`}C+yn9Pn%iqCNH^8_|B<|F<8 zOrN1v+9mIg~*is<7b{%53^T)oSmxUKpvpB3^QdaU^;#je7U0q|S!w zu{0{pWf8JF$|UFfK65LV51&Yr-BKCMQ-Qv(<7NJWBVX^OF3(;_3L^+>By6$aK z&1tzZ_?1S72Wmy5?OjP^lfXlrJsSL{AR(!T!KPq*yQB6#&{Ny`-5%`dcU_-VwK%v? z-icb6ci{A;!5`G?;FG0bLpV|&!Yc4u`1upair0FRl9mD#AUXHAOsjH**BP5dER%m} z37-mH6}n5GI)A>_dvApI>mb3fCm0y7YWOC_-jq|d##Qq41^%x&P zBmeH5)!XX-JK+;Yx;T~|XN{Yanh#-!t9-Nx$*_I$lZxD<$zFx9WlN2{4v zt-EMA^D1k)w)27b>9^u%C<|{PEng14*FU70+jWn1j`W1fhXZCR?}FrervJzSuD%St zg8oS#Mq1Om=pn`>!;$ncDHQcN+S%n|dhXYz>0`eN^MSp=uf=CMx)~JsUEnk%4w#$0 znOY%nPD8!WbFo2FdunUa9UHMp7w&W2q{PXn&Q104dPVKPxZCu}>6!u(cpT4hZNhHt&wkA$!!`vapcdZ6@q#556l;HrH8r_Uq^kbu%Q3)=2Av!Yx z!CUYN^@4|NWJc#c(kM$SoamZyVu}^#^${;(W;*_gNRXq7<6}pZf8DL@4i!ysV>G_C zVFz7#=i){VT8Xul_iN;sUhDad9Mj8sA4ZOUmE8YAa-fVH?IWqvi=B5=Eisel8kgUW+HDrlT;KVP70vJ`j}NJi=8|KY4nMoz zek@HK?GgQbvew?DhcbhYvk_+TeKk?9r5~f#n4{TOYce0bp7jyD5W4C0sco0{Vttz0 z)@vPY!_9LG!vQ(c2F0P(aY$z>cm{ndyl&}ZR^Z{_;|iXvN44A2!4_`F!{+>FI!>4R zk(t)N1r9#d4D^*W=jY;Jw~8@4WzPOuvmWLS*gTn8F(dyz%!H2hG&8MJ&9txt@(grA zL-e_QTYH&K;A7r3UdAJzg)I!s!9O|kI#HQ4iY{<#b`%Pw{7qA7HNGg%{Wj&{ma`}X}H zhqy<-X>OBcwX3aER%Hq+uc>rO&%IRFJ&s5GJ`B>39zluLAJVl;8%x%vN-UFnj@PlC zkM=A0&sTZ7jW8eR`P2@}ngr`SNNihuexob&Wu(lZKFAyI$Jo!(RU)@iVyU$rZfh1$ z83-o(k?^M#`Z>+&N2}}f)`1Ex>8HtAFQb_&Qd?ar@pGlAm)16eK`P-3?DMO2tpwJ4 zH~;6mI=d}Tr}@pTQpuDr8>?1q`XrWGyi!>?MZL+K+Gf%c;;==3&5 zZgg*C0vcSCWMd72{(UOBVEP??M69>i`Zn*g>SF(wCGA{$_)fhv)mOd`{nqpV)=Iw% zcfZ$}NitjI+z&kPH0YS{17)Fo*osapc|~ubZL+3m-ZA~LUr{M1f^YR&o@*^qZMSV@gO9r$ z!D&q&{;%t}J-T96pmn{wJGx??C2LwOuy<^QdQCEIXLMytP>`8xf>bYaSMP1>>R(lk z*|fUiXptTB#qp+a!Mf(F|4@bcgZE!6Bf2i!*7$6`B5OMPgL>-Hh{9l> zjx62aG2W-}eiThNM=)>dY<8GeHfn09dj6Mye|Fq zVB=e247@J=M-7MJy?8yNF3$Tt1~dE2qo|F+jG+0gV!hb>?eg$&Pd&IKS3cvxN2}1g za$6RS3>f*E85)^E$ghWSXue$C81W~^-1ac;M9=Ooqm09m6-J3%?j%;oAmsAqhWOeb z*<>#-sadBi|UARy{DY$Csn-0K1)MV&W11RQy(uB zXX94)`O2S*Y>s+9$G&xT9IWSWX7{UfOREx&fxx}QRS*82+PlsSqF@NsAHIiu@`?j&+ur-os9CIPqSiPk&I~RhD)kKWu zLa-i7tXeJv({|;PBezF9$+0hl 
zi;z2xFSFmvjjz+!M)mEP@x||;pWN#Av}qyWNxxtvu7A&{a>Sx_5T~$7wCxOuR>gPG zBau_s=k)#M=lSAmQi?SLI3j25z+1@VJ@cYjdo3$E=V5JP2Iu{t|0q>$r^Vy!EoGx( z{u;}+TFBbEf8Tc;Y*ZveLSOmp5NuT3&K?!rTZ&^I4m+b!kz+0dlcPHs9B*{k8Cg|0 z=Hg&IcPCp@!vh3<;Fybp@#IdnrnWQUNshTV7*FnG^W@IRrou572b(?YWb@?ih$lJb z;jlXz+d1Y!u(4}5nHQH(u0NFN z1;8fs!kWdDr!1&hl|_u3yd86Trp!p0j*hjPrq)+XH#mxZlq5aj_l% zl>9!x+tvp-%f-cgQaI*H72b5-+0`?u!AGeBO0$44~xmF6)`}MquPq*F7>g z=JSjDWB_Frx)|>QZ5Fy1??RWlFFmm#^znH{Rl$|Up=~h{Uh?EjM=`eaL5OC#cNXtr zF&t?Yy7Bl9;>Qlf*~CAyHuYHlm?N^Xv8D4PU5BdZ43=>c4}o=g`LF8UBgNO)ZUEI- zt5hCR1CKyyk2m#w1w2-^xjr!uLigOh^QvW+c&o+HczT=`*~%=2nz$0vCC1u$Iz2q3%Y~@zl;((Yd%m|N$jd0Sve+3vz`8T1E;{N#CcyAJuPd5VMIKa?L`E=$nfEr6mN)n)O96f z^E}bSjKYt_ z#~csTo-U7?%1)nsB)zT+9)E3YeWw?(tF$N?0+9Pe2i_7F8#O=Exl2{fX~%cs7BYYE z-}kY)Ub!<2?-+&N2c{mx<`6gR&C@;eAY6t`^fD_FQ=(L{d177*2~DTk@n&P^$eor;jf`+$&Pd@ z>Vrt^ZgJvie7Muc`>c)1NZjg``9*^-)pW%A=&>XoXdV?!x;N<^--sqyqN-8UXflkh zf7Nx5^InAhM(J4Rs-;6eWPRTlZuo=I?!3p?bcQ!RGpYiCm5)luxrt5y~d_2&m zQLO(gW)YNfnlv33>!)WW_OTfBtAB*wq{Fn?PO3TtnjR&0v0b}<$8N-q?I)T$68DV7 zVm3>4HSgE>zi7#R80G-hW0@&3&qo8sikI#KWESpy%?;c}RkcHpmr=jswS-E1(Ez9K zQOEoh+bXjGa2m&Vx|JG@Jk11|)}o7@x$XVwmSW}sFEoQfgZG&^)Wp*FSQI~2?Z~!? 
zUizabLcXtevYax1$NEpiB(aXf^b)aX=d6ZeEAo3>-_{!+2^w=;&%;H=EvQ}ZwLZ%+ zjIM>!+20PEz_rFV>iI?YgI+VKp49L!xivifqj=}m27-0RI9fOS-Fd(kDD+WzmWWCecV7zB8hdHqG zK66*+x@MmAzK$ipGu~N-&&g%8^J0`p@WN_^dwH`y`zDjz+Y!~gOsr4)vL zd7E!nsqWCKQr*^~k?O$O*YPrco9l}O+t-h5EQtG*vELUKA{#xbb$2gN-Z2}WQ<-vH zDjD5a;XIY)#Gf#}@^NBC*Rd5cE>)j%88_ZaxmaW6G_M}Rds4}19n!MMjhS-UH^ENz zIP77wD_TJ*@@N{@?VMeSqlt~eSj{{W@kgT^=9igwzzv%4t(pTVGG_d2&l^lGV8h1pdoK$Ec*n}D%RevAYhk;fdNx7EXR}(4 z_oqqLCZBchsorr{E8CILc8X&ziye-YOj~(Et1?@UXDT0xX1?-zO?|@YdQJ5(+2K#t zb;)r|Q`)<{Vyy!Uu&vdndbepVNa*pJ&{Dvk>=#nkbjSbCqHkL+{;RERTOD$BZF@V) z#z{SQzM3`~wrkvsVrbZ0<9l-^P5V{QSemu$$Kp=DHIMRGdB(@@z95)>&+lf?-SJS`kiNevt9Unz_EmRVAwgHe zcQKWO>A6cO(+#X955vfx&Q^1E%qpGz-{kUb^?R~b%A0jLn~o%|87@}OWbRb-%%W$q zS7OmKP0kh_(OtoU>sQAs`y~C)CE{>+WrhhT#d7Iu@*TQ|KZe#T= z!84mJ`nI35XT%~by8UaGr;YY6XS&|{f%>J>2QO3et1|Cm(;?VB`<cVl^JuQYTrM(NXb0L{r67(!<_K@fLcc3{let+-OuRc{x$E^$(ua$c4A}D1~2-dE8kX+67GK8xiOAK9OR<#QGdl|4(-xPyI-+qPoTri(_K z>*dl)C)Xxd|C|i5yQXR0*wL?{u65$EzzP-760h{ZX5OSHp|u(;TZR9Z;N!qPgU_|r zmFls-4s(mAf|;iB1+_g4Px-pUGeIV@#k?Ng28sMdSY-l#R#X_(Shr-|0v`>Y^*K}e zmhZgC>IK-D1?p9RVA%ho&JDIjT>z6fEwE!vg3_D&=H}ft+G&8_P+i>I8jUXd$m1a*#nL>Dv#1*4@}C0!v7v>YEgH#0_9lk>3N64 zo3>N1M_uuht+QA?m`&0mT;ummfKFTv@4*2-gg2OJa-U~ub97|8?{tFRMjw1=YzNE- z0eA3y5j10qkZZO;&oVQLD*()hs=hX6``+!b63}d&|E95Lxy{CWYsdm?ws&%^kbS^b zCa~X=E$hCnx^3R#uJ}i!3Lo->b02RHVxgG{t;^GzEB9o%zpE$l#9@xKCun5IxveK} zYre$n>8_4<^m||Lu?_;vJ9?6`_rhJg{5j(Zo@71Xw#vAz=XslX*nK?%oO}B49_PGw zKfobl*FE8ISLKrrZcYElO9?iVhEEYNx6fcw2kx;hv8AV}7j>XUP=r+)>V02V?u7R! 
z;hxS|3xPUBh3^T9;c{31sGregS68VA z*Q_>pU)>GZKrd_0Rh35@PgZ{Sg8t538O6f*82Q!f;tMWGK@p&a$~wXoaaZFtr=A3vxq{e$y>Y#c>-w6JkD zoO4&tTT4M*sEQvK+(DVFi}AGe7+M`ZgM;tt4z)GzzN6>&!gF8^r_rBo>&#jay!lcG zdO1Ccc7tnxKrf`ffG1qTQ+qn6Jg~Ih08HoZ`vFflm@mDWx*12&)2KbUXTX6vP%AhQ zn5;QkuQz&9Q?NF!<{nq51MNU-0S#RIJ@@XT?^~~@uf=x9N6K1jZ9$xfA7ZmiL`K1N z&~aP!r4K{Zb=3rpfLWfCc5zAi!tYV2W+j83GM*q5h zowtqoYoD(l9`b_~ZPO!X^~3WO3hIDO9dBzq>|(uBD^{FOHF@J*6&?alueY`2tuU{t zdULLv^}VV+^P!$S9Yyq*hbOa$7;%q2PVdD}cSo}p;+MAdyQw46{XlnYrdypWK9UT5 zUuKIdTDe&fjo!DJSSnfEf3sh|V}|E^Gs)}AYy!^VWVXR}0jD#fnEyC3B*n&cT0fdk zQd|$Uya@Z66(5HADUfVd&AgfQ%!kr&ey{Iur4f7~ufcsqPk*W3hx+#)y53$IYdAyOhQcY*1Cu{H-Ms}D2!$D-Uzvv@ctAa7J4H@)q;eCllx$CWsP zsLq0kA!3$M9fhjTxSiTpYRUCtnM~4=tNKnC41a#6sbE)3-$>7$M{H#PT+IGW#FWpE zSgl~~hIJ8(tsn}8Cr(wr_Fh1rG=8e`zL)FAw9i^*A+z+uwZ@5QnN2ltNJeeuU{M_cTy*^$_s>3Y{_A}9Dd8FAnhtc-r?)JOJsr9RMzO?&oHFO-RM=zzysrAyD>0E#6 zSnb*_Q@m{2>QEnFrGe7d!ndsk&f&ArniAiL=GT;y3tPG64*O7LFORhZ;{qcC+3tq& zm0*PjsX-<<;sv%teQB*Wti8_Fqtm?|pL#2gV^nXq^TMo|BgXNACsLed;g9N?8Z@u_ zlR1s+<4Aox^f^dz~vv=H1JTo|)krT|s+@o*0wQE|ZT&TW#`uG}<6W zo8qML_yf@p-4UwR{W6-ofw92Zs`@j%@kW~CC$fd#khO`A#VsA`i37=9r1^oa;iK?I z&lo&17eCXtNutiOdVZO+SJ)iLR@cctVJqd+GgsJiS3PG!8;2TYJg;-g6HU_JILO{H zS%$5Ak+~FzUo0_hHh1?yYj7iS6f?_ zTneuBrQGk=Y;NguzmiMgr;$q`UuPYUrkSjr*6&;52UI0 z%~xx@m`n`K-kPQ{dIqyJ*V%0O5}K%JBe_ZYhhauMkIVEiR#wYO=SL|h-eC*P%q@`u6l9eEe{<$BPzYz1i2>T`~!n3f-VLi<_N4=3rCL@}CIR(!UugZ3u9E0OLUjn;-8x8yLWW=B|TTztu4(1pJ3J+I_2K$?^s2CVy4 z-XHgw<0g$x}S%68(xsV6+q1Q~ZgO=qaKsw)(Zv>(eIt#4oUDtjjUo^i z72&u(#|TIEJ1?FOS4kIh?b&q5S#`1K3Dhsrs_fDnb5M5MrJ1_YTYc6 zgwtEG?1qdy#T(t$9LsLVr?S7$i`Zkm?1s#`i0OOPJ=WWI-L#ptkJ#0HhRbfqKLj~W zrfp`(@Y$iV>8B%6dhU|lY=UJsWL9|Ver{E70q*00;n`)&Zpbmh&9rCFE8xCNwi@D` zRzDiGtJ)AFM%fLS{hCvaE7G~{hfpF3>s&SYzk7C|BTwF)x$K7gGVHHjA_>o9-;pls zm5bR8IrTJ{#j1UEbNjhfyIW?z`1Hkj7+}5f$iT&V8qC+aZR^%(pXcE%FiRQWSUe4i zb$eDGf{)y)Z@TgM>EBGsLnIH&obL!7OOj}8SHAd zKWMf-ne401=*Mm24+dWfLYH-p_EsE++y!QSE0(yvF7$10vy#6L303m%AWdR0E!R zc+WS}xaq3Xy?D2S9lsXm1op_)6parwKf_<8O7^g(p6J8NBD+D!!=PFb<#A8Mi`v 
zn~?rdz@Uh3a`g3Tm&8?K? z&abdPcyv6GNM4DC8+6j5xQc0iEl=F7$lZ|4c#5LdXKH*R3k^~T-t06E>`zCohJWx>u;4)>-O|!I@g~n zR`@d-}}fvbt~gvbt|4cEL%WfF{{( zOneyD3epVs(Q3akxTpQ*RKjOS~23C?Vv*LBYN7n zC-0u*Ueo9O+1;Kf)&3tWx_FeomcLH)H0LTK$P0}huLtjRhmnOb2OT``L%ZD{+*W+@ zZ)CN;my}lZzIOj~Mlt-Y4nx^bM?1`ZBFpX98rkrN->W@wUpVs)d(ytr*{PoWME~sR zeL*^^K*V$Rt+cI^QT>nf1bg+o)caO49>9jr#baNmM>FX}Up(IWZ>GtPbwz_5%0K-;-_60U-aXWnn~FK$gREKAab0&d^vt?G8@f+U_*LC| zB>WAZuGpKNX!~rZ*gp)|rz?b2%*W-%W3~5zD8UX|*Y#~1Bh6}k5jdWTq6Zx?9BO3X zbN)EiV?{jgHvX~9yWrS0wKJMNq&wj$OI&B5@?fyGxI`NF4Eq>x(1y zkPYb?FM7|-hryO&@48h5O?9Z*{(&?YVlEEDGsL-*$Ii-H*S}3^v*!7UE}1?T@o}5< XntJ$6>B6UHu+nGxTNHTPBk%toiae@n literal 0 HcmV?d00001 diff --git a/tests/test_gsm_encoding.py b/tests/test_gsm_encoding.py index 46d4cf5..ecb8fc5 100644 --- a/tests/test_gsm_encoding.py +++ b/tests/test_gsm_encoding.py @@ -19,7 +19,6 @@ from unittest import TestCase from messaging.sms.gsm0338 import is_valid_gsm, decoding_map # imports GSM7 codec -from messaging.sms.gsm0338old import def_regular_decode_dict # Reversed from: ftp://ftp.unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT MAP = { # chr(0x0000): (0x0000, 0x00), # Null @@ -191,16 +190,12 @@ class TestEncodingFunctions(TestCase): - def test_mappings_are_same(self): - translated_dict = dict((hex(k), ord(v)) for k, v in def_regular_decode_dict.items()) - self.assertDictEqual(decoding_map, translated_dict) - def test_encoding_supported_unicode_gsm(self): for key in list(MAP.keys()): # Use 'ignore' so that we see the code tested, not an exception s_gsm = key.encode('gsm0338', 'ignore') - + print(s_gsm) if len(s_gsm) == 1: i_gsm = s_gsm elif len(s_gsm) == 2: diff --git a/tests/test_wap.py b/tests/test_wap.py index ef38ce6..f941e75 100644 --- a/tests/test_wap.py +++ b/tests/test_wap.py @@ -40,9 +40,9 @@ def test_decoding_m_notification_ind(self): 
"0791447758100650400E80885810000000810004016082415464408C0C08049F8E020105040B8423F00106226170706C69636174696F6E2F766E642E7761702E6D6D732D6D65737361676500AF848C82984E4F4B3543694B636F544D595347344D4253774141734B7631344655484141414141414141008D908919802B3434373738353334323734392F545950453D504C4D4E008A808E0274008805810301194083687474703A2F", "0791447758100650440E8088581000000081000401608241547440440C08049F8E020205040B8423F02F70726F6D6D732F736572766C6574732F4E4F4B3543694B636F544D595347344D4253774141734B763134465548414141414141414100", ] - number = binascii.unhexlify(b'3838383530313030303030303138') + number = binascii.unhexlify(b'3838383530313030303030303138').decode() csca = "+447785016005" - data = "" + data = b"" sms = SmsDeliver(pdus[0]) self.assertEqual(sms.udh.concat.ref, 40846) @@ -67,7 +67,7 @@ def test_decoding_m_notification_ind(self): 'NOK5CiKcoTMYSG4MBSwAAsKv14FUHAAAAAAAA') self.assertEqual(mms.headers['MMS-Version'], '1.0') self.assertEqual(mms.headers['From'], - binascii.unhexlify(b'2b3434373738353334323734392f545950453d504c4d4e')) + binascii.unhexlify(b'2b3434373738353334323734392f545950453d504c4d4e').decode()) self.assertEqual(mms.headers['Message-Class'], 'Personal') self.assertEqual(mms.headers['Message-Size'], 29696) self.assertEqual(mms.headers['Expiry'], 72000) @@ -80,7 +80,7 @@ def test_decoding_m_notification_ind(self): ] number = "88850100000008" - data = "" + data = b"" sms = SmsDeliver(pdus[0]) self.assertEqual(sms.udh.concat.ref, 57299) @@ -104,7 +104,7 @@ def test_decoding_m_notification_ind(self): 'NOK5A1ZdFTMYSG4O3VQAAsJv94GoNAAAAAAAA') self.assertEqual(mms.headers['MMS-Version'], '1.0') self.assertEqual(mms.headers['From'], - binascii.unhexlify(b'2b3434373731373237353034392f545950453d504c4d4e')) + binascii.unhexlify(b'2b3434373731373237353034392f545950453d504c4d4e').decode()) self.assertEqual(mms.headers['Message-Class'], 'Personal') self.assertEqual(mms.headers['Message-Size'], 29696) self.assertEqual(mms.headers['Expiry'], 259199) @@ 
-116,9 +116,9 @@ def test_decoding_generic_wap_push(self): "0791947122725014440C8500947122921105F5112042519582408C0B05040B8423F0000396020101060B03AE81EAC3958D01A2B48403056A0A20566F6461666F6E650045C60C037761702E6D65696E63616C6C79612E64652F000801035A756D206B6F7374656E6C6F73656E20506F7274616C20224D65696E0083000322202D2065696E66616368206175662064656E20666F6C67656E64656E204C696E6B206B6C69636B656E", "0791947122725014440C8500947122921105F5112042519592403C0B05040B8423F00003960202206F6465722064696520536569746520646972656B7420617566727566656E2E2049687200830003205465616D000101", ] - number = binascii.unhexlify(b'303034393137323232393131') + number = binascii.unhexlify(b'303034393137323232393131').decode() csca = "+491722270541" - data = "" + data = b"" sms = SmsDeliver(pdus[0]) self.assertEqual(sms.udh.concat.ref, 150) @@ -135,7 +135,7 @@ def test_decoding_generic_wap_push(self): self.assertEqual(sms.number, number) data += sms.text - self.assertEqual(data, '\x01\x06\x0b\x03\xae\x81\xea\xc3\x95\x8d\x01\xa2\xb4\x84\x03\x05j\n Vodafone\x00E\xc6\x0c\x03wap.meincallya.de/\x00\x08\x01\x03Zum kostenlosen Portal "Mein\x00\x83\x00\x03" - einfach auf den folgenden Link klicken oder die Seite direkt aufrufen. Ihr\x00\x83\x00\x03 Team\x00\x01\x01') + self.assertEqual(data, b'\x01\x06\x0b\x03\xae\x81\xea\xc3\x95\x8d\x01\xa2\xb4\x84\x03\x05j\n Vodafone\x00E\xc6\x0c\x03wap.meincallya.de/\x00\x08\x01\x03Zum kostenlosen Portal "Mein\x00\x83\x00\x03" - einfach auf den folgenden Link klicken oder die Seite direkt aufrufen. 
Ihr\x00\x83\x00\x03 Team\x00\x01\x01') push = extract_push_notification(data) self.assertEqual(is_mms_notification(push), False) From 7bf5649c782bd2f2cd9afb37d3161187e79e164d Mon Sep 17 00:00:00 2001 From: Dom Amato Date: Thu, 9 Apr 2020 18:03:05 -0500 Subject: [PATCH 26/28] fix encoding --- messaging/sms/gsm0338.py | 84 ++++++++++++++++++++++++++++++------- messaging/sms/submit.py | 2 + messaging/utils.py | 10 +++-- test.log | Bin 107596 -> 0 bytes tests/test_gsm_encoding.py | 15 +++---- 5 files changed, 83 insertions(+), 28 deletions(-) delete mode 100644 test.log diff --git a/messaging/sms/gsm0338.py b/messaging/sms/gsm0338.py index b2ab2e6..046a72b 100644 --- a/messaging/sms/gsm0338.py +++ b/messaging/sms/gsm0338.py @@ -1,3 +1,4 @@ +# Refactored using cleaner code from # https://github.com/jezeniel/smsutil/blob/master/smsutil/codecs.py import codecs from array import array @@ -152,29 +153,82 @@ '\x1B\x65': '\u20AC', # EURO SIGN } +# Replacement characters, default is question mark. Used when it is not too +# important to ensure exact UTF-8 -> GSM -> UTF-8 equivilence, such as when +# humans read and write SMS. But for USSD and other M2M applications it's +# important to ensure the conversion is exact. 
+GSM_REPLACE_CHARSET = { + '\u00E7': '\x09', # LATIN SMALL LETTER C WITH CEDILLA + + '\u0391': '\x41', # GREEK CAPITAL LETTER ALPHA + '\u0392': '\x42', # GREEK CAPITAL LETTER BETA + '\u0395': '\x45', # GREEK CAPITAL LETTER EPSILON + '\u0397': '\x48', # GREEK CAPITAL LETTER ETA + '\u0399': '\x49', # GREEK CAPITAL LETTER IOTA + '\u039A': '\x4B', # GREEK CAPITAL LETTER KAPPA + '\u039C': '\x4D', # GREEK CAPITAL LETTER MU + '\u039D': '\x4E', # GREEK CAPITAL LETTER NU + '\u039F': '\x4F', # GREEK CAPITAL LETTER OMICRON + '\u03A1': '\x50', # GREEK CAPITAL LETTER RHO + '\u03A4': '\x54', # GREEK CAPITAL LETTER TAU + '\u03A7': '\x58', # GREEK CAPITAL LETTER CHI + '\u03A5': '\x59', # GREEK CAPITAL LETTER UPSILON + '\u0396': '\x5A', # GREEK CAPITAL LETTER ZETA +} + GSM_CHARSET = {**GSM_BASIC_CHARSET, **GSM_EXT_CHARSET} +QUESTION_MARK = ord('\u003F') +ESCAPE = ord('\x1B') +SPACE = ord('\u00A0') + decoding_map = dict((ord(k), ord(v)) if len(k) == 1 else (bytes([ord(k[0]), ord(k[1])]), ord(v)) for k, v in GSM_CHARSET.items()) -encoding_map = codecs.make_encoding_map(decoding_map) +encoding_map = dict((ord(v), ord(k)) for k, v in GSM_BASIC_CHARSET.items()) +ext_encoding_map = dict((ord(v), ord(k[1])) for k, v in GSM_EXT_CHARSET.items()) + +replace_encode_map = dict((ord(k), ord(v)) for k, v in GSM_REPLACE_CHARSET.items()) + +def encode_gsm0338(text, errors, encoding_map, ext_encoding_map, replace_encode_map): + encoded = b'' + for char in text: + ochar = ord(char) + ec = b'' + if ochar in encoding_map: + ec = encoding_map.get(ochar) + else: + if ochar in ext_encoding_map: + encoded += bytes([ESCAPE]) + ec = ext_encoding_map.get(ochar) + elif errors == 'strict': + raise UnicodeError("Invalid GSM character") + elif errors == 'replace': + ec = replace_encode_map.get(ochar, QUESTION_MARK) + print("replacing char %s with %s" % (char, ec)) + elif errors == 'ignore': + pass + else: + raise UnicodeError("Unknown error handling") + if isinstance(ec, int): + ec = bytes([ec]) + encoded += ec 
+ return encoded, len(encoded) def decode_gsm0338(text, decoding_map): - ESCAPE = ord('\x1b') - SPACE = ord(' ') decoded = '' skip = None for index, char in enumerate(bytes(text)): - next = index + 1 + next_char = index + 1 if skip == index: continue if char != ESCAPE: d = decoding_map.get(char) - elif char == ESCAPE and next < len(text): - ext_char = bytes([ESCAPE, text[next]]) + elif char == ESCAPE and next_char < len(text): + ext_char = bytes([ESCAPE, text[next_char]]) d = decoding_map.get(ext_char, SPACE) if d != SPACE: - skip = next + skip = next_char else: d = SPACE decoded += chr(d) @@ -182,21 +236,21 @@ def decode_gsm0338(text, decoding_map): class GSM0338Codec(codecs.Codec): - def encode(self, input, errors='strict'): - return codecs.charmap_encode(input, errors, encoding_map) + def encode(self, input_, errors='strict'): + return encode_gsm0338(input_, errors, encoding_map, ext_encoding_map, replace_encode_map) - def decode(self, input, errors='strict'): - return decode_gsm0338(input, decoding_map) + def decode(self, input_, errors='strict'): + return decode_gsm0338(input_, decoding_map) class GSM0338IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input, self.errors, encoding_map)[0] + def encode(self, input_, final=False): + return encode_gsm0338(input_, self.errors, encoding_map, ext_encoding_map, replace_encode_map)[0] class GSM0338IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return decode_gsm0338(input, decoding_map)[0] + def decode(self, input_, final=False): + return decode_gsm0338(input_, decoding_map)[0] class GSM0338StreamReader(GSM0338Codec, codecs.StreamReader): diff --git a/messaging/sms/submit.py b/messaging/sms/submit.py index 073541f..c909063 100644 --- a/messaging/sms/submit.py +++ b/messaging/sms/submit.py @@ -307,6 +307,8 @@ def _split_sms_message(self, text): sms_ref &= 0xFF for i, msg in enumerate(msgs): + if isinstance(msg, 
bytes): + msg = msg.decode() i += 1 total_parts = len(msgs) if limit == consts.SEVENBIT_SIZE: diff --git a/messaging/utils.py b/messaging/utils.py index 5080d16..6ce7519 100644 --- a/messaging/utils.py +++ b/messaging/utils.py @@ -45,9 +45,9 @@ def dst(self, dt): def bytes_to_str(b): - if isinstance(b, str): - return b.encode('latin1') - return b.decode('latin1') + if isinstance(b, bytes): + return b.decode('latin1') + return b def to_array(pdu): @@ -143,6 +143,8 @@ def pack_8bits_to_7bits(message, udh=None): def pack_8bits_to_8bit(message, udh=None): text = message if udh is not None: + if isinstance(udh, bytes): + udh = udh.decode() text = udh + text mlen = len(text) @@ -156,6 +158,8 @@ def pack_8bits_to_ucs2(message, udh=None): nmesg = '' if udh is not None: + if isinstance(udh, bytes): + udh = udh.decode() text = udh + text for n in text: diff --git a/test.log b/test.log deleted file mode 100644 index aedf17e0ff5b426dfb07d2a840db0b386bb5a8ae..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 107596 zcmeI5>vkN+amV*_&dC$RA2!sH0YxT=J3xh!!wYCBHXTKjos-Bi4oH9`B3@vEl*D8E zA@d4(gS)xsU<(Kj406bTP=id(gW^giiE~sw?PgKfl zT^s8EvC4Zp_)+KA^%)Mn9K6#L&qjD`3|0oa`mPUtp?fFcp5c6bu%+vp`mUYbyRLJ4 z-mg6u?5Fy?6YSOSED+ZMHm_C2E0uL999|DT5&Ro^`c&sP!tY}p4^-w!c-raxSnoX7 z=VE_ee-_R-&)T9>t~QKujB~SSf$`m;q}}bo z7lYpnemD4Pus3)p4vBYXIRi(u8Phpj1KXF@QvVDGUk82njmod;Gxwzdas|x}O_&iO zYC(rV>w6V6Mzo)O-G3W2MD$snJJdBG9FMf5BmFnsX*h<3VO7_^(8QlhLxw8D!L75> ztC&pI$GX=n>i-W^rs>>=Ds?{f{VZtBw2Ajcs7Jl#1AkexH%fgZ-90VG%d=U=aPXa; zd#7^ISZ&6^oM(CPKz#V6+Sl%%>dHs@G{bV3$4PtAJ_h5P5sYucob201Y$60`kixkY zT6$G;peOpYm+w$cYj`}C+yn9Pn%iqCNH^8_|B<|F<8 zOrN1v+9mIg~*is<7b{%53^T)oSmxUKpvpB3^QdaU^;#je7U0q|S!w zu{0{pWf8JF$|UFfK65LV51&Yr-BKCMQ-Qv(<7NJWBVX^OF3(;_3L^+>By6$aK z&1tzZ_?1S72Wmy5?OjP^lfXlrJsSL{AR(!T!KPq*yQB6#&{Ny`-5%`dcU_-VwK%v? 
z-icb6ci{A;!5`G?;FG0bLpV|&!Yc4u`1upair0FRl9mD#AUXHAOsjH**BP5dER%m} z37-mH6}n5GI)A>_dvApI>mb3fCm0y7YWOC_-jq|d##Qq41^%x&P zBmeH5)!XX-JK+;Yx;T~|XN{Yanh#-!t9-Nx$*_I$lZxD<$zFx9WlN2{4v zt-EMA^D1k)w)27b>9^u%C<|{PEng14*FU70+jWn1j`W1fhXZCR?}FrervJzSuD%St zg8oS#Mq1Om=pn`>!;$ncDHQcN+S%n|dhXYz>0`eN^MSp=uf=CMx)~JsUEnk%4w#$0 znOY%nPD8!WbFo2FdunUa9UHMp7w&W2q{PXn&Q104dPVKPxZCu}>6!u(cpT4hZNhHt&wkA$!!`vapcdZ6@q#556l;HrH8r_Uq^kbu%Q3)=2Av!Yx z!CUYN^@4|NWJc#c(kM$SoamZyVu}^#^${;(W;*_gNRXq7<6}pZf8DL@4i!ysV>G_C zVFz7#=i){VT8Xul_iN;sUhDad9Mj8sA4ZOUmE8YAa-fVH?IWqvi=B5=Eisel8kgUW+HDrlT;KVP70vJ`j}NJi=8|KY4nMoz zek@HK?GgQbvew?DhcbhYvk_+TeKk?9r5~f#n4{TOYce0bp7jyD5W4C0sco0{Vttz0 z)@vPY!_9LG!vQ(c2F0P(aY$z>cm{ndyl&}ZR^Z{_;|iXvN44A2!4_`F!{+>FI!>4R zk(t)N1r9#d4D^*W=jY;Jw~8@4WzPOuvmWLS*gTn8F(dyz%!H2hG&8MJ&9txt@(grA zL-e_QTYH&K;A7r3UdAJzg)I!s!9O|kI#HQ4iY{<#b`%Pw{7qA7HNGg%{Wj&{ma`}X}H zhqy<-X>OBcwX3aER%Hq+uc>rO&%IRFJ&s5GJ`B>39zluLAJVl;8%x%vN-UFnj@PlC zkM=A0&sTZ7jW8eR`P2@}ngr`SNNihuexob&Wu(lZKFAyI$Jo!(RU)@iVyU$rZfh1$ z83-o(k?^M#`Z>+&N2}}f)`1Ex>8HtAFQb_&Qd?ar@pGlAm)16eK`P-3?DMO2tpwJ4 zH~;6mI=d}Tr}@pTQpuDr8>?1q`XrWGyi!>?MZL+K+Gf%c;;==3&5 zZgg*C0vcSCWMd72{(UOBVEP??M69>i`Zn*g>SF(wCGA{$_)fhv)mOd`{nqpV)=Iw% zcfZ$}NitjI+z&kPH0YS{17)Fo*osapc|~ubZL+3m-ZA~LUr{M1f^YR&o@*^qZMSV@gO9r$ z!D&q&{;%t}J-T96pmn{wJGx??C2LwOuy<^QdQCEIXLMytP>`8xf>bYaSMP1>>R(lk z*|fUiXptTB#qp+a!Mf(F|4@bcgZE!6Bf2i!*7$6`B5OMPgL>-Hh{9l> zjx62aG2W-}eiThNM=)>dY<8GeHfn09dj6Mye|Fq zVB=e247@J=M-7MJy?8yNF3$Tt1~dE2qo|F+jG+0gV!hb>?eg$&Pd&IKS3cvxN2}1g za$6RS3>f*E85)^E$ghWSXue$C81W~^-1ac;M9=Ooqm09m6-J3%?j%;oAmsAqhWOeb z*<>#-sadBi|UARy{DY$Csn-0K1)MV&W11RQy(uB zXX94)`O2S*Y>s+9$G&xT9IWSWX7{UfOREx&fxx}QRS*82+PlsSqF@NsAHIiu@`?j&+ur-os9CIPqSiPk&I~RhD)kKWu zLa-i7tXeJv({|;PBezF9$+0hl zi;z2xFSFmvjjz+!M)mEP@x||;pWN#Av}qyWNxxtvu7A&{a>Sx_5T~$7wCxOuR>gPG zBau_s=k)#M=lSAmQi?SLI3j25z+1@VJ@cYjdo3$E=V5JP2Iu{t|0q>$r^Vy!EoGx( z{u;}+TFBbEf8Tc;Y*ZveLSOmp5NuT3&K?!rTZ&^I4m+b!kz+0dlcPHs9B*{k8Cg|0 z=Hg&IcPCp@!vh3<;Fybp@#IdnrnWQUNshTV7*FnG^W@IRrou572b(?YWb@?ih$lJb 
z;jlXz+d1Y!u(4}5nHQH(u0NFN z1;8fs!kWdDr!1&hl|_u3yd86Trp!p0j*hjPrq)+XH#mxZlq5aj_l% zl>9!x+tvp-%f-cgQaI*H72b5-+0`?u!AGeBO0$44~xmF6)`}MquPq*F7>g z=JSjDWB_Frx)|>QZ5Fy1??RWlFFmm#^znH{Rl$|Up=~h{Uh?EjM=`eaL5OC#cNXtr zF&t?Yy7Bl9;>Qlf*~CAyHuYHlm?N^Xv8D4PU5BdZ43=>c4}o=g`LF8UBgNO)ZUEI- zt5hCR1CKyyk2m#w1w2-^xjr!uLigOh^QvW+c&o+HczT=`*~%=2nz$0vCC1u$Iz2q3%Y~@zl;((Yd%m|N$jd0Sve+3vz`8T1E;{N#CcyAJuPd5VMIKa?L`E=$nfEr6mN)n)O96f z^E}bSjKYt_ z#~csTo-U7?%1)nsB)zT+9)E3YeWw?(tF$N?0+9Pe2i_7F8#O=Exl2{fX~%cs7BYYE z-}kY)Ub!<2?-+&N2c{mx<`6gR&C@;eAY6t`^fD_FQ=(L{d177*2~DTk@n&P^$eor;jf`+$&Pd@ z>Vrt^ZgJvie7Muc`>c)1NZjg``9*^-)pW%A=&>XoXdV?!x;N<^--sqyqN-8UXflkh zf7Nx5^InAhM(J4Rs-;6eWPRTlZuo=I?!3p?bcQ!RGpYiCm5)luxrt5y~d_2&m zQLO(gW)YNfnlv33>!)WW_OTfBtAB*wq{Fn?PO3TtnjR&0v0b}<$8N-q?I)T$68DV7 zVm3>4HSgE>zi7#R80G-hW0@&3&qo8sikI#KWESpy%?;c}RkcHpmr=jswS-E1(Ez9K zQOEoh+bXjGa2m&Vx|JG@Jk11|)}o7@x$XVwmSW}sFEoQfgZG&^)Wp*FSQI~2?Z~!? zUizabLcXtevYax1$NEpiB(aXf^b)aX=d6ZeEAo3>-_{!+2^w=;&%;H=EvQ}ZwLZ%+ zjIM>!+20PEz_rFV>iI?YgI+VKp49L!xivifqj=}m27-0RI9fOS-Fd(kDD+WzmWWCecV7zB8hdHqG zK66*+x@MmAzK$ipGu~N-&&g%8^J0`p@WN_^dwH`y`zDjz+Y!~gOsr4)vL zd7E!nsqWCKQr*^~k?O$O*YPrco9l}O+t-h5EQtG*vELUKA{#xbb$2gN-Z2}WQ<-vH zDjD5a;XIY)#Gf#}@^NBC*Rd5cE>)j%88_ZaxmaW6G_M}Rds4}19n!MMjhS-UH^ENz zIP77wD_TJ*@@N{@?VMeSqlt~eSj{{W@kgT^=9igwzzv%4t(pTVGG_d2&l^lGV8h1pdoK$Ec*n}D%RevAYhk;fdNx7EXR}(4 z_oqqLCZBchsorr{E8CILc8X&ziye-YOj~(Et1?@UXDT0xX1?-zO?|@YdQJ5(+2K#t zb;)r|Q`)<{Vyy!Uu&vdndbepVNa*pJ&{Dvk>=#nkbjSbCqHkL+{;RERTOD$BZF@V) z#z{SQzM3`~wrkvsVrbZ0<9l-^P5V{QSemu$$Kp=DHIMRGdB(@@z95)>&+lf?-SJS`kiNevt9Unz_EmRVAwgHe zcQKWO>A6cO(+#X955vfx&Q^1E%qpGz-{kUb^?R~b%A0jLn~o%|87@}OWbRb-%%W$q zS7OmKP0kh_(OtoU>sQAs`y~C)CE{>+WrhhT#d7Iu@*TQ|KZe#T= z!84mJ`nI35XT%~by8UaGr;YY6XS&|{f%>J>2QO3et1|Cm(;?VB`<cVl^JuQYTrM(NXb0L{r67(!<_K@fLcc3{let+-OuRc{x$E^$(ua$c4A}D1~2-dE8kX+67GK8xiOAK9OR<#QGdl|4(-xPyI-+qPoTri(_K z>*dl)C)Xxd|C|i5yQXR0*wL?{u65$EzzP-760h{ZX5OSHp|u(;TZR9Z;N!qPgU_|r zmFls-4s(mAf|;iB1+_g4Px-pUGeIV@#k?Ng28sMdSY-l#R#X_(Shr-|0v`>Y^*K}e 
zmhZgC>IK-D1?p9RVA%ho&JDIjT>z6fEwE!vg3_D&=H}ft+G&8_P+i>I8jUXd$m1a*#nL>Dv#1*4@}C0!v7v>YEgH#0_9lk>3N64 zo3>N1M_uuht+QA?m`&0mT;ummfKFTv@4*2-gg2OJa-U~ub97|8?{tFRMjw1=YzNE- z0eA3y5j10qkZZO;&oVQLD*()hs=hX6``+!b63}d&|E95Lxy{CWYsdm?ws&%^kbS^b zCa~X=E$hCnx^3R#uJ}i!3Lo->b02RHVxgG{t;^GzEB9o%zpE$l#9@xKCun5IxveK} zYre$n>8_4<^m||Lu?_;vJ9?6`_rhJg{5j(Zo@71Xw#vAz=XslX*nK?%oO}B49_PGw zKfobl*FE8ISLKrrZcYElO9?iVhEEYNx6fcw2kx;hv8AV}7j>XUP=r+)>V02V?u7R! z;hxS|3xPUBh3^T9;c{31sGregS68VA z*Q_>pU)>GZKrd_0Rh35@PgZ{Sg8t538O6f*82Q!f;tMWGK@p&a$~wXoaaZFtr=A3vxq{e$y>Y#c>-w6JkD zoO4&tTT4M*sEQvK+(DVFi}AGe7+M`ZgM;tt4z)GzzN6>&!gF8^r_rBo>&#jay!lcG zdO1Ccc7tnxKrf`ffG1qTQ+qn6Jg~Ih08HoZ`vFflm@mDWx*12&)2KbUXTX6vP%AhQ zn5;QkuQz&9Q?NF!<{nq51MNU-0S#RIJ@@XT?^~~@uf=x9N6K1jZ9$xfA7ZmiL`K1N z&~aP!r4K{Zb=3rpfLWfCc5zAi!tYV2W+j83GM*q5h zowtqoYoD(l9`b_~ZPO!X^~3WO3hIDO9dBzq>|(uBD^{FOHF@J*6&?alueY`2tuU{t zdULLv^}VV+^P!$S9Yyq*hbOa$7;%q2PVdD}cSo}p;+MAdyQw46{XlnYrdypWK9UT5 zUuKIdTDe&fjo!DJSSnfEf3sh|V}|E^Gs)}AYy!^VWVXR}0jD#fnEyC3B*n&cT0fdk zQd|$Uya@Z66(5HADUfVd&AgfQ%!kr&ey{Iur4f7~ufcsqPk*W3hx+#)y53$IYdAyOhQcY*1Cu{H-Ms}D2!$D-Uzvv@ctAa7J4H@)q;eCllx$CWsP zsLq0kA!3$M9fhjTxSiTpYRUCtnM~4=tNKnC41a#6sbE)3-$>7$M{H#PT+IGW#FWpE zSgl~~hIJ8(tsn}8Cr(wr_Fh1rG=8e`zL)FAw9i^*A+z+uwZ@5QnN2ltNJeeuU{M_cTy*^$_s>3Y{_A}9Dd8FAnhtc-r?)JOJsr9RMzO?&oHFO-RM=zzysrAyD>0E#6 zSnb*_Q@m{2>QEnFrGe7d!ndsk&f&ArniAiL=GT;y3tPG64*O7LFORhZ;{qcC+3tq& zm0*PjsX-<<;sv%teQB*Wti8_Fqtm?|pL#2gV^nXq^TMo|BgXNACsLed;g9N?8Z@u_ zlR1s+<4Aox^f^dz~vv=H1JTo|)krT|s+@o*0wQE|ZT&TW#`uG}<6W zo8qML_yf@p-4UwR{W6-ofw92Zs`@j%@kW~CC$fd#khO`A#VsA`i37=9r1^oa;iK?I z&lo&17eCXtNutiOdVZO+SJ)iLR@cctVJqd+GgsJiS3PG!8;2TYJg;-g6HU_JILO{H zS%$5Ak+~FzUo0_hHh1?yYj7iS6f?_ zTneuBrQGk=Y;NguzmiMgr;$q`UuPYUrkSjr*6&;52UI0 z%~xx@m`n`K-kPQ{dIqyJ*V%0O5}K%JBe_ZYhhauMkIVEiR#wYO=SL|h-eC*P%q@`u6l9eEe{<$BPzYz1i2>T`~!n3f-VLi<_N4=3rCL@}CIR(!UugZ3u9E0OLUjn;-8x8yLWW=B|TTztu4(1pJ3J+I_2K$?^s2CVy4 z-XHgw<0g$x}S%68(xsV6+q1Q~ZgO=qaKsw)(Zv>(eIt#4oUDtjjUo^i z72&u(#|TIEJ1?FOS4kIh?b&q5S#`1K3Dhsrs_fDnb5M5MrJ1_YTYc6 
zgwtEG?1qdy#T(t$9LsLVr?S7$i`Zkm?1s#`i0OOPJ=WWI-L#ptkJ#0HhRbfqKLj~W zrfp`(@Y$iV>8B%6dhU|lY=UJsWL9|Ver{E70q*00;n`)&Zpbmh&9rCFE8xCNwi@D` zRzDiGtJ)AFM%fLS{hCvaE7G~{hfpF3>s&SYzk7C|BTwF)x$K7gGVHHjA_>o9-;pls zm5bR8IrTJ{#j1UEbNjhfyIW?z`1Hkj7+}5f$iT&V8qC+aZR^%(pXcE%FiRQWSUe4i zb$eDGf{)y)Z@TgM>EBGsLnIH&obL!7OOj}8SHAd zKWMf-ne401=*Mm24+dWfLYH-p_EsE++y!QSE0(yvF7$10vy#6L303m%AWdR0E!R zc+WS}xaq3Xy?D2S9lsXm1op_)6parwKf_<8O7^g(p6J8NBD+D!!=PFb<#A8Mi`v zn~?rdz@Uh3a`g3Tm&8?K? z&abdPcyv6GNM4DC8+6j5xQc0iEl=F7$lZ|4c#5LdXKH*R3k^~T-t06E>`zCohJWx>u;4)>-O|!I@g~n zR`@d-}}fvbt~gvbt|4cEL%WfF{{( zOneyD3epVs(Q3akxTpQ*RKjOS~23C?Vv*LBYN7n zC-0u*Ueo9O+1;Kf)&3tWx_FeomcLH)H0LTK$P0}huLtjRhmnOb2OT``L%ZD{+*W+@ zZ)CN;my}lZzIOj~Mlt-Y4nx^bM?1`ZBFpX98rkrN->W@wUpVs)d(ytr*{PoWME~sR zeL*^^K*V$Rt+cI^QT>nf1bg+o)caO49>9jr#baNmM>FX}Up(IWZ>GtPbwz_5%0K-;-_60U-aXWnn~FK$gREKAab0&d^vt?G8@f+U_*LC| zB>WAZuGpKNX!~rZ*gp)|rz?b2%*W-%W3~5zD8UX|*Y#~1Bh6}k5jdWTq6Zx?9BO3X zbN)EiV?{jgHvX~9yWrS0wKJMNq&wj$OI&B5@?fyGxI`NF4Eq>x(1y zkPYb?FM7|-hryO&@48h5O?9Z*{(&?YVlEEDGsL-*$Ii-H*S}3^v*!7UE}1?T@o}5< XntJ$6>B6UHu+nGxTNHTPBk%toiae@n diff --git a/tests/test_gsm_encoding.py b/tests/test_gsm_encoding.py index ecb8fc5..0ba0b18 100644 --- a/tests/test_gsm_encoding.py +++ b/tests/test_gsm_encoding.py @@ -195,9 +195,8 @@ def test_encoding_supported_unicode_gsm(self): for key in list(MAP.keys()): # Use 'ignore' so that we see the code tested, not an exception s_gsm = key.encode('gsm0338', 'ignore') - print(s_gsm) if len(s_gsm) == 1: - i_gsm = s_gsm + i_gsm = ord(s_gsm) elif len(s_gsm) == 2: i_gsm = (s_gsm[0] << 8) + s_gsm[1] else: @@ -215,11 +214,10 @@ def test_encoding_supported_greek_unicode_gsm(self): for key in list(GREEK_MAP.keys()): # Use 'replace' so that we trigger the mapping s_gsm = key.encode('gsm0338', 'replace') - if len(s_gsm) != 1: s_gsm = BAD # so we see the comparison, not an exception - self.assertEqual(GREEK_MAP[key][1], s_gsm) + self.assertEqual(GREEK_MAP[key][1], ord(s_gsm)) def 
test_encoding_supported_quirk_unicode_gsm(self): # Note: Conversion is one way, hence no corresponding decode test @@ -227,22 +225,19 @@ def test_encoding_supported_quirk_unicode_gsm(self): for key in list(QUIRK_MAP.keys()): # Use 'replace' so that we trigger the mapping s_gsm = key.encode('gsm0338', 'replace') - if len(s_gsm) != 1: s_gsm = BAD # so we see the comparison, not an exception - self.assertEqual(QUIRK_MAP[key][1], s_gsm) + self.assertEqual(QUIRK_MAP[key][1], ord(s_gsm)) def test_decoding_supported_unicode_gsm(self): for key in list(MAP.keys()): i_gsm = MAP[key][1] if i_gsm <= 0xff: - s_gsm = chr(i_gsm) + s_unicode = bytes([i_gsm]).decode('gsm0338') elif i_gsm <= 0xffff: - s_gsm = chr((i_gsm & 0xff00) >> 8) - s_gsm += chr(i_gsm & 0x00ff) + s_unicode = bytes([((i_gsm & 0xff00) >> 8), i_gsm & 0x00ff]).decode('gsm0338') - s_unicode = s_gsm.encode('gsm0338') self.assertEqual(MAP[key][0], ord(s_unicode)) def test_is_valid_gsm_true(self): From b03371e21e54905c8f80875de690f2c0cfadcf7d Mon Sep 17 00:00:00 2001 From: Dom Amato Date: Fri, 10 Apr 2020 13:27:57 -0500 Subject: [PATCH 27/28] fix things --- doc/modules/utils.rst | 4 +- messaging/mms/iterator.py | 3 +- messaging/mms/mms_pdu.py | 8 +- messaging/mms/wsp_pdu.py | 66 ++++---- messaging/sms/deliver.py | 19 +-- messaging/sms/gsm0338.py | 25 ++- messaging/sms/gsm0338old.py | 293 ------------------------------------ messaging/sms/submit.py | 45 +++--- messaging/utils.py | 103 ++++++++++--- tests/test_mms.py | 2 +- tests/test_sms.py | 30 +++- tests/test_udh.py | 6 +- 12 files changed, 205 insertions(+), 399 deletions(-) delete mode 100644 messaging/sms/gsm0338old.py diff --git a/doc/modules/utils.rst b/doc/modules/utils.rst index 21f89ab..db854a2 100644 --- a/doc/modules/utils.rst +++ b/doc/modules/utils.rst @@ -15,9 +15,7 @@ Functions .. autofunction:: bytes_to_str -.. autofunction:: to_array - -.. autofunction:: to_bytes +.. autofunction:: hex_to_int_array .. 
autofunction:: swap diff --git a/messaging/mms/iterator.py b/messaging/mms/iterator.py index b4a77eb..a1b6fc1 100644 --- a/messaging/mms/iterator.py +++ b/messaging/mms/iterator.py @@ -40,8 +40,7 @@ def __next__(self): self.reset_preview() if len(self._cached_values) > 0: return self._cached_values.pop(0) - else: - return next(self._it) + return next(self._it) def preview(self): """ diff --git a/messaging/mms/mms_pdu.py b/messaging/mms/mms_pdu.py index 62dd9d6..dd32749 100644 --- a/messaging/mms/mms_pdu.py +++ b/messaging/mms/mms_pdu.py @@ -16,7 +16,6 @@ import os import random -from messaging.utils import debug from messaging.mms import message, wsp_pdu from messaging.mms.iterator import PreviewIterator @@ -320,7 +319,7 @@ def decode_encoded_string_value(byte_iter): raise Exception('encoded_string_value decoding error - ' 'Could not decode Charset value: %s' % e) - return wsp_pdu.Decoder.decode_text_string(byte_iter) + return wsp_pdu.Decoder.decode_text_string(byte_iter, charset) except wsp_pdu.DecodeError: # Fall back on just "Text-string" return wsp_pdu.Decoder.decode_text_string(byte_iter) @@ -753,8 +752,7 @@ def encode_message_body(self): for page in self._mms_message._pages: num_entries += page.number_of_parts() - for data_part in self._mms_message._data_parts: - num_entries += 1 + num_entries += len(self._mms_message._data_parts) message_body.extend(self.encode_uint_var(num_entries)) @@ -848,7 +846,7 @@ def encode_header(header_field_name, header_value): raise wsp_pdu.EncodeError('Error encoding parameter ' 'value: %s' % e) except: - debug('A fatal error occurred, probably due to an ' + logging.error('A fatal error occurred, probably due to an ' 'unimplemented encoding operation') raise diff --git a/messaging/mms/wsp_pdu.py b/messaging/mms/wsp_pdu.py index 6b75440..3100efe 100644 --- a/messaging/mms/wsp_pdu.py +++ b/messaging/mms/wsp_pdu.py @@ -44,11 +44,11 @@ import array from datetime import datetime +import logging -from messaging.utils import debug from 
messaging.mms.iterator import PreviewIterator -wsp_pdu_types = { +WSP_PDU_TYPES = { 0x01: 'Connect', 0x02: 'ConnectReply', 0x03: 'Redirect', @@ -63,7 +63,7 @@ } # Well-known parameter assignments ([5], table 38) -well_known_parameters = { +WELL_KNOWN_PARAMETERS = { 0x00: ('Q', 'q_value'), 0x01: ('Charset', 'well_known_charset'), 0x02: ('Level', 'version_value'), @@ -97,7 +97,7 @@ # Content type assignments ([5], table 40) -well_known_content_types = [ +WELL_KNOWN_CONTENT_TYPES = [ '*/*', 'text/*', 'text/html', 'text/plain', 'text/x-hdml', 'text/x-ttml', 'text/x-vCalendar', 'text/x-vCard', 'text/vnd.wap.wml', @@ -164,7 +164,7 @@ # Note that the assigned number is the same as the IANA MIBEnum value # "gsm-default-alphabet" is not included, as it is not assigned any # value in [5]. Also note, this is by no means a complete list -well_known_charsets = { +WELL_KNOWN_CHARSETS = { 0x07EA: 'big5', 0x03E8: 'iso-10646-ucs-2', 0x04: 'iso-8859-1', @@ -182,7 +182,7 @@ } # Header Field Name assignments ([5], table 39) -header_field_names = [ +HEADER_FIELD_NAMES = [ 'Accept', 'Accept-Charset', 'Accept-Encoding', 'Accept-Language', 'Accept-Ranges', 'Age', 'Allow', 'Authorization', 'Cache-Control', @@ -214,11 +214,11 @@ ] -# TODO: combine this dict with the header_field_names table (same as well +# TODO: combine this dict with the HEADER_FIELD_NAMES table (same as well # known parameter assignments) # Temporary fix to allow different types of header field values to be # dynamically decoded -header_field_encodings = {'Accept': 'accept_value', 'Pragma': 'pragma_value'} +HEADER_FIELD_ENCODINGS = {'Accept': 'accept_value', 'Pragma': 'pragma_value'} def get_header_field_names(version='1.2'): @@ -243,7 +243,7 @@ def get_header_field_names(version='1.2'): version = int(version.split('.')[1]) - versioned_field_names = header_field_names[:] + versioned_field_names = HEADER_FIELD_NAMES[:] if version == 3: versioned_field_names = versioned_field_names[:0x44] elif version == 2: @@ -281,7 
+281,7 @@ def get_well_known_parameters(version='1.2'): else: version = int(version.split('.')[1]) - versioned_params = well_known_parameters.copy() + versioned_params = WELL_KNOWN_PARAMETERS.copy() if version <= 3: for assigned_number in range(0x11, 0x1e): del versioned_params[assigned_number] @@ -454,7 +454,7 @@ def decode_long_integer(byte_iter): return longInt @staticmethod - def decode_text_string(byte_iter): + def decode_text_string(byte_iter, encoding = 'utf-8'): """ Decodes the null-terminated, binary-encoded string value starting at the byte pointed to by ``byte_iter``. @@ -473,17 +473,22 @@ def decode_text_string(byte_iter): :return: The decoded text string :rtype: str """ - decoded_string = '' + b_decoded_string = b'' byte = next(byte_iter) # Remove Quote character (octet 127), if present if byte == 127: byte = next(byte_iter) while byte != 0x00: - decoded_string += chr(byte) + b_decoded_string += bytes([byte]) byte = next(byte_iter) - return decoded_string + try: + # Lets try to decode it to the given encoding + # if that fails we probably have characters that need to be escaped + return b_decoded_string.decode(encoding) + except UnicodeError: + return b_decoded_string.decode("unicode_escape") @staticmethod def decode_quoted_string(byte_iter): @@ -759,7 +764,7 @@ def decode_well_known_media(byte_iter): 'integer value representing it') try: - return well_known_content_types[value] + return WELL_KNOWN_CONTENT_TYPES[value] except IndexError: raise DecodeError('Invalid well-known media: could not ' 'find content type in table of assigned values') @@ -809,7 +814,7 @@ def decode_constrained_media(byte_iter): if isinstance(media_value, int): try: - return well_known_content_types[media_value] + return WELL_KNOWN_CONTENT_TYPES[media_value] except IndexError: raise DecodeError('Invalid constrained media: could not ' 'find well-known content type') @@ -901,7 +906,7 @@ def decode_typed_parameter(byte_iter): except DecodeError as e: raise DecodeError('Could not 
decode Typed-parameter: %s' % e) except: - debug('A fatal error occurred, probably due to an ' + logging.error('A fatal error occurred, probably due to an ' 'unimplemented decoding operation') raise @@ -1290,8 +1295,8 @@ def decode_well_known_charset(byte_iter): decoded_charset = '*' else: charset_value = Decoder.decode_integer_value(byte_iter) - if charset_value in well_known_charsets: - decoded_charset = well_known_charsets[charset_value] + if charset_value in WELL_KNOWN_CHARSETS: + decoded_charset = WELL_KNOWN_CHARSETS[charset_value] else: # This charset is not in our table... so just use the # value (at least for now) @@ -1328,15 +1333,15 @@ def decode_well_known_header(byte_iter): # decode_application_header also # Currently we decode most headers as text_strings, except # where we have a specific decoding algorithm implemented - if field_name in header_field_encodings: - wap_value_type = header_field_encodings[field_name] + if field_name in HEADER_FIELD_ENCODINGS: + wap_value_type = HEADER_FIELD_ENCODINGS[field_name] try: decoded_value = getattr(Decoder, 'decode_%s' % wap_value_type)(byte_iter) except DecodeError as e: raise DecodeError('Could not decode Wap-value: %s' % e) except: - debug('An error occurred, probably due to an ' + logging.error('An error occurred, probably due to an ' 'unimplemented decoding operation. 
Tried to ' 'decode header: %s' % field_name) raise @@ -1372,6 +1377,7 @@ def decode_application_header(byte_iter): app_header = Decoder.decode_text_string(byte_iter) app_specific_value = Decoder.decode_text_string(byte_iter) + return app_header, app_specific_value @staticmethod @@ -1606,10 +1612,10 @@ def encode_media_type(content_type): values :rtype: list """ - if content_type in well_known_content_types: + if content_type in WELL_KNOWN_CONTENT_TYPES: # Short-integer encoding val = Encoder.encode_short_integer( - well_known_content_types.index(content_type)) + WELL_KNOWN_CONTENT_TYPES.index(content_type)) else: val = Encoder.encode_text_string(content_type) @@ -1667,7 +1673,7 @@ def encode_parameter(parameter_name, parameter_value, version='1.2'): except EncodeError as e: raise EncodeError('Error encoding param value: %s' % e) except: - debug('A fatal error occurred, probably due to an ' + logging.error('A fatal error occurred, probably due to an ' 'unimplemented encoding operation') raise break @@ -1794,15 +1800,15 @@ def encode_header(field_name, value): # TODO: make this flow better (see also Decoder.decode_header) # most header values are encoded as text_strings, except where we # have a specific Wap-value encoding implementation - if field_name in header_field_encodings: - wap_value_type = header_field_encodings[field_name] + if field_name in HEADER_FIELD_ENCODINGS: + wap_value_type = HEADER_FIELD_ENCODINGS[field_name] try: ret = getattr(Encoder, 'encode_%s' % wap_value_type)(value) encoded_header.extend(ret) except EncodeError as e: raise EncodeError('Error encoding Wap-value: %s' % e) except: - debug('A fatal error occurred, probably due to an ' + logging.error('A fatal error occurred, probably due to an ' 'unimplemented encoding operation') raise else: @@ -1858,8 +1864,8 @@ def encode_constrained_media(media_type): :rtype: list """ # See if this value is in the table of well-known content types - if media_type in well_known_content_types: - value = 
well_known_content_types.index(media_type) + if media_type in WELL_KNOWN_CONTENT_TYPES: + value = WELL_KNOWN_CONTENT_TYPES.index(media_type) else: value = media_type diff --git a/messaging/sms/deliver.py b/messaging/sms/deliver.py index 7696c5d..b59529a 100644 --- a/messaging/sms/deliver.py +++ b/messaging/sms/deliver.py @@ -2,9 +2,10 @@ """Classes for processing received SMS""" from datetime import datetime, timedelta +import logging -from messaging.utils import (swap, swap_number, encode_bytes, debug, - unpack_msg, unpack_msg2, to_array) +from messaging.utils import (swap, swap_number, encode_bytes, + unpack_msg, hex_to_int_array) from messaging.sms import consts from messaging.sms.base import SmsBase from messaging.sms.udh import UserDataHeader @@ -65,7 +66,7 @@ def _set_pdu(self, pdu): # XXX: Should we keep the original PDU or the modified one? self._pdu = pdu - data = to_array(self._pdu) + data = hex_to_int_array(self._pdu) # Service centre address smscl = data.pop(0) @@ -106,7 +107,7 @@ def _set_pdu(self, pdu): sndtype = (data.pop(0) >> 4) & 0x07 if sndtype == consts.ALPHANUMERIC: # coded according to 3GPP TS 23.038 [9] GSM 7-bit default alphabet - sender = unpack_msg2(data[:sndlen]).decode("gsm0338") + sender = unpack_msg(data[:sndlen]).decode("gsm0338") else: # Extract phone number of sender sender = swap_number(encode_bytes(data[:sndlen])) @@ -172,11 +173,7 @@ def _process_message(self, data): headlen = int(headlen) if self.fmt == 0x00: - # XXX: Use unpack_msg2 - - data = data[ud_len:].tolist() - - self.text = unpack_msg2(data).decode("gsm0338") + self.text = unpack_msg(msg)[headlen:msgl].decode("gsm0338") elif self.fmt == 0x04: self.text = data[ud_len:].tobytes() @@ -210,7 +207,7 @@ def _decode_status_report_pdu(self, data): self.date = datetime.strptime(scts_str, "%y/%m/%d %H:%M:%S") except (ValueError, TypeError): scts_str = '' - debug('Could not decode scts: %s' % date) + logging.debug('Could not decode scts: %s' % date) data = data[7:] @@ -221,7 
+218,7 @@ def _decode_status_report_pdu(self, data): except (ValueError, TypeError): dt_str = '' dt = None - debug('Could not decode date: %s' % date) + logging.debug('Could not decode date: %s' % date) data = data[7:] diff --git a/messaging/sms/gsm0338.py b/messaging/sms/gsm0338.py index 046a72b..dc1243f 100644 --- a/messaging/sms/gsm0338.py +++ b/messaging/sms/gsm0338.py @@ -1,3 +1,17 @@ +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ # Refactored using cleaner code from # https://github.com/jezeniel/smsutil/blob/master/smsutil/codecs.py import codecs @@ -180,7 +194,7 @@ QUESTION_MARK = ord('\u003F') ESCAPE = ord('\x1B') -SPACE = ord('\u00A0') +NBSP = ord('\u00A0') decoding_map = dict((ord(k), ord(v)) if len(k) == 1 else (bytes([ord(k[0]), ord(k[1])]), ord(v)) for k, v in GSM_CHARSET.items()) @@ -205,7 +219,6 @@ def encode_gsm0338(text, errors, encoding_map, ext_encoding_map, replace_encode_ raise UnicodeError("Invalid GSM character") elif errors == 'replace': ec = replace_encode_map.get(ochar, QUESTION_MARK) - print("replacing char %s with %s" % (char, ec)) elif errors == 'ignore': pass else: @@ -226,11 +239,11 @@ def decode_gsm0338(text, decoding_map): d = decoding_map.get(char) elif char == ESCAPE and next_char < len(text): ext_char = bytes([ESCAPE, text[next_char]]) - d = decoding_map.get(ext_char, SPACE) - if d != SPACE: - skip = next_char + d = decoding_map.get(ext_char, NBSP) + if d != NBSP: + skip = next_char else: - d = SPACE + d = NBSP decoded += chr(d) return decoded, len(decoded) diff --git a/messaging/sms/gsm0338old.py b/messaging/sms/gsm0338old.py deleted file mode 100644 index b57668b..0000000 --- a/messaging/sms/gsm0338old.py +++ /dev/null @@ -1,293 +0,0 @@ -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- -import codecs -import sys -import traceback - -# data from -# http://snoops.roy202.org/testerman/browser/trunk/plugins/codecs/gsm0338.py - -# default GSM 03.38 -> unicode -def_regular_decode_dict = { - '\x00': '\u0040', # COMMERCIAL AT - '\x01': '\u00A3', # POUND SIGN - '\x02': '\u0024', # DOLLAR SIGN - '\x03': '\u00A5', # YEN SIGN - '\x04': '\u00E8', # LATIN SMALL LETTER E WITH GRAVE - '\x05': '\u00E9', # LATIN SMALL LETTER E WITH ACUTE - '\x06': '\u00F9', # LATIN SMALL LETTER U WITH GRAVE - '\x07': '\u00EC', # LATIN SMALL LETTER I WITH GRAVE - '\x08': '\u00F2', # LATIN SMALL LETTER O WITH GRAVE - '\x09': '\u00C7', # LATIN CAPITAL LETTER C WITH CEDILLA - # The Unicode page suggests this is a mistake: but - # it's still in the latest version of the spec and - # our implementation has to be exact. - - '\x0A': '\u000A', # LINE FEED - '\x0B': '\u00D8', # LATIN CAPITAL LETTER O WITH STROKE - '\x0C': '\u00F8', # LATIN SMALL LETTER O WITH STROKE - '\x0D': '\u000D', # CARRIAGE RETURN - '\x0E': '\u00C5', # LATIN CAPITAL LETTER A WITH RING ABOVE - '\x0F': '\u00E5', # LATIN SMALL LETTER A WITH RING ABOVE - '\x10': '\u0394', # GREEK CAPITAL LETTER DELTA - '\x11': '\u005F', # LOW LINE - '\x12': '\u03A6', # GREEK CAPITAL LETTER PHI - '\x13': '\u0393', # GREEK CAPITAL LETTER GAMMA - '\x14': '\u039B', # GREEK CAPITAL LETTER LAMDA - '\x15': '\u03A9', # GREEK CAPITAL LETTER OMEGA - '\x16': '\u03A0', # GREEK CAPITAL LETTER PI - '\x17': '\u03A8', # GREEK CAPITAL LETTER PSI - '\x18': '\u03A3', # GREEK CAPITAL LETTER SIGMA - '\x19': '\u0398', # GREEK CAPITAL LETTER THETA - '\x1A': '\u039E', # GREEK CAPITAL LETTER XI - '\x1C': '\u00C6', # LATIN CAPITAL LETTER AE - '\x1D': '\u00E6', # LATIN SMALL LETTER AE - '\x1E': '\u00DF', # LATIN SMALL LETTER SHARP S (German) - '\x1F': '\u00C9', # LATIN CAPITAL LETTER E WITH ACUTE - '\x20': '\u0020', # SPACE - '\x21': '\u0021', # EXCLAMATION MARK - '\x22': '\u0022', # QUOTATION MARK - '\x23': '\u0023', # NUMBER SIGN - '\x24': '\u00A4', # CURRENCY 
SIGN - '\x25': '\u0025', # PERCENT SIGN - '\x26': '\u0026', # AMPERSAND - '\x27': '\u0027', # APOSTROPHE - '\x28': '\u0028', # LEFT PARENTHESIS - '\x29': '\u0029', # RIGHT PARENTHESIS - '\x2A': '\u002A', # ASTERISK - '\x2B': '\u002B', # PLUS SIGN - '\x2C': '\u002C', # COMMA - '\x2D': '\u002D', # HYPHEN-MINUS - '\x2E': '\u002E', # FULL STOP - '\x2F': '\u002F', # SOLIDUS - '\x30': '\u0030', # DIGIT ZERO - '\x31': '\u0031', # DIGIT ONE - '\x32': '\u0032', # DIGIT TWO - '\x33': '\u0033', # DIGIT THREE - '\x34': '\u0034', # DIGIT FOUR - '\x35': '\u0035', # DIGIT FIVE - '\x36': '\u0036', # DIGIT SIX - '\x37': '\u0037', # DIGIT SEVEN - '\x38': '\u0038', # DIGIT EIGHT - '\x39': '\u0039', # DIGIT NINE - '\x3A': '\u003A', # COLON - '\x3B': '\u003B', # SEMICOLON - '\x3C': '\u003C', # LESS-THAN SIGN - '\x3D': '\u003D', # EQUALS SIGN - '\x3E': '\u003E', # GREATER-THAN SIGN - '\x3F': '\u003F', # QUESTION MARK - '\x40': '\u00A1', # INVERTED EXCLAMATION MARK - '\x41': '\u0041', # LATIN CAPITAL LETTER A - '\x42': '\u0042', # LATIN CAPITAL LETTER B - '\x43': '\u0043', # LATIN CAPITAL LETTER C - '\x44': '\u0044', # LATIN CAPITAL LETTER D - '\x45': '\u0045', # LATIN CAPITAL LETTER E - '\x46': '\u0046', # LATIN CAPITAL LETTER F - '\x47': '\u0047', # LATIN CAPITAL LETTER G - '\x48': '\u0048', # LATIN CAPITAL LETTER H - '\x49': '\u0049', # LATIN CAPITAL LETTER I - '\x4A': '\u004A', # LATIN CAPITAL LETTER J - '\x4B': '\u004B', # LATIN CAPITAL LETTER K - '\x4C': '\u004C', # LATIN CAPITAL LETTER L - '\x4D': '\u004D', # LATIN CAPITAL LETTER M - '\x4E': '\u004E', # LATIN CAPITAL LETTER N - '\x4F': '\u004F', # LATIN CAPITAL LETTER O - '\x50': '\u0050', # LATIN CAPITAL LETTER P - '\x51': '\u0051', # LATIN CAPITAL LETTER Q - '\x52': '\u0052', # LATIN CAPITAL LETTER R - '\x53': '\u0053', # LATIN CAPITAL LETTER S - '\x54': '\u0054', # LATIN CAPITAL LETTER T - '\x55': '\u0055', # LATIN CAPITAL LETTER U - '\x56': '\u0056', # LATIN CAPITAL LETTER V - '\x57': '\u0057', # LATIN CAPITAL LETTER W - 
'\x58': '\u0058', # LATIN CAPITAL LETTER X - '\x59': '\u0059', # LATIN CAPITAL LETTER Y - '\x5A': '\u005A', # LATIN CAPITAL LETTER Z - '\x5B': '\u00C4', # LATIN CAPITAL LETTER A WITH DIAERESIS - '\x5C': '\u00D6', # LATIN CAPITAL LETTER O WITH DIAERESIS - '\x5D': '\u00D1', # LATIN CAPITAL LETTER N WITH TILDE - '\x5E': '\u00DC', # LATIN CAPITAL LETTER U WITH DIAERESIS - '\x5F': '\u00A7', # SECTION SIGN - '\x60': '\u00BF', # INVERTED QUESTION MARK - '\x61': '\u0061', # LATIN SMALL LETTER A - '\x62': '\u0062', # LATIN SMALL LETTER B - '\x63': '\u0063', # LATIN SMALL LETTER C - '\x64': '\u0064', # LATIN SMALL LETTER D - '\x65': '\u0065', # LATIN SMALL LETTER E - '\x66': '\u0066', # LATIN SMALL LETTER F - '\x67': '\u0067', # LATIN SMALL LETTER G - '\x68': '\u0068', # LATIN SMALL LETTER H - '\x69': '\u0069', # LATIN SMALL LETTER I - '\x6A': '\u006A', # LATIN SMALL LETTER J - '\x6B': '\u006B', # LATIN SMALL LETTER K - '\x6C': '\u006C', # LATIN SMALL LETTER L - '\x6D': '\u006D', # LATIN SMALL LETTER M - '\x6E': '\u006E', # LATIN SMALL LETTER N - '\x6F': '\u006F', # LATIN SMALL LETTER O - '\x70': '\u0070', # LATIN SMALL LETTER P - '\x71': '\u0071', # LATIN SMALL LETTER Q - '\x72': '\u0072', # LATIN SMALL LETTER R - '\x73': '\u0073', # LATIN SMALL LETTER S - '\x74': '\u0074', # LATIN SMALL LETTER T - '\x75': '\u0075', # LATIN SMALL LETTER U - '\x76': '\u0076', # LATIN SMALL LETTER V - '\x77': '\u0077', # LATIN SMALL LETTER W - '\x78': '\u0078', # LATIN SMALL LETTER X - '\x79': '\u0079', # LATIN SMALL LETTER Y - '\x7A': '\u007A', # LATIN SMALL LETTER Z - '\x7B': '\u00E4', # LATIN SMALL LETTER A WITH DIAERESIS - '\x7C': '\u00F6', # LATIN SMALL LETTER O WITH DIAERESIS - '\x7D': '\u00F1', # LATIN SMALL LETTER N WITH TILDE - '\x7E': '\u00FC', # LATIN SMALL LETTER U WITH DIAERESIS - '\x7F': '\u00E0', # LATIN SMALL LETTER A WITH GRAVE -} - -# default GSM 03.38 escaped characters -> unicode -def_escape_decode_dict = { - '\x0A': '\u000C', # FORM FEED - '\x14': '\u005E', # CIRCUMFLEX 
ACCENT - '\x28': '\u007B', # LEFT CURLY BRACKET - '\x29': '\u007D', # RIGHT CURLY BRACKET - '\x2F': '\u005C', # REVERSE SOLIDUS - '\x3C': '\u005B', # LEFT SQUARE BRACKET - '\x3D': '\u007E', # TILDE - '\x3E': '\u005D', # RIGHT SQUARE BRACKET - '\x40': '\u007C', # VERTICAL LINE - '\x65': '\u20AC', # EURO SIGN -} - -# Replacement characters, default is question mark. Used when it is not too -# important to ensure exact UTF-8 -> GSM -> UTF-8 equivilence, such as when -# humans read and write SMS. But for USSD and other M2M applications it's -# important to ensure the conversion is exact. -def_replace_encode_dict = { - '\u00E7': '\x09', # LATIN SMALL LETTER C WITH CEDILLA - - '\u0391': '\x41', # GREEK CAPITAL LETTER ALPHA - '\u0392': '\x42', # GREEK CAPITAL LETTER BETA - '\u0395': '\x45', # GREEK CAPITAL LETTER EPSILON - '\u0397': '\x48', # GREEK CAPITAL LETTER ETA - '\u0399': '\x49', # GREEK CAPITAL LETTER IOTA - '\u039A': '\x4B', # GREEK CAPITAL LETTER KAPPA - '\u039C': '\x4D', # GREEK CAPITAL LETTER MU - '\u039D': '\x4E', # GREEK CAPITAL LETTER NU - '\u039F': '\x4F', # GREEK CAPITAL LETTER OMICRON - '\u03A1': '\x50', # GREEK CAPITAL LETTER RHO - '\u03A4': '\x54', # GREEK CAPITAL LETTER TAU - '\u03A7': '\x58', # GREEK CAPITAL LETTER CHI - '\u03A5': '\x59', # GREEK CAPITAL LETTER UPSILON - '\u0396': '\x5A', # GREEK CAPITAL LETTER ZETA -} - -QUESTION_MARK = chr(0x3f) - -# unicode -> default GSM 03.38 -def_regular_encode_dict = \ - dict((u, g) for g, u in def_regular_decode_dict.items()) - -# unicode -> default escaped GSM 03.38 characters -def_escape_encode_dict = \ - dict((u, g) for g, u in def_escape_decode_dict.items()) - - -def encode(input_, errors='strict'): - """ - :type input_: unicode - - :return: string - """ - result = [] - for c in input_: - try: - result.append(def_regular_encode_dict[c]) - except KeyError: - if c in def_escape_encode_dict: - # OK, let's encode it as an escaped characters - result.append('\x1b') - result.append(def_escape_encode_dict[c]) - 
else: - print(repr(c)) - if errors == 'strict': - raise UnicodeError("Invalid GSM character") - elif errors == 'replace': - result.append( - def_replace_encode_dict.get(c, QUESTION_MARK)) - elif errors == 'ignore': - pass - else: - raise UnicodeError("Unknown error handling") - - ret = ''.join(result) - return ret, len(ret) - - -def decode(input_, errors='strict'): - """ - :type input_: str - - :return: unicode - """ - result = [] - index = 0 - while index < len(input_): - c = input_[index] - index += 1 - if c == '\x1b': - if index < len(input_): - c = input_[index] - index += 1 - result.append(def_escape_decode_dict.get(c, '\xa0')) - else: - result.append('\xa0') - else: - try: - result.append(def_regular_decode_dict[c]) - except KeyError: - # error handling: unassigned byte, must be > 0x7f - if errors == 'strict': - raise UnicodeError("Unrecognized GSM character %s at index %i of input %s" % (hex(c), index, input_)) - elif errors == 'replace': - result.append('?') - elif errors == 'ignore': - pass - else: - raise UnicodeError("Unknown error handling") - - ret = ''.join(result) - return ret, len(ret) - - -# encodings module API -def getregentry(encoding): - if encoding in ('gsm0338o'): - return codecs.CodecInfo(name='gsm0338o', - encode=encode, - decode=decode) - -# Codec registration -codecs.register(getregentry) - - -def is_gsm_text(text): - """Returns True if ``text`` can be encoded as gsm text""" - try: - codecs.encode(text, 'gsm0338') - except UnicodeError: - return False - except: - traceback.print_exc(file=sys.stdout) - return False - - return True diff --git a/messaging/sms/submit.py b/messaging/sms/submit.py index c909063..276ed4f 100644 --- a/messaging/sms/submit.py +++ b/messaging/sms/submit.py @@ -3,9 +3,10 @@ from datetime import datetime, timedelta import re +import logging from messaging.sms import consts -from messaging.utils import (debug, encode_str, clean_number, +from messaging.utils import (encode_str, clean_number, pack_8bits_to_ucs2, 
pack_8bits_to_7bits, pack_8bits_to_8bit, timedelta_to_relative_validity, @@ -14,7 +15,7 @@ from messaging.sms.gsm0338 import is_valid_gsm from messaging.sms.pdu import Pdu -VALID_NUMBER = re.compile("^\+?\d{3,20}$") +VALID_NUMBER = re.compile(r"^\+?\d{3,20}$") class SmsSubmit(SmsBase): @@ -90,16 +91,16 @@ def to_pdu(self): pdu += sms_phone_pdu pdu += tppid_pdu pdu += sms_msg_pdu[0] - debug("smsc_pdu: %s" % smsc_pdu) - debug("sms_submit_pdu: %s" % sms_submit_pdu) - debug("tpmessref_pdu: %s" % tpmessref_pdu) - debug("sms_phone_pdu: %s" % sms_phone_pdu) - debug("tppid_pdu: %s" % tppid_pdu) - debug("sms_msg_pdu: %s" % sms_msg_pdu) - debug("-" * 20) - debug("full_pdu: %s" % pdu) - debug("full_text: %s" % self.text) - debug("-" * 20) + logging.debug("smsc_pdu: %s" % smsc_pdu) + logging.debug("sms_submit_pdu: %s" % sms_submit_pdu) + logging.debug("tpmessref_pdu: %s" % tpmessref_pdu) + logging.debug("sms_phone_pdu: %s" % sms_phone_pdu) + logging.debug("tppid_pdu: %s" % tppid_pdu) + logging.debug("sms_msg_pdu: %s" % sms_msg_pdu) + logging.debug("-" * 20) + logging.debug("full_pdu: %s" % pdu) + logging.debug("full_text: %s" % self.text) + logging.debug("-" * 20) return [Pdu(pdu, len_smsc)] # multipart SMS @@ -114,16 +115,16 @@ def to_pdu(self): pdu += sms_phone_pdu pdu += tppid_pdu pdu += sms_msg_pdu_item - debug("smsc_pdu: %s" % smsc_pdu) - debug("sms_submit_pdu: %s" % sms_submit_pdu) - debug("tpmessref_pdu: %s" % tpmessref_pdu) - debug("sms_phone_pdu: %s" % sms_phone_pdu) - debug("tppid_pdu: %s" % tppid_pdu) - debug("sms_msg_pdu: %s" % sms_msg_pdu_item) - debug("-" * 20) - debug("full_pdu: %s" % pdu) - debug("full_text: %s" % self.text) - debug("-" * 20) + logging.debug("smsc_pdu: %s" % smsc_pdu) + logging.debug("sms_submit_pdu: %s" % sms_submit_pdu) + logging.debug("tpmessref_pdu: %s" % tpmessref_pdu) + logging.debug("sms_phone_pdu: %s" % sms_phone_pdu) + logging.debug("tppid_pdu: %s" % tppid_pdu) + logging.debug("sms_msg_pdu: %s" % sms_msg_pdu_item) + logging.debug("-" * 
20) + logging.debug("full_pdu: %s" % pdu) + logging.debug("full_text: %s" % self.text) + logging.debug("-" * 20) pdu_list.append(Pdu(pdu, len_smsc, cnt=cnt, seq=i + 1)) diff --git a/messaging/utils.py b/messaging/utils.py index 6ce7519..0bf1d40 100644 --- a/messaging/utils.py +++ b/messaging/utils.py @@ -1,8 +1,10 @@ from array import array from datetime import timedelta, tzinfo from math import floor -import sys +import re +import binascii +HEX_STR = re.compile(r"^[0-9A-Fa-f]+$") class FixedOffset(tzinfo): """Fixed offset in minutes east from UTC.""" @@ -46,24 +48,14 @@ def dst(self, dt): def bytes_to_str(b): if isinstance(b, bytes): - return b.decode('latin1') + return b.decode() return b -def to_array(pdu): +def hex_to_int_array(pdu): return array('B', [int(pdu[i:i + 2], 16) for i in range(0, len(pdu), 2)]) -def to_bytes(s): - return bytes(s) - - -def debug(s): - # set this to True if you want to poke at PDU encoding/decoding - if False: - print(s) - - def swap(s): """Swaps ``s`` according to GSM 23.040""" what = s[:] @@ -83,18 +75,33 @@ def clean_number(n): def encode_str(s): - """Returns the hexadecimal representation of ``s``""" + """ + Convert a string to hexidecimal values + + :param s: string + :type s: str + :return: hexidecimal representation of given string + :rtype: str + """ + # return binascii.hexlify(s.encode()).decode() return ''.join(["%02x" % ord(n) for n in s]) def encode_bytes(b): - return ''.join(["%02x" % n for n in b]) + """ + Convert to hexidecimal representation + + :param b: byte array + :type b: bytes + :return: Byte string converted to hex and returned as a string + :rtype: str + """ + return binascii.hexlify(b).decode() def pack_8bits_to_7bits(message, udh=None): pdu = "" txt = bytes_to_str(message) - if udh is None: tl = len(txt) txt += '\x00' @@ -169,15 +176,67 @@ def pack_8bits_to_ucs2(message, udh=None): message = chr(mlen) + nmesg return encode_str(message) - def unpack_msg(pdu): + if isinstance(pdu, (array, list)): + return 
unpack_list_msg(pdu) + + if isinstance(pdu, bytes): + return unpack_hex_bytes_msg(pdu) + + if isinstance(pdu, str) and HEX_STR.match(pdu): + return unpack_hex_str_msg(pdu) + + raise TypeError('Unhandled Type %s' % type(pdu)) + +def unpack_hex_str_msg(pdu): """Unpacks ``pdu`` into septets and returns the decoded string""" # Taken/modified from Dave Berkeley's pysms package count = last = 0 result = [] - for i in range(0, len(pdu), 2): - byte = int(pdu[i:i + 2], 16) + prev_char = '' + count = last = 0 + result = [] + + for index, char in enumerate(pdu): + if index % 2 == 1: + byte = int(prev_char + char, 16) + else: + prev_char = char + continue + mask = 0x7F >> count + out = ((byte & mask) << count) + last + last = byte >> (7 - count) + result.append(out) + + if len(result) >= 0xa0: + break + + if count == 6: + result.append(last) + last = 0 + + count = (count + 1) % 7 + + return bytes(result) + + +def unpack_hex_bytes_msg(pdu): + """Unpacks ``pdu`` into septets and returns the decoded string""" + # Taken/modified from Dave Berkeley's pysms package + count = last = 0 + result = [] + + prev_byte = b'' + count = last = 0 + result = [] + + for index, byte in enumerate(pdu): + if index % 2 == 1: + byte = int(bytes([prev_byte, byte]), 16) + else: + prev_byte = byte + continue mask = 0x7F >> count out = ((byte & mask) << count) + last last = byte >> (7 - count) @@ -192,10 +251,10 @@ def unpack_msg(pdu): count = (count + 1) % 7 - return to_bytes(result) + return bytes(result) -def unpack_msg2(pdu): +def unpack_list_msg(pdu): """Unpacks ``pdu`` into septets and returns the decoded string""" # Taken/modified from Dave Berkeley's pysms package count = last = 0 @@ -216,7 +275,7 @@ def unpack_msg2(pdu): count = (count + 1) % 7 - return to_bytes(result) + return bytes(result) def timedelta_to_relative_validity(t): diff --git a/tests/test_mms.py b/tests/test_mms.py index c93770b..b1836e1 100644 --- a/tests/test_mms.py +++ b/tests/test_mms.py @@ -322,7 +322,7 @@ def 
test_decoding_27d0a048cd79555de05283a22372b0eb_mms(self): 'Date': datetime.datetime(2004, 5, 23, 14, 14, 58), 'Content-Type': ('application/vnd.wap.multipart.related', {'Start': '', 'Type': 'application/smil'}), 'Subject': 'Angående art-tillhörighet', - #'Subject': 'Ang\xc3\xa5ende art-tillh\xc3\xb6righet', + # 'Subject': 'Ang\xc3\xa5ende art-tillh\xc3\xb6righet', } smil_data = b'\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n' text_data = b'Jonatan \xc3\xa4r en gnu.' diff --git a/tests/test_sms.py b/tests/test_sms.py index d6fe65a..93ee60f 100644 --- a/tests/test_sms.py +++ b/tests/test_sms.py @@ -230,6 +230,29 @@ def test_encoding_multipart_7bit(self): self.assertEqual(pdu.seq, i + 1) self.assertEqual(pdu.cnt, cnt) + def test_encoding_multipart_7bit_egsm(self): + # text encoded with umts-tools + self.maxDiff = None + text = '€' * 229 + 'x' + number = binascii.unhexlify(b'363535333435363738').decode() + expected = [ + "005100098156355476F80000AAA005000388030136E54D7953DE9437E54D7953DE9437E54D7953DE9437E54D7953DE9437E54D7953DE9437E54D7953DE9437E54D7953DE9437E54D7953DE9437E54D7953DE9437E54D7953DE9437E54D7953DE9437E54D7953DE9437E54D7953DE9437E54D7953DE9437E54D7953DE9437E54D7953DE9437E54D7953DE9437E54D7953DE9437E54D7953DE9437", + "005100098156355476F80000AAA0050003880302CA9BF2A6BC296FCA9BF2A6BC296FCA9BF2A6BC296FCA9BF2A6BC296FCA9BF2A6BC296FCA9BF2A6BC296FCA9BF2A6BC296FCA9BF2A6BC296FCA9BF2A6BC296FCA9BF2A6BC296FCA9BF2A6BC296FCA9BF2A6BC296FCA9BF2A6BC296FCA9BF2A6BC296FCA9BF2A6BC296FCA9BF2A6BC296FCA9BF2A6BC296FCA9BF2A6BC296FCA9BF2A6BC296FCA", + "005100098156355476F80000AAA005000388030336E54D7953DE9437E54D7953DE9437E54D7953DE9437E54D7953DE9437E54D7953DE9437E54D7953DE9437E54D7953DE9437E54D7953DE9437E54D7953DE9437E54D7953DE9437E54D7953DE9437E54D7953DE9437E54D7953DE9437E54D7953DE9437E54D7953DE9437E54D7953DE9437E54D7953DE9437E54D7953DE9437E54D7953DE94F1", + ] + + sms = SmsSubmit(number, text) + sms.ref = 0x0 + sms.rand_id = 136 + sms.validity = timedelta(days=4) + + ret = sms.to_pdu() + 
cnt = len(ret) + for i, pdu in enumerate(ret): + self.assertEqual(pdu.pdu, expected[i]) + self.assertEqual(pdu.seq, i + 1) + self.assertEqual(pdu.cnt, cnt) + def test_encoding_bad_number_raises_error(self): self.assertRaises(ValueError, SmsSubmit, "032BADNUMBER", "text") @@ -282,11 +305,16 @@ def test_egsm_2(self): def test_egsm_3(self): sms = SmsSubmit(self.DEST, self.EGSM_CHAR * 153) # 306 septets - self.assertEqual(len(sms.to_pdu()), 3) + self.assertEqual(len(sms.to_pdu()), 2) def test_egsm_4(self): sms = SmsSubmit(self.DEST, self.EGSM_CHAR * 229 + self.GSM_CHAR) # 459 septets + self.assertEqual(len(sms.to_pdu()), 3) + + def test_egsm_5(self): + sms = SmsSubmit(self.DEST, + self.EGSM_CHAR * 270 + self.GSM_CHAR) # 541 septets self.assertEqual(len(sms.to_pdu()), 4) def test_unicode_1(self): diff --git a/tests/test_udh.py b/tests/test_udh.py index 92ec9f5..eab5b5e 100644 --- a/tests/test_udh.py +++ b/tests/test_udh.py @@ -1,13 +1,13 @@ from unittest import TestCase from messaging.sms.udh import UserDataHeader -from messaging.utils import to_array +from messaging.utils import hex_to_int_array class TestUserDataHeader(TestCase): def test_user_data_header(self): - data = to_array("08049f8e020105040b8423f0") + data = hex_to_int_array("08049f8e020105040b8423f0") udh = UserDataHeader.from_bytes(data) self.assertEqual(udh.concat.seq, 1) @@ -16,7 +16,7 @@ def test_user_data_header(self): self.assertEqual(udh.ports.dest_port, 2948) self.assertEqual(udh.ports.orig_port, 9200) - data = to_array("0003190201") + data = hex_to_int_array("0003190201") udh = UserDataHeader.from_bytes(data) self.assertEqual(udh.concat.seq, 1) From 68fba81d247981f39bef3117427694259c11c5f6 Mon Sep 17 00:00:00 2001 From: Dom Amato Date: Fri, 10 Apr 2020 13:31:44 -0500 Subject: [PATCH 28/28] fix lint --- messaging/mms/mms_pdu.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/messaging/mms/mms_pdu.py b/messaging/mms/mms_pdu.py index dd32749..0209ded 100644 --- 
a/messaging/mms/mms_pdu.py +++ b/messaging/mms/mms_pdu.py @@ -15,6 +15,7 @@ import array import os import random +import logging from messaging.mms import message, wsp_pdu from messaging.mms.iterator import PreviewIterator @@ -166,7 +167,7 @@ def decode_message_body(self, data_iter): except StopIteration: return - #print('Number of data entries (parts) in MMS body:', num_entries) + logging.debug('Number of data entries (parts) in MMS body: %i' % num_entries) ########## MMS body: entries ########## # For every data "part", we have to read the following sequence: @@ -175,7 +176,7 @@ def decode_message_body(self, data_iter): # , # for part_num in range(num_entries): - #print('\nPart %d:\n------' % part_num) + logging.debug('\nPart %d:\n------' % part_num) headers_len = self.decode_uint_var(data_iter) data_len = self.decode_uint_var(data_iter)