Permalink
Browse files

All encoders and decoders now implemented

  • Loading branch information...
1 parent 73d9d64 commit 668685b374e08f85204fe676df3195e5544f2b21 @samuel samuel committed Oct 22, 2009
Showing with 268 additions and 190 deletions.
  1. +14 −2 bert/__init__.py
  2. +47 −8 bert/converters.py
  3. +82 −16 bert/erlang.py
  4. +3 −2 test.py
  5. +21 −0 tests/__init__.py
  6. +0 −60 tests/bertdecoder.py
  7. +51 −0 tests/berttests.py
  8. +0 −102 tests/erlangdecoder.py
  9. +50 −0 tests/erlangtests.py
View
@@ -1,3 +1,15 @@
-from bert.erlang import ErlangTermDecoder, Atom, Binary
-from bert.converters import BERTDecoder
+from bert.erlang import ErlangTermDecoder, ErlangTermEncoder, Atom, Binary
+from bert.converters import BERTDecoder, BERTEncoder
+
+def erlang_encode(obj):
+    """Serialize ``obj`` with a fresh ErlangTermEncoder and return the result."""
+    encoder = ErlangTermEncoder()
+    return encoder.encode(obj)
+
+def erlang_decode(obj):
+    """Decode ``obj`` with a fresh ErlangTermDecoder and return the result."""
+    decoder = ErlangTermDecoder()
+    return decoder.decode(obj)
+
+def encode(obj):
+    """Serialize ``obj`` with a fresh BERTEncoder and return the result."""
+    encoder = BERTEncoder()
+    return encoder.encode(obj)
+
+def decode(obj):
+    """Decode ``obj`` with a fresh BERTDecoder and return the result."""
+    decoder = BERTDecoder()
+    return decoder.decode(obj)
View
@@ -1,13 +1,19 @@
import datetime
import re
+import time
-from bert.erlang import ErlangTermDecoder
+from bert.erlang import ErlangTermDecoder, ErlangTermEncoder, Atom
-class BERTDecoder(ErlangTermDecoder):
- def __init__(self):
- pass
+def utc_to_datetime(seconds, microseconds):
+    """Build a naive datetime from a UTC timestamp.
+
+    ``seconds`` is handed to ``datetime.utcfromtimestamp``; ``microseconds``
+    then replaces the microsecond field of that result.
+    """
+    whole_seconds = datetime.datetime.utcfromtimestamp(seconds)
+    return whole_seconds.replace(microsecond=microseconds)
+
+def datetime_to_utc(dt):
+    """Split ``dt`` into ``(seconds since 1970-01-01 00:00, microseconds)``.
+
+    ``dt`` is measured against a naive epoch, so it is treated as a naive
+    datetime that is already expressed in UTC.
+    """
+    # time.mktime is deliberately avoided: it assumes the local timezone.
+    epoch = datetime.datetime(1970, 1, 1, 0, 0)
+    since_epoch = dt - epoch
+    whole_seconds = since_epoch.days * 24 * 60 * 60 + since_epoch.seconds
+    return whole_seconds, dt.microsecond
+class BERTDecoder(ErlangTermDecoder):
def decode(self, bytes, offset=0):
obj = super(BERTDecoder, self).decode(bytes, offset)
return self.convert(obj)
@@ -28,12 +34,12 @@ def convert_bert(self, item):
return None
elif item[1] == "dict":
return dict((self.convert(k), self.convert(v)) for k, v in item[2])
- elif item[1] == "true":
+ elif item[1] in ("true", True):
return True
- elif item[1] == "false":
+ elif item[1] in ("false", False):
return False
elif item[1] == "time":
- return datetime.timedelta(seconds=item[2] * 1000000 + item[3], microseconds=item[4])
+ return utc_to_datetime(item[2] * 1000000 + item[3], item[4])
elif item[1] == "regex":
flags = 0
if 'extended' in item[3]:
@@ -45,4 +51,37 @@ def convert_bert(self, item):
if 'dotall' in item[3]:
flags |= re.DOTALL
return re.compile(item[2], flags)
- return None
+ raise NotImplementedError("Unknown BERT type %s" % item[1])
+
+class BERTEncoder(ErlangTermEncoder):
+    """Encoder that rewrites Python values as BERT terms before serializing."""
+
+    def encode(self, obj):
+        """Convert ``obj`` to its BERT term form and hand it to the parent encoder."""
+        term = self.convert(obj)
+        return super(BERTEncoder, self).encode(term)
+
+    def convert(self, obj):
+        """Return ``obj`` with BERT-specific values replaced by tagged tuples.
+
+        True, False, None, dicts and datetimes become ``(bert, <kind>, ...)``
+        tuples; lists and tuples are converted element by element; anything
+        else is returned unchanged.
+        """
+        # Identity checks, so the integers 1 and 0 are not taken for booleans.
+        if obj is True:
+            return (Atom("bert"), Atom("true"))
+        if obj is False:
+            return (Atom("bert"), Atom("false"))
+        if obj is None:
+            return (Atom("bert"), Atom("nil"))
+        if isinstance(obj, dict):
+            pairs = [(self.convert(key), self.convert(value)) for key, value in obj.items()]
+            return (Atom("bert"), Atom("dict"), pairs)
+        if isinstance(obj, datetime.datetime):
+            total_seconds, microseconds = datetime_to_utc(obj)
+            megaseconds, seconds = divmod(total_seconds, 1000000)
+            return (Atom("bert"), Atom("time"), megaseconds, seconds, microseconds)
+        # NOTE: compiled regular expressions have no branch here yet; they
+        # fall through to the final return unconverted.
+        if isinstance(obj, list):
+            return [self.convert(element) for element in obj]
+        if isinstance(obj, tuple):
+            return tuple(self.convert(element) for element in obj)
+        return obj
+
+def datetime_to_split_time(dt):
+    """Split ``dt`` into ``(megaseconds, seconds, microseconds)``.
+
+    The seconds are counted from 1970-01-01 00:00 by plain datetime
+    arithmetic, treating ``dt`` as a naive datetime expressed in UTC.
+    ``time.mktime`` is not used: it interprets the time tuple in the local
+    timezone, which shifts the result on any host not running in UTC and
+    disagrees with ``datetime_to_utc`` in this module.
+    """
+    since_epoch = dt - datetime.datetime(1970, 1, 1)
+    total_seconds = since_epoch.days * 24 * 60 * 60 + since_epoch.seconds
+    megaseconds, seconds = divmod(total_seconds, 1000000)
+    return megaseconds, seconds, dt.microsecond
View
@@ -1,27 +1,32 @@
"""Erlang External Term Format serializer/deserializer"""
+from __future__ import division
+
+import math
import struct
-NEW_FLOAT_EXT = 70 # [Float64:IEEE float]
-SMALL_INTEGER_EXT = 97 # [UInt8:Int] Unsigned 8 bit integer
-INTEGER_EXT = 98 # [Int32:Int] Signed 32 bit integer in big-endian format
-FLOAT_EXT = 99 # [31:Float String] Float in string format (formatted "%.20e", sscanf "%lf"). Superseded by NEW_FLOAT_EXT
-ATOM_EXT = 100 # [UInt16:Len, Len:AtomName] max Len is 255
-SMALL_TUPLE_EXT = 104 # [UInt8:Arity, N:Elements]
-LARGE_TUPLE_EXT = 105 # [UInt32:Arity, N:Elements]
-NIL_EXT = 106 # empty list
-STRING_EXT = 107 # [UInt32:Len, Len:Characters]
-LIST_EXT = 108 # [UInt32:Len, Elements, Tail]
-BINARY_EXT = 109 # [UInt32:Len, Len:Data]
-SMALL_BIG_EXT = 110 # [UInt8:n, UInt8:Sign, n:nums]
-LARGE_BIG_EXT = 111 # [UInt32:n, UInt8:Sign, n:nums]
+# Tag characters of the term format. Each comment gives the tag's numeric
+# value followed by the layout of the data that comes after the tag.
+NEW_FLOAT_EXT = 'F' # 70 [Float64:IEEE float]
+SMALL_INTEGER_EXT = 'a' # 97 [UInt8:Int] Unsigned 8 bit integer
+INTEGER_EXT = 'b' # 98 [Int32:Int] Signed 32 bit integer in big-endian format
+FLOAT_EXT = 'c' # 99 [31:Float String] Float in string format (formatted "%.20e", sscanf "%lf"). Superseded by NEW_FLOAT_EXT
+ATOM_EXT = 'd' # 100 [UInt16:Len, Len:AtomName] max Len is 255
+SMALL_TUPLE_EXT = 'h' # 104 [UInt8:Arity, N:Elements]
+LARGE_TUPLE_EXT = 'i' # 105 [UInt32:Arity, N:Elements]
+NIL_EXT = 'j' # 106 empty list
+STRING_EXT = 'k' # 107 [UInt16:Len, Len:Characters]
+LIST_EXT = 'l' # 108 [UInt32:Len, Elements, Tail]
+BINARY_EXT = 'm' # 109 [UInt32:Len, Len:Data]
+SMALL_BIG_EXT = 'n' # 110 [UInt8:n, UInt8:Sign, n:nums]
+LARGE_BIG_EXT = 'o' # 111 [UInt32:n, UInt8:Sign, n:nums]
class Atom(str):
- pass
+    def __repr__(self):
+        """Wrap the plain string repr as ``Atom(...)`` so atoms are recognizable."""
+        inner = super(Atom, self).__repr__()
+        return "Atom(%s)" % inner
class Binary(str):
- pass
+    def __repr__(self):
+        """Wrap the plain string repr as ``Binary(...)`` so binaries are recognizable."""
+        inner = super(Binary, self).__repr__()
+        return "Binary(%s)" % inner
class ErlangTermDecoder(object):
def __init__(self):
@@ -33,7 +38,7 @@ def decode(self, bytes, offset=0):
return self._decode(bytes, offset)[0]
def _decode(self, bytes, offset=0):
- tag = ord(bytes[offset])
+ tag = bytes[offset]
offset += 1
if tag == SMALL_INTEGER_EXT:
return ord(bytes[offset]), offset+1
@@ -106,3 +111,64 @@ def _decode(self, bytes, offset=0):
else:
raise NotImplementedError("Unsupported tag %d" % tag)
+class ErlangTermEncoder(object):
+    """Serializer from Python values to the Erlang external term format."""
+
+    def __init__(self):
+        pass
+
+    def encode(self, obj):
+        """Return ``obj`` serialized as a string that starts with byte 131."""
+        chunks = [chr(131)]
+        self._encode(obj, chunks)
+        return "".join(chunks)
+
+    def _encode(self, obj, bytes):
+        """Append the encoded pieces of ``obj`` to the list ``bytes``.
+
+        Raises NotImplementedError when ``obj`` has no encoding.
+        """
+        # Booleans are tested first and by identity: bool is a subclass of
+        # int, so they would otherwise be written out as the integers 1 and 0.
+        if obj is False:
+            bytes += [ATOM_EXT, struct.pack(">H", 5), "false"]
+        elif obj is True:
+            bytes += [ATOM_EXT, struct.pack(">H", 4), "true"]
+        elif isinstance(obj, (int, long)):
+            if 0 <= obj <= 255:
+                bytes += [SMALL_INTEGER_EXT, chr(obj)]
+            elif -2147483648 <= obj <= 2147483647:
+                bytes += [INTEGER_EXT, struct.pack(">l", obj)]
+            else:
+                self._encode_bignum(obj, bytes)
+        elif isinstance(obj, float):
+            # Fixed 31-byte field: the formatted number padded with NULs.
+            floatstr = "%.20e" % obj
+            bytes += [FLOAT_EXT, floatstr + "\x00"*(31-len(floatstr))]
+        elif isinstance(obj, Atom):
+            # Atom and Binary subclass str, so they must be tested before str.
+            bytes += [ATOM_EXT, struct.pack(">H", len(obj)), obj]
+        elif isinstance(obj, (Binary, buffer)):
+            bytes += [BINARY_EXT, struct.pack(">L", len(obj)), obj]
+        elif isinstance(obj, str):
+            if len(obj) <= 65535:
+                bytes += [STRING_EXT, struct.pack(">H", len(obj)), obj]
+            else:
+                # STRING_EXT only has a 16-bit length field; anything longer
+                # is written as a list of byte values instead of overflowing.
+                self._encode([ord(x) for x in obj], bytes)
+        elif isinstance(obj, unicode):
+            self._encode([ord(x) for x in obj], bytes)
+        elif isinstance(obj, tuple):
+            n = len(obj)
+            if n < 256:
+                bytes += [SMALL_TUPLE_EXT, chr(n)]
+            else:
+                bytes += [LARGE_TUPLE_EXT, struct.pack(">L", n)]
+            for item in obj:
+                self._encode(item, bytes)
+        elif obj == []:
+            bytes.append(NIL_EXT)
+        elif isinstance(obj, list):
+            bytes += [LIST_EXT, struct.pack(">L", len(obj))]
+            for item in obj:
+                self._encode(item, bytes)
+            bytes.append(NIL_EXT) # list tail - no such thing in Python
+        else:
+            raise NotImplementedError("Unable to serialize %r" % obj)
+
+    def _encode_bignum(self, obj, bytes):
+        """Append a SMALL_BIG_EXT or LARGE_BIG_EXT encoding of integer ``obj``."""
+        if obj >= 0:
+            sign = "\x00"
+        else:
+            sign = "\x01"
+        magnitude = abs(obj)
+        # Collect the magnitude's bytes first, least significant byte first.
+        # The length written to the header is then exactly the number of
+        # bytes that follow, which a logarithm-based estimate cannot promise
+        # (it is undefined for negatives and short for exact powers of 256).
+        digits = []
+        while magnitude > 0:
+            digits.append(chr(magnitude & 0xff))
+            magnitude >>= 8
+        n = len(digits)
+        if n <= 255:
+            # The small form stores the count in a single byte.
+            bytes += [SMALL_BIG_EXT, chr(n)]
+        else:
+            bytes += [LARGE_BIG_EXT, struct.pack(">L", n)]
+        bytes.append(sign)
+        bytes += digits
View
@@ -3,6 +3,7 @@
import unittest
if __name__ == '__main__':
- from tests.erlangdecoder import *
- from tests.bertdecoder import *
+ from tests import *
+ from tests.erlangtests import *
+ from tests.berttests import *
unittest.main()
View
@@ -0,0 +1,21 @@
+
+import datetime
+import unittest
+
+from bert.converters import utc_to_datetime, datetime_to_utc
+
+class TestDateConversion(unittest.TestCase):
+    """Checks utc_to_datetime and datetime_to_utc against known pairs."""
+
+    # Each entry: (datetime, (seconds, microseconds)) expected to map to each other.
+    test_dates = [
+        (datetime.datetime(1970, 1, 1, 0, 0, 0, 0), (0, 0)),
+        (datetime.datetime(2009, 1, 8, 4, 27, 47), (1231388867, 0)),
+        (datetime.datetime(2009, 10, 8, 4, 27, 47, 123), (1254976067, 123)),
+        (datetime.datetime(2009, 1, 8, 4, 27, 47, 456), (1231388867, 456)),
+    ]
+
+    def testToDatetime(self):
+        """Every (seconds, microseconds) pair converts to its datetime."""
+        for dt, tstamp in self.test_dates:
+            # assertEqual replaces the deprecated failUnlessEqual alias.
+            self.assertEqual(dt, utc_to_datetime(tstamp[0], tstamp[1]))
+
+    def testFromDatetime(self):
+        """Every datetime converts to its (seconds, microseconds) pair."""
+        for dt, tstamp in self.test_dates:
+            self.assertEqual(tstamp, datetime_to_utc(dt))
View
@@ -1,60 +0,0 @@
-#!/usr/bin/env python
-
-import datetime
-import re
-import unittest
-
-from bert import BERTDecoder
-
-class BERTDecoderTest(unittest.TestCase):
- def setUp(self):
- self.decoder = BERTDecoder()
-
- def tearDown(self):
- pass
-
- def testNone(self):
- self.failUnlessEqual(None, self.convert(("bert", "nil")))
-
- def testNestedNone(self):
- self.failUnlessEqual([None, (None,)], self.convert([("bert", "nil"), (("bert", "nil"),)]))
-
- def testDict(self):
- self.failUnlessEqual({'foo': 'bar'}, self.convert(('bert', 'dict', [('foo', 'bar')])))
-
- def testEmptyDict(self):
- self.failUnlessEqual({}, self.convert(('bert', 'dict', [])))
-
- def testNestedDict(self):
- self.failUnlessEqual({'foo': {'baz': 'bar'}},
- self.convert(
- ('bert', 'dict', [
- ('foo', ('bert', 'dict', [
- ('baz', 'bar')]))])))
-
- def testTrue(self):
- self.failUnlessEqual(True, self.convert(('bert', 'true')))
-
- def testFalse(self):
- self.failUnlessEqual(False, self.convert(('bert', 'false')))
-
- def testTime(self):
- self.failUnlessEqual(datetime.timedelta(seconds=123*1000000+456, microseconds=789),
- self.convert(('bert', 'time', 123, 456, 789)))
-
- def testRegex(self):
- before = ('bert', 'regex', '^c(a)t$', ('caseless', 'extended'))
- # after = re.compile('^c(a)t$', re.I|re.X)
- # self.failUnlessEqual(after, self.convert(before))
- self.failUnlessEqual(str(type(self.convert(before))), "<type '_sre.SRE_Pattern'>")
-
- def testOther(self):
- """Conversion shouldn't change non-bert values"""
- before = [1, 2.0, ("foo", "bar")]
- self.failUnlessEqual(before, self.convert(before))
-
- def convert(self, term):
- return self.decoder.convert(term)
-
-if __name__ == '__main__':
- unittest.main()
View
@@ -0,0 +1,51 @@
+#!/usr/bin/env python
+
+import datetime
+import re
+import unittest
+
+from bert import BERTDecoder, BERTEncoder
+
+# Pairs of (Python value, equivalent BERT term). BERTTestCase runs every pair
+# through the decoder's convert and through the encoder's convert.
+bert_tests = [
+ # nil
+ (None, ("bert", "nil")),
+ # nested nil
+ ([None, (None,)], [("bert", "nil"), (("bert", "nil"),)]),
+ # dict
+ ({'foo': 'bar'}, ('bert', 'dict', [('foo', 'bar')])),
+ # empty dict
+ ({}, ('bert', 'dict', [])),
+ # nested dict
+ ({'foo': {'baz': 'bar'}}, ('bert', 'dict', [('foo', ('bert', 'dict', [('baz', 'bar')]))])),
+ # true
+ (True, ('bert', 'true')),
+ # false
+ (False, ('bert', 'false')),
+ # time
+ (datetime.datetime.utcfromtimestamp(123*1000000+456).replace(microsecond=789), ('bert', 'time', 123, 456, 789)),
+ # regex (entry disabled: BERTEncoder.convert has no regex branch, so
+ # encoding a compiled pattern would not produce this term)
+ # (re.compile('^c(a)t$', re.I|re.X), ('bert', 'regex', '^c(a)t$', ('caseless', 'extended'))),
+ # other
+ ([1, 2.0, ("foo", "bar")], [1, 2.0, ("foo", "bar")]),
+]
+
+class BERTTestCase(unittest.TestCase):
+    """Checks BERT <-> Python conversion against the ``bert_tests`` table."""
+
+    def testDecode(self):
+        """Each BERT term in the table converts to its Python value."""
+        convert = BERTDecoder().convert
+        for python, bert in bert_tests:
+            # assertEqual replaces the deprecated failUnlessEqual alias.
+            self.assertEqual(python, convert(bert))
+
+    def testEncode(self):
+        """Each Python value in the table converts to its BERT term."""
+        convert = BERTEncoder().convert
+        for python, bert in bert_tests:
+            self.assertEqual(bert, convert(python))
+
+    def testRegex(self):
+        """A BERT regex term decodes to a compiled pattern with the same source."""
+        convert = BERTDecoder().convert
+        before = ('bert', 'regex', '^c(a)t$', ('caseless', 'extended'))
+        after = convert(before)
+        # Compare with the type of a real compiled pattern rather than its
+        # printed name, which differs between interpreter versions.
+        self.assertTrue(isinstance(after, type(re.compile(''))))
+        self.assertEqual('^c(a)t$', after.pattern)
+        # NOTE(review): the option names presumably map to re.I | re.X; the
+        # flags are not asserted here - confirm against the decoder first.
+
+# Run unittest's command-line entry point when this file is executed directly.
+if __name__ == '__main__':
+ unittest.main()
Oops, something went wrong.

0 comments on commit 668685b

Please sign in to comment.