Skip to content

Commit

Permalink
All encoders and decoders now implemented
Browse files Browse the repository at this point in the history
  • Loading branch information
samuel committed Oct 22, 2009
1 parent 73d9d64 commit 668685b
Show file tree
Hide file tree
Showing 9 changed files with 268 additions and 190 deletions.
16 changes: 14 additions & 2 deletions bert/__init__.py
@@ -1,3 +1,15 @@


from bert.erlang import ErlangTermDecoder, Atom, Binary from bert.erlang import ErlangTermDecoder, ErlangTermEncoder, Atom, Binary
from bert.converters import BERTDecoder from bert.converters import BERTDecoder, BERTEncoder

def erlang_encode(obj):
    """Serialize ``obj`` using a freshly constructed ErlangTermEncoder."""
    encoder = ErlangTermEncoder()
    return encoder.encode(obj)

def erlang_decode(obj):
    """Deserialize ``obj`` using a freshly constructed ErlangTermDecoder."""
    decoder = ErlangTermDecoder()
    return decoder.decode(obj)

def encode(obj):
    """Serialize ``obj`` using a freshly constructed BERTEncoder."""
    encoder = BERTEncoder()
    return encoder.encode(obj)

def decode(obj):
    """Deserialize ``obj`` using a freshly constructed BERTDecoder."""
    decoder = BERTDecoder()
    return decoder.decode(obj)
55 changes: 47 additions & 8 deletions bert/converters.py
@@ -1,13 +1,19 @@


import datetime import datetime
import re import re
import time


from bert.erlang import ErlangTermDecoder from bert.erlang import ErlangTermDecoder, ErlangTermEncoder, Atom


class BERTDecoder(ErlangTermDecoder): def utc_to_datetime(seconds, microseconds):
def __init__(self): return datetime.datetime.utcfromtimestamp(seconds).replace(microsecond=microseconds)
pass
def datetime_to_utc(dt):
    """Return ``dt`` as a ``(seconds, microseconds)`` pair counted from the Unix epoch.

    A naive datetime is taken to already be in UTC.  A timezone-aware
    datetime is shifted to UTC first, so it no longer raises ``TypeError``
    when subtracted from the naive epoch.

    ``seconds`` is the whole number of seconds since 1970-01-01 00:00 (it is
    negative for earlier dates) and ``microseconds`` is ``dt.microsecond``.
    """
    offset = dt.utcoffset()
    if offset is not None:
        # Normalise an aware datetime to a naive UTC one.
        dt = dt.replace(tzinfo=None) - offset
    # time.mktime can't be used here because it assumes the local timezone.
    delta = dt - datetime.datetime(1970, 1, 1, 0, 0)
    return delta.days * 24 * 60 * 60 + delta.seconds, dt.microsecond


class BERTDecoder(ErlangTermDecoder):
def decode(self, bytes, offset=0): def decode(self, bytes, offset=0):
obj = super(BERTDecoder, self).decode(bytes, offset) obj = super(BERTDecoder, self).decode(bytes, offset)
return self.convert(obj) return self.convert(obj)
Expand All @@ -28,12 +34,12 @@ def convert_bert(self, item):
return None return None
elif item[1] == "dict": elif item[1] == "dict":
return dict((self.convert(k), self.convert(v)) for k, v in item[2]) return dict((self.convert(k), self.convert(v)) for k, v in item[2])
elif item[1] == "true": elif item[1] in ("true", True):
return True return True
elif item[1] == "false": elif item[1] in ("false", False):
return False return False
elif item[1] == "time": elif item[1] == "time":
return datetime.timedelta(seconds=item[2] * 1000000 + item[3], microseconds=item[4]) return utc_to_datetime(item[2] * 1000000 + item[3], item[4])
elif item[1] == "regex": elif item[1] == "regex":
flags = 0 flags = 0
if 'extended' in item[3]: if 'extended' in item[3]:
Expand All @@ -45,4 +51,37 @@ def convert_bert(self, item):
if 'dotall' in item[3]: if 'dotall' in item[3]:
flags |= re.DOTALL flags |= re.DOTALL
return re.compile(item[2], flags) return re.compile(item[2], flags)
return None raise NotImplementedError("Unknown BERT type %s" % item[1])

class BERTEncoder(ErlangTermEncoder):
    """Erlang term encoder that first rewrites Python values as BERT terms."""

    def encode(self, obj):
        """Convert ``obj`` with :meth:`convert`, then serialize it with the parent encoder."""
        return super(BERTEncoder, self).encode(self.convert(obj))

    def convert(self, obj):
        """Return the BERT representation of ``obj``.

        ``True``, ``False``, ``None``, dicts and datetimes become tuples
        tagged with ``Atom("bert")``.  Lists and tuples are converted
        element by element.  Anything else is returned unchanged.
        """
        tag = Atom("bert")

        # Identity checks so that the ints 1 and 0 are not mistaken for booleans.
        if obj is True:
            return (tag, Atom("true"))
        if obj is False:
            return (tag, Atom("false"))
        if obj is None:
            return (tag, Atom("nil"))

        if isinstance(obj, dict):
            pairs = []
            for key, value in obj.items():
                pairs.append((self.convert(key), self.convert(value)))
            return (tag, Atom("dict"), pairs)

        if isinstance(obj, datetime.datetime):
            total_seconds, microseconds = datetime_to_utc(obj)
            megaseconds, seconds = divmod(total_seconds, 1000000)
            return (tag, Atom("time"), megaseconds, seconds, microseconds)

        # NOTE: compiled regular expressions have no conversion here yet;
        # they fall through and are returned as-is.
        if isinstance(obj, list):
            return [self.convert(element) for element in obj]
        if isinstance(obj, tuple):
            return tuple(self.convert(element) for element in obj)
        return obj

def datetime_to_split_time(dt):
    """Split ``dt`` into a ``(megaseconds, seconds, microseconds)`` triple.

    ``dt`` is a naive datetime interpreted as UTC.  The whole seconds since
    1970-01-01 00:00 are split into millions of seconds (``megaseconds``)
    and the remainder (``seconds``); ``microseconds`` is ``dt.microsecond``.
    """
    # Measure from the epoch directly.  time.mktime would interpret the
    # value as local time and make the result depend on the host timezone.
    delta = dt - datetime.datetime(1970, 1, 1, 0, 0)
    total_seconds = delta.days * 24 * 60 * 60 + delta.seconds
    megaseconds, seconds = divmod(total_seconds, 1000000)
    return megaseconds, seconds, dt.microsecond
98 changes: 82 additions & 16 deletions bert/erlang.py
@@ -1,27 +1,32 @@


"""Erlang External Term Format serializer/deserializer""" """Erlang External Term Format serializer/deserializer"""


from __future__ import division

import math
import struct import struct


NEW_FLOAT_EXT = 70 # [Float64:IEEE float] NEW_FLOAT_EXT = 'F' # 70 [Float64:IEEE float]
SMALL_INTEGER_EXT = 97 # [UInt8:Int] Unsigned 8 bit integer SMALL_INTEGER_EXT = 'a' # 97 [UInt8:Int] Unsigned 8 bit integer
INTEGER_EXT = 98 # [Int32:Int] Signed 32 bit integer in big-endian format INTEGER_EXT = 'b' # 98 [Int32:Int] Signed 32 bit integer in big-endian format
FLOAT_EXT = 99 # [31:Float String] Float in string format (formatted "%.20e", sscanf "%lf"). Superseded by NEW_FLOAT_EXT FLOAT_EXT = 'c' # 99 [31:Float String] Float in string format (formatted "%.20e", sscanf "%lf"). Superseded by NEW_FLOAT_EXT
ATOM_EXT = 100 # [UInt16:Len, Len:AtomName] max Len is 255 ATOM_EXT = 'd' # 100 [UInt16:Len, Len:AtomName] max Len is 255
SMALL_TUPLE_EXT = 104 # [UInt8:Arity, N:Elements] SMALL_TUPLE_EXT = 'h' # 104 [UInt8:Arity, N:Elements]
LARGE_TUPLE_EXT = 105 # [UInt32:Arity, N:Elements] LARGE_TUPLE_EXT = 'i' # 105 [UInt32:Arity, N:Elements]
NIL_EXT = 106 # empty list NIL_EXT = 'j' # 106 empty list
STRING_EXT = 107 # [UInt32:Len, Len:Characters] STRING_EXT = 'k' # 107 [UInt32:Len, Len:Characters]
LIST_EXT = 108 # [UInt32:Len, Elements, Tail] LIST_EXT = 'l' # 108 [UInt32:Len, Elements, Tail]
BINARY_EXT = 109 # [UInt32:Len, Len:Data] BINARY_EXT = 'm' # 109 [UInt32:Len, Len:Data]
SMALL_BIG_EXT = 110 # [UInt8:n, UInt8:Sign, n:nums] SMALL_BIG_EXT = 'n' # 110 [UInt8:n, UInt8:Sign, n:nums]
LARGE_BIG_EXT = 111 # [UInt32:n, UInt8:Sign, n:nums] LARGE_BIG_EXT = 'o' # 111 [UInt32:n, UInt8:Sign, n:nums]


class Atom(str): class Atom(str):
pass def __repr__(self):
return "Atom(%s)" % super(Atom, self).__repr__()


class Binary(str): class Binary(str):
pass def __repr__(self):
return "Binary(%s)" % super(Binary, self).__repr__()


class ErlangTermDecoder(object): class ErlangTermDecoder(object):
def __init__(self): def __init__(self):
Expand All @@ -33,7 +38,7 @@ def decode(self, bytes, offset=0):
return self._decode(bytes, offset)[0] return self._decode(bytes, offset)[0]


def _decode(self, bytes, offset=0): def _decode(self, bytes, offset=0):
tag = ord(bytes[offset]) tag = bytes[offset]
offset += 1 offset += 1
if tag == SMALL_INTEGER_EXT: if tag == SMALL_INTEGER_EXT:
return ord(bytes[offset]), offset+1 return ord(bytes[offset]), offset+1
Expand Down Expand Up @@ -106,3 +111,64 @@ def _decode(self, bytes, offset=0):
else: else:
raise NotImplementedError("Unsupported tag %d" % tag) raise NotImplementedError("Unsupported tag %d" % tag)


class ErlangTermEncoder(object):
    """Serializer from Python objects to the Erlang External Term Format."""

    def __init__(self):
        pass

    def encode(self, obj):
        """Return the encoded form of ``obj``, prefixed with version byte 131."""
        chunks = [chr(131)]
        self._encode(obj, chunks)
        return "".join(chunks)

    def _encode(self, obj, bytes):
        """Append the encoded pieces of ``obj`` to the list ``bytes``.

        Supports booleans (as the atoms ``true``/``false``), integers of any
        size, floats, ``Atom``, ``Binary``/``buffer``, ``str``, ``unicode``,
        tuples and lists.  Raises ``NotImplementedError`` for anything else.
        """
        # Booleans are tested by identity first: bool is a subclass of int.
        if obj is False:
            bytes += [ATOM_EXT, struct.pack(">H", 5), "false"]
        elif obj is True:
            bytes += [ATOM_EXT, struct.pack(">H", 4), "true"]
        elif isinstance(obj, (int, long)):
            if 0 <= obj <= 255:
                bytes += [SMALL_INTEGER_EXT, chr(obj)]
            elif -2147483648 <= obj <= 2147483647:
                bytes += [INTEGER_EXT, struct.pack(">l", obj)]
            else:
                # Bignum: sign byte followed by the magnitude, least
                # significant byte first.  The digits are produced first so
                # the byte count in the header is exact.  A logarithm is
                # undefined for negative values and undercounts at exact
                # powers of 256.
                if obj < 0:
                    sign = "\x01"
                    obj = -obj
                else:
                    sign = "\x00"
                digits = []
                while obj > 0:
                    digits.append(chr(obj & 0xff))
                    obj >>= 8
                n = len(digits)
                # The SMALL_BIG_EXT count is a single byte, so it tops out at 255.
                if n < 256:
                    bytes += [SMALL_BIG_EXT, chr(n)]
                else:
                    bytes += [LARGE_BIG_EXT, struct.pack(">L", n)]
                bytes.append(sign)
                bytes.extend(digits)
        elif isinstance(obj, float):
            # Old-style float: "%.20e" text, NUL-padded to 31 bytes.
            floatstr = "%.20e" % obj
            bytes += [FLOAT_EXT, floatstr + "\x00" * (31 - len(floatstr))]
        elif isinstance(obj, Atom):
            bytes += [ATOM_EXT, struct.pack(">H", len(obj)), obj]
        elif isinstance(obj, (Binary, buffer)):
            bytes += [BINARY_EXT, struct.pack(">L", len(obj)), obj]
        elif isinstance(obj, str):
            if len(obj) <= 65535:
                bytes += [STRING_EXT, struct.pack(">H", len(obj)), obj]
            else:
                # The STRING_EXT length field is only 16 bits wide; longer
                # strings have to be sent as a list of byte values.
                self._encode([ord(x) for x in obj], bytes)
        elif isinstance(obj, unicode):
            # Unicode text is sent as a list of code points.
            self._encode([ord(x) for x in obj], bytes)
        elif isinstance(obj, tuple):
            n = len(obj)
            if n < 256:
                bytes += [SMALL_TUPLE_EXT, chr(n)]
            else:
                bytes += [LARGE_TUPLE_EXT, struct.pack(">L", n)]
            for item in obj:
                self._encode(item, bytes)
        elif isinstance(obj, list):
            if not obj:
                bytes.append(NIL_EXT)
            else:
                bytes += [LIST_EXT, struct.pack(">L", len(obj))]
                for item in obj:
                    self._encode(item, bytes)
                bytes.append(NIL_EXT)  # list tail - no such thing in Python
        else:
            raise NotImplementedError("Unable to serialize %r" % obj)
5 changes: 3 additions & 2 deletions test.py
Expand Up @@ -3,6 +3,7 @@
import unittest import unittest


if __name__ == '__main__': if __name__ == '__main__':
from tests.erlangdecoder import * from tests import *
from tests.bertdecoder import * from tests.erlangtests import *
from tests.berttests import *
unittest.main() unittest.main()
21 changes: 21 additions & 0 deletions tests/__init__.py
@@ -0,0 +1,21 @@

import datetime
import unittest

from bert.converters import utc_to_datetime, datetime_to_utc

class TestDateConversion(unittest.TestCase):
    """Checks utc_to_datetime and datetime_to_utc against known values."""

    # Pairs of (naive datetime, (epoch seconds, microseconds)).
    test_dates = [
        (datetime.datetime(1970, 1, 1, 0, 0, 0, 0), (0, 0)),
        (datetime.datetime(2009, 1, 8, 4, 27, 47), (1231388867, 0)),
        (datetime.datetime(2009, 10, 8, 4, 27, 47, 123), (1254976067, 123)),
        (datetime.datetime(2009, 1, 8, 4, 27, 47, 456), (1231388867, 456)),
    ]

    def testToDatetime(self):
        """Each timestamp pair converts to its expected datetime."""
        for dt, tstamp in self.test_dates:
            # assertEqual replaces the deprecated failUnlessEqual alias.
            self.assertEqual(dt, utc_to_datetime(tstamp[0], tstamp[1]))

    def testFromDatetime(self):
        """Each datetime converts to its expected timestamp pair."""
        for dt, tstamp in self.test_dates:
            self.assertEqual(tstamp, datetime_to_utc(dt))
60 changes: 0 additions & 60 deletions tests/bertdecoder.py

This file was deleted.

51 changes: 51 additions & 0 deletions tests/berttests.py
@@ -0,0 +1,51 @@
#!/usr/bin/env python

import datetime
import re
import unittest

from bert import BERTDecoder, BERTEncoder

# Conversion fixtures shared by the BERT test case below.  Each entry is a
# pair (python_value, bert_term): decoding is checked by converting the second
# element and comparing it with the first, encoding the other way around.
bert_tests = [
# nil
(None, ("bert", "nil")),
# nested nil
([None, (None,)], [("bert", "nil"), (("bert", "nil"),)]),
# dict
({'foo': 'bar'}, ('bert', 'dict', [('foo', 'bar')])),
# empty dict
({}, ('bert', 'dict', [])),
# nested dict
({'foo': {'baz': 'bar'}}, ('bert', 'dict', [('foo', ('bert', 'dict', [('baz', 'bar')]))])),
# true
(True, ('bert', 'true')),
# false
(False, ('bert', 'false')),
# time: (megaseconds, seconds, microseconds) since the epoch, in UTC
(datetime.datetime.utcfromtimestamp(123*1000000+456).replace(microsecond=789), ('bert', 'time', 123, 456, 789)),
# regex (entry disabled in this table; regex decoding is exercised by testRegex)
# (re.compile('^c(a)t$', re.I|re.X), ('bert', 'regex', '^c(a)t$', ('caseless', 'extended'))),
# other: values with no BERT-specific form pass through unchanged
([1, 2.0, ("foo", "bar")], [1, 2.0, ("foo", "bar")]),
]

class BERTTestCase(unittest.TestCase):
    """Checks BERTDecoder.convert and BERTEncoder.convert against bert_tests."""

    def testDecode(self):
        """Every BERT term in bert_tests converts to its Python value."""
        convert = BERTDecoder().convert
        for python, bert in bert_tests:
            self.assertEqual(python, convert(bert))

    def testEncode(self):
        """Every Python value in bert_tests converts to its BERT term."""
        convert = BERTEncoder().convert
        for python, bert in bert_tests:
            self.assertEqual(bert, convert(python))

    def testRegex(self):
        """A BERT regex term decodes to a compiled pattern with the same source."""
        convert = BERTDecoder().convert
        before = ('bert', 'regex', '^c(a)t$', ('caseless', 'extended'))
        after = convert(before)
        # Compare against the type of a real compiled pattern instead of its
        # repr, which differs between Python versions.
        self.assertTrue(isinstance(after, type(re.compile(''))))
        self.assertEqual(after.pattern, '^c(a)t$')

# Run the tests in this module when it is executed as a script.
if __name__ == '__main__':
    unittest.main()

0 comments on commit 668685b

Please sign in to comment.