From 9c2dee9f2c181fb6f3568e85353f8856caec87c1 Mon Sep 17 00:00:00 2001 From: Shalabh Chaturvedi Date: Tue, 27 Sep 2016 11:00:34 -0700 Subject: [PATCH] AVRO-1928: Simplify Python float/double encoding Switch to using built-in little endian support in the struct module, instead of explicit bit manipulation. All tests pass. --- lang/py/src/avro/io.py | 38 ++++++-------------------------------- lang/py3/avro/io.py | 37 ++++++------------------------------- 2 files changed, 12 insertions(+), 63 deletions(-) diff --git a/lang/py/src/avro/io.py b/lang/py/src/avro/io.py index b2fd2f9ba27..1c61d3d0dad 100644 --- a/lang/py/src/avro/io.py +++ b/lang/py/src/avro/io.py @@ -68,10 +68,8 @@ def unpack(self, *args): return struct.unpack(self.format, *args) struct_class = SimpleStruct -STRUCT_INT = struct_class('!I') # big-endian unsigned int -STRUCT_LONG = struct_class('!Q') # big-endian unsigned long long -STRUCT_FLOAT = struct_class('!f') # big-endian float -STRUCT_DOUBLE = struct_class('!d') # big-endian double +STRUCT_FLOAT = struct_class('<f') # little-endian float +STRUCT_DOUBLE = struct_class('<d') # little-endian double STRUCT_CRC32 = struct_class('>I') # big-endian unsigned int # @@ -197,11 +195,7 @@ def read_float(self): The float is converted into a 32-bit integer using a method equivalent to Java's floatToIntBits and then encoded in little-endian format. """ - bits = (((ord(self.read(1)) & 0xffL)) | - ((ord(self.read(1)) & 0xffL) << 8) | - ((ord(self.read(1)) & 0xffL) << 16) | - ((ord(self.read(1)) & 0xffL) << 24)) - return STRUCT_FLOAT.unpack(STRUCT_INT.pack(bits))[0] + return STRUCT_FLOAT.unpack(self.read(4))[0] def read_double(self): """ @@ -209,15 +203,7 @@ def read_double(self): The double is converted into a 64-bit integer using a method equivalent to Java's doubleToLongBits and then encoded in little-endian format. 
""" - bits = (((ord(self.read(1)) & 0xffL)) | - ((ord(self.read(1)) & 0xffL) << 8) | - ((ord(self.read(1)) & 0xffL) << 16) | - ((ord(self.read(1)) & 0xffL) << 24) | - ((ord(self.read(1)) & 0xffL) << 32) | - ((ord(self.read(1)) & 0xffL) << 40) | - ((ord(self.read(1)) & 0xffL) << 48) | - ((ord(self.read(1)) & 0xffL) << 56)) - return STRUCT_DOUBLE.unpack(STRUCT_LONG.pack(bits))[0] + return STRUCT_DOUBLE.unpack(self.read(8))[0] def read_bytes(self): """ @@ -319,11 +305,7 @@ def write_float(self, datum): The float is converted into a 32-bit integer using a method equivalent to Java's floatToIntBits and then encoded in little-endian format. """ - bits = STRUCT_INT.unpack(STRUCT_FLOAT.pack(datum))[0] - self.write(chr((bits) & 0xFF)) - self.write(chr((bits >> 8) & 0xFF)) - self.write(chr((bits >> 16) & 0xFF)) - self.write(chr((bits >> 24) & 0xFF)) + self.write(STRUCT_FLOAT.pack(datum)) def write_double(self, datum): """ @@ -331,15 +313,7 @@ def write_double(self, datum): The double is converted into a 64-bit integer using a method equivalent to Java's doubleToLongBits and then encoded in little-endian format. 
""" - bits = STRUCT_LONG.unpack(STRUCT_DOUBLE.pack(datum))[0] - self.write(chr((bits) & 0xFF)) - self.write(chr((bits >> 8) & 0xFF)) - self.write(chr((bits >> 16) & 0xFF)) - self.write(chr((bits >> 24) & 0xFF)) - self.write(chr((bits >> 32) & 0xFF)) - self.write(chr((bits >> 40) & 0xFF)) - self.write(chr((bits >> 48) & 0xFF)) - self.write(chr((bits >> 56) & 0xFF)) + self.write(STRUCT_DOUBLE.pack(datum)) def write_bytes(self, datum): """ diff --git a/lang/py3/avro/io.py b/lang/py3/avro/io.py index b944904ee51..04616a3abf9 100644 --- a/lang/py3/avro/io.py +++ b/lang/py3/avro/io.py @@ -61,9 +61,8 @@ LONG_MAX_VALUE = (1 << 63) - 1 STRUCT_INT = struct.Struct('!I') # big-endian unsigned int -STRUCT_LONG = struct.Struct('!Q') # big-endian unsigned long long -STRUCT_FLOAT = struct.Struct('!f') # big-endian float -STRUCT_DOUBLE = struct.Struct('!d') # big-endian double +STRUCT_FLOAT = struct.Struct('I') # big-endian unsigned int @@ -211,11 +210,7 @@ def read_float(self): The float is converted into a 32-bit integer using a method equivalent to Java's floatToIntBits and then encoded in little-endian format. """ - bits = (((ord(self.read(1)) & 0xff)) | - ((ord(self.read(1)) & 0xff) << 8) | - ((ord(self.read(1)) & 0xff) << 16) | - ((ord(self.read(1)) & 0xff) << 24)) - return STRUCT_FLOAT.unpack(STRUCT_INT.pack(bits))[0] + return STRUCT_FLOAT.unpack(self.read(4))[0] def read_double(self): """ @@ -223,15 +218,7 @@ def read_double(self): The double is converted into a 64-bit integer using a method equivalent to Java's doubleToLongBits and then encoded in little-endian format. 
""" - bits = (((ord(self.read(1)) & 0xff)) | - ((ord(self.read(1)) & 0xff) << 8) | - ((ord(self.read(1)) & 0xff) << 16) | - ((ord(self.read(1)) & 0xff) << 24) | - ((ord(self.read(1)) & 0xff) << 32) | - ((ord(self.read(1)) & 0xff) << 40) | - ((ord(self.read(1)) & 0xff) << 48) | - ((ord(self.read(1)) & 0xff) << 56)) - return STRUCT_DOUBLE.unpack(STRUCT_LONG.pack(bits))[0] + return STRUCT_DOUBLE.unpack(self.read(8))[0] def read_bytes(self): """ @@ -354,11 +341,7 @@ def write_float(self, datum): The float is converted into a 32-bit integer using a method equivalent to Java's floatToIntBits and then encoded in little-endian format. """ - bits = STRUCT_INT.unpack(STRUCT_FLOAT.pack(datum))[0] - self.WriteByte((bits) & 0xFF) - self.WriteByte((bits >> 8) & 0xFF) - self.WriteByte((bits >> 16) & 0xFF) - self.WriteByte((bits >> 24) & 0xFF) + self.write(STRUCT_FLOAT.pack(datum)) def write_double(self, datum): """ @@ -366,15 +349,7 @@ def write_double(self, datum): The double is converted into a 64-bit integer using a method equivalent to Java's doubleToLongBits and then encoded in little-endian format. """ - bits = STRUCT_LONG.unpack(STRUCT_DOUBLE.pack(datum))[0] - self.WriteByte((bits) & 0xFF) - self.WriteByte((bits >> 8) & 0xFF) - self.WriteByte((bits >> 16) & 0xFF) - self.WriteByte((bits >> 24) & 0xFF) - self.WriteByte((bits >> 32) & 0xFF) - self.WriteByte((bits >> 40) & 0xFF) - self.WriteByte((bits >> 48) & 0xFF) - self.WriteByte((bits >> 56) & 0xFF) + self.write(STRUCT_DOUBLE.pack(datum)) def write_bytes(self, datum): """