Skip to content

Commit

Permalink
[API-703] v5 SQL: Add default serializers for the new types (#459)
Browse files Browse the repository at this point in the history
* v5 SQL: Add default serializers for the new types

This PR adds support for the new default types, namely

- LocalDate -> datetime.date
- LocalTime -> datetime.time
- OffsetDateTime -> datetime.datetime
- LocalDateTime (only deserializing it to datetime.datetime)
- BigDecimal -> decimal.Decimal

It also fixes the issues with reading/writing big integer objects.

Also, this PR updates the serialization tests with the new types
(+ some types that were not previously covered by the tests, such as
aggregators, projections, ...)

* escape map names

* use updated names of the skip functions

* add comment explaining the width calculation

* text improvements
  • Loading branch information
mdumandag committed Aug 31, 2021
1 parent d613fc8 commit 81f202d
Show file tree
Hide file tree
Showing 12 changed files with 487 additions and 111 deletions.
36 changes: 21 additions & 15 deletions docs/serialization.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,27 @@ chapter.

Hazelcast serializes all your objects before sending them to the server.
The ``bool``, ``int``, ``long`` (for Python 2), ``float``, ``str``,
``unicode`` (for Python 2) and ``bytearray`` types are serialized
natively and you cannot override this behavior. The following table is
the conversion of types for the Java server side.

========= ======================================
Python Java
========= ======================================
bool Boolean
int Byte, Short, Integer, Long, BigInteger
long Byte, Short, Integer, Long, BigInteger
float Float, Double
str String
unicode String
bytearray byte[]
========= ======================================
``unicode`` (for Python 2), ``bytearray``, ``list``, ``datetime.date``,
``datetime.time``, ``datetime.datetime``, and ``decimal.Decimal`` types are
serialized natively and you cannot override this behavior. The following
table is the conversion of types for the Java server side.

================= ================================================
Python Java
================= ================================================
bool Boolean
int Byte, Short, Integer, Long, java.math.BigInteger
long Byte, Short, Integer, Long, java.math.BigInteger
float Float, Double
str String
unicode String
bytearray byte[]
list java.util.ArrayList
datetime.date java.time.LocalDate
datetime.time java.time.LocalTime
datetime.datetime java.time.OffsetDateTime
decimal.Decimal java.math.BigDecimal
================= ================================================


.. Note:: An ``int`` or ``long`` type is serialized as ``Integer`` by
Expand Down
6 changes: 5 additions & 1 deletion hazelcast/serialization/serialization_const.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,15 @@
# DEFAULT SERIALIZERS

JAVA_DEFAULT_TYPE_CLASS = -24
JAVA_DEFAULT_TYPE_DATE = -25
JAVA_DEFAULT_TYPE_BIG_INTEGER = -26
JAVA_DEFAULT_TYPE_BIG_DECIMAL = -27
JAVA_DEFAULT_TYPE_ARRAY = -28
JAVA_DEFAULT_TYPE_ARRAY_LIST = -29
JAVA_DEFAULT_TYPE_LINKED_LIST = -30
JAVA_DEFAULT_TYPE_LOCAL_DATE = -51
JAVA_DEFAULT_TYPE_LOCAL_TIME = -52
JAVA_DEFAULT_TYPE_LOCAL_DATE_TIME = -53
JAVA_DEFAULT_TYPE_OFFSET_DATE_TIME = -54
JAVASCRIPT_JSON_SERIALIZATION_TYPE = -130

# ------------------------------------------------------------
Expand Down
151 changes: 116 additions & 35 deletions hazelcast/serialization/serializer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import binascii
import time
from datetime import datetime
import datetime
import decimal

from hazelcast import six
from hazelcast.core import HazelcastJsonValue
Expand All @@ -9,7 +8,7 @@
from hazelcast.serialization.base import HazelcastSerializationError
from hazelcast.serialization.serialization_const import *
from hazelcast.six.moves import range, cPickle
from hazelcast.util import UUIDUtil
from hazelcast.util import UUIDUtil, int_from_bytes, int_to_bytes, timezone

if not six.PY2:
long = int
Expand Down Expand Up @@ -248,51 +247,42 @@ def get_type_id(self):


# EXTENSIONS
class DateTimeSerializer(BaseSerializer):
class BigIntegerSerializer(BaseSerializer):
def read(self, inp):
long_time = inp.read_long()
return datetime.fromtimestamp(long_time / 1000.0)
length = inp.read_int()
result = bytearray(length)
inp.read_into(result, 0, length)
return int_from_bytes(result)

def write(self, out, obj):
long_time = long(time.mktime(obj.timetuple())) * 1000
out.write_long(long_time)
out.write_byte_array(int_to_bytes(obj))

def get_type_id(self):
return JAVA_DEFAULT_TYPE_DATE
return JAVA_DEFAULT_TYPE_BIG_INTEGER


class BigIntegerSerializer(BaseSerializer):
class BigDecimalSerializer(BaseSerializer):
def read(self, inp):
length = inp.read_int()
if length == NULL_ARRAY_LENGTH:
return None
result = bytearray(length)
if length > 0:
inp.read_into(result, 0, length)
if result[0] & 0x80:
neg = bytearray()
for c in result:
neg.append(c ^ 0xFF)
return -1 * int(binascii.hexlify(neg), 16) - 1
return int(binascii.hexlify(result), 16)
inp.read_into(result, 0, length)
unscaled_value = int_from_bytes(result)
scale = inp.read_int()
sign = 0 if unscaled_value >= 0 else 1
return decimal.Decimal(
(sign, tuple(int(digit) for digit in str(abs(unscaled_value))), -1 * scale)
)

def write(self, out, obj):
the_big_int = -obj - 1 if obj < 0 else obj
end_index = -1 if (type(obj) == long and six.PY2) else None
hex_str = hex(the_big_int)[2:end_index]
if len(hex_str) % 2 == 1:
prefix = "0" # "f" if obj < 0 else "0"
hex_str = prefix + hex_str
num_array = bytearray(binascii.unhexlify(bytearray(hex_str, encoding="utf-8")))
if obj < 0:
neg = bytearray()
for c in num_array:
neg.append(c ^ 0xFF)
num_array = neg
out.write_byte_array(num_array)
sign, digits, exponent = obj.as_tuple()
unscaled_value = long("".join([str(digit) for digit in digits]))
if sign == 1:
unscaled_value = -1 * unscaled_value
out.write_byte_array(int_to_bytes(unscaled_value))
out.write_int(-1 * exponent)

def get_type_id(self):
return JAVA_DEFAULT_TYPE_BIG_INTEGER
return JAVA_DEFAULT_TYPE_BIG_DECIMAL


class JavaClassSerializer(BaseSerializer):
Expand Down Expand Up @@ -346,6 +336,97 @@ def get_type_id(self):
return JAVA_DEFAULT_TYPE_LINKED_LIST


class LocalDateSerializer(BaseSerializer):
    """Serializer for ``datetime.date`` values.

    The wire layout is the year as an int followed by the month and the
    day, each as a single byte.
    """

    def read(self, inp):
        """Decode a ``datetime.date`` from ``inp``."""
        year = inp.read_int()
        month = inp.read_byte()
        day = inp.read_byte()
        return datetime.date(year, month, day)

    def write(self, out, obj):
        """Encode the year, month and day of ``obj`` into ``out``."""
        out.write_int(obj.year)
        for field in ("month", "day"):
            out.write_byte(getattr(obj, field))

    def get_type_id(self):
        """Return the type id constant for this serializer."""
        return JAVA_DEFAULT_TYPE_LOCAL_DATE


class LocalTimeSerializer(BaseSerializer):
    """Serializer for ``datetime.time`` values.

    The wire layout is hour, minute and second as single bytes followed
    by the sub-second part as an int holding nanoseconds.
    """

    def read(self, inp):
        """Decode a ``datetime.time`` from ``inp``."""
        hour = inp.read_byte()
        minute = inp.read_byte()
        second = inp.read_byte()
        nanos = inp.read_int()
        # The server sends nanoseconds, datetime.time stores microseconds.
        return datetime.time(hour, minute, second, nanos // 1000)

    def write(self, out, obj):
        """Encode the fields of ``obj`` into ``out``."""
        for field in ("hour", "minute", "second"):
            out.write_byte(getattr(obj, field))
        # The server expects nanoseconds.
        out.write_int(obj.microsecond * 1000)

    def get_type_id(self):
        """Return the type id constant for this serializer."""
        return JAVA_DEFAULT_TYPE_LOCAL_TIME


class LocalDateTimeSerializer(BaseSerializer):
    """Read-only serializer that decodes into a ``datetime.datetime``.

    The wire layout is the year as an int, then month, day, hour, minute
    and second as single bytes, then the sub-second part as an int
    holding nanoseconds. The decoded value carries no timezone.
    """

    def read(self, inp):
        """Decode a ``datetime.datetime`` from ``inp``."""
        year = inp.read_int()
        month = inp.read_byte()
        day = inp.read_byte()
        hour = inp.read_byte()
        minute = inp.read_byte()
        second = inp.read_byte()
        nanos = inp.read_int()
        # The server sends nanoseconds, datetime stores microseconds.
        return datetime.datetime(year, month, day, hour, minute, second, nanos // 1000)

    # "write(self, out, obj)" is never called so not implemented here

    def get_type_id(self):
        """Return the type id constant for this serializer."""
        return JAVA_DEFAULT_TYPE_LOCAL_DATE_TIME


class OffsetDateTimeSerializer(BaseSerializer):
    """Serializer for ``datetime.datetime`` values with a UTC offset.

    The wire layout is the year as an int, then month, day, hour, minute
    and second as single bytes, then the sub-second part as an int
    holding nanoseconds, and finally the UTC offset as an int holding
    seconds.
    """

    def read(self, inp):
        """Decode a timezone-aware ``datetime.datetime`` from ``inp``."""
        return datetime.datetime(
            inp.read_int(),
            inp.read_byte(),
            inp.read_byte(),
            inp.read_byte(),
            inp.read_byte(),
            inp.read_byte(),
            inp.read_int() // 1000,  # server sends nanoseconds
            timezone(datetime.timedelta(seconds=inp.read_int())),
        )

    def write(self, out, obj):
        """Encode ``obj`` into ``out``.

        A naive datetime, or one whose timezone cannot determine an
        offset, is written with an offset of zero seconds.
        """
        out.write_int(obj.year)
        out.write_byte(obj.month)
        out.write_byte(obj.day)
        out.write_byte(obj.hour)
        out.write_byte(obj.minute)
        out.write_byte(obj.second)
        out.write_int(obj.microsecond * 1000)  # server expects nanoseconds

        # Ask the datetime for its own offset instead of calling
        # ``tzinfo.utcoffset(None)``: timezones whose offset depends on
        # the date (e.g. ones with DST rules) return None when they are
        # not given a datetime, which would silently be written as an
        # offset of zero. ``datetime.utcoffset()`` passes the datetime
        # itself to the tzinfo and returns None for naive datetimes.
        utc_offset = obj.utcoffset()
        if utc_offset:
            out.write_int(int(utc_offset.total_seconds()))
        else:
            out.write_int(0)

    def get_type_id(self):
        """Return the type id constant for this serializer."""
        return JAVA_DEFAULT_TYPE_OFFSET_DATE_TIME


class PythonObjectSerializer(BaseSerializer):
def read(self, inp):
str = inp.read_string().encode()
Expand Down
8 changes: 7 additions & 1 deletion hazelcast/serialization/service.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import datetime
import decimal
import uuid

from hazelcast import six
Expand Down Expand Up @@ -101,12 +103,16 @@ def _register_constant_serializers(self):
self._registry.register_constant_serializer(DoubleArraySerializer())
self._registry.register_constant_serializer(StringArraySerializer())
# EXTENSIONS
self._registry.register_constant_serializer(DateTimeSerializer(), datetime)
self._registry.register_constant_serializer(BigIntegerSerializer())
self._registry.register_constant_serializer(BigDecimalSerializer(), decimal.Decimal)
self._registry.register_constant_serializer(JavaClassSerializer())
self._registry.register_constant_serializer(ArraySerializer())
self._registry.register_constant_serializer(ArrayListSerializer(), list)
self._registry.register_constant_serializer(LinkedListSerializer())
self._registry.register_constant_serializer(LocalDateSerializer(), datetime.date)
self._registry.register_constant_serializer(LocalTimeSerializer(), datetime.time)
self._registry.register_constant_serializer(LocalDateTimeSerializer())
self._registry.register_constant_serializer(OffsetDateTimeSerializer(), datetime.datetime)
self._registry.register_constant_serializer(
HazelcastJsonValueSerializer(), HazelcastJsonValue
)
Expand Down
55 changes: 47 additions & 8 deletions hazelcast/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,20 +359,50 @@ def from_bits(most_significant_bits, least_significant_bits):

if hasattr(int, "from_bytes"):

def int_from_bytes(buffer):
return int.from_bytes(buffer, "big", signed=True)
def int_from_bytes(buf):
return int.from_bytes(buf, "big", signed=True)


else:
# Compatibility with Python 2
def int_from_bytes(buffer):
buffer = bytearray(buffer)
if buffer[0] & 0x80:
def int_from_bytes(buf):
buf = bytearray(buf)
if buf[0] & 0x80:
neg = bytearray()
for c in buffer:
neg.append(~c)
for c in buf:
neg.append(c ^ 0xFF)
return -1 * int(binascii.hexlify(neg), 16) - 1
return int(binascii.hexlify(buffer), 16)
return int(binascii.hexlify(buf), 16)


if hasattr(int, "to_bytes"):

    def int_to_bytes(number):
        """Return the minimal big-endian two's complement bytes of ``number``."""
        # For a negative number, -(2 ** k) needs one bit less than its
        # bit_length() suggests once the sign bit is accounted for, so
        # shift it by one before measuring.
        magnitude = number + 1 if number < 0 else number
        # One extra bit is always needed for the sign. Dividing the
        # bit length by 8 and adding 1 gives the number of bytes that
        # holds both the value bits and the sign bit.
        width = magnitude.bit_length() // 8 + 1
        return number.to_bytes(width, "big", signed=True)


else:
    # Compatibility with Python 2
    def int_to_bytes(number):
        """Return the minimal big-endian two's complement bytes of ``number``."""
        negative = number < 0
        # Work on the bitwise complement of a negative number so that
        # it can be rendered as an unsigned hex string.
        magnitude = -number - 1 if negative else number
        # Two hex digits per byte, with room for the sign bit.
        hex_digits = (magnitude.bit_length() // 8 + 1) * 2
        raw = bytearray(binascii.unhexlify("%0*x" % (hex_digits, magnitude)))
        if negative:
            # Flip every bit back to get the two's complement form.
            raw = bytearray(octet ^ 0xFF for octet in raw)
        return raw


try:
Expand All @@ -398,6 +428,15 @@ def tzname(self, dt):
def dst(self, dt):
    """Return a zero ``timedelta``; ``dt`` is not consulted."""
    no_adjustment = timedelta(0)
    return no_adjustment

def __eq__(self, other):
    """Return True when ``other`` is a ``FixedOffsetTimezone`` with the same offset."""
    if not isinstance(other, FixedOffsetTimezone):
        return False
    return self._offset == other._offset

def __ne__(self, other):
    """Return the negation of ``__eq__`` (needed explicitly on Python 2)."""
    is_equal = self.__eq__(other)
    return not is_equal

def __hash__(self):
    """Return the hash of the stored offset, the same value ``__eq__`` compares."""
    offset = self._offset
    return hash(offset)

timezone = FixedOffsetTimezone


Expand Down

0 comments on commit 81f202d

Please sign in to comment.