From d591d46548a9f57685ee423e78ed0d7071c140d6 Mon Sep 17 00:00:00 2001 From: abrar sheikh Date: Thu, 28 Apr 2016 17:04:22 -0700 Subject: [PATCH 1/3] date logical type --- lang/py/src/avro/io.py | 37 +++++++++++++++++++++++++++++++++---- lang/py/src/avro/schema.py | 33 +++++++++++++++++++++++++++++++++ lang/py/test/test_io.py | 2 ++ lang/py/test/test_schema.py | 10 ++++++++++ 4 files changed, 78 insertions(+), 4 deletions(-) diff --git a/lang/py/src/avro/io.py b/lang/py/src/avro/io.py index b2fd2f9ba27..2bb8fc8b4b9 100644 --- a/lang/py/src/avro/io.py +++ b/lang/py/src/avro/io.py @@ -45,6 +45,7 @@ import json except ImportError: import simplejson as json +import datetime # # Constants @@ -110,8 +111,12 @@ def validate(expected_schema, datum): elif schema_type == 'bytes': return isinstance(datum, str) elif schema_type == 'int': - return ((isinstance(datum, int) or isinstance(datum, long)) - and INT_MIN_VALUE <= datum <= INT_MAX_VALUE) + if (hasattr(expected_schema, 'logical_type') and + expected_schema.logical_type == 'date'): + return isinstance(datum, datetime.date) + else: + return ((isinstance(datum, int) or isinstance(datum, long)) + and INT_MIN_VALUE <= datum <= INT_MAX_VALUE) elif schema_type == 'long': return ((isinstance(datum, int) or isinstance(datum, long)) and LONG_MIN_VALUE <= datum <= LONG_MAX_VALUE) @@ -232,6 +237,15 @@ def read_utf8(self): """ return unicode(self.read_bytes(), "utf-8") + def read_date_from_int(self): + """ + Decode python date object from int + int stores the number of days from + the unix epoch, 1 January 1970 (ISO calendar). + """ + days_since_epoc = self.read_int() + return datetime.date(1970, 1, 1) + datetime.timedelta(days_since_epoc) + def check_crc32(self, bytes): checksum = STRUCT_CRC32.unpack(self.read(4))[0]; if crc32(bytes) & 0xffffffff != checksum: @@ -362,6 +376,15 @@ def write_crc32(self, bytes): """ self.write(STRUCT_CRC32.pack(crc32(bytes) & 0xffffffff)); + def write_date_int(self, datum): + """ + Encode python date object as int + int stores the number of days from + the unix epoch, 1 January 1970 (ISO calendar). + """ + delta_date = datum - datetime.date(1970, 1, 1) + self.write_int(delta_date.days) + # # DatumReader/Writer # @@ -467,7 +490,10 @@ def read_data(self, writers_schema, readers_schema, decoder): elif writers_schema.type == 'string': return decoder.read_utf8() elif writers_schema.type == 'int': - return decoder.read_int() + if hasattr(writers_schema, 'logical_type') and writers_schema.logical_type == 'date': + return decoder.read_date_from_int() + else: + return decoder.read_int() elif writers_schema.type == 'long': return decoder.read_long() elif writers_schema.type == 'float': @@ -779,7 +805,10 @@ def write_data(self, writers_schema, datum, encoder): elif writers_schema.type == 'string': encoder.write_utf8(datum) elif writers_schema.type == 'int': - encoder.write_int(datum) + if hasattr(writers_schema, 'logical_type') and writers_schema.logical_type == 'date': + encoder.write_date_int(datum) + else: + encoder.write_int(datum) elif writers_schema.type == 'long': encoder.write_long(datum) elif writers_schema.type == 'float': diff --git a/lang/py/src/avro/schema.py b/lang/py/src/avro/schema.py index 6a7fbbb8c8b..f16ca821062 100644 --- a/lang/py/src/avro/schema.py +++ b/lang/py/src/avro/schema.py @@ -312,6 +312,18 @@ def name_ref(self, names): namespace = property(lambda self: self.get_prop('namespace')) fullname = property(lambda self: self._fullname) +# Logical type class +class LogicalSchema(Schema): + def __init__(self, type, logical_type, other_props=None): + self.logical_type = logical_type + + # Call parent ctor + Schema.__init__(self, type, other_props) + self.set_prop('logicalType', logical_type) + + def validate(self, validation_props=None): + raise NotImplementedError() + class Field(object): def __init__(self, type, name, has_default, default=None, order=None,names=None, doc=None, other_props=None): @@ -404,6 +416,20 @@ def to_json(self, names=None): def __eq__(self, that): return self.props == that.props +# +# Date Type +# + +class DateSchema(LogicalSchema): + def __init__(self, other_props=None): + LogicalSchema.__init__(self, 'int', 'date', other_props=other_props) + + def to_json(self): + return self.props + + def __eq__(self, that): + return self.props == that.props + # # Complex Types (non-recursive) # @@ -722,7 +748,14 @@ def make_avsc_object(json_data, names=None): if hasattr(json_data, 'get') and callable(json_data.get): type = json_data.get('type') other_props = get_other_props(json_data, SCHEMA_RESERVED_PROPS) + logical_type = None + if 'logicalType' in json_data: + logical_type = json_data.get('logicalType') + if logical_type not in ['date']: + raise SchemaParseException("Currently does not support %s logical type" % logical_type) if type in PRIMITIVE_TYPES: + if type == 'int' and logical_type == 'date': + return DateSchema(other_props) return PrimitiveSchema(type, other_props) elif type in NAMED_TYPES: name = json_data.get('name') diff --git a/lang/py/test/test_io.py b/lang/py/test/test_io.py index 1e79d3e8931..db0667065b6 100644 --- a/lang/py/test/test_io.py +++ b/lang/py/test/test_io.py @@ -19,6 +19,7 @@ except ImportError: from StringIO import StringIO from binascii import hexlify +import datetime import set_avro_test_path @@ -35,6 +36,7 @@ ('"float"', 1234.0), ('"double"', 1234.0), ('{"type": "fixed", "name": "Test", "size": 1}', 'B'), + ('{"type": "int", "logicalType": "date"}', datetime.date(2000, 1, 1)), ('{"type": "enum", "name": "Test", "symbols": ["A", "B"]}', 'B'), ('{"type": "array", "items": "long"}', [1, 3, 2]), ('{"type": "map", "values": "long"}', {'a': 1, 'b': 3, 'c': 2}), diff --git a/lang/py/test/test_schema.py b/lang/py/test/test_schema.py index 00e2a05de81..59a5a2b4b27 100644 --- a/lang/py/test/test_schema.py +++ b/lang/py/test/test_schema.py @@ -295,6 +295,15 @@ def make_primitive_examples(): """, True) ] +DATE_LOGICAL_TYPE = [ + ExampleSchema("""{ + "type": "int", + "logicalType": "date"} """, True), + ExampleSchema("""{ + "type": "int", + "logicalType": "date1"} """, False), +] + EXAMPLES = PRIMITIVE_EXAMPLES EXAMPLES += FIXED_EXAMPLES EXAMPLES += ENUM_EXAMPLES @@ -303,6 +312,7 @@ def make_primitive_examples(): EXAMPLES += UNION_EXAMPLES EXAMPLES += RECORD_EXAMPLES EXAMPLES += DOC_EXAMPLES +EXAMPLES += DATE_LOGICAL_TYPE VALID_EXAMPLES = [e for e in EXAMPLES if e.valid] From 01686e90cb70d4fb2308f179b1fc299c4dbf35df Mon Sep 17 00:00:00 2001 From: abrar sheikh Date: Thu, 28 Apr 2016 18:22:20 -0700 Subject: [PATCH 2/3] time-millis logical type --- lang/py/src/avro/io.py | 61 ++++++++++++++++++++++++++++++++----- lang/py/src/avro/schema.py | 18 ++++++++++- lang/py/test/test_io.py | 1 + lang/py/test/test_schema.py | 9 ++++++ 4 files changed, 81 insertions(+), 8 deletions(-) diff --git a/lang/py/src/avro/io.py b/lang/py/src/avro/io.py index 2bb8fc8b4b9..98620086e40 100644 --- a/lang/py/src/avro/io.py +++ b/lang/py/src/avro/io.py @@ -111,9 +111,13 @@ def validate(expected_schema, datum): elif schema_type == 'bytes': return isinstance(datum, str) elif schema_type == 'int': - if (hasattr(expected_schema, 'logical_type') and - expected_schema.logical_type == 'date'): - return isinstance(datum, datetime.date) + if hasattr(expected_schema, 'logical_type'): + if expected_schema.logical_type == 'date': + return isinstance(datum, datetime.date) + elif expected_schema.logical_type == 'time-millis': + return isinstance(datum, datetime.time) + else: + return False else: return ((isinstance(datum, int) or isinstance(datum, long)) and INT_MIN_VALUE <= datum <= INT_MAX_VALUE) @@ -246,6 +250,29 @@ def read_date_from_int(self): days_since_epoc = self.read_int() return datetime.date(1970, 1, 1) + datetime.timedelta(days_since_epoc) + def read_time_millis_from_int(self): + """ + Decode python time object from int + int stores the number of milliseconds after midnight, 00:00:00.000 + """ + milisec = self.read_int() + hrs = int(milisec / 3600000) + milisec = milisec % 3600000 + + minutes = int(milisec / 60000) + milisec = milisec % 60000 + + seconds = int(milisec / 1000) + milisec = milisec % 1000 + + microseconds = milisec * 1000 + return datetime.time( + hour=hrs, + minute=minutes, + second=seconds, + microsecond=microseconds + ) + def check_crc32(self, bytes): checksum = STRUCT_CRC32.unpack(self.read(4))[0]; if crc32(bytes) & 0xffffffff != checksum: @@ -385,6 +412,14 @@ def write_date_int(self, datum): delta_date = datum - datetime.date(1970, 1, 1) self.write_int(delta_date.days) + def write_time_millis_int(self, datum): + """ + Encode python time object as int + int stores the number of days from midnight, 00:00:00.000 + """ + delta_time = datum.hour*3600000 + datum.minute * 60000 + datum.second * 1000 + datum.microsecond / 1000 + self.write_int(int(delta_time)) + # # DatumReader/Writer # @@ -490,8 +525,14 @@ def read_data(self, writers_schema, readers_schema, decoder): elif writers_schema.type == 'string': return decoder.read_utf8() elif writers_schema.type == 'int': - if hasattr(writers_schema, 'logical_type') and writers_schema.logical_type == 'date': - return decoder.read_date_from_int() + if hasattr(writers_schema, 'logical_type'): + if writers_schema.logical_type == 'date': + return decoder.read_date_from_int() + elif writers_schema.logical_type == 'time-millis': + return decoder.read_time_millis_from_int() + else: + fail_msg = "Cannot read unknown schema type: %s logicalType %s" % (writers_schema.type, writers_schema.logical_type) + raise schema.AvroException(fail_msg) else: return decoder.read_int() elif writers_schema.type == 'long': @@ -805,8 +846,14 @@ def write_data(self, writers_schema, datum, encoder): elif writers_schema.type == 'string': encoder.write_utf8(datum) elif writers_schema.type == 'int': - if hasattr(writers_schema, 'logical_type') and writers_schema.logical_type == 'date': - encoder.write_date_int(datum) + if hasattr(writers_schema, 'logical_type'): + if writers_schema.logical_type == 'date': + encoder.write_date_int(datum) + elif writers_schema.logical_type == 'time-millis': + encoder.write_time_millis_int(datum) + else: + fail_msg = 'Unknown type: %s, logicalType %s' % (writers_schema.type, writers_schema.logical_type) + raise schema.AvroException(fail_msg) else: encoder.write_int(datum) elif writers_schema.type == 'long': diff --git a/lang/py/src/avro/schema.py b/lang/py/src/avro/schema.py index f16ca821062..45c1f962019 100644 --- a/lang/py/src/avro/schema.py +++ b/lang/py/src/avro/schema.py @@ -430,6 +430,20 @@ def to_json(self): def __eq__(self, that): return self.props == that.props +# +# time-millis Type +# + +class TimeMillisSchema(LogicalSchema): + def __init__(self, other_props=None): + LogicalSchema.__init__(self, 'int', 'time-millis', other_props=other_props) + + def to_json(self): + return self.props + + def __eq__(self, that): + return self.props == that.props + # # Complex Types (non-recursive) # @@ -751,11 +765,13 @@ def make_avsc_object(json_data, names=None): logical_type = None if 'logicalType' in json_data: logical_type = json_data.get('logicalType') - if logical_type not in ['date']: + if logical_type not in ['date', 'time-millis']: raise SchemaParseException("Currently does not support %s logical type" % logical_type) if type in PRIMITIVE_TYPES: if type == 'int' and logical_type == 'date': return DateSchema(other_props) + elif type == 'int' and logical_type == 'time-millis': + return TimeMillisSchema(other_props) return PrimitiveSchema(type, other_props) elif type in NAMED_TYPES: name = json_data.get('name') diff --git a/lang/py/test/test_io.py b/lang/py/test/test_io.py index db0667065b6..b4ed503c757 100644 --- a/lang/py/test/test_io.py +++ b/lang/py/test/test_io.py @@ -37,6 +37,7 @@ ('"double"', 1234.0), ('{"type": "fixed", "name": "Test", "size": 1}', 'B'), ('{"type": "int", "logicalType": "date"}', datetime.date(2000, 1, 1)), + ('{"type": "int", "logicalType": "time-millis"}', datetime.time(2, 2, 1, 4000)), ('{"type": "enum", "name": "Test", "symbols": ["A", "B"]}', 'B'), ('{"type": "array", "items": "long"}', [1, 3, 2]), ('{"type": "map", "values": "long"}', {'a': 1, 'b': 3, 'c': 2}), diff --git a/lang/py/test/test_schema.py b/lang/py/test/test_schema.py index 59a5a2b4b27..110ba3e0fae 100644 --- a/lang/py/test/test_schema.py +++ b/lang/py/test/test_schema.py @@ -304,6 +304,15 @@ def make_primitive_examples(): "logicalType": "date1"} """, False), ] +TIMEMILLIS_LOGICAL_TYPE = [ + ExampleSchema("""{ + "type": "int", + "logicalType": "time-millis"} """, True), + ExampleSchema("""{ + "type": "int", + "logicalType": "time-milis"} """, False), +] + EXAMPLES = PRIMITIVE_EXAMPLES EXAMPLES += FIXED_EXAMPLES EXAMPLES += ENUM_EXAMPLES From 9407b60e03002e3e04ac31a81d54821377e051e3 Mon Sep 17 00:00:00 2001 From: abrar sheikh Date: Thu, 28 Apr 2016 22:36:00 -0700 Subject: [PATCH 3/3] timestamp, time logical type support --- lang/py/src/avro/io.py | 105 ++++++++++++++++++++++++++++++-- lang/py/src/avro/schema.py | 117 +++++++++++++++++++++++++++++------- lang/py/test/test_io.py | 9 ++- lang/py/test/test_schema.py | 82 +++++++++++++++++++++++++ 4 files changed, 286 insertions(+), 27 deletions(-) diff --git a/lang/py/src/avro/io.py b/lang/py/src/avro/io.py index 98620086e40..75845386999 100644 --- a/lang/py/src/avro/io.py +++ b/lang/py/src/avro/io.py @@ -122,8 +122,16 @@ def validate(expected_schema, datum): return ((isinstance(datum, int) or isinstance(datum, long)) and INT_MIN_VALUE <= datum <= INT_MAX_VALUE) elif schema_type == 'long': - return ((isinstance(datum, int) or isinstance(datum, long)) - and LONG_MIN_VALUE <= datum <= LONG_MAX_VALUE) + if hasattr(expected_schema, 'logical_type'): + if expected_schema.logical_type == 'time-micros': + return isinstance(datum, datetime.time) + elif expected_schema.logical_type in ['timestamp-millis', 'timestamp-micros']: + return isinstance(datum, datetime.datetime) + else: + return False + else: + return ((isinstance(datum, int) or isinstance(datum, long)) + and LONG_MIN_VALUE <= datum <= LONG_MAX_VALUE) elif schema_type in ['float', 'double']: return (isinstance(datum, int) or isinstance(datum, long) or isinstance(datum, float)) @@ -273,6 +281,47 @@ def read_time_millis_from_int(self): microsecond=microseconds ) + def read_time_micros_from_long(self): + """ + Decode python time object from long + long stores the number of microseconds after midnight, 00:00:00.000000 + """ + microsec = self.read_long() + hrs = long(microsec / 3600000000) + microsec = microsec % 3600000000 + + minutes = long(microsec / 60000000) + microsec = microsec % 60000000 + + seconds = long(microsec / 1000000) + microsec = microsec % 1000000 + + microseconds = microsec + return datetime.time( + hour=hrs, + minute=minutes, + second=seconds, + microsecond=microseconds + ) + + def read_ts_millis_from_long(self): + """ + Decode python datetime object from long + long stores the number of milliseconds from + the unix epoch, 1 January 1970 00:00:00.000 UTC. + """ + ts = self.read_long() + return datetime.datetime.utcfromtimestamp(ts / 1000.0) + + def read_ts_micros_from_long(self): + """ + Decode python datetime object from long + long stores the number of milliseconds from + the unix epoch, 1 January 1970 00:00:00.000 UTC. + """ + ts = self.read_long() + return datetime.datetime.utcfromtimestamp(ts / 1000000.0) + def check_crc32(self, bytes): checksum = STRUCT_CRC32.unpack(self.read(4))[0]; if crc32(bytes) & 0xffffffff != checksum: @@ -415,11 +464,35 @@ def write_date_int(self, datum): def write_time_millis_int(self, datum): """ Encode python time object as int - int stores the number of days from midnight, 00:00:00.000 + int stores the number of milliseconds from midnight, 00:00:00.000 """ delta_time = datum.hour*3600000 + datum.minute * 60000 + datum.second * 1000 + datum.microsecond / 1000 self.write_int(int(delta_time)) + def write_time_micros_long(self, datum): + """ + Encode python time object as long + long stores the number of microseconds from midnight, 00:00:00.000000 + """ + delta_time = datum.hour*3600000000 + datum.minute * 60000000 + datum.second * 1000000 + datum.microsecond + self.write_long(long(delta_time)) + + def write_ts_millis_long(self, datum): + """ + Encode python time object as int + int stores the number of milliseconds from midnight, 00:00:00.000 + """ + delta_time = (datum - datetime.datetime(1970, 1, 1, 0, 0, 0, 0)).total_seconds() * 1000; + self.write_long(long(delta_time)) + + def write_ts_micros_long(self, datum): + """ + Encode python time object as int + int stores the number of milliseconds from midnight, 00:00:00.000 + """ + delta_time = (datum - datetime.datetime(1970, 1, 1, 0, 0, 0, 0)).total_seconds() * 1000000; + self.write_long(long(delta_time)) + # # DatumReader/Writer # @@ -536,7 +609,18 @@ def read_data(self, writers_schema, readers_schema, decoder): else: return decoder.read_int() elif writers_schema.type == 'long': - return decoder.read_long() + if hasattr(writers_schema, 'logical_type'): + if writers_schema.logical_type == 'time-micros': + return decoder.read_time_micros_from_long() + elif writers_schema.logical_type == 'timestamp-millis': + return decoder.read_ts_millis_from_long() + elif writers_schema.logical_type == 'timestamp-micros': + return decoder.read_ts_micros_from_long() + else: + fail_msg = "Cannot read unknown schema type: %s logicalType %s" % (writers_schema.type, writers_schema.logical_type) + raise schema.AvroException(fail_msg) + else: + return decoder.read_long() elif writers_schema.type == 'float': return decoder.read_float() elif writers_schema.type == 'double': @@ -857,7 +941,18 @@ def write_data(self, writers_schema, datum, encoder): else: encoder.write_int(datum) elif writers_schema.type == 'long': - encoder.write_long(datum) + if hasattr(writers_schema, 'logical_type'): + if writers_schema.logical_type == 'time-micros': + encoder.write_time_micros_long(datum) + elif writers_schema.logical_type == 'timestamp-millis': + encoder.write_ts_millis_long(datum) + elif writers_schema.logical_type == 'timestamp-micros': + encoder.write_ts_micros_long(datum) + else: + fail_msg = 'Unknown type: %s, logicalType %s' % (writers_schema.type, writers_schema.logical_type) + raise schema.AvroException(fail_msg) + else: + encoder.write_long(datum) elif writers_schema.type == 'float': encoder.write_float(datum) elif writers_schema.type == 'double': diff --git a/lang/py/src/avro/schema.py b/lang/py/src/avro/schema.py index 45c1f962019..ef66debbacd 100644 --- a/lang/py/src/avro/schema.py +++ b/lang/py/src/avro/schema.py @@ -312,18 +312,6 @@ def name_ref(self, names): namespace = property(lambda self: self.get_prop('namespace')) fullname = property(lambda self: self._fullname) -# Logical type class -class LogicalSchema(Schema): - def __init__(self, type, logical_type, other_props=None): - self.logical_type = logical_type - - # Call parent ctor - Schema.__init__(self, type, other_props) - self.set_prop('logicalType', logical_type) - - def validate(self, validation_props=None): - raise NotImplementedError() - class Field(object): def __init__(self, type, name, has_default, default=None, order=None,names=None, doc=None, other_props=None): @@ -416,6 +404,18 @@ def to_json(self, names=None): def __eq__(self, that): return self.props == that.props +# Logical type class +class LogicalSchema(Schema): + def __init__(self, type, logical_type, other_props=None): + self.logical_type = logical_type + + # Call parent ctor + Schema.__init__(self, type, other_props) + self.set_prop('logicalType', logical_type) + + def validate(self, validation_props=None): + raise NotImplementedError() + # # Date Type # @@ -430,13 +430,11 @@ def to_json(self): def __eq__(self, that): return self.props == that.props -# -# time-millis Type -# - -class TimeMillisSchema(LogicalSchema): - def __init__(self, other_props=None): - LogicalSchema.__init__(self, 'int', 'time-millis', other_props=other_props) +# super class for time related logical types +class _TimeSchema(LogicalSchema): + def __init__(self, type, logical_type, fsp, other_props=None): + LogicalSchema.__init__(self, type, logical_type, other_props) + self.set_prop('fsp', fsp) def to_json(self): return self.props @@ -444,6 +442,65 @@ def to_json(self): def __eq__(self, that): return self.props == that.props + def validate(self, validation_props=None): + if not isinstance(self.get_prop('fsp'), int): + raise SchemaParseException('Fsp (Integer) is required for logical type %s.' % self.get_prop('logicalType')) + + if self.get_prop('fsp') > validation_props.get('max_fsp'): + raise SchemaParseException('logicalType %s supports only fsp <= %d' % (self.get_prop('logicalType'), self._max_fsp())) + +# +# time-millis Type +# + +class TimeMillisSchema(_TimeSchema): + def __init__(self, fsp, other_props=None): + _TimeSchema.__init__(self, 'int', 'time-millis', fsp, other_props) + self.validate({'max_fsp': TimeMillisSchema.max_fsp()}) + + @staticmethod + def max_fsp(): + return 3 + +# +# time-micros Type +# + +class TimeMicrosSchema(_TimeSchema): + def __init__(self, fsp, other_props=None): + _TimeSchema.__init__(self, 'long', 'time-micros', fsp, other_props) + self.validate({'max_fsp': TimeMicrosSchema.max_fsp()}) + + @staticmethod + def max_fsp(): + return 6 + +# +# timestamp-millis Type +# + +class TimestampMillisSchema(_TimeSchema): + def __init__(self, fsp, other_props=None): + _TimeSchema.__init__(self, 'long', 'timestamp-millis', fsp, other_props) + self.validate({'max_fsp': TimestampMillisSchema.max_fsp()}) + + @staticmethod + def max_fsp(): + return 3 + +# +# timestamp-micros Type +# + +class TimestampMicrosSchema(_TimeSchema): + def __init__(self, fsp, other_props=None): + _TimeSchema.__init__(self, 'long', 'timestamp-micros', fsp, other_props) + self.validate({'max_fsp': TimestampMicrosSchema.max_fsp()}) + + @staticmethod + def max_fsp(): + return 6 + # # Complex Types (non-recursive) # @@ -765,13 +822,31 @@ def make_avsc_object(json_data, names=None): logical_type = None if 'logicalType' in json_data: logical_type = json_data.get('logicalType') - if logical_type not in ['date', 'time-millis']: + if logical_type not in ['date', 'time-millis', 'time-micros', 'timestamp-millis', 'timestamp-micros']: raise SchemaParseException("Currently does not support %s logical type" % logical_type) if type in PRIMITIVE_TYPES: if type == 'int' and logical_type == 'date': return DateSchema(other_props) elif type == 'int' and logical_type == 'time-millis': - return TimeMillisSchema(other_props) + return TimeMillisSchema( + fsp=json_data.get('fsp', 3), + other_props=other_props + ) + elif type == 'long' and logical_type == 'time-micros': + return TimeMicrosSchema( + fsp=json_data.get('fsp', 6), + other_props=other_props + ) + elif type == 'long' and logical_type == 'timestamp-millis': + return TimestampMillisSchema( + fsp=json_data.get('fsp', 3), + other_props=other_props + ) + elif type == 'long' and logical_type == 'timestamp-micros': + return TimestampMicrosSchema( + fsp=json_data.get('fsp', 6), + other_props=other_props + ) return PrimitiveSchema(type, other_props) elif type in NAMED_TYPES: name = json_data.get('name') diff --git a/lang/py/test/test_io.py b/lang/py/test/test_io.py index b4ed503c757..975b4baee4c 100644 --- a/lang/py/test/test_io.py +++ b/lang/py/test/test_io.py @@ -37,7 +37,14 @@ ('"double"', 1234.0), ('{"type": "fixed", "name": "Test", "size": 1}', 'B'), ('{"type": "int", "logicalType": "date"}', datetime.date(2000, 1, 1)), - ('{"type": "int", "logicalType": "time-millis"}', datetime.time(2, 2, 1, 4000)), + ('{"type": "int", "logicalType": "time-millis"}', datetime.time(2, 2, 1, 123000)), + ('{"type": "int", "logicalType": "time-millis", "fsp" : 2}', datetime.time(2, 2, 1, 120000)), + ('{"type": "long", "logicalType": "time-micros"}', datetime.time(2, 2, 1, 123456)), + ('{"type": "long", "logicalType": "time-micros", "fsp" : 5}', datetime.time(2, 2, 1, 123450)), + ('{"type": "long", "logicalType": "timestamp-millis"}', datetime.datetime(2000, 1, 18, 2, 2, 1, 123000)), + ('{"type": "long", "logicalType": "timestamp-millis", "fsp" : 1}', datetime.datetime(2000, 1, 18, 2, 2, 1, 100000)), + ('{"type": "long", "logicalType": "timestamp-micros"}', datetime.datetime(2000, 1, 18, 2, 2, 1, 123456)), + ('{"type": "long", "logicalType": "timestamp-micros", "fsp" : 4}', datetime.datetime(2000, 1, 18, 2, 2, 1, 123400)), ('{"type": "enum", "name": "Test", "symbols": ["A", "B"]}', 'B'), ('{"type": "array", "items": "long"}', [1, 3, 2]), ('{"type": "map", "values": "long"}', {'a': 1, 'b': 3, 'c': 2}), diff --git a/lang/py/test/test_schema.py b/lang/py/test/test_schema.py index 110ba3e0fae..491050892b9 100644 --- a/lang/py/test/test_schema.py +++ b/lang/py/test/test_schema.py @@ -302,6 +302,9 @@ def make_primitive_examples(): ExampleSchema("""{ "type": "int", "logicalType": "date1"} """, False), + ExampleSchema("""{ + "type": "long", + "logicalType": "date"} """, False), ] TIMEMILLIS_LOGICAL_TYPE = [ @@ -310,7 +313,82 @@ def make_primitive_examples(): "logicalType": "time-millis"} """, True), ExampleSchema("""{ "type": "int", + "logicalType": "time-millis", + "fsp": 3} """, True), + ExampleSchema("""{ + "type": "int", + "logicalType": "time-millis", + "fsp": 4} """, False), + ExampleSchema("""{ + "type": "int", + "logicalType": "time-millis", + "fsp": 1} """, True), + ExampleSchema("""{ + "type": "int", "logicalType": "time-milis"} """, False), + ExampleSchema("""{ + "type": "long", + "logicalType": "time-millis"} """, False), +] + +TIMEMICROS_LOGICAL_TYPE = [ + ExampleSchema("""{ + "type": "long", + "logicalType": "time-micros"} """, True), + ExampleSchema("""{ + "type": "long", + "logicalType": "time-micros", + "fsp": 6} """, True), + ExampleSchema("""{ + "type": "long", + "logicalType": "time-micros", + "fsp": 7} """, False), + ExampleSchema("""{ + "type": "long", + "logicalType": "time-micro"} """, False), + ExampleSchema("""{ + "type": "int", + "logicalType": "time-micros"} """, False), +] + +TIMESTAMPMILLIS_LOGICAL_TYPE = [ + ExampleSchema("""{ + "type": "long", + "logicalType": "timestamp-millis"} """, True), + ExampleSchema("""{ + "type": "long", + "logicalType": "timestamp-millis", + "fsp": 2} """, True), + ExampleSchema("""{ + "type": "long", + "logicalType": "timestamp-millis", + "fsp": 4} """, False), + ExampleSchema("""{ + "type": "long", + "logicalType": "timestamp-milis"} """, False), + ExampleSchema("""{ + "type": "int", + "logicalType": "timestamp-millis"} """, False), +] + +TIMESTAMPMICROS_LOGICAL_TYPE = [ + ExampleSchema("""{ + "type": "long", + "logicalType": "timestamp-micros"} """, True), + ExampleSchema("""{ + "type": "long", + "logicalType": "timestamp-micros", + "fsp": 4} """, True), + ExampleSchema("""{ + "type": "long", + "logicalType": "timestamp-micros", + "fsp": 8} """, False), + ExampleSchema("""{ + "type": "long", + "logicalType": "timestamp-micro"} """, False), + ExampleSchema("""{ + "type": "int", + "logicalType": "timestamp-micros"} """, False), ] EXAMPLES = PRIMITIVE_EXAMPLES @@ -322,6 +400,10 @@ def make_primitive_examples(): EXAMPLES += RECORD_EXAMPLES EXAMPLES += DOC_EXAMPLES EXAMPLES += DATE_LOGICAL_TYPE +EXAMPLES += TIMEMILLIS_LOGICAL_TYPE +EXAMPLES += TIMEMICROS_LOGICAL_TYPE +EXAMPLES += TIMESTAMPMILLIS_LOGICAL_TYPE +EXAMPLES += TIMESTAMPMICROS_LOGICAL_TYPE VALID_EXAMPLES = [e for e in EXAMPLES if e.valid]