diff --git a/src/firebolt/async_db/__init__.py b/src/firebolt/async_db/__init__.py index 977f78f267d..4d74ebe04dd 100644 --- a/src/firebolt/async_db/__init__.py +++ b/src/firebolt/async_db/__init__.py @@ -2,7 +2,6 @@ ARRAY, BINARY, DATETIME, - DATETIME64, DECIMAL, NUMBER, ROWID, diff --git a/src/firebolt/async_db/_types.py b/src/firebolt/async_db/_types.py index b204bbd4f08..8b5c0b81a72 100644 --- a/src/firebolt/async_db/_types.py +++ b/src/firebolt/async_db/_types.py @@ -105,11 +105,11 @@ def Binary(value: str) -> str: class ARRAY: """Class for holding `array` column type information in Firebolt DB.""" - _prefix = "Array(" + _prefix = "array(" - def __init__(self, subtype: Union[type, ARRAY, DECIMAL, DATETIME64]): + def __init__(self, subtype: Union[type, ARRAY, DECIMAL]): assert (subtype in _col_types and subtype is not list) or isinstance( - subtype, (ARRAY, DECIMAL, DATETIME64) + subtype, (ARRAY, DECIMAL) ), f"Invalid array subtype: {str(subtype)}" self.subtype = subtype @@ -143,88 +143,46 @@ def __eq__(self, other: object) -> bool: return other.precision == self.precision and other.scale == self.scale -class DATETIME64: - """Class for holding `datetime64` value information in Firebolt DB.""" - - _prefix = "DateTime64(" - - def __init__(self, precision: int): - self.precision = precision - - def __str__(self) -> str: - return f"DateTime64({self.precision})" - - def __hash__(self) -> int: - return hash(str(self)) - - def __eq__(self, other: object) -> bool: - if not isinstance(other, DATETIME64): - return NotImplemented - return other.precision == self.precision - - -NULLABLE_PREFIX = "Nullable(" +NULLABLE_SUFFIX = "null" class _InternalType(Enum): """Enum of all internal Firebolt types, except for `array`.""" - # INT, INTEGER - Int8 = "Int8" - UInt8 = "UInt8" - Int16 = "Int16" - UInt16 = "UInt16" - Int32 = "Int32" - UInt32 = "UInt32" - - # BIGINT, LONG - Int64 = "Int64" - UInt64 = "UInt64" - - # FLOAT - Float32 = "Float32" - - # DOUBLE, DOUBLE PRECISION - Float64 = "Float64" + Int = "int" + Long = "long" + Float = "float" + Double = "double" - # VARCHAR, TEXT, STRING - String = "String" + Text = "text" - # DATE - Date = "Date" - Date32 = "Date32" - PGDate = "PGDate" + Date = "date" + DateExt = "date_ext" + PGDate = "pgdate" - # DATETIME, TIMESTAMP - DateTime = "DateTime" - TimestampNtz = "TimestampNtz" - TimestampTz = "TimestampTz" + Timestamp = "timestamp" + TimestampExt = "timestamp_ext" + TimestampNtz = "timestampntz" + TimestampTz = "timestamptz" - # BOOLEAN Boolean = "boolean" - # Nullable(Nothing) Nothing = "Nothing" @cached_property def python_type(self) -> type: """Convert internal type to Python type.""" types = { - _InternalType.Int8: int, - _InternalType.UInt8: int, - _InternalType.Int16: int, - _InternalType.UInt16: int, - _InternalType.Int32: int, - _InternalType.UInt32: int, - _InternalType.Int64: int, - _InternalType.UInt64: int, - _InternalType.Float32: float, - _InternalType.Float64: float, - _InternalType.String: str, + _InternalType.Int: int, + _InternalType.Long: int, + _InternalType.Float: float, + _InternalType.Double: float, + _InternalType.Text: str, _InternalType.Date: date, - _InternalType.Date32: date, + _InternalType.DateExt: date, _InternalType.PGDate: date, - _InternalType.DateTime: datetime, + _InternalType.Timestamp: datetime, + _InternalType.TimestampExt: datetime, _InternalType.TimestampNtz: datetime, _InternalType.TimestampTz: datetime, _InternalType.Boolean: bool, @@ -234,7 +192,7 @@ def python_type(self) -> type: return types[self] -def parse_type(raw_type: str) -> Union[type, ARRAY, DECIMAL, DATETIME64]: # noqa: C901 +def parse_type(raw_type: str) -> Union[type, ARRAY, DECIMAL]: # noqa: C901 """Parse typename provided by query metadata into Python type.""" if not isinstance(raw_type, str): raise DataError(f"Invalid typename {str(raw_type)}: str expected") @@ -250,18 +208,9 @@ def parse_type(raw_type: str) -> Union[type, ARRAY, DECIMAL, DATETIME64]: # noq pass else: return DECIMAL(precision, scale) - # Handle detetime64 - if raw_type.startswith(DATETIME64._prefix) and raw_type.endswith(")"): - try: - precision = int(raw_type[len(DATETIME64._prefix) : -1]) - except (ValueError, IndexError): - pass - else: - return DATETIME64(precision) # Handle nullable - if raw_type.startswith(NULLABLE_PREFIX) and raw_type.endswith(")"): - return parse_type(raw_type[len(NULLABLE_PREFIX) : -1]) - + if raw_type.endswith(NULLABLE_SUFFIX): + return parse_type(raw_type[: -len(NULLABLE_SUFFIX)].strip(" ")) try: return _InternalType(raw_type).python_type except ValueError: @@ -272,7 +221,7 @@ def parse_type(raw_type: str) -> Union[type, ARRAY, DECIMAL, DATETIME64]: # noq def parse_value( value: RawColType, - ctype: Union[type, ARRAY, DECIMAL, DATETIME64], + ctype: Union[type, ARRAY, DECIMAL], ) -> ColType: """Provided raw value, and Python type; parses first into Python value.""" if value is None: @@ -285,7 +234,7 @@ def parse_value( raise DataError(f"Invalid date value {value}: str expected") assert isinstance(value, str) return parse_datetime(value).date() - if ctype is datetime or isinstance(ctype, DATETIME64): + if ctype is datetime: if not isinstance(value, str): raise DataError(f"Invalid datetime value {value}: str expected") return parse_datetime(value) diff --git a/src/firebolt/async_db/cursor.py b/src/firebolt/async_db/cursor.py index 363314a6ace..4afef132426 100644 --- a/src/firebolt/async_db/cursor.py +++ b/src/firebolt/async_db/cursor.py @@ -51,7 +51,7 @@ logger = logging.getLogger(__name__) -JSON_OUTPUT_FORMAT = "JSONCompact" +JSON_OUTPUT_FORMAT = "JSON_Compact" class CursorState(Enum): diff --git a/src/firebolt/db/__init__.py b/src/firebolt/db/__init__.py index 67611bbe34f..8ee02c1448f 100644 --- a/src/firebolt/db/__init__.py +++ b/src/firebolt/db/__init__.py @@ -2,7 +2,6 @@ ARRAY, BINARY, DATETIME, - DATETIME64, DECIMAL, NUMBER, ROWID, diff --git a/tests/integration/dbapi/async/test_queries_async.py b/tests/integration/dbapi/async/test_queries_async.py index cc74b442a5f..f1ef2f8746f 100644 --- a/tests/integration/dbapi/async/test_queries_async.py +++ b/tests/integration/dbapi/async/test_queries_async.py @@ -138,10 +138,14 @@ async def test_select( assert ( await c.execute(f"SET advanced_mode=1") == -1 ), "Invalid set statment row count" - # For TimestampTz test + # For timestamptz test assert ( await c.execute(f"SET time_zone={timezone_name}") == -1 ), "Invalid set statment row count" + # For boolean test + assert ( + await c.execute(f"SET bool_output_format=postgres") == -1 + ), "Invalid set statment row count" assert await c.execute(all_types_query) == 1, "Invalid row count returned" assert c.rowcount == 1, "Invalid rowcount value" @@ -168,37 +172,6 @@ async def test_select( ) -async def test_boolean( - connection: Connection, -) -> None: - """Select handles boolean properly.""" - with connection.cursor() as c: - assert ( - await c.execute(f"SET advanced_mode=1") == -1 - ), "Invalid set statment row count" - # Unfortunately our parser doesn't support string set parameters - c._set_parameters.update( - { - "bool_output_format": "postgres", - "output_format_firebolt_type_names": "true", - } - ) - - assert ( - await c.execute('select true as "bool"') == 1 - ), "Invalid row count returned" - assert c.rowcount == 1, "Invalid rowcount value" - data = await c.fetchall() - assert len(data) == c.rowcount, "Invalid data length" - assert_deep_eq(data, [[True]], "Invalid data") - assert ( - c.description == [Column("bool", bool, None, None, None, None, None)], - "Invalid description value", - ) - assert len(data[0]) == len(c.description), "Invalid description length" - assert len(await c.fetchall()) == 0, "Redundant data returned by fetchall" - - @mark.skip("Don't have a good way to test this anymore. FIR-16038") @mark.timeout(timeout=400) async def test_long_query( @@ -328,7 +301,7 @@ async def test_empty_query(c: Cursor, query: str) -> None: 1.1, date(2021, 1, 1), datetime(2021, 1, 1, 1, 1, 1), - 1, + True, [1, 2, 3], ], ], @@ -382,9 +355,6 @@ async def test_empty_query(c: Cursor, query: str, params: tuple) -> None: # \0 is converted to 0 params[2] = "text0" - # Bool is converted to int - params[6] = 1 - assert ( await c.execute("SELECT * FROM test_tb_async_parameterized") == 1 ), "Invalid data length in table after parameterized insert" diff --git a/tests/integration/dbapi/conftest.py b/tests/integration/dbapi/conftest.py index 080571504cc..de64bd374dd 100644 --- a/tests/integration/dbapi/conftest.py +++ b/tests/integration/dbapi/conftest.py @@ -7,7 +7,7 @@ from firebolt.async_db._types import ColType from firebolt.async_db.cursor import Column -from firebolt.db import ARRAY, DATETIME64, DECIMAL, Connection +from firebolt.db import ARRAY, DECIMAL, Connection LOGGER = getLogger(__name__) @@ -71,7 +71,8 @@ def all_types_query() -> str: "CAST('2019-07-31 01:01:01' AS DATETIME) as \"datetime\", " "CAST('2019-07-31 01:01:01.1234' AS TIMESTAMP_EXT(4)) as \"datetime64\", " "CAST('1111-01-05 17:04:42.123456' as timestampntz) as timestampntz, " - "'1111-01-05 17:04:42.123456'::timestamptz as timestamptz," + "'1111-01-05 17:04:42.123456'::timestamptz as timestamptz, " + 'true as "boolean", ' "[1,2,3,4] as \"array\", cast('1231232.123459999990457054844258706536' as " 'decimal(38,30)) as "decimal", ' "cast(null as int) as nullable" @@ -96,9 +97,10 @@ def all_types_query_description() -> List[Column]: Column("date32", date, None, None, None, None, None), Column("pgdate", date, None, None, None, None, None), Column("datetime", datetime, None, None, None, None, None), - Column("datetime64", DATETIME64(4), None, None, None, None, None), + Column("datetime64", datetime, None, None, None, None, None), Column("timestampntz", datetime, None, None, None, None, None), Column("timestamptz", datetime, None, None, None, None, None), + Column("boolean", bool, None, None, None, None, None), Column("array", ARRAY(int), None, None, None, None, None), Column("decimal", DECIMAL(38, 30), None, None, None, None, None), Column("nullable", int, None, None, None, None, None), @@ -136,6 +138,7 @@ def all_types_query_response(timezone_offset_seconds: int) -> List[ColType]: 123456, tzinfo=timezone(timedelta(seconds=timezone_offset_seconds)), ), + True, [1, 2, 3, 4], Decimal("1231232.123459999990457054844258706536"), None, diff --git a/tests/integration/dbapi/sync/test_queries.py b/tests/integration/dbapi/sync/test_queries.py index a2342e2a364..f29946602f7 100644 --- a/tests/integration/dbapi/sync/test_queries.py +++ b/tests/integration/dbapi/sync/test_queries.py @@ -95,9 +95,14 @@ def test_select( """Select handles all data types properly.""" with connection.cursor() as c: assert c.execute(f"SET advanced_mode=1") == -1, "Invalid set statment row count" + # For timestamptz test assert ( c.execute(f"SET time_zone={timezone_name}") == -1 ), "Invalid set statment row count" + # For boolean test + assert ( + c.execute(f"SET bool_output_format=postgres") == -1 + ), "Invalid set statment row count" assert c.execute(all_types_query) == 1, "Invalid row count returned" assert c.rowcount == 1, "Invalid rowcount value" @@ -122,32 +127,6 @@ def test_select( ) -def test_boolean( - connection: Connection, -) -> None: - """Select handles boolean properly.""" - with connection.cursor() as c: - assert c.execute(f"SET advanced_mode=1") == -1, "Invalid set statment row count" - # Unfortunately our parser doesn't support string set parameters - c._set_parameters.update( - { - "bool_output_format": "postgres", - "output_format_firebolt_type_names": "true", - } - ) - - assert c.execute('select true as "bool"') == 1, "Invalid row count returned" - assert c.rowcount == 1, "Invalid rowcount value" - data = c.fetchall() - assert len(data) == c.rowcount, "Invalid data length" - assert_deep_eq(data, [[True]], "Invalid data") - assert c.description == [ - Column("bool", bool, None, None, None, None, None) - ], "Invalid description value" - assert len(data[0]) == len(c.description), "Invalid description length" - assert len(c.fetchall()) == 0, "Redundant data returned by fetchall" - - @mark.skip("Don't have a good way to test this anymore. FIR-16038") @mark.timeout(timeout=400) def test_long_query( @@ -273,7 +252,7 @@ def test_empty_query(c: Cursor, query: str) -> None: 1.1, date(2021, 1, 1), datetime(2021, 1, 1, 1, 1, 1), - 1, + True, [1, 2, 3], ], ], @@ -327,9 +306,6 @@ def test_empty_query(c: Cursor, query: str, params: tuple) -> None: # \0 is converted to 0 params[2] = "text0" - # Bool is converted to int - params[6] = 1 - assert ( c.execute("SELECT * FROM test_tb_parameterized") == 1 ), "Invalid data length in table after parameterized insert" diff --git a/tests/unit/async_db/conftest.py b/tests/unit/async_db/conftest.py index a644bf8927e..d7064bfa691 100644 --- a/tests/unit/async_db/conftest.py +++ b/tests/unit/async_db/conftest.py @@ -4,14 +4,7 @@ from pytest import fixture from pytest_asyncio import fixture as asyncio_fixture -from firebolt.async_db import ( - ARRAY, - DATETIME64, - DECIMAL, - Connection, - Cursor, - connect, -) +from firebolt.async_db import ARRAY, DECIMAL, Connection, Cursor, connect from firebolt.common.settings import Settings from tests.unit.db_conftest import * # noqa @@ -39,23 +32,19 @@ async def cursor(connection: Connection, settings: Settings) -> Cursor: @fixture def types_map() -> Dict[str, type]: base_types = { - "UInt8": int, - "UInt16": int, - "UInt32": int, - "Int32": int, - "UInt64": int, - "Int64": int, - "Float32": float, - "Float64": float, - "String": str, - "Date": date, - "Date32": date, - "PGDate": date, - "DateTime": datetime, - "DateTime64(7)": DATETIME64(7), - "TimestampNtz": datetime, - "TimestampTz": datetime, - "Nullable(Nothing)": str, + "int": int, + "long": int, + "float": float, + "double": float, + "text": str, + "date": date, + "date_ext": date, + "pgdate": date, + "timestamp": datetime, + "timestamp_ext": datetime, + "timestampntz": datetime, + "timestamptz": datetime, + "Nothing null": str, "Decimal(123, 4)": DECIMAL(123, 4), "Decimal(38,0)": DECIMAL(38, 0), # Invalid decimal format @@ -63,7 +52,7 @@ def types_map() -> Dict[str, type]: "boolean": bool, "SomeRandomNotExistingType": str, } - array_types = {f"Array({k})": ARRAY(v) for k, v in base_types.items()} - nullable_arrays = {f"Nullable({k})": v for k, v in array_types.items()} - nested_arrays = {f"Array({k})": ARRAY(v) for k, v in array_types.items()} + array_types = {f"array({k})": ARRAY(v) for k, v in base_types.items()} + nullable_arrays = {f"{k} null": v for k, v in array_types.items()} + nested_arrays = {f"array({k})": ARRAY(v) for k, v in array_types.items()} return {**base_types, **array_types, **nullable_arrays, **nested_arrays} diff --git a/tests/unit/db_conftest.py b/tests/unit/db_conftest.py index 013ffa93729..2c7505370f7 100644 --- a/tests/unit/db_conftest.py +++ b/tests/unit/db_conftest.py @@ -8,7 +8,7 @@ from firebolt.async_db.cursor import JSON_OUTPUT_FORMAT, ColType, Column from firebolt.common.settings import Settings -from firebolt.db import ARRAY, DATETIME64, DECIMAL +from firebolt.db import ARRAY, DECIMAL QUERY_ROW_COUNT: int = 10 @@ -16,21 +16,21 @@ @fixture def query_description() -> List[Column]: return [ - Column("uint8", "UInt8", None, None, None, None, None), - Column("uint16", "UInt16", None, None, None, None, None), - Column("uint32", "UInt32", None, None, None, None, None), - Column("int32", "Int32", None, None, None, None, None), - Column("uint64", "UInt64", None, None, None, None, None), - Column("int64", "Int64", None, None, None, None, None), - Column("float32", "Float32", None, None, None, None, None), - Column("float64", "Float64", None, None, None, None, None), - Column("string", "String", None, None, None, None, None), - Column("date", "Date", None, None, None, None, None), - Column("date32", "Date32", None, None, None, None, None), - Column("datetime", "DateTime", None, None, None, None, None), - Column("datetime64", "DateTime64(4)", None, None, None, None, None), - Column("bool", "UInt8", None, None, None, None, None), - Column("array", "Array(UInt8)", None, None, None, None, None), + Column("uint8", "int", None, None, None, None, None), + Column("uint16", "int", None, None, None, None, None), + Column("uint32", "int", None, None, None, None, None), + Column("int32", "int", None, None, None, None, None), + Column("uint64", "long", None, None, None, None, None), + Column("int64", "long", None, None, None, None, None), + Column("float32", "float", None, None, None, None, None), + Column("float64", "double", None, None, None, None, None), + Column("string", "text", None, None, None, None, None), + Column("date", "date", None, None, None, None, None), + Column("date32", "date_ext", None, None, None, None, None), + Column("datetime", "timestamp", None, None, None, None, None), + Column("datetime64", "timestamp_ext", None, None, None, None, None), + Column("bool", "boolean", None, None, None, None, None), + Column("array", "array(int)", None, None, None, None, None), Column("decimal", "Decimal(12, 34)", None, None, None, None, None), ] @@ -50,8 +50,8 @@ def python_query_description() -> List[Column]: Column("date", date, None, None, None, None, None), Column("date32", date, None, None, None, None, None), Column("datetime", datetime, None, None, None, None, None), - Column("datetime64", DATETIME64(4), None, None, None, None, None), - Column("bool", int, None, None, None, None, None), + Column("datetime64", datetime, None, None, None, None, None), + Column("bool", bool, None, None, None, None, None), Column("array", ARRAY(int), None, None, None, None, None), Column("decimal", DECIMAL(12, 34), None, None, None, None, None), ]