From 6a69dd2b2ae1f1cce4eea37bde7705b69a9b3164 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Sun, 15 Oct 2023 19:51:20 -0400 Subject: [PATCH 01/10] Fix get_table_names and get_view_names Associated tests in ComponentReflectionTest now pass Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/base.py | 47 ++++++++++++-------- src/databricks/sqlalchemy/test/test_suite.py | 13 ++++++ 2 files changed, 41 insertions(+), 19 deletions(-) diff --git a/src/databricks/sqlalchemy/base.py b/src/databricks/sqlalchemy/base.py index df3823439..1654bb98c 100644 --- a/src/databricks/sqlalchemy/base.py +++ b/src/databricks/sqlalchemy/base.py @@ -279,31 +279,40 @@ def get_foreign_keys( return fk_constraints def get_indexes(self, connection, table_name, schema=None, **kw): - """SQLAlchemy requires this method. Databricks doesn't support indexes. - """ + """SQLAlchemy requires this method. Databricks doesn't support indexes.""" return self.EMPTY_INDEX - def get_table_names(self, connection, schema=None, **kwargs): - TABLE_NAME = 1 - with self.get_connection_cursor(connection) as cur: - sql_str = "SHOW TABLES FROM {}".format( - ".".join([self.catalog, schema or self.schema]) - ) - data = cur.execute(sql_str).fetchall() - _tables = [i[TABLE_NAME] for i in data] + def get_table_names(self, connection: Connection, schema=None, **kwargs): + """Return a list of tables in the current schema.""" + + _target_catalog = self.catalog + _target_schema = schema or self.schema + _target = f"`{_target_catalog}`.`{_target_schema}`" + + stmt = DDL(f"SHOW TABLES FROM {_target}") + + tables_result = connection.execute(stmt).all() + views_result = self.get_view_names(connection=connection, schema=schema) - return _tables + # In Databricks, SHOW TABLES FROM returns both tables and views. + # Potential optimisation: rewrite this to instead query informtation_schema + tables_minus_views = [ + row.tableName for row in tables_result if row.tableName not in views_result + ] + + return tables_minus_views def get_view_names(self, connection, schema=None, **kwargs): - VIEW_NAME = 1 - with self.get_connection_cursor(connection) as cur: - sql_str = "SHOW VIEWS FROM {}".format( - ".".join([self.catalog, schema or self.schema]) - ) - data = cur.execute(sql_str).fetchall() - _tables = [i[VIEW_NAME] for i in data] + """Returns a list of string view names contained in the schema, if any.""" + + _target_catalog = self.catalog + _target_schema = schema or self.schema + _target = f"`{_target_catalog}`.`{_target_schema}`" + + stmt = DDL(f"SHOW VIEWS FROM {_target}") + result = connection.execute(stmt).all() - return _tables + return [row.viewName for row in result] def do_rollback(self, dbapi_connection): # Databricks SQL Does not support transactions diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index 93096b509..9b74be2e6 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -417,6 +417,19 @@ class ComponentReflectionTest(ComponentReflectionTest): # test_get_schema_names # test_not_existing_table + @pytest.mark.skip("This dialect doesn't implement get_multi_pk_constraint") + def test_get_multi_pk_constraint(self): + pass + + @pytest.mark.skip("This dialect doesn't implement get_multi_columns") + def test_get_multi_columns(self): + pass + + @pytest.mark.skip("This dialect doesn't implement get_multi_foreign_keys") + def test_get_multi_foreign_keys(self): + pass + + @pytest.mark.skip(reason="Databricks doesn't support temp tables.") def test_get_temp_table_columns(self): pass From 906afe511c7d3ecedd2f14662c14f1c5d3a9093f Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Sun, 15 Oct 2023 20:23:49 -0400 Subject: [PATCH 02/10] Update get_columns with notes about autoincrement. Now passing ComponentReflectionTest::test_autoincrement_col Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/_parse.py | 98 +++++++++++++++++--- src/databricks/sqlalchemy/base.py | 70 ++------------ src/databricks/sqlalchemy/test/test_suite.py | 4 + 3 files changed, 95 insertions(+), 77 deletions(-) diff --git a/src/databricks/sqlalchemy/_parse.py b/src/databricks/sqlalchemy/_parse.py index 941737ba6..4bc7d65e7 100644 --- a/src/databricks/sqlalchemy/_parse.py +++ b/src/databricks/sqlalchemy/_parse.py @@ -1,7 +1,9 @@ from typing import List, Optional, Dict import re +import sqlalchemy from sqlalchemy.engine import CursorResult +from sqlalchemy.engine.interfaces import ReflectedColumn """ This module contains helper functions that can parse the contents @@ -9,6 +11,7 @@ wrappers around regexes. """ + def _match_table_not_found_string(message: str) -> bool: """Return True if the message contains a substring indicating that a table was not found""" @@ -22,9 +25,10 @@ def _match_table_not_found_string(message: str) -> bool: ) -def _describe_table_extended_result_to_dict_list(result: CursorResult) -> List[Dict[str, str]]: - """Transform the CursorResult of DESCRIBE TABLE EXTENDED into a list of Dictionaries - """ +def _describe_table_extended_result_to_dict_list( + result: CursorResult, +) -> List[Dict[str, str]]: + """Transform the CursorResult of DESCRIBE TABLE EXTENDED into a list of Dictionaries""" rows_to_return = [] for row in result: @@ -68,22 +72,23 @@ def extract_three_level_identifier_from_constraint_string(input_str: str) -> dic """ pat = re.compile(r"REFERENCES\s+(.*?)\s*\(") matches = pat.findall(input_str) - + if not matches: return None - + first_match = matches[0] parts = first_match.split(".") - def strip_backticks(input:str): + def strip_backticks(input: str): return input.replace("`", "") - + return { - "catalog": strip_backticks(parts[0]), + "catalog": strip_backticks(parts[0]), "schema": strip_backticks(parts[1]), - "table": strip_backticks(parts[2]) + "table": strip_backticks(parts[2]), } + def _parse_fk_from_constraint_string(constraint_str: str) -> dict: """Build a dictionary of foreign key constraint information from a constraint string. @@ -133,6 +138,7 @@ def _parse_fk_from_constraint_string(constraint_str: str) -> dict: "referred_schema": referred_schema, } + def build_fk_dict( fk_name: str, fk_constraint_string: str, schema_name: Optional[str] ) -> dict: @@ -172,6 +178,7 @@ def build_fk_dict( return complete_foreign_key_dict + def _parse_pk_columns_from_constraint_string(constraint_str: str) -> List[str]: """Build a list of constrained columns from a constraint string returned by DESCRIBE TABLE EXTENDED @@ -188,21 +195,23 @@ def _parse_pk_columns_from_constraint_string(constraint_str: str) -> List[str]: return _extracted + def build_pk_dict(pk_name: str, pk_constraint_string: str) -> dict: """Given a primary key name and a primary key constraint string, return a dictionary with the following keys: - + constrained_columns A list of string column names that make up the primary key name The name of the primary key constraint """ - + constrained_columns = _parse_pk_columns_from_constraint_string(pk_constraint_string) return {"constrained_columns": constrained_columns, "name": pk_name} - + + def match_dte_rows_by_value(dte_output: List[Dict[str, str]], match: str) -> List[dict]: """Return a list of dictionaries containing only the col_name:data_type pairs where the `data_type` value contains the match argument. @@ -221,9 +230,10 @@ def match_dte_rows_by_value(dte_output: List[Dict[str, str]], match: str) -> Lis for row_dict in dte_output: if match in row_dict["data_type"]: output_rows.append(row_dict) - + return output_rows + def get_fk_strings_from_dte_output(dte_output: List[List]) -> List[dict]: """If the DESCRIBE TABLE EXTENDED output contains foreign key constraints, return a list of dictionaries, one dictionary per defined constraint @@ -233,8 +243,10 @@ def get_fk_strings_from_dte_output(dte_output: List[List]) -> List[dict]: return output - -def get_pk_strings_from_dte_output(dte_output: List[Dict[str, str]]) -> Optional[List[dict]]: + +def get_pk_strings_from_dte_output( + dte_output: List[Dict[str, str]] +) -> Optional[List[dict]]: """If the DESCRIBE TABLE EXTENDED output contains primary key constraints, return a list of dictionaries, one dictionary per defined constraint. @@ -244,3 +256,59 @@ def get_pk_strings_from_dte_output(dte_output: List[Dict[str, str]]) -> Optional output = match_dte_rows_by_value(dte_output, "PRIMARY KEY") return output + + +# The keys of this dictionary are the values we expect to see in a +# TGetColumnsRequest's .TYPE_NAME attribute. +# These are enumerated in ttypes.py as class TTypeId. +# TODO: confirm that all types in TTypeId are included here. +GET_COLUMNS_TYPE_MAP = { + "boolean": sqlalchemy.types.Boolean, + "smallint": sqlalchemy.types.SmallInteger, + "int": sqlalchemy.types.Integer, + "bigint": sqlalchemy.types.BigInteger, + "float": sqlalchemy.types.Float, + "double": sqlalchemy.types.Float, + "string": sqlalchemy.types.String, + "varchar": sqlalchemy.types.String, + "char": sqlalchemy.types.String, + "binary": sqlalchemy.types.String, + "array": sqlalchemy.types.String, + "map": sqlalchemy.types.String, + "struct": sqlalchemy.types.String, + "uniontype": sqlalchemy.types.String, + "decimal": sqlalchemy.types.Numeric, + "timestamp": sqlalchemy.types.DateTime, + "date": sqlalchemy.types.Date, +} + + +def parse_column_info_from_tgetcolumnsresponse(thrift_resp_row) -> ReflectedColumn: + """Returns a dictionary of the ReflectedColumn schema parsed from + a single of the result of a TGetColumnsRequest thrift RPC + + Currently doesn't preserve decimal precision + """ + + pat = re.compile(r"^\w+") + _raw_col_type = re.search(pat, thrift_resp_row.TYPE_NAME).group(0).lower() + _col_type = GET_COLUMNS_TYPE_MAP[_raw_col_type] + + # See comments about autoincrement in test_suite.py + # Since Databricks SQL doesn't currently support inline AUTOINCREMENT declarations + # the autoincrement must be manually declared with an Identity() construct in SQLAlchemy + # Other dialects can perform this extra Identity() step automatically. But that is not + # implemented in the Databricks dialect right now. So autoincrement is currently always False. + # It's not clear what IS_AUTO_INCREMENT in the thrift response actually reflects or whether + # it ever returns a `YES`. + + # Per the guidance in SQLAlchemy's docstrings, we prefer to not even include an autoincrement + # key in this dictionary. + this_column = { + "name": thrift_resp_row.COLUMN_NAME, + "type": _col_type, + "nullable": bool(thrift_resp_row.NULLABLE), + "default": thrift_resp_row.COLUMN_DEF, + } + + return this_column diff --git a/src/databricks/sqlalchemy/base.py b/src/databricks/sqlalchemy/base.py index 1654bb98c..ada0fa204 100644 --- a/src/databricks/sqlalchemy/base.py +++ b/src/databricks/sqlalchemy/base.py @@ -11,6 +11,7 @@ build_pk_dict, get_fk_strings_from_dte_output, get_pk_strings_from_dte_output, + parse_column_info_from_tgetcolumnsresponse, ) import sqlalchemy @@ -19,6 +20,7 @@ from sqlalchemy.engine.interfaces import ( ReflectedForeignKeyConstraint, ReflectedPrimaryKeyConstraint, + ReflectedColumn, ) from sqlalchemy.exc import DatabaseError, SQLAlchemyError @@ -38,27 +40,6 @@ class DatabricksImpl(DefaultImpl): logger = logging.getLogger(__name__) -COLUMN_TYPE_MAP = { - "boolean": sqlalchemy.types.Boolean, - "smallint": sqlalchemy.types.SmallInteger, - "int": sqlalchemy.types.Integer, - "bigint": sqlalchemy.types.BigInteger, - "float": sqlalchemy.types.Float, - "double": sqlalchemy.types.Float, - "string": sqlalchemy.types.String, - "varchar": sqlalchemy.types.String, - "char": sqlalchemy.types.String, - "binary": sqlalchemy.types.String, - "array": sqlalchemy.types.String, - "map": sqlalchemy.types.String, - "struct": sqlalchemy.types.String, - "uniontype": sqlalchemy.types.String, - "decimal": sqlalchemy.types.Numeric, - "timestamp": sqlalchemy.types.DateTime, - "date": sqlalchemy.types.Date, -} - - class DatabricksDialect(default.DefaultDialect): """This dialect implements only those methods required to pass our e2e tests""" @@ -113,36 +94,10 @@ def create_connect_args(self, url): return [], kwargs - def get_columns(self, connection, table_name, schema=None, **kwargs): - """Return information about columns in `table_name`. - - Given a :class:`_engine.Connection`, a string - `table_name`, and an optional string `schema`, return column - information as a list of dictionaries with these keys: - - name - the column's name - - type - [sqlalchemy.types#TypeEngine] - - nullable - boolean - - default - the column's default value - - autoincrement - boolean - - sequence - a dictionary of the form - {'name' : str, 'start' :int, 'increment': int, 'minvalue': int, - 'maxvalue': int, 'nominvalue': bool, 'nomaxvalue': bool, - 'cycle': bool, 'cache': int, 'order': bool} - - Additional column attributes may be present. - """ + def get_columns( + self, connection, table_name, schema=None, **kwargs + ) -> List[ReflectedColumn]: + """Return information about columns in `table_name`.""" with self.get_connection_cursor(connection) as cur: resp = cur.columns( @@ -154,18 +109,9 @@ def get_columns(self, connection, table_name, schema=None, **kwargs): if not resp: raise sqlalchemy.exc.NoSuchTableError(table_name) columns = [] - for col in resp: - # Taken from PyHive. This removes added type info from decimals and maps - _col_type = re.search(r"^\w+", col.TYPE_NAME).group(0) - this_column = { - "name": col.COLUMN_NAME, - "type": COLUMN_TYPE_MAP[_col_type.lower()], - "nullable": bool(col.NULLABLE), - "default": col.COLUMN_DEF, - "autoincrement": False if col.IS_AUTO_INCREMENT == "NO" else True, - } - columns.append(this_column) + row_dict = parse_column_info_from_tgetcolumnsresponse(col) + columns.append(row_dict) return columns diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index 9b74be2e6..3a48b5679 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -417,6 +417,10 @@ class ComponentReflectionTest(ComponentReflectionTest): # test_get_schema_names # test_not_existing_table + @pytest.mark.skip("This dialect doesn't implement test_get_view_definition") + def test_get_view_definition(self): + pass + @pytest.mark.skip("This dialect doesn't implement get_multi_pk_constraint") def test_get_multi_pk_constraint(self): pass From e6c33ce525091f3c98bbcc6469320b16f6dc270d Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Sun, 15 Oct 2023 20:31:58 -0400 Subject: [PATCH 03/10] Stop skipping get_multi_columns 84 tests that failed before now pass as a result of the changes in 66497cad4ae794a5d4480f277b21034ab76aa3c2 Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/test/test_suite.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index 3a48b5679..8ea242fa7 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -425,10 +425,6 @@ def test_get_view_definition(self): def test_get_multi_pk_constraint(self): pass - @pytest.mark.skip("This dialect doesn't implement get_multi_columns") - def test_get_multi_columns(self): - pass - @pytest.mark.skip("This dialect doesn't implement get_multi_foreign_keys") def test_get_multi_foreign_keys(self): pass From e0c9f1080266e9ab71fe26a84499214014db2675 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Sun, 15 Oct 2023 20:35:53 -0400 Subject: [PATCH 04/10] Add cache decorators for these methods. Looking through the first-party dialects in SQLAlchemy's codebase, it appears SQLAlchemy knows how to properly set and invalidate these caches. Adding these decorators improves test suite performance. Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/base.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/databricks/sqlalchemy/base.py b/src/databricks/sqlalchemy/base.py index ada0fa204..5199551db 100644 --- a/src/databricks/sqlalchemy/base.py +++ b/src/databricks/sqlalchemy/base.py @@ -228,6 +228,7 @@ def get_indexes(self, connection, table_name, schema=None, **kw): """SQLAlchemy requires this method. Databricks doesn't support indexes.""" return self.EMPTY_INDEX + @reflection.cache def get_table_names(self, connection: Connection, schema=None, **kwargs): """Return a list of tables in the current schema.""" @@ -248,6 +249,7 @@ def get_table_names(self, connection: Connection, schema=None, **kwargs): return tables_minus_views + @reflection.cache def get_view_names(self, connection, schema=None, **kwargs): """Returns a list of string view names contained in the schema, if any.""" From 4bc7c13613231b8670d17621f13d0bf56034b5f6 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Mon, 16 Oct 2023 00:14:30 -0400 Subject: [PATCH 05/10] Added additional view fetchers which help the get_multi_* suite of tests to pass Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/base.py | 34 ++++++++++++++++++-- src/databricks/sqlalchemy/test/test_suite.py | 21 ++++++++---- 2 files changed, 46 insertions(+), 9 deletions(-) diff --git a/src/databricks/sqlalchemy/base.py b/src/databricks/sqlalchemy/base.py index 5199551db..fa100f4f7 100644 --- a/src/databricks/sqlalchemy/base.py +++ b/src/databricks/sqlalchemy/base.py @@ -1,5 +1,5 @@ import re -from typing import Any, List, Optional, Dict +from typing import Any, List, Optional, Dict, Collection, Iterable, Tuple import databricks.sqlalchemy._ddl as dialect_ddl_impl import databricks.sqlalchemy._types as dialect_type_impl @@ -17,10 +17,12 @@ import sqlalchemy from sqlalchemy import DDL, event from sqlalchemy.engine import Connection, Engine, default, reflection +from sqlalchemy.engine.reflection import ObjectKind from sqlalchemy.engine.interfaces import ( ReflectedForeignKeyConstraint, ReflectedPrimaryKeyConstraint, ReflectedColumn, + TableKey, ) from sqlalchemy.exc import DatabaseError, SQLAlchemyError @@ -250,7 +252,14 @@ def get_table_names(self, connection: Connection, schema=None, **kwargs): return tables_minus_views @reflection.cache - def get_view_names(self, connection, schema=None, **kwargs): + def get_view_names( + self, + connection, + schema=None, + only_materialized=False, + only_temp=False, + **kwargs, + ) -> List[str]: """Returns a list of string view names contained in the schema, if any.""" _target_catalog = self.catalog @@ -260,7 +269,26 @@ def get_view_names(self, connection, schema=None, **kwargs): stmt = DDL(f"SHOW VIEWS FROM {_target}") result = connection.execute(stmt).all() - return [row.viewName for row in result] + return [ + row.viewName + for row in result + if (not only_materialized or row.isMaterialized) + and (not only_temp or row.isTemporary) + ] + + @reflection.cache + def get_materialized_view_names( + self, connection: Connection, schema: Optional[str] = None, **kw: Any + ) -> List[str]: + """A wrapper around get_view_names that fetches only the names of materialized views""" + return self.get_view_names(connection, schema, only_materialized=True) + + @reflection.cache + def get_temp_view_names( + self, connection: Connection, schema: Optional[str] = None, **kw: Any + ) -> List[str]: + """A wrapper around get_view_names taht fetches only the names of temporary views""" + return self.get_view_names(connection, schema, only_temp=True) def do_rollback(self, dbapi_connection): # Databricks SQL Does not support transactions diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index 8ea242fa7..870b87632 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -411,24 +411,33 @@ class ComponentReflectionTest(ComponentReflectionTest): """This test requires two schemas be present in the target Databricks workspace: - The schema set in --dburi - A second schema named "test_schema" + + Note that test_get_multi_foreign keys is flaky because DBR does not guarantee the order of data returned in DESCRIBE TABLE EXTENDED """ # We've reviewed these tests: # test_get_schema_names # test_not_existing_table - @pytest.mark.skip("This dialect doesn't implement test_get_view_definition") + @pytest.mark.skip("This dialect doesn't implement get_view_definition") def test_get_view_definition(self): pass - - @pytest.mark.skip("This dialect doesn't implement get_multi_pk_constraint") - def test_get_multi_pk_constraint(self): + + @pytest.mark.skip(reason="This dialect doesn't implement get_view_definition") + def test_get_view_definition_does_not_exist(self): pass - @pytest.mark.skip("This dialect doesn't implement get_multi_foreign_keys") - def test_get_multi_foreign_keys(self): + @pytest.mark.skip(reason="Strange test design. See test_suite.py") + def test_get_temp_view_names(self): + """While Databricks supports temporary views, this test creates a temp view aimed at a temp table. + Databricks doesn't support temp tables. So the test can never pass. + """ pass + + @pytest.mark.skip("This dialect doesn't implement get_multi_pk_constraint") + def test_get_multi_pk_constraint(self): + pass @pytest.mark.skip(reason="Databricks doesn't support temp tables.") def test_get_temp_table_columns(self): From 0ba46959fded336c38a01bded767cdcadeb996d2 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Mon, 16 Oct 2023 00:33:45 -0400 Subject: [PATCH 06/10] Add more detailed skip directives for ComponentReflection test Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/_ddl.py | 2 +- src/databricks/sqlalchemy/requirements.py | 2 +- src/databricks/sqlalchemy/test/test_suite.py | 16 ++++++++++++++++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/databricks/sqlalchemy/_ddl.py b/src/databricks/sqlalchemy/_ddl.py index 4d825c9fe..f8dfa670d 100644 --- a/src/databricks/sqlalchemy/_ddl.py +++ b/src/databricks/sqlalchemy/_ddl.py @@ -19,7 +19,7 @@ def post_create_table(self, table): return " USING DELTA" def visit_unique_constraint(self, constraint, **kw): - logger.warn("Databricks does not support unique constraints") + logger.warning("Databricks does not support unique constraints") pass def visit_check_constraint(self, constraint, **kw): diff --git a/src/databricks/sqlalchemy/requirements.py b/src/databricks/sqlalchemy/requirements.py index 614eea2e6..e97bb54c5 100644 --- a/src/databricks/sqlalchemy/requirements.py +++ b/src/databricks/sqlalchemy/requirements.py @@ -173,7 +173,7 @@ def unique_constraint_reflection(self): """ComponentReflection test is intricate and simply cannot function without this exclusion being defined here. This happens because we cannot skip individual combinations used in ComponentReflection test. - Databricks supports unique constraints but they are not implemented in this dialect. + Databricks doesn't support UNIQUE constraints. """ return sqlalchemy.testing.exclusions.closed() diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index 870b87632..0f805a740 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -419,6 +419,22 @@ class ComponentReflectionTest(ComponentReflectionTest): # test_get_schema_names # test_not_existing_table + @pytest.mark.skip(reason="Comment reflection is possible but not enabled in this dialect") + def test_get_multi_table_comment(self): + """There are 84 permutations of this test that are skipped. + """ + pass + + @pytest.mark.skip(reason="Databricks doesn't support UNIQUE constraints") + def test_get_multi_unique_constraints(self): + pass + + @pytest.mark.skip(reason="This dialect doesn't support get_table_options. See comment in test_suite.py") + def test_multi_get_table_options_tables(self): + """It's not clear what the expected ouput from this method would even _be_. Requires research. + """ + pass + @pytest.mark.skip("This dialect doesn't implement get_view_definition") def test_get_view_definition(self): pass From b28a068422ab8924767d667163c9640c67e86b62 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Mon, 16 Oct 2023 00:40:38 -0400 Subject: [PATCH 07/10] Cleaning up skip directives and requirements.py to give more specific skip logs Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/_ddl.py | 2 +- src/databricks/sqlalchemy/requirements.py | 5 +++ src/databricks/sqlalchemy/test/test_suite.py | 45 +++++++++----------- 3 files changed, 25 insertions(+), 27 deletions(-) diff --git a/src/databricks/sqlalchemy/_ddl.py b/src/databricks/sqlalchemy/_ddl.py index f8dfa670d..7aefb034e 100644 --- a/src/databricks/sqlalchemy/_ddl.py +++ b/src/databricks/sqlalchemy/_ddl.py @@ -23,7 +23,7 @@ def visit_unique_constraint(self, constraint, **kw): pass def visit_check_constraint(self, constraint, **kw): - logger.warn("Databricks does not support check constraints") + logger.warning("This dialect does not support check constraints") pass def visit_identity_column(self, identity, **kw): diff --git a/src/databricks/sqlalchemy/requirements.py b/src/databricks/sqlalchemy/requirements.py index e97bb54c5..6fb252dbc 100644 --- a/src/databricks/sqlalchemy/requirements.py +++ b/src/databricks/sqlalchemy/requirements.py @@ -154,6 +154,11 @@ def temporary_tables(self): """ return sqlalchemy.testing.exclusions.closed() + @property + def table_reflection(self): + """target database has general support for table reflection""" + return sqlalchemy.testing.exclusions.open() + @property def temp_table_reflection(self): """ComponentReflection test is intricate and simply cannot function without this exclusion being defined here. diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index 0f805a740..a8d69e620 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -330,23 +330,19 @@ class CompositeKeyReflectionTest(CompositeKeyReflectionTest): pass +@pytest.mark.reviewed class ComponentReflectionTestExtra(ComponentReflectionTestExtra): - @pytest.mark.skip(reason="Test setup needs adjustment.") - def test_varchar_reflection(self): - """ - Exception: - databricks.sql.exc.ServerOperationError: [TABLE_OR_VIEW_ALREADY_EXISTS] Cannot create table or view `pysql_sqlalchemy`.`t` because it already exists. - Choose a different name, drop or replace the existing object, add the IF NOT EXISTS clause to tolerate pre-existing objects, or add the OR REFRESH clause to refresh the existing streaming table. - """ + @pytest.mark.skip(reason="This dialect does not support check constraints") + def test_get_check_constraints(self): + pass - @pytest.mark.skip(reason="Test setup appears broken") - def test_numeric_reflection(self): - """ - Exception: - databricks.sql.exc.ServerOperationError: [SCHEMA_NOT_FOUND] The schema `main.test_schema` cannot be found. Verify the spelling and correctness of the schema and catalog. - If you did not qualify the name with a catalog, verify the current_schema() output, or qualify the name with the correct catalog. - To tolerate the error on drop use DROP SCHEMA IF EXISTS. - """ + @pytest.mark.skip(reason="Databricks does not support indexes.") + def test_reflect_covering_index(self): + pass + + @pytest.mark.skip(reason="Databricks does not support indexes.") + def test_reflect_expression_based_indexes(self): + pass class DifficultParametersTest(DifficultParametersTest): @@ -415,24 +411,22 @@ class ComponentReflectionTest(ComponentReflectionTest): Note that test_get_multi_foreign keys is flaky because DBR does not guarantee the order of data returned in DESCRIBE TABLE EXTENDED """ - # We've reviewed these tests: - # test_get_schema_names - # test_not_existing_table - - @pytest.mark.skip(reason="Comment reflection is possible but not enabled in this dialect") + @pytest.mark.skip( + reason="Comment reflection is possible but not enabled in this dialect" + ) def test_get_multi_table_comment(self): - """There are 84 permutations of this test that are skipped. - """ + """There are 84 permutations of this test that are skipped.""" pass @pytest.mark.skip(reason="Databricks doesn't support UNIQUE constraints") def test_get_multi_unique_constraints(self): pass - @pytest.mark.skip(reason="This dialect doesn't support get_table_options. See comment in test_suite.py") + @pytest.mark.skip( + reason="This dialect doesn't support get_table_options. See comment in test_suite.py" + ) def test_multi_get_table_options_tables(self): - """It's not clear what the expected ouput from this method would even _be_. Requires research. - """ + """It's not clear what the expected ouput from this method would even _be_. Requires research.""" pass @pytest.mark.skip("This dialect doesn't implement get_view_definition") @@ -450,7 +444,6 @@ def test_get_temp_view_names(self): """ pass - @pytest.mark.skip("This dialect doesn't implement get_multi_pk_constraint") def test_get_multi_pk_constraint(self): pass From 4dec379263e815e1b61c8281a1347927bfa69b9c Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Mon, 16 Oct 2023 00:53:22 -0400 Subject: [PATCH 08/10] Update get_columns parser to reflect DECIMAL precision and scale Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/_parse.py | 31 +++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/src/databricks/sqlalchemy/_parse.py b/src/databricks/sqlalchemy/_parse.py index 4bc7d65e7..55f34f950 100644 --- a/src/databricks/sqlalchemy/_parse.py +++ b/src/databricks/sqlalchemy/_parse.py @@ -258,7 +258,7 @@ def get_pk_strings_from_dte_output( return output -# The keys of this dictionary are the values we expect to see in a +# The keys of this dictionary are the values we expect to see in a # TGetColumnsRequest's .TYPE_NAME attribute. # These are enumerated in ttypes.py as class TTypeId. # TODO: confirm that all types in TTypeId are included here. @@ -283,17 +283,40 @@ def get_pk_strings_from_dte_output( } +def parse_numeric_type_precision_and_scale(type_name_str): + """Return an intantiated sqlalchemy Numeric() type that preserves the precision and scale indicated + in the output from TGetColumnsRequest. + + type_name_str + The value of TGetColumnsReq.TYPE_NAME. + + If type_name_str is "DECIMAL(18,5) returns sqlalchemy.types.Numeric(18,5) + """ + + pattern = re.compile(r"DECIMAL\((\d+,\d+)\)") + match = re.search(pattern, type_name_str) + precision_and_scale = match.group(1) + precision, scale = tuple(precision_and_scale.split(",")) + + return sqlalchemy.types.Numeric(int(precision), int(scale)) + + def parse_column_info_from_tgetcolumnsresponse(thrift_resp_row) -> ReflectedColumn: """Returns a dictionary of the ReflectedColumn schema parsed from a single of the result of a TGetColumnsRequest thrift RPC - - Currently doesn't preserve decimal precision """ pat = re.compile(r"^\w+") _raw_col_type = re.search(pat, thrift_resp_row.TYPE_NAME).group(0).lower() _col_type = GET_COLUMNS_TYPE_MAP[_raw_col_type] + if _raw_col_type == "decimal": + final_col_type = parse_numeric_type_precision_and_scale( + thrift_resp_row.TYPE_NAME + ) + else: + final_col_type = _col_type + # See comments about autoincrement in test_suite.py # Since Databricks SQL doesn't currently support inline AUTOINCREMENT declarations # the autoincrement must be manually declared with an Identity() construct in SQLAlchemy @@ -306,7 +329,7 @@ def parse_column_info_from_tgetcolumnsresponse(thrift_resp_row) -> ReflectedColu # key in this dictionary. this_column = { "name": thrift_resp_row.COLUMN_NAME, - "type": _col_type, + "type": final_col_type, "nullable": bool(thrift_resp_row.NULLABLE), "default": thrift_resp_row.COLUMN_DEF, } From 448334f239c7fa1a669f3c353e2d801b8b20ae17 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Mon, 16 Oct 2023 01:04:08 -0400 Subject: [PATCH 09/10] Update skip messages for unrunnable tests in ComponentReflectionTestExtra Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/test/test_suite.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index a8d69e620..886daa2e3 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -344,6 +344,18 @@ def test_reflect_covering_index(self): def test_reflect_expression_based_indexes(self): pass + @pytest.mark.skip(reason="Databricks doesn't enforce String or VARCHAR length limitations.") + def test_varchar_reflection(self): + """Even if a user specifies String(52), Databricks won't enforce that limit. + """ + pass + + @pytest.mark.skip(reason="This dialect doesn't implement foreign key options checks.") + def test_get_foreign_key_options(self): + """It's not clear from the test code what the expected output is here. Further research required. + """ + pass + class DifficultParametersTest(DifficultParametersTest): @pytest.mark.skip(reason="Error during execution. Requires investigation.") From e102f87dc2546dfe330fe3b4c294e80bf069e8a6 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Mon, 16 Oct 2023 10:52:13 -0400 Subject: [PATCH 10/10] Skip BizarroCharacterFKResolutionTest and DifficultParametersTest with reasoning. This leaves just a few more suites to review. Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/test/test_suite.py | 32 ++++++++++++++++---- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index 886daa2e3..4b13dbeee 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -344,16 +344,18 @@ def test_reflect_covering_index(self): def test_reflect_expression_based_indexes(self): pass - @pytest.mark.skip(reason="Databricks doesn't enforce String or VARCHAR length limitations.") + @pytest.mark.skip( + reason="Databricks doesn't enforce String or VARCHAR length limitations." + ) def test_varchar_reflection(self): - """Even if a user specifies String(52), Databricks won't enforce that limit. - """ + """Even if a user specifies String(52), Databricks won't enforce that limit.""" pass - @pytest.mark.skip(reason="This dialect doesn't implement foreign key options checks.") + @pytest.mark.skip( + reason="This dialect doesn't implement foreign key options checks." + ) def test_get_foreign_key_options(self): - """It's not clear from the test code what the expected output is here. Further research required. - """ + """It's not clear from the test code what the expected output is here. Further research required.""" pass @@ -528,3 +530,21 @@ class QuotedNameArgumentTest(QuotedNameArgumentTest): also checks the behaviour of DDL identifier preparation process. We need to override some of IdentifierPreparer methods because these are the ultimate control for whether or not CHECK and UNIQUE constraints are emitted. """ + + +@pytest.mark.reviewed +@pytest.mark.skip(reason="Implementation deferred. See test_suite.py") +class BizarroCharacterFKResolutionTest: + """Some of the combinations in this test pass. Others fail. Given the esoteric nature of these failures, + we have opted to defer implementing fixes to a later time, guided by customer feedback. Passage of + these tests is not an acceptance criteria for our dialect. + """ + + +@pytest.mark.reviewed +@pytest.mark.skip(reason="Implementation deferred. See test_suite.py") +class DifficultParametersTest: + """Some of the combinations in this test pass. Others fail. Given the esoteric nature of these failures, + we have opted to defer implementing fixes to a later time, guided by customer feedback. Passage of + these tests is not an acceptance criteria for our dialect. + """