-
Notifications
You must be signed in to change notification settings - Fork 13.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix: Ensure Presto database engine spec correctly handles Trino #20729
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -23,7 +23,18 @@ | |||||||||||
from contextlib import closing | ||||||||||||
from datetime import datetime | ||||||||||||
from distutils.version import StrictVersion | ||||||||||||
from typing import Any, cast, Dict, List, Optional, Pattern, Tuple, TYPE_CHECKING, Union | ||||||||||||
from typing import ( | ||||||||||||
Any, | ||||||||||||
cast, | ||||||||||||
Dict, | ||||||||||||
List, | ||||||||||||
Optional, | ||||||||||||
Pattern, | ||||||||||||
Tuple, | ||||||||||||
Type, | ||||||||||||
TYPE_CHECKING, | ||||||||||||
Union, | ||||||||||||
) | ||||||||||||
from urllib import parse | ||||||||||||
|
||||||||||||
import pandas as pd | ||||||||||||
|
@@ -35,13 +46,16 @@ | |||||||||||
from sqlalchemy.engine.reflection import Inspector | ||||||||||||
from sqlalchemy.engine.result import Row as ResultRow | ||||||||||||
from sqlalchemy.engine.url import URL | ||||||||||||
from sqlalchemy.exc import DatabaseError | ||||||||||||
from sqlalchemy.orm import Session | ||||||||||||
from sqlalchemy.sql.expression import ColumnClause, Select | ||||||||||||
|
||||||||||||
from superset import cache_manager, is_feature_enabled | ||||||||||||
from superset.common.db_query_status import QueryStatus | ||||||||||||
from superset.databases.utils import make_url_safe | ||||||||||||
from superset.db_engine_specs.base import BaseEngineSpec, ColumnTypeMapping | ||||||||||||
from superset.db_engine_specs.exceptions import SupersetDBAPIDatabaseError | ||||||||||||
|
||||||||||||
from superset.errors import SupersetErrorType | ||||||||||||
from superset.exceptions import SupersetTemplateException | ||||||||||||
from superset.models.sql_lab import Query | ||||||||||||
|
@@ -224,6 +238,15 @@ class PrestoEngineSpec(BaseEngineSpec): # pylint: disable=too-many-public-metho | |||||||||||
), | ||||||||||||
} | ||||||||||||
|
||||||||||||
@classmethod | ||||||||||||
def get_dbapi_exception_mapping(cls) -> Dict[Type[Exception], Type[Exception]]: | ||||||||||||
# pylint: disable=import-outside-toplevel,import-error | ||||||||||||
from pyhive.exc import DatabaseError | ||||||||||||
|
||||||||||||
return { | ||||||||||||
DatabaseError: SupersetDBAPIDatabaseError, | ||||||||||||
} | ||||||||||||
|
||||||||||||
@classmethod | ||||||||||||
def get_allow_cost_estimate(cls, extra: Dict[str, Any]) -> bool: | ||||||||||||
version = extra.get("version") | ||||||||||||
|
@@ -913,21 +936,23 @@ def extra_table_metadata( | |||||||||||
indexes = database.get_indexes(table_name, schema_name) | ||||||||||||
if indexes: | ||||||||||||
cols = indexes[0].get("column_names", []) | ||||||||||||
full_table_name = table_name | ||||||||||||
if schema_name and "." not in table_name: | ||||||||||||
full_table_name = "{}.{}".format(schema_name, table_name) | ||||||||||||
pql = cls._partition_query(full_table_name, database) | ||||||||||||
col_names, latest_parts = cls.latest_partition( | ||||||||||||
table_name, schema_name, database, show_first=True | ||||||||||||
) | ||||||||||||
|
||||||||||||
if not latest_parts: | ||||||||||||
latest_parts = tuple([None] * len(col_names)) | ||||||||||||
metadata["partitions"] = { | ||||||||||||
"cols": cols, | ||||||||||||
"latest": dict(zip(col_names, latest_parts)), | ||||||||||||
"partitionQuery": pql, | ||||||||||||
} | ||||||||||||
if cols: | ||||||||||||
full_table_name = table_name | ||||||||||||
if schema_name and "." not in table_name: | ||||||||||||
full_table_name = "{}.{}".format(schema_name, table_name) | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. f-string? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @ktmud this code is unchanged, i.e., it's now nested under the `if cols:` statement. |
||||||||||||
pql = cls._partition_query(full_table_name, database) | ||||||||||||
col_names, latest_parts = cls.latest_partition( | ||||||||||||
table_name, schema_name, database, show_first=True | ||||||||||||
) | ||||||||||||
|
||||||||||||
if not latest_parts: | ||||||||||||
latest_parts = tuple([None] * len(col_names)) | ||||||||||||
metadata["partitions"] = { | ||||||||||||
"cols": cols, | ||||||||||||
"latest": dict(zip(col_names, latest_parts)), | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I feel we should probably change the signature of [identifier lost in extraction; presumably `latest_partition`]. For this code, it seems it can be simplified as:
Suggested change
|
||||||||||||
"partitionQuery": pql, | ||||||||||||
} | ||||||||||||
|
||||||||||||
# flake8 is not matching `Optional[str]` to `Any` for some reason... | ||||||||||||
metadata["view"] = cast( | ||||||||||||
|
@@ -947,20 +972,16 @@ def get_create_view( | |||||||||||
:param schema: Schema name | ||||||||||||
:param table: Table (view) name | ||||||||||||
""" | ||||||||||||
# pylint: disable=import-outside-toplevel | ||||||||||||
from pyhive.exc import DatabaseError | ||||||||||||
|
||||||||||||
engine = cls.get_engine(database, schema) | ||||||||||||
with closing(engine.raw_connection()) as conn: | ||||||||||||
cursor = conn.cursor() | ||||||||||||
sql = f"SHOW CREATE VIEW {schema}.{table}" | ||||||||||||
try: | ||||||||||||
cls.execute(cursor, sql) | ||||||||||||
|
||||||||||||
except DatabaseError: # not a VIEW | ||||||||||||
return cls.fetch_data(cursor, 1)[0][0] | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This should never have been a [remainder of comment truncated in extraction] |
||||||||||||
except SupersetDBAPIDatabaseError: # not a VIEW | ||||||||||||
return None | ||||||||||||
rows = cls.fetch_data(cursor, 1) | ||||||||||||
return rows[0][0] | ||||||||||||
|
||||||||||||
@classmethod | ||||||||||||
def get_tracking_url(cls, cursor: "Cursor") -> Optional[str]: | ||||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Same logic as before, just indented under the `if cols:` statement. Unlike Presto, where `get_indexes` returns `[]` for a non-partitioned table, Trino returns `[{'name': 'partition', 'column_names': [], 'unique': False}]`. Rather than overriding the engine-specific `normalize_indexes` method, I thought it would be more prudent to make this method more robust, given there was already an expectation that there may be no columns associated with the index, i.e., a non-partitioned table.