-
Notifications
You must be signed in to change notification settings - Fork 11
feat(FIR-42854): Server side async query execution #415
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
e7c1e47
fbd5cc0
a47036e
f784695
3b591d9
595ae9d
84d7d60
c285fe8
1e7d431
bc3013b
dea7fce
b19c860
52b02d1
4a001d6
0175955
35e52f9
e85f58b
c0380e2
8af78f8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ | |
from typing import ( | ||
TYPE_CHECKING, | ||
Any, | ||
Dict, | ||
Iterator, | ||
List, | ||
Optional, | ||
|
@@ -39,16 +40,18 @@ | |
UPDATE_PARAMETERS_HEADER, | ||
BaseCursor, | ||
CursorState, | ||
RowSet, | ||
_parse_update_endpoint, | ||
_parse_update_parameters, | ||
_raise_if_internal_set_parameter, | ||
async_not_allowed, | ||
check_not_closed, | ||
check_query_executed, | ||
) | ||
from firebolt.utils.exception import ( | ||
EngineNotRunningError, | ||
FireboltDatabaseError, | ||
FireboltError, | ||
NotSupportedError, | ||
OperationalError, | ||
ProgrammingError, | ||
QueryTimeoutError, | ||
|
@@ -186,53 +189,93 @@ async def _parse_response_headers(self, headers: Headers) -> None: | |
param_dict = _parse_update_parameters(headers.get(UPDATE_PARAMETERS_HEADER)) | ||
self._update_set_parameters(param_dict) | ||
|
||
@abstractmethod | ||
async def execute_async( | ||
self, | ||
query: str, | ||
parameters: Optional[Sequence[ParameterType]] = None, | ||
skip_parsing: bool = False, | ||
) -> int: | ||
"""Execute a database query without maintaining a connection.""" | ||
... | ||
|
||
async def _do_execute( | ||
self, | ||
raw_query: str, | ||
parameters: Sequence[Sequence[ParameterType]], | ||
skip_parsing: bool = False, | ||
timeout: Optional[float] = None, | ||
async_execution: bool = False, | ||
) -> None: | ||
self._reset() | ||
# Allow users to manually skip parsing for performance improvement. | ||
queries: List[Union[SetParameter, str]] = ( | ||
[raw_query] if skip_parsing else split_format_sql(raw_query, parameters) | ||
) | ||
timeout_controller = TimeoutController(timeout) | ||
|
||
if len(queries) > 1 and async_execution: | ||
raise FireboltError( | ||
"Server side async does not support multi-statement queries" | ||
) | ||
try: | ||
for query in queries: | ||
start_time = time.time() | ||
Cursor._log_query(query) | ||
timeout_controller.raise_if_timeout() | ||
|
||
if isinstance(query, SetParameter): | ||
row_set: RowSet = (-1, None, None, None) | ||
await self._validate_set_parameter( | ||
query, timeout_controller.remaining() | ||
) | ||
else: | ||
resp = await self._api_request( | ||
query, | ||
{"output_format": JSON_OUTPUT_FORMAT}, | ||
timeout=timeout_controller.remaining(), | ||
) | ||
await self._raise_if_error(resp) | ||
await self._parse_response_headers(resp.headers) | ||
row_set = self._row_set_from_response(resp) | ||
|
||
self._append_row_set(row_set) | ||
|
||
logger.info( | ||
f"Query fetched {self.rowcount} rows in" | ||
f" {time.time() - start_time} seconds." | ||
await self._execute_single_query( | ||
query, timeout_controller, async_execution | ||
) | ||
|
||
self._state = CursorState.DONE | ||
|
||
except Exception: | ||
self._state = CursorState.ERROR | ||
raise | ||
|
||
async def _execute_single_query( | ||
self, | ||
query: Union[SetParameter, str], | ||
timeout_controller: TimeoutController, | ||
async_execution: bool, | ||
) -> None: | ||
start_time = time.time() | ||
Cursor._log_query(query) | ||
timeout_controller.raise_if_timeout() | ||
|
||
if isinstance(query, SetParameter): | ||
if async_execution: | ||
raise FireboltError( | ||
"Server side async does not support set statements, " | ||
"please use execute to set this parameter" | ||
) | ||
await self._validate_set_parameter(query, timeout_controller.remaining()) | ||
else: | ||
await self._handle_query_execution( | ||
query, timeout_controller, async_execution | ||
) | ||
|
||
if not async_execution: | ||
logger.info( | ||
ptiurin marked this conversation as resolved.
Show resolved
Hide resolved
|
||
f"Query fetched {self.rowcount} rows in" | ||
f" {time.time() - start_time} seconds." | ||
) | ||
else: | ||
logger.info("Query submitted for async execution.") | ||
|
||
async def _handle_query_execution( | ||
self, query: str, timeout_controller: TimeoutController, async_execution: bool | ||
) -> None: | ||
query_params: Dict[str, Any] = {"output_format": JSON_OUTPUT_FORMAT} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we check the case for when There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That opens a can of worms that we are still debating. At the moment enforcing this is out of scope https://packboard.atlassian.net/browse/FIR-42854 |
||
if async_execution: | ||
query_params["async"] = True | ||
resp = await self._api_request( | ||
query, | ||
query_params, | ||
timeout=timeout_controller.remaining(), | ||
) | ||
await self._raise_if_error(resp) | ||
if async_execution: | ||
self._parse_async_response(resp) | ||
else: | ||
await self._parse_response_headers(resp.headers) | ||
row_set = self._row_set_from_response(resp) | ||
self._append_row_set(row_set) | ||
|
||
@check_not_closed | ||
async def execute( | ||
self, | ||
|
@@ -346,6 +389,7 @@ async def nextset(self) -> None: | |
|
||
# Iteration support | ||
@check_not_closed | ||
@async_not_allowed | ||
@check_query_executed | ||
def __aiter__(self) -> Cursor: | ||
return self | ||
|
@@ -373,6 +417,7 @@ async def __aexit__( | |
self.close() | ||
|
||
@check_not_closed | ||
@async_not_allowed | ||
@check_query_executed | ||
async def __anext__(self) -> List[ColType]: | ||
row = await self.fetchone() | ||
|
@@ -392,6 +437,47 @@ def __init__( | |
assert isinstance(client, AsyncClientV2) | ||
super().__init__(*args, client=client, connection=connection, **kwargs) | ||
|
||
@check_not_closed | ||
async def execute_async( | ||
self, | ||
query: str, | ||
parameters: Optional[Sequence[ParameterType]] = None, | ||
skip_parsing: bool = False, | ||
) -> int: | ||
""" | ||
Execute a database query without maintating a connection. | ||
|
||
Supported features: | ||
Parameterized queries: placeholder characters ('?') are substituted | ||
with values provided in `parameters`. Values are formatted to | ||
be properly recognized by database and to exclude SQL injection. | ||
|
||
Not supported: | ||
Multi-statement queries: multiple statements, provided in a single query | ||
and separated by semicolon. | ||
SET statements: to provide additional query execution parameters, execute | ||
`SET param=value` statement before it. Use `execute` method to set | ||
parameters. | ||
|
||
Args: | ||
query (str): SQL query to execute | ||
parameters (Optional[Sequence[ParameterType]]): A sequence of substitution | ||
parameters. Used to replace '?' placeholders inside a query with | ||
actual values | ||
skip_parsing (bool): Flag to disable query parsing. This will | ||
disable parameterized queries while potentially improving performance | ||
|
||
Returns: | ||
int: Always returns -1, as async execution does not return row count. | ||
""" | ||
await self._do_execute( | ||
query, | ||
[parameters] if parameters else [], | ||
skip_parsing, | ||
async_execution=True, | ||
) | ||
return -1 | ||
|
||
async def is_db_available(self, database_name: str) -> bool: | ||
""" | ||
Verify that the database exists. | ||
|
@@ -468,3 +554,13 @@ async def _filter_request(self, endpoint: str, filters: dict) -> Response: | |
) | ||
resp.raise_for_status() | ||
return resp | ||
|
||
async def execute_async( | ||
self, | ||
query: str, | ||
parameters: Optional[Sequence[ParameterType]] = None, | ||
skip_parsing: bool = False, | ||
) -> int: | ||
raise NotSupportedError( | ||
"Async execution is not supported in this version " " of Firebolt." | ||
) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,7 +5,7 @@ | |
|
||
DEFAULT_API_URL: str = "api.app.firebolt.io" | ||
PROTOCOL_VERSION_HEADER_NAME = "Firebolt-Protocol-Version" | ||
PROTOCOL_VERSION: str = "2.1" | ||
PROTOCOL_VERSION: str = "2.3" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok I see we did a jump here. Can you please make sure this one is also ok? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We have tests that confirm the behaviour described in there. I've just rerun them to be sure and everything passes. |
||
_REQUEST_ERRORS: Tuple[Type, ...] = ( | ||
HTTPError, | ||
InvalidURL, | ||
|
Uh oh!
There was an error while loading. Please reload this page.