Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add support for enabling ORDER BY on non projected columns #1155

Merged
merged 4 commits into from
Sep 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions evadb/binder/binder_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,9 +363,7 @@ def drop_row_id_from_target_list(
return filtered_list


def add_func_expr_outputs_to_binder_context(
func_expr: FunctionExpression, binder_context: StatementBinderContext
):
def get_bound_func_expr_outputs_as_tuple_value_expr(func_expr: FunctionExpression):
output_cols = []
for obj, alias in zip(func_expr.output_objs, func_expr.alias.col_names):
col_alias = "{}.{}".format(func_expr.alias.alias_name, alias)
Expand All @@ -376,4 +374,4 @@ def add_func_expr_outputs_to_binder_context(
col_alias=col_alias,
)
output_cols.append(alias_obj)
binder_context.add_derived_table_alias(func_expr.alias.alias_name, output_cols)
return output_cols
12 changes: 9 additions & 3 deletions evadb/binder/statement_binder.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,13 @@

from evadb.binder.binder_utils import (
BinderError,
add_func_expr_outputs_to_binder_context,
bind_table_info,
check_column_name_is_string,
check_groupby_pattern,
check_table_object_is_groupable,
drop_row_id_from_target_list,
extend_star,
get_bound_func_expr_outputs_as_tuple_value_expr,
get_column_definition_from_select_target_list,
handle_bind_extract_object_function,
resolve_alias_table_value_expression,
Expand Down Expand Up @@ -201,7 +201,10 @@ def _bind_select_statement(self, node: SelectStatement):
for expr in node.target_list:
self.bind(expr)
if isinstance(expr, FunctionExpression):
add_func_expr_outputs_to_binder_context(expr, self._binder_context)
output_cols = get_bound_func_expr_outputs_as_tuple_value_expr(expr)
self._binder_context.add_derived_table_alias(
expr.alias.alias_name, output_cols
)

if node.groupby_clause:
self.bind(node.groupby_clause)
Expand Down Expand Up @@ -279,7 +282,10 @@ def _bind_tableref(self, node: TableRef):
func_expr = node.table_valued_expr.func_expr
func_expr.alias = node.alias
self.bind(func_expr)
add_func_expr_outputs_to_binder_context(func_expr, self._binder_context)
output_cols = get_bound_func_expr_outputs_as_tuple_value_expr(func_expr)
self._binder_context.add_derived_table_alias(
func_expr.alias.alias_name, output_cols
)
else:
raise BinderError(f"Unsupported node {type(node)}")

Expand Down
62 changes: 54 additions & 8 deletions evadb/optimizer/statement_to_opr_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from evadb.binder.binder_utils import get_bound_func_expr_outputs_as_tuple_value_expr
from evadb.expression.abstract_expression import AbstractExpression
from evadb.expression.function_expression import FunctionExpression
from evadb.optimizer.operators import (
LogicalCreate,
LogicalCreateFunction,
Expand Down Expand Up @@ -53,8 +55,8 @@
from evadb.parser.select_statement import SelectStatement
from evadb.parser.show_statement import ShowStatement
from evadb.parser.statement import AbstractStatement
from evadb.parser.table_ref import TableRef
from evadb.parser.types import FunctionType
from evadb.parser.table_ref import JoinNode, TableRef, TableValuedExpression
from evadb.parser.types import FunctionType, JoinType
from evadb.utils.logging_manager import logger


Expand Down Expand Up @@ -121,7 +123,54 @@ def visit_select(self, statement: SelectStatement):

# order of evaluation
# from, where, group by, select, order by, limit, union

# If there is a table_ref, an ORDER BY clause and no GROUP BY clause, we move all
# the function expressions out of the projection list into table valued expressions.
# This is done to handle
# https://github.com/georgia-tech-db/evadb/issues/1147
# and https://github.com/georgia-tech-db/evadb/issues/1130.
# It is a bit ugly, but a complete fix would require modifying the binder.

col_with_func_exprs = []

if (
statement.from_table
and statement.orderby_list
and statement.groupby_clause is None
):
projection_cols = []
for col in statement.target_list:
if isinstance(col, FunctionExpression):
col_with_func_exprs.append(col)
# append the TupleValueExpression for the FunctionExpression
projection_cols.extend(
get_bound_func_expr_outputs_as_tuple_value_expr(col)
)
else:
projection_cols.append(col)

# update target list with projection cols
statement.target_list = projection_cols

table_ref = statement.from_table
if not table_ref and col_with_func_exprs:
# if there is no table source, we add a projection node with all the
# function expressions
self._visit_projection(col_with_func_exprs)
else:
# add col_with_func_exprs to TableValuedExpressions
for col in col_with_func_exprs:
tve = TableValuedExpression(col)
if table_ref:
table_ref = TableRef(
JoinNode(
table_ref,
TableRef(tve, alias=col.alias),
join_type=JoinType.LATERAL_JOIN,
)
)

statement.from_table = table_ref

if table_ref is not None:
self.visit_table_ref(table_ref)

Expand All @@ -135,18 +184,15 @@ def visit_select(self, statement: SelectStatement):
if statement.groupby_clause is not None:
self._visit_groupby(statement.groupby_clause)

# Projection operator
select_columns = statement.target_list

if select_columns is not None:
self._visit_projection(select_columns)

if statement.orderby_list is not None:
self._visit_orderby(statement.orderby_list)

if statement.limit_count is not None:
self._visit_limit(statement.limit_count)

if statement.target_list is not None:
self._visit_projection(statement.target_list)

# union
if statement.union_link is not None:
self._visit_union(statement.union_link, statement.union_all)
Expand Down
6 changes: 4 additions & 2 deletions test/integration_tests/short/test_select_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@

from evadb.binder.binder_utils import BinderError
from evadb.models.storage.batch import Batch
from evadb.optimizer.operators import LogicalFilter
from evadb.server.command_handler import execute_query_fetch_all

NUM_FRAMES = 10
Expand Down Expand Up @@ -416,9 +417,10 @@ def test_hash_join_with_multiple_on(self):

def test_expression_tree_signature(self):
plan = get_logical_query_plan(
self.evadb, "SELECT DummyMultiObjectDetector(data).labels FROM MyVideo"
self.evadb,
"SELECT id FROM MyVideo WHERE DummyMultiObjectDetector(data).labels @> ['person'];",
)
signature = plan.target_list[0].signature()
signature = next(plan.find_all(LogicalFilter)).predicate.children[0].signature()
function_id = (
self.evadb.catalog()
.get_function_catalog_entry_by_name("DummyMultiObjectDetector")
Expand Down