From d7679a170a2853fe520b604a55afb8c68e206aaf Mon Sep 17 00:00:00 2001 From: Moritz Lell Date: Mon, 11 May 2026 15:44:17 +0200 Subject: [PATCH 1/4] Fix outdated regex in BQL interval() function The function pre-filtered the input string, accepting only day, month, or year, even though the rest of the function handles more units. Change the regex to split only the number from the unit word. --- beanquery/query_env.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/beanquery/query_env.py b/beanquery/query_env.py index 7606b193..e5198e90 100644 --- a/beanquery/query_env.py +++ b/beanquery/query_env.py @@ -684,8 +684,11 @@ def date_part(field, x): @function([str], relativedelta) def interval(x): - """Construct a relative time interval.""" - m = re.fullmatch(r'([-+]?[0-9]+)\s+(day|month|year)s?', x) + """Construct a relative time interval. Example argument: '2 weeks'. + Further options are day, week, month, year, decade, century, millennium (a + plural 's' may be appended). Use to modify dates: `date + interval(...)`""" + x = x.lower() + m = re.fullmatch(r'([-+]?[0-9]+)\s+([a-z]+?)s?', x) if not m: return None number = int(m.group(1)) @@ -702,7 +705,7 @@ def interval(x): return relativedelta(years=number * 10) if unit == 'century': return relativedelta(years=number * 100) - if unit == 'millennium': + if unit in ['millennium', 'millenia']: return relativedelta(years=number * 1000) return None From 15612c0f4cfbc5b7b81de2f1d35ca2c8abda678d Mon Sep 17 00:00:00 2001 From: Moritz Lell Date: Wed, 6 May 2026 16:19:57 +0200 Subject: [PATCH 2/4] Introduce ast.Query as the uniform top-level node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously the grammar rule `select` owned ORDER BY, LIMIT and PIVOT BY directly, and the parser returned a bare `ast.Select`. This conflated the data extraction (defining columns, a source, filters and grouping) with result-set modifiers (ORDER BY, LIMIT and PIVOT BY) that act on whatever comes out of that expression. 
This split prepares for a future UNION chain. The new `ast.Query` node separates these concerns. The grammar rule `query::Query` wraps one (as of now) SELECT body and claims ORDER BY, LIMIT, and PIVOT BY for itself; `ast.Select` is now a pure table expression with no sorting or paging fields. `parse()` always returns `ast.Query`, even for the simplest `SELECT *`. **ast.py**: `Select` loses `order_by`, `limit`, and `pivot_by`; new `Query` node carries those fields and wraps a list of `Select` nodes. **bql.ebnf**: `select` rule no longer contains ORDER BY / LIMIT / PIVOT BY; a new `query::Query` rule wraps `select` and owns those clauses. Rename `subselect` to `subquery`, reflecting the change of the top-level `select` -> `query`. This delegates to `query` so parenthesised sub-queries may carry their own result-set modifiers. Updated `any` and `all` rules to avoid double parentheses when used with subqueries. The `expression` rule requires `subquery` (instead of formerly `select`) to avoid ambiguities like `SELECT SELECT x FROM y WHERE z`. **query_compile.py**: Rename `EvalQuery` to `EvalSelect`. The dataclass holds the compiled SELECT body (table, targets, where, group_indexes, having_index, distinct). A new `EvalQuery` now wraps `EvalSelect` and owns `order_spec` and `limit`. `EvalQuery` properties `columns` and `c_targets` are retained; they are forwarded from the nested SELECT. In the future, this will only be possible for single-SELECT queries (not e.g., UNION chains). **compiler.py**: New `_query` dispatch handler is extracted from `_select`. `_select` compiles the inner SELECT body until GROUP BY. `_query` then compiles ORDER BY, performs the aggregate coverage check, and finally compiles LIMIT and PIVOT BY. In the function `_compile_from`, the subquery detection is updated from `ast.Select` to `ast.Query`. A new check rejects `SELECT DISTINCT ... ORDER BY <expr>` when `<expr>` is not in the SELECT list, since this would produce non-deterministic results. 
This avoids handling DISTINCT at the Query level. **query_execute.py**: New `execute_query()` wraps `execute_select()`, resulting in changes to the control flow: Before: execute_select(query) ├── Compute result_types (visible columns only) ├── Compute result_indexes (visible column indices) ├── Execute query (non-aggregated or aggregated path) ├── ORDER BY (on full rows) ├── Extract visible columns into result tuples ├── DISTINCT (on extracted rows) ├── LIMIT └── Return (result_types, rows) After: execute_query(query) ← New entry point ├── query.select() ← Delegates to EvalQuery.select() │ └── execute_select(query) ← Returns ALL columns + visibility mask │ ├── Compute result_types (ALL columns) │ ├── Compute visible_mask │ ├── Execute query (non-aggregated or aggregated) │ ├── DISTINCT (on visible columns, but keeps full rows) │ └── Return (result_types, rows, visible_mask) │ ├── ORDER BY (on full rows) ├── Extract visible columns ├── LIMIT └── Return (result_types, rows) **transform_journal / transform_balances**: These template-based desugaring functions now return `ast.Query` wrapping the constructed `ast.Select`, so ORDER BY from the BALANCES template reaches the `_query` handler through the normal path. **Tests**: Updated to expect `ast.Query` from parser, access `query.select` for inner fields, and construct `EvalQuery(select=EvalSelect(...), ...)`. 
--- beanquery/compiler.py | 151 +++++++++++++++------- beanquery/parser/ast.py | 17 ++- beanquery/parser/bql.ebnf | 40 ++++-- beanquery/parser/parser.py | 216 ++++++++++++++++++++------------ beanquery/parser_test.py | 146 ++++++++++++--------- beanquery/query_compile.py | 35 ++++-- beanquery/query_compile_test.py | 149 +++++++++++++--------- beanquery/query_execute.py | 103 +++++++++------ beanquery/query_execute_test.py | 4 +- 9 files changed, 551 insertions(+), 310 deletions(-) diff --git a/beanquery/compiler.py b/beanquery/compiler.py index 6e6c442e..0ccf1612 100644 --- a/beanquery/compiler.py +++ b/beanquery/compiler.py @@ -28,6 +28,7 @@ EvalOr, EvalPivot, EvalQuery, + EvalSelect, EvalConstantSubquery1D, EvalRow, EvalTarget, @@ -96,6 +97,66 @@ def _compile(self, node: Optional[ast.Node]): return None raise NotImplementedError + @_compile.register + def _query(self, node: ast.Query): + return self._compile_single_select_query(node) + + def _compile_single_select_query(self, node: ast.Query): + # Compile the single SELECT body (UNION support comes later). + select = node.queries[0] + eval_select = self._select(select) + + # ORDER BY belongs to the enclosing Query, not the Select. + new_targets, order_spec = self._compile_order_by(node.order_by, eval_select.c_targets) + eval_select.c_targets.extend(new_targets) + + # DISTINCT with ORDER BY on columns not in SELECT produces non-deterministic + # results: when multiple rows have the same visible values but different + # ORDER BY values, which row survives DISTINCT is arbitrary. + # We allow ORDER BY f(x) if x is visible, since f(x) is computable from x. 
+ if eval_select.distinct and new_targets: + visible_column_ids = set() + for t in eval_select.c_targets: + if t.name is not None: + visible_column_ids.update(id(c) for c in _collect_columns(t.c_expr)) + + for target in new_targets: + for col in _collect_columns(target.c_expr): + if id(col) not in visible_column_ids: + raise CompilationError( + f'When using DISTINCT, ORDER BY expressions must only ' + f'reference columns that appear in the SELECT list. ' + f'Offending ORDER BY expression: {node.order_by[0].column.text}') + + # If this is an aggregate query (it groups, see list of indexes), check that + # the set of non-aggregates match exactly the group indexes. This should + # always be the case at this point, because we have added all the necessary + # targets to the list of group-by expressions and should have resolved all + # the indexes. + if eval_select.group_indexes is not None: + non_aggregate_indexes = {i for i, t in enumerate(eval_select.c_targets) + if not t.is_aggregate} + if non_aggregate_indexes != set(eval_select.group_indexes): + missing_names = ['"{}"'.format(eval_select.c_targets[i].name) + for i in non_aggregate_indexes - set(eval_select.group_indexes)] + raise CompilationError( + 'all non-aggregates must be covered by GROUP-BY clause in aggregate query: ' + 'the following targets are missing: {}'.format(','.join(missing_names))) + + # Wrap in EvalQuery with ORDER BY and LIMIT. + eval_query = EvalQuery( + select=eval_select, + order_spec=order_spec, + limit=node.limit, + ) + + # PIVOT applies to the final sorted/paged result set. 
+ pivots = self._compile_pivot_by(node.pivot_by, eval_select.c_targets, eval_select.group_indexes) + if pivots: + return EvalPivot(eval_query, pivots) + + return eval_query + @_compile.register def _select(self, node: ast.Select): self.stack.append(self.table) @@ -123,47 +184,26 @@ def _select(self, node: ast.Select): new_targets, group_indexes, having_index = self._compile_group_by(node.group_by, c_targets) c_targets.extend(new_targets) - # Process the ORDER-BY clause. - new_targets, order_spec = self._compile_order_by(node.order_by, c_targets) - c_targets.extend(new_targets) - - # If this is an aggregate query (it groups, see list of indexes), check that - # the set of non-aggregates match exactly the group indexes. This should - # always be the case at this point, because we have added all the necessary - # targets to the list of group-by expressions and should have resolved all - # the indexes. - if group_indexes is not None: - non_aggregate_indexes = {index for index, c_target in enumerate(c_targets) - if not c_target.is_aggregate} - if non_aggregate_indexes != set(group_indexes): - missing_names = ['"{}"'.format(c_targets[index].name) - for index in non_aggregate_indexes - set(group_indexes)] - raise CompilationError( - 'all non-aggregates must be covered by GROUP-BY clause in aggregate query: ' - 'the following targets are missing: {}'.format(','.join(missing_names))) - - query = EvalQuery(self.table, - c_targets, - c_where, - group_indexes, - having_index, - order_spec, - node.limit, - node.distinct) - - pivots = self._compile_pivot_by(node.pivot_by, c_targets, group_indexes) - if pivots: - return EvalPivot(query, pivots) + # ORDER BY and LIMIT are compiled by the enclosing _query handler, + # which also validates aggregate coverage after ORDER BY targets are added. 
+ select = EvalSelect( + table=self.table, + c_targets=c_targets, + c_where=c_where, + group_indexes=group_indexes, + having_index=having_index, + distinct=node.distinct, + ) self.stack.pop() - return query + return select def _compile_from(self, node): if node is None: return None # Subquery. - if isinstance(node, ast.Select): + if isinstance(node, ast.Query): self.table = SubqueryTable(self._compile(node)) return None @@ -735,7 +775,11 @@ def _print(self, node: ast.Print): self.table = self.context.tables.get('entries') expr = self._compile_from(node.from_clause) targets = [EvalTarget(EvalRow(), 'ROW(*)', False)] - return EvalQuery(self.table, targets, expr, None, None, None, None, False) + return EvalQuery( + select=EvalSelect(self.table, targets, expr, None, None, False), + order_spec=None, + limit=None, + ) @_compile.register def _create_table(self, node: ast.CreateTable): @@ -789,7 +833,7 @@ def transform_journal(journal): Returns: An instance of an uncompiled Select object. """ - cooked_select = parser.parse(""" + cooked = parser.parse(""" SELECT date, @@ -804,12 +848,15 @@ def transform_journal(journal): """.format(where=('WHERE account ~ "{}"'.format(journal.account) if journal.account else ''), - summary_func=journal.summary_func or '')) + summary_func=journal.summary_func or '')).queries[0] - return ast.Select(cooked_select.targets, - journal.from_clause, - cooked_select.where_clause, - None, None, None, None, None) + select = ast.Select( + cooked.targets, + journal.from_clause, + cooked.where_clause, + None, None) + + return ast.Query(queries=[select], order_by=None, limit=None, pivot_by=None) def transform_balances(balances): @@ -826,20 +873,22 @@ def transform_balances(balances): ## the first or last sort-order value gets used, because it would simplify ## the input statement. 
- cooked_select = parser.parse(""" + cooked_query = parser.parse(""" SELECT account, SUM({}(position)) GROUP BY account, ACCOUNT_SORTKEY(account) ORDER BY ACCOUNT_SORTKEY(account) """.format(balances.summary_func or "")) + cooked = cooked_query.queries[0] - return ast.Select(cooked_select.targets, - balances.from_clause, - balances.where_clause, - cooked_select.group_by, - cooked_select.order_by, - None, None, None) + select = ast.Select( + cooked.targets, + balances.from_clause, + balances.where_clause, + cooked.group_by, + None) + return ast.Query(queries=[select], order_by=cooked_query.order_by, limit=None, pivot_by=None) def get_target_name(target): @@ -909,5 +958,13 @@ def is_aggregate(node): return bool(aggregates) +def _collect_columns(node): + """Recursively collect all EvalColumn nodes from an expression tree.""" + if isinstance(node, EvalColumn): + yield node + for child in node.childnodes(): + yield from _collect_columns(child) + + def compile(context, statement, parameters=None): return Compiler(context).compile(statement, parameters) diff --git a/beanquery/parser/ast.py b/beanquery/parser/ast.py index 0fc9e49e..1e61fa11 100644 --- a/beanquery/parser/ast.py +++ b/beanquery/parser/ast.py @@ -81,11 +81,20 @@ def node(name, fields): # from_clause: An instance of 'From', or None if absent. # where_clause: A root expression node, or None if absent. # group_by: An instance of 'GroupBy', or None if absent. -# order_by: An instance of 'OrderBy', or None if absent. -# pivot_by: An instance of 'PivotBy', or None if absent. -# limit: An integer, or None is absent. # distinct: A boolean value (True), or None if absent. -Select = node('Select', 'targets from_clause where_clause group_by order_by pivot_by limit distinct') +Select = node('Select', 'targets from_clause where_clause group_by distinct') + +# The top-level query node wrapping one or more SELECT bodies. +# +# A single SELECT is the degenerate case (len(queries) == 1). 
+# In the future, UNION chain support will be added where len(queries) > 1. +# +# Attributes: +# queries: List of Select nodes. +# order_by: Optional list of OrderBy applied to the combined result. +# limit: Optional integer limit applied to the combined result. +# pivot_by: Optional PivotBy applied to the combined result. +Query = node('Query', 'queries order_by limit pivot_by') # A select query that produces final balances for accounts. # This is equivalent to diff --git a/beanquery/parser/bql.ebnf b/beanquery/parser/bql.ebnf index 1c078b63..24601124 100644 --- a/beanquery/parser/bql.ebnf +++ b/beanquery/parser/bql.ebnf @@ -15,7 +15,7 @@ bql statement = - | select + | query | balances | journal | print @@ -23,18 +23,28 @@ statement | insert ; +(* Wrapper for queries, currently a plain SELECT or a subquery. Future: UNION. + ORDER BY, LIMIT, and PIVOT BY after the last operand apply to the result set. +*) +query::Query + = queries+:( select | subquery ) + ['ORDER' 'BY' order_by:','.{order}+] + ['LIMIT' limit:integer] + ['PIVOT' 'BY' pivot_by:pivotby] + ; + +(* SELECT body without ORDER BY / LIMIT / PIVOT BY so the enclosing query rule can claim + those tokens for the combined result set. *) select::Select = 'SELECT' ['DISTINCT' distinct:`True`] targets:(','.{ target }+ | asterisk) - ['FROM' from_clause:(_table | subselect | from)] + ['FROM' from_clause:(_table | subquery | from)] ['WHERE' where_clause:expression] ['GROUP' 'BY' group_by:groupby] - ['ORDER' 'BY' order_by:','.{order}+] - ['PIVOT' 'BY' pivot_by:pivotby] - ['LIMIT' limit:integer] ; -subselect - = '(' @:select ')' +(* Parenthesised sub-query; uses query so ORDER BY / LIMIT are allowed inside. *) +subquery + = '(' @:query ')' ; from::From @@ -132,12 +142,20 @@ comparison | sum ; +(* This operator is special in that it has parentheses. Avoid double parentheses +with subquerys ALL( (SELECT ...) ) by &(...) 
look-ahead *) any::Any - = left:sum op:op 'any' '(' right:expression ')' + = + | left:sum op:op 'any' &('(' 'SELECT') right:subquery + | left:sum op:op 'any' '(' right:expression ')' ; +(* This operator is special in that it has parentheses. Avoid double parentheses +with subquerys ALL( (SELECT ...) ) by &(...) look-ahead *) all::All - = left:sum op:op 'all' '(' right:expression ')' + = + | left:sum op:op 'all' &('(' 'SELECT') right:subquery + | left:sum op:op 'all' '(' right:expression ')' ; op @@ -282,7 +300,7 @@ subscript::Subscript atom = - | select + | subquery | function | constant | column @@ -390,7 +408,7 @@ create_table::CreateTable ( | '(' columns:','.{( identifier identifier )} ')' ['USING' using:string] | 'USING' using:string - | 'AS' query:select + | 'AS' query:query ) ; diff --git a/beanquery/parser/parser.py b/beanquery/parser/parser.py index 2ec9ee84..cbf78945 100644 --- a/beanquery/parser/parser.py +++ b/beanquery/parser/parser.py @@ -9,7 +9,7 @@ # Any changes you make to it will be overwritten the next time # the file is generated. 
-# ruff: noqa: C405, COM812, I001, F401, PLR1702, PLC2801, SIM117 +# ruff: noqa: RUF100, C405, COM812, I001, F401, PLR1702, PLC2801, SIM117 from __future__ import annotations @@ -25,34 +25,34 @@ KEYWORDS: set[str] = { - 'AND', - 'AS', + 'IS', + 'TABLE', 'ASC', - 'BY', - 'DESC', - 'DISTINCT', - 'FALSE', 'FROM', - 'GROUP', + 'AS', 'HAVING', - 'IN', - 'IS', + 'CREATE', 'LIMIT', - 'NOT', - 'OR', + 'WHERE', + 'INSERT', + 'PRINT', + 'GROUP', + 'JOURNAL', + 'BALANCES', 'ORDER', - 'PIVOT', 'SELECT', + 'INTO', + 'BY', 'TRUE', - 'WHERE', - 'CREATE', - 'TABLE', + 'NOT', + 'OR', + 'AND', + 'DISTINCT', 'USING', - 'INSERT', - 'INTO', - 'BALANCES', - 'JOURNAL', - 'PRINT', + 'DESC', + 'PIVOT', + 'IN', + 'FALSE', } @@ -60,7 +60,6 @@ class BQLBuffer(Buffer): def __init__(self, text, /, config: ParserConfig | None = None, **settings): config = ParserConfig.new( config, - owner=self, whitespace=None, nameguard=None, ignorecase=True, @@ -80,7 +79,6 @@ class BQLParser(Parser): def __init__(self, /, config: ParserConfig | None = None, **settings): config = ParserConfig.new( config, - owner=self, whitespace=None, nameguard=None, ignorecase=True, @@ -107,7 +105,7 @@ def _bql_(self): def _statement_(self): with self._choice(): with self._option(): - self._select_() + self._query_() with self._option(): self._balances_() with self._option(): @@ -121,11 +119,52 @@ def _statement_(self): self._error( 'expecting one of: ' "'BALANCES' 'CREATE' 'INSERT' 'JOURNAL'" - "'PRINT' 'SELECT' " - ' ' - ' ' ) + @tatsumasu('Query') + def _query_(self): + with self._group(): + with self._choice(): + with self._option(): + self._select_() + with self._option(): + self._subquery_() + self._error( + 'expecting one of: ' + ' ' + ' ' ) @tatsumasu('Attribute') @@ -941,7 +995,7 @@ def _subscript_(self): def _atom_(self): with self._choice(): with self._option(): - self._select_() + self._subquery_() with self._option(): self._function_() with self._option(): @@ -952,10 +1006,10 @@ def _atom_(self): 
self._placeholder_() self._error( 'expecting one of: ' - "'%(' '%s' 'SELECT' " + "'%(' '%s' '(' " ' ' ' ' - ' ' ) self.add_last_node_to_name('queries') + + def block0(): + with self._group(): + with self._choice(): + with self._option(): + self._token('UNION') + self._token('ALL') + self._constant('union_all') + self.add_last_node_to_name('set_operators') + self._define( + [], + ['set_operators'], + ) + with self._option(): + self._token('UNION') + self._constant('union') + self.add_last_node_to_name('set_operators') + self._define( + [], + ['set_operators'], + ) + self._error( + 'expecting one of: ' + "'UNION'" + ) + with self._group(): + with self._choice(): + with self._option(): + self._select_() + with self._option(): + self._subquery_() + self._error( + 'expecting one of: ' + '