New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
LIKE clause #67
LIKE clause #67
Changes from all commits
57432f5
9d9fee2
2152e20
f7ccc90
0e0a830
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -207,6 +207,42 @@ def parse_select(sel, strings): | |
return res, has_distinct, has_partials | ||
|
||
|
||
def parse_wherelike(clause, strings): | ||
"""splits the LIKE clause and completely supports the SQL syntax | ||
https://docs.microsoft.com/en-us/sql/t-sql/language-elements/like-transact-sql?view=sql-server-ver15""" | ||
# We're not in a LIKE expression, do nothing | ||
if not re.search("LIKE", clause): | ||
return clause | ||
|
||
# Supports words containing [a-zA-Z0-9_\-] | ||
expr_pattern = re.compile(r"([\w-]+)(?:\s+(NOT))?\s+LIKE\s+([\w-]+)", re.IGNORECASE) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think that this regex does not do the trick. It fails in cases like:
I think we have one of the following options:
I am fine with both approaches. I would be happy to have the first in the short term and the second in the longer term. |
||
groups = re.search(expr_pattern, clause) | ||
if groups is None: | ||
spyql.log.user_error( | ||
f"{clause}", | ||
SyntaxError("unexpected EOF while parsing") | ||
) | ||
|
||
groups = groups.groups() | ||
negate = "NOT" in {groups[1]} # placed within {} because it can be None | ||
|
||
if not groups[2] in strings: | ||
spyql.log.user_error( | ||
f"{groups[2]}: missing quotes, must be a string", | ||
SyntaxError("bad query") | ||
) | ||
|
||
# Replacing SQL wildcard '%' for regex wildcard '.*' if not preceded by '\' | ||
pattern = strings.put_strings_back(groups[2]) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Does this mean that we only accept LIKE wildcards on the right side? |
||
pattern = re.compile(r"(?<!\\)%").sub(r".*" , pattern) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nice :-)
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Shouldn't we first escape any regex special character/command? For instance |
||
pattern = re.compile(r"([^\"].*[^\"])").sub(r"^\1$", pattern) | ||
|
||
clause = "re.match({}, str({}))".format(pattern, groups[0]) | ||
clause = "not " + clause if negate else clause | ||
|
||
return clause | ||
|
||
|
||
def parse_orderby(clause, strings): | ||
"""splits the ORDER BY clause and handles modifiers""" | ||
|
||
|
@@ -275,9 +311,10 @@ def parse(query): | |
"order by", | ||
}: | ||
if prs[clause]: | ||
prs[clause] = make_expr_ready(prs[clause], strings) | ||
if clause in {"where", "from"}: | ||
throw_error_if_has_agg_func(prs[clause], clause.upper()) | ||
prs[clause] = make_expr_ready(prs[clause], strings) | ||
prs[clause] = parse_wherelike(prs[clause], strings) | ||
|
||
for clause in {"group by"}: | ||
if prs[clause]: | ||
|
@@ -400,7 +437,7 @@ def main(query, warning_flag, verbose, unbuffered, input_opt, output_opt): | |
SELECT [ DISTINCT | PARTIALS ] | ||
[ * | python_expression [ AS output_column_name ] [, ...] ] | ||
[ FROM csv | spy | text | python_expression | json [ EXPLODE path ] ] | ||
[ WHERE python_expression ] | ||
[ WHERE python_expression [ [NOT] LIKE string] ] | ||
[ GROUP BY output_column_number | python_expression [, ...] ] | ||
[ ORDER BY output_column_number | python_expression | ||
[ ASC | DESC ] [ NULLS { FIRST | LAST } ] [, ...] ] | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,6 +9,9 @@ class QuotesHandler: | |
def __init__(self): | ||
self.strings = {} | ||
|
||
def __iter__(self): | ||
return iter(self.strings) | ||
Comment on lines
+12
to
+13
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. cool! |
||
|
||
# replaces quoted strings by placeholders to make parsing easier | ||
# populates dictionary of placeholders and the strings they hold | ||
def extract_strings(self, query): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[…] python_expression). We would then highlight this in the documentation.
[…] LIKE to any python_expression. Example of a use of LIKE outside of the WHERE clause: SELECT 'error' if msg like 'error%' else 'OK'
[…] ILIKE, which has the same behaviour as LIKE but it is case-insensitive