From 6f1224786a98578c1fd2b2f5a4370911a364ea4b Mon Sep 17 00:00:00 2001 From: Fredy Wijaya Date: Wed, 30 May 2018 01:18:06 -0700 Subject: [PATCH] IMPALA-2751: Matching quotes are not required in comments This patch fixes the issue where non-matching quotes inside comments will cause the shell to not terminate. The fix is to strip any SQL comments before sending to shlex since shlex does not understand SQL comments and will raise an exception when it sees unmatched quotes regardless whether the quotes are in the comments or not. Testing: - Added new shell tests - Ran all end-to-end shell tests on Python 2.6 and Python 2.7 Change-Id: I2feae34026a7e63f3d31489f757f093a73ca5d2c Reviewed-on: http://gerrit.cloudera.org:8080/10541 Reviewed-by: Impala Public Jenkins Tested-by: Impala Public Jenkins --- shell/impala_shell.py | 14 ++++++++------ tests/shell/test_shell_interactive.py | 18 ++++++++++++++++++ 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/shell/impala_shell.py b/shell/impala_shell.py index a2dd89fd369..8cb14423502 100755 --- a/shell/impala_shell.py +++ b/shell/impala_shell.py @@ -393,12 +393,13 @@ def _cmd_ends_with_delim(self, line): # Strip any comments to make a statement such as the following be considered as # ending with a delimiter: # select 1 + 1; -- this is a comment - line = sqlparse.format(line, strip_comments=True).rstrip() + line = sqlparse.format(line, strip_comments=True).encode('utf-8').rstrip() if line.endswith(ImpalaShell.CMD_DELIM): try: # Look for an open quotation in the entire command, and not just the # current line. - if self.partial_cmd: line = '%s %s' % (self.partial_cmd, line) + if self.partial_cmd: + line = sqlparse.format('%s %s' % (self.partial_cmd, line), strip_comments=True) self._shlex_split(line) return True # If the command ends with a delimiter, check if it has an open quotation. @@ -416,8 +417,8 @@ def _cmd_ends_with_delim(self, line): # Iterate through the line and switch the state if a single or double quote is found # and ignore escaped single and double quotes if the line is considered open (meaning # a previous single or double quote has not been closed yet) - state_closed = True; - opener = None; + state_closed = True + opener = None for i, char in enumerate(line): if state_closed and (char in ['\'', '\"']): state_closed = False @@ -425,7 +426,7 @@ def _cmd_ends_with_delim(self, line): elif not state_closed and opener == char: if line[i - 1] != '\\': state_closed = True - opener = None; + opener = None return state_closed @@ -1129,7 +1130,8 @@ def do_with(self, args): query = self._create_beeswax_query(args) # Set posix=True and add "'" to escaped quotes # to deal with escaped quotes in string literals - lexer = shlex.shlex(query.query.lstrip(), posix=True) + lexer = shlex.shlex(sqlparse.format(query.query.lstrip(), strip_comments=True) + .encode('utf-8'), posix=True) lexer.escapedquotes += "'" # Because the WITH clause may precede DML or SELECT queries, # just checking the first token is insufficient. diff --git a/tests/shell/test_shell_interactive.py b/tests/shell/test_shell_interactive.py index e112e3e5706..fe41e368c3c 100755 --- a/tests/shell/test_shell_interactive.py +++ b/tests/shell/test_shell_interactive.py @@ -511,6 +511,24 @@ def test_fix_infinite_loop(self): result = run_impala_shell_interactive("select '1\"23\"4'\";\n;\n\";") assert '| 1"23"4 |' in result.stdout + @pytest.mark.execute_serially + def test_comment_with_quotes(self): + # IMPALA-2751: Comment does not need to have matching quotes + queries = [ + "select -- '\n1;", + 'select -- "\n1;', + "select -- \"'\n 1;", + "select /*'\n*/ 1;", + 'select /*"\n*/ 1;', + "select /*\"'\n*/ 1;", + "with a as (\nselect 1\n-- '\n) select * from a", + 'with a as (\nselect 1\n-- "\n) select * from a', + "with a as (\nselect 1\n-- '\"\n) select * from a", + ] + for query in queries: + result = run_impala_shell_interactive(query) + assert '| 1 |' in result.stdout + @pytest.mark.execute_serially def test_shell_prompt(self): proc = pexpect.spawn(SHELL_CMD)