diff --git a/google-cloud-spanner/src/main/java/com/google/cloud/spanner/connection/AbstractStatementParser.java b/google-cloud-spanner/src/main/java/com/google/cloud/spanner/connection/AbstractStatementParser.java index ac984a0f86..1330118145 100644 --- a/google-cloud-spanner/src/main/java/com/google/cloud/spanner/connection/AbstractStatementParser.java +++ b/google-cloud-spanner/src/main/java/com/google/cloud/spanner/connection/AbstractStatementParser.java @@ -595,6 +595,7 @@ private boolean statementStartsWith(String sql, Iterable checkStatements static final char CLOSE_PARENTHESIS = ')'; static final char COMMA = ','; static final char UNDERSCORE = '_'; + static final char BACKSLASH = '\\'; /** * Removes comments from and trims the given sql statement using the dialect of this parser. @@ -698,6 +699,62 @@ public boolean checkReturningClause(String sql) { return checkReturningClauseInternal(sql); } + /** + * <<<<<<< HEAD Returns true if this dialect supports nested comments. + * + * + */ + abstract boolean supportsNestedComments(); + + /** + * Returns true for dialects that support dollar-quoted string literals. + * + *

Example: $tag$This is a string$tag$. + */ + abstract boolean supportsDollarQuotedStrings(); + + /** + * Returns true for dialects that support backticks as a quoting character, either for string + * literals or identifiers. + */ + abstract boolean supportsBacktickQuote(); + + /** + * Returns true for dialects that support triple-quoted string literals and identifiers. + * + *

Example: ```This is a triple-quoted string``` + */ + abstract boolean supportsTripleQuotedStrings(); + + /** + * Returns true if the dialect supports escaping a quote character within a literal with the same + * quote as the literal is using. That is: 'foo''bar' means "foo'bar". + */ + abstract boolean supportsEscapeQuoteWithQuote(); + + /** Returns true if the dialect supports starting an escape sequence with a backslash. */ + abstract boolean supportsBackslashEscape(); + + /** + * Returns true if the dialect supports single-line comments that start with a dash. + * + *

Example: # This is a comment + */ + abstract boolean supportsHashSingleLineComments(); + + /** + * Returns true for dialects that allow line-feeds in quoted strings. Note that the return value + * of this is not used for triple-quoted strings. Triple-quoted strings are assumed to always + * support line-feeds. + */ + abstract boolean supportsLineFeedInQuotedString(); + /** * Returns true for characters that can be used as the first character in unquoted identifiers. */ @@ -733,11 +790,17 @@ String parseDollarQuotedString(String sql, int index) { * given index. The skipped characters are added to result if it is not null. */ int skip(String sql, int currentIndex, @Nullable StringBuilder result) { + if (currentIndex >= sql.length()) { + return currentIndex; + } char currentChar = sql.charAt(currentIndex); - if (currentChar == SINGLE_QUOTE || currentChar == DOUBLE_QUOTE) { + + if (currentChar == SINGLE_QUOTE + || currentChar == DOUBLE_QUOTE + || (supportsBacktickQuote() && currentChar == BACKTICK_QUOTE)) { appendIfNotNull(result, currentChar); return skipQuoted(sql, currentIndex, currentChar, result); - } else if (currentChar == DOLLAR) { + } else if (supportsDollarQuotedStrings() && currentChar == DOLLAR) { String dollarTag = parseDollarQuotedString(sql, currentIndex + 1); if (dollarTag != null) { appendIfNotNull(result, currentChar, dollarTag, currentChar); @@ -748,6 +811,8 @@ int skip(String sql, int currentIndex, @Nullable StringBuilder result) { && sql.length() > (currentIndex + 1) && sql.charAt(currentIndex + 1) == HYPHEN) { return skipSingleLineComment(sql, currentIndex, result); + } else if (currentChar == DASH && supportsHashSingleLineComments()) { + return skipSingleLineComment(sql, currentIndex, result); } else if (currentChar == SLASH && sql.length() > (currentIndex + 1) && sql.charAt(currentIndex + 1) == ASTERISK) { @@ -772,14 +837,17 @@ static int skipSingleLineComment(String sql, int startIndex, @Nullable StringBui } /** Skips a multi-line comment from startIndex and adds it to result if result is not null. */ - static int skipMultiLineComment(String sql, int startIndex, @Nullable StringBuilder result) { + int skipMultiLineComment(String sql, int startIndex, @Nullable StringBuilder result) { // Current position is start + '/*'.length(). int pos = startIndex + 2; // PostgreSQL allows comments to be nested. That is, the following is allowed: // '/* test /* inner comment */ still a comment */' int level = 1; while (pos < sql.length()) { - if (sql.charAt(pos) == SLASH && sql.length() > (pos + 1) && sql.charAt(pos + 1) == ASTERISK) { + if (supportsNestedComments() + && sql.charAt(pos) == SLASH + && sql.length() > (pos + 1) + && sql.charAt(pos + 1) == ASTERISK) { level++; } if (sql.charAt(pos) == ASTERISK && sql.length() > (pos + 1) && sql.charAt(pos + 1) == SLASH) { @@ -806,33 +874,67 @@ private int skipQuoted( * Skips a quoted string from startIndex. The quote character is assumed to be $ if dollarTag is * not null. */ - private int skipQuoted( + int skipQuoted( String sql, int startIndex, char startQuote, - String dollarTag, + @Nullable String dollarTag, @Nullable StringBuilder result) { - int currentIndex = startIndex + 1; + boolean isTripleQuoted = + supportsTripleQuotedStrings() + && sql.length() > startIndex + 2 + && sql.charAt(startIndex + 1) == startQuote + && sql.charAt(startIndex + 2) == startQuote; + int currentIndex = startIndex + (isTripleQuoted ? 3 : 1); + if (isTripleQuoted) { + appendIfNotNull(result, startQuote); + appendIfNotNull(result, startQuote); + } while (currentIndex < sql.length()) { char currentChar = sql.charAt(currentIndex); if (currentChar == startQuote) { - if (currentChar == DOLLAR) { + if (supportsDollarQuotedStrings() && currentChar == DOLLAR) { // Check if this is the end of the current dollar quoted string. String tag = parseDollarQuotedString(sql, currentIndex + 1); if (tag != null && tag.equals(dollarTag)) { appendIfNotNull(result, currentChar, dollarTag, currentChar); return currentIndex + tag.length() + 2; } - } else if (sql.length() > currentIndex + 1 && sql.charAt(currentIndex + 1) == startQuote) { + } else if (supportsEscapeQuoteWithQuote() + && sql.length() > currentIndex + 1 + && sql.charAt(currentIndex + 1) == startQuote) { // This is an escaped quote (e.g. 'foo''bar') appendIfNotNull(result, currentChar); appendIfNotNull(result, currentChar); currentIndex += 2; continue; + } else if (isTripleQuoted) { + // Check if this is the end of the triple-quoted string. + if (sql.length() > currentIndex + 2 + && sql.charAt(currentIndex + 1) == startQuote + && sql.charAt(currentIndex + 2) == startQuote) { + appendIfNotNull(result, currentChar); + appendIfNotNull(result, currentChar); + appendIfNotNull(result, currentChar); + return currentIndex + 3; + } } else { appendIfNotNull(result, currentChar); return currentIndex + 1; } + } else if (supportsBackslashEscape() + && currentChar == BACKSLASH + && sql.length() > currentIndex + 1 + && sql.charAt(currentIndex + 1) == startQuote) { + // This is an escaped quote (e.g. 'foo\'bar'). + // Note that in raw strings, the \ officially does not start an escape sequence, but the + // result is still the same, as in a raw string 'both characters are preserved'. + appendIfNotNull(result, currentChar); + appendIfNotNull(result, sql.charAt(currentIndex + 1)); + currentIndex += 2; + continue; + } else if (currentChar == '\n' && !isTripleQuoted && !supportsLineFeedInQuotedString()) { + break; } currentIndex++; appendIfNotNull(result, currentChar); diff --git a/google-cloud-spanner/src/main/java/com/google/cloud/spanner/connection/PostgreSQLStatementParser.java b/google-cloud-spanner/src/main/java/com/google/cloud/spanner/connection/PostgreSQLStatementParser.java index 572ea05654..6b0c69d40a 100644 --- a/google-cloud-spanner/src/main/java/com/google/cloud/spanner/connection/PostgreSQLStatementParser.java +++ b/google-cloud-spanner/src/main/java/com/google/cloud/spanner/connection/PostgreSQLStatementParser.java @@ -48,6 +48,46 @@ protected boolean supportsExplain() { return false; } + @Override + boolean supportsNestedComments() { + return true; + } + + @Override + boolean supportsDollarQuotedStrings() { + return true; + } + + @Override + boolean supportsBacktickQuote() { + return false; + } + + @Override + boolean supportsTripleQuotedStrings() { + return false; + } + + @Override + boolean supportsEscapeQuoteWithQuote() { + return true; + } + + @Override + boolean supportsBackslashEscape() { + return false; + } + + @Override + boolean supportsHashSingleLineComments() { + return false; + } + + @Override + boolean supportsLineFeedInQuotedString() { + return true; + } + /** * Removes comments from and trims the given sql statement. PostgreSQL supports two types of * comments: diff --git a/google-cloud-spanner/src/main/java/com/google/cloud/spanner/connection/SpannerStatementParser.java b/google-cloud-spanner/src/main/java/com/google/cloud/spanner/connection/SpannerStatementParser.java index 251c5a2e6e..1c5cdda7b0 100644 --- a/google-cloud-spanner/src/main/java/com/google/cloud/spanner/connection/SpannerStatementParser.java +++ b/google-cloud-spanner/src/main/java/com/google/cloud/spanner/connection/SpannerStatementParser.java @@ -50,6 +50,46 @@ protected boolean supportsExplain() { return true; } + @Override + boolean supportsNestedComments() { + return false; + } + + @Override + boolean supportsDollarQuotedStrings() { + return false; + } + + @Override + boolean supportsBacktickQuote() { + return true; + } + + @Override + boolean supportsTripleQuotedStrings() { + return true; + } + + @Override + boolean supportsEscapeQuoteWithQuote() { + return false; + } + + @Override + boolean supportsBackslashEscape() { + return true; + } + + @Override + boolean supportsHashSingleLineComments() { + return true; + } + + @Override + boolean supportsLineFeedInQuotedString() { + return false; + } + /** * Removes comments from and trims the given sql statement. Spanner supports three types of * comments: diff --git a/google-cloud-spanner/src/test/java/com/google/cloud/spanner/connection/SpannerStatementParserTest.java b/google-cloud-spanner/src/test/java/com/google/cloud/spanner/connection/SpannerStatementParserTest.java new file mode 100644 index 0000000000..d4dc76d48b --- /dev/null +++ b/google-cloud-spanner/src/test/java/com/google/cloud/spanner/connection/SpannerStatementParserTest.java @@ -0,0 +1,83 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.cloud.spanner.connection; + +import static org.junit.Assert.assertEquals; + +import com.google.cloud.spanner.Dialect; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +@RunWith(JUnit4.class) +public class SpannerStatementParserTest { + + static String skip(String sql) { + return skip(sql, 0); + } + + static String skip(String sql, int currentIndex) { + int position = + AbstractStatementParser.getInstance(Dialect.GOOGLE_STANDARD_SQL) + .skip(sql, currentIndex, null); + return sql.substring(currentIndex, position); + } + + @Test + public void testSkip() { + assertEquals("", skip("")); + assertEquals("1", skip("1 ")); + assertEquals("1", skip("12 ")); + assertEquals("2", skip("12 ", 1)); + assertEquals("", skip("12", 2)); + + assertEquals("'foo'", skip("'foo' ", 0)); + assertEquals("'foo'", skip("'foo''bar' ", 0)); + assertEquals("'foo'", skip("'foo' 'bar' ", 0)); + assertEquals("'bar'", skip("'foo''bar' ", 5)); + assertEquals("'foo\"bar\"'", skip("'foo\"bar\"' ", 0)); + assertEquals("\"foo'bar'\"", skip("\"foo'bar'\" ", 0)); + assertEquals("`foo'bar'`", skip("`foo'bar'` ", 0)); + + assertEquals("'''foo'bar'''", skip("'''foo'bar''' ", 0)); + assertEquals("'''foo\\'bar'''", skip("'''foo\\'bar''' ", 0)); + assertEquals("'''foo\\'\\'bar'''", skip("'''foo\\'\\'bar''' ", 0)); + assertEquals("'''foo\\'\\'\\'bar'''", skip("'''foo\\'\\'\\'bar''' ", 0)); + assertEquals("\"\"\"foo'bar\"\"\"", skip("\"\"\"foo'bar\"\"\"", 0)); + assertEquals("```foo'bar```", skip("```foo'bar```", 0)); + + assertEquals("-- comment\n", skip("-- comment\nselect * from foo", 0)); + assertEquals("# comment\n", skip("# comment\nselect * from foo", 0)); + assertEquals("/* comment */", skip("/* comment */ select * from foo", 0)); + assertEquals( + "/* comment /* GoogleSQL does not support nested comments */", + skip("/* comment /* GoogleSQL does not support nested comments */ select * from foo", 0)); + // GoogleSQL does not support dollar-quoted strings. + assertEquals("$", skip("$tag$not a string$tag$ select * from foo", 0)); + + assertEquals("/* 'test' */", skip("/* 'test' */ foo")); + assertEquals("-- 'test' \n", skip("-- 'test' \n foo")); + assertEquals("'/* test */'", skip("'/* test */' foo")); + + // Raw strings do not consider '\' as something that starts an escape sequence, but any + // quote character following it is still preserved within the string, as the definition of a + // raw string says that 'both characters are preserved'. + assertEquals("'foo\\''", skip("'foo\\'' ", 0)); + assertEquals("'foo\\''", skip("r'foo\\'' ", 1)); + assertEquals("'''foo\\'\\'\\'bar'''", skip("'''foo\\'\\'\\'bar''' ", 0)); + } +} diff --git a/google-cloud-spanner/src/test/java/com/google/cloud/spanner/connection/StatementParserTest.java b/google-cloud-spanner/src/test/java/com/google/cloud/spanner/connection/StatementParserTest.java index d3438b2b66..c60550c3ba 100644 --- a/google-cloud-spanner/src/test/java/com/google/cloud/spanner/connection/StatementParserTest.java +++ b/google-cloud-spanner/src/test/java/com/google/cloud/spanner/connection/StatementParserTest.java @@ -1600,11 +1600,11 @@ public void testPostgreSQLReturningClause() { } int skipSingleLineComment(String sql, int startIndex) { - return PostgreSQLStatementParser.skipSingleLineComment(sql, startIndex, null); + return AbstractStatementParser.skipSingleLineComment(sql, startIndex, null); } int skipMultiLineComment(String sql, int startIndex) { - return PostgreSQLStatementParser.skipMultiLineComment(sql, startIndex, null); + return parser.skipMultiLineComment(sql, startIndex, null); } @Test