Skip to content

Commit

Permalink
refactor: generalize skip methods (#2949)
Browse files Browse the repository at this point in the history
Generalize the various skip methods so these can be used for both dialects. Each dialect implements a number of abstract methods to indicate what type of statements and constructs they support. These methods are used by the generalized skip methods to determine the start and end of literals, identifiers, and comments.

This is step 2 of the refactor that is needed to share more of the code between the SpannerStatementParser and PostgreSQLStatementParser.
  • Loading branch information
olavloite committed Mar 19, 2024
1 parent 6e937ab commit 81ec3e0
Show file tree
Hide file tree
Showing 5 changed files with 276 additions and 11 deletions.
Expand Up @@ -595,6 +595,7 @@ private boolean statementStartsWith(String sql, Iterable<String> checkStatements
static final char CLOSE_PARENTHESIS = ')';
static final char COMMA = ',';
static final char UNDERSCORE = '_';
static final char BACKSLASH = '\\';

/**
* Removes comments from and trims the given sql statement using the dialect of this parser.
Expand Down Expand Up @@ -698,6 +699,62 @@ public boolean checkReturningClause(String sql) {
return checkReturningClauseInternal(sql);
}

/**
* Returns true if this dialect supports nested comments.
*
* <ul>
* <li>This method should return false for dialects that consider this to be a valid comment:
* <code>/* A comment /* still a comment *&#47;</code>.
* <li>This method should return true for dialects that require all comment start sequences to
* be balanced with a comment end sequence: <code>
* /* A comment /* still a comment *&#47; Also still a comment *&#47;</code>.
* </ul>
*/
abstract boolean supportsNestedComments();

/**
* Returns true for dialects that support dollar-quoted string literals.
*
* <p>Example: <code>$tag$This is a string$tag$</code>.
*
* <p>The generic skip logic only treats a dollar sign as the possible start of a string literal
* when this method returns true.
*/
abstract boolean supportsDollarQuotedStrings();

/**
* Returns true for dialects that support backticks as a quoting character, either for string
* literals or identifiers.
*
* <p>Example: <code>`my_identifier`</code>.
*/
abstract boolean supportsBacktickQuote();

/**
* Returns true for dialects that support triple-quoted string literals and identifiers. A
* triple-quoted string starts and ends with three consecutive occurrences of the same quote
* character.
*
* <p>Example: <code>```This is a triple-quoted string```</code>
*/
abstract boolean supportsTripleQuotedStrings();

/**
* Returns true if the dialect supports escaping a quote character within a literal with the same
* quote as the literal is using. That is: <code>'foo''bar'</code> is one literal with the value
* <code>foo'bar</code>.
*
* <p>For dialects that return false, <code>'foo''bar'</code> is treated as two adjacent literals.
*/
abstract boolean supportsEscapeQuoteWithQuote();

/**
* Returns true if the dialect supports starting an escape sequence with a backslash.
*
* <p>Example: <code>'foo\'bar'</code> is a single literal for dialects that return true, as the
* quote that follows the backslash does not terminate the literal.
*/
abstract boolean supportsBackslashEscape();

/**
* Returns true if the dialect supports single-line comments that start with a hash.
*
* <p>Example: <code># This is a comment</code>
*/
abstract boolean supportsHashSingleLineComments();

/**
* Returns true for dialects that allow line-feeds in quoted strings. Note that the return value
* of this is not used for triple-quoted strings. Triple-quoted strings are assumed to always
* support line-feeds.
*
* <p>For dialects that return false, scanning of a (non-triple-quoted) quoted string stops at the
* first line-feed that is encountered before the closing quote.
*/
abstract boolean supportsLineFeedInQuotedString();

/**
* Returns true for characters that can be used as the first character in unquoted identifiers.
*/
Expand Down Expand Up @@ -733,11 +790,17 @@ String parseDollarQuotedString(String sql, int index) {
* given index. The skipped characters are added to result if it is not null.
*/
int skip(String sql, int currentIndex, @Nullable StringBuilder result) {
if (currentIndex >= sql.length()) {
return currentIndex;
}
char currentChar = sql.charAt(currentIndex);
if (currentChar == SINGLE_QUOTE || currentChar == DOUBLE_QUOTE) {

if (currentChar == SINGLE_QUOTE
|| currentChar == DOUBLE_QUOTE
|| (supportsBacktickQuote() && currentChar == BACKTICK_QUOTE)) {
appendIfNotNull(result, currentChar);
return skipQuoted(sql, currentIndex, currentChar, result);
} else if (currentChar == DOLLAR) {
} else if (supportsDollarQuotedStrings() && currentChar == DOLLAR) {
String dollarTag = parseDollarQuotedString(sql, currentIndex + 1);
if (dollarTag != null) {
appendIfNotNull(result, currentChar, dollarTag, currentChar);
Expand All @@ -748,6 +811,8 @@ int skip(String sql, int currentIndex, @Nullable StringBuilder result) {
&& sql.length() > (currentIndex + 1)
&& sql.charAt(currentIndex + 1) == HYPHEN) {
return skipSingleLineComment(sql, currentIndex, result);
} else if (currentChar == DASH && supportsHashSingleLineComments()) {
return skipSingleLineComment(sql, currentIndex, result);
} else if (currentChar == SLASH
&& sql.length() > (currentIndex + 1)
&& sql.charAt(currentIndex + 1) == ASTERISK) {
Expand All @@ -772,14 +837,17 @@ static int skipSingleLineComment(String sql, int startIndex, @Nullable StringBui
}

/** Skips a multi-line comment from startIndex and adds it to result if result is not null. */
static int skipMultiLineComment(String sql, int startIndex, @Nullable StringBuilder result) {
int skipMultiLineComment(String sql, int startIndex, @Nullable StringBuilder result) {
// Current position is start + '/*'.length().
int pos = startIndex + 2;
// PostgreSQL allows comments to be nested. That is, the following is allowed:
// '/* test /* inner comment */ still a comment */'
int level = 1;
while (pos < sql.length()) {
if (sql.charAt(pos) == SLASH && sql.length() > (pos + 1) && sql.charAt(pos + 1) == ASTERISK) {
if (supportsNestedComments()
&& sql.charAt(pos) == SLASH
&& sql.length() > (pos + 1)
&& sql.charAt(pos + 1) == ASTERISK) {
level++;
}
if (sql.charAt(pos) == ASTERISK && sql.length() > (pos + 1) && sql.charAt(pos + 1) == SLASH) {
Expand All @@ -806,33 +874,67 @@ private int skipQuoted(
* Skips a quoted string from startIndex. The quote character is assumed to be $ if dollarTag is
* not null.
*/
private int skipQuoted(
int skipQuoted(
String sql,
int startIndex,
char startQuote,
String dollarTag,
@Nullable String dollarTag,
@Nullable StringBuilder result) {
int currentIndex = startIndex + 1;
boolean isTripleQuoted =
supportsTripleQuotedStrings()
&& sql.length() > startIndex + 2
&& sql.charAt(startIndex + 1) == startQuote
&& sql.charAt(startIndex + 2) == startQuote;
int currentIndex = startIndex + (isTripleQuoted ? 3 : 1);
if (isTripleQuoted) {
appendIfNotNull(result, startQuote);
appendIfNotNull(result, startQuote);
}
while (currentIndex < sql.length()) {
char currentChar = sql.charAt(currentIndex);
if (currentChar == startQuote) {
if (currentChar == DOLLAR) {
if (supportsDollarQuotedStrings() && currentChar == DOLLAR) {
// Check if this is the end of the current dollar quoted string.
String tag = parseDollarQuotedString(sql, currentIndex + 1);
if (tag != null && tag.equals(dollarTag)) {
appendIfNotNull(result, currentChar, dollarTag, currentChar);
return currentIndex + tag.length() + 2;
}
} else if (sql.length() > currentIndex + 1 && sql.charAt(currentIndex + 1) == startQuote) {
} else if (supportsEscapeQuoteWithQuote()
&& sql.length() > currentIndex + 1
&& sql.charAt(currentIndex + 1) == startQuote) {
// This is an escaped quote (e.g. 'foo''bar')
appendIfNotNull(result, currentChar);
appendIfNotNull(result, currentChar);
currentIndex += 2;
continue;
} else if (isTripleQuoted) {
// Check if this is the end of the triple-quoted string.
if (sql.length() > currentIndex + 2
&& sql.charAt(currentIndex + 1) == startQuote
&& sql.charAt(currentIndex + 2) == startQuote) {
appendIfNotNull(result, currentChar);
appendIfNotNull(result, currentChar);
appendIfNotNull(result, currentChar);
return currentIndex + 3;
}
} else {
appendIfNotNull(result, currentChar);
return currentIndex + 1;
}
} else if (supportsBackslashEscape()
&& currentChar == BACKSLASH
&& sql.length() > currentIndex + 1
&& sql.charAt(currentIndex + 1) == startQuote) {
// This is an escaped quote (e.g. 'foo\'bar').
// Note that in raw strings, the \ officially does not start an escape sequence, but the
// result is still the same, as in a raw string 'both characters are preserved'.
appendIfNotNull(result, currentChar);
appendIfNotNull(result, sql.charAt(currentIndex + 1));
currentIndex += 2;
continue;
} else if (currentChar == '\n' && !isTripleQuoted && !supportsLineFeedInQuotedString()) {
break;
}
currentIndex++;
appendIfNotNull(result, currentChar);
Expand Down
Expand Up @@ -48,6 +48,46 @@ protected boolean supportsExplain() {
return false;
}

/** PostgreSQL allows multi-line comments to be nested, so start/end sequences must balance. */
@Override
boolean supportsNestedComments() {
return true;
}

/** Dollar-quoted string literals (e.g. $tag$text$tag$) are recognized for this dialect. */
@Override
boolean supportsDollarQuotedStrings() {
return true;
}

/** Backticks are not treated as a quoting character for this dialect. */
@Override
boolean supportsBacktickQuote() {
return false;
}

/** Triple-quoted string literals and identifiers are not recognized for this dialect. */
@Override
boolean supportsTripleQuotedStrings() {
return false;
}

/** A doubled quote inside a literal (e.g. 'foo''bar') is an escaped quote for this dialect. */
@Override
boolean supportsEscapeQuoteWithQuote() {
return true;
}

/** A backslash is not treated as the start of an escape sequence for this dialect. */
@Override
boolean supportsBackslashEscape() {
return false;
}

/** A hash (#) does not start a single-line comment for this dialect. */
@Override
boolean supportsHashSingleLineComments() {
return false;
}

/** Quoted strings may contain line-feeds for this dialect. */
@Override
boolean supportsLineFeedInQuotedString() {
return true;
}

/**
* Removes comments from and trims the given sql statement. PostgreSQL supports two types of
* comments:
Expand Down
Expand Up @@ -50,6 +50,46 @@ protected boolean supportsExplain() {
return true;
}

/** GoogleSQL does not support nested comments; the first comment end sequence ends it. */
@Override
boolean supportsNestedComments() {
return false;
}

/** GoogleSQL does not support dollar-quoted strings. */
@Override
boolean supportsDollarQuotedStrings() {
return false;
}

/** Backticks are treated as a quoting character for this dialect (e.g. `foo'bar'`). */
@Override
boolean supportsBacktickQuote() {
return true;
}

/** Triple-quoted strings (e.g. '''foo'bar''') are recognized for this dialect. */
@Override
boolean supportsTripleQuotedStrings() {
return true;
}

/** A doubled quote does not escape a quote here: 'foo''bar' is two adjacent literals. */
@Override
boolean supportsEscapeQuoteWithQuote() {
return false;
}

/** A backslash before a quote keeps that quote inside the literal (e.g. 'foo\'bar'). */
@Override
boolean supportsBackslashEscape() {
return true;
}

/** A hash (#) starts a single-line comment for this dialect. */
@Override
boolean supportsHashSingleLineComments() {
return true;
}

/** Line-feeds are not allowed in quoted strings that are not triple-quoted. */
@Override
boolean supportsLineFeedInQuotedString() {
return false;
}

/**
* Removes comments from and trims the given sql statement. Spanner supports three types of
* comments:
Expand Down
@@ -0,0 +1,83 @@
/*
* Copyright 2024 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.google.cloud.spanner.connection;

import static org.junit.Assert.assertEquals;

import com.google.cloud.spanner.Dialect;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

/** Tests the generalized {@code skip} logic of the statement parser for the GoogleSQL dialect. */
@RunWith(JUnit4.class)
public class SpannerStatementParserTest {

  /** Skips a single element at the start of {@code sql} and returns the text that was skipped. */
  static String skip(String sql) {
    return skip(sql, 0);
  }

  /**
   * Skips a single element of {@code sql} starting at {@code currentIndex} using the GoogleSQL
   * parser, and returns the text between {@code currentIndex} and the position after the skip.
   */
  static String skip(String sql, int currentIndex) {
    return sql.substring(
        currentIndex,
        AbstractStatementParser.getInstance(Dialect.GOOGLE_STANDARD_SQL)
            .skip(sql, currentIndex, null));
  }

  @Test
  public void testSkip() {
    // Plain characters and end-of-input.
    assertEquals("", skip(""));
    assertEquals("1", skip("1 "));
    assertEquals("1", skip("12 "));
    assertEquals("2", skip("12 ", 1));
    assertEquals("", skip("12", 2));

    // Single-quoted, double-quoted and backtick-quoted literals.
    assertEquals("'foo'", skip("'foo' "));
    assertEquals("'foo'", skip("'foo''bar' "));
    assertEquals("'foo'", skip("'foo' 'bar' "));
    assertEquals("'bar'", skip("'foo''bar' ", 5));
    assertEquals("'foo\"bar\"'", skip("'foo\"bar\"' "));
    assertEquals("\"foo'bar'\"", skip("\"foo'bar'\" "));
    assertEquals("`foo'bar'`", skip("`foo'bar'` "));

    // Triple-quoted literals, with and without backslash-escaped quotes.
    assertEquals("'''foo'bar'''", skip("'''foo'bar''' "));
    assertEquals("'''foo\\'bar'''", skip("'''foo\\'bar''' "));
    assertEquals("'''foo\\'\\'bar'''", skip("'''foo\\'\\'bar''' "));
    assertEquals("'''foo\\'\\'\\'bar'''", skip("'''foo\\'\\'\\'bar''' "));
    assertEquals("\"\"\"foo'bar\"\"\"", skip("\"\"\"foo'bar\"\"\""));
    assertEquals("```foo'bar```", skip("```foo'bar```"));

    // Single-line and multi-line comments.
    assertEquals("-- comment\n", skip("-- comment\nselect * from foo"));
    assertEquals("# comment\n", skip("# comment\nselect * from foo"));
    assertEquals("/* comment */", skip("/* comment */ select * from foo"));
    assertEquals(
        "/* comment /* GoogleSQL does not support nested comments */",
        skip("/* comment /* GoogleSQL does not support nested comments */ select * from foo"));
    // GoogleSQL does not support dollar-quoted strings.
    assertEquals("$", skip("$tag$not a string$tag$ select * from foo"));

    // Quotes inside comments and comment sequences inside literals.
    assertEquals("/* 'test' */", skip("/* 'test' */ foo"));
    assertEquals("-- 'test' \n", skip("-- 'test' \n foo"));
    assertEquals("'/* test */'", skip("'/* test */' foo"));

    // Raw strings do not consider '\' as something that starts an escape sequence, but any
    // quote character following it is still preserved within the string, as the definition of a
    // raw string says that 'both characters are preserved'.
    assertEquals("'foo\\''", skip("'foo\\'' "));
    assertEquals("'foo\\''", skip("r'foo\\'' ", 1));
    assertEquals("'''foo\\'\\'\\'bar'''", skip("'''foo\\'\\'\\'bar''' "));
  }
}
Expand Up @@ -1600,11 +1600,11 @@ public void testPostgreSQLReturningClause() {
}

int skipSingleLineComment(String sql, int startIndex) {
return PostgreSQLStatementParser.skipSingleLineComment(sql, startIndex, null);
return AbstractStatementParser.skipSingleLineComment(sql, startIndex, null);
}

int skipMultiLineComment(String sql, int startIndex) {
return PostgreSQLStatementParser.skipMultiLineComment(sql, startIndex, null);
return parser.skipMultiLineComment(sql, startIndex, null);
}

@Test
Expand Down

0 comments on commit 81ec3e0

Please sign in to comment.