Skip to content

Commit

Permalink
fix: include lower-case identifiers among those that need quotes (#3723)
Browse files Browse the repository at this point in the history
IdentifierUtils::needsQuotes is used by various code that formats SQL text to
decide what identifiers to wrap in quotes. Before this change, this just included
reserved identifiers. However, identifiers that include lower-case characters should
also be quoted. Otherwise, when the parser parses them it will convert them to
upper-case. So this patch changes needsQuotes to check for lower-case characters.

For schema inference, we don't want to quote column names generated from lower-case
avro schema fields. To handle this, this patch adds IdentifierUtils.isValid, that
only returns true if the identifier is valid (parsable). Schema inference uses this
to only quote invalid identifiers.
  • Loading branch information
rodesai committed Nov 4, 2019
1 parent c5b9e33 commit 62c47bf
Show file tree
Hide file tree
Showing 9 changed files with 78 additions and 30 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ public enum Option {
/**
* Construct instance.
*
* <p>The {@code reservedWordPredicate} allows this formatter, which lives in the common module,
* <p>The {@code addQuotesPredicate} allows this formatter, which lives in the common module,
* to be wired up to the set of reserved words defined in the parser module. Wire up to
* {@code ParserUtil::isReservedWord}.
*
Expand All @@ -72,19 +72,19 @@ public enum Option {
* quotes. NB: this also makes the field name case-sensitive. So care must be taken to ensure
* field names have the correct case.
*
* @param reservedWordPredicate predicate to determine if a word is reserved in the SQL syntax.
* @param addQuotesPredicate predicate to determine if a word should be quoted.
* @param options the options to use when formatting the SQL.
*/
public SqlSchemaFormatter(
final Predicate<String> reservedWordPredicate,
final Predicate<String> addQuotesPredicate,
final Option... options
) {
this.options = options.length == 0
? EnumSet.noneOf(Option.class)
: EnumSet.of(options[0], options);

this.formatOptions = FormatOptions.of(
requireNonNull(reservedWordPredicate, "reservedWordPredicate")
requireNonNull(addQuotesPredicate, "addQuotesPredicate")
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

public final class FormatOptions {

private final Predicate<String> reservedWordPredicate;
private final Predicate<String> addQuotesPredicate;

public static FormatOptions none() {
return new FormatOptions(word -> true);
Expand All @@ -41,7 +41,7 @@ public static FormatOptions noEscape() {
/**
* Construct instance.
*
* <p>The {@code reservedWordPredicate} allows code that lives in the common module
* <p>The {@code addQuotesPredicate} allows code that lives in the common module
* to be wired up to the set of reserved words defined in the parser module. Wire this up to
* {@code ParserUtil::isReservedIdentifier}.
*
Expand All @@ -53,29 +53,29 @@ public static FormatOptions noEscape() {
* quotes. NB: this also makes the field name case-sensitive. So care must be taken to ensure
* field names have the correct case.
*
* @param reservedWordPredicate predicate to test if a word is a reserved in SQL syntax.
* @param addQuotesPredicate predicate to test if a word should be quoted.
* @return instance of {@code FormatOptions}.
*/
public static FormatOptions of(final Predicate<String> reservedWordPredicate) {
return new FormatOptions(reservedWordPredicate);
public static FormatOptions of(final Predicate<String> addQuotesPredicate) {
return new FormatOptions(addQuotesPredicate);
}

private FormatOptions(final Predicate<String> fieldNameEscaper) {
this.reservedWordPredicate = requireNonNull(fieldNameEscaper, "reservedWordPredicate");
this.addQuotesPredicate = requireNonNull(fieldNameEscaper, "addQuotesPredicate");
}

public boolean isReservedWord(final String word) {
return reservedWordPredicate.test(word);
private boolean shouldQuote(final String word) {
return addQuotesPredicate.test(word);
}

/**
* Escapes {@code word} if it is a reserved word, determined by {@link #isReservedWord(String)}.
* Escapes {@code word} if it is a reserved word, determined by {@link #shouldQuote(String)}.
*
* @param word the word to escape
* @return {@code word}, if it is not a reserved word, otherwise {@code word} wrapped in
* back quotes
*/
public String escape(final String word) {
return isReservedWord(word) ? KsqlConstants.ESCAPE + word + KsqlConstants.ESCAPE : word;
return shouldQuote(word) ? KsqlConstants.ESCAPE + word + KsqlConstants.ESCAPE : word;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
public class DefaultSchemaInjector implements Injector {

private static final SqlSchemaFormatter FORMATTER = new SqlSchemaFormatter(
IdentifierUtil::needsQuotes, Option.AS_COLUMN_LIST);
w -> !IdentifierUtil.isValid(w), Option.AS_COLUMN_LIST);

private final TopicSchemaSupplier schemaSupplier;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ public void shouldBuildNewCsStatementText() {

// Then:
assertThat(result.getStatementText(), is(
"CREATE STREAM cs ("
"CREATE STREAM `cs` ("
+ "INTFIELD INTEGER, "
+ "BIGINTFIELD BIGINT, "
+ "DOUBLEFIELD DOUBLE, "
Expand All @@ -303,7 +303,7 @@ public void shouldBuildNewCtStatementText() {

// Then:
assertThat(result.getStatementText(), is(
"CREATE TABLE ct ("
"CREATE TABLE `ct` ("
+ "INTFIELD INTEGER, "
+ "BIGINTFIELD BIGINT, "
+ "DOUBLEFIELD DOUBLE, "
Expand All @@ -330,7 +330,7 @@ public void shouldBuildNewCsStatementTextFromId() {

// Then:
assertThat(result.getStatementText(), is(
"CREATE STREAM cs ("
"CREATE STREAM `cs` ("
+ "INTFIELD INTEGER, "
+ "BIGINTFIELD BIGINT, "
+ "DOUBLEFIELD DOUBLE, "
Expand All @@ -357,7 +357,7 @@ public void shouldBuildNewCtStatementTextFromId() {

// Then:
assertThat(result.getStatementText(), is(
"CREATE TABLE ct ("
"CREATE TABLE `ct` ("
+ "INTFIELD INTEGER, "
+ "BIGINTFIELD BIGINT, "
+ "DOUBLEFIELD DOUBLE, "
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -784,7 +784,7 @@ public void shouldSummarizeExecutionPlanCorrectly() {

// When/Then:
assertThat(schemaKtream.getExecutionPlan(new QueryId("query"), ""), equalTo(
" > [ SOURCE ] | Schema: [ROWKEY STRING KEY, key STRING, val BIGINT] | "
" > [ SOURCE ] | Schema: [ROWKEY STRING KEY, `key` STRING, `val` BIGINT] | "
+ "Logger: query.node.source\n"
+ "\tparent plan"));
}
Expand All @@ -809,7 +809,7 @@ public void shouldSummarizeExecutionPlanCorrectlyForRoot() {

// When/Then:
assertThat(schemaKtream.getExecutionPlan(new QueryId("query"), ""), equalTo(
" > [ SOURCE ] | Schema: [ROWKEY STRING KEY, key STRING, val BIGINT] | "
" > [ SOURCE ] | Schema: [ROWKEY STRING KEY, `key` STRING, `val` BIGINT] | "
+ "Logger: query.node.source\n"));
}

Expand Down Expand Up @@ -839,7 +839,7 @@ public void shouldSummarizeExecutionPlanCorrectlyWhenMultipleParents() {

// When/Then:
assertThat(schemaKtream.getExecutionPlan(new QueryId("query"), ""), equalTo(
" > [ SOURCE ] | Schema: [ROWKEY STRING KEY, key STRING, val BIGINT] | "
" > [ SOURCE ] | Schema: [ROWKEY STRING KEY, `key` STRING, `val` BIGINT] | "
+ "Logger: query.node.source\n"
+ "\tparent 1 plan"
+ "\tparent 2 plan"));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ private IdentifierUtil() { }
* @param identifier the identifier
* @return whether or not {@code identifier} is a valid identifier without quotes
*/
public static boolean needsQuotes(final String identifier) {
public static boolean isValid(final String identifier) {
final SqlBaseLexer sqlBaseLexer = new SqlBaseLexer(
new CaseInsensitiveStream(CharStreams.fromString(identifier)));
final CommonTokenStream tokenStream = new CommonTokenStream(sqlBaseLexer);
Expand All @@ -43,8 +43,20 @@ public static boolean needsQuotes(final String identifier) {
sqlBaseParser.identifier();

// needs quotes if the `identifier` was not able to read the entire line
return sqlBaseParser.getNumberOfSyntaxErrors() != 0
|| sqlBaseParser.getCurrentToken().getCharPositionInLine() != identifier.length();
return sqlBaseParser.getNumberOfSyntaxErrors() == 0
&& sqlBaseParser.getCurrentToken().getCharPositionInLine() == identifier.length();
}

/**
* @param identifier the identifier
* @return whether or not {@code identifier} needs quotes to be parsed as the same identifier
*/
public static boolean needsQuotes(final String identifier) {
return !(isValid(identifier) && upperCase(identifier));
}

private static boolean upperCase(final String identifier) {
return identifier.toUpperCase().equals(identifier);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ public void shouldFormatCreateStreamStatementWithExplicitKey() {
final String sql = SqlFormatter.formatSql(createStream);

// Then:
assertThat(sql, is("CREATE STREAM TEST (ROWKEY STRING KEY, Foo STRING) "
assertThat(sql, is("CREATE STREAM TEST (ROWKEY STRING KEY, `Foo` STRING) "
+ "WITH (KAFKA_TOPIC='topic_test', KEY='ORDERID', VALUE_FORMAT='JSON');"));
}

Expand All @@ -247,7 +247,7 @@ public void shouldFormatCreateStreamStatementWithImplicitKey() {
final String sql = SqlFormatter.formatSql(createStream);

// Then:
assertThat(sql, is("CREATE STREAM TEST (Foo STRING, Bar STRING) "
assertThat(sql, is("CREATE STREAM TEST (`Foo` STRING, `Bar` STRING) "
+ "WITH (KAFKA_TOPIC='topic_test', KEY='ORDERID', VALUE_FORMAT='JSON');"));
}

Expand All @@ -264,7 +264,7 @@ public void shouldFormatCreateTableStatementWithExplicitKey() {
final String sql = SqlFormatter.formatSql(createTable);

// Then:
assertThat(sql, is("CREATE TABLE TEST (ROWKEY STRING KEY, Foo STRING) "
assertThat(sql, is("CREATE TABLE TEST (ROWKEY STRING KEY, `Foo` STRING) "
+ "WITH (KAFKA_TOPIC='topic_test', KEY='ORDERID', VALUE_FORMAT='JSON');"));
}

Expand All @@ -281,7 +281,7 @@ public void shouldFormatCreateTableStatementWithImplicitKey() {
final String sql = SqlFormatter.formatSql(createTable);

// Then:
assertThat(sql, is("CREATE TABLE TEST (Foo STRING, Bar STRING) "
assertThat(sql, is("CREATE TABLE TEST (`Foo` STRING, `Bar` STRING) "
+ "WITH (KAFKA_TOPIC='topic_test', KEY='ORDERID', VALUE_FORMAT='JSON');"));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ public void shouldNeedBackQuotes() {
"SELECT", // reserved word
"@ID", // invalid character
"FOO.BAR", // with a dot
"foo" // lower case
};

// Then:
Expand All @@ -52,4 +53,39 @@ public void shouldNotNeedBackQuotes() {
}
}

@Test
public void shouldBeValid() {
// Given:
final String[] identifiers = new String[]{
"FOO", // nothing special
"foo", // lower-case
};

// Then:
for (final String identifier : identifiers) {
assertThat(
"Expected " + identifier + " to be valid.",
IdentifierUtil.isValid(identifier)
);
}
}

@Test
public void shouldNotBeValid() {
// Given:
final String[] identifiers = new String[]{
"@FOO", // invalid character
"FOO.BAR", // Dot
"SELECT" // reserved word
};

// Then:
for (final String identifier : identifiers) {
assertThat(
"Expected " + identifier + " to be invalid",
!IdentifierUtil.isValid(identifier)
);
}
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ public final class ProcessingLogServerUtils {

private static final Logger LOGGER = LoggerFactory.getLogger(ProcessingLogServerUtils.class);
private static final SqlSchemaFormatter FORMATTER =
new SqlSchemaFormatter(IdentifierUtil::needsQuotes, Option.AS_COLUMN_LIST);
new SqlSchemaFormatter(w -> !IdentifierUtil.isValid(w), Option.AS_COLUMN_LIST);

private ProcessingLogServerUtils() {
}
Expand Down

0 comments on commit 62c47bf

Please sign in to comment.