Skip to content

Commit

Permalink
ESQL: change from quoting from backtick to quote (elastic#108395)
Browse files Browse the repository at this point in the history
* ESQL: change from quoting from backtick to quote

For historical reasons, the source declaration inside FROM command is
 treated as an identifier, using backticks (`) for escaping the value.
This is inconsistent since the source is not an identifier (field name)
 but an index name which has different semantics.
 `index` means a field name index while "index" means a literal with
 said value.

In case of FROM, the index name/location is more like a literal (also in
 unquoted form) than an identifier (that is a reference to a value).

This PR tweaks the grammar and plugs in the quoted string logic so that
 both the single quote (") and triple quote (""") are allowed.

* Update grammar

* Add more tests

* Add a few more tests

* Add extra test

* Update docs/changelog/108395.yaml

* Adress review comments

* Add doc note

* Revert test rename

* Fix quoting with remote cluster

* Update docs/reference/esql/source-commands/from.asciidoc

Co-authored-by: marciw <333176+marciw@users.noreply.github.com>

---------

Co-authored-by: Bogdan Pintea <bogdan.pintea@elastic.co>
Co-authored-by: Bogdan Pintea <pintea@mailbox.org>
Co-authored-by: marciw <333176+marciw@users.noreply.github.com>
Co-authored-by: Elastic Machine <elasticmachine@users.noreply.github.com>
  • Loading branch information
5 people committed Jun 30, 2024
1 parent 3bc485c commit b906ce3
Show file tree
Hide file tree
Showing 23 changed files with 2,168 additions and 1,788 deletions.
5 changes: 5 additions & 0 deletions docs/changelog/108395.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 108395
summary: "ESQL: change from quoting from backtick to quote"
area: ES|QL
type: bug
issues: []
8 changes: 8 additions & 0 deletions docs/reference/esql/source-commands/from.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,11 @@ Use the optional `METADATA` directive to enable <<esql-metadata-fields,metadata
----
FROM employees METADATA _id
----

Use enclosing double quotes (`"`) or three enclosing double quotes (`"""`) to escape index names
that contain special characters:

[source,esql]
----
FROM "this=that", """this[that"""
----
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,21 @@ METRICS k8s | sort @timestamp DESC, cluster, pod | keep @timestamp,cluster,pod,n
2024-05-10T00:22:49.000Z | staging | two | 3 | 1.75
;

metricsWithAggs
metricsWithAggsAndSourceQuoting
required_capability: metrics_syntax
METRICS k8s max_bytes=max(to_long(network.total_bytes_in)) BY cluster | SORT max_bytes DESC;
required_capability: double_quotes_source_enclosing
METRICS "k8s" max_bytes=max(to_long(network.total_bytes_in)) BY cluster | SORT max_bytes DESC;

max_bytes:long | cluster: keyword
10797 | qa
10277 | prod
7403 | staging
;

maxRate
maxRateAndSourceTripleQuoting
required_capability: metrics_syntax
METRICS k8s max(rate(network.total_bytes_in, 1minute));
required_capability: double_quotes_source_enclosing
METRICS """k8s""" max(rate(network.total_bytes_in, 1minute));

max(rate(network.total_bytes_in, 1minute)): double
790.4235090751945
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,20 @@ emp_no:integer | languages:integer | lang_name:keyword
10004 | 5 | five
;

keywordByMvInt
keywordByMvIntAndQuotedSource
required_capability: lookup_v4
ROW int=[1, 2, 3]
| LOOKUP int_number_names ON int
| LOOKUP "int_number_names" ON int
;

int:integer | name:keyword
[1, 2, 3] | [one, two, three]
;

keywordByDupeInt
keywordByDupeIntAndTripleQuotedSource
required_capability: lookup_v4
ROW int=[1, 1, 1]
| LOOKUP int_number_names ON int
| LOOKUP """int_number_names""" ON int
;

int:integer | name:keyword
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@ a:integer | b:integer
;


length
from employees | sort emp_no | limit 3 | eval l = length(first_name) | keep emp_no, l;
lengthAndSourceQuoting
required_capability: double_quotes_source_enclosing
from "employees" | sort emp_no | limit 3 | eval l = length(first_name) | keep emp_no, l;

emp_no:integer | l:integer
10001 | 6
Expand Down Expand Up @@ -79,10 +80,11 @@ a:integer | ss:keyword | l:keyword | r:keyword
1 | bcd | ab | cd
;

stringCastEmp
stringCastEmpAndSourceTripleQuoting
required_capability: string_literal_auto_casting
required_capability: double_quotes_source_enclosing

from employees
from """employees"""
| eval ss = substring(first_name, "2")
| sort emp_no
| keep emp_no, first_name, ss
Expand Down
41 changes: 20 additions & 21 deletions x-pack/plugin/esql/src/main/antlr/EsqlBaseLexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,15 @@ WS
: [ \r\n\t]+ -> channel(HIDDEN)
;

fragment INDEX_UNQUOTED_IDENTIFIER_PART
: ~[=`|,[\]/ \t\r\n]
| '/' ~[*/] // allow single / but not followed by another / or * which would start a comment
// in 8.14 ` were not allowed
// this has been relaxed in 8.15 since " is used for quoting
fragment UNQUOTED_SOURCE_PART
: ~[:"=|,[\]/ \t\r\n]
| '/' ~[*/] // allow single / but not followed by another / or * which would start a comment -- used in index pattern date spec
;
INDEX_UNQUOTED_IDENTIFIER
: INDEX_UNQUOTED_IDENTIFIER_PART+
UNQUOTED_SOURCE
: UNQUOTED_SOURCE_PART+
;
//
Expand Down Expand Up @@ -202,15 +204,13 @@ mode FROM_MODE;
FROM_PIPE : PIPE -> type(PIPE), popMode;
FROM_OPENING_BRACKET : OPENING_BRACKET -> type(OPENING_BRACKET);
FROM_CLOSING_BRACKET : CLOSING_BRACKET -> type(CLOSING_BRACKET);
FROM_COLON : COLON -> type(COLON);
FROM_COMMA : COMMA -> type(COMMA);
FROM_ASSIGN : ASSIGN -> type(ASSIGN);
FROM_QUOTED_STRING : QUOTED_STRING -> type(QUOTED_STRING);

METADATA : 'metadata';
FROM_INDEX_UNQUOTED_IDENTIFIER
: INDEX_UNQUOTED_IDENTIFIER -> type(INDEX_UNQUOTED_IDENTIFIER)
;
FROM_UNQUOTED_SOURCE : UNQUOTED_SOURCE -> type(UNQUOTED_SOURCE);
FROM_QUOTED_SOURCE : QUOTED_STRING -> type(QUOTED_STRING);
FROM_LINE_COMMENT
: LINE_COMMENT -> channel(HIDDEN)
Expand Down Expand Up @@ -301,10 +301,6 @@ ENRICH_POLICY_NAME
: (ENRICH_POLICY_NAME_BODY+ COLON)? ENRICH_POLICY_NAME_BODY+
;

ENRICH_QUOTED_IDENTIFIER
: QUOTED_IDENTIFIER -> type(QUOTED_IDENTIFIER)
;
ENRICH_MODE_UNQUOTED_VALUE
: ENRICH_POLICY_NAME -> type(ENRICH_POLICY_NAME)
;
Expand All @@ -321,7 +317,7 @@ ENRICH_WS
: WS -> channel(HIDDEN)
;

// submode for Enrich to allow different lexing between policy identifier (loose) and field identifiers
// submode for Enrich to allow different lexing between policy source (loose) and field identifiers
mode ENRICH_FIELD_MODE;
ENRICH_FIELD_PIPE : PIPE -> type(PIPE), popMode, popMode;
ENRICH_FIELD_ASSIGN : ASSIGN -> type(ASSIGN);
Expand Down Expand Up @@ -353,13 +349,13 @@ ENRICH_FIELD_WS
// LOOKUP ON key
mode LOOKUP_MODE;
LOOKUP_PIPE : PIPE -> type(PIPE), popMode;
LOOKUP_COLON : COLON -> type(COLON);
LOOKUP_COMMA : COMMA -> type(COMMA);
LOOKUP_DOT: DOT -> type(DOT);
LOOKUP_ON : ON -> type(ON), pushMode(LOOKUP_FIELD_MODE);

LOOKUP_INDEX_UNQUOTED_IDENTIFIER
: INDEX_UNQUOTED_IDENTIFIER -> type(INDEX_UNQUOTED_IDENTIFIER)
;
LOOKUP_UNQUOTED_SOURCE: UNQUOTED_SOURCE -> type(UNQUOTED_SOURCE);
LOOKUP_QUOTED_SOURCE : QUOTED_STRING -> type(QUOTED_STRING);

LOOKUP_LINE_COMMENT
: LINE_COMMENT -> channel(HIDDEN)
Expand Down Expand Up @@ -486,9 +482,8 @@ SETTING_WS
mode METRICS_MODE;
METRICS_PIPE : PIPE -> type(PIPE), popMode;

METRICS_INDEX_UNQUOTED_IDENTIFIER
: INDEX_UNQUOTED_IDENTIFIER -> type(INDEX_UNQUOTED_IDENTIFIER), popMode, pushMode(CLOSING_METRICS_MODE)
;
METRICS_UNQUOTED_SOURCE: UNQUOTED_SOURCE -> type(UNQUOTED_SOURCE), popMode, pushMode(CLOSING_METRICS_MODE);
METRICS_QUOTED_SOURCE : QUOTED_STRING -> type(QUOTED_STRING), popMode, pushMode(CLOSING_METRICS_MODE);

METRICS_LINE_COMMENT
: LINE_COMMENT -> channel(HIDDEN)
Expand All @@ -505,6 +500,10 @@ METRICS_WS
// TODO: remove this workaround mode - see https://github.com/elastic/elasticsearch/issues/108528
mode CLOSING_METRICS_MODE;

CLOSING_METRICS_COLON
: COLON -> type(COLON), popMode, pushMode(METRICS_MODE)
;

CLOSING_METRICS_COMMA
: COMMA -> type(COMMA), popMode, pushMode(METRICS_MODE)
;
Expand Down
2 changes: 1 addition & 1 deletion x-pack/plugin/esql/src/main/antlr/EsqlBaseLexer.tokens

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 16 additions & 6 deletions x-pack/plugin/esql/src/main/antlr/EsqlBaseParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -106,11 +106,21 @@ field
;

fromCommand
: FROM indexIdentifier (COMMA indexIdentifier)* metadata?
: FROM indexPattern (COMMA indexPattern)* metadata?
;

indexIdentifier
: INDEX_UNQUOTED_IDENTIFIER
indexPattern
: clusterString COLON indexString
| indexString
;

clusterString
: UNQUOTED_SOURCE
;

indexString
: UNQUOTED_SOURCE
| QUOTED_STRING
;

metadata
Expand All @@ -119,15 +129,15 @@ metadata
;

metadataOption
: METADATA indexIdentifier (COMMA indexIdentifier)*
: METADATA UNQUOTED_SOURCE (COMMA UNQUOTED_SOURCE)*
;

deprecated_metadata
: OPENING_BRACKET metadataOption CLOSING_BRACKET
;

metricsCommand
: METRICS indexIdentifier (COMMA indexIdentifier)* aggregates=fields? (BY grouping=fields)?
: METRICS indexPattern (COMMA indexPattern)* aggregates=fields? (BY grouping=fields)?
;

evalCommand
Expand Down Expand Up @@ -280,5 +290,5 @@ enrichWithClause
;

lookupCommand
: LOOKUP tableName=INDEX_UNQUOTED_IDENTIFIER ON matchFields=qualifiedNamePatterns
: LOOKUP tableName=indexPattern ON matchFields=qualifiedNamePatterns
;
2 changes: 1 addition & 1 deletion x-pack/plugin/esql/src/main/antlr/EsqlBaseParser.tokens

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,12 @@ public enum Cap {
* Fix to GROK and DISSECT that allows extracting attributes with the same name as the input
* https://github.com/elastic/elasticsearch/issues/110184
*/
GROK_DISSECT_MASKING;
GROK_DISSECT_MASKING,

/**
* Support for quoting index sources in double quotes.
*/
DOUBLE_QUOTES_SOURCE_ENCLOSING;

Cap() {
snapshotOnly = false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,28 +20,31 @@ public Object visit(ParseTree tree) {
}

@Override
public Object visitTerminal(TerminalNode node) {
public Source visitTerminal(TerminalNode node) {
return ParserUtils.source(node);
}

static String unquoteString(Source source) {
String text = source.text();
if (text == null) {
static String unquote(Source source) {
return unquote(source.text());
}

static String unquote(String string) {
if (string == null) {
return null;
}

// unescaped strings can be interpreted directly
if (text.startsWith("\"\"\"")) {
return text.substring(3, text.length() - 3);
if (string.startsWith("\"\"\"")) {
return string.substring(3, string.length() - 3);
}

text = text.substring(1, text.length() - 1);
string = string.substring(1, string.length() - 1);
StringBuilder sb = new StringBuilder();

for (int i = 0; i < text.length();) {
if (text.charAt(i) == '\\') {
for (int i = 0; i < string.length();) {
if (string.charAt(i) == '\\') {
// ANTLR4 Grammar guarantees there is always a character after the `\`
switch (text.charAt(++i)) {
switch (string.charAt(++i)) {
case 't' -> sb.append('\t');
case 'n' -> sb.append('\n');
case 'r' -> sb.append('\r');
Expand All @@ -51,11 +54,11 @@ static String unquoteString(Source source) {
// will be interpreted as regex, so we have to escape it
default ->
// unknown escape sequence, pass through as-is, e.g: `...\w...`
sb.append('\\').append(text.charAt(i));
sb.append('\\').append(string.charAt(i));
}
i++;
} else {
sb.append(text.charAt(i++));
sb.append(string.charAt(i++));
}
}
return sb.toString();
Expand Down
Loading

0 comments on commit b906ce3

Please sign in to comment.