ESQL: change from quoting from backtick to quote (elastic#108395)

* ESQL: change from quoting from backtick to quote For historical reasons, the source declaration inside FROM command is treated as an identifier, using backticks (`) for escaping the value. This is inconsistent since the source is not an identifier (field name) but an index name which has different semantics. `index` means a field name index while "index" means a literal with said value. In case of FROM, the index name/location is more like a literal (also in unquoted form) than an identifier (that is a reference to a value). This PR tweaks the grammar and plugs in the quoted string logic so that both the single quote (") and triple quote (""") are allowed. * Update grammar * Add more tests * Add a few more tests * Add extra test * Update docs/changelog/108395.yaml * Adress review comments * Add doc note * Revert test rename * Fix quoting with remote cluster * Update docs/reference/esql/source-commands/from.asciidoc Co-authored-by: marciw <333176+marciw@users.noreply.github.com> --------- Co-authored-by: Bogdan Pintea <bogdan.pintea@elastic.co> Co-authored-by: Bogdan Pintea <pintea@mailbox.org> Co-authored-by: marciw <333176+marciw@users.noreply.github.com> Co-authored-by: Elastic Machine <elasticmachine@users.noreply.github.com>
arteam · Jun 30, 2024 · b906ce3 · b906ce3
1 parent 3bc485c
commit b906ce3
Show file tree

Hide file tree

Showing 23 changed files with 2,168 additions and 1,788 deletions.
diff --git a/docs/changelog/108395.yaml b/docs/changelog/108395.yaml
@@ -0,0 +1,5 @@
+pr: 108395
+summary: "ESQL: change from quoting from backtick to quote"
+area: ES|QL
+type: bug
+issues: []
diff --git a/docs/reference/esql/source-commands/from.asciidoc b/docs/reference/esql/source-commands/from.asciidoc
@@ -82,3 +82,11 @@ Use the optional `METADATA` directive to enable <<esql-metadata-fields,metadata
 ----
 FROM employees METADATA _id
 ----
+
+Use enclosing double quotes (`"`) or three enclosing double quotes (`"""`) to escape index names
+that contain special characters:
+
+[source,esql]
+----
+FROM "this=that", """this[that"""
+----
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/k8s-metrics.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/k8s-metrics.csv-spec
@@ -10,19 +10,21 @@ METRICS k8s | sort @timestamp DESC, cluster, pod | keep @timestamp,cluster,pod,n
 2024-05-10T00:22:49.000Z | staging         | two         | 3                     | 1.75
 ;
 
-metricsWithAggs
+metricsWithAggsAndSourceQuoting
 required_capability: metrics_syntax
-METRICS k8s max_bytes=max(to_long(network.total_bytes_in)) BY cluster | SORT max_bytes DESC;
+required_capability: double_quotes_source_enclosing
+METRICS "k8s" max_bytes=max(to_long(network.total_bytes_in)) BY cluster | SORT max_bytes DESC;
 
 max_bytes:long | cluster: keyword
 10797          | qa        
 10277          | prod
 7403           | staging
 ;
 
-maxRate
+maxRateAndSourceTripleQuoting
 required_capability: metrics_syntax
-METRICS k8s max(rate(network.total_bytes_in, 1minute));
+required_capability: double_quotes_source_enclosing
+METRICS """k8s""" max(rate(network.total_bytes_in, 1minute));
 
 max(rate(network.total_bytes_in, 1minute)): double
 790.4235090751945

diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup.csv-spec
@@ -16,20 +16,20 @@ emp_no:integer | languages:integer | lang_name:keyword
          10004 |                 5 | five
 ;
 
-keywordByMvInt
+keywordByMvIntAndQuotedSource
 required_capability: lookup_v4
 ROW int=[1, 2, 3]
-| LOOKUP int_number_names ON int
+| LOOKUP "int_number_names" ON int
 ;
 
 int:integer | name:keyword
   [1, 2, 3] | [one, two, three]
 ;
 
-keywordByDupeInt
+keywordByDupeIntAndTripleQuotedSource
 required_capability: lookup_v4
 ROW int=[1, 1, 1]
-| LOOKUP int_number_names ON int
+| LOOKUP """int_number_names""" ON int
 ;
 
 int:integer | name:keyword

diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec
@@ -28,8 +28,9 @@ a:integer | b:integer
 ;
 
 
-length
-from employees | sort emp_no | limit 3 | eval l = length(first_name) | keep emp_no, l;
+lengthAndSourceQuoting
+required_capability: double_quotes_source_enclosing
+from "employees" | sort emp_no | limit 3 | eval l = length(first_name) | keep emp_no, l;
 
 emp_no:integer | l:integer
 10001 | 6
@@ -79,10 +80,11 @@ a:integer | ss:keyword | l:keyword | r:keyword
 1         | bcd        | ab        | cd
 ;
 
-stringCastEmp
+stringCastEmpAndSourceTripleQuoting
 required_capability: string_literal_auto_casting
+required_capability: double_quotes_source_enclosing
 
-from employees
+from """employees"""
 | eval ss = substring(first_name, "2")
 | sort emp_no
 | keep emp_no, first_name, ss

diff --git a/x-pack/plugin/esql/src/main/antlr/EsqlBaseLexer.g4 b/x-pack/plugin/esql/src/main/antlr/EsqlBaseLexer.g4
@@ -34,13 +34,15 @@ WS
     : [ \r\n\t]+ -> channel(HIDDEN)
     ;
 
-fragment INDEX_UNQUOTED_IDENTIFIER_PART
-    : ~[=`|,[\]/ \t\r\n]
-    | '/' ~[*/] // allow single / but not followed by another / or * which would start a comment
+// in 8.14 ` were not allowed
+// this has been relaxed in 8.15 since " is used for quoting
+fragment UNQUOTED_SOURCE_PART
+    : ~[:"=|,[\]/ \t\r\n]
+    | '/' ~[*/] // allow single / but not followed by another / or * which would start a comment -- used in index pattern date spec
     ;
 
-INDEX_UNQUOTED_IDENTIFIER
-    : INDEX_UNQUOTED_IDENTIFIER_PART+
+UNQUOTED_SOURCE
+    : UNQUOTED_SOURCE_PART+
     ;
 
 //
@@ -202,15 +204,13 @@ mode FROM_MODE;
 FROM_PIPE : PIPE -> type(PIPE), popMode;
 FROM_OPENING_BRACKET : OPENING_BRACKET -> type(OPENING_BRACKET);
 FROM_CLOSING_BRACKET : CLOSING_BRACKET -> type(CLOSING_BRACKET);
+FROM_COLON : COLON -> type(COLON);
 FROM_COMMA : COMMA -> type(COMMA);
 FROM_ASSIGN : ASSIGN -> type(ASSIGN);
-FROM_QUOTED_STRING : QUOTED_STRING -> type(QUOTED_STRING);
-
 METADATA : 'metadata';
 
-FROM_INDEX_UNQUOTED_IDENTIFIER
-    : INDEX_UNQUOTED_IDENTIFIER -> type(INDEX_UNQUOTED_IDENTIFIER)
-    ;
+FROM_UNQUOTED_SOURCE : UNQUOTED_SOURCE -> type(UNQUOTED_SOURCE);
+FROM_QUOTED_SOURCE : QUOTED_STRING -> type(QUOTED_STRING);
 
 FROM_LINE_COMMENT
     : LINE_COMMENT -> channel(HIDDEN)
@@ -301,10 +301,6 @@ ENRICH_POLICY_NAME
     : (ENRICH_POLICY_NAME_BODY+ COLON)? ENRICH_POLICY_NAME_BODY+
     ;
 
-ENRICH_QUOTED_IDENTIFIER
-    : QUOTED_IDENTIFIER -> type(QUOTED_IDENTIFIER)
-    ;
-
 ENRICH_MODE_UNQUOTED_VALUE
     : ENRICH_POLICY_NAME -> type(ENRICH_POLICY_NAME)
     ;
@@ -321,7 +317,7 @@ ENRICH_WS
     : WS -> channel(HIDDEN)
     ;
 
-// submode for Enrich to allow different lexing between policy identifier (loose) and field identifiers
+// submode for Enrich to allow different lexing between policy source (loose) and field identifiers
 mode ENRICH_FIELD_MODE;
 ENRICH_FIELD_PIPE : PIPE -> type(PIPE), popMode, popMode;
 ENRICH_FIELD_ASSIGN : ASSIGN -> type(ASSIGN);
@@ -353,13 +349,13 @@ ENRICH_FIELD_WS
 // LOOKUP ON key
 mode LOOKUP_MODE;
 LOOKUP_PIPE : PIPE -> type(PIPE), popMode;
+LOOKUP_COLON : COLON -> type(COLON);
 LOOKUP_COMMA : COMMA -> type(COMMA);
 LOOKUP_DOT: DOT -> type(DOT);
 LOOKUP_ON : ON -> type(ON), pushMode(LOOKUP_FIELD_MODE);
 
-LOOKUP_INDEX_UNQUOTED_IDENTIFIER
-    : INDEX_UNQUOTED_IDENTIFIER -> type(INDEX_UNQUOTED_IDENTIFIER)
-    ;
+LOOKUP_UNQUOTED_SOURCE: UNQUOTED_SOURCE -> type(UNQUOTED_SOURCE);
+LOOKUP_QUOTED_SOURCE : QUOTED_STRING -> type(QUOTED_STRING);
 
 LOOKUP_LINE_COMMENT
     : LINE_COMMENT -> channel(HIDDEN)
@@ -486,9 +482,8 @@ SETTING_WS
 mode METRICS_MODE;
 METRICS_PIPE : PIPE -> type(PIPE), popMode;
 
-METRICS_INDEX_UNQUOTED_IDENTIFIER
-    : INDEX_UNQUOTED_IDENTIFIER -> type(INDEX_UNQUOTED_IDENTIFIER), popMode, pushMode(CLOSING_METRICS_MODE)
-    ;
+METRICS_UNQUOTED_SOURCE: UNQUOTED_SOURCE -> type(UNQUOTED_SOURCE), popMode, pushMode(CLOSING_METRICS_MODE);
+METRICS_QUOTED_SOURCE : QUOTED_STRING -> type(QUOTED_STRING), popMode, pushMode(CLOSING_METRICS_MODE);
 
 METRICS_LINE_COMMENT
     : LINE_COMMENT -> channel(HIDDEN)
@@ -505,6 +500,10 @@ METRICS_WS
 // TODO: remove this workaround mode - see https://github.com/elastic/elasticsearch/issues/108528
 mode CLOSING_METRICS_MODE;
 
+CLOSING_METRICS_COLON
+    : COLON -> type(COLON), popMode, pushMode(METRICS_MODE)
+    ;
+
 CLOSING_METRICS_COMMA
     : COMMA -> type(COMMA), popMode, pushMode(METRICS_MODE)
     ;

diff --git a/x-pack/plugin/esql/src/main/antlr/EsqlBaseLexer.tokens b/x-pack/plugin/esql/src/main/antlr/EsqlBaseLexer.tokens
diff --git a/x-pack/plugin/esql/src/main/antlr/EsqlBaseParser.g4 b/x-pack/plugin/esql/src/main/antlr/EsqlBaseParser.g4
@@ -106,11 +106,21 @@ field
     ;
 
 fromCommand
-    : FROM indexIdentifier (COMMA indexIdentifier)* metadata?
+    : FROM indexPattern (COMMA indexPattern)* metadata?
     ;
 
-indexIdentifier
-    : INDEX_UNQUOTED_IDENTIFIER
+indexPattern
+    : clusterString COLON indexString
+    | indexString
+    ;
+
+clusterString
+    : UNQUOTED_SOURCE
+    ;
+
+indexString
+    : UNQUOTED_SOURCE
+    | QUOTED_STRING
     ;
 
 metadata
@@ -119,15 +129,15 @@ metadata
     ;
 
 metadataOption
-    : METADATA indexIdentifier (COMMA indexIdentifier)*
+    : METADATA UNQUOTED_SOURCE (COMMA UNQUOTED_SOURCE)*
     ;
 
 deprecated_metadata
     : OPENING_BRACKET metadataOption CLOSING_BRACKET
     ;
 
 metricsCommand
-    : METRICS indexIdentifier (COMMA indexIdentifier)* aggregates=fields? (BY grouping=fields)?
+    : METRICS indexPattern (COMMA indexPattern)* aggregates=fields? (BY grouping=fields)?
     ;
 
 evalCommand
@@ -280,5 +290,5 @@ enrichWithClause
     ;
 
 lookupCommand
-    : LOOKUP tableName=INDEX_UNQUOTED_IDENTIFIER ON matchFields=qualifiedNamePatterns
+    : LOOKUP tableName=indexPattern ON matchFields=qualifiedNamePatterns
     ;
diff --git a/x-pack/plugin/esql/src/main/antlr/EsqlBaseParser.tokens b/x-pack/plugin/esql/src/main/antlr/EsqlBaseParser.tokens
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java
@@ -96,7 +96,12 @@ public enum Cap {
          * Fix to GROK and DISSECT that allows extracting attributes with the same name as the input
          * https://github.com/elastic/elasticsearch/issues/110184
          */
-        GROK_DISSECT_MASKING;
+        GROK_DISSECT_MASKING,
+
+        /**
+         * Support for quoting index sources in double quotes.
+         */
+        DOUBLE_QUOTES_SOURCE_ENCLOSING;
 
         Cap() {
             snapshotOnly = false;

diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/AbstractBuilder.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/AbstractBuilder.java
@@ -20,28 +20,31 @@ public Object visit(ParseTree tree) {
     }
 
     @Override
-    public Object visitTerminal(TerminalNode node) {
+    public Source visitTerminal(TerminalNode node) {
         return ParserUtils.source(node);
     }
 
-    static String unquoteString(Source source) {
-        String text = source.text();
-        if (text == null) {
+    static String unquote(Source source) {
+        return unquote(source.text());
+    }
+
+    static String unquote(String string) {
+        if (string == null) {
             return null;
         }
 
         // unescaped strings can be interpreted directly
-        if (text.startsWith("\"\"\"")) {
-            return text.substring(3, text.length() - 3);
+        if (string.startsWith("\"\"\"")) {
+            return string.substring(3, string.length() - 3);
         }
 
-        text = text.substring(1, text.length() - 1);
+        string = string.substring(1, string.length() - 1);
         StringBuilder sb = new StringBuilder();
 
-        for (int i = 0; i < text.length();) {
-            if (text.charAt(i) == '\\') {
+        for (int i = 0; i < string.length();) {
+            if (string.charAt(i) == '\\') {
                 // ANTLR4 Grammar guarantees there is always a character after the `\`
-                switch (text.charAt(++i)) {
+                switch (string.charAt(++i)) {
                     case 't' -> sb.append('\t');
                     case 'n' -> sb.append('\n');
                     case 'r' -> sb.append('\r');
@@ -51,11 +54,11 @@ static String unquoteString(Source source) {
                     // will be interpreted as regex, so we have to escape it
                     default ->
                         // unknown escape sequence, pass through as-is, e.g: `...\w...`
-                        sb.append('\\').append(text.charAt(i));
+                        sb.append('\\').append(string.charAt(i));
                 }
                 i++;
             } else {
-                sb.append(text.charAt(i++));
+                sb.append(string.charAt(i++));
             }
         }
         return sb.toString();