Skip to content

Commit

Permalink
[KYUUBI #4098] Separate Trino lexer file
Browse files Browse the repository at this point in the history
### _Why are the changes needed?_

There are some differences between Spark and Trino in the string literal pattern.
Spark:
```
STRING
    : '\'' ( ~'\'' | '\'\'' )* '\''
    | 'R\'' (~'\'')* '\''
    | 'R"'(~'"')* '"'
    ;
```

Trino:
```
STRING
    : '\'' ( ~'\'' | '\'\'' )* '\''
```

Since this lexer is used for Trino only, it should follow Trino's behavior.

### _How was this patch tested?_
Pass CI

Closes #4098 from ulysses-you/string.

Closes #4098

00b77c5 [ulysses-you] address comments
f371f09 [ulysses-you] Separate Trino lexer file

Authored-by: ulysses-you <ulyssesyou18@gmail.com>
Signed-off-by: ulysses-you <ulyssesyou@apache.org>
  • Loading branch information
ulysses-you committed Jan 6, 2023
1 parent 15742ad commit 88e9498
Show file tree
Hide file tree
Showing 6 changed files with 134 additions and 83 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -81,5 +81,5 @@ conf/kyuubi-env.sh

# For Antlr
kyuubi-server/gen
kyuubi-server/src/main/antlr4/org/apache/kyuubi/sql/KyuubiSqlBaseLexer.tokens
kyuubi-server/src/main/antlr4/org/apache/kyuubi/sql/*.tokens
kyuubi-server/src/main/antlr4/org/apache/kyuubi/sql/gen/
Original file line number Diff line number Diff line change
Expand Up @@ -43,74 +43,13 @@ lexer grammar KyuubiSqlBaseLexer;

SEMICOLON: ';';

AND: 'AND';
BQ: '`';
BY: 'BY';
COMMA: ',';
DOT: '.';
EQ : '=' | '==';
NSEQ: '<=>';
NEQ : '<>';
NEQJ: '!=';
LT : '<';
LTE : '<=' | '!>';
GT : '>';
GTE : '>=' | '!<';
LEFT_PAREN: '(';
RIGHT_PAREN: ')';
OR: 'OR';
SEARCH_STRING_ESCAPE: '\'' '\\' '\'';

DESC: 'DESC';
DESCRIBE: 'DESCRIBE';
FROM: 'FROM';
FALSE: 'FALSE';
LIKE: 'LIKE';
IN: 'IN';
WHERE: 'WHERE';

KYUUBI: 'KYUUBI';
KYUUBIADMIN: 'KYUUBIADMIN';

AUTO_INCREMENT: 'AUTO_INCREMENT';
CASE_SENSITIVE: 'CASE_SENSITIVE';
CREATE_PARAMS: 'CREATE_PARAMS';
DATA_TYPE: 'DATA_TYPE';
ESCAPE: 'ESCAPE';
FIXED_PREC_SCALE: 'FIXED_PREC_SCALE';
IS: 'IS';
LITERAL_PREFIX: 'LITERAL_PREFIX';
LITERAL_SUFFIX: 'LITERAL_SUFFIX';
LOCAL_TYPE_NAME: 'LOCAL_TYPE_NAME';
MAXIMUM_SCALE: 'MAXIMUM_SCALE';
MINIMUM_SCALE: 'MINIMUM_SCALE';
NULL: 'NULL';
NULLABLE: 'NULLABLE';
NUM_PREC_RADIX: 'NUM_PREC_RADIX';
ORDER: 'ORDER';
PRECISION: 'PRECISION';
REMARKS: 'REMARKS';
REF_GENERATION: 'REF_GENERATION';
SEARCHABLE: 'SEARCHABLE';
SELECT: 'SELECT';
SESSION: 'SESSION';
SQL_DATA_TYPE: 'SQL_DATA_TYPE';
SQL_DATETIME_SUB: 'SQL_DATETIME_SUB';
SYSTEM_JDBC_CATALOGS: 'SYSTEM.JDBC.CATALOGS';
SYSTEM_JDBC_SCHEMAS: 'SYSTEM.JDBC.SCHEMAS';
SYSTEM_JDBC_TABLES: 'SYSTEM.JDBC.TABLES';
SYSTEM_JDBC_TABLE_TYPES: 'SYSTEM.JDBC.TABLE_TYPES';
SYSTEM_JDBC_TYPES: 'SYSTEM.JDBC.TYPES';
SELF_REFERENCING_COL_NAME: 'SELF_REFERENCING_COL_NAME';
UNSIGNED_ATTRIBUTE: 'UNSIGNED_ATTRIBUTE';
TABLE_CAT: 'TABLE_CAT';
TABLE_CATALOG: 'TABLE_CATALOG';
TABLE_NAME: 'TABLE_NAME';
TABLE_SCHEM: 'TABLE_SCHEM';
TABLE_TYPE: 'TABLE_TYPE';
TYPE_CAT: 'TYPE_CAT';
TYPE_NAME: 'TYPE_NAME';
TYPE_SCHEM: 'TYPE_SCHEM';

BACKQUOTED_IDENTIFIER
: '`' ( ~'`' | '``' )* '`'
Expand All @@ -129,7 +68,7 @@ IDENTIFIER
;

STRING
: '\'' ( ~'\'' | '\'\'' )* '\''
: '\'' ( ~('\''|'\\') | ('\\' .) )* '\''
| 'R\'' (~'\'')* '\''
| 'R"'(~'"')* '"'
;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

// This lexer should follow Trino `https://github.com/trinodb/trino/blob/master/core/trino-parser/src/main/antlr4/io/trino/sql/parser/SqlBase.g4`

lexer grammar KyuubiTrinoFeBaseLexer;

// Statement terminator and parentheses. Each token is a fixed one-character literal.
SEMICOLON: ';';
LEFT_PAREN: '(';
RIGHT_PAREN: ')';

// Boolean connectives, punctuation and comparison operators.
// EQ, LTE and GTE each accept two spellings; every other token here is a single literal.
// NOTE(review): '==', '<=>', '!>' and '!<' do not appear to be part of Trino's SqlBase.g4 —
// confirm whether they should remain in a lexer that is meant to follow Trino.
AND: 'AND';
BQ: '`';
BY: 'BY';
COMMA: ',';
DOT: '.';
EQ : '=' | '==';
NSEQ: '<=>';
NEQ : '<>';
NEQJ: '!=';
LT : '<';
LTE : '<=' | '!>';
GT : '>';
GTE : '>=' | '!<';
OR: 'OR';

// Query keywords. Like every keyword in this grammar they are upper-case literals only.
// NOTE(review): the Scala parser wraps its input in UpperCaseCharStream, presumably so that
// lower-case input still matches these literals — confirm.
FROM: 'FROM';
FALSE: 'FALSE';
LIKE: 'LIKE';
IN: 'IN';
WHERE: 'WHERE';

// Remaining keywords plus the metadata column names and system table names that the
// parser grammar matches literally.
// Some literals are prefixes of others (NULL / NULLABLE, TABLE_CAT / TABLE_CATALOG).
// ANTLR's lexer always takes the longest match, so the longer literal wins whenever the
// input contains it.
ESCAPE: 'ESCAPE';
AUTO_INCREMENT: 'AUTO_INCREMENT';
CASE_SENSITIVE: 'CASE_SENSITIVE';
CREATE_PARAMS: 'CREATE_PARAMS';
DATA_TYPE: 'DATA_TYPE';
FIXED_PREC_SCALE: 'FIXED_PREC_SCALE';
IS: 'IS';
LITERAL_PREFIX: 'LITERAL_PREFIX';
LITERAL_SUFFIX: 'LITERAL_SUFFIX';
LOCAL_TYPE_NAME: 'LOCAL_TYPE_NAME';
MAXIMUM_SCALE: 'MAXIMUM_SCALE';
MINIMUM_SCALE: 'MINIMUM_SCALE';
NULL: 'NULL';
NULLABLE: 'NULLABLE';
NUM_PREC_RADIX: 'NUM_PREC_RADIX';
ORDER: 'ORDER';
PRECISION: 'PRECISION';
REMARKS: 'REMARKS';
REF_GENERATION: 'REF_GENERATION';
SEARCHABLE: 'SEARCHABLE';
SELECT: 'SELECT';
SQL_DATA_TYPE: 'SQL_DATA_TYPE';
SQL_DATETIME_SUB: 'SQL_DATETIME_SUB';
// Each SYSTEM_JDBC_* token is one literal that includes the dots, so the whole dotted
// name is lexed as a single token and no whitespace is accepted around the dots.
SYSTEM_JDBC_CATALOGS: 'SYSTEM.JDBC.CATALOGS';
SYSTEM_JDBC_SCHEMAS: 'SYSTEM.JDBC.SCHEMAS';
SYSTEM_JDBC_TABLES: 'SYSTEM.JDBC.TABLES';
SYSTEM_JDBC_TABLE_TYPES: 'SYSTEM.JDBC.TABLE_TYPES';
SYSTEM_JDBC_TYPES: 'SYSTEM.JDBC.TYPES';
SELF_REFERENCING_COL_NAME: 'SELF_REFERENCING_COL_NAME';
UNSIGNED_ATTRIBUTE: 'UNSIGNED_ATTRIBUTE';
TABLE_CAT: 'TABLE_CAT';
TABLE_CATALOG: 'TABLE_CATALOG';
TABLE_NAME: 'TABLE_NAME';
TABLE_SCHEM: 'TABLE_SCHEM';
TABLE_TYPE: 'TABLE_TYPE';
TYPE_CAT: 'TYPE_CAT';
TYPE_NAME: 'TYPE_NAME';
TYPE_SCHEM: 'TYPE_SCHEM';

// Matches exactly three characters: a single quote, a backslash and a single quote.
// Declared as a fragment, so it never produces a token by itself.
fragment SEARCH_STRING_ESCAPE: '\'' '\\' '\'';

// Token form of the fragment above; the parser grammar expects it after the ESCAPE keyword.
// Keep this rule above STRING. Both rules match that same three-character text, and
// ANTLR resolves equal-length matches in favour of the rule declared first.
STRING_ESCAPE
: SEARCH_STRING_ESCAPE
;

// Single-quoted string literal. A quote inside the literal is written as two consecutive
// quotes. A backslash has no special meaning here: it is consumed like any other non-quote
// character, so it cannot escape the closing quote.
STRING
: '\'' ( ~'\'' | '\'\'' )* '\''
;

// Line comment: '--' up to the end of the line. The trailing '\r' and '\n' are both optional,
// so a comment on the last line of the input is matched too. Sent to the HIDDEN channel.
SIMPLE_COMMENT
: '--' ~[\r\n]* '\r'? '\n'? -> channel(HIDDEN)
;

// Block comment. The non-greedy '.*?' stops at the first '*/', so comments do not nest.
// Sent to the HIDDEN channel.
BRACKETED_COMMENT
: '/*' .*? '*/' -> channel(HIDDEN)
;

// Runs of spaces, carriage returns, line feeds and tabs, sent to the HIDDEN channel.
WS : [ \r\n\t]+ -> channel(HIDDEN)
;

// Catch-all for anything we can't recognize.
// We use this to be able to ignore and recover all the text
// when splitting statements with DelimiterLexer
// NOTE(review): DelimiterLexer is not visible from this file — confirm the reference
// still applies to this project.
// Keep this rule last. It matches any single character, and ANTLR prefers the rule
// declared first when several rules match the same length, so every other
// one-character token must be declared above it.
UNRECOGNIZED
: .
;
Original file line number Diff line number Diff line change
Expand Up @@ -17,50 +17,50 @@

parser grammar KyuubiTrinoFeBaseParser;

options { tokenVocab = KyuubiSqlBaseLexer; }
options { tokenVocab = KyuubiTrinoFeBaseLexer; }

singleStatement
: statement SEMICOLON* EOF
;

statement
: SELECT TABLE_SCHEM COMMA TABLE_CATALOG FROM SYSTEM_JDBC_SCHEMAS
(WHERE (TABLE_CATALOG EQ catalog=STRING+)? AND? (TABLE_SCHEM LIKE schema=STRING+)?)?
ORDER BY TABLE_CATALOG COMMA TABLE_SCHEM #getSchemas
| SELECT TABLE_CAT FROM SYSTEM_JDBC_CATALOGS ORDER BY TABLE_CAT #getCatalogs
| SELECT TABLE_TYPE FROM SYSTEM_JDBC_TABLE_TYPES ORDER BY TABLE_TYPE #getTableTypes
(WHERE (TABLE_CATALOG EQ catalog=stringLit)? AND? (TABLE_SCHEM LIKE schema=stringLit)?)?
ORDER BY TABLE_CATALOG COMMA TABLE_SCHEM #getSchemas
| SELECT TABLE_CAT FROM SYSTEM_JDBC_CATALOGS ORDER BY TABLE_CAT #getCatalogs
| SELECT TABLE_TYPE FROM SYSTEM_JDBC_TABLE_TYPES ORDER BY TABLE_TYPE #getTableTypes
| SELECT TYPE_NAME COMMA DATA_TYPE COMMA PRECISION COMMA LITERAL_PREFIX COMMA
LITERAL_SUFFIX COMMA CREATE_PARAMS COMMA NULLABLE COMMA CASE_SENSITIVE COMMA
SEARCHABLE COMMA UNSIGNED_ATTRIBUTE COMMA FIXED_PREC_SCALE COMMA AUTO_INCREMENT
COMMA LOCAL_TYPE_NAME COMMA MINIMUM_SCALE COMMA MAXIMUM_SCALE COMMA SQL_DATA_TYPE
COMMA SQL_DATETIME_SUB COMMA NUM_PREC_RADIX FROM SYSTEM_JDBC_TYPES ORDER BY DATA_TYPE #getTypeInfo
COMMA SQL_DATETIME_SUB COMMA NUM_PREC_RADIX FROM SYSTEM_JDBC_TYPES ORDER BY DATA_TYPE #getTypeInfo
| SELECT TABLE_CAT COMMA TABLE_SCHEM COMMA TABLE_NAME COMMA TABLE_TYPE COMMA REMARKS COMMA
TYPE_CAT COMMA TYPE_SCHEM COMMA TYPE_NAME COMMA SELF_REFERENCING_COL_NAME COMMA REF_GENERATION
FROM SYSTEM_JDBC_TABLES
(WHERE tableCatalogFilter? AND? tableSchemaFilter? AND? tableNameFilter? AND? tableTypeFilter?)?
ORDER BY TABLE_TYPE COMMA TABLE_CAT COMMA TABLE_SCHEM COMMA TABLE_NAME #getTables
| .*? #passThrough
ORDER BY TABLE_TYPE COMMA TABLE_CAT COMMA TABLE_SCHEM COMMA TABLE_NAME #getTables
| .*? #passThrough
;

tableCatalogFilter
: TABLE_CAT IS NULL #nullCatalog
| TABLE_CAT EQ catalog=STRING+ #catalogFilter
: TABLE_CAT IS NULL #nullCatalog
| TABLE_CAT EQ catalog=stringLit #catalogFilter
;

tableSchemaFilter
: TABLE_SCHEM IS NULL #nulTableSchema
| TABLE_SCHEM LIKE schemaPattern=STRING+ ESCAPE SEARCH_STRING_ESCAPE #schemaFilter
: TABLE_SCHEM IS NULL #nulTableSchema
| TABLE_SCHEM LIKE schemaPattern=stringLit ESCAPE STRING_ESCAPE #schemaFilter
;

tableNameFilter
: TABLE_NAME LIKE tableNamePattern=STRING+ ESCAPE SEARCH_STRING_ESCAPE
: TABLE_NAME LIKE tableNamePattern=stringLit ESCAPE STRING_ESCAPE
;

tableTypeFilter
: FALSE #tableTypesAlwaysFalse
| TABLE_TYPE IN '(' stirngInValue (',' stirngInValue)* ')' #typesFilter
: FALSE #tableTypesAlwaysFalse
| TABLE_TYPE IN LEFT_PAREN stringLit (COMMA stringLit)* RIGHT_PAREN #typesFilter
;

stirngInValue
: STRING+
stringLit
: STRING
;
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,6 @@ class KyuubiTrinoFeAstBuilder extends KyuubiTrinoFeBaseParserBaseVisitor[AnyRef]
}

override def visitTypesFilter(ctx: TypesFilterContext): List[String] = {
ctx.stirngInValue().asScala.map(v => unescapeSQLString(v.getText)).toList
ctx.stringLit().asScala.map(v => unescapeSQLString(v.getText)).toList
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,15 @@ import org.antlr.v4.runtime.atn.PredictionMode
import org.antlr.v4.runtime.misc.ParseCancellationException
import org.antlr.v4.runtime.tree.ParseTree

import org.apache.kyuubi.sql.{KyuubiSqlBaseLexer, KyuubiTrinoFeBaseParser}
import org.apache.kyuubi.sql.{KyuubiTrinoFeBaseLexer, KyuubiTrinoFeBaseParser}
import org.apache.kyuubi.sql.parser.{KyuubiParserBase, PostProcessor, UpperCaseCharStream}

class KyuubiTrinoFeParser extends KyuubiParserBase[KyuubiTrinoFeBaseParser] {

override lazy val astBuilder = new KyuubiTrinoFeAstBuilder

protected def parse[T](command: String)(toResult: KyuubiTrinoFeBaseParser => T): T = {
val lexer = new KyuubiSqlBaseLexer(
val lexer = new KyuubiTrinoFeBaseLexer(
new UpperCaseCharStream(CharStreams.fromString(command)))
lexer.removeErrorListeners()

Expand Down

0 comments on commit 88e9498

Please sign in to comment.