Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: make keywords case insensitive in parser #690

Merged
merged 2 commits into from
May 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
242 changes: 121 additions & 121 deletions eva/parser/eva.lark
Original file line number Diff line number Diff line change
Expand Up @@ -298,151 +298,151 @@ math_operator: STAR | DIVIDE | MODULUS | DIV | MOD | PLUS | MINUS | MINUSMINUS

// KEYWORDS

ALL: "ALL"
ALTER: "ALTER"
AND: "AND"
ANY: "ANY"
ANYDIM: "ANYDIM"
AS: "AS"
ASC: "ASC"
BLOB: "BLOB"
BY: "BY"
COLUMN: "COLUMN"
CREATE: "CREATE"
DATABASE: "DATABASE"
DEFAULT: "DEFAULT"
DELETE: "DELETE"
DESC: "DESC"
DESCRIBE: "DESCRIBE"
DISTINCT: "DISTINCT"
DROP: "DROP"
EXIT: "EXIT"
EXISTS: "EXISTS"
EXPLAIN: "EXPLAIN"
FALSE: "FALSE"
FROM: "FROM"
GROUP: "GROUP"
HAVING: "HAVING"
IF: "IF"
IN: "IN"
FILE: "FILE"
INDIR: "INDIR"
INTO: "INTO"
INDEX: "INDEX"
INSERT: "INSERT"
IS: "IS"
JOIN: "JOIN"
KEY: "KEY"
LATERAL: "LATERAL"
LIKE: "LIKE"
LIMIT: "LIMIT"
LOAD: "LOAD"
NO: "NO"
NOT: "NOT"
NULL_LITERAL: "NULL"
OFFSET: "OFFSET"
ON: "ON"
OR: "OR"
ORDER: "ORDER"
PATH: "PATH"
PRIMARY: "PRIMARY"
REFERENCES: "REFERENCES"
RENAME: "RENAME"
SAMPLE: "SAMPLE"
IFRAMES: "IFRAMES"
AUDIORATE: "AUDIORATE"
SELECT: "SELECT"
SET: "SET"
SHUTDOWN: "SHUTDOWN"
SHOW: "SHOW"
SOME: "SOME"
TABLE: "TABLE"
TABLES: "TABLES"
TO: "TO"
TRUE: "TRUE"
UDFS: "UDFS"
UNION: "UNION"
UNIQUE: "UNIQUE"
UNKNOWN: "UNKNOWN"
UNLOCK: "UNLOCK"
UNNEST: "UNNEST"
UNSIGNED: "UNSIGNED"
UPDATE: "UPDATE"
USING: "USING"
VALUES: "VALUES"
WHERE: "WHERE"
XOR: "XOR"
ALL: "ALL"i
ALTER: "ALTER"i
AND: "AND"i
ANY: "ANY"i
ANYDIM: "ANYDIM"i
AS: "AS"i
ASC: "ASC"i
BLOB: "BLOB"i
BY: "BY"i
COLUMN: "COLUMN"i
CREATE: "CREATE"i
DATABASE: "DATABASE"i
DEFAULT: "DEFAULT"i
DELETE: "DELETE"i
DESC: "DESC"i
DESCRIBE: "DESCRIBE"i
DISTINCT: "DISTINCT"i
DROP: "DROP"i
EXIT: "EXIT"i
EXISTS: "EXISTS"i
EXPLAIN: "EXPLAIN"i
FALSE: "FALSE"i
FROM: "FROM"i
GROUP: "GROUP"i
HAVING: "HAVING"i
IF: "IF"i
IN: "IN"i
FILE: "FILE"i
INDIR: "INDIR"i
INTO: "INTO"i
INDEX: "INDEX"i
INSERT: "INSERT"i
IS: "IS"i
JOIN: "JOIN"i
KEY: "KEY"i
LATERAL: "LATERAL"i
LIKE: "LIKE"i
LIMIT: "LIMIT"i
LOAD: "LOAD"i
NO: "NO"i
NOT: "NOT"i
NULL_LITERAL: "NULL"i
OFFSET: "OFFSET"i
ON: "ON"i
OR: "OR"i
ORDER: "ORDER"i
PATH: "PATH"i
PRIMARY: "PRIMARY"i
REFERENCES: "REFERENCES"i
RENAME: "RENAME"i
SAMPLE: "SAMPLE"i
IFRAMES: "IFRAMES"i
AUDIORATE: "AUDIORATE"i
SELECT: "SELECT"i
SET: "SET"i
SHUTDOWN: "SHUTDOWN"i
SHOW: "SHOW"i
SOME: "SOME"i
TABLE: "TABLE"i
TABLES: "TABLES"i
TO: "TO"i
TRUE: "TRUE"i
UDFS: "UDFS"i
UNION: "UNION"i
UNIQUE: "UNIQUE"i
UNKNOWN: "UNKNOWN"i
UNLOCK: "UNLOCK"i
UNNEST: "UNNEST"i
UNSIGNED: "UNSIGNED"i
UPDATE: "UPDATE"i
USING: "USING"i
VALUES: "VALUES"i
WHERE: "WHERE"i
XOR: "XOR"i

// File Formats
WITH: "WITH"
FORMAT: "FORMAT"
CSV: "CSV"
VIDEO: "VIDEO"
IMAGE: "IMAGE"
WITH: "WITH"i
FORMAT: "FORMAT"i
CSV: "CSV"i
VIDEO: "VIDEO"i
IMAGE: "IMAGE"i

// Index types
HNSW: "HNSW"
HNSW: "HNSW"i

// Computer vision tasks

OBJECT_DETECTION: "OBJECT_DETECTION"
ACTION_CLASSICATION: "ACTION_CLASSICATION"
OBJECT_DETECTION: "OBJECT_DETECTION"i
ACTION_CLASSICATION: "ACTION_CLASSICATION"i

// DATA TYPE Keywords

BOOLEAN: "BOOLEAN"
INTEGER: "INTEGER"
FLOAT: "FLOAT"
TEXT: "TEXT"
NDARRAY: "NDARRAY"
INT8: "INT8"
UINT8: "UINT8"
INT16: "INT16"
INT32: "INT32"
INT64: "INT64"
UNICODE: "UNICODE"
FLOAT32: "FLOAT32"
FLOAT64: "FLOAT64"
DECIMAL: "DECIMAL"
STR: "STR"
DATETIME: "DATETIME"
ANYTYPE: "ANYTYPE"
BOOLEAN: "BOOLEAN"i
INTEGER: "INTEGER"i
FLOAT: "FLOAT"i
TEXT: "TEXT"i
NDARRAY: "NDARRAY"i
INT8: "INT8"i
UINT8: "UINT8"i
INT16: "INT16"i
INT32: "INT32"i
INT64: "INT64"i
UNICODE: "UNICODE"i
FLOAT32: "FLOAT32"i
FLOAT64: "FLOAT64"i
DECIMAL: "DECIMAL"i
STR: "STR"i
DATETIME: "DATETIME"i
ANYTYPE: "ANYTYPE"i

// Group function Keywords

AVG: "AVG"
COUNT: "COUNT"
MAX: "MAX"
MIN: "MIN"
STD: "STD"
SUM: "SUM"
FCOUNT: "FCOUNT"
FIRST: "FIRST"
LAST: "LAST"
SEGMENT: "SEGMENT"
AVG: "AVG"i
COUNT: "COUNT"i
MAX: "MAX"i
MIN: "MIN"i
STD: "STD"i
SUM: "SUM"i
FCOUNT: "FCOUNT"i
FIRST: "FIRST"i
LAST: "LAST"i
SEGMENT: "SEGMENT"i

// Keywords, but can be ID
// Common Keywords, but can be ID

AUTO_INCREMENT: "AUTO_INCREMENT"
HELP: "HELP"
TEMPTABLE: "TEMPTABLE"
VALUE: "VALUE"
AUTO_INCREMENT: "AUTO_INCREMENT"i
HELP: "HELP"i
TEMPTABLE: "TEMPTABLE"i
VALUE: "VALUE"i

// UDF
UDF: "UDF"
INPUT: "INPUT"
OUTPUT: "OUTPUT"
TYPE: "TYPE"
IMPL: "IMPL"
UDF: "UDF"i
INPUT: "INPUT"i
OUTPUT: "OUTPUT"i
TYPE: "TYPE"i
IMPL: "IMPL"i

// MATERIALIZED
MATERIALIZED: "MATERIALIZED"
VIEW: "VIEW"
MATERIALIZED: "MATERIALIZED"i
VIEW: "VIEW"i

// Common function names

ABS: "ABS"
ABS: "ABS"i

// Operators
// Operators. Assigns
Expand Down
2 changes: 1 addition & 1 deletion eva/parser/lark_visitor/_common_clauses_ids.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def simple_id(self, tree):
def decimal_literal(self, tree):
decimal = None
token = tree.children[0]
if token == "ANYDIM":
if str.upper(token) == "ANYDIM":
decimal = Dimension.ANYDIM
else:
decimal = int(str(token))
Expand Down
36 changes: 18 additions & 18 deletions eva/parser/lark_visitor/_create_statements.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def simple_data_type(self, tree):
dimensions = []

token = tree.children[0]
if token == "BOOLEAN":
if str.upper(token) == "BOOLEAN":
data_type = ColumnType.BOOLEAN

return data_type, array_type, dimensions
Expand All @@ -139,7 +139,7 @@ def integer_data_type(self, tree):
dimensions = []

token = tree.children[0]
if token == "INTEGER":
if str.upper(token) == "INTEGER":
data_type = ColumnType.INTEGER

return data_type, array_type, dimensions
Expand All @@ -150,10 +150,10 @@ def dimension_data_type(self, tree):
dimensions = []

token = tree.children[0]
if token == "FLOAT":
if str.upper(token) == "FLOAT":
data_type = ColumnType.FLOAT
dimensions = self.visit(tree.children[1])
elif token == "TEXT":
elif str.upper(token) == "TEXT":
data_type = ColumnType.TEXT
dimensions = self.visit(tree.children[1])

Expand All @@ -180,31 +180,31 @@ def array_type(self, tree):
array_type = None

token = tree.children[0]
if token == "INT8":
if str.upper(token) == "INT8":
array_type = NdArrayType.INT8
elif token == "UINT8":
elif str.upper(token) == "UINT8":
array_type = NdArrayType.UINT8
elif token == "INT16":
elif str.upper(token) == "INT16":
array_type = NdArrayType.INT16
elif token == "INT32":
elif str.upper(token) == "INT32":
array_type = NdArrayType.INT32
elif token == "INT64":
elif str.upper(token) == "INT64":
array_type = NdArrayType.INT64
elif token == "UNICODE":
elif str.upper(token) == "UNICODE":
array_type = NdArrayType.UNICODE
elif token == "BOOLEAN":
elif str.upper(token) == "BOOLEAN":
array_type = NdArrayType.BOOL
elif token == "FLOAT32":
elif str.upper(token) == "FLOAT32":
array_type = NdArrayType.FLOAT32
elif token == "FLOAT64":
elif str.upper(token) == "FLOAT64":
array_type = NdArrayType.FLOAT64
elif token == "DECIMAL":
elif str.upper(token) == "DECIMAL":
array_type = NdArrayType.DECIMAL
elif token == "STR":
elif str.upper(token) == "STR":
array_type = NdArrayType.STR
elif token == "DATETIME":
elif str.upper(token) == "DATETIME":
array_type = NdArrayType.DATETIME
elif token == "ANYTYPE":
elif str.upper(token) == "ANYTYPE":
array_type = NdArrayType.ANYTYPE
return array_type

Expand Down Expand Up @@ -259,7 +259,7 @@ def index_type(self, tree):
index_type = None
token = tree.children[1]

if token == "HNSW":
if str.upper(token) == "HNSW":
index_type = IndexType.HNSW
return index_type

Expand Down
4 changes: 2 additions & 2 deletions eva/parser/lark_visitor/_select_statement.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,9 @@ def sort_order(self, tree):
token = tree.children[0]
sort_order = None

if token == "ASC":
if str.upper(token) == "ASC":
sort_order = ParserOrderBySortType.ASC
elif token == "DESC":
elif str.upper(token) == "DESC":
sort_order = ParserOrderBySortType.DESC
return sort_order

Expand Down
5 changes: 2 additions & 3 deletions eva/parser/lark_visitor/_show_statements.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,9 @@
##################################################################
class Show:
def show_statement(self, tree):

token = tree.children[1]

if token == "UDFS":
if str.upper(token) == "UDFS":
return ShowStatement(show_type=ShowType.UDFS)
elif token == "TABLES":
elif str.upper(token) == "TABLES":
return ShowStatement(show_type=ShowType.TABLES)