Skip to content

Commit

Permalink
feat: make keywords case insensitive in parser (#690)
Browse files — browse the repository at this point in the history
* make keywords case insensitive

* Update test_parser_statements.py
  • Loading branch information
gaurav274 committed May 2, 2023
1 parent c9cb9da commit 30af85e
Show file tree
Hide file tree
Showing 6 changed files with 198 additions and 146 deletions.
242 changes: 121 additions & 121 deletions eva/parser/eva.lark
Original file line number Diff line number Diff line change
Expand Up @@ -298,151 +298,151 @@ math_operator: STAR | DIVIDE | MODULUS | DIV | MOD | PLUS | MINUS | MINUSMINUS

// KEYWORDS

ALL: "ALL"
ALTER: "ALTER"
AND: "AND"
ANY: "ANY"
ANYDIM: "ANYDIM"
AS: "AS"
ASC: "ASC"
BLOB: "BLOB"
BY: "BY"
COLUMN: "COLUMN"
CREATE: "CREATE"
DATABASE: "DATABASE"
DEFAULT: "DEFAULT"
DELETE: "DELETE"
DESC: "DESC"
DESCRIBE: "DESCRIBE"
DISTINCT: "DISTINCT"
DROP: "DROP"
EXIT: "EXIT"
EXISTS: "EXISTS"
EXPLAIN: "EXPLAIN"
FALSE: "FALSE"
FROM: "FROM"
GROUP: "GROUP"
HAVING: "HAVING"
IF: "IF"
IN: "IN"
FILE: "FILE"
INDIR: "INDIR"
INTO: "INTO"
INDEX: "INDEX"
INSERT: "INSERT"
IS: "IS"
JOIN: "JOIN"
KEY: "KEY"
LATERAL: "LATERAL"
LIKE: "LIKE"
LIMIT: "LIMIT"
LOAD: "LOAD"
NO: "NO"
NOT: "NOT"
NULL_LITERAL: "NULL"
OFFSET: "OFFSET"
ON: "ON"
OR: "OR"
ORDER: "ORDER"
PATH: "PATH"
PRIMARY: "PRIMARY"
REFERENCES: "REFERENCES"
RENAME: "RENAME"
SAMPLE: "SAMPLE"
IFRAMES: "IFRAMES"
AUDIORATE: "AUDIORATE"
SELECT: "SELECT"
SET: "SET"
SHUTDOWN: "SHUTDOWN"
SHOW: "SHOW"
SOME: "SOME"
TABLE: "TABLE"
TABLES: "TABLES"
TO: "TO"
TRUE: "TRUE"
UDFS: "UDFS"
UNION: "UNION"
UNIQUE: "UNIQUE"
UNKNOWN: "UNKNOWN"
UNLOCK: "UNLOCK"
UNNEST: "UNNEST"
UNSIGNED: "UNSIGNED"
UPDATE: "UPDATE"
USING: "USING"
VALUES: "VALUES"
WHERE: "WHERE"
XOR: "XOR"
ALL: "ALL"i
ALTER: "ALTER"i
AND: "AND"i
ANY: "ANY"i
ANYDIM: "ANYDIM"i
AS: "AS"i
ASC: "ASC"i
BLOB: "BLOB"i
BY: "BY"i
COLUMN: "COLUMN"i
CREATE: "CREATE"i
DATABASE: "DATABASE"i
DEFAULT: "DEFAULT"i
DELETE: "DELETE"i
DESC: "DESC"i
DESCRIBE: "DESCRIBE"i
DISTINCT: "DISTINCT"i
DROP: "DROP"i
EXIT: "EXIT"i
EXISTS: "EXISTS"i
EXPLAIN: "EXPLAIN"i
FALSE: "FALSE"i
FROM: "FROM"i
GROUP: "GROUP"i
HAVING: "HAVING"i
IF: "IF"i
IN: "IN"i
FILE: "FILE"i
INDIR: "INDIR"i
INTO: "INTO"i
INDEX: "INDEX"i
INSERT: "INSERT"i
IS: "IS"i
JOIN: "JOIN"i
KEY: "KEY"i
LATERAL: "LATERAL"i
LIKE: "LIKE"i
LIMIT: "LIMIT"i
LOAD: "LOAD"i
NO: "NO"i
NOT: "NOT"i
NULL_LITERAL: "NULL"i
OFFSET: "OFFSET"i
ON: "ON"i
OR: "OR"i
ORDER: "ORDER"i
PATH: "PATH"i
PRIMARY: "PRIMARY"i
REFERENCES: "REFERENCES"i
RENAME: "RENAME"i
SAMPLE: "SAMPLE"i
IFRAMES: "IFRAMES"i
AUDIORATE: "AUDIORATE"i
SELECT: "SELECT"i
SET: "SET"i
SHUTDOWN: "SHUTDOWN"i
SHOW: "SHOW"i
SOME: "SOME"i
TABLE: "TABLE"i
TABLES: "TABLES"i
TO: "TO"i
TRUE: "TRUE"i
UDFS: "UDFS"i
UNION: "UNION"i
UNIQUE: "UNIQUE"i
UNKNOWN: "UNKNOWN"i
UNLOCK: "UNLOCK"i
UNNEST: "UNNEST"i
UNSIGNED: "UNSIGNED"i
UPDATE: "UPDATE"i
USING: "USING"i
VALUES: "VALUES"i
WHERE: "WHERE"i
XOR: "XOR"i

// File Formats
WITH: "WITH"
FORMAT: "FORMAT"
CSV: "CSV"
VIDEO: "VIDEO"
IMAGE: "IMAGE"
WITH: "WITH"i
FORMAT: "FORMAT"i
CSV: "CSV"i
VIDEO: "VIDEO"i
IMAGE: "IMAGE"i

// Index types
HNSW: "HNSW"
HNSW: "HNSW"i

// Computer vision tasks

OBJECT_DETECTION: "OBJECT_DETECTION"
ACTION_CLASSICATION: "ACTION_CLASSICATION"
OBJECT_DETECTION: "OBJECT_DETECTION"i
ACTION_CLASSICATION: "ACTION_CLASSICATION"i

// DATA TYPE Keywords

BOOLEAN: "BOOLEAN"
INTEGER: "INTEGER"
FLOAT: "FLOAT"
TEXT: "TEXT"
NDARRAY: "NDARRAY"
INT8: "INT8"
UINT8: "UINT8"
INT16: "INT16"
INT32: "INT32"
INT64: "INT64"
UNICODE: "UNICODE"
FLOAT32: "FLOAT32"
FLOAT64: "FLOAT64"
DECIMAL: "DECIMAL"
STR: "STR"
DATETIME: "DATETIME"
ANYTYPE: "ANYTYPE"
BOOLEAN: "BOOLEAN"i
INTEGER: "INTEGER"i
FLOAT: "FLOAT"i
TEXT: "TEXT"i
NDARRAY: "NDARRAY"i
INT8: "INT8"i
UINT8: "UINT8"i
INT16: "INT16"i
INT32: "INT32"i
INT64: "INT64"i
UNICODE: "UNICODE"i
FLOAT32: "FLOAT32"i
FLOAT64: "FLOAT64"i
DECIMAL: "DECIMAL"i
STR: "STR"i
DATETIME: "DATETIME"i
ANYTYPE: "ANYTYPE"i

// Group function Keywords

AVG: "AVG"
COUNT: "COUNT"
MAX: "MAX"
MIN: "MIN"
STD: "STD"
SUM: "SUM"
FCOUNT: "FCOUNT"
FIRST: "FIRST"
LAST: "LAST"
SEGMENT: "SEGMENT"
AVG: "AVG"i
COUNT: "COUNT"i
MAX: "MAX"i
MIN: "MIN"i
STD: "STD"i
SUM: "SUM"i
FCOUNT: "FCOUNT"i
FIRST: "FIRST"i
LAST: "LAST"i
SEGMENT: "SEGMENT"i

// Keywords, but can be ID
// Common Keywords, but can be ID

AUTO_INCREMENT: "AUTO_INCREMENT"
HELP: "HELP"
TEMPTABLE: "TEMPTABLE"
VALUE: "VALUE"
AUTO_INCREMENT: "AUTO_INCREMENT"i
HELP: "HELP"i
TEMPTABLE: "TEMPTABLE"i
VALUE: "VALUE"i

// UDF
UDF: "UDF"
INPUT: "INPUT"
OUTPUT: "OUTPUT"
TYPE: "TYPE"
IMPL: "IMPL"
UDF: "UDF"i
INPUT: "INPUT"i
OUTPUT: "OUTPUT"i
TYPE: "TYPE"i
IMPL: "IMPL"i

// MATERIALIZED
MATERIALIZED: "MATERIALIZED"
VIEW: "VIEW"
MATERIALIZED: "MATERIALIZED"i
VIEW: "VIEW"i

// Common function names

ABS: "ABS"
ABS: "ABS"i

// Operators
// Operators. Assigns
Expand Down
2 changes: 1 addition & 1 deletion eva/parser/lark_visitor/_common_clauses_ids.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def simple_id(self, tree):
def decimal_literal(self, tree):
decimal = None
token = tree.children[0]
if token == "ANYDIM":
if str.upper(token) == "ANYDIM":
decimal = Dimension.ANYDIM
else:
decimal = int(str(token))
Expand Down
36 changes: 18 additions & 18 deletions eva/parser/lark_visitor/_create_statements.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def simple_data_type(self, tree):
dimensions = []

token = tree.children[0]
if token == "BOOLEAN":
if str.upper(token) == "BOOLEAN":
data_type = ColumnType.BOOLEAN

return data_type, array_type, dimensions
Expand All @@ -139,7 +139,7 @@ def integer_data_type(self, tree):
dimensions = []

token = tree.children[0]
if token == "INTEGER":
if str.upper(token) == "INTEGER":
data_type = ColumnType.INTEGER

return data_type, array_type, dimensions
Expand All @@ -150,10 +150,10 @@ def dimension_data_type(self, tree):
dimensions = []

token = tree.children[0]
if token == "FLOAT":
if str.upper(token) == "FLOAT":
data_type = ColumnType.FLOAT
dimensions = self.visit(tree.children[1])
elif token == "TEXT":
elif str.upper(token) == "TEXT":
data_type = ColumnType.TEXT
dimensions = self.visit(tree.children[1])

Expand All @@ -180,31 +180,31 @@ def array_type(self, tree):
array_type = None

token = tree.children[0]
if token == "INT8":
if str.upper(token) == "INT8":
array_type = NdArrayType.INT8
elif token == "UINT8":
elif str.upper(token) == "UINT8":
array_type = NdArrayType.UINT8
elif token == "INT16":
elif str.upper(token) == "INT16":
array_type = NdArrayType.INT16
elif token == "INT32":
elif str.upper(token) == "INT32":
array_type = NdArrayType.INT32
elif token == "INT64":
elif str.upper(token) == "INT64":
array_type = NdArrayType.INT64
elif token == "UNICODE":
elif str.upper(token) == "UNICODE":
array_type = NdArrayType.UNICODE
elif token == "BOOLEAN":
elif str.upper(token) == "BOOLEAN":
array_type = NdArrayType.BOOL
elif token == "FLOAT32":
elif str.upper(token) == "FLOAT32":
array_type = NdArrayType.FLOAT32
elif token == "FLOAT64":
elif str.upper(token) == "FLOAT64":
array_type = NdArrayType.FLOAT64
elif token == "DECIMAL":
elif str.upper(token) == "DECIMAL":
array_type = NdArrayType.DECIMAL
elif token == "STR":
elif str.upper(token) == "STR":
array_type = NdArrayType.STR
elif token == "DATETIME":
elif str.upper(token) == "DATETIME":
array_type = NdArrayType.DATETIME
elif token == "ANYTYPE":
elif str.upper(token) == "ANYTYPE":
array_type = NdArrayType.ANYTYPE
return array_type

Expand Down Expand Up @@ -259,7 +259,7 @@ def index_type(self, tree):
index_type = None
token = tree.children[1]

if token == "HNSW":
if str.upper(token) == "HNSW":
index_type = IndexType.HNSW
return index_type

Expand Down
4 changes: 2 additions & 2 deletions eva/parser/lark_visitor/_select_statement.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,9 @@ def sort_order(self, tree):
token = tree.children[0]
sort_order = None

if token == "ASC":
if str.upper(token) == "ASC":
sort_order = ParserOrderBySortType.ASC
elif token == "DESC":
elif str.upper(token) == "DESC":
sort_order = ParserOrderBySortType.DESC
return sort_order

Expand Down
5 changes: 2 additions & 3 deletions eva/parser/lark_visitor/_show_statements.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,9 @@
##################################################################
class Show:
def show_statement(self, tree):

token = tree.children[1]

if token == "UDFS":
if str.upper(token) == "UDFS":
return ShowStatement(show_type=ShowType.UDFS)
elif token == "TABLES":
elif str.upper(token) == "TABLES":
return ShowStatement(show_type=ShowType.TABLES)
Loading

0 comments on commit 30af85e

Please sign in to comment.