In [1]:
!pip3 install lark

Collecting lark
  Downloading lark-1.1.9-py3-none-any.whl (111 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m111.7/111.7 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: lark
Successfully installed lark-1.1.9


In [2]:
from lark import Lark, Transformer

In [7]:
grammar = """
    start: select_statement

    ?select_statement: "SELECT" columns "FROM" table ("WHERE" where_expression)?

    columns: "*"
           | column ("," column)*

    column: CNAME

    table: CNAME

    where_expression: expression

    ?expression: term
                | expression "AND" term
                | expression "OR" term

    term: factor
        | factor "BETWEEN" factor "AND" factor
        | factor "=" factor
        | factor "!=" factor
        | factor "<" factor
        | factor "<=" factor
        | factor ">" factor
        | factor ">=" factor

    factor: CNAME
          | NUMBER
          | STRING

    %import common.CNAME
    %import common.NUMBER
    %import common.ESCAPED_STRING -> STRING
    %ignore " "
"""

class MyTransformer(Transformer):
    def start(self, items):
        return items[0]

parser = Lark(grammar, parser='lalr', transformer=MyTransformer())

def parse_html(input_str):
    return parser.parse(input_str)

In [12]:
sql = "SELECT name, age FROM users WHERE age >= 18 AND age <= 30"
sql_parsed = parse_html(sql)
print(sql_parsed)

Tree(Token('RULE', 'select_statement'), [Tree(Token('RULE', 'columns'), [Tree(Token('RULE', 'column'), [Token('CNAME', 'name')]), Tree(Token('RULE', 'column'), [Token('CNAME', 'age')])]), Tree(Token('RULE', 'table'), [Token('CNAME', 'users')]), Tree(Token('RULE', 'where_expression'), [Tree(Token('RULE', 'expression'), [Tree(Token('RULE', 'term'), [Tree(Token('RULE', 'factor'), [Token('CNAME', 'age')]), Tree(Token('RULE', 'factor'), [Token('NUMBER', '18')])]), Tree(Token('RULE', 'term'), [Tree(Token('RULE', 'factor'), [Token('CNAME', 'age')]), Tree(Token('RULE', 'factor'), [Token('NUMBER', '30')])])])])])


In [13]:
sql_parser = Lark(grammar, start='start')

query = "SELECT name, age FROM users WHERE age >= 18 AND age <= 30"
parsed = sql_parser.parse(query)

print(parsed.pretty())

start
  select_statement
    columns
      column	name
      column	age
    table	users
    where_expression
      expression
        term
          factor	age
          factor	18
        term
          factor	age
          factor	30

