Skip to content

Commit

Permalink
Merge pull request #243 from vmuriart/refactor_lexer.py
Browse files Browse the repository at this point in the history
Refactor lexer.py
  • Loading branch information
andialbrecht committed Jun 3, 2016
2 parents 517242a + 8240d96 commit 88a9daf
Show file tree
Hide file tree
Showing 4 changed files with 122 additions and 342 deletions.
18 changes: 1 addition & 17 deletions sqlparse/compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,10 @@
PY3 = sys.version_info[0] == 3

if PY3:
def u(s):
def u(s, encoding=None):
return str(s)


range = range
text_type = str
string_types = (str,)
from io import StringIO
Expand All @@ -33,21 +32,6 @@ def u(s, encoding=None):
return unicode(s, encoding)


range = xrange
text_type = unicode
string_types = (basestring,)
from StringIO import StringIO


# Directly copied from six:
def with_metaclass(meta, *bases):
    """Return a throwaway base class that applies *meta* to its subclasses.

    Intended to be used as ``class Foo(with_metaclass(Meta, Base)): ...`` so
    the same class statement works on Python 2 and Python 3.

    :param meta: the metaclass the final class should be created with.
    :param bases: the real base classes of the final class.
    :returns: a placeholder class named ``temporary_class``.
    """

    # The placeholder's type is a one-shot metaclass derived from ``meta``.
    # When a class statement subclasses the placeholder, this metaclass is
    # asked to build the new class; it ignores the placeholder base it was
    # handed and delegates to ``meta`` with the real ``bases`` instead.
    class metaclass(meta):
        def __new__(mcs, name, placeholder_bases, namespace):
            return meta(name, bases, namespace)

    # ``type.__new__`` is called directly so that the override above is not
    # triggered while the placeholder itself is being created.
    placeholder = type.__new__(metaclass, 'temporary_class', (), {})
    return placeholder
67 changes: 67 additions & 0 deletions sqlparse/keywords.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,72 @@
# -*- coding: utf-8 -*-

from sqlparse import tokens


def is_keyword(value):
    """Pair *value* with the token type registered for it.

    The lookup uses the upper-cased text, first in ``KEYWORDS_COMMON`` and
    then in ``KEYWORDS``; text found in neither is classified as
    ``tokens.Name``.

    :param value: the matched text.
    :returns: a ``(token_type, value)`` tuple; ``value`` is returned
        unchanged (not upper-cased).
    """
    key = value.upper()
    token_type = KEYWORDS_COMMON.get(key)
    if not token_type:
        # Missing (or falsy) in the common table: fall back to the full
        # keyword table, defaulting to a plain name.
        token_type = KEYWORDS.get(key, tokens.Name)
    return token_type, value


# Lexer rule table. Each key is a state name and each value is a list of
# rule tuples: (regex pattern, token type) or
# (regex pattern, token type, next state).
# NOTE(review): the rules are presumably tried in listing order by the
# lexer, so more specific patterns are placed before more general ones --
# confirm against lexer.py before reordering anything.
SQL_REGEX = {
'root': [
(r'(--|# ).*?(\r\n|\r|\n)', tokens.Comment.Single),
# $ matches *before* newline, therefore we have two patterns
# to match Comment.Single
(r'(--|# ).*?$', tokens.Comment.Single),
(r'(\r\n|\r|\n)', tokens.Newline),
(r'\s+', tokens.Whitespace),
# opening of a /* ... */ comment; the third element names the state
# whose rules handle the rest of the comment
(r'/\*', tokens.Comment.Multiline, 'multiline-comments'),
(r':=', tokens.Assignment),
(r'::', tokens.Punctuation),
(r'[*]', tokens.Wildcard),
(r'CASE\b', tokens.Keyword), # extended CASE(foo)
# quoted names; a doubled quote character inside is accepted as part
# of the name
(r"`(``|[^`])*`", tokens.Name),
(r"´(´´|[^´])*´", tokens.Name),
(r'\$([^\W\d]\w*)?\$', tokens.Name.Builtin),
# placeholders: ?, %(name)s, %s, and $name / :name / ?name
(r'\?{1}', tokens.Name.Placeholder),
(r'%\(\w+\)s', tokens.Name.Placeholder),
(r'%s', tokens.Name.Placeholder),
(r'[$:?]\w+', tokens.Name.Placeholder),
# FIXME(andi): VALUES shouldn't be listed here
# see https://github.com/andialbrecht/sqlparse/pull/64
(r'VALUES', tokens.Keyword),
(r'(@|##|#)[^\W\d_]\w+', tokens.Name),
# IN is special, it may be followed by a parenthesis, but
# is never a function, see issue183
(r'in\b(?=[ (])?', tokens.Keyword),
(r'USING(?=\()', tokens.Keyword),
# a word directly followed by "." or "(" is a name, not a keyword
(r'[^\W\d_]\w*(?=[.(])', tokens.Name), # see issue39
# numeric literals: hexadecimal, float with exponent, plain float,
# integer
(r'[-]?0x[0-9a-fA-F]+', tokens.Number.Hexadecimal),
(r'[-]?[0-9]*(\.[0-9]+)?[eE][-]?[0-9]+', tokens.Number.Float),
(r'[-]?[0-9]*\.[0-9]+', tokens.Number.Float),
(r'[-]?[0-9]+', tokens.Number.Integer),
# single-quoted string; '' and backslash escapes are allowed inside
(r"'(''|\\\\|\\'|[^'])*'", tokens.String.Single),
# not a real string literal in ANSI SQL:
(r'(""|".*?[^\\]")', tokens.String.Symbol),
# sqlite names can be escaped with [square brackets]. left bracket
# cannot be preceded by word character or a right bracket --
# otherwise it's probably an array index
(r'(?<![\w\])])(\[[^\]]+\])', tokens.Name),
# multi-word constructs are matched as one keyword token
(r'((LEFT\s+|RIGHT\s+|FULL\s+)?(INNER\s+|OUTER\s+|STRAIGHT\s+)?'
r'|(CROSS\s+|NATURAL\s+)?)?JOIN\b', tokens.Keyword),
(r'END(\s+IF|\s+LOOP|\s+WHILE)?\b', tokens.Keyword),
(r'NOT NULL\b', tokens.Keyword),
(r'CREATE(\s+OR\s+REPLACE)?\b', tokens.Keyword.DDL),
(r'DOUBLE\s+PRECISION\b', tokens.Name.Builtin),
# a word directly preceded by "." is a name
(r'(?<=\.)[^\W\d_]\w*', tokens.Name),
# any other word: a callable is given instead of a fixed token type;
# is_keyword() returns the (token type, value) pair for the word
(r'[^\W\d]\w*', is_keyword),
(r'[;:()\[\],\.]', tokens.Punctuation),
(r'[<>=~!]+', tokens.Operator.Comparison),
(r'[+/@#%^&|`?^-]+', tokens.Operator),
],
# Rules used while inside a /* ... */ comment; every match is emitted as
# Comment.Multiline.
'multiline-comments': [
# a nested "/*" names this same state again as the next state
(r'/\*', tokens.Comment.Multiline, 'multiline-comments'),
# NOTE(review): '#pop' presumably returns to the previous state --
# confirm against lexer.py
(r'\*/', tokens.Comment.Multiline, '#pop'),
# comment text up to the next "/" or "*"
(r'[^/\*]+', tokens.Comment.Multiline),
# a lone "/" or "*" that does not open or close a comment
(r'[/*]', tokens.Comment.Multiline),
]}

KEYWORDS = {
'ABORT': tokens.Keyword,
'ABS': tokens.Keyword,
Expand Down

0 comments on commit 88a9daf

Please sign in to comment.