change singleton behavior
mrmasterplan authored and andialbrecht committed Jan 2, 2023
1 parent fbf9a57 commit 907fb49
Showing 4 changed files with 40 additions and 24 deletions.
2 changes: 1 addition & 1 deletion docs/source/extending.rst
@@ -45,7 +45,7 @@ a keyword to the lexer:
from sqlparse.lexer import Lexer
# get the lexer singleton object to configure it
-lex = Lexer()
+lex = Lexer.get_default_instance()
# Clear the default configurations.
# After this call, reg-exps and keyword dictionaries need to be loaded
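Taken with the doc change above, extending the lexer now goes through the shared instance. A minimal sketch of the documented flow (the ZORDER keyword is an arbitrary illustration borrowed from the tests below, not part of the docs):

import sqlparse
from sqlparse import tokens
from sqlparse.lexer import Lexer

# Configure the shared default lexer instead of constructing a new one.
lex = Lexer.get_default_instance()

# Register an extra keyword; lookups are case-insensitive (value.upper()).
lex.add_keywords({'ZORDER': tokens.Keyword})

statement = sqlparse.parse('select zorder from foo;')[0]
print([(t.ttype, t.value) for t in statement.flatten()])

# Restore the stock syntax so later callers see the default behavior.
lex.default_initialization()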
52 changes: 34 additions & 18 deletions sqlparse/lexer.py
@@ -7,6 +7,7 @@

"""SQL Lexer"""
import re
+
# This code is based on the SqlLexer in pygments.
# http://pygments.org/
# It's separated from the rest of pygments to increase performance
@@ -18,21 +19,39 @@
from sqlparse.utils import consume


-class _LexerSingletonMetaclass(type):
-    _lexer_instance = None
-
-    def __call__(cls, *args, **kwargs):
-        if _LexerSingletonMetaclass._lexer_instance is None:
-            _LexerSingletonMetaclass._lexer_instance = super(
-                _LexerSingletonMetaclass, cls
-            ).__call__(*args, **kwargs)
-        return _LexerSingletonMetaclass._lexer_instance
-
-
-class Lexer(metaclass=_LexerSingletonMetaclass):
+class Lexer:
"""The Lexer supports configurable syntax.
To add support for additional keywords, use the `add_keywords` method."""

+    _default_instance = None

+    # Development notes:
+    # - This class is prepared to be able to support additional SQL dialects
+    #   in the future by adding additional functions that take the place of
+    #   the function default_initialization().
+    # - The lexer class uses an explicit singleton behavior with the
+    #   instance-getter method get_default_instance(). This mechanism has
+    #   the advantage that the call signatures of the entry points to the
+    #   sqlparse library are not affected. Also, usage of sqlparse in third
+    #   party code does not need to be adapted. On the other hand, singleton
+    #   behavior is not thread safe, and the current implementation does not
+    #   easily allow for multiple SQL dialects to be parsed in the same
+    #   process. Such behavior can be supported in the future by passing a
+    #   suitably initialized lexer object as an additional parameter to the
+    #   entry-point functions (such as `parse`). Code will need to be written
+    #   to pass down and utilize such an object. The current implementation
+    #   is prepared to support this thread-safe approach without the
+    #   default_instance part needing to change its interface.

+    @classmethod
+    def get_default_instance(cls):
+        """Returns the lexer instance used internally
+        by the sqlparse core functions."""
+        if cls._default_instance is None:
+            cls._default_instance = cls()
+            cls._default_instance.default_initialization()
+        return cls._default_instance

    def default_initialization(self):
        """Initialize the lexer with default dictionaries.
        Useful if you need to revert custom syntax settings."""
@@ -45,13 +64,10 @@ def default_initialization(self):
        self.add_keywords(keywords.KEYWORDS_MSACCESS)
        self.add_keywords(keywords.KEYWORDS)

-    def __init__(self):
-        self.default_initialization()

    def clear(self):
        """Clear all syntax configurations.
        Useful if you want to load a reduced set of syntax configurations.
-        After this call, reg-exps and keyword dictionaries need to be loaded
+        After this call, regexps and keyword dictionaries need to be loaded
        to make the lexer functional again."""
        self._SQL_REGEX = []
        self._keywords = []
@@ -73,7 +89,7 @@ def is_keyword(self, value):
"""Checks for a keyword.
If the given value is in one of the KEYWORDS_* dictionary
it's considered a keyword. Otherwise tokens.Name is returned.
it's considered a keyword. Otherwise, tokens.Name is returned.
"""
val = value.upper()
for kwdict in self._keywords:
@@ -136,4 +152,4 @@ def tokenize(sql, encoding=None):
    Tokenize *sql* using the :class:`Lexer` and return a 2-tuple stream
    of ``(token type, value)`` items.
    """
-    return Lexer().get_tokens(sql, encoding)
+    return Lexer.get_default_instance().get_tokens(sql, encoding)
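The net effect of this file's change, sketched for clarity: Lexer() is a plain constructor again and yields a new, unconfigured object, while the shared instance used by sqlparse's entry points is reached only through get_default_instance(). A minimal sketch under those assumptions:

from sqlparse.lexer import Lexer

a = Lexer.get_default_instance()
b = Lexer.get_default_instance()
assert a is b                    # one shared, lazily initialized lexer

fresh = Lexer()                  # no longer a singleton: a separate, blank object
assert fresh is not a
fresh.default_initialization()   # a fresh instance must be initialized explicitly

As the development notes say, this keeps sqlparse's public call signatures unchanged while leaving room to pass explicitly constructed lexers to the entry points later.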
2 changes: 1 addition & 1 deletion tests/test_keywords.py
@@ -9,5 +9,5 @@ class TestSQLREGEX:
                                        '1.', '-1.',
                                        '.1', '-.1'])
    def test_float_numbers(self, number):
-        ttype = next(tt for action, tt in Lexer()._SQL_REGEX if action(number))
+        ttype = next(tt for action, tt in Lexer.get_default_instance()._SQL_REGEX if action(number))
        assert tokens.Number.Float == ttype
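For reference, _SQL_REGEX is a private attribute holding (match_function, token_type) pairs, which is what this test scans. A standalone version of the same probe, assuming the default initialization:

from sqlparse import tokens
from sqlparse.lexer import Lexer

lex = Lexer.get_default_instance()
# The first pattern whose match function accepts the literal wins.
ttype = next(tt for action, tt in lex._SQL_REGEX if action('-.1'))
assert tokens.Number.Float == ttype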
8 changes: 4 additions & 4 deletions tests/test_parse.py
@@ -509,7 +509,7 @@ def test_configurable_keywords():
        (sqlparse.tokens.Punctuation, ";"),
    ]

-    Lexer().add_keywords(
+    Lexer.get_default_instance().add_keywords(
        {
            "BACON": sqlparse.tokens.Name.Builtin,
            "SPAM": sqlparse.tokens.Keyword,
@@ -520,7 +520,7 @@ def test_configurable_keywords():
    tokens = sqlparse.parse(sql)[0]

    # reset the syntax for later tests.
-    Lexer().default_initialization()
+    Lexer.get_default_instance().default_initialization()

    assert list(
        (t.ttype, t.value)
@@ -539,7 +539,7 @@ def test_configurable_keywords():


def test_configurable_regex():
-    lex = Lexer()
+    lex = Lexer.get_default_instance()
    lex.clear()

my_regex = (r"ZORDER\s+BY\b", sqlparse.tokens.Keyword)
@@ -559,7 +559,7 @@ def test_configurable_regex():
tokens = sqlparse.parse("select * from foo zorder by bar;")[0]

# reset the syntax for later tests.
Lexer().default_initialization()
Lexer.get_default_instance().default_initialization()

    assert list(
        (t.ttype, t.value)
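Since these tests mutate shared lexer state, each restores the default syntax before finishing. A hypothetical pytest fixture (not part of this commit) could centralize that cleanup:

import pytest
from sqlparse.lexer import Lexer

@pytest.fixture
def default_lexer():
    # Hand the shared instance to the test, then undo any syntax changes.
    yield Lexer.get_default_instance()
    Lexer.get_default_instance().default_initialization()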
