change singleton behavior
mrmasterplan authored and andialbrecht committed Jan 2, 2023
1 parent fbf9a57 commit 907fb49
Showing 4 changed files with 40 additions and 24 deletions.
2 changes: 1 addition & 1 deletion docs/source/extending.rst
@@ -45,7 +45,7 @@ a keyword to the lexer:
from sqlparse.lexer import Lexer
# get the lexer singleton object to configure it
-lex = Lexer()
+lex = Lexer.get_default_instance()
# Clear the default configurations.
# After this call, reg-exps and keyword dictionaries need to be loaded
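Taken with the doc change above, extending the lexer now goes through the shared instance. A minimal sketch of the documented flow (the ZORDER keyword is an arbitrary illustration borrowed from the tests below, not part of the docs):

import sqlparse
from sqlparse import tokens
from sqlparse.lexer import Lexer

# Configure the shared default lexer instead of constructing a new one.
lex = Lexer.get_default_instance()

# Register an extra keyword; lookups are case-insensitive (value.upper()).
lex.add_keywords({'ZORDER': tokens.Keyword})

statement = sqlparse.parse('select zorder from foo;')[0]
print([(t.ttype, t.value) for t in statement.flatten()])

# Restore the stock syntax so later callers see the default behavior.
lex.default_initialization()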
52 changes: 34 additions & 18 deletions sqlparse/lexer.py
@@ -7,6 +7,7 @@

"""SQL Lexer"""
import re
+
# This code is based on the SqlLexer in pygments.
# http://pygments.org/
# It's separated from the rest of pygments to increase performance
@@ -18,21 +19,39 @@
from sqlparse.utils import consume


-class _LexerSingletonMetaclass(type):
-    _lexer_instance = None
-
-    def __call__(cls, *args, **kwargs):
-        if _LexerSingletonMetaclass._lexer_instance is None:
-            _LexerSingletonMetaclass._lexer_instance = super(
-                _LexerSingletonMetaclass, cls
-            ).__call__(*args, **kwargs)
-        return _LexerSingletonMetaclass._lexer_instance
-
-
-class Lexer(metaclass=_LexerSingletonMetaclass):
+class Lexer:
"""The Lexer supports configurable syntax.
To add support for additional keywords, use the `add_keywords` method."""

+    _default_instance = None

+    # Development notes:
+    # - This class is prepared to be able to support additional SQL dialects
+    #   in the future by adding additional functions that take the place of
+    #   the function default_initialization().
+    # - The lexer class uses an explicit singleton behavior with the
+    #   instance-getter method get_default_instance(). This mechanism has
+    #   the advantage that the call signatures of the entry points to the
+    #   sqlparse library are not affected. Also, usage of sqlparse in third
+    #   party code does not need to be adapted. On the other hand, singleton
+    #   behavior is not thread safe, and the current implementation does not
+    #   easily allow for multiple SQL dialects to be parsed in the same
+    #   process. Such behavior can be supported in the future by passing a
+    #   suitably initialized lexer object as an additional parameter to the
+    #   entry-point functions (such as `parse`). Code will need to be written
+    #   to pass down and utilize such an object. The current implementation
+    #   is prepared to support this thread-safe approach without the
+    #   default_instance part needing to change its interface.

+    @classmethod
+    def get_default_instance(cls):
+        """Returns the lexer instance used internally
+        by the sqlparse core functions."""
+        if cls._default_instance is None:
+            cls._default_instance = cls()
+            cls._default_instance.default_initialization()
+        return cls._default_instance

    def default_initialization(self):
        """Initialize the lexer with default dictionaries.
        Useful if you need to revert custom syntax settings."""
@@ -45,13 +64,10 @@ def default_initialization(self):
        self.add_keywords(keywords.KEYWORDS_MSACCESS)
        self.add_keywords(keywords.KEYWORDS)

-    def __init__(self):
-        self.default_initialization()

    def clear(self):
        """Clear all syntax configurations.
        Useful if you want to load a reduced set of syntax configurations.
-        After this call, reg-exps and keyword dictionaries need to be loaded
+        After this call, regexps and keyword dictionaries need to be loaded
        to make the lexer functional again."""
        self._SQL_REGEX = []
        self._keywords = []
@@ -73,7 +89,7 @@ def is_keyword(self, value):
"""Checks for a keyword.
If the given value is in one of the KEYWORDS_* dictionary
it's considered a keyword. Otherwise tokens.Name is returned.
it's considered a keyword. Otherwise, tokens.Name is returned.
"""
val = value.upper()
for kwdict in self._keywords:
@@ -136,4 +152,4 @@ def tokenize(sql, encoding=None):
    Tokenize *sql* using the :class:`Lexer` and return a 2-tuple stream
    of ``(token type, value)`` items.
    """
-    return Lexer().get_tokens(sql, encoding)
+    return Lexer.get_default_instance().get_tokens(sql, encoding)
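The net effect of this file's change, sketched for clarity: Lexer() is a plain constructor again and yields a new, unconfigured object, while the shared instance used by sqlparse's entry points is reached only through get_default_instance(). A minimal sketch under those assumptions:

from sqlparse.lexer import Lexer

a = Lexer.get_default_instance()
b = Lexer.get_default_instance()
assert a is b                    # one shared, lazily initialized lexer

fresh = Lexer()                  # no longer a singleton: a separate, blank object
assert fresh is not a
fresh.default_initialization()   # a fresh instance must be initialized explicitly

As the development notes say, this keeps sqlparse's public call signatures unchanged while leaving room to pass explicitly constructed lexers to the entry points later.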
2 changes: 1 addition & 1 deletion tests/test_keywords.py
@@ -9,5 +9,5 @@ class TestSQLREGEX:
                                        '1.', '-1.',
                                        '.1', '-.1'])
    def test_float_numbers(self, number):
-        ttype = next(tt for action, tt in Lexer()._SQL_REGEX if action(number))
+        ttype = next(tt for action, tt in Lexer.get_default_instance()._SQL_REGEX if action(number))
        assert tokens.Number.Float == ttype
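For reference, _SQL_REGEX is a private attribute holding (match_function, token_type) pairs, which is what this test scans. A standalone version of the same probe, assuming the default initialization:

from sqlparse import tokens
from sqlparse.lexer import Lexer

lex = Lexer.get_default_instance()
# The first pattern whose match function accepts the literal wins.
ttype = next(tt for action, tt in lex._SQL_REGEX if action('-.1'))
assert tokens.Number.Float == ttype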
8 changes: 4 additions & 4 deletions tests/test_parse.py
@@ -509,7 +509,7 @@ def test_configurable_keywords():
        (sqlparse.tokens.Punctuation, ";"),
    ]

-    Lexer().add_keywords(
+    Lexer.get_default_instance().add_keywords(
        {
            "BACON": sqlparse.tokens.Name.Builtin,
            "SPAM": sqlparse.tokens.Keyword,
@@ -520,7 +520,7 @@ def test_configurable_keywords():
    tokens = sqlparse.parse(sql)[0]

    # reset the syntax for later tests.
-    Lexer().default_initialization()
+    Lexer.get_default_instance().default_initialization()

    assert list(
        (t.ttype, t.value)
@@ -539,7 +539,7 @@ def test_configurable_keywords():


def test_configurable_regex():
-    lex = Lexer()
+    lex = Lexer.get_default_instance()
    lex.clear()

my_regex = (r"ZORDER\s+BY\b", sqlparse.tokens.Keyword)
@@ -559,7 +559,7 @@ def test_configurable_regex():
tokens = sqlparse.parse("select * from foo zorder by bar;")[0]

# reset the syntax for later tests.
Lexer().default_initialization()
Lexer.get_default_instance().default_initialization()

    assert list(
        (t.ttype, t.value)
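Since these tests mutate shared lexer state, each restores the default syntax before finishing. A hypothetical pytest fixture (not part of this commit) could centralize that cleanup:

import pytest
from sqlparse.lexer import Lexer

@pytest.fixture
def default_lexer():
    # Hand the shared instance to the test, then undo any syntax changes.
    yield Lexer.get_default_instance()
    Lexer.get_default_instance().default_initialization()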
