Skip to content

Commit

Permalink
Revert "Remove filename pattern caches (pygments#2153)"
Browse files Browse the repository at this point in the history
This introduced a performance regression. While it is true that
fnmatch already uses functools.lru_cache, that cache is limited to 256
entries on Python 3.10 and older, and we have over 1000 matching patterns,
which means the cache is evicted entirely on every iteration.

This reverts commit 951c894.
  • Loading branch information
dirkmueller committed Jan 31, 2023
1 parent d4403f0 commit 0a9a56b
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 8 deletions.
17 changes: 14 additions & 3 deletions pygments/formatters/__init__.py
Expand Up @@ -8,9 +8,10 @@
:license: BSD, see LICENSE for details.
"""

import re
import sys
import types
from fnmatch import fnmatch
import fnmatch
from os.path import basename

from pygments.formatters._mapping import FORMATTERS
Expand All @@ -21,6 +22,16 @@
'get_all_formatters', 'load_formatter_from_file'] + list(FORMATTERS)

_formatter_cache = {} # classes by name
_pattern_cache = {}


def _fn_matches(fn, glob):
    """Match the file name ``fn`` against the shell-style pattern ``glob``.

    The regular expression compiled for each glob is memoized in
    ``_pattern_cache`` so that a given pattern is translated only once.
    Returns the regex match object on success and ``None`` otherwise.
    """
    try:
        compiled = _pattern_cache[glob]
    except KeyError:
        # First time this glob is seen: translate, compile and remember it.
        compiled = re.compile(fnmatch.translate(glob))
        _pattern_cache[glob] = compiled
    return compiled.match(fn)


def _load_formatters(module_name):
"""Load a formatter (and all others in the module too)."""
Expand Down Expand Up @@ -111,13 +122,13 @@ def get_formatter_for_filename(fn, **options):
fn = basename(fn)
for modname, name, _, filenames, _ in FORMATTERS.values():
for filename in filenames:
if fnmatch(fn, filename):
if _fn_matches(fn, filename):
if name not in _formatter_cache:
_load_formatters(modname)
return _formatter_cache[name](**options)
for cls in find_plugin_formatters():
for filename in cls.filenames:
if fnmatch(fn, filename):
if _fn_matches(fn, filename):
return cls(**options)
raise ClassNotFound("no formatter found for file name %r" % fn)

Expand Down
21 changes: 16 additions & 5 deletions pygments/lexers/__init__.py
Expand Up @@ -8,9 +8,10 @@
:license: BSD, see LICENSE for details.
"""

import re
import sys
import types
from fnmatch import fnmatch
import fnmatch
from os.path import basename

from pygments.lexers._mapping import LEXERS
Expand All @@ -27,6 +28,16 @@
'guess_lexer', 'load_lexer_from_file'] + list(LEXERS) + list(COMPAT)

_lexer_cache = {}
_pattern_cache = {}


def _fn_matches(fn, glob):
    """Test the file name ``fn`` against the glob pattern ``glob``.

    Each glob is converted to a compiled regular expression at most once;
    the compiled form is kept in ``_pattern_cache`` and reused afterwards.
    The return value is the regex match object, or ``None`` if ``fn`` does
    not match.
    """
    regex = _pattern_cache.get(glob)
    if regex is None:
        # Cache miss: build the regex for this glob and store it for reuse.
        regex = re.compile(fnmatch.translate(glob))
        _pattern_cache[glob] = regex
    return regex.match(fn)


def _load_lexers(module_name):
"""Load a lexer (and all others in the module too)."""
Expand Down Expand Up @@ -158,13 +169,13 @@ def find_lexer_class_for_filename(_fn, code=None):
fn = basename(_fn)
for modname, name, _, filenames, _ in LEXERS.values():
for filename in filenames:
if fnmatch(fn, filename):
if _fn_matches(fn, filename):
if name not in _lexer_cache:
_load_lexers(modname)
matches.append((_lexer_cache[name], filename))
for cls in find_plugin_lexers():
for filename in cls.filenames:
if fnmatch(fn, filename):
if _fn_matches(fn, filename):
matches.append((cls, filename))

if isinstance(code, bytes):
Expand Down Expand Up @@ -251,11 +262,11 @@ def guess_lexer_for_filename(_fn, _text, **options):
matching_lexers = set()
for lexer in _iter_lexerclasses():
for filename in lexer.filenames:
if fnmatch(fn, filename):
if _fn_matches(fn, filename):
matching_lexers.add(lexer)
primary[lexer] = True
for filename in lexer.alias_filenames:
if fnmatch(fn, filename):
if _fn_matches(fn, filename):
matching_lexers.add(lexer)
primary[lexer] = False
if not matching_lexers:
Expand Down

0 comments on commit 0a9a56b

Please sign in to comment.