Skip to content

Commit

Permalink
Guess lexer based on filename for plaintext uploads
Browse files (browse the repository at this point in the history)
  • Loading branch information
chriskuehl committed Apr 23, 2019
1 parent a3b1709 commit 023e9a3
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 28 deletions.
32 changes: 22 additions & 10 deletions fluffy/component/highlighting.py
Expand Up @@ -133,11 +133,11 @@ def strip_diff_things(text):
return s


def get_highlighter(text, language):
def get_highlighter(text, language, filename):
if language in {None, 'autodetect'} and looks_like_ansi_color(text):
language = 'ansi-color'

lexer = guess_lexer(text, language)
lexer = guess_lexer(text, language, filename)

diff_requested = (language or '').startswith('diff-')

Expand All @@ -158,30 +158,42 @@ def get_highlighter(text, language):
# ourselves a bit.
if diff_requested or lexer.name.lower() == 'diff' or looks_like_diff(text):
return DiffHighlighter(
guess_lexer(strip_diff_things(text), requested_diff_language),
guess_lexer(strip_diff_things(text), requested_diff_language, filename),
)

return PygmentsHighlighter(lexer)


def guess_lexer(text, language, opts=None):
def guess_lexer(text, language, filename, opts=None):
lexer_opts = {'stripnl': False}
if opts:
lexer_opts = dict(lexer_opts, **opts)

# First, look for an exact lexer match name.
try:
return pygments.lexers.get_lexer_by_name(language, **lexer_opts)
except pygments.util.ClassNotFound:
pass

# If that didn't work, if given a file name, try finding a lexer using that.
if filename is not None:
try:
lexer = pygments.lexers.guess_lexer(text, **lexer_opts)
# Newer versions of Pygments will virtually always fall back to
# TextLexer due to its 0.01 priority (which is what it returns on
# analyzing any text).
if not isinstance(lexer, pygments.lexers.TextLexer):
return lexer
return pygments.lexers.guess_lexer_for_filename(filename, text, **lexer_opts)
except pygments.util.ClassNotFound:
pass

# Finally, try to guess by looking at the file content.
try:
lexer = pygments.lexers.guess_lexer(text, **lexer_opts)

# Newer versions of Pygments will virtually always fall back to
# TextLexer due to its 0.01 priority (which is what it returns on
# analyzing any text).
if not isinstance(lexer, pygments.lexers.TextLexer):
return lexer
except pygments.util.ClassNotFound:
pass

# Default to Python, it highlights most things reasonably.
return pygments.lexers.get_lexer_by_name('python', **lexer_opts)

Expand Down
2 changes: 1 addition & 1 deletion fluffy/component/markdown.py
Expand Up @@ -38,7 +38,7 @@ class CodeRendererMixin:

def block_code(self, code, lang):
return PygmentsHighlighter(
guess_lexer(code, lang, opts={'stripnl': True}),
guess_lexer(code, lang, None, opts={'stripnl': True}),
).highlight(code)


Expand Down
6 changes: 3 additions & 3 deletions fluffy/run.py
Expand Up @@ -21,7 +21,7 @@ def view_paste():
return render_template(
'paste.html',
text=(TESTING_DIR / 'files' / 'code.py').open().read(),
highlighter=get_highlighter('', 'python'),
highlighter=get_highlighter('', 'python', None),
edit_url='#edit',
raw_url='#raw',
styles=STYLES_BY_CATEGORY,
Expand All @@ -33,7 +33,7 @@ def view_diff():
return render_template(
'paste.html',
text=text,
highlighter=get_highlighter(text, None),
highlighter=get_highlighter(text, None, None),
edit_url='#edit',
raw_url='#raw',
styles=STYLES_BY_CATEGORY,
Expand All @@ -45,7 +45,7 @@ def view_ansi_color():
return render_template(
'paste.html',
text=text,
highlighter=get_highlighter(text, None),
highlighter=get_highlighter(text, None, None),
edit_url='#edit',
raw_url='#raw',
styles=STYLES_BY_CATEGORY,
Expand Down
4 changes: 2 additions & 2 deletions fluffy/views.py
Expand Up @@ -73,7 +73,7 @@ def upload():
pb = ctx.enter_context(HtmlToStore.from_html(render_template(
'paste.html',
text=text,
highlighter=get_highlighter(text, None),
highlighter=get_highlighter(text, None, uf.human_name),
raw_url=app.config['FILE_URL'].format(name=uf.name),
styles=STYLES_BY_CATEGORY,
)))
Expand Down Expand Up @@ -142,7 +142,7 @@ def paste():
# HTML view (Markdown or paste)
lang = request.form['language']
if lang != 'rendered-markdown':
highlighter = get_highlighter(text, lang)
highlighter = get_highlighter(text, lang, None)
lang_title = highlighter.name
paste_obj = ctx.enter_context(HtmlToStore.from_html(render_template(
'paste.html',
Expand Down
32 changes: 20 additions & 12 deletions tests/unit/component/highlighting_test.py
Expand Up @@ -61,17 +61,24 @@ def test_ui_language_exists(language):
assert pygments.lexers.get_lexer_by_name('python') is not None


def test_guess_lexer_uses_valid_lang():
assert guess_lexer(EXAMPLE_C, 'ruby').name == 'Ruby'
def test_guess_lexer_precedence():
# Prefers exact lexer name match
assert guess_lexer(EXAMPLE_C, 'ruby', 'my-thing.css').name == 'Ruby'

# Otherwise uses filename detection
assert guess_lexer(EXAMPLE_C, 'not-a-lexer', 'my-thing.css').name == 'CSS'

# Finally uses text detection
assert guess_lexer(EXAMPLE_C, 'not-a-lexer', 'not-a-filename-that-matches').name == 'C'


@pytest.mark.parametrize('invalid_lang', ['herpderp', '', None, 'autodetect'])
def test_guess_lexer_autodetects_with_invalid_lang(invalid_lang):
assert guess_lexer(EXAMPLE_C, invalid_lang).name == 'C'
assert guess_lexer(EXAMPLE_C, invalid_lang, None).name == 'C'


def test_guess_lexer_falls_back_to_python():
assert guess_lexer('what language even is this', None).name == 'Python'
assert guess_lexer('what language even is this', None, None).name == 'Python'


@pytest.mark.parametrize(
Expand Down Expand Up @@ -110,15 +117,16 @@ def guess_lexer(text, language):


@pytest.mark.parametrize(
('text', 'language', 'expected'), (
(EXAMPLE_C, 'c', pygments.lexers.get_lexer_by_name('c')),
(EXAMPLE_C, 'does not exist', pygments.lexers.get_lexer_by_name('c')),
(EXAMPLE_C, None, pygments.lexers.get_lexer_by_name('c')),
(EXAMPLE_DIFF, 'c', pygments.lexers.get_lexer_by_name('c')),
('text', 'language', 'filename', 'expected'), (
(EXAMPLE_C, 'c', None, pygments.lexers.get_lexer_by_name('c')),
(EXAMPLE_C, 'does not exist', None, pygments.lexers.get_lexer_by_name('c')),
(EXAMPLE_C, None, None, pygments.lexers.get_lexer_by_name('c')),
(EXAMPLE_DIFF, 'c', None, pygments.lexers.get_lexer_by_name('c')),
(EXAMPLE_C, None, 'my_file.rs', pygments.lexers.get_lexer_by_name('rust')),
),
)
def test_get_highlighter_pygments(text, language, expected):
h = get_highlighter(text, language)
def test_get_highlighter_pygments(text, language, filename, expected):
h = get_highlighter(text, language, filename)
assert isinstance(h, PygmentsHighlighter)
assert type(h.lexer) is type(expected)

Expand All @@ -136,6 +144,6 @@ def test_get_highlighter_pygments(text, language, expected):
),
)
def test_get_highlighter_diff(text, language, expected):
h = get_highlighter(text, language)
h = get_highlighter(text, language, None)
assert isinstance(h, DiffHighlighter)
assert type(h.lexer) is type(expected)

0 comments on commit 023e9a3

Please sign in to comment.