Skip to content

Commit

Permalink
Move unportable parser from lexer into syntax module
Browse files Browse the repository at this point in the history
  • Loading branch information
Maxim Avanov committed Apr 1, 2014
1 parent b24dddf commit 1cd6cc0
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 28 deletions.
29 changes: 7 additions & 22 deletions plim/lexer.py
Expand Up @@ -6,7 +6,7 @@
import markdown2

from . import errors
from .util import StringIO, PY3K, MAXSIZE, joined, space_separated, as_unicode
from .util import StringIO, MAXSIZE, joined, space_separated, as_unicode
from .extensions import rst_to_html
from .extensions import coffee_to_js
from .extensions import scss_to_css
Expand Down Expand Up @@ -54,8 +54,8 @@
'(?P<value>(?:[-+]?[0-9]*\.[0-9]+|[-+]?[0-9]+%?))'
)

PARSE_TAG_TREE_RE = re.compile('(?:#|\.|{tag}).*'.format(tag=TAG_RULE))
PARSE_HANDLEBARS_RE = re.compile('(?:handlebars).*')


PARSE_STATEMENTS_RE = re.compile('-\s*(?P<stmnt>if|for|while|with|try)(?P<expr>.*)')
PARSE_FOREIGN_STATEMENTS_RE = re.compile('-\s*(?P<stmnt>unless|until)(?P<expr>.*)')
STATEMENT_CONVERT = {
Expand All @@ -73,33 +73,18 @@
PARSE_DEF_BLOCK_RE = re.compile('-\s*(?P<line>(?:def|block)(?:\s+.*)?)')
PARSE_MAKO_ONE_LINERS_RE = re.compile('-\s*(?P<line>(?:include|inherit|page|namespace)(?:\s+.*)?)')

if PY3K:
PARSE_IMPLICIT_LITERAL_RE = re.compile(
# Order matters
'(?P<line>(?:'
'\$?\{|\(|\[|&.+;|[0-9]+|'
'(?:'
'[^\u0021-\u007E]' # not ASCII 33 - 126
'|' # or
'[A-Z]' # uppercase latin letters (ASCII 65 - 90)
')' # It is possible because TAG_RE can match only lowercase tag names
').*)\s*'
)
else:
from .unportable import PARSE_IMPLICIT_LITERAL_RE


PARSE_RAW_HTML_RE = re.compile('\<.*')
PARSE_MAKO_TEXT_RE = re.compile('-\s*(?P<line>text(?:\s+.*)?)')
PARSE_CALL_RE = re.compile('-\s*(?P<line>call(?:\s+.*)?)')
PARSE_EARLY_RETURN_RE = re.compile('-\s*(?P<keyword>return|continue|break)\s*')

# This constant uses LITERAL_CONTENT_PREFIX and LITERAL_CONTENT_SPACE_PREFIX
PARSE_EXPLICIT_LITERAL_RE = re.compile("(?:\||,).*", re.IGNORECASE)


PARSE_VARIABLE_RE = re.compile("=(?P<prevent_escape>=)?(?P<explicit_space>,)?\s*(?P<line>.*)", re.IGNORECASE)
PARSE_COMMENT_RE = re.compile('/.*')
PARSE_DOCTYPE_RE = re.compile('doctype\s+(?P<type>[0-9a-z\.]+)', re.IGNORECASE)
PARSE_STYLE_SCRIPT_RE = re.compile('(?:style|script).*', re.IGNORECASE)

PARSE_EXTENSION_LANGUAGES_RE = re.compile('-\s*(?P<lang>md|markdown|rst|rest|coffee|scss|sass|stylus)\s*')

CSS_ID_SHORTCUT_TERMINATORS = (
Expand Down Expand Up @@ -836,7 +821,7 @@ def parse_doctype(indent_level, current_line, ___, source, syntax):
:type parsers: tuple
:return:
"""
match = PARSE_DOCTYPE_RE.match(current_line.strip())
match = syntax.PARSE_DOCTYPE_RE.match(current_line.strip())
doctype = match.group('type')
return DOCTYPES.get(doctype, DOCTYPES['5']), indent_level, '', source

Expand Down
41 changes: 35 additions & 6 deletions plim/syntax.py
@@ -1,4 +1,23 @@
import re

from . import lexer as l
from .util import PY3K


if PY3K:
PARSE_IMPLICIT_LITERAL_RE = re.compile(
# Order matters
'(?P<line>(?:'
'\$?\{|\(|\[|&.+;|[0-9]+|'
'(?:'
'[^\u0021-\u007E]' # not ASCII 33 - 126
'|' # or
'[A-Z]' # uppercase latin letters (ASCII 65 - 90)
')' # It is possible because TAG_RE can match only lowercase tag names
').*)\s*'
)
else:
from .unportable import PARSE_IMPLICIT_LITERAL_RE


class BaseSyntax(object):
Expand All @@ -10,13 +29,23 @@ class BaseSyntax(object):
STATEMENT_END_START_SEQUENCE = '%'
STATEMENT_END_END_SEQUENCE = ''

# Parsers
# ----------------------------------
PARSE_DOCTYPE_RE = re.compile('doctype\s+(?P<type>[0-9a-z\.]+)', re.IGNORECASE)
PARSE_STYLE_SCRIPT_RE = re.compile('(?:style|script).*', re.IGNORECASE)
PARSE_HANDLEBARS_RE = re.compile('(?:handlebars).*')
PARSE_TAG_TREE_RE = re.compile('(?:#|\.|{tag}).*'.format(tag=l.TAG_RULE))
# This constant uses l.LITERAL_CONTENT_PREFIX and l.LITERAL_CONTENT_SPACE_PREFIX
PARSE_EXPLICIT_LITERAL_RE = re.compile("(?:\||,).*", re.IGNORECASE)
PARSE_IMPLICIT_LITERAL_RE = PARSE_IMPLICIT_LITERAL_RE

STANDARD_PARSERS = ( # Order matters
(l.PARSE_STYLE_SCRIPT_RE, l.parse_style_script),
(l.PARSE_DOCTYPE_RE, l.parse_doctype),
(l.PARSE_HANDLEBARS_RE, l.parse_handlebars),
(l.PARSE_TAG_TREE_RE, l.parse_tag_tree),
(l.PARSE_EXPLICIT_LITERAL_RE, l.parse_explicit_literal_with_embedded_markup),
(l.PARSE_IMPLICIT_LITERAL_RE, l.parse_implicit_literal),
(PARSE_STYLE_SCRIPT_RE, l.parse_style_script),
(PARSE_DOCTYPE_RE, l.parse_doctype),
(PARSE_HANDLEBARS_RE, l.parse_handlebars),
(PARSE_TAG_TREE_RE, l.parse_tag_tree),
(PARSE_EXPLICIT_LITERAL_RE, l.parse_explicit_literal_with_embedded_markup),
(PARSE_IMPLICIT_LITERAL_RE, l.parse_implicit_literal),
(l.PARSE_RAW_HTML_RE, l.parse_raw_html),
(l.PARSE_VARIABLE_RE, l.parse_variable),
(l.PARSE_COMMENT_RE, l.parse_comment),
Expand Down

0 comments on commit 1cd6cc0

Please sign in to comment.