diff --git a/plim/lexer.py b/plim/lexer.py index f624685..9a36ee0 100644 --- a/plim/lexer.py +++ b/plim/lexer.py @@ -6,7 +6,7 @@ import markdown2 from . import errors -from .util import StringIO, PY3K, MAXSIZE, joined, space_separated, as_unicode +from .util import StringIO, MAXSIZE, joined, space_separated, as_unicode from .extensions import rst_to_html from .extensions import coffee_to_js from .extensions import scss_to_css @@ -54,8 +54,8 @@ '(?P(?:[-+]?[0-9]*\.[0-9]+|[-+]?[0-9]+%?))' ) -PARSE_TAG_TREE_RE = re.compile('(?:#|\.|{tag}).*'.format(tag=TAG_RULE)) -PARSE_HANDLEBARS_RE = re.compile('(?:handlebars).*') + + PARSE_STATEMENTS_RE = re.compile('-\s*(?Pif|for|while|with|try)(?P.*)') PARSE_FOREIGN_STATEMENTS_RE = re.compile('-\s*(?Punless|until)(?P.*)') STATEMENT_CONVERT = { @@ -73,33 +73,18 @@ PARSE_DEF_BLOCK_RE = re.compile('-\s*(?P(?:def|block)(?:\s+.*)?)') PARSE_MAKO_ONE_LINERS_RE = re.compile('-\s*(?P(?:include|inherit|page|namespace)(?:\s+.*)?)') -if PY3K: - PARSE_IMPLICIT_LITERAL_RE = re.compile( - # Order matters - '(?P(?:' - '\$?\{|\(|\[|&.+;|[0-9]+|' - '(?:' - '[^\u0021-\u007E]' # not ASCII 33 - 126 - '|' # or - '[A-Z]' # uppercase latin letters (ASCII 65 - 90) - ')' # It is possible because TAG_RE can match only lowercase tag names - ').*)\s*' - ) -else: - from .unportable import PARSE_IMPLICIT_LITERAL_RE + PARSE_RAW_HTML_RE = re.compile('\<.*') PARSE_MAKO_TEXT_RE = re.compile('-\s*(?Ptext(?:\s+.*)?)') PARSE_CALL_RE = re.compile('-\s*(?Pcall(?:\s+.*)?)') PARSE_EARLY_RETURN_RE = re.compile('-\s*(?Preturn|continue|break)\s*') -# This constant uses LITERAL_CONTENT_PREFIX and LITERAL_CONTENT_SPACE_PREFIX -PARSE_EXPLICIT_LITERAL_RE = re.compile("(?:\||,).*", re.IGNORECASE) + PARSE_VARIABLE_RE = re.compile("=(?P=)?(?P,)?\s*(?P.*)", re.IGNORECASE) PARSE_COMMENT_RE = re.compile('/.*') -PARSE_DOCTYPE_RE = re.compile('doctype\s+(?P[0-9a-z\.]+)', re.IGNORECASE) -PARSE_STYLE_SCRIPT_RE = re.compile('(?:style|script).*', re.IGNORECASE) + PARSE_EXTENSION_LANGUAGES_RE = re.compile('-\s*(?Pmd|markdown|rst|rest|coffee|scss|sass|stylus)\s*') CSS_ID_SHORTCUT_TERMINATORS = ( @@ -836,7 +821,7 @@ def parse_doctype(indent_level, current_line, ___, source, syntax): :type parsers: tuple :return: """ - match = PARSE_DOCTYPE_RE.match(current_line.strip()) + match = syntax.PARSE_DOCTYPE_RE.match(current_line.strip()) doctype = match.group('type') return DOCTYPES.get(doctype, DOCTYPES['5']), indent_level, '', source diff --git a/plim/syntax.py b/plim/syntax.py index 24e0809..683a627 100644 --- a/plim/syntax.py +++ b/plim/syntax.py @@ -1,4 +1,23 @@ +import re + from . import lexer as l +from .util import PY3K + + +if PY3K: + PARSE_IMPLICIT_LITERAL_RE = re.compile( + # Order matters + '(?P(?:' + '\$?\{|\(|\[|&.+;|[0-9]+|' + '(?:' + '[^\u0021-\u007E]' # not ASCII 33 - 126 + '|' # or + '[A-Z]' # uppercase latin letters (ASCII 65 - 90) + ')' # It is possible because TAG_RE can match only lowercase tag names + ').*)\s*' + ) +else: + from .unportable import PARSE_IMPLICIT_LITERAL_RE class BaseSyntax(object): @@ -10,13 +29,23 @@ class BaseSyntax(object): STATEMENT_END_START_SEQUENCE = '%' STATEMENT_END_END_SEQUENCE = '' + # Parsers + # ---------------------------------- + PARSE_DOCTYPE_RE = re.compile('doctype\s+(?P[0-9a-z\.]+)', re.IGNORECASE) + PARSE_STYLE_SCRIPT_RE = re.compile('(?:style|script).*', re.IGNORECASE) + PARSE_HANDLEBARS_RE = re.compile('(?:handlebars).*') + PARSE_TAG_TREE_RE = re.compile('(?:#|\.|{tag}).*'.format(tag=l.TAG_RULE)) + # This constant uses l.LITERAL_CONTENT_PREFIX and l.LITERAL_CONTENT_SPACE_PREFIX + PARSE_EXPLICIT_LITERAL_RE = re.compile("(?:\||,).*", re.IGNORECASE) + PARSE_IMPLICIT_LITERAL_RE = PARSE_IMPLICIT_LITERAL_RE + STANDARD_PARSERS = ( # Order matters - (l.PARSE_STYLE_SCRIPT_RE, l.parse_style_script), - (l.PARSE_DOCTYPE_RE, l.parse_doctype), - (l.PARSE_HANDLEBARS_RE, l.parse_handlebars), - (l.PARSE_TAG_TREE_RE, l.parse_tag_tree), - (l.PARSE_EXPLICIT_LITERAL_RE, l.parse_explicit_literal_with_embedded_markup), - (l.PARSE_IMPLICIT_LITERAL_RE, l.parse_implicit_literal), + (PARSE_STYLE_SCRIPT_RE, l.parse_style_script), + (PARSE_DOCTYPE_RE, l.parse_doctype), + (PARSE_HANDLEBARS_RE, l.parse_handlebars), + (PARSE_TAG_TREE_RE, l.parse_tag_tree), + (PARSE_EXPLICIT_LITERAL_RE, l.parse_explicit_literal_with_embedded_markup), + (PARSE_IMPLICIT_LITERAL_RE, l.parse_implicit_literal), (l.PARSE_RAW_HTML_RE, l.parse_raw_html), (l.PARSE_VARIABLE_RE, l.parse_variable), (l.PARSE_COMMENT_RE, l.parse_comment),