Add comments to the UL4 lexers.
Fix list of UL4 operators in lexer.

Fix list of keywords for the `<?end?>` tag.
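
The effect of the two fixes is easiest to see on a small template: the operator rule now recognizes `!=` as a single token, and `while` is accepted as a keyword inside an `<?end ...?>` tag. Below is a rough usage sketch, not part of the commit; the template text is made up, and `UL4Lexer` is assumed to be in scope as defined in `docs/conf.py` further down.

```python
from pygments import highlight
from pygments.formatters import TerminalFormatter

# Hypothetical UL4 template exercising the fixed rules:
# the ``!=`` operator and ``while`` after ``<?end?>``.
source = (
    "<?ul4 countdown(n=3)?>"
    "<?whitespace strip?>"
    "<?while n != 0?>"
    "<?print n?> "
    "<?code n -= 1?>"
    "<?end while?>"
)

# UL4Lexer is assumed to be defined exactly as in docs/conf.py below.
print(highlight(source, UL4Lexer(), TerminalFormatter()))
```
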
doerwalter committed Jan 20, 2022
1 parent 5b10f40 commit 6ff106f
Showing 1 changed file with 57 additions and 11 deletions.
docs/conf.py: 68 changes (57 additions & 11 deletions)
@@ -422,7 +422,7 @@ class HTML5Translator(html5.HTML5Translator):

class UL4Lexer(lexer.RegexLexer):
"""
Generic lexer for UL4 (the Universal LivingLogic Layout Language).
Generic lexer for UL4.
"""

flags = re.MULTILINE | re.DOTALL | re.UNICODE
@@ -434,109 +434,155 @@ class UL4Lexer(lexer.RegexLexer):
tokens = {
"root": [
(
# Template header without name:
# ``<?ul4?>``
r"(<\?)(\s*)(ul4)(\s*)(\?>)",
lexer.bygroups(token.Comment.Preproc, token.Text, token.Keyword, token.Text, token.Comment.Preproc),
),
(
# Template header with name (potentially followed by the signature):
# ``<?ul4 foo(bar=42)?>``
r"(<\?)(\s*)(ul4)(\s*)([a-zA-Z_][a-zA-Z_0-9]*)?\b",
lexer.bygroups(token.Comment.Preproc, token.Text, token.Keyword, token.Text, token.Token.Name.Function),
"ul4",
"ul4", # Switch to "expression" mode
),
(
# Comment:
# ``<?note foobar?>``
r"<\?\s*note\s+.*?\?>",
token.Comment,
),
(
# Template documentation:
# ``<?doc foobar?>``
r"<\?\s*doc\s+.*?\?>",
token.String.Doc,
),
(
# ``<?ignore?>`` tag for commenting out code:
# ``<?ignore?>...<?end ignore?>``
r"<\?\s*ignore\s*\?>",
token.Comment,
"ignore",
"ignore", # Switch to "ignore" mode
),
(
# ``<?def?>`` tag for defining local templates
# ``<?def foo(bar=42)?>...<?end def?>``
r"(<\?\s*)(def)(\s*)([a-zA-Z_][a-zA-Z_0-9]*)?\b",
lexer.bygroups(token.Comment.Preproc, token.Keyword, token.Text, token.Token.Name.Function),
"ul4",
"ul4", # Switch to "expression" mode
),
(
# The rest of the supported tags
r"(<\?)(\s*)(printx|print|for|if|elif|else|while|code|renderblocks?|render)\b",
lexer.bygroups(token.Comment.Preproc, token.Text, token.Keyword),
"ul4",
"ul4", # Switch to "expression" mode
),
(
# ``<?end?>`` tag for ending ``<?def?>``, ``<?for?>``,
# ``<?if?>``, ``<?while?>``, ``<?renderblock?>`` and
# ``<?renderblocks?>`` blocks.
r"(<\?)(\s*)(end)\b",
lexer.bygroups(token.Comment.Preproc, token.Text, token.Keyword),
"end",
"end", # Switch to "end tag" mode
),
(
# ``<?whitespace?>`` tag for configuring whitespace handling
r"(<\?)(\s*)(whitespace)\b",
lexer.bygroups(token.Comment.Preproc, token.Text, token.Keyword),
"whitespace",
"whitespace", # Switch to "whitespace" mode
),
# Plain text
(r"[^<]+", token.Token.Other),
(r"<", token.Token.Other),
],
# Ignore mode ignores everything up to the matching ``<?end ignore?>`` tag
"ignore": [
# Nested ``<?ignore?>`` tag
(r"<\?\s*ignore\s*\?>", token.Comment, "ignore"),
# ``<?end ignore?>`` tag
(r"<\?\s*end\s+ignore\s*\?>", token.Comment, "#pop"),
# Everything else
(r".+", token.Comment),
],
# UL4 expressions
"ul4": [
# End the tag
(r"\?>", token.Comment.Preproc, "#pop"),
# Start triple quoted string constant
("'''", token.String, "string13"),
('""""', token.String, "string23"),
# Start single quoted string constant
("'", token.String, "string1"),
('"', token.String, "string2"),
# Floating point number
(r"\d+\.\d*([eE][+-]?\d+)?", token.Number.Float),
(r"\.\d+([eE][+-]?\d+)?", token.Number.Float),
(r"\d+[eE][+-]?\d+", token.Number.Float),
# Binary integer: ``0b101010``
(r"0[bB][01]+", token.Number.Bin),
# Octal integer: ``0o52``
(r"0[oO][0-7]+", token.Number.Oct),
# Hexadecimal integer: ``0x2a``
(r"0[xX][0-9a-fA-F]+", token.Number.Hex),
# Date or datetime: ``@(2000-02-29)``/``@(2000-02-29T12:34:56.987654)``
(r"@\(\d\d\d\d-\d\d-\d\d(T(\d\d:\d\d(:\d\d(\.\d{6})?)?)?)?\)", token.Literal.Date),
# Color: ``#fff``, ``#fff8f0`` etc.
(r"#[0-9a-fA-F]{8}", token.Literal.Color),
(r"#[0-9a-fA-F]{6}", token.Literal.Color),
(r"#[0-9a-fA-F]{3,4}", token.Literal.Color),
# Decimal integer: ``42``
(r"\d+", token.Number.Integer),
(r"//|==|=|>=|<=|<<|>>|\+=|-=|\*=|/=|//=|<<=|>>=|&=|\|=|^=|[\[\]{},:*/().~%&|<>^+-]", token.Token.Operator),
# Operators
(r"//|==|!=|>=|<=|<<|>>|\+=|-=|\*=|/=|//=|<<=|>>=|&=|\|=|^=|=|[\[\]{},:*/().~%&|<>^+-]", token.Token.Operator),
# Keywords
(lexer.words(("for", "in", "if", "else", "not", "is", "and", "or"), suffix=r"\b"), token.Keyword),
# Builtin constants
(lexer.words(("None", "False", "True"), suffix=r"\b"), token.Keyword.Constant),
# Variable names
(r"[a-zA-Z_][a-zA-Z0-9_]*", token.Name),
# Whitespace
(r"\s+", token.Text),
],
# ``<?end ...?>`` tag for closing the last open block
"end": [
(r"\?>", token.Comment.Preproc, "#pop"),
(lexer.words(("for", "if", "def", "renderblock", "renderblocks"), suffix=r"\b"), token.Keyword),
(lexer.words(("for", "if", "def", "while", "renderblock", "renderblocks"), suffix=r"\b"), token.Keyword),
(r"\s+", token.Text),
(r".", token.Error),
(r".", token.Error), # Unrecognized tag name
],
# Content of the ``<?whitespace ...?>`` tag:
# ``keep``, ``strip`` or ``smart``
"whitespace": [
(r"\?>", token.Comment.Preproc, "#pop"),
(lexer.words(("keep", "strip", "smart"), suffix=r"\b"), token.Comment.Preproc),
(r"\s+", token.Text),
(r".", token.Error),
(r".", token.Error), # Unrecognized whitespace mode
],
# Inside a string constant
"string": [
("\\\\['\"abtnfr]", token.String.Escape),
(r"\\x[0-9a-fA-F]{2}", token.String.Escape),
(r"\\u[0-9a-fA-F]{4}", token.String.Escape),
(r"\\U[0-9a-fA-F]{8}", token.String.Escape),
(r".", token.String),
],
# Inside a triple quoted string started with ``'''``
"string13": [
(r"'''", token.String, "#pop"),
lexer.include("string"),
],
# Inside a triple quoted string started with ``"""``
"string23": [
(r'"""', token.String, "#pop"),
lexer.include("string"),
],
# Inside a single quoted string started with ``'``
"string1": [
(r"'", token.String, "#pop"),
lexer.include("string"),
],
# Inside a single quoted string started with ``"``
"string2": [
(r'"', token.String, "#pop"),
lexer.include("string"),