Merge pull request modelica#2846 from henrikt-ma/cleanup/extended-bnf

Cleanup formatting related to lexical conventions
henrikt-ma · Feb 12, 2021 · bd035cc · bd035cc
2 parents 2722a65 + 91475b6
commit bd035cc
Show file tree

Hide file tree

Showing 3 changed files with 31 additions and 32 deletions.
diff --git a/chapters/annotations.tex b/chapters/annotations.tex
@@ -1598,6 +1598,7 @@ \subsection{Version Numbering}\label{version-numbering}
 \subsection{Version Handling}\label{version-handling}
 
 In a top-level class, the version number and the dependency on earlier versions of this class are defined using one or more of the following annotations:
+% TODO: Syntax below is a mess: neither Modelica, pseudo-code record, nor grammar.
 \begin{itemize}
 \item
   \lstinline!version = CURRENT-VERSION-NUMBER!\annotationindex{version}\\

diff --git a/chapters/lexicalstructure.tex b/chapters/lexicalstructure.tex
@@ -67,19 +67,19 @@ \subsection{Identifiers}\label{identifiers}
 Modelica \firstuse{identifiers}\index{identifier}, used for naming classes, variables, constants, and other items, are of two forms.
 The first form always starts with a letter or underscore (`\_'), followed by any number of letters, digits, or underscores.
 Case is significant, i.e., the identifiers \lstinline!Inductor! and \lstinline!inductor! are different.
-The second form \lstinline!(Q-IDENT)! starts with a single quote, followed by a sequence of any printable ASCII character, where single-quote must be preceded by backslash, and terminated by a single quote, e.g.\ \lstinline!'12H'!, \lstinline!'13\'H'!, \lstinline!'+foo'!.
+The second form (\lstinline[language=grammar]!Q-IDENT!) starts with a single quote, followed by a sequence of any printable ASCII character, where single-quote must be preceded by backslash, and terminated by a single quote, e.g.\ \lstinline!'12H'!, \lstinline!'13\'H'!, \lstinline!'+foo'!.
 Control characters in quoted identifiers have to use string escapes.
 The single quotes are part of the identifier, i.e., \lstinline!'x'! and \lstinline!x! are distinct identifiers.
 The redundant escapes (\lstinline!'\?'! and \lstinline!'\"'!) are the same as the corresponding non-escaped variants (\lstinline!'?'! and \lstinline!'"'!), but are only for use in Modelica source code.
 A full BNF definition of the Modelica syntax and lexical units is available in \cref{modelica-concrete-syntax}.
 
 % For easy maintenance, the lexing rules below should be a substring of the full lexing rules in \cref{lexical-conventions}.
 \begin{lstlisting}[language=grammar]
-IDENT = NONDIGIT { DIGIT | NONDIGIT } | Q-IDENT
+IDENT = NON-DIGIT { DIGIT | NON-DIGIT } | Q-IDENT
 Q-IDENT = "'" { Q-CHAR | S-ESCAPE } "'"
-NONDIGIT = "_" | letters "a" $\ldots$ "z" | letters "A" $\ldots$ "Z"
+NON-DIGIT = "_" | letters "a" $\ldots$ "z" | letters "A" $\ldots$ "Z"
 DIGIT = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
-Q-CHAR = NONDIGIT | DIGIT | "!" | "#" | "$\mbox{\textdollar}$" | "%" | "&" | "(" | ")"
+Q-CHAR = NON-DIGIT | DIGIT | "!" | "#" | "$\mbox{\textdollar}$" | "%" | "&" | "(" | ")"
    | "*" | "+" | "," | "-" | "." | "/" | ":" | ";" | "<" | ">" | "="
    | "?" | "@" | "[" | "]" | "^" | "{" | "}" | "|" | "~" | " " | """
 S-ESCAPE = "\'" | "\"" | "\?" | "\\"

diff --git a/chapters/syntax.tex b/chapters/syntax.tex
@@ -2,23 +2,28 @@ \chapter{Modelica Concrete Syntax}\label{modelica-concrete-syntax}
 \section{Lexical conventions}\label{lexical-conventions}
 
 The following syntactic meta symbols are used (extended BNF):
-\begin{lstlisting}[language=grammar]
-[ ] optional
-{ } repeat zero or more times
-| or
-"text" The text is treated as a single token (no white-space between any characters)
-\end{lstlisting}
-
-The following lexical units are defined (the ones in boldface are the
-ones used in the grammar, the rest are just internal to the definition
-of other lexical units):
+\begin{center}
+\begin{tabular}{c l}
+\hline
+\tablehead{Syntax} & \tablehead{Description}\\
+\hline
+\hline
+\lstinline[language=grammar]![ $\ldots$ ]! & Optional\\
+\lstinline[language=grammar]!{ $\ldots$ }! & Repeat zero or more times\\
+\lstinline[language=grammar]!$\ldots$ | $\ldots$! & Alternatives\\
+\lstinline[language=grammar]!"$\mathit{text}$"! & The $\mathit{text}$ is treated as a single token (no white-space between any characters)\\
+\hline
+\end{tabular}
+\end{center}
+
+The following lexical units are defined:
 % Beware that the first lines of the lexing rules below are duplicated in \cref{identifiers}, and must be kept in sync.
 \begin{lstlisting}[language=grammar]
-IDENT = NONDIGIT { DIGIT | NONDIGIT } | Q-IDENT
+IDENT = NON-DIGIT { DIGIT | NON-DIGIT } | Q-IDENT
 Q-IDENT = "'" { Q-CHAR | S-ESCAPE } "'"
-NONDIGIT = "_" | letters "a" $\ldots$ "z" | letters "A" $\ldots$ "Z"
+NON-DIGIT = "_" | letters "a" $\ldots$ "z" | letters "A" $\ldots$ "Z"
 DIGIT = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
-Q-CHAR = NONDIGIT | DIGIT | "!" | "#" | "$\mbox{\textdollar}$" | "%" | "&" | "(" | ")"
+Q-CHAR = NON-DIGIT | DIGIT | "!" | "#" | "$\mbox{\textdollar}$" | "%" | "&" | "(" | ")"
    | "*" | "+" | "," | "-" | "." | "/" | ":" | ";" | "<" | ">" | "="
    | "?" | "@" | "[" | "]" | "^" | "{" | "}" | "|" | "~" | " " | """
 S-ESCAPE = "\'" | "\"" | "\?" | "\\"
@@ -32,22 +37,17 @@ \section{Lexical conventions}\label{lexical-conventions}
      ( "e" | "E" ) [ "+" | "-" ] UNSIGNED-INTEGER
    | "."  UNSIGNED-INTEGER [ ( "e" | "E" ) [ "+" | "-" ] UNSIGNED-INTEGER ]
 \end{lstlisting}
-\textrm{S-CHAR} is any member of the Unicode character set
-(\url{http://www.unicode.org}; see \cref{mapping-package-class-structures-to-a-hierarchical-file-system} for storing as UTF-8 on files) except double-quote `"', and backslash `\textbackslash{}'.
+\lstinline[language=grammar]!S-CHAR! is any member of the Unicode character set (\url{http://www.unicode.org}; see \cref{mapping-package-class-structures-to-a-hierarchical-file-system} for storing as UTF-8 on files) except double-quote `"' and backslash `\textbackslash{}'.
 
-For identifiers the redundant escapes (`\lstinline!\?!' and `\lstinline!\"!') are the same as the corresponding non-escaped
-variants (`\lstinline!?!' and '\lstinline!"!').  The single quotes are part of an identifier. E.g.\ \lstinline!'x'! and
-\lstinline!x! are different IDENTs.
+For identifiers the redundant escapes (`\lstinline!\?!' and `\lstinline!\"!') are the same as the corresponding non-escaped variants (`\lstinline!?!' and `\lstinline!"!').
+The single quotes are part of an identifier.
+For example, the identifiers \lstinline!'x'! and \lstinline!x! are different.
 
 Note:
 \begin{itemize}
 \item
-  White-space and comments can be used between separate lexical units
-  and/or symbols, and also separates them. Each lexical unit will consume the maximum number of characters from the input stream.
-  White-space and comments
-  cannot be used inside other lexical units, except for STRING and
-  Q-IDENT where they are treated as part of the STRING or Q-IDENT
-  lexical unit.
+  White-space and comments can be used between separate lexical units and/or symbols, and also separate them. Each lexical unit will consume the maximum number of characters from the input stream.
+  White-space and comments cannot be used inside other lexical units, except for \lstinline[language=grammar]!STRING! and \lstinline[language=grammar]!Q-IDENT! where they are treated as part of the \lstinline[language=grammar]!STRING! or \lstinline[language=grammar]!Q-IDENT! lexical unit.
 \item
   Concatenation of string literals requires a binary expression.
   For example, \lstinline!"a" + "b"! evaluates to \lstinline!"ab"!.
@@ -56,10 +56,8 @@ \section{Lexical conventions}\label{lexical-conventions}
   Modelica uses the same comment syntax as C++ and Java (i.e., \lstinline!//! signals the start of a line comment and \lstinline!/* $\ldots$ */! is a multi-line comment); comments may contain any Unicode character.
   Modelica also has structured comments in the form of annotations and string comments.
 \item
-  Each description-string or string in annotations (= STRING with production annotation-clause in the
-  grammar) may contain any member of the Unicode character set. All
-  other strings have to contain only the sub-set of Unicode characters
-  identical with the 7-bit US-ASCII character set.
+  Each description-string or string in annotations (= \lstinline[language=grammar]!STRING! with production annotation-clause in the grammar) may contain any member of the Unicode character set.
+  All other strings have to contain only the sub-set of Unicode characters identical with the 7-bit US-ASCII character set.
   \begin{nonnormative}
   As a consequence, operators like `\lstinline!>!' or `\lstinline!<!', and external functions only operate on ASCII strings and not on Unicode-strings.
   Within a description-string the tags \lstinline!<HTML>! and \lstinline!</HTML>! or \lstinline!<html>! and \lstinline!</html>! optionally define the beginning and end of content that is HTML encoded.