Skip to content

Commit

Permalink
P2071R2 Named universal character escapes
Browse files Browse the repository at this point in the history
  • Loading branch information
jensmaurer authored and tkoeppe committed Aug 5, 2022
1 parent d696d94 commit 5f48a6e
Show file tree
Hide file tree
Showing 2 changed files with 143 additions and 2 deletions.
144 changes: 142 additions & 2 deletions source/lex.tex
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,25 @@
The \grammarterm{universal-character-name} construct provides a way to name
other characters.

\begin{bnf}
\nontermdef{n-char} \textnormal{one of}\br
\terminal{A B C D E F G H I J K L M N O P Q R S T U V W X Y Z}\br
\terminal{0 1 2 3 4 5 6 7 8 9}\br
\textnormal{\unicode{002d}{hyphen-minus}}\br
\textnormal{\unicode{0020}{space}}
\end{bnf}

\begin{bnf}
\nontermdef{n-char-sequence}\br
n-char\br
n-char-sequence n-char
\end{bnf}

\begin{bnf}
\nontermdef{named-universal-character}\br
\terminal{\textbackslash N\{} n-char-sequence \terminal{\}}
\end{bnf}

\begin{bnf}
\nontermdef{hex-quad}\br
hexadecimal-digit hexadecimal-digit hexadecimal-digit hexadecimal-digit
Expand All @@ -303,15 +322,136 @@
\nontermdef{universal-character-name}\br
\terminal{\textbackslash u} hex-quad\br
\terminal{\textbackslash U} hex-quad hex-quad\br
\terminal{\textbackslash u\{} simple-hexadecimal-digit-sequence \terminal{\}}
\terminal{\textbackslash u\{} simple-hexadecimal-digit-sequence \terminal{\}}\br
named-universal-character
\end{bnf}

\pnum
A \grammarterm{universal-character-name}
of the form \tcode{\textbackslash u} \grammarterm{hex-quad},
\tcode{\textbackslash U} \grammarterm{hex-quad} \grammarterm{hex-quad}, or
\tcode{\textbackslash u\{}\grammarterm{simple-hexadecimal-digit-sequence}\tcode{\}}
designates the character in the translation character set
whose UCS scalar value is the hexadecimal number represented by
the sequence of \grammarterm{hexadecimal-digit}s
in the \grammarterm{universal-character-name}.
The program is ill-formed if that number is not a UCS scalar value.

\pnum
A \grammarterm{universal-character-name}
that is a \grammarterm{named-universal-character}
designates the character named by its \grammarterm{n-char-sequence}.
A character is so named if the \grammarterm{n-char-sequence} is equal to
\begin{itemize}
\item
the associated character name or associated character name alias
specified in ISO/IEC 10646 subclause ``Code charts and lists of character names''
or
\item
the control code alias given in \tref{lex.charset.ucn}.
\begin{note}
The aliases in \tref{lex.charset.ucn} are provided for control characters
which otherwise have no associated character name or character name alias.
These names are derived from
the Unicode Character Database's \tcode{NameAliases.txt}.
For historical reasons, control characters are formally unnamed.
\end{note}
\end{itemize}
\begin{note}
None of the associated character names,
associated character name aliases, or
control code aliases
have leading or trailing spaces.
\end{note}

\begin{multicolfloattable}{Control code aliases}{lex.charset.ucn}{ll}
\unicode{0000}{null} \\
\unicode{0001}{start of heading} \\
\unicode{0002}{start of text} \\
\unicode{0003}{end of text} \\
\unicode{0004}{end of transmission} \\
\unicode{0005}{enquiry} \\
\unicode{0006}{acknowledge} \\
\unicode{0007}{alert} \\
\unicode{0008}{backspace} \\
\unicode{0009}{character tabulation} \\
\unicode{0009}{horizontal tabulation} \\
\unicode{000a}{line feed} \\
\unicode{000a}{new line} \\
\unicode{000a}{end of line} \\
\unicode{000b}{line tabulation} \\
\unicode{000b}{vertical tabulation} \\
\unicode{000c}{form feed} \\
\unicode{000d}{carriage return} \\
\unicode{000e}{shift out} \\
\unicode{000e}{locking-shift one} \\
\unicode{000f}{shift in} \\
\unicode{000f}{locking-shift zero} \\
\unicode{0010}{data link escape} \\
\unicode{0011}{device control one} \\
\unicode{0012}{device control two} \\
\unicode{0013}{device control three} \\
\unicode{0014}{device control four} \\
\unicode{0015}{negative acknowledge} \\
\unicode{0016}{synchronous idle} \\
\unicode{0017}{end of transmission block} \\
\unicode{0018}{cancel} \\
\unicode{0019}{end of medium} \\
\unicode{001a}{substitute} \\
\unicode{001b}{escape} \\
\unicode{001c}{information separator four} \\
\unicode{001c}{file separator} \\
\unicode{001d}{information separator three} \\
\unicode{001d}{group separator} \\
\unicode{001e}{information separator two} \\
\unicode{001e}{record separator} \\
\unicode{001f}{information separator one} \\
\unicode{001f}{unit separator} \\
\columnbreak
\unicode{007f}{delete} \\
\unicode{0082}{break permitted here} \\
\unicode{0083}{no break here} \\
\unicode{0084}{index} \\
\unicode{0085}{next line} \\
\unicode{0086}{start of selected area} \\
\unicode{0087}{end of selected area} \\
\unicode{0088}{character tabulation set} \\
\unicode{0088}{horizontal tabulation set} \\
\unicode{0089}{character tabulation with justification} \\
\unicode{0089}{horizontal tabulation with justification} \\
\unicode{008a}{line tabulation set} \\
\unicode{008a}{vertical tabulation set} \\
\unicode{008b}{partial line forward} \\
\unicode{008b}{partial line down} \\
\unicode{008c}{partial line backward} \\
\unicode{008c}{partial line up} \\
\unicode{008d}{reverse line feed} \\
\unicode{008d}{reverse index} \\
\unicode{008e}{single shift two} \\
\unicode{008e}{single-shift-2} \\
\unicode{008f}{single shift three} \\
\unicode{008f}{single-shift-3} \\
\unicode{0090}{device control string} \\
\unicode{0091}{private use one} \\
\unicode{0091}{private use-1} \\
\unicode{0092}{private use two} \\
\unicode{0092}{private use-2} \\
\unicode{0093}{set transmit state} \\
\unicode{0094}{cancel character} \\
\unicode{0095}{message waiting} \\
\unicode{0096}{start of guarded area} \\
\unicode{0096}{start of protected area} \\
\unicode{0097}{end of guarded area} \\
\unicode{0097}{end of protected area} \\
\unicode{0098}{start of string} \\
\unicode{009a}{single character introducer} \\
\unicode{009b}{control sequence introducer} \\
\unicode{009c}{string terminator} \\
\unicode{009d}{operating system command} \\
\unicode{009e}{privacy message} \\
\unicode{009f}{application program command} \\
\end{multicolfloattable}

\pnum
If a \grammarterm{universal-character-name} outside
the \grammarterm{c-char-sequence}, \grammarterm{s-char-sequence}, or
\grammarterm{r-char-sequence} of
Expand Down Expand Up @@ -1345,7 +1485,7 @@

\begin{bnf}
\nontermdef{conditional-escape-sequence-char}\br
\textnormal{any member of the basic character set that is not an} octal-digit\textnormal{, a} simple-escape-sequence-char\textnormal{, or the characters \terminal{o}, \terminal{u}, \terminal{U}, or \terminal{x}}
\textnormal{any member of the basic character set that is not an} octal-digit\textnormal{, a} simple-escape-sequence-char\textnormal{, or the characters \terminal{N}, \terminal{o}, \terminal{u}, \terminal{U}, or \terminal{x}}
\end{bnf}

\pnum
Expand Down
1 change: 1 addition & 0 deletions source/preprocessor.tex
Original file line number Diff line number Diff line change
Expand Up @@ -1789,6 +1789,7 @@
\defnxname{cpp_lambdas} & \tcode{200907L} \\ \rowsep
\defnxname{cpp_modules} & \tcode{201907L} \\ \rowsep
\defnxname{cpp_multidimensional_subscript} & \tcode{202110L} \\ \rowsep
\defnxname{cpp_named_character_escapes} & \tcode{202207L} \\ \rowsep
\defnxname{cpp_namespace_attributes} & \tcode{201411L} \\ \rowsep
\defnxname{cpp_noexcept_function_type} & \tcode{201510L} \\ \rowsep
\defnxname{cpp_nontype_template_args} & \tcode{201911L} \\ \rowsep
Expand Down

0 comments on commit 5f48a6e

Please sign in to comment.