From 5f48a6e19fb083a91d941d568370dd65ed500868 Mon Sep 17 00:00:00 2001
From: Jens Maurer <Jens.Maurer@gmx.net>
Date: Wed, 27 Jul 2022 01:01:26 +0200
Subject: [PATCH 1/2] P2071R2 Named universal character escapes

---
 source/lex.tex          | 144 +++++++++++++++++++++++++++++++++++++++-
 source/preprocessor.tex |   1 +
 2 files changed, 143 insertions(+), 2 deletions(-)

diff --git a/source/lex.tex b/source/lex.tex
index 7fb88c4f94..2c7fed4d5b 100644
--- a/source/lex.tex
+++ b/source/lex.tex
@@ -288,6 +288,25 @@
 The \grammarterm{universal-character-name} construct provides a way to name
 other characters.
 
+\begin{bnf}
+\nontermdef{n-char} \textnormal{one of}\br
+    \terminal{A B C D E F G H I J K L M N O P Q R S T U V W X Y Z}\br
+    \terminal{0 1 2 3 4 5 6 7 8 9}\br
+    \textnormal{\unicode{002d}{hyphen-minus}}\br
+    \textnormal{\unicode{0020}{space}}
+\end{bnf}
+
+\begin{bnf}
+\nontermdef{n-char-sequence}\br
+    n-char\br
+    n-char-sequence n-char
+\end{bnf}
+
+\begin{bnf}
+\nontermdef{named-universal-character}\br
+    \terminal{\textbackslash N\{} n-char-sequence \terminal{\}}
+\end{bnf}
+
 \begin{bnf}
 \nontermdef{hex-quad}\br
     hexadecimal-digit hexadecimal-digit hexadecimal-digit hexadecimal-digit
@@ -303,15 +322,136 @@
 \nontermdef{universal-character-name}\br
     \terminal{\textbackslash u} hex-quad\br
     \terminal{\textbackslash U} hex-quad hex-quad\br
-    \terminal{\textbackslash u\{} simple-hexadecimal-digit-sequence \terminal{\}}
+    \terminal{\textbackslash u\{} simple-hexadecimal-digit-sequence \terminal{\}}\br
+    named-universal-character
 \end{bnf}
 
+\pnum
 A \grammarterm{universal-character-name}
+of the form \tcode{\textbackslash u} \grammarterm{hex-quad} or
+\tcode{\textbackslash U} \grammarterm{hex-quad} \grammarterm{hex-quad}
 designates the character in the translation character set
 whose UCS scalar value is the hexadecimal number represented by
 the sequence of \grammarterm{hexadecimal-digit}s
 in the \grammarterm{universal-character-name}.
 The program is ill-formed if that number is not a UCS scalar value.
+
+\pnum
+A \grammarterm{universal-character-name}
+that is a \grammarterm{named-universal-character}
+designates the character named by its \grammarterm{n-char-sequence}.
+A character is so named if the \grammarterm{n-char-sequence} is equal to
+\begin{itemize}
+\item
+the associated character name or associated character name alias
+specified in ISO/IEC 10646 subclause ``Code charts and lists of character names''
+or
+\item
+the control code alias given in \tref{lex.charset.ucn}.
+\begin{note}
+The aliases in \tref{lex.charset.ucn} are provided for control characters
+that otherwise have no associated character name or character name alias.
+These names are derived from
+the Unicode Character Database's \tcode{NameAliases.txt}.
+For historical reasons, control characters are formally unnamed.
+\end{note}
+\end{itemize}
+\begin{note}
+None of the associated character names,
+associated character name aliases, or
+control code aliases
+have leading or trailing spaces.
+\end{note}
+
+\begin{multicolfloattable}{Control code aliases}{lex.charset.ucn}{ll}
+\unicode{0000}{null} \\
+\unicode{0001}{start of heading} \\
+\unicode{0002}{start of text} \\
+\unicode{0003}{end of text} \\
+\unicode{0004}{end of transmission} \\
+\unicode{0005}{enquiry} \\
+\unicode{0006}{acknowledge} \\
+\unicode{0007}{alert} \\
+\unicode{0008}{backspace} \\
+\unicode{0009}{character tabulation} \\
+\unicode{0009}{horizontal tabulation} \\
+\unicode{000a}{line feed} \\
+\unicode{000a}{new line} \\
+\unicode{000a}{end of line} \\
+\unicode{000b}{line tabulation} \\
+\unicode{000b}{vertical tabulation} \\
+\unicode{000c}{form feed} \\
+\unicode{000d}{carriage return} \\
+\unicode{000e}{shift out} \\
+\unicode{000e}{locking-shift one} \\
+\unicode{000f}{shift in} \\
+\unicode{000f}{locking-shift zero} \\
+\unicode{0010}{data link escape} \\
+\unicode{0011}{device control one} \\
+\unicode{0012}{device control two} \\
+\unicode{0013}{device control three} \\
+\unicode{0014}{device control four} \\
+\unicode{0015}{negative acknowledge} \\
+\unicode{0016}{synchronous idle} \\
+\unicode{0017}{end of transmission block} \\
+\unicode{0018}{cancel} \\
+\unicode{0019}{end of medium} \\
+\unicode{001a}{substitute} \\
+\unicode{001b}{escape} \\
+\unicode{001c}{information separator four} \\
+\unicode{001c}{file separator} \\
+\unicode{001d}{information separator three} \\
+\unicode{001d}{group separator} \\
+\unicode{001e}{information separator two} \\
+\unicode{001e}{record separator} \\
+\unicode{001f}{information separator one} \\
+\unicode{001f}{unit separator} \\
+\columnbreak
+\unicode{007f}{delete} \\
+\unicode{0082}{break permitted here} \\
+\unicode{0083}{no break here} \\
+\unicode{0084}{index} \\
+\unicode{0085}{next line} \\
+\unicode{0086}{start of selected area} \\
+\unicode{0087}{end of selected area} \\
+\unicode{0088}{character tabulation set} \\
+\unicode{0088}{horizontal tabulation set} \\
+\unicode{0089}{character tabulation with justification} \\
+\unicode{0089}{horizontal tabulation with justification} \\
+\unicode{008a}{line tabulation set} \\
+\unicode{008a}{vertical tabulation set} \\
+\unicode{008b}{partial line forward} \\
+\unicode{008b}{partial line down} \\
+\unicode{008c}{partial line backward} \\
+\unicode{008c}{partial line up} \\
+\unicode{008d}{reverse line feed} \\
+\unicode{008d}{reverse index} \\
+\unicode{008e}{single shift two} \\
+\unicode{008e}{single-shift-2} \\
+\unicode{008f}{single shift three} \\
+\unicode{008f}{single-shift-3} \\
+\unicode{0090}{device control string} \\
+\unicode{0091}{private use one} \\
+\unicode{0091}{private use-1} \\
+\unicode{0092}{private use two} \\
+\unicode{0092}{private use-2} \\
+\unicode{0093}{set transmit state} \\
+\unicode{0094}{cancel character} \\
+\unicode{0095}{message waiting} \\
+\unicode{0096}{start of guarded area} \\
+\unicode{0096}{start of protected area} \\
+\unicode{0097}{end of guarded area} \\
+\unicode{0097}{end of protected area} \\
+\unicode{0098}{start of string} \\
+\unicode{009a}{single character introducer} \\
+\unicode{009b}{control sequence introducer} \\
+\unicode{009c}{string terminator} \\
+\unicode{009d}{operating system command} \\
+\unicode{009e}{privacy message} \\
+\unicode{009f}{application program command} \\
+\end{multicolfloattable}
+
+\pnum
 If a \grammarterm{universal-character-name} outside
 the \grammarterm{c-char-sequence}, \grammarterm{s-char-sequence}, or
 \grammarterm{r-char-sequence} of
@@ -1345,7 +1485,7 @@
 
 \begin{bnf}
 \nontermdef{conditional-escape-sequence-char}\br
-    \textnormal{any member of the basic character set that is not an} octal-digit\textnormal{, a} simple-escape-sequence-char\textnormal{, or the characters \terminal{o}, \terminal{u}, \terminal{U}, or \terminal{x}}
+    \textnormal{any member of the basic character set that is not an} octal-digit\textnormal{, a} simple-escape-sequence-char\textnormal{, or the characters \terminal{N}, \terminal{o}, \terminal{u}, \terminal{U}, or \terminal{x}}
 \end{bnf}
 
 \pnum
diff --git a/source/preprocessor.tex b/source/preprocessor.tex
index 775055f57e..190f7dc1eb 100644
--- a/source/preprocessor.tex
+++ b/source/preprocessor.tex
@@ -1789,6 +1789,7 @@
 \defnxname{cpp_lambdas}                           & \tcode{200907L} \\ \rowsep
 \defnxname{cpp_modules}                           & \tcode{201907L} \\ \rowsep
 \defnxname{cpp_multidimensional_subscript}        & \tcode{202110L} \\ \rowsep
+\defnxname{cpp_named_character_escapes}           & \tcode{202207L} \\ \rowsep
 \defnxname{cpp_namespace_attributes}              & \tcode{201411L} \\ \rowsep
 \defnxname{cpp_noexcept_function_type}            & \tcode{201510L} \\ \rowsep
 \defnxname{cpp_nontype_template_args}             & \tcode{201911L} \\ \rowsep

From f9cb3dbcc6cf10632aa453b179d927d1b6be459d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20K=C3=B6ppe?= <tkoeppe@google.com>
Date: Fri, 5 Aug 2022 23:07:37 +0100
Subject: [PATCH 2/2] [lex.charset] Add delimited form \u{...} of universal
 character name

The adoption of P2290R3 "Delimited escape sequences" via CWG Motion 8
added another form of universal character name, which we now need to
list as well.
---
 source/lex.tex | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/source/lex.tex b/source/lex.tex
index 2c7fed4d5b..afc0772746 100644
--- a/source/lex.tex
+++ b/source/lex.tex
@@ -328,8 +328,9 @@
 
 \pnum
 A \grammarterm{universal-character-name}
-of the form \tcode{\textbackslash u} \grammarterm{hex-quad} or
-\tcode{\textbackslash U} \grammarterm{hex-quad} \grammarterm{hex-quad}
+of the form \tcode{\textbackslash u} \grammarterm{hex-quad},
+\tcode{\textbackslash U} \grammarterm{hex-quad} \grammarterm{hex-quad}, or
+\tcode{\textbackslash u\{\grammarterm{simple-hexadecimal-digit-sequence}\}}
 designates the character in the translation character set
 whose UCS scalar value is the hexadecimal number represented by
 the sequence of \grammarterm{hexadecimal-digit}s