From 8d7e1993ffeacc418c12dc6be3ca98340c97c4e1 Mon Sep 17 00:00:00 2001
From: Jens Maurer <Jens.Maurer@gmx.net>
Date: Sat, 17 Jun 2023 20:21:36 +0200
Subject: [PATCH] P2361R6 Unevaluated strings

---
 source/declarations.tex | 48 +++++++++++++++++++++++------------------
 source/lex.tex          | 22 +++++++++++++++++++
 source/overloading.tex  | 11 +++++-----
 3 files changed, 55 insertions(+), 26 deletions(-)

diff --git a/source/declarations.tex b/source/declarations.tex
index aa28c9fcdd..0b70486f08 100644
--- a/source/declarations.tex
+++ b/source/declarations.tex
@@ -77,7 +77,7 @@
 \begin{bnf}
 \nontermdef{static_assert-declaration}\br
   \keyword{static_assert} \terminal{(} constant-expression \terminal{)} \terminal{;}\br
-  \keyword{static_assert} \terminal{(} constant-expression \terminal{,} string-literal \terminal{)} \terminal{;}
+  \keyword{static_assert} \terminal{(} constant-expression \terminal{,} unevaluated-string \terminal{)} \terminal{;}
 \end{bnf}
 
 \begin{bnf}
@@ -264,7 +264,7 @@
 the \grammarterm{static_assert-declaration} \defnx{fails}{\idxcode{static_assert}!failed},
 the program is ill-formed, and the resulting
 diagnostic message\iref{intro.compliance} should include the text of
-the \grammarterm{string-literal}, if one is supplied.
+the \grammarterm{unevaluated-string}, if one is supplied.
 \begin{example}
 \begin{codeblock}
 static_assert(sizeof(int) == sizeof(void*), "wrong pointer size");
@@ -8169,11 +8169,12 @@
 An \tcode{asm} declaration has the form
 \begin{bnf}
 \nontermdef{asm-declaration}\br
-    \opt{attribute-specifier-seq} \keyword{asm} \terminal{(} string-literal \terminal{)} \terminal{;}
+    \opt{attribute-specifier-seq} \keyword{asm} \terminal{(} balanced-token-seq \terminal{)} \terminal{;}
 \end{bnf}
 
-The \tcode{asm} declaration is conditionally-supported; its meaning is
-\impldef{meaning of \tcode{asm} declaration}.
+The \tcode{asm} declaration is conditionally-supported;
+any restrictions on the \grammarterm{balanced-token-seq} and
+its meaning are \impldef{meaning of \tcode{asm} declaration}.
 The optional \grammarterm{attribute-specifier-seq} in
 an \grammarterm{asm-declaration} appertains to the \tcode{asm} declaration.
 \begin{note}
@@ -8209,25 +8210,30 @@
 %
 \begin{bnf}
 \nontermdef{linkage-specification}\br
-    \keyword{extern} string-literal \terminal{\{} \opt{declaration-seq} \terminal{\}}\br
-    \keyword{extern} string-literal name-declaration
+    \keyword{extern} unevaluated-string \terminal{\{} \opt{declaration-seq} \terminal{\}}\br
+    \keyword{extern} unevaluated-string name-declaration
 \end{bnf}
 
-The \grammarterm{string-literal} indicates the required language linkage.
+The \grammarterm{unevaluated-string} indicates the required language linkage.
+\begin{note}
+Escape sequences and \grammarterm{universal-character-name}s
+have been replaced by the characters they denote\iref{lex.string.uneval}.
+\end{note}
 This document specifies the semantics for the
-\grammarterm{string-literal}{s} \tcode{"C"} and \tcode{"C++"}. Use of a
-\grammarterm{string-literal} other than \tcode{"C"} or \tcode{"C++"} is
-conditionally-supported, with \impldef{semantics of linkage specifiers} semantics.
+\grammarterm{unevaluated-string}{s} \tcode{"C"} and \tcode{"C++"}.
+Use of an \grammarterm{unevaluated-string}
+other than \tcode{"C"} or \tcode{"C++"} is conditionally-supported,
+with \impldef{semantics of linkage specifiers} semantics.
 \begin{note}
-Therefore, a linkage-specification with a \grammarterm{string-literal} that
-is unknown to the implementation requires a diagnostic.
+Therefore, a \grammarterm{linkage-specification} with a language linkage
+that is unknown to the implementation requires a diagnostic.
 \end{note}
 
 \recommended
-The spelling of the \grammarterm{string-literal} should be
-taken from the document defining that language. For example, \tcode{Ada}
-(not \tcode{ADA}) and \tcode{Fortran} or \tcode{FORTRAN}, depending on
-the vintage.
+The spelling of the language linkage should be taken
+from the document defining that language.
+For example, \tcode{Ada} (not \tcode{ADA}) and
+\tcode{Fortran} or \tcode{FORTRAN}, depending on the vintage.
 
 \pnum
 \indextext{specification!linkage!implementation-defined}%
@@ -8830,10 +8836,10 @@
 An
 \grammarterm{attribute-argument-clause} may be present and, if present, it shall have the form:
 \begin{ncbnf}
-\terminal{(} string-literal \terminal{)}
+\terminal{(} unevaluated-string \terminal{)}
 \end{ncbnf}
 \begin{note}
-The \grammarterm{string-literal} in the \grammarterm{attribute-argument-clause}
+The \grammarterm{unevaluated-string} in the \grammarterm{attribute-argument-clause}
 can be used to explain the rationale for deprecation and/or to suggest a replacing entity.
 \end{note}
 
@@ -9044,7 +9050,7 @@
 and, if present, shall have the form:
 
 \begin{ncbnf}
-\terminal{(} string-literal \terminal{)}
+\terminal{(} unevaluated-string \terminal{)}
 \end{ncbnf}
 
 \pnum
@@ -9091,7 +9097,7 @@
 This is typically because discarding the return value
 of a nodiscard call has surprising consequences.
 \end{note}
-The \grammarterm{string-literal}
+The \grammarterm{unevaluated-string}
 in a \tcode{nodiscard} \grammarterm{attribute-argument-clause}
 should be used in the message of the warning
 as the rationale for why the result should not be discarded.
diff --git a/source/lex.tex b/source/lex.tex
index 29016afc73..006632b4a6 100644
--- a/source/lex.tex
+++ b/source/lex.tex
@@ -2033,6 +2033,28 @@
 what effect these sequences have on encoding state.
 \end{itemize}
 
+\rSec2[lex.string.uneval]{Unevaluated strings}
+
+\begin{bnf}
+\nontermdef{unevaluated-string}\br
+    string-literal
+\end{bnf}
+
+\pnum
+An \grammarterm{unevaluated-string} shall have no \grammarterm{encoding-prefix}.
+
+\pnum
+Each \grammarterm{universal-character-name} and each \grammarterm{simple-escape-sequence} in an \grammarterm{unevaluated-string} is
+replaced by the member of the translation character set it denotes.
+An \grammarterm{unevaluated-string} that contains
+a \grammarterm{numeric-escape-sequence} or
+a \grammarterm{conditional-escape-sequence}
+is ill-formed.
+
+\pnum
+An \grammarterm{unevaluated-string} is never evaluated and
+its interpretation depends on the context in which it appears.
+
 \rSec2[lex.bool]{Boolean literals}
 
 \indextext{literal!boolean}%
diff --git a/source/overloading.tex b/source/overloading.tex
index edf9542eaa..04a4fb351c 100644
--- a/source/overloading.tex
+++ b/source/overloading.tex
@@ -3936,15 +3936,16 @@
 
 \begin{bnf}
 \nontermdef{literal-operator-id}\br
-    \keyword{operator} string-literal identifier\br
+    \keyword{operator} unevaluated-string identifier\br
     \keyword{operator} user-defined-string-literal
 \end{bnf}
 
 \pnum
-The \grammarterm{string-literal} or \grammarterm{user-defined-string-literal}
-in a \grammarterm{literal-operator-id} shall have no
-\grammarterm{encoding-prefix} and shall contain no characters other than the
-implicit terminating \tcode{'\textbackslash 0'}.
+The \grammarterm{user-defined-string-literal}
+in a \grammarterm{literal-operator-id}
+shall have no \grammarterm{encoding-prefix}.
+The \grammarterm{unevaluated-string} or
+\grammarterm{user-defined-string-literal} shall be empty.
 The \grammarterm{ud-suffix} of the \grammarterm{user-defined-string-literal} or
 the \grammarterm{identifier} in a \grammarterm{literal-operator-id} is called a
 \defnx{literal suffix identifier}{literal!suffix identifier}.