-
Notifications
You must be signed in to change notification settings - Fork 14
/
TextDesc.scala
248 lines (241 loc) · 11.9 KB
/
TextDesc.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
/* SPDX-FileCopyrightText: © 2022 Parsley Contributors <https://github.com/j-mie6/Parsley/graphs/contributors>
* SPDX-License-Identifier: BSD-3-Clause
*/
package parsley.token.descriptions.text
import parsley.token.predicate.{CharPredicate, Unicode}
/** Describes how many digits a numeric escape sequence is permitted to contain.
  *
  * The available choices are the subtypes defined in the companion object.
  *
  * @since 4.0.0
  */
sealed abstract class NumberOfDigits
/** Holds the concrete subtypes of `NumberOfDigits`.
  *
  * @since 4.0.0
  */
object NumberOfDigits {
    /** The numeric escape literal may contain any number of digits up to `n`, with `n`
      * itself being an allowed width.
      *
      * Construction fails with an `IllegalArgumentException` unless `n` is positive.
      *
      * @param n the maximum (inclusive) number of digits allowed in the literal.
      * @since 4.0.0
      */
    final case class AtMost(n: Int) extends NumberOfDigits {
        require(n >= 1, "AtMost may only be passed a number of digits greater than 0")
    }
    /** The numeric escape literal must contain a number of digits equal to one of the
      * given widths.
      *
      * Construction fails with an `IllegalArgumentException` unless every width is positive.
      *
      * @param n0 the first possible digit width.
      * @param ns the other possible digit widths.
      * @since 4.0.0
      */
    final case class Exactly(n0: Int, ns: Int*) extends NumberOfDigits {
        // the mandatory width and the optional extras obey the same rule, so they are checked together
        require((n0 +: ns).forall(_ >= 1), "Exactly may only be passed a number of digits greater than 0")
    }
    /** The numeric escape literal may contain any number of digits.
      *
      * @since 4.0.0
      */
    case object Unbounded extends NumberOfDigits
}
/** Describes how numeric escape sequences behave for one specific base.
  *
  * The available choices are the subtypes defined in the companion object.
  *
  * @since 4.0.0
  */
sealed abstract class NumericEscape
/** Holds the concrete subtypes of `NumericEscape`.
  *
  * @since 4.0.0
  */
object NumericEscape {
    /** Numeric escapes in this base are permitted and take the shape described by the parameters.
      *
      * @param prefix the character, if any, that must introduce the literal (like `x` for hexadecimal escapes in some languages).
      * @param numDigits how many digits the literal requires: unbounded, one of an exact set of widths, or up to a specific number.
      * @param maxValue the largest character value that can be expressed by this numeric escape.
      * @since 4.0.0
      */
    final case class Supported(
        prefix: Option[Char],
        numDigits: NumberOfDigits,
        maxValue: Int
    ) extends NumericEscape
    /** Numeric escapes in this base are not permitted.
      *
      * @since 4.0.0
      */
    case object Illegal extends NumericEscape
}
/** Describes which escape sequences are valid inside character and string literals.
  *
  * Escape sequences may be given as direct literals, as mappings from one or several
  * characters to a specific value, or as numeric escapes in a variety of bases; zero-width
  * escapes and line continuations via string gaps can also be enabled.
  *
  * Construction fails with an `IllegalArgumentException` when `multiMap` has an empty key,
  * when `literals`, `singleMap`, and `multiMap` share an escape sequence, or when any escape
  * maps to a value that is not a valid code point.
  *
  * @define numericEscape if allowed, the description of how numeric escape sequences work for base
  *
  * @param escBegin the character that starts an escape sequence, very often this is `'\\'`.
  * @param literals the characters that can be directly escaped, but still represent themselves, for instance `'"'`, or `'\\'`.
  * @param singleMap the possible single-character escape sequences and the code point of the character they map to, for instance `'n' -> 0xa`.
  * @param multiMap the possible multi-character escape sequences and the code point of the character they map to.
  * @param decimalEscape $numericEscape 10.
  * @param hexadecimalEscape $numericEscape 16.
  * @param octalEscape $numericEscape 8.
  * @param binaryEscape $numericEscape 2.
  * @param emptyEscape if one should exist, the character which has no effect on the string but can be used to disambiguate other
  *                    escape sequences: in Haskell this would be `\&`.
  * @param gapsSupported specifies whether or not ''string gaps'' are supported: this is where whitespace can be injected between two
  *                      `escBegin` characters and this will all be ignored in the final string, such that `"hello \ \world"` is `"hello world"`.
  * @since 4.0.0
  */
final case class EscapeDesc (escBegin: Char,
                             literals: Set[Char],
                             singleMap: Map[Char, Int],
                             multiMap: Map[String, Int],
                             decimalEscape: NumericEscape,
                             hexadecimalEscape: NumericEscape,
                             octalEscape: NumericEscape,
                             binaryEscape: NumericEscape,
                             emptyEscape: Option[Char],
                             gapsSupported: Boolean,
                            ) {
    // The three sources of escape sequences must be well-formed and mutually disjoint.
    locally {
        require(!multiMap.keysIterator.exists(_.isEmpty), "empty strings cannot be escape sequences")
        // every escape sequence made of exactly one character, however it was declared
        val oneCharSeqs = literals | singleMap.keySet
        val literalAlsoMapped = literals.exists(c => singleMap.contains(c))
        val oneCharAlsoMulti = oneCharSeqs.exists(c => multiMap.contains(c.toString))
        require(!(literalAlsoMapped || oneCharAlsoMulti), "there can be no overlap between literals, singleMap, and multiMap")
    }
    // TODO: this needs to be a Radix, I think we'll need parsley.collection.immutable.Radix too
    // All non-numeric escape sequences, keyed by their text: literals map to their own character value.
    private [token] val escMap = {
        val literalEntries = literals.map(lit => lit.toString -> lit.toInt)
        val singleEntries = singleMap.map {
            case (key, codePoint) => key.toString -> codePoint
        }
        multiMap ++ literalEntries ++ singleEntries
    }
    require(escMap.valuesIterator.forall(codePoint => Character.isValidCodePoint(codePoint)),
            "Escape characters cannot map to invalid characters")
}
/** Provides ready-made `EscapeDesc` configurations that line up with particular
  * languages or styles.
  *
  * @since 4.0.0
  */
object EscapeDesc {
    /** A minimal description of escape characters: the only supported sequence is `\\`.
      *
      * {{{
      * escBegin = '\\'
      * literals = Set('\\')
      * singleMap = Map.empty
      * multiMap = Map.empty
      * decimalEscape = NumericEscape.Illegal
      * hexadecimalEscape = NumericEscape.Illegal
      * octalEscape = NumericEscape.Illegal
      * binaryEscape = NumericEscape.Illegal
      * emptyEscape = None
      * gapsSupported = false
      * }}}
      *
      * @since 4.0.0
      */
    val plain: EscapeDesc = EscapeDesc(
        escBegin = '\\',
        literals = Set('\\'),
        singleMap = Map.empty,
        multiMap = Map.empty,
        decimalEscape = NumericEscape.Illegal,
        hexadecimalEscape = NumericEscape.Illegal,
        octalEscape = NumericEscape.Illegal,
        binaryEscape = NumericEscape.Illegal,
        emptyEscape = None,
        gapsSupported = false
    )
    /** A description of escape sequences that is compliant with the Haskell Report.
      *
      * It offers the quote and backslash literals, the single-letter escapes, the ASCII
      * control-code names, caret control escapes (`^@` through `^_`), unbounded decimal,
      * hexadecimal (`x`) and octal (`o`) numeric escapes, the empty escape `\&`, and string gaps.
      *
      * @since 4.0.0
      */
    val haskell: EscapeDesc = {
        // ASCII names for the control codes 0x00 to 0x1f: a name's position in the list is its code
        val controlCodeNames = List(
            "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
            "BS",  "HT",  "LF",  "VT",  "FF",  "CR",  "SO",  "SI",
            "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
            "CAN", "EM",  "SUB", "ESC", "FS",  "GS",  "RS",  "US"
        )
        val namedAscii = controlCodeNames.zipWithIndex.toMap ++ Map("SP" -> 0x0020, "DEL" -> 0x007f)
        // Control escape sequences: `^@` is code 0 and so on up to `^_` at code 31
        val caretControls = (0x00 to 0x1f).map(code => s"^${('@' + code).toChar}" -> code).toMap
        // every numeric escape permitted here has no digit limit and can reach any code point
        def anyWidth(prefix: Option[Char]) = {
            NumericEscape.Supported(prefix = prefix, numDigits = NumberOfDigits.Unbounded, maxValue = Character.MAX_CODE_POINT)
        }
        EscapeDesc(
            escBegin = '\\',
            literals = Set('\'', '\"', '\\'),
            singleMap = Map(
                '0' -> 0x0000,
                'a' -> 0x0007,
                'b' -> 0x0008,
                't' -> 0x0009,
                'n' -> 0x000a,
                'v' -> 0x000b,
                'f' -> 0x000c,
                'r' -> 0x000d
            ),
            multiMap = namedAscii ++ caretControls,
            decimalEscape = anyWidth(prefix = None),
            hexadecimalEscape = anyWidth(prefix = Some('x')),
            octalEscape = anyWidth(prefix = Some('o')),
            binaryEscape = NumericEscape.Illegal,
            emptyEscape = Some('&'),
            gapsSupported = true
        )
    }
}
/** Describes how textual literals, such as strings and characters, should be
  * processed lexically.
  *
  * Construction fails with an `IllegalArgumentException` if either `stringEnds` or
  * `multiStringEnds` contains the empty string.
  *
  * @param escapeSequences the description of how escape sequences work in literals.
  * @param characterLiteralEnd the character that both starts and ends a character literal.
  * @param stringEnds the sequences that may begin and end a string literal.
  * @param multiStringEnds the sequences that may begin and end a multi-line string literal.
  * @param graphicCharacter the characters that can be written verbatim into a character or string literal.
  * @since 4.0.0
  */
final case class TextDesc (escapeSequences: EscapeDesc,
                           characterLiteralEnd: Char,
                           stringEnds: Set[String],
                           multiStringEnds: Set[String],
                           graphicCharacter: CharPredicate) {
    // a delimiter with no characters could never be recognised, so reject it up front
    require(!stringEnds.exists(_.isEmpty), "string ends cannot be empty")
    require(!multiStringEnds.exists(_.isEmpty), "multiline string ends cannot be empty")
}
/** Provides the preconfigured text descriptions.
  *
  * @since 4.0.0
  */
object TextDesc {
    /** The plain description of text.
      *
      * {{{
      * escapeSequences = EscapeDesc.plain
      * characterLiteralEnd = '\''
      * stringEnds = Set("\"")
      * multiStringEnds = Set.empty
      * graphicCharacter = Unicode(_ >= ' '.toInt)
      * }}}
      *
      * @since 4.0.0
      */
    val plain: TextDesc = TextDesc(
        escapeSequences = EscapeDesc.plain,
        characterLiteralEnd = '\'',
        stringEnds = Set("\""),
        multiStringEnds = Set.empty,
        // anything from the space character upwards may appear verbatim
        graphicCharacter = Unicode(codePoint => codePoint >= ' '.toInt)
    )
}