-
Notifications
You must be signed in to change notification settings - Fork 15
/
Character.scala
126 lines (120 loc) · 5.29 KB
/
Character.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
/* SPDX-FileCopyrightText: © 2022 Parsley Contributors <https://github.com/j-mie6/Parsley/graphs/contributors>
* SPDX-License-Identifier: BSD-3-Clause
*/
package parsley.token.text
import parsley.Parsley
import parsley.token.predicate.{Basic, CharPredicate, NotRequired, Unicode}
/** This class defines a uniform interface for defining parsers for character
* literals, independent of how whitespace should be handled after the literal.
*
* Each parser differs in the range of characters it admits inside the literal
* and in its result type: `fullUtf16` produces an `Int` code point, whereas
* `basicMultilingualPlane`, `ascii`, and `latin1` each produce a `Char`.
*
* @since 4.0.0
* @note implementations of this class found within `Lexer` may employ sharing
* and refine the `def`s in this class into `val` or `lazy val` when overriding.
*
* @define disclaimer
* the exact behaviour of this parser is decided by the implementations given in
* `Lexer`, which will depend on user-defined configuration. Please see the
* relevant documentation of these specific objects.
*/
abstract class Character private[text] {
/** This parser will parse a single character literal, which may contain
* any unicode graphic character, i.e. any code point that can be encoded
* with up to two UTF-16 code units. It may also contain escape sequences.
* The result is the code point of the parsed character as an `Int`.
*
* @example {{{
* scala> fullUtf16.parse("'a'")
* val res0 = Success(97)
* scala> fullUtf16.parse("'£'")
* val res1 = Success(163)
* scala> fullUtf16.parse("'λ'")
* val res2 = Success(0x03BB)
* scala> fullUtf16.parse("'🙂'")
* val res3 = Success(0x1F642)
* }}}
*
* @since 4.0.0
* @note $disclaimer
*/
def fullUtf16: Parsley[Int]
/** This parser will parse a single character literal, which may contain
* any graphic character that falls within the "Basic Multilingual Plane" (BMP).
* This is defined as any character whose UTF-16 encoding fits into a single
* 16-bit code unit. A Scala `Char` is exactly large enough to hold any BMP
* character. It may also contain escape sequences, but only those which
* result in BMP characters.
*
* @example {{{
* scala> basicMultilingualPlane.parse("'a'")
* val res0 = Success('a')
* scala> basicMultilingualPlane.parse("'£'")
* val res1 = Success('£')
* scala> basicMultilingualPlane.parse("'λ'")
* val res2 = Success('λ')
* scala> basicMultilingualPlane.parse("'🙂'")
* val res3 = Failure(...) // 🙂 has a code point larger than 0xffff
* }}}
*
* @since 4.0.0
* @note $disclaimer
*/
def basicMultilingualPlane: Parsley[Char]
/** This parser will parse a single character literal, which may contain
* any graphic ASCII character. These are characters with ordinals in range
* 0 to 127 inclusive. It may also contain escape sequences, but only
* those which result in ASCII characters.
*
* @example {{{
* scala> ascii.parse("'a'")
* val res0 = Success('a')
* scala> ascii.parse("'£'")
* val res1 = Failure(...) // £'s ordinal is greater than 127
* scala> ascii.parse("'λ'")
* val res2 = Failure(...) // λ's ordinal is greater than 127
* scala> ascii.parse("'🙂'")
* val res3 = Failure(...) // 🙂's ordinal is greater than 127
* }}}
*
* @since 4.0.0
* @note $disclaimer
*/
def ascii: Parsley[Char]
/** This parser will parse a single character literal, which may contain
* any graphic extended ASCII (Latin-1) character. These are characters with
* ordinals in range 0 to 255 inclusive. It may also contain escape sequences,
* but only those which result in extended ASCII characters.
*
* @example {{{
* scala> latin1.parse("'a'")
* val res0 = Success('a')
* scala> latin1.parse("'£'")
* val res1 = Success('£')
* scala> latin1.parse("'λ'")
* val res2 = Failure(...) // λ's ordinal is greater than 255
* scala> latin1.parse("'🙂'")
* val res3 = Failure(...) // 🙂's ordinal is greater than 255
* }}}
*
* @since 4.0.0
* @note $disclaimer
*/
def latin1: Parsley[Char]
}
/** Constants and predicate-building helpers shared by the character-literal
  * parsers of this package.
  */
private [text] object Character {
    /** The largest ordinal that is treated as ASCII. */
    final val MaxAscii: Int = 0x7f
    /** The largest ordinal that is treated as Latin-1 (extended ASCII). */
    final val MaxLatin1: Int = 0xff

    /** Derives the predicate for characters that may appear raw inside a literal
      * from the predicate for graphic characters.
      *
      * The resulting predicate never accepts `terminalLead`. When `allowsAllSpace`
      * is set, it additionally accepts whitespace characters that `isGraphic`
      * itself would reject.
      *
      * @param terminalLead the character that must be excluded from the result.
      * @param allowsAllSpace whether whitespace is accepted in addition to what `isGraphic` accepts.
      * @param isGraphic the base predicate; `NotRequired` is passed through untouched.
      * @return a predicate of the same flavour (`Unicode`, `Basic`, or `NotRequired`) as `isGraphic`.
      */
    def letter(terminalLead: Char, allowsAllSpace: Boolean, isGraphic: CharPredicate): CharPredicate = isGraphic match {
        case Unicode(g) if allowsAllSpace => Unicode(c => c != terminalLead.toInt && (g(c) || isWhitespaceCodePoint(c)))
        case Unicode(g)                   => Unicode(c => c != terminalLead.toInt && g(c))
        case Basic(g) if allowsAllSpace   => Basic(c => c != terminalLead && (g(c) || parsley.character.isWhitespace(c)))
        case Basic(g)                     => Basic(c => c != terminalLead && g(c))
        case NotRequired                  => NotRequired
    }

    /** As the three-argument `letter`, but the resulting predicate also rejects
      * `escapeLead`.
      *
      * @param terminalLead the character that must be excluded from the result.
      * @param escapeLead a second character that must be excluded from the result.
      * @param allowsAllSpace whether whitespace is accepted in addition to what `isGraphic` accepts.
      * @param isGraphic the base predicate; `NotRequired` is passed through untouched.
      * @return a predicate of the same flavour (`Unicode`, `Basic`, or `NotRequired`) as `isGraphic`.
      */
    def letter(terminalLead: Char, escapeLead: Char, allowsAllSpace: Boolean, isGraphic: CharPredicate): CharPredicate = {
        letter(terminalLead, allowsAllSpace, isGraphic) match {
            case Unicode(g)  => Unicode(c => c != escapeLead.toInt && g(c))
            case Basic(g)    => Basic(c => c != escapeLead && g(c))
            case NotRequired => NotRequired
        }
    }

    /** Whitespace test for a full code point, built on the `Char`-based
      * `parsley.character.isWhitespace`.
      *
      * `Int.toChar` keeps only the low 16 bits, so a supplementary code point such
      * as `0x1000A` would narrow to `'\n'` and be misreported as whitespace. A
      * `Char`-based test can only ever describe BMP characters, so anything outside
      * the BMP is rejected before narrowing.
      */
    private def isWhitespaceCodePoint(codepoint: Int): Boolean = {
        isBmpCodePoint(codepoint) && parsley.character.isWhitespace(codepoint.toChar)
    }

    // `Character` resolves to this object here, so the Java class has to be fully qualified.
    /** Delegates to `java.lang.Character.isBmpCodePoint`. */
    @inline def isBmpCodePoint(codepoint: Int): Boolean = java.lang.Character.isBmpCodePoint(codepoint)
    /** Delegates to `java.lang.Character.isValidCodePoint`. */
    @inline def isValidCodePoint(codepoint: Int): Boolean = java.lang.Character.isValidCodePoint(codepoint)
}