-
Notifications
You must be signed in to change notification settings - Fork 15
/
predicate.scala
145 lines (137 loc) · 6.16 KB
/
predicate.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
/* SPDX-FileCopyrightText: © 2021 Parsley Contributors <https://github.com/j-mie6/Parsley/graphs/contributors>
* SPDX-License-Identifier: BSD-3-Clause
*/
package parsley.token
import scala.collection.immutable.NumericRange
import parsley.Parsley, Parsley.empty
import parsley.character.{satisfy, satisfyUtf16}
import parsley.exceptions.ParsleyException
// TODO: for parsley 5.0.0, make this a package?
/** This module contains functionality to describe character predicates, which can
* be used to determine what characters are valid for different tokens.
*
* @since 4.0.0
*/
object predicate {
/** The root of the character-predicate hierarchy: every description of which
  * characters are valid for a token is a subtype of this class.
  *
  * The class is sealed, so all of its direct subtypes are declared in this file.
  *
  * @since 4.0.0
  */
sealed abstract class CharPredicate {
    // --- views of the predicate as a parser, one per result type ---

    /** The predicate as a parser whose result is a 16-bit `Char`. */
    private [token] def toBmp: Parsley[Char]

    /** The predicate as a parser whose result is an `Int` code point. */
    private [token] def toUnicode: Parsley[Int]

    /** The predicate as a parser whose result is discarded (`Unit`). */
    private [token] def toNative: Parsley[Unit]

    // --- checks of the predicate against the two ends of a string ---

    /** Tests this predicate against the end of `s`. */
    private [token] def endsWith(s: String): Boolean

    /** Tests this predicate against the start of `s`. */
    private [token] def startsWith(s: String): Boolean
}
/** A character predicate that works on whole unicode code points.
  *
  * A unicode code point needs up to 21 bits (the largest one is `U+10FFFF`),
  * so on the JVM it is carried around in a 32-bit `Int`. This predicate can
  * therefore describe any ''single'' code point; characters that are made up
  * of several code points, such as flags or modified emojis, cannot be
  * described by it.
  *
  * Scala widens a `Char` to an `Int`, so plain character values can still be
  * used when writing the predicate.
  *
  * @param predicate the test applied to a code point
  * @since 4.0.0
  */
final case class Unicode(predicate: Int => Boolean) extends CharPredicate {
    // a 16-bit character is tested through its numeric value
    private [token] override def toBmp = satisfy((ch: Char) => predicate(ch.toInt))
    private [token] override def toUnicode = satisfyUtf16(predicate)
    // same parser as `toUnicode`, with the code point thrown away
    private [token] override def toNative = satisfyUtf16(predicate).void
    // the first code point of `s` is tested; an empty string has none, so it fails
    private [token] override def startsWith(s: String) = {
        if (s.isEmpty) false
        else predicate(s.codePointAt(0))
    }
    // the code point that finishes at the end of `s` is tested; an empty string fails
    private [token] override def endsWith(s: String) = {
        if (s.isEmpty) false
        else predicate(s.codePointBefore(s.length))
    }
}
/** A character predicate that works on ordinary 16-bit Scala `Char`s.
  *
  * As a `Char` is only 16 bits wide, this predicate can recognise characters
  * of the Basic Multilingual Plane and nothing outside of it.
  *
  * @param predicate the test applied to a character
  * @since 4.0.0
  */
final case class Basic(predicate: Char => Boolean) extends CharPredicate {
    private [token] override def toBmp = satisfy(predicate)
    // $COVERAGE-OFF$
    // a `Char` predicate has no code point form: asking for one raises a `ParsleyException`
    private [token] override def toUnicode =
        throw new ParsleyException("Cannot parse unicode with a `Basic` `Char => Boolean` predicate") // scalastyle:ignore throw
    // $COVERAGE-ON$
    // same parser as `toBmp`, with the character thrown away
    private [token] override def toNative = satisfy(predicate).void
    // an empty string has no first or last character, so both checks fail on it
    private [token] override def startsWith(s: String) = s.nonEmpty && predicate(s.head)
    private [token] override def endsWith(s: String) = s.nonEmpty && predicate(s.last)
}
// this ruins the ability to pass functions in, as it creates an overloading ambiguity
/*object Basic {
// TODO: expose
private [parsley] def apply(cs: Char*) = new Basic(Set(cs: _*))
}*/
/** A character predicate whose parsers never succeed.
  *
  * All three parser forms are `empty`, whereas the two string checks accept
  * every string, the empty string included.
  *
  * @since 4.0.0
  */
case object NotRequired extends CharPredicate {
    // parser forms: each one is `empty`
    private [token] override def toBmp = empty
    private [token] override def toUnicode = empty
    private [token] override def toNative = empty

    // string checks: unconditionally true
    // NOTE(review): presumably this lets a "not required" constraint accept any
    // string instead of rejecting it -- confirm against the callers.
    private [token] override def startsWith(s: String) = true
    private [token] override def endsWith(s: String) = true
}
/** This object provides implicit functionality for constructing `CharPredicate` values.
* @since 4.1.0
*/
object implicits {
/** Implicit conversions that produce `Basic` (16-bit `Char`) predicates.
  * @since 4.1.0
  */
object Basic {
    // $COVERAGE-OFF$
    /** Wraps an ordinary function on characters as a predicate.
      * @param pred the function deciding which characters are accepted
      * @since 4.1.0
      */
    implicit def funToBasic(pred: Char => Boolean): CharPredicate = new predicate.Basic(pred)

    /** Builds a predicate that accepts exactly one character.
      * @param c the only character that is accepted
      * @since 4.1.0
      */
    implicit def charToBasic(c: Char): CharPredicate = new predicate.Basic(ch => ch == c)

    /** Builds a predicate that accepts every character found in a range.
      * @param cs the range of accepted characters
      * @since 4.1.0
      */
    implicit def rangeToBasic(cs: NumericRange[Char]): CharPredicate = new predicate.Basic(ch => cs.contains(ch))
    // $COVERAGE-ON$
}
/** Implicit conversions to make `Unicode` values.
  *
  * Conversions that start from `Char`-based descriptions only ever accept
  * code points that fit in a `Char`.
  *
  * @since 4.1.0
  */
object Unicode {
    // $COVERAGE-OFF$
    /** Lifts a regular full-width character predicate.
      * @param pred the function deciding which code points are accepted
      * @since 4.1.0
      */
    implicit def funToUnicode(pred: Int => Boolean): CharPredicate = predicate.Unicode(pred)

    /** Lifts a regular character predicate.
      *
      * Code points that do not fit in a `Char` are rejected without consulting `pred`.
      * @param pred the function deciding which characters are accepted
      * @since 4.1.0
      */
    implicit def charFunToUnicode(pred: Char => Boolean): CharPredicate = predicate.Unicode(c => c.isValidChar && pred(c.toChar))

    /** Constructs a predicate for the specific given character.
      * @param c the only character that is accepted
      * @since 4.1.0
      */
    implicit def charToUnicode(c: Char): CharPredicate = predicate.Unicode(_ == c.toInt)

    /** Constructs a predicate for the specific given unicode codepoint.
      * @param c the only code point that is accepted
      * @since 4.1.0
      */
    implicit def intToUnicode(c: Int): CharPredicate = predicate.Unicode(_ == c)

    /** Constructs a predicate for anything in a range of specific characters.
      *
      * Code points that do not fit in a `Char` are never part of the range.
      * @param cs the range of accepted characters
      * @since 4.1.0
      */
    implicit def charRangeToUnicode(cs: NumericRange[Char]): CharPredicate =
        // The code point is narrowed to a `Char` before the lookup. Handing the `Int`
        // straight to `NumericRange[Char].contains` passes it boxed as an `Integer`,
        // which the range cannot compare against its `Char` bounds; `contains`
        // swallows the resulting cast failure and answers `false` for every input.
        predicate.Unicode(c => c.isValidChar && cs.contains(c.toChar))

    /** Constructs a predicate for anything in a range of specific unicode codepoints.
      * @param cs the range of accepted code points
      * @since 4.1.0
      */
    implicit def intRangeToUnicode(cs: NumericRange[Int]): CharPredicate = predicate.Unicode(cs.contains)

    /** Constructs a predicate for anything in a range of specific unicode codepoints.
      * @param cs the range of accepted code points
      * @since 4.1.0
      */
    implicit def rangeToUnicode(cs: Range): CharPredicate = predicate.Unicode(cs.contains)
    // $COVERAGE-ON$
}
}
}