forked from apple/swift
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ParserCombinators.swift
267 lines (225 loc) · 6.88 KB
/
ParserCombinators.swift
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
/// A parser which produces values of type T.
/// This parser produces all possible matches,
/// so it can deal with ambiguous expressions nicely
struct Parser<T> {
/// The actual parsing function.
/// Since we may need to explore multiple possibilities,
/// we return an array of possible parses.
/// For example, an optional parser of type Parser<T?>
/// may return (nil, input) as one possibility where
/// no value was parsed and also (value, some proper suffix) to indicate
/// another possibility where a value was parsed
let run: (ArraySlice<Character>) -> [(T, ArraySlice<Character>)]
}
extension Parser {
/// Runs the parser on a given string and produces the
/// longest match. For printing convenience, this
/// produces T instead of T?
func run(on str: String) -> T {
let results = self.run(Array(str)[...])
// Return the result with the most input consumed
return results.min(by: { $0.1.count < $1.1.count })!.0
}
}
/// Runs 'parser' and feeds the result to the function 'next'
func bind<T, R>(_ parser: Parser<T>, _ next: @escaping (T) -> Parser<R>) -> Parser<R> {
Parser {
(input) -> [(R, ArraySlice<Character>)] in
parser.run(input).flatMap { (result) -> [(R, ArraySlice<Character>)] in
let (match, rest) = result
return next(match).run(rest)
}
}
}
extension Parser {
/// Changes the value returned by the parser
func map<R>(_ f: @escaping (T) -> R) -> Parser<R> {
Parser<R> { input in self.run(input).map { (v, restInput) in (f(v), restInput) } }
}
func flatMap<R>(_ f: @escaping (T) -> Parser<R>) -> Parser<R> {
bind(self, f)
}
func ignore() -> Parser<()> {
self.map { _ in () }
}
}
func map<T, R>(_ parser: Parser<T>, _ f: @escaping (T) -> R) -> Parser<R> {
parser.map(f)
}
/// A parser which consumes no input and always succeeds
func succeed<T>(_ value: T) -> Parser<T> {
Parser { input in [(value, input)] }
}
func unit<T>(_ value: T) -> Parser<T> {
succeed(value)
}
/// A parser which always fails
func fail<T>() -> Parser<T> {
Parser { _ in [] }
}
/// Ignores the value returned by the parser
/// Succeeds iff we have reached the end of the input
let eof: Parser<()> = Parser { input in (input.isEmpty ? succeed(()) : fail()).run(input) }
/// Consumes characters while 'predicate' is true
func satisfies(_ predicate: @escaping (Character) -> Bool) -> Parser<Character> {
Parser { input in
if let firstChar = input.first, predicate(firstChar) {
return [(firstChar, input.dropFirst())]
} else {
return []
}
}
}
/// Chooses from a list of alternatives
func oneOf<T>(_ choices: [Parser<T>]) -> Parser<T> {
Parser { input in
choices.flatMap { $0.run(input) }
}
}
extension Parser {
/// Postfix version of 'oneOf' for convenience
func or(_ alternative: Parser<T>) -> Parser<T> {
oneOf([self, alternative])
}
}
/// Matches one specific character
func char(_ c: Character) -> Parser<Character> {
satisfies { c == $0 }
}
/// Matches a specific string
func string(_ str: String) -> Parser<String> {
str.reversed().reduce(succeed(str)) { next, c in
chain {
char(c)
next
}
}
}
/// Consumes characters while 'predicate' is true,
/// and returns the result as a string for convenience
func takeWhile(_ predicate: @escaping (Character) -> Bool) -> Parser<String> {
Parser { input in
let idx = input.firstIndex(where: { !predicate($0) }) ?? input.endIndex
return [(String(input[..<idx]), input[idx...])]
}
}
/// A parser which either runs 'parser' or doesn't consume input
func optionally<T>(_ parser: Parser<T>) -> Parser<T?> {
parser.map(Optional.some).or(succeed(.none))
}
/// Convenience form of 'optionally' which lets you specify the type directly
func optionally<T>(_ _: T.Type, _ parser: Parser<T>) -> Parser<T?> {
optionally(parser)
}
/// The equivalent of Kleene star, repeatedly applies 'parser'
/// and collects the results
func zeroOrMore<T>(_ parser: Parser<T>) -> Parser<[T]> {
Parser { input in
var results: [([T], ArraySlice<Character>)] = [([], input)]
parser.run(input).forEach {
let (elem, rest) = $0
for (elems, rest) in zeroOrMore(parser).run(rest) {
results.append(([elem] + elems, rest))
}
}
return results
}
}
/// The equivalent of + in regex.
/// Runs one or more times, and collects the results
func oneOrMore<T>(_ parser: Parser<T>) -> Parser<[T]> {
chain {
let first = parser
let rest = zeroOrMore(parser)
return [first] + rest
}
}
/// Does 'parser' exactly 'times' many times
func exactly<T>(times: Int, _ parser: Parser<T>) -> Parser<[T]> {
if times == 0 {
return succeed([])
} else {
return chain {
let elem = parser
let rest = exactly(times: times - 1, parser)
// for non-quadratic performance, we would instead
// build this in reverse order and then
// have a wrapper which reverses
succeed([elem] + rest)
}
}
}
/// Bounded quantification
func atMost<T>(times: Int, _ parser: Parser<T>) -> Parser<[T]> {
if times == 0 {
return succeed([])
} else {
let takeOne: Parser<[T]> = chain {
let elem = parser
let rest = atMost(times: times - 1, parser)
succeed([elem] + rest)
}
return takeOne.or(succeed([]))
}
}
/// Parses a range of possible repetitions
func between<T>(_ lowerBound: Int, and upperBound: Int, _ parser: Parser<T>) -> Parser<[T]> {
precondition(upperBound >= lowerBound, "Upper bound must be greater than lower bound");
return chain {
let initial = exactly(times: lowerBound, parser)
let rest = atMost(times: upperBound - lowerBound, parser)
return initial + rest
}
}
extension Parser {
func skipSpaces() -> Parser<T> {
chain {
let result = self
takeWhile(\.isWhitespace).ignore()
return result
}
}
}
extension Parser:
ExpressibleByStringLiteral,
ExpressibleByExtendedGraphemeClusterLiteral,
ExpressibleByUnicodeScalarLiteral
where T == String
{
typealias UnicodeScalarLiteralType = Character
typealias ExtendedGraphemeClusterLiteralType = Character
typealias StringLiteralType = String
init(unicodeScalarLiteral value: Character) {
self = string(String(value))
}
init(extendedGraphemeClusterLiteral value: ExtendedGraphemeClusterLiteralType) {
self = string(String(value))
}
init(stringLiteral value: StringLiteralType) {
self = string(value)
}
}
let foo: Parser<(Int, Int)> = chain {
let a = "AAAA"
"BBB"
eof
return (a.count, 3)
}
print(foo.run(on: "AAAABBB"))
let number: Parser<Int?> = chain {
let n: Int? = takeWhile(\.isWholeNumber).map(Int.init)
eof
return n
}
// deferences nullptr for some reason
// print(number.run(on: "1234"))
// Generic chain examples
func bind<T, R>(_ v: [T], _ f: (T) -> [R]) -> [R] {
v.flatMap(f)
}
let pairs: [(Int, Int)] = chain {
let x = [1,2,3,4,5]
let y = [1,2,3,4,5]
[(x, y)]
}
print(pairs)