-
-
Notifications
You must be signed in to change notification settings - Fork 1.6k
/
match_data.cr
234 lines (210 loc) · 6.82 KB
/
match_data.cr
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
class Regex
  # `Regex::MatchData` is the type of the special variable `$~`, and is the type
  # returned by `Regex#match` and `String#match`. It encapsulates all the
  # results of a regular expression match.
  #
  # ```
  # if md = "Crystal".match(/[p-s]/)
  #   md.string # => "Crystal"
  #   md[0]     # => "r"
  #   md[1]?    # => nil
  # end
  # ```
  #
  # Many `Regex::MatchData` methods deal with capture groups, and accept an integer
  # argument to select the desired capture group. Capture groups are numbered
  # starting from `1`, so that `0` can be used to refer to the entire regular
  # expression without needing to capture it explicitly. Valid group indexes
  # therefore range from `0` to `size`; anything else (including negative
  # numbers) is treated as an invalid group.
  class MatchData
    # Returns the original regular expression.
    #
    # ```
    # "Crystal".match(/[p-s]/).not_nil!.regex # => /[p-s]/
    # ```
    getter regex : Regex

    # Returns the number of capture groups, including named capture groups.
    #
    # ```
    # "Crystal".match(/[p-s]/).not_nil!.size          # => 0
    # "Crystal".match(/r(ys)/).not_nil!.size          # => 1
    # "Crystal".match(/r(ys)(?<ok>ta)/).not_nil!.size # => 2
    # ```
    getter size : Int32

    # Returns the original string.
    #
    # ```
    # "Crystal".match(/[p-s]/).not_nil!.string # => "Crystal"
    # ```
    getter string : String

    # :nodoc:
    def initialize(@regex : Regex, @code : LibPCRE::Pcre, @string : String, @pos : Int32, @ovector : Int32*, @size : Int32)
    end

    # Returns the position of the first character of the *n*th match.
    #
    # When *n* is `0` or not given, uses the match of the entire `Regex`.
    # Otherwise, uses the match of the *n*th capture group.
    #
    # Raises `IndexError` if *n* is not a valid capture group index.
    #
    # ```
    # "Crystal".match(/r/).not_nil!.begin(0)     # => 1
    # "Crystal".match(/r(ys)/).not_nil!.begin(1) # => 2
    # "クリスタル".match(/リ(ス)/).not_nil!.begin(0)    # => 1
    # ```
    def begin(n = 0)
      @string.byte_index_to_char_index byte_begin(n)
    end

    # Returns the position of the next character after the match.
    #
    # When *n* is `0` or not given, uses the match of the entire `Regex`.
    # Otherwise, uses the match of the *n*th capture group.
    #
    # Raises `IndexError` if *n* is not a valid capture group index.
    #
    # ```
    # "Crystal".match(/r/).not_nil!.end(0)     # => 2
    # "Crystal".match(/r(ys)/).not_nil!.end(1) # => 4
    # "クリスタル".match(/リ(ス)/).not_nil!.end(0)    # => 3
    # ```
    def end(n = 0)
      @string.byte_index_to_char_index byte_end(n)
    end

    # Returns the position of the first byte of the *n*th match.
    #
    # When *n* is `0` or not given, uses the match of the entire `Regex`.
    # Otherwise, uses the match of the *n*th capture group.
    #
    # Raises `IndexError` if *n* is not a valid capture group index.
    #
    # ```
    # "Crystal".match(/r/).not_nil!.byte_begin(0)     # => 1
    # "Crystal".match(/r(ys)/).not_nil!.byte_begin(1) # => 2
    # "クリスタル".match(/リ(ス)/).not_nil!.byte_begin(0)    # => 3
    # ```
    def byte_begin(n = 0)
      check_index_out_of_bounds n
      # Offsets are stored as consecutive (start, end) pairs, one pair per group.
      # NOTE(review): for a group that did not take part in the match the stored
      # value is presumably negative (see the `start < 0` check in `#[]?`) - confirm.
      @ovector[n * 2]
    end

    # Returns the position of the next byte after the match.
    #
    # When *n* is `0` or not given, uses the match of the entire `Regex`.
    # Otherwise, uses the match of the *n*th capture group.
    #
    # Raises `IndexError` if *n* is not a valid capture group index.
    #
    # ```
    # "Crystal".match(/r/).not_nil!.byte_end(0)     # => 2
    # "Crystal".match(/r(ys)/).not_nil!.byte_end(1) # => 4
    # "クリスタル".match(/リ(ス)/).not_nil!.byte_end(0)    # => 9
    # ```
    def byte_end(n = 0)
      check_index_out_of_bounds n
      @ovector[n * 2 + 1]
    end

    # Returns the match of the *n*th capture group, or `nil` if there isn't
    # an *n*th capture group or the group has a negative start offset
    # (i.e. it captured nothing).
    #
    # When *n* is `0`, returns the match for the entire `Regex`.
    #
    # ```
    # "Crystal".match(/r(ys)/).not_nil![0]?  # => "rys"
    # "Crystal".match(/r(ys)/).not_nil![1]?  # => "ys"
    # "Crystal".match(/r(ys)/).not_nil![2]?  # => nil
    # "Crystal".match(/r(ys)/).not_nil![-1]? # => nil
    # ```
    def []?(n)
      return unless valid_group?(n)

      start = @ovector[n * 2]
      finish = @ovector[n * 2 + 1]
      # A negative start offset means there is no captured text for this group.
      return if start < 0

      @string.byte_slice(start, finish - start)
    end

    # Returns the match of the *n*th capture group, or raises an `IndexError`
    # if there is no *n*th capture group or the group captured nothing.
    #
    # ```
    # "Crystal".match(/r(ys)/).not_nil![1] # => "ys"
    # "Crystal".match(/r(ys)/).not_nil![2] # raises IndexError
    # ```
    def [](n)
      # `#[]?` already returns nil for an out-of-range index, so a single nil
      # check covers both "no such group" and "group captured nothing".
      value = self[n]?
      raise_invalid_group_index(n) if value.nil?
      value
    end

    # Returns the match of the capture group named by *group_name*, or
    # `nil` if there is no such named capture group.
    #
    # ```
    # "Crystal".match(/r(?<ok>ys)/).not_nil!["ok"]? # => "ys"
    # "Crystal".match(/r(?<ok>ys)/).not_nil!["ng"]? # => nil
    # ```
    def []?(group_name : String)
      # A negative result means the name could not be resolved to a group number.
      group_number = LibPCRE.get_stringnumber(@code, group_name)
      return if group_number < 0

      self[group_number]?
    end

    # Returns the match of the capture group named by *group_name*, or
    # raises an `ArgumentError` if there is no such named capture group.
    #
    # The same error is raised when the group exists but `#[]?` yields `nil`
    # for it (the group captured nothing).
    #
    # ```
    # "Crystal".match(/r(?<ok>ys)/).not_nil!["ok"] # => "ys"
    # "Crystal".match(/r(?<ok>ys)/).not_nil!["ng"] # raises ArgumentError
    # ```
    def [](group_name : String)
      match = self[group_name]?
      unless match
        raise ArgumentError.new("Match group named '#{group_name}' does not exist")
      end
      match
    end

    # Returns the part of the original string before the match. If the match
    # starts at the start of the string, returns the empty string.
    #
    # ```
    # "Crystal".match(/yst/).not_nil!.pre_match # => "Cr"
    # ```
    def pre_match
      @string.byte_slice(0, byte_begin(0))
    end

    # Returns the part of the original string after the match. If the match ends
    # at the end of the string, returns the empty string.
    #
    # ```
    # "Crystal".match(/yst/).not_nil!.post_match # => "al"
    # ```
    def post_match
      @string.byte_slice(byte_end(0))
    end

    # Appends the same representation as `#to_s` to *io*.
    def inspect(io : IO)
      to_s(io)
    end

    # Appends a human-readable representation of the match to *io*: the whole
    # match followed by every capture group as `label:value`. The label is the
    # entry found in `Regex#name_table` for that group number, or the number
    # itself when the table has no entry.
    #
    # ```
    # "Crystal".match(/r(ys)(?<ok>ta)/).not_nil!.to_s # => #<Regex::MatchData "rysta" 1:"ys" ok:"ta">
    # ```
    def to_s(io : IO)
      name_table = @regex.name_table

      io << "#<Regex::MatchData "
      self[0].inspect(io)
      # Every group is preceded by exactly one space; with no groups the loop
      # simply does not run, so no separate `size > 0` guard is needed.
      size.times do |i|
        group = i + 1
        io << " "
        io << name_table.fetch(group) { group }
        io << ":"
        self[group]?.inspect(io)
      end
      io << ">"
    end

    # Returns `self`: no copy is made.
    def dup
      self
    end

    # Returns `self`: no copy is made.
    def clone
      self
    end

    # Returns `true` if *other* has the same number of groups, the same regex,
    # the same subject string and identical stored offsets for the whole match
    # and every capture group.
    def ==(other : Regex::MatchData)
      size == other.size &&
        regex == other.regex &&
        string == other.string &&
        # (size + 1) offset pairs: the whole match plus one pair per group.
        @ovector.memcmp(other.@ovector, (size + 1) * 2) == 0
    end

    # Raises `IndexError` unless *index* designates the whole match or an
    # existing capture group.
    private def check_index_out_of_bounds(index)
      raise_invalid_group_index(index) unless valid_group?(index)
    end

    # Returns `true` if *index* lies in `0..size`.
    #
    # The lower bound matters: `@ovector` is a raw pointer, so a negative index
    # would otherwise read memory located before the offsets buffer.
    private def valid_group?(index)
      index >= 0 && index <= @size
    end

    # Raises the `IndexError` used for every invalid capture group index.
    private def raise_invalid_group_index(index)
      raise IndexError.new("Invalid capture group index: #{index}")
    end
  end
end