Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement #match! for Regex #13285

Merged
merged 18 commits into from
Jun 6, 2023
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions spec/std/regex_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,33 @@ describe "Regex" do
end
end

describe "#match!" do
  # Successful match: the whole match and the named groups are exposed.
  it "returns match data" do
    match_data = /(?<bar>.)(?<foo>.)/.match!("Crystal")
    match_data[0].should eq "Cr"
    match_data.captures.should eq [] of String
    match_data.named_captures.should eq({"bar" => "C", "foo" => "r"})
  end

  # `$~` must hold the same match data that `#match!` returned.
  it "assigns captures" do
    expected = /foo/.match!("foo")
    $~.should eq expected
  end

  # A failed match raises, and reading `$~` afterwards raises as well.
  it "raises on non-match" do
    expect_raises(Regex::Error, "Match not found") do
      /Crystal/.match!("foo")
    end
    expect_raises(NilAssertionError) do
      $~
    end
  end

  context "with options" do
    # With ANCHORED, "foo" is not found in ".foo", so the call raises.
    it "Regex::Match options" do
      expect_raises(Regex::Error, "Match not found") do
        /foo/.match!(".foo", options: Regex::MatchOptions::ANCHORED)
      end
    end
  end
end

describe "#match_at_byte_index" do
it "assigns captures" do
matchdata = /foo/.match_at_byte_index("..foo", 1)
Expand Down
15 changes: 15 additions & 0 deletions src/regex.cr
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,21 @@ class Regex
match(str, pos, options: _options)
end

# Matches a regular expression against `String` *str*. This starts at the character
devnote-dev marked this conversation as resolved.
Show resolved Hide resolved
# index *pos* if given, otherwise at the start of *str*. Returns a `Regex::MatchData`
# if *str* matched, otherwise raises `Regex::Error`. `$~` will contain the same value
devnote-dev marked this conversation as resolved.
Show resolved Hide resolved
# if matched.
#
# ```
# /(.)(.)(.)/.match!("abc")[2] # => "b"
# /(.)(.)/.match!("abc", 1)[2] # => "c"
# /(.)(タ)/.match!("クリスタル", 3)[2] # raises Regex::Error
# ```
def match!(str : String, pos : Int32 = 0, *, options : Regex::MatchOptions = :none) : MatchData
  # Translate the character index into a byte index; a position that has no
  # byte index is reported exactly like a failed match.
  byte_index = str.char_index_to_byte_index(pos)
  raise Error.new "Match not found" if byte_index.nil?

  # Run the match and fail loudly instead of returning nil.
  match_data = match_at_byte_index(str, byte_index, options)
  raise Error.new "Match not found" if match_data.nil?

  # The assignment publishes the match in `$~` and is also the return value.
  $~ = match_data
end

# Match at byte index. Matches a regular expression against `String`
# *str*. Starts at the byte index given by *pos* if given, otherwise at
# the start of *str*. Returns a `Regex::MatchData` if *str* matched, otherwise
Expand Down
98 changes: 49 additions & 49 deletions src/regex/match_data.cr
Original file line number Diff line number Diff line change
Expand Up @@ -21,32 +21,32 @@ class Regex
# Returns the original regular expression.
#
# ```
# "Crystal".match(/[p-s]/).not_nil!.regex # => /[p-s]/
# "Crystal".match!(/[p-s]/).regex # => /[p-s]/
# ```
getter regex : Regex

# Returns the number of capture groups, including named capture groups.
#
# ```
# "Crystal".match(/[p-s]/).not_nil!.group_size # => 0
# "Crystal".match(/r(ys)/).not_nil!.group_size # => 1
# "Crystal".match(/r(ys)(?<ok>ta)/).not_nil!.group_size # => 2
# "Crystal".match!(/[p-s]/).group_size # => 0
# "Crystal".match!(/r(ys)/).group_size # => 1
# "Crystal".match!(/r(ys)(?<ok>ta)/).group_size # => 2
# ```
getter group_size : Int32

# Returns the original string.
#
# ```
# "Crystal".match(/[p-s]/).not_nil!.string # => "Crystal"
# "Crystal".match!(/[p-s]/).string # => "Crystal"
# ```
getter string : String

# Returns the number of elements in this match object.
#
# ```
# "Crystal".match(/[p-s]/).not_nil!.size # => 1
# "Crystal".match(/r(ys)/).not_nil!.size # => 2
# "Crystal".match(/r(ys)(?<ok>ta)/).not_nil!.size # => 3
# "Crystal".match!(/[p-s]/).size # => 1
# "Crystal".match!(/r(ys)/).size # => 2
# "Crystal".match!(/r(ys)(?<ok>ta)/).size # => 3
# ```
def size : Int32
group_size + 1
Expand All @@ -61,11 +61,11 @@ class Regex
# subpattern is unused.
#
# ```
# "Crystal".match(/r/).not_nil!.begin(0) # => 1
# "Crystal".match(/r(ys)/).not_nil!.begin(1) # => 2
# "クリスタル".match(/リ(ス)/).not_nil!.begin(0) # => 1
# "Crystal".match(/r/).not_nil!.begin(1) # IndexError: Invalid capture group index: 1
# "Crystal".match(/r(x)?/).not_nil!.begin(1) # IndexError: Capture group 1 was not matched
# "Crystal".match!(/r/).begin(0) # => 1
# "Crystal".match!(/r(ys)/).begin(1) # => 2
# "クリスタル".match!(/リ(ス)/).begin(0) # => 1
# "Crystal".match!(/r/).begin(1) # IndexError: Invalid capture group index: 1
# "Crystal".match!(/r(x)?/).begin(1) # IndexError: Capture group 1 was not matched
# ```
def begin(n = 0) : Int32
@string.byte_index_to_char_index(byte_begin(n)).not_nil!
Expand All @@ -80,11 +80,11 @@ class Regex
# subpattern is unused.
#
# ```
# "Crystal".match(/r/).not_nil!.end(0) # => 2
# "Crystal".match(/r(ys)/).not_nil!.end(1) # => 4
# "クリスタル".match(/リ(ス)/).not_nil!.end(0) # => 3
# "Crystal".match(/r/).not_nil!.end(1) # IndexError: Invalid capture group index: 1
# "Crystal".match(/r(x)?/).not_nil!.end(1) # IndexError: Capture group 1 was not matched
# "Crystal".match!(/r/).end(0) # => 2
# "Crystal".match!(/r(ys)/).end(1) # => 4
# "クリスタル".match!(/リ(ス)/).end(0) # => 3
# "Crystal".match!(/r/).end(1) # IndexError: Invalid capture group index: 1
# "Crystal".match!(/r(x)?/).end(1) # IndexError: Capture group 1 was not matched
# ```
def end(n = 0) : Int32
@string.byte_index_to_char_index(byte_end(n)).not_nil!
Expand All @@ -99,11 +99,11 @@ class Regex
# subpattern is unused.
#
# ```
# "Crystal".match(/r/).not_nil!.byte_begin(0) # => 1
# "Crystal".match(/r(ys)/).not_nil!.byte_begin(1) # => 2
# "クリスタル".match(/リ(ス)/).not_nil!.byte_begin(0) # => 3
# "Crystal".match(/r/).not_nil!.byte_begin(1) # IndexError: Invalid capture group index: 1
# "Crystal".match(/r(x)?/).not_nil!.byte_begin(1) # IndexError: Capture group 1 was not matched
# "Crystal".match!(/r/).byte_begin(0) # => 1
# "Crystal".match!(/r(ys)/).byte_begin(1) # => 2
# "クリスタル".match!(/リ(ス)/).byte_begin(0) # => 3
# "Crystal".match!(/r/).byte_begin(1) # IndexError: Invalid capture group index: 1
# "Crystal".match!(/r(x)?/).byte_begin(1) # IndexError: Capture group 1 was not matched
# ```
def byte_begin(n = 0) : Int32
check_index_out_of_bounds n
Expand All @@ -119,11 +119,11 @@ class Regex
# subpattern is unused.
#
# ```
# "Crystal".match(/r/).not_nil!.byte_end(0) # => 2
# "Crystal".match(/r(ys)/).not_nil!.byte_end(1) # => 4
# "クリスタル".match(/リ(ス)/).not_nil!.byte_end(0) # => 9
# "Crystal".match(/r/).not_nil!.byte_end(1) # IndexError: Invalid capture group index: 1
# "Crystal".match(/r(x)?/).not_nil!.byte_end(1) # IndexError: Capture group 1 was not matched
# "Crystal".match!(/r/).byte_end(0) # => 2
# "Crystal".match!(/r(ys)/).byte_end(1) # => 4
# "クリスタル".match!(/リ(ス)/).byte_end(0) # => 9
# "Crystal".match!(/r/).byte_end(1) # IndexError: Invalid capture group index: 1
# "Crystal".match!(/r(x)?/).byte_end(1) # IndexError: Capture group 1 was not matched
# ```
def byte_end(n = 0) : Int32
check_index_out_of_bounds n
Expand All @@ -136,9 +136,9 @@ class Regex
# When *n* is `0`, returns the match for the entire `Regex`.
#
# ```
# "Crystal".match(/r(ys)/).not_nil![0]? # => "rys"
# "Crystal".match(/r(ys)/).not_nil![1]? # => "ys"
# "Crystal".match(/r(ys)/).not_nil![2]? # => nil
# "Crystal".match!(/r(ys)/)[0]? # => "rys"
# "Crystal".match!(/r(ys)/)[1]? # => "ys"
# "Crystal".match!(/r(ys)/)[2]? # => nil
# ```
def []?(n : Int) : String?
return unless valid_group?(n)
Expand All @@ -151,8 +151,8 @@ class Regex
# if there is no *n*th capture group.
#
# ```
# "Crystal".match(/r(ys)/).not_nil![1] # => "ys"
# "Crystal".match(/r(ys)/).not_nil![2] # raises IndexError
# "Crystal".match!(/r(ys)/)[1] # => "ys"
# "Crystal".match!(/r(ys)/)[2] # raises IndexError
# ```
def [](n : Int) : String
check_index_out_of_bounds n
Expand All @@ -165,15 +165,15 @@ class Regex
# `nil` if there is no such named capture group.
#
# ```
# "Crystal".match(/r(?<ok>ys)/).not_nil!["ok"]? # => "ys"
# "Crystal".match(/r(?<ok>ys)/).not_nil!["ng"]? # => nil
# "Crystal".match!(/r(?<ok>ys)/)["ok"]? # => "ys"
# "Crystal".match!(/r(?<ok>ys)/)["ng"]? # => nil
# ```
#
# When there are capture groups having same name, it returns the last
# matched capture group.
#
# ```
# "Crystal".match(/(?<ok>Cr).*(?<ok>al)/).not_nil!["ok"]? # => "al"
# "Crystal".match!(/(?<ok>Cr).*(?<ok>al)/)["ok"]? # => "al"
# ```
def []?(group_name : String) : String?
fetch_impl(group_name) { nil }
Expand All @@ -183,15 +183,15 @@ class Regex
# raises an `KeyError` if there is no such named capture group.
#
# ```
# "Crystal".match(/r(?<ok>ys)/).not_nil!["ok"] # => "ys"
# "Crystal".match(/r(?<ok>ys)/).not_nil!["ng"] # raises KeyError
# "Crystal".match!(/r(?<ok>ys)/)["ok"] # => "ys"
# "Crystal".match!(/r(?<ok>ys)/)["ng"] # raises KeyError
# ```
#
# When there are capture groups having same name, it returns the last
# matched capture group.
#
# ```
# "Crystal".match(/(?<ok>Cr).*(?<ok>al)/).not_nil!["ok"] # => "al"
# "Crystal".match!(/(?<ok>Cr).*(?<ok>al)/)["ok"] # => "al"
# ```
def [](group_name : String) : String
fetch_impl(group_name) { |exists|
Expand Down Expand Up @@ -230,7 +230,7 @@ class Regex
# starts at the start of the string, returns the empty string.
#
# ```
# "Crystal".match(/yst/).not_nil!.pre_match # => "Cr"
# "Crystal".match!(/yst/).pre_match # => "Cr"
# ```
def pre_match : String
@string.byte_slice(0, byte_begin(0))
Expand All @@ -240,7 +240,7 @@ class Regex
# at the end of the string, returns the empty string.
#
# ```
# "Crystal".match(/yst/).not_nil!.post_match # => "al"
# "Crystal".match!(/yst/).post_match # => "al"
# ```
def post_match : String
@string.byte_slice(byte_end(0))
Expand All @@ -251,12 +251,12 @@ class Regex
# It is a difference from `to_a` that the result array does not contain the match for the entire `Regex` (`self[0]`).
#
# ```
# match = "Crystal".match(/(Cr)(?<name1>y)(st)(?<name2>al)/).not_nil!
# match = "Crystal".match!(/(Cr)(?<name1>y)(st)(?<name2>al)/)
# match.captures # => ["Cr", "st"]
#
# # When this regex has an optional group, result array may contain
# # a `nil` if this group is not matched.
# match = "Crystal".match(/(Cr)(stal)?/).not_nil!
# match = "Crystal".match!(/(Cr)(stal)?/)
# match.captures # => ["Cr", nil]
# ```
def captures : Array(String?)
Expand All @@ -273,12 +273,12 @@ class Regex
# Returns a hash of named capture groups.
#
# ```
# match = "Crystal".match(/(Cr)(?<name1>y)(st)(?<name2>al)/).not_nil!
# match = "Crystal".match!(/(Cr)(?<name1>y)(st)(?<name2>al)/)
# match.named_captures # => {"name1" => "y", "name2" => "al"}
#
# # When this regex has an optional group, result hash may contain
# # a `nil` if this group is not matched.
# match = "Crystal".match(/(?<name1>Cr)(?<name2>stal)?/).not_nil!
# match = "Crystal".match!(/(?<name1>Cr)(?<name2>stal)?/)
# match.named_captures # => {"name1" => "Cr", "name2" => nil}
# ```
def named_captures : Hash(String, String?)
Expand All @@ -297,12 +297,12 @@ class Regex
# Convert this match data into an array.
#
# ```
# match = "Crystal".match(/(Cr)(?<name1>y)(st)(?<name2>al)/).not_nil!
# match = "Crystal".match!(/(Cr)(?<name1>y)(st)(?<name2>al)/)
# match.to_a # => ["Crystal", "Cr", "y", "st", "al"]
#
# # When this regex has an optional group, result array may contain
# # a `nil` if this group is not matched.
# match = "Crystal".match(/(Cr)(?<name1>stal)?/).not_nil!
# match = "Crystal".match!(/(Cr)(?<name1>stal)?/)
# match.to_a # => ["Cr", "Cr", nil]
# ```
def to_a : Array(String?)
Expand All @@ -312,12 +312,12 @@ class Regex
# Convert this match data into a hash.
#
# ```
# match = "Crystal".match(/(Cr)(?<name1>y)(st)(?<name2>al)/).not_nil!
# match = "Crystal".match!(/(Cr)(?<name1>y)(st)(?<name2>al)/)
# match.to_h # => {0 => "Crystal", 1 => "Cr", "name1" => "y", 3 => "st", "name2" => "al"}
#
# # When this regex has an optional group, result array may contain
# # a `nil` if this group is not matched.
# match = "Crystal".match(/(Cr)(?<name1>stal)?/).not_nil!
# match = "Crystal".match!(/(Cr)(?<name1>stal)?/)
# match.to_h # => {0 => "Cr", 1 => "Cr", "name1" => nil}
# ```
def to_h : Hash(Int32 | String, String?)
Expand Down
19 changes: 14 additions & 5 deletions src/string.cr
Original file line number Diff line number Diff line change
Expand Up @@ -4547,8 +4547,7 @@ class String
end
end

# Finds a match of *regex*, starting at *pos*, and updates `$~` with the result.
#
# ```
# "foo".match(/foo/) # => Regex::MatchData("foo")
Expand All @@ -4558,9 +4557,19 @@ class String
# $~ # raises Exception
# ```
def match(regex : Regex, pos = 0) : Regex::MatchData?
  # Match once only: the assignment stores the result (possibly nil) in `$~`
  # and, as the last expression, is also the method's return value.
  $~ = regex.match self, pos
end

# Finds matches of *regex* starting at *pos* and updates `$~` to the result.
# Raises `Regex::Error` if there are no matches.
devnote-dev marked this conversation as resolved.
Show resolved Hide resolved
#
# ```
# "foo".match!(/foo/) # => Regex::MatchData("foo")
# $~ # => Regex::MatchData("foo")
#
# "foo".match!(/bar/) # raises Regex::Error
# ```
def match!(regex : Regex, pos = 0) : Regex::MatchData
  # `Regex#match!` raises when nothing matches, so `found` is never nil here.
  found = regex.match!(self, pos)

  # Publish the match in `$~`; the assignment is also the return value.
  $~ = found
end

# Finds match of *regex* like `#match`, but it returns `Bool` value.
Expand Down