Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add remove_empty option to many String#split overloads #5119

Merged
merged 3 commits into from
Oct 14, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 18 additions & 5 deletions spec/std/string_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -820,19 +820,26 @@ describe "String" do
end

describe "split" do
describe "by whitespace" do
it { " foo bar\n\t baz ".split.should eq(["foo", "bar", "baz"]) }
it { " foo bar\n\t baz ".split(1).should eq([" foo bar\n\t baz "]) }
it { " foo bar\n\t baz ".split(2).should eq(["foo", "bar\n\t baz "]) }
it { "日本語 \n\t 日本 \n\n 語".split.should eq(["日本語", "日本", "語"]) }
end

describe "by char" do
it { "".split(',').should eq([""]) }
it { "".split(',', remove_empty: true).should eq([] of String) }
it { "foo,bar,,baz,".split(',').should eq(["foo", "bar", "", "baz", ""]) }
it { "foo,bar,,baz,".split(',', remove_empty: true).should eq(["foo", "bar", "baz"]) }
it { "foo,bar,,baz".split(',').should eq(["foo", "bar", "", "baz"]) }
it { "foo,bar,,baz".split(',', remove_empty: true).should eq(["foo", "bar", "baz"]) }
it { "foo".split(',').should eq(["foo"]) }
it { "foo".split(' ').should eq(["foo"]) }
it { " foo".split(' ').should eq(["", "", "", "foo"]) }
it { "foo ".split(' ').should eq(["foo", "", "", ""]) }
it { " foo bar".split(' ').should eq(["", "", "", "foo", "", "bar"]) }
it { " foo bar\n\t baz ".split(' ').should eq(["", "", "", "foo", "", "", "bar\n\t", "", "baz", "", "", ""]) }
it { " foo bar\n\t baz ".split.should eq(["foo", "bar", "baz"]) }
it { " foo bar\n\t baz ".split(1).should eq([" foo bar\n\t baz "]) }
it { " foo bar\n\t baz ".split(2).should eq(["foo", "bar\n\t baz "]) }
it { " foo bar\n\t baz ".split(" ").should eq(["", "", "", "foo", "", "", "bar\n\t", "", "baz", "", "", ""]) }
it { "foo,bar,baz,qux".split(',', 1).should eq(["foo,bar,baz,qux"]) }
it { "foo,bar,baz,qux".split(',', 3).should eq(["foo", "bar", "baz,qux"]) }
Expand All @@ -841,18 +848,20 @@ describe "String" do
it { "foo bar baz qux".split(' ', 3).should eq(["foo", "bar", "baz qux"]) }
it { "foo bar baz qux".split(' ', 30).should eq(["foo", "bar", "baz", "qux"]) }
it { "a,b,".split(',', 3).should eq(["a", "b", ""]) }
it { "日本語 \n\t 日本 \n\n 語".split.should eq(["日本語", "日本", "語"]) }
it { "日本ん語日本ん語".split('ん').should eq(["日本", "語日本", "語"]) }
it { "=".split('=').should eq(["", ""]) }
it { "a=".split('=').should eq(["a", ""]) }
it { "=b".split('=').should eq(["", "b"]) }
it { "=".split('=', 2).should eq(["", ""]) }
it { "=".split('=', 2, remove_empty: true).should eq([] of String) }
end

describe "by string" do
it { "".split(",").should eq([""]) }
it { "".split(":-").should eq([""]) }
it { "".split(":-", remove_empty: true).should eq([] of String) }
it { "foo:-bar:-:-baz:-".split(":-").should eq(["foo", "bar", "", "baz", ""]) }
it { "foo:-bar:-:-baz:-".split(":-", remove_empty: true).should eq(["foo", "bar", "baz"]) }
it { "foo:-bar:-:-baz".split(":-").should eq(["foo", "bar", "", "baz"]) }
it { "foo".split(":-").should eq(["foo"]) }
it { "foo".split("").should eq(["f", "o", "o"]) }
Expand All @@ -865,11 +874,14 @@ describe "String" do
it { "a=".split("=").should eq(["a", ""]) }
it { "=b".split("=").should eq(["", "b"]) }
it { "=".split("=", 2).should eq(["", ""]) }
it { "=".split("=", 2, remove_empty: true).should eq([] of String) }
end

describe "by regex" do
it { "".split(/\n\t/).should eq([""] of String) }
it { "".split(/\n\t/).should eq([""]) }
it { "".split(/\n\t/, remove_empty: true).should eq([] of String) }
it { "foo\n\tbar\n\t\n\tbaz".split(/\n\t/).should eq(["foo", "bar", "", "baz"]) }
it { "foo\n\tbar\n\t\n\tbaz".split(/\n\t/, remove_empty: true).should eq(["foo", "bar", "baz"]) }
it { "foo\n\tbar\n\t\n\tbaz".split(/(?:\n\t)+/).should eq(["foo", "bar", "baz"]) }
it { "foo,bar".split(/,/, 1).should eq(["foo,bar"]) }
it { "foo,bar,".split(/,/).should eq(["foo", "bar", ""]) }
Expand All @@ -888,6 +900,7 @@ describe "String" do
it { "a=".split(/\=/).should eq(["a", ""]) }
it { "=b".split(/\=/).should eq(["", "b"]) }
it { "=".split(/\=/, 2).should eq(["", ""]) }
it { "=".split(/\=/, 2, remove_empty: true).should eq([] of String) }
it { ",".split(/(?:(x)|(,))/).should eq(["", ",", ""]) }

it "keeps groups" do
Expand Down
100 changes: 71 additions & 29 deletions src/string.cr
Original file line number Diff line number Diff line change
Expand Up @@ -2951,11 +2951,11 @@ class String
!!index(search)
end

# Makes an array by splitting the string on any ASCII whitespace characters
# (and removing that whitespace).
# Makes an array by splitting the string on any amount of ASCII whitespace
# characters (and removing that whitespace).
#
# If *limit* is present, up to *limit* new strings will be created,
# with the entire remainder added to the last string.
# If *limit* is present, up to *limit* new strings will be created, with the
# entire remainder added to the last string.
#
# ```
# old_pond = "
Expand All @@ -2974,10 +2974,11 @@ class String
ary
end

# Splits the string after any ASCII whitespace character and yields each part to a block.
# Splits the string after any amount of ASCII whitespace characters and yields
# each non-whitespace part to a block.
#
# If *limit* is present, up to *limit* new strings will be created,
# with the entire remainder added to the last string.
# If *limit* is present, up to *limit* new strings will be created, with the
# entire remainder added to the last string.
#
# ```
# ary = [] of String
Expand Down Expand Up @@ -3052,13 +3053,16 @@ class String
# If *limit* is present, up to *limit* new strings will be created,
# with the entire remainder added to the last string.
#
# If *remove_empty* is `true`, any empty strings are removed from the result.
#
# ```
# "foo,bar,baz".split(',') # => ["foo", "bar", "baz"]
# "foo,bar,baz".split(',', 2) # => ["foo", "bar,baz"]
# "foo,,bar,baz".split(',') # => ["foo", "", "bar", "baz"]
# "foo,,bar,baz".split(',', remove_empty: true) # => ["foo", "bar", "baz"]
# "foo,bar,baz".split(',', 2) # => ["foo", "bar,baz"]
# ```
def split(separator : Char, limit = nil)
def split(separator : Char, limit = nil, *, remove_empty = false)
ary = Array(String).new
split(separator, limit) do |string|
split(separator, limit, remove_empty: remove_empty) do |string|
ary << string
end
ary
Expand All @@ -3069,18 +3073,29 @@ class String
# If *limit* is present, up to *limit* new strings will be created,
# with the entire remainder added to the last string.
#
# If *remove_empty* is `true`, any empty strings are not yielded.
#
# ```
# ary = [] of String
#
# "foo,bar,baz".split(',') { |string| ary << string }
# "foo,,bar,baz".split(',') { |string| ary << string }
# ary # => ["foo", "", "bar", "baz"]
# ary.clear
#
# "foo,,bar,baz".split(',', remove_empty: true) { |string| ary << string }
# ary # => ["foo", "bar", "baz"]
# ary.clear
#
# "foo,bar,baz".split(',', 2) { |string| ary << string }
# ary # => ["foo", "bar,baz"]
# ```
def split(separator : Char, limit = nil, &block : String -> _)
if empty? || limit && limit <= 1
def split(separator : Char, limit = nil, *, remove_empty = false, &block : String -> _)
if empty?
yield "" unless remove_empty
return
end

if limit && limit <= 1
yield self
return
end
Expand All @@ -3092,14 +3107,15 @@ class String
reader.each do |char|
if char == separator
piece_bytesize = reader.pos - byte_offset
yield String.new(to_unsafe + byte_offset, piece_bytesize)
yield String.new(to_unsafe + byte_offset, piece_bytesize) unless remove_empty && piece_bytesize == 0
yielded += 1
byte_offset = reader.pos + reader.current_char_width
break if limit && yielded + 1 == limit
end
end

piece_bytesize = bytesize - byte_offset
return if remove_empty && piece_bytesize == 0
yield String.new(to_unsafe + byte_offset, piece_bytesize)
end

Expand All @@ -3110,15 +3126,18 @@ class String
#
# If *separator* is an empty string (`""`), the string will be separated into one-character strings.
#
# If *remove_empty* is `true`, any empty strings are removed from the result.
#
# ```
# long_river_name = "Mississippi"
# long_river_name.split("ss") # => ["Mi", "i", "ippi"]
# long_river_name.split("i") # => ["M", "ss", "ss", "pp", ""]
# long_river_name.split("") # => ["M", "i", "s", "s", "i", "s", "s", "i", "p", "p", "i"]
# long_river_name.split("ss") # => ["Mi", "i", "ippi"]
# long_river_name.split("i") # => ["M", "ss", "ss", "pp", ""]
# long_river_name.split("i", remove_empty: true) # => ["M", "ss", "ss", "pp"]
# long_river_name.split("") # => ["M", "i", "s", "s", "i", "s", "s", "i", "p", "p", "i"]
# ```
def split(separator : String, limit = nil)
def split(separator : String, limit = nil, *, remove_empty = false)
ary = Array(String).new
split(separator, limit) do |string|
split(separator, limit, remove_empty: remove_empty) do |string|
ary << string
end
ary
Expand All @@ -3131,6 +3150,8 @@ class String
#
# If *separator* is an empty string (`""`), the string will be separated into one-character strings.
#
# If *remove_empty* is `true`, any empty strings are removed from the result.
#
# ```
# ary = [] of String
# long_river_name = "Mississippi"
Expand All @@ -3143,11 +3164,20 @@ class String
# ary # => ["M", "ss", "ss", "pp", ""]
# ary.clear
#
# long_river_name.split("i", remove_empty: true) { |s| ary << s }
# ary # => ["M", "ss", "ss", "pp"]
# ary.clear
#
# long_river_name.split("") { |s| ary << s }
# ary # => ["M", "i", "s", "s", "i", "s", "s", "i", "p", "p", "i"]
# ```
def split(separator : String, limit = nil, &block : String -> _)
if empty? || (limit && limit <= 1)
def split(separator : String, limit = nil, *, remove_empty = false, &block : String -> _)
if empty?
yield "" unless remove_empty
return
end

if limit && limit <= 1
yield self
return
end
Expand All @@ -3171,7 +3201,9 @@ class String
if (to_unsafe + i).memcmp(separator.to_unsafe, separator_bytesize) == 0
piece_bytesize = i - byte_offset
piece_size = single_byte_optimizable ? piece_bytesize : 0
yield String.new(to_unsafe + byte_offset, piece_bytesize, piece_size)
unless remove_empty && piece_bytesize == 0
yield String.new(to_unsafe + byte_offset, piece_bytesize, piece_size)
end
yielded += 1
byte_offset = i + separator_bytesize
i += separator_bytesize - 1
Expand All @@ -3181,6 +3213,7 @@ class String
end

piece_bytesize = bytesize - byte_offset
return if remove_empty && piece_bytesize == 0
piece_size = single_byte_optimizable ? piece_bytesize : 0
yield String.new(to_unsafe + byte_offset, piece_bytesize, piece_size)
end
Expand All @@ -3192,6 +3225,8 @@ class String
#
# If *separator* is an empty regex (`//`), the string will be separated into one-character strings.
#
# If *remove_empty* is `true`, any empty strings are removed from the result.
#
# ```
# ary = [] of String
# long_river_name = "Mississippi"
Expand All @@ -3203,9 +3238,9 @@ class String
# long_river_name.split(//) { |s| ary << s }
# ary # => ["M", "i", "s", "s", "i", "s", "s", "i", "p", "p", "i"]
# ```
def split(separator : Regex, limit = nil)
def split(separator : Regex, limit = nil, *, remove_empty = false)
ary = Array(String).new
split(separator, limit) do |string|
split(separator, limit, remove_empty: remove_empty) do |string|
ary << string
end
ary
Expand All @@ -3218,13 +3253,20 @@ class String
#
# If *separator* is an empty regex (`//`), the string will be separated into one-character strings.
#
# If *remove_empty* is `true`, any empty strings are removed from the result.
#
# ```
# long_river_name = "Mississippi"
# long_river_name.split(/s+/) # => ["Mi", "i", "ippi"]
# long_river_name.split(//) # => ["M", "i", "s", "s", "i", "s", "s", "i", "p", "p", "i"]
# ```
def split(separator : Regex, limit = nil, &block : String -> _)
if empty? || (limit && limit <= 1)
def split(separator : Regex, limit = nil, *, remove_empty = false, &block : String -> _)
if empty?
yield "" unless remove_empty
return
end

if limit && limit <= 1
yield self
return
end
Expand All @@ -3249,7 +3291,7 @@ class String
else
slice_size = index - slice_offset

yield byte_slice(slice_offset, slice_size)
yield byte_slice(slice_offset, slice_size) unless remove_empty && slice_size == 0
count += 1

1.upto(match.size) do |i|
Expand All @@ -3265,7 +3307,7 @@ class String
break if match_offset >= bytesize
end

yield byte_slice(slice_offset)
yield byte_slice(slice_offset) unless remove_empty && slice_offset == bytesize
end

private def split_by_empty_separator(limit, &block : String -> _)
Expand Down