Skip to content

Commit

Permalink
Change strings.Split, bytes.Split to take a maximum substring count a…
Browse files Browse the repository at this point in the history
…rgument.

R=rsc
APPROVED=r
DELTA=131  (39 added, 10 deleted, 82 changed)
OCL=30669
CL=30723
  • Loading branch information
dsymonds committed Jun 25, 2009
1 parent 466dd8d commit 30533d6
Show file tree
Hide file tree
Showing 9 changed files with 116 additions and 84 deletions.
43 changes: 27 additions & 16 deletions src/pkg/bytes/bytes.go
Expand Up @@ -55,19 +55,27 @@ func Copy(dst, src []byte) int {
return len(src)
}

// Explode splits s into an array of UTF-8 sequences, one per Unicode character (still arrays of bytes).
// Invalid UTF-8 sequences become correct encodings of U+FFF8.
func Explode(s []byte) [][]byte {
a := make([][]byte, utf8.RuneCount(s));
// explode splits s into an array of UTF-8 sequences, one per Unicode character (still arrays of bytes),
// up to a maximum of n byte arrays (n <= 0 means no limit). Invalid UTF-8 sequences are chopped into individual bytes.
func explode(s []byte, n int) [][]byte {
if n <= 0 {
n = len(s);
}
a := make([][]byte, n);
var size, rune int;
i := 0;
na := 0;
for len(s) > 0 {
if na+1 >= n {
a[na] = s;
na++;
break
}
rune, size = utf8.DecodeRune(s);
a[i] = s[0:size];
a[na] = s[0:size];
s = s[size:len(s)];
i++;
na++;
}
return a
return a[0:na]
}

// Count counts the number of non-overlapping instances of sep in s.
Expand Down Expand Up @@ -101,27 +109,30 @@ func Index(s, sep []byte) int {
return -1
}

// Split returns the array representing the subarrays of s separated by sep. Adjacent
// occurrences of sep produce empty subarrays. If sep is empty, it is the same as Explode.
func Split(s, sep []byte) [][]byte {
// Split splits the array s around each instance of sep, returning an array of subarrays of s.
// If sep is empty, Split splits s after each UTF-8 sequence.
// If n > 0, Split splits s into at most n subarrays; the last subarray will contain the unsplit remainder.
func Split(s, sep []byte, n int) [][]byte {
if len(sep) == 0 {
return Explode(s)
return explode(s, n)
}
if n <= 0 {
n = Count(s, sep) + 1;
}
c := sep[0];
start := 0;
n := Count(s, sep)+1;
a := make([][]byte, n);
na := 0;
for i := 0; i+len(sep) <= len(s); i++ {
for i := 0; i+len(sep) <= len(s) && na+1 < n; i++ {
if s[i] == c && (len(sep) == 1 || Equal(s[i:i+len(sep)], sep)) {
a[na] = s[start:i];
na++;
start = i+len(sep);
i += len(sep)-1
i += len(sep)-1;
}
}
a[na] = s[start:len(s)];
return a
return a[0:na+1]
}

// Join concatenates the elements of a to create a single byte array. The separator
Expand Down
46 changes: 26 additions & 20 deletions src/pkg/bytes/bytes_test.go
Expand Up @@ -75,24 +75,25 @@ func TestCompare(t *testing.T) {

type ExplodeTest struct {
s string;
n int;
a []string;
}
var explodetests = []ExplodeTest {
ExplodeTest{ abcd, []string{"a", "b", "c", "d"} },
ExplodeTest{ faces, []string{"☺", "☻", "☹" } },
ExplodeTest{ abcd, 0, []string{"a", "b", "c", "d"} },
ExplodeTest{ faces, 0, []string{"☺", "☻", "☹"} },
ExplodeTest{ abcd, 2, []string{"a", "bcd"} },
}
func TestExplode(t *testing.T) {
for i := 0; i < len(explodetests); i++ {
tt := explodetests[i];
a := Explode(io.StringBytes(tt.s));
for _, tt := range(explodetests) {
a := explode(io.StringBytes(tt.s), tt.n);
result := arrayOfString(a);
if !eq(result, tt.a) {
t.Errorf(`Explode("%s") = %v; want %v`, tt.s, result, tt.a);
t.Errorf(`Explode("%s", %d) = %v; want %v`, tt.s, tt.n, result, tt.a);
continue;
}
s := Join(a, []byte{});
if string(s) != tt.s {
t.Errorf(`Join(Explode("%s"), "") = "%s"`, tt.s, s);
t.Errorf(`Join(Explode("%s", %d), "") = "%s"`, tt.s, tt.n, s);
}
}
}
Expand All @@ -101,30 +102,35 @@ func TestExplode(t *testing.T) {
type SplitTest struct {
s string;
sep string;
n int;
a []string;
}
var splittests = []SplitTest {
SplitTest{ abcd, "a", []string{"", "bcd"} },
SplitTest{ abcd, "z", []string{"abcd"} },
SplitTest{ abcd, "", []string{"a", "b", "c", "d"} },
SplitTest{ commas, ",", []string{"1", "2", "3", "4"} },
SplitTest{ dots, "...", []string{"1", ".2", ".3", ".4"} },
SplitTest{ faces, "☹", []string{"☺☻", ""} },
SplitTest{ faces, "~", []string{faces} },
SplitTest{ faces, "", []string{"☺", "☻", "☹"} },
SplitTest{ abcd, "a", 0, []string{"", "bcd"} },
SplitTest{ abcd, "z", 0, []string{"abcd"} },
SplitTest{ abcd, "", 0, []string{"a", "b", "c", "d"} },
SplitTest{ commas, ",", 0, []string{"1", "2", "3", "4"} },
SplitTest{ dots, "...", 0, []string{"1", ".2", ".3", ".4"} },
SplitTest{ faces, "☹", 0, []string{"☺☻", ""} },
SplitTest{ faces, "~", 0, []string{faces} },
SplitTest{ faces, "", 0, []string{"☺", "☻", "☹"} },
SplitTest{ "1 2 3 4", " ", 3, []string{"1", "2", "3 4"} },
SplitTest{ "1 2 3", " ", 3, []string{"1", "2", "3"} },
SplitTest{ "1 2", " ", 3, []string{"1", "2"} },
SplitTest{ "123", "", 2, []string{"1", "23"} },
SplitTest{ "123", "", 17, []string{"1", "2", "3"} },
}
func TestSplit(t *testing.T) {
for i := 0; i < len(splittests); i++ {
tt := splittests[i];
a := Split(io.StringBytes(tt.s), io.StringBytes(tt.sep));
for _, tt := range splittests {
a := Split(io.StringBytes(tt.s), io.StringBytes(tt.sep), tt.n);
result := arrayOfString(a);
if !eq(result, tt.a) {
t.Errorf(`Split("%s", "%s") = %v; want %v`, tt.s, tt.sep, result, tt.a);
t.Errorf(`Split(%q, %q, %d) = %v; want %v`, tt.s, tt.sep, tt.n, result, tt.a);
continue;
}
s := Join(a, io.StringBytes(tt.sep));
if string(s) != tt.s {
t.Errorf(`Join(Split("%s", "%s"), "%s") = "%s"`, tt.s, tt.sep, tt.sep, s);
t.Errorf(`Join(Split(%q, %q, %d), %q) = %q`, tt.s, tt.sep, tt.n, tt.sep, s);
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/pkg/exec/exec.go
Expand Up @@ -214,7 +214,7 @@ func LookPath(file string) (string, os.Error) {
// (equivalent to PATH=".").
pathenv = "";
}
for i, dir := range strings.Split(pathenv, ":") {
for i, dir := range strings.Split(pathenv, ":", 0) {
if dir == "" {
// Unix shell semantics: path element "" means "."
dir = ".";
Expand Down
2 changes: 1 addition & 1 deletion src/pkg/go/doc/comment.go
Expand Up @@ -45,7 +45,7 @@ func commentText(comments []string) string {
lines := make([]string, 0, 20);
for i, c := range comments {
// split on newlines
cl := strings.Split(c, "\n");
cl := strings.Split(c, "\n", 0);

// walk lines, stripping comment markers
w := 0;
Expand Down
9 changes: 4 additions & 5 deletions src/pkg/http/client.go
Expand Up @@ -108,13 +108,12 @@ func send(req *Request) (resp *Response, err os.Error) {
if err != nil {
return nil, err;
}
i := strings.Index(line, " ");
j := strings.Index(line[i+1:len(line)], " ") + i+1;
if i < 0 || j < 0 {
f := strings.Split(line, " ", 3);
if len(f) < 3 {
return nil, os.ErrorString(fmt.Sprintf("Invalid first line in HTTP response: %q", line));
}
resp.Status = line[i+1:len(line)];
resp.StatusCode, err = strconv.Atoi(line[i+1:j]);
resp.Status = f[1] + " " + f[2];
resp.StatusCode, err = strconv.Atoi(f[1]);
if err != nil {
return nil, os.ErrorString(fmt.Sprintf("Invalid status code in HTTP response: %q", line));
}
Expand Down
6 changes: 3 additions & 3 deletions src/pkg/http/request.go
Expand Up @@ -442,7 +442,7 @@ func ReadRequest(b *bufio.Reader) (req *Request, err os.Error) {
}

var f []string;
if f = strings.Split(s, " "); len(f) != 3 {
if f = strings.Split(s, " ", 3); len(f) < 3 {
return nil, BadRequest
}
req.Method, req.RawUrl, req.Proto = f[0], f[1], f[2];
Expand Down Expand Up @@ -572,8 +572,8 @@ func ReadRequest(b *bufio.Reader) (req *Request, err os.Error) {

func parseForm(body string) (data map[string] *vector.StringVector, err os.Error) {
data = make(map[string] *vector.StringVector);
for _, kv := range strings.Split(body, "&") {
kvPair := strings.Split(kv, "=");
for _, kv := range strings.Split(body, "&", 0) {
kvPair := strings.Split(kv, "=", 2);

var key, value string;
var e os.Error;
Expand Down
6 changes: 3 additions & 3 deletions src/pkg/strconv/fp_test.go
Expand Up @@ -28,7 +28,7 @@ func pow2(i int) float64 {
// Wrapper around strconv.Atof64. Handles dddddp+ddd (binary exponent)
// itself, passes the rest on to strconv.Atof64.
func myatof64(s string) (f float64, ok bool) {
a := strings.Split(s, "p");
a := strings.Split(s, "p", 2);
if len(a) == 2 {
n, err := strconv.Atoi64(a[0]);
if err != nil {
Expand Down Expand Up @@ -72,7 +72,7 @@ func myatof64(s string) (f float64, ok bool) {
// Wrapper around strconv.Atof32. Handles dddddp+ddd (binary exponent)
// itself, passes the rest on to strconv.Atof32.
func myatof32(s string) (f float32, ok bool) {
a := strings.Split(s, "p");
a := strings.Split(s, "p", 2);
if len(a) == 2 {
n, err := strconv.Atoi(a[0]);
if err != nil {
Expand Down Expand Up @@ -115,7 +115,7 @@ func TestFp(t *testing.T) {
if len(line) == 0 || line[0] == '#' {
continue
}
a := strings.Split(line, " ");
a := strings.Split(line, " ", 0);
if len(a) != 4 {
t.Error("testfp.txt:", lineno, ": wrong field count\n");
continue;
Expand Down
41 changes: 26 additions & 15 deletions src/pkg/strings/strings.go
Expand Up @@ -7,19 +7,27 @@ package strings

import "utf8"

// Explode splits s into an array of UTF-8 sequences, one per Unicode character (still strings).
// explode splits s into an array of UTF-8 sequences, one per Unicode character (still strings) up to a maximum of n (n <= 0 means no limit).
// Invalid UTF-8 sequences become correct encodings of U+FFFD.
func Explode(s string) []string {
a := make([]string, utf8.RuneCountInString(s));
func explode(s string, n int) []string {
if n <= 0 {
n = len(s);
}
a := make([]string, n);
var size, rune int;
i := 0;
na := 0;
for len(s) > 0 {
if na+1 >= n {
a[na] = s;
na++;
break
}
rune, size = utf8.DecodeRuneInString(s);
s = s[size:len(s)];
a[i] = string(rune);
i++;
a[na] = string(rune);
na++;
}
return a
return a[0:na]
}

// Count counts the number of non-overlapping instances of sep in s.
Expand Down Expand Up @@ -68,27 +76,30 @@ func LastIndex(s, sep string) int {
return -1
}

// Split returns the array representing the substrings of s separated by string sep. Adjacent
// occurrences of sep produce empty substrings. If sep is empty, it is the same as Explode.
func Split(s, sep string) []string {
// Split splits the string s around each instance of sep, returning an array of substrings of s.
// If sep is empty, Split splits s after each UTF-8 sequence.
// If n > 0, Split splits s into at most n substrings; the last substring will contain the unsplit remainder.
func Split(s, sep string, n int) []string {
if sep == "" {
return Explode(s)
return explode(s, n)
}
if n <= 0 {
n = Count(s, sep) + 1;
}
c := sep[0];
start := 0;
n := Count(s, sep)+1;
a := make([]string, n);
na := 0;
for i := 0; i+len(sep) <= len(s); i++ {
for i := 0; i+len(sep) <= len(s) && na+1 < n; i++ {
if s[i] == c && (len(sep) == 1 || s[i:i+len(sep)] == sep) {
a[na] = s[start:i];
na++;
start = i+len(sep);
i += len(sep)-1
i += len(sep)-1;
}
}
a[na] = s[start:len(s)];
return a
return a[0:na+1]
}

// Join concatenates the elements of a to create a single string. The separator string
Expand Down
45 changes: 25 additions & 20 deletions src/pkg/strings/strings_test.go
Expand Up @@ -83,53 +83,58 @@ func TestLastIndex(t *testing.T) {

type ExplodeTest struct {
s string;
n int;
a []string;
}
var explodetests = []ExplodeTest {
ExplodeTest{ abcd, []string{"a", "b", "c", "d"} },
ExplodeTest{ faces, []string{"☺", "☻", "☹" } },
ExplodeTest{ abcd, 4, []string{"a", "b", "c", "d"} },
ExplodeTest{ faces, 3, []string{"☺", "☻", "☹"} },
ExplodeTest{ abcd, 2, []string{"a", "bcd"} },
}
func TestExplode(t *testing.T) {
for i := 0; i < len(explodetests); i++ {
tt := explodetests[i];
a := Explode(tt.s);
for _, tt := range explodetests {
a := explode(tt.s, tt.n);
if !eq(a, tt.a) {
t.Errorf("Explode(%q) = %v; want %v", tt.s, a, tt.a);
t.Errorf("explode(%q, %d) = %v; want %v", tt.s, tt.n, a, tt.a);
continue;
}
s := Join(a, "");
if s != tt.s {
t.Errorf(`Join(Explode(%q), "") = %q`, tt.s, s);
t.Errorf(`Join(explode(%q, %d), "") = %q`, tt.s, tt.n, s);
}
}
}

type SplitTest struct {
s string;
sep string;
n int;
a []string;
}
var splittests = []SplitTest {
SplitTest{ abcd, "a", []string{"", "bcd"} },
SplitTest{ abcd, "z", []string{"abcd"} },
SplitTest{ abcd, "", []string{"a", "b", "c", "d"} },
SplitTest{ commas, ",", []string{"1", "2", "3", "4"} },
SplitTest{ dots, "...", []string{"1", ".2", ".3", ".4"} },
SplitTest{ faces, "☹", []string{"☺☻", ""} },
SplitTest{ faces, "~", []string{faces} },
SplitTest{ faces, "", []string{"☺", "☻", "☹"} },
SplitTest{ abcd, "a", 0, []string{"", "bcd"} },
SplitTest{ abcd, "z", 0, []string{"abcd"} },
SplitTest{ abcd, "", 0, []string{"a", "b", "c", "d"} },
SplitTest{ commas, ",", 0, []string{"1", "2", "3", "4"} },
SplitTest{ dots, "...", 0, []string{"1", ".2", ".3", ".4"} },
SplitTest{ faces, "☹", 0, []string{"☺☻", ""} },
SplitTest{ faces, "~", 0, []string{faces} },
SplitTest{ faces, "", 0, []string{"☺", "☻", "☹"} },
SplitTest{ "1 2 3 4", " ", 3, []string{"1", "2", "3 4"} },
SplitTest{ "1 2", " ", 3, []string{"1", "2"} },
SplitTest{ "123", "", 2, []string{"1", "23"} },
SplitTest{ "123", "", 17, []string{"1", "2", "3"} },
}
func TestSplit(t *testing.T) {
for i := 0; i < len(splittests); i++ {
tt := splittests[i];
a := Split(tt.s, tt.sep);
for _, tt := range splittests {
a := Split(tt.s, tt.sep, tt.n);
if !eq(a, tt.a) {
t.Errorf("Split(%q, %q) = %v; want %v", tt.s, tt.sep, a, tt.a);
t.Errorf("Split(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, a, tt.a);
continue;
}
s := Join(a, tt.sep);
if s != tt.s {
t.Errorf("Join(Split(%q, %q), %q) = %q", tt.s, tt.sep, tt.sep, s);
t.Errorf("Join(Split(%q, %q, %d), %q) = %q", tt.s, tt.sep, tt.n, tt.sep, s);
}
}
}
Expand Down

0 comments on commit 30533d6

Please sign in to comment.