From 30533d607a3e10ae1a688ea88ee5976115681cba Mon Sep 17 00:00:00 2001 From: David Symonds Date: Wed, 24 Jun 2009 19:02:29 -0700 Subject: [PATCH] Change strings.Split, bytes.Split to take a maximum substring count argument. R=rsc APPROVED=r DELTA=131 (39 added, 10 deleted, 82 changed) OCL=30669 CL=30723 --- src/pkg/bytes/bytes.go | 43 ++++++++++++++++++------------ src/pkg/bytes/bytes_test.go | 46 +++++++++++++++++++-------------- src/pkg/exec/exec.go | 2 +- src/pkg/go/doc/comment.go | 2 +- src/pkg/http/client.go | 9 +++---- src/pkg/http/request.go | 6 ++--- src/pkg/strconv/fp_test.go | 6 ++--- src/pkg/strings/strings.go | 41 ++++++++++++++++++----------- src/pkg/strings/strings_test.go | 45 ++++++++++++++++++-------------- 9 files changed, 116 insertions(+), 84 deletions(-) diff --git a/src/pkg/bytes/bytes.go b/src/pkg/bytes/bytes.go index 17f82db7ce982..e5e8bffd8ccb3 100644 --- a/src/pkg/bytes/bytes.go +++ b/src/pkg/bytes/bytes.go @@ -55,19 +55,27 @@ func Copy(dst, src []byte) int { return len(src) } -// Explode splits s into an array of UTF-8 sequences, one per Unicode character (still arrays of bytes). -// Invalid UTF-8 sequences become correct encodings of U+FFF8. -func Explode(s []byte) [][]byte { - a := make([][]byte, utf8.RuneCount(s)); +// explode splits s into an array of UTF-8 sequences, one per Unicode character (still arrays of bytes), +// up to a maximum of n byte arrays. Invalid UTF-8 sequences are chopped into individual bytes. +func explode(s []byte, n int) [][]byte { + if n <= 0 { + n = len(s); + } + a := make([][]byte, n); var size, rune int; - i := 0; + na := 0; for len(s) > 0 { + if na+1 >= n { + a[na] = s; + na++; + break + } rune, size = utf8.DecodeRune(s); - a[i] = s[0:size]; + a[na] = s[0:size]; s = s[size:len(s)]; - i++; + na++; } - return a + return a[0:na] } // Count counts the number of non-overlapping instances of sep in s. @@ -101,27 +109,30 @@ func Index(s, sep []byte) int { return -1 } -// Split returns the array representing the subarrays of s separated by sep. Adjacent -// occurrences of sep produce empty subarrays. If sep is empty, it is the same as Explode. -func Split(s, sep []byte) [][]byte { +// Split splits the array s around each instance of sep, returning an array of subarrays of s. +// If sep is empty, Split splits s after each UTF-8 sequence. +// If n > 0, split Splits s into at most n subarrays; the last subarray will contain an unsplit remainder. +func Split(s, sep []byte, n int) [][]byte { if len(sep) == 0 { - return Explode(s) + return explode(s, n) + } + if n <= 0 { + n = Count(s, sep) + 1; } c := sep[0]; start := 0; - n := Count(s, sep)+1; a := make([][]byte, n); na := 0; - for i := 0; i+len(sep) <= len(s); i++ { + for i := 0; i+len(sep) <= len(s) && na+1 < n; i++ { if s[i] == c && (len(sep) == 1 || Equal(s[i:i+len(sep)], sep)) { a[na] = s[start:i]; na++; start = i+len(sep); - i += len(sep)-1 + i += len(sep)-1; } } a[na] = s[start:len(s)]; - return a + return a[0:na+1] } // Join concatenates the elements of a to create a single byte array. The separator diff --git a/src/pkg/bytes/bytes_test.go b/src/pkg/bytes/bytes_test.go index 3fbe21c30dcc3..01adbccfd8bdd 100644 --- a/src/pkg/bytes/bytes_test.go +++ b/src/pkg/bytes/bytes_test.go @@ -75,24 +75,25 @@ func TestCompare(t *testing.T) { type ExplodeTest struct { s string; + n int; a []string; } var explodetests = []ExplodeTest { - ExplodeTest{ abcd, []string{"a", "b", "c", "d"} }, - ExplodeTest{ faces, []string{"☺", "☻", "☹" } }, + ExplodeTest{ abcd, 0, []string{"a", "b", "c", "d"} }, + ExplodeTest{ faces, 0, []string{"☺", "☻", "☹"} }, + ExplodeTest{ abcd, 2, []string{"a", "bcd"} }, } func TestExplode(t *testing.T) { - for i := 0; i < len(explodetests); i++ { - tt := explodetests[i]; - a := Explode(io.StringBytes(tt.s)); + for _, tt := range(explodetests) { + a := explode(io.StringBytes(tt.s), tt.n); result := arrayOfString(a); if !eq(result, tt.a) { - t.Errorf(`Explode("%s") = %v; want %v`, tt.s, result, tt.a); + t.Errorf(`Explode("%s", %d) = %v; want %v`, tt.s, tt.n, result, tt.a); continue; } s := Join(a, []byte{}); if string(s) != tt.s { - t.Errorf(`Join(Explode("%s"), "") = "%s"`, tt.s, s); + t.Errorf(`Join(Explode("%s", %d), "") = "%s"`, tt.s, tt.n, s); } } } @@ -101,30 +102,35 @@ func TestExplode(t *testing.T) { type SplitTest struct { s string; sep string; + n int; a []string; } var splittests = []SplitTest { - SplitTest{ abcd, "a", []string{"", "bcd"} }, - SplitTest{ abcd, "z", []string{"abcd"} }, - SplitTest{ abcd, "", []string{"a", "b", "c", "d"} }, - SplitTest{ commas, ",", []string{"1", "2", "3", "4"} }, - SplitTest{ dots, "...", []string{"1", ".2", ".3", ".4"} }, - SplitTest{ faces, "☹", []string{"☺☻", ""} }, - SplitTest{ faces, "~", []string{faces} }, - SplitTest{ faces, "", []string{"☺", "☻", "☹"} }, + SplitTest{ abcd, "a", 0, []string{"", "bcd"} }, + SplitTest{ abcd, "z", 0, []string{"abcd"} }, + SplitTest{ abcd, "", 0, []string{"a", "b", "c", "d"} }, + SplitTest{ commas, ",", 0, []string{"1", "2", "3", "4"} }, + SplitTest{ dots, "...", 0, []string{"1", ".2", ".3", ".4"} }, + SplitTest{ faces, "☹", 0, []string{"☺☻", ""} }, + SplitTest{ faces, "~", 0, []string{faces} }, + SplitTest{ faces, "", 0, []string{"☺", "☻", "☹"} }, + SplitTest{ "1 2 3 4", " ", 3, []string{"1", "2", "3 4"} }, + SplitTest{ "1 2 3", " ", 3, []string{"1", "2", "3"} }, + SplitTest{ "1 2", " ", 3, []string{"1", "2"} }, + SplitTest{ "123", "", 2, []string{"1", "23"} }, + SplitTest{ "123", "", 17, []string{"1", "2", "3"} }, } func TestSplit(t *testing.T) { - for i := 0; i < len(splittests); i++ { - tt := splittests[i]; - a := Split(io.StringBytes(tt.s), io.StringBytes(tt.sep)); + for _, tt := range splittests { + a := Split(io.StringBytes(tt.s), io.StringBytes(tt.sep), tt.n); result := arrayOfString(a); if !eq(result, tt.a) { - t.Errorf(`Split("%s", "%s") = %v; want %v`, tt.s, tt.sep, result, tt.a); + t.Errorf(`Split(%q, %q, %d) = %v; want %v`, tt.s, tt.sep, tt.n, result, tt.a); continue; } s := Join(a, io.StringBytes(tt.sep)); if string(s) != tt.s { - t.Errorf(`Join(Split("%s", "%s"), "%s") = "%s"`, tt.s, tt.sep, tt.sep, s); + t.Errorf(`Join(Split(%q, %q, %d), %q) = %q`, tt.s, tt.sep, tt.n, tt.sep, s); } } } diff --git a/src/pkg/exec/exec.go b/src/pkg/exec/exec.go index c2b7bdd59bf20..ebb40a2fe8392 100644 --- a/src/pkg/exec/exec.go +++ b/src/pkg/exec/exec.go @@ -214,7 +214,7 @@ func LookPath(file string) (string, os.Error) { // (equivalent to PATH="."). pathenv = ""; } - for i, dir := range strings.Split(pathenv, ":") { + for i, dir := range strings.Split(pathenv, ":", 0) { if dir == "" { // Unix shell semantics: path element "" means "." dir = "."; diff --git a/src/pkg/go/doc/comment.go b/src/pkg/go/doc/comment.go index 19a65a2277c2e..064080fe7658d 100644 --- a/src/pkg/go/doc/comment.go +++ b/src/pkg/go/doc/comment.go @@ -45,7 +45,7 @@ func commentText(comments []string) string { lines := make([]string, 0, 20); for i, c := range comments { // split on newlines - cl := strings.Split(c, "\n"); + cl := strings.Split(c, "\n", 0); // walk lines, stripping comment markers w := 0; diff --git a/src/pkg/http/client.go b/src/pkg/http/client.go index 52a536fb388d3..8c17eb8e36cb1 100644 --- a/src/pkg/http/client.go +++ b/src/pkg/http/client.go @@ -108,13 +108,12 @@ func send(req *Request) (resp *Response, err os.Error) { if err != nil { return nil, err; } - i := strings.Index(line, " "); - j := strings.Index(line[i+1:len(line)], " ") + i+1; - if i < 0 || j < 0 { + f := strings.Split(line, " ", 3); + if len(f) < 3 { return nil, os.ErrorString(fmt.Sprintf("Invalid first line in HTTP response: %q", line)); } - resp.Status = line[i+1:len(line)]; - resp.StatusCode, err = strconv.Atoi(line[i+1:j]); + resp.Status = f[1] + " " + f[2]; + resp.StatusCode, err = strconv.Atoi(f[1]); if err != nil { return nil, os.ErrorString(fmt.Sprintf("Invalid status code in HTTP response: %q", line)); } diff --git a/src/pkg/http/request.go b/src/pkg/http/request.go index b331eb08372a5..9051d4c43dd06 100644 --- a/src/pkg/http/request.go +++ b/src/pkg/http/request.go @@ -442,7 +442,7 @@ func ReadRequest(b *bufio.Reader) (req *Request, err os.Error) { } var f []string; - if f = strings.Split(s, " "); len(f) != 3 { + if f = strings.Split(s, " ", 3); len(f) < 3 { return nil, BadRequest } req.Method, req.RawUrl, req.Proto = f[0], f[1], f[2]; @@ -572,8 +572,8 @@ func ReadRequest(b *bufio.Reader) (req *Request, err os.Error) { func parseForm(body string) (data map[string] *vector.StringVector, err os.Error) { data = make(map[string] *vector.StringVector); - for _, kv := range strings.Split(body, "&") { - kvPair := strings.Split(kv, "="); + for _, kv := range strings.Split(body, "&", 0) { + kvPair := strings.Split(kv, "=", 2); var key, value string; var e os.Error; diff --git a/src/pkg/strconv/fp_test.go b/src/pkg/strconv/fp_test.go index f1993bb7e2f3d..20e158cec90ee 100644 --- a/src/pkg/strconv/fp_test.go +++ b/src/pkg/strconv/fp_test.go @@ -28,7 +28,7 @@ func pow2(i int) float64 { // Wrapper around strconv.Atof64. Handles dddddp+ddd (binary exponent) // itself, passes the rest on to strconv.Atof64. func myatof64(s string) (f float64, ok bool) { - a := strings.Split(s, "p"); + a := strings.Split(s, "p", 2); if len(a) == 2 { n, err := strconv.Atoi64(a[0]); if err != nil { @@ -72,7 +72,7 @@ func myatof64(s string) (f float64, ok bool) { // Wrapper around strconv.Atof32. Handles dddddp+ddd (binary exponent) // itself, passes the rest on to strconv.Atof32. func myatof32(s string) (f float32, ok bool) { - a := strings.Split(s, "p"); + a := strings.Split(s, "p", 2); if len(a) == 2 { n, err := strconv.Atoi(a[0]); if err != nil { @@ -115,7 +115,7 @@ func TestFp(t *testing.T) { if len(line) == 0 || line[0] == '#' { continue } - a := strings.Split(line, " "); + a := strings.Split(line, " ", 0); if len(a) != 4 { t.Error("testfp.txt:", lineno, ": wrong field count\n"); continue; diff --git a/src/pkg/strings/strings.go b/src/pkg/strings/strings.go index 03509077713df..9b0f031b9afd9 100644 --- a/src/pkg/strings/strings.go +++ b/src/pkg/strings/strings.go @@ -7,19 +7,27 @@ package strings import "utf8" -// Explode splits s into an array of UTF-8 sequences, one per Unicode character (still strings). +// explode splits s into an array of UTF-8 sequences, one per Unicode character (still strings) up to a maximum of n (n <= 0 means no limit). // Invalid UTF-8 sequences become correct encodings of U+FFF8. -func Explode(s string) []string { - a := make([]string, utf8.RuneCountInString(s)); +func explode(s string, n int) []string { + if n <= 0 { + n = len(s); + } + a := make([]string, n); var size, rune int; - i := 0; + na := 0; for len(s) > 0 { + if na+1 >= n { + a[na] = s; + na++; + break + } rune, size = utf8.DecodeRuneInString(s); s = s[size:len(s)]; - a[i] = string(rune); - i++; + a[na] = string(rune); + na++; } - return a + return a[0:na] } // Count counts the number of non-overlapping instances of sep in s. @@ -68,27 +76,30 @@ func LastIndex(s, sep string) int { return -1 } -// Split returns the array representing the substrings of s separated by string sep. Adjacent -// occurrences of sep produce empty substrings. If sep is empty, it is the same as Explode. -func Split(s, sep string) []string { +// Split splits the string s around each instance of sep, returning an array of substrings of s. +// If sep is empty, Split splits s after each UTF-8 sequence. +// If n > 0, split Splits s into at most n substrings; the last subarray will contain an unsplit remainder string. +func Split(s, sep string, n int) []string { if sep == "" { - return Explode(s) + return explode(s, n) + } + if n <= 0 { + n = Count(s, sep) + 1; } c := sep[0]; start := 0; - n := Count(s, sep)+1; a := make([]string, n); na := 0; - for i := 0; i+len(sep) <= len(s); i++ { + for i := 0; i+len(sep) <= len(s) && na+1 < n; i++ { if s[i] == c && (len(sep) == 1 || s[i:i+len(sep)] == sep) { a[na] = s[start:i]; na++; start = i+len(sep); - i += len(sep)-1 + i += len(sep)-1; } } a[na] = s[start:len(s)]; - return a + return a[0:na+1] } // Join concatenates the elements of a to create a single string. The separator string diff --git a/src/pkg/strings/strings_test.go b/src/pkg/strings/strings_test.go index 6464ca3992df2..7a41584b70039 100644 --- a/src/pkg/strings/strings_test.go +++ b/src/pkg/strings/strings_test.go @@ -83,23 +83,24 @@ func TestLastIndex(t *testing.T) { type ExplodeTest struct { s string; + n int; a []string; } var explodetests = []ExplodeTest { - ExplodeTest{ abcd, []string{"a", "b", "c", "d"} }, - ExplodeTest{ faces, []string{"☺", "☻", "☹" } }, + ExplodeTest{ abcd, 4, []string{"a", "b", "c", "d"} }, + ExplodeTest{ faces, 3, []string{"☺", "☻", "☹"} }, + ExplodeTest{ abcd, 2, []string{"a", "bcd"} }, } func TestExplode(t *testing.T) { - for i := 0; i < len(explodetests); i++ { - tt := explodetests[i]; - a := Explode(tt.s); + for _, tt := range explodetests { + a := explode(tt.s, tt.n); if !eq(a, tt.a) { - t.Errorf("Explode(%q) = %v; want %v", tt.s, a, tt.a); + t.Errorf("explode(%q, %d) = %v; want %v", tt.s, tt.n, a, tt.a); continue; } s := Join(a, ""); if s != tt.s { - t.Errorf(`Join(Explode(%q), "") = %q`, tt.s, s); + t.Errorf(`Join(explode(%q, %d), "") = %q`, tt.s, tt.n, s); } } } @@ -107,29 +108,33 @@ func TestExplode(t *testing.T) { type SplitTest struct { s string; sep string; + n int; a []string; } var splittests = []SplitTest { - SplitTest{ abcd, "a", []string{"", "bcd"} }, - SplitTest{ abcd, "z", []string{"abcd"} }, - SplitTest{ abcd, "", []string{"a", "b", "c", "d"} }, - SplitTest{ commas, ",", []string{"1", "2", "3", "4"} }, - SplitTest{ dots, "...", []string{"1", ".2", ".3", ".4"} }, - SplitTest{ faces, "☹", []string{"☺☻", ""} }, - SplitTest{ faces, "~", []string{faces} }, - SplitTest{ faces, "", []string{"☺", "☻", "☹"} }, + SplitTest{ abcd, "a", 0, []string{"", "bcd"} }, + SplitTest{ abcd, "z", 0, []string{"abcd"} }, + SplitTest{ abcd, "", 0, []string{"a", "b", "c", "d"} }, + SplitTest{ commas, ",", 0, []string{"1", "2", "3", "4"} }, + SplitTest{ dots, "...", 0, []string{"1", ".2", ".3", ".4"} }, + SplitTest{ faces, "☹", 0, []string{"☺☻", ""} }, + SplitTest{ faces, "~", 0, []string{faces} }, + SplitTest{ faces, "", 0, []string{"☺", "☻", "☹"} }, + SplitTest{ "1 2 3 4", " ", 3, []string{"1", "2", "3 4"} }, + SplitTest{ "1 2", " ", 3, []string{"1", "2"} }, + SplitTest{ "123", "", 2, []string{"1", "23"} }, + SplitTest{ "123", "", 17, []string{"1", "2", "3"} }, } func TestSplit(t *testing.T) { - for i := 0; i < len(splittests); i++ { - tt := splittests[i]; - a := Split(tt.s, tt.sep); + for _, tt := range splittests { + a := Split(tt.s, tt.sep, tt.n); if !eq(a, tt.a) { - t.Errorf("Split(%q, %q) = %v; want %v", tt.s, tt.sep, a, tt.a); + t.Errorf("Split(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, a, tt.a); continue; } s := Join(a, tt.sep); if s != tt.s { - t.Errorf("Join(Split(%q, %q), %q) = %q", tt.s, tt.sep, tt.sep, s); + t.Errorf("Join(Split(%q, %q, %d), %q) = %q", tt.s, tt.sep, tt.n, tt.sep, s); } } }