Skip to content

Commit

Permalink
bufio: fix scanning with a final empty token.
Browse files Browse the repository at this point in the history
The Scan function's interface to the split function was not sufficient
to handle an empty final token in a pure function; state was required.
This was ugly.

We introduce a special error value that a split function can return
that signals that this token is OK, but is the last one and scanning
should stop immediately _after_ this token.

The same effect could be achieved using the same trick (a special
error value) and checking for that error after Scan finishes, but it's
a little clumsy. Providing a published sentinel value in bufio is
cleaner and means everyone can use the same trick. The result
is an error-free scan.

Rewrite the test (that was only barely working) to use the value
and be more robust.

Also write a new example showing how to do it.

Fixes #11836

Change-Id: Iaae77d0f95b4a2efa0175ced94d93c66353079e8
Reviewed-on: https://go-review.googlesource.com/14924
Reviewed-by: Ian Lance Taylor <iant@golang.org>
  • Loading branch information
robpike committed Sep 25, 2015
1 parent dc6df1b commit ec12754
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 11 deletions.
29 changes: 29 additions & 0 deletions src/bufio/example_test.go
Expand Up @@ -80,3 +80,32 @@ func ExampleScanner_custom() {
// 5678
// Invalid input: strconv.ParseInt: parsing "1234567901234567890": value out of range
}

// Use a Scanner with a custom split function to parse a comma-separated
// list with an empty final value.
func ExampleScanner_emptyFinalToken() {
	// Comma-separated list; last entry is empty.
	const input = "1,2,3,4,"
	scanner := bufio.NewScanner(strings.NewReader(input))
	// Define a split function that separates on commas.
	onComma := func(data []byte, atEOF bool) (advance int, token []byte, err error) {
		for i, c := range data {
			if c == ',' {
				return i + 1, data[:i], nil
			}
		}
		if !atEOF {
			// No comma in the buffered data and the input is not exhausted:
			// ask Scan for more data rather than ending the scan on what
			// may be only part of a token.
			return 0, nil, nil
		}
		// There is one final token to be delivered, which may be the empty string.
		// Returning bufio.ErrFinalToken here tells Scan there are no more tokens after this
		// but does not trigger an error to be returned from Scan itself.
		return 0, data, bufio.ErrFinalToken
	}
	scanner.Split(onComma)
	// Scan.
	for scanner.Scan() {
		fmt.Printf("%q ", scanner.Text())
	}
	if err := scanner.Err(); err != nil {
		fmt.Fprintln(os.Stderr, "reading input:", err)
	}
	// Output: "1" "2" "3" "4" ""
}
19 changes: 19 additions & 0 deletions src/bufio/scan.go
Expand Up @@ -38,6 +38,7 @@ type Scanner struct {
err error // Sticky error.
empties int // Count of successive empty tokens.
scanCalled bool // Scan has been called; buffer is in use.
done bool // Scan has finished.
}

// SplitFunc is the signature of the split function used to tokenize the
Expand Down Expand Up @@ -106,6 +107,16 @@ func (s *Scanner) Text() string {
return string(s.token)
}

// ErrFinalToken is a special sentinel error value. It is intended to be
// returned by a Split function to indicate that the token being delivered
// with the error is the last token and scanning should stop after this one.
// When Scan receives ErrFinalToken from the split function it delivers the
// accompanying token and returns true; every later call to Scan returns
// false. The sentinel is not recorded as the Scanner's sticky error, so
// scanning stops with no error.
// The value is useful to stop processing early or when it is necessary to
// deliver a final empty token. One could achieve the same behavior
// with a custom error value but providing one here is tidier.
// See the emptyFinalToken example for a use of this value.
var ErrFinalToken = errors.New("final token")

// Scan advances the Scanner to the next token, which will then be
// available through the Bytes or Text method. It returns false when the
// scan stops, either by reaching the end of the input or an error.
Expand All @@ -115,6 +126,9 @@ func (s *Scanner) Text() string {
// Scan panics if the split function returns 100 empty tokens without
// advancing the input. This is a common error mode for scanners.
func (s *Scanner) Scan() bool {
if s.done {
return false
}
s.scanCalled = true
// Loop until we have a token.
for {
Expand All @@ -124,6 +138,11 @@ func (s *Scanner) Scan() bool {
if s.end > s.start || s.err != nil {
advance, token, err := s.split(s.buf[s.start:s.end], s.err != nil)
if err != nil {
if err == ErrFinalToken {
s.token = token
s.done = true
return true
}
s.setErr(err)
return false
}
Expand Down
26 changes: 15 additions & 11 deletions src/bufio/scan_test.go
Expand Up @@ -429,33 +429,37 @@ func commaSplit(data []byte, atEOF bool) (advance int, token []byte, err error)
return i + 1, data[:i], nil
}
}
if !atEOF {
return 0, nil, nil
}
return 0, data, nil
return 0, data, ErrFinalToken
}

func TestEmptyTokens(t *testing.T) {
s := NewScanner(strings.NewReader("1,2,3,"))
values := []string{"1", "2", "3", ""}
func testEmptyTokens(t *testing.T, text string, values []string) {
s := NewScanner(strings.NewReader(text))
s.Split(commaSplit)
var i int
for i = 0; i < len(values); i++ {
if !s.Scan() {
break
for i = 0; s.Scan(); i++ {
if i >= len(values) {
t.Fatalf("got %d fields, expected %d", i+1, len(values))
}
if s.Text() != values[i] {
t.Errorf("%d: expected %q got %q", i, values[i], s.Text())
}
}
if i != len(values) {
t.Errorf("got %d fields, expected %d", i, len(values))
t.Fatalf("got %d fields, expected %d", i, len(values))
}
if err := s.Err(); err != nil {
t.Fatal(err)
}
}

// TestEmptyTokens runs the shared testEmptyTokens helper on input that ends
// with a comma, expecting an empty string as the last field.
func TestEmptyTokens(t *testing.T) {
	const input = "1,2,3,"
	want := []string{"1", "2", "3", ""}
	testEmptyTokens(t, input, want)
}

// TestWithNoEmptyTokens runs the shared testEmptyTokens helper on input with
// no trailing comma, expecting exactly the three non-empty fields.
func TestWithNoEmptyTokens(t *testing.T) {
	const input = "1,2,3"
	want := []string{"1", "2", "3"}
	testEmptyTokens(t, input, want)
}

func loopAtEOFSplit(data []byte, atEOF bool) (advance int, token []byte, err error) {
if len(data) > 0 {
return 1, data[:1], nil
Expand Down

0 comments on commit ec12754

Please sign in to comment.