Permalink
Cannot retrieve contributors at this time
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
| package csv | |
| import ( | |
| "io" | |
| "reflect" | |
| "strings" | |
| "testing" | |
| "unicode/utf8" | |
| ) | |
| func TestRead(t *testing.T) { | |
| tests := []struct { | |
| Name string | |
| Input string | |
| Output [][]string | |
| Error error | |
| // These fields are copied into the Reader | |
| Comma rune | |
| Comment rune | |
| UseFieldsPerRecord bool // false (default) means FieldsPerRecord is -1 | |
| FieldsPerRecord int | |
| LazyQuotes bool | |
| TrimLeadingSpace bool | |
| ReuseRecord bool | |
| }{{ | |
| Name: "Simple", | |
| Input: "a,b,c\n", | |
| Output: [][]string{{"a", "b", "c"}}, | |
| }, { | |
| Name: "CRLF", | |
| Input: "a,b\r\nc,d\r\n", | |
| Output: [][]string{{"a", "b"}, {"c", "d"}}, | |
| }, { | |
| Name: "BareCR", | |
| Input: "a,b\rc,d\r\n", | |
| Output: [][]string{{"a", "b\rc", "d"}}, | |
| }, { | |
| Name: "RFC4180test", | |
| Input: `#field1,field2,field3 | |
| "aaa","bb | |
| b","ccc" | |
| "a,a","b""bb","ccc" | |
| zzz,yyy,xxx | |
| `, | |
| Output: [][]string{ | |
| {"#field1", "field2", "field3"}, | |
| {"aaa", "bb\nb", "ccc"}, | |
| {"a,a", `b"bb`, "ccc"}, | |
| {"zzz", "yyy", "xxx"}, | |
| }, | |
| UseFieldsPerRecord: true, | |
| FieldsPerRecord: 0, | |
| }, { | |
| Name: "NoEOLTest", | |
| Input: "a,b,c", | |
| Output: [][]string{{"a", "b", "c"}}, | |
| }, { | |
| Name: "Semicolon", | |
| Input: "a;b;c\n", | |
| Output: [][]string{{"a", "b", "c"}}, | |
| Comma: ';', | |
| }, { | |
| Name: "MultiLine", | |
| Input: `"two | |
| line","one line","three | |
| line | |
| field"`, | |
| Output: [][]string{{"two\nline", "one line", "three\nline\nfield"}}, | |
| }, { | |
| Name: "BlankLine", | |
| Input: "a,b,c\n\nd,e,f\n\n", | |
| Output: [][]string{ | |
| {"a", "b", "c"}, | |
| {"d", "e", "f"}, | |
| }, | |
| }, { | |
| Name: "BlankLineFieldCount", | |
| Input: "a,b,c\n\nd,e,f\n\n", | |
| Output: [][]string{ | |
| {"a", "b", "c"}, | |
| {"d", "e", "f"}, | |
| }, | |
| UseFieldsPerRecord: true, | |
| FieldsPerRecord: 0, | |
| }, { | |
| Name: "TrimSpace", | |
| Input: " a, b, c\n", | |
| Output: [][]string{{"a", "b", "c"}}, | |
| TrimLeadingSpace: true, | |
| }, { | |
| Name: "LeadingSpace", | |
| Input: " a, b, c\n", | |
| Output: [][]string{{" a", " b", " c"}}, | |
| }, { | |
| Name: "Comment", | |
| Input: "#1,2,3\na,b,c\n#comment", | |
| Output: [][]string{{"a", "b", "c"}}, | |
| Comment: '#', | |
| }, { | |
| Name: "NoComment", | |
| Input: "#1,2,3\na,b,c", | |
| Output: [][]string{{"#1", "2", "3"}, {"a", "b", "c"}}, | |
| }, { | |
| Name: "LazyQuotes", | |
| Input: `a "word","1"2",a","b`, | |
| Output: [][]string{{`a "word"`, `1"2`, `a"`, `b`}}, | |
| LazyQuotes: true, | |
| }, { | |
| Name: "BareQuotes", | |
| Input: `a "word","1"2",a"`, | |
| Output: [][]string{{`a "word"`, `1"2`, `a"`}}, | |
| LazyQuotes: true, | |
| }, { | |
| Name: "BareDoubleQuotes", | |
| Input: `a""b,c`, | |
| Output: [][]string{{`a""b`, `c`}}, | |
| LazyQuotes: true, | |
| }, { | |
| Name: "BadDoubleQuotes", | |
| Input: `a""b,c`, | |
| Error: &ParseError{StartLine: 1, Line: 1, Column: 1, Err: ErrBareQuote}, | |
| }, { | |
| Name: "TrimQuote", | |
| Input: ` "a"," b",c`, | |
| Output: [][]string{{"a", " b", "c"}}, | |
| TrimLeadingSpace: true, | |
| }, { | |
| Name: "BadBareQuote", | |
| Input: `a "word","b"`, | |
| Error: &ParseError{StartLine: 1, Line: 1, Column: 2, Err: ErrBareQuote}, | |
| }, { | |
| Name: "BadTrailingQuote", | |
| Input: `"a word",b"`, | |
| Error: &ParseError{StartLine: 1, Line: 1, Column: 10, Err: ErrBareQuote}, | |
| }, { | |
| Name: "ExtraneousQuote", | |
| Input: `"a "word","b"`, | |
| Error: &ParseError{StartLine: 1, Line: 1, Column: 3, Err: ErrQuote}, | |
| }, { | |
| Name: "BadFieldCount", | |
| Input: "a,b,c\nd,e", | |
| Error: &ParseError{StartLine: 2, Line: 2, Err: ErrFieldCount}, | |
| UseFieldsPerRecord: true, | |
| FieldsPerRecord: 0, | |
| }, { | |
| Name: "BadFieldCount1", | |
| Input: `a,b,c`, | |
| Error: &ParseError{StartLine: 1, Line: 1, Err: ErrFieldCount}, | |
| UseFieldsPerRecord: true, | |
| FieldsPerRecord: 2, | |
| }, { | |
| Name: "FieldCount", | |
| Input: "a,b,c\nd,e", | |
| Output: [][]string{{"a", "b", "c"}, {"d", "e"}}, | |
| }, { | |
| Name: "TrailingCommaEOF", | |
| Input: "a,b,c,", | |
| Output: [][]string{{"a", "b", "c", ""}}, | |
| }, { | |
| Name: "TrailingCommaEOL", | |
| Input: "a,b,c,\n", | |
| Output: [][]string{{"a", "b", "c", ""}}, | |
| }, { | |
| Name: "TrailingCommaSpaceEOF", | |
| Input: "a,b,c, ", | |
| Output: [][]string{{"a", "b", "c", ""}}, | |
| TrimLeadingSpace: true, | |
| }, { | |
| Name: "TrailingCommaSpaceEOL", | |
| Input: "a,b,c, \n", | |
| Output: [][]string{{"a", "b", "c", ""}}, | |
| TrimLeadingSpace: true, | |
| }, { | |
| Name: "TrailingCommaLine3", | |
| Input: "a,b,c\nd,e,f\ng,hi,", | |
| Output: [][]string{{"a", "b", "c"}, {"d", "e", "f"}, {"g", "hi", ""}}, | |
| TrimLeadingSpace: true, | |
| }, { | |
| Name: "NotTrailingComma3", | |
| Input: "a,b,c, \n", | |
| Output: [][]string{{"a", "b", "c", " "}}, | |
| }, { | |
| Name: "CommaFieldTest", | |
| Input: `x,y,z,w | |
| x,y,z, | |
| x,y,, | |
| x,,, | |
| ,,, | |
| "x","y","z","w" | |
| "x","y","z","" | |
| "x","y","","" | |
| "x","","","" | |
| "","","","" | |
| `, | |
| Output: [][]string{ | |
| {"x", "y", "z", "w"}, | |
| {"x", "y", "z", ""}, | |
| {"x", "y", "", ""}, | |
| {"x", "", "", ""}, | |
| {"", "", "", ""}, | |
| {"x", "y", "z", "w"}, | |
| {"x", "y", "z", ""}, | |
| {"x", "y", "", ""}, | |
| {"x", "", "", ""}, | |
| {"", "", "", ""}, | |
| }, | |
| }, { | |
| Name: "TrailingCommaIneffective1", | |
| Input: "a,b,\nc,d,e", | |
| Output: [][]string{ | |
| {"a", "b", ""}, | |
| {"c", "d", "e"}, | |
| }, | |
| TrimLeadingSpace: true, | |
| }, { | |
| Name: "ReadAllReuseRecord", | |
| Input: "a,b\nc,d", | |
| Output: [][]string{ | |
| {"a", "b"}, | |
| {"c", "d"}, | |
| }, | |
| ReuseRecord: true, | |
| }, { | |
| Name: "StartLine1", // Issue 19019 | |
| Input: "a,\"b\nc\"d,e", | |
| Error: &ParseError{StartLine: 1, Line: 2, Column: 1, Err: ErrQuote}, | |
| }, { | |
| Name: "StartLine2", | |
| Input: "a,b\n\"d\n\n,e", | |
| Error: &ParseError{StartLine: 2, Line: 5, Column: 0, Err: ErrQuote}, | |
| }, { | |
| Name: "CRLFInQuotedField", // Issue 21201 | |
| Input: "A,\"Hello\r\nHi\",B\r\n", | |
| Output: [][]string{ | |
| {"A", "Hello\nHi", "B"}, | |
| }, | |
| }, { | |
| Name: "BinaryBlobField", // Issue 19410 | |
| Input: "x09\x41\xb4\x1c,aktau", | |
| Output: [][]string{{"x09A\xb4\x1c", "aktau"}}, | |
| }, { | |
| Name: "TrailingCR", | |
| Input: "field1,field2\r", | |
| Output: [][]string{{"field1", "field2"}}, | |
| }, { | |
| Name: "QuotedTrailingCR", | |
| Input: "\"field\"\r", | |
| Output: [][]string{{"field"}}, | |
| }, { | |
| Name: "QuotedTrailingCRCR", | |
| Input: "\"field\"\r\r", | |
| Error: &ParseError{StartLine: 1, Line: 1, Column: 6, Err: ErrQuote}, | |
| }, { | |
| Name: "FieldCR", | |
| Input: "field\rfield\r", | |
| Output: [][]string{{"field\rfield"}}, | |
| }, { | |
| Name: "FieldCRCR", | |
| Input: "field\r\rfield\r\r", | |
| Output: [][]string{{"field\r\rfield\r"}}, | |
| }, { | |
| Name: "FieldCRCRLF", | |
| Input: "field\r\r\nfield\r\r\n", | |
| Output: [][]string{{"field\r"}, {"field\r"}}, | |
| }, { | |
| Name: "FieldCRCRLFCR", | |
| Input: "field\r\r\n\rfield\r\r\n\r", | |
| Output: [][]string{{"field\r"}, {"\rfield\r"}}, | |
| }, { | |
| Name: "FieldCRCRLFCRCR", | |
| Input: "field\r\r\n\r\rfield\r\r\n\r\r", | |
| Output: [][]string{{"field\r"}, {"\r\rfield\r"}, {"\r"}}, | |
| }, { | |
| Name: "MultiFieldCRCRLFCRCR", | |
| Input: "field1,field2\r\r\n\r\rfield1,field2\r\r\n\r\r,", | |
| Output: [][]string{ | |
| {"field1", "field2\r"}, | |
| {"\r\rfield1", "field2\r"}, | |
| {"\r\r", ""}, | |
| }, | |
| }, { | |
| Name: "NonASCIICommaAndComment", | |
| Input: "a£b,c£ \td,e\n€ comment\n", | |
| Output: [][]string{{"a", "b,c", "d,e"}}, | |
| TrimLeadingSpace: true, | |
| Comma: '£', | |
| Comment: '€', | |
| }, { | |
| Name: "NonASCIICommaAndCommentWithQuotes", | |
| Input: "a€\" b,\"€ c\nλ comment\n", | |
| Output: [][]string{{"a", " b,", " c"}}, | |
| Comma: '€', | |
| Comment: 'λ', | |
| }, { | |
| // λ and θ start with the same byte. | |
| // This tests that the parser doesn't confuse such characters. | |
| Name: "NonASCIICommaConfusion", | |
| Input: "\"abθcd\"λefθgh", | |
| Output: [][]string{{"abθcd", "efθgh"}}, | |
| Comma: 'λ', | |
| Comment: '€', | |
| }, { | |
| Name: "NonASCIICommentConfusion", | |
| Input: "λ\nλ\nθ\nλ\n", | |
| Output: [][]string{{"λ"}, {"λ"}, {"λ"}}, | |
| Comment: 'θ', | |
| }, { | |
| Name: "QuotedFieldMultipleLF", | |
| Input: "\"\n\n\n\n\"", | |
| Output: [][]string{{"\n\n\n\n"}}, | |
| }, { | |
| Name: "MultipleCRLF", | |
| Input: "\r\n\r\n\r\n\r\n", | |
| }, { | |
| // The implementation may read each line in several chunks if it doesn't fit entirely | |
| // in the read buffer, so we should test the code to handle that condition. | |
| Name: "HugeLines", | |
| Input: strings.Repeat("#ignore\n", 10000) + strings.Repeat("@", 5000) + "," + strings.Repeat("*", 5000), | |
| Output: [][]string{{strings.Repeat("@", 5000), strings.Repeat("*", 5000)}}, | |
| Comment: '#', | |
| }, { | |
| Name: "QuoteWithTrailingCRLF", | |
| Input: "\"foo\"bar\"\r\n", | |
| Error: &ParseError{StartLine: 1, Line: 1, Column: 4, Err: ErrQuote}, | |
| }, { | |
| Name: "LazyQuoteWithTrailingCRLF", | |
| Input: "\"foo\"bar\"\r\n", | |
| Output: [][]string{{`foo"bar`}}, | |
| LazyQuotes: true, | |
| }, { | |
| Name: "DoubleQuoteWithTrailingCRLF", | |
| Input: "\"foo\"\"bar\"\r\n", | |
| Output: [][]string{{`foo"bar`}}, | |
| }, { | |
| Name: "EvenQuotes", | |
| Input: `""""""""`, | |
| Output: [][]string{{`"""`}}, | |
| }, { | |
| Name: "OddQuotes", | |
| Input: `"""""""`, | |
| Error: &ParseError{StartLine: 1, Line: 1, Column: 7, Err: ErrQuote}, | |
| }, { | |
| Name: "LazyOddQuotes", | |
| Input: `"""""""`, | |
| Output: [][]string{{`"""`}}, | |
| LazyQuotes: true, | |
| }, { | |
| Name: "BadComma1", | |
| Comma: '\n', | |
| Error: errInvalidDelim, | |
| }, { | |
| Name: "BadComma2", | |
| Comma: '\r', | |
| Error: errInvalidDelim, | |
| }, { | |
| Name: "BadComma3", | |
| Comma: '"', | |
| Error: errInvalidDelim, | |
| }, { | |
| Name: "BadComma4", | |
| Comma: utf8.RuneError, | |
| Error: errInvalidDelim, | |
| }, { | |
| Name: "BadComment1", | |
| Comment: '\n', | |
| Error: errInvalidDelim, | |
| }, { | |
| Name: "BadComment2", | |
| Comment: '\r', | |
| Error: errInvalidDelim, | |
| }, { | |
| Name: "BadComment3", | |
| Comment: utf8.RuneError, | |
| Error: errInvalidDelim, | |
| }, { | |
| Name: "BadCommaComment", | |
| Comma: 'X', | |
| Comment: 'X', | |
| Error: errInvalidDelim, | |
| }} | |
| for _, tt := range tests { | |
| t.Run(tt.Name, func(t *testing.T) { | |
| r := NewReader(strings.NewReader(tt.Input)) | |
| if tt.Comma != 0 { | |
| r.Comma = tt.Comma | |
| } | |
| r.Comment = tt.Comment | |
| if tt.UseFieldsPerRecord { | |
| r.FieldsPerRecord = tt.FieldsPerRecord | |
| } else { | |
| r.FieldsPerRecord = -1 | |
| } | |
| r.LazyQuotes = tt.LazyQuotes | |
| r.TrimLeadingSpace = tt.TrimLeadingSpace | |
| r.ReuseRecord = tt.ReuseRecord | |
| out, err := r.ReadAll() | |
| if !reflect.DeepEqual(err, tt.Error) { | |
| t.Errorf("ReadAll() error:\ngot %v\nwant %v", err, tt.Error) | |
| } else if !reflect.DeepEqual(out, tt.Output) { | |
| t.Errorf("ReadAll() output:\ngot %q\nwant %q", out, tt.Output) | |
| } | |
| }) | |
| } | |
| } | |
// nTimes is an io.Reader which yields the string s n times.
type nTimes struct {
	s   string // text emitted on each repetition
	n   int    // repetitions not yet fully emitted
	off int    // offset into s at which the next copy resumes
}

// Read copies consecutive repetitions of r.s into p until p is full or the
// repetitions run out.
//
// It returns the number of bytes written. The error is nil when p was filled,
// and io.EOF when no repetitions remain (or s is empty); in the latter case
// the byte count may still be non-zero for the data copied before running out.
func (r *nTimes) Read(p []byte) (n int, err error) {
	for {
		if r.n <= 0 || r.s == "" {
			return n, io.EOF
		}

		// Copy as much of the current repetition as fits in what is left of p.
		n0 := copy(p, r.s[r.off:])
		p = p[n0:]
		n += n0
		r.off += n0

		// Finished one full repetition: rewind and count it off.
		if r.off == len(r.s) {
			r.off = 0
			r.n--
		}

		if len(p) == 0 {
			return n, nil
		}
	}
}
| // benchmarkRead measures reading the provided CSV rows data. | |
| // initReader, if non-nil, modifies the Reader before it's used. | |
| func benchmarkRead(b *testing.B, initReader func(*Reader), rows string) { | |
| b.ReportAllocs() | |
| r := NewReader(&nTimes{s: rows, n: b.N}) | |
| if initReader != nil { | |
| initReader(r) | |
| } | |
| for { | |
| _, err := r.Read() | |
| if err == io.EOF { | |
| break | |
| } | |
| if err != nil { | |
| b.Fatal(err) | |
| } | |
| } | |
| } | |
// benchmarkCSVData is the sample input for the small-row benchmarks: ten
// four-field records, the first five unquoted and the last five quoted, each
// terminated by a newline.
const benchmarkCSVData = `x,y,z,w
x,y,z,
x,y,,
x,,,
,,,
"x","y","z","w"
"x","y","z",""
"x","y","",""
"x","","",""
"","","",""
`
| func BenchmarkRead(b *testing.B) { | |
| benchmarkRead(b, nil, benchmarkCSVData) | |
| } | |
| func BenchmarkReadWithFieldsPerRecord(b *testing.B) { | |
| benchmarkRead(b, func(r *Reader) { r.FieldsPerRecord = 4 }, benchmarkCSVData) | |
| } | |
| func BenchmarkReadWithoutFieldsPerRecord(b *testing.B) { | |
| benchmarkRead(b, func(r *Reader) { r.FieldsPerRecord = -1 }, benchmarkCSVData) | |
| } | |
| func BenchmarkReadLargeFields(b *testing.B) { | |
| benchmarkRead(b, nil, strings.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv | |
| xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv | |
| ,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv | |
| xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv | |
| `, 3)) | |
| } | |
| func BenchmarkReadReuseRecord(b *testing.B) { | |
| benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true }, benchmarkCSVData) | |
| } | |
| func BenchmarkReadReuseRecordWithFieldsPerRecord(b *testing.B) { | |
| benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true; r.FieldsPerRecord = 4 }, benchmarkCSVData) | |
| } | |
| func BenchmarkReadReuseRecordWithoutFieldsPerRecord(b *testing.B) { | |
| benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true; r.FieldsPerRecord = -1 }, benchmarkCSVData) | |
| } | |
| func BenchmarkReadReuseRecordLargeFields(b *testing.B) { | |
| benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true }, strings.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv | |
| xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv | |
| ,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv | |
| xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv | |
| `, 3)) | |
| } |