forked from influxdata/telegraf
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Support utf-16 in file and tail inputs (influxdata#7792)
- Loading branch information
1 parent
bd6edd8
commit 86145ef
Showing
18 changed files
with
883 additions
and
341 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
package encoding | ||
|
||
import ( | ||
"errors" | ||
|
||
"golang.org/x/text/encoding" | ||
"golang.org/x/text/encoding/unicode" | ||
) | ||
|
||
// Decoder is an alias for the x/text encoding.Decoder type; it is the type | ||
// that NewDecoder returns a pointer to. | ||
type Decoder = encoding.Decoder | ||
|
||
// NewDecoder returns a x/text Decoder for the specified text encoding. The | ||
// Decoder converts a character encoding into utf-8 bytes. If a BOM is found | ||
// it will be converted into a utf-8 BOM, you can use | ||
// github.com/dimchansky/utfbom to strip the BOM. | ||
// | ||
// The "none" or "" encoding will pass through bytes unchecked. Use the utf-8 | ||
// encoding if you want invalid bytes replaced using the the unicode | ||
// replacement character. | ||
// | ||
// Detection of utf-16 endianness using the BOM is not currently provided due | ||
// to the tail input plugins requirement to be able to start at the middle or | ||
// end of the file. | ||
func NewDecoder(enc string) (*Decoder, error) { | ||
switch enc { | ||
case "utf-8": | ||
return unicode.UTF8.NewDecoder(), nil | ||
case "utf-16le": | ||
return unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM).NewDecoder(), nil | ||
case "utf-16be": | ||
return unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM).NewDecoder(), nil | ||
case "none", "": | ||
return encoding.Nop.NewDecoder(), nil | ||
} | ||
return nil, errors.New("unknown character encoding") | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
package encoding | ||
|
||
import ( | ||
"bytes" | ||
"io/ioutil" | ||
"testing" | ||
|
||
"github.com/stretchr/testify/require" | ||
) | ||
|
||
func TestDecoder(t *testing.T) { | ||
tests := []struct { | ||
name string | ||
encoding string | ||
input []byte | ||
expected []byte | ||
expectedErr bool | ||
}{ | ||
{ | ||
name: "no decoder utf-8", | ||
encoding: "", | ||
input: []byte("howdy"), | ||
expected: []byte("howdy"), | ||
}, | ||
{ | ||
name: "utf-8 decoder", | ||
encoding: "utf-8", | ||
input: []byte("howdy"), | ||
expected: []byte("howdy"), | ||
}, | ||
{ | ||
name: "utf-8 decoder invalid bytes replaced with replacement char", | ||
encoding: "utf-8", | ||
input: []byte("\xff\xfe"), | ||
expected: []byte("\uFFFD\uFFFD"), | ||
}, | ||
{ | ||
name: "utf-16le decoder no BOM", | ||
encoding: "utf-16le", | ||
input: []byte("h\x00o\x00w\x00d\x00y\x00"), | ||
expected: []byte("howdy"), | ||
}, | ||
{ | ||
name: "utf-16le decoder with BOM", | ||
encoding: "utf-16le", | ||
input: []byte("\xff\xfeh\x00o\x00w\x00d\x00y\x00"), | ||
expected: []byte("\xef\xbb\xbfhowdy"), | ||
}, | ||
{ | ||
name: "utf-16be decoder no BOM", | ||
encoding: "utf-16be", | ||
input: []byte("\x00h\x00o\x00w\x00d\x00y"), | ||
expected: []byte("howdy"), | ||
}, | ||
{ | ||
name: "utf-16be decoder with BOM", | ||
encoding: "utf-16be", | ||
input: []byte("\xfe\xff\x00h\x00o\x00w\x00d\x00y"), | ||
expected: []byte("\xef\xbb\xbfhowdy"), | ||
}, | ||
} | ||
|
||
for _, tt := range tests { | ||
t.Run(tt.name, func(t *testing.T) { | ||
decoder, err := NewDecoder(tt.encoding) | ||
require.NoError(t, err) | ||
buf := bytes.NewBuffer(tt.input) | ||
r := decoder.Reader(buf) | ||
actual, err := ioutil.ReadAll(r) | ||
if tt.expectedErr { | ||
require.Error(t, err) | ||
return | ||
} | ||
require.NoError(t, err) | ||
require.Equal(t, tt.expected, actual) | ||
}) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.