Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support utf-16 in file and tail inputs #7792

Merged
merged 9 commits into from Jul 7, 2020
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 4 additions & 2 deletions go.mod
@@ -1,6 +1,6 @@
module github.com/influxdata/telegraf

go 1.12
go 1.13

require (
cloud.google.com/go v0.53.0
Expand Down Expand Up @@ -38,6 +38,7 @@ require (
github.com/couchbase/goutils v0.0.0-20180530154633-e865a1461c8a // indirect
github.com/denisenkom/go-mssqldb v0.0.0-20190707035753-2be1aa521ff4
github.com/dgrijalva/jwt-go v3.2.0+incompatible
github.com/dimchansky/utfbom v1.1.0
github.com/docker/distribution v2.6.0-rc.1.0.20170726174610-edc3ab29cdff+incompatible // indirect
github.com/docker/docker v1.4.2-0.20180327123150-ed7b6428c133
github.com/docker/go-connections v0.3.0 // indirect
Expand Down Expand Up @@ -71,7 +72,7 @@ require (
github.com/hashicorp/memberlist v0.1.5 // indirect
github.com/hashicorp/serf v0.8.1 // indirect
github.com/influxdata/go-syslog/v2 v2.0.1
github.com/influxdata/tail v1.0.1-0.20180327235535-c43482518d41
github.com/influxdata/tail v1.0.1-0.20200707024018-5a449acc01e2
github.com/influxdata/toml v0.0.0-20190415235208-270119a8ce65
github.com/influxdata/wlog v0.0.0-20160411224016-7c63b0a71ef8
github.com/jackc/fake v0.0.0-20150926172116-812a484cc733 // indirect
Expand Down Expand Up @@ -133,6 +134,7 @@ require (
golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d
golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a
golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4
golang.org/x/text v0.3.3
golang.org/x/tools v0.0.0-20200317043434-63da46f3035e // indirect
golang.zx2c4.com/wireguard/wgctrl v0.0.0-20200205215550-e35592f146e4
gonum.org/v1/gonum v0.6.2 // indirect
Expand Down
8 changes: 6 additions & 2 deletions go.sum
Expand Up @@ -335,8 +335,8 @@ github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpO
github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
github.com/influxdata/go-syslog/v2 v2.0.1 h1:l44S4l4Q8MhGQcoOxJpbo+QQYxJqp0vdgIVHh4+DO0s=
github.com/influxdata/go-syslog/v2 v2.0.1/go.mod h1:hjvie1UTaD5E1fTnDmxaCw8RRDrT4Ve+XHr5O2dKSCo=
github.com/influxdata/tail v1.0.1-0.20180327235535-c43482518d41 h1:HxQo1NpNXQDpvEBzthbQLmePvTLFTa5GzSFUjL03aEs=
github.com/influxdata/tail v1.0.1-0.20180327235535-c43482518d41/go.mod h1:xTFF2SILpIYc5N+Srb0d5qpx7d+f733nBrbasb13DtQ=
github.com/influxdata/tail v1.0.1-0.20200707024018-5a449acc01e2 h1:MRFxy6Bz0m0+m6mbHpNzhK3ZOMYhU0ZuucDsSi3/ETE=
github.com/influxdata/tail v1.0.1-0.20200707024018-5a449acc01e2/go.mod h1:VeiWgI3qaGdJWust2fP27a6J+koITo/1c/UhxeOxgaM=
github.com/influxdata/toml v0.0.0-20190415235208-270119a8ce65 h1:vvyMtD5LTJc1W9sQKjDkAWdcg0478CszSdzlHtiAXCY=
github.com/influxdata/toml v0.0.0-20190415235208-270119a8ce65/go.mod h1:zApaNFpP/bTpQItGZNNUMISDMDAnTXu9UqJ4yT3ocz8=
github.com/influxdata/wlog v0.0.0-20160411224016-7c63b0a71ef8 h1:W2IgzRCb0L9VzMujq/QuTaZUKcH8096jWwP519mHN6Q=
Expand Down Expand Up @@ -720,6 +720,8 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c h1:fqgJT0MGcGpPgpWU7VRdRjuArfcOvC4AoJmILihzhDg=
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
Expand Down Expand Up @@ -838,6 +840,7 @@ gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
gopkg.in/fatih/pool.v2 v2.0.0 h1:xIFeWtxifuQJGk/IEPKsTduEKcKvPmhoiVDGpC40nKg=
gopkg.in/fatih/pool.v2 v2.0.0/go.mod h1:8xVGeu1/2jr2wm5V9SPuMht2H5AEmf5aFMGSQixtjTY=
gopkg.in/fsnotify.v1 v1.2.1/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4=
gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
gopkg.in/gorethink/gorethink.v3 v3.0.5 h1:e2Uc/Xe+hpcVQFsj6MuHlYog3r0JYpnTzwDj/y2O4MU=
Expand All @@ -861,6 +864,7 @@ gopkg.in/mgo.v2 v2.0.0-20180705113604-9856a29383ce h1:xcEWjVhvbDy+nHP67nPDDpbYrY
gopkg.in/mgo.v2 v2.0.0-20180705113604-9856a29383ce/go.mod h1:yeKp02qBN3iKW1OzL3MGk2IdtZzaj7SFntXj72NppTA=
gopkg.in/olivere/elastic.v5 v5.0.70 h1:DqFG2Odzs74JCz6SssgJjd6qpGnsOAzNc7+l5EnvsnE=
gopkg.in/olivere/elastic.v5 v5.0.70/go.mod h1:FylZT6jQWtfHsicejzOm3jIMVPOAksa80i3o+6qtQRk=
gopkg.in/tomb.v1 v1.0.0-20140529071818-c131134a1947/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ=
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
Expand Down
36 changes: 36 additions & 0 deletions plugins/common/encoding/decoder.go
@@ -0,0 +1,36 @@
package encoding

import (
"errors"

"golang.org/x/text/encoding"
"golang.org/x/text/encoding/unicode"
)

type Decoder = encoding.Decoder

// NewDecoder returns a x/text Decoder for the specified text encoding. The
// Decoder converts a character encoding into utf-8 bytes. If a BOM is found
// it will be converted into a utf-8 BOM, you can use
// github.com/dimchansky/utfbom to strip the BOM.
//
// The "none" or "" encoding will pass through bytes unchecked. Use the utf-8
// encoding if you want invalid bytes replaced using the the unicode
// replacement character.
//
// Detection of utf-16 endianness using the BOM is not currently provided due
// to the tail input plugins requirement to be able to start at the middle or
// end of the file.
func NewDecoder(enc string) (*Decoder, error) {
switch enc {
case "utf-8":
return unicode.UTF8.NewDecoder(), nil
case "utf-16le":
return unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM).NewDecoder(), nil
case "utf-16be":
return unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM).NewDecoder(), nil
case "none", "":
return encoding.Nop.NewDecoder(), nil
}
return nil, errors.New("unknown character encoding")
}
78 changes: 78 additions & 0 deletions plugins/common/encoding/decoder_test.go
@@ -0,0 +1,78 @@
package encoding

import (
"bytes"
"io/ioutil"
"testing"

"github.com/stretchr/testify/require"
)

func TestDecoder(t *testing.T) {
tests := []struct {
name string
encoding string
input []byte
expected []byte
expectedErr bool
}{
{
name: "no decoder utf-8",
encoding: "",
input: []byte("howdy"),
expected: []byte("howdy"),
},
{
name: "utf-8 decoder",
encoding: "utf-8",
input: []byte("howdy"),
expected: []byte("howdy"),
},
{
name: "utf-8 decoder invalid bytes replaced with replacement char",
encoding: "utf-8",
input: []byte("\xff\xfe"),
expected: []byte("\uFFFD\uFFFD"),
},
{
name: "utf-16le decoder no BOM",
encoding: "utf-16le",
input: []byte("h\x00o\x00w\x00d\x00y\x00"),
expected: []byte("howdy"),
},
{
name: "utf-16le decoder with BOM",
encoding: "utf-16le",
input: []byte("\xff\xfeh\x00o\x00w\x00d\x00y\x00"),
expected: []byte("\xef\xbb\xbfhowdy"),
},
{
name: "utf-16be decoder no BOM",
encoding: "utf-16be",
input: []byte("\x00h\x00o\x00w\x00d\x00y"),
expected: []byte("howdy"),
},
{
name: "utf-16be decoder with BOM",
encoding: "utf-16be",
input: []byte("\xfe\xff\x00h\x00o\x00w\x00d\x00y"),
expected: []byte("\xef\xbb\xbfhowdy"),
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
decoder, err := NewDecoder(tt.encoding)
require.NoError(t, err)
buf := bytes.NewBuffer(tt.input)
r := decoder.Reader(buf)
actual, err := ioutil.ReadAll(r)
if tt.expectedErr {
require.Error(t, err)
return
}
require.NoError(t, err)
require.Equal(t, tt.expected, actual)
})
}
}
46 changes: 36 additions & 10 deletions plugins/inputs/file/file.go
Expand Up @@ -3,36 +3,50 @@ package file
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"

"github.com/dimchansky/utfbom"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal/globpath"
"github.com/influxdata/telegraf/plugins/common/encoding"
"github.com/influxdata/telegraf/plugins/inputs"
"github.com/influxdata/telegraf/plugins/parsers"
)

type File struct {
Files []string `toml:"files"`
FileTag string `toml:"file_tag"`
parser parsers.Parser
Files []string `toml:"files"`
FileTag string `toml:"file_tag"`
CharacterEncoding string `toml:"character_encoding"`
parser parsers.Parser

filenames []string
decoder *encoding.Decoder
}

const sampleConfig = `
## Files to parse each interval. Accept standard unix glob matching rules,
## as well as ** to match recursive files and directories.
files = ["/tmp/metrics.out"]

## Name a tag containing the name of the file the data was parsed from. Leave empty
## to disable.
# file_tag = ""

## Character encoding to use when interpreting the file contents. Invalid
## characters are replaced using the unicode replacement character. When set
## to the empty string the data is not decoded to text.
## ex: character_encoding = "utf-8"
## character_encoding = "utf-16le"
## character_encoding = "utf-16be"
## character_encoding = ""
# character_encoding = ""

## The dataformat to be read from files
## Each data format has its own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
data_format = "influx"

## Name a tag containing the name of the file the data was parsed from. Leave empty
## to disable.
# file_tag = ""
`

// SampleConfig returns the default configuration of the Input
Expand All @@ -44,6 +58,12 @@ func (f *File) Description() string {
return "Parse a complete file each interval"
}

func (f *File) Init() error {
var err error
f.decoder, err = encoding.NewDecoder(f.CharacterEncoding)
return err
}

func (f *File) Gather(acc telegraf.Accumulator) error {
err := f.refreshFilePaths()
if err != nil {
Expand All @@ -59,7 +79,7 @@ func (f *File) Gather(acc telegraf.Accumulator) error {
if f.FileTag != "" {
m.AddTag(f.FileTag, filepath.Base(k))
}
acc.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time())
acc.AddMetric(m)
}
}
return nil
Expand Down Expand Up @@ -88,12 +108,18 @@ func (f *File) refreshFilePaths() error {
}

func (f *File) readMetric(filename string) ([]telegraf.Metric, error) {
fileContents, err := ioutil.ReadFile(filename)
file, err := os.Open(filename)
if err != nil {
return nil, err
}
defer file.Close()

r, _ := utfbom.Skip(f.decoder.Reader(file))
fileContents, err := ioutil.ReadAll(r)
if err != nil {
return nil, fmt.Errorf("E! Error file: %v could not be read, %s", filename, err)
}
return f.parser.Parse(fileContents)

}

func init() {
Expand Down