Skip to content

Commit

Permalink
x-pack/filebeat/input/httpjson: provide XML decoding functionality
Browse files Browse the repository at this point in the history
This addition makes use of the mito XML library to allow users to decode
XML responses with type hints.
  • Loading branch information
efd6 committed May 7, 2023
1 parent 25e882f commit a440d28
Show file tree
Hide file tree
Showing 13 changed files with 212 additions and 18 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.next.asciidoc
Expand Up @@ -270,6 +270,7 @@ automatic splitting at root level, if root level element is an array. {pull}3415
- Allow neflow v9 and ipfix templates to be shared between source addresses. {pull}35036[35036]
- Add support for collecting IPv6 metrics. {pull}35123[35123]
- Add oracle authentication messages parsing {pull}35127[35127]
- Add XML decoding support to HTTPJSON. {issue}34438[34438] {pull}35235[35235]

*Auditbeat*
- Migration of system/package module storage from gob encoding to flatbuffer encoding in bolt db. {pull}34817[34817]
Expand Down
35 changes: 33 additions & 2 deletions NOTICE.txt
Expand Up @@ -13412,11 +13412,11 @@ limitations under the License.

--------------------------------------------------------------------------------
Dependency : github.com/elastic/mito
Version: v0.0.0-20230302005114-1dda06e81678
Version: v0.0.0-20230427032043-b7fe987e263f
Licence type (autodetected): Apache-2.0
--------------------------------------------------------------------------------

Contents of probable licence file $GOMODCACHE/github.com/elastic/mito@v0.0.0-20230302005114-1dda06e81678/LICENSE:
Contents of probable licence file $GOMODCACHE/github.com/elastic/mito@v0.0.0-20230427032043-b7fe987e263f/LICENSE:


Apache License
Expand Down Expand Up @@ -25184,6 +25184,37 @@ Public License instead of this License.
Indirect dependencies


--------------------------------------------------------------------------------
Dependency : aqwari.net/xml
Version: v0.0.0-20210331023308-d9421b293817
Licence type (autodetected): MIT
--------------------------------------------------------------------------------

Contents of probable licence file $GOMODCACHE/aqwari.net/xml@v0.0.0-20210331023308-d9421b293817/LICENSE:

The MIT License (MIT)

Copyright (c) 2015 David Arroyo

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.


--------------------------------------------------------------------------------
Dependency : cloud.google.com/go/compute
Version: v1.12.1
Expand Down
3 changes: 2 additions & 1 deletion go.mod
Expand Up @@ -199,7 +199,7 @@ require (
github.com/elastic/elastic-agent-shipper-client v0.5.1-0.20230228231646-f04347b666f3
github.com/elastic/elastic-agent-system-metrics v0.6.0
github.com/elastic/go-elasticsearch/v8 v8.2.0
github.com/elastic/mito v0.0.0-20230302005114-1dda06e81678
github.com/elastic/mito v0.0.0-20230427032043-b7fe987e263f
github.com/elastic/toutoumomoma v0.0.0-20221026030040-594ef30cb640
github.com/foxcpp/go-mockdns v0.0.0-20201212160233-ede2f9158d15
github.com/google/cel-go v0.13.0
Expand All @@ -216,6 +216,7 @@ require (
)

require (
aqwari.net/xml v0.0.0-20210331023308-d9421b293817 // indirect
cloud.google.com/go/compute v1.12.1 // indirect
cloud.google.com/go/compute/metadata v0.2.1 // indirect
cloud.google.com/go/iam v0.6.0 // indirect
Expand Down
8 changes: 6 additions & 2 deletions go.sum
@@ -1,3 +1,5 @@
aqwari.net/xml v0.0.0-20210331023308-d9421b293817 h1:+3Rh5EaTzNLnzWx3/uy/mAaH/dGI7svJ6e0oOIDcPuE=
aqwari.net/xml v0.0.0-20210331023308-d9421b293817/go.mod h1:c7kkWzc7HS/t8Q2DcVY8P2d1dyWNEhEVT5pL0ZHO11c=
bazil.org/fuse v0.0.0-20160811212531-371fbbdaa898/go.mod h1:Xbm+BRKSBEpa4q4hTSxohYNQpsxXPbPry4JJWOB3LB8=
cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
Expand Down Expand Up @@ -664,8 +666,8 @@ github.com/elastic/gopacket v1.1.20-0.20211202005954-d412fca7f83a h1:8WfL/X6fK11
github.com/elastic/gopacket v1.1.20-0.20211202005954-d412fca7f83a/go.mod h1:riddUzxTSBpJXk3qBHtYr4qOhFhT6k/1c0E3qkQjQpA=
github.com/elastic/gosigar v0.14.2 h1:Dg80n8cr90OZ7x+bAax/QjoW/XqTI11RmA79ZwIm9/4=
github.com/elastic/gosigar v0.14.2/go.mod h1:iXRIGg2tLnu7LBdpqzyQfGDEidKCfWcCMS0WKyPWoMs=
github.com/elastic/mito v0.0.0-20230302005114-1dda06e81678 h1:+meybiVLjeeEOsxW7Wuq05uOZQ874sVBMgxoGzUvg7E=
github.com/elastic/mito v0.0.0-20230302005114-1dda06e81678/go.mod h1:7dZE6T9eC4uwo6HG4OHbHDy255spws7MCwA2KBkppkU=
github.com/elastic/mito v0.0.0-20230427032043-b7fe987e263f h1:/bqa4NNCFJry2j9QHTUj4wfC9YE5VgMwl0xbRf4zEQs=
github.com/elastic/mito v0.0.0-20230427032043-b7fe987e263f/go.mod h1:V4HuliUNTUpjlEoZtZ9rJTWW7GUVP7gxPbtiN0rTFmM=
github.com/elastic/ristretto v0.1.1-0.20220602190459-83b0895ca5b3 h1:ChPwRVv1RR4a0cxoGjKcyWjTEpxYfm5gydMIzo32cAw=
github.com/elastic/ristretto v0.1.1-0.20220602190459-83b0895ca5b3/go.mod h1:RAy2GVV4sTWVlNMavv3xhLsk18rxhfhDnombTe6EF5c=
github.com/elastic/sarama v1.19.1-0.20220310193331-ebc2b0d8eef3 h1:FzA0/n4iMt8ojGDGRoiFPSHFvvdVIvxOxyLtiFnrLBM=
Expand Down Expand Up @@ -2035,6 +2037,7 @@ golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/
golang.org/x/net v0.0.0-20200602114024-627f9648deb9/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20200813134508-3edf25e44fcc/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20201006153459-a7d1128ccaa0/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
Expand Down Expand Up @@ -2334,6 +2337,7 @@ golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roY
golang.org/x/tools v0.0.0-20200721032237-77f530d86f9a/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=
golang.org/x/tools v0.0.0-20200729194436-6467de6f59a7/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=
golang.org/x/tools v0.0.0-20200804011535-6c149bb5ef0d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=
golang.org/x/tools v0.0.0-20200821192610-3366bbee4705/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=
golang.org/x/tools v0.0.0-20200825202427-b303f430e36d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=
golang.org/x/tools v0.0.0-20200904185747-39188db58858/go.mod h1:Cj7w3i3Rnn0Xh82ur9kSqwfTHTeVxaDqrfMjpcNT6bE=
golang.org/x/tools v0.0.0-20201110124207-079ba7bd75cd/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
Expand Down
9 changes: 8 additions & 1 deletion x-pack/filebeat/docs/inputs/input-httpjson.asciidoc
Expand Up @@ -655,12 +655,19 @@ This determines whether rotated logs should be gzip compressed.
[float]
==== `response.decode_as`

ContentType used for decoding the response body. If set it will force the decoding in the specified format regardless of the `Content-Type` header value, otherwise it will honor it if possible or fallback to `application/json`. Supported values: `application/json, application/x-ndjson`, `text/csv`, `application/zip`. It is not set by default.
ContentType used for decoding the response body. If set it will force the decoding in the specified format regardless of the `Content-Type` header value, otherwise it will honor it if possible or fallback to `application/json`. Supported values: `application/json, application/x-ndjson`, `text/csv`, `application/zip`, `application/xml` and `text/xml`. It is not set by default.

NOTE: For `text/csv`, one event for each line will be created, using the header values as the object keys. For this reason is always assumed that a header exists.

NOTE: For `application/zip`, the `zip` file is expected to contain one or more `.json` or `.ndjson` files. The contents of all of them will be merged into a single list of `JSON` objects.

NOTE: For `application/xml` and `text/xml` type information for decoding the XML document can be provided via the `response.xsd` option.

[float]
==== `response.xsd`

XML documents may require additional type information to enable correct parsing and ingestion. This information can be provided as an XML Schema Definition (XSD) for the document using the `response.xsd` option.

[[response-transforms]]
[float]
==== `response.transforms`
Expand Down
1 change: 1 addition & 0 deletions x-pack/filebeat/input/httpjson/config_response.go
Expand Up @@ -17,6 +17,7 @@ const (

type responseConfig struct {
DecodeAs string `config:"decode_as"`
XSD string `config:"xsd"`
RequestBodyOnPagination bool `config:"request_body_on_pagination"`
Transforms transformsConfig `config:"transforms"`
Pagination transformsConfig `config:"pagination"`
Expand Down
17 changes: 17 additions & 0 deletions x-pack/filebeat/input/httpjson/encoding.go
Expand Up @@ -13,6 +13,8 @@ import (
"io"
"net/http"

"github.com/elastic/mito/lib/xml"

"github.com/elastic/elastic-agent-libs/logp"
)

Expand Down Expand Up @@ -101,6 +103,11 @@ func registerDecoders() {

log.Debugf("registering decoder 'application/zip': returned error: %#v",
registerDecoder("application/zip", decodeAsZip))

log.Debugf("registering decoder 'application/xml': returned error: %#v",
registerDecoder("application/xml", decodeAsXML))
log.Debugf("registering decoder 'text/xml': returned error: %#v",
registerDecoder("text/xml; charset=utf-8", decodeAsXML))
}

func encodeAsJSON(trReq transformable) ([]byte, error) {
Expand Down Expand Up @@ -217,3 +224,13 @@ func decodeAsZip(p []byte, dst *response) error {

return nil
}

func decodeAsXML(p []byte, dst *response) error {
cdata, v, err := xml.Unmarshal(bytes.NewReader(p), dst.xmlDetails)
if err != nil {
return err
}
dst.body = v
dst.header["XML-CDATA"] = []string{cdata}
return nil
}
12 changes: 11 additions & 1 deletion x-pack/filebeat/input/httpjson/input.go
Expand Up @@ -20,6 +20,8 @@ import (
"go.uber.org/zap"
"go.uber.org/zap/zapcore"

"github.com/elastic/mito/lib/xml"

v2 "github.com/elastic/beats/v7/filebeat/input/v2"
inputcursor "github.com/elastic/beats/v7/filebeat/input/v2/input-cursor"
"github.com/elastic/beats/v7/libbeat/beat"
Expand Down Expand Up @@ -128,8 +130,16 @@ func run(
log.Errorf("Error while creating requestFactory: %v", err)
return err
}
var xmlDetails map[string]xml.Detail
if config.Response.XSD != "" {
xmlDetails, err = xml.Details([]byte(config.Response.XSD))
if err != nil {
log.Errorf("Error while creating requestFactory: %v", err)
return err
}
}
pagination := newPagination(config, httpClient, log)
responseProcessor := newResponseProcessor(config, pagination, log)
responseProcessor := newResponseProcessor(config, pagination, xmlDetails, log)
requester := newRequester(httpClient, requestFactory, responseProcessor, log)

trCtx := emptyTransformContext()
Expand Down
99 changes: 99 additions & 0 deletions x-pack/filebeat/input/httpjson/input_test.go
Expand Up @@ -23,6 +23,7 @@ import (
beattest "github.com/elastic/beats/v7/libbeat/publisher/testing"
conf "github.com/elastic/elastic-agent-libs/config"
"github.com/elastic/elastic-agent-libs/logp"
"github.com/elastic/elastic-agent-libs/mapstr"
)

func TestInput(t *testing.T) {
Expand Down Expand Up @@ -1176,6 +1177,104 @@ func TestInput(t *testing.T) {
`{"space":{"world":"moon"}}`,
},
},
{
name: "Test simple XML decode",
setupServer: func(t *testing.T, h http.HandlerFunc, config map[string]interface{}) {
registerDecoders()
registerRequestTransforms()
r := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
const text = `<?xml version="1.0" encoding="UTF-8"?>
<order orderid="56733" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="sales.xsd">
<sender>Ástríðr Ragnar</sender>
<address>
<name>Joord Lennart</name>
<company>Sydøstlige Gruppe</company>
<address>Beekplantsoen 594, 2 hoog, 6849 IG</address>
<city>Boekend</city>
<country>Netherlands</country>
</address>
<item>
<name>Egil's Saga</name>
<note>Free Sample</note>
<number>1</number>
<cost>99.95</cost>
<sent>FALSE</sent>
</item>
</order>
`
io.ReadAll(r.Body)

Check failure on line 1205 in x-pack/filebeat/input/httpjson/input_test.go

View workflow job for this annotation

GitHub Actions / lint (linux)

Error return value of `io.ReadAll` is not checked (errcheck)
r.Body.Close()
w.Write([]byte(text))

Check failure on line 1207 in x-pack/filebeat/input/httpjson/input_test.go

View workflow job for this annotation

GitHub Actions / lint (linux)

Error return value of `w.Write` is not checked (errcheck)
})
server := httptest.NewServer(r)
t.Cleanup(func() { registeredTransforms = newRegistry() })
config["request.url"] = server.URL
t.Cleanup(server.Close)
},
baseConfig: map[string]interface{}{
"interval": 1,
"request.method": http.MethodGet,
"response.xsd": `<?xml version="1.0" encoding="UTF-8" ?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:element name="order">
<xs:complexType>
<xs:sequence>
<xs:element name="sender" type="xs:string"/>
<xs:element name="address">
<xs:complexType>
<xs:sequence>
<xs:element name="name" type="xs:string"/>
<xs:element name="company" type="xs:string"/>
<xs:element name="address" type="xs:string"/>
<xs:element name="city" type="xs:string"/>
<xs:element name="country" type="xs:string"/>
</xs:sequence>
</xs:complexType>
</xs:element>
<xs:element name="item" maxOccurs="unbounded">
<xs:complexType>
<xs:sequence>
<xs:element name="name" type="xs:string"/>
<xs:element name="note" type="xs:string" minOccurs="0"/>
<xs:element name="number" type="xs:positiveInteger"/>
<xs:element name="cost" type="xs:decimal"/>
<xs:element name="sent" type="xs:boolean"/>
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:sequence>
<xs:attribute name="orderid" type="xs:string" use="required"/>
</xs:complexType>
</xs:element>
</xs:schema>
`,
},
handler: defaultHandler(http.MethodGet, "", ""),
expected: []string{mapstr.M{
"order": map[string]interface{}{
"address": map[string]interface{}{
"address": "Beekplantsoen 594, 2 hoog, 6849 IG",
"city": "Boekend",
"company": "Sydøstlige Gruppe",
"country": "Netherlands",
"name": "Joord Lennart",
},
"item": []interface{}{
map[string]interface{}{
"cost": 99.95,
"name": "Egil's Saga",
"note": "Free Sample",
"number": 1,
"sent": false,
},
},
"noNamespaceSchemaLocation": "sales.xsd",
"orderid": "56733",
"sender": "Ástríðr Ragnar",
"xsi": "http://www.w3.org/2001/XMLSchema-instance",
},
}.String()},
},
}

for _, testCase := range testCases {
Expand Down
8 changes: 7 additions & 1 deletion x-pack/filebeat/input/httpjson/pagination.go
Expand Up @@ -11,6 +11,8 @@ import (
"net/http"
"net/url"

"github.com/elastic/mito/lib/xml"

"github.com/elastic/elastic-agent-libs/logp"
"github.com/elastic/elastic-agent-libs/mapstr"
)
Expand Down Expand Up @@ -90,18 +92,21 @@ type pageIterator struct {

resp *http.Response

xmlDetails map[string]xml.Detail

isFirst bool
done bool

n int64
}

func (p *pagination) newPageIterator(stdCtx context.Context, trCtx *transformContext, resp *http.Response) *pageIterator {
func (p *pagination) newPageIterator(stdCtx context.Context, trCtx *transformContext, resp *http.Response, xmlDetails map[string]xml.Detail) *pageIterator {
return &pageIterator{
pagination: p,
stdCtx: stdCtx,
trCtx: trCtx,
resp: resp,
xmlDetails: xmlDetails,
isFirst: true,
}
}
Expand Down Expand Up @@ -179,6 +184,7 @@ func (iter *pageIterator) getPage() (*response, error) {
if iter.pagination.decoder != nil {
err = iter.pagination.decoder(bodyBytes, &r)
} else {
r.xmlDetails = iter.xmlDetails
err = decode(iter.resp.Header.Get("Content-Type"), bodyBytes, &r)
}
if err != nil {
Expand Down
14 changes: 12 additions & 2 deletions x-pack/filebeat/input/httpjson/request.go
Expand Up @@ -19,6 +19,7 @@ import (
inputcursor "github.com/elastic/beats/v7/filebeat/input/v2/input-cursor"
"github.com/elastic/elastic-agent-libs/logp"
"github.com/elastic/elastic-agent-libs/mapstr"
"github.com/elastic/mito/lib/xml"
)

const requestNamespace = "request"
Expand Down Expand Up @@ -127,6 +128,15 @@ func newRequestFactory(ctx context.Context, config config, log *logp.Logger) ([]
rf.user = config.Auth.Basic.User
rf.password = config.Auth.Basic.Password
}
var xmlDetails map[string]xml.Detail
if config.Response.XSD != "" {
var err error
xmlDetails, err = xml.Details([]byte(config.Response.XSD))
if err != nil {
log.Errorf("Error while creating requestFactory: %v", err)
return nil, err
}
}
rfs = append(rfs, rf)
for _, ch := range config.Chain {
var rf *requestFactory
Expand All @@ -142,7 +152,7 @@ func newRequestFactory(ctx context.Context, config config, log *logp.Logger) ([]
rf.user = ch.Step.Auth.Basic.User
rf.password = ch.Step.Auth.Basic.Password
}
responseProcessor := newChainResponseProcessor(ch, httpClient, log)
responseProcessor := newChainResponseProcessor(ch, httpClient, xmlDetails, log)
rf = &requestFactory{
url: *ch.Step.Request.URL.URL,
method: ch.Step.Request.Method,
Expand All @@ -168,7 +178,7 @@ func newRequestFactory(ctx context.Context, config config, log *logp.Logger) ([]
rf.user = ch.While.Auth.Basic.User
rf.password = ch.While.Auth.Basic.Password
}
responseProcessor := newChainResponseProcessor(ch, httpClient, log)
responseProcessor := newChainResponseProcessor(ch, httpClient, xmlDetails, log)
rf = &requestFactory{
url: *ch.While.Request.URL.URL,
method: ch.While.Request.Method,
Expand Down

0 comments on commit a440d28

Please sign in to comment.