Skip to content

Commit

Permalink
x-pack/filebeat/input/httpjson: provide XML decoding functionality
Browse files Browse the repository at this point in the history
This addition makes use of the mito XML library to allow users to decode
XML responses with type hints.
  • Loading branch information
efd6 committed May 11, 2023
1 parent 139b305 commit 238c351
Show file tree
Hide file tree
Showing 13 changed files with 212 additions and 18 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.next.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,7 @@ automatic splitting at root level, if root level element is an array. {pull}3415
- Add oracle authentication messages parsing {pull}35127[35127]
- Add support for CRC validation in Filebeat's HTTP endpoint input. {pull}35204[35204]
- Add execution budget to CEL input. {pull}35409[35409]
- Add XML decoding support to HTTPJSON. {issue}34438[34438] {pull}35235[35235]

*Auditbeat*
- Migration of system/package module storage from gob encoding to flatbuffer encoding in bolt db. {pull}34817[34817]
Expand Down
35 changes: 33 additions & 2 deletions NOTICE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13412,11 +13412,11 @@ limitations under the License.

--------------------------------------------------------------------------------
Dependency : github.com/elastic/mito
Version: v0.0.0-20230302005114-1dda06e81678
Version: v0.0.0-20230427032043-b7fe987e263f
Licence type (autodetected): Apache-2.0
--------------------------------------------------------------------------------

Contents of probable licence file $GOMODCACHE/github.com/elastic/mito@v0.0.0-20230302005114-1dda06e81678/LICENSE:
Contents of probable licence file $GOMODCACHE/github.com/elastic/mito@v0.0.0-20230427032043-b7fe987e263f/LICENSE:


Apache License
Expand Down Expand Up @@ -25184,6 +25184,37 @@ Public License instead of this License.
Indirect dependencies


--------------------------------------------------------------------------------
Dependency : aqwari.net/xml
Version: v0.0.0-20210331023308-d9421b293817
Licence type (autodetected): MIT
--------------------------------------------------------------------------------

Contents of probable licence file $GOMODCACHE/aqwari.net/xml@v0.0.0-20210331023308-d9421b293817/LICENSE:

The MIT License (MIT)

Copyright (c) 2015 David Arroyo

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.


--------------------------------------------------------------------------------
Dependency : cloud.google.com/go/compute
Version: v1.12.1
Expand Down
3 changes: 2 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ require (
github.com/elastic/elastic-agent-shipper-client v0.5.1-0.20230228231646-f04347b666f3
github.com/elastic/elastic-agent-system-metrics v0.6.0
github.com/elastic/go-elasticsearch/v8 v8.2.0
github.com/elastic/mito v0.0.0-20230302005114-1dda06e81678
github.com/elastic/mito v0.0.0-20230427032043-b7fe987e263f
github.com/elastic/toutoumomoma v0.0.0-20221026030040-594ef30cb640
github.com/foxcpp/go-mockdns v0.0.0-20201212160233-ede2f9158d15
github.com/google/cel-go v0.13.0
Expand All @@ -216,6 +216,7 @@ require (
)

require (
aqwari.net/xml v0.0.0-20210331023308-d9421b293817 // indirect
cloud.google.com/go/compute v1.12.1 // indirect
cloud.google.com/go/compute/metadata v0.2.1 // indirect
cloud.google.com/go/iam v0.6.0 // indirect
Expand Down
8 changes: 6 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
aqwari.net/xml v0.0.0-20210331023308-d9421b293817 h1:+3Rh5EaTzNLnzWx3/uy/mAaH/dGI7svJ6e0oOIDcPuE=
aqwari.net/xml v0.0.0-20210331023308-d9421b293817/go.mod h1:c7kkWzc7HS/t8Q2DcVY8P2d1dyWNEhEVT5pL0ZHO11c=
bazil.org/fuse v0.0.0-20160811212531-371fbbdaa898/go.mod h1:Xbm+BRKSBEpa4q4hTSxohYNQpsxXPbPry4JJWOB3LB8=
cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
Expand Down Expand Up @@ -668,8 +670,8 @@ github.com/elastic/gopacket v1.1.20-0.20211202005954-d412fca7f83a h1:8WfL/X6fK11
github.com/elastic/gopacket v1.1.20-0.20211202005954-d412fca7f83a/go.mod h1:riddUzxTSBpJXk3qBHtYr4qOhFhT6k/1c0E3qkQjQpA=
github.com/elastic/gosigar v0.14.2 h1:Dg80n8cr90OZ7x+bAax/QjoW/XqTI11RmA79ZwIm9/4=
github.com/elastic/gosigar v0.14.2/go.mod h1:iXRIGg2tLnu7LBdpqzyQfGDEidKCfWcCMS0WKyPWoMs=
github.com/elastic/mito v0.0.0-20230302005114-1dda06e81678 h1:+meybiVLjeeEOsxW7Wuq05uOZQ874sVBMgxoGzUvg7E=
github.com/elastic/mito v0.0.0-20230302005114-1dda06e81678/go.mod h1:7dZE6T9eC4uwo6HG4OHbHDy255spws7MCwA2KBkppkU=
github.com/elastic/mito v0.0.0-20230427032043-b7fe987e263f h1:/bqa4NNCFJry2j9QHTUj4wfC9YE5VgMwl0xbRf4zEQs=
github.com/elastic/mito v0.0.0-20230427032043-b7fe987e263f/go.mod h1:V4HuliUNTUpjlEoZtZ9rJTWW7GUVP7gxPbtiN0rTFmM=
github.com/elastic/ristretto v0.1.1-0.20220602190459-83b0895ca5b3 h1:ChPwRVv1RR4a0cxoGjKcyWjTEpxYfm5gydMIzo32cAw=
github.com/elastic/ristretto v0.1.1-0.20220602190459-83b0895ca5b3/go.mod h1:RAy2GVV4sTWVlNMavv3xhLsk18rxhfhDnombTe6EF5c=
github.com/elastic/sarama v1.19.1-0.20220310193331-ebc2b0d8eef3 h1:FzA0/n4iMt8ojGDGRoiFPSHFvvdVIvxOxyLtiFnrLBM=
Expand Down Expand Up @@ -2043,6 +2045,7 @@ golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/
golang.org/x/net v0.0.0-20200602114024-627f9648deb9/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20200813134508-3edf25e44fcc/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20201006153459-a7d1128ccaa0/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
Expand Down Expand Up @@ -2343,6 +2346,7 @@ golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roY
golang.org/x/tools v0.0.0-20200721032237-77f530d86f9a/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=
golang.org/x/tools v0.0.0-20200729194436-6467de6f59a7/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=
golang.org/x/tools v0.0.0-20200804011535-6c149bb5ef0d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=
golang.org/x/tools v0.0.0-20200821192610-3366bbee4705/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=
golang.org/x/tools v0.0.0-20200825202427-b303f430e36d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=
golang.org/x/tools v0.0.0-20200904185747-39188db58858/go.mod h1:Cj7w3i3Rnn0Xh82ur9kSqwfTHTeVxaDqrfMjpcNT6bE=
golang.org/x/tools v0.0.0-20201110124207-079ba7bd75cd/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
Expand Down
9 changes: 8 additions & 1 deletion x-pack/filebeat/docs/inputs/input-httpjson.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -655,12 +655,19 @@ This determines whether rotated logs should be gzip compressed.
[float]
==== `response.decode_as`

ContentType used for decoding the response body. If set it will force the decoding in the specified format regardless of the `Content-Type` header value, otherwise it will honor it if possible or fallback to `application/json`. Supported values: `application/json, application/x-ndjson`, `text/csv`, `application/zip`. It is not set by default.
ContentType used for decoding the response body. If set it will force the decoding in the specified format regardless of the `Content-Type` header value, otherwise it will honor it if possible or fallback to `application/json`. Supported values: `application/json, application/x-ndjson`, `text/csv`, `application/zip`, `application/xml` and `text/xml`. It is not set by default.

NOTE: For `text/csv`, one event for each line will be created, using the header values as the object keys. For this reason is always assumed that a header exists.

NOTE: For `application/zip`, the `zip` file is expected to contain one or more `.json` or `.ndjson` files. The contents of all of them will be merged into a single list of `JSON` objects.

NOTE: For `application/xml` and `text/xml` type information for decoding the XML document can be provided via the `response.xsd` option.

[float]
==== `response.xsd`

XML documents may require additional type information to enable correct parsing and ingestion. This information can be provided as an XML Schema Definition (XSD) for the document using the `response.xsd` option.

[[response-transforms]]
[float]
==== `response.transforms`
Expand Down
1 change: 1 addition & 0 deletions x-pack/filebeat/input/httpjson/config_response.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ const (

type responseConfig struct {
DecodeAs string `config:"decode_as"`
XSD string `config:"xsd"`
RequestBodyOnPagination bool `config:"request_body_on_pagination"`
Transforms transformsConfig `config:"transforms"`
Pagination transformsConfig `config:"pagination"`
Expand Down
17 changes: 17 additions & 0 deletions x-pack/filebeat/input/httpjson/encoding.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ import (
"io"
"net/http"

"github.com/elastic/mito/lib/xml"

"github.com/elastic/elastic-agent-libs/logp"
)

Expand Down Expand Up @@ -101,6 +103,11 @@ func registerDecoders() {

log.Debugf("registering decoder 'application/zip': returned error: %#v",
registerDecoder("application/zip", decodeAsZip))

log.Debugf("registering decoder 'application/xml': returned error: %#v",
registerDecoder("application/xml", decodeAsXML))
log.Debugf("registering decoder 'text/xml': returned error: %#v",
registerDecoder("text/xml; charset=utf-8", decodeAsXML))
}

func encodeAsJSON(trReq transformable) ([]byte, error) {
Expand Down Expand Up @@ -217,3 +224,13 @@ func decodeAsZip(p []byte, dst *response) error {

return nil
}

func decodeAsXML(p []byte, dst *response) error {
cdata, v, err := xml.Unmarshal(bytes.NewReader(p), dst.xmlDetails)
if err != nil {
return err
}
dst.body = v
dst.header["XML-CDATA"] = []string{cdata}
return nil
}
12 changes: 11 additions & 1 deletion x-pack/filebeat/input/httpjson/input.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ import (
"go.uber.org/zap"
"go.uber.org/zap/zapcore"

"github.com/elastic/mito/lib/xml"

v2 "github.com/elastic/beats/v7/filebeat/input/v2"
inputcursor "github.com/elastic/beats/v7/filebeat/input/v2/input-cursor"
"github.com/elastic/beats/v7/libbeat/beat"
Expand Down Expand Up @@ -128,8 +130,16 @@ func run(
log.Errorf("Error while creating requestFactory: %v", err)
return err
}
var xmlDetails map[string]xml.Detail
if config.Response.XSD != "" {
xmlDetails, err = xml.Details([]byte(config.Response.XSD))
if err != nil {
log.Errorf("Error while creating requestFactory: %v", err)
return err
}
}
pagination := newPagination(config, httpClient, log)
responseProcessor := newResponseProcessor(config, pagination, log)
responseProcessor := newResponseProcessor(config, pagination, xmlDetails, log)
requester := newRequester(httpClient, requestFactory, responseProcessor, log)

trCtx := emptyTransformContext()
Expand Down
99 changes: 99 additions & 0 deletions x-pack/filebeat/input/httpjson/input_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
beattest "github.com/elastic/beats/v7/libbeat/publisher/testing"
conf "github.com/elastic/elastic-agent-libs/config"
"github.com/elastic/elastic-agent-libs/logp"
"github.com/elastic/elastic-agent-libs/mapstr"
)

func TestInput(t *testing.T) {
Expand Down Expand Up @@ -1176,6 +1177,104 @@ func TestInput(t *testing.T) {
`{"space":{"world":"moon"}}`,
},
},
{
name: "Test simple XML decode",
setupServer: func(t *testing.T, h http.HandlerFunc, config map[string]interface{}) {
registerDecoders()
registerRequestTransforms()
r := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
const text = `<?xml version="1.0" encoding="UTF-8"?>
<order orderid="56733" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="sales.xsd">
<sender>Ástríðr Ragnar</sender>
<address>
<name>Joord Lennart</name>
<company>Sydøstlige Gruppe</company>
<address>Beekplantsoen 594, 2 hoog, 6849 IG</address>
<city>Boekend</city>
<country>Netherlands</country>
</address>
<item>
<name>Egil's Saga</name>
<note>Free Sample</note>
<number>1</number>
<cost>99.95</cost>
<sent>FALSE</sent>
</item>
</order>
`
io.ReadAll(r.Body)
r.Body.Close()
w.Write([]byte(text))
})
server := httptest.NewServer(r)
t.Cleanup(func() { registeredTransforms = newRegistry() })
config["request.url"] = server.URL
t.Cleanup(server.Close)
},
baseConfig: map[string]interface{}{
"interval": 1,
"request.method": http.MethodGet,
"response.xsd": `<?xml version="1.0" encoding="UTF-8" ?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:element name="order">
<xs:complexType>
<xs:sequence>
<xs:element name="sender" type="xs:string"/>
<xs:element name="address">
<xs:complexType>
<xs:sequence>
<xs:element name="name" type="xs:string"/>
<xs:element name="company" type="xs:string"/>
<xs:element name="address" type="xs:string"/>
<xs:element name="city" type="xs:string"/>
<xs:element name="country" type="xs:string"/>
</xs:sequence>
</xs:complexType>
</xs:element>
<xs:element name="item" maxOccurs="unbounded">
<xs:complexType>
<xs:sequence>
<xs:element name="name" type="xs:string"/>
<xs:element name="note" type="xs:string" minOccurs="0"/>
<xs:element name="number" type="xs:positiveInteger"/>
<xs:element name="cost" type="xs:decimal"/>
<xs:element name="sent" type="xs:boolean"/>
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:sequence>
<xs:attribute name="orderid" type="xs:string" use="required"/>
</xs:complexType>
</xs:element>
</xs:schema>
`,
},
handler: defaultHandler(http.MethodGet, "", ""),
expected: []string{mapstr.M{
"order": map[string]interface{}{
"address": map[string]interface{}{
"address": "Beekplantsoen 594, 2 hoog, 6849 IG",
"city": "Boekend",
"company": "Sydøstlige Gruppe",
"country": "Netherlands",
"name": "Joord Lennart",
},
"item": []interface{}{
map[string]interface{}{
"cost": 99.95,
"name": "Egil's Saga",
"note": "Free Sample",
"number": 1,
"sent": false,
},
},
"noNamespaceSchemaLocation": "sales.xsd",
"orderid": "56733",
"sender": "Ástríðr Ragnar",
"xsi": "http://www.w3.org/2001/XMLSchema-instance",
},
}.String()},
},
}

for _, testCase := range testCases {
Expand Down
8 changes: 7 additions & 1 deletion x-pack/filebeat/input/httpjson/pagination.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ import (
"net/http"
"net/url"

"github.com/elastic/mito/lib/xml"

"github.com/elastic/elastic-agent-libs/logp"
"github.com/elastic/elastic-agent-libs/mapstr"
)
Expand Down Expand Up @@ -90,18 +92,21 @@ type pageIterator struct {

resp *http.Response

xmlDetails map[string]xml.Detail

isFirst bool
done bool

n int64
}

func (p *pagination) newPageIterator(stdCtx context.Context, trCtx *transformContext, resp *http.Response) *pageIterator {
func (p *pagination) newPageIterator(stdCtx context.Context, trCtx *transformContext, resp *http.Response, xmlDetails map[string]xml.Detail) *pageIterator {
return &pageIterator{
pagination: p,
stdCtx: stdCtx,
trCtx: trCtx,
resp: resp,
xmlDetails: xmlDetails,
isFirst: true,
}
}
Expand Down Expand Up @@ -179,6 +184,7 @@ func (iter *pageIterator) getPage() (*response, error) {
if iter.pagination.decoder != nil {
err = iter.pagination.decoder(bodyBytes, &r)
} else {
r.xmlDetails = iter.xmlDetails
err = decode(iter.resp.Header.Get("Content-Type"), bodyBytes, &r)
}
if err != nil {
Expand Down
14 changes: 12 additions & 2 deletions x-pack/filebeat/input/httpjson/request.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
inputcursor "github.com/elastic/beats/v7/filebeat/input/v2/input-cursor"
"github.com/elastic/elastic-agent-libs/logp"
"github.com/elastic/elastic-agent-libs/mapstr"
"github.com/elastic/mito/lib/xml"
)

const requestNamespace = "request"
Expand Down Expand Up @@ -127,6 +128,15 @@ func newRequestFactory(ctx context.Context, config config, log *logp.Logger) ([]
rf.user = config.Auth.Basic.User
rf.password = config.Auth.Basic.Password
}
var xmlDetails map[string]xml.Detail
if config.Response.XSD != "" {
var err error
xmlDetails, err = xml.Details([]byte(config.Response.XSD))
if err != nil {
log.Errorf("Error while creating requestFactory: %v", err)
return nil, err
}
}
rfs = append(rfs, rf)
for _, ch := range config.Chain {
var rf *requestFactory
Expand All @@ -142,7 +152,7 @@ func newRequestFactory(ctx context.Context, config config, log *logp.Logger) ([]
rf.user = ch.Step.Auth.Basic.User
rf.password = ch.Step.Auth.Basic.Password
}
responseProcessor := newChainResponseProcessor(ch, httpClient, log)
responseProcessor := newChainResponseProcessor(ch, httpClient, xmlDetails, log)
rf = &requestFactory{
url: *ch.Step.Request.URL.URL,
method: ch.Step.Request.Method,
Expand All @@ -168,7 +178,7 @@ func newRequestFactory(ctx context.Context, config config, log *logp.Logger) ([]
rf.user = ch.While.Auth.Basic.User
rf.password = ch.While.Auth.Basic.Password
}
responseProcessor := newChainResponseProcessor(ch, httpClient, log)
responseProcessor := newChainResponseProcessor(ch, httpClient, xmlDetails, log)
rf = &requestFactory{
url: *ch.While.Request.URL.URL,
method: ch.While.Request.Method,
Expand Down
Loading

0 comments on commit 238c351

Please sign in to comment.