forked from benthosdev/benthos
/
processor_mapping.go
168 lines (141 loc) · 5.86 KB
/
processor_mapping.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
package pure
import (
"context"
"github.com/dafanshu/benthos/v4/internal/bloblang/mapping"
"github.com/dafanshu/benthos/v4/internal/component/interop"
"github.com/dafanshu/benthos/v4/internal/component/processor"
"github.com/dafanshu/benthos/v4/internal/message"
"github.com/dafanshu/benthos/v4/public/bloblang"
"github.com/dafanshu/benthos/v4/public/service"
)
// init registers the "mapping" batch processor with the service package. The
// registration carries the component's config spec (stability, version,
// categories, summary, description and two worked examples) together with the
// constructor used to build the processor from a parsed config. It panics if
// registration returns an error.
func init() {
err := service.RegisterBatchProcessor(
"mapping",
// NOTE(review): the single Bloblang field below is declared with an empty
// name; presumably this means the component's config value is the mapping
// itself rather than a nested object — confirm against the service package.
service.NewConfigSpec().
Stable().
Version("4.5.0").
Categories("Mapping", "Parsing").
Field(service.NewBloblangField("")).
Summary("Executes a [Bloblang](/docs/guides/bloblang/about) mapping on messages, creating a new document that replaces (or filters) the original message.").
Description(`
Bloblang is a powerful language that enables a wide range of mapping, transformation and filtering tasks. For more information [check out the docs](/docs/guides/bloblang/about).
If your mapping is large and you'd prefer for it to live in a separate file then you can execute a mapping directly from a file with the expression `+"`from \"<path>\"`"+`, where the path must be absolute, or relative from the location that Benthos is executed from.
Note: This processor is equivalent to the [bloblang](/docs/components/processors/bloblang#component-rename) one. The latter will be deprecated in a future release.
## Input Document Immutability
Mapping operates by creating an entirely new object during assignments, this has the advantage of treating the original referenced document as immutable and therefore queryable at any stage of your mapping. For example, with the following mapping:
`+"```coffee"+`
root.id = this.id
root.invitees = this.invitees.filter(i -> i.mood >= 0.5)
root.rejected = this.invitees.filter(i -> i.mood < 0.5)
`+"```"+`
Notice that we mutate the value of `+"`invitees`"+` in the resulting document by filtering out objects with a lower mood. However, even after doing so we're still able to reference the unchanged original contents of this value from the input document in order to populate a second field. Within this mapping we also have the flexibility to reference the mutable mapped document by using the keyword `+"`root` (i.e. `root.invitees`)"+` on the right-hand side instead.
Mapping documents is advantageous in situations where the result is a document with a dramatically different shape to the input document, since we are effectively rebuilding the document in its entirety and might as well keep a reference to the unchanged input document throughout. However, in situations where we are only performing minor alterations to the input document, the rest of which is unchanged, it might be more efficient to use the `+"[`mutation` processor](/docs/components/processors/mutation)"+` instead.
## Error Handling
Bloblang mappings can fail, in which case the message remains unchanged, errors are logged, and the message is flagged as having failed, allowing you to use [standard processor error handling patterns](/docs/configuration/error_handling).
However, Bloblang itself also provides powerful ways of ensuring your mappings do not fail by specifying desired fallback behaviour, which you can read about [in this section](/docs/guides/bloblang/about#error-handling).
`).
Example("Mapping", `
Given JSON documents containing an array of fans:
`+"```json"+`
{
"id":"foo",
"description":"a show about foo",
"fans":[
{"name":"bev","obsession":0.57},
{"name":"grace","obsession":0.21},
{"name":"ali","obsession":0.89},
{"name":"vic","obsession":0.43}
]
}
`+"```"+`
We can reduce the documents down to just the ID and only those fans with an obsession score above 0.5, giving us:
`+"```json"+`
{
"id":"foo",
"fans":[
{"name":"bev","obsession":0.57},
{"name":"ali","obsession":0.89}
]
}
`+"```"+`
With the following config:`,
`
pipeline:
processors:
- mapping: |
root.id = this.id
root.fans = this.fans.filter(fan -> fan.obsession > 0.5)
`).
Example("More Mapping", `
When receiving JSON documents of the form:
`+"```json"+`
{
"locations": [
{"name": "Seattle", "state": "WA"},
{"name": "New York", "state": "NY"},
{"name": "Bellevue", "state": "WA"},
{"name": "Olympia", "state": "WA"}
]
}
`+"```"+`
We could collapse the location names from the state of Washington into a field `+"`Cities`"+`:
`+"```json"+`
{"Cities": "Bellevue, Olympia, Seattle"}
`+"```"+`
With the following config:`,
`
pipeline:
processors:
- mapping: |
root.Cities = this.locations.
filter(loc -> loc.state == "WA").
map_each(loc -> loc.name).
sort().join(", ")
`),
// Constructor: reads the Bloblang mapping from the parsed config, builds a
// mappingProc around it with the manager's logger, passes that through
// processor.NewAutoObservedBatchedProcessor, and returns the result via
// interop.NewUnwrapInternalBatchProcessor.
func(conf *service.ParsedConfig, mgr *service.Resources) (service.BatchProcessor, error) {
mapping, err := conf.FieldBloblang()
if err != nil {
return nil, err
}
v1Proc := processor.NewAutoObservedBatchedProcessor("mapping", newMapping(mapping, mgr.Logger()), interop.UnwrapManagement(mgr))
return interop.NewUnwrapInternalBatchProcessor(v1Proc), nil
})
// A failed registration is treated as fatal during package initialisation.
if err != nil {
panic(err)
}
}
// mappingProc is the processor type constructed by newMapping for the
// "mapping" component. It pairs an internal mapping executor with a logger.
type mappingProc struct {
// exec is the internal executor whose MapPart method is called once per
// message in ProcessBatch.
exec *mapping.Executor
// log is the logger that ProcessBatch writes mapping errors to.
log *service.Logger
}
// newMapping builds a mappingProc from a public Bloblang executor and a logger.
//
// The public executor is converted to the internal *mapping.Executor by
// type-asserting the value returned from XUnwrapper to an interface exposing
// Unwrap. The assertion is unchecked, so it panics if that value does not
// provide the method.
func newMapping(exec *bloblang.Executor, log *service.Logger) *mappingProc {
	unwrapper := exec.XUnwrapper().(interface {
		Unwrap() *mapping.Executor
	})
	return &mappingProc{exec: unwrapper.Unwrap(), log: log}
}
// ProcessBatch runs the mapping against each message of b, addressed by its
// index within the batch, and gathers the results into one output batch.
//
// For each message:
//   - if the mapping returns an error, the error is reported through
//     ctx.OnError and logged, and the original message is kept in the output;
//   - if the mapping returns a nil part without error, the message is left out
//     of the output;
//   - otherwise the mapped part takes the message's place.
//
// It returns a single-element slice holding the output batch, or nil when no
// messages remain. The returned error is always nil.
func (m *mappingProc) ProcessBatch(ctx *processor.BatchProcContext, b message.Batch) ([]message.Batch, error) {
	out := make(message.Batch, 0, len(b))

	for idx, original := range b {
		mapped, mapErr := m.exec.MapPart(idx, b)

		switch {
		case mapErr != nil:
			// Report the failure, then pass the untouched message along.
			ctx.OnError(mapErr, idx, original)
			m.log.Errorf("%v", mapErr)
			out = append(out, original)
		case mapped != nil:
			out = append(out, mapped)
		}
		// A nil part with a nil error falls through: the message is dropped.
	}

	if len(out) > 0 {
		return []message.Batch{out}, nil
	}
	return nil, nil
}
// Close performs no work and always returns nil; the supplied context is
// ignored.
func (m *mappingProc) Close(_ context.Context) error {
	return nil
}