-
Notifications
You must be signed in to change notification settings - Fork 152
/
dataset.go
212 lines (178 loc) · 4.12 KB
/
dataset.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
package execute
import (
"github.com/influxdata/flux"
"github.com/influxdata/flux/plan"
uuid "github.com/satori/go.uuid"
)
// Dataset represents the set of data produced by a transformation.
type Dataset interface {
	Node
	// RetractTable discards the data for the given group key and
	// notifies downstream transformations of the retraction.
	RetractTable(key flux.GroupKey) error
	// UpdateProcessingTime advances the dataset's processing-time clock,
	// which may cause triggers to fire.
	UpdateProcessingTime(t Time) error
	// UpdateWatermark advances the dataset's watermark, which may cause
	// triggers to fire.
	UpdateWatermark(mark Time) error
	// Finish signals that no more data will be produced; a non-nil
	// error indicates abnormal termination.
	Finish(error)
	// SetTriggerSpec configures when cached tables are emitted
	// downstream.
	SetTriggerSpec(t plan.TriggerSpec)
}
// DataCache holds all working data for a transformation.
type DataCache interface {
	// Table returns the cached table for the given group key.
	Table(flux.GroupKey) (flux.Table, error)
	// ForEach invokes the function once per cached group key.
	ForEach(func(flux.GroupKey))
	// ForEachWithContext invokes the function once per cached group key,
	// additionally supplying that key's trigger and table context.
	ForEachWithContext(func(flux.GroupKey, Trigger, TableContext))
	// DiscardTable drops the table stored for the key.
	// NOTE(review): the exact difference between DiscardTable and
	// ExpireTable is defined by the cache implementation — confirm there.
	DiscardTable(flux.GroupKey)
	// ExpireTable drops the table for the key; used once its trigger
	// reports Finished (see dataset.evalTriggers).
	ExpireTable(flux.GroupKey)
	// SetTriggerSpec configures the trigger applied to cached tables.
	SetTriggerSpec(t plan.TriggerSpec)
}
// AccumulationMode controls what happens to a cached table after its
// trigger fires (see dataset.triggerTable).
type AccumulationMode int
const (
	// DiscardingMode processes a table and then drops it from the cache.
	DiscardingMode AccumulationMode = iota
	// AccumulatingMode processes a table and keeps it cached so later
	// firings emit the accumulated result.
	AccumulatingMode
	// AccumulatingRetractingMode retracts the previous emission before
	// emitting the accumulated table.
	AccumulatingRetractingMode
)
// DatasetID uniquely identifies a dataset within an execution.
type DatasetID uuid.UUID

// String renders the dataset ID in canonical UUID text form.
func (id DatasetID) String() string {
	u := uuid.UUID(id)
	return u.String()
}
// ZeroDatasetID is the zero value of DatasetID; it denotes an unset ID.
var ZeroDatasetID DatasetID
// IsZero reports whether id equals ZeroDatasetID.
func (id DatasetID) IsZero() bool {
	return id == ZeroDatasetID
}
// DatasetIDFromNodeID deterministically derives a dataset ID from a plan
// node ID using a version-5 (name-based) UUID under the nil namespace.
func DatasetIDFromNodeID(id plan.NodeID) DatasetID {
	name := string(id)
	return DatasetID(uuid.NewV5(uuid.UUID{}, name))
}
// dataset is the Dataset implementation used by transformations; it
// buffers working tables in a DataCache and forwards results to the
// registered downstream transformations.
type dataset struct {
	id DatasetID
	ts []Transformation // downstream consumers of produced tables
	accMode AccumulationMode
	watermark Time
	processingTime Time
	cache DataCache // working tables, keyed by group key
}
// NewDataset constructs a dataset that stores working data in cache and
// emits results downstream according to accMode.
func NewDataset(id DatasetID, accMode AccumulationMode, cache DataCache) *dataset {
	d := new(dataset)
	d.id = id
	d.accMode = accMode
	d.cache = cache
	return d
}
// AddTransformation registers t as a downstream consumer of the tables
// this dataset produces.
func (d *dataset) AddTransformation(t Transformation) {
	d.ts = append(d.ts, t)
}
// SetTriggerSpec forwards the trigger configuration to the data cache.
func (d *dataset) SetTriggerSpec(spec plan.TriggerSpec) {
	d.cache.SetTriggerSpec(spec)
}
// UpdateWatermark advances the watermark, re-evaluates triggers against
// it, and then forwards the new mark to every downstream transformation.
// Processing stops at the first error.
func (d *dataset) UpdateWatermark(mark Time) error {
	d.watermark = mark
	err := d.evalTriggers()
	if err != nil {
		return err
	}
	for _, next := range d.ts {
		if err = next.UpdateWatermark(d.id, mark); err != nil {
			return err
		}
	}
	return nil
}
// UpdateProcessingTime advances the processing-time clock, re-evaluates
// triggers against it, and then forwards the new time to every
// downstream transformation. Processing stops at the first error.
func (d *dataset) UpdateProcessingTime(now Time) error {
	d.processingTime = now
	err := d.evalTriggers()
	if err != nil {
		return err
	}
	for _, next := range d.ts {
		if err = next.UpdateProcessingTime(d.id, now); err != nil {
			return err
		}
	}
	return nil
}
// evalTriggers visits every cached table, emitting those whose trigger
// fires and expiring those whose trigger has finished. The first error
// encountered aborts emission for the remaining keys and is returned.
func (d *dataset) evalTriggers() (err error) {
	d.cache.ForEachWithContext(func(key flux.GroupKey, trigger Trigger, tc TableContext) {
		// Once an error has occurred, skip the remaining tables.
		if err != nil {
			return
		}
		ctx := TriggerContext{
			Table:                 tc,
			Watermark:             d.watermark,
			CurrentProcessingTime: d.processingTime,
		}
		if trigger.Triggered(ctx) {
			err = d.triggerTable(key)
		}
		// A finished trigger expires its table even when the emission
		// above just failed.
		if trigger.Finished() {
			d.expireTable(key)
		}
	})
	return err
}
// triggerTable emits the cached table for key to the downstream
// transformations, honoring the configured accumulation mode.
func (d *dataset) triggerTable(key flux.GroupKey) error {
	tbl, err := d.cache.Table(key)
	if err != nil {
		return err
	}
	switch d.accMode {
	case DiscardingMode:
		// Emit the table, then drop it so the next firing starts fresh.
		if err := d.processTable(tbl); err != nil {
			return err
		}
		d.cache.DiscardTable(key)
		return nil
	case AccumulatingRetractingMode:
		// Retract the previously emitted version before re-emitting the
		// accumulated table.
		for _, t := range d.ts {
			if err := t.RetractTable(d.id, tbl.Key()); err != nil {
				return err
			}
		}
		return d.processTable(tbl)
	case AccumulatingMode:
		// The table stays cached so it keeps accumulating.
		return d.processTable(tbl)
	}
	return nil
}
// processTable delivers tbl to every registered transformation. A single
// consumer receives the table directly; with several consumers the table
// is buffered once and each consumer gets its own copy.
func (d *dataset) processTable(tbl flux.Table) error {
	switch len(d.ts) {
	case 0:
		return nil
	case 1:
		return d.ts[0].Process(d.id, tbl)
	}
	// Multiple consumers: buffer the table so it can be read repeatedly.
	buffered, err := CopyTable(tbl)
	if err != nil {
		return err
	}
	defer buffered.Done()
	for _, next := range d.ts {
		if err := next.Process(d.id, buffered.Copy()); err != nil {
			return err
		}
	}
	return nil
}
// expireTable removes the table for key from the cache; called once the
// key's trigger has finished.
func (d *dataset) expireTable(key flux.GroupKey) {
	d.cache.ExpireTable(key)
}
// RetractTable drops the cached table for key and propagates the
// retraction to every downstream transformation, stopping at the first
// error.
func (d *dataset) RetractTable(key flux.GroupKey) error {
	d.cache.DiscardTable(key)
	for _, next := range d.ts {
		if retractErr := next.RetractTable(d.id, key); retractErr != nil {
			return retractErr
		}
	}
	return nil
}
// Finish flushes the remaining cached tables — only when err is nil, so
// an erroring dataset does not emit partial results — and then signals
// completion, with the final error, to every downstream transformation.
func (d *dataset) Finish(err error) {
	if err == nil {
		d.cache.ForEach(func(key flux.GroupKey) {
			// Stop triggering after the first failure; that error is
			// handed to the transformations below.
			if err != nil {
				return
			}
			err = d.triggerTable(key)
			d.cache.ExpireTable(key)
		})
	}
	for _, t := range d.ts {
		t.Finish(d.id, err)
	}
}