-
Notifications
You must be signed in to change notification settings - Fork 126
/
eventsequence.go
102 lines (93 loc) · 3.71 KB
/
eventsequence.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
package pulsarutils
import (
"sync/atomic"
"github.com/apache/pulsar-client-go/pulsar"
"github.com/gogo/protobuf/proto"
"github.com/hashicorp/go-multierror"
"github.com/pkg/errors"
"github.com/armadaproject/armada/internal/common/armadacontext"
"github.com/armadaproject/armada/internal/common/eventutil"
"github.com/armadaproject/armada/internal/common/requestid"
"github.com/armadaproject/armada/internal/common/schedulers"
"github.com/armadaproject/armada/pkg/armadaevents"
)
// CompactAndPublishSequences prepares sequences for publishing and then sends them to Pulsar.
//
// It runs the input through eventutil.CompactEventSequences, then through
// eventutil.LimitSequencesByteSize with maxMessageSizeInBytes as the size bound,
// and finally hands the result to PublishSequences together with ctx, producer and scheduler.
//
// An error from the size-limiting step is returned unchanged and nothing is published;
// otherwise the result of PublishSequences is returned.
func CompactAndPublishSequences(ctx *armadacontext.Context, sequences []*armadaevents.EventSequence, producer pulsar.Producer, maxMessageSizeInBytes uint, scheduler schedulers.Scheduler) error {
	// Step 1: merge the input into as few sequences as possible.
	compacted := eventutil.CompactEventSequences(sequences)

	// Step 2: split anything that would exceed the maximum message size.
	// NOTE(review): the meaning of the trailing `true` flag is defined by
	// eventutil.LimitSequencesByteSize and is not visible here — confirm there.
	sized, err := eventutil.LimitSequencesByteSize(compacted, maxMessageSizeInBytes, true)
	if err != nil {
		return err
	}

	// Step 3: publish whatever came out of the two steps above.
	return PublishSequences(ctx, producer, sized, scheduler)
}
// PublishSequences publishes the given event sequences to Pulsar using producer.
//
// All sequences are marshalled before anything is sent. If any sequence is nil or
// fails to marshal, an error is returned and no message is handed to the producer.
//
// Each sequence is then submitted with producer.SendAsync as one message whose key is
// the sequence's JobSetName and whose properties carry the request id taken from ctx
// and the scheduler property derived from scheduler.
//
// The function blocks until a send result has been received for every sequence, or
// until ctx is done, whichever comes first. All non-nil send errors (plus ctx.Err()
// on cancellation) are combined into a single multierror; nil is returned if there
// were none.
//
// This function does not compact or size-limit its input; CompactAndPublishSequences
// does both before delegating here.
func PublishSequences(ctx *armadacontext.Context, producer pulsar.Producer, sequences []*armadaevents.EventSequence, scheduler schedulers.Scheduler) error {
	// The request id attached to the context is forwarded as a Pulsar message property
	// so that it can be followed through the log.
	reqID := requestid.FromContextOrMissing(ctx)

	total := len(sequences)

	// Marshal every sequence up front, so that a serialisation failure cannot
	// occur after some of the other sequences have already been sent.
	encoded := make([][]byte, total)
	for idx, seq := range sequences {
		if seq == nil {
			return errors.Errorf("failed to send sequence %v", seq)
		}
		data, marshalErr := proto.Marshal(seq)
		if marshalErr != nil {
			return errors.WithStack(marshalErr)
		}
		encoded[idx] = data
	}

	// Send results are delivered through this channel. It has one slot per sequence
	// so that a callback never blocks when reporting its result, which is not allowed
	// inside the Pulsar send callback.
	results := make(chan error, total)
	var completed uint32

	// onSent is the callback shared by all sends: it reports the outcome and,
	// if it is the last callback to run, closes the results channel.
	onSent := func(_ pulsar.MessageID, _ *pulsar.ProducerMessage, sendErr error) {
		results <- sendErr
		if atomic.AddUint32(&completed, 1) == uint32(total) {
			close(results)
		}
	}

	// Queue all messages with Pulsar async send, in the order given.
	for idx, seq := range sequences {
		msg := &pulsar.ProducerMessage{
			Payload: encoded[idx],
			Properties: map[string]string{
				requestid.MetadataKey:   reqID,
				schedulers.PropertyName: schedulers.MsgPropertyFromScheduler(scheduler),
			},
			Key: seq.JobSetName,
		}
		producer.SendAsync(ctx, msg, onSent)
	}

	// Collect exactly one result per sequence, giving up early if ctx is done.
	var combined *multierror.Error
	for pending := total; pending > 0; pending-- {
		select {
		case sendErr := <-results:
			combined = multierror.Append(combined, sendErr)
		case <-ctx.Done():
			return multierror.Append(combined, ctx.Err()).ErrorOrNil()
		}
	}
	return combined.ErrorOrNil()
}