This repository has been archived by the owner on Oct 9, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 59
/
workers.go
177 lines (154 loc) · 5.84 KB
/
workers.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
package controller
import (
"context"
"fmt"
"runtime/pprof"
"time"
"github.com/flyteorg/flytestdlib/contextutils"
"github.com/flyteorg/flytestdlib/logger"
"github.com/flyteorg/flytestdlib/promutils"
"github.com/prometheus/client_golang/prometheus"
"k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/client-go/tools/cache"
)
type Handler interface {
// Initialize the Handler
Initialize(ctx context.Context) error
// Handle method that should handle the object and try to converge the desired and the actual state
Handle(ctx context.Context, namespace, key string) error
}
type workerPoolMetrics struct {
Scope promutils.Scope
FreeWorkers prometheus.Gauge
PerRoundTimer promutils.StopWatch
RoundError prometheus.Counter
RoundSuccess prometheus.Counter
WorkersRestarted prometheus.Counter
}
type WorkerPool struct {
workQueue CompositeWorkQueue
metrics workerPoolMetrics
handler Handler
}
// processNextWorkItem will read a single work item off the workqueue and
// attempt to process it, by calling the handler.
func (w *WorkerPool) processNextWorkItem(ctx context.Context) bool {
obj, shutdown := w.workQueue.Get()
w.metrics.FreeWorkers.Dec()
defer w.metrics.FreeWorkers.Inc()
if shutdown {
return false
}
// We wrap this block in a func so we can defer c.workqueue.Done.
err := func(obj interface{}) error {
// We call Done here so the workqueue knows we have finished
// processing this item. We also must remember to call Forget if we
// do not want this work item being re-queued. For example, we do
// not call Forget if a transient error occurs, instead the item is
// put back on the workqueue and attempted again after a back-off
// period.
defer w.workQueue.Done(obj)
var key string
var ok bool
// We expect strings to come off the workqueue. These are of the
// form namespace/name. We do this as the delayed nature of the
// workqueue means the items in the informer cache may actually be
// more up to date that when the item was initially put onto the
// workqueue.
if key, ok = obj.(string); !ok {
// As the item in the workqueue is actually invalid, we call
// Forget here else we'd go into a loop of attempting to
// process a work item that is invalid.
w.workQueue.Forget(obj)
runtime.HandleError(fmt.Errorf("expected string in workqueue but got %#v", obj))
return nil
}
t := w.metrics.PerRoundTimer.Start()
defer t.Stop()
// Convert the namespace/name string into a distinct namespace and name
namespace, name, err := cache.SplitMetaNamespaceKey(key)
if err != nil {
logger.Errorf(ctx, "Unable to split enqueued key into namespace/execId. Error[%v]", err)
return nil
}
ctx = contextutils.WithNamespace(ctx, namespace)
ctx = contextutils.WithExecutionID(ctx, name)
// Reconcile the Workflow
if err := w.handler.Handle(ctx, namespace, name); err != nil {
w.metrics.RoundError.Inc()
return fmt.Errorf("error syncing '%s': %s", key, err.Error())
}
w.metrics.RoundSuccess.Inc()
// Finally, if no error occurs we Forget this item so it does not
// get queued again until another change happens.
w.workQueue.Forget(obj)
logger.Infof(ctx, "Successfully synced '%s'", key)
return nil
}(obj)
if err != nil {
runtime.HandleError(err)
return true
}
return true
}
// runWorker is a long-running function that will continually call the
// processNextWorkItem function in order to read and process a message on the
// workqueue.
func (w *WorkerPool) runWorker(ctx context.Context) {
logger.Infof(ctx, "Started Worker")
defer logger.Infof(ctx, "Exiting Worker")
for w.processNextWorkItem(ctx) {
}
}
func (w *WorkerPool) Initialize(ctx context.Context) error {
return w.handler.Initialize(ctx)
}
// Run will set up the event handlers for types we are interested in, as well
// as syncing informer caches and starting workers. It will block until stopCh
// is closed, at which point it will shutdown the workqueue and wait for
// workers to finish processing their current work items.
func (w *WorkerPool) Run(ctx context.Context, threadiness int, synced ...cache.InformerSynced) error {
defer runtime.HandleCrash()
defer w.workQueue.ShutdownAll()
// Start the informer factories to begin populating the informer caches
logger.Info(ctx, "Starting FlyteWorkflow controller")
w.metrics.WorkersRestarted.Inc()
// Wait for the caches to be synced before starting workers
logger.Info(ctx, "Waiting for informer caches to sync")
if ok := cache.WaitForCacheSync(ctx.Done(), synced...); !ok {
return fmt.Errorf("failed to wait for caches to sync")
}
logger.Infof(ctx, "Starting workers [%d]", threadiness)
// Launch workers to process FlyteWorkflow resources
for i := 0; i < threadiness; i++ {
w.metrics.FreeWorkers.Inc()
logger.Infof(ctx, "Starting worker [%d]", i)
workerLabel := fmt.Sprintf("worker-%v", i)
go func() {
workerCtx := contextutils.WithGoroutineLabel(ctx, workerLabel)
pprof.SetGoroutineLabels(workerCtx)
w.runWorker(workerCtx)
}()
}
w.workQueue.Start(ctx)
logger.Info(ctx, "Started workers")
<-ctx.Done()
logger.Info(ctx, "Shutting down workers")
return nil
}
func NewWorkerPool(ctx context.Context, scope promutils.Scope, workQueue CompositeWorkQueue, handler Handler) *WorkerPool {
roundScope := scope.NewSubScope("round")
metrics := workerPoolMetrics{
Scope: scope,
FreeWorkers: scope.MustNewGauge("free_workers_count", "Number of workers free"),
PerRoundTimer: roundScope.MustNewStopWatch("round_total", "Latency per round", time.Millisecond),
RoundSuccess: roundScope.MustNewCounter("success_count", "Round succeeded"),
RoundError: roundScope.MustNewCounter("error_count", "Round failed"),
WorkersRestarted: scope.MustNewCounter("workers_restarted", "Propeller worker-pool was restarted"),
}
return &WorkerPool{
workQueue: workQueue,
metrics: metrics,
handler: handler,
}
}