-
Notifications
You must be signed in to change notification settings - Fork 134
/
metrics.go
123 lines (113 loc) · 3.83 KB
/
metrics.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
package simulator
import (
"fmt"
"strings"
"time"
"golang.org/x/exp/maps"
"golang.org/x/exp/slices"
"github.com/armadaproject/armada/internal/common/armadacontext"
"github.com/armadaproject/armada/pkg/armadaevents"
)
// MetricsCollector consumes state transitions from a channel and keeps running
// event counters, both in aggregate and broken down by queue.
type MetricsCollector struct {
// Channel that Run receives state transitions from.
c <-chan StateTransition
// Metrics accumulated over every event sequence seen, regardless of queue.
OverallMetrics MetricsVector
// Metrics accumulated per queue, keyed by the Queue field of the event sequence.
MetricsByQueue map[string]MetricsVector
// If non-zero, log a summary every this many events.
// Run logs whenever OverallMetrics.NumEvents is a multiple of this value.
LogSummaryInterval int
}
// MetricsVector is a set of event counters together with the time of the most
// recent event of each kind. Times are stored as the event's Created timestamp
// expressed as a duration since the zero time.Time.
type MetricsVector struct {
// Incremented once per event sequence handed to addEventSequence
// (not once per individual event inside the sequence).
NumEvents int
// Number of SubmitJob events seen.
NumSubmitEvents int
// Number of JobRunLeased events seen. Not included in the String() output.
NumLeasedEvents int
// Number of JobRunPreempted events seen.
NumPreemptedEvents int
// Number of JobSucceeded events seen.
NumJobSucceededEvents int
// Created time of the last event processed, of any kind.
TimeOfMostRecentEvent time.Duration
// Created time of the last SubmitJob event processed.
TimeOfMostRecentJobSubmittedEvent time.Duration
// Created time of the last JobRunLeased event processed. Not included in the String() output.
TimeOfMostRecentJobLeasedEvent time.Duration
// Created time of the last JobRunPreempted event processed.
TimeOfMostRecentJobPreemptedEvent time.Duration
// Created time of the last JobSucceeded event processed.
TimeOfMostRecentJobSucceededEvent time.Duration
}
// NewMetricsCollector returns a collector that will read state transitions
// from c. The per-queue map is initialised empty so it can be written to
// immediately; all other fields start at their zero values.
func NewMetricsCollector(c <-chan StateTransition) *MetricsCollector {
	collector := new(MetricsCollector)
	collector.c = c
	collector.MetricsByQueue = map[string]MetricsVector{}
	return collector
}
// String renders the collector as a one-line summary: the overall metrics
// followed by the metrics of each queue. Queues are listed in sorted name
// order so that the output does not depend on map iteration order.
func (mc *MetricsCollector) String() string {
	var sb strings.Builder
	sb.WriteString("{")
	fmt.Fprintf(&sb, "Overall metrics: %s, Per-queue metrics: {", mc.OverallMetrics)

	queueNames := maps.Keys(mc.MetricsByQueue)
	slices.Sort(queueNames)
	for idx, name := range queueNames {
		// Separator goes between entries, never after the last one.
		if idx > 0 {
			sb.WriteString(", ")
		}
		fmt.Fprintf(&sb, "%s: %s", name, mc.MetricsByQueue[name])
	}

	sb.WriteString("}}")
	return sb.String()
}
// String renders the vector as a compact, brace-delimited summary of the form
//
//	{Run: R, Subm: N (t), Pree: N (t), Succ: N (t), Tot: N (t)}
//
// where each N is a counter and each t is the time of the most recent event of
// that kind. "Run" is derived rather than stored: it is the number of submit
// events minus the number of preempted and succeeded events. The leased
// counters are not part of the output.
func (m MetricsVector) String() string {
	return fmt.Sprintf(
		// The trailing "}" balances the opening brace; without it the
		// enclosing collector summary ends up with mismatched braces.
		"{Run: %d, Subm: %d (%s), Pree: %d (%s), Succ: %d (%s), Tot: %d (%s)}",
		m.NumSubmitEvents-(m.NumPreemptedEvents+m.NumJobSucceededEvents),
		m.NumSubmitEvents, m.TimeOfMostRecentJobSubmittedEvent,
		m.NumPreemptedEvents, m.TimeOfMostRecentJobPreemptedEvent,
		m.NumJobSucceededEvents, m.TimeOfMostRecentJobSucceededEvent,
		m.NumEvents, m.TimeOfMostRecentEvent,
	)
}
// Run processes state transitions from the collector's channel until either
// the channel is closed, in which case it returns nil, or ctx is done, in
// which case it returns ctx.Err(). After each transition it logs a summary
// through ctx if LogSummaryInterval is non-zero and the overall event count is
// a multiple of that interval.
func (mc *MetricsCollector) Run(ctx *armadacontext.Context) error {
	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case transition, open := <-mc.c:
			if !open {
				// Sender closed the channel: nothing more to collect.
				return nil
			}
			mc.addEventSequence(transition.EventSequence)

			// A zero interval disables periodic summaries (and avoids
			// a modulo by zero below).
			if mc.LogSummaryInterval == 0 {
				continue
			}
			if mc.OverallMetrics.NumEvents%mc.LogSummaryInterval == 0 {
				ctx.Info(mc.String())
			}
		}
	}
}
// addEventSequence folds one event sequence into the collector. Every update
// is applied twice: to OverallMetrics and to the entry of MetricsByQueue for
// the sequence's queue (created from the zero value if absent).
//
// NumEvents is bumped once per sequence. For each event in the sequence, the
// most-recent-event time is set to the event's Created timestamp measured from
// the zero time.Time, and, for submit, leased, preempted and succeeded events,
// the matching counter and timestamp are updated. Other event types only
// affect TimeOfMostRecentEvent.
func (mc *MetricsCollector) addEventSequence(eventSequence *armadaevents.EventSequence) {
	queueName := eventSequence.Queue
	// Work on a copy of the per-queue vector; it is stored back at the end
	// because map values are not addressable.
	queueMetrics := mc.MetricsByQueue[queueName]
	targets := [...]*MetricsVector{&mc.OverallMetrics, &queueMetrics}

	for _, v := range targets {
		v.NumEvents++
	}

	for _, ev := range eventSequence.Events {
		elapsed := ev.Created.Sub(time.Time{})
		payload := ev.GetEvent()
		for _, v := range targets {
			v.TimeOfMostRecentEvent = elapsed
			switch payload.(type) {
			case *armadaevents.EventSequence_Event_SubmitJob:
				v.NumSubmitEvents++
				v.TimeOfMostRecentJobSubmittedEvent = elapsed
			case *armadaevents.EventSequence_Event_JobRunLeased:
				v.NumLeasedEvents++
				v.TimeOfMostRecentJobLeasedEvent = elapsed
			case *armadaevents.EventSequence_Event_JobRunPreempted:
				v.NumPreemptedEvents++
				v.TimeOfMostRecentJobPreemptedEvent = elapsed
			case *armadaevents.EventSequence_Event_JobSucceeded:
				v.NumJobSucceededEvents++
				v.TimeOfMostRecentJobSucceededEvent = elapsed
			}
		}
	}

	mc.MetricsByQueue[queueName] = queueMetrics
}