-
Notifications
You must be signed in to change notification settings - Fork 18
/
o11y.go
422 lines (350 loc) · 12.9 KB
/
o11y.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
// Package o11y provides observability in the form of tracing and metrics
package o11y
import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"runtime/debug"
"strings"
"github.com/DataDog/datadog-go/statsd"
"github.com/rollbar/rollbar-go"
)
// Provider is the tracing and metrics backend behind this package. The
// package-level helpers (StartSpan, AddField, AddFieldToTrace, Log) look a
// Provider up with FromContext and delegate to the method of the same name.
type Provider interface {
// AddGlobalField adds data which should apply to every span in the application.
//
// eg. version, service, k8s_replicaset
AddGlobalField(key string, val interface{})
// StartSpan begins a new span that'll represent a unit of work.
//
// `name` should be a short human readable identifier of the work.
// It can and should include some details to distinguish it from other
// similar spans - like the URL or the DB query name.
//
// The caller is responsible for calling End(), usually via defer:
//
//	ctx, span := o11y.StartSpan(ctx, "GET /help")
//	defer span.End()
StartSpan(ctx context.Context, name string) (context.Context, Span)
// GetSpan returns the currently active span.
GetSpan(ctx context.Context) Span
// AddField is for adding application-level information to the currently active span.
//
// Any field name will be prefixed with "app."
AddField(ctx context.Context, key string, val interface{})
// AddFieldToTrace is for adding useful information to the root span.
//
// This will be propagated onto every child span.
//
// eg. build-url, plan-id, project-id, org-id etc
AddFieldToTrace(ctx context.Context, key string, val interface{})
// Log sends a zero duration trace event.
Log(ctx context.Context, name string, fields ...Pair)
// Close shuts the provider down.
// NOTE(review): flush/shutdown semantics are implementation-defined and not
// visible in this file - confirm against the concrete providers.
Close(ctx context.Context)
// MetricsProvider grants lower-level control over the metrics that o11y sends,
// allowing metrics to be emitted without going through spans.
MetricsProvider() MetricsProvider
// Helpers returns some specific helper functions (see the Helpers interface).
Helpers() Helpers
}
// PropagationContext contains trace context values that are propagated from service to service.
// Typically, the Parent field is also present as a value in the Headers map.
// This apparent duplication is deliberate: it keeps the special header name used
// for the parent trace ID from leaking and being accidentally depended upon.
type PropagationContext struct {
	// Parent contains the single-string serialisation of just the trace parent fields.
	Parent string
	// Headers contains the map of all context propagation headers.
	Headers map[string]string
}

// PropagationContextFromHeader is a helper that constructs a PropagationContext
// from h. The result is not filtered down to the headers needed for propagation:
// every key of h is copied, keeping only the first value of a multi-valued
// header (a key with no values maps to ""). Parent is left empty.
//
// It is expected to be used as the input to InjectPropagation.
func PropagationContextFromHeader(h http.Header) PropagationContext {
	p := PropagationContext{
		Headers: make(map[string]string, len(h)),
	}
	for k, vals := range h {
		// Take the first value straight from the ranged entry instead of
		// calling h.Get(k): Get canonicalises its argument, so a key stored
		// in non-canonical form (e.g. "x-trace-id") would be looked up under
		// a different key and come back empty.
		first := ""
		if len(vals) > 0 {
			first = vals[0]
		}
		p.Headers[k] = first
	}
	return p
}
// Helpers groups the trace-propagation helper functions that a Provider
// exposes through its Helpers method.
type Helpers interface {
// ExtractPropagation pulls propagation information out of the context.
ExtractPropagation(ctx context.Context) PropagationContext
// InjectPropagation adds propagation header fields into the returned root span, returning
// the context carrying that span.
InjectPropagation(context.Context, PropagationContext) (context.Context, Span)
// TraceIDs returns the standard o11y ids (trace ID and parent ID) for ctx.
TraceIDs(ctx context.Context) (traceID, parentID string)
}
// Span is a single traced unit of work, as returned by Provider.StartSpan.
// Fields and metrics are attached to it while the work runs, and End must be
// called once the work is complete.
type Span interface {
// AddField is for adding application-level information to the span.
//
// Any field name will be prefixed with "app."
AddField(key string, val interface{})
// AddRawField is for adding useful information to the span in library/plumbing code.
// Generally application code should prefer AddField() to avoid namespace clashes.
//
// eg. result, http.status_code, db.system etc
//
// Refer to the opentelemetry draft spec for naming inspiration
// https://github.com/open-telemetry/opentelemetry-specification/tree/7ae3d066c95c716ef3086228ef955d84ba03ac88/specification/trace/semantic_conventions
AddRawField(key string, val interface{})
// RecordMetric tells the provider to emit a metric to its metric backend when the span ends.
RecordMetric(metric Metric)
// End sets the duration of the span and tells the related provider that the span is complete,
// so it can do its appropriate processing. The span should not be used after End is called.
End()
}
// MetricType names the kind of metric a Metric describes.
type MetricType string
// Metric kinds used by the Metric constructors in this file (Timing, Duration,
// Incr, Gauge, Count). Note these are untyped string constants rather than
// MetricType values; they convert implicitly when assigned to Metric.Type.
const (
MetricTimer = "timer"
MetricGauge = "gauge"
MetricCount = "count"
)
// Metric describes a metric that a span should emit; it is handed to a span
// via Span.RecordMetric. The value and tags are taken from span fields by name.
type Metric struct {
// Type is the kind of metric (see MetricTimer, MetricGauge, MetricCount).
Type MetricType
// Name is the metric name that will be emitted
Name string
// Field is the span field to use as the metric's value
Field string
// FixedTag is an optional tag added at Metric definition time
FixedTag *Tag
// TagFields are additional span fields to use as metric tags
TagFields []string
}
// Tag is a name/value pair that can be attached to a Metric as its FixedTag.
type Tag struct {
	Name  string
	Value interface{}
}

// NewTag allocates a Tag holding the given name and value and returns a
// pointer to it. Each call returns a distinct Tag.
func NewTag(name string, value interface{}) *Tag {
	tag := new(Tag)
	tag.Name = name
	tag.Value = value
	return tag
}
// Timing builds a timer Metric called name whose value is read from the
// "duration_ms" span field. fields lists the span fields to use as tags.
func Timing(name string, fields ...string) Metric {
	m := Metric{
		Type:      MetricTimer,
		Name:      name,
		Field:     "duration_ms",
		TagFields: fields,
	}
	return m
}
// Duration builds a timer Metric called name whose value is read from the
// span field valueField. fields lists the span fields to use as tags.
func Duration(name string, valueField string, fields ...string) Metric {
	m := Metric{
		Type:      MetricTimer,
		Name:      name,
		Field:     valueField,
		TagFields: fields,
	}
	return m
}
// Incr builds a count Metric called name with no value field set. fields lists
// the span fields to use as tags.
// NOTE(review): with Field empty the provider presumably counts one per span -
// confirm against the provider implementation.
func Incr(name string, fields ...string) Metric {
	m := Metric{
		Type:      MetricCount,
		Name:      name,
		TagFields: fields,
	}
	return m
}
// Gauge builds a gauge Metric called name whose value is read from the span
// field valueField. tagFields lists the span fields to use as tags.
func Gauge(name string, valueField string, tagFields ...string) Metric {
	var m Metric
	m.Type = MetricGauge
	m.Name = name
	m.Field = valueField
	m.TagFields = tagFields
	return m
}
// Count builds a count Metric called name whose value is read from the span
// field valueField. fixedTag is an optional tag fixed at definition time (may
// be nil); tagFields lists the span fields to use as tags.
func Count(name string, valueField string, fixedTag *Tag, tagFields ...string) Metric {
	var m Metric
	m.Type = MetricCount
	m.Name = name
	m.Field = valueField
	m.FixedTag = fixedTag
	m.TagFields = tagFields
	return m
}
// MetricsProvider emits metrics directly, without going through a span. A
// Provider exposes one through its MetricsProvider method.
// NOTE(review): `tags` and `rate` are passed through to the backend; `rate` is
// presumably the sample rate - confirm against the backing client.
type MetricsProvider interface {
// Histogram aggregates values agent side for a period of time.
// This is similar to TimeInMilliseconds, but not limited to timing data
Histogram(name string, value float64, tags []string, rate float64) error
// TimeInMilliseconds measures timing data only. For example, how long a network call takes
TimeInMilliseconds(name string, value float64, tags []string, rate float64) error
// Gauge measures the value of a metric at a particular time.
Gauge(name string, value float64, tags []string, rate float64) error
// Count sends an individual value in time.
Count(name string, value int64, tags []string, rate float64) error
}
// ClosableMetricsProvider is a MetricsProvider that can also be closed via io.Closer.
type ClosableMetricsProvider interface {
MetricsProvider
io.Closer
}
// providerKey is the unexported context key type under which a Provider is stored.
type providerKey struct{}

// WithProvider derives a child context from ctx that carries p.
// Use FromContext to read the Provider back out.
func WithProvider(ctx context.Context, p Provider) context.Context {
	var key providerKey
	return context.WithValue(ctx, key, p)
}
// FromContext returns the Provider stored in ctx (see WithProvider). When ctx
// carries no Provider, the package's no-op default provider is returned instead
// of nil, so the result can always be called.
func FromContext(ctx context.Context) Provider {
	if p, ok := ctx.Value(providerKey{}).(Provider); ok {
		return p
	}
	return defaultProvider
}
// Log sends a zero duration trace event called name, carrying fields, through
// the Provider found in ctx.
func Log(ctx context.Context, name string, fields ...Pair) {
	provider := FromContext(ctx)
	provider.Log(ctx, name, fields...)
}
// LogError records err as a trace event called name. It starts a span,
// attaches every field with AddField, sets the result fields from err with
// AddResultToSpan, and ends the span straight away.
func LogError(ctx context.Context, name string, err error, fields ...Pair) {
	_, span := StartSpan(ctx, name)
	for i := range fields {
		span.AddField(fields[i].Key, fields[i].Value)
	}
	AddResultToSpan(span, err)
	span.End()
}
// StartSpan starts a span called name using the Provider found in ctx, and
// returns the span together with the context the provider hands back. Without a
// provider in ctx the no-op default is used, so the call has no effect.
func StartSpan(ctx context.Context, name string) (context.Context, Span) {
	provider := FromContext(ctx)
	return provider.StartSpan(ctx, name)
}
// AddField adds the field key=val to the currently active span, via the
// Provider found in ctx.
func AddField(ctx context.Context, key string, val interface{}) {
	provider := FromContext(ctx)
	provider.AddField(ctx, key, val)
}
// AddFieldToTrace adds the field key=val to the currently active root span and
// all of its current and future child spans, via the Provider found in ctx.
func AddFieldToTrace(ctx context.Context, key string, val interface{}) {
	provider := FromContext(ctx)
	provider.AddFieldToTrace(ctx, key, val)
}
// End completes a span, first using AddResultToSpan to set its error and
// result fields.
//
// The intended usage is to defer it right after starting the span, passing the
// address of the function's named error result:
//
//	defer o11y.End(span, &err)
//
// Taking a pointer to the error (rather than the error itself) is what makes
// the early defer work: the address is captured when defer runs, and the value
// is only read when End executes, so the last error assigned before returning
// is the one recorded. A nil pointer is treated as "no error".
func End(span Span, err *error) {
	if err == nil {
		AddResultToSpan(span, nil)
	} else {
		AddResultToSpan(span, *err)
	}
	span.End()
}
// AddResultToSpan takes a possibly nil error and sets the "result" field of
// span, plus "warning" or "error" where applicable:
//
//   - IsWarning(err): "warning" is set to the error text and "result" to "success".
//   - context cancellation or deadline exceeded: "result" is "canceled" and
//     "warning" is the error text.
//   - any other non-nil error: "result" is "error" and "error" is the error text.
//   - nil: "result" is "success".
func AddResultToSpan(span Span, err error) {
	if IsWarning(err) {
		// Warnings are recorded but do not turn the span into a failure.
		span.AddRawField("warning", err.Error())
		span.AddRawField("result", "success")
		return
	}

	if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
		// Context cancellation and timeouts are expected, for instance in timeout and shutdown scenarios.
		// Tracing them as errors adds clutter when looking for real errors.
		span.AddRawField("result", "canceled")
		span.AddRawField("warning", err.Error())
		return
	}

	if err != nil {
		span.AddRawField("result", "error")
		span.AddRawField("error", err.Error())
		return
	}

	span.AddRawField("result", "success")
}
// Pair is a key/value pair used to attach metadata to a span.
type Pair struct {
	Key   string
	Value interface{}
}

// Field builds the metadata Pair key=value.
func Field(key string, value interface{}) Pair {
	var p Pair
	p.Key, p.Value = key, value
	return p
}
// Baggage is a map of values used for telemetry purposes.
// See: https://github.com/open-telemetry/opentelemetry-specification/blob/14b5b6a944e390e368dd2e2ef234d220d8287d19/specification/baggage/api.md
type Baggage map[string]string

// addToTrace adds every entry of the Baggage to the root span through the
// Provider found in ctx. Dashes in keys are replaced with underscores before
// the field is added.
func (b Baggage) addToTrace(ctx context.Context) {
	provider := FromContext(ctx)
	for key, value := range b {
		provider.AddFieldToTrace(ctx, strings.ReplaceAll(key, "-", "_"), value)
	}
}
// baggageKey is the unexported context key type under which Baggage is stored.
type baggageKey struct{}

// WithBaggage adds every baggage entry to the current trace and returns a
// child context of ctx that carries baggage. Use GetBaggage to read it back.
func WithBaggage(ctx context.Context, baggage Baggage) context.Context {
	baggage.addToTrace(ctx)

	var key baggageKey
	return context.WithValue(ctx, key, baggage)
}
// GetBaggage returns the Baggage stored in ctx by WithBaggage, or a new empty
// Baggage when ctx carries none.
func GetBaggage(ctx context.Context) Baggage {
	if stored, ok := ctx.Value(baggageKey{}).(Baggage); ok {
		return stored
	}
	return Baggage{}
}
// defaultProvider is the Provider that FromContext falls back to when the
// context carries none.
var defaultProvider = &noopProvider{}

// noopProvider is a Provider whose methods do nothing: spans are no-op spans,
// metrics go to a no-op statsd client, and helpers are no-op helpers.
type noopProvider struct{}

// AddGlobalField discards the field.
func (*noopProvider) AddGlobalField(string, interface{}) {}

// StartSpan returns ctx unchanged together with a no-op span.
func (*noopProvider) StartSpan(ctx context.Context, _ string) (context.Context, Span) {
	return ctx, new(noopSpan)
}

// GetSpan returns a no-op span.
func (*noopProvider) GetSpan(context.Context) Span {
	return new(noopSpan)
}

// AddField discards the field.
func (*noopProvider) AddField(context.Context, string, interface{}) {}

// AddFieldToTrace discards the field.
func (*noopProvider) AddFieldToTrace(context.Context, string, interface{}) {}

// Close does nothing.
func (*noopProvider) Close(context.Context) {}

// Log discards the event.
func (*noopProvider) Log(context.Context, string, ...Pair) {}

// MetricsProvider returns a statsd client that performs no operations.
func (*noopProvider) MetricsProvider() MetricsProvider {
	return new(statsd.NoOpClient)
}

// Helpers returns helpers that perform no operations.
func (*noopProvider) Helpers() Helpers {
	return noopHelpers{}
}
// noopHelpers is the Helpers implementation returned by noopProvider; none of
// its methods carry any propagation state.
type noopHelpers struct{}

// ExtractPropagation returns an empty PropagationContext.
func (noopHelpers) ExtractPropagation(context.Context) PropagationContext {
	var empty PropagationContext
	return empty
}

// InjectPropagation ignores the propagation data and returns ctx unchanged
// together with a no-op span.
func (noopHelpers) InjectPropagation(ctx context.Context, _ PropagationContext) (context.Context, Span) {
	return ctx, new(noopSpan)
}

// TraceIDs returns empty trace and parent IDs.
func (noopHelpers) TraceIDs(context.Context) (traceID, parentID string) {
	return "", ""
}
// noopSpan is a Span that discards everything it is given.
type noopSpan struct{}

// AddField discards the field.
func (*noopSpan) AddField(string, interface{}) {}

// AddRawField discards the field.
func (*noopSpan) AddRawField(string, interface{}) {}

// RecordMetric discards the metric.
func (*noopSpan) RecordMetric(Metric) {}

// End does nothing.
func (*noopSpan) End() {}
// HandlePanic records a recovered panic value on span and, when the Provider
// found in ctx can supply a Rollbar client, reports the panic to Rollbar.
//
// On the span it sets the raw fields "panic" (the panic value), "has_panicked"
// ("true") and "stack" (the current goroutine's stack trace), and records the
// "panics" count metric tagged by the "name" field.
//
// For Rollbar, if r is non-nil the error is reported as a critical request
// error for r; otherwise the panic value itself is logged.
//
// The returned error's message is "panic handled: " followed by the %+v
// formatting of the panic value.
func HandlePanic(ctx context.Context, span Span, panic interface{}, r *http.Request) (err error) {
	err = fmt.Errorf("panic handled: %+v", panic)

	span.AddRawField("panic", panic)
	span.AddRawField("has_panicked", "true")
	span.AddRawField("stack", string(debug.Stack()))
	span.RecordMetric(Incr("panics", "name"))

	// Only providers that expose a Rollbar client take part in reporting.
	if reporter, ok := FromContext(ctx).(rollbarAble); ok {
		client := reporter.RollBarClient()
		if r == nil {
			client.LogPanic(panic, true)
		} else {
			client.RequestError(rollbar.CRIT, r, err)
		}
	}
	return err
}
// rollbarAble is implemented by providers that can supply a Rollbar client.
// HandlePanic type-asserts the Provider against it to decide whether a panic
// can be reported to Rollbar.
type rollbarAble interface {
RollBarClient() *rollbar.Client
}
// Scan satisfies the `Scanner` interface to allow the database driver to
// un-marshal the JSON blob stored in the database back into the Baggage.
//
// value may be:
//   - []byte or string: decoded as JSON into b (drivers may deliver text/JSON
//     columns as either type).
//   - nil (SQL NULL): b is reset to its zero value (a nil Baggage).
//
// Any other type yields an error. A JSON decoding failure is returned as is.
func (b *Baggage) Scan(value interface{}) error {
	switch v := value.(type) {
	case nil:
		// NULL column: there is nothing to decode, so clear any previous contents.
		*b = nil
		return nil
	case []byte:
		return json.Unmarshal(v, b)
	case string:
		return json.Unmarshal([]byte(v), b)
	default:
		// Message kept unchanged for callers/tests that match on it.
		return errors.New("type assertion to []byte failed")
	}
}
// DeserializeBaggage parses an encoded baggage string of comma-separated
// key=value members (e.g. "k1=v1,k2=v2") into a Baggage.
//
// Spaces and tabs around each member are ignored, so "k1=v1, k2=v2" yields the
// keys "k1" and "k2". Keys and values are URL-unescaped. If a key occurs more
// than once, the first value wins. An empty input yields an empty Baggage.
//
// On a parse error an empty Baggage and the error are returned.
func DeserializeBaggage(s string) (Baggage, error) {
	// An encoded baggage is very much like a query string, so make it look
	// like one first and then parse it as such. Each member is trimmed on the
	// way so that optional whitespace after a comma does not end up inside a
	// key or value.
	members := strings.Split(s, ",")
	for i, member := range members {
		members[i] = strings.Trim(member, " \t")
	}

	// NOTE: url.ParseQuery decodes "+" as a space, and on newer Go releases
	// rejects members containing a raw ";".
	values, err := url.ParseQuery(strings.Join(members, "&"))
	if err != nil {
		return Baggage{}, err
	}

	result := make(Baggage, len(values))
	for k, v := range values {
		// ParseQuery never stores an empty slice, so v[0] is always present.
		result[k] = v[0]
	}
	return result, nil
}