-
Notifications
You must be signed in to change notification settings - Fork 2.6k
/
metrics_collector.cjs
278 lines (248 loc) · 9.27 KB
/
metrics_collector.cjs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
/**
* @typedef {{
* "node.gc.collections": number;
* "node.gc.pause.ns": number;
* "node.gc.old.collections": number;
* "node.gc.old.pause.ns": number;
* "node.gc.young.collections": number;
* "node.gc.young.pause.ns": number;
* }} MemoryCounters
*
* @typedef {{
* "node.eventloop.usage.percent": number;
* "node.eventloop.delay.ms.median": number;
* "node.eventloop.delay.ms.p95": number;
* "node.eventloop.delay.ms.p99": number;
* "node.eventloop.delay.ms.max": number;
* }} EventLoopGauges
*
* @typedef {{
* counters: MemoryCounters;
* gauges: EventLoopGauges;
* }} MetricsPayload
*/
const { setInterval } = require('timers')
const { URL } = require('url');
const { debuglog } = require('util');
const { monitorEventLoopDelay, PerformanceObserver, constants, performance } = require('perf_hooks')
const { request: insecureRequest } = require('http');
const { request: secureRequest } = require('https');
// Bootstrap: wire up the metrics instrumentation as soon as this script is loaded.
// Anything thrown during setup is only logged, because failures from the
// instrumentation shouldn't mess with the application.
try {
  registerInstrumentation()
} catch (error) {
  log(`An unexpected error occurred: ${error.stack}`)
}
/**
 * The main entry point of this instrumentation script. It sets up all
 * the memory and event loop tracking then sets a repeating timer to
 * collect the metrics and send them to a configured endpoint.
 *
 * If no usable metrics url is configured nothing is set up and the function
 * returns early. The repeating timer is unref'd so that it never keeps the
 * process alive on its own.
 */
function registerInstrumentation() {
  log('Registering metrics instrumentation')

  const herokuMetricsUrl = parseHerokuMetricsUrl()
  if (herokuMetricsUrl === undefined) {
    log('Metrics will not be collected for this application')
    return
  }

  const herokuMetricsInterval = parseHerokuMetricsInterval()

  // `memoryCounters` is reassigned after every successful report; the observer callback
  // reads the variable on each invocation so it always updates the current counters.
  let memoryCounters = initializeMemoryCounters()
  const gcObserver = new PerformanceObserver((value) => {
    value.getEntries().forEach(entry => updateMemoryCounters(memoryCounters, entry))
  })
  gcObserver.observe({ entryTypes: ['gc'] })

  const eventLoopHistogram = monitorEventLoopDelay()
  eventLoopHistogram.enable()

  // This baseline must always be an absolute (cumulative) reading. The value returned by
  // `eventLoopUtilization(previous)` is a delta, and using a delta as the next baseline
  // would make every later measurement span more than one interval.
  let previousEventLoopUtilization = performance.eventLoopUtilization()

  /**
   * Resumes GC observation and event loop delay sampling. Guarded on its own so that a
   * failure here can never escape the timer callback into the application.
   */
  function resumeCollection() {
    try {
      gcObserver.observe({ entryTypes: ['gc'] })
      eventLoopHistogram.enable()
    } catch (e) {
      log(`An unexpected error occurred: ${e.stack}`)
    }
  }

  const timeout = setInterval(() => {
    try {
      // take an absolute reading, then compute the delta between it and the baseline
      const currentEventLoopUtilization = performance.eventLoopUtilization()
      const eventLoopUtilization = performance.eventLoopUtilization(
        currentEventLoopUtilization,
        previousEventLoopUtilization
      )

      // pause collection while the payload is being assembled
      eventLoopHistogram.disable()
      gcObserver.disconnect()

      sendMetrics(herokuMetricsUrl, {
        counters: { ...memoryCounters },
        gauges: captureEventLoopGauges(eventLoopUtilization, eventLoopHistogram)
      })

      // reset memory and event loop measures (only once the report was handed off)
      previousEventLoopUtilization = currentEventLoopUtilization
      memoryCounters = initializeMemoryCounters()
      eventLoopHistogram.reset()
    } catch (e) {
      log(`An unexpected error occurred: ${e.stack}`)
    } finally {
      // always resume, otherwise a single failed report would leave the observer
      // disconnected and the histogram disabled
      resumeCollection()
    }
  }, herokuMetricsInterval)

  // `setInterval` actually returns a Timeout object but this isn't recognized by the type-checker which
  // thinks it's a number so adding this little guard to silence the type warnings
  if ('unref' in timeout) {
    timeout.unref()
  }
}
/**
 * Writes a message to the Node debug log under the `heroku` section, prefixed with
 * `[heroku-metrics]`. These messages will be displayed if `NODE_DEBUG=heroku` is set
 * in the environment.
 * @param {string} msg The text to log
 */
function log(msg) {
  const writeToDebugLog = debuglog('heroku')
  const line = `[heroku-metrics] ${msg}`
  writeToDebugLog(line)
}
/**
 * The url is where the runtime metrics will be posted to. This is parsed from the environment variable `HEROKU_METRICS_URL`
 * which is added to dynos by runtime only if the app has opted into the heroku runtime metrics beta. If this value is not
 * present, cannot be parsed, or does not use the `http:` or `https:` protocol, metrics collection must be disabled.
 * @returns {URL | undefined} The parsed url, or `undefined` when no usable url is configured
 */
function parseHerokuMetricsUrl() {
  const value = process.env.HEROKU_METRICS_URL
  if (!value) {
    log(`HEROKU_METRICS_URL was not set in the environment`)
    return undefined
  }

  log(`HEROKU_METRICS_URL set to "${value}"`)

  let url
  try {
    url = new URL(value)
  } catch (e) {
    log(`Invalid URL: ${e}`)
    return undefined
  }

  // Metrics are posted with the http/https request functions, which reject any other
  // protocol. Refuse such a url here instead of failing on every single report.
  if (url.protocol !== 'http:' && url.protocol !== 'https:') {
    log(`Invalid URL: unsupported protocol "${url.protocol}", expected "http:" or "https:"`)
    return undefined
  }

  return url
}
/**
 * Returns the time in milliseconds to wait between requests to send metrics to the collecting service. This value is
 * either parsed from the environment variable `METRICS_INTERVAL_OVERRIDE` or defaults to 20s. The parsed value also
 * can be no less than 10s and no more than 2147483647ms, the largest delay a timer accepts.
 * @returns {number} The interval in milliseconds
 */
function parseHerokuMetricsInterval() {
  const minimumInterval = 10 * 1000 // 10 seconds
  const defaultInterval = 20 * 1000 // 20 seconds
  // Timers coerce any delay above this value to 1ms, which would turn a very large
  // override into a flood of requests instead of a long pause.
  const maximumInterval = 2 ** 31 - 1 // 2147483647
  const value = process.env.METRICS_INTERVAL_OVERRIDE

  if (!value) {
    log(`Using default interval of ${defaultInterval}ms`)
    return defaultInterval
  }

  log(`METRICS_INTERVAL_OVERRIDE set to "${value}"`)
  const parsedValue = Number.parseInt(value, 10)

  if (Number.isNaN(parsedValue)) {
    log(`Invalid number, using the default interval of ${defaultInterval}ms instead`)
    return defaultInterval
  }

  if (parsedValue < minimumInterval) {
    log(`Interval is lower than the minimum, using the minimum interval of ${minimumInterval}ms instead`)
    return minimumInterval
  }

  if (parsedValue > maximumInterval) {
    log(`Interval is higher than the maximum, using the maximum interval of ${maximumInterval}ms instead`)
    return maximumInterval
  }

  log(`Using interval of ${parsedValue}ms`)
  return parsedValue
}
/**
 * Builds a fresh set of garbage collection counters where every counter starts at zero.
 * A new object is created on each call.
 * @returns {MemoryCounters}
 */
function initializeMemoryCounters() {
  /** @type {MemoryCounters} */
  const counters = {
    'node.gc.collections': 0,
    'node.gc.pause.ns': 0,
    'node.gc.old.collections': 0,
    'node.gc.old.pause.ns': 0,
    'node.gc.young.collections': 0,
    'node.gc.young.pause.ns': 0
  }
  return counters
}
/**
 * Adds one garbage collection performance entry to the given counters (mutating them).
 * Every entry is added to the overall collection count and pause time. An entry whose kind is
 * `constants.NODE_PERFORMANCE_GC_MINOR` is additionally counted as a "young" collection and
 * every other kind is counted as an "old" collection.
 * @param {MemoryCounters} memoryCounters The counters to update in place
 * @param {PerformanceEntry} performanceEntry The 'gc' entry, its `duration` is read as milliseconds
 */
function updateMemoryCounters(memoryCounters, performanceEntry) {
  const pauseNs = millisecondsToNanoseconds(performanceEntry.duration)

  // overall totals
  memoryCounters['node.gc.collections'] += 1
  memoryCounters['node.gc.pause.ns'] += pauseNs

  // per-generation totals
  const isYoungGeneration = getGcPerformanceEntryKind(performanceEntry) === constants.NODE_PERFORMANCE_GC_MINOR
  const collectionsKey = isYoungGeneration ? 'node.gc.young.collections' : 'node.gc.old.collections'
  const pauseKey = isYoungGeneration ? 'node.gc.young.pause.ns' : 'node.gc.old.pause.ns'
  memoryCounters[collectionsKey] += 1
  memoryCounters[pauseKey] += pauseNs
}
/**
 * Looks up the garbage collection `kind` of a 'gc' performance entry in a way that works
 * across Node versions: `detail.kind` is preferred and the entry's own `kind` is the fallback.
 * @param {PerformanceEntry} performanceEntry
 * @returns {number}
 */
function getGcPerformanceEntryKind(performanceEntry) {
  let kind
  // try/catch (rather than probing `performanceEntry.kind` first) avoids triggering deprecation warnings
  try {
    // v16 and up expose the kind on the `detail` object
    kind = performanceEntry.detail.kind
  } catch (_error) {
    // v14 & v15 have no usable `detail`, the kind sits directly on the entry
    kind = performanceEntry.kind
  }
  return kind
}
/**
 * Builds the event loop gauges from measurements taken with perf_hooks functionality.
 * The usage gauge is the `utilization` value passed through as-is. The delay gauges are the
 * histogram's 50th, 95th and 99th percentiles and its maximum, each run through
 * `nanosecondsToMilliseconds`.
 * @param {EventLoopUtilization} eventLoopUtilization
 * @param {IntervalHistogram} eventLoopHistogram
 * @returns {EventLoopGauges}
 */
function captureEventLoopGauges(eventLoopUtilization, eventLoopHistogram) {
  const usage = eventLoopUtilization.utilization
  const medianDelay = nanosecondsToMilliseconds(eventLoopHistogram.percentile(50))
  const p95Delay = nanosecondsToMilliseconds(eventLoopHistogram.percentile(95))
  const p99Delay = nanosecondsToMilliseconds(eventLoopHistogram.percentile(99))
  const maxDelay = nanosecondsToMilliseconds(eventLoopHistogram.max)

  return {
    'node.eventloop.usage.percent': usage,
    'node.eventloop.delay.ms.median': medianDelay,
    'node.eventloop.delay.ms.p95': p95Delay,
    'node.eventloop.delay.ms.p99': p99Delay,
    'node.eventloop.delay.ms.max': maxDelay
  }
}
/**
 * Scales a duration expressed in milliseconds up to nanoseconds.
 * @param {number} ms A millisecond value
 * @return {number} The same duration in nanoseconds
 */
function millisecondsToNanoseconds(ms) {
  const nanosecondsPerMillisecond = 1e6 // 1_000_000
  return ms * nanosecondsPerMillisecond
}
/**
 * Scales a duration expressed in nanoseconds down to milliseconds.
 * @param {number} ns A nanosecond value
 * @return {number} The same duration in milliseconds
 */
function nanosecondsToMilliseconds(ns) {
  const nanosecondsPerMillisecond = 1e6 // 1_000_000
  return ns / nanosecondsPerMillisecond
}
/**
 * Sends the collected metrics to the given endpoint as a JSON body using a POST request.
 * The https request function is used when the url's protocol is `https:` and the http one otherwise.
 * The outcome is only logged: request errors and unsuccessful responses are never thrown from the
 * event handlers registered here.
 * @param {URL} url
 * @param {MetricsPayload} payload
 * @returns {void}
 */
function sendMetrics(url, payload) {
  const request = url.protocol === 'https:' ? secureRequest : insecureRequest
  const payloadAsJson = JSON.stringify(payload)

  log(`Sending metrics to ${url.toString()}`)

  const clientRequest = request({
    method: 'POST',
    protocol: url.protocol,
    hostname: url.hostname,
    port: url.port,
    // the request path has to carry the query string too, `pathname` alone would drop it
    path: `${url.pathname}${url.search}`,
    headers: {
      'Content-Type': 'application/json',
      // byte length, not string length, so multi-byte characters are counted correctly
      'Content-Length': Buffer.byteLength(payloadAsJson)
    }
  })

  clientRequest.on('response', (res) => {
    // any 2xx status means the endpoint accepted the metrics
    if (res.statusCode >= 200 && res.statusCode < 300) {
      log('Metrics sent successfully')
    } else {
      log(`Tried to send metrics but response was: ${res.statusCode} - ${res.statusMessage}`)
    }
    // consume response data to free up memory
    // see: https://nodejs.org/docs/latest/api/http.html#http_class_http_clientrequest
    res.resume()
  })

  clientRequest.on('error', (err) => {
    log(`An error occurred while sending metrics - ${err}`)
  })

  clientRequest.write(payloadAsJson)
  clientRequest.end()
}