-
Notifications
You must be signed in to change notification settings - Fork 151
/
metrics.lit
281 lines (269 loc) · 16.4 KB
/
metrics.lit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
\title{Metrics}{metrics}
\use-plugin{concourse-docs}
Metrics are essential in understanding how any large system is behaving and
performing. Concourse can emit metrics about both the system health itself and
about the builds that it is running. Operators can tap into these metrics in
order to observe the health of the system.
In the spirit of openness, the \link{metrics from our
deployment}{https://p.datadoghq.com/sb/bb663c3fb-c0f3ba68561631a1b130a4507bf36a15?tpl_var_environment=ci&tpl_var_web=ci-web&tpl_var_worker=ci-worker} are
public. We consider it a bug to emit anything sensitive or secret into our
metrics pipeline.
\section{
\title{Configuring Metrics}
The \reference{web-node} can be configured to emit metrics on start.
Currently supported metrics emitters are InfluxDB, NewRelic, Prometheus, and
Datadog. There is also a dummy emitter that will just spit the metrics out
into the logs at \code{DEBUG} level, which can be enabled with the
\code{--emit-to-logs} flag.
Regardless of your metrics emitter, you can set
\code{CONCOURSE_METRICS_BUFFER_SIZE} to determine how many metrics emissions
are sent at a time. Increasing this number can be helpful if sending metrics
is regularly failing (due to rate limiting or network failures) or if latency
is particularly high.
There are various flags for different emitters; run \code{concourse web
--help} and look for "Metric Emitter" to see what's available.
}
\section{
\title{What's emitted?}
This reference section lists all of the metrics that Concourse emits via
the Prometheus emitter.
To make this document easy to maintain, Prometheus is used as the "source of
truth" - primarily because it has help text built-in, making this list easy
to generate. Treat this list as a reference when looking for the equivalent
metric names for your emitter of choice.
\promethus-docs{{{
# HELP concourse_builds_latest_completed_build_status Status of Latest Completed Build. 0=Success, 1=Failed, 2=Aborted, 3=Errored.
# TYPE concourse_builds_latest_completed_build_status gauge
concourse_builds_latest_completed_build_status 0
# HELP concourse_builds_aborted_total Total number of Concourse builds aborted.
# TYPE concourse_builds_aborted_total counter
concourse_builds_aborted_total 0
# HELP concourse_builds_errored_total Total number of Concourse builds errored.
# TYPE concourse_builds_errored_total counter
concourse_builds_errored_total 0
# HELP concourse_builds_failed_total Total number of Concourse builds failed.
# TYPE concourse_builds_failed_total counter
concourse_builds_failed_total 0
# HELP concourse_builds_finished_total Total number of Concourse builds finished.
# TYPE concourse_builds_finished_total counter
concourse_builds_finished_total 0
# HELP concourse_builds_running Number of Concourse builds currently running.
# TYPE concourse_builds_running gauge
concourse_builds_running 0
# HELP concourse_builds_started_total Total number of Concourse builds started.
# TYPE concourse_builds_started_total counter
concourse_builds_started_total 0
# HELP concourse_builds_succeeded_total Total number of Concourse builds succeeded.
# TYPE concourse_builds_succeeded_total counter
concourse_builds_succeeded_total 0
# HELP concourse_concurrent_requests Number of concurrent requests being served by endpoints that have a specified limit of concurrent requests.
# TYPE concourse_concurrent_requests gauge
concourse_concurrent_requests{action="ListAllJobs"} 0
# HELP concourse_concurrent_requests_limit_hit_total Total number of requests rejected because the server was already serving too many concurrent requests.
# TYPE concourse_concurrent_requests_limit_hit_total counter
concourse_concurrent_requests_limit_hit_total{action="ListAllJobs"} 8
# HELP concourse_db_connections Current number of concourse database connections
# TYPE concourse_db_connections gauge
concourse_db_connections{dbname="api"} 1
concourse_db_connections{dbname="backend"} 1
concourse_db_connections{dbname="gc"} 1
# HELP concourse_db_queries_total Total number of database Concourse database queries
# TYPE concourse_db_queries_total counter
concourse_db_queries_total 81
# HELP concourse_http_responses_duration_seconds Response time in seconds
# TYPE concourse_http_responses_duration_seconds histogram
concourse_http_responses_duration_seconds_bucket{method="POST",route="RegisterWorker",status="200",le="0.005"} 0
concourse_http_responses_duration_seconds_bucket{method="POST",route="RegisterWorker",status="200",le="0.01"} 0
concourse_http_responses_duration_seconds_bucket{method="POST",route="RegisterWorker",status="200",le="0.025"} 1
concourse_http_responses_duration_seconds_bucket{method="POST",route="RegisterWorker",status="200",le="0.05"} 1
concourse_http_responses_duration_seconds_bucket{method="POST",route="RegisterWorker",status="200",le="0.1"} 1
concourse_http_responses_duration_seconds_bucket{method="POST",route="RegisterWorker",status="200",le="0.25"} 1
concourse_http_responses_duration_seconds_bucket{method="POST",route="RegisterWorker",status="200",le="0.5"} 1
concourse_http_responses_duration_seconds_bucket{method="POST",route="RegisterWorker",status="200",le="1"} 1
concourse_http_responses_duration_seconds_bucket{method="POST",route="RegisterWorker",status="200",le="2.5"} 1
concourse_http_responses_duration_seconds_bucket{method="POST",route="RegisterWorker",status="200",le="5"} 1
concourse_http_responses_duration_seconds_bucket{method="POST",route="RegisterWorker",status="200",le="10"} 1
concourse_http_responses_duration_seconds_bucket{method="POST",route="RegisterWorker",status="200",le="+Inf"} 1
concourse_http_responses_duration_seconds_sum{method="POST",route="RegisterWorker",status="200"} 0.016757433
concourse_http_responses_duration_seconds_count{method="POST",route="RegisterWorker",status="200"} 1
# HELP concourse_jobs_scheduled_total Total number of Concourse jobs scheduled.
# TYPE concourse_jobs_scheduled_total counter
concourse_jobs_scheduled_total 0
# HELP concourse_jobs_scheduling Number of Concourse jobs currently being scheduled.
# TYPE concourse_jobs_scheduling gauge
concourse_jobs_scheduling 0
# HELP concourse_lidar_check_queue_size The size of the checks queue
# TYPE concourse_lidar_check_queue_size gauge
concourse_lidar_check_queue_size 0
# HELP concourse_lidar_checks_enqueued_total Total number of checks enqueued
# TYPE concourse_lidar_checks_enqueued_total counter
concourse_lidar_checks_enqueued_total 0
# HELP concourse_lidar_checks_finished_total Total number of checks finished
# TYPE concourse_lidar_checks_finished_total counter
concourse_lidar_checks_finished_total{status="error"} 0
concourse_lidar_checks_finished_total{status="success"} 0
# HELP concourse_lidar_checks_started_total Total number of checks started
# TYPE concourse_lidar_checks_started_total counter
concourse_lidar_checks_started_total 0
# HELP concourse_locks_held Database locks held
# TYPE concourse_locks_held gauge
concourse_locks_held{type="Batch"} 0
concourse_locks_held{type="DatabaseMigration"} 0
# HELP concourse_tasks_wait_duration Elapsed time waiting for execution
# TYPE concourse_tasks_wait_duration histogram
concourse_tasks_wait_duration_bucket{platform="linux",teamId="1",workerTags="",le="1"} 0
concourse_tasks_wait_duration_bucket{platform="linux",teamId="1",workerTags="",le="15"} 0
concourse_tasks_wait_duration_bucket{platform="linux",teamId="1",workerTags="",le="30"} 0
concourse_tasks_wait_duration_bucket{platform="linux",teamId="1",workerTags="",le="60"} 0
concourse_tasks_wait_duration_bucket{platform="linux",teamId="1",workerTags="",le="120"} 1
concourse_tasks_wait_duration_bucket{platform="linux",teamId="1",workerTags="",le="180"} 1
concourse_tasks_wait_duration_bucket{platform="linux",teamId="1",workerTags="",le="240"} 1
concourse_tasks_wait_duration_bucket{platform="linux",teamId="1",workerTags="",le="300"} 1
concourse_tasks_wait_duration_bucket{platform="linux",teamId="1",workerTags="",le="600"} 1
concourse_tasks_wait_duration_bucket{platform="linux",teamId="1",workerTags="",le="1200"} 1
concourse_tasks_wait_duration_bucket{platform="linux",teamId="1",workerTags="",le="+Inf"} 1
concourse_tasks_wait_duration_sum{platform="linux",teamId="1",workerTags=""} 60.0001929
concourse_tasks_wait_duration_count{platform="linux",teamId="1",workerTags=""} 1
# HELP concourse_tasks_waiting Number of Concourse tasks currently waiting.
# TYPE concourse_tasks_waiting gauge
concourse_tasks_waiting{platform="linux",teamId="1",workerTags=""} 2
# HELP concourse_volumes_volumes_streamed Total number of volumes streamed from one worker to the other
# TYPE concourse_volumes_volumes_streamed counter
concourse_volumes_volumes_streamed 0
# HELP concourse_workers_containers Number of containers per worker
# TYPE concourse_workers_containers gauge
concourse_workers_containers{platform="linux",tags="",team="",worker="39d19eebdddb"} 0
# HELP concourse_workers_registered Number of workers per state as seen by the database
# TYPE concourse_workers_registered gauge
concourse_workers_registered{state="landed"} 0
concourse_workers_registered{state="landing"} 0
concourse_workers_registered{state="retiring"} 0
concourse_workers_registered{state="running"} 1
concourse_workers_registered{state="stalled"} 1
# HELP concourse_workers_tasks Number of active tasks per worker
# TYPE concourse_workers_tasks gauge
concourse_workers_tasks{platform="linux",worker="39d19eebdddb"} 0
# HELP concourse_workers_volumes Number of volumes per worker
# TYPE concourse_workers_volumes gauge
concourse_workers_volumes{platform="linux",tags="",team="",worker="39d19eebdddb"} 4
# HELP go_gc_duration_seconds A summary of the GC invocation durations.
# TYPE go_gc_duration_seconds summary
go_gc_duration_seconds{quantile="0"} 1.7691e-05
go_gc_duration_seconds{quantile="0.25"} 2.658e-05
go_gc_duration_seconds{quantile="0.5"} 3.1544e-05
go_gc_duration_seconds{quantile="0.75"} 5.8253e-05
go_gc_duration_seconds{quantile="1"} 0.000472771
go_gc_duration_seconds_sum 0.001205599
go_gc_duration_seconds_count 13
# HELP go_goroutines Number of goroutines that currently exist.
# TYPE go_goroutines gauge
go_goroutines 129
# HELP go_info Information about the Go environment.
# TYPE go_info gauge
go_info{version="go1.14"} 1
# HELP go_memstats_alloc_bytes Number of bytes allocated and still in use.
# TYPE go_memstats_alloc_bytes gauge
go_memstats_alloc_bytes 7.279464e+06
# HELP go_memstats_alloc_bytes_total Total number of bytes allocated, even if freed.
# TYPE go_memstats_alloc_bytes_total counter
go_memstats_alloc_bytes_total 3.9906496e+07
# HELP go_memstats_buck_hash_sys_bytes Number of bytes used by the profiling bucket hash table.
# TYPE go_memstats_buck_hash_sys_bytes gauge
go_memstats_buck_hash_sys_bytes 1.462814e+06
# HELP go_memstats_frees_total Total number of frees.
# TYPE go_memstats_frees_total counter
go_memstats_frees_total 162143
# HELP go_memstats_gc_cpu_fraction The fraction of this program's available CPU time used by the GC since the program started.
# TYPE go_memstats_gc_cpu_fraction gauge
go_memstats_gc_cpu_fraction 0.0008474863559755615
# HELP go_memstats_gc_sys_bytes Number of bytes used for garbage collection system metadata.
# TYPE go_memstats_gc_sys_bytes gauge
go_memstats_gc_sys_bytes 3.586312e+06
# HELP go_memstats_heap_alloc_bytes Number of heap bytes allocated and still in use.
# TYPE go_memstats_heap_alloc_bytes gauge
go_memstats_heap_alloc_bytes 7.279464e+06
# HELP go_memstats_heap_idle_bytes Number of heap bytes waiting to be used.
# TYPE go_memstats_heap_idle_bytes gauge
go_memstats_heap_idle_bytes 5.5525376e+07
# HELP go_memstats_heap_inuse_bytes Number of heap bytes that are in use.
# TYPE go_memstats_heap_inuse_bytes gauge
go_memstats_heap_inuse_bytes 9.91232e+06
# HELP go_memstats_heap_objects Number of allocated objects.
# TYPE go_memstats_heap_objects gauge
go_memstats_heap_objects 34933
# HELP go_memstats_heap_released_bytes Number of heap bytes released to OS.
# TYPE go_memstats_heap_released_bytes gauge
go_memstats_heap_released_bytes 5.2813824e+07
# HELP go_memstats_heap_sys_bytes Number of heap bytes obtained from system.
# TYPE go_memstats_heap_sys_bytes gauge
go_memstats_heap_sys_bytes 6.5437696e+07
# HELP go_memstats_last_gc_time_seconds Number of seconds since 1970 of last garbage collection.
# TYPE go_memstats_last_gc_time_seconds gauge
go_memstats_last_gc_time_seconds 1.5842847322165985e+09
# HELP go_memstats_lookups_total Total number of pointer lookups.
# TYPE go_memstats_lookups_total counter
go_memstats_lookups_total 0
# HELP go_memstats_mallocs_total Total number of mallocs.
# TYPE go_memstats_mallocs_total counter
go_memstats_mallocs_total 197076
# HELP go_memstats_mcache_inuse_bytes Number of bytes in use by mcache structures.
# TYPE go_memstats_mcache_inuse_bytes gauge
go_memstats_mcache_inuse_bytes 13888
# HELP go_memstats_mcache_sys_bytes Number of bytes used for mcache structures obtained from system.
# TYPE go_memstats_mcache_sys_bytes gauge
go_memstats_mcache_sys_bytes 16384
# HELP go_memstats_mspan_inuse_bytes Number of bytes in use by mspan structures.
# TYPE go_memstats_mspan_inuse_bytes gauge
go_memstats_mspan_inuse_bytes 164832
# HELP go_memstats_mspan_sys_bytes Number of bytes used for mspan structures obtained from system.
# TYPE go_memstats_mspan_sys_bytes gauge
go_memstats_mspan_sys_bytes 196608
# HELP go_memstats_next_gc_bytes Number of heap bytes when next garbage collection will take place.
# TYPE go_memstats_next_gc_bytes gauge
go_memstats_next_gc_bytes 1.1293728e+07
# HELP go_memstats_other_sys_bytes Number of bytes used for other system allocations.
# TYPE go_memstats_other_sys_bytes gauge
go_memstats_other_sys_bytes 2.16009e+06
# HELP go_memstats_stack_inuse_bytes Number of bytes in use by the stack allocator.
# TYPE go_memstats_stack_inuse_bytes gauge
go_memstats_stack_inuse_bytes 1.671168e+06
# HELP go_memstats_stack_sys_bytes Number of bytes obtained from system for stack allocator.
# TYPE go_memstats_stack_sys_bytes gauge
go_memstats_stack_sys_bytes 1.671168e+06
# HELP go_memstats_sys_bytes Number of bytes obtained from system.
# TYPE go_memstats_sys_bytes gauge
go_memstats_sys_bytes 7.4531072e+07
# HELP go_threads Number of OS threads created.
# TYPE go_threads gauge
go_threads 14
# HELP process_cpu_seconds_total Total user and system CPU time spent in seconds.
# TYPE process_cpu_seconds_total counter
process_cpu_seconds_total 0.93
# HELP process_max_fds Maximum number of open file descriptors.
# TYPE process_max_fds gauge
process_max_fds 1.048576e+06
# HELP process_open_fds Number of open file descriptors.
# TYPE process_open_fds gauge
process_open_fds 30
# HELP process_resident_memory_bytes Resident memory size in bytes.
# TYPE process_resident_memory_bytes gauge
process_resident_memory_bytes 6.4331776e+07
# HELP process_start_time_seconds Start time of the process since unix epoch in seconds.
# TYPE process_start_time_seconds gauge
process_start_time_seconds 1.58428471564e+09
# HELP process_virtual_memory_bytes Virtual memory size in bytes.
# TYPE process_virtual_memory_bytes gauge
process_virtual_memory_bytes 1.322156032e+09
# HELP process_virtual_memory_max_bytes Maximum amount of virtual memory available in bytes.
# TYPE process_virtual_memory_max_bytes gauge
process_virtual_memory_max_bytes -1
# HELP promhttp_metric_handler_requests_in_flight Current number of scrapes being served.
# TYPE promhttp_metric_handler_requests_in_flight gauge
promhttp_metric_handler_requests_in_flight 1
# HELP promhttp_metric_handler_requests_total Total number of scrapes by HTTP status code.
# TYPE promhttp_metric_handler_requests_total counter
promhttp_metric_handler_requests_total{code="200"} 3
promhttp_metric_handler_requests_total{code="500"} 0
promhttp_metric_handler_requests_total{code="503"} 0
}}}
}