Skip to content

Commit

Permalink
metrics: allow for best practices
Browse files Browse the repository at this point in the history
To follow the official Prometheus guidelines [1], as well
as comments from Prometheus developers [2], we should be
using seconds as the unit for all ping_rtt metrics (e.g.
`ping_rtt_best_seconds` instead of `ping_rtt_best_ms`).

This adds a flag `--metrics.rttunit=s`, to allow the user
to follow those best practices.

To keep upgrading users from tripping over this change, the
default units will remain millis.

[1]: https://prometheus.io/docs/practices/naming/#base-units
[2]: https://www.robustperception.io/who-wants-seconds

Fixes: #16
  • Loading branch information
dmke committed Mar 19, 2021
1 parent 5fdbccb commit 92f0c72
Show file tree
Hide file tree
Showing 4 changed files with 114 additions and 22 deletions.
29 changes: 28 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ targets:
- 2001:4860:4860::8888
- 2001:4860:4860::8844
- google.com

dns:
refresh: 2m15s
nameserver: 1.1.1.1
Expand Down Expand Up @@ -61,6 +61,33 @@ name).
Additionally, a `ping_up` metric reports whether the exporter
is running (and in which version).

#### Different time unit

The `*_ms` metrics actually violate the recommendations by
[Prometheus](https://prometheus.io/docs/practices/naming/#base-units),
whereby time values should be expressed in seconds (not milliseconds).

To accomodate for this, we've added a command line switch to select
the proper scale:

```console
$ # keep using millis
$ ./ping_exporter --metrics.rttunit=ms [other options]
$ # use seconds instead
$ ./ping_exporter --metrics.rttunit=s [other options]
$ # use both millis and seconds
$ ./ping_exporter --metrics.rttunit=both [other options]
```

For the foreseeable future, the default is `--metrics.rttunit=ms`.

If you used the `ping_exporter` in the past, and want to migrate, start
using `--metrics.rttunit=both` now. This gives you the opportunity to
update all your alerts, dashboards, and other software depending on ms
values to use proper scale (you "just" need to apply a factor of 1000
on everything). When you're ready, you just need to switch to
`--metrics.rttunit=s`.

#### Deprecated metrics

- `ping_rtt_ms`: Round trip trim in millis
Expand Down
44 changes: 23 additions & 21 deletions collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,19 @@ import (
"github.com/prometheus/client_golang/prometheus"
)

const prefix = "ping_"
func newDesc(name, help string, variableLabels []string, constLabels prometheus.Labels) *prometheus.Desc {
return prometheus.NewDesc("ping_"+name, help, variableLabels, constLabels)
}

var (
labelNames = []string{"target", "ip", "ip_version"}
rttDesc = prometheus.NewDesc(prefix+"rtt_ms", "Round trip time in millis (deprecated)", append(labelNames, "type"), nil)
bestDesc = prometheus.NewDesc(prefix+"rtt_best_ms", "Best round trip time in millis", labelNames, nil)
worstDesc = prometheus.NewDesc(prefix+"rtt_worst_ms", "Worst round trip time in millis", labelNames, nil)
meanDesc = prometheus.NewDesc(prefix+"rtt_mean_ms", "Mean round trip time in millis", labelNames, nil)
stddevDesc = prometheus.NewDesc(prefix+"rtt_std_deviation_ms", "Standard deviation in millis", labelNames, nil)
lossDesc = prometheus.NewDesc(prefix+"loss_percent", "Packet loss in percent", labelNames, nil)
progDesc = prometheus.NewDesc(prefix+"up", "ping_exporter version", nil, prometheus.Labels{"version": version})
rttDesc = newScaledDesc("rtt_seconds", "Round trip time", append(labelNames, "type"))

This comment has been minimized.

Copy link
@lapo-luchini

lapo-luchini May 17, 2022

Is this no longer deprecated, or was that part removed by mistake?

This comment has been minimized.

Copy link
@lapo-luchini

lapo-luchini May 17, 2022

Judging from the rest of the patch is is stil under if enableDeprecatedMetrics.

bestDesc = newScaledDesc("rtt_best_seconds", "Best round trip time", labelNames)
worstDesc = newScaledDesc("rtt_worst_seconds", "Worst round trip time", labelNames)
meanDesc = newScaledDesc("rtt_mean_seconds", "Mean round trip time", labelNames)
stddevDesc = newScaledDesc("rtt_std_deviation_seconds", "Standard deviation", labelNames)
lossDesc = newDesc("loss_percent", "Packet loss in percent", labelNames, nil)
progDesc = newDesc("up", "ping_exporter version", nil, prometheus.Labels{"version": version})
mutex = &sync.Mutex{}
)

Expand All @@ -29,13 +31,13 @@ type pingCollector struct {

func (p *pingCollector) Describe(ch chan<- *prometheus.Desc) {
if enableDeprecatedMetrics {
ch <- rttDesc
rttDesc.Describe(ch)
}
bestDesc.Describe(ch)
worstDesc.Describe(ch)
meanDesc.Describe(ch)
stddevDesc.Describe(ch)
ch <- lossDesc
ch <- bestDesc
ch <- worstDesc
ch <- meanDesc
ch <- stddevDesc
ch <- progDesc
}

Expand All @@ -54,16 +56,16 @@ func (p *pingCollector) Collect(ch chan<- prometheus.Metric) {

if metrics.PacketsSent > metrics.PacketsLost {
if enableDeprecatedMetrics {
ch <- prometheus.MustNewConstMetric(rttDesc, prometheus.GaugeValue, float64(metrics.Best), append(l, "best")...)
ch <- prometheus.MustNewConstMetric(rttDesc, prometheus.GaugeValue, float64(metrics.Worst), append(l, "worst")...)
ch <- prometheus.MustNewConstMetric(rttDesc, prometheus.GaugeValue, float64(metrics.Mean), append(l, "mean")...)
ch <- prometheus.MustNewConstMetric(rttDesc, prometheus.GaugeValue, float64(metrics.StdDev), append(l, "std_dev")...)
rttDesc.Collect(ch, metrics.Best, append(l, "best")...)
rttDesc.Collect(ch, metrics.Worst, append(l, "worst")...)
rttDesc.Collect(ch, metrics.Mean, append(l, "mean")...)
rttDesc.Collect(ch, metrics.StdDev, append(l, "std_dev")...)
}

ch <- prometheus.MustNewConstMetric(bestDesc, prometheus.GaugeValue, float64(metrics.Best), l...)
ch <- prometheus.MustNewConstMetric(worstDesc, prometheus.GaugeValue, float64(metrics.Worst), l...)
ch <- prometheus.MustNewConstMetric(meanDesc, prometheus.GaugeValue, float64(metrics.Mean), l...)
ch <- prometheus.MustNewConstMetric(stddevDesc, prometheus.GaugeValue, float64(metrics.StdDev), l...)
bestDesc.Collect(ch, metrics.Best, l...)
worstDesc.Collect(ch, metrics.Worst, l...)
meanDesc.Collect(ch, metrics.Mean, l...)
stddevDesc.Collect(ch, metrics.StdDev, l...)
}

loss := float64(metrics.PacketsLost) / float64(metrics.PacketsSent)
Expand Down
8 changes: 8 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ var (
var (
enableDeprecatedMetrics = true // default may change in future
deprecatedMetrics = kingpin.Flag("metrics.deprecated", "Enable or disable deprecated metrics (`ping_rtt_ms{type=best|worst|mean|std_dev}`). Valid choices: [enable, disable]").Default("enable").String()

rttMetricsScale = rttInMills // might change in future
rttMode = kingpin.Flag("metrics.rttunit", "Export ping results as either millis (default), or seconds (best practice), or both (for migrations). Valid choices: [ms, s, both]").Default("ms").String()
)

func init() {
Expand Down Expand Up @@ -66,6 +69,11 @@ func main() {
kingpin.FatalUsage("metrics.deprecated must be `enable` or `disable`")
}

if rttMetricsScale = rttUnitFromString(*rttMode); rttMetricsScale == rttInvalid {
kingpin.FatalUsage("metrics.rttunit must be `ms` for millis, or `s` for seconds, or `both`")
}
log.Infof("rtt units: %#v", rttMetricsScale)

if mpath := *metricsPath; mpath == "" {
log.Warnln("web.telemetry-path is empty, correcting to `/metrics`")
mpath = "/metrics"
Expand Down
55 changes: 55 additions & 0 deletions rttscale.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package main

import "github.com/prometheus/client_golang/prometheus"

type rttUnit int

const (
rttInvalid rttUnit = iota
rttInMills
rttInSeconds
rttBoth
)

func rttUnitFromString(s string) rttUnit {
switch s {
case "s":
return rttInSeconds
case "ms":
return rttInMills
case "both":
return rttBoth
default:
return rttInvalid
}
}

type scaledMetrics struct {
Millis *prometheus.Desc
Seconds *prometheus.Desc
}

func (s *scaledMetrics) Describe(ch chan<- *prometheus.Desc) {
if rttMetricsScale == rttInMills || rttMetricsScale == rttBoth {
ch <- s.Millis
}
if rttMetricsScale == rttInSeconds || rttMetricsScale == rttBoth {
ch <- s.Seconds
}
}

func (s *scaledMetrics) Collect(ch chan<- prometheus.Metric, value float32, labelValues ...string) {
if rttMetricsScale == rttInMills || rttMetricsScale == rttBoth {
ch <- prometheus.MustNewConstMetric(s.Millis, prometheus.GaugeValue, float64(value), labelValues...)
}
if rttMetricsScale == rttInSeconds || rttMetricsScale == rttBoth {
ch <- prometheus.MustNewConstMetric(s.Seconds, prometheus.GaugeValue, float64(value)/1000, labelValues...)
}
}

func newScaledDesc(name, help string, variableLabels []string) scaledMetrics {
return scaledMetrics{
Millis: newDesc(name+"_ms", help+" in millis (deprecated)", variableLabels, nil),
Seconds: newDesc(name+"_seconds", help+" in seconds", variableLabels, nil),
}
}

0 comments on commit 92f0c72

Please sign in to comment.