Skip to content

Commit

Permalink
Implement prometheus metric for last DAG rebuilt timestamp
Browse files Browse the repository at this point in the history
Signed-off-by: Steve Sloka <steves@heptio.com>
  • Loading branch information
stevesloka committed Aug 16, 2018
1 parent 4f92758 commit e36ed24
Show file tree
Hide file tree
Showing 6 changed files with 117 additions and 26 deletions.
32 changes: 17 additions & 15 deletions cmd/contour/contour.go
Expand Up @@ -85,10 +85,27 @@ func main() {
FieldLogger: log.WithField("context", "CacheHandler"),
}

metricsvc := metrics.Service{
Service: httpsvc.Service{
FieldLogger: log.WithField("context", "metricsvc"),
},
}

registry := prometheus.NewRegistry()
metricsvc.Registry = registry

// register default process / go collectors
registry.MustRegister(prometheus.NewProcessCollector(os.Getpid(), ""))
registry.MustRegister(prometheus.NewGoCollector())

// register our custom metrics
metrics := metrics.NewMetrics(registry)

reh := contour.ResourceEventHandler{
Notifier: &contour.HoldoffNotifier{
Notifier: &ch,
FieldLogger: log.WithField("context", "HoldoffNotifier"),
Metrics: metrics,
},
}

Expand All @@ -105,12 +122,6 @@ func main() {
serve.Flag("debug-http-address", "address the debug http endpoint will bind too").Default("127.0.0.1").StringVar(&debugsvc.Addr)
serve.Flag("debug-http-port", "port the debug http endpoint will bind too").Default("6060").IntVar(&debugsvc.Port)

metricsvc := metrics.Service{
Service: httpsvc.Service{
FieldLogger: log.WithField("context", "metricsvc"),
},
}

serve.Flag("http-address", "address the metrics http endpoint will bind too").Default("0.0.0.0").StringVar(&metricsvc.Addr)
serve.Flag("http-port", "port the metrics http endpoint will bind too").Default("8000").IntVar(&metricsvc.Port)

Expand Down Expand Up @@ -172,15 +183,6 @@ func main() {
}
k8s.WatchEndpoints(&g, client, wl, et)

registry := prometheus.NewRegistry()
metricsvc.Registry = registry

// register default process / go collectors
registry.MustRegister(prometheus.NewProcessCollector(os.Getpid(), ""))
registry.MustRegister(prometheus.NewGoCollector())

// register our custom metrics
metrics := metrics.NewMetrics(registry)
ch.Metrics = metrics
reh.Metrics = metrics

Expand Down
1 change: 1 addition & 0 deletions design/ingressroute-design.md
Expand Up @@ -374,6 +374,7 @@ Metrics are essential to any system. Contour will expose a `/metrics` Prometheus
- **contour_ingressroute_invalid_total (gauge):** Number of `Invalid` IngressRoute objects
- namespace
- vhost
- **contour_ingressroute_dagrebuild_timestamp (gauge):** Timestamp of the last DAG rebuild

## Envoy Metrics

Expand Down
18 changes: 18 additions & 0 deletions docs/prometheus.md
Expand Up @@ -45,3 +45,21 @@ Prometheus needs a configuration block that looks like this:
The main difference from the [official Prometheus Kubernetes sample config](https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml)
is the added interpretation of the `__meta_kubernetes_pod_annotation_prometheus_io_format` label, because Envoy
currently requires a [`format=prometheus` url parameter to return the stats in Prometheus format.](https://github.com/envoyproxy/envoy/issues/2182)

## Metrics

Metrics are essential to any system. Contour will expose a `/metrics` Prometheus endpoint with the following metrics:

- **contour_ingressroute_total (gauge):** Total number of IngressRoute objects that exist regardless of status (i.e. Valid / Invalid / Orphaned, etc). This metric should match the sum of `Orphaned` + `Valid` + `Invalid` IngressRoutes.
- namespace
- **contour_ingressroute_orphaned_total (gauge):** Number of `Orphaned` IngressRoute objects which have no root delegating to them
- namespace
- **contour_ingressroute_root_total (gauge):** Number of `Root` IngressRoute objects (Note: There will only be a single `Root` IngressRoute per vhost)
- namespace
- **contour_ingressroute_valid_total (gauge):** Number of `Valid` IngressRoute objects
- namespace
- vhost
- **contour_ingressroute_invalid_total (gauge):** Number of `Invalid` IngressRoute objects
- namespace
- vhost
- **contour_ingressroute_dagrebuild_timestamp (gauge):** Timestamp of the last DAG rebuild
4 changes: 4 additions & 0 deletions internal/contour/holdoff.go
Expand Up @@ -21,6 +21,7 @@ import (
"time"

"github.com/heptio/contour/internal/dag"
"github.com/heptio/contour/internal/metrics"
"github.com/sirupsen/logrus"
)

Expand All @@ -35,6 +36,7 @@ type HoldoffNotifier struct {

// Notifier to be called after delay.
Notifier
*metrics.Metrics

logrus.FieldLogger

Expand All @@ -55,6 +57,7 @@ func (hn *HoldoffNotifier) OnChange(builder *dag.Builder) {
hn.WithField("last update", since).Info("forcing update")
hn.Notifier.OnChange(builder)
hn.last = time.Now()
hn.Metrics.SetDAGRebuiltMetric(hn.last.Unix())
return
}

Expand All @@ -65,5 +68,6 @@ func (hn *HoldoffNotifier) OnChange(builder *dag.Builder) {
hn.WithField("last update", time.Since(hn.last)).Info("performing delayed update")
hn.Notifier.OnChange(builder)
hn.last = time.Now()
hn.Metrics.SetDAGRebuiltMetric(hn.last.Unix())
})
}
35 changes: 25 additions & 10 deletions internal/metrics/metrics.go
Expand Up @@ -24,11 +24,12 @@ import (

// Metrics provide Prometheus metrics for the app
type Metrics struct {
ingressRouteTotalGauge *prometheus.GaugeVec
ingressRouteRootTotalGauge *prometheus.GaugeVec
ingressRouteInvalidGauge *prometheus.GaugeVec
ingressRouteValidGauge *prometheus.GaugeVec
ingressRouteOrphanedGauge *prometheus.GaugeVec
ingressRouteTotalGauge *prometheus.GaugeVec
ingressRouteRootTotalGauge *prometheus.GaugeVec
ingressRouteInvalidGauge *prometheus.GaugeVec
ingressRouteValidGauge *prometheus.GaugeVec
ingressRouteOrphanedGauge *prometheus.GaugeVec
ingressRouteDAGRebuildGauge *prometheus.GaugeVec

CacheHandlerOnUpdateSummary prometheus.Summary
ResourceEventHandlerSummary *prometheus.SummaryVec
Expand All @@ -49,11 +50,12 @@ type Meta struct {
}

const (
IngressRouteTotalGauge = "contour_ingressroute_total"
IngressRouteRootTotalGauge = "contour_ingressroute_root_total"
IngressRouteInvalidGauge = "contour_ingressroute_invalid_total"
IngressRouteValidGauge = "contour_ingressroute_valid_total"
IngressRouteOrphanedGauge = "contour_ingressroute_orphaned_total"
IngressRouteTotalGauge = "contour_ingressroute_total"
IngressRouteRootTotalGauge = "contour_ingressroute_root_total"
IngressRouteInvalidGauge = "contour_ingressroute_invalid_total"
IngressRouteValidGauge = "contour_ingressroute_valid_total"
IngressRouteOrphanedGauge = "contour_ingressroute_orphaned_total"
IngressRouteDAGRebuildGauge = "contour_ingressroute_dagrebuild_timestamp"

cacheHandlerOnUpdateSummary = "contour_cachehandler_onupdate_duration_seconds"
resourceEventHandlerSummary = "contour_resourceeventhandler_duration_seconds"
Expand Down Expand Up @@ -98,6 +100,13 @@ func NewMetrics(registry *prometheus.Registry) *Metrics {
},
[]string{"namespace"},
),
ingressRouteDAGRebuildGauge: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: IngressRouteDAGRebuildGauge,
Help: "Timestamp of the last DAG rebuild",
},
[]string{},
),
CacheHandlerOnUpdateSummary: prometheus.NewSummary(prometheus.SummaryOpts{
Name: cacheHandlerOnUpdateSummary,
Help: "Histogram for the runtime of xDS cache regeneration",
Expand All @@ -123,11 +132,17 @@ func (m *Metrics) register(registry *prometheus.Registry) {
m.ingressRouteInvalidGauge,
m.ingressRouteValidGauge,
m.ingressRouteOrphanedGauge,
m.ingressRouteDAGRebuildGauge,
m.CacheHandlerOnUpdateSummary,
m.ResourceEventHandlerSummary,
)
}

// SetDAGRebuiltMetric publishes timestamp as the value of the
// contour_ingressroute_dagrebuild_timestamp gauge. The gauge vector is
// declared without labels, so the single unlabelled child is looked up and
// set to timestamp converted to float64.
func (m *Metrics) SetDAGRebuiltMetric(timestamp int64) {
	rebuilt := m.ingressRouteDAGRebuildGauge.WithLabelValues()
	rebuilt.Set(float64(timestamp))
}

// SetIngressRouteMetric takes
func (m *Metrics) SetIngressRouteMetric(metrics IngressRouteMetric) {
for meta, value := range metrics.Total {
Expand Down
53 changes: 52 additions & 1 deletion internal/metrics/metrics_test.go
Expand Up @@ -16,6 +16,7 @@ package metrics
import (
"reflect"
"testing"
"time"

"github.com/prometheus/client_model/go"

Expand All @@ -27,7 +28,57 @@ type testMetric struct {
want []*io_prometheus_client.Metric
}

func TestWriteMetric(t *testing.T) {
// TestWriteDAGTimestampMetric verifies that a value passed to
// SetDAGRebuiltMetric is exposed through the registry as the single sample of
// the DAG rebuild timestamp gauge.
func TestWriteDAGTimestampMetric(t *testing.T) {
	// 2009-11-17T20:34:58Z expressed as Unix seconds; the gauge should report
	// exactly this number as a float64.
	wantValue := 1.258490098e+09

	tests := map[string]struct {
		timestampMetric testMetric
		value           int64
	}{
		"simple": {
			value: time.Date(2009, 11, 17, 20, 34, 58, 651387237, time.UTC).Unix(),
			timestampMetric: testMetric{
				metric: IngressRouteDAGRebuildGauge,
				want: []*io_prometheus_client.Metric{
					{
						Gauge: &io_prometheus_client.Gauge{
							Value: &wantValue,
						},
					},
				},
			},
		},
	}

	for name, tc := range tests {
		t.Run(name, func(t *testing.T) {
			registry := prometheus.NewRegistry()
			NewMetrics(registry).SetDAGRebuiltMetric(tc.value)

			// Gather from the fresh registry and the default gatherer together.
			families, err := prometheus.Gatherers{
				registry,
				prometheus.DefaultGatherer,
			}.Gather()
			if err != nil {
				t.Fatal(err)
			}

			// Pick out the samples of the metric family under test; stays an
			// empty slice when the family is absent.
			got := []*io_prometheus_client.Metric{}
			for _, family := range families {
				if family.GetName() != tc.timestampMetric.metric {
					continue
				}
				got = family.Metric
			}

			want := tc.timestampMetric.want
			if !reflect.DeepEqual(got, want) {
				t.Fatalf("write metric timestamp metric failed, want: %v got: %v", want, got)
			}
		})
	}
}

func TestWriteIngressRouteMetric(t *testing.T) {
tests := map[string]struct {
irMetrics IngressRouteMetric
total testMetric
Expand Down

0 comments on commit e36ed24

Please sign in to comment.