Skip to content

Commit

Permalink
Add Badger expvar metrics to Prometheus metrics. (#5094)
Browse files Browse the repository at this point in the history
Fixes #4772 

This adds the following badger metrics into /debug/prometheus_metrics:

badger_v2_disk_reads_total
badger_v2_disk_writes_total
badger_v2_gets_total
badger_v2_lsm_bloom_hits_total (per level)
badger_v2_lsm_level_gets_total (per level)
badger_v2_memtable_gets_total
badger_v2_puts_total
badger_v2_read_bytes
badger_v2_written_bytes

This is added via the Prometheus expvar collector.
Update metrics_test.go to check for the seven initial Badger metrics (excluding the LSM metrics).

This adds to the exposed Prometheus metrics. These metrics were already accessible via /debug/vars.

The LSM metrics don't show up immediately. They show up after there are hits/gets to the LSM tree.

Changes
* Add Badger metrics to Prometheus
* Update metrics test to check for badger metrics.

Co-authored-by: Ibrahim Jarif <ibrahim@dgraph.io>
  • Loading branch information
danielmai and Ibrahim Jarif committed Apr 24, 2020
1 parent 6c82ef0 commit 2a390c0
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 7 deletions.
12 changes: 5 additions & 7 deletions dgraph/cmd/alpha/metrics_test.go
Expand Up @@ -38,13 +38,11 @@ func TestMetrics(t *testing.T) {
// Go Runtime Metrics
"go_goroutines", "go_memstats_gc_cpu_fraction", "go_memstats_heap_alloc_bytes",
"go_memstats_heap_idle_bytes", "go_memstats_heap_inuse_bytes", "dgraph_latency_bucket",
// TODO: add support for the following Badger metrics, which is currently only available
// through /debug/vars. Consider manually add them as OpenCensus metrics, and then
// test them here

// "badger_disk_reads_total", "badger_disk_writes_total", "badger_gets_total",
// "badger_memtable_gets_total", "badger_puts_total", "badger_read_bytes",
// "badger_written_bytes",
// Badger Metrics
"badger_v2_disk_reads_total", "badger_v2_disk_writes_total", "badger_v2_gets_total",
"badger_v2_memtable_gets_total", "badger_v2_puts_total", "badger_v2_read_bytes",
"badger_v2_written_bytes",

// Dgraph Memory Metrics
"dgraph_memory_idle_bytes", "dgraph_memory_inuse_bytes", "dgraph_memory_proc_bytes",
Expand All @@ -61,7 +59,7 @@ func TestMetrics(t *testing.T) {

func extractMetrics(metrics string) (map[string]interface{}, error) {
lines := strings.Split(metrics, "\n")
metricRegex, err := regexp.Compile("(^[a-z_]+)")
metricRegex, err := regexp.Compile("(^[a-z0-9_]+)")
if err != nil {
return nil, err
}
Expand Down
65 changes: 65 additions & 0 deletions x/metrics.go
Expand Up @@ -223,7 +223,11 @@ func init() {

CheckfNoTrace(view.Register(allViews...))

prometheus.MustRegister(NewBadgerCollector())

pe, err := oc_prom.NewExporter(oc_prom.Options{
// DefaultRegisterer includes a ProcessCollector for process_* metrics, a GoCollector for
// go_* metrics, and the badger_* metrics.
Registry: prometheus.DefaultRegisterer.(*prometheus.Registry),
Namespace: "dgraph",
OnError: func(err error) { glog.Errorf("%v", err) },
Expand All @@ -234,6 +238,67 @@ func init() {
http.Handle("/debug/prometheus_metrics", pe)
}

// NewBadgerCollector builds the prometheus Collector that exposes Badger's
// expvar-published variables as Prometheus metrics.
//
// Every expvar key is exported under a metric of the same name. The per-level
// LSM counters carry a "level" label and the size metrics carry a "dir" label;
// all other metrics are unlabelled.
func NewBadgerCollector() prometheus.Collector {
	// One row per exported variable: the expvar key (also used as the metric
	// name), its help text, and the variable label names (nil for none).
	specs := []struct {
		name   string
		help   string
		labels []string
	}{
		{"badger_v2_disk_reads_total", "Number of cumulative reads by Badger", nil},
		{"badger_v2_disk_writes_total", "Number of cumulative writes by Badger", nil},
		{"badger_v2_read_bytes", "Number of cumulative bytes read by Badger", nil},
		{"badger_v2_written_bytes", "Number of cumulative bytes written by Badger", nil},
		{"badger_v2_lsm_level_gets_total", "Total number of LSM gets", []string{"level"}},
		{"badger_v2_lsm_bloom_hits_total", "Total number of LSM bloom hits", []string{"level"}},
		{"badger_v2_gets_total", "Total number of gets", nil},
		{"badger_v2_puts_total", "Total number of puts", nil},
		{"badger_v2_memtable_gets_total", "Total number of memtable gets", nil},
		{"badger_v2_lsm_size", "Size of the LSM in bytes", []string{"dir"}},
		{"badger_v2_vlog_size", "Size of the value log in bytes", []string{"dir"}},
	}

	exports := make(map[string]*prometheus.Desc, len(specs))
	for _, spec := range specs {
		exports[spec.name] = prometheus.NewDesc(spec.name, spec.help, spec.labels, nil)
	}
	return prometheus.NewExpvarCollector(exports)
}

// MetricsContext returns a context with tags that are useful for
// distinguishing the state of the running system.
// This context will be used to derive other contexts.
Expand Down

0 comments on commit 2a390c0

Please sign in to comment.