Skip to content

Commit

Permalink
Merge branch 'release/0.0.9'
Browse files Browse the repository at this point in the history
  • Loading branch information
Alex Schmitz committed Jun 25, 2019
2 parents e51e8c1 + 0614a32 commit b3ae2c1
Show file tree
Hide file tree
Showing 5 changed files with 96 additions and 51 deletions.
29 changes: 19 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,23 @@ prevent hanging the stats poller when servers are slow to respond or unresponsiv

~~~
$ zookeeper_exporter --help
usage: zookeeper_exporter --zk.hosts=ZK.HOSTS [<flags>]
Flags:
-h, --help Show context-sensitive help (also try --help-long and --help-man). --web.listen-address="0.0.0.0:9898" Address on which to expose metrics
--zk.hosts=ZK.HOSTS list of ip:port of ZK hosts, comma separated --zk.poll-interval=30 How often to poll the ZK servers --zk.connect-timeout=5 Timeout value for connecting to ZK --zk.connect-rw-deadline=5 Socket deadline for read & write operations
--version Show application version.~~~

~~~
zookeeper_exporter --zk.hosts=10.0.0.9:2181,10.0.0.10:2181
usage: zookeeper_exporter --zk.hosts=ZK.HOSTS [<flags>]
A zookeeper metrics exporter for prometheus, with zk_version and leaderServes=no support.
Flags:
-h, --help Show context-sensitive help (also try --help-long and --help-man).
--web.listen-address="0.0.0.0:9898"
Address on which to expose metrics
--zk.hosts=ZK.HOSTS list of ip:port of ZK hosts, comma separated
--zk.poll-interval=30 How often to poll the ZK servers
--zk.connect-timeout=5 Timeout value for connecting to ZK
--zk.connect-rw-deadline=5
Socket deadline for read & write operations
--version Show application version.
$ zookeeper_exporter --zk.hosts=10.0.0.9:2181,10.0.0.10:2181
~~~

## Install
Expand All @@ -35,4 +43,5 @@ under [releases](https://github.com/davemcphee/zookeeper_exporter/releases).
## ToDo

- Add consul service registration as default
- Better test etc
- Better tests - need to mock a ZK server; argh.

2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.0.8
0.0.9
39 changes: 26 additions & 13 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,36 +4,39 @@ import (
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/sirupsen/logrus"
"gopkg.in/alecthomas/kingpin.v2"
"io/ioutil"
"net/http"
"os"
"strings"
"time"
)

const version = "0.1"

var (
bindHostPort = kingpin.Flag(
version = getVersion()

app = kingpin.New("zookeeper_exporter", "A zookeeper metrics exporter for prometheus, with zk_version and leaderServes=no support.")

bindHostPort = app.Flag(
"web.listen-address",
"Address on which to expose metrics",
).Default("0.0.0.0:9898").String()

zkHostString = kingpin.Flag(
zkHostString = app.Flag(
"zk.hosts",
"list of ip:port of ZK hosts, comma separated",
).Required().String()

pollInterval = kingpin.Flag(
pollInterval = app.Flag(
"zk.poll-interval",
"How often to poll the ZK servers",
).Default("30").Int()

zkTimeout = kingpin.Flag(
zkTimeout = app.Flag(
"zk.connect-timeout",
"Timeout value for connecting to ZK",
).Default("5").Int()

zkRWDeadLine = kingpin.Flag(
zkRWDeadLine = app.Flag(
"zk.connect-rw-deadline",
"Socket deadline for read & write operations",
).Default("5").Int()
Expand All @@ -45,9 +48,20 @@ func setup() {
log.SetOutput(os.Stdout)
log.SetLevel(logrus.DebugLevel)

kingpin.Version(version)
kingpin.HelpFlag.Short('h')
kingpin.Parse()
app.Version(version)
app.HelpFlag.Short('h')
if _, err := app.Parse(os.Args[1:]); err != nil {
log.Fatal("Couldn't parse command line args")
}
}

// getVersion returns the application version string read from the file named
// "VERSION", resolved relative to the process's working directory. Surrounding
// whitespace (such as a trailing newline) is stripped from the file contents.
//
// If the file cannot be read, the error is logged and the placeholder version
// "0.0.0" is returned instead, so a missing file never prevents start-up.
func getVersion() string {
	contents, readErr := ioutil.ReadFile("VERSION")
	if readErr == nil {
		return strings.TrimSpace(string(contents))
	}

	// Fall back to a placeholder rather than aborting.
	log.Errorf("can't read from VERSION file: %s", readErr)
	return "0.0.0"
}

func main() {
Expand All @@ -66,16 +80,15 @@ func main() {
intervalDuration := time.Duration(*pollInterval) * time.Second

// Create new metrics interface
metrics := initMetrics()
metrics := newMetrics()

// Start an export thread per server
for _, ipport := range zkHosts {
p := newPoller(intervalDuration, metrics, *newZKServer(ipport))
p := newPoller(intervalDuration, *metrics, *newZKServer(ipport))
go p.pollForMetrics()
}

// Start http handler & server
// http.HandleFunc("/", metricsRequestHandler)
mux := http.NewServeMux()
mux.Handle("/metrics", promhttp.Handler())

Expand Down
67 changes: 44 additions & 23 deletions metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ const (
zkServerState = "zk_server_state"
zkFsyncThresholdExceeded = "zk_fsync_threshold_exceed_count"
zkVersion = "zk_version"
pollerFailureTotal = "polling_failure_total"

zkOK = "zk_ok"

Expand All @@ -36,10 +37,29 @@ const (
leader serverState = 2
standalone serverState = 3

// metric namepspace - prepended to all etric names
// metric namespace - prepended to all metric names
namespace = "zookeeper__"
)

// zkMetrics bundles every prometheus collector used by the exporter.
type zkMetrics struct {
// gauges holds the per-stat gauge vectors built by initGauges, keyed by the
// raw (un-namespaced) zookeeper metric name.
gauges map[string]*prometheus.GaugeVec
// pollingFailureCounter counts failed polls; it carries a "zk_instance" label.
pollingFailureCounter *prometheus.CounterVec
}

// newMetrics builds the exporter's metric set.
//
// It creates the internal polling-failure counter (labelled by "zk_instance"),
// registers it with the default prometheus registry, and then populates the
// gauge map via initGauges. The returned pointer is never nil.
func newMetrics() *zkMetrics {
	// Internal metric: number of failed polls per zookeeper instance.
	counterOpts := prometheus.CounterOpts{
		Name: prependNamespace(pollerFailureTotal),
		Help: "Polling failure count",
	}
	pollFailures := prometheus.NewCounterVec(counterOpts, []string{"zk_instance"})
	prometheus.MustRegister(pollFailures)

	m := new(zkMetrics)
	m.gauges = initGauges()
	m.pollingFailureCounter = pollFailures
	return m
}

func getState(s string) serverState {
switch s {
case "follower":
Expand All @@ -53,113 +73,114 @@ func getState(s string) serverState {
}
}

func prepend_namespace(rawMetricName string) string {
// prependNamespace returns rawMetricName with the package-level namespace
// constant placed in front of it; every metric name is built through this
// helper.
func prependNamespace(rawMetricName string) string {
return namespace + rawMetricName
}

// Creates a map of all known metrics exposed by zookeeper's mntr command
// literal metric name maps to a prometheus Gauge with label zk_instance set to zk's address
func initMetrics() map[string]*prometheus.GaugeVec {
func initGauges() map[string]*prometheus.GaugeVec {

allMetrics := make(map[string]*prometheus.GaugeVec)

allMetrics[zkAvgLatency] = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: prepend_namespace(zkAvgLatency),
Name: prependNamespace(zkAvgLatency),
Help: "Average Latency for ZooKeeper network requests",
}, []string{"zk_instance"})

allMetrics[zkMinLatency] = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: prepend_namespace(zkMinLatency),
Name: prependNamespace(zkMinLatency),
Help: "Minimum latency for Zookeeper network requests.",
}, []string{"zk_instance"})

allMetrics[zkMaxLatency] = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: prepend_namespace(zkMaxLatency),
Name: prependNamespace(zkMaxLatency),
Help: "Maximum latency for ZooKeeper network requests",
}, []string{"zk_instance"})

allMetrics[zkPacketsReceived] = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: prepend_namespace(zkPacketsReceived),
Name: prependNamespace(zkPacketsReceived),
Help: "Number of network packets received by the ZooKeeper instance.",
}, []string{"zk_instance"})

allMetrics[zkPacketsSent] = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: prepend_namespace(zkPacketsSent),
Name: prependNamespace(zkPacketsSent),
Help: "Number of network packets sent by the ZooKeeper instance.",
}, []string{"zk_instance"})

allMetrics[zkNumAliveConnections] = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: prepend_namespace(zkNumAliveConnections),
Name: prependNamespace(zkNumAliveConnections),
Help: "Number of currently alive connections to the ZooKeeper instance.",
}, []string{"zk_instance"})

allMetrics[zkOutstandingRequests] = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: prepend_namespace(zkOutstandingRequests),
Name: prependNamespace(zkOutstandingRequests),
Help: "Number of requests currently waiting in the queue.",
}, []string{"zk_instance"})

allMetrics[zkZnodeCount] = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: prepend_namespace(zkZnodeCount),
Name: prependNamespace(zkZnodeCount),
Help: "Znode count",
}, []string{"zk_instance"})

allMetrics[zkWatchCount] = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: prepend_namespace(zkWatchCount),
Name: prependNamespace(zkWatchCount),
Help: "Watch count",
}, []string{"zk_instance"})

allMetrics[zkEphemeralsCount] = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: prepend_namespace(zkEphemeralsCount),
Name: prependNamespace(zkEphemeralsCount),
Help: "Ephemerals Count",
}, []string{"zk_instance"})

allMetrics[zkApproximateDataSize] = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: prepend_namespace(zkApproximateDataSize),
Name: prependNamespace(zkApproximateDataSize),
Help: "Approximate data size",
}, []string{"zk_instance"})

allMetrics[zkOpenFileDescriptorCount] = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: prepend_namespace(zkOpenFileDescriptorCount),
Name: prependNamespace(zkOpenFileDescriptorCount),
Help: "Number of currently open file descriptors",
}, []string{"zk_instance"})

allMetrics[zkMaxFileDescriptorCount] = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: prepend_namespace(zkMaxFileDescriptorCount),
Name: prependNamespace(zkMaxFileDescriptorCount),
Help: "Maximum number of open file descriptors",
}, []string{"zk_instance"})

allMetrics[zkServerState] = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: prepend_namespace(zkServerState),
Name: prependNamespace(zkServerState),
Help: "Current state of the zk instance: 1 = follower, 2 = leader, 3 = standalone, -1 if unknown",
}, []string{"zk_instance"})

allMetrics[zkFollowers] = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: prepend_namespace(zkFollowers),
Name: prependNamespace(zkFollowers),
Help: "Leader only: number of followers.",
}, []string{"zk_instance"})

allMetrics[zkSyncedFollowers] = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: prepend_namespace(zkSyncedFollowers),
Name: prependNamespace(zkSyncedFollowers),
Help: "Leader only: number of followers currently in sync",
}, []string{"zk_instance"})

allMetrics[zkPendingSyncs] = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: prepend_namespace(zkPendingSyncs),
Name: prependNamespace(zkPendingSyncs),
Help: "Current number of pending syncs",
}, []string{"zk_instance"})

allMetrics[zkOK] = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: prepend_namespace(zkOK),
Name: prependNamespace(zkOK),
Help: "Is ZooKeeper currently OK",
}, []string{"zk_instance"})

allMetrics[zkFsyncThresholdExceeded] = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: prepend_namespace(zkFsyncThresholdExceeded),
Name: prependNamespace(zkFsyncThresholdExceeded),
Help: "Number of times File sync exceeded fsyncWarningThresholdMS",
}, []string{"zk_instance"})

allMetrics[zkVersion] = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: prepend_namespace(zkVersion),
Name: prependNamespace(zkVersion),
Help: "Zookeeper version",
}, []string{"zk_instance", "zk_version"})

Expand Down
10 changes: 6 additions & 4 deletions poller.go
Original file line number Diff line number Diff line change
@@ -1,19 +1,18 @@
package main

import (
"github.com/prometheus/client_golang/prometheus"
"strconv"
"strings"
"time"
)

type zkPoller struct {
interval time.Duration
metrics map[string]*prometheus.GaugeVec
metrics zkMetrics
zkServer zkServer
}

func newPoller(interval time.Duration, metrics map[string]*prometheus.GaugeVec, zkServer zkServer) *zkPoller {
func newPoller(interval time.Duration, metrics zkMetrics, zkServer zkServer) *zkPoller {
return &zkPoller{
interval: interval,
metrics: metrics,
Expand All @@ -22,11 +21,14 @@ func newPoller(interval time.Duration, metrics map[string]*prometheus.GaugeVec,
}

func (p *zkPoller) pollForMetrics() {
// Initialise the counter to 0
p.metrics.pollingFailureCounter.WithLabelValues(p.zkServer.ipPort).Add(0)
for {
expirationTime := time.Now().Add(p.interval)
m, err := p.zkServer.getStats()
if err != nil {
log.Errorf("[%v] failed to get stats: %v", p.zkServer.ipPort, err)
p.metrics.pollingFailureCounter.WithLabelValues(p.zkServer.ipPort).Inc()
}

p.refreshMetrics(m)
Expand All @@ -39,7 +41,7 @@ func (p *zkPoller) pollForMetrics() {

func (p *zkPoller) refreshMetrics(updated map[string]string) {
for name, value := range updated {
metric, ok := p.metrics[name]
metric, ok := p.metrics.gauges[name]

if !ok {
log.Errorf("[%v] stat=%v not defined in metrics.go\n", p.zkServer.ipPort, name)
Expand Down

0 comments on commit b3ae2c1

Please sign in to comment.