Metrics and preliminary multi-API support (#6)
Two somewhat unrelated things in this change:

1. Metrics about the workings of TailscaleSD are now exported at /metrics
2. Introduces a MultiDiscoverer to aggregate results from multiple Discoverers.

The MultiDiscoverer feature will be expanded to support multiple tailnets in an
upcoming change. It wasn't supposed to be included, but I implemented it in the
wrong branch and figured, "what the hell?"
cfunkhouser committed Apr 7, 2022
2 parents af6a763 + 72e4481 commit f48c691
Showing 9 changed files with 638 additions and 11 deletions.
20 changes: 17 additions & 3 deletions README.md
@@ -3,14 +3,18 @@
Serves Prometheus HTTP Service Discovery for devices on a Tailscale Tailnet.

For details on HTTP Service Discovery, read the Prometheus docs:
<https://prometheus.io/docs/prometheus/latest/http_sd/>
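
For orientation, an HTTP SD endpoint returns a JSON array of target groups
shaped roughly like the following. This is a minimal illustration only; the
label name shown is hypothetical, and TailscaleSD's actual label set is
documented below.

```json
[
  {
    "targets": ["100.64.0.1"],
    "labels": {
      "__meta_tailscale_device_hostname": "examplehost"
    }
  }
]
```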

## Usage

The `tailscalesd` server is very simple. It serves the SD payload at `/` on its
HTTP server. It respects the following configuration parameters, each of which
may be specified as a flag or an environment variable.

**As of v0.2.1, the local and public APIs are no longer mutually exclusive.
Setting the `-localapi` flag and providing `-tailnet` + `-token` will result in
a union of targets from both APIs.**
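
For example, a combined invocation might look like the following sketch, in
which the tailnet name and token are placeholders:

```sh
tailscalesd -address 0.0.0.0:9242 -localapi -tailnet example.com -token "$TOKEN"
```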

- `-address` / `ADDRESS` is the host:port on which to serve TailscaleSD.
Defaults to `0.0.0.0:9242`.
- `-localapi` / `TAILSCALE_USE_LOCAL_API` instructs TailscaleSD to use the
@@ -44,14 +48,24 @@ See the label comments in [`tailscalesd.go`](./tailscalesd.go) for details about
which labels are supported for each API type. **Do not assume they will be the
same labels, or that values will match across the APIs!**

## Metrics

As of v0.2.1, TailscaleSD exports Prometheus metrics on the standard `/metrics`
endpoint. In addition to the standard Go metrics, you will find
TailscaleSD-specific metrics defined in [`metrics.go`](./metrics.go). The
metrics are targeted at understanding the behavior of TailscaleSD itself.
Contributions of additional interesting metrics are welcome, but please remember
that details about your devices should be handled by your monitoring. This is a
target discovery tool, _not_ a Prometheus exporter for Tailscale!
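
Because TailscaleSD is itself now a scrape target, an ordinary static scrape
job covers it. A minimal sketch, with a placeholder hostname standing in for
wherever you run TailscaleSD:

```yaml
scrape_configs:
  - job_name: 'tailscalesd'
    static_configs:
      - targets: ['tailscalesd.example.com:9242']
```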

## Prometheus Configuration

Configure Prometheus by placing the `tailscalesd` URL in an `http_sd_configs`
block in a `scrape_config`. The following labels are potentially made available
for all Tailscale nodes discovered; however, any label for which the Tailscale
API did not return a value will be omitted. For more details on each field and
the API in general, see:
<https://github.com/tailscale/tailscale/blob/main/api.md#tailnet-devices-get>

Possible target labels follow. See the label comments in
[`tailscalesd.go`](./tailscalesd.go) for details. There will be one target entry
@@ -120,4 +134,4 @@ scrape_configs:
regex: '(.*)'
replacement: $1:9100
target_label: __address__
```
25 changes: 18 additions & 7 deletions cmd/tailscalesd/main.go
@@ -9,6 +9,8 @@ import (
"strings"
"time"

"github.com/prometheus/client_golang/prometheus/promhttp"

"github.com/cfunkhouser/tailscalesd"
)

@@ -101,17 +103,26 @@ func main() {
 		return
 	}
 
-	var ts tailscalesd.Discoverer
+	var ts tailscalesd.MultiDiscoverer
 	if useLocalAPI {
-		ts = tailscalesd.LocalAPI(tailscalesd.LocalAPISocket)
-	} else {
-		ts = tailscalesd.PublicAPI(tailnet, token)
+		ts = append(ts, &tailscalesd.RateLimitedDiscoverer{
+			Wrap:      tailscalesd.LocalAPI(tailscalesd.LocalAPISocket),
+			Frequency: pollLimit,
+		})
 	}
-	ts = &tailscalesd.RateLimitedDiscoverer{
-		Wrap:      ts,
-		Frequency: pollLimit,
+
+	if token != "" && tailnet != "" {
+		ts = append(ts, &tailscalesd.RateLimitedDiscoverer{
+			Wrap:      tailscalesd.PublicAPI(tailnet, token),
+			Frequency: pollLimit,
+		})
 	}
+
+	// Metrics concerning tailscalesd itself are served from /metrics
+	http.Handle("/metrics", promhttp.Handler())
+	// Service discovery is served at /
+	http.Handle("/", tailscalesd.Export(ts))
 
 	log.Printf("Serving Tailscale service discovery on %q", address)
 	log.Print(http.ListenAndServe(address, nil))
 	log.Print("Done")
1 change: 1 addition & 0 deletions go.mod
@@ -4,5 +4,6 @@ go 1.16

require (
github.com/google/go-cmp v0.5.7
github.com/prometheus/client_golang v1.12.1
inet.af/netaddr v0.0.0-20211027220019-c74959edd3b6
)
444 changes: 444 additions & 0 deletions go.sum

Large diffs are not rendered by default.

16 changes: 15 additions & 1 deletion localapi.go
@@ -9,6 +9,7 @@ import (
"net/http"
"time"

"github.com/prometheus/client_golang/prometheus"
"inet.af/netaddr"
)

@@ -45,22 +46,35 @@ type localAPIClient struct {
var errFailedLocalAPIRequest = errors.New("failed local API request")

func (a *localAPIClient) status(ctx context.Context) (interestingStatusSubset, error) {
start := time.Now()
lv := prometheus.Labels{
"api": "local",
"host": "localhost",
}
defer func() {
apiRequestLatencyHistogram.With(lv).Observe(float64(time.Since(start).Milliseconds()))
}()

var status interestingStatusSubset
req, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://localhost/localapi/v0/status", nil)
if err != nil {
return status, err
}

apiRequestCounter.With(lv).Inc()
resp, err := a.client.Do(req)
if err != nil {
apiRequestErrorCounter.With(lv).Inc()
return status, err
}
defer resp.Body.Close()

if (resp.StatusCode / 100) != 2 {
apiRequestErrorCounter.With(lv).Inc()
return status, fmt.Errorf("%w: %v", errFailedLocalAPIRequest, resp.Status)
}

if err := json.NewDecoder(resp.Body).Decode(&status); err != nil {
apiPayloadErrorCounter.With(lv).Inc()
return status, err
}
return status, nil
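
The latency instrumentation above follows a pattern worth calling out: capture
`time.Now()` on entry, then observe the elapsed time in a deferred function so
that every return path (success, request error, or bad payload) is measured. A
generic sketch of the same idea, with hypothetical names and independent of the
types used here:

```go
package main

import (
	"log"
	"time"
)

// timed runs fn and reports its wall-clock duration in milliseconds via
// report. The deferred call fires on every return path, mirroring the
// deferred Observe above.
func timed(report func(ms float64), fn func() error) error {
	start := time.Now()
	defer func() {
		report(float64(time.Since(start).Milliseconds()))
	}()
	return fn()
}

func main() {
	_ = timed(
		func(ms float64) { log.Printf("took %.0fms", ms) },
		func() error { time.Sleep(50 * time.Millisecond); return nil },
	)
}
```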
77 changes: 77 additions & 0 deletions metrics.go
@@ -0,0 +1,77 @@
package tailscalesd

import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)

var (
apiRequestCounter = promauto.NewCounterVec(
prometheus.CounterOpts{
Name: "tailscalesd_tailscale_api_requests",
Help: "Counter of requests made to Tailscale APIs. Labeled with the API host to which requests are made.",
},
[]string{"api", "host"})

apiRequestLatencyHistogram = promauto.NewHistogramVec(
prometheus.HistogramOpts{
Name: "tailscalesd_tailscale_api_request_latency_ms",
Help: "Histogram of API request latency measured in milliseconds. " +
"Bucketted geometrically.",
Buckets: []float64{1, 2.75, 7.5625, 20.7969, 57.1914, 157.2764, 432.5100, 1189.4025, 3270.8569, 8994.8566},
},
[]string{"api", "host"})

apiRequestErrorCounter = promauto.NewCounterVec(
prometheus.CounterOpts{
Name: "tailscalesd_tailscale_api_errors",
Help: "Counter of errors during requests to Tailscale APIs. " +
"Denominated by tailscalesd_tailscale_api_requests.",
},
[]string{"api", "host"})

apiPayloadErrorCounter = promauto.NewCounterVec(
prometheus.CounterOpts{
Name: "tailscalesd_tailscale_api_payload_errors",
Help: "Counter of bad payload responses from Tailscale APIs. Denominated by tailscalesd_tailscale_api_requests.",
},
[]string{"api", "host"})

multiDiscovererRequestCounter = promauto.NewCounter(
prometheus.CounterOpts{
Name: "tailscalesd_tailscale_multi_requests",
Help: "Counter of all requests to a multi-discoverer.",
})

multiDiscovererErrorCounter = promauto.NewCounter(
prometheus.CounterOpts{
Name: "tailscalesd_tailscale_multi_errors",
Help: "Counter of errors during requests to all multi-discoverer. " +
"Denominated by tailscalesd_tailscale_multi_requests.",
})

rateLimitedRequests = promauto.NewCounter(
prometheus.CounterOpts{
Name: "tailscalesd_tailscale_rate_limited_requests",
Help: "Counter of all requests to a rate limited discoverer.",
})

rateLimitedRequestRefreshes = promauto.NewCounter(
prometheus.CounterOpts{
Name: "tailscalesd_tailscale_rate_limited_refreshes",
Help: "Counter of requests to a rate limited discoverer which result in a data refresh.",
})

rateLimitedStaleResults = promauto.NewCounter(
prometheus.CounterOpts{
Name: "tailscalesd_tailscale_rate_limited_stale",
Help: "Counter of requests to a rate limited discoverer which result a return of stale results.",
})

tailnetDevicesFoundCounter = promauto.NewCounterVec(
prometheus.CounterOpts{
Name: "tailscalesd_public_api_devices_found",
Help: "Counter of devices found using the public API, labeled with tailnet name.",
},
[]string{"tailnet"})
)
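
A side note on the histogram above: its bucket boundaries form a geometric
series starting at 1ms with ratio 2.75. Up to rounding, the same list can be
generated with `prometheus.ExponentialBuckets`; a quick sketch to verify, not
how the committed code defines them:

```go
package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

func main() {
	// Prints ten boundaries starting at 1 and growing by a factor of
	// 2.75, matching the hand-written bucket list above up to rounding.
	fmt.Println(prometheus.ExponentialBuckets(1, 2.75, 10))
}
```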
41 changes: 41 additions & 0 deletions multi.go
@@ -0,0 +1,41 @@
package tailscalesd

import (
"context"
"sync"
)

// MultiDiscoverer aggregates responses from multiple Discoverers.
type MultiDiscoverer []Discoverer

type discoveryResult struct {
devices []Device
err error
}

// Devices aggregates the results of calling Devices on each contained
// Discoverer. Returns the first encountered error.
func (md MultiDiscoverer) Devices(ctx context.Context) ([]Device, error) {
multiDiscovererRequestCounter.Inc()
var wg sync.WaitGroup
n := len(md)
results := make([]discoveryResult, n)
wg.Add(n)
for i, d := range md {
go func(d Discoverer, result *discoveryResult) {
defer wg.Done()
result.devices, result.err = d.Devices(ctx)
}(d, &results[i])
}
wg.Wait()

var ret []Device
for i := range results {
if err := results[i].err; err != nil {
multiDiscovererErrorCounter.Inc()
return ret, err
}
ret = append(ret, results[i].devices...)
}
return ret, nil
}
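
To make the aggregation concrete, here is a hypothetical composition mirroring
what `cmd/tailscalesd/main.go` now does, with placeholder tailnet and token
values:

```go
package main

import (
	"context"
	"log"

	"github.com/cfunkhouser/tailscalesd"
)

func main() {
	// Query the local and public APIs concurrently and merge the
	// resulting device lists.
	md := tailscalesd.MultiDiscoverer{
		tailscalesd.LocalAPI(tailscalesd.LocalAPISocket),
		tailscalesd.PublicAPI("example.com", "tskey-api-PLACEHOLDER"),
	}
	devices, err := md.Devices(context.Background())
	if err != nil {
		log.Fatal(err) // the first error encountered wins
	}
	log.Printf("discovered %d devices", len(devices))
}
```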
20 changes: 20 additions & 0 deletions publicapi.go
@@ -8,6 +8,8 @@ import (
"net"
"net/http"
"time"

"github.com/prometheus/client_golang/prometheus"
)

type deviceAPIResponse struct {
@@ -24,23 +26,41 @@ type publicAPIDiscoverer struct {
var errFailedAPIRequest = errors.New("failed API request")

func (a *publicAPIDiscoverer) Devices(ctx context.Context) ([]Device, error) {
start := time.Now()
lv := prometheus.Labels{
"api": "public",
"host": a.apiBase,
}
defer func() {
apiRequestLatencyHistogram.With(lv).Observe(float64(time.Since(start).Milliseconds()))
}()

url := fmt.Sprintf("https://%v@%v/api/v2/tailnet/%v/devices", a.token, a.apiBase, a.tailnet)
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
if err != nil {
return nil, err
}

apiRequestCounter.With(lv).Inc()
resp, err := a.client.Do(req)
if err != nil {
apiRequestErrorCounter.With(lv).Inc()
return nil, err
}
defer resp.Body.Close()
if (resp.StatusCode / 100) != 2 {
apiRequestErrorCounter.With(lv).Inc()
return nil, fmt.Errorf("%w: %v", errFailedAPIRequest, resp.Status)
}
var d deviceAPIResponse
if err := json.NewDecoder(resp.Body).Decode(&d); err != nil {
apiPayloadErrorCounter.With(lv).Inc()
return nil, fmt.Errorf("%w: bad payload from API: %v", errFailedAPIRequest, err)
}
tailnetDevicesFoundCounter.With(prometheus.Labels{"tailnet": a.tailnet}).Inc()
for i := range d.Devices {
d.Devices[i].API = a.apiBase
d.Devices[i].Tailnet = a.tailnet
5 changes: 5 additions & 0 deletions ratelimited.go
@@ -23,8 +23,11 @@ type RateLimitedDiscoverer struct {
}

func (c *RateLimitedDiscoverer) refreshDevices(ctx context.Context) ([]Device, error) {
rateLimitedRequestRefreshes.Inc()

devices, err := c.Wrap.Devices(ctx)
if err != nil {
rateLimitedStaleResults.Inc()
return devices, fmt.Errorf("%w: %v", errStaleResults, err)
}

@@ -36,6 +39,8 @@ func (c *RateLimitedDiscoverer) refreshDevices(ctx context.Context) ([]Device, error) {
}

func (c *RateLimitedDiscoverer) Devices(ctx context.Context) ([]Device, error) {
rateLimitedRequests.Inc()

c.mu.RLock()
expired := time.Now().After(c.earliest)
last := make([]Device, len(c.last))
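
For completeness, a sketch of using `RateLimitedDiscoverer` directly, assuming
a five-minute poll limit (`main.go` passes its `pollLimit` flag here):

```go
package main

import (
	"context"
	"log"
	"time"

	"github.com/cfunkhouser/tailscalesd"
)

func main() {
	// Wrap a Discoverer so the upstream API is polled at most once per
	// Frequency; calls in between are served the cached device list.
	rl := &tailscalesd.RateLimitedDiscoverer{
		Wrap:      tailscalesd.LocalAPI(tailscalesd.LocalAPISocket),
		Frequency: 5 * time.Minute,
	}
	devices, err := rl.Devices(context.Background())
	if err != nil {
		log.Print(err) // may wrap the stale-results error if a refresh failed
	}
	log.Printf("%d devices", len(devices))
}
```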
