Skip to content

Commit

Permalink
Merge branch 'luminous' into ssobolewski/optionally-collect-rgw-gc-stats
Browse files Browse the repository at this point in the history
  • Loading branch information
ssobolewski committed Jul 9, 2018
2 parents f91113a + ae0f874 commit 04d1531
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 2 deletions.
18 changes: 18 additions & 0 deletions collectors/health.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ type ClusterHealthCollector struct {
// TotalPGs shows the total no. of PGs the cluster consists of.
TotalPGs prometheus.Gauge

// ActivePGs shows the no. of PGs the cluster is actively serving data
// from.
ActivePGs prometheus.Gauge

// DegradedPGs shows the no. of PGs that have some of the replicas
// missing.
DegradedPGs prometheus.Gauge
Expand Down Expand Up @@ -209,6 +213,14 @@ func NewClusterHealthCollector(conn Conn, cluster string) *ClusterHealthCollecto
ConstLabels: labels,
},
),
ActivePGs: prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: cephNamespace,
Name: "active_pgs",
Help: "No. of active PGs in the cluster",
ConstLabels: labels,
},
),
ScrubbingPGs: prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: cephNamespace,
Expand Down Expand Up @@ -463,6 +475,7 @@ func (c *ClusterHealthCollector) metricsList() []prometheus.Metric {
c.HealthStatus,
c.TotalPGs,
c.DegradedPGs,
c.ActivePGs,
c.StuckDegradedPGs,
c.UncleanPGs,
c.StuckUncleanPGs,
Expand Down Expand Up @@ -722,6 +735,7 @@ func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {

var (
degradedPGs float64
activePGs float64
uncleanPGs float64
undersizedPGs float64
peeringPGs float64
Expand All @@ -731,6 +745,7 @@ func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {

pgStateMap = map[string]*float64{
"degraded": &degradedPGs,
"active": &activePGs,
"unclean": &uncleanPGs,
"undersized": &undersizedPGs,
"peering": &peeringPGs,
Expand All @@ -751,6 +766,9 @@ func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {
if *pgStateMap["degraded"] > 0 {
c.DegradedPGs.Set(*pgStateMap["degraded"])
}
if *pgStateMap["active"] > 0 {
c.ActivePGs.Set(*pgStateMap["active"])
}
if *pgStateMap["unclean"] > 0 {
c.UncleanPGs.Set(*pgStateMap["unclean"])
}
Expand Down
2 changes: 2 additions & 0 deletions collectors/health_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,7 @@ $ sudo ceph -s
"health": {"summary": [{"severity": "HEALTH_WARN", "summary": "7 pgs undersized"}]}
}`,
regexes: []*regexp.Regexp{
regexp.MustCompile(`active_pgs{cluster="ceph"} 7`),
regexp.MustCompile(`scrubbing_pgs{cluster="ceph"} 2`),
regexp.MustCompile(`deep_scrubbing_pgs{cluster="ceph"} 5`),
},
Expand Down Expand Up @@ -596,6 +597,7 @@ $ sudo ceph -s
}
}`,
regexes: []*regexp.Regexp{
regexp.MustCompile(`active_pgs{cluster="ceph"} 30`),
regexp.MustCompile(`degraded_pgs{cluster="ceph"} 40`),
regexp.MustCompile(`unclean_pgs{cluster="ceph"} 30`),
regexp.MustCompile(`undersized_pgs{cluster="ceph"} 40`),
Expand Down
44 changes: 42 additions & 2 deletions exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,12 @@ package main
import (
"flag"
"log"
"net"
"net/http"
"os"
"sync"
"syscall"
"time"

"github.com/ceph/go-ceph/rados"
"github.com/digitalocean/ceph_exporter/collectors"
Expand All @@ -32,6 +36,33 @@ const (
defaultCephConfigPath = "/etc/ceph/ceph.conf"
)

// keepAlive is the TCP keep-alive period configured on every connection
// returned by emfileAwareTcpListener.Accept.
const keepAlive time.Duration = 3 * time.Minute

// emfileAwareTcpListener is a copy of net/http's tcpKeepAliveListener with
// one tweak: if accepting a connection fails with EMFILE (the process has
// run out of file descriptors), the whole process is terminated instead of
// the error being handed back to the caller.
type emfileAwareTcpListener struct {
	*net.TCPListener
}

// acceptHitEMFILE reports whether err is a *net.OpError wrapping an
// *os.SyscallError whose underlying error is syscall.EMFILE.
func acceptHitEMFILE(err error) bool {
	opErr, isOpErr := err.(*net.OpError)
	if !isOpErr {
		return false
	}
	sysErr, isSysErr := opErr.Err.(*os.SyscallError)
	return isSysErr && sysErr.Err == syscall.EMFILE
}

// Accept waits for the next TCP connection, enables keep-alive on it with a
// period of keepAlive, and returns it. On an EMFILE failure it logs the error
// and exits the process; any other accept error is returned together with a
// nil connection.
func (ln emfileAwareTcpListener) Accept() (c net.Conn, err error) {
	conn, acceptErr := ln.AcceptTCP()
	if acceptErr != nil {
		if acceptHitEMFILE(acceptErr) {
			// log.Fatalf calls os.Exit(1), so this never returns.
			log.Fatalf("%v", acceptErr)
		}
		return nil, acceptErr
	}

	// The results of the keep-alive setters are intentionally not checked:
	// the connection is handed out either way.
	conn.SetKeepAlive(true)
	conn.SetKeepAlivePeriod(keepAlive)
	return conn, nil
}

// CephExporter wraps all the ceph collectors and provides a single global
// exporter to extract metrics from. It also ensures that the collection
// is done in a thread-safe manner, the necessary requirement stated by
Expand Down Expand Up @@ -167,7 +198,16 @@ func main() {
})

log.Printf("Starting ceph exporter on %q", *addr)
if err := http.ListenAndServe(*addr, nil); err != nil {
log.Fatalf("cannot start ceph exporter: %s", err)
// Below is essentially http.ListenAndServe(), but using our custom
// emfileAwareTcpListener that will die if we run out of file descriptors
ln, err := net.Listen("tcp", *addr)
if err == nil {
err := http.Serve(emfileAwareTcpListener{ln.(*net.TCPListener)}, nil)
if err != nil {
log.Fatalf("unable to serve requests: %s", err)
}
}
if err != nil {
log.Fatalf("unable to create listener: %s", err)
}
}

0 comments on commit 04d1531

Please sign in to comment.