Skip to content

Commit

Permalink
Add dkg_state_change and reshare_state_change metrics (#949)
Browse files Browse the repository at this point in the history
  • Loading branch information
mcamou committed Apr 1, 2022
1 parent def4c1b commit 07b33e0
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 3 deletions.
18 changes: 18 additions & 0 deletions core/drand_beacon.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"time"

"github.com/drand/drand/common"
"github.com/drand/drand/metrics"

"github.com/drand/drand/chain"
"github.com/drand/drand/chain/boltdb"
Expand Down Expand Up @@ -120,6 +121,13 @@ func (bp *BeaconProcess) Load() (bool, error) {
bp.log = bp.log.Named(fmt.Sprint(bp.index))

bp.log.Debugw("", "serving", bp.priv.Public.Address())

beaconID := metrics.UnknownBeaconID
if bp.group != nil {
beaconID = bp.group.ID
}
metrics.DKGStateChange(metrics.DKGNotStarted, beaconID, false)

bp.dkgDone = false

return false, nil
Expand All @@ -135,6 +143,16 @@ func (bp *BeaconProcess) WaitDKG() (*key.Group, error) {
bp.state.Unlock()
return nil, errors.New("no dkg info set")
}

beaconID := metrics.UnknownBeaconID
if bp.group != nil {
beaconID = bp.group.ID
}
if beaconID == "" {
beaconID = common.DefaultBeaconID
}
metrics.DKGStateChange(metrics.DKGWaiting, beaconID, false)

waitCh := bp.dkgInfo.proto.WaitEnd()
bp.log.Debugw("", "waiting_dkg_end", time.Now())

Expand Down
11 changes: 11 additions & 0 deletions core/drand_beacon_control.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"github.com/drand/drand/entropy"
"github.com/drand/drand/key"
"github.com/drand/drand/log"
"github.com/drand/drand/metrics"
clock "github.com/jonboulle/clockwork"

"github.com/drand/drand/net"
Expand Down Expand Up @@ -388,6 +389,7 @@ func (bp *BeaconProcess) runDKG(leader bool, group *key.Group, timeout uint32, r
if leader {
bp.dkgInfo.started = true
}
metrics.DKGStateChange(metrics.DKGInProgress, beaconID, leader)
bp.state.Unlock()

if leader {
Expand All @@ -409,6 +411,7 @@ func (bp *BeaconProcess) runDKG(leader bool, group *key.Group, timeout uint32, r
}
bp.state.Lock()
bp.cleanupDKG()
metrics.DKGStateChange(metrics.DKGReady, beaconID, leader)
bp.dkgDone = true
bp.state.Unlock()
bp.log.Infow("", "init_dkg", "dkg_done",
Expand Down Expand Up @@ -512,6 +515,8 @@ func (bp *BeaconProcess) runResharing(leader bool, oldGroup, newGroup *key.Group
"target_group", hex.EncodeToString(newGroup.Hash()), "index", newNode.Index)
bp.dkgInfo.started = true
}

metrics.ReshareStateChange(metrics.ReshareInProgess, beaconID, leader)
bp.state.Unlock()

if leader {
Expand All @@ -532,6 +537,7 @@ func (bp *BeaconProcess) runResharing(leader bool, oldGroup, newGroup *key.Group
return nil, fmt.Errorf("drand: err during DKG: %w", err)
}
bp.log.Infow("", "dkg_reshare", "finished", "leader", leader)
metrics.ReshareStateChange(metrics.ReshareIdle, beaconID, leader)

// runs the transition of the beacon
go bp.transition(oldGroup, oldPresent, newPresent)
Expand Down Expand Up @@ -568,6 +574,7 @@ func (bp *BeaconProcess) setupAutomaticDKG(_ context.Context, in *drand.InitDKGP

defer func(r *setupReceiver) {
bp.state.Lock()
metrics.DKGStateChange(metrics.DKGNotStarted, beaconID, false)
r.stop()
if r == bp.receiver {
// if there has been no new receiver since, we set the field to nil
Expand Down Expand Up @@ -597,6 +604,7 @@ func (bp *BeaconProcess) setupAutomaticDKG(_ context.Context, in *drand.InitDKGP
}

bp.log.Debugw("", "init_dkg", "wait_group")
metrics.DKGStateChange(metrics.DKGWaiting, beaconID, false)

group, dkgTimeout, err := bp.receiver.WaitDKGInfo(nc)
if err != nil {
Expand Down Expand Up @@ -666,6 +674,7 @@ func (bp *BeaconProcess) setupAutomaticResharing(_ context.Context, oldGroup *ke
}
bp.receiver = receiver
defer func(r *setupReceiver) {
metrics.ReshareStateChange(metrics.ReshareIdle, beaconID, false)
bp.state.Lock()
r.stop()
// only set to nil if the given receiver here is the same as the current
Expand All @@ -689,6 +698,8 @@ func (bp *BeaconProcess) setupAutomaticResharing(_ context.Context, oldGroup *ke
Metadata: &metadata,
}

metrics.ReshareStateChange(metrics.ReshareWaiting, beaconID, false)

// we wait only a certain amount of time for the prepare phase
nc, cancel := context.WithTimeout(context.Background(), MaxWaitPrepareDKG)
defer cancel()
Expand Down
51 changes: 48 additions & 3 deletions metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"net"
"net/http"
"runtime"
"strconv"
"strings"
"time"

Expand All @@ -15,6 +16,26 @@ import (
"github.com/prometheus/client_golang/prometheus/promhttp"
)

// DKGStatus is the value reported in the "state" label of the DKG state
// change metric.
type DKGStatus string

// ReshareStatus is the value reported in the "state" label of the reshare
// state change metric.
type ReshareStatus string

// UnknownBeaconID is the beacon ID label value to use when the beacon ID of
// the process reporting a state change is not known.
const UnknownBeaconID = "unknown"

// DKG states that can be passed to DKGStateChange.
const (
	DKGNotStarted    DKGStatus = "not_started"
	DKGInProgress    DKGStatus = "in_progress"
	DKGWaiting       DKGStatus = "waiting"
	DKGReady         DKGStatus = "ready"
	DKGUnknownStatus DKGStatus = "unknown"
)

// Reshare states that can be passed to ReshareStateChange.
const (
	ReshareIdle          ReshareStatus = "idle"
	ReshareWaiting       ReshareStatus = "waiting"
	ReshareInProgress    ReshareStatus = "in_progress"
	ReshareStatusUnknown ReshareStatus = "unknown"

	// ReshareInProgess is the original, misspelled name of ReshareInProgress.
	// It is kept so that existing callers keep compiling.
	//
	// Deprecated: use ReshareInProgress instead.
	ReshareInProgess = ReshareInProgress
)

var (
// PrivateMetrics about the internal world (go process, private stuff)
PrivateMetrics = prometheus.NewRegistry()
Expand Down Expand Up @@ -151,8 +172,22 @@ var (
[]string{"url"},
)

// DrandBuildTime emits the timestamp when the binary was built in Unix time.
DrandBuildTime = prometheus.NewUntypedFunc(prometheus.UntypedOpts{
// dkgStateChangeTimestamp tracks DKG status changes
dkgStateChangeTimestamp = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "dkg_state_change_timestamp",
Help: "DKG state change timestamp in seconds since the Epoch",
ConstLabels: map[string]string{},
}, []string{"state", "beacon_id", "is_leader"})

// reshareStateChangeTimestamp tracks reshare status changes
reshareStateChangeTimestamp = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "reshare_state_change_timestamp",
Help: "Reshare state change timestamp in seconds since the Epoch",
ConstLabels: map[string]string{},
}, []string{"state", "beacon_id", "is_leader"})

// drandBuildTime emits the timestamp when the binary was built in Unix time.
drandBuildTime = prometheus.NewUntypedFunc(prometheus.UntypedOpts{
Name: "drand_build_time",
Help: "Timestamp when the binary was built in seconds since the Epoch",
ConstLabels: map[string]string{"build": common.COMMIT, "version": common.GetAppVersion().String()},
Expand All @@ -177,7 +212,9 @@ func bindMetrics() error {

// Private metrics
private := []prometheus.Collector{
DrandBuildTime,
drandBuildTime,
dkgStateChangeTimestamp,
reshareStateChangeTimestamp,
}
for _, c := range private {
if err := PrivateMetrics.Register(c); err != nil {
Expand Down Expand Up @@ -344,3 +381,11 @@ func getBuildTimestamp(buildDate string) int64 {
}
return t.Unix()
}

// DKGStateChange sets the DKG state change gauge to the current time for the
// label combination made of the given state, beacon ID and leader flag.
func DKGStateChange(s DKGStatus, beaconID string, leader bool) {
	// Label order must match the gauge declaration: state, beacon_id, is_leader.
	isLeader := strconv.FormatBool(leader)
	gauge := dkgStateChangeTimestamp.WithLabelValues(string(s), beaconID, isLeader)
	gauge.SetToCurrentTime()
}

// ReshareStateChange sets the reshare state change gauge to the current time
// for the label combination made of the given state, beacon ID and leader
// flag.
func ReshareStateChange(s ReshareStatus, beaconID string, leader bool) {
	// Label order must match the gauge declaration: state, beacon_id, is_leader.
	isLeader := strconv.FormatBool(leader)
	gauge := reshareStateChangeTimestamp.WithLabelValues(string(s), beaconID, isLeader)
	gauge.SetToCurrentTime()
}

0 comments on commit 07b33e0

Please sign in to comment.