Skip to content

Commit

Permalink
Miscellaneous fixes from merged reviews (#1237)
Browse files Browse the repository at this point in the history
* added lots of comments to the state machine
* don't consume genesis beacon from others
* make transition time mandatory to CLI
* added a test around pretty printing
* added missing flag to demo project
* added missing tests
  • Loading branch information
CluEleSsUK committed Jan 9, 2024
1 parent 7a5a49b commit 9ad37dd
Show file tree
Hide file tree
Showing 6 changed files with 284 additions and 99 deletions.
1 change: 1 addition & 0 deletions demo/node/node_subprocess.go
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,7 @@ func (n *NodeProc) StartLeaderDKG(thr int, _ int, joiners []*drand.Participant)
"--scheme", n.scheme.Name,
"--period", n.period,
"--catchup-period", "1s",
"--transition-time", "1m",
"--proposal", n.proposalPath,
"--threshold", strconv.Itoa(thr),
"--timeout", (5 * time.Minute).String(),
Expand Down
21 changes: 13 additions & 8 deletions internal/core/drand_beacon.go
Original file line number Diff line number Diff line change
Expand Up @@ -555,14 +555,19 @@ func (bp *BeaconProcess) storeCurrentFromPeerNetwork(ctx context.Context, store
return err
}

// if no node in the network has created a beacon yet, we will receive the 0th beacon from them
// and this will fail to verify in Kyber
if targetBeacon.Round != 0 {
err = bp.group.Scheme.VerifyBeacon(&targetBeacon, bp.group.PublicKey.Key())
if err != nil {
bp.log.Errorw("failed to verify beacon", "err", err)
return err
}
// if no node in the network has created a beacon yet, we will receive the 0th beacon from them,
// so we just determine it from our group file and add it, rather that relying on them giving us
// the correct one
if targetBeacon.Round == 0 {
bp.log.Warnw("No node in the network has created a beacon yet: storing genesis beacon instead")
err = store.Put(ctx, chain.GenesisBeacon(bp.group.GenesisSeed))
return err
}

err = bp.group.Scheme.VerifyBeacon(&targetBeacon, bp.group.PublicKey.Key())
if err != nil {
bp.log.Errorw("failed to verify beacon", "err", err)
return err
}

err = store.Put(ctx, &targetBeacon)
Expand Down
65 changes: 63 additions & 2 deletions internal/dkg/state_machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,42 @@ import (
type Status uint32

const (
// Fresh is the state all nodes start in - both pre-genesis, and if the network is running but they aren't
// yet participating
Fresh Status = iota
// Proposed implies somebody else has sent me a proposal
Proposed
// Proposing implies I have sent the others in the network a proposal
Proposing
// Accepted means I have accepted a proposal received from somebody else
// note Joiners do not accept/reject proposals
Accepted
// Rejected means I have rejected a proposal received from somebody else
// it doesn't automatically abort the DKG, but the leader is advised to abort and suggest some new terms
Rejected
// Aborted means the leader has told the network to abort the proposal; a node may have rejected,
// they may have found an error in the proposal, or any other reason could have occurred
Aborted
// Executing means the leader has reviewed accepts/rejects and decided to go ahead with the DKG
// this implies that the Kyber DKG process has been started
Executing
// Complete means the DKG has finished and a new group file has been created successfully
Complete
// TimedOut means the proposal timeout has been reached without entering the `Executing` state
// any node can trigger this for themselves should they identify timeout has been reached
// it does _not_ guarantee that other nodes have also timed out - a network partition or something else
// could have occurred. If the rest of the network continues, our node will likely transition to `Evicted`
TimedOut
// Joined is the state a new proposed group member enters when they have been proposed a DKG and they run the
// `join` DKG command to signal their acceptance to join the network
Joined
// Left is used when a node has left the network by their own choice after a DKG. It's not entirely necessary,
// an operator could just turn their node off. It's used to determine if an existing state is the current state
// of the network, or whether epochs have happened in between times
Left
// Evicted signals that a DKG execution has happened successfully, but our node was excluded for some reason.
// Generally it means that other nodes have been unable to validate its shares, or it was offline during DKG
// execution. If a threshold number of nodes are evicted during the DKG, the old state will be reverted.
Evicted
)

Expand Down Expand Up @@ -124,6 +149,9 @@ func (d *DBState) Equals(e *DBState) bool {
reflect.DeepEqual(d.KeyShare, e.KeyShare)
}

// DBStateTOML is a convenience object for managing de/serialisation of DBStates when reading/writing them
// from/to disk.
// Don't forget to update it if you update the `DBState` object!!
type DBStateTOML struct {
BeaconID string
Epoch uint32
Expand Down Expand Up @@ -246,6 +274,8 @@ func (d *DBState) Joined(me *drand.Participant, previousGroup *key.Group) (*DBSt
return nil, ErrTimeoutReached
}

// joiners after the first epoch must pass a group file in order to determine
// that the proposal is valid (e.g. the `GenesisTime` and `Remaining` group are correct)
if d.Epoch != 1 && previousGroup == nil {
return nil, ErrJoiningAfterFirstEpochNeedsGroupFile
}
Expand All @@ -259,6 +289,7 @@ func (d *DBState) Joined(me *drand.Participant, previousGroup *key.Group) (*DBSt
return d, nil
}

// Proposing is used by the leader to set their own local state when proposing a DKG to the network
func (d *DBState) Proposing(me *drand.Participant, terms *drand.ProposalTerms) (*DBState, error) {
if !isValidStateChange(d.State, Proposing) {
return nil, InvalidStateChange(d.State, Proposing)
Expand All @@ -272,6 +303,7 @@ func (d *DBState) Proposing(me *drand.Participant, terms *drand.ProposalTerms) (
return nil, err
}

// new joiners cannot be the leader except for genesis
if d.State == Fresh && terms.Epoch > 1 {
return nil, ErrInvalidEpoch
}
Expand All @@ -295,11 +327,14 @@ func (d *DBState) Proposing(me *drand.Participant, terms *drand.ProposalTerms) (
}, nil
}

// Proposed is used by non-leader nodes to set their own state when they receive a proposal
func (d *DBState) Proposed(sender, me *drand.Participant, terms *drand.ProposalTerms) (*DBState, error) {
if !isValidStateChange(d.State, Proposed) {
return nil, InvalidStateChange(d.State, Proposed)
}

// it's important to verify that the sender (and by extension the signature of the sender)
// is the same as the proposed leader, to avoid nodes trying to propose DKGs on behalf of somebody else
if terms.Leader != sender {
return nil, ErrCannotProposeAsNonLeader
}
Expand All @@ -308,6 +343,7 @@ func (d *DBState) Proposed(sender, me *drand.Participant, terms *drand.ProposalT
return nil, err
}

// if I've received a proposal, I must surely be in it!
if !util.Contains(terms.Joining, me) && !util.Contains(terms.Remaining, me) && !util.Contains(terms.Leaving, me) {
return nil, ErrSelfMissingFromProposal
}
Expand Down Expand Up @@ -358,10 +394,12 @@ func (d *DBState) Accepted(me *drand.Participant) (*DBState, error) {
return nil, ErrTimeoutReached
}

// Leavers get no say if the rest of the network wants them out
if util.Contains(d.Leaving, me) {
return nil, ErrCannotAcceptProposalWhereLeaving
}

// Joiners should run the `Join` command instead
if util.Contains(d.Joining, me) {
return nil, ErrCannotAcceptProposalWhereJoining
}
Expand All @@ -379,10 +417,12 @@ func (d *DBState) Rejected(me *drand.Participant) (*DBState, error) {
return nil, ErrTimeoutReached
}

// Joiners should just not run the `Join` command if they don't want to join
if util.Contains(d.Joining, me) {
return nil, ErrCannotRejectProposalWhereJoining
}

// Leavers get no say if the rest of the network wants them out
if util.Contains(d.Leaving, me) {
return nil, ErrCannotRejectProposalWhereLeaving
}
Expand Down Expand Up @@ -422,18 +462,21 @@ func (d *DBState) Evicted() (*DBState, error) {
}

func (d *DBState) Executing(me *drand.Participant) (*DBState, error) {
// we check the timeout first as we have additional branches for leaving
if hasTimedOut(d) {
return nil, ErrTimeoutReached
}

if util.Contains(d.Leaving, me) {
// leavers don't need to participate in the execution, so we can check it first
if util.Contains(d.Leaving, me) && isValidStateChange(d.State, Left) {
return d.Left(me)
}

if !isValidStateChange(d.State, Executing) {
return nil, InvalidStateChange(d.State, Executing)
}

// participants not in the DKG should not be executing!
if !util.Contains(d.Remaining, me) && !util.Contains(d.Joining, me) {
return nil, ErrCannotExecuteIfNotJoinerOrRemainer
}
Expand Down Expand Up @@ -465,6 +508,9 @@ func (d *DBState) Complete(finalGroup *key.Group, share *key.Share) (*DBState, e
return d, nil
}

// ReceivedAcceptance is used by nodes when they receive a gossiped acceptance packet
// they needn't necessarily collect _all_ acceptances for executing, but it gives them some insight into
// the state of the DKG when they run the status command
func (d *DBState) ReceivedAcceptance(me, them *drand.Participant) (*DBState, error) {
if d.State != Proposing {
return nil, InvalidStateChange(d.State, Proposing)
Expand All @@ -488,6 +534,9 @@ func (d *DBState) ReceivedAcceptance(me, them *drand.Participant) (*DBState, err
return d, nil
}

// ReceivedRejection is used by nodes when they receive a gossiped rejection packet
// they may not receive all rejections before executing, but it gives them some insight into
// the state of the DKG when they run the status command
func (d *DBState) ReceivedRejection(me, them *drand.Participant) (*DBState, error) {
if d.State != Proposing {
return nil, InvalidStateChange(d.State, Proposing)
Expand Down Expand Up @@ -524,7 +573,6 @@ var ErrGenesisSeedCannotChange = errors.New("genesis seed cannot change after th
var ErrTransitionTimeMustBeGenesisTime = errors.New("transition time must be the same as the genesis time for the first epoch")
var ErrTransitionTimeMissing = errors.New("transition time must be provided in a proposal")
var ErrTransitionTimeBeforeGenesis = errors.New("transition time cannot be before the genesis time")
var ErrTransitionTimeTooSoonAfterGenesis = errors.New("transition time cannot be sooner than one round after genesis")
var ErrSelfMissingFromProposal = errors.New("you must include yourself in a proposal")
var ErrCannotJoinIfNotInJoining = errors.New("you cannot join a proposal in which you are not a joiner")
var ErrJoiningAfterFirstEpochNeedsGroupFile = errors.New("joining after the first epoch requires a previous group file")
Expand Down Expand Up @@ -553,6 +601,8 @@ var ErrNonLeaderCannotReceiveRejection = errors.New("you received a rejection bu
var ErrFinalGroupCannotBeEmpty = errors.New("you cannot complete a DKG with a nil final group")
var ErrKeyShareCannotBeEmpty = errors.New("you cannot complete a DKG with a nil key share")

// isValidStateChange details all the viable state changes
//
//nolint:gocyclo
func isValidStateChange(current, next Status) bool {
switch current {
Expand Down Expand Up @@ -590,6 +640,7 @@ func hasTimedOut(details *DBState) bool {
}

func ValidateProposal(currentState *DBState, terms *drand.ProposalTerms) error {
// it shouldn't really be possible for the wrong beaconID to make its way here, but better safe than sorry :)
if currentState.BeaconID != terms.BeaconID {
return ErrInvalidBeaconID
}
Expand All @@ -611,6 +662,8 @@ func ValidateProposal(currentState *DBState, terms *drand.ProposalTerms) error {
return err
}

// some terms (such as genesis seed) get set during the first epoch
// additionally, we can't have remainers, `GenesisTime` == `TransitionTime`, amongst other things
if terms.Epoch == 1 {
return validateFirstEpoch(terms)
}
Expand All @@ -633,10 +686,14 @@ func ValidateProposal(currentState *DBState, terms *drand.ProposalTerms) error {
return ErrNoNodesRemaining
}

// there's no theoretical reason the leader can't be leaving, but from a practical perspective
// it makes sense in case e.g. the DKG fails or aborts
if util.Contains(terms.Leaving, terms.Leader) || !util.Contains(terms.Remaining, terms.Leader) {
return ErrLeaderNotRemaining
}

// nodes joining after the first epoch accept some things at face value
// nodes already in the network shouldn't accept e.g. a change of genesis time
if currentState.State != Fresh {
return validateReshare(currentState, terms)
}
Expand All @@ -645,6 +702,7 @@ func ValidateProposal(currentState *DBState, terms *drand.ProposalTerms) error {
}

func validateEpoch(currentState *DBState, terms *drand.ProposalTerms) error {
// epochs should be monotonically increasing
if terms.Epoch < currentState.Epoch {
return ErrInvalidEpoch
}
Expand All @@ -654,6 +712,7 @@ func validateEpoch(currentState *DBState, terms *drand.ProposalTerms) error {
return ErrInvalidEpoch
}

// if we have some leftover state after having left the network, we can accept higher epochs
if terms.Epoch > currentState.Epoch+1 && (currentState.State != Left && currentState.State != Fresh && currentState.State != Evicted) {
return ErrInvalidEpoch
}
Expand All @@ -670,6 +729,8 @@ func validateFirstEpoch(terms *drand.ProposalTerms) error {
if !util.Contains(terms.Joining, terms.Leader) {
return ErrLeaderNotJoining
}
// for the first epoch, the transition time and genesis time should be equal
// as the genesis is in some sense a 'transition'
if !terms.TransitionTime.AsTime().Equal(terms.GenesisTime.AsTime()) {
return ErrTransitionTimeMustBeGenesisTime
}
Expand Down
64 changes: 64 additions & 0 deletions internal/dkg/state_machine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -618,6 +618,18 @@ func TestJoiningADKGFromProposal(t *testing.T) {
},
expectedError: ErrCannotJoinIfNotInJoining,
},
{
name: "joining after first epoch without group file fails",
startingState: func() *DBState {
entry := NewCompleteDKGEntry(t, beaconID, Proposed, bob)
entry.Epoch = 2
return entry
}(),
transitionFn: func(in *DBState) (*DBState, error) {
return in.Joined(alice, nil)
},
expectedError: ErrJoiningAfterFirstEpochNeedsGroupFile,
},
}

RunStateChangeTest(t, tests)
Expand Down Expand Up @@ -1507,6 +1519,58 @@ func TestReceivedRejection(t *testing.T) {
RunStateChangeTest(t, tests)
}

func TestCompletion(t *testing.T) {
beaconID := "some-wonderful-beacon-id"
group := key.Group{
GenesisSeed: []byte("deadbeef"),
}
keyShare := key.Share{}
tests := []stateChangeTableTest{
{
name: "receiving a valid share and group file succeeds",
startingState: NewCompleteDKGEntry(t, beaconID, Executing, alice, bob),
transitionFn: func(in *DBState) (*DBState, error) {
return in.Complete(&group, &keyShare)
},
expectedResult: func() *DBState {
d := NewCompleteDKGEntry(t, beaconID, Complete, alice, bob)
d.KeyShare = &keyShare
d.FinalGroup = &group
d.GenesisSeed = group.GenesisSeed
return d
}(),
},
{
name: "cannot complete from non-executing state",
startingState: NewCompleteDKGEntry(t, beaconID, Proposed, alice, bob),
transitionFn: func(in *DBState) (*DBState, error) {
return in.Complete(&group, &keyShare)
},
expectedError: InvalidStateChange(Proposed, Complete),
},
{
name: "empty group file fails",
startingState: NewCompleteDKGEntry(t, beaconID, Executing, alice, bob),
transitionFn: func(in *DBState) (*DBState, error) {
return in.Complete(nil, &keyShare)
},
expectedError: ErrFinalGroupCannotBeEmpty,
},

{
name: "empty key share fails",
startingState: NewCompleteDKGEntry(t, beaconID, Executing, alice, bob),
transitionFn: func(in *DBState) (*DBState, error) {
return in.Complete(&group, nil)
},
expectedError: ErrKeyShareCannotBeEmpty,
},
}

RunStateChangeTest(t, tests)

}

type stateChangeTableTest struct {
name string
startingState *DBState
Expand Down
Loading

0 comments on commit 9ad37dd

Please sign in to comment.