Skip to content

Commit

Permalink
Major test fixes — removing time dependencies
Browse files Browse the repository at this point in the history
* v1.2.0

* Attempt to time-out potentially stalled watches (#843)

* Time-out potentially stalled watches

* waiting rounds on node before moving on

* fix issue with not waiting on correct round

* fix demo orchestration test timeout

* fix data race in test orchestrator

* fix check on dkg reshare timeout test

* fix linting issues

* better wait

* remove unused linter directives

* using reshare config

* fix data race

* linting

Co-authored-by: Will Scott <will@cypherpunk.email>
Co-authored-by: Will Scott <will.scott@protocol.ai>
  • Loading branch information
3 people committed Nov 10, 2021
1 parent aa7f6e5 commit b30b074
Show file tree
Hide file tree
Showing 8 changed files with 333 additions and 87 deletions.
4 changes: 4 additions & 0 deletions core/drand.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,10 @@ type Drand struct {

// version indicates the base code variant
version common.Version

// only used for testing at the moment - may be useful later
// to pinpoint the exact messages from all nodes during dkg
dkgBoardSetup func(Broadcast) Broadcast
}

// NewDrand returns a drand struct. It assumes the private key pair
Expand Down
8 changes: 7 additions & 1 deletion core/drand_control.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ func (d *Drand) leaderRunSetup(newSetup func(d *Drand) (*setupManager, error)) (

if d.manager != nil {
d.log.Infow("", "beacon_id", d.manager.beaconID, "reshare", "already_in_progress", "restart", "reshare", "old")
fmt.Println("\n\n PRE EMPTIVE STOP")
d.manager.StopPreemptively()
}

Expand All @@ -106,6 +107,7 @@ func (d *Drand) leaderRunSetup(newSetup func(d *Drand) (*setupManager, error)) (
defer func() {
// don't clear manager if pre-empted
if err == errPreempted {
fmt.Println("PRE EMPTION ERROR ", err)
return
}
d.state.Lock()
Expand Down Expand Up @@ -267,10 +269,14 @@ func (d *Drand) runResharing(leader bool, oldGroup, newGroup *key.Group, timeout
}

allNodes := nodeUnion(oldGroup.Nodes, newGroup.Nodes)
board := newEchoBroadcast(d.log, d.version, beaconID, d.privGateway.ProtocolClient,
var board Broadcast = newEchoBroadcast(d.log, d.version, beaconID, d.privGateway.ProtocolClient,
d.priv.Public.Address(), allNodes, func(p dkg.Packet) error {
return dkg.VerifyPacketSignature(config, p)
})

if d.dkgBoardSetup != nil {
board = d.dkgBoardSetup(board)
}
phaser := d.getPhaser(timeout, beaconID)

dkgProto, err := dkg.NewProtocol(config, board, phaser, true)
Expand Down
125 changes: 96 additions & 29 deletions core/drand_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,12 @@ func TestRunDKGBroadcastDeny(t *testing.T) {
dt.SetMockClock(t, group1.GenesisTime)
dt.AdvanceMockClock(t, 1*time.Second)

group2, err := dt.RunReshare(t, nil, n, 0, thr, 1*time.Second, false, false, false)
group2, err := dt.RunReshare(t,
&reshareConfig{
oldRun: n,
newThr: thr,
timeout: time.Second,
})
require.NoError(t, err)
require.NotNil(t, group2)

Expand All @@ -179,32 +184,66 @@ func TestRunDKGReshareForce(t *testing.T) {
group1 := dt.RunDKG()

dt.SetMockClock(t, group1.GenesisTime)
dt.AdvanceMockClock(t, 1*time.Second)

// wait to get first round
t.Logf("Getting round %d", 0)
err := dt.WaitUntilRound(t, dt.nodes[0], 1)
require.NoError(t, err)

// run the resharing
stateCh := make(chan int)
errFirstTry := make(chan error)
go func() {
t.Log("[ReshareForce] Start reshare")
_, err := dt.RunReshare(t, stateCh, oldNodes, 0, oldThreshold, timeout, false, true, false)
require.Error(t, err)
_, err := dt.RunReshare(t,
&reshareConfig{
stateCh: stateCh,
oldRun: oldNodes,
newThr: oldThreshold,
timeout: timeout,
onlyLeader: true,
})
errFirstTry <- err
}()

LOOP:
for {
var resharingRunning bool
for !resharingRunning {
select {
case state := <-stateCh:
if state == ReshareUnlock {
break LOOP
resharingRunning = true
}
case <-time.After(2 * time.Minute):
t.Errorf("Timeout waiting reshare process to get unlock phase")
}
}
// first resharing should fail
select {
case err := <-errFirstTry:
require.Error(t, err)
case <-time.After(2 * time.Minute):
t.Errorf("timeout of the first resharing output")
}

// do a few periods
for i := 0; i < 2; i++ {
dt.AdvanceMockClock(t, group1.Period)
err := dt.WaitUntilRound(t, dt.nodes[0], uint64(2+i))
require.NoError(t, err)
}

// force
t.Log("[reshare] Start again!")
group3, err := dt.RunReshare(t, nil, oldNodes, 0, oldThreshold, timeout, true, false, false)
require.NoError(t, err)
group3, err := dt.RunReshare(t,
&reshareConfig{
oldRun: oldNodes,
newThr: oldThreshold,
timeout: timeout,
force: true,
})

// second resharing should succeed
require.NoError(t, err, "second resharing failed")

t.Log("[reshare] Move to response phase!")
t.Logf("[reshare] Group: %s", group3)
Expand Down Expand Up @@ -252,7 +291,13 @@ func TestRunDKGReshareAbsentNode(t *testing.T) {
}

t.Log("Setup reshare done. Starting reshare... Ignoring reshare errors")
newGroup, err := dt.RunReshare(t, nil, oldNodes, nodesToAdd, newThreshold, timeout, false, false, true)
newGroup, err := dt.RunReshare(t, &reshareConfig{
oldRun: oldNodes,
newRun: nodesToAdd,
newThr: newThreshold,
timeout: timeout,
ignoreErr: true,
})
require.NoError(t, err)
require.NotNil(t, newGroup)

Expand All @@ -262,6 +307,7 @@ func TestRunDKGReshareAbsentNode(t *testing.T) {
require.Nil(t, newGroup.Find(missingPublic), "missing public is found", missingPublic)
}

// nolint:funlen
// The test creates the scenario where one node made a complaint during the DKG, at the second phase, so normally,
// there should be a "Justification" at the third phase. In this case, there is not. This scenario
// can happen if there is an offline node right at the beginning of the DKG that doesn't even send any message.
Expand Down Expand Up @@ -300,7 +346,13 @@ func TestRunDKGReshareTimeout(t *testing.T) {
var doneReshare = make(chan *key.Group)
go func() {
t.Log("[reshare] Start reshare")
group, err := dt.RunReshare(t, nil, nodesToKeep, nodesToAdd, newThreshold, timeout, false, false, false)
group, err := dt.RunReshare(t,
&reshareConfig{
oldRun: nodesToKeep,
newRun: nodesToAdd,
newThr: newThreshold,
timeout: timeout,
})
require.NoError(t, err)
doneReshare <- group
}()
Expand Down Expand Up @@ -342,18 +394,12 @@ func TestRunDKGReshareTimeout(t *testing.T) {
transitionTime := resharedGroup.TransitionTime
now := dt.Now().Unix()

// get rounds from first node in the "old" group - since he's the leader for
// the new group, he's alive
t.Log("Check Beacon Public on Leader")
lastBeacon := dt.CheckPublicBeacon(dt.Ids(1, false)[0], false)

// move to the transition time period by period - do not skip potential
// periods as to emulate the normal time behavior
for now < transitionTime-1 {
dt.AdvanceMockClock(t, beaconPeriod)

t.Log("Check Beacon Public on Leader")
lastBeacon = dt.CheckPublicBeacon(dt.Ids(1, false)[0], false)
dt.CheckPublicBeacon(dt.Ids(1, false)[0], false)
now = dt.Now().Unix()
}

Expand All @@ -362,8 +408,19 @@ func TestRunDKGReshareTimeout(t *testing.T) {
time.Sleep(getSleepDuration())

// test that all nodes in the new group have generated a new beacon
t.Log("Check Beacon Length")
dt.CheckBeaconLength(t, dt.resharedNodes, int(lastBeacon.Round+1))
root := dt.resharedNodes[0].drand
rootID := root.priv.Public
cm := root.opts.certmanager
client := net.NewGrpcClientFromCertManager(cm)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
resp, err := client.PublicRand(ctx, rootID, new(drand.PublicRandRequest))
require.NoError(t, err)
for _, n := range dt.resharedNodes[1:] {
resp2, err := client.PublicRand(ctx, n.drand.priv.Public, new(drand.PublicRandRequest))
require.NoError(t, err)
require.Equal(t, resp, resp2)
}
}

// nolint:funlen
Expand Down Expand Up @@ -421,7 +478,12 @@ func TestRunDKGResharePreempt(t *testing.T) {
// run the resharing
var doneReshare = make(chan *key.Group, 1)
go func() {
g, err := dt.RunReshare(t, nil, oldN, 0, Thr, timeout, false, false, false)
g, err := dt.RunReshare(t,
&reshareConfig{
oldRun: oldN,
newThr: Thr,
timeout: timeout,
})
require.NoError(t, err)
doneReshare <- g
}()
Expand Down Expand Up @@ -607,21 +669,20 @@ func TestDrandPublicStream(t *testing.T) {
dt.SetMockClock(t, group.GenesisTime)
dt.WaitUntilChainIsServing(t, dt.nodes[0])

err := dt.WaitUntilRound(t, dt.nodes[0], 1)
require.NoError(t, err)

// do a few periods
for i := 0; i < 3; i++ {
dt.AdvanceMockClock(t, group.Period)

err = dt.WaitUntilRound(t, dt.nodes[0], uint64(i+2))
// +2 because rounds start at 1, and at genesis time drand has
// already generated the first round
err := dt.WaitUntilRound(t, dt.nodes[0], uint64(i+2))
require.NoError(t, err)
}

cm := root.drand.opts.certmanager
client := net.NewGrpcClientFromCertManager(cm)

ctx, cancel := context.WithCancel(context.Background())
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
defer cancel()

// get last round first
Expand Down Expand Up @@ -705,13 +766,12 @@ func TestDrandFollowChain(t *testing.T) {
dt.SetMockClock(t, group.GenesisTime)
dt.WaitUntilChainIsServing(t, dt.nodes[0])

err := dt.WaitUntilRound(t, dt.nodes[0], 1)
require.NoError(t, err)

// do a few periods
for i := 0; i < 6; i++ {
dt.AdvanceMockClock(t, group.Period)

// +2 because rounds start at 1, and at genesis time drand has
// already generated the first round
err := dt.WaitUntilRound(t, dt.nodes[0], uint64(i+2))
require.NoError(t, err)
}
Expand Down Expand Up @@ -819,6 +879,10 @@ func TestDrandPublicStreamProxy(t *testing.T) {
// do a few periods
for i := 0; i < 3; i++ {
dt.AdvanceMockClock(t, group.Period)
// +2 because rounds start at 1, and at genesis time drand has
// already generated the first round
err := dt.WaitUntilRound(t, dt.nodes[0], uint64(i+2))
require.NoError(t, err)
}

client := &drandProxy{root.drand}
Expand Down Expand Up @@ -851,6 +915,9 @@ func TestDrandPublicStreamProxy(t *testing.T) {
for round := initRound; round < maxRound; round++ {
// move time to next period
dt.AdvanceMockClock(t, group.Period)
err := dt.WaitUntilRound(t, dt.nodes[0], round)
require.NoError(t, err)

beacon, ok = <-rc

require.True(t, ok)
Expand Down
1 change: 1 addition & 0 deletions core/group_setup.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ type setupManager struct {
hashedSecret []byte
}

// nolint: gocritic
func newDKGSetup(
l log.Logger,
c clock.Clock,
Expand Down
Loading

0 comments on commit b30b074

Please sign in to comment.