/
rpc.go
228 lines (203 loc) · 5.62 KB
/
rpc.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
package main
import (
"context"
"errors"
"fmt"
"time"
rpchttp "github.com/tendermint/tendermint/rpc/client/http"
rpctypes "github.com/tendermint/tendermint/rpc/coretypes"
e2e "github.com/tendermint/tendermint/test/e2e/pkg"
"github.com/tendermint/tendermint/types"
)
// waitForHeight waits for the network to reach a certain height (or above),
// returning the block at the target height together with its block ID. It
// errors if the network is not making progress at all.
//
// If height == 0, the initial height of the test network is used as the target.
//
// Only nodes that are not stateless and are marked as started are polled; a
// polling pass runs immediately and then again one second after each pass
// completes. The function returns an error when ctx is done, when no RPC
// client could be created for any node, or when a full minute passes without
// any polled node reporting a new maximum height.
func waitForHeight(ctx context.Context, testnet *e2e.Testnet, height int64) (*types.Block, *types.BlockID, error) {
	var (
		clients         = map[string]*rpchttp.HTTP{}
		lastHeight      int64
		lastIncrease    = time.Now()
		nodesAtHeight   = map[string]struct{}{}
		numRunningNodes int
	)

	if height == 0 {
		height = testnet.InitialHeight
	}

	// Count the nodes expected to reach the target height. This snapshot is
	// taken once, before polling begins.
	for _, node := range testnet.Nodes {
		if node.Stateless() {
			continue
		}
		if node.HasStarted {
			numRunningNodes++
		}
	}

	// A zero-duration timer makes the first polling pass run immediately.
	timer := time.NewTimer(0)
	defer timer.Stop()

	for {
		select {
		case <-ctx.Done():
			return nil, nil, ctx.Err()
		case <-timer.C:
			for _, node := range testnet.Nodes {
				// skip nodes that have reached the target height
				if _, ok := nodesAtHeight[node.Name]; ok {
					continue
				}

				// skip nodes that don't have state or haven't started yet
				if node.Stateless() {
					continue
				}
				if !node.HasStarted {
					continue
				}

				// cache the clients
				client, ok := clients[node.Name]
				if !ok {
					var err error
					client, err = node.Client()
					if err != nil {
						continue
					}
					clients[node.Name] = client
				}

				// Release the per-request context as soon as the call
				// returns. Deferring here would keep one context (and its
				// timer) alive per node per pass until the whole function
				// returns.
				wctx, cancel := context.WithTimeout(ctx, 10*time.Second)
				result, err := client.Status(wctx)
				cancel()
				if err != nil {
					continue
				}

				if result.SyncInfo.LatestBlockHeight > lastHeight {
					lastHeight = result.SyncInfo.LatestBlockHeight
					lastIncrease = time.Now()
				}

				if result.SyncInfo.LatestBlockHeight >= height {
					// the node has achieved the target height!
					// add this node to the set of target
					// height nodes
					nodesAtHeight[node.Name] = struct{}{}
				}
			}

			// Once every running node is at or above the target height,
			// fetch the block for that height and return it. We try all
			// clients because some nodes may have pruning enabled and no
			// longer hold the block. Doing this after the per-node loop
			// (rather than only at the moment the last node catches up)
			// means a failed fetch is retried on the next pass.
			if len(nodesAtHeight) > 0 && len(nodesAtHeight) >= numRunningNodes {
				for _, c := range clients {
					result, err := c.Block(ctx, &height)
					if err != nil || result == nil || result.Block == nil {
						continue
					}
					return result.Block, &result.BlockID, nil
				}
			}

			if len(clients) == 0 {
				return nil, nil, errors.New("unable to connect to any network nodes")
			}

			if time.Since(lastIncrease) >= time.Minute {
				if lastHeight == 0 {
					return nil, nil, errors.New("chain stalled at unknown height (most likely upon starting)")
				}

				return nil, nil, fmt.Errorf("chain stalled at height %v [%d of %d nodes %+v]",
					lastHeight,
					len(nodesAtHeight),
					numRunningNodes,
					nodesAtHeight)
			}

			timer.Reset(1 * time.Second)
		}
	}
}
// waitForNode polls a node's status endpoint until the node reports a latest
// block height of at least height, and returns that status.
//
// Seed-mode nodes are not polled: the function returns (nil, nil) for them.
// The first poll happens immediately and subsequent polls are spaced 250ms
// apart. Every 500th unsuccessful attempt is logged; if that attempt failed
// with an error, a fresh client is created before the next poll. The function
// returns an error when ctx is done, when a client cannot be created, or when
// the status request fails with a deadline or cancellation error.
func waitForNode(ctx context.Context, node *e2e.Node, height int64) (*rpctypes.ResultStatus, error) {
	if node.Mode == e2e.ModeSeed {
		return nil, nil
	}

	client, err := node.Client()
	if err != nil {
		return nil, err
	}

	timer := time.NewTimer(0)
	defer timer.Stop()

	reconnect := false
	for attempt := 1; ; attempt++ {
		if reconnect {
			// The previously logged attempt ended in an error, so rebuild
			// the client to force a reconnection.
			reconnect = false
			if client, err = node.Client(); err != nil {
				return nil, err
			}
		}

		select {
		case <-ctx.Done():
			return nil, ctx.Err()
		case <-timer.C:
		}

		status, err := client.Status(ctx)
		if errors.Is(err, context.DeadlineExceeded) {
			return nil, fmt.Errorf("timed out waiting for %v to reach height %v", node.Name, height)
		}
		if errors.Is(err, context.Canceled) {
			return nil, err
		}
		if err == nil && status.SyncInfo.LatestBlockHeight >= height {
			return status, nil
		}

		// Only report (and schedule a reconnect) on every 500th attempt to
		// keep the log quiet while the node is starting up.
		if attempt%500 == 0 {
			if err != nil {
				reconnect = true
				logger.Error("node not yet ready",
					"iter", attempt,
					"node", node.Name,
					"target", height,
					"err", err,
				)
			} else if status != nil {
				logger.Info("node not yet ready",
					"iter", attempt,
					"node", node.Name,
					"height", status.SyncInfo.LatestBlockHeight,
					"target", height,
				)
			}
		}

		timer.Reset(250 * time.Millisecond)
	}
}
// getLatestBlock returns the last block that all active nodes in the network
// have agreed upon, i.e. the earliest of each node's latest block.
//
// Stateless nodes and nodes that have not started are skipped. Each node is
// asked for its latest block with a 10 second timeout. The function returns
// the first error encountered while creating a client or fetching a block,
// and returns a nil block (with a nil error) if no node yielded a block.
func getLatestBlock(ctx context.Context, testnet *e2e.Testnet) (*types.Block, error) {
	var earliestBlock *types.Block

	for _, node := range testnet.Nodes {
		// skip nodes that don't have state or haven't started yet
		if node.Stateless() || !node.HasStarted {
			continue
		}

		client, err := node.Client()
		if err != nil {
			return nil, err
		}

		// Cancel immediately after the request instead of deferring: a defer
		// inside the loop would hold every node's timeout context open until
		// the function returns.
		wctx, cancel := context.WithTimeout(ctx, 10*time.Second)
		result, err := client.Block(wctx, nil)
		cancel()
		if err != nil {
			return nil, err
		}

		if result.Block != nil && (earliestBlock == nil || earliestBlock.Height > result.Block.Height) {
			earliestBlock = result.Block
		}
	}

	return earliestBlock, nil
}