From a57ad562757815958b749f7d5068b53090509afb Mon Sep 17 00:00:00 2001
From: hrt/derrandz
Date: Wed, 10 May 2023 07:19:38 +0100
Subject: [PATCH 1/3] fix(share/p2p/shrexeds): return correct err on context error (#2177)

Resolves item 7 on https://github.com/celestiaorg/celestia-node/issues/2176
---
 share/p2p/shrexeds/client.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/share/p2p/shrexeds/client.go b/share/p2p/shrexeds/client.go
index dccb5ce06a..5f3470179e 100644
--- a/share/p2p/shrexeds/client.go
+++ b/share/p2p/shrexeds/client.go
@@ -57,7 +57,7 @@ func (c *Client) RequestEDS(
 		log.Debugw("client: eds request to peer failed", "peer", peer, "hash", dataHash.String(), "error", err)
 		if errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) {
 			c.metrics.ObserveRequests(1, p2p.StatusTimeout)
-			return nil, ctx.Err()
+			return nil, err
 		}
 		// some net.Errors also mean the context deadline was exceeded, but yamux/mocknet do not
 		// unwrap to a ctx err
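For illustration, here is a minimal, self-contained sketch (not celestia-node code; `request` and its timeout are hypothetical) of why returning `err` rather than `ctx.Err()` matters in the branch above: the failure can carry `context.DeadlineExceeded` from a child context, such as a per-request or stream deadline, while the caller's `ctx` is still alive — in which case `ctx.Err()` is `nil` and the old code would have returned `(nil, nil)`.

```go
package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

// request simulates a call that fails with a deadline error coming from a
// child context, even though the caller's ctx is still alive.
func request(ctx context.Context) error {
	// hypothetical per-request timeout, much shorter than any caller deadline
	reqCtx, cancel := context.WithTimeout(ctx, time.Millisecond)
	defer cancel()
	<-reqCtx.Done()
	return reqCtx.Err() // context.DeadlineExceeded
}

func main() {
	ctx := context.Background() // the caller's ctx never expires
	err := request(ctx)

	// The branch in RequestEDS matches on the error itself...
	fmt.Println(errors.Is(err, context.DeadlineExceeded)) // true
	// ...but the caller's ctx reports no error, so `return nil, ctx.Err()`
	// would have lost the failure entirely.
	fmt.Println(ctx.Err()) // <nil>
}
```

Returning `err` preserves the actual failure in both the shared-deadline and per-request-deadline cases, while still satisfying the `errors.Is` checks callers perform.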
From 267b8ffd7cabaf7d3732c04ed2a4ea022d1337e4 Mon Sep 17 00:00:00 2001
From: Vlad <13818348+walldiss@users.noreply.github.com>
Date: Wed, 10 May 2023 17:06:13 +0800
Subject: [PATCH 2/3] fix(discovery): race in discovery tests (#2179)

## Overview

Fix for error:

```
WARNING: DATA RACE
Read at 0x000001ead5c8 by goroutine 282:
  github.com/celestiaorg/celestia-node/share/availability/discovery.(*Discovery).discoveryLoop()
      /home/runner/work/celestia-node/celestia-node/share/availability/discovery/discovery.go:174 +0x59
  github.com/celestiaorg/celestia-node/share/availability/discovery.(*Discovery).Start.func2()
      /home/runner/work/celestia-node/celestia-node/share/availability/discovery/discovery.go:99 +0x58

Previous write at 0x000001ead5c8 by goroutine 172:
  github.com/celestiaorg/celestia-node/share/availability/discovery.TestDiscovery()
      /home/runner/work/celestia-node/celestia-node/share/availability/discovery/discovery_test.go:31 +0x14c
  testing.tRunner()
      /opt/hostedtoolcache/go/1.20.3/x64/src/testing/testing.go:1576 +0x216
  testing.(*T).Run.func1()
      /opt/hostedtoolcache/go/1.20.3/x64/src/testing/testing.go:1629 +0x47

Goroutine 282 (running) created at:
  github.com/celestiaorg/celestia-node/share/availability/discovery.(*Discovery).Start()
      /home/runner/work/celestia-node/celestia-node/share/availability/discovery/discovery.go:99 +0x3ad
  github.com/celestiaorg/celestia-node/share/availability/discovery.(*testnet).discovery()
      /home/runner/work/celestia-node/celestia-node/share/availability/discovery/discovery_test.go:101 +0xb7
  github.com/celestiaorg/celestia-node/share/availability/discovery.TestDiscovery()
      /home/runner/work/celestia-node/celestia-node/share/availability/discovery/discovery_test.go:27 +0x138
  testing.tRunner()
      /opt/hostedtoolcache/go/1.20.3/x64/src/testing/testing.go:1576 +0x216
  testing.(*T).Run.func1()
      /opt/hostedtoolcache/go/1.20.3/x64/src/testing/testing.go:1629 +0x47

Goroutine 172 (running) created at:
  testing.(*T).Run()
      /opt/hostedtoolcache/go/1.20.3/x64/src/testing/testing.go:1629 +0x805
  testing.runTests.func1()
      /opt/hostedtoolcache/go/1.20.3/x64/src/testing/testing.go:2036 +0x8d
  testing.tRunner()
      /opt/hostedtoolcache/go/1.20.3/x64/src/testing/testing.go:1576 +0x216
  testing.runTests()
      /opt/hostedtoolcache/go/1.20.3/x64/src/testing/testing.go:2034 +0x87c
  testing.(*M).Run()
      /opt/hostedtoolcache/go/1.20.3/x64/src/testing/testing.go:1906 +0xb44
  main.main()
      _testmain.go:63 +0x2e9
```
---
 share/availability/discovery/discovery_test.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/share/availability/discovery/discovery_test.go b/share/availability/discovery/discovery_test.go
index eada732aa9..f0935086ef 100644
--- a/share/availability/discovery/discovery_test.go
+++ b/share/availability/discovery/discovery_test.go
@@ -19,6 +19,8 @@ import (
 func TestDiscovery(t *testing.T) {
 	const nodes = 10 // higher number brings higher coverage
 
+	discoveryRetryTimeout = time.Millisecond * 100 // defined in discovery.go
+
 	ctx, cancel := context.WithTimeout(context.Background(), time.Second*30)
 	t.Cleanup(cancel)
 
@@ -28,7 +30,6 @@ func TestDiscovery(t *testing.T) {
 		WithPeersLimit(nodes),
 		WithAdvertiseInterval(-1),
 	)
-	discoveryRetryTimeout = time.Millisecond * 100 // defined in discovery.go
 
 	type peerUpdate struct {
 		peerID peer.ID
@@ -42,7 +43,6 @@ func TestDiscovery(t *testing.T) {
 	discs := make([]*Discovery, nodes)
 	for i := range discs {
 		discs[i] = tn.discovery(WithPeersLimit(0), WithAdvertiseInterval(time.Millisecond*100))
-		discoveryRetryTimeout = -1 // defined in discovery.go
 
 		select {
 		case res := <-updateCh:

From e632a471e6ab79003ba4f4efd54a92417eb276f2 Mon Sep 17 00:00:00 2001
From: Vlad <13818348+walldiss@users.noreply.github.com>
Date: Wed, 10 May 2023 17:17:01 +0800
Subject: [PATCH 3/3] fix(daser): do not spawn retry job twice (#2178)

## Overview

In some rare cases, a header height that was not yet available for sampling could be picked up by both the `recent` and `catchup` workers, resulting in two parallel `retry` workers being spawned for the same header height. This PR fixes that case.
---
 das/state.go | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/das/state.go b/das/state.go
index 12de2a0b01..894bf742ba 100644
--- a/das/state.go
+++ b/das/state.go
@@ -96,6 +96,10 @@ func (s *coordinatorState) handleResult(res result) {
 		// if job was already in retry and failed again, carry over attempt count
 		lastRetry, ok := s.inRetry[h]
 		if ok {
+			if res.job.jobType != retryJob {
+				// retry job has been already created by another worker (recent or catchup)
+				continue
+			}
 			delete(s.inRetry, h)
 		}
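A standalone sketch of the guard added above (hypothetical types and values; the real `jobType`, `inRetry`, and `handleResult` live in `das/state.go`): a failed result for a height already tracked in `inRetry` is skipped unless it was produced by the retry worker itself, so `recent` and `catchup` workers cannot create a second retry job for the same height.

```go
package main

import "fmt"

type jobType int

const (
	recentJob jobType = iota
	catchupJob
	retryJob
)

func main() {
	// heights currently owned by a retry job, with their attempt counts
	inRetry := map[uint64]int{42: 3}

	// a catchup worker reports that sampling height 42 failed
	failedHeight, from := uint64(42), catchupJob

	if attempts, ok := inRetry[failedHeight]; ok {
		if from != retryJob {
			// a retry job already owns this height; don't spawn a second one
			fmt.Println("skip: height already in retry")
			return
		}
		// the retry job itself failed: carry over the attempt count and re-queue
		delete(inRetry, failedHeight)
		fmt.Println("re-queue height", failedHeight, "attempt", attempts+1)
	}
}
```

The key design point is that only the retry worker may recycle an `inRetry` entry; results from other workers for an already-retried height are treated as duplicates and dropped.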