Skip to content

Commit f8af130

Browse files
committed
[FAB-9157] serialize channel manipulation in kafka
The 'doneReprocessingMsgInFlight' channel reference was modified at runtime without serialization. This resulted in a race between routines in WaitReady and the modifications in processRegular. Wrapping the access and modification with a mutex resolves the race. Change-Id: I68a001b8584a1b17865701c2117fcaa28a72ffcc Signed-off-by: Matthew Sykes <sykesmat@us.ibm.com>
1 parent a444aa6 commit f8af130

File tree

1 file changed

+25
-5
lines changed

1 file changed

+25
-5
lines changed

orderer/consensus/kafka/chain.go

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ package kafka
99
import (
1010
"fmt"
1111
"strconv"
12+
"sync"
1213
"time"
1314

1415
"github.com/Shopify/sarama"
@@ -93,6 +94,8 @@ type chainImpl struct {
9394
parentConsumer sarama.Consumer
9495
channelConsumer sarama.PartitionConsumer
9596

97+
// mutex used when changing the doneReprocessingMsgInFlight
98+
doneReprocessingMutex sync.Mutex
9699
// notification that there are in-flight messages need to wait for
97100
doneReprocessingMsgInFlight chan struct{}
98101

@@ -161,15 +164,32 @@ func (chain *chainImpl) WaitReady() error {
161164
select {
162165
case <-chain.haltChan: // The chain has been halted, stop here
163166
return fmt.Errorf("consenter for this channel has been halted")
164-
// Block waiting for all re-submitted messages to be reprocessed
165-
case <-chain.doneReprocessingMsgInFlight:
167+
case <-chain.doneReprocessing(): // Block waiting for all re-submitted messages to be reprocessed
166168
return nil
167169
}
168170
default: // Not ready yet
169171
return fmt.Errorf("will not enqueue, consenter for this channel hasn't started yet")
170172
}
171173
}
172174

175+
func (chain *chainImpl) doneReprocessing() <-chan struct{} {
176+
chain.doneReprocessingMutex.Lock()
177+
defer chain.doneReprocessingMutex.Unlock()
178+
return chain.doneReprocessingMsgInFlight
179+
}
180+
181+
func (chain *chainImpl) reprocessConfigComplete() {
182+
chain.doneReprocessingMutex.Lock()
183+
defer chain.doneReprocessingMutex.Unlock()
184+
close(chain.doneReprocessingMsgInFlight)
185+
}
186+
187+
func (chain *chainImpl) reprocessConfigPending() {
188+
chain.doneReprocessingMutex.Lock()
189+
defer chain.doneReprocessingMutex.Unlock()
190+
chain.doneReprocessingMsgInFlight = make(chan struct{})
191+
}
192+
173193
// Implements the consensus.Chain interface. Called by Broadcast().
174194
func (chain *chainImpl) Order(env *cb.Envelope, configSeq uint64) error {
175195
return chain.order(env, configSeq, int64(0))
@@ -771,7 +791,7 @@ func (chain *chainImpl) processRegular(regularMessage *ab.KafkaMessageRegular, r
771791
regularMessage.ConfigSeq == seq { // AND we don't need to resubmit it again
772792
logger.Debugf("[channel: %s] Config message with original offset %d is the last in-flight resubmitted message"+
773793
"and it does not require revalidation, unblock ingress messages now", chain.ChainID(), regularMessage.OriginalOffset)
774-
close(chain.doneReprocessingMsgInFlight) // Therefore, we could finally close the channel to unblock broadcast
794+
chain.reprocessConfigComplete() // Therefore, we could finally unblock broadcast
775795
}
776796

777797
// Somebody resubmitted message at offset X, whereas we didn't. This is due to non-determinism where
@@ -798,8 +818,8 @@ func (chain *chainImpl) processRegular(regularMessage *ab.KafkaMessageRegular, r
798818
}
799819

800820
logger.Debugf("[channel: %s] Resubmitted config message with offset %d, block ingress messages", chain.ChainID(), receivedOffset)
801-
chain.lastResubmittedConfigOffset = receivedOffset // Keep track of last resubmitted message offset
802-
chain.doneReprocessingMsgInFlight = make(chan struct{}) // Create the channel to block ingress messages
821+
chain.lastResubmittedConfigOffset = receivedOffset // Keep track of last resubmitted message offset
822+
chain.reprocessConfigPending() // Begin blocking ingress messages
803823

804824
return nil
805825
}

0 commit comments

Comments
 (0)