network: discard unrequested or stale block messages #5431
Check the outstandingTopicRequests counter here?
At this point netB has already disconnected from netA, so we can't check that: the counter lived on the peer. Trying to monitor it flipping negative while the peer is in the process of disconnecting would cause a data race, I believe.
You can't cause a race on an atomic counter, though?
The relevant counter here is on netB's peer struct representing netA. The race wouldn't be on the atomic counter itself but on the fact that the peer I'm trying to check the counter on is in the process of being destroyed. Either way, I removed the offending log check for this one but kept it for the next case in netC so far.
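For reference, a minimal sketch of the counter pattern under discussion, with illustrative names (the real field lives on the wsPeer struct):

```go
package network

import "sync/atomic"

// Illustrative sketch only; the real counter lives on the wsPeer struct.
type peer struct {
	// Counts topic (e.g. block) requests sent to this peer that have not
	// yet been answered. Accessed atomically.
	outstandingTopicRequests int64
}

func (p *peer) requestSent() { atomic.AddInt64(&p.outstandingTopicRequests, 1) }

// responseReceived returns the new count; a negative value means we received
// a response we never asked for, or one we had already given up on. The
// atomic ops themselves are race-free; the race discussed above is on the
// lifetime of the peer object being read while it is torn down.
func (p *peer) responseReceived() int64 {
	return atomic.AddInt64(&p.outstandingTopicRequests, -1)
}
```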
Checking log content seems brittle if the message changes. Is the concern here to confirm that it didn't disconnect for any other reason? If so, that might justify having counters for disconnect reasons, which you could extend with this new reason.
Yeah, it is somewhat brittle, but it's also an easy fix if the log message changes. I'm open to adding counters, although we do use this pattern elsewhere in the code already.
You are already bumping networkConnectionsDroppedTotal.Inc(map[string]string{"reason": "protocol"}), so you could check that?
But isn't it good enough to assert that the disconnect happens (with the new reason code, for example)? Asserting actual behavior rather than log output seems better.
Agreed that it's good enough, and I've changed the reason code.
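A sketch of what asserting on the metric instead of the log might look like; countersSnapshot and sendStaleBlockMessage are hypothetical stand-ins for the test harness, and only the "protocol" label is taken from the call quoted above:

```go
package network

import (
	"testing"
	"time"

	"github.com/stretchr/testify/require"
)

// countersSnapshot and sendStaleBlockMessage are hypothetical helpers; the
// "protocol" label matches the networkConnectionsDroppedTotal bump above.
func TestStaleBlockMessageDisconnects(t *testing.T) {
	before := countersSnapshot()["protocol"]

	sendStaleBlockMessage() // provoke the unrequested/stale block response

	// Wait for the disconnect to be recorded rather than grepping the log.
	require.Eventually(t, func() bool {
		return countersSnapshot()["protocol"] > before
	}, 5*time.Second, 50*time.Millisecond)
}
```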
netA and netB are Stopped via defer; if the test fails, will this netC be left open?
There is a defer for netC as well, on line 4009.
The reason I'm also stopping manually (and doing the same for netB) is that otherwise reading the log would be a data race.
Maybe we should not read the log then?
I removed one of the log-reading checks from netB, since I agree that the disconnect reason is a good enough check. But this case doesn't warrant a disconnect: we did make the request (or more than one); we are just no longer interested in the response.
Do you want me to introduce a new counter here and check that instead of the log?
Rather than asserting log behavior, why not assert actual behavior, e.g. that the message was discarded?
I don't think anything else happens as a side effect here that I could check. We aren't disconnecting or bumping any counters. I did want to distinguish it from the fall-through case of going through the unmarshalling process in the switch statement below, though.
The behavior is that we're dropping the message, so the handler is not called. There are some similar tests that register handlers for certain tags and count the number of calls.
Oh, this tag doesn't use a handler; it's handled inline. So weird.
Indeed. Should we make a TS handler to make it more consistent as part of this?
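For context, a sketch of the handler-counting test pattern mentioned above. It follows the network package's RegisterHandlers flow, but note it assumes the TS tag were routed through normal dispatch, which, as just noted, it currently is not:

```go
package network

import (
	"sync/atomic"
	"testing"

	"github.com/stretchr/testify/require"

	"github.com/algorand/go-algorand/protocol"
)

// countingHandler counts how many times a tag's handler fires. Sketch only:
// TS is handled inline today, so this presumes it went through dispatch.
type countingHandler struct{ calls int64 }

func (h *countingHandler) Handle(msg IncomingMessage) OutgoingMessage {
	atomic.AddInt64(&h.calls, 1)
	return OutgoingMessage{}
}

func assertStaleMessageDropped(t *testing.T, netC GossipNode) {
	counter := &countingHandler{}
	netC.RegisterHandlers([]TaggedMessageHandler{
		{Tag: protocol.TopicMsgRespTag, MessageHandler: counter},
	})
	// ... deliver the stale TS message to netC here ...
	require.Zero(t, atomic.LoadInt64(&counter.calls),
		"a stale TS message should be discarded before dispatch")
}
```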
What is the rationale for punishing slow nodes by disconnecting from them? They are not malicious, just slow, so why is it a protocol violation to send you what you asked for, just a little late?
It seems like this would lead to a lot of unnecessary disconnections if the network got into some kind of poorly performing state: peers requesting blocks from each other while bogged down exhausting resources for other reasons would, on top of that, start disconnecting from each other.
That's a valid point; we can increase this or drop it.
Alternatively, we could make it a protocol validation by checking our receive time and not sending anything out if we know that the requesting peer is guaranteed to no longer be expecting it.
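A hypothetical sketch of that alternative (function and parameter names are invented, not the PR's code):

```go
package network

import "time"

// Hypothetical sketch: skip serving a topic request once the requester's
// response window (the 4-second handler timeout mentioned below) has
// certainly elapsed.
func shouldServe(receivedAt time.Time, responseTimeout time.Duration) bool {
	// The requester deregisters its response handler after responseTimeout,
	// so any later reply is guaranteed to be discarded on arrival.
	return time.Since(receivedAt) <= responseTimeout
}
```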
The code below will fail to serve the request anyway, since the handler to write the response to is removed after 4 seconds.
Maybe 4 seconds is too short for full blocks.
Yeah, for a slow connection 4 seconds seems kind of aggressive for a full block + cert.
If my node is on a slow connection or a slow box, or my node ran out of CPU/memory/disk IO for a little while, it could be entirely my fault that I'm receiving messages slowly, not the other end's. It doesn't seem to me like you can reliably bring the timing of messages into protocol rules...
Agreed; I removed the timestamp logic in favor of counting the number of topic requests we've sent to the peer.
Should we change the timeout as part of this PR?
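A minimal sketch of that counting approach, assuming the check sits in the wsPeer read loop (the method name is illustrative):

```go
package network

import "sync/atomic"

// Sketch of the counting approach described above: a TS message is
// unrequested or stale exactly when decrementing the in-flight counter
// would take it negative.
func (wp *wsPeer) shouldDiscardTopicResponse() bool {
	if atomic.AddInt64(&wp.outstandingTopicRequests, -1) < 0 {
		// We never asked, or we already gave up waiting: restore the
		// counter and drop the message without unmarshalling it.
		atomic.AddInt64(&wp.outstandingTopicRequests, 1)
		return true
	}
	return false
}
```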
Similarly, maybe this is not important enough to be Warnf level? io.Discard.Write() can't fail, but I guess reader.Read() could return an err...
Oh I see, we seem to have wp.reportReadErr(err) just for that, and it has its own special handling of when and how to log read errors from peers.
Also, shouldn't you disconnect here? That's what happens for other reader Read errors.
Agreed, made the change
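A rough sketch of the resulting drain-and-disconnect handling, factored out of the read loop for illustration (the real code is inline and may differ):

```go
package network

import "io"

// Sketch: the unwanted message body is copied to io.Discard, and a failed
// read is treated like any other reader error, including disconnecting.
func (wp *wsPeer) drainStaleMessage(reader io.Reader) {
	if _, err := io.Copy(io.Discard, reader); err != nil {
		wp.reportReadErr(err) // the read-error logging path quoted above
		wp.internalClose(disconnectReadError)
	}
}
```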
Warnf goes to telemetry by default, but this doesn't seem very important. Could we make this Infof?
Sure, but it's nice to have telemetry on how often this happens. The current behavior actually logs this case to telemetry as well, just not until after it unmarshalls the message on line 581, so this doesn't increase the number of telemetry messages we expect to receive. Even so, I'm happy to downgrade if others agree, and to do the same for the other place where we log this.
Since you're doing all the work of taking the lock, could you instead return the len directly, and let the caller decide to compare it with 0?
I think I have a slight preference for the way it is currently, but I'm happy to change it if there's a +1.
I just don't think we will be checking the length of this outside of this use case, and to me this parses slightly more easily in the conditional.
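For illustration, the two shapes being discussed, with invented type and field names (the real structure is whatever the PR guards with its lock):

```go
package network

import "sync"

// requestTracker is a hypothetical stand-in for the locked structure in the
// PR. Both accessors take the same lock, so the only difference is whether
// the comparison with 0 lives here or at the call site.
type requestTracker struct {
	mu       sync.Mutex
	requests map[string]struct{}
}

// lenRequests is the shape suggested above: return the length directly.
func (rt *requestTracker) lenRequests() int {
	rt.mu.Lock()
	defer rt.mu.Unlock()
	return len(rt.requests)
}

// hasRequests is the shape currently in the PR: the len comparison stays
// behind the accessor, which reads slightly more easily in a conditional.
func (rt *requestTracker) hasRequests() bool {
	return rt.lenRequests() > 0
}
```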