From e19701052b4d904efe3efb9d7ec1d86545e133af Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Tue, 24 Aug 2021 11:54:31 +0200 Subject: [PATCH 1/2] refactor: reduce channel monitor log verbosity --- channelmonitor/channelmonitor.go | 55 +++++++++++++++++++++++--------- impl/events.go | 4 +-- 2 files changed, 42 insertions(+), 17 deletions(-) diff --git a/channelmonitor/channelmonitor.go b/channelmonitor/channelmonitor.go index 03e99252..3d037b2e 100644 --- a/channelmonitor/channelmonitor.go +++ b/channelmonitor/channelmonitor.go @@ -160,7 +160,7 @@ type monitoredChannel struct { chid datatransfer.ChannelID cfg *Config unsub datatransfer.Unsubscribe - restartChannelDebounced func() + restartChannelDebounced func(error) onShutdown func(datatransfer.ChannelID) shutdownLk sync.Mutex @@ -187,8 +187,33 @@ func newMonitoredChannel( cfg: cfg, onShutdown: onShutdown, } + + // "debounce" calls to restart channel, ie if there are multiple calls in a + // short space of time, only send a message to restart the channel once + var lk sync.Mutex + var lastErr error debouncer := debounce.New(cfg.RestartDebounce) - mpc.restartChannelDebounced = func() { debouncer(mpc.restartChannel) } + mpc.restartChannelDebounced = func(err error) { + // Log the error at debug level + log.Debug(err.Error()) + + // Save the last error passed to restartChannelDebounced + lk.Lock() + lastErr = err + lk.Unlock() + + debouncer(func() { + // Log only the last error passed to restartChannelDebounced at warning level + lk.Lock() + log.Warnf("%s", lastErr) + lk.Unlock() + + // Restart the channel + mpc.restartChannel() + }) + } + + // Start monitoring the channel mpc.start() return mpc } @@ -247,13 +272,13 @@ func (mc *monitoredChannel) start() { case datatransfer.SendDataError: // If the transport layer reports an error sending data over the wire, // attempt to restart the channel - log.Warnf("%s: data transfer transport send error, restarting data transfer", mc.chid) - go mc.restartChannelDebounced() + err := xerrors.Errorf("%s: data transfer transport send error, restarting data transfer", mc.chid) + go mc.restartChannelDebounced(err) case datatransfer.ReceiveDataError: // If the transport layer reports an error receiving data over the wire, // attempt to restart the channel - log.Warnf("%s: data transfer transport receive error, restarting data transfer", mc.chid) - go mc.restartChannelDebounced() + err := xerrors.Errorf("%s: data transfer transport receive error, restarting data transfer", mc.chid) + go mc.restartChannelDebounced(err) case datatransfer.FinishTransfer: // The channel initiator has finished sending / receiving all data. // Watch to make sure that the responder sends a message to acknowledge @@ -355,7 +380,7 @@ func (mc *monitoredChannel) restartChannel() { // Check if channel is already being restarted if !restartedAt.IsZero() { - log.Infof("%s: restart called but already restarting channel, "+ + log.Debugf("%s: restart called but already restarting channel, "+ "waiting to restart again (since %s; restart backoff is %s)", mc.chid, time.Since(restartedAt), mc.cfg.RestartBackoff) return @@ -397,7 +422,7 @@ func (mc *monitoredChannel) restartChannel() { } // There was a restart queued, restart again - log.Infof("%s: restart was queued - restarting again", mc.chid) + log.Debugf("%s: restart was queued - restarting again", mc.chid) } } @@ -416,14 +441,14 @@ func (mc *monitoredChannel) doRestartChannel() error { } // Send the restart message - log.Infof("%s: restarting (%d consecutive restarts)", mc.chid, restartCount) + log.Debugf("%s: restarting (%d consecutive restarts)", mc.chid, restartCount) err := mc.sendRestartMessage(restartCount) if err != nil { log.Warnf("%s: restart failed, trying again: %s", mc.chid, err) // If the restart message could not be sent, try again return mc.doRestartChannel() } - log.Infof("%s: restart completed successfully", mc.chid) + log.Debugf("%s: restart completed successfully", mc.chid) return nil } @@ -433,17 +458,17 @@ func (mc *monitoredChannel) sendRestartMessage(restartCount int) error { // Note that at the networking layer there is logic to retry if a network // connection cannot be established, so this may take some time. p := mc.chid.OtherParty(mc.mgr.PeerID()) - log.Infof("%s: re-establishing connection to %s", mc.chid, p) + log.Debugf("%s: re-establishing connection to %s", mc.chid, p) start := time.Now() err := mc.mgr.ConnectTo(mc.ctx, p) if err != nil { return xerrors.Errorf("%s: failed to reconnect to peer %s after %s: %w", mc.chid, p, time.Since(start), err) } - log.Infof("%s: re-established connection to %s in %s", mc.chid, p, time.Since(start)) + log.Debugf("%s: re-established connection to %s in %s", mc.chid, p, time.Since(start)) // Send a restart message for the channel - log.Infof("%s: sending restart message to %s (%d consecutive restarts)", mc.chid, p, restartCount) + log.Debugf("%s: sending restart message to %s (%d consecutive restarts)", mc.chid, p, restartCount) err = mc.mgr.RestartDataTransferChannel(mc.ctx, mc.chid) if err != nil { return xerrors.Errorf("%s: failed to send restart message to %s: %w", mc.chid, p, err) @@ -453,11 +478,11 @@ func (mc *monitoredChannel) sendRestartMessage(restartCount int) error { // If a restart backoff is configured, backoff after a restart before // attempting another. if mc.cfg.RestartBackoff > 0 { - log.Infof("%s: backing off %s before allowing any other restarts", + log.Debugf("%s: backing off %s before allowing any other restarts", mc.chid, mc.cfg.RestartBackoff) select { case <-time.After(mc.cfg.RestartBackoff): - log.Infof("%s: restart back-off of %s complete", mc.chid, mc.cfg.RestartBackoff) + log.Debugf("%s: restart back-off of %s complete", mc.chid, mc.cfg.RestartBackoff) case <-mc.ctx.Done(): return nil } diff --git a/impl/events.go b/impl/events.go index 5297d04f..72ab2018 100644 --- a/impl/events.go +++ b/impl/events.go @@ -233,12 +233,12 @@ func (m *manager) OnRequestDisconnected(chid datatransfer.ChannelID, err error) } func (m *manager) OnSendDataError(chid datatransfer.ChannelID, err error) error { - log.Warnf("channel %+v had transport send error: %s", chid, err) + log.Debugf("channel %+v had transport send error: %s", chid, err) return m.channels.SendDataError(chid, err) } func (m *manager) OnReceiveDataError(chid datatransfer.ChannelID, err error) error { - log.Warnf("channel %+v had transport receive error: %s", chid, err) + log.Debugf("channel %+v had transport receive error: %s", chid, err) return m.channels.ReceiveDataError(chid, err) } From 78441cdf38576165d2d6d0ae31e34036261d774c Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Tue, 24 Aug 2021 14:34:06 +0200 Subject: [PATCH 2/2] refactor: debug -> info --- channelmonitor/channelmonitor.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/channelmonitor/channelmonitor.go b/channelmonitor/channelmonitor.go index 3d037b2e..ed3b0a3c 100644 --- a/channelmonitor/channelmonitor.go +++ b/channelmonitor/channelmonitor.go @@ -448,7 +448,7 @@ func (mc *monitoredChannel) doRestartChannel() error { // If the restart message could not be sent, try again return mc.doRestartChannel() } - log.Debugf("%s: restart completed successfully", mc.chid) + log.Infof("%s: restart completed successfully", mc.chid) return nil }