From 2717c537b6655d35341468bcdeef6d79cd133f03 Mon Sep 17 00:00:00 2001 From: Andrew Ferrazzutti Date: Sat, 24 Feb 2024 06:31:40 -0500 Subject: [PATCH] Restore missing metrics & remove unused ones (#463) Fixes #453 --- metrics.go | 68 +++++++++++------------------------------------------- portal.go | 11 ++++++++- user.go | 9 ++++++++ 3 files changed, 33 insertions(+), 55 deletions(-) diff --git a/metrics.go b/metrics.go index 018b97d6..2e76a047 100644 --- a/metrics.go +++ b/metrics.go @@ -20,16 +20,15 @@ import ( "context" "net/http" "runtime/debug" - "strconv" "sync" "time" + "github.com/google/uuid" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/rs/zerolog" "maunium.net/go/mautrix/event" - "maunium.net/go/mautrix/id" "go.mau.fi/mautrix-signal/database" ) @@ -47,9 +46,6 @@ type MetricsHandler struct { signalMessageAge prometheus.Histogram signalMessageHandling *prometheus.HistogramVec countCollection prometheus.Histogram - disconnections *prometheus.CounterVec - incomingRetryReceipts *prometheus.CounterVec - connectionFailures *prometheus.CounterVec puppetCount prometheus.Gauge userCount prometheus.Gauge messageCount prometheus.Gauge @@ -60,16 +56,16 @@ type MetricsHandler struct { unencryptedPrivateCount prometheus.Gauge connected prometheus.Gauge - connectedState map[string]bool + connectedState map[uuid.UUID]bool connectedStateLock sync.Mutex loggedIn prometheus.Gauge - loggedInState map[string]bool + loggedInState map[uuid.UUID]bool loggedInStateLock sync.Mutex } func NewMetricsHandler(address string, log zerolog.Logger, db *database.Database) *MetricsHandler { portalCount := promauto.NewGaugeVec(prometheus.GaugeOpts{ - Name: "bridge_portals_total", + Name: "signal_portals_total", Help: "Number of portal rooms on Matrix", }, []string{"type", "encrypted"}) return &MetricsHandler{ @@ -92,31 +88,19 @@ func NewMetricsHandler(address string, log zerolog.Logger, db *database.Database Help: "Time spent processing Signal messages", }, []string{"message_type"}), countCollection: promauto.NewHistogram(prometheus.HistogramOpts{ - Name: "bridge_count_collection", + Name: "signal_count_collection", Help: "Time spent collecting the bridge_*_total metrics", }), - disconnections: promauto.NewCounterVec(prometheus.CounterOpts{ - Name: "bridge_disconnections", - Help: "Number of times a Matrix user has been disconnected from Signal", - }, []string{"user_id"}), - connectionFailures: promauto.NewCounterVec(prometheus.CounterOpts{ - Name: "bridge_connection_failures", - Help: "Number of times a connection has failed to Signal", - }, []string{"reason"}), - incomingRetryReceipts: promauto.NewCounterVec(prometheus.CounterOpts{ - Name: "bridge_incoming_retry_receipts", - Help: "Number of times a remote Signal user has requested a retry from the bridge. retry_count = 5 is usually the last attempt (and very likely means a failed message)", - }, []string{"retry_count", "message_found"}), puppetCount: promauto.NewGauge(prometheus.GaugeOpts{ - Name: "bridge_puppets_total", + Name: "signal_puppets_total", Help: "Number of Signal users bridged into Matrix", }), userCount: promauto.NewGauge(prometheus.GaugeOpts{ - Name: "bridge_users_total", + Name: "signal_users_total", Help: "Number of Matrix users using the bridge", }), messageCount: promauto.NewGauge(prometheus.GaugeOpts{ - Name: "bridge_messages_total", + Name: "signal_messages_total", Help: "Number of messages bridged", }), portalCount: portalCount, @@ -129,12 +113,12 @@ func NewMetricsHandler(address string, log zerolog.Logger, db *database.Database Name: "bridge_logged_in", Help: "Bridge users logged into Signal", }), - loggedInState: make(map[string]bool), + loggedInState: make(map[uuid.UUID]bool), connected: promauto.NewGauge(prometheus.GaugeOpts{ Name: "bridge_connected", Help: "Bridge users connected to Signal", }), - connectedState: make(map[string]bool), + connectedState: make(map[uuid.UUID]bool), } } @@ -168,31 +152,7 @@ func (mh *MetricsHandler) TrackSignalMessage(timestamp time.Time, messageType st } } -func (mh *MetricsHandler) TrackDisconnection(userID id.UserID) { - if !mh.running { - return - } - mh.disconnections.With(prometheus.Labels{"user_id": string(userID)}).Inc() -} - -func (mh *MetricsHandler) TrackConnectionFailure(reason string) { - if !mh.running { - return - } - mh.connectionFailures.With(prometheus.Labels{"reason": reason}).Inc() -} - -func (mh *MetricsHandler) TrackRetryReceipt(count int, found bool) { - if !mh.running { - return - } - mh.incomingRetryReceipts.With(prometheus.Labels{ - "retry_count": strconv.Itoa(count), - "message_found": strconv.FormatBool(found), - }).Inc() -} - -func (mh *MetricsHandler) TrackLoginState(signalID string, loggedIn bool) { +func (mh *MetricsHandler) TrackLoginState(signalID uuid.UUID, loggedIn bool) { if !mh.running { return } @@ -203,13 +163,13 @@ func (mh *MetricsHandler) TrackLoginState(signalID string, loggedIn bool) { mh.loggedInState[signalID] = loggedIn if loggedIn { mh.loggedIn.Inc() - } else { + } else if ok { mh.loggedIn.Dec() } } } -func (mh *MetricsHandler) TrackConnectionState(signalID string, connected bool) { +func (mh *MetricsHandler) TrackConnectionState(signalID uuid.UUID, connected bool) { if !mh.running { return } @@ -220,7 +180,7 @@ func (mh *MetricsHandler) TrackConnectionState(signalID string, connected bool) mh.connectedState[signalID] = connected if connected { mh.connected.Inc() - } else { + } else if ok { mh.connected.Dec() } } diff --git a/portal.go b/portal.go index 9661e6c8..357a14b1 100644 --- a/portal.go +++ b/portal.go @@ -845,18 +845,27 @@ func (portal *Portal) handleSignalMessage(portalMessage portalSignalMessage) { Msg("Couldn't get puppet for message") return } + var msgType string + var timestamp uint64 switch typedEvt := portalMessage.evt.Event.(type) { case *signalpb.DataMessage: + msgType = "data" + timestamp = typedEvt.GetTimestamp() portal.handleSignalDataMessage(portalMessage.user, sender, typedEvt) case *signalpb.TypingMessage: + msgType = "typing" + timestamp = typedEvt.GetTimestamp() portal.handleSignalTypingMessage(sender, typedEvt) case *signalpb.EditMessage: - portal.handleSignalEditMessage(sender, typedEvt.GetTargetSentTimestamp(), typedEvt.GetDataMessage()) + msgType = "edit" + timestamp = typedEvt.GetTargetSentTimestamp() + portal.handleSignalEditMessage(sender, timestamp, typedEvt.GetDataMessage()) default: portal.log.Error(). Type("data_type", typedEvt). Msg("Invalid inner event type inside ChatEvent") } + portal.bridge.Metrics.TrackSignalMessage(time.UnixMilli(int64(timestamp)), msgType) } func (portal *Portal) handleSignalDataMessage(source *User, sender *Puppet, msg *signalpb.DataMessage) { diff --git a/user.go b/user.go index 6f8732e4..ab36113e 100644 --- a/user.go +++ b/user.go @@ -423,6 +423,8 @@ func (user *User) startupTryConnect(retryCount int) { case signalmeow.SignalConnectionEventConnected: user.log.Debug().Msg("Sending Connected BridgeState") user.BridgeState.Send(status.BridgeState{StateEvent: status.StateConnected}) + user.bridge.Metrics.TrackConnectionState(user.SignalID, true) + user.bridge.Metrics.TrackLoginState(user.SignalID, true) case signalmeow.SignalConnectionEventDisconnected: user.log.Debug().Msg("Received SignalConnectionEventDisconnected") @@ -472,6 +474,7 @@ func (user *User) startupTryConnect(retryCount int) { } else { user.BridgeState.Send(status.BridgeState{StateEvent: status.StateTransientDisconnect, Error: "unknown-websocket-error", Message: err.Error()}) } + user.bridge.Metrics.TrackConnectionState(user.SignalID, false) } } @@ -482,6 +485,8 @@ func (user *User) startupTryConnect(retryCount int) { } else { user.BridgeState.Send(status.BridgeState{StateEvent: status.StateBadCredentials, Message: err.Error()}) } + user.bridge.Metrics.TrackConnectionState(user.SignalID, false) + user.bridge.Metrics.TrackLoginState(user.SignalID, false) user.clearKeysAndDisconnect() if managementRoom := user.GetManagementRoomID(); managementRoom != "" { _, _ = user.bridge.Bot.SendText(ctx, managementRoom, "You've been logged out of Signal") @@ -490,6 +495,7 @@ func (user *User) startupTryConnect(retryCount int) { case signalmeow.SignalConnectionEventError: user.log.Debug().Msg("Sending UnknownError BridgeState") user.BridgeState.Send(status.BridgeState{StateEvent: status.StateUnknownError, Error: "unknown-websocket-error", Message: err.Error()}) + user.bridge.Metrics.TrackConnectionState(user.SignalID, false) case signalmeow.SignalConnectionCleanShutdown: if user.Client.IsLoggedIn() { @@ -498,6 +504,7 @@ func (user *User) startupTryConnect(retryCount int) { user.log.Debug().Msg("Clean Shutdown, but logged out - Sending BadCredentials BridgeState") user.BridgeState.Send(status.BridgeState{StateEvent: status.StateBadCredentials, Message: "You have been logged out of Signal, please reconnect"}) } + user.bridge.Metrics.TrackConnectionState(user.SignalID, false) } } }() @@ -569,6 +576,7 @@ func (user *User) saveSignalID(ctx context.Context, id uuid.UUID, number string) Stringer("previous_user", existingUser.MXID). Stringer("signal_uuid", id). Msg("Another user is already logged in with same UUID, logging out previous user") + existingUser.bridge.Metrics.TrackLoginState(user.SignalID, false) _ = existingUser.Disconnect() existingUser.SignalID = uuid.Nil existingUser.SignalUsername = "" @@ -812,6 +820,7 @@ func (user *User) Logout() error { loggedOutDevice, err := user.disconnectNoLock() user.bridge.MeowStore.DeleteDevice(context.TODO(), &loggedOutDevice.Store.DeviceData) user.bridge.GetPuppetByCustomMXID(user.MXID).ClearCustomMXID() + user.bridge.Metrics.TrackLoginState(user.SignalID, false) return err }