From ead7429bec9db813b36669a4ef542e9b6a04d517 Mon Sep 17 00:00:00 2001 From: Cosmin Rentea Date: Mon, 20 Mar 2017 14:15:31 +0200 Subject: [PATCH 1/2] Prometheus metrics for SMS --- server/sms/nexmo_sms_sender.go | 3 +++ server/sms/sms_gateway.go | 4 +++- server/sms/sms_prometheus.go | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 server/sms/sms_prometheus.go diff --git a/server/sms/nexmo_sms_sender.go b/server/sms/nexmo_sms_sender.go index 528647b6..8026b52a 100644 --- a/server/sms/nexmo_sms_sender.go +++ b/server/sms/nexmo_sms_sender.go @@ -210,6 +210,7 @@ func (ns *NexmoSender) sendSms(sms *NexmoSms) (*NexmoMessageResponse, error) { logger.WithField("error", err.Error()).Error("Error doing the request to nexmo endpoint") ns.createHttpClient() mTotalSendErrors.Add(1) + pNexmoSendErrors.Inc() return nil, ErrHTTPClientError } defer resp.Body.Close() @@ -219,6 +220,7 @@ func (ns *NexmoSender) sendSms(sms *NexmoSms) (*NexmoMessageResponse, error) { if err != nil { logger.WithField("error", err.Error()).Error("Error reading the nexmo body response") mTotalResponseInternalErrors.Add(1) + pNexmoResponseInternalErrors.Inc() return nil, ErrSMSResponseDecodingFailed } @@ -226,6 +228,7 @@ func (ns *NexmoSender) sendSms(sms *NexmoSms) (*NexmoMessageResponse, error) { if err != nil { logger.WithField("error", err.Error()).Error("Error decoding the response from nexmo endpoint") mTotalResponseInternalErrors.Add(1) + pNexmoResponseInternalErrors.Inc() return nil, ErrSMSResponseDecodingFailed } logger.WithField("messageResponse", messageResponse).Info("Actual nexmo response") diff --git a/server/sms/sms_gateway.go b/server/sms/sms_gateway.go index 8a78af44..ffd80660 100644 --- a/server/sms/sms_gateway.go +++ b/server/sms/sms_gateway.go @@ -189,10 +189,12 @@ func (g *gateway) send(receivedMsg *protocol.Message) error { if err != nil { log.WithField("error", err.Error()).Error("Sending of message failed") mTotalResponseErrors.Add(1) + pNexmoResponseErrors.Inc() return err } - mTotalSentMessages.Add(1) g.SetLastSentID(receivedMsg.ID) + mTotalSentMessages.Add(1) + pSent.Inc() return nil } diff --git a/server/sms/sms_prometheus.go b/server/sms/sms_prometheus.go new file mode 100644 index 00000000..fef38d3d --- /dev/null +++ b/server/sms/sms_prometheus.go @@ -0,0 +1,34 @@ +package sms + +import "github.com/prometheus/client_golang/prometheus" + +var ( + pSent = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "sms_sent", + Help: "Number of sms sent to the SMS service", + }) + + pNexmoSendErrors = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "sms_nexmo_send_errors", + Help: "Number of errors while trying to send sms to Nexmo", + }) + + pNexmoResponseErrors = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "sms_nexmo_response_errors", + Help: "Number of errors received from Nexmo", + }) + + pNexmoResponseInternalErrors = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "sms_nexmo_response_internal_errors", + Help: "Number of internal errors related to Nexmo responses", + }) +) + +func init() { + prometheus.MustRegister( + pSent, + pNexmoSendErrors, + pNexmoResponseErrors, + pNexmoResponseInternalErrors, + ) +} From 075a4ae5dc2e31ea70c0cd969f25d693d00c0cb7 Mon Sep 17 00:00:00 2001 From: Cosmin Rentea Date: Mon, 20 Mar 2017 14:25:15 +0200 Subject: [PATCH 2/2] Prometheus metrics for APNS --- server/apns/apns.go | 4 ++++ server/apns/apns_prometheus.go | 42 ++++++++++++++++++++++++++++++++++ server/apns/apns_sender.go | 3 +++ 3 files changed, 49 insertions(+) diff --git a/server/apns/apns.go b/server/apns/apns.go index cf3e7f2b..a0be264d 100644 --- a/server/apns/apns.go +++ b/server/apns/apns.go @@ -107,6 +107,7 @@ func (a *apns) HandleResponse(request connector.Request, responseIface interface r, ok := responseIface.(*apns2.Response) if !ok { mTotalResponseErrors.Add(1) + pResponseErrors.Inc() return fmt.Errorf("Response could not be converted to an APNS Response") } messageID := request.Message().ID @@ -115,6 +116,7 @@ func (a *apns) HandleResponse(request connector.Request, responseIface interface if err := a.Manager().Update(subscriber); err != nil { logger.WithField("error", err.Error()).Error("Manager could not update subscription") mTotalResponseInternalErrors.Add(1) + pResponseInternalErrors.Inc() return err } if r.Sent() { @@ -137,6 +139,7 @@ func (a *apns) HandleResponse(request connector.Request, responseIface interface logger.WithField("id", r.ApnsID).Info("trying to remove subscriber because a relevant error was received from APNS") mTotalResponseRegistrationErrors.Add(1) + pResponseRegistrationErrors.Inc() err := a.Manager().Remove(subscriber) if err != nil { logger.WithField("id", r.ApnsID).Error("could not remove subscriber") @@ -144,6 +147,7 @@ func (a *apns) HandleResponse(request connector.Request, responseIface interface default: logger.Error("handling other APNS errors") mTotalResponseOtherErrors.Add(1) + pResponseOtherErrors.Inc() } return nil } diff --git a/server/apns/apns_prometheus.go b/server/apns/apns_prometheus.go index 9b61f4a7..633e55d2 100644 --- a/server/apns/apns_prometheus.go +++ b/server/apns/apns_prometheus.go @@ -14,11 +14,53 @@ var ( Name: "apns_send_errors", Help: "Number of errors when trying to send messages to APNS", }) + + pResponseErrors = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "apns_response_errors", + Help: "Number of errors received after sending messages to APNS", + }) + + pResponseInternalErrors = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "apns_response_internal_errors", + Help: "Number of internal errors related to handling responses from APNS", + }) + + pResponseRegistrationErrors = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "apns_response_registration_errors", + Help: "Number of errors related to APNS registrations", + }) + + pResponseOtherErrors = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "apns_response_other_errors", + Help: "Number of other APNS errors", + }) + + pSendNetworkErrors = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "apns_send_network_errors", + Help: "Number of errors related to network when sending to APNS", + }) + + pSendRetryCloseTLS = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "apns_send_retry_close_tls", + Help: "Number of retries related to closing TLS in the APNS connector", + }) + + pSendRetryUnrecoverable = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "apns_send_retry_unrecoverable", + Help: "Number of unrecoverable retries in the APNS connector", + }) ) func init() { prometheus.MustRegister( pSentMessages, pSendErrors, + pResponseErrors, + pResponseInternalErrors, + pResponseRegistrationErrors, + pResponseOtherErrors, + pSendNetworkErrors, + pSendRetryCloseTLS, + pSendRetryUnrecoverable, ) } diff --git a/server/apns/apns_sender.go b/server/apns/apns_sender.go index 355fde86..67b1d931 100644 --- a/server/apns/apns_sender.go +++ b/server/apns/apns_sender.go @@ -69,10 +69,12 @@ func (s sender) Send(request connector.Request) (interface{}, error) { if closable, ok := s.client.(closable); ok { logger.Warn("Close TLS and retry again") mTotalSendRetryCloseTLS.Add(1) + pSendRetryCloseTLS.Inc() closable.CloseTLS() return push() } else { mTotalSendRetryUnrecoverable.Add(1) + pSendRetryUnrecoverable.Inc() logger.Error("Cannot Close TLS. Unrecoverable state") } } @@ -92,6 +94,7 @@ func (r *retryable) execute(op func() (interface{}, error)) (interface{}, error) // retry on network errors if _, ok := opError.(net.Error); ok { mTotalSendNetworkErrors.Add(1) + pSendNetworkErrors.Inc() if tryCounter >= r.maxTries { return "", ErrRetryFailed }