diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9b693c48e9..a6098cd771 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,8 @@
 - Add support of JPEG files with differential Huffman coding or arithmetic coding.
 - Add `IMGPROXY_PREFERRED_FORMATS` config.
 - Add `IMGPROXY_REQUESTS_QUEUE_SIZE` config.
+- Add `requests_in_progress` and `images_in_progress` metrics.
+- Add queue segment/span to request traces.
 - Add sending additional metrics to Datadog and `IMGPROXY_DATADOG_ENABLE_ADDITIONAL_METRICS` config.
 - Add sending additional metrics to New Relic.
diff --git a/docs/datadog.md b/docs/datadog.md
index e4183f708a..5c16879a4f 100644
--- a/docs/datadog.md
+++ b/docs/datadog.md
@@ -23,6 +23,7 @@ imgproxy can send its metrics to Datadog. To use this feature, do the following:
 imgproxy will send the following info to Datadog:
 
 * Response time
+* Queue time
 * Image downloading time
 * Image processing time
 * Errors that occurred while downloading and processing image
@@ -31,6 +32,8 @@ imgproxy will send the following info to Datadog:
 
 When the `IMGPROXY_DATADOG_ENABLE_ADDITIONAL_METRICS` environment variable is set to `true` imgproxy will send the following additional metrics to Datadog:
 
+* `imgproxy.requests_in_progress`: the number of requests currently in progress
+* `imgproxy.images_in_progress`: the number of images currently in progress
 * `imgproxy.buffer.size`: a histogram of the download/gzip buffers sizes (in bytes)
 * `imgproxy.buffer.default_size`: calibrated default buffer size (in bytes)
 * `imgproxy.buffer.max_size`: calibrated maximum buffer size (in bytes)
diff --git a/docs/new_relic.md b/docs/new_relic.md
index 368f2383b5..5a75ab75f3 100644
--- a/docs/new_relic.md
+++ b/docs/new_relic.md
@@ -11,12 +11,15 @@ imgproxy will send the following info to New Relic:
 
 * CPU and memory usage
 * Response time
+* Queue time
 * Image downloading time
 * Image processing time
 * Errors that occurred while downloading and processing an image
 
 Additionally, imgproxy sends the following metrics over [Metrics API](https://docs.newrelic.com/docs/data-apis/ingest-apis/metric-api/introduction-metric-api/):
 
+* `imgproxy.requests_in_progress`: the number of requests currently in progress
+* `imgproxy.images_in_progress`: the number of images currently in progress
 * `imgproxy.buffer.size`: a summary of the download/gzip buffers sizes (in bytes)
 * `imgproxy.buffer.default_size`: calibrated default buffer size (in bytes)
 * `imgproxy.buffer.max_size`: calibrated maximum buffer size (in bytes)
diff --git a/docs/prometheus.md b/docs/prometheus.md
index bf455951cc..e213850d80 100644
--- a/docs/prometheus.md
+++ b/docs/prometheus.md
@@ -3,16 +3,17 @@
 imgproxy can collect metrics for Prometheus. To use this feature, do the following:
 
 1. Set the `IMGPROXY_PROMETHEUS_BIND` environment variable to the address and port that will be listened to by the Prometheus server. Note that you can't bind the main server and Prometheus to the same port.
-2. _(optional)_ Set the `IMGPROXY_PROMETHEUS_NAMESPACE` to prepend prefix to the names of metrics, i.e. with `IMGPROXY_PROMETHEUS_NAMESPACE=imgproxy` Names will appear like `imgproxy_requests_total`.
+2. _(optional)_ Set the `IMGPROXY_PROMETHEUS_NAMESPACE` to prepend a prefix to the names of metrics, e.g. with `IMGPROXY_PROMETHEUS_NAMESPACE=imgproxy`, names will appear like `imgproxy_requests_total`.
 3. Collect the metrics from any path on the specified binding.
 
 imgproxy will collect the following metrics:
 
 * `requests_total`: a counter with the total number of HTTP requests imgproxy has processed
 * `errors_total`: a counter of the occurred errors separated by type (timeout, downloading, processing)
-* `request_duration_seconds`: a histogram of the response latency (in seconds)
-* `download_duration_seconds`: a histogram of the source image downloading latency (in seconds)
-* `processing_duration_seconds`: a histogram of the image processing latency (in seconds)
+* `request_duration_seconds`: a histogram of the request latency (in seconds)
+* `request_span_duration_seconds`: a histogram of the request latency (in seconds) separated by span (queue, downloading, processing)
+* `requests_in_progress`: the number of requests currently in progress
+* `images_in_progress`: the number of images currently in progress
 * `buffer_size_bytes`: a histogram of the download/gzip buffers sizes (in bytes)
 * `buffer_default_size_bytes`: calibrated default buffer size (in bytes)
 * `buffer_max_size_bytes`: calibrated maximum buffer size (in bytes)
@@ -20,3 +21,11 @@ imgproxy will collect the following metrics:
 * `vips_max_memory_bytes`: libvips maximum memory usage
 * `vips_allocs`: the number of active vips allocations
 * Some useful Go metrics like memstats and goroutines count
+
+### Deprecated metrics
+
+The following metrics are deprecated and may be removed in future versions. Use `request_span_duration_seconds` instead.
+
+* `download_duration_seconds`: a histogram of the source image downloading latency (in seconds)
+* `processing_duration_seconds`: a histogram of the image processing latency (in seconds)
+
diff --git a/metrics/datadog/datadog.go b/metrics/datadog/datadog.go
index 4c6c3d49ac..c4b5099bba 100644
--- a/metrics/datadog/datadog.go
+++ b/metrics/datadog/datadog.go
@@ -15,6 +15,7 @@ import (
 
 	"github.com/imgproxy/imgproxy/v3/config"
 	"github.com/imgproxy/imgproxy/v3/metrics/errformat"
+	"github.com/imgproxy/imgproxy/v3/metrics/stats"
 	"github.com/imgproxy/imgproxy/v3/version"
 )
 
@@ -176,6 +177,9 @@ func runMetricsCollector() {
 					statsdClient.Gauge(name, f(), nil, 1)
 				}
 			}()
+
+			statsdClient.Gauge("imgproxy.requests_in_progress", stats.RequestsInProgress(), nil, 1)
+			statsdClient.Gauge("imgproxy.images_in_progress", stats.ImagesInProgress(), nil, 1)
 		case <-statsdClientStop:
 			return
diff --git a/metrics/metrics.go b/metrics/metrics.go
index 5c429c6d36..22b644e95b 100644
--- a/metrics/metrics.go
+++ b/metrics/metrics.go
@@ -46,6 +46,20 @@ func StartRequest(ctx context.Context, rw http.ResponseWriter, r *http.Request)
 	return ctx, cancel, rw
 }
 
+func StartQueueSegment(ctx context.Context) context.CancelFunc {
+	promCancel := prometheus.StartQueueSegment()
+	nrCancel := newrelic.StartSegment(ctx, "Queue")
+	ddCancel := datadog.StartSpan(ctx, "queue")
+
+	cancel := func() {
+		promCancel()
+		nrCancel()
+		ddCancel()
+	}
+
+	return cancel
+}
+
 func StartDownloadingSegment(ctx context.Context) context.CancelFunc {
 	promCancel := prometheus.StartDownloadingSegment()
 	nrCancel := newrelic.StartSegment(ctx, "Downloading image")
diff --git a/metrics/newrelic/newrelic.go b/metrics/newrelic/newrelic.go
index 8404f87334..f17cc72c5d 100644
--- a/metrics/newrelic/newrelic.go
+++ b/metrics/newrelic/newrelic.go
@@ -15,6 +15,7 @@ import (
 
 	"github.com/imgproxy/imgproxy/v3/config"
 	"github.com/imgproxy/imgproxy/v3/metrics/errformat"
+	"github.com/imgproxy/imgproxy/v3/metrics/stats"
 )
 
 type transactionCtxKey struct{}
@@ -246,6 +247,18 @@ func runMetricsCollector() {
 				}
 			}()
 
+			harvester.RecordMetric(telemetry.Gauge{
+				Name:      "imgproxy.requests_in_progress",
+				Value:     stats.RequestsInProgress(),
+				Timestamp: time.Now(),
+			})
+
+			harvester.RecordMetric(telemetry.Gauge{
+				Name:      "imgproxy.images_in_progress",
+				Value:     stats.ImagesInProgress(),
+				Timestamp: time.Now(),
+			})
+
 			harvester.HarvestNow(harvesterCtx)
 		case <-harvesterCtx.Done():
 			return
diff --git a/metrics/prometheus/prometheus.go b/metrics/prometheus/prometheus.go
index b38dac1864..bbf63e230f 100644
--- a/metrics/prometheus/prometheus.go
+++ b/metrics/prometheus/prometheus.go
@@ -11,20 +11,27 @@ import (
 	log "github.com/sirupsen/logrus"
 
 	"github.com/imgproxy/imgproxy/v3/config"
+	"github.com/imgproxy/imgproxy/v3/metrics/stats"
 	"github.com/imgproxy/imgproxy/v3/reuseport"
 )
 
 var (
 	enabled = false
 
-	requestsTotal      prometheus.Counter
-	errorsTotal        *prometheus.CounterVec
-	requestDuration    prometheus.Histogram
-	downloadDuration   prometheus.Histogram
-	processingDuration prometheus.Histogram
-	bufferSize         *prometheus.HistogramVec
-	bufferDefaultSize  *prometheus.GaugeVec
-	bufferMaxSize      *prometheus.GaugeVec
+	requestsTotal prometheus.Counter
+	errorsTotal   *prometheus.CounterVec
+
+	requestDuration     prometheus.Histogram
+	requestSpanDuration *prometheus.HistogramVec
+	downloadDuration    prometheus.Histogram
+	processingDuration  prometheus.Histogram
+
+	bufferSize        *prometheus.HistogramVec
+	bufferDefaultSize *prometheus.GaugeVec
+	bufferMaxSize     *prometheus.GaugeVec
+
+	requestsInProgress prometheus.GaugeFunc
+	imagesInProgress   prometheus.GaugeFunc
 )
 
 func Init() {
@@ -50,6 +57,12 @@ func Init() {
 		Help:      "A histogram of the response latency.",
 	})
 
+	requestSpanDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
+		Namespace: config.PrometheusNamespace,
+		Name:      "request_span_duration_seconds",
+		Help:      "A histogram of the request latency by span.",
+	}, []string{"span"})
+
 	downloadDuration = prometheus.NewHistogram(prometheus.HistogramOpts{
 		Namespace: config.PrometheusNamespace,
 		Name:      "download_duration_seconds",
@@ -81,15 +94,30 @@ func Init() {
 		Help:      "A gauge of the buffer max size in bytes.",
 	}, []string{"type"})
 
+	requestsInProgress = prometheus.NewGaugeFunc(prometheus.GaugeOpts{
+		Namespace: config.PrometheusNamespace,
+		Name:      "requests_in_progress",
+		Help:      "A gauge of the number of requests currently in progress.",
+	}, stats.RequestsInProgress)
+
+	imagesInProgress = prometheus.NewGaugeFunc(prometheus.GaugeOpts{
+		Namespace: config.PrometheusNamespace,
+		Name:      "images_in_progress",
+		Help:      "A gauge of the number of images currently in progress.",
+	}, stats.ImagesInProgress)
+
 	prometheus.MustRegister(
 		requestsTotal,
 		errorsTotal,
 		requestDuration,
+		requestSpanDuration,
 		downloadDuration,
 		processingDuration,
 		bufferSize,
 		bufferDefaultSize,
 		bufferMaxSize,
+		requestsInProgress,
+		imagesInProgress,
 	)
 
 	enabled = true
@@ -131,19 +159,43 @@ func StartRequest() context.CancelFunc {
 	return startDuration(requestDuration)
 }
 
-func StartDownloadingSegment() context.CancelFunc {
-	return startDuration(downloadDuration)
+func StartQueueSegment() context.CancelFunc {
+	if !enabled {
+		return func() {}
+	}
+
+	return startDuration(requestSpanDuration.With(prometheus.Labels{"span": "queue"}))
 }
 
-func StartProcessingSegment() context.CancelFunc {
-	return startDuration(processingDuration)
+func StartDownloadingSegment() context.CancelFunc {
+	if !enabled {
+		return func() {}
+	}
+
+	cancel := startDuration(requestSpanDuration.With(prometheus.Labels{"span": "downloading"}))
+	cancelLegacy := startDuration(downloadDuration)
+
+	return func() {
+		cancel()
+		cancelLegacy()
+	}
 }
 
-func startDuration(m prometheus.Histogram) context.CancelFunc {
+func StartProcessingSegment() context.CancelFunc {
 	if !enabled {
 		return func() {}
 	}
 
+	cancel := startDuration(requestSpanDuration.With(prometheus.Labels{"span": "processing"}))
+	cancelLegacy := startDuration(processingDuration)
+
+	return func() {
+		cancel()
+		cancelLegacy()
+	}
+}
+
+func startDuration(m prometheus.Observer) context.CancelFunc {
 	t := time.Now()
 	return func() {
 		m.Observe(time.Since(t).Seconds())
diff --git a/metrics/stats/stats.go b/metrics/stats/stats.go
new file mode 100644
index 0000000000..82b8c236f1
--- /dev/null
+++ b/metrics/stats/stats.go
@@ -0,0 +1,32 @@
+package stats
+
+import "sync/atomic"
+
+var (
+	requestsInProgress int64
+	imagesInProgress   int64
+)
+
+func RequestsInProgress() float64 {
+	return float64(atomic.LoadInt64(&requestsInProgress))
+}
+
+func IncRequestsInProgress() {
+	atomic.AddInt64(&requestsInProgress, 1)
+}
+
+func DecRequestsInProgress() {
+	atomic.AddInt64(&requestsInProgress, -1)
+}
+
+func ImagesInProgress() float64 {
+	return float64(atomic.LoadInt64(&imagesInProgress))
+}
+
+func IncImagesInProgress() {
+	atomic.AddInt64(&imagesInProgress, 1)
+}
+
+func DecImagesInProgress() {
+	atomic.AddInt64(&imagesInProgress, -1)
+}
diff --git a/processing_handler.go b/processing_handler.go
index b8ec3efbdb..3f6d66d769 100644
--- a/processing_handler.go
+++ b/processing_handler.go
@@ -19,6 +19,7 @@ import (
 	"github.com/imgproxy/imgproxy/v3/imagedata"
 	"github.com/imgproxy/imgproxy/v3/imagetype"
 	"github.com/imgproxy/imgproxy/v3/metrics"
+	"github.com/imgproxy/imgproxy/v3/metrics/stats"
 	"github.com/imgproxy/imgproxy/v3/options"
 	"github.com/imgproxy/imgproxy/v3/processing"
 	"github.com/imgproxy/imgproxy/v3/router"
@@ -179,6 +180,9 @@ func checkErr(ctx context.Context, errType string, err error) {
 }
 
 func handleProcessing(reqID string, rw http.ResponseWriter, r *http.Request) {
+	stats.IncRequestsInProgress()
+	defer stats.DecRequestsInProgress()
+
 	ctx := r.Context()
 
 	if queueSem != nil {
@@ -249,15 +253,24 @@ func handleProcessing(reqID string, rw http.ResponseWriter, r *http.Request) {
 	}
 
 	// The heavy part start here, so we need to restrict concurrency
-	processingSemToken, aquired := processingSem.Aquire(ctx)
-	if !aquired {
-		// We don't actually need to check timeout here,
-		// but it's an easy way to check if this is an actual timeout
-		// or the request was cancelled
-		checkErr(ctx, "queue", router.CheckTimeout(ctx))
-	}
+	var processingSemToken *semaphore.Token
+	func() {
+		defer metrics.StartQueueSegment(ctx)()
+
+		var aquired bool
+		processingSemToken, aquired = processingSem.Aquire(ctx)
+		if !aquired {
+			// We don't actually need to check timeout here,
+			// but it's an easy way to check if this is an actual timeout
+			// or the request was cancelled
+			checkErr(ctx, "queue", router.CheckTimeout(ctx))
+		}
+	}()
 	defer processingSemToken.Release()
 
+	stats.IncImagesInProgress()
+	defer stats.DecImagesInProgress()
+
 	statusCode := http.StatusOK
 
 	originData, err := func() (*imagedata.ImageData, error) {