Skip to content

Commit

Permalink
Add metric for bytes logged in WAL and Checkpoints
Browse files Browse the repository at this point in the history
Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>
  • Loading branch information
codesome committed Apr 22, 2020
1 parent a353bfa commit 3530f27
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 6 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Expand Up @@ -33,6 +33,7 @@
* [ENHANCEMENT] Allow 1w (where w denotes week) and 1y (where y denotes year) when setting `-store.cache-lookups-older-than` and `-store.max-look-back-period`. #2454
* [ENHANCEMENT] Optimize index queries for matchers using "a|b|c"-type regex. #2446 #2475
* [ENHANCEMENT] Added per tenant metrics for queries and chunks and bytes read from chunk store: #2463
  * `cortex_chunk_store_fetched_chunks_total` and `cortex_chunk_store_fetched_chunk_bytes_total`
  * `cortex_query_frontend_queries_total` (per tenant queries counted by the frontend)
* [ENHANCEMENT] Experimental WAL: New metrics `cortex_ingester_wal_logged_bytes_total` and `cortex_ingester_checkpoint_logged_bytes_total` added to track total bytes logged to disk for WAL and checkpoints. #2497
* [ENHANCEMENT] query-frontend now also logs the POST data of long queries. #2481
Expand Down
27 changes: 21 additions & 6 deletions pkg/ingester/wal.go
Expand Up @@ -72,11 +72,13 @@ type walWrapper struct {
checkpointMtx sync.Mutex

// Checkpoint metrics.
checkpointDeleteFail prometheus.Counter
checkpointDeleteTotal prometheus.Counter
checkpointCreationFail prometheus.Counter
checkpointCreationTotal prometheus.Counter
checkpointDuration prometheus.Summary
checkpointDeleteFail prometheus.Counter
checkpointDeleteTotal prometheus.Counter
checkpointCreationFail prometheus.Counter
checkpointCreationTotal prometheus.Counter
checkpointDuration prometheus.Summary
checkpointLoggedBytesTotal prometheus.Counter
walLoggedBytesTotal prometheus.Counter
}

// newWAL creates a WAL object. If the WAL is disabled, then the returned WAL is a no-op WAL.
Expand Down Expand Up @@ -124,6 +126,14 @@ func newWAL(cfg WALConfig, userStatesFunc func() map[string]*userState, register
Help: "Time taken to create a checkpoint.",
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
})
w.checkpointLoggedBytesTotal = promauto.With(registerer).NewCounter(prometheus.CounterOpts{
Name: "cortex_ingester_checkpoint_logged_bytes_total",
Help: "Total number of bytes written to disk for checkpointing.",
})
w.walLoggedBytesTotal = promauto.With(registerer).NewCounter(prometheus.CounterOpts{
Name: "cortex_ingester_wal_logged_bytes_total",
Help: "Total number of bytes written to disk for WAL records.",
})

w.wait.Add(1)
go w.run()
Expand All @@ -148,6 +158,7 @@ func (w *walWrapper) Log(record *Record) error {
if err != nil {
return err
}
w.walLoggedBytesTotal.Add(float64(len(buf)))
return w.wal.Log(buf)
}
}
Expand Down Expand Up @@ -401,7 +412,11 @@ func (w *walWrapper) checkpointSeries(cp *wal.WAL, userID string, fp model.Finge
return wireChunks, err
}

return wireChunks, cp.Log(buf)
err = cp.Log(buf)
if err == nil {
w.checkpointLoggedBytesTotal.Add(float64(len(buf)))
}
return wireChunks, err
}

type walRecoveryParameters struct {
Expand Down

0 comments on commit 3530f27

Please sign in to comment.