Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix metrics when a tenant TSDB is closed because idle #3646

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Expand Up @@ -27,6 +27,7 @@
* [BUGFIX] Allow `-querier.max-query-lookback` to use the `y|w|d` suffixes, like the deprecated `-store.max-look-back-period`. #3598
* [BUGFIX] Memberlist: An entry in the ring should no longer reappear after using the "Forget" feature (unless it's still heartbeating). #3603
* [BUGFIX] Ingester: do not close idle TSDBs while blocks shipping is in progress. #3630
* [BUGFIX] Ingester: correctly update `cortex_ingester_memory_users` and `cortex_ingester_active_series` when a tenant's TSDB is closed because it is idle, when running Cortex with the blocks storage. #3646
pstibrany marked this conversation as resolved.
Show resolved Hide resolved

## 1.6.0

Expand Down
5 changes: 5 additions & 0 deletions pkg/ingester/ingester_v2.go
Expand Up @@ -1316,6 +1316,9 @@ func (i *Ingester) closeAllTSDB() {
i.userStatesMtx.Lock()
delete(i.TSDBState.dbs, userID)
i.userStatesMtx.Unlock()

i.metrics.memUsers.Dec()
i.metrics.activeSeriesPerUser.DeleteLabelValues(userID)
}(userDB)
}

Expand Down Expand Up @@ -1666,6 +1669,8 @@ func (i *Ingester) closeAndDeleteUserTSDBIfIdle(userID string) tsdbCloseCheckRes
delete(i.TSDBState.dbs, userID)
i.userStatesMtx.Unlock()

i.metrics.memUsers.Dec()
i.metrics.activeSeriesPerUser.DeleteLabelValues(userID)
i.TSDBState.tsdbMetrics.removeRegistryForUser(userID)

// And delete local data.
Expand Down
50 changes: 44 additions & 6 deletions pkg/ingester/ingester_v2_test.go
Expand Up @@ -2271,7 +2271,11 @@ func TestIngesterCompactIdleBlock(t *testing.T) {
# HELP cortex_ingester_memory_series_removed_total The total number of series that were removed per user.
# TYPE cortex_ingester_memory_series_removed_total counter
cortex_ingester_memory_series_removed_total{user="1"} 0
`), memSeriesCreatedTotalName, memSeriesRemovedTotalName))

# HELP cortex_ingester_memory_users The current number of users in memory.
# TYPE cortex_ingester_memory_users gauge
cortex_ingester_memory_users 1
`), memSeriesCreatedTotalName, memSeriesRemovedTotalName, "cortex_ingester_memory_users"))

// wait one second -- TSDB is now idle.
time.Sleep(cfg.BlocksStorageConfig.TSDB.HeadCompactionIdleTimeout)
Expand All @@ -2286,7 +2290,11 @@ func TestIngesterCompactIdleBlock(t *testing.T) {
# HELP cortex_ingester_memory_series_removed_total The total number of series that were removed per user.
# TYPE cortex_ingester_memory_series_removed_total counter
cortex_ingester_memory_series_removed_total{user="1"} 1
`), memSeriesCreatedTotalName, memSeriesRemovedTotalName))

# HELP cortex_ingester_memory_users The current number of users in memory.
# TYPE cortex_ingester_memory_users gauge
cortex_ingester_memory_users 1
`), memSeriesCreatedTotalName, memSeriesRemovedTotalName, "cortex_ingester_memory_users"))

// Pushing another sample still works.
pushSingleSample(t, i)
Expand All @@ -2300,7 +2308,11 @@ func TestIngesterCompactIdleBlock(t *testing.T) {
# HELP cortex_ingester_memory_series_removed_total The total number of series that were removed per user.
# TYPE cortex_ingester_memory_series_removed_total counter
cortex_ingester_memory_series_removed_total{user="1"} 1
`), memSeriesCreatedTotalName, memSeriesRemovedTotalName))

# HELP cortex_ingester_memory_users The current number of users in memory.
# TYPE cortex_ingester_memory_users gauge
cortex_ingester_memory_users 1
`), memSeriesCreatedTotalName, memSeriesRemovedTotalName, "cortex_ingester_memory_users"))
}

func TestIngesterCompactAndCloseIdleTSDB(t *testing.T) {
Expand Down Expand Up @@ -2334,6 +2346,7 @@ func TestIngesterCompactAndCloseIdleTSDB(t *testing.T) {
m.On("Sync", mock.Anything).Return(0, nil)

pushSingleSample(t, i)
i.v2UpdateActiveSeries()

require.NoError(t, testutil.GatherAndCompare(r, strings.NewReader(`
# HELP cortex_ingester_memory_series_created_total The total number of series that were created per user.
Expand All @@ -2343,7 +2356,15 @@ func TestIngesterCompactAndCloseIdleTSDB(t *testing.T) {
# HELP cortex_ingester_memory_series_removed_total The total number of series that were removed per user.
# TYPE cortex_ingester_memory_series_removed_total counter
cortex_ingester_memory_series_removed_total{user="1"} 0
`), memSeriesCreatedTotalName, memSeriesRemovedTotalName))

# HELP cortex_ingester_memory_users The current number of users in memory.
# TYPE cortex_ingester_memory_users gauge
cortex_ingester_memory_users 1

# HELP cortex_ingester_active_series Number of currently active series per user.
# TYPE cortex_ingester_active_series gauge
cortex_ingester_active_series{user="1"} 1
`), memSeriesCreatedTotalName, memSeriesRemovedTotalName, "cortex_ingester_memory_users", "cortex_ingester_active_series"))

// Wait until idle TSDB is force-compacted, shipped, and eventually closed and removed.
test.Poll(t, 10*time.Second, 0, func() interface{} {
Expand All @@ -2366,6 +2387,7 @@ func TestIngesterCompactAndCloseIdleTSDB(t *testing.T) {
})

require.Greater(t, testutil.ToFloat64(i.TSDBState.idleTsdbChecks.WithLabelValues(string(tsdbIdleClosed))), float64(0))
i.v2UpdateActiveSeries()

// Verify that user has disappeared from metrics.
require.NoError(t, testutil.GatherAndCompare(r, strings.NewReader(`
Expand All @@ -2374,10 +2396,18 @@ func TestIngesterCompactAndCloseIdleTSDB(t *testing.T) {

# HELP cortex_ingester_memory_series_removed_total The total number of series that were removed per user.
# TYPE cortex_ingester_memory_series_removed_total counter
`), memSeriesCreatedTotalName, memSeriesRemovedTotalName))

# HELP cortex_ingester_memory_users The current number of users in memory.
# TYPE cortex_ingester_memory_users gauge
cortex_ingester_memory_users 0

# HELP cortex_ingester_active_series Number of currently active series per user.
# TYPE cortex_ingester_active_series gauge
`), memSeriesCreatedTotalName, memSeriesRemovedTotalName, "cortex_ingester_memory_users", "cortex_ingester_active_series"))

// Pushing another sample will recreate TSDB.
pushSingleSample(t, i)
i.v2UpdateActiveSeries()

// User is back.
require.NoError(t, testutil.GatherAndCompare(r, strings.NewReader(`
Expand All @@ -2388,7 +2418,15 @@ func TestIngesterCompactAndCloseIdleTSDB(t *testing.T) {
# HELP cortex_ingester_memory_series_removed_total The total number of series that were removed per user.
# TYPE cortex_ingester_memory_series_removed_total counter
cortex_ingester_memory_series_removed_total{user="1"} 0
`), memSeriesCreatedTotalName, memSeriesRemovedTotalName))

# HELP cortex_ingester_memory_users The current number of users in memory.
# TYPE cortex_ingester_memory_users gauge
cortex_ingester_memory_users 1

# HELP cortex_ingester_active_series Number of currently active series per user.
# TYPE cortex_ingester_active_series gauge
cortex_ingester_active_series{user="1"} 1
`), memSeriesCreatedTotalName, memSeriesRemovedTotalName, "cortex_ingester_memory_users", "cortex_ingester_active_series"))
}

func verifyCompactedHead(t *testing.T, i *Ingester, expected bool) {
Expand Down