Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Retention speedup #4172

Merged
merged 3 commits into from
Aug 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
18 changes: 3 additions & 15 deletions pkg/storage/stores/shipper/compactor/compactor.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import (
"flag"
"path/filepath"
"reflect"
"strconv"
"sync"
"time"

Expand Down Expand Up @@ -198,7 +197,7 @@ func (c *Compactor) CompactTable(ctx context.Context, tableName string) error {
return err
}

interval := extractIntervalFromTableName(tableName)
interval := retention.ExtractIntervalFromTableName(tableName)
intervalHasExpiredChunks := false
if c.cfg.RetentionEnabled {
intervalHasExpiredChunks = c.expirationChecker.IntervalHasExpiredChunks(interval)
Expand Down Expand Up @@ -338,17 +337,6 @@ func (e *expirationChecker) IntervalHasExpiredChunks(interval model.Interval) bo
return e.retentionExpiryChecker.IntervalHasExpiredChunks(interval) || e.deletionExpiryChecker.IntervalHasExpiredChunks(interval)
}

func extractIntervalFromTableName(tableName string) model.Interval {
interval := model.Interval{
Start: 0,
End: model.Now(),
}
tableNumber, err := strconv.ParseInt(tableName[len(tableName)-5:], 10, 64)
if err != nil {
return interval
}

interval.Start = model.TimeFromUnix(tableNumber * 86400)
interval.End = interval.Start.Add(24 * time.Hour)
return interval
// DropFromIndex reports whether the index entry for ref can be dropped from a table ending at tableEndTime.
// The retention checker is consulted first; the deletion checker is only asked when retention says no.
func (e *expirationChecker) DropFromIndex(ref retention.ChunkEntry, tableEndTime model.Time, now model.Time) bool {
	if e.retentionExpiryChecker.DropFromIndex(ref, tableEndTime, now) {
		return true
	}
	return e.deletionExpiryChecker.DropFromIndex(ref, tableEndTime, now)
}
39 changes: 0 additions & 39 deletions pkg/storage/stores/shipper/compactor/compactor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,9 @@ import (

"github.com/cortexproject/cortex/pkg/util/flagext"

"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"

loki_storage "github.com/grafana/loki/pkg/storage"
"github.com/grafana/loki/pkg/storage/chunk"
"github.com/grafana/loki/pkg/storage/chunk/local"
"github.com/grafana/loki/pkg/storage/chunk/storage"
"github.com/grafana/loki/pkg/storage/stores/shipper/testutil"
Expand Down Expand Up @@ -59,43 +57,6 @@ func TestIsDefaults(t *testing.T) {
}
}

// TestExtractIntervalFromTableName checks that extractIntervalFromTableName maps the name of a
// daily periodic table back to the day-aligned interval that the table covers.
func TestExtractIntervalFromTableName(t *testing.T) {
	periodicTableConfig := chunk.PeriodicTableConfig{
		Prefix: "dummy",
		Period: 24 * time.Hour,
	}

	const millisecondsInDay = model.Time(24 * time.Hour / time.Millisecond)

	// calculateInterval returns the day-aligned interval that contains tm.
	calculateInterval := func(tm model.Time) (m model.Interval) {
		m.Start = tm - tm%millisecondsInDay
		m.End = m.Start + millisecondsInDay
		return
	}

	// Sample the clock once. Deriving the table name and the expected interval from two
	// separate model.Now() calls can straddle a day boundary and fail the test spuriously.
	now := model.Now()

	for i, tm := range []model.Time{
		now,
		now.Add(-24 * time.Hour),
		now.Add(-24 * time.Hour).Add(time.Minute),
	} {
		// Declared per iteration so the subtest closure never observes a later case.
		tableName := periodicTableConfig.TableFor(tm)
		expectedInterval := calculateInterval(tm)

		t.Run(fmt.Sprint(i), func(t *testing.T) {
			require.Equal(t, expectedInterval, extractIntervalFromTableName(tableName))
		})
	}
}

func TestCompactor_RunCompaction(t *testing.T) {
tempDir, err := ioutil.TempDir("", "compactor-run-compaction")
require.NoError(t, err)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -214,3 +214,7 @@ func (d *DeleteRequestsManager) IntervalHasExpiredChunks(interval model.Interval

return false
}

// DropFromIndex always returns false and ignores all of its arguments.
func (d *DeleteRequestsManager) DropFromIndex(_ retention.ChunkEntry, _ model.Time, _ model.Time) bool {
return false
}
10 changes: 10 additions & 0 deletions pkg/storage/stores/shipper/compactor/retention/expiration.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ type ExpirationChecker interface {
MarkPhaseStarted()
MarkPhaseFailed()
MarkPhaseFinished()
DropFromIndex(ref ChunkEntry, tableEndTime model.Time, now model.Time) bool
}

type expirationChecker struct {
Expand Down Expand Up @@ -46,6 +47,15 @@ func (e *expirationChecker) Expired(ref ChunkEntry, now model.Time) (bool, []mod
return now.Sub(ref.Through) > period, nil
}

// DropFromIndex reports whether the chunk entry may be dropped from its index table.
// The retention period is resolved from the chunk's tenant and labels. Once the time elapsed
// between tableEndTime and now exceeds that period, the table end is out of retention and the
// entry can be dropped from the index without removing the chunk from the store.
func (e *expirationChecker) DropFromIndex(ref ChunkEntry, tableEndTime model.Time, now model.Time) bool {
	retentionPeriod := e.tenantsRetention.RetentionPeriodFor(unsafeGetString(ref.UserID), ref.Labels)
	elapsed := now.Sub(tableEndTime)
	return elapsed > retentionPeriod
}

func (e *expirationChecker) MarkPhaseStarted() {
smallestRetentionPeriod := findSmallestRetentionPeriod(e.tenantsRetention.limits)
e.latestRetentionStartTime = model.Now().Add(-smallestRetentionPeriod)
Expand Down
97 changes: 31 additions & 66 deletions pkg/storage/stores/shipper/compactor/retention/iterator.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
package retention

import (
"bytes"
"encoding/binary"
"fmt"
"time"

"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/pkg/labels"
"go.etcd.io/bbolt"

Expand Down Expand Up @@ -91,93 +89,60 @@ func (b *chunkIndexIterator) Next() bool {
}

type SeriesCleaner interface {
Cleanup(seriesID []byte, userID []byte) error
Cleanup(userID []byte, lbls labels.Labels) error
}

type seriesCleaner struct {
bucketTimestamps []string
shards map[uint32]string
cursor *bbolt.Cursor
config chunk.PeriodConfig
tableInterval model.Interval
shards map[uint32]string
bucket *bbolt.Bucket
config chunk.PeriodConfig
schema chunk.SeriesStoreSchema

buf []byte
}

func newSeriesCleaner(bucket *bbolt.Bucket, config chunk.PeriodConfig) *seriesCleaner {
var (
fromDay = config.From.Time.Unix() / int64(config.IndexTables.Period/time.Second)
throughDay = config.From.Add(config.IndexTables.Period).Unix() / int64(config.IndexTables.Period/time.Second)
bucketTimestamps = []string{}
)
for i := fromDay; i <= throughDay; i++ {
bucketTimestamps = append(bucketTimestamps, fmt.Sprintf("d%d", i))
}
func newSeriesCleaner(bucket *bbolt.Bucket, config chunk.PeriodConfig, tableName string) *seriesCleaner {
baseSchema, _ := config.CreateSchema()
schema := baseSchema.(chunk.SeriesStoreSchema)
var shards map[uint32]string

if config.RowShards != 0 {
shards = map[uint32]string{}
for s := uint32(0); s <= config.RowShards; s++ {
shards[s] = fmt.Sprintf("%02d", s)
}
}

return &seriesCleaner{
bucketTimestamps: bucketTimestamps,
cursor: bucket.Cursor(),
buf: make([]byte, 0, 1024),
config: config,
shards: shards,
tableInterval: ExtractIntervalFromTableName(tableName),
schema: schema,
bucket: bucket,
buf: make([]byte, 0, 1024),
config: config,
shards: shards,
}
}

func (s *seriesCleaner) Cleanup(seriesID []byte, userID []byte) error {
for _, timestamp := range s.bucketTimestamps {
// build the chunk ref prefix
s.buf = s.buf[:0]
if s.config.Schema != "v9" {
shard := binary.BigEndian.Uint32(seriesID) % s.config.RowShards
s.buf = append(s.buf, unsafeGetBytes(s.shards[shard])...)
s.buf = append(s.buf, ':')
}
s.buf = append(s.buf, userID...)
s.buf = append(s.buf, ':')
s.buf = append(s.buf, unsafeGetBytes(timestamp)...)
s.buf = append(s.buf, ':')
s.buf = append(s.buf, seriesID...)

if key, _ := s.cursor.Seek(s.buf); key != nil && bytes.HasPrefix(key, s.buf) {
// this series still have chunk entries we can't cleanup
continue
}
// we don't have any chunk ref for that series let's delete all label index entries
s.buf = s.buf[:0]
if s.config.Schema != "v9" {
shard := binary.BigEndian.Uint32(seriesID) % s.config.RowShards
s.buf = append(s.buf, unsafeGetBytes(s.shards[shard])...)
s.buf = append(s.buf, ':')
}
s.buf = append(s.buf, userID...)
s.buf = append(s.buf, ':')
s.buf = append(s.buf, unsafeGetBytes(timestamp)...)
s.buf = append(s.buf, ':')
s.buf = append(s.buf, unsafeGetBytes(logMetricName)...)
func (s *seriesCleaner) Cleanup(userID []byte, lbls labels.Labels) error {
_, indexEntries, err := s.schema.GetCacheKeysAndLabelWriteEntries(s.tableInterval.Start, s.tableInterval.End, string(userID), logMetricName, lbls, "")
if err != nil {
return err
}

// delete all seriesRangeKeyV1 and labelSeriesRangeKeyV1 via prefix
// todo(cyriltovena) we might be able to encode index key instead of parsing all label entries for faster delete.
for key, _ := s.cursor.Seek(s.buf); key != nil && bytes.HasPrefix(key, s.buf); key, _ = s.cursor.Next() {
for i := range indexEntries {
for _, indexEntry := range indexEntries[i] {
key := make([]byte, 0, len(indexEntry.HashValue)+len(separator)+len(indexEntry.RangeValue))
key = append(key, []byte(indexEntry.HashValue)...)
key = append(key, []byte(separator)...)
key = append(key, indexEntry.RangeValue...)

parsedSeriesID, ok, err := parseLabelIndexSeriesID(decodeKey(key))
err := s.bucket.Delete(key)
if err != nil {
return err
}
if !ok {
continue
}
if !bytes.Equal(seriesID, parsedSeriesID) {
continue
}
if err := s.cursor.Delete(); err != nil {
return err
}
}
}

return nil
}
Original file line number Diff line number Diff line change
Expand Up @@ -100,11 +100,11 @@ func Test_SeriesCleaner(t *testing.T) {
require.NoError(t, err)

err = tables[0].DB.Update(func(tx *bbolt.Tx) error {
cleaner := newSeriesCleaner(tx.Bucket(bucketName), tt.config)
if err := cleaner.Cleanup(entryFromChunk(c2).SeriesID, entryFromChunk(c2).UserID); err != nil {
cleaner := newSeriesCleaner(tx.Bucket(bucketName), tt.config, tables[0].name)
if err := cleaner.Cleanup(entryFromChunk(c2).UserID, c2.Metric); err != nil {
return err
}
if err := cleaner.Cleanup(entryFromChunk(c1).SeriesID, entryFromChunk(c1).UserID); err != nil {
if err := cleaner.Cleanup(entryFromChunk(c1).UserID, c1.Metric); err != nil {
return err
}
return nil
Expand Down
54 changes: 43 additions & 11 deletions pkg/storage/stores/shipper/compactor/retention/retention.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ func (t *Marker) markTable(ctx context.Context, tableName string, db *bbolt.DB)
return err
}

empty, err = markforDelete(ctx, markerWriter, chunkIt, newSeriesCleaner(bucket, schemaCfg), t.expiration, chunkRewriter)
empty, err = markforDelete(ctx, tableName, markerWriter, chunkIt, newSeriesCleaner(bucket, schemaCfg, tableName), t.expiration, chunkRewriter)
if err != nil {
return err
}
Expand All @@ -129,47 +129,79 @@ func (t *Marker) markTable(ctx context.Context, tableName string, db *bbolt.DB)
return empty, markerWriter.Count(), nil
}

func markforDelete(ctx context.Context, marker MarkerStorageWriter, chunkIt ChunkEntryIterator, seriesCleaner SeriesCleaner, expiration ExpirationChecker, chunkRewriter *chunkRewriter) (bool, error) {
func markforDelete(ctx context.Context, tableName string, marker MarkerStorageWriter, chunkIt ChunkEntryIterator, seriesCleaner SeriesCleaner, expiration ExpirationChecker, chunkRewriter *chunkRewriter) (bool, error) {
seriesMap := newUserSeriesMap()
// tableInterval holds the interval for which the table is expected to have the chunks indexed
tableInterval := ExtractIntervalFromTableName(tableName)
empty := true
now := model.Now()

for chunkIt.Next() {
if chunkIt.Err() != nil {
return false, chunkIt.Err()
}
c := chunkIt.Entry()
seriesMap.Add(c.SeriesID, c.UserID, c.Labels)

// see if the chunk is deleted completely or partially
if expired, nonDeletedIntervals := expiration.Expired(c, now); expired {
if len(nonDeletedIntervals) == 0 {
seriesMap.Add(c.SeriesID, c.UserID)
} else {
if len(nonDeletedIntervals) > 0 {
wroteChunks, err := chunkRewriter.rewriteChunk(ctx, c, nonDeletedIntervals)
if err != nil {
return false, err
}

if !wroteChunks {
seriesMap.Add(c.SeriesID, c.UserID)
if wroteChunks {
// we have re-written chunk to the storage so the table won't be empty and the series are still being referred.
empty = false
seriesMap.MarkSeriesNotDeleted(c.SeriesID, c.UserID)
}
}

if err := chunkIt.Delete(); err != nil {
return false, err
}
if err := marker.Put(c.ChunkID); err != nil {
return false, err

// Mark the chunk for deletion only if it is completely deleted, or this is the last table that the chunk is indexed in.
// For a partially deleted chunk, if we delete the source chunk before all the tables which index it are processed then
// the retention would fail because it would fail to find it in the storage.
if len(nonDeletedIntervals) == 0 || c.Through <= tableInterval.End {
if err := marker.Put(c.ChunkID); err != nil {
return false, err
}
}
continue
}

// The chunk is not deleted, now see if we can drop its index entry based on end time from tableInterval.
// If chunk end time is after the end time of tableInterval, it means the chunk would also be indexed in the next table.
// We would now check if the end time of the tableInterval is out of retention period so that
// we can drop the chunk entry from this table without removing the chunk from the store.
if c.Through.After(tableInterval.End) {
if expiration.DropFromIndex(c, tableInterval.End, now) {
if err := chunkIt.Delete(); err != nil {
return false, err
}
continue
}
}

empty = false
seriesMap.MarkSeriesNotDeleted(c.SeriesID, c.UserID)
}
if empty {
return true, nil
}
if ctx.Err() != nil {
return false, ctx.Err()
}
return false, seriesMap.ForEach(func(seriesID, userID []byte) error {
return seriesCleaner.Cleanup(seriesID, userID)

return false, seriesMap.ForEach(func(info userSeriesInfo) error {
if !info.isDeleted {
return nil
}

return seriesCleaner.Cleanup(info.UserID(), info.lbls)
})
}

Expand Down