Skip to content

Commit

Permalink
fix(blooms): Handle not found metas gracefully (#12853)
Browse files Browse the repository at this point in the history
There is a time window between listing metas and fetching them from object storage. This can lead to a race condition in which a listed meta is no longer found in object storage, because it was deleted and superseded by a newer meta in the meantime.

This can happen when querying recent bloom data that is still subject to updates, and it results in an error like this:

```
rpc error: code = Unknown desc = failed to get meta file bloom/tsdb_index_19843/XXXX/metas/18fbdc8500000000-1921d15dffffffff-270affee.json: storage: object doesn't exist
(Trace ID: 4fe28d32cfa3e3df9495c3a5d4a683fb)
```

Signed-off-by: Christian Haudum <christian.haudum@gmail.com>
  • Loading branch information
chaudum committed May 2, 2024
1 parent ed84b23 commit 37c8822
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 11 deletions.
19 changes: 13 additions & 6 deletions pkg/storage/stores/shipper/bloomshipper/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"time"

"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/grafana/dskit/concurrency"
"github.com/pkg/errors"
"github.com/prometheus/common/model"
Expand Down Expand Up @@ -388,28 +389,34 @@ func (b *BloomClient) GetMetas(ctx context.Context, refs []MetaRef) ([]Meta, err
err := concurrency.ForEachJob(ctx, len(refs), b.concurrency, func(ctx context.Context, idx int) error {
meta, err := b.GetMeta(ctx, refs[idx])
if err != nil {
return err
key := b.KeyResolver.Meta(refs[idx]).Addr()
if !b.IsObjectNotFoundErr(err) {
return fmt.Errorf("failed to get meta file %s: %w", key, err)
}
level.Error(b.logger).Log("msg", "failed to get meta file", "ref", key, "err", err)
}
results[idx] = meta
return nil
})
return results, err
}

// GetMeta fetches the meta file for given MetaRef from object storage and
// decodes the JSON data into a Meta.
// If the meta file is not found in storage or decoding fails, a Meta carrying
// the given MetaRef is returned along with the error.
func (b *BloomClient) GetMeta(ctx context.Context, ref MetaRef) (Meta, error) {
meta := Meta{
MetaRef: ref,
}
meta := Meta{MetaRef: ref}
key := b.KeyResolver.Meta(ref).Addr()
reader, _, err := b.client.GetObject(ctx, key)
if err != nil {
return Meta{}, fmt.Errorf("failed to get meta file%s: %w", key, err)
return meta, err
}
defer reader.Close()

err = json.NewDecoder(reader).Decode(&meta)
if err != nil {
return Meta{}, fmt.Errorf("failed to decode meta file %s: %w", key, err)
return meta, errors.Wrap(err, "failed to decode JSON")
}
return meta, nil
}
Expand Down
19 changes: 14 additions & 5 deletions pkg/storage/stores/shipper/bloomshipper/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,11 +107,20 @@ func TestBloomClient_GetMetas(t *testing.T) {
require.Equal(t, metas, []Meta{m1, m2})
})

t.Run("does not exist", func(t *testing.T) {
metas, err := c.GetMetas(ctx, []MetaRef{{}})
require.Error(t, err)
require.True(t, c.client.IsObjectNotFoundErr(err))
require.Equal(t, metas, []Meta{{}})
t.Run("does not exist - yields empty meta", func(t *testing.T) {
ref := MetaRef{
Ref: Ref{
TenantID: "tenant",
TableName: "table",
Bounds: v1.FingerprintBounds{},
StartTimestamp: 1000,
EndTimestamp: 2000,
Checksum: 1234,
},
}
metas, err := c.GetMetas(ctx, []MetaRef{ref})
require.NoError(t, err)
require.Equal(t, metas, []Meta{{MetaRef: ref}})
})
}

Expand Down

0 comments on commit 37c8822

Please sign in to comment.