Skip to content

Commit b2e038a

Browse files
committed
block: generalize CompressionStats
Generalize `CompressionStats` so that it can be used with an arbitrary number of settings.
1 parent 278a861 commit b2e038a

File tree

3 files changed (+93 -42 lines changed)

sstable/block/compression_stats.go

Lines changed: 81 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -5,63 +5,83 @@
55
package block
66

77
import (
8+
"cmp"
89
"fmt"
910
"iter"
11+
"slices"
1012
"strings"
1113

1214
"github.com/cockroachdb/pebble/internal/compression"
15+
"github.com/cockroachdb/pebble/internal/invariants"
1316
)
1417

15-
// CompressionStats collects compression statistics for a single file - the
16-
// total compressed and uncompressed sizes for each distinct compression.Setting
17-
// used.
18+
// CompressionStats collects compression statistics (either for a single file or
19+
// for a collection of files).
20+
//
21+
// Compression statistics consist of the total compressed and uncompressed sizes for
22+
// each distinct compression.Setting used.
1823
type CompressionStats struct {
19-
n int
20-
// Compression profiles have three settings (data, value, other) and
21-
// NoCompression can also be used for data that didn't compress.
22-
buf [4]CompressionStatsForSetting
24+
// We inline common values to avoid allocating the map in most cases.
25+
26+
// Total number of bytes that are not compressed.
27+
noCompressionBytes uint64
28+
// Compression stats for fastestCompression.
29+
fastest CompressionStatsForSetting
30+
31+
others map[compression.Setting]CompressionStatsForSetting
2332
}
2433

2534
type CompressionStatsForSetting struct {
26-
Setting compression.Setting
27-
UncompressedBytes uint64
2835
CompressedBytes uint64
36+
UncompressedBytes uint64
37+
}
38+
39+
func (cs *CompressionStatsForSetting) Add(other CompressionStatsForSetting) {
40+
cs.CompressedBytes += other.CompressedBytes
41+
cs.UncompressedBytes += other.UncompressedBytes
2942
}
3043

3144
// add updates the stats to reflect a block that was compressed with the given setting.
32-
func (c *CompressionStats) add(
33-
setting compression.Setting, sizeUncompressed, sizeCompressed uint64,
34-
) {
35-
for i := 0; i < c.n; i++ {
36-
if c.buf[i].Setting == setting {
37-
c.buf[i].UncompressedBytes += sizeUncompressed
38-
c.buf[i].CompressedBytes += sizeCompressed
39-
return
45+
func (c *CompressionStats) add(setting compression.Setting, stats CompressionStatsForSetting) {
46+
switch setting {
47+
case compression.None:
48+
c.noCompressionBytes += stats.UncompressedBytes
49+
if invariants.Enabled && stats.UncompressedBytes != stats.CompressedBytes {
50+
panic("invalid stats for no-compression")
4051
}
52+
case fastestCompression:
53+
c.fastest.Add(stats)
54+
default:
55+
if c.others == nil {
56+
c.others = make(map[compression.Setting]CompressionStatsForSetting, 2)
57+
}
58+
prev := c.others[setting]
59+
prev.Add(stats)
60+
c.others[setting] = prev
4161
}
42-
if c.n >= len(c.buf)-1 {
43-
panic("too many compression settings")
44-
}
45-
c.buf[c.n] = CompressionStatsForSetting{
46-
Setting: setting,
47-
UncompressedBytes: sizeUncompressed,
48-
CompressedBytes: sizeCompressed,
49-
}
50-
c.n++
5162
}
5263

5364
// MergeWith updates the receiver stats to include the other stats.
5465
func (c *CompressionStats) MergeWith(other *CompressionStats) {
55-
for i := 0; i < other.n; i++ {
56-
c.add(other.buf[i].Setting, other.buf[i].UncompressedBytes, other.buf[i].CompressedBytes)
66+
for s, cs := range other.All() {
67+
c.add(s, cs)
5768
}
5869
}
5970

6071
// All returns an iterator over the collected stats, in arbitrary order.
61-
func (c CompressionStats) All() iter.Seq[CompressionStatsForSetting] {
62-
return func(yield func(cs CompressionStatsForSetting) bool) {
63-
for i := 0; i < c.n; i++ {
64-
if !yield(c.buf[i]) {
72+
func (c *CompressionStats) All() iter.Seq2[compression.Setting, CompressionStatsForSetting] {
73+
return func(yield func(s compression.Setting, cs CompressionStatsForSetting) bool) {
74+
if c.noCompressionBytes != 0 && !yield(compression.None, CompressionStatsForSetting{
75+
UncompressedBytes: c.noCompressionBytes,
76+
CompressedBytes: c.noCompressionBytes,
77+
}) {
78+
return
79+
}
80+
if c.fastest.UncompressedBytes != 0 && !yield(fastestCompression, c.fastest) {
81+
return
82+
}
83+
for s, cs := range c.others {
84+
if !yield(s, cs) {
6585
return
6686
}
6787
}
@@ -70,14 +90,39 @@ func (c CompressionStats) All() iter.Seq[CompressionStatsForSetting] {
7090

7191
// String returns a string representation of the stats, in the format:
7292
// "<setting1>:<compressed1>/<uncompressed1>,<setting2>:<compressed2>/<uncompressed2>,..."
93+
//
94+
// The settings are ordered alphabetically by algorithm name, then by level.
7395
func (c CompressionStats) String() string {
96+
n := len(c.others)
97+
if c.noCompressionBytes != 0 {
98+
n++
99+
}
100+
if c.fastest.UncompressedBytes != 0 {
101+
n++
102+
}
103+
104+
type entry struct {
105+
s compression.Setting
106+
cs CompressionStatsForSetting
107+
}
108+
entries := make([]entry, 0, n)
109+
for s, cs := range c.All() {
110+
entries = append(entries, entry{s, cs})
111+
}
112+
slices.SortFunc(entries, func(x, y entry) int {
113+
if x.s.Algorithm != y.s.Algorithm {
114+
return cmp.Compare(x.s.Algorithm.String(), y.s.Algorithm.String())
115+
}
116+
return cmp.Compare(x.s.Level, y.s.Level)
117+
})
118+
74119
var buf strings.Builder
75-
buf.Grow(c.n * 64)
76-
for i := 0; i < c.n; i++ {
77-
if i > 0 {
120+
buf.Grow(n * 64)
121+
for _, e := range entries {
122+
if buf.Len() > 0 {
78123
buf.WriteString(",")
79124
}
80-
fmt.Fprintf(&buf, "%s:%d/%d", c.buf[i].Setting.String(), c.buf[i].CompressedBytes, c.buf[i].UncompressedBytes)
125+
fmt.Fprintf(&buf, "%s:%d/%d", e.s.String(), e.cs.CompressedBytes, e.cs.UncompressedBytes)
81126
}
82127
return buf.String()
83128
}

sstable/block/compressor.go

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -89,18 +89,24 @@ func (c *Compressor) Compress(dst, src []byte, kind Kind) (CompressionIndicator,
8989
// before
9090
if setting.Algorithm != compression.NoCompression &&
9191
int64(len(out))*100 > int64(len(src))*int64(100-c.minReductionPercent) {
92-
c.stats.add(compression.None, uint64(len(src)), uint64(len(src)))
93-
return NoCompressionIndicator, append(out[:0], src...)
92+
setting.Algorithm = compression.NoCompression
93+
out = append(out[:0], src...)
9494
}
95-
c.stats.add(setting, uint64(len(src)), uint64(len(out)))
95+
c.stats.add(setting, CompressionStatsForSetting{
96+
UncompressedBytes: uint64(len(src)),
97+
CompressedBytes: uint64(len(out)),
98+
})
9699
return compressionIndicatorFromAlgorithm(setting.Algorithm), out
97100
}
98101

99102
// UncompressedBlock informs the compressor that a block of the given size and
100103
// kind was written uncompressed. This is used so that the final statistics are
101104
// complete.
102105
func (c *Compressor) UncompressedBlock(size int, kind Kind) {
103-
c.stats.add(compression.None, uint64(size), uint64(size))
106+
c.stats.add(compression.None, CompressionStatsForSetting{
107+
UncompressedBytes: uint64(size),
108+
CompressedBytes: uint64(size),
109+
})
104110
}
105111

106112
// Stats returns the compression stats. The result can only be used until the

sstable/testdata/writer_value_blocks

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -670,7 +670,7 @@ sstable
670670
│ ├── 00630 rocksdb.top-level.index.size (24)
671671
│ ├── restart points
672672
│ │ └── 00654 [restart 0]
673-
│ └── trailer [compression=none checksum=0x3508516e]
673+
│ └── trailer [compression=none checksum=0x88688345]
674674
├── meta-index offset: 1360 length: 64
675675
│ ├── 0000 pebble.value_index block:680/8 value-blocks-index-lengths: 1(num), 2(offset), 1(length) [restart]
676676
│ ├── 0027 rocksdb.properties block:693/662 [restart]
@@ -1247,7 +1247,7 @@ sstable
12471247
│ ├── 00698 rocksdb.property.collectors (41)
12481248
│ ├── 00739 rocksdb.raw.key.size (21)
12491249
│ ├── 00760 rocksdb.raw.value.size (23)
1250-
│ └── trailer [compression=snappy checksum=0xf941a001]
1250+
│ └── trailer [compression=snappy checksum=0x4ed1f050]
12511251
├── meta-index offset: 908 length: 72
12521252
│ ├── 0000 pebble.value_index block:257/3 value-blocks-index-lengths: 1(num), 1(offset), 1(length)
12531253
│ │

0 commit comments

Comments
 (0)