5
5
package block
6
6
7
7
import (
8
+ "cmp"
8
9
"fmt"
9
10
"iter"
11
+ "slices"
10
12
"strings"
11
13
12
14
"github.com/cockroachdb/pebble/internal/compression"
15
+ "github.com/cockroachdb/pebble/internal/invariants"
13
16
)
14
17
15
- // CompressionStats collects compression statistics for a single file - the
16
- // total compressed and uncompressed sizes for each distinct compression.Setting
17
- // used.
18
+ // CompressionStats collects compression statistics (either for a single file or
19
+ // for a collection of files).
20
+ //
21
+ // Compression statistics consist of the total compressed and uncompressed sizes for
22
+ // each distinct compression.Setting used.
18
23
type CompressionStats struct {
19
- n int
20
- // Compression profiles have three settings (data, value, other) and
21
- // NoCompression can also be used for data that didn't compress.
22
- buf [4 ]CompressionStatsForSetting
24
+ // We inline the common cases (no compression and fastestCompression) so
// that the others map only has to be allocated when some other setting is
// recorded.
25
+
26
+ // Total number of bytes that are not compressed (compression.None). A
// single counter is kept; All reports it as both the compressed and the
// uncompressed size.
27
+ noCompressionBytes uint64
28
+ // Compression stats for fastestCompression.
29
+ fastest CompressionStatsForSetting
30
+
// Compression stats for every other setting, keyed by setting. It stays nil
// until add records a setting that is not covered by the inlined fields.
31
+ others map [compression.Setting ]CompressionStatsForSetting
23
32
}
24
33
25
34
// CompressionStatsForSetting holds the total compressed and uncompressed byte
// counts recorded for a single compression setting.
type CompressionStatsForSetting struct {
26
- Setting compression.Setting
27
- UncompressedBytes uint64
28
35
CompressedBytes uint64
36
+ UncompressedBytes uint64
37
+ }
38
+
39
// Add accumulates the compressed and uncompressed byte counts of other into cs.
+ func (cs * CompressionStatsForSetting ) Add (other CompressionStatsForSetting ) {
40
+ cs .CompressedBytes += other .CompressedBytes
41
+ cs .UncompressedBytes += other .UncompressedBytes
29
42
}
30
43
31
44
// add folds the given stats, recorded for data compressed with the given
// setting, into c. compression.None and fastestCompression are accumulated in
// dedicated fields; any other setting is accumulated in the others map, which
// is allocated on first use.
32
- func (c * CompressionStats ) add (
33
- setting compression.Setting , sizeUncompressed , sizeCompressed uint64 ,
34
- ) {
35
- for i := 0 ; i < c .n ; i ++ {
36
- if c .buf [i ].Setting == setting {
37
- c .buf [i ].UncompressedBytes += sizeUncompressed
38
- c .buf [i ].CompressedBytes += sizeCompressed
39
- return
45
+ func (c * CompressionStats ) add (setting compression.Setting , stats CompressionStatsForSetting ) {
46
+ switch setting {
47
+ case compression .None :
// Only the uncompressed size is tracked for uncompressed data; in
// invariants builds we verify that the compressed size matches it.
48
+ c .noCompressionBytes += stats .UncompressedBytes
49
+ if invariants .Enabled && stats .UncompressedBytes != stats .CompressedBytes {
50
+ panic ("invalid stats for no-compression" )
40
51
}
52
+ case fastestCompression :
53
+ c .fastest .Add (stats )
54
+ default :
55
+ if c .others == nil {
56
+ c .others = make (map [compression.Setting ]CompressionStatsForSetting , 2 )
57
+ }
// Map values are not addressable, so update a copy and store it back. A
// missing key yields the zero value, which is the correct starting point.
58
+ prev := c .others [setting ]
59
+ prev .Add (stats )
60
+ c .others [setting ] = prev
41
61
}
42
- if c .n >= len (c .buf )- 1 {
43
- panic ("too many compression settings" )
44
- }
45
- c .buf [c .n ] = CompressionStatsForSetting {
46
- Setting : setting ,
47
- UncompressedBytes : sizeUncompressed ,
48
- CompressedBytes : sizeCompressed ,
49
- }
50
- c .n ++
51
62
}
52
63
53
64
// MergeWith updates the receiver stats to include the other stats: every
// (setting, stats) pair yielded by other.All() is added to the receiver.
54
65
func (c * CompressionStats ) MergeWith (other * CompressionStats ) {
55
- for i := 0 ; i < other .n ; i ++ {
56
- c .add (other . buf [ i ]. Setting , other . buf [ i ]. UncompressedBytes , other . buf [ i ]. CompressedBytes )
66
+ for s , cs := range other .All () {
67
+ c .add (s , cs )
57
68
}
58
69
}
59
70
60
71
// All returns an iterator over the collected stats, as (setting, stats) pairs.
// The compression.None and fastestCompression entries are yielded only when
// their byte counts are non-zero; callers should not rely on the order in which
// settings are yielded. Iteration stops as soon as yield returns false.
61
- func (c CompressionStats ) All () iter.Seq [CompressionStatsForSetting ] {
62
- return func (yield func (cs CompressionStatsForSetting ) bool ) {
63
- for i := 0 ; i < c .n ; i ++ {
64
- if ! yield (c .buf [i ]) {
72
+ func (c * CompressionStats ) All () iter.Seq2 [compression.Setting , CompressionStatsForSetting ] {
73
+ return func (yield func (s compression.Setting , cs CompressionStatsForSetting ) bool ) {
// Uncompressed data is kept as a single counter, so it is reported as both
// the compressed and the uncompressed size.
74
+ if c .noCompressionBytes != 0 && ! yield (compression .None , CompressionStatsForSetting {
75
+ UncompressedBytes : c .noCompressionBytes ,
76
+ CompressedBytes : c .noCompressionBytes ,
77
+ }) {
78
+ return
79
+ }
80
+ if c .fastest .UncompressedBytes != 0 && ! yield (fastestCompression , c .fastest ) {
81
+ return
82
+ }
// Ranging over a nil map is a no-op, so no nil check is needed here.
83
+ for s , cs := range c .others {
84
+ if ! yield (s , cs ) {
65
85
return
66
86
}
67
87
}
@@ -70,14 +90,39 @@ func (c CompressionStats) All() iter.Seq[CompressionStatsForSetting] {
70
90
71
91
// String returns a string representation of the stats, in the format:
72
92
// "<setting1>:<compressed1>/<uncompressed1>,<setting2>:<compressed2>/<uncompressed2>,..."
93
+ //
94
+ // The settings are ordered by algorithm name, then by level.
73
95
func (c CompressionStats ) String () string {
// n is the number of entries All will yield; it sizes both the entries slice
// and the string builder.
96
+ n := len (c .others )
97
+ if c .noCompressionBytes != 0 {
98
+ n ++
99
+ }
100
+ if c .fastest .UncompressedBytes != 0 {
101
+ n ++
102
+ }
103
+
104
+ type entry struct {
105
+ s compression.Setting
106
+ cs CompressionStatsForSetting
107
+ }
108
+ entries := make ([]entry , 0 , n )
109
+ for s , cs := range c .All () {
110
+ entries = append (entries , entry {s , cs })
111
+ }
// Sort the entries so that the output does not depend on map iteration
// order.
112
+ slices .SortFunc (entries , func (x , y entry ) int {
113
+ if x .s .Algorithm != y .s .Algorithm {
114
+ return cmp .Compare (x .s .Algorithm .String (), y .s .Algorithm .String ())
115
+ }
116
+ return cmp .Compare (x .s .Level , y .s .Level )
117
+ })
118
+
74
119
var buf strings.Builder
75
- buf .Grow (c . n * 64 )
76
- for i := 0 ; i < c . n ; i ++ {
77
- if i > 0 {
120
+ buf .Grow (n * 64 )
121
+ for _ , e := range entries {
// A non-empty builder means at least one entry was already written, so a
// separator is needed.
122
+ if buf . Len () > 0 {
78
123
buf .WriteString ("," )
79
124
}
80
- fmt .Fprintf (& buf , "%s:%d/%d" , c . buf [ i ]. Setting . String (), c . buf [ i ] .CompressedBytes , c . buf [ i ] .UncompressedBytes )
125
+ fmt .Fprintf (& buf , "%s:%d/%d" , e . s . String (), e . cs .CompressedBytes , e . cs .UncompressedBytes )
81
126
}
82
127
return buf .String ()
83
128
}
0 commit comments