Commit dc93da6

compressionanalyzer: sample per byte instead of per block
We record the compression ratio and the compression/decompression time per block. This is fine when blocks are all about the same size, but it is less useful when sizes vary widely (as in the >128KB buckets). We want the compression ratio to be the ratio between the total decompressed size and the total compressed size, not the average of the per-block ratios; similarly, we want to measure performance in terms of CPU time per byte.

We switch to frequency-weighted sampling, where a block's frequency is its size in bytes, and we now report compression/decompression performance in MB/s. Note that the standard deviation for the latter is derived from the CPU-time-per-byte metric; it doesn't apply directly to the MB/s figure.
1 parent 60a9df1 commit dc93da6
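
The diffs below replace the plain Welford accumulators with a WeightedWelford type. That type is defined in one of the eight changed files and is not shown in this excerpt, so the following is only a hypothetical sketch of frequency-weighted Welford accumulation (West's 1979 update rule), illustrating what an Add(value, weight) of this shape has to maintain; all names in the sketch are made up for illustration:

package main

import (
	"fmt"
	"math"
)

// weightedWelford keeps a running weighted mean and variance. The weight is
// treated as a frequency: Add(x, w) behaves like adding the sample x w times.
type weightedWelford struct {
	sumWeight float64 // total weight so far (here: total bytes)
	mean      float64 // running weighted mean
	m2        float64 // sum of weighted squared deviations from the mean
}

// Add records sample x with frequency weight w (e.g. the block size).
func (s *weightedWelford) Add(x float64, w uint64) {
	wf := float64(w)
	s.sumWeight += wf
	delta := x - s.mean
	s.mean += delta * wf / s.sumWeight
	s.m2 += wf * delta * (x - s.mean)
}

func (s *weightedWelford) Mean() float64 { return s.mean }

// SampleStandardDeviation applies the frequency-weights correction (W - 1).
func (s *weightedWelford) SampleStandardDeviation() float64 {
	if s.sumWeight <= 1 {
		return 0
	}
	return math.Sqrt(s.m2 / (s.sumWeight - 1))
}

func main() {
	// Two blocks: 1KB at 2.0 ns/byte and 128KB at 0.5 ns/byte. The large
	// block dominates, so the weighted mean lands near 0.5 rather than at
	// the unweighted average of 1.25.
	var w weightedWelford
	w.Add(2.0, 1<<10)
	w.Add(0.5, 128<<10)
	fmt.Printf("mean = %.3f ns/byte (± %.3f)\n", w.Mean(), w.SampleStandardDeviation())
}

Because the weights are frequencies (bytes), a 128KB block influences the mean 128× more than a 1KB block.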

8 files changed: +209 -118 lines

sstable/compressionanalyzer/block_analyzer.go

Lines changed: 4 additions & 4 deletions
@@ -88,10 +88,10 @@ func (a *BlockAnalyzer) runExperiment(
 	}
 	decompressionTime := t2.Elapsed()
 
-	// CPU times are in microseconds.
-	pa.CompressionTime.Add(compressionTime.Seconds() * 1e6)
-	pa.DecompressionTime.Add(decompressionTime.Seconds() * 1e6)
-	pa.CompressionRatio.Add(float64(len(block)) / float64(len(compressed)))
+	// CPU times are in nanoseconds / byte.
+	pa.CompressionTime.Add(float64(compressionTime)/float64(len(block)), uint64(len(block)))
+	pa.DecompressionTime.Add(float64(decompressionTime)/float64(len(block)), uint64(len(block)))
+	pa.CompressionRatio.Add(float64(len(block))/float64(len(compressed)), uint64(len(block)))
 }
 
 func ensureLen(b []byte, n int) []byte {
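
Why weighting by len(block) gives the aggregate the commit message asks for: block i contributes the sample x_i = t_i/b_i (nanoseconds per byte) with weight w_i = b_i, so the weighted mean works out to

    sum(w_i * x_i) / sum(w_i) = sum(b_i * (t_i/b_i)) / sum(b_i) = sum(t_i) / sum(b_i),

i.e. total CPU time over total bytes, rather than an unweighted average of per-block figures.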

sstable/compressionanalyzer/buckets.go

Lines changed: 31 additions & 21 deletions
@@ -9,6 +9,7 @@ import (
 	"math"
 	"strings"
 	"text/tabwriter"
+	"time"
 
 	"github.com/cockroachdb/pebble/internal/compression"
 )
@@ -145,10 +146,10 @@ type Bucket struct {
 // PerSetting holds statistics from experiments on blocks in a bucket with a
 // specific compression.Setting.
 type PerSetting struct {
-	CompressionRatio Welford
-	// CPU times are in microseconds.
-	CompressionTime Welford
-	DecompressionTime Welford
+	CompressionRatio WeightedWelford
+	// CPU times are in nanoseconds per byte.
+	CompressionTime WeightedWelford
+	DecompressionTime WeightedWelford
 }
 
 func (b *Buckets) String(minSamples int) string {
@@ -167,19 +168,22 @@ func (b *Buckets) String(minSamples int) string {
 			if bucket.UncompressedSize.Count() < int64(minSamples) {
 				continue
 			}
-			fmt.Fprintf(tw, "%s\t%s\t%s\t%d\t%s\tCR", k, sz, c, bucket.UncompressedSize.Count(), withStdDev(bucket.UncompressedSize, "KB", 1.0/1024))
+			fmt.Fprintf(tw, "%s\t%s\t%s\t%d\t%.1fKB %s\tCR", k, sz, c, bucket.UncompressedSize.Count(), bucket.UncompressedSize.Mean()/1024, stdDevStr(bucket.UncompressedSize.Mean(), bucket.UncompressedSize.SampleStandardDeviation()))
 			for _, e := range (*b)[k][sz][c].Experiments {
-				fmt.Fprintf(tw, "\t%s", withStdDev(e.CompressionRatio, "", 1.0))
+				mean, stdDev := e.CompressionRatio.Mean(), e.CompressionRatio.SampleStandardDeviation()
+				fmt.Fprintf(tw, "\t%.2f %s", mean, stdDevStr(mean, stdDev))
 			}
 			fmt.Fprintf(tw, "\n")
 			fmt.Fprintf(tw, "\t\t\t\t\tComp")
 			for _, e := range (*b)[k][sz][c].Experiments {
-				fmt.Fprintf(tw, "\t%s", withStdDev(e.CompressionTime, "us", 1.0))
+				mean, stdDev := e.CompressionTime.Mean(), e.CompressionTime.SampleStandardDeviation()
+				fmt.Fprintf(tw, "\t%.0fMBps %s", toMBPS(mean), stdDevStr(mean, stdDev))
 			}
 			fmt.Fprintf(tw, "\n")
 			fmt.Fprintf(tw, "\t\t\t\t\tDecomp")
 			for _, e := range (*b)[k][sz][c].Experiments {
-				fmt.Fprintf(tw, "\t%s", withStdDev(e.DecompressionTime, "us", 1.0))
+				mean, stdDev := e.DecompressionTime.Mean(), e.DecompressionTime.SampleStandardDeviation()
+				fmt.Fprintf(tw, "\t%.0fMBps %s", toMBPS(mean), stdDevStr(mean, stdDev))
 			}
 			fmt.Fprintf(tw, "\n")
 		}
@@ -189,16 +193,22 @@ func (b *Buckets) String(minSamples int) string {
 	return buf.String()
 }
 
-func withStdDev(w Welford, units string, scale float64) string {
-	mean := w.Mean() * scale
-	if math.IsNaN(mean) {
-		mean = 0
+func toMBPS(nsPerByte float64) float64 {
+	if nsPerByte == 0 {
+		return 0
 	}
-	stddev := 0
-	if s := w.SampleStandardDeviation(); !math.IsNaN(s) {
-		stddev = int(100 * s / w.Mean())
+	const oneMB = 1 << 20
+	return float64(time.Second) / (nsPerByte * oneMB)
+}
+
+// stdDevStr formats the standard deviation as a percentage of the mean,
+// for example "± 10%".
+func stdDevStr(mean, stddev float64) string {
+	percent := 0
+	if mean > 0 {
+		percent = int(math.Round(100 * stddev / mean))
 	}
-	return fmt.Sprintf("%.1f%s ± %d%%", mean, units, stddev)
+	return fmt.Sprintf("± %d%%", percent)
 }
 
 func (b *Buckets) ToCSV(minSamples int) string {
@@ -207,9 +217,9 @@ func (b *Buckets) ToCSV(minSamples int) string {
 	for _, s := range Settings {
 		fmt.Fprintf(&buf, ",%s CR", s.String())
 		fmt.Fprintf(&buf, ",%s CR±", s.String())
-		fmt.Fprintf(&buf, ",%s Comp us", s.String())
+		fmt.Fprintf(&buf, ",%s Comp ns/b", s.String())
 		fmt.Fprintf(&buf, ",%s Comp±", s.String())
-		fmt.Fprintf(&buf, ",%s Decomp us", s.String())
+		fmt.Fprintf(&buf, ",%s Decomp ns/b", s.String())
 		fmt.Fprintf(&buf, ",%s Decomp±", s.String())
 	}
 	fmt.Fprintf(&buf, "\n")
@@ -222,9 +232,9 @@ func (b *Buckets) ToCSV(minSamples int) string {
 		}
 		fmt.Fprintf(&buf, "%s,%s,%s,%d,%.0f,%.0f", k, sz, c, bucket.UncompressedSize.Count(), bucket.UncompressedSize.Mean(), bucket.UncompressedSize.SampleStandardDeviation())
 		for _, e := range (*b)[k][sz][c].Experiments {
-			fmt.Fprintf(&buf, ",%.1f,%.1f", e.CompressionRatio.Mean(), e.CompressionRatio.SampleStandardDeviation())
-			fmt.Fprintf(&buf, ",%.1f,%.1f", e.CompressionTime.Mean(), e.CompressionTime.SampleStandardDeviation())
-			fmt.Fprintf(&buf, ",%.1f,%.1f", e.DecompressionTime.Mean(), e.DecompressionTime.SampleStandardDeviation())
+			fmt.Fprintf(&buf, ",%.3f,%.3f", e.CompressionRatio.Mean(), e.CompressionRatio.SampleStandardDeviation())
+			fmt.Fprintf(&buf, ",%.3f,%.3f", e.CompressionTime.Mean(), e.CompressionTime.SampleStandardDeviation())
+			fmt.Fprintf(&buf, ",%.3f,%.3f", e.DecompressionTime.Mean(), e.DecompressionTime.SampleStandardDeviation())
		}
 		fmt.Fprintf(&buf, "\n")
 	}
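
To make the ns/byte to MB/s conversion concrete, here is a small self-contained program (toMBPS is copied from the diff above; main is just an illustration):

package main

import (
	"fmt"
	"time"
)

// toMBPS converts a per-byte CPU cost in nanoseconds into MB/s,
// with 1 MB = 1<<20 bytes (as in the diff above).
func toMBPS(nsPerByte float64) float64 {
	if nsPerByte == 0 {
		return 0
	}
	const oneMB = 1 << 20
	return float64(time.Second) / (nsPerByte * oneMB)
}

func main() {
	// 10 ns/byte => 1e9 / (10 * 1048576) ≈ 95.4 MB/s.
	fmt.Printf("%.1f MB/s\n", toMBPS(10))
}

Per the commit message, the "± N%" printed next to this figure is computed from the ns/byte samples, so it does not transfer directly to the MB/s value.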

sstable/compressionanalyzer/buckets_test.go

Lines changed: 4 additions & 3 deletions
@@ -69,9 +69,10 @@ func exampleBuckets() Buckets {
 			b.UncompressedSize.Add(100 + float64(r.IntN(64*1024)))
 			for j := range b.Experiments {
 				e := &b.Experiments[j]
-				e.CompressionRatio.Add(float64(j+1) + 0.1*float64(r.IntN(10)))
-				e.CompressionTime.Add(float64((j+1)*10) + 0.1*float64(r.IntN(10)))
-				e.DecompressionTime.Add(float64((j+1)*100) + 0.1*float64(r.IntN(10)))
+				blockSize := uint64(50 + r.IntN(100))
+				e.CompressionRatio.Add(float64(j+1)+0.1*float64(r.IntN(10)), blockSize)
+				e.CompressionTime.Add(float64((j+1)*10)+0.1*float64(r.IntN(10)), blockSize)
+				e.DecompressionTime.Add(float64((j+1)*100)+0.1*float64(r.IntN(10)), blockSize)
 			}
 		}
 	}

sstable/compressionanalyzer/file_analyzer_test.go

Lines changed: 3 additions & 3 deletions
@@ -36,10 +36,10 @@ func TestFileAnalyzer(t *testing.T) {
 			// Snappy always has the same output in all configurations and on
 			// all platforms.
 			if Settings[l].Algorithm != compression.SnappyAlgorithm {
-				bucket.Experiments[l].CompressionRatio = Welford{}
+				bucket.Experiments[l].CompressionRatio = WeightedWelford{}
 			}
-			bucket.Experiments[l].CompressionTime = Welford{}
-			bucket.Experiments[l].DecompressionTime = Welford{}
+			bucket.Experiments[l].CompressionTime = WeightedWelford{}
+			bucket.Experiments[l].DecompressionTime = WeightedWelford{}
 		}
 	}
 }
