Commit dc93da6

compressionanalyzer: sample per byte instead of per block
We record the compression ratio and the compression/decompression time per block. This is fine when blocks are all about the same size, but it is less useful when sizes vary widely (as in the >128KB buckets). We want the compression ratio to be the ratio between the total decompressed size and the total compressed size, not the average of the per-block ratios; similarly, we want to measure performance in terms of CPU time per byte.

We switch to frequency-weighted sampling, where a block's frequency is its size in bytes, and we now report compression/decompression performance in MB/s. Note that the standard deviation for the latter is derived from the CPU-time-per-byte metric; it doesn't apply directly to the MB/s figure.
1 parent 60a9df1 commit dc93da6
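
The diffs below replace the plain Welford accumulators with a WeightedWelford type. That type is defined in one of the eight changed files and is not shown in this excerpt, so the following is only a hypothetical sketch of frequency-weighted Welford accumulation (West's 1979 update rule), illustrating what an Add(value, weight) of this shape has to maintain; all names in the sketch are made up for illustration:

package main

import (
	"fmt"
	"math"
)

// weightedWelford keeps a running weighted mean and variance. The weight is
// treated as a frequency: Add(x, w) behaves like adding the sample x w times.
type weightedWelford struct {
	sumWeight float64 // total weight so far (here: total bytes)
	mean      float64 // running weighted mean
	m2        float64 // sum of weighted squared deviations from the mean
}

// Add records sample x with frequency weight w (e.g. the block size).
func (s *weightedWelford) Add(x float64, w uint64) {
	wf := float64(w)
	s.sumWeight += wf
	delta := x - s.mean
	s.mean += delta * wf / s.sumWeight
	s.m2 += wf * delta * (x - s.mean)
}

func (s *weightedWelford) Mean() float64 { return s.mean }

// SampleStandardDeviation applies the frequency-weights correction (W - 1).
func (s *weightedWelford) SampleStandardDeviation() float64 {
	if s.sumWeight <= 1 {
		return 0
	}
	return math.Sqrt(s.m2 / (s.sumWeight - 1))
}

func main() {
	// Two blocks: 1KB at 2.0 ns/byte and 128KB at 0.5 ns/byte. The large
	// block dominates, so the weighted mean lands near 0.5 rather than at
	// the unweighted average of 1.25.
	var w weightedWelford
	w.Add(2.0, 1<<10)
	w.Add(0.5, 128<<10)
	fmt.Printf("mean = %.3f ns/byte (± %.3f)\n", w.Mean(), w.SampleStandardDeviation())
}

Because the weights are frequencies (bytes), a 128KB block influences the mean 128× more than a 1KB block.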

8 files changed: +209 -118 lines

sstable/compressionanalyzer/block_analyzer.go

Lines changed: 4 additions & 4 deletions
@@ -88,10 +88,10 @@ func (a *BlockAnalyzer) runExperiment(
 	}
 	decompressionTime := t2.Elapsed()
 
-	// CPU times are in microseconds.
-	pa.CompressionTime.Add(compressionTime.Seconds() * 1e6)
-	pa.DecompressionTime.Add(decompressionTime.Seconds() * 1e6)
-	pa.CompressionRatio.Add(float64(len(block)) / float64(len(compressed)))
+	// CPU times are in nanoseconds / byte.
+	pa.CompressionTime.Add(float64(compressionTime)/float64(len(block)), uint64(len(block)))
+	pa.DecompressionTime.Add(float64(decompressionTime)/float64(len(block)), uint64(len(block)))
+	pa.CompressionRatio.Add(float64(len(block))/float64(len(compressed)), uint64(len(block)))
 }
 
 func ensureLen(b []byte, n int) []byte {
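
Why weighting by len(block) gives the aggregate the commit message asks for: block i contributes the sample x_i = t_i/b_i (nanoseconds per byte) with weight w_i = b_i, so the weighted mean works out to

    sum(w_i * x_i) / sum(w_i) = sum(b_i * (t_i/b_i)) / sum(b_i) = sum(t_i) / sum(b_i),

i.e. total CPU time over total bytes, rather than an unweighted average of per-block figures.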

sstable/compressionanalyzer/buckets.go

Lines changed: 31 additions & 21 deletions
@@ -9,6 +9,7 @@ import (
 	"math"
 	"strings"
 	"text/tabwriter"
+	"time"
 
 	"github.com/cockroachdb/pebble/internal/compression"
 )
@@ -145,10 +146,10 @@ type Bucket struct {
 // PerSetting holds statistics from experiments on blocks in a bucket with a
 // specific compression.Setting.
 type PerSetting struct {
-	CompressionRatio Welford
-	// CPU times are in microseconds.
-	CompressionTime Welford
-	DecompressionTime Welford
+	CompressionRatio WeightedWelford
+	// CPU times are in nanoseconds per byte.
+	CompressionTime WeightedWelford
+	DecompressionTime WeightedWelford
 }
 
 func (b *Buckets) String(minSamples int) string {
@@ -167,19 +168,22 @@ func (b *Buckets) String(minSamples int) string {
 			if bucket.UncompressedSize.Count() < int64(minSamples) {
 				continue
 			}
-			fmt.Fprintf(tw, "%s\t%s\t%s\t%d\t%s\tCR", k, sz, c, bucket.UncompressedSize.Count(), withStdDev(bucket.UncompressedSize, "KB", 1.0/1024))
+			fmt.Fprintf(tw, "%s\t%s\t%s\t%d\t%.1fKB %s\tCR", k, sz, c, bucket.UncompressedSize.Count(), bucket.UncompressedSize.Mean()/1024, stdDevStr(bucket.UncompressedSize.Mean(), bucket.UncompressedSize.SampleStandardDeviation()))
 			for _, e := range (*b)[k][sz][c].Experiments {
-				fmt.Fprintf(tw, "\t%s", withStdDev(e.CompressionRatio, "", 1.0))
+				mean, stdDev := e.CompressionRatio.Mean(), e.CompressionRatio.SampleStandardDeviation()
+				fmt.Fprintf(tw, "\t%.2f %s", mean, stdDevStr(mean, stdDev))
 			}
 			fmt.Fprintf(tw, "\n")
 			fmt.Fprintf(tw, "\t\t\t\t\tComp")
 			for _, e := range (*b)[k][sz][c].Experiments {
-				fmt.Fprintf(tw, "\t%s", withStdDev(e.CompressionTime, "us", 1.0))
+				mean, stdDev := e.CompressionTime.Mean(), e.CompressionTime.SampleStandardDeviation()
+				fmt.Fprintf(tw, "\t%.0fMBps %s", toMBPS(mean), stdDevStr(mean, stdDev))
 			}
 			fmt.Fprintf(tw, "\n")
 			fmt.Fprintf(tw, "\t\t\t\t\tDecomp")
 			for _, e := range (*b)[k][sz][c].Experiments {
-				fmt.Fprintf(tw, "\t%s", withStdDev(e.DecompressionTime, "us", 1.0))
+				mean, stdDev := e.DecompressionTime.Mean(), e.DecompressionTime.SampleStandardDeviation()
+				fmt.Fprintf(tw, "\t%.0fMBps %s", toMBPS(mean), stdDevStr(mean, stdDev))
 			}
 			fmt.Fprintf(tw, "\n")
 		}
@@ -189,16 +193,22 @@ func (b *Buckets) String(minSamples int) string {
 	return buf.String()
 }
 
-func withStdDev(w Welford, units string, scale float64) string {
-	mean := w.Mean() * scale
-	if math.IsNaN(mean) {
-		mean = 0
+func toMBPS(nsPerByte float64) float64 {
+	if nsPerByte == 0 {
+		return 0
 	}
-	stddev := 0
-	if s := w.SampleStandardDeviation(); !math.IsNaN(s) {
-		stddev = int(100 * s / w.Mean())
+	const oneMB = 1 << 20
+	return float64(time.Second) / (nsPerByte * oneMB)
+}
+
+// stdDevStr formats the standard deviation as a percentage of the mean,
+// for example "± 10%".
+func stdDevStr(mean, stddev float64) string {
+	percent := 0
+	if mean > 0 {
+		percent = int(math.Round(100 * stddev / mean))
 	}
-	return fmt.Sprintf("%.1f%s ± %d%%", mean, units, stddev)
+	return fmt.Sprintf("± %d%%", percent)
 }
 
 func (b *Buckets) ToCSV(minSamples int) string {
@@ -207,9 +217,9 @@ func (b *Buckets) ToCSV(minSamples int) string {
 	for _, s := range Settings {
 		fmt.Fprintf(&buf, ",%s CR", s.String())
 		fmt.Fprintf(&buf, ",%s CR±", s.String())
-		fmt.Fprintf(&buf, ",%s Comp us", s.String())
+		fmt.Fprintf(&buf, ",%s Comp ns/b", s.String())
 		fmt.Fprintf(&buf, ",%s Comp±", s.String())
-		fmt.Fprintf(&buf, ",%s Decomp us", s.String())
+		fmt.Fprintf(&buf, ",%s Decomp ns/b", s.String())
 		fmt.Fprintf(&buf, ",%s Decomp±", s.String())
 	}
 	fmt.Fprintf(&buf, "\n")
@@ -222,9 +232,9 @@ func (b *Buckets) ToCSV(minSamples int) string {
 		}
 		fmt.Fprintf(&buf, "%s,%s,%s,%d,%.0f,%.0f", k, sz, c, bucket.UncompressedSize.Count(), bucket.UncompressedSize.Mean(), bucket.UncompressedSize.SampleStandardDeviation())
 		for _, e := range (*b)[k][sz][c].Experiments {
-			fmt.Fprintf(&buf, ",%.1f,%.1f", e.CompressionRatio.Mean(), e.CompressionRatio.SampleStandardDeviation())
-			fmt.Fprintf(&buf, ",%.1f,%.1f", e.CompressionTime.Mean(), e.CompressionTime.SampleStandardDeviation())
-			fmt.Fprintf(&buf, ",%.1f,%.1f", e.DecompressionTime.Mean(), e.DecompressionTime.SampleStandardDeviation())
+			fmt.Fprintf(&buf, ",%.3f,%.3f", e.CompressionRatio.Mean(), e.CompressionRatio.SampleStandardDeviation())
+			fmt.Fprintf(&buf, ",%.3f,%.3f", e.CompressionTime.Mean(), e.CompressionTime.SampleStandardDeviation())
+			fmt.Fprintf(&buf, ",%.3f,%.3f", e.DecompressionTime.Mean(), e.DecompressionTime.SampleStandardDeviation())
		}
 		fmt.Fprintf(&buf, "\n")
 	}
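
To make the ns/byte to MB/s conversion concrete, here is a small self-contained program (toMBPS is copied from the diff above; main is just an illustration):

package main

import (
	"fmt"
	"time"
)

// toMBPS converts a per-byte CPU cost in nanoseconds into MB/s,
// with 1 MB = 1<<20 bytes (as in the diff above).
func toMBPS(nsPerByte float64) float64 {
	if nsPerByte == 0 {
		return 0
	}
	const oneMB = 1 << 20
	return float64(time.Second) / (nsPerByte * oneMB)
}

func main() {
	// 10 ns/byte => 1e9 / (10 * 1048576) ≈ 95.4 MB/s.
	fmt.Printf("%.1f MB/s\n", toMBPS(10))
}

Per the commit message, the "± N%" printed next to this figure is computed from the ns/byte samples, so it does not transfer directly to the MB/s value.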

sstable/compressionanalyzer/buckets_test.go

Lines changed: 4 additions & 3 deletions
@@ -69,9 +69,10 @@ func exampleBuckets() Buckets {
 			b.UncompressedSize.Add(100 + float64(r.IntN(64*1024)))
 			for j := range b.Experiments {
 				e := &b.Experiments[j]
-				e.CompressionRatio.Add(float64(j+1) + 0.1*float64(r.IntN(10)))
-				e.CompressionTime.Add(float64((j+1)*10) + 0.1*float64(r.IntN(10)))
-				e.DecompressionTime.Add(float64((j+1)*100) + 0.1*float64(r.IntN(10)))
+				blockSize := uint64(50 + r.IntN(100))
+				e.CompressionRatio.Add(float64(j+1)+0.1*float64(r.IntN(10)), blockSize)
+				e.CompressionTime.Add(float64((j+1)*10)+0.1*float64(r.IntN(10)), blockSize)
+				e.DecompressionTime.Add(float64((j+1)*100)+0.1*float64(r.IntN(10)), blockSize)
 			}
 		}
 	}

sstable/compressionanalyzer/file_analyzer_test.go

Lines changed: 3 additions & 3 deletions
@@ -36,10 +36,10 @@ func TestFileAnalyzer(t *testing.T) {
 			// Snappy always has the same output in all configurations and on
 			// all platforms.
 			if Settings[l].Algorithm != compression.SnappyAlgorithm {
-				bucket.Experiments[l].CompressionRatio = Welford{}
+				bucket.Experiments[l].CompressionRatio = WeightedWelford{}
 			}
-			bucket.Experiments[l].CompressionTime = Welford{}
-			bucket.Experiments[l].DecompressionTime = Welford{}
+			bucket.Experiments[l].CompressionTime = WeightedWelford{}
+			bucket.Experiments[l].DecompressionTime = WeightedWelford{}
 		}
 	}
 }
