Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

tool: add more summaries to manifest summarize #3599

Merged
merged 1 commit into from
May 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 101 additions & 27 deletions tool/manifest.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"slices"
"time"

"github.com/HdrHistogram/hdrhistogram-go"
"github.com/cockroachdb/pebble"
"github.com/cockroachdb/pebble/internal/base"
"github.com/cockroachdb/pebble/internal/humanize"
Expand Down Expand Up @@ -309,7 +310,12 @@ func (m *manifestT) runSummarizeOne(stdout io.Writer, arg string) error {
// summaryBucket accumulates per-level statistics for one time bucket of
// version edits read from the manifest.
type summaryBucket struct {
// Bytes added to each level by flushes/ingests (version edits that are
// not classified as compactions; see isLikelyCompaction below).
bytesAdded [manifest.NumLevels]uint64
// Bytes removed from each level as compaction input (intra-L0
// compactions excluded).
bytesCompactOut [manifest.NumLevels]uint64
// Bytes written to each level as compaction output (intra-L0
// compactions excluded).
bytesCompactIn [manifest.NumLevels]uint64
// Count of files written to each level as compaction output.
filesCompactIn [manifest.NumLevels]uint64
// Histogram of deleted-file lifetimes (seconds from the file's
// CreationTime to the newest timestamp seen), per level; nil until the
// first value is recorded. Only populated in verbose mode.
fileLifetimeSec [manifest.NumLevels]*hdrhistogram.Histogram
}
// maxLifetimeSec caps recorded file lifetimes (and bounds the histogram
// range) at 365 days. The cap is arbitrary.
const maxLifetimeSec = 365 * 24 * 60 * 60
var (
bve manifest.BulkVersionEdit
newestOverall time.Time
Expand All @@ -319,6 +325,7 @@ func (m *manifestT) runSummarizeOne(stdout io.Writer, arg string) error {
)
bve.AddedByFileNum = make(map[base.FileNum]*manifest.FileMetadata)
rr := record.NewReader(f, 0 /* logNum */)
numHistErrors := 0
for i := 0; ; i++ {
r, err := rr.Next()
if err == io.EOF {
Expand All @@ -336,9 +343,27 @@ func (m *manifestT) runSummarizeOne(stdout io.Writer, arg string) error {
return err
}

veNewest, veOldest := newestOverall, newestOverall
// !isLikelyCompaction corresponds to flushes or ingests, that will be
// counted in bytesAdded. This is imperfect since ingests that excise can
// have deleted files without creating backing tables, and be counted as
// compactions. Also, copy compactions have no deleted files and create
// backing tables, so will be counted as flush/ingest.
//
// The bytesAdded metric overcounts since existing files virtualized by an
// ingest are also included.
//
// TODO(sumeer): this summarization needs a rewrite. We could do that
// after adding an enum to the VersionEdit to aid the summarization.
isLikelyCompaction := len(ve.NewFiles) > 0 && len(ve.DeletedFiles) > 0 && len(ve.CreatedBackingTables) == 0
isIntraL0Compaction := isLikelyCompaction && ve.NewFiles[0].Level == 0
veNewest := newestOverall
for _, nf := range ve.NewFiles {
_, seen := metadatas[nf.Meta.FileNum]
if seen && !isLikelyCompaction {
// Output error and continue processing as usual.
fmt.Fprintf(stdout, "error: flush/ingest has file that is already known %d size %s\n",
nf.Meta.FileNum, humanize.Bytes.Uint64(nf.Meta.Size))
}
metadatas[nf.Meta.FileNum] = nf.Meta
if nf.Meta.CreationTime == 0 {
continue
Expand All @@ -348,12 +373,6 @@ func (m *manifestT) runSummarizeOne(stdout io.Writer, arg string) error {
if veNewest.Before(t) {
veNewest = t
}
// Only update the oldest if we haven't already seen this
// file; it might've been moved in which case the sstable's
// creation time is from when it was originally created.
if veOldest.After(t) && !seen {
veOldest = t
}
}
// Ratchet up the most recent timestamp we've seen.
if newestOverall.Before(veNewest) {
Expand All @@ -377,26 +396,36 @@ func (m *manifestT) runSummarizeOne(stdout io.Writer, arg string) error {
buckets[bucketKey] = b
}

// Increase `bytesAdded` for any version edits that only add files.
// These are either flushes or ingests.
if len(ve.NewFiles) > 0 && len(ve.DeletedFiles) == 0 {
for _, nf := range ve.NewFiles {
for _, nf := range ve.NewFiles {
if !isLikelyCompaction {
b.bytesAdded[nf.Level] += nf.Meta.Size
} else if !isIntraL0Compaction {
b.bytesCompactIn[nf.Level] += nf.Meta.Size
b.filesCompactIn[nf.Level]++
}
continue
}

// Increase `bytesCompactOut` for the input level of any compactions
// that remove bytes from a level (excluding intra-L0 compactions).
// compactions.
destLevel := -1
if len(ve.NewFiles) > 0 {
destLevel = ve.NewFiles[0].Level
}
for dfe := range ve.DeletedFiles {
if dfe.Level != destLevel {
// Increase `bytesCompactOut` for the input level of any compactions
// that remove bytes from a level (excluding intra-L0 compactions).
if isLikelyCompaction && !isIntraL0Compaction && dfe.Level != manifest.NumLevels-1 {
b.bytesCompactOut[dfe.Level] += metadatas[dfe.FileNum].Size
}
meta, ok := metadatas[dfe.FileNum]
if m.verbose && ok && meta.CreationTime > 0 {
hist := b.fileLifetimeSec[dfe.Level]
if hist == nil {
hist = hdrhistogram.New(0, maxLifetimeSec, 1)
b.fileLifetimeSec[dfe.Level] = hist
}
lifetimeSec := int64((newestOverall.Sub(time.Unix(meta.CreationTime, 0).UTC())) / time.Second)
if lifetimeSec > maxLifetimeSec {
lifetimeSec = maxLifetimeSec
}
if err := hist.RecordValue(lifetimeSec); err != nil {
numHistErrors++
}
}
}
}

Expand All @@ -406,7 +435,7 @@ func (m *manifestT) runSummarizeOne(stdout io.Writer, arg string) error {
}
return humanize.Bytes.Uint64(v).String()
}
formatRate := func(v uint64, dur time.Duration) string {
formatByteRate := func(v uint64, dur time.Duration) string {
if v == 0 {
return "."
}
Expand All @@ -416,6 +445,16 @@ func (m *manifestT) runSummarizeOne(stdout io.Writer, arg string) error {
}
return humanize.Bytes.Uint64(uint64(float64(v)/secs)).String() + "/s"
}
formatRate := func(v uint64, dur time.Duration) string {
if v == 0 {
return "."
}
secs := dur.Seconds()
if secs == 0 {
secs = 1
}
return fmt.Sprintf("%.1f/s", float64(v)/secs)
}

if newestOverall.IsZero() {
fmt.Fprintf(stdout, "(no timestamps)\n")
Expand All @@ -432,7 +471,7 @@ func (m *manifestT) runSummarizeOne(stdout io.Writer, arg string) error {
}

if bi%10 == 0 {
fmt.Fprintf(stdout, " ")
fmt.Fprintf(stdout, " ")
fmt.Fprintf(stdout, "_______L0_______L1_______L2_______L3_______L4_______L5_______L6_____TOTAL\n")
}
fmt.Fprintf(stdout, "%s\n", bt.Format(time.RFC3339))
Expand All @@ -450,17 +489,19 @@ func (m *manifestT) runSummarizeOne(stdout io.Writer, arg string) error {
format func(uint64, time.Duration) string
vals [manifest.NumLevels]uint64
}{
{"Ingest+Flush", formatUint64, bucket.bytesAdded},
{"Ingest+Flush", formatRate, bucket.bytesAdded},
{"Compact (out)", formatUint64, bucket.bytesCompactOut},
{"Compact (out)", formatRate, bucket.bytesCompactOut},
{"Ingest+Flush Bytes", formatUint64, bucket.bytesAdded},
{"Ingest+Flush Bytes/s", formatByteRate, bucket.bytesAdded},
{"Compact Out Bytes", formatUint64, bucket.bytesCompactOut},
{"Compact Out Bytes/s", formatByteRate, bucket.bytesCompactOut},
{"Compact In Bytes/s", formatByteRate, bucket.bytesCompactIn},
{"Compact In Files/s", formatRate, bucket.filesCompactIn},
}
for _, stat := range stats {
var sum uint64
for _, v := range stat.vals {
sum += v
}
fmt.Fprintf(stdout, "%20s %8s %8s %8s %8s %8s %8s %8s %8s\n",
fmt.Fprintf(stdout, "%23s %8s %8s %8s %8s %8s %8s %8s %8s\n",
stat.label,
stat.format(stat.vals[0], dur),
stat.format(stat.vals[1], dur),
Expand All @@ -473,13 +514,46 @@ func (m *manifestT) runSummarizeOne(stdout io.Writer, arg string) error {
}
}
fmt.Fprintf(stdout, "%s\n", newestOverall.Format(time.RFC3339))

formatSec := func(sec int64) string {
return (time.Second * time.Duration(sec)).String()
}
if m.verbose {
fmt.Fprintf(stdout, "\nLifetime histograms\n")
for bi, bt := 0, oldestOverall; !bt.After(newestOverall); bi, bt = bi+1, bt.Truncate(m.summarizeDur).Add(m.summarizeDur) {
// Truncate the start time to calculate the bucket key, and
// retrieve the appropriate bucket.
bk := bt.Truncate(m.summarizeDur)
var bucket summaryBucket
if buckets[bk] != nil {
bucket = *buckets[bk]
}
fmt.Fprintf(stdout, "%s\n", bt.Format(time.RFC3339))
formatHist := func(level int, hist *hdrhistogram.Histogram) {
if hist == nil {
return
}
fmt.Fprintf(stdout, " L%d: mean: %s p25: %s p50: %s p75: %s p90: %s\n", level,
formatSec(int64(hist.Mean())), formatSec(hist.ValueAtPercentile(25)),
formatSec(hist.ValueAtPercentile(50)), formatSec(hist.ValueAtPercentile(75)),
formatSec(hist.ValueAtPercentile(90)))
}
for i := range bucket.fileLifetimeSec {
formatHist(i, bucket.fileLifetimeSec[i])
}
}
fmt.Fprintf(stdout, "%s\n", newestOverall.Format(time.RFC3339))
}
}

dur := newestOverall.Sub(oldestOverall)
fmt.Fprintf(stdout, "---\n")
fmt.Fprintf(stdout, "Estimated start time: %s\n", oldestOverall.Format(time.RFC3339))
fmt.Fprintf(stdout, "Estimated end time: %s\n", newestOverall.Format(time.RFC3339))
fmt.Fprintf(stdout, "Estimated duration: %s\n", dur.String())
if numHistErrors > 0 {
fmt.Fprintf(stdout, "Errors in lifetime histograms: %d\n", numHistErrors)
}

return nil
}
Expand Down
24 changes: 14 additions & 10 deletions tool/testdata/manifest_summarize
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,14 @@ manifest summarize
./testdata/find-db/MANIFEST-000001
----
MANIFEST-000001
_______L0_______L1_______L2_______L3_______L4_______L5_______L6_____TOTAL
_______L0_______L1_______L2_______L3_______L4_______L5_______L6_____TOTAL
2023-12-12T18:55:00Z
Ingest+Flush 2.0KB . . . . . 671B 2.7KB
Ingest+Flush 2.0KB/s . . . . . 671B/s 2.7KB/s
Compact (out) 2.0KB . . . . . . 2.0KB
Compact (out) 2.0KB/s . . . . . . 2.0KB/s
Ingest+Flush Bytes 2.0KB . . . . . 671B 2.7KB
Ingest+Flush Bytes/s 2.0KB/s . . . . . 671B/s 2.7KB/s
Compact Out Bytes 2.0KB . . . . . . 2.0KB
Compact Out Bytes/s 2.0KB/s . . . . . . 2.0KB/s
Compact In Bytes/s . . . . . . 2.2KB/s 2.2KB/s
Compact In Files/s . . . . . . 3.0/s 3.0/s
2023-12-12T18:55:00Z
---
Estimated start time: 2023-12-12T18:55:00Z
Expand All @@ -32,12 +34,14 @@ manifest summarize
./testdata/mixed/MANIFEST-000001
----
MANIFEST-000001
_______L0_______L1_______L2_______L3_______L4_______L5_______L6_____TOTAL
_______L0_______L1_______L2_______L3_______L4_______L5_______L6_____TOTAL
2023-12-11T18:59:04Z
Ingest+Flush 1.0KB . . . . . . 1.0KB
Ingest+Flush 1.0KB/s . . . . . . 1.0KB/s
Compact (out) . . . . . . . .
Compact (out) . . . . . . . .
Ingest+Flush Bytes 1.0KB . . . . . . 1.0KB
Ingest+Flush Bytes/s 1.0KB/s . . . . . . 1.0KB/s
Compact Out Bytes . . . . . . . .
Compact Out Bytes/s . . . . . . . .
Compact In Bytes/s . . . . . . . .
Compact In Files/s . . . . . . . .
2023-12-11T18:59:04Z
---
Estimated start time: 2023-12-11T18:59:04Z
Expand Down
Loading