From e6fc6495ce4aaf704f16ecb104818544dc1dc3d3 Mon Sep 17 00:00:00 2001 From: Paulo Gomes Date: Thu, 27 Nov 2025 22:25:48 +0000 Subject: [PATCH 1/3] Add command verify-pack Signed-off-by: Paulo Gomes --- cmd/gogit/verify-pack.go | 312 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 312 insertions(+) create mode 100644 cmd/gogit/verify-pack.go diff --git a/cmd/gogit/verify-pack.go b/cmd/gogit/verify-pack.go new file mode 100644 index 0000000..18bba8c --- /dev/null +++ b/cmd/gogit/verify-pack.go @@ -0,0 +1,312 @@ +package main + +import ( + "crypto" + "fmt" + "io" + "log/slog" + "os" + "path/filepath" + "sort" + "strings" + + "github.com/go-git/go-billy/v6/osfs" + "github.com/go-git/go-git/v6/plumbing" + "github.com/go-git/go-git/v6/plumbing/format/idxfile" + "github.com/go-git/go-git/v6/plumbing/format/packfile" + "github.com/spf13/cobra" +) + +var verifyPackVerbose bool + +func init() { + verifyPackCmd.Flags().BoolVarP(&verifyPackVerbose, "verbose", "v", false, "Show detailed object information") + rootCmd.AddCommand(verifyPackCmd) +} + +var verifyPackCmd = &cobra.Command{ + Use: "verify-pack [-v] ", + Short: "Validate packed Git archive files", + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + return verifyPack(args[0], verifyPackVerbose) + }, + DisableFlagsInUseLine: true, +} + +type objectInfo struct { + hash plumbing.Hash + typ plumbing.ObjectType + diskType plumbing.ObjectType + size int64 + packedSize int64 + offset int64 + depth int + base plumbing.Hash +} + +func verifyPack(path string, verbose bool) error { + idxPath := path + packPath := path + + if strings.HasSuffix(path, ".idx") { + packPath = strings.TrimSuffix(path, ".idx") + ".pack" + } else if strings.HasSuffix(path, ".pack") { + idxPath = strings.TrimSuffix(path, ".pack") + ".idx" + } else { + return fmt.Errorf("file must have .idx or .pack extension") + } + + idxFile, err := os.Open(idxPath) + if err != nil { + return fmt.Errorf("failed to open index file: %w", err) + } + defer func() { + err = idxFile.Close() + if err != nil { + slog.Debug("failed to close idx file", "error", err) + } + }() + + idx := idxfile.NewMemoryIndex(crypto.SHA1.Size()) + dec := idxfile.NewDecoder(idxFile) + if err := dec.Decode(idx); err != nil { + return fmt.Errorf("failed to decode index file: %w", err) + } + + fs := osfs.New(filepath.Dir(packPath)) + packFile, err := fs.Open(filepath.Base(packPath)) + if err != nil { + return fmt.Errorf("failed to open pack file: %w", err) + } + defer func() { + err = packFile.Close() + if err != nil { + slog.Debug("failed to close pack file", "error", err) + } + }() + + pf := packfile.NewPackfile( + packFile, + packfile.WithIdx(idx), + packfile.WithFs(fs), + ) + defer func() { + err = pf.Close() + if err != nil { + slog.Debug("failed to close Packfile object", "error", err) + } + }() + + scanner, err := pf.Scanner() //nolint:staticcheck + if err != nil { + return fmt.Errorf("failed to get scanner: %w", err) + } + + entries, err := idx.EntriesByOffset() + if err != nil { + return fmt.Errorf("failed to get entries: %w", err) + } + + var objects []objectInfo + + for { + entry, err := entries.Next() + if err == io.EOF { + break + } + if err != nil { + return fmt.Errorf("failed to read entry: %w", err) + } + + // Read raw object header to get delta information. + err = scanner.SeekFromStart(int64(entry.Offset)) + if err != nil { + return fmt.Errorf("failed to seek to offset %d: %w", entry.Offset, err) + } + + if !scanner.Scan() { + return fmt.Errorf("failed to scan object at offset %d", entry.Offset) + } + + header := scanner.Data().Value().(packfile.ObjectHeader) + + // For delta objects, Size is the delta size. + // For regular objects, Size is the inflated size. + info := objectInfo{ + hash: entry.Hash, + diskType: header.Type, + size: header.Size, + offset: int64(entry.Offset), + } + + // Calculate packed size (distance to next header or end of file). + if len(objects) > 0 { + objects[len(objects)-1].packedSize = info.offset - objects[len(objects)-1].offset + } + + objects = append(objects, info) + } + + // Calculate the packed size of the last object. + if len(objects) > 0 { + stat, err := packFile.Stat() + if err != nil { + return fmt.Errorf("failed to stat pack file: %w", err) + } + // Pack file ends with a 20-byte SHA-1 checksum. + objects[len(objects)-1].packedSize = stat.Size() - objects[len(objects)-1].offset - int64(crypto.SHA1.Size()) + } + + // Resolve actual types for all objects (after delta application). + for i := range objects { + obj, err := pf.GetByOffset(objects[i].offset) + if err != nil { + return fmt.Errorf("failed to get object at offset %d: %w", objects[i].offset, err) + } + objects[i].typ = obj.Type() + } + + // Build delta chain information. + deltaChains := make(map[plumbing.Hash]int) + objectByHash := make(map[plumbing.Hash]*objectInfo) + objectByOffset := make(map[int64]*objectInfo) + + for i := range objects { + objectByHash[objects[i].hash] = &objects[i] + objectByOffset[objects[i].offset] = &objects[i] + } + + // Calculate delta chains by reading headers again. + for i := range objects { + if !objects[i].diskType.IsDelta() { + continue + } + + err := scanner.SeekFromStart(objects[i].offset) + if err != nil { + return fmt.Errorf("failed to seek to offset %d: %w", objects[i].offset, err) + } + + if !scanner.Scan() { + return fmt.Errorf("failed to scan object at offset %d", objects[i].offset) + } + + header := scanner.Data().Value().(packfile.ObjectHeader) + + // Calculate delta chain depth. + depth := 1 + var baseHash plumbing.Hash + + switch header.Type { + case plumbing.REFDeltaObject: + baseHash = header.Reference + case plumbing.OFSDeltaObject: + // OffsetReference is the absolute offset of the base object. + if baseObj, ok := objectByOffset[header.OffsetReference]; ok { + baseHash = baseObj.hash + } + } + + // Follow the chain to calculate total depth. + if !baseHash.IsZero() { + current := baseHash + for { + baseObj, ok := objectByHash[current] + if !ok { + break + } + + if !baseObj.diskType.IsDelta() { + // Reached a non-delta base. + break + } + + // Get the base object's header. + err := scanner.SeekFromStart(baseObj.offset) + if err != nil { + break + } + if !scanner.Scan() { + break + } + baseHeader := scanner.Data().Value().(packfile.ObjectHeader) + + depth++ + + if baseHeader.Type == plumbing.REFDeltaObject { + current = baseHeader.Reference + } else if baseHeader.Type == plumbing.OFSDeltaObject { + // OffsetReference is the absolute offset. + if nextBase, ok := objectByOffset[baseHeader.OffsetReference]; ok { + current = nextBase.hash + } else { + break + } + } else { + break + } + } + } + + objects[i].depth = depth + objects[i].base = baseHash + deltaChains[objects[i].hash] = depth + } + + if verbose { + for _, obj := range objects { + // Format type with padding to match git's output. + typeStr := obj.typ.String() + if len(typeStr) == 4 { + typeStr = typeStr + " " + } else { + typeStr = typeStr + " " + } + + fmt.Printf("%s %s%d %d %d", + obj.hash.String(), + typeStr, + obj.size, + obj.packedSize, + obj.offset, + ) + + if obj.diskType.IsDelta() && !obj.base.IsZero() { + fmt.Printf(" %d %s", obj.depth, obj.base.String()) + } + + fmt.Println() + } + + // Print statistics. + nonDelta := len(objects) - len(deltaChains) + fmt.Printf("non delta: %d objects\n", nonDelta) + + // Count chain lengths. + chainLengths := make(map[int]int) + for _, depth := range deltaChains { + chainLengths[depth]++ + } + + // Sort chain lengths for consistent output. + var lengths []int + for length := range chainLengths { + lengths = append(lengths, length) + } + sort.Ints(lengths) + + for _, length := range lengths { + count := chainLengths[length] + objWord := "objects" + if count == 1 { + objWord = "object" + } + fmt.Printf("chain length = %d: %d %s\n", length, count, objWord) + } + } + + fmt.Printf("%s: ok\n", packPath) + + return nil +} From e59cacc240744947fa4f9b1da3b793320dd3f874 Mon Sep 17 00:00:00 2001 From: Paulo Gomes Date: Thu, 27 Nov 2025 22:59:40 +0000 Subject: [PATCH 2/3] verify-pack: Add support for analysing go-git-fixtures This is mostly useful when debugging go-git tests that are processing pack files that are contained within a go-git-fixture. Signed-off-by: Paulo Gomes --- cmd/gogit/verify-pack.go | 107 +++++++++++++++++++++++++++------------ go.mod | 1 + 2 files changed, 76 insertions(+), 32 deletions(-) diff --git a/cmd/gogit/verify-pack.go b/cmd/gogit/verify-pack.go index 18bba8c..45ee745 100644 --- a/cmd/gogit/verify-pack.go +++ b/cmd/gogit/verify-pack.go @@ -2,25 +2,32 @@ package main import ( "crypto" + "errors" "fmt" "io" "log/slog" "os" - "path/filepath" "sort" "strings" - "github.com/go-git/go-billy/v6/osfs" + "github.com/go-git/go-billy/v6" + fixtures "github.com/go-git/go-git-fixtures/v5" "github.com/go-git/go-git/v6/plumbing" "github.com/go-git/go-git/v6/plumbing/format/idxfile" "github.com/go-git/go-git/v6/plumbing/format/packfile" "github.com/spf13/cobra" ) -var verifyPackVerbose bool +var ( + verifyPackVerbose bool + verifyPackFixtureUrl bool + verifyPackFixtureTag bool +) func init() { verifyPackCmd.Flags().BoolVarP(&verifyPackVerbose, "verbose", "v", false, "Show detailed object information") + verifyPackCmd.Flags().BoolVarP(&verifyPackFixtureUrl, "fixture-url", "", false, "Use as go-git-fixture url") + verifyPackCmd.Flags().BoolVarP(&verifyPackFixtureTag, "fixture-tag", "", false, "Use as go-git-fixture tag") rootCmd.AddCommand(verifyPackCmd) } @@ -46,21 +53,11 @@ type objectInfo struct { } func verifyPack(path string, verbose bool) error { - idxPath := path - packPath := path - - if strings.HasSuffix(path, ".idx") { - packPath = strings.TrimSuffix(path, ".idx") + ".pack" - } else if strings.HasSuffix(path, ".pack") { - idxPath = strings.TrimSuffix(path, ".pack") + ".idx" - } else { - return fmt.Errorf("file must have .idx or .pack extension") - } - - idxFile, err := os.Open(idxPath) + idxFile, packFile, err := openPack(path) if err != nil { - return fmt.Errorf("failed to open index file: %w", err) + return err } + defer func() { err = idxFile.Close() if err != nil { @@ -68,17 +65,6 @@ func verifyPack(path string, verbose bool) error { } }() - idx := idxfile.NewMemoryIndex(crypto.SHA1.Size()) - dec := idxfile.NewDecoder(idxFile) - if err := dec.Decode(idx); err != nil { - return fmt.Errorf("failed to decode index file: %w", err) - } - - fs := osfs.New(filepath.Dir(packPath)) - packFile, err := fs.Open(filepath.Base(packPath)) - if err != nil { - return fmt.Errorf("failed to open pack file: %w", err) - } defer func() { err = packFile.Close() if err != nil { @@ -86,19 +72,26 @@ func verifyPack(path string, verbose bool) error { } }() + idx := idxfile.NewMemoryIndex(crypto.SHA1.Size()) + + dec := idxfile.NewDecoder(idxFile) + if err := dec.Decode(idx); err != nil { + return fmt.Errorf("failed to decode index file: %w", err) + } + pf := packfile.NewPackfile( packFile, packfile.WithIdx(idx), - packfile.WithFs(fs), ) + defer func() { - err = pf.Close() + err := pf.Close() if err != nil { slog.Debug("failed to close Packfile object", "error", err) } }() - scanner, err := pf.Scanner() //nolint:staticcheck + scanner, err := pf.Scanner() if err != nil { return fmt.Errorf("failed to get scanner: %w", err) } @@ -112,9 +105,10 @@ func verifyPack(path string, verbose bool) error { for { entry, err := entries.Next() - if err == io.EOF { + if errors.Is(err, io.EOF) { break } + if err != nil { return fmt.Errorf("failed to read entry: %w", err) } @@ -164,6 +158,7 @@ func verifyPack(path string, verbose bool) error { if err != nil { return fmt.Errorf("failed to get object at offset %d: %w", objects[i].offset, err) } + objects[i].typ = obj.Type() } @@ -196,6 +191,7 @@ func verifyPack(path string, verbose bool) error { // Calculate delta chain depth. depth := 1 + var baseHash plumbing.Hash switch header.Type { @@ -227,9 +223,11 @@ func verifyPack(path string, verbose bool) error { if err != nil { break } + if !scanner.Scan() { break } + baseHeader := scanner.Data().Value().(packfile.ObjectHeader) depth++ @@ -294,19 +292,64 @@ func verifyPack(path string, verbose bool) error { for length := range chainLengths { lengths = append(lengths, length) } + sort.Ints(lengths) for _, length := range lengths { count := chainLengths[length] + objWord := "objects" if count == 1 { objWord = "object" } + fmt.Printf("chain length = %d: %d %s\n", length, count, objWord) } } - fmt.Printf("%s: ok\n", packPath) + fmt.Printf("%s: ok\n", path) return nil } + +func openPack(path string) (billy.File, billy.File, error) { + if verifyPackFixtureUrl || verifyPackFixtureTag { + var f fixtures.Fixtures + if verifyPackFixtureUrl { + f = fixtures.ByURL(path) + } + if verifyPackFixtureTag { + f = fixtures.ByTag(path) + } + + if len(f) == 0 { + return nil, nil, fmt.Errorf("no fixture found for %q", path) + } + + fixture := f.One() + return fixture.Idx(), fixture.Packfile(), nil + } + + idxPath := path + packPath := path + + if before, ok := strings.CutSuffix(path, ".idx"); ok { + packPath = before + ".pack" + } else if before, ok := strings.CutSuffix(path, ".pack"); ok { + idxPath = before + ".idx" + } else { + return nil, nil, errors.New("file must have .idx or .pack extension") + } + + idxFile, err := os.Open(idxPath) + if err != nil { + return nil, nil, fmt.Errorf("failed to open index file: %w", err) + } + + packFile, err := os.Open(packPath) + if err != nil { + return nil, nil, fmt.Errorf("failed to open pack file: %w", err) + } + + return idxFile, packFile, nil +} diff --git a/go.mod b/go.mod index 73b5b37..3855f25 100644 --- a/go.mod +++ b/go.mod @@ -6,6 +6,7 @@ toolchain go1.25.4 require ( github.com/go-git/go-billy/v6 v6.0.0-20251120215217-80673c4ccbfb + github.com/go-git/go-git-fixtures/v5 v5.1.1 github.com/go-git/go-git/v6 v6.0.0-20251123162143-36fa81975a20 github.com/spf13/cobra v1.10.1 golang.org/x/crypto v0.45.0 From 94e409d14a704c248463f6e9d02e27b37a17090a Mon Sep 17 00:00:00 2001 From: Paulo Gomes Date: Thu, 27 Nov 2025 23:13:35 +0000 Subject: [PATCH 3/3] verify-pack: Add support for sha256 packfiles Signed-off-by: Paulo Gomes --- cmd/gogit/verify-pack.go | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/cmd/gogit/verify-pack.go b/cmd/gogit/verify-pack.go index 45ee745..b097d10 100644 --- a/cmd/gogit/verify-pack.go +++ b/cmd/gogit/verify-pack.go @@ -22,12 +22,14 @@ var ( verifyPackVerbose bool verifyPackFixtureUrl bool verifyPackFixtureTag bool + verifyPackSHA256 bool ) func init() { verifyPackCmd.Flags().BoolVarP(&verifyPackVerbose, "verbose", "v", false, "Show detailed object information") verifyPackCmd.Flags().BoolVarP(&verifyPackFixtureUrl, "fixture-url", "", false, "Use as go-git-fixture url") verifyPackCmd.Flags().BoolVarP(&verifyPackFixtureTag, "fixture-tag", "", false, "Use as go-git-fixture tag") + verifyPackCmd.Flags().BoolVarP(&verifyPackSHA256, "sha256", "", false, "Treat the pack file as sha256") rootCmd.AddCommand(verifyPackCmd) } @@ -72,7 +74,12 @@ func verifyPack(path string, verbose bool) error { } }() - idx := idxfile.NewMemoryIndex(crypto.SHA1.Size()) + ch := crypto.SHA1 + if verifyPackSHA256 { + ch = crypto.SHA256 + } + + idx := idxfile.NewMemoryIndex(ch.Size()) dec := idxfile.NewDecoder(idxFile) if err := dec.Decode(idx); err != nil { @@ -82,6 +89,7 @@ func verifyPack(path string, verbose bool) error { pf := packfile.NewPackfile( packFile, packfile.WithIdx(idx), + packfile.WithObjectIDSize(ch.Size()), ) defer func() { @@ -148,8 +156,8 @@ func verifyPack(path string, verbose bool) error { if err != nil { return fmt.Errorf("failed to stat pack file: %w", err) } - // Pack file ends with a 20-byte SHA-1 checksum. - objects[len(objects)-1].packedSize = stat.Size() - objects[len(objects)-1].offset - int64(crypto.SHA1.Size()) + // Pack file ends with a checksum (20-byte SHA-1 or 32-byte SHA-256). + objects[len(objects)-1].packedSize = stat.Size() - objects[len(objects)-1].offset - int64(ch.Size()) } // Resolve actual types for all objects (after delta application).