diff --git a/internal/file/test-fixtures/generate-zip-fixture-from-source-dir.sh b/internal/file/test-fixtures/generate-zip-fixture-from-source-dir.sh index 9cba4e09cb9..f73ea4ba0f6 100755 --- a/internal/file/test-fixtures/generate-zip-fixture-from-source-dir.sh +++ b/internal/file/test-fixtures/generate-zip-fixture-from-source-dir.sh @@ -3,5 +3,10 @@ set -eux # $1 —— absolute path to destination file, should end with .zip, ideally # $2 —— absolute path to directory from which to add entries to the archive +# $3 —— if files should be zip64 or not -pushd "$2" && find . -print | zip "$1" -@ && popd +if [[$3]]; then + pushd "$2" && find . -print | zip -fz "$1" -@ && popd +else + pushd "$2" && find . -print | zip "$1" -@ && popd +fi diff --git a/internal/file/zip_file_helpers_test.go b/internal/file/zip_file_helpers_test.go index 7bf89a08c55..7a76be3b01a 100644 --- a/internal/file/zip_file_helpers_test.go +++ b/internal/file/zip_file_helpers_test.go @@ -20,16 +20,20 @@ var expectedZipArchiveEntries = []string{ } // createZipArchive creates a new ZIP archive file at destinationArchivePath based on the directory found at -// sourceDirPath. -func createZipArchive(t testing.TB, sourceDirPath, destinationArchivePath string) { +// sourceDirPath. It forces a zip64 archive if zip64 is "0". +func createZipArchive(t testing.TB, sourceDirPath, destinationArchivePath string, zip64 bool) { t.Helper() cwd, err := os.Getwd() if err != nil { t.Fatalf("unable to get cwd: %+v", err) } + zip64Arg := "0" + if zip64 { + zip64Arg = "1" + } - cmd := exec.Command("./generate-zip-fixture-from-source-dir.sh", destinationArchivePath, path.Base(sourceDirPath)) + cmd := exec.Command("./generate-zip-fixture-from-source-dir.sh", destinationArchivePath, path.Base(sourceDirPath), zip64Arg) cmd.Dir = filepath.Join(cwd, "test-fixtures") if err := cmd.Start(); err != nil { @@ -66,7 +70,7 @@ func assertNoError(t testing.TB, fn func() error) func() { // which should be called (typically deferred) by the caller, the path of the created zip archive, and an error, // which should trigger a fatal test failure in the consuming test. The returned cleanup function will never be nil // (even if there's an error), and it should always be called. -func setupZipFileTest(t testing.TB, sourceDirPath string) string { +func setupZipFileTest(t testing.TB, sourceDirPath string, zip64 bool) string { t.Helper() archivePrefix, err := ioutil.TempFile("", "syft-ziputil-archive-TEST-") @@ -84,7 +88,7 @@ func setupZipFileTest(t testing.TB, sourceDirPath string) string { destinationArchiveFilePath := archivePrefix.Name() + ".zip" t.Logf("archive path: %s", destinationArchiveFilePath) - createZipArchive(t, sourceDirPath, destinationArchiveFilePath) + createZipArchive(t, sourceDirPath, destinationArchiveFilePath, zip64) t.Cleanup( assertNoError(t, @@ -109,7 +113,7 @@ func ensureNestedZipExists(t *testing.T, sourceDirPath string) error { t.Helper() nestedArchiveFilePath := path.Join(sourceDirPath, "nested.zip") - createZipArchive(t, sourceDirPath, nestedArchiveFilePath) + createZipArchive(t, sourceDirPath, nestedArchiveFilePath, false) return nil } diff --git a/internal/file/zip_file_manifest_test.go b/internal/file/zip_file_manifest_test.go index f89e88ca9a0..b9b475e27d5 100644 --- a/internal/file/zip_file_manifest_test.go +++ b/internal/file/zip_file_manifest_test.go @@ -22,7 +22,42 @@ func TestNewZipFileManifest(t *testing.T) { t.Fatal(err) } - archiveFilePath := setupZipFileTest(t, sourceDirPath) + archiveFilePath := setupZipFileTest(t, sourceDirPath, false) + + actual, err := NewZipFileManifest(archiveFilePath) + if err != nil { + t.Fatalf("unable to extract from unzip archive: %+v", err) + } + + if len(expectedZipArchiveEntries) != len(actual) { + t.Fatalf("mismatched manifest: %d != %d", len(actual), len(expectedZipArchiveEntries)) + } + + for _, e := range expectedZipArchiveEntries { + _, ok := actual[e] + if !ok { + t.Errorf("missing path: %s", e) + } + } + + if t.Failed() { + b, err := json.MarshalIndent(actual, "", " ") + if err != nil { + t.Fatalf("can't show results: %+v", err) + } + + t.Errorf("full result: %s", string(b)) + } +} + +func TestNewZip64FileManifest(t *testing.T) { + cwd, err := os.Getwd() + if err != nil { + t.Fatal(err) + } + + sourceDirPath := path.Join(cwd, "test-fixtures", "zip-source") + archiveFilePath := setupZipFileTest(t, sourceDirPath, true) actual, err := NewZipFileManifest(archiveFilePath) if err != nil { @@ -62,7 +97,7 @@ func TestZipFileManifest_GlobMatch(t *testing.T) { t.Fatal(err) } - archiveFilePath := setupZipFileTest(t, sourceDirPath) + archiveFilePath := setupZipFileTest(t, sourceDirPath, false) z, err := NewZipFileManifest(archiveFilePath) if err != nil { diff --git a/internal/file/zip_file_traversal_test.go b/internal/file/zip_file_traversal_test.go index 6c7a9ea46e0..d2bff824438 100644 --- a/internal/file/zip_file_traversal_test.go +++ b/internal/file/zip_file_traversal_test.go @@ -47,7 +47,7 @@ func TestUnzipToDir(t *testing.T) { goldenRootDir := filepath.Join(cwd, "test-fixtures") sourceDirPath := path.Join(goldenRootDir, "zip-source") - archiveFilePath := setupZipFileTest(t, sourceDirPath) + archiveFilePath := setupZipFileTest(t, sourceDirPath, false) unzipDestinationDir, err := ioutil.TempDir("", "syft-ziputil-contents-TEST-") t.Cleanup(assertNoError(t, func() error { @@ -227,7 +227,7 @@ func prepZipSourceFixture(t testing.TB) string { t.Logf("archive path: %s", archivePath) - createZipArchive(t, "zip-source", archivePrefix.Name()) + createZipArchive(t, "zip-source", archivePrefix.Name(), false) return archivePath } diff --git a/internal/file/zip_read_closer.go b/internal/file/zip_read_closer.go index 83edb6b0801..11c2783f3ea 100644 --- a/internal/file/zip_read_closer.go +++ b/internal/file/zip_read_closer.go @@ -3,6 +3,7 @@ package file import ( "archive/zip" "encoding/binary" + "errors" "fmt" "io" "os" @@ -13,7 +14,13 @@ import ( // - https://github.com/golang/go/blob/go1.16.4/src/archive/zip/reader.go // findArchiveStartOffset is derived from the same stdlib utils, specifically the readDirectoryEnd function. -const directoryEndLen = 22 +const ( + directoryEndLen = 22 + directory64LocLen = 20 + directory64EndLen = 56 + directory64LocSignature = 0x07064b50 + directory64EndSignature = 0x06064b50 +) // ZipReadCloser is a drop-in replacement for zip.ReadCloser (from zip.OpenReader) that additionally considers zips // that have bytes prefixed to the front of the archive (common with self-extracting jars). @@ -72,6 +79,12 @@ func (b *readBuf) uint32() uint32 { return v } +func (b *readBuf) uint64() uint64 { + v := binary.LittleEndian.Uint64(*b) + *b = (*b)[8:] + return v +} + type directoryEnd struct { diskNbr uint32 // unused dirDiskNbr uint32 // unused @@ -82,6 +95,7 @@ type directoryEnd struct { } // note: this is derived from readDirectoryEnd within the archive/zip package +// nolint:gocognit func findArchiveStartOffset(r io.ReaderAt, size int64) (startOfArchive uint64, err error) { // look for directoryEndSignature in the last 1k, then in the last 65k var buf []byte @@ -120,13 +134,22 @@ func findArchiveStartOffset(r io.ReaderAt, size int64) (startOfArchive uint64, e directoryOffset: uint64(b.uint32()), } // Calculate where the zip data actually begins - startOfArchive = uint64(directoryEndOffset) - d.directorySize - d.directoryOffset // These values mean that the file can be a zip64 file if d.directoryRecords == 0xffff || d.directorySize == 0xffff || d.directoryOffset == 0xffffffff { + p, err := findDirectory64End(r, directoryEndOffset) + if err == nil && p >= 0 { + directoryEndOffset = p + err = readDirectory64End(r, p, d) + } + if err != nil { + return 0, err + } startOfArchive = 0 // Prefixed data not supported } + startOfArchive = uint64(directoryEndOffset) - d.directorySize - d.directoryOffset + // Make sure directoryOffset points to somewhere in our file. if o := int64(d.directoryOffset); o < 0 || o >= size { return 0, zip.ErrFormat @@ -134,6 +157,56 @@ func findArchiveStartOffset(r io.ReaderAt, size int64) (startOfArchive uint64, e return startOfArchive, nil } +// findDirectory64End tries to read the zip64 locator just before the +// directory end and returns the offset of the zip64 directory end if +// found. +func findDirectory64End(r io.ReaderAt, directoryEndOffset int64) (int64, error) { + locOffset := directoryEndOffset - directory64LocLen + if locOffset < 0 { + return -1, nil // no need to look for a header outside the file + } + buf := make([]byte, directory64LocLen) + if _, err := r.ReadAt(buf, locOffset); err != nil { + return -1, err + } + b := readBuf(buf) + if sig := b.uint32(); sig != directory64LocSignature { + return -1, nil + } + if b.uint32() != 0 { // number of the disk with the start of the zip64 end of central directory + return -1, nil // the file is not a valid zip64-file + } + p := b.uint64() // relative offset of the zip64 end of central directory record + if b.uint32() != 1 { // total number of disks + return -1, nil // the file is not a valid zip64-file + } + return int64(p), nil +} + +// readDirectory64End reads the zip64 directory end and updates the +// directory end with the zip64 directory end values. +func readDirectory64End(r io.ReaderAt, offset int64, d *directoryEnd) (err error) { + buf := make([]byte, directory64EndLen) + if _, err := r.ReadAt(buf, offset); err != nil { + return err + } + + b := readBuf(buf) + if sig := b.uint32(); sig != directory64EndSignature { + return errors.New("could not read directory64End") + } + + b = b[12:] // skip dir size, version and version needed (uint64 + 2x uint16) + d.diskNbr = b.uint32() // number of this disk + d.dirDiskNbr = b.uint32() // number of the disk with the start of the central directory + d.dirRecordsThisDisk = b.uint64() // total number of entries in the central directory on this disk + d.directoryRecords = b.uint64() // total number of entries in the central directory + d.directorySize = b.uint64() // size of the central directory + d.directoryOffset = b.uint64() // offset of start of central directory with respect to the starting disk number + + return nil +} + func findSignatureInBlock(b []byte) int { for i := len(b) - directoryEndLen; i >= 0; i-- { // defined from directoryEndSignature