diff --git a/internal/gps/_testdata/src/cycle/a.go b/internal/gps/_testdata/src/cycle/a.go index b6d9252e1e..0965bb7037 100644 --- a/internal/gps/_testdata/src/cycle/a.go +++ b/internal/gps/_testdata/src/cycle/a.go @@ -6,6 +6,7 @@ package cycle import ( "cycle/one" + "github.com/golang/dep/internal/gps" ) diff --git a/internal/gps/_testdata/src/cycle/one/a.go b/internal/gps/_testdata/src/cycle/one/a.go index 8dc21d7f0b..407c8051c2 100644 --- a/internal/gps/_testdata/src/cycle/one/a.go +++ b/internal/gps/_testdata/src/cycle/one/a.go @@ -6,6 +6,7 @@ package one import ( "cycle/two" + "github.com/golang/dep/internal/gps" ) diff --git a/internal/gps/_testdata/src/cycle/two/a.go b/internal/gps/_testdata/src/cycle/two/a.go index 8dbbd715f3..98ea406f9c 100644 --- a/internal/gps/_testdata/src/cycle/two/a.go +++ b/internal/gps/_testdata/src/cycle/two/a.go @@ -6,6 +6,7 @@ package two import ( "cycle" + "github.com/golang/dep/internal/gps" ) diff --git a/internal/gps/_testdata/src/missing/a.go b/internal/gps/_testdata/src/missing/a.go index 644731fccf..0cdcb394ba 100644 --- a/internal/gps/_testdata/src/missing/a.go +++ b/internal/gps/_testdata/src/missing/a.go @@ -7,8 +7,9 @@ package simple import ( "sort" - "github.com/golang/dep/internal/gps" "missing/missing" + + "github.com/golang/dep/internal/gps" ) var ( diff --git a/internal/gps/pkgtree/digest.go b/internal/gps/pkgtree/digest.go new file mode 100644 index 0000000000..e2b9116cf0 --- /dev/null +++ b/internal/gps/pkgtree/digest.go @@ -0,0 +1,472 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pkgtree + +import ( + "bytes" + "crypto/sha256" + "encoding/binary" + "hash" + "io" + "os" + "path/filepath" + "strconv" + + "github.com/pkg/errors" +) + +const osPathSeparator = string(filepath.Separator) + +// lineEndingReader is a `io.Reader` that converts CRLF sequences to LF. +// +// When cloning or checking out repositories, some Version Control Systems, +// VCSs, on some supported Go Operating System architectures, GOOS, will +// automatically convert line endings that end in a single line feed byte, LF, +// to line endings that end in a two byte sequence of carriage return, CR, +// followed by LF. This LF to CRLF conversion would cause otherwise identical +// versioned files to have different on disk contents simply based on which VCS +// and GOOS are involved. Different file contents for the same file would cause +// the resultant hashes to differ. In order to ensure file contents normalize +// and produce the same hash, this structure wraps an io.Reader that modifies +// the file's contents when it is read, translating all CRLF sequences to LF. +type lineEndingReader struct { + src io.Reader // source io.Reader from which this reads + prevReadEndedCR bool // used to track whether final byte of previous Read was CR +} + +// newLineEndingReader returns a new lineEndingReader that reads from the +// specified source io.Reader. +func newLineEndingReader(src io.Reader) *lineEndingReader { + return &lineEndingReader{src: src} +} + +var crlf = []byte("\r\n") + +// Read consumes bytes from the structure's source io.Reader to fill the +// specified slice of bytes. It converts all CRLF byte sequences to LF, and +// handles cases where CR and LF straddle across two Read operations. +func (f *lineEndingReader) Read(buf []byte) (int, error) { + buflen := len(buf) + if f.prevReadEndedCR { + // Read one fewer bytes so we have room if the first byte of the + // upcoming Read is not a LF, in which case we will need to insert + // trailing CR from previous read. + buflen-- + } + nr, er := f.src.Read(buf[:buflen]) + if nr > 0 { + if f.prevReadEndedCR && buf[0] != '\n' { + // Having a CRLF split across two Read operations is rare, so the + // performance impact of copying entire buffer to the right by one + // byte, while suboptimal, will at least will not happen very + // often. This negative performance impact is mitigated somewhat on + // many Go compilation architectures, GOARCH, because the `copy` + // builtin uses a machine opcode for performing the memory copy on + // possibly overlapping regions of memory. This machine opcodes is + // not instantaneous and does require multiple CPU cycles to + // complete, but is significantly faster than the application + // looping through bytes. + copy(buf[1:nr+1], buf[:nr]) // shift data to right one byte + buf[0] = '\r' // insert the previous skipped CR byte at start of buf + nr++ // pretend we read one more byte + } + + // Remove any CRLF sequences in the buffer using `bytes.Index` because, + // like the `copy` builtin on many GOARCHs, it also takes advantage of a + // machine opcode to search for byte patterns. + var searchOffset int // index within buffer from whence the search will commence for each loop; set to the index of the end of the previous loop. + var shiftCount int // each subsequenct shift operation needs to shift bytes to the left by one more position than the shift that preceded it. + previousIndex := -1 // index of previously found CRLF; -1 means no previous index + for { + index := bytes.Index(buf[searchOffset:nr], crlf) + if index == -1 { + break + } + index += searchOffset // convert relative index to absolute + if previousIndex != -1 { + // shift substring between previous index and this index + copy(buf[previousIndex-shiftCount:], buf[previousIndex+1:index]) + shiftCount++ // next shift needs to be 1 byte to the left + } + previousIndex = index + searchOffset = index + 2 // start next search after len(crlf) + } + if previousIndex != -1 { + // handle final shift + copy(buf[previousIndex-shiftCount:], buf[previousIndex+1:nr]) + shiftCount++ + } + nr -= shiftCount // shorten byte read count by number of shifts executed + + // When final byte from a read operation is CR, do not emit it until + // ensure first byte on next read is not LF. + if f.prevReadEndedCR = buf[nr-1] == '\r'; f.prevReadEndedCR { + nr-- // pretend byte was never read from source + } + } else if f.prevReadEndedCR { + // Reading from source returned nothing, but this struct is sitting on a + // trailing CR from previous Read, so let's give it to client now. + buf[0] = '\r' + nr = 1 + er = nil + f.prevReadEndedCR = false // prevent infinite loop + } + return nr, er +} + +// writeBytesWithNull appends the specified data to the specified hash, followed by +// the NULL byte, in order to make accidental hash collisions less likely. +func writeBytesWithNull(h hash.Hash, data []byte) { + // Ignore return values from writing to the hash, because hash write always + // returns nil error. + _, _ = h.Write(append(data, 0)) +} + +// dirWalkClosure is used to reduce number of allocation involved in closing +// over these variables. +type dirWalkClosure struct { + someCopyBufer []byte // allocate once and reuse for each file copy + someModeBytes []byte // allocate once and reuse for each node + someDirLen int + someHash hash.Hash +} + +// DigestFromDirectory returns a hash of the specified directory contents, which +// will match the hash computed for any directory on any supported Go platform +// whose contents exactly match the specified directory. +// +// This function ignores any file system node named `vendor`, `.bzr`, `.git`, +// `.hg`, and `.svn`, as these are typically used as Version Control System +// (VCS) directories. +// +// Other than the `vendor` and VCS directories mentioned above, the calculated +// hash includes the pathname to every discovered file system node, whether it +// is an empty directory, a non-empty directory, empty file, non-empty file, or +// symbolic link. If a symbolic link, the referent name is included. If a +// non-empty file, the file's contents are included. If a non-empty directory, +// the contents of the directory are included. +// +// While filepath.Walk could have been used, that standard library function +// skips symbolic links, and for now, we want the hash to include the symbolic +// link referents. +func DigestFromDirectory(osDirname string) ([]byte, error) { + osDirname = filepath.Clean(osDirname) + + // Create a single hash instance for the entire operation, rather than a new + // hash for each node we encounter. + + closure := dirWalkClosure{ + someCopyBufer: make([]byte, 4*1024), // only allocate a single page + someModeBytes: make([]byte, 4), // scratch place to store encoded os.FileMode (uint32) + someDirLen: len(osDirname) + len(osPathSeparator), + someHash: sha256.New(), + } + + err := DirWalk(osDirname, func(osPathname string, info os.FileInfo, err error) error { + if err != nil { + return err // DirWalk received an error during initial Lstat + } + + var osRelative string + if len(osPathname) > closure.someDirLen { + osRelative = osPathname[closure.someDirLen:] + } + + switch filepath.Base(osRelative) { + case "vendor", ".bzr", ".git", ".hg", ".svn": + return filepath.SkipDir + } + + // We could make our own enum-like data type for encoding the file type, + // but Go's runtime already gives us architecture independent file + // modes, as discussed in `os/types.go`: + // + // Go's runtime FileMode type has same definition on all systems, so + // that information about files can be moved from one system to + // another portably. + var mt os.FileMode + + // We only care about the bits that identify the type of a file system + // node, and can ignore append, exclusive, temporary, setuid, setgid, + // permission bits, and sticky bits, which are coincident to bits which + // declare type of the file system node. + modeType := info.Mode() & os.ModeType + var shouldSkip bool // skip some types of file system nodes + + switch { + case modeType&os.ModeDir > 0: + mt = os.ModeDir + // DirWalkFunc itself does not need to enumerate children, because + // DirWalk will do that for us. + shouldSkip = true + case modeType&os.ModeSymlink > 0: + mt = os.ModeSymlink + case modeType&os.ModeNamedPipe > 0: + mt = os.ModeNamedPipe + shouldSkip = true + case modeType&os.ModeSocket > 0: + mt = os.ModeSocket + shouldSkip = true + case modeType&os.ModeDevice > 0: + mt = os.ModeDevice + shouldSkip = true + } + + // Write the relative pathname to hash because the hash is a function of + // the node names, node types, and node contents. Added benefit is that + // empty directories, named pipes, sockets, devices, and symbolic links + // will also affect final hash value. Use `filepath.ToSlash` to ensure + // relative pathname is os-agnostic. + writeBytesWithNull(closure.someHash, []byte(filepath.ToSlash(osRelative))) + + binary.LittleEndian.PutUint32(closure.someModeBytes, uint32(mt)) // encode the type of mode + writeBytesWithNull(closure.someHash, closure.someModeBytes) // and write to hash + + if shouldSkip { + return nil // nothing more to do for some of the node types + } + + if mt == os.ModeSymlink { // okay to check for equivalence because we set to this value + osRelative, err = os.Readlink(osPathname) // read the symlink referent + if err != nil { + return errors.Wrap(err, "cannot Readlink") + } + writeBytesWithNull(closure.someHash, []byte(filepath.ToSlash(osRelative))) // write referent to hash + return nil // proceed to next node in queue + } + + // If we get here, node is a regular file. + fh, err := os.Open(osPathname) + if err != nil { + return errors.Wrap(err, "cannot Open") + } + + var bytesWritten int64 + bytesWritten, err = io.CopyBuffer(closure.someHash, newLineEndingReader(fh), closure.someCopyBufer) // fast copy of file contents to hash + err = errors.Wrap(err, "cannot Copy") // errors.Wrap only wraps non-nil, so skip extra check + writeBytesWithNull(closure.someHash, []byte(strconv.FormatInt(bytesWritten, 10))) // 10: format file size as base 10 integer + + // Close the file handle to the open file without masking + // possible previous error value. + if er := fh.Close(); err == nil { + err = errors.Wrap(er, "cannot Close") + } + return err + }) + if err != nil { + return nil, err + } + return closure.someHash.Sum(nil), nil +} + +// VendorStatus represents one of a handful of possible status conditions for a +// particular file sytem node in the vendor directory tree. +type VendorStatus uint8 + +const ( + // NotInLock is used when a file system node exists for which there is no + // corresponding dependency in the lock file. + NotInLock VendorStatus = iota + + // NotInTree is used when a lock file dependency exists for which there is + // no corresponding file system node. + NotInTree + + // NoMismatch is used when the digest for a dependency listed in the + // lockfile matches what is calculated from the file system. + NoMismatch + + // EmptyDigestInLock is used when the digest for a dependency listed in the + // lock file is the empty string. While this is a special case of + // DigestMismatchInLock, keeping both cases discrete is a desired feature. + EmptyDigestInLock + + // DigestMismatchInLock is used when the digest for a dependency listed in + // the lock file does not match what is calculated from the file system. + DigestMismatchInLock +) + +func (ls VendorStatus) String() string { + switch ls { + case NotInTree: + return "not in tree" + case NotInLock: + return "not in lock" + case NoMismatch: + return "match" + case EmptyDigestInLock: + return "empty digest in lock" + case DigestMismatchInLock: + return "mismatch" + } + return "unknown" +} + +// fsnode is used to track which file system nodes are required by the lock +// file. When a directory is found whose name matches one of the declared +// projects in the lock file, e.g., "github.com/alice/alice1", an fsnode is +// created for that directory, but not for any of its children. All other file +// system nodes encountered will result in a fsnode created to represent it. +type fsnode struct { + osRelative string // os-specific relative path of a resource under vendor root + isRequiredAncestor bool // true iff this node or one of its descendants is in the lock file + myIndex, parentIndex int // index of this node and its parent in the tree's slice +} + +// VerifyDepTree verifies a dependency tree according to expected digest sums, +// and returns an associative array of file system nodes and their respective +// vendor status conditions. +// +// The keys to the expected digest sums associative array represent the +// project's dependencies, and each is required to be expressed using the +// solidus character, `/`, as its path separator. For example, even on a GOOS +// platform where the file system path separator is a character other than +// solidus, one particular dependency would be represented as +// "github.com/alice/alice1". +func VerifyDepTree(osDirname string, wantSums map[string][]byte) (map[string]VendorStatus, error) { + osDirname = filepath.Clean(osDirname) + + // Ensure top level pathname is a directory + fi, err := os.Stat(osDirname) + if err != nil { + return nil, errors.Wrap(err, "cannot Stat") + } + if !fi.IsDir() { + return nil, errors.Errorf("cannot verify non directory: %q", osDirname) + } + + // Initialize work queue with a node representing the specified directory + // name by declaring its relative pathname under the directory name as the + // empty string. + currentNode := &fsnode{osRelative: "", parentIndex: -1, isRequiredAncestor: true} + queue := []*fsnode{currentNode} // queue of directories that must be inspected + + // In order to identify all file system nodes that are not in the lock file, + // represented by the specified expected sums parameter, and in order to + // only report the top level of a subdirectory of file system nodes, rather + // than every node internal to them, we will create a tree of nodes stored + // in a slice. We do this because we cannot predict the depth at which + // project roots occur. Some projects are fewer than and some projects more + // than the typical three layer subdirectory under the vendor root + // directory. + // + // For a following few examples, assume the below vendor root directory: + // + // github.com/alice/alice1/a1.go + // github.com/alice/alice2/a2.go + // github.com/bob/bob1/b1.go + // github.com/bob/bob2/b2.go + // launchpad.net/nifty/n1.go + // + // 1) If only the `alice1` and `alice2` projects were in the lock file, we'd + // prefer the output to state that `github.com/bob` is `NotInLock`, and + // `launchpad.net/nifty` is `NotInLock`. + // + // 2) If `alice1`, `alice2`, and `bob1` were in the lock file, we'd want to + // report `github.com/bob/bob2` as `NotInLock`, and `launchpad.net/nifty` is + // `NotInLock`. + // + // 3) If none of `alice1`, `alice2`, `bob1`, or `bob2` were in the lock + // file, the entire `github.com` directory would be reported as `NotInLock`, + // along with `launchpad.net/nifty` is `NotInLock`. + // + // Each node in our tree has the slice index of its parent node, so once we + // can categorically state a particular directory is required because it is + // in the lock file, we can mark all of its ancestors as also being + // required. Then, when we finish walking the directory hierarchy, any nodes + // which are not required but have a required parent will be marked as + // `NotInLock`. + nodes := []*fsnode{currentNode} + + // Create associative array to store the results of calling this function. + slashStatus := make(map[string]VendorStatus) + + // Mark directories of expected projects as required. When each respective + // project is later found while traversing the vendor root hierarchy, its + // status will be updated to reflect whether its digest is empty, or, + // whether or not it matches the expected digest. + for slashPathname := range wantSums { + slashStatus[slashPathname] = NotInTree + } + + for len(queue) > 0 { + // Pop node from the top of queue (depth first traversal, reverse + // lexicographical order inside a directory), clearing the value stored + // in the slice's backing array as we proceed. + lq1 := len(queue) - 1 + currentNode, queue[lq1], queue = queue[lq1], nil, queue[:lq1] + slashPathname := filepath.ToSlash(currentNode.osRelative) + osPathname := filepath.Join(osDirname, currentNode.osRelative) + + if expectedSum, ok := wantSums[slashPathname]; ok { + ls := EmptyDigestInLock + if len(expectedSum) > 0 { + projectSum, err := DigestFromDirectory(osPathname) + if err != nil { + return nil, errors.Wrap(err, "cannot compute dependency hash") + } + if bytes.Equal(projectSum, expectedSum) { + ls = NoMismatch + } else { + ls = DigestMismatchInLock + } + } + slashStatus[slashPathname] = ls + + // Mark current nodes and all its parents as required. + for i := currentNode.myIndex; i != -1; i = nodes[i].parentIndex { + nodes[i].isRequiredAncestor = true + } + + // Do not need to process this directory's contents because we + // already accounted for its contents while calculating its digest. + continue + } + + osChildrenNames, err := sortedChildrenFromDirname(osPathname) + if err != nil { + return nil, errors.Wrap(err, "cannot get sorted list of directory children") + } + for _, osChildName := range osChildrenNames { + switch osChildName { + case ".", "..", "vendor", ".bzr", ".git", ".hg", ".svn": + // skip + default: + osChildRelative := filepath.Join(currentNode.osRelative, osChildName) + osChildPathname := filepath.Join(osDirname, osChildRelative) + + // Create a new fsnode for this file system node, with a parent + // index set to the index of the current node. + otherNode := &fsnode{osRelative: osChildRelative, myIndex: len(nodes), parentIndex: currentNode.myIndex} + + fi, err := os.Stat(osChildPathname) + if err != nil { + return nil, errors.Wrap(err, "cannot Stat") + } + nodes = append(nodes, otherNode) // Track all file system nodes... + if fi.IsDir() { + queue = append(queue, otherNode) // but only need to add directories to the work queue. + } + } + } + } + + // Ignoring first node in the list, walk nodes from last to first. Whenever + // the current node is not required, but its parent is required, then the + // current node ought to be marked as `NotInLock`. + for len(nodes) > 1 { + // Pop node from top of queue, clearing the value stored in the slice's + // backing array as we proceed. + ln1 := len(nodes) - 1 + currentNode, nodes[ln1], nodes = nodes[ln1], nil, nodes[:ln1] + + if !currentNode.isRequiredAncestor && nodes[currentNode.parentIndex].isRequiredAncestor { + slashStatus[filepath.ToSlash(currentNode.osRelative)] = NotInLock + } + } + currentNode, nodes = nil, nil + + return slashStatus, nil +} diff --git a/internal/gps/pkgtree/digest_test.go b/internal/gps/pkgtree/digest_test.go new file mode 100644 index 0000000000..bfad0ef13c --- /dev/null +++ b/internal/gps/pkgtree/digest_test.go @@ -0,0 +1,220 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pkgtree + +import ( + "bytes" + "io" + "os" + "path/filepath" + "testing" +) + +// crossBuffer is a test io.Reader that emits a few canned responses. +type crossBuffer struct { + readCount int + iterations []string +} + +func (cb *crossBuffer) Read(buf []byte) (int, error) { + if cb.readCount == len(cb.iterations) { + return 0, io.EOF + } + cb.readCount++ + return copy(buf, cb.iterations[cb.readCount-1]), nil +} + +func streamThruLineEndingReader(t *testing.T, iterations []string) []byte { + dst := new(bytes.Buffer) + n, err := io.Copy(dst, newLineEndingReader(&crossBuffer{iterations: iterations})) + if got, want := err, error(nil); got != want { + t.Errorf("(GOT): %v; (WNT): %v", got, want) + } + if got, want := n, int64(dst.Len()); got != want { + t.Errorf("(GOT): %v; (WNT): %v", got, want) + } + return dst.Bytes() +} + +func TestLineEndingReader(t *testing.T) { + testCases := []struct { + input []string + output string + }{ + {[]string{"\r"}, "\r"}, + {[]string{"\r\n"}, "\n"}, + {[]string{"now is the time\r\n"}, "now is the time\n"}, + {[]string{"now is the time\r\n(trailing data)"}, "now is the time\n(trailing data)"}, + {[]string{"now is the time\n"}, "now is the time\n"}, + {[]string{"now is the time\r"}, "now is the time\r"}, // trailing CR ought to convey + {[]string{"\rnow is the time"}, "\rnow is the time"}, // CR not followed by LF ought to convey + {[]string{"\rnow is the time\r"}, "\rnow is the time\r"}, // CR not followed by LF ought to convey + + // no line splits + {[]string{"first", "second", "third"}, "firstsecondthird"}, + + // 1->2 and 2->3 both break across a CRLF + {[]string{"first\r", "\nsecond\r", "\nthird"}, "first\nsecond\nthird"}, + + // 1->2 breaks across CRLF and 2->3 does not + {[]string{"first\r", "\nsecond", "third"}, "first\nsecondthird"}, + + // 1->2 breaks across CRLF and 2 ends in CR but 3 does not begin LF + {[]string{"first\r", "\nsecond\r", "third"}, "first\nsecond\rthird"}, + + // 1 ends in CR but 2 does not begin LF, and 2->3 breaks across CRLF + {[]string{"first\r", "second\r", "\nthird"}, "first\rsecond\nthird"}, + + // 1 ends in CR but 2 does not begin LF, and 2->3 does not break across CRLF + {[]string{"first\r", "second\r", "\nthird"}, "first\rsecond\nthird"}, + + // 1->2 and 2->3 both break across a CRLF, but 3->4 does not + {[]string{"first\r", "\nsecond\r", "\nthird\r", "fourth"}, "first\nsecond\nthird\rfourth"}, + {[]string{"first\r", "\nsecond\r", "\nthird\n", "fourth"}, "first\nsecond\nthird\nfourth"}, + + {[]string{"this is the result\r\nfrom the first read\r", "\nthis is the result\r\nfrom the second read\r"}, + "this is the result\nfrom the first read\nthis is the result\nfrom the second read\r"}, + {[]string{"now is the time\r\nfor all good engineers\r\nto improve their test coverage!\r\n"}, + "now is the time\nfor all good engineers\nto improve their test coverage!\n"}, + {[]string{"now is the time\r\nfor all good engineers\r", "\nto improve their test coverage!\r\n"}, + "now is the time\nfor all good engineers\nto improve their test coverage!\n"}, + } + + for _, testCase := range testCases { + got := streamThruLineEndingReader(t, testCase.input) + if want := []byte(testCase.output); !bytes.Equal(got, want) { + t.Errorf("Input: %#v; (GOT): %#q; (WNT): %#q", testCase.input, got, want) + } + } +} + +//////////////////////////////////////// + +func getTestdataVerifyRoot(t *testing.T) string { + cwd, err := os.Getwd() + if err != nil { + t.Fatal(err) + } + return filepath.Join(filepath.Dir(cwd), "testdata_digest") +} + +func TestDigestFromDirectoryBailsUnlessDirectory(t *testing.T) { + prefix := getTestdataVerifyRoot(t) + relativePathname := "launchpad.net/match" + _, err := DigestFromDirectory(filepath.Join(prefix, relativePathname)) + if got, want := err, error(nil); got != want { + t.Errorf("\n(GOT): %v; (WNT): %v", got, want) + } +} + +func TestDigestFromDirectory(t *testing.T) { + relativePathname := "launchpad.net/match" + want := []byte{0x7e, 0x10, 0x6, 0x2f, 0x8, 0x3, 0x3c, 0x76, 0xae, 0xbc, 0xa4, 0xc9, 0xec, 0x73, 0x67, 0x15, 0x70, 0x2b, 0x0, 0x89, 0x27, 0xbb, 0x61, 0x9d, 0xc7, 0xc3, 0x39, 0x46, 0x3, 0x91, 0xb7, 0x3b} + + // NOTE: Create the hash using both an absolute and a relative pathname to + // ensure hash ignores prefix. + + t.Run("AbsolutePrefix", func(t *testing.T) { + prefix := getTestdataVerifyRoot(t) + got, err := DigestFromDirectory(filepath.Join(prefix, relativePathname)) + if err != nil { + t.Fatal(err) + } + if !bytes.Equal(got, want) { + t.Errorf("\n(GOT):\n\t%#v\n(WNT):\n\t%#v", got, want) + } + }) + + t.Run("RelativePrefix", func(t *testing.T) { + prefix := "../testdata_digest" + got, err := DigestFromDirectory(filepath.Join(prefix, relativePathname)) + if err != nil { + t.Fatal(err) + } + if !bytes.Equal(got, want) { + t.Errorf("\n(GOT):\n\t%#v\n(WNT):\n\t%#v", got, want) + } + }) +} + +func TestVerifyDepTree(t *testing.T) { + vendorRoot := getTestdataVerifyRoot(t) + + wantSums := map[string][]byte{ + "github.com/alice/match": []byte{0x7e, 0x10, 0x6, 0x2f, 0x8, 0x3, 0x3c, 0x76, 0xae, 0xbc, 0xa4, 0xc9, 0xec, 0x73, 0x67, 0x15, 0x70, 0x2b, 0x0, 0x89, 0x27, 0xbb, 0x61, 0x9d, 0xc7, 0xc3, 0x39, 0x46, 0x3, 0x91, 0xb7, 0x3b}, + "github.com/alice/mismatch": []byte("some non-matching digest"), + "github.com/bob/emptyDigest": nil, // empty hash result + "github.com/bob/match": []byte{0x7e, 0x10, 0x6, 0x2f, 0x8, 0x3, 0x3c, 0x76, 0xae, 0xbc, 0xa4, 0xc9, 0xec, 0x73, 0x67, 0x15, 0x70, 0x2b, 0x0, 0x89, 0x27, 0xbb, 0x61, 0x9d, 0xc7, 0xc3, 0x39, 0x46, 0x3, 0x91, 0xb7, 0x3b}, + "github.com/charlie/notInTree": nil, // not in tree result ought to superseede empty digest result + // matching result at seldomly found directory level + "launchpad.net/match": []byte{0x7e, 0x10, 0x6, 0x2f, 0x8, 0x3, 0x3c, 0x76, 0xae, 0xbc, 0xa4, 0xc9, 0xec, 0x73, 0x67, 0x15, 0x70, 0x2b, 0x0, 0x89, 0x27, 0xbb, 0x61, 0x9d, 0xc7, 0xc3, 0x39, 0x46, 0x3, 0x91, 0xb7, 0x3b}, + } + + status, err := VerifyDepTree(vendorRoot, wantSums) + if err != nil { + t.Fatal(err) + } + + if got, want := len(status), 7; got != want { + t.Errorf("\n(GOT): %v; (WNT): %v", got, want) + } + + checkStatus := func(t *testing.T, status map[string]VendorStatus, key string, want VendorStatus) { + got, ok := status[key] + if !ok { + t.Errorf("Want key: %q", key) + return + } + if got != want { + t.Errorf("Key: %q; (GOT): %v; (WNT): %v", key, got, want) + } + } + + checkStatus(t, status, "github.com/alice/match", NoMismatch) + checkStatus(t, status, "github.com/alice/mismatch", DigestMismatchInLock) + checkStatus(t, status, "github.com/alice/notInLock", NotInLock) + checkStatus(t, status, "github.com/bob/match", NoMismatch) + checkStatus(t, status, "github.com/bob/emptyDigest", EmptyDigestInLock) + checkStatus(t, status, "github.com/charlie/notInTree", NotInTree) + checkStatus(t, status, "launchpad.net/match", NoMismatch) + + if t.Failed() { + for k, want := range wantSums { + got, err := DigestFromDirectory(filepath.Join(vendorRoot, k)) + if err != nil { + t.Error(err) + } + if !bytes.Equal(got, want) { + t.Errorf("%q\n(GOT):\n\t%#v\n(WNT):\n\t%#v", k, got, want) + } + } + } +} + +func BenchmarkDigestFromDirectory(b *testing.B) { + b.Skip("Eliding benchmark of user's Go source directory") + + prefix := filepath.Join(os.Getenv("GOPATH"), "src") + + for i := 0; i < b.N; i++ { + _, err := DigestFromDirectory(prefix) + if err != nil { + b.Fatal(err) + } + } +} + +func BenchmarkVerifyDepTree(b *testing.B) { + b.Skip("Eliding benchmark of user's Go source directory") + + prefix := filepath.Join(os.Getenv("GOPATH"), "src") + + for i := 0; i < b.N; i++ { + _, err := VerifyDepTree(prefix, nil) + if err != nil { + b.Fatal(err) + } + } +} diff --git a/internal/gps/pkgtree/dirwalk.go b/internal/gps/pkgtree/dirwalk.go new file mode 100644 index 0000000000..350c1606c3 --- /dev/null +++ b/internal/gps/pkgtree/dirwalk.go @@ -0,0 +1,139 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pkgtree + +import ( + "os" + "path/filepath" + "sort" + "strings" + + "github.com/pkg/errors" +) + +// DirWalkFunc is the type of the function called for each file system node +// visited by DirWalk. The path argument contains the argument to DirWalk as a +// prefix; that is, if DirWalk is called with "dir", which is a directory +// containing the file "a", the walk function will be called with the argument +// "dir/a", using the correct os.PathSeparator for the Go Operating System +// architecture, GOOS. The info argument is the os.FileInfo for the named path. +// +// If there was a problem walking to the file or directory named by path, the +// incoming error will describe the problem and the function can decide how to +// handle that error (and DirWalk will not descend into that directory). If an +// error is returned, processing stops. The sole exception is when the function +// returns the special value filepath.SkipDir. If the function returns +// filepath.SkipDir when invoked on a directory, DirWalk skips the directory's +// contents entirely. If the function returns filepath.SkipDir when invoked on a +// non-directory file system node, DirWalk skips the remaining files in the +// containing directory. +type DirWalkFunc func(osPathname string, info os.FileInfo, err error) error + +// DirWalk walks the file tree rooted at osDirname, calling for each file system +// node in the tree, including root. All errors that arise visiting nodes are +// filtered by walkFn. The nodes are walked in lexical order, which makes the +// output deterministic but means that for very large directories DirWalk can be +// inefficient. Unlike filepath.Walk, DirWalk does follow symbolic links. +func DirWalk(osDirname string, walkFn DirWalkFunc) error { + osDirname = filepath.Clean(osDirname) + + // Ensure parameter is a directory + fi, err := os.Stat(osDirname) + if err != nil { + return errors.Wrap(err, "cannot read node") + } + if !fi.IsDir() { + return errors.Errorf("cannot walk non directory: %q", osDirname) + } + + // Initialize a work queue with the empty string, which signifies the + // starting directory itself. + queue := []string{""} + + var osRelative string // os-specific relative pathname under directory name + + // As we enumerate over the queue and encounter a directory, its children + // will be added to the work queue. + for len(queue) > 0 { + // Unshift a pathname from the queue (breadth-first traversal of + // hierarchy) + osRelative, queue = queue[0], queue[1:] + osPathname := filepath.Join(osDirname, osRelative) + + // walkFn needs to choose how to handle symbolic links, therefore obtain + // lstat rather than stat. + fi, err = os.Lstat(osPathname) + if err == nil { + err = walkFn(osPathname, fi, nil) + } else { + err = walkFn(osPathname, nil, errors.Wrap(err, "cannot read node")) + } + + if err != nil { + if err == filepath.SkipDir { + if fi.Mode()&os.ModeSymlink > 0 { + // Resolve symbolic link referent to determine whether node + // is directory or not. + fi, err = os.Stat(osPathname) + if err != nil { + return errors.Wrap(err, "cannot visit node") + } + } + // If current node is directory, then skip this + // directory. Otherwise, skip all nodes in the same parent + // directory. + if !fi.IsDir() { + // Consume nodes from queue while they have the same parent + // as the current node. + osParent := filepath.Dir(osPathname) + osPathSeparator + for len(queue) > 0 && strings.HasPrefix(queue[0], osParent) { + queue = queue[1:] // drop sibling from queue + } + } + + continue + } + return errors.Wrap(err, "DirWalkFunction") // wrap error returned by walkFn + } + + if fi.IsDir() { + osChildrenNames, err := sortedChildrenFromDirname(osPathname) + if err != nil { + return errors.Wrap(err, "cannot get list of directory children") + } + for _, osChildName := range osChildrenNames { + switch osChildName { + case ".", "..": + // skip + default: + queue = append(queue, filepath.Join(osRelative, osChildName)) + } + } + } + } + return nil +} + +// sortedChildrenFromDirname returns a lexicographically sorted list of child +// nodes for the specified directory. +func sortedChildrenFromDirname(osDirname string) ([]string, error) { + fh, err := os.Open(osDirname) + if err != nil { + return nil, errors.Wrap(err, "cannot Open") + } + + osChildrenNames, err := fh.Readdirnames(0) // 0: read names of all children + if err != nil { + return nil, errors.Wrap(err, "cannot Readdirnames") + } + sort.Strings(osChildrenNames) + + // Close the file handle to the open directory without masking possible + // previous error value. + if er := fh.Close(); err == nil { + err = errors.Wrap(er, "cannot Close") + } + return osChildrenNames, err +} diff --git a/internal/gps/testdata_digest/github.com/alice/match/match.go b/internal/gps/testdata_digest/github.com/alice/match/match.go new file mode 100644 index 0000000000..ab5f875285 --- /dev/null +++ b/internal/gps/testdata_digest/github.com/alice/match/match.go @@ -0,0 +1,5 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package match diff --git a/internal/gps/testdata_digest/github.com/alice/mismatch/mismatch.go b/internal/gps/testdata_digest/github.com/alice/mismatch/mismatch.go new file mode 100644 index 0000000000..1ace4e7683 --- /dev/null +++ b/internal/gps/testdata_digest/github.com/alice/mismatch/mismatch.go @@ -0,0 +1,5 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mismatch diff --git a/internal/gps/testdata_digest/github.com/alice/notInLock/notInLock.go b/internal/gps/testdata_digest/github.com/alice/notInLock/notInLock.go new file mode 100644 index 0000000000..5b2914de73 --- /dev/null +++ b/internal/gps/testdata_digest/github.com/alice/notInLock/notInLock.go @@ -0,0 +1,5 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package notInLock diff --git a/internal/gps/testdata_digest/github.com/bob/emptyDigest/emptyDigest.go b/internal/gps/testdata_digest/github.com/bob/emptyDigest/emptyDigest.go new file mode 100644 index 0000000000..2d6067bc8e --- /dev/null +++ b/internal/gps/testdata_digest/github.com/bob/emptyDigest/emptyDigest.go @@ -0,0 +1,5 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package emptyDigest diff --git a/internal/gps/testdata_digest/github.com/bob/match/match.go b/internal/gps/testdata_digest/github.com/bob/match/match.go new file mode 100644 index 0000000000..ab5f875285 --- /dev/null +++ b/internal/gps/testdata_digest/github.com/bob/match/match.go @@ -0,0 +1,5 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package match diff --git a/internal/gps/testdata_digest/launchpad.net/match/match.go b/internal/gps/testdata_digest/launchpad.net/match/match.go new file mode 100644 index 0000000000..ab5f875285 --- /dev/null +++ b/internal/gps/testdata_digest/launchpad.net/match/match.go @@ -0,0 +1,5 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package match