From 84b3567419007c1d574cc6ab9975cac1180988ae Mon Sep 17 00:00:00 2001 From: "Karrick S. McDermott" Date: Sat, 15 Jul 2017 15:07:20 -0600 Subject: [PATCH 01/36] HashFromNode and VerifyDepTree * HashFromNode returns a deterministic cryptographic digest for a directory tree * VerifyDepTree verifies dep tree according to expected digests --- internal/fs/hash.go | 145 +++++++++++++ internal/fs/hash_test.go | 40 ++++ internal/fs/testdata/blob | 7 + internal/fs/testdata/recursive/blob | 7 + internal/fs/testdata/recursive/emptyFile | 0 internal/fs/testdata/recursive/foo/bar | 3 + internal/fs/testdata/recursive/vendor/skip1 | 1 + internal/fs/testdata/recursive/vendor/skip2 | 0 internal/gps/_testdata/src/cycle/a.go | 1 + internal/gps/_testdata/src/cycle/one/a.go | 1 + internal/gps/_testdata/src/cycle/two/a.go | 1 + internal/gps/_testdata/src/missing/a.go | 3 +- internal/gps/pkgtree/pkgtree.go | 202 ++++++++++++++++++ internal/gps/pkgtree/pkgtree_test.go | 47 ++++ .../github.com/alice/alice1/a.go | 0 .../github.com/alice/alice2/a.go | 0 .../github.com/alice/notInLock/a.go | 0 .../verify_testdata/github.com/bob/bob1/a.go | 0 .../verify_testdata/github.com/bob/bob2/a.go | 0 .../verify_testdata/launchpad.net/nifty/a.go | 0 20 files changed, 457 insertions(+), 1 deletion(-) create mode 100644 internal/fs/hash.go create mode 100644 internal/fs/hash_test.go create mode 100644 internal/fs/testdata/blob create mode 100644 internal/fs/testdata/recursive/blob create mode 100644 internal/fs/testdata/recursive/emptyFile create mode 100644 internal/fs/testdata/recursive/foo/bar create mode 100644 internal/fs/testdata/recursive/vendor/skip1 create mode 100644 internal/fs/testdata/recursive/vendor/skip2 create mode 100644 internal/gps/verify_testdata/github.com/alice/alice1/a.go create mode 100644 internal/gps/verify_testdata/github.com/alice/alice2/a.go create mode 100644 internal/gps/verify_testdata/github.com/alice/notInLock/a.go create mode 100644 
internal/gps/verify_testdata/github.com/bob/bob1/a.go create mode 100644 internal/gps/verify_testdata/github.com/bob/bob2/a.go create mode 100644 internal/gps/verify_testdata/launchpad.net/nifty/a.go diff --git a/internal/fs/hash.go b/internal/fs/hash.go new file mode 100644 index 0000000000..0be4b4cd68 --- /dev/null +++ b/internal/fs/hash.go @@ -0,0 +1,145 @@ +package fs + +import ( + "crypto/sha256" + "fmt" + "io" + "os" + "path/filepath" + "sort" + "strconv" + + "github.com/pkg/errors" +) + +const ( + pathSeparator = string(filepath.Separator) + skipModes = os.ModeDevice | os.ModeNamedPipe | os.ModeSocket | os.ModeCharDevice +) + +// HashFromNode returns a deterministic hash of the specified file system node, +// performing a breadth-first traversal of directories. While the specified +// prefix is joined with the pathname to walk the file system, the prefix string +// is eliminated from the pathname of the nodes encounted when hashing the +// pathnames. +// +// This function ignores any file system node named `vendor`, `.bzr`, `.git`, +// `.hg`, and `.svn`, as these are typically used as Version Control System +// (VCS) directories. +// +// Other than the `vendor` and VCS directories mentioned above, the calculated +// hash includes the pathname to every discovered file system node, whether it +// is an empty directory, a non-empty directory, empty file, non-empty file, or +// symbolic link. If a symbolic link, the referent name is included. If a +// non-empty file, the file's contents are incuded. If a non-empty directory, +// the contents of the directory are included. +// +// While filepath.Walk could have been used, that standard library function +// skips symbolic links, and for now, we want to hash the referent string of +// symbolic links. +func HashFromNode(prefix, pathname string) (hash string, err error) { + // Create a single hash instance for the entire operation, rather than a new + // hash for each node we encounter. 
+ h := sha256.New() + + // "../../../vendor", "github.com/account/library" + prefix = filepath.Clean(prefix) + pathSeparator + prefixLength := len(prefix) + if prefixLength > 0 { + prefixLength += len(pathSeparator) // if not empty string, include len of path separator + } + joined := filepath.Join(prefix, pathname) + + // Initialize a work queue with the os-agnostic cleaned up pathname. Note + // that we use `filepath.Clean` rather than `filepath.Abs`, because we don't + // want the hash to be based on the absolute pathnames of the specified + // directory and contents. + pathnameQueue := []string{joined} + + for len(pathnameQueue) > 0 { + // NOTE: unshift a pathname from the queue + pathname, pathnameQueue = pathnameQueue[0], pathnameQueue[1:] + + fi, er := os.Lstat(pathname) + if er != nil { + err = errors.Wrap(er, "cannot Lstat") + return + } + + mode := fi.Mode() + + // Skip special files + if mode&skipModes != 0 { + continue + } + + // NOTE: Write pathname to hash, because hash ought to be as much a + // function of the names of the files and directories as their + // contents. Added benefit is that even empty directories and symbolic + // links will effect final hash value. + // + // NOTE: Throughout this function, we ignore return values from writing + // to the hash, because hash write always returns nil error. + _, _ = h.Write([]byte(pathname)[prefixLength:]) + + if mode&os.ModeSymlink != 0 { + referent, er := os.Readlink(pathname) + if er != nil { + err = errors.Wrap(er, "cannot Readlink") + return + } + // Write the referent to the hash and proceed to the next pathname + // in the queue. 
+ _, _ = h.Write([]byte(referent)) + continue + } + + fh, er := os.Open(pathname) + if er != nil { + err = errors.Wrap(er, "cannot Open") + return + } + + if fi.IsDir() { + childrenNames, er := fh.Readdirnames(0) // 0: read names of all children + if er != nil { + err = errors.Wrap(er, "cannot Readdirnames") + // NOTE: Even if there was an error reading the names of the + // directory entries, we still must close file handle for the + // open directory before we return. In this case, we simply skip + // sorting and adding entry names to the work queue beforehand. + childrenNames = nil + } + + // NOTE: Sort children names to ensure deterministic ordering of + // contents of each directory, ensuring hash remains same even if + // operating system returns same values in a different order on + // subsequent invocation. + sort.Strings(childrenNames) + + for _, childName := range childrenNames { + switch childName { + case ".", "..", "vendor", ".bzr", ".git", ".hg", ".svn": + // skip + default: + pathnameQueue = append(pathnameQueue, pathname+pathSeparator+childName) + } + } + } else { + _, _ = h.Write([]byte(strconv.FormatInt(fi.Size(), 10))) // format file size as base 10 integer + _, er = io.Copy(h, fh) + err = errors.Wrap(er, "cannot Copy") // errors.Wrap only wraps non-nil, so elide checking here + } + + // NOTE: Close the file handle to the open directory or file. 
+ if er = fh.Close(); err == nil { + err = errors.Wrap(er, "cannot Close") + } + if err != nil { + return // early termination iff error + } + } + + hash = fmt.Sprintf("%x", h.Sum(nil)) + return +} diff --git a/internal/fs/hash_test.go b/internal/fs/hash_test.go new file mode 100644 index 0000000000..720e2795fb --- /dev/null +++ b/internal/fs/hash_test.go @@ -0,0 +1,40 @@ +package fs + +import ( + "os" + "path/filepath" + "testing" +) + +func TestHashFromNodeWithFile(t *testing.T) { + actual, err := HashFromNode("", "./testdata/blob") + if err != nil { + t.Fatal(err) + } + expected := "9ccd71eec554488c99eb9205b6707cb70379e1aa637faad58ac875278786f2ff" + if actual != expected { + t.Errorf("Actual:\n\t%#q\nExpected:\n\t%#q", actual, expected) + } +} + +func TestHashFromNodeWithDirectory(t *testing.T) { + actual, err := HashFromNode("../fs", "testdata/recursive") + if err != nil { + t.Fatal(err) + } + expected := "432949ff3f1687e7e09b31fd81ecab3b7d9b68da5f543aad18e1930b5bd22e30" + if actual != expected { + t.Errorf("Actual:\n\t%#q\nExpected:\n\t%#q", actual, expected) + } +} + +var goSource = filepath.Join(os.Getenv("GOPATH"), "src") + +func BenchmarkHashFromNode(b *testing.B) { + for i := 0; i < b.N; i++ { + _, err := HashFromNode("", goSource) + if err != nil { + b.Fatal(err) + } + } +} diff --git a/internal/fs/testdata/blob b/internal/fs/testdata/blob new file mode 100644 index 0000000000..fd411f98b2 --- /dev/null +++ b/internal/fs/testdata/blob @@ -0,0 +1,7 @@ +fjdkal;fdjskc +xzc +axc +fdsf +adsf +das +fd \ No newline at end of file diff --git a/internal/fs/testdata/recursive/blob b/internal/fs/testdata/recursive/blob new file mode 100644 index 0000000000..fd411f98b2 --- /dev/null +++ b/internal/fs/testdata/recursive/blob @@ -0,0 +1,7 @@ +fjdkal;fdjskc +xzc +axc +fdsf +adsf +das +fd \ No newline at end of file diff --git a/internal/fs/testdata/recursive/emptyFile b/internal/fs/testdata/recursive/emptyFile new file mode 100644 index 0000000000..e69de29bb2 diff --git 
a/internal/fs/testdata/recursive/foo/bar b/internal/fs/testdata/recursive/foo/bar new file mode 100644 index 0000000000..9627a854aa --- /dev/null +++ b/internal/fs/testdata/recursive/foo/bar @@ -0,0 +1,3 @@ +fjdsakl;fd +vcafcds +vca diff --git a/internal/fs/testdata/recursive/vendor/skip1 b/internal/fs/testdata/recursive/vendor/skip1 new file mode 100644 index 0000000000..15989ffe40 --- /dev/null +++ b/internal/fs/testdata/recursive/vendor/skip1 @@ -0,0 +1 @@ +this file ought to be skipped \ No newline at end of file diff --git a/internal/fs/testdata/recursive/vendor/skip2 b/internal/fs/testdata/recursive/vendor/skip2 new file mode 100644 index 0000000000..e69de29bb2 diff --git a/internal/gps/_testdata/src/cycle/a.go b/internal/gps/_testdata/src/cycle/a.go index b6d9252e1e..0965bb7037 100644 --- a/internal/gps/_testdata/src/cycle/a.go +++ b/internal/gps/_testdata/src/cycle/a.go @@ -6,6 +6,7 @@ package cycle import ( "cycle/one" + "github.com/golang/dep/internal/gps" ) diff --git a/internal/gps/_testdata/src/cycle/one/a.go b/internal/gps/_testdata/src/cycle/one/a.go index 8dc21d7f0b..407c8051c2 100644 --- a/internal/gps/_testdata/src/cycle/one/a.go +++ b/internal/gps/_testdata/src/cycle/one/a.go @@ -6,6 +6,7 @@ package one import ( "cycle/two" + "github.com/golang/dep/internal/gps" ) diff --git a/internal/gps/_testdata/src/cycle/two/a.go b/internal/gps/_testdata/src/cycle/two/a.go index 8dbbd715f3..98ea406f9c 100644 --- a/internal/gps/_testdata/src/cycle/two/a.go +++ b/internal/gps/_testdata/src/cycle/two/a.go @@ -6,6 +6,7 @@ package two import ( "cycle" + "github.com/golang/dep/internal/gps" ) diff --git a/internal/gps/_testdata/src/missing/a.go b/internal/gps/_testdata/src/missing/a.go index 644731fccf..0cdcb394ba 100644 --- a/internal/gps/_testdata/src/missing/a.go +++ b/internal/gps/_testdata/src/missing/a.go @@ -7,8 +7,9 @@ package simple import ( "sort" - "github.com/golang/dep/internal/gps" "missing/missing" + + "github.com/golang/dep/internal/gps" ) var ( 
diff --git a/internal/gps/pkgtree/pkgtree.go b/internal/gps/pkgtree/pkgtree.go index f14598c286..dd5b539ae3 100644 --- a/internal/gps/pkgtree/pkgtree.go +++ b/internal/gps/pkgtree/pkgtree.go @@ -16,6 +16,9 @@ import ( "strconv" "strings" "unicode" + + "github.com/golang/dep/internal/fs" + "github.com/pkg/errors" ) // Package represents a Go package. It contains a subset of the information @@ -921,3 +924,202 @@ func uniq(a []string) []string { } return a[:i] } + +// LibraryStatus represents one of a handful of possible statuses of a +// particular subdirectory under vendor. +type LibraryStatus uint8 + +const ( + // NotInLock is used when a file system node exists for which there is no + // corresponding dependency in the lock file. + NotInLock LibraryStatus = iota + + // NotInTree is used when a lock file dependency exists for which there is + // no corresponding file system node. + NotInTree + + // NoMismatch is used when the digest for a dependency listed in the + // lockfile matches what is calculated from the file system. + NoMismatch + + // EmptyDigestInLock is used when the digest for a dependency listed in the + // lock file is the empty string. NOTE: Seems like a special case of + // DigestMismatchInLock. + EmptyDigestInLock + + // DigestMismatchInLock is used when the digest for a dependency listed in + // the lock file does not match what is calculated from the file system. + DigestMismatchInLock +) + +// String returns a human-friendly string representing LibraryStatus. 
+func (ls LibraryStatus) String() string { + switch ls { + case NotInTree: + return "not in tree" + case NotInLock: + return "not in lock" + case NoMismatch: + return "match" + case EmptyDigestInLock: + return "empty digest in lock" + case DigestMismatchInLock: + return "mismatch" + } + return "unknown" +} + +type fsnode struct { + pathname string + isRequiredAncestor bool + myIndex, parentIndex int +} + +func (n fsnode) String() string { + return fmt.Sprintf("[%d:%d %q %t]", n.myIndex, n.parentIndex, n.pathname, n.isRequiredAncestor) +} + +var skipModes = os.ModeDevice | os.ModeNamedPipe | os.ModeSocket | os.ModeCharDevice + +// sortedListOfDirectoryChildren returns a lexicographical sorted list of child +// nodes for the specified directory. +func sortedListOfDirectoryChildren(pathname string) (childrenNames []string, err error) { + fh, er := os.Open(pathname) + if er != nil { + err = errors.Wrap(er, "cannot Open") + return + } + + childrenNames, er = fh.Readdirnames(0) // 0: read names of all children + if er != nil { + err = errors.Wrap(er, "cannot Readdirnames") + // NOTE: Even if there was an error reading the names of the + // directory entries, we still must close file handle for the + // open directory before we return. In this case, we simply skip + // sorting and adding entry names to the work queue beforehand. + childrenNames = nil + } + + if len(childrenNames) > 0 { + sort.Strings(childrenNames) + } + + // NOTE: Close the file handle to the open directory. + if er = fh.Close(); err == nil { + err = errors.Wrap(er, "cannot Close") + } + + return +} + +const pathSeparator = string(filepath.Separator) + +// VerifyDepTree verifies dep tree according to expected digests, and returns an +// associative array of file system nodes and their respective status in +// accordance with the provided expectedSums parameter. 
+func VerifyDepTree(vendorPathname string, expectedSums map[string]string) (status map[string]LibraryStatus, err error) { + // NOTE: Ensure top level pathname is a directory + fi, er := os.Stat(vendorPathname) + if er != nil { + err = errors.Wrap(er, "cannot Stat") + return + } + if !fi.IsDir() { + err = errors.Errorf("cannot verify non directory: %q", vendorPathname) + return + } + + status = make(map[string]LibraryStatus) + vendorPathname = filepath.Clean(vendorPathname) + pathSeparator + prefixLength := len(vendorPathname) + + var otherNode *fsnode + currentNode := &fsnode{pathname: vendorPathname, parentIndex: -1, isRequiredAncestor: true} + nodes := []*fsnode{currentNode} + queue := []*fsnode{currentNode} + + // NOTE: Mark directories of expected dependencies and their parents as required + for pathname := range expectedSums { + status[pathname] = NotInTree + } + + for len(queue) > 0 { + // pop node from the queue (depth first traversal, reverse lexicographical order) + lq1 := len(queue) - 1 + currentNode, queue = queue[lq1], queue[:lq1] + + // log.Printf("NODE: %s", currentNode) + short := currentNode.pathname[prefixLength:] // chop off the prefix + if expectedSum, ok := expectedSums[short]; ok { + ls := EmptyDigestInLock + if expectedSum != "" { + ls = NoMismatch + actualSum, er := fs.HashFromNode(vendorPathname, short) + if err != nil { + err = errors.Wrap(er, "cannot compute dependency hash") + return + } + if actualSum != expectedSum { + ls = DigestMismatchInLock + } + } + status[short] = ls + + // NOTE: Mark current nodes and all parents: required. 
+ for pni := currentNode.myIndex; pni != -1; pni = otherNode.parentIndex { + otherNode = nodes[pni] + otherNode.isRequiredAncestor = true + // log.Printf("parent node: %s", otherNode) + } + + continue // do not need to process directory's contents + } + + childrenNames, er := sortedListOfDirectoryChildren(currentNode.pathname) + if er != nil { + err = errors.Wrap(er, "cannot get sorted list of directory children") + return + } + for _, childName := range childrenNames { + switch childName { + case ".", "..", "vendor", ".bzr", ".git", ".hg", ".svn": + // skip + default: + childPathname := filepath.Join(currentNode.pathname, childName) + otherNode = &fsnode{pathname: childPathname, myIndex: len(nodes), parentIndex: currentNode.myIndex} + + fi, er := os.Stat(childPathname) + if er != nil { + err = errors.Wrap(er, "cannot Stat") + return + } + // Skip non-interesting file system nodes + mode := fi.Mode() + if mode&skipModes != 0 || mode&os.ModeSymlink != 0 { + // log.Printf("DEBUG: skipping: %v; %q", mode, currentNode.pathname) + continue + } + + nodes = append(nodes, otherNode) + if fi.IsDir() { + queue = append(queue, otherNode) + } + } + } + + if err != nil { + return // early termination iff error + } + } + + // Walk nodes from end to beginning; ignore first node in list + for i := len(nodes) - 1; i > 0; i-- { + currentNode = nodes[i] + // log.Printf("FINAL: %s", currentNode) + if !currentNode.isRequiredAncestor && nodes[currentNode.parentIndex].isRequiredAncestor { + status[currentNode.pathname[prefixLength:]] = NotInLock + } + } + + return +} diff --git a/internal/gps/pkgtree/pkgtree_test.go b/internal/gps/pkgtree/pkgtree_test.go index ab134f02e7..c7c4648523 100644 --- a/internal/gps/pkgtree/pkgtree_test.go +++ b/internal/gps/pkgtree/pkgtree_test.go @@ -1983,3 +1983,50 @@ func getTestdataRootDir(t *testing.T) string { } return filepath.Join(cwd, "..", "_testdata") } + +func getVerifyTestdataRootDir(t *testing.T) string { + cwd, err := os.Getwd() + if err != nil { 
+ t.Fatal(err) + } + return filepath.Join(cwd, "..", "verify_testdata") +} + +func TestVerifyDepTree(t *testing.T) { + pathname := getVerifyTestdataRootDir(t) + + status, err := VerifyDepTree(pathname, map[string]string{ + "github.com/alice/alice1": "7428b8ac79007e9d46edd49b5798f7947c1ef73752cf31d4f7e451b8f25b3cb4", // match + "github.com/alice/alice2": "non matching digest", // mismatch + "github.com/charlie/notInTree": "", // not in tree superseedes empty hash + "github.com/bob/bob1": "1da9bd85997bf900d56a3b02b4f3e6232e1eb6948f6ba84ac54fae241ae9f980", // match + "github.com/bob/bob2": "", // empty hash + "launchpad.net/nifty": "05a970598d198edf8e8f51127f322e91be0afadf026c1192f709f293230c6930", // match at unusual dir level + }) + if err != nil { + t.Fatal(err) + } + + if actual, expected := len(status), 7; actual != expected { + t.Errorf("Actual: %v; Expected: %v", actual, expected) + } + + checkStatus := func(t *testing.T, status map[string]LibraryStatus, key string, expected LibraryStatus) { + actual, ok := status[key] + if ok != true { + t.Errorf("Expected key: %q", key) + return + } + if actual != expected { + t.Errorf("Key: %q; Actual: %v; Expected: %v", key, actual, expected) + } + } + + checkStatus(t, status, "github.com/alice/alice1", NoMismatch) + checkStatus(t, status, "github.com/alice/alice2", DigestMismatchInLock) + checkStatus(t, status, "github.com/alice/notInLock", NotInLock) + checkStatus(t, status, "github.com/bob/bob1", NoMismatch) + checkStatus(t, status, "github.com/bob/bob2", EmptyDigestInLock) + checkStatus(t, status, "github.com/charlie/notInTree", NotInTree) + checkStatus(t, status, "launchpad.net/nifty", NoMismatch) +} diff --git a/internal/gps/verify_testdata/github.com/alice/alice1/a.go b/internal/gps/verify_testdata/github.com/alice/alice1/a.go new file mode 100644 index 0000000000..e69de29bb2 diff --git a/internal/gps/verify_testdata/github.com/alice/alice2/a.go b/internal/gps/verify_testdata/github.com/alice/alice2/a.go new file 
mode 100644 index 0000000000..e69de29bb2 diff --git a/internal/gps/verify_testdata/github.com/alice/notInLock/a.go b/internal/gps/verify_testdata/github.com/alice/notInLock/a.go new file mode 100644 index 0000000000..e69de29bb2 diff --git a/internal/gps/verify_testdata/github.com/bob/bob1/a.go b/internal/gps/verify_testdata/github.com/bob/bob1/a.go new file mode 100644 index 0000000000..e69de29bb2 diff --git a/internal/gps/verify_testdata/github.com/bob/bob2/a.go b/internal/gps/verify_testdata/github.com/bob/bob2/a.go new file mode 100644 index 0000000000..e69de29bb2 diff --git a/internal/gps/verify_testdata/launchpad.net/nifty/a.go b/internal/gps/verify_testdata/launchpad.net/nifty/a.go new file mode 100644 index 0000000000..e69de29bb2 From ae2c30ed0bcad8ee7b1a3a1c626b4e2675de0807 Mon Sep 17 00:00:00 2001 From: "Karrick S. McDermott" Date: Sat, 5 Aug 2017 12:54:51 -0400 Subject: [PATCH 02/36] added package declarations at top of all test go files --- internal/gps/pkgtree/pkgtree_test.go | 6 +++--- internal/gps/verify_testdata/github.com/alice/alice1/a.go | 1 + internal/gps/verify_testdata/github.com/alice/alice2/a.go | 1 + .../gps/verify_testdata/github.com/alice/notInLock/a.go | 1 + internal/gps/verify_testdata/github.com/bob/bob1/a.go | 1 + internal/gps/verify_testdata/github.com/bob/bob2/a.go | 1 + internal/gps/verify_testdata/launchpad.net/nifty/a.go | 1 + 7 files changed, 9 insertions(+), 3 deletions(-) diff --git a/internal/gps/pkgtree/pkgtree_test.go b/internal/gps/pkgtree/pkgtree_test.go index c7c4648523..581456a4d4 100644 --- a/internal/gps/pkgtree/pkgtree_test.go +++ b/internal/gps/pkgtree/pkgtree_test.go @@ -1996,12 +1996,12 @@ func TestVerifyDepTree(t *testing.T) { pathname := getVerifyTestdataRootDir(t) status, err := VerifyDepTree(pathname, map[string]string{ - "github.com/alice/alice1": "7428b8ac79007e9d46edd49b5798f7947c1ef73752cf31d4f7e451b8f25b3cb4", // match + "github.com/alice/alice1": 
"f49816b46140e3e12e1175da2d9fabe18593f0cc576371cdee6b72bbf7a9d87c", // match "github.com/alice/alice2": "non matching digest", // mismatch "github.com/charlie/notInTree": "", // not in tree superseedes empty hash - "github.com/bob/bob1": "1da9bd85997bf900d56a3b02b4f3e6232e1eb6948f6ba84ac54fae241ae9f980", // match + "github.com/bob/bob1": "675dc8ab3389edbd2b42cbf0b15ae2371e0a046f114fe5298ffcb292bff03e30", // match "github.com/bob/bob2": "", // empty hash - "launchpad.net/nifty": "05a970598d198edf8e8f51127f322e91be0afadf026c1192f709f293230c6930", // match at unusual dir level + "launchpad.net/nifty": "6bf586f1f8cc58fb0875f9f7d4eadac3601536b670112031cf59a07abd220826", // match at unusual dir level }) if err != nil { t.Fatal(err) diff --git a/internal/gps/verify_testdata/github.com/alice/alice1/a.go b/internal/gps/verify_testdata/github.com/alice/alice1/a.go index e69de29bb2..62cd11d48d 100644 --- a/internal/gps/verify_testdata/github.com/alice/alice1/a.go +++ b/internal/gps/verify_testdata/github.com/alice/alice1/a.go @@ -0,0 +1 @@ +package alice1 diff --git a/internal/gps/verify_testdata/github.com/alice/alice2/a.go b/internal/gps/verify_testdata/github.com/alice/alice2/a.go index e69de29bb2..5bc90c5bd4 100644 --- a/internal/gps/verify_testdata/github.com/alice/alice2/a.go +++ b/internal/gps/verify_testdata/github.com/alice/alice2/a.go @@ -0,0 +1 @@ +package alice2 diff --git a/internal/gps/verify_testdata/github.com/alice/notInLock/a.go b/internal/gps/verify_testdata/github.com/alice/notInLock/a.go index e69de29bb2..be58629081 100644 --- a/internal/gps/verify_testdata/github.com/alice/notInLock/a.go +++ b/internal/gps/verify_testdata/github.com/alice/notInLock/a.go @@ -0,0 +1 @@ +package notInLock diff --git a/internal/gps/verify_testdata/github.com/bob/bob1/a.go b/internal/gps/verify_testdata/github.com/bob/bob1/a.go index e69de29bb2..4c81e12753 100644 --- a/internal/gps/verify_testdata/github.com/bob/bob1/a.go +++ 
b/internal/gps/verify_testdata/github.com/bob/bob1/a.go @@ -0,0 +1 @@ +package bob1 diff --git a/internal/gps/verify_testdata/github.com/bob/bob2/a.go b/internal/gps/verify_testdata/github.com/bob/bob2/a.go index e69de29bb2..8858e1cb7a 100644 --- a/internal/gps/verify_testdata/github.com/bob/bob2/a.go +++ b/internal/gps/verify_testdata/github.com/bob/bob2/a.go @@ -0,0 +1 @@ +package bob2 diff --git a/internal/gps/verify_testdata/launchpad.net/nifty/a.go b/internal/gps/verify_testdata/launchpad.net/nifty/a.go index e69de29bb2..8fff589d90 100644 --- a/internal/gps/verify_testdata/launchpad.net/nifty/a.go +++ b/internal/gps/verify_testdata/launchpad.net/nifty/a.go @@ -0,0 +1 @@ +package nifty From 1a76ff6fe40277da30b6e515d20de2abab639954 Mon Sep 17 00:00:00 2001 From: "Karrick S. McDermott" Date: Mon, 7 Aug 2017 15:07:32 -0400 Subject: [PATCH 03/36] refactor based on feedback from @sdboyer --- internal/fs/hash.go | 145 ------- internal/fs/hash_test.go | 40 -- internal/fs/testdata/blob | 7 - internal/fs/testdata/recursive/blob | 7 - internal/fs/testdata/recursive/emptyFile | 0 internal/fs/testdata/recursive/foo/bar | 3 - internal/fs/testdata/recursive/vendor/skip1 | 1 - internal/fs/testdata/recursive/vendor/skip2 | 0 internal/gps/pkgtree/digest.go | 359 ++++++++++++++++++ internal/gps/pkgtree/digest_test.go | 176 +++++++++ internal/gps/pkgtree/pkgtree.go | 202 ---------- internal/gps/pkgtree/pkgtree_test.go | 47 --- .../github.com/alice/alice1/a.go | 0 .../github.com/alice/alice2/a.go | 0 .../github.com/alice/notInLock/a.go | 0 .../github.com/bob/bob1/a.go | 0 .../github.com/bob/bob2/a.go | 0 .../launchpad.net/nifty/a.go | 0 18 files changed, 535 insertions(+), 452 deletions(-) delete mode 100644 internal/fs/hash.go delete mode 100644 internal/fs/hash_test.go delete mode 100644 internal/fs/testdata/blob delete mode 100644 internal/fs/testdata/recursive/blob delete mode 100644 internal/fs/testdata/recursive/emptyFile delete mode 100644 
internal/fs/testdata/recursive/foo/bar delete mode 100644 internal/fs/testdata/recursive/vendor/skip1 delete mode 100644 internal/fs/testdata/recursive/vendor/skip2 create mode 100644 internal/gps/pkgtree/digest.go create mode 100644 internal/gps/pkgtree/digest_test.go rename internal/gps/{verify_testdata => testdata_digest}/github.com/alice/alice1/a.go (100%) rename internal/gps/{verify_testdata => testdata_digest}/github.com/alice/alice2/a.go (100%) rename internal/gps/{verify_testdata => testdata_digest}/github.com/alice/notInLock/a.go (100%) rename internal/gps/{verify_testdata => testdata_digest}/github.com/bob/bob1/a.go (100%) rename internal/gps/{verify_testdata => testdata_digest}/github.com/bob/bob2/a.go (100%) rename internal/gps/{verify_testdata => testdata_digest}/launchpad.net/nifty/a.go (100%) diff --git a/internal/fs/hash.go b/internal/fs/hash.go deleted file mode 100644 index 0be4b4cd68..0000000000 --- a/internal/fs/hash.go +++ /dev/null @@ -1,145 +0,0 @@ -package fs - -import ( - "crypto/sha256" - "fmt" - "io" - "os" - "path/filepath" - "sort" - "strconv" - - "github.com/pkg/errors" -) - -const ( - pathSeparator = string(filepath.Separator) - skipModes = os.ModeDevice | os.ModeNamedPipe | os.ModeSocket | os.ModeCharDevice -) - -// HashFromNode returns a deterministic hash of the specified file system node, -// performing a breadth-first traversal of directories. While the specified -// prefix is joined with the pathname to walk the file system, the prefix string -// is eliminated from the pathname of the nodes encounted when hashing the -// pathnames. -// -// This function ignores any file system node named `vendor`, `.bzr`, `.git`, -// `.hg`, and `.svn`, as these are typically used as Version Control System -// (VCS) directories. 
-// -// Other than the `vendor` and VCS directories mentioned above, the calculated -// hash includes the pathname to every discovered file system node, whether it -// is an empty directory, a non-empty directory, empty file, non-empty file, or -// symbolic link. If a symbolic link, the referent name is included. If a -// non-empty file, the file's contents are incuded. If a non-empty directory, -// the contents of the directory are included. -// -// While filepath.Walk could have been used, that standard library function -// skips symbolic links, and for now, we want to hash the referent string of -// symbolic links. -func HashFromNode(prefix, pathname string) (hash string, err error) { - // Create a single hash instance for the entire operation, rather than a new - // hash for each node we encounter. - h := sha256.New() - - // "../../../vendor", "github.com/account/library" - prefix = filepath.Clean(prefix) + pathSeparator - prefixLength := len(prefix) - if prefixLength > 0 { - prefixLength += len(pathSeparator) // if not empty string, include len of path separator - } - joined := filepath.Join(prefix, pathname) - - // Initialize a work queue with the os-agnostic cleaned up pathname. Note - // that we use `filepath.Clean` rather than `filepath.Abs`, because we don't - // want the hash to be based on the absolute pathnames of the specified - // directory and contents. - pathnameQueue := []string{joined} - - for len(pathnameQueue) > 0 { - // NOTE: unshift a pathname from the queue - pathname, pathnameQueue = pathnameQueue[0], pathnameQueue[1:] - - fi, er := os.Lstat(pathname) - if er != nil { - err = errors.Wrap(er, "cannot Lstat") - return - } - - mode := fi.Mode() - - // Skip special files - if mode&skipModes != 0 { - continue - } - - // NOTE: Write pathname to hash, because hash ought to be as much a - // function of the names of the files and directories as their - // contents. 
Added benefit is that even empty directories and symbolic - // links will effect final hash value. - // - // NOTE: Throughout this function, we ignore return values from writing - // to the hash, because hash write always returns nil error. - _, _ = h.Write([]byte(pathname)[prefixLength:]) - - if mode&os.ModeSymlink != 0 { - referent, er := os.Readlink(pathname) - if er != nil { - err = errors.Wrap(er, "cannot Readlink") - return - } - // Write the referent to the hash and proceed to the next pathname - // in the queue. - _, _ = h.Write([]byte(referent)) - continue - } - - fh, er := os.Open(pathname) - if er != nil { - err = errors.Wrap(er, "cannot Open") - return - } - - if fi.IsDir() { - childrenNames, er := fh.Readdirnames(0) // 0: read names of all children - if er != nil { - err = errors.Wrap(er, "cannot Readdirnames") - // NOTE: Even if there was an error reading the names of the - // directory entries, we still must close file handle for the - // open directory before we return. In this case, we simply skip - // sorting and adding entry names to the work queue beforehand. - childrenNames = nil - } - - // NOTE: Sort children names to ensure deterministic ordering of - // contents of each directory, ensuring hash remains same even if - // operating system returns same values in a different order on - // subsequent invocation. - sort.Strings(childrenNames) - - for _, childName := range childrenNames { - switch childName { - case ".", "..", "vendor", ".bzr", ".git", ".hg", ".svn": - // skip - default: - pathnameQueue = append(pathnameQueue, pathname+pathSeparator+childName) - } - } - } else { - _, _ = h.Write([]byte(strconv.FormatInt(fi.Size(), 10))) // format file size as base 10 integer - _, er = io.Copy(h, fh) - err = errors.Wrap(er, "cannot Copy") // errors.Wrap only wraps non-nil, so elide checking here - } - - // NOTE: Close the file handle to the open directory or file. 
- if er = fh.Close(); err == nil { - err = errors.Wrap(er, "cannot Close") - } - if err != nil { - return // early termination iff error - } - } - - hash = fmt.Sprintf("%x", h.Sum(nil)) - return -} diff --git a/internal/fs/hash_test.go b/internal/fs/hash_test.go deleted file mode 100644 index 720e2795fb..0000000000 --- a/internal/fs/hash_test.go +++ /dev/null @@ -1,40 +0,0 @@ -package fs - -import ( - "os" - "path/filepath" - "testing" -) - -func TestHashFromNodeWithFile(t *testing.T) { - actual, err := HashFromNode("", "./testdata/blob") - if err != nil { - t.Fatal(err) - } - expected := "9ccd71eec554488c99eb9205b6707cb70379e1aa637faad58ac875278786f2ff" - if actual != expected { - t.Errorf("Actual:\n\t%#q\nExpected:\n\t%#q", actual, expected) - } -} - -func TestHashFromNodeWithDirectory(t *testing.T) { - actual, err := HashFromNode("../fs", "testdata/recursive") - if err != nil { - t.Fatal(err) - } - expected := "432949ff3f1687e7e09b31fd81ecab3b7d9b68da5f543aad18e1930b5bd22e30" - if actual != expected { - t.Errorf("Actual:\n\t%#q\nExpected:\n\t%#q", actual, expected) - } -} - -var goSource = filepath.Join(os.Getenv("GOPATH"), "src") - -func BenchmarkHashFromNode(b *testing.B) { - for i := 0; i < b.N; i++ { - _, err := HashFromNode("", goSource) - if err != nil { - b.Fatal(err) - } - } -} diff --git a/internal/fs/testdata/blob b/internal/fs/testdata/blob deleted file mode 100644 index fd411f98b2..0000000000 --- a/internal/fs/testdata/blob +++ /dev/null @@ -1,7 +0,0 @@ -fjdkal;fdjskc -xzc -axc -fdsf -adsf -das -fd \ No newline at end of file diff --git a/internal/fs/testdata/recursive/blob b/internal/fs/testdata/recursive/blob deleted file mode 100644 index fd411f98b2..0000000000 --- a/internal/fs/testdata/recursive/blob +++ /dev/null @@ -1,7 +0,0 @@ -fjdkal;fdjskc -xzc -axc -fdsf -adsf -das -fd \ No newline at end of file diff --git a/internal/fs/testdata/recursive/emptyFile b/internal/fs/testdata/recursive/emptyFile deleted file mode 100644 index 
e69de29bb2..0000000000 diff --git a/internal/fs/testdata/recursive/foo/bar b/internal/fs/testdata/recursive/foo/bar deleted file mode 100644 index 9627a854aa..0000000000 --- a/internal/fs/testdata/recursive/foo/bar +++ /dev/null @@ -1,3 +0,0 @@ -fjdsakl;fd -vcafcds -vca diff --git a/internal/fs/testdata/recursive/vendor/skip1 b/internal/fs/testdata/recursive/vendor/skip1 deleted file mode 100644 index 15989ffe40..0000000000 --- a/internal/fs/testdata/recursive/vendor/skip1 +++ /dev/null @@ -1 +0,0 @@ -this file ought to be skipped \ No newline at end of file diff --git a/internal/fs/testdata/recursive/vendor/skip2 b/internal/fs/testdata/recursive/vendor/skip2 deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/internal/gps/pkgtree/digest.go b/internal/gps/pkgtree/digest.go new file mode 100644 index 0000000000..a7422d7e9c --- /dev/null +++ b/internal/gps/pkgtree/digest.go @@ -0,0 +1,359 @@ +package pkgtree + +import ( + "bytes" + "crypto/sha256" + "fmt" + "io" + "os" + "path/filepath" + "sort" + "strconv" + + "github.com/pkg/errors" +) + +const ( + pathSeparator = string(filepath.Separator) + + // when walking vendor root hierarchy, ignore file system nodes of the + // following types. + skipModes = os.ModeDevice | os.ModeNamedPipe | os.ModeSocket | os.ModeCharDevice +) + +// DigestFromPathname returns a deterministic hash of the specified file system +// node, performing a breadth-first traversal of directories. While the +// specified prefix is joined with the pathname to walk the file system, the +// prefix string is eliminated from the pathname of the nodes encountered when +// hashing the pathnames, so that the resultant hash is agnostic to the absolute +// root directory path of the nodes being checked. +// +// This function ignores any file system node named `vendor`, `.bzr`, `.git`, +// `.hg`, and `.svn`, as these are typically used as Version Control System +// (VCS) directories. 
+// +// Other than the `vendor` and VCS directories mentioned above, the calculated +// hash includes the pathname to every discovered file system node, whether it +// is an empty directory, a non-empty directory, empty file, non-empty file, or +// symbolic link. If a symbolic link, the referent name is included. If a +// non-empty file, the file's contents are included. If a non-empty directory, +// the contents of the directory are included. +// +// While filepath.Walk could have been used, that standard library function +// skips symbolic links, and for now, we want the hash to include the symbolic +// link referents. +func DigestFromPathname(prefix, pathname string) ([]byte, error) { + // Create a single hash instance for the entire operation, rather than a new + // hash for each node we encounter. + h := sha256.New() + + // Initialize a work queue with the os-agnostic cleaned up pathname. Note + // that we use `filepath.Clean` rather than `filepath.Abs`, because the hash + // has pathnames which are relative to prefix, and there is no reason to + // convert to absolute pathname for every invocation of this function. + prefix = filepath.Clean(prefix) + pathSeparator + prefixLength := len(prefix) // store length to trim off pathnames later + pathnameQueue := []string{filepath.Join(prefix, pathname)} + + // As we enumerate over the queue and encounter a directory, its children + // will be added to the work queue. 
+ for len(pathnameQueue) > 0 { + // Unshift a pathname from the queue (breadth-first traversal of + // hierarchy) + pathname, pathnameQueue = pathnameQueue[0], pathnameQueue[1:] + + fi, err := os.Lstat(pathname) + if err != nil { + return nil, errors.Wrap(err, "cannot Lstat") + } + mode := fi.Mode() + + // Skip file system nodes we are not concerned with + if mode&skipModes != 0 { + continue + } + + // Write the prefix-stripped pathname to hash because the hash is as + // much a function of the relative names of the files and directories as + // it is their contents. Added benefit is that even empty directories + // and symbolic links will effect final hash value. Use + // `filepath.ToSlash` to ensure relative pathname is os-agnostic. + // + // NOTE: Throughout this function, we ignore return values from writing + // to the hash, because hash write always returns nil error. + _, _ = h.Write([]byte(filepath.ToSlash(pathname[prefixLength:]))) + + if mode&os.ModeSymlink != 0 { + referent, err := os.Readlink(pathname) + if err != nil { + return nil, errors.Wrap(err, "cannot Readlink") + } + // Write the os-agnostic referent to the hash and proceed to the + // next pathname in the queue. + _, _ = h.Write([]byte(filepath.ToSlash(referent))) + continue + } + + // For both directories and regular files, we must create a file system + // handle in order to read their contents. 
+ fh, err := os.Open(pathname) + if err != nil { + return nil, errors.Wrap(err, "cannot Open") + } + + if fi.IsDir() { + if childrenNames, err := sortedListOfDirectoryChildrenFromFileHandle(fh); err == nil { + for _, childName := range childrenNames { + switch childName { + case ".", "..", "vendor", ".bzr", ".git", ".hg", ".svn": + // skip + default: + pathnameQueue = append(pathnameQueue, filepath.Join(pathname, childName)) + } + } + } + } else { + _, _ = h.Write([]byte(strconv.FormatInt(fi.Size(), 10))) // format file size as base 10 integer + _, err = io.Copy(h, fh) // fast copy of file contents to hash + err = errors.Wrap(err, "cannot Copy") // errors.Wrap only wraps non-nil, so elide guard condition + } + + // Close the file handle to the open directory without masking possible + // previous error value. + if er := fh.Close(); err == nil { + err = errors.Wrap(er, "cannot Close") + } + if err != nil { + return nil, err // early termination iff error + } + } + + return h.Sum(nil), nil +} + +// VendorStatus represents one of a handful of possible statuses of a particular +// subdirectory under vendor. +type VendorStatus uint8 + +const ( + // NotInLock is used when a file system node exists for which there is no + // corresponding dependency in the lock file. + NotInLock VendorStatus = iota + + // NotInTree is used when a lock file dependency exists for which there is + // no corresponding file system node. + NotInTree + + // NoMismatch is used when the digest for a dependency listed in the + // lockfile matches what is calculated from the file system. + NoMismatch + + // EmptyDigestInLock is used when the digest for a dependency listed in the + // lock file is the empty string. NOTE: Seems like a special case of + // DigestMismatchInLock. + EmptyDigestInLock + + // DigestMismatchInLock is used when the digest for a dependency listed in + // the lock file does not match what is calculated from the file system. 
+ DigestMismatchInLock +) + +func (ls VendorStatus) String() string { + switch ls { + case NotInTree: + return "not in tree" + case NotInLock: + return "not in lock" + case NoMismatch: + return "match" + case EmptyDigestInLock: + return "empty digest in lock" + case DigestMismatchInLock: + return "mismatch" + } + return "unknown" +} + +type fsnode struct { + pathname string + isRequiredAncestor bool + myIndex, parentIndex int +} + +func (n fsnode) String() string { + return fmt.Sprintf("[%d:%d %q %t]", n.myIndex, n.parentIndex, n.pathname, n.isRequiredAncestor) +} + +// sortedListOfDirectoryChildrenFromPathname returns a lexicographical sorted +// list of child nodes for the specified directory. +func sortedListOfDirectoryChildrenFromPathname(pathname string) ([]string, error) { + fh, err := os.Open(pathname) + if err != nil { + return nil, errors.Wrap(err, "cannot Open") + } + + childrenNames, err := sortedListOfDirectoryChildrenFromFileHandle(fh) + + // Close the file handle to the open directory without masking possible + // previous error value. + if er := fh.Close(); err == nil { + err = errors.Wrap(er, "cannot Close") + } + + return childrenNames, err +} + +// sortedListOfDirectoryChildrenFromFileHandle returns a lexicographical sorted +// list of child nodes for the specified open file handle to a directory. This +// function is written once to avoid writing the logic in two places. +func sortedListOfDirectoryChildrenFromFileHandle(fh *os.File) ([]string, error) { + childrenNames, err := fh.Readdirnames(0) // 0: read names of all children + if err != nil { + return nil, errors.Wrap(err, "cannot Readdirnames") + } + if len(childrenNames) > 0 { + sort.Strings(childrenNames) + } + return childrenNames, nil +} + +// VerifyDepTree verifies dependency tree according to expected digest sums, and +// returns an associative array of file system nodes and their respective vendor +// status, in accordance with the provided expected digest sums parameter. 
+func VerifyDepTree(vendorPathname string, wantSums map[string][]byte) (map[string]VendorStatus, error) { + // NOTE: Ensure top level pathname is a directory + fi, err := os.Stat(vendorPathname) + if err != nil { + return nil, errors.Wrap(err, "cannot Stat") + } + if !fi.IsDir() { + return nil, errors.Errorf("cannot verify non directory: %q", vendorPathname) + } + + vendorPathname = filepath.Clean(vendorPathname) + pathSeparator + prefixLength := len(vendorPathname) + + var otherNode *fsnode + currentNode := &fsnode{pathname: vendorPathname, parentIndex: -1, isRequiredAncestor: true} + queue := []*fsnode{currentNode} // queue of directories that must be inspected + + // In order to identify all file system nodes that are not in the lock file, + // represented by the specified expected sums parameter, and in order to + // only report the top level of a subdirectory of file system nodes, rather + // than every node internal to them, we will create a tree of nodes stored + // in a slice. We do this because we do not know at what level a project + // exists at. Some projects are fewer than and some projects more than the + // typical three layer subdirectory under the vendor root directory. + // + // For a following few examples, assume the below vendor root directory: + // + // github.com/alice/alice1/a1.go + // github.com/alice/alice2/a2.go + // github.com/bob/bob1/b1.go + // github.com/bob/bob2/b2.go + // launchpad.net/nifty/n1.go + // + // 1) If only the `alice1` and `alice2` projects were in the lock file, we'd + // prefer the output to state that `github.com/bob` is `NotInLock`. + // + // 2) If `alice1`, `alice2`, and `bob1` were in the lock file, we'd want to + // report `github.com/bob/bob2` as `NotInLock`. + // + // 3) If none of `alice1`, `alice2`, `bob1`, or `bob2` were in the lock + // file, the entire `github.com` directory would be reported as `NotInLock`. 
+ // + // Each node in our tree has the slice index of its parent node, so once we + // can categorically state a particular directory is required because it is + // in the lock file, we can mark all of its ancestors as also being + // required. Then, when we finish walking the directory hierarchy, any nodes + // which are not required but have a required parent will be marked as + // `NotInLock`. + nodes := []*fsnode{currentNode} + + // Mark directories of expected projects as required. When the respective + // project is found in the vendor root hierarchy, its status will be updated + // to reflect whether its digest is empty, or whether or not it matches the + // expected digest. + status := make(map[string]VendorStatus) + for pathname := range wantSums { + status[pathname] = NotInTree + } + + for len(queue) > 0 { + // pop node from the queue (depth first traversal, reverse lexicographical order) + lq1 := len(queue) - 1 + currentNode, queue = queue[lq1], queue[:lq1] + + // log.Printf("NODE: %s", currentNode) + short := currentNode.pathname[prefixLength:] // chop off the vendor root prefix, including the path separator + if expectedSum, ok := wantSums[short]; ok { + ls := EmptyDigestInLock + if len(expectedSum) > 0 { + ls = NoMismatch + projectSum, err := DigestFromPathname(vendorPathname, short) + if err != nil { + return nil, errors.Wrap(err, "cannot compute dependency hash") + } + if !bytes.Equal(projectSum, expectedSum) { + ls = DigestMismatchInLock + } + } + status[short] = ls + + // NOTE: Mark current nodes and all parents: required. 
+ for pni := currentNode.myIndex; pni != -1; pni = otherNode.parentIndex { + otherNode = nodes[pni] + otherNode.isRequiredAncestor = true + // log.Printf("parent node: %s", otherNode) + } + + continue // do not need to process directory's contents + } + + childrenNames, err := sortedListOfDirectoryChildrenFromPathname(currentNode.pathname) + if err != nil { + return nil, errors.Wrap(err, "cannot get sorted list of directory children") + } + for _, childName := range childrenNames { + switch childName { + case ".", "..", "vendor", ".bzr", ".git", ".hg", ".svn": + // skip + default: + childPathname := filepath.Join(currentNode.pathname, childName) + otherNode = &fsnode{pathname: childPathname, myIndex: len(nodes), parentIndex: currentNode.myIndex} + + fi, err := os.Stat(childPathname) + if err != nil { + return nil, errors.Wrap(err, "cannot Stat") + } + // Skip non-interesting file system nodes + mode := fi.Mode() + if mode&skipModes != 0 || mode&os.ModeSymlink != 0 { + // log.Printf("DEBUG: skipping: %v; %q", mode, currentNode.pathname) + continue + } + + nodes = append(nodes, otherNode) + if fi.IsDir() { + queue = append(queue, otherNode) + } + } + } + + if err != nil { + return nil, err // early termination iff error + } + } + + // Ignoring first node in the list, walk nodes from end to + // beginning. Whenever a node is not required, but its parent is required, + // then that node and all under it ought to be marked as `NotInLock`. 
+ for i := len(nodes) - 1; i > 0; i-- { + currentNode = nodes[i] + if !currentNode.isRequiredAncestor && nodes[currentNode.parentIndex].isRequiredAncestor { + status[currentNode.pathname[prefixLength:]] = NotInLock + } + } + + return status, nil +} diff --git a/internal/gps/pkgtree/digest_test.go b/internal/gps/pkgtree/digest_test.go new file mode 100644 index 0000000000..742e73b125 --- /dev/null +++ b/internal/gps/pkgtree/digest_test.go @@ -0,0 +1,176 @@ +package pkgtree + +import ( + "bytes" + "os" + "path/filepath" + "testing" +) + +// prefix == "." +// symlink referent normalization + +func getTestdataVerifyRoot(t *testing.T) string { + cwd, err := os.Getwd() + if err != nil { + t.Fatal(err) + } + return filepath.Join(cwd, "..", "testdata_digest") +} + +func TestDigestFromPathnameWithFile(t *testing.T) { + relativePathname := "github.com/alice/alice1/a.go" + want := []byte{ + 0x36, 0xe6, 0x36, 0xcf, 0x62, 0x5e, 0x18, 0xcc, + 0x0e, 0x0d, 0xed, 0xad, 0x6d, 0x69, 0x08, 0x80, + 0x54, 0xec, 0x69, 0xde, 0x58, 0xa1, 0x2e, 0x09, + 0x9f, 0x8f, 0x4a, 0xba, 0x44, 0x2f, 0xae, 0xc8, + } + + // NOTE: Create the hash using both an absolute and a relative pathname to + // ensure hash ignores prefix. 
+ + t.Run("AbsolutePrefix", func(t *testing.T) { + prefix := getTestdataVerifyRoot(t) + got, err := DigestFromPathname(prefix, relativePathname) + if err != nil { + t.Fatal(err) + } + if !bytes.Equal(got, want) { + t.Errorf("\n(GOT):\n\t%x\n(WNT):\n\t%x", got, want) + } + }) + + t.Run("RelativePrefix", func(t *testing.T) { + prefix := "../testdata_digest" + got, err := DigestFromPathname(prefix, relativePathname) + if err != nil { + t.Fatal(err) + } + if !bytes.Equal(got, want) { + t.Errorf("\n(GOT):\n\t%x\n(WNT):\n\t%x", got, want) + } + }) +} + +func TestDigestFromPathnameWithDirectory(t *testing.T) { + relativePathname := "launchpad.net/nifty" + want := []byte{ + 0x9c, 0x09, 0xeb, 0x81, 0xd0, 0x47, 0x4f, 0x68, + 0xb5, 0x50, 0xe0, 0x94, 0x94, 0x9a, 0x41, 0x80, + 0x28, 0xef, 0x63, 0x35, 0x6f, 0x64, 0x92, 0x7e, + 0x6a, 0x43, 0xd7, 0x9d, 0x45, 0xf9, 0x8a, 0xeb, + } + + // NOTE: Create the hash using both an absolute and a relative pathname to + // ensure hash ignores prefix. + + t.Run("AbsolutePrefix", func(t *testing.T) { + prefix := getTestdataVerifyRoot(t) + got, err := DigestFromPathname(prefix, relativePathname) + if err != nil { + t.Fatal(err) + } + if !bytes.Equal(got, want) { + t.Errorf("\n(GOT):\n\t%x\n(WNT):\n\t%x", got, want) + } + }) + + t.Run("RelativePrefix", func(t *testing.T) { + prefix := "../testdata_digest" + got, err := DigestFromPathname(prefix, relativePathname) + if err != nil { + t.Fatal(err) + } + if !bytes.Equal(got, want) { + t.Errorf("\n(GOT):\n\t%x\n(WNT):\n\t%x", got, want) + } + }) + +} + +func TestVerifyDepTree(t *testing.T) { + vendorRoot := getTestdataVerifyRoot(t) + + status, err := VerifyDepTree(vendorRoot, map[string][]byte{ + // matching result + "github.com/alice/alice1": []byte{ + 0x53, 0xed, 0x38, 0xc0, 0x69, 0x34, 0xf0, 0x76, + 0x26, 0x62, 0xe5, 0xa1, 0xa4, 0xa9, 0xe9, 0x23, + 0x73, 0xf0, 0x02, 0x03, 0xfa, 0x43, 0xd0, 0x7e, + 0x7a, 0x29, 0x89, 0xae, 0x4c, 0x44, 0x50, 0x11, + }, + + // mismatching result + 
"github.com/alice/alice2": []byte("non matching digest"), + + // not in tree result ought to supersede empty hash result + "github.com/charlie/notInTree": nil, + + // another matching result + "github.com/bob/bob1": []byte{ + 0x75, 0xc0, 0x5a, 0x13, 0xb0, 0xd6, 0x19, 0x5d, + 0x36, 0x26, 0xf4, 0xbc, 0xcb, 0x4d, 0x92, 0x08, + 0x4d, 0x03, 0xe0, 0x70, 0x11, 0x53, 0xcd, 0x5c, + 0x6a, 0x53, 0xc6, 0x31, 0x84, 0x34, 0x4f, 0xf4, + }, + + // empty hash result + "github.com/bob/bob2": nil, + + // matching result at seldomly found directory level + "launchpad.net/nifty": []byte{ + 0x9c, 0x09, 0xeb, 0x81, 0xd0, 0x47, 0x4f, 0x68, 0xb5, 0x50, 0xe0, 0x94, 0x94, 0x9a, 0x41, 0x80, 0x28, 0xef, 0x63, 0x35, 0x6f, 0x64, 0x92, 0x7e, 0x6a, 0x43, 0xd7, 0x9d, 0x45, 0xf9, 0x8a, 0xeb, + }, + }) + if err != nil { + t.Fatal(err) + } + + // NOTE: When true, display the vendor status map. + if false { + for k := range status { + digest, err := DigestFromPathname(vendorRoot, k) + if err != nil { + t.Error(err) + } + t.Logf("%q\t%x", k, digest) + } + } + + if got, want := len(status), 7; got != want { + t.Errorf("\n(GOT): %v; (WNT): %v", got, want) + } + + checkStatus := func(t *testing.T, status map[string]VendorStatus, key string, want VendorStatus) { + got, ok := status[key] + if ok != true { + t.Errorf("Want key: %q", key) + return + } + if got != want { + t.Errorf("Key: %q; (GOT): %v; (WNT): %v", key, got, want) + } + } + + checkStatus(t, status, "github.com/alice/alice1", NoMismatch) + checkStatus(t, status, "github.com/alice/alice2", DigestMismatchInLock) + checkStatus(t, status, "github.com/alice/notInLock", NotInLock) + checkStatus(t, status, "github.com/bob/bob1", NoMismatch) + checkStatus(t, status, "github.com/bob/bob2", EmptyDigestInLock) + checkStatus(t, status, "github.com/charlie/notInTree", NotInTree) + checkStatus(t, status, "launchpad.net/nifty", NoMismatch) +} + +func BenchmarkVerifyDepTree(b *testing.B) { + b.Skip("Eliding benchmark of user's Go source directory") + + prefix 
:= filepath.Join(os.Getenv("GOPATH"), "src") + + for i := 0; i < b.N; i++ { + _, err := VerifyDepTree(prefix, nil) + if err != nil { + b.Fatal(err) + } + } +} diff --git a/internal/gps/pkgtree/pkgtree.go b/internal/gps/pkgtree/pkgtree.go index dd5b539ae3..f14598c286 100644 --- a/internal/gps/pkgtree/pkgtree.go +++ b/internal/gps/pkgtree/pkgtree.go @@ -16,9 +16,6 @@ import ( "strconv" "strings" "unicode" - - "github.com/golang/dep/internal/fs" - "github.com/pkg/errors" ) // Package represents a Go package. It contains a subset of the information @@ -924,202 +921,3 @@ func uniq(a []string) []string { } return a[:i] } - -// LibraryStatus represents one of a handful of possible statuses of a -// particular subdirectory under vendor. -type LibraryStatus uint8 - -const ( - // NotInLock is used when a file system node exists for which there is no - // corresponding dependency in the lock file. - NotInLock LibraryStatus = iota - - // NotInTree is used when a lock file dependency exists for which there is - // no corresponding file system node. - NotInTree - - // NoMismatch is used when the digest for a dependency listed in the - // lockfile matches what is calculated from the file system. - NoMismatch - - // EmptyDigestInLock is used when the digest for a dependency listed in the - // lock file is the empty string. NOTE: Seems like a special case of - // DigestMismatchInLock. - EmptyDigestInLock - - // DigestMismatchInLock is used when the digest for a dependency listed in - // the lock file does not match what is calculated from the file system. - DigestMismatchInLock -) - -// String returns a human-friendly string representing LibraryStatus. 
-func (ls LibraryStatus) String() string { - switch ls { - case NotInTree: - return "not in tree" - case NotInLock: - return "not in lock" - case NoMismatch: - return "match" - case EmptyDigestInLock: - return "empty digest in lock" - case DigestMismatchInLock: - return "mismatch" - } - return "unknown" -} - -type fsnode struct { - pathname string - isRequiredAncestor bool - myIndex, parentIndex int -} - -func (n fsnode) String() string { - return fmt.Sprintf("[%d:%d %q %t]", n.myIndex, n.parentIndex, n.pathname, n.isRequiredAncestor) -} - -var skipModes = os.ModeDevice | os.ModeNamedPipe | os.ModeSocket | os.ModeCharDevice - -// sortedListOfDirectoryChildren returns a lexicographical sorted list of child -// nodes for the specified directory. -func sortedListOfDirectoryChildren(pathname string) (childrenNames []string, err error) { - fh, er := os.Open(pathname) - if er != nil { - err = errors.Wrap(er, "cannot Open") - return - } - - childrenNames, er = fh.Readdirnames(0) // 0: read names of all children - if er != nil { - err = errors.Wrap(er, "cannot Readdirnames") - // NOTE: Even if there was an error reading the names of the - // directory entries, we still must close file handle for the - // open directory before we return. In this case, we simply skip - // sorting and adding entry names to the work queue beforehand. - childrenNames = nil - } - - if len(childrenNames) > 0 { - sort.Strings(childrenNames) - } - - // NOTE: Close the file handle to the open directory. - if er = fh.Close(); err == nil { - err = errors.Wrap(er, "cannot Close") - } - - return -} - -const pathSeparator = string(filepath.Separator) - -// VerifyDepTree verifies dep tree according to expected digests, and returns an -// associative array of file system nodes and their respective status in -// accordance with the provided expectedSums parameter. 
-func VerifyDepTree(vendorPathname string, expectedSums map[string]string) (status map[string]LibraryStatus, err error) { - // NOTE: Ensure top level pathname is a directory - fi, er := os.Stat(vendorPathname) - if er != nil { - err = errors.Wrap(er, "cannot Stat") - return - } - if !fi.IsDir() { - err = errors.Errorf("cannot verify non directory: %q", vendorPathname) - return - } - - status = make(map[string]LibraryStatus) - vendorPathname = filepath.Clean(vendorPathname) + pathSeparator - prefixLength := len(vendorPathname) - - var otherNode *fsnode - currentNode := &fsnode{pathname: vendorPathname, parentIndex: -1, isRequiredAncestor: true} - nodes := []*fsnode{currentNode} - queue := []*fsnode{currentNode} - - // NOTE: Mark directories of expected dependencies and their parents as required - for pathname := range expectedSums { - status[pathname] = NotInTree - } - - for len(queue) > 0 { - // pop node from the queue (depth first traversal, reverse lexicographical order) - lq1 := len(queue) - 1 - currentNode, queue = queue[lq1], queue[:lq1] - - // log.Printf("NODE: %s", currentNode) - short := currentNode.pathname[prefixLength:] // chop off the prefix - if expectedSum, ok := expectedSums[short]; ok { - ls := EmptyDigestInLock - if expectedSum != "" { - ls = NoMismatch - actualSum, er := fs.HashFromNode(vendorPathname, short) - if err != nil { - err = errors.Wrap(er, "cannot compute dependency hash") - return - } - if actualSum != expectedSum { - ls = DigestMismatchInLock - } - } - status[short] = ls - - // NOTE: Mark current nodes and all parents: required. 
- for pni := currentNode.myIndex; pni != -1; pni = otherNode.parentIndex { - otherNode = nodes[pni] - otherNode.isRequiredAncestor = true - // log.Printf("parent node: %s", otherNode) - } - - continue // do not need to process directory's contents - } - - childrenNames, er := sortedListOfDirectoryChildren(currentNode.pathname) - if er != nil { - err = errors.Wrap(er, "cannot get sorted list of directory children") - return - } - for _, childName := range childrenNames { - switch childName { - case ".", "..", "vendor", ".bzr", ".git", ".hg", ".svn": - // skip - default: - childPathname := filepath.Join(currentNode.pathname, childName) - otherNode = &fsnode{pathname: childPathname, myIndex: len(nodes), parentIndex: currentNode.myIndex} - - fi, er := os.Stat(childPathname) - if er != nil { - err = errors.Wrap(er, "cannot Stat") - return - } - // Skip non-interesting file system nodes - mode := fi.Mode() - if mode&skipModes != 0 || mode&os.ModeSymlink != 0 { - // log.Printf("DEBUG: skipping: %v; %q", mode, currentNode.pathname) - continue - } - - nodes = append(nodes, otherNode) - if fi.IsDir() { - queue = append(queue, otherNode) - } - } - } - - if err != nil { - return // early termination iff error - } - } - - // Walk nodes from end to beginning; ignore first node in list - for i := len(nodes) - 1; i > 0; i-- { - currentNode = nodes[i] - // log.Printf("FINAL: %s", currentNode) - if !currentNode.isRequiredAncestor && nodes[currentNode.parentIndex].isRequiredAncestor { - status[currentNode.pathname[prefixLength:]] = NotInLock - } - } - - return -} diff --git a/internal/gps/pkgtree/pkgtree_test.go b/internal/gps/pkgtree/pkgtree_test.go index 581456a4d4..ab134f02e7 100644 --- a/internal/gps/pkgtree/pkgtree_test.go +++ b/internal/gps/pkgtree/pkgtree_test.go @@ -1983,50 +1983,3 @@ func getTestdataRootDir(t *testing.T) string { } return filepath.Join(cwd, "..", "_testdata") } - -func getVerifyTestdataRootDir(t *testing.T) string { - cwd, err := os.Getwd() - if err != nil { 
- t.Fatal(err) - } - return filepath.Join(cwd, "..", "verify_testdata") -} - -func TestVerifyDepTree(t *testing.T) { - pathname := getVerifyTestdataRootDir(t) - - status, err := VerifyDepTree(pathname, map[string]string{ - "github.com/alice/alice1": "f49816b46140e3e12e1175da2d9fabe18593f0cc576371cdee6b72bbf7a9d87c", // match - "github.com/alice/alice2": "non matching digest", // mismatch - "github.com/charlie/notInTree": "", // not in tree superseedes empty hash - "github.com/bob/bob1": "675dc8ab3389edbd2b42cbf0b15ae2371e0a046f114fe5298ffcb292bff03e30", // match - "github.com/bob/bob2": "", // empty hash - "launchpad.net/nifty": "6bf586f1f8cc58fb0875f9f7d4eadac3601536b670112031cf59a07abd220826", // match at unusual dir level - }) - if err != nil { - t.Fatal(err) - } - - if actual, expected := len(status), 7; actual != expected { - t.Errorf("Actual: %v; Expected: %v", actual, expected) - } - - checkStatus := func(t *testing.T, status map[string]LibraryStatus, key string, expected LibraryStatus) { - actual, ok := status[key] - if ok != true { - t.Errorf("Expected key: %q", key) - return - } - if actual != expected { - t.Errorf("Key: %q; Actual: %v; Expected: %v", key, actual, expected) - } - } - - checkStatus(t, status, "github.com/alice/alice1", NoMismatch) - checkStatus(t, status, "github.com/alice/alice2", DigestMismatchInLock) - checkStatus(t, status, "github.com/alice/notInLock", NotInLock) - checkStatus(t, status, "github.com/bob/bob1", NoMismatch) - checkStatus(t, status, "github.com/bob/bob2", EmptyDigestInLock) - checkStatus(t, status, "github.com/charlie/notInTree", NotInTree) - checkStatus(t, status, "launchpad.net/nifty", NoMismatch) -} diff --git a/internal/gps/verify_testdata/github.com/alice/alice1/a.go b/internal/gps/testdata_digest/github.com/alice/alice1/a.go similarity index 100% rename from internal/gps/verify_testdata/github.com/alice/alice1/a.go rename to internal/gps/testdata_digest/github.com/alice/alice1/a.go diff --git 
a/internal/gps/verify_testdata/github.com/alice/alice2/a.go b/internal/gps/testdata_digest/github.com/alice/alice2/a.go similarity index 100% rename from internal/gps/verify_testdata/github.com/alice/alice2/a.go rename to internal/gps/testdata_digest/github.com/alice/alice2/a.go diff --git a/internal/gps/verify_testdata/github.com/alice/notInLock/a.go b/internal/gps/testdata_digest/github.com/alice/notInLock/a.go similarity index 100% rename from internal/gps/verify_testdata/github.com/alice/notInLock/a.go rename to internal/gps/testdata_digest/github.com/alice/notInLock/a.go diff --git a/internal/gps/verify_testdata/github.com/bob/bob1/a.go b/internal/gps/testdata_digest/github.com/bob/bob1/a.go similarity index 100% rename from internal/gps/verify_testdata/github.com/bob/bob1/a.go rename to internal/gps/testdata_digest/github.com/bob/bob1/a.go diff --git a/internal/gps/verify_testdata/github.com/bob/bob2/a.go b/internal/gps/testdata_digest/github.com/bob/bob2/a.go similarity index 100% rename from internal/gps/verify_testdata/github.com/bob/bob2/a.go rename to internal/gps/testdata_digest/github.com/bob/bob2/a.go diff --git a/internal/gps/verify_testdata/launchpad.net/nifty/a.go b/internal/gps/testdata_digest/launchpad.net/nifty/a.go similarity index 100% rename from internal/gps/verify_testdata/launchpad.net/nifty/a.go rename to internal/gps/testdata_digest/launchpad.net/nifty/a.go From 66fe4eb356f2a08461634848522e7f632f0457e3 Mon Sep 17 00:00:00 2001 From: "Karrick S. 
McDermott" Date: Mon, 7 Aug 2017 15:38:22 -0400 Subject: [PATCH 04/36] writes NULL byte after writing to hash * prevents many accidental hash collisions --- internal/gps/pkgtree/digest.go | 41 ++++++++++++++++++----------- internal/gps/pkgtree/digest_test.go | 28 ++++++++------------ 2 files changed, 36 insertions(+), 33 deletions(-) diff --git a/internal/gps/pkgtree/digest.go b/internal/gps/pkgtree/digest.go index a7422d7e9c..18832bc869 100644 --- a/internal/gps/pkgtree/digest.go +++ b/internal/gps/pkgtree/digest.go @@ -4,6 +4,7 @@ import ( "bytes" "crypto/sha256" "fmt" + "hash" "io" "os" "path/filepath" @@ -78,10 +79,7 @@ func DigestFromPathname(prefix, pathname string) ([]byte, error) { // it is their contents. Added benefit is that even empty directories // and symbolic links will effect final hash value. Use // `filepath.ToSlash` to ensure relative pathname is os-agnostic. - // - // NOTE: Throughout this function, we ignore return values from writing - // to the hash, because hash write always returns nil error. - _, _ = h.Write([]byte(filepath.ToSlash(pathname[prefixLength:]))) + writeBytesWithNull(h, []byte(filepath.ToSlash(pathname[prefixLength:]))) if mode&os.ModeSymlink != 0 { referent, err := os.Readlink(pathname) @@ -90,7 +88,7 @@ func DigestFromPathname(prefix, pathname string) ([]byte, error) { } // Write the os-agnostic referent to the hash and proceed to the // next pathname in the queue. 
- _, _ = h.Write([]byte(filepath.ToSlash(referent))) + writeBytesWithNull(h, []byte(filepath.ToSlash(referent))) continue } @@ -102,20 +100,23 @@ func DigestFromPathname(prefix, pathname string) ([]byte, error) { } if fi.IsDir() { - if childrenNames, err := sortedListOfDirectoryChildrenFromFileHandle(fh); err == nil { - for _, childName := range childrenNames { - switch childName { - case ".", "..", "vendor", ".bzr", ".git", ".hg", ".svn": - // skip - default: - pathnameQueue = append(pathnameQueue, filepath.Join(pathname, childName)) - } + childrenNames, err := sortedListOfDirectoryChildrenFromFileHandle(fh) + if err != nil { + _ = fh.Close() // already have an error reading directory; ignore Close result. + return nil, errors.Wrap(err, "cannot get list of directory children") + } + for _, childName := range childrenNames { + switch childName { + case ".", "..", "vendor", ".bzr", ".git", ".hg", ".svn": + // skip + default: + pathnameQueue = append(pathnameQueue, filepath.Join(pathname, childName)) } } } else { - _, _ = h.Write([]byte(strconv.FormatInt(fi.Size(), 10))) // format file size as base 10 integer - _, err = io.Copy(h, fh) // fast copy of file contents to hash - err = errors.Wrap(err, "cannot Copy") // errors.Wrap only wraps non-nil, so elide guard condition + writeBytesWithNull(h, []byte(strconv.FormatInt(fi.Size(), 10))) // format file size as base 10 integer + _, err = io.Copy(h, fh) // fast copy of file contents to hash + err = errors.Wrap(err, "cannot Copy") // errors.Wrap only wraps non-nil, so elide guard condition } // Close the file handle to the open directory without masking possible @@ -131,6 +132,14 @@ func DigestFromPathname(prefix, pathname string) ([]byte, error) { return h.Sum(nil), nil } +// writeBytesWithNull appends the specified data to the specified hash, followed by +// the NULL byte, in order to make accidental hash collisions less likely. 
+func writeBytesWithNull(h hash.Hash, data []byte) { + // Ignore return values from writing to the hash, because hash write always + // returns nil error. + _, _ = h.Write(append(data, 0)) +} + // VendorStatus represents one of a handful of possible statuses of a particular // subdirectory under vendor. type VendorStatus uint8 diff --git a/internal/gps/pkgtree/digest_test.go b/internal/gps/pkgtree/digest_test.go index 742e73b125..87c38f877b 100644 --- a/internal/gps/pkgtree/digest_test.go +++ b/internal/gps/pkgtree/digest_test.go @@ -21,10 +21,10 @@ func getTestdataVerifyRoot(t *testing.T) string { func TestDigestFromPathnameWithFile(t *testing.T) { relativePathname := "github.com/alice/alice1/a.go" want := []byte{ - 0x36, 0xe6, 0x36, 0xcf, 0x62, 0x5e, 0x18, 0xcc, - 0x0e, 0x0d, 0xed, 0xad, 0x6d, 0x69, 0x08, 0x80, - 0x54, 0xec, 0x69, 0xde, 0x58, 0xa1, 0x2e, 0x09, - 0x9f, 0x8f, 0x4a, 0xba, 0x44, 0x2f, 0xae, 0xc8, + 0x1e, 0x86, 0x4d, 0x08, 0xeb, 0x0e, 0xa4, 0xb4, + 0x61, 0xba, 0x86, 0xe4, 0x2d, 0x1a, 0x1e, 0x75, + 0xf6, 0xa8, 0x7c, 0x0b, 0x53, 0x4d, 0x77, 0x4b, + 0x7b, 0xeb, 0x14, 0xe1, 0x99, 0x80, 0x0d, 0x04, } // NOTE: Create the hash using both an absolute and a relative pathname to @@ -56,10 +56,10 @@ func TestDigestFromPathnameWithFile(t *testing.T) { func TestDigestFromPathnameWithDirectory(t *testing.T) { relativePathname := "launchpad.net/nifty" want := []byte{ - 0x9c, 0x09, 0xeb, 0x81, 0xd0, 0x47, 0x4f, 0x68, - 0xb5, 0x50, 0xe0, 0x94, 0x94, 0x9a, 0x41, 0x80, - 0x28, 0xef, 0x63, 0x35, 0x6f, 0x64, 0x92, 0x7e, - 0x6a, 0x43, 0xd7, 0x9d, 0x45, 0xf9, 0x8a, 0xeb, + 0x3c, 0xbb, 0x1c, 0x51, 0x76, 0x88, 0x88, 0x67, + 0x5d, 0x2b, 0x32, 0x5f, 0x44, 0xc0, 0x6b, 0x5d, + 0x72, 0x24, 0x99, 0x35, 0x3b, 0x94, 0x7e, 0xae, + 0x6b, 0xe4, 0xc3, 0xce, 0xd1, 0x2c, 0x30, 0xf0, } // NOTE: Create the hash using both an absolute and a relative pathname to @@ -95,10 +95,7 @@ func TestVerifyDepTree(t *testing.T) { status, err := VerifyDepTree(vendorRoot, map[string][]byte{ // matching 
result "github.com/alice/alice1": []byte{ - 0x53, 0xed, 0x38, 0xc0, 0x69, 0x34, 0xf0, 0x76, - 0x26, 0x62, 0xe5, 0xa1, 0xa4, 0xa9, 0xe9, 0x23, - 0x73, 0xf0, 0x02, 0x03, 0xfa, 0x43, 0xd0, 0x7e, - 0x7a, 0x29, 0x89, 0xae, 0x4c, 0x44, 0x50, 0x11, + 0x32, 0x6c, 0xd0, 0x36, 0xbb, 0xd4, 0xd0, 0x06, 0x34, 0x41, 0xda, 0x80, 0x2f, 0xe3, 0x31, 0xa0, 0x2b, 0xe7, 0xd6, 0x5d, 0x4c, 0xea, 0xf0, 0xe7, 0xf6, 0x46, 0x23, 0x6c, 0xa9, 0x02, 0x3a, 0x35, }, // mismatching result @@ -109,10 +106,7 @@ func TestVerifyDepTree(t *testing.T) { // another matching result "github.com/bob/bob1": []byte{ - 0x75, 0xc0, 0x5a, 0x13, 0xb0, 0xd6, 0x19, 0x5d, - 0x36, 0x26, 0xf4, 0xbc, 0xcb, 0x4d, 0x92, 0x08, - 0x4d, 0x03, 0xe0, 0x70, 0x11, 0x53, 0xcd, 0x5c, - 0x6a, 0x53, 0xc6, 0x31, 0x84, 0x34, 0x4f, 0xf4, + 0x2b, 0xcf, 0xe2, 0xc4, 0x67, 0x77, 0xf2, 0x7d, 0x30, 0x7e, 0x1e, 0xba, 0xc3, 0x90, 0xdc, 0x2a, 0xdd, 0x3a, 0x91, 0x98, 0x09, 0xcd, 0x50, 0x0d, 0x7e, 0x51, 0x99, 0xf4, 0x96, 0x59, 0x1d, 0xd2, }, // empty hash result @@ -120,7 +114,7 @@ func TestVerifyDepTree(t *testing.T) { // matching result at seldomly found directory level "launchpad.net/nifty": []byte{ - 0x9c, 0x09, 0xeb, 0x81, 0xd0, 0x47, 0x4f, 0x68, 0xb5, 0x50, 0xe0, 0x94, 0x94, 0x9a, 0x41, 0x80, 0x28, 0xef, 0x63, 0x35, 0x6f, 0x64, 0x92, 0x7e, 0x6a, 0x43, 0xd7, 0x9d, 0x45, 0xf9, 0x8a, 0xeb, + 0x3c, 0xbb, 0x1c, 0x51, 0x76, 0x88, 0x88, 0x67, 0x5d, 0x2b, 0x32, 0x5f, 0x44, 0xc0, 0x6b, 0x5d, 0x72, 0x24, 0x99, 0x35, 0x3b, 0x94, 0x7e, 0xae, 0x6b, 0xe4, 0xc3, 0xce, 0xd1, 0x2c, 0x30, 0xf0, }, }) if err != nil { From 5c97180d642123edb19d91f9d8145c54799d6b1f Mon Sep 17 00:00:00 2001 From: "Karrick S. 
McDermott" Date: Mon, 7 Aug 2017 16:52:32 -0400 Subject: [PATCH 05/36] added copywrite notification for dummy files --- internal/gps/pkgtree/digest_test.go | 40 +++++-------------- .../github.com/alice/alice1/a.go | 4 ++ .../github.com/alice/alice2/a.go | 4 ++ .../github.com/alice/notInLock/a.go | 4 ++ .../testdata_digest/github.com/bob/bob1/a.go | 4 ++ .../testdata_digest/github.com/bob/bob2/a.go | 4 ++ .../testdata_digest/launchpad.net/nifty/a.go | 4 ++ 7 files changed, 34 insertions(+), 30 deletions(-) diff --git a/internal/gps/pkgtree/digest_test.go b/internal/gps/pkgtree/digest_test.go index 87c38f877b..cfe4647acf 100644 --- a/internal/gps/pkgtree/digest_test.go +++ b/internal/gps/pkgtree/digest_test.go @@ -7,9 +7,6 @@ import ( "testing" ) -// prefix == "." -// symlink referent normalization - func getTestdataVerifyRoot(t *testing.T) string { cwd, err := os.Getwd() if err != nil { @@ -20,12 +17,7 @@ func getTestdataVerifyRoot(t *testing.T) string { func TestDigestFromPathnameWithFile(t *testing.T) { relativePathname := "github.com/alice/alice1/a.go" - want := []byte{ - 0x1e, 0x86, 0x4d, 0x08, 0xeb, 0x0e, 0xa4, 0xb4, - 0x61, 0xba, 0x86, 0xe4, 0x2d, 0x1a, 0x1e, 0x75, - 0xf6, 0xa8, 0x7c, 0x0b, 0x53, 0x4d, 0x77, 0x4b, - 0x7b, 0xeb, 0x14, 0xe1, 0x99, 0x80, 0x0d, 0x04, - } + want := []byte{0xc6, 0xd1, 0x58, 0x5f, 0x86, 0x60, 0xea, 0x3d, 0x44, 0xce, 0x51, 0x29, 0x92, 0xbc, 0xd9, 0xbe, 0x67, 0x7d, 0xe9, 0xd8, 0xc6, 0xb0, 0x6b, 0x21, 0x98, 0x9e, 0x24, 0xb0, 0x34, 0xd1, 0xe5, 0x3c} // NOTE: Create the hash using both an absolute and a relative pathname to // ensure hash ignores prefix. 
@@ -37,7 +29,7 @@ func TestDigestFromPathnameWithFile(t *testing.T) { t.Fatal(err) } if !bytes.Equal(got, want) { - t.Errorf("\n(GOT):\n\t%x\n(WNT):\n\t%x", got, want) + t.Errorf("\n(GOT):\n\t%#v\n(WNT):\n\t%#v", got, want) } }) @@ -48,19 +40,14 @@ func TestDigestFromPathnameWithFile(t *testing.T) { t.Fatal(err) } if !bytes.Equal(got, want) { - t.Errorf("\n(GOT):\n\t%x\n(WNT):\n\t%x", got, want) + t.Errorf("\n(GOT):\n\t%#v\n(WNT):\n\t%#v", got, want) } }) } func TestDigestFromPathnameWithDirectory(t *testing.T) { relativePathname := "launchpad.net/nifty" - want := []byte{ - 0x3c, 0xbb, 0x1c, 0x51, 0x76, 0x88, 0x88, 0x67, - 0x5d, 0x2b, 0x32, 0x5f, 0x44, 0xc0, 0x6b, 0x5d, - 0x72, 0x24, 0x99, 0x35, 0x3b, 0x94, 0x7e, 0xae, - 0x6b, 0xe4, 0xc3, 0xce, 0xd1, 0x2c, 0x30, 0xf0, - } + want := []byte{0xf9, 0xd5, 0xc4, 0x87, 0x54, 0x96, 0xd1, 0x90, 0x93, 0xf0, 0xbb, 0x6b, 0x9c, 0x13, 0x5c, 0x6b, 0xd1, 0x9b, 0xcc, 0xe2, 0x30, 0x91, 0xd4, 0xc4, 0x85, 0x8e, 0xa5, 0xb0, 0x8d, 0x99, 0xf0, 0xe2} // NOTE: Create the hash using both an absolute and a relative pathname to // ensure hash ignores prefix. 
@@ -72,7 +59,7 @@ func TestDigestFromPathnameWithDirectory(t *testing.T) { t.Fatal(err) } if !bytes.Equal(got, want) { - t.Errorf("\n(GOT):\n\t%x\n(WNT):\n\t%x", got, want) + t.Errorf("\n(GOT):\n\t%#v\n(WNT):\n\t%#v", got, want) } }) @@ -83,10 +70,9 @@ func TestDigestFromPathnameWithDirectory(t *testing.T) { t.Fatal(err) } if !bytes.Equal(got, want) { - t.Errorf("\n(GOT):\n\t%x\n(WNT):\n\t%x", got, want) + t.Errorf("\n(GOT):\n\t%#v\n(WNT):\n\t%#v", got, want) } }) - } func TestVerifyDepTree(t *testing.T) { @@ -94,9 +80,7 @@ func TestVerifyDepTree(t *testing.T) { status, err := VerifyDepTree(vendorRoot, map[string][]byte{ // matching result - "github.com/alice/alice1": []byte{ - 0x32, 0x6c, 0xd0, 0x36, 0xbb, 0xd4, 0xd0, 0x06, 0x34, 0x41, 0xda, 0x80, 0x2f, 0xe3, 0x31, 0xa0, 0x2b, 0xe7, 0xd6, 0x5d, 0x4c, 0xea, 0xf0, 0xe7, 0xf6, 0x46, 0x23, 0x6c, 0xa9, 0x02, 0x3a, 0x35, - }, + "github.com/alice/alice1": []byte{0x53, 0x2c, 0x1e, 0xda, 0x88, 0x71, 0xa0, 0x12, 0x81, 0xc1, 0xc8, 0xc3, 0xfb, 0xc3, 0x99, 0x9a, 0xe2, 0x95, 0xb1, 0x56, 0xe4, 0xfa, 0x9d, 0x88, 0x16, 0xc8, 0x69, 0x90, 0x74, 0x0, 0xc2, 0xb}, // mismatching result "github.com/alice/alice2": []byte("non matching digest"), @@ -105,17 +89,13 @@ func TestVerifyDepTree(t *testing.T) { "github.com/charlie/notInTree": nil, // another matching result - "github.com/bob/bob1": []byte{ - 0x2b, 0xcf, 0xe2, 0xc4, 0x67, 0x77, 0xf2, 0x7d, 0x30, 0x7e, 0x1e, 0xba, 0xc3, 0x90, 0xdc, 0x2a, 0xdd, 0x3a, 0x91, 0x98, 0x09, 0xcd, 0x50, 0x0d, 0x7e, 0x51, 0x99, 0xf4, 0x96, 0x59, 0x1d, 0xd2, - }, + "github.com/bob/bob1": []byte{0x4c, 0x86, 0xa, 0x58, 0x43, 0xc5, 0xbe, 0xa6, 0xe4, 0xe4, 0xbc, 0xa6, 0xbb, 0x86, 0x57, 0x7a, 0x9e, 0x55, 0x95, 0x1b, 0x77, 0x90, 0x94, 0xe0, 0x9, 0x40, 0xc8, 0x4b, 0x9e, 0xb1, 0xed, 0x4b}, // empty hash result "github.com/bob/bob2": nil, // matching result at seldomly found directory level - "launchpad.net/nifty": []byte{ - 0x3c, 0xbb, 0x1c, 0x51, 0x76, 0x88, 0x88, 0x67, 0x5d, 0x2b, 0x32, 0x5f, 0x44, 0xc0, 0x6b, 
0x5d, 0x72, 0x24, 0x99, 0x35, 0x3b, 0x94, 0x7e, 0xae, 0x6b, 0xe4, 0xc3, 0xce, 0xd1, 0x2c, 0x30, 0xf0, - }, + "launchpad.net/nifty": []byte{0xf9, 0xd5, 0xc4, 0x87, 0x54, 0x96, 0xd1, 0x90, 0x93, 0xf0, 0xbb, 0x6b, 0x9c, 0x13, 0x5c, 0x6b, 0xd1, 0x9b, 0xcc, 0xe2, 0x30, 0x91, 0xd4, 0xc4, 0x85, 0x8e, 0xa5, 0xb0, 0x8d, 0x99, 0xf0, 0xe2}, }) if err != nil { t.Fatal(err) @@ -128,7 +108,7 @@ func TestVerifyDepTree(t *testing.T) { if err != nil { t.Error(err) } - t.Logf("%q\t%x", k, digest) + t.Logf("%q\t%#v", k, digest) } } diff --git a/internal/gps/testdata_digest/github.com/alice/alice1/a.go b/internal/gps/testdata_digest/github.com/alice/alice1/a.go index 62cd11d48d..d3858617d4 100644 --- a/internal/gps/testdata_digest/github.com/alice/alice1/a.go +++ b/internal/gps/testdata_digest/github.com/alice/alice1/a.go @@ -1 +1,5 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + package alice1 diff --git a/internal/gps/testdata_digest/github.com/alice/alice2/a.go b/internal/gps/testdata_digest/github.com/alice/alice2/a.go index 5bc90c5bd4..e745cdae13 100644 --- a/internal/gps/testdata_digest/github.com/alice/alice2/a.go +++ b/internal/gps/testdata_digest/github.com/alice/alice2/a.go @@ -1 +1,5 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + package alice2 diff --git a/internal/gps/testdata_digest/github.com/alice/notInLock/a.go b/internal/gps/testdata_digest/github.com/alice/notInLock/a.go index be58629081..5b2914de73 100644 --- a/internal/gps/testdata_digest/github.com/alice/notInLock/a.go +++ b/internal/gps/testdata_digest/github.com/alice/notInLock/a.go @@ -1 +1,5 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ package notInLock diff --git a/internal/gps/testdata_digest/github.com/bob/bob1/a.go b/internal/gps/testdata_digest/github.com/bob/bob1/a.go index 4c81e12753..3be2518c63 100644 --- a/internal/gps/testdata_digest/github.com/bob/bob1/a.go +++ b/internal/gps/testdata_digest/github.com/bob/bob1/a.go @@ -1 +1,5 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + package bob1 diff --git a/internal/gps/testdata_digest/github.com/bob/bob2/a.go b/internal/gps/testdata_digest/github.com/bob/bob2/a.go index 8858e1cb7a..d3cc2c512d 100644 --- a/internal/gps/testdata_digest/github.com/bob/bob2/a.go +++ b/internal/gps/testdata_digest/github.com/bob/bob2/a.go @@ -1 +1,5 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + package bob2 diff --git a/internal/gps/testdata_digest/launchpad.net/nifty/a.go b/internal/gps/testdata_digest/launchpad.net/nifty/a.go index 8fff589d90..edaa02df95 100644 --- a/internal/gps/testdata_digest/launchpad.net/nifty/a.go +++ b/internal/gps/testdata_digest/launchpad.net/nifty/a.go @@ -1 +1,5 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + package nifty From 546d65f48868d6c97b805072df52db0f8046bb7f Mon Sep 17 00:00:00 2001 From: "Karrick S. 
McDermott" Date: Mon, 7 Aug 2017 17:11:24 -0400 Subject: [PATCH 06/36] test scaffolding dirs and files have meaningful names --- internal/gps/pkgtree/digest_test.go | 57 +++++++++---------- .../a.go => github.com/alice/match/match.go} | 2 +- .../{alice1/a.go => mismatch/mismatch.go} | 2 +- .../alice/notInLock/{a.go => notInLock.go} | 0 .../a.go => bob/emptyDigest/emptyDigest.go} | 2 +- .../bob/{bob1/a.go => match/match.go} | 2 +- .../a.go => launchpad.net/match/match.go} | 2 +- 7 files changed, 31 insertions(+), 36 deletions(-) rename internal/gps/testdata_digest/{launchpad.net/nifty/a.go => github.com/alice/match/match.go} (91%) rename internal/gps/testdata_digest/github.com/alice/{alice1/a.go => mismatch/mismatch.go} (90%) rename internal/gps/testdata_digest/github.com/alice/notInLock/{a.go => notInLock.go} (100%) rename internal/gps/testdata_digest/github.com/{alice/alice2/a.go => bob/emptyDigest/emptyDigest.go} (88%) rename internal/gps/testdata_digest/github.com/bob/{bob1/a.go => match/match.go} (91%) rename internal/gps/testdata_digest/{github.com/bob/bob2/a.go => launchpad.net/match/match.go} (91%) diff --git a/internal/gps/pkgtree/digest_test.go b/internal/gps/pkgtree/digest_test.go index cfe4647acf..40b9475856 100644 --- a/internal/gps/pkgtree/digest_test.go +++ b/internal/gps/pkgtree/digest_test.go @@ -16,8 +16,8 @@ func getTestdataVerifyRoot(t *testing.T) string { } func TestDigestFromPathnameWithFile(t *testing.T) { - relativePathname := "github.com/alice/alice1/a.go" - want := []byte{0xc6, 0xd1, 0x58, 0x5f, 0x86, 0x60, 0xea, 0x3d, 0x44, 0xce, 0x51, 0x29, 0x92, 0xbc, 0xd9, 0xbe, 0x67, 0x7d, 0xe9, 0xd8, 0xc6, 0xb0, 0x6b, 0x21, 0x98, 0x9e, 0x24, 0xb0, 0x34, 0xd1, 0xe5, 0x3c} + relativePathname := "github.com/alice/match/match.go" + want := []byte{0xef, 0x1e, 0x30, 0x16, 0xc4, 0xb1, 0xdb, 0xd9, 0x38, 0x65, 0xec, 0x90, 0xca, 0xad, 0x89, 0x52, 0xf9, 0x56, 0xb5, 0xa7, 0xfd, 0x83, 0x8e, 0xd7, 0x21, 0x6b, 0xbd, 0x63, 0x9a, 0xde, 0xc3, 0xeb} // NOTE: Create the 
hash using both an absolute and a relative pathname to // ensure hash ignores prefix. @@ -46,8 +46,8 @@ func TestDigestFromPathnameWithFile(t *testing.T) { } func TestDigestFromPathnameWithDirectory(t *testing.T) { - relativePathname := "launchpad.net/nifty" - want := []byte{0xf9, 0xd5, 0xc4, 0x87, 0x54, 0x96, 0xd1, 0x90, 0x93, 0xf0, 0xbb, 0x6b, 0x9c, 0x13, 0x5c, 0x6b, 0xd1, 0x9b, 0xcc, 0xe2, 0x30, 0x91, 0xd4, 0xc4, 0x85, 0x8e, 0xa5, 0xb0, 0x8d, 0x99, 0xf0, 0xe2} + relativePathname := "launchpad.net/match" + want := []byte{0x8, 0xe5, 0x5b, 0xb6, 0xb8, 0x44, 0x91, 0x80, 0x46, 0xca, 0xc6, 0x2e, 0x44, 0x7b, 0x42, 0xd4, 0xfb, 0x2d, 0xfd, 0x4c, 0xd9, 0xc9, 0xd, 0x38, 0x23, 0xed, 0xa5, 0xf4, 0xbc, 0x69, 0xd4, 0x8b} // NOTE: Create the hash using both an absolute and a relative pathname to // ensure hash ignores prefix. @@ -78,37 +78,32 @@ func TestDigestFromPathnameWithDirectory(t *testing.T) { func TestVerifyDepTree(t *testing.T) { vendorRoot := getTestdataVerifyRoot(t) - status, err := VerifyDepTree(vendorRoot, map[string][]byte{ - // matching result - "github.com/alice/alice1": []byte{0x53, 0x2c, 0x1e, 0xda, 0x88, 0x71, 0xa0, 0x12, 0x81, 0xc1, 0xc8, 0xc3, 0xfb, 0xc3, 0x99, 0x9a, 0xe2, 0x95, 0xb1, 0x56, 0xe4, 0xfa, 0x9d, 0x88, 0x16, 0xc8, 0x69, 0x90, 0x74, 0x0, 0xc2, 0xb}, - - // mismatching result - "github.com/alice/alice2": []byte("non matching digest"), - - // not in tree result ought to superseede empty hash result - "github.com/charlie/notInTree": nil, - - // another matching result - "github.com/bob/bob1": []byte{0x4c, 0x86, 0xa, 0x58, 0x43, 0xc5, 0xbe, 0xa6, 0xe4, 0xe4, 0xbc, 0xa6, 0xbb, 0x86, 0x57, 0x7a, 0x9e, 0x55, 0x95, 0x1b, 0x77, 0x90, 0x94, 0xe0, 0x9, 0x40, 0xc8, 0x4b, 0x9e, 0xb1, 0xed, 0x4b}, - - // empty hash result - "github.com/bob/bob2": nil, - + wantSums := map[string][]byte{ + "github.com/alice/match": []byte{0x13, 0x22, 0x4c, 0xd5, 0x5b, 0x3e, 0x29, 0xe5, 0x40, 0x23, 0x56, 0xc7, 0xc1, 0x50, 0x4, 0x1, 0x60, 0x46, 0xd4, 0x14, 0x9b, 0x63, 0x88, 0x37, 
0xeb, 0x53, 0xfc, 0xe7, 0x63, 0x53, 0xf3, 0xf6}, + "github.com/alice/mismatch": []byte("some non-matching digest"), + "github.com/bob/emptyDigest": nil, // empty hash result + "github.com/charlie/notInTree": nil, // not in tree result ought to superseede empty digest result + "github.com/bob/match": []byte{0x73, 0xf5, 0xdc, 0xad, 0x39, 0x31, 0x25, 0x78, 0x9d, 0x2c, 0xbe, 0xd0, 0x9e, 0x6, 0x40, 0x64, 0xcd, 0x32, 0x9c, 0x28, 0xf4, 0xa0, 0xd2, 0xa4, 0x46, 0xca, 0x1f, 0x13, 0xfb, 0xaf, 0x53, 0x96}, // matching result at seldomly found directory level - "launchpad.net/nifty": []byte{0xf9, 0xd5, 0xc4, 0x87, 0x54, 0x96, 0xd1, 0x90, 0x93, 0xf0, 0xbb, 0x6b, 0x9c, 0x13, 0x5c, 0x6b, 0xd1, 0x9b, 0xcc, 0xe2, 0x30, 0x91, 0xd4, 0xc4, 0x85, 0x8e, 0xa5, 0xb0, 0x8d, 0x99, 0xf0, 0xe2}, - }) + "launchpad.net/match": []byte{0x8, 0xe5, 0x5b, 0xb6, 0xb8, 0x44, 0x91, 0x80, 0x46, 0xca, 0xc6, 0x2e, 0x44, 0x7b, 0x42, 0xd4, 0xfb, 0x2d, 0xfd, 0x4c, 0xd9, 0xc9, 0xd, 0x38, 0x23, 0xed, 0xa5, 0xf4, 0xbc, 0x69, 0xd4, 0x8b}, + } + + status, err := VerifyDepTree(vendorRoot, wantSums) if err != nil { t.Fatal(err) } - // NOTE: When true, display the vendor status map. + // NOTE: When true, display the digests of the directories specified by the + // digest keys. 
if false { - for k := range status { - digest, err := DigestFromPathname(vendorRoot, k) + for k, want := range wantSums { + got, err := DigestFromPathname(vendorRoot, k) if err != nil { t.Error(err) } - t.Logf("%q\t%#v", k, digest) + if !bytes.Equal(got, want) { + t.Errorf("%q\n(GOT):\n\t%#v\n(WNT):\n\t%#v", k, got, want) + } } } @@ -127,13 +122,13 @@ func TestVerifyDepTree(t *testing.T) { } } - checkStatus(t, status, "github.com/alice/alice1", NoMismatch) - checkStatus(t, status, "github.com/alice/alice2", DigestMismatchInLock) + checkStatus(t, status, "github.com/alice/match", NoMismatch) + checkStatus(t, status, "github.com/alice/mismatch", DigestMismatchInLock) checkStatus(t, status, "github.com/alice/notInLock", NotInLock) - checkStatus(t, status, "github.com/bob/bob1", NoMismatch) - checkStatus(t, status, "github.com/bob/bob2", EmptyDigestInLock) + checkStatus(t, status, "github.com/bob/match", NoMismatch) + checkStatus(t, status, "github.com/bob/emptyDigest", EmptyDigestInLock) checkStatus(t, status, "github.com/charlie/notInTree", NotInTree) - checkStatus(t, status, "launchpad.net/nifty", NoMismatch) + checkStatus(t, status, "launchpad.net/match", NoMismatch) } func BenchmarkVerifyDepTree(b *testing.B) { diff --git a/internal/gps/testdata_digest/launchpad.net/nifty/a.go b/internal/gps/testdata_digest/github.com/alice/match/match.go similarity index 91% rename from internal/gps/testdata_digest/launchpad.net/nifty/a.go rename to internal/gps/testdata_digest/github.com/alice/match/match.go index edaa02df95..ab5f875285 100644 --- a/internal/gps/testdata_digest/launchpad.net/nifty/a.go +++ b/internal/gps/testdata_digest/github.com/alice/match/match.go @@ -2,4 +2,4 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
-package nifty +package match diff --git a/internal/gps/testdata_digest/github.com/alice/alice1/a.go b/internal/gps/testdata_digest/github.com/alice/mismatch/mismatch.go similarity index 90% rename from internal/gps/testdata_digest/github.com/alice/alice1/a.go rename to internal/gps/testdata_digest/github.com/alice/mismatch/mismatch.go index d3858617d4..1ace4e7683 100644 --- a/internal/gps/testdata_digest/github.com/alice/alice1/a.go +++ b/internal/gps/testdata_digest/github.com/alice/mismatch/mismatch.go @@ -2,4 +2,4 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -package alice1 +package mismatch diff --git a/internal/gps/testdata_digest/github.com/alice/notInLock/a.go b/internal/gps/testdata_digest/github.com/alice/notInLock/notInLock.go similarity index 100% rename from internal/gps/testdata_digest/github.com/alice/notInLock/a.go rename to internal/gps/testdata_digest/github.com/alice/notInLock/notInLock.go diff --git a/internal/gps/testdata_digest/github.com/alice/alice2/a.go b/internal/gps/testdata_digest/github.com/bob/emptyDigest/emptyDigest.go similarity index 88% rename from internal/gps/testdata_digest/github.com/alice/alice2/a.go rename to internal/gps/testdata_digest/github.com/bob/emptyDigest/emptyDigest.go index e745cdae13..2d6067bc8e 100644 --- a/internal/gps/testdata_digest/github.com/alice/alice2/a.go +++ b/internal/gps/testdata_digest/github.com/bob/emptyDigest/emptyDigest.go @@ -2,4 +2,4 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
-package alice2 +package emptyDigest diff --git a/internal/gps/testdata_digest/github.com/bob/bob1/a.go b/internal/gps/testdata_digest/github.com/bob/match/match.go similarity index 91% rename from internal/gps/testdata_digest/github.com/bob/bob1/a.go rename to internal/gps/testdata_digest/github.com/bob/match/match.go index 3be2518c63..ab5f875285 100644 --- a/internal/gps/testdata_digest/github.com/bob/bob1/a.go +++ b/internal/gps/testdata_digest/github.com/bob/match/match.go @@ -2,4 +2,4 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -package bob1 +package match diff --git a/internal/gps/testdata_digest/github.com/bob/bob2/a.go b/internal/gps/testdata_digest/launchpad.net/match/match.go similarity index 91% rename from internal/gps/testdata_digest/github.com/bob/bob2/a.go rename to internal/gps/testdata_digest/launchpad.net/match/match.go index d3cc2c512d..ab5f875285 100644 --- a/internal/gps/testdata_digest/github.com/bob/bob2/a.go +++ b/internal/gps/testdata_digest/launchpad.net/match/match.go @@ -2,4 +2,4 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -package bob2 +package match From f1d5d8503f0917aff63e32cdd9103f87fb6cac08 Mon Sep 17 00:00:00 2001 From: "Karrick S. 
McDermott" Date: Mon, 7 Aug 2017 17:43:44 -0400 Subject: [PATCH 07/36] use filepath function to locate testdata_digest directory --- internal/gps/pkgtree/digest_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/internal/gps/pkgtree/digest_test.go b/internal/gps/pkgtree/digest_test.go index 40b9475856..e554f290bf 100644 --- a/internal/gps/pkgtree/digest_test.go +++ b/internal/gps/pkgtree/digest_test.go @@ -12,7 +12,8 @@ func getTestdataVerifyRoot(t *testing.T) string { if err != nil { t.Fatal(err) } - return filepath.Join(cwd, "..", "testdata_digest") + parent, _ := filepath.Split(cwd) + return filepath.Join(parent, "testdata_digest") } func TestDigestFromPathnameWithFile(t *testing.T) { From b6d16e108911e501077f0fd99384511218a9413b Mon Sep 17 00:00:00 2001 From: "Karrick S. McDermott" Date: Mon, 7 Aug 2017 23:32:19 -0400 Subject: [PATCH 08/36] convert CRLF to LF when hashing file contents --- internal/gps/pkgtree/digest.go | 107 +++++++++++++++++++++++++++- internal/gps/pkgtree/digest_test.go | 59 +++++++++++++++ 2 files changed, 165 insertions(+), 1 deletion(-) diff --git a/internal/gps/pkgtree/digest.go b/internal/gps/pkgtree/digest.go index 18832bc869..616cfccca5 100644 --- a/internal/gps/pkgtree/digest.go +++ b/internal/gps/pkgtree/digest.go @@ -115,7 +115,7 @@ func DigestFromPathname(prefix, pathname string) ([]byte, error) { } } else { writeBytesWithNull(h, []byte(strconv.FormatInt(fi.Size(), 10))) // format file size as base 10 integer - _, err = io.Copy(h, fh) // fast copy of file contents to hash + _, err = io.Copy(h, &lineEndingWriterTo{fh}) // fast copy of file contents to hash err = errors.Wrap(err, "cannot Copy") // errors.Wrap only wraps non-nil, so elide guard condition } @@ -132,6 +132,111 @@ func DigestFromPathname(prefix, pathname string) ([]byte, error) { return h.Sum(nil), nil } +// lineEndingWriterTo +type lineEndingWriterTo struct { + src io.Reader +} + +func (liw *lineEndingWriterTo) Read(buf []byte) (int, error) { + 
return liw.src.Read(buf) +} + +// WriteTo writes data to w until there's no more data to write or +// when an error occurs. The return value n is the number of bytes +// written. Any error encountered during the write is also returned. +// +// The Copy function uses WriterTo if available. +func (liw *lineEndingWriterTo) WriteTo(dst io.Writer) (int64, error) { + // Some VCS systems automatically convert LF line endings to CRLF on some OS + // platforms, such as Windows. This would cause the a file checked out on + // Windows to have a different digest than the same file on macOS or + // Linux. In order to ensure file contents normalize the same, we need to + // modify the file's contents when we compute its hash. + // + // Keep reading from embedded io.Reader and writing to io.Writer until EOF. + // For each blob, when read CRLF, convert to LF. Ensure handle case when CR + // read in one buffer and LF read in another. Another option is just filter + // out all CR bytes, but unneeded removal of CR bytes increases our surface + // area for accidental hash collisions. Therefore, we will only convert CRLF + // to LF. + + // Create a buffer to hold file contents; use same size as `io.Copy` + // creates. + buf := make([]byte, 32*1024) + + var err error + var finalCR bool + var written int64 + + for { + nr, er := liw.src.Read(buf) + if nr > 0 { + // When previous buffer ended in CR and this buffer does not start + // in LF, we need to emit a CR byte. + if finalCR && buf[0] != '\n' { + // We owe the destination a CR byte because we held onto it from + // last loop. + nw, ew := dst.Write([]byte{'\r'}) + if nw > 0 { + written += int64(nw) + } + if ew != nil { + err = ew + break + } + } + + // Remove any CRLF sequences in buf, using `bytes.Index` because + // that takes advantage of machine opcodes for searching for byte + // patterns on many platforms. 
+ for { + index := bytes.Index(buf, []byte("\r\n")) + if index == -1 { + break + } + // Want to skip index byte, where the CR is. + copy(buf[index:nr-1], buf[index+1:nr]) + nr-- + } + + // When final byte is CR, do not emit until we ensure first byte on + // next read is LF. + if finalCR = buf[nr-1] == '\r'; finalCR { + nr-- // pretend byte was never read from source + } + + nw, ew := dst.Write(buf[:nr]) + if nw > 0 { + written += int64(nw) + } + if ew != nil { + err = ew + break + } + if nr != nw { + err = io.ErrShortWrite + break + } + } + if er != nil { + if er != io.EOF { + err = er + } + break + } + } + if finalCR { + nw, ew := dst.Write([]byte{'\r'}) + if nw > 0 { + written += int64(nw) + } + if ew != nil { + err = ew + } + } + return written, err +} + // writeBytesWithNull appends the specified data to the specified hash, followed by // the NULL byte, in order to make accidental hash collisions less likely. func writeBytesWithNull(h hash.Hash, data []byte) { diff --git a/internal/gps/pkgtree/digest_test.go b/internal/gps/pkgtree/digest_test.go index e554f290bf..cbe4e545c0 100644 --- a/internal/gps/pkgtree/digest_test.go +++ b/internal/gps/pkgtree/digest_test.go @@ -2,6 +2,7 @@ package pkgtree import ( "bytes" + "io" "os" "path/filepath" "testing" @@ -144,3 +145,61 @@ func BenchmarkVerifyDepTree(b *testing.B) { } } } + +type crossBuffer struct { + readCount int +} + +func (cb *crossBuffer) Read(buf []byte) (int, error) { + const first = "this is the result\r\nfrom the first read\r" + const second = "\nthis is the result\r\nfrom the second read\r" + + cb.readCount++ + + switch cb.readCount { + case 1: + return copy(buf, first), nil + case 2: + return copy(buf, second), nil + default: + return 0, io.EOF + } +} + +func TestLineEndingWriteToCrossBufferCRLF(t *testing.T) { + src := &lineEndingWriterTo{new(crossBuffer)} + dst := new(bytes.Buffer) + + // the final CR byte ought to be conveyed to destination + const output = "this is the result\nfrom the first 
read\nthis is the result\nfrom the second read\r" + + n, err := io.Copy(dst, src) + if got, want := err, error(nil); got != want { + t.Errorf("(GOT): %v; (WNT): %v", got, want) + } + if got, want := n, int64(len(output)); got != want { + t.Errorf("(GOT): %v; (WNT): %v", got, want) + } + if got, want := dst.Bytes(), []byte(output); !bytes.Equal(got, want) { + t.Errorf("(GOT): %#q; (WNT): %#q", got, want) + } +} + +func TestLineEndingWriteTo(t *testing.T) { + const input = "now is the time\r\nfor all good engineers\r\nto improve their test coverage!\r\n" + const output = "now is the time\nfor all good engineers\nto improve their test coverage!\n" + + src := &lineEndingWriterTo{bytes.NewBufferString(input)} + dst := new(bytes.Buffer) + + n, err := io.Copy(dst, src) + if got, want := err, error(nil); got != want { + t.Errorf("(GOT): %v; (WNT): %v", got, want) + } + if got, want := n, int64(len(output)); got != want { + t.Errorf("(GOT): %v; (WNT): %v", got, want) + } + if got, want := dst.Bytes(), []byte(output); !bytes.Equal(got, want) { + t.Errorf("(GOT): %#q; (WNT): %#q", got, want) + } +} From 9a7209dcad0b46437b21694a58b66ad60000b3c0 Mon Sep 17 00:00:00 2001 From: "Karrick S. McDermott" Date: Mon, 7 Aug 2017 23:51:27 -0400 Subject: [PATCH 09/36] added copywrite notification for digest{,_test}.go --- internal/gps/pkgtree/digest.go | 4 ++++ internal/gps/pkgtree/digest_test.go | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/internal/gps/pkgtree/digest.go b/internal/gps/pkgtree/digest.go index 616cfccca5..bb3cc6cf83 100644 --- a/internal/gps/pkgtree/digest.go +++ b/internal/gps/pkgtree/digest.go @@ -1,3 +1,7 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ package pkgtree import ( diff --git a/internal/gps/pkgtree/digest_test.go b/internal/gps/pkgtree/digest_test.go index cbe4e545c0..98302d0646 100644 --- a/internal/gps/pkgtree/digest_test.go +++ b/internal/gps/pkgtree/digest_test.go @@ -1,3 +1,7 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + package pkgtree import ( From 581e51a6ce82148a49980c57526dab5d5a8f8427 Mon Sep 17 00:00:00 2001 From: "Karrick S. McDermott" Date: Mon, 7 Aug 2017 23:58:30 -0400 Subject: [PATCH 10/36] fixes `gosimple $PKGS` lint suggestion --- internal/gps/pkgtree/digest_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/gps/pkgtree/digest_test.go b/internal/gps/pkgtree/digest_test.go index 98302d0646..9963a13700 100644 --- a/internal/gps/pkgtree/digest_test.go +++ b/internal/gps/pkgtree/digest_test.go @@ -119,7 +119,7 @@ func TestVerifyDepTree(t *testing.T) { checkStatus := func(t *testing.T, status map[string]VendorStatus, key string, want VendorStatus) { got, ok := status[key] - if ok != true { + if !ok { t.Errorf("Want key: %q", key) return } From a298630583efa9db1260eda29b8653b4966da793 Mon Sep 17 00:00:00 2001 From: "Karrick S. 
McDermott" Date: Tue, 8 Aug 2017 10:30:20 -0400 Subject: [PATCH 11/36] lineEndingReader is more simplified than lineEndingWriterTo --- internal/gps/pkgtree/digest.go | 148 ++++++++++------------------ internal/gps/pkgtree/digest_test.go | 129 ++++++++++++------------ 2 files changed, 118 insertions(+), 159 deletions(-) diff --git a/internal/gps/pkgtree/digest.go b/internal/gps/pkgtree/digest.go index bb3cc6cf83..1a75a137e2 100644 --- a/internal/gps/pkgtree/digest.go +++ b/internal/gps/pkgtree/digest.go @@ -60,6 +60,8 @@ func DigestFromPathname(prefix, pathname string) ([]byte, error) { prefixLength := len(prefix) // store length to trim off pathnames later pathnameQueue := []string{filepath.Join(prefix, pathname)} + var written int64 + // As we enumerate over the queue and encounter a directory, its children // will be added to the work queue. for len(pathnameQueue) > 0 { @@ -118,9 +120,9 @@ func DigestFromPathname(prefix, pathname string) ([]byte, error) { } } } else { - writeBytesWithNull(h, []byte(strconv.FormatInt(fi.Size(), 10))) // format file size as base 10 integer - _, err = io.Copy(h, &lineEndingWriterTo{fh}) // fast copy of file contents to hash - err = errors.Wrap(err, "cannot Copy") // errors.Wrap only wraps non-nil, so elide guard condition + written, err = io.Copy(h, newLineEndingReader(fh)) // fast copy of file contents to hash + err = errors.Wrap(err, "cannot Copy") // errors.Wrap only wraps non-nil, so elide guard condition + writeBytesWithNull(h, []byte(strconv.FormatInt(written, 10))) // format file size as base 10 integer } // Close the file handle to the open directory without masking possible @@ -136,109 +138,63 @@ func DigestFromPathname(prefix, pathname string) ([]byte, error) { return h.Sum(nil), nil } -// lineEndingWriterTo -type lineEndingWriterTo struct { - src io.Reader +// lineEndingReader is a `io.Reader` that converts CRLF sequences to LF. 
+// +// Some VCS systems automatically convert LF line endings to CRLF on some OS +// platforms. This would cause the a file checked out on those platforms to have +// a different digest than the same file on platforms that do not perform this +// translation. In order to ensure file contents normalize and hash the same, +// this struct satisfies the io.Reader interface by providing a Read method that +// modifies the file's contents when it is read, translating all CRLF sequences +// to LF. +type lineEndingReader struct { + src io.Reader // source io.Reader from which this reads + prevReadEndedCR bool // used to track whether final byte of previous Read was CR } -func (liw *lineEndingWriterTo) Read(buf []byte) (int, error) { - return liw.src.Read(buf) +// newLineEndingReader returns a new lineEndingReader that reads from the +// specified source io.Reader. +func newLineEndingReader(src io.Reader) *lineEndingReader { + return &lineEndingReader{src: src} } -// WriteTo writes data to w until there's no more data to write or -// when an error occurs. The return value n is the number of bytes -// written. Any error encountered during the write is also returned. -// -// The Copy function uses WriterTo if available. -func (liw *lineEndingWriterTo) WriteTo(dst io.Writer) (int64, error) { - // Some VCS systems automatically convert LF line endings to CRLF on some OS - // platforms, such as Windows. This would cause the a file checked out on - // Windows to have a different digest than the same file on macOS or - // Linux. In order to ensure file contents normalize the same, we need to - // modify the file's contents when we compute its hash. - // - // Keep reading from embedded io.Reader and writing to io.Writer until EOF. - // For each blob, when read CRLF, convert to LF. Ensure handle case when CR - // read in one buffer and LF read in another. 
Another option is just filter - // out all CR bytes, but unneeded removal of CR bytes increases our surface - // area for accidental hash collisions. Therefore, we will only convert CRLF - // to LF. - - // Create a buffer to hold file contents; use same size as `io.Copy` - // creates. - buf := make([]byte, 32*1024) - - var err error - var finalCR bool - var written int64 - - for { - nr, er := liw.src.Read(buf) - if nr > 0 { - // When previous buffer ended in CR and this buffer does not start - // in LF, we need to emit a CR byte. - if finalCR && buf[0] != '\n' { - // We owe the destination a CR byte because we held onto it from - // last loop. - nw, ew := dst.Write([]byte{'\r'}) - if nw > 0 { - written += int64(nw) - } - if ew != nil { - err = ew - break - } - } - - // Remove any CRLF sequences in buf, using `bytes.Index` because - // that takes advantage of machine opcodes for searching for byte - // patterns on many platforms. - for { - index := bytes.Index(buf, []byte("\r\n")) - if index == -1 { - break - } - // Want to skip index byte, where the CR is. - copy(buf[index:nr-1], buf[index+1:nr]) - nr-- - } - - // When final byte is CR, do not emit until we ensure first byte on - // next read is LF. - if finalCR = buf[nr-1] == '\r'; finalCR { - nr-- // pretend byte was never read from source - } +// Read consumes bytes from the structure's source io.Reader to fill the +// specified slice of bytes. It converts all CRLF byte sequences to LF, and +// handles cases where CR and LF straddle across two Read operations. 
+func (f *lineEndingReader) Read(buf []byte) (int, error) { + var startIndex int + if f.prevReadEndedCR { + // Read one less byte in case we need to insert CR in there + startIndex++ + } + nr, er := f.src.Read(buf[startIndex:]) + if nr > 0 { + if f.prevReadEndedCR && buf[startIndex] != '\n' { + buf[0] = '\r' + startIndex = 0 + } - nw, ew := dst.Write(buf[:nr]) - if nw > 0 { - written += int64(nw) - } - if ew != nil { - err = ew - break - } - if nr != nw { - err = io.ErrShortWrite + // Remove any CRLF sequences in buf, using `bytes.Index` because that + // takes advantage of machine opcodes that search for byte patterns on + // many architectures. + index := startIndex + for { + index = bytes.Index(buf[index:], []byte("\r\n")) + if index == -1 { break } + // Want to skip index byte, where the CR is. + copy(buf[index:nr-1], buf[index+1:nr]) + nr-- } - if er != nil { - if er != io.EOF { - err = er - } - break - } - } - if finalCR { - nw, ew := dst.Write([]byte{'\r'}) - if nw > 0 { - written += int64(nw) - } - if ew != nil { - err = ew + + // When final byte is CR, do not emit until we ensure first byte on + // next read is LF. + if f.prevReadEndedCR = buf[nr-1] == '\r'; f.prevReadEndedCR { + nr-- // pretend byte was never read from source } } - return written, err + return nr, er } // writeBytesWithNull appends the specified data to the specified hash, followed by diff --git a/internal/gps/pkgtree/digest_test.go b/internal/gps/pkgtree/digest_test.go index 9963a13700..5750d36547 100644 --- a/internal/gps/pkgtree/digest_test.go +++ b/internal/gps/pkgtree/digest_test.go @@ -12,6 +12,67 @@ import ( "testing" ) +// crossBuffer is a test io.Reader that emits a few canned responses. 
+type crossBuffer struct { + readCount int +} + +func (cb *crossBuffer) Read(buf []byte) (int, error) { + const first = "this is the result\r\nfrom the first read\r" + const second = "\nthis is the result\r\nfrom the second read\r" + + cb.readCount++ + + switch cb.readCount { + case 1: + return copy(buf, first), nil + case 2: + return copy(buf, second), nil + default: + return 0, io.EOF + } +} + +//////////////////////////////////////// + +func TestLineEndingReadCrossBufferCRLF(t *testing.T) { + // The final CR byte ought to be conveyed to destination. + const output = "this is the result\nfrom the first read\nthis is the result\nfrom the second read\r" + + src := newLineEndingReader(new(crossBuffer)) + dst := new(bytes.Buffer) + + n, err := io.Copy(dst, src) + if got, want := err, error(nil); got != want { + t.Errorf("(GOT): %v; (WNT): %v", got, want) + } + if got, want := n, int64(len(output)); got != want { + t.Errorf("(GOT): %v; (WNT): %v", got, want) + } + if got, want := dst.Bytes(), []byte(output); !bytes.Equal(got, want) { + t.Errorf("(GOT): %#q; (WNT): %#q", got, want) + } +} + +func TestLineEndingRead(t *testing.T) { + const input = "now is the time\r\nfor all good engineers\r\nto improve their test coverage!\r\n" + const output = "now is the time\nfor all good engineers\nto improve their test coverage!\n" + + src := newLineEndingReader(bytes.NewBufferString(input)) + dst := new(bytes.Buffer) + + n, err := io.Copy(dst, src) + if got, want := err, error(nil); got != want { + t.Errorf("(GOT): %v; (WNT): %v", got, want) + } + if got, want := n, int64(len(output)); got != want { + t.Errorf("(GOT): %v; (WNT): %v", got, want) + } + if got, want := dst.Bytes(), []byte(output); !bytes.Equal(got, want) { + t.Errorf("(GOT): %#q; (WNT): %#q", got, want) + } +} + func getTestdataVerifyRoot(t *testing.T) string { cwd, err := os.Getwd() if err != nil { @@ -23,7 +84,7 @@ func getTestdataVerifyRoot(t *testing.T) string { func TestDigestFromPathnameWithFile(t *testing.T) 
{ relativePathname := "github.com/alice/match/match.go" - want := []byte{0xef, 0x1e, 0x30, 0x16, 0xc4, 0xb1, 0xdb, 0xd9, 0x38, 0x65, 0xec, 0x90, 0xca, 0xad, 0x89, 0x52, 0xf9, 0x56, 0xb5, 0xa7, 0xfd, 0x83, 0x8e, 0xd7, 0x21, 0x6b, 0xbd, 0x63, 0x9a, 0xde, 0xc3, 0xeb} + want := []byte{0x43, 0xe6, 0x3, 0x53, 0xda, 0x4, 0x18, 0xf6, 0xbd, 0x98, 0xf4, 0x6c, 0x6d, 0xb8, 0xc1, 0x8d, 0xa2, 0x78, 0x16, 0x45, 0xf7, 0xca, 0xc, 0xec, 0xcf, 0x2e, 0xa1, 0x64, 0x55, 0x69, 0xbf, 0x8f} // NOTE: Create the hash using both an absolute and a relative pathname to // ensure hash ignores prefix. @@ -53,7 +114,7 @@ func TestDigestFromPathnameWithFile(t *testing.T) { func TestDigestFromPathnameWithDirectory(t *testing.T) { relativePathname := "launchpad.net/match" - want := []byte{0x8, 0xe5, 0x5b, 0xb6, 0xb8, 0x44, 0x91, 0x80, 0x46, 0xca, 0xc6, 0x2e, 0x44, 0x7b, 0x42, 0xd4, 0xfb, 0x2d, 0xfd, 0x4c, 0xd9, 0xc9, 0xd, 0x38, 0x23, 0xed, 0xa5, 0xf4, 0xbc, 0x69, 0xd4, 0x8b} + want := []byte{0x74, 0xe, 0x17, 0x87, 0xd4, 0x8e, 0x56, 0x2e, 0x7e, 0x32, 0x4e, 0x80, 0x3a, 0x5f, 0x3a, 0x10, 0x33, 0x43, 0x2c, 0x24, 0x8e, 0xf7, 0x1a, 0x37, 0x5e, 0x76, 0xf4, 0x6, 0x2b, 0xf3, 0xfd, 0x91} // NOTE: Create the hash using both an absolute and a relative pathname to // ensure hash ignores prefix. 
@@ -85,13 +146,13 @@ func TestVerifyDepTree(t *testing.T) { vendorRoot := getTestdataVerifyRoot(t) wantSums := map[string][]byte{ - "github.com/alice/match": []byte{0x13, 0x22, 0x4c, 0xd5, 0x5b, 0x3e, 0x29, 0xe5, 0x40, 0x23, 0x56, 0xc7, 0xc1, 0x50, 0x4, 0x1, 0x60, 0x46, 0xd4, 0x14, 0x9b, 0x63, 0x88, 0x37, 0xeb, 0x53, 0xfc, 0xe7, 0x63, 0x53, 0xf3, 0xf6}, + "github.com/alice/match": []byte{0x87, 0x4, 0x53, 0x5f, 0xb8, 0xca, 0xe9, 0x52, 0x40, 0x41, 0xcd, 0x93, 0x2e, 0x42, 0xfb, 0x14, 0x77, 0x57, 0x9c, 0x3, 0x6b, 0xe1, 0x15, 0xe7, 0xfa, 0xc1, 0xf0, 0x98, 0x4b, 0x61, 0x9f, 0x48}, "github.com/alice/mismatch": []byte("some non-matching digest"), "github.com/bob/emptyDigest": nil, // empty hash result + "github.com/bob/match": []byte{0x6a, 0x11, 0xf9, 0x46, 0xcc, 0xf3, 0x44, 0xdb, 0x2c, 0x6b, 0xcc, 0xb4, 0x0, 0x71, 0xe5, 0xc4, 0xee, 0x9a, 0x26, 0x71, 0x9d, 0xab, 0xe2, 0x40, 0xb7, 0xbf, 0x2a, 0xd9, 0x4, 0xcf, 0xc9, 0x46}, "github.com/charlie/notInTree": nil, // not in tree result ought to superseede empty digest result - "github.com/bob/match": []byte{0x73, 0xf5, 0xdc, 0xad, 0x39, 0x31, 0x25, 0x78, 0x9d, 0x2c, 0xbe, 0xd0, 0x9e, 0x6, 0x40, 0x64, 0xcd, 0x32, 0x9c, 0x28, 0xf4, 0xa0, 0xd2, 0xa4, 0x46, 0xca, 0x1f, 0x13, 0xfb, 0xaf, 0x53, 0x96}, // matching result at seldomly found directory level - "launchpad.net/match": []byte{0x8, 0xe5, 0x5b, 0xb6, 0xb8, 0x44, 0x91, 0x80, 0x46, 0xca, 0xc6, 0x2e, 0x44, 0x7b, 0x42, 0xd4, 0xfb, 0x2d, 0xfd, 0x4c, 0xd9, 0xc9, 0xd, 0x38, 0x23, 0xed, 0xa5, 0xf4, 0xbc, 0x69, 0xd4, 0x8b}, + "launchpad.net/match": []byte{0x74, 0xe, 0x17, 0x87, 0xd4, 0x8e, 0x56, 0x2e, 0x7e, 0x32, 0x4e, 0x80, 0x3a, 0x5f, 0x3a, 0x10, 0x33, 0x43, 0x2c, 0x24, 0x8e, 0xf7, 0x1a, 0x37, 0x5e, 0x76, 0xf4, 0x6, 0x2b, 0xf3, 0xfd, 0x91}, } status, err := VerifyDepTree(vendorRoot, wantSums) @@ -149,61 +210,3 @@ func BenchmarkVerifyDepTree(b *testing.B) { } } } - -type crossBuffer struct { - readCount int -} - -func (cb *crossBuffer) Read(buf []byte) (int, error) { - const first = 
"this is the result\r\nfrom the first read\r" - const second = "\nthis is the result\r\nfrom the second read\r" - - cb.readCount++ - - switch cb.readCount { - case 1: - return copy(buf, first), nil - case 2: - return copy(buf, second), nil - default: - return 0, io.EOF - } -} - -func TestLineEndingWriteToCrossBufferCRLF(t *testing.T) { - src := &lineEndingWriterTo{new(crossBuffer)} - dst := new(bytes.Buffer) - - // the final CR byte ought to be conveyed to destination - const output = "this is the result\nfrom the first read\nthis is the result\nfrom the second read\r" - - n, err := io.Copy(dst, src) - if got, want := err, error(nil); got != want { - t.Errorf("(GOT): %v; (WNT): %v", got, want) - } - if got, want := n, int64(len(output)); got != want { - t.Errorf("(GOT): %v; (WNT): %v", got, want) - } - if got, want := dst.Bytes(), []byte(output); !bytes.Equal(got, want) { - t.Errorf("(GOT): %#q; (WNT): %#q", got, want) - } -} - -func TestLineEndingWriteTo(t *testing.T) { - const input = "now is the time\r\nfor all good engineers\r\nto improve their test coverage!\r\n" - const output = "now is the time\nfor all good engineers\nto improve their test coverage!\n" - - src := &lineEndingWriterTo{bytes.NewBufferString(input)} - dst := new(bytes.Buffer) - - n, err := io.Copy(dst, src) - if got, want := err, error(nil); got != want { - t.Errorf("(GOT): %v; (WNT): %v", got, want) - } - if got, want := n, int64(len(output)); got != want { - t.Errorf("(GOT): %v; (WNT): %v", got, want) - } - if got, want := dst.Bytes(), []byte(output); !bytes.Equal(got, want) { - t.Errorf("(GOT): %#q; (WNT): %#q", got, want) - } -} From f0b532dba42c2b0aff2b517ec9ce0b28e879a060 Mon Sep 17 00:00:00 2001 From: "Karrick S. 
McDermott" Date: Tue, 8 Aug 2017 14:43:35 -0400 Subject: [PATCH 12/36] bug fixes and better testing for lineEndingReader --- internal/gps/pkgtree/digest.go | 35 +++++++++----- internal/gps/pkgtree/digest_test.go | 74 +++++++++++++---------------- 2 files changed, 58 insertions(+), 51 deletions(-) diff --git a/internal/gps/pkgtree/digest.go b/internal/gps/pkgtree/digest.go index 1a75a137e2..2a9b77f962 100644 --- a/internal/gps/pkgtree/digest.go +++ b/internal/gps/pkgtree/digest.go @@ -162,37 +162,50 @@ func newLineEndingReader(src io.Reader) *lineEndingReader { // specified slice of bytes. It converts all CRLF byte sequences to LF, and // handles cases where CR and LF straddle across two Read operations. func (f *lineEndingReader) Read(buf []byte) (int, error) { - var startIndex int + buflen := len(buf) if f.prevReadEndedCR { // Read one less byte in case we need to insert CR in there - startIndex++ + buflen-- } - nr, er := f.src.Read(buf[startIndex:]) + nr, er := f.src.Read(buf[:buflen]) if nr > 0 { - if f.prevReadEndedCR && buf[startIndex] != '\n' { - buf[0] = '\r' - startIndex = 0 + if f.prevReadEndedCR && buf[0] != '\n' { + // Having a CRLF split across two Read operations is rare, so + // ignoring performance impact of copying entire buffer by one + // byte. Plus, `copy` builtin likely uses machien opcode for + // performing the memory copy. + copy(buf[1:nr+1], buf[:nr]) // shift data to right one byte + buf[0] = '\r' // insert the previous skipped CR byte at start of buf + nr++ // pretend we read one more byte } // Remove any CRLF sequences in buf, using `bytes.Index` because that // takes advantage of machine opcodes that search for byte patterns on // many architectures. - index := startIndex + var prevIndex int for { - index = bytes.Index(buf[index:], []byte("\r\n")) + index := bytes.Index(buf[prevIndex:nr], []byte("\r\n")) if index == -1 { break } // Want to skip index byte, where the CR is. 
- copy(buf[index:nr-1], buf[index+1:nr]) + copy(buf[prevIndex+index:nr-1], buf[prevIndex+index+1:nr]) nr-- + prevIndex = index } - // When final byte is CR, do not emit until we ensure first byte on - // next read is LF. + // When final byte from a read operation is CR, do not emit it until + // ensure first byte on next read is not LF. if f.prevReadEndedCR = buf[nr-1] == '\r'; f.prevReadEndedCR { nr-- // pretend byte was never read from source } + } else if f.prevReadEndedCR { + // Reading from source returned nothing, but this struct is sitting on a + // trailing CR from previous Read, so let's give it to client now. + buf[0] = '\r' + nr = 1 + er = nil + f.prevReadEndedCR = false // prevent infinite loop } return nr, er } diff --git a/internal/gps/pkgtree/digest_test.go b/internal/gps/pkgtree/digest_test.go index 5750d36547..f49a4055e4 100644 --- a/internal/gps/pkgtree/digest_test.go +++ b/internal/gps/pkgtree/digest_test.go @@ -14,65 +14,59 @@ import ( // crossBuffer is a test io.Reader that emits a few canned responses. type crossBuffer struct { - readCount int + readCount int + iterations []string } func (cb *crossBuffer) Read(buf []byte) (int, error) { - const first = "this is the result\r\nfrom the first read\r" - const second = "\nthis is the result\r\nfrom the second read\r" - - cb.readCount++ - - switch cb.readCount { - case 1: - return copy(buf, first), nil - case 2: - return copy(buf, second), nil - default: + if cb.readCount == len(cb.iterations) { return 0, io.EOF } + cb.readCount++ + return copy(buf, cb.iterations[cb.readCount-1]), nil } -//////////////////////////////////////// - -func TestLineEndingReadCrossBufferCRLF(t *testing.T) { - // The final CR byte ought to be conveyed to destination. 
- const output = "this is the result\nfrom the first read\nthis is the result\nfrom the second read\r" - - src := newLineEndingReader(new(crossBuffer)) +func streamThruLineEndingReader(t *testing.T, iterations []string) []byte { dst := new(bytes.Buffer) - - n, err := io.Copy(dst, src) + n, err := io.Copy(dst, newLineEndingReader(&crossBuffer{iterations: iterations})) if got, want := err, error(nil); got != want { t.Errorf("(GOT): %v; (WNT): %v", got, want) } - if got, want := n, int64(len(output)); got != want { + if got, want := n, int64(dst.Len()); got != want { t.Errorf("(GOT): %v; (WNT): %v", got, want) } - if got, want := dst.Bytes(), []byte(output); !bytes.Equal(got, want) { - t.Errorf("(GOT): %#q; (WNT): %#q", got, want) - } + return dst.Bytes() } -func TestLineEndingRead(t *testing.T) { - const input = "now is the time\r\nfor all good engineers\r\nto improve their test coverage!\r\n" - const output = "now is the time\nfor all good engineers\nto improve their test coverage!\n" - - src := newLineEndingReader(bytes.NewBufferString(input)) - dst := new(bytes.Buffer) - - n, err := io.Copy(dst, src) - if got, want := err, error(nil); got != want { - t.Errorf("(GOT): %v; (WNT): %v", got, want) - } - if got, want := n, int64(len(output)); got != want { - t.Errorf("(GOT): %v; (WNT): %v", got, want) +func TestLineEndingReader(t *testing.T) { + testCases := []struct { + input []string + output string + }{ + {[]string{"now is the time\r\n"}, "now is the time\n"}, + {[]string{"now is the time\n"}, "now is the time\n"}, + {[]string{"now is the time\r"}, "now is the time\r"}, // trailing CR ought to convey + {[]string{"\rnow is the time"}, "\rnow is the time"}, // CR not followed by LF ought to convey + {[]string{"\rnow is the time\r"}, "\rnow is the time\r"}, // CR not followed by LF ought to convey + + {[]string{"this is the result\r\nfrom the first read\r", "\nthis is the result\r\nfrom the second read\r"}, + "this is the result\nfrom the first read\nthis is the 
result\nfrom the second read\r"}, + {[]string{"now is the time\r\nfor all good engineers\r\nto improve their test coverage!\r\n"}, + "now is the time\nfor all good engineers\nto improve their test coverage!\n"}, + {[]string{"now is the time\r\nfor all good engineers\r", "\nto improve their test coverage!\r\n"}, + "now is the time\nfor all good engineers\nto improve their test coverage!\n"}, } - if got, want := dst.Bytes(), []byte(output); !bytes.Equal(got, want) { - t.Errorf("(GOT): %#q; (WNT): %#q", got, want) + + for _, testCase := range testCases { + got := streamThruLineEndingReader(t, testCase.input) + if want := []byte(testCase.output); !bytes.Equal(got, want) { + t.Errorf("Input: %#v; (GOT): %#q; (WNT): %#q", testCase.input, got, want) + } } } +//////////////////////////////////////// + func getTestdataVerifyRoot(t *testing.T) string { cwd, err := os.Getwd() if err != nil { From 911fda48cdaeb8f69475875bef2d91605cd3813b Mon Sep 17 00:00:00 2001 From: "Karrick S. McDermott" Date: Tue, 8 Aug 2017 17:14:45 -0400 Subject: [PATCH 13/36] TestScaffolding prints testdata_digest directory * trying to figure out condition of the testdata_digest directory on the build box --- internal/gps/pkgtree/digest_test.go | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/internal/gps/pkgtree/digest_test.go b/internal/gps/pkgtree/digest_test.go index f49a4055e4..ad453e30fa 100644 --- a/internal/gps/pkgtree/digest_test.go +++ b/internal/gps/pkgtree/digest_test.go @@ -7,6 +7,7 @@ package pkgtree import ( "bytes" "io" + "log" "os" "path/filepath" "testing" @@ -204,3 +205,17 @@ func BenchmarkVerifyDepTree(b *testing.B) { } } } + +func TestScaffolding(t *testing.T) { + err := filepath.Walk(getTestdataVerifyRoot(t), func(pathname string, info os.FileInfo, err error) error { + if err != nil { + t.Error(err) + return err + } + log.Printf("pathname: %q", pathname) + return nil + }) + if err != nil { + t.Fatal(err) + } +} From ecbbf03d394270805ab42beb94e70ca0fd57b92a Mon Sep 
17 00:00:00 2001 From: "Karrick S. McDermott" Date: Tue, 8 Aug 2017 17:24:49 -0400 Subject: [PATCH 14/36] pathnames returned as keys to VerifyDepTree normalized with filepath.ToSlash() --- internal/gps/pkgtree/digest.go | 4 ++-- internal/gps/pkgtree/digest_test.go | 18 ++++-------------- 2 files changed, 6 insertions(+), 16 deletions(-) diff --git a/internal/gps/pkgtree/digest.go b/internal/gps/pkgtree/digest.go index 2a9b77f962..e1e90e974e 100644 --- a/internal/gps/pkgtree/digest.go +++ b/internal/gps/pkgtree/digest.go @@ -385,7 +385,7 @@ func VerifyDepTree(vendorPathname string, wantSums map[string][]byte) (map[strin ls = DigestMismatchInLock } } - status[short] = ls + status[filepath.ToSlash(short)] = ls // NOTE: Mark current nodes and all parents: required. for pni := currentNode.myIndex; pni != -1; pni = otherNode.parentIndex { @@ -438,7 +438,7 @@ func VerifyDepTree(vendorPathname string, wantSums map[string][]byte) (map[strin for i := len(nodes) - 1; i > 0; i-- { currentNode = nodes[i] if !currentNode.isRequiredAncestor && nodes[currentNode.parentIndex].isRequiredAncestor { - status[currentNode.pathname[prefixLength:]] = NotInLock + status[filepath.ToSlash(currentNode.pathname[prefixLength:])] = NotInLock } } diff --git a/internal/gps/pkgtree/digest_test.go b/internal/gps/pkgtree/digest_test.go index ad453e30fa..3d54a1b92f 100644 --- a/internal/gps/pkgtree/digest_test.go +++ b/internal/gps/pkgtree/digest_test.go @@ -184,6 +184,10 @@ func TestVerifyDepTree(t *testing.T) { } } + for k, v := range status { + log.Printf("%q: %v", k, v) + } + checkStatus(t, status, "github.com/alice/match", NoMismatch) checkStatus(t, status, "github.com/alice/mismatch", DigestMismatchInLock) checkStatus(t, status, "github.com/alice/notInLock", NotInLock) @@ -205,17 +209,3 @@ func BenchmarkVerifyDepTree(b *testing.B) { } } } - -func TestScaffolding(t *testing.T) { - err := filepath.Walk(getTestdataVerifyRoot(t), func(pathname string, info os.FileInfo, err error) error { - if err != 
nil { - t.Error(err) - return err - } - log.Printf("pathname: %q", pathname) - return nil - }) - if err != nil { - t.Fatal(err) - } -} From 0f7b4192b24da0663e809cc9708003a91bc80f4b Mon Sep 17 00:00:00 2001 From: "Karrick S. McDermott" Date: Tue, 8 Aug 2017 18:05:43 -0400 Subject: [PATCH 15/36] continue printing VerifyDepTree response --- internal/gps/pkgtree/digest_test.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/internal/gps/pkgtree/digest_test.go b/internal/gps/pkgtree/digest_test.go index 3d54a1b92f..d204925b1e 100644 --- a/internal/gps/pkgtree/digest_test.go +++ b/internal/gps/pkgtree/digest_test.go @@ -169,6 +169,10 @@ func TestVerifyDepTree(t *testing.T) { } } + for k, v := range status { + log.Printf("STATUS: %q: %v", k, v) + } + if got, want := len(status), 7; got != want { t.Errorf("\n(GOT): %v; (WNT): %v", got, want) } @@ -184,10 +188,6 @@ func TestVerifyDepTree(t *testing.T) { } } - for k, v := range status { - log.Printf("%q: %v", k, v) - } - checkStatus(t, status, "github.com/alice/match", NoMismatch) checkStatus(t, status, "github.com/alice/mismatch", DigestMismatchInLock) checkStatus(t, status, "github.com/alice/notInLock", NotInLock) From 443fcd3cc4bdfb4c61e02ea0385f239f1e9c9c9f Mon Sep 17 00:00:00 2001 From: "Karrick S. McDermott" Date: Tue, 8 Aug 2017 18:05:56 -0400 Subject: [PATCH 16/36] comments and some logic changes --- internal/gps/pkgtree/digest.go | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/internal/gps/pkgtree/digest.go b/internal/gps/pkgtree/digest.go index e1e90e974e..ee9543e6a5 100644 --- a/internal/gps/pkgtree/digest.go +++ b/internal/gps/pkgtree/digest.go @@ -23,7 +23,7 @@ const ( // when walking vendor root hierarchy, ignore file system nodes of the // following types. 
- skipModes = os.ModeDevice | os.ModeNamedPipe | os.ModeSocket | os.ModeCharDevice + skipSpecialNodes = os.ModeDevice | os.ModeNamedPipe | os.ModeSocket | os.ModeCharDevice ) // DigestFromPathname returns a deterministic hash of the specified file system @@ -76,7 +76,7 @@ func DigestFromPathname(prefix, pathname string) ([]byte, error) { mode := fi.Mode() // Skip file system nodes we are not concerned with - if mode&skipModes != 0 { + if mode&skipSpecialNodes != 0 { continue } @@ -363,15 +363,14 @@ func VerifyDepTree(vendorPathname string, wantSums map[string][]byte) (map[strin // expected digest. status := make(map[string]VendorStatus) for pathname := range wantSums { - status[pathname] = NotInTree + status[filepath.ToSlash(pathname)] = NotInTree } for len(queue) > 0 { - // pop node from the queue (depth first traversal, reverse lexicographical order) + // pop node from the queue (depth first traversal, reverse lexicographical order inside a directory) lq1 := len(queue) - 1 currentNode, queue = queue[lq1], queue[:lq1] - // log.Printf("NODE: %s", currentNode) short := currentNode.pathname[prefixLength:] // chop off the vendor root prefix, including the path separator if expectedSum, ok := wantSums[short]; ok { ls := EmptyDigestInLock @@ -388,10 +387,8 @@ func VerifyDepTree(vendorPathname string, wantSums map[string][]byte) (map[strin status[filepath.ToSlash(short)] = ls // NOTE: Mark current nodes and all parents: required. 
- for pni := currentNode.myIndex; pni != -1; pni = otherNode.parentIndex { - otherNode = nodes[pni] - otherNode.isRequiredAncestor = true - // log.Printf("parent node: %s", otherNode) + for i := currentNode.myIndex; i != -1; i = nodes[i].parentIndex { + nodes[i].isRequiredAncestor = true } continue // do not need to process directory's contents @@ -414,22 +411,17 @@ func VerifyDepTree(vendorPathname string, wantSums map[string][]byte) (map[strin return nil, errors.Wrap(err, "cannot Stat") } // Skip non-interesting file system nodes - mode := fi.Mode() - if mode&skipModes != 0 || mode&os.ModeSymlink != 0 { - // log.Printf("DEBUG: skipping: %v; %q", mode, currentNode.pathname) + if fi.Mode()&skipSpecialNodes != 0 { continue } - + // Keep track of all regular files and directories, but do not + // need to visit files. nodes = append(nodes, otherNode) if fi.IsDir() { queue = append(queue, otherNode) } } } - - if err != nil { - return nil, err // early termination iff error - } } // Ignoring first node in the list, walk nodes from end to From 85750408daa2834c764aee3f8aacd726aaf10531 Mon Sep 17 00:00:00 2001 From: "Karrick S. McDermott" Date: Wed, 9 Aug 2017 10:55:29 -0500 Subject: [PATCH 17/36] normalize pathnames as keys for expected digests map --- internal/gps/pkgtree/digest.go | 44 ++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/internal/gps/pkgtree/digest.go b/internal/gps/pkgtree/digest.go index ee9543e6a5..b230081ee6 100644 --- a/internal/gps/pkgtree/digest.go +++ b/internal/gps/pkgtree/digest.go @@ -278,15 +278,12 @@ func sortedListOfDirectoryChildrenFromPathname(pathname string) ([]string, error if err != nil { return nil, errors.Wrap(err, "cannot Open") } - childrenNames, err := sortedListOfDirectoryChildrenFromFileHandle(fh) - // Close the file handle to the open directory without masking possible // previous error value. 
if er := fh.Close(); err == nil { err = errors.Wrap(er, "cannot Close") } - return childrenNames, err } @@ -298,31 +295,34 @@ func sortedListOfDirectoryChildrenFromFileHandle(fh *os.File) ([]string, error) if err != nil { return nil, errors.Wrap(err, "cannot Readdirnames") } - if len(childrenNames) > 0 { - sort.Strings(childrenNames) - } + sort.Strings(childrenNames) return childrenNames, nil } // VerifyDepTree verifies dependency tree according to expected digest sums, and // returns an associative array of file system nodes and their respective vendor // status, in accordance with the provided expected digest sums parameter. -func VerifyDepTree(vendorPathname string, wantSums map[string][]byte) (map[string]VendorStatus, error) { +// +// The vendor root will be converted to os-specific pathname for processing, and +// the map of project names to their expected digests are required to have the +// solidus character, `/`, as their path separator. For example, +// "github.com/alice/alice1". 
+func VerifyDepTree(vendorRoot string, wantSums map[string][]byte) (map[string]VendorStatus, error) { + vendorRoot = filepath.Clean(vendorRoot) + pathSeparator + // NOTE: Ensure top level pathname is a directory - fi, err := os.Stat(vendorPathname) + fi, err := os.Stat(vendorRoot) if err != nil { return nil, errors.Wrap(err, "cannot Stat") } if !fi.IsDir() { - return nil, errors.Errorf("cannot verify non directory: %q", vendorPathname) + return nil, errors.Errorf("cannot verify non directory: %q", vendorRoot) } - vendorPathname = filepath.Clean(vendorPathname) + pathSeparator - prefixLength := len(vendorPathname) - var otherNode *fsnode - currentNode := &fsnode{pathname: vendorPathname, parentIndex: -1, isRequiredAncestor: true} + currentNode := &fsnode{pathname: vendorRoot, parentIndex: -1, isRequiredAncestor: true} queue := []*fsnode{currentNode} // queue of directories that must be inspected + prefixLength := len(vendorRoot) // In order to identify all file system nodes that are not in the lock file, // represented by the specified expected sums parameter, and in order to @@ -363,7 +363,7 @@ func VerifyDepTree(vendorPathname string, wantSums map[string][]byte) (map[strin // expected digest. status := make(map[string]VendorStatus) for pathname := range wantSums { - status[filepath.ToSlash(pathname)] = NotInTree + status[pathname] = NotInTree } for len(queue) > 0 { @@ -371,20 +371,24 @@ func VerifyDepTree(vendorPathname string, wantSums map[string][]byte) (map[strin lq1 := len(queue) - 1 currentNode, queue = queue[lq1], queue[:lq1] - short := currentNode.pathname[prefixLength:] // chop off the vendor root prefix, including the path separator - if expectedSum, ok := wantSums[short]; ok { + // Chop off the vendor root prefix, including the path separator, then + // normalize. 
+ projectNormalized := filepath.ToSlash(currentNode.pathname[prefixLength:]) + + if expectedSum, ok := wantSums[projectNormalized]; ok { ls := EmptyDigestInLock if len(expectedSum) > 0 { - ls = NoMismatch - projectSum, err := DigestFromPathname(vendorPathname, short) + projectSum, err := DigestFromPathname(vendorRoot, projectNormalized) if err != nil { return nil, errors.Wrap(err, "cannot compute dependency hash") } - if !bytes.Equal(projectSum, expectedSum) { + if bytes.Equal(projectSum, expectedSum) { + ls = NoMismatch + } else { ls = DigestMismatchInLock } } - status[filepath.ToSlash(short)] = ls + status[projectNormalized] = ls // NOTE: Mark current nodes and all parents: required. for i := currentNode.myIndex; i != -1; i = nodes[i].parentIndex { From 541b3f8a26013166d0f69281d46375e275cbb568 Mon Sep 17 00:00:00 2001 From: "Karrick S. McDermott" Date: Wed, 9 Aug 2017 12:29:32 -0400 Subject: [PATCH 18/36] no longer dumps status output during testing --- internal/gps/pkgtree/digest_test.go | 5 ----- 1 file changed, 5 deletions(-) diff --git a/internal/gps/pkgtree/digest_test.go b/internal/gps/pkgtree/digest_test.go index d204925b1e..f49a4055e4 100644 --- a/internal/gps/pkgtree/digest_test.go +++ b/internal/gps/pkgtree/digest_test.go @@ -7,7 +7,6 @@ package pkgtree import ( "bytes" "io" - "log" "os" "path/filepath" "testing" @@ -169,10 +168,6 @@ func TestVerifyDepTree(t *testing.T) { } } - for k, v := range status { - log.Printf("STATUS: %q: %v", k, v) - } - if got, want := len(status), 7; got != want { t.Errorf("\n(GOT): %v; (WNT): %v", got, want) } From 0a017134476445418d35d8817d8109aedc177917 Mon Sep 17 00:00:00 2001 From: "Karrick S. 
McDermott" Date: Wed, 9 Aug 2017 12:29:32 -0400 Subject: [PATCH 19/36] no longer dumps status output during testing * some more edge test cases added --- internal/gps/pkgtree/digest.go | 33 +++++++++++++++++++++-------- internal/gps/pkgtree/digest_test.go | 5 ++++- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/internal/gps/pkgtree/digest.go b/internal/gps/pkgtree/digest.go index b230081ee6..0e4eef15bf 100644 --- a/internal/gps/pkgtree/digest.go +++ b/internal/gps/pkgtree/digest.go @@ -158,6 +158,8 @@ func newLineEndingReader(src io.Reader) *lineEndingReader { return &lineEndingReader{src: src} } +var crlf = []byte("\r\n") + // Read consumes bytes from the structure's source io.Reader to fill the // specified slice of bytes. It converts all CRLF byte sequences to LF, and // handles cases where CR and LF straddle across two Read operations. @@ -172,27 +174,40 @@ func (f *lineEndingReader) Read(buf []byte) (int, error) { if f.prevReadEndedCR && buf[0] != '\n' { // Having a CRLF split across two Read operations is rare, so // ignoring performance impact of copying entire buffer by one - // byte. Plus, `copy` builtin likely uses machien opcode for + // byte. Plus, `copy` builtin likely uses machine opcode for // performing the memory copy. copy(buf[1:nr+1], buf[:nr]) // shift data to right one byte buf[0] = '\r' // insert the previous skipped CR byte at start of buf nr++ // pretend we read one more byte } - // Remove any CRLF sequences in buf, using `bytes.Index` because that + // Remove any CRLF sequences in buf, using `bytes.Index` because it // takes advantage of machine opcodes that search for byte patterns on - // many architectures. - var prevIndex int + // many architectures; and, using builtin `copy` which also takes + // advantage of machine opcodes to quickly move overlapping regions of + // bytes in memory. 
+ var searchOffset, shiftCount int + previousIndex := -1 // index of previous CRLF index; -1 means no previous index known for { - index := bytes.Index(buf[prevIndex:nr], []byte("\r\n")) + index := bytes.Index(buf[searchOffset:nr], crlf) if index == -1 { break } - // Want to skip index byte, where the CR is. - copy(buf[prevIndex+index:nr-1], buf[prevIndex+index+1:nr]) - nr-- - prevIndex = index + index += searchOffset // convert relative index to absolute + if previousIndex != -1 { + // shift substring between previous index and this index + copy(buf[previousIndex-shiftCount:], buf[previousIndex+1:index]) + shiftCount++ // next shift needs to be 1 byte to the left + } + previousIndex = index + searchOffset = index + 2 // start next search after len(crlf) + } + if previousIndex != -1 { + // handle final shift + copy(buf[previousIndex-shiftCount:], buf[previousIndex+1:nr]) + shiftCount++ } + nr -= shiftCount // shorten byte read count by number of shifts executed // When final byte from a read operation is CR, do not emit it until // ensure first byte on next read is not LF. 
diff --git a/internal/gps/pkgtree/digest_test.go b/internal/gps/pkgtree/digest_test.go index f49a4055e4..22c346b1da 100644 --- a/internal/gps/pkgtree/digest_test.go +++ b/internal/gps/pkgtree/digest_test.go @@ -43,7 +43,10 @@ func TestLineEndingReader(t *testing.T) { input []string output string }{ + {[]string{"\r"}, "\r"}, + {[]string{"\r\n"}, "\n"}, {[]string{"now is the time\r\n"}, "now is the time\n"}, + {[]string{"now is the time\r\n(trailing data)"}, "now is the time\n(trailing data)"}, {[]string{"now is the time\n"}, "now is the time\n"}, {[]string{"now is the time\r"}, "now is the time\r"}, // trailing CR ought to convey {[]string{"\rnow is the time"}, "\rnow is the time"}, // CR not followed by LF ought to convey @@ -193,7 +196,7 @@ func TestVerifyDepTree(t *testing.T) { } func BenchmarkVerifyDepTree(b *testing.B) { - b.Skip("Eliding benchmark of user's Go source directory") + // b.Skip("Eliding benchmark of user's Go source directory") prefix := filepath.Join(os.Getenv("GOPATH"), "src") From 8a0c3656ac16206d9413db9807a20188965a2ba0 Mon Sep 17 00:00:00 2001 From: "Karrick S. McDermott" Date: Sat, 12 Aug 2017 04:41:39 -0400 Subject: [PATCH 20/36] two loop solution Remove any CRLF sequences in buf, using `bytes.Index` because that takes advantage of machine opcodes that search for byte patterns on many architectures. This operation is done in two parts: find and record the CRLF sequence positions, then loop through and copy bytes to eliminate the CR bytes. 
--- internal/gps/pkgtree/digest.go | 50 ++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 20 deletions(-) diff --git a/internal/gps/pkgtree/digest.go b/internal/gps/pkgtree/digest.go index 0e4eef15bf..6cb78d12ad 100644 --- a/internal/gps/pkgtree/digest.go +++ b/internal/gps/pkgtree/digest.go @@ -181,33 +181,43 @@ func (f *lineEndingReader) Read(buf []byte) (int, error) { nr++ // pretend we read one more byte } - // Remove any CRLF sequences in buf, using `bytes.Index` because it + // Remove any CRLF sequences in buf, using `bytes.Index` because that // takes advantage of machine opcodes that search for byte patterns on - // many architectures; and, using builtin `copy` which also takes - // advantage of machine opcodes to quickly move overlapping regions of - // bytes in memory. - var searchOffset, shiftCount int - previousIndex := -1 // index of previous CRLF index; -1 means no previous index known + // many architectures. This operation is done in two parts: find and + // record the CRLF sequence positions, then loop through and copy bytes + // to eliminate the CR bytes. + // + // Record indexes of all CRLF sequences in buffer, bound by number of + // bytes read. 
+ var indexes []int + var offset int for { - index := bytes.Index(buf[searchOffset:nr], crlf) + index := bytes.Index(buf[offset:nr], crlf) if index == -1 { break } - index += searchOffset // convert relative index to absolute - if previousIndex != -1 { - // shift substring between previous index and this index - copy(buf[previousIndex-shiftCount:], buf[previousIndex+1:index]) - shiftCount++ // next shift needs to be 1 byte to the left - } - previousIndex = index - searchOffset = index + 2 // start next search after len(crlf) + index += offset + indexes = append(indexes, index) + offset = index + 2 // or: len(crlf) } - if previousIndex != -1 { - // handle final shift - copy(buf[previousIndex-shiftCount:], buf[previousIndex+1:nr]) - shiftCount++ + // Shift sub strings between indexes recorded in above loop. + if count := len(indexes); count > 0 { + offset = 0 + var i, j, ij int + for ii := 0; ii < count; ii++ { + i = indexes[ii] + if ij = ii + 1; ij == count { + j = nr // last substring: bound by number of bytes read + } else { + j = indexes[ij] // bound copy by index of following substring + } + copy(buf[i+offset:], buf[i+1:j]) + // Each loop we replace 2 bytes with 1 byte, and thus alter + // future shifts by 1 byte to the left for each iteration. + offset-- + } + nr -= len(indexes) } - nr -= shiftCount // shorten byte read count by number of shifts executed // When final byte from a read operation is CR, do not emit it until // ensure first byte on next read is not LF. From cba6ebe940e8c80a513d1f787ac7f0e72a03fb5c Mon Sep 17 00:00:00 2001 From: "Karrick S. 
McDermott" Date: Sat, 12 Aug 2017 04:43:44 -0400 Subject: [PATCH 21/36] O(n) loop for CRLF -> LF translation in lineEndingReader --- internal/gps/pkgtree/digest.go | 50 ++++++++++++++-------------------- 1 file changed, 20 insertions(+), 30 deletions(-) diff --git a/internal/gps/pkgtree/digest.go b/internal/gps/pkgtree/digest.go index 6cb78d12ad..0e4eef15bf 100644 --- a/internal/gps/pkgtree/digest.go +++ b/internal/gps/pkgtree/digest.go @@ -181,43 +181,33 @@ func (f *lineEndingReader) Read(buf []byte) (int, error) { nr++ // pretend we read one more byte } - // Remove any CRLF sequences in buf, using `bytes.Index` because that + // Remove any CRLF sequences in buf, using `bytes.Index` because it // takes advantage of machine opcodes that search for byte patterns on - // many architectures. This operation is done in two parts: find and - // record the CRLF sequence positions, then loop through and copy bytes - // to eliminate the CR bytes. - // - // Record indexes of all CRLF sequences in buffer, bound by number of - // bytes read. - var indexes []int - var offset int + // many architectures; and, using builtin `copy` which also takes + // advantage of machine opcodes to quickly move overlapping regions of + // bytes in memory. + var searchOffset, shiftCount int + previousIndex := -1 // index of previous CRLF index; -1 means no previous index known for { - index := bytes.Index(buf[offset:nr], crlf) + index := bytes.Index(buf[searchOffset:nr], crlf) if index == -1 { break } - index += offset - indexes = append(indexes, index) - offset = index + 2 // or: len(crlf) - } - // Shift sub strings between indexes recorded in above loop. 
- if count := len(indexes); count > 0 { - offset = 0 - var i, j, ij int - for ii := 0; ii < count; ii++ { - i = indexes[ii] - if ij = ii + 1; ij == count { - j = nr // last substring: bound by number of bytes read - } else { - j = indexes[ij] // bound copy by index of following substring - } - copy(buf[i+offset:], buf[i+1:j]) - // Each loop we replace 2 bytes with 1 byte, and thus alter - // future shifts by 1 byte to the left for each iteration. - offset-- + index += searchOffset // convert relative index to absolute + if previousIndex != -1 { + // shift substring between previous index and this index + copy(buf[previousIndex-shiftCount:], buf[previousIndex+1:index]) + shiftCount++ // next shift needs to be 1 byte to the left } - nr -= len(indexes) + previousIndex = index + searchOffset = index + 2 // start next search after len(crlf) + } + if previousIndex != -1 { + // handle final shift + copy(buf[previousIndex-shiftCount:], buf[previousIndex+1:nr]) + shiftCount++ } + nr -= shiftCount // shorten byte read count by number of shifts executed // When final byte from a read operation is CR, do not emit it until // ensure first byte on next read is not LF. From 59a104697c3de0c52b03bec8978d55e60f2825c5 Mon Sep 17 00:00:00 2001 From: "Karrick S. McDermott" Date: Sat, 12 Aug 2017 05:09:15 -0400 Subject: [PATCH 22/36] typos --- internal/gps/pkgtree/digest.go | 22 ++++++++++++---------- internal/gps/pkgtree/digest_test.go | 3 +-- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/internal/gps/pkgtree/digest.go b/internal/gps/pkgtree/digest.go index 0e4eef15bf..b756832edc 100644 --- a/internal/gps/pkgtree/digest.go +++ b/internal/gps/pkgtree/digest.go @@ -83,7 +83,7 @@ func DigestFromPathname(prefix, pathname string) ([]byte, error) { // Write the prefix-stripped pathname to hash because the hash is as // much a function of the relative names of the files and directories as // it is their contents. 
Added benefit is that even empty directories - // and symbolic links will effect final hash value. Use + // and symbolic links will affect final hash value. Use // `filepath.ToSlash` to ensure relative pathname is os-agnostic. writeBytesWithNull(h, []byte(filepath.ToSlash(pathname[prefixLength:]))) @@ -251,8 +251,8 @@ const ( NoMismatch // EmptyDigestInLock is used when the digest for a dependency listed in the - // lock file is the empty string. NOTE: Seems like a special case of - // DigestMismatchInLock. + // lock file is the empty string. While this is a special case of + // DigestMismatchInLock, keeping both cases discrete is a desired feature. EmptyDigestInLock // DigestMismatchInLock is used when the digest for a dependency listed in @@ -314,9 +314,10 @@ func sortedListOfDirectoryChildrenFromFileHandle(fh *os.File) ([]string, error) return childrenNames, nil } -// VerifyDepTree verifies dependency tree according to expected digest sums, and -// returns an associative array of file system nodes and their respective vendor -// status, in accordance with the provided expected digest sums parameter. +// VerifyDepTree verifies a dependency tree according to expected digest sums, +// and returns an associative array of file system nodes and their respective +// vendor status, in accordance with the provided expected digest sums +// parameter. // // The vendor root will be converted to os-specific pathname for processing, and // the map of project names to their expected digests are required to have the @@ -343,9 +344,10 @@ func VerifyDepTree(vendorRoot string, wantSums map[string][]byte) (map[string]Ve // represented by the specified expected sums parameter, and in order to // only report the top level of a subdirectory of file system nodes, rather // than every node internal to them, we will create a tree of nodes stored - // in a slice. We do this because we do not know at what level a project - // exists at. 
Some projects are fewer than and some projects more than the - // typical three layer subdirectory under the vendor root directory. + // in a slice. We do this because we cannot predict the depth at which + // project roots occur. Some projects are fewer than and some projects more + // than the typical three layer subdirectory under the vendor root + // directory. // // For a following few examples, assume the below vendor root directory: // @@ -353,7 +355,7 @@ func VerifyDepTree(vendorRoot string, wantSums map[string][]byte) (map[string]Ve // github.com/alice/alice2/a2.go // github.com/bob/bob1/b1.go // github.com/bob/bob2/b2.go - // launghpad.net/nifty/n1.go + // launchpad.net/nifty/n1.go // // 1) If only the `alice1` and `alice2` projects were in the lock file, we'd // prefer the output to state that `github.com/bob` is `NotInLock`. diff --git a/internal/gps/pkgtree/digest_test.go b/internal/gps/pkgtree/digest_test.go index 22c346b1da..88e59eb2c6 100644 --- a/internal/gps/pkgtree/digest_test.go +++ b/internal/gps/pkgtree/digest_test.go @@ -75,8 +75,7 @@ func getTestdataVerifyRoot(t *testing.T) string { if err != nil { t.Fatal(err) } - parent, _ := filepath.Split(cwd) - return filepath.Join(parent, "testdata_digest") + return filepath.Join(filepath.Dir(cwd), "testdata_digest") } func TestDigestFromPathnameWithFile(t *testing.T) { From c5be3d892cc3ff9d5ca06bcb3fa6522f87f8c41f Mon Sep 17 00:00:00 2001 From: "Karrick S. 
McDermott" Date: Sun, 13 Aug 2017 03:07:27 -0400 Subject: [PATCH 23/36] NotInLock loop more easy to read --- internal/gps/pkgtree/digest.go | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/internal/gps/pkgtree/digest.go b/internal/gps/pkgtree/digest.go index b756832edc..b8005a4c1b 100644 --- a/internal/gps/pkgtree/digest.go +++ b/internal/gps/pkgtree/digest.go @@ -445,15 +445,19 @@ func VerifyDepTree(vendorRoot string, wantSums map[string][]byte) (map[string]Ve } } - // Ignoring first node in the list, walk nodes from end to - // beginning. Whenever a node is not required, but its parent is required, - // then that node and all under it ought to be marked as `NotInLock`. - for i := len(nodes) - 1; i > 0; i-- { - currentNode = nodes[i] + // Ignoring first node in the list, walk nodes from last to first. Whenever + // the current node is not required, but its parent is required, then the + // current node ought to be marked as `NotInLock`. + for len(nodes) > 1 { + // pop off right-most node from slice of nodes + ln1 := len(nodes) - 1 + currentNode, nodes = nodes[ln1], nodes[:ln1] + if !currentNode.isRequiredAncestor && nodes[currentNode.parentIndex].isRequiredAncestor { status[filepath.ToSlash(currentNode.pathname[prefixLength:])] = NotInLock } } + currentNode, nodes = nil, nil return status, nil } From 87959d06040dfb431650b55abb2ea45a130c0316 Mon Sep 17 00:00:00 2001 From: "Karrick S. McDermott" Date: Sun, 13 Aug 2017 07:03:05 -0400 Subject: [PATCH 24/36] DigestFromPathname -> DigestFromDirectory * Only works with directories; no longer with files. * Takes a single argument, that of the directory to hash. * Hashes existance and type of all file system nodes in the directory that it finds. * When hashing file system node names, it no longer includes the prefix matching the directory name parameter. 
--- internal/gps/pkgtree/digest.go | 125 +++++++++++++++++++--------- internal/gps/pkgtree/digest_test.go | 51 ++++-------- 2 files changed, 99 insertions(+), 77 deletions(-) diff --git a/internal/gps/pkgtree/digest.go b/internal/gps/pkgtree/digest.go index b8005a4c1b..9b6682cf9f 100644 --- a/internal/gps/pkgtree/digest.go +++ b/internal/gps/pkgtree/digest.go @@ -7,6 +7,7 @@ package pkgtree import ( "bytes" "crypto/sha256" + "encoding/binary" "fmt" "hash" "io" @@ -26,12 +27,9 @@ const ( skipSpecialNodes = os.ModeDevice | os.ModeNamedPipe | os.ModeSocket | os.ModeCharDevice ) -// DigestFromPathname returns a deterministic hash of the specified file system -// node, performing a breadth-first traversal of directories. While the -// specified prefix is joined with the pathname to walk the file system, the -// prefix string is eliminated from the pathname of the nodes encounted when -// hashing the pathnames, so that the resultant hash is agnostic to the absolute -// root directory path of the nodes being checked. +// DigestFromDirectory returns a hash of the specified directory contents, which +// will match the hash computed for any directory on any supported Go platform +// whose contents exactly match the specified directory. // // This function ignores any file system node named `vendor`, `.bzr`, `.git`, // `.hg`, and `.svn`, as these are typically used as Version Control System @@ -41,60 +39,105 @@ const ( // hash includes the pathname to every discovered file system node, whether it // is an empty directory, a non-empty directory, empty file, non-empty file, or // symbolic link. If a symbolic link, the referent name is included. If a -// non-empty file, the file's contents are incuded. If a non-empty directory, +// non-empty file, the file's contents are included. If a non-empty directory, // the contents of the directory are included. 
// // While filepath.Walk could have been used, that standard library function // skips symbolic links, and for now, we want the hash to include the symbolic // link referents. -func DigestFromPathname(prefix, pathname string) ([]byte, error) { +func DigestFromDirectory(dirname string) ([]byte, error) { + dirname = filepath.Clean(dirname) + + // Ensure parameter is a directory + fi, err := os.Stat(dirname) + if err != nil { + return nil, errors.Wrap(err, "cannot Stat") + } + if !fi.IsDir() { + return nil, errors.Errorf("cannot verify non directory: %q", dirname) + } + // Create a single hash instance for the entire operation, rather than a new // hash for each node we encounter. h := sha256.New() - // Initialize a work queue with the os-agnostic cleaned up pathname. Note - // that we use `filepath.Clean` rather than `filepath.Abs`, because the hash - // has pathnames which are relative to prefix, and there is no reason to - // convert to absolute pathname for every invocation of this function. - prefix = filepath.Clean(prefix) + pathSeparator - prefixLength := len(prefix) // store length to trim off pathnames later - pathnameQueue := []string{filepath.Join(prefix, pathname)} + // Initialize a work queue with the empty string, which signifies the + // starting directory itself. + queue := []string{""} - var written int64 + var relative string // pathname relative to dirname + var pathname string // formed by combining dirname with relative for each node + var bytesWritten int64 + modeBytes := make([]byte, 4) // scratch place to store encoded os.FileMode (uint32) // As we enumerate over the queue and encounter a directory, its children // will be added to the work queue. 
- for len(pathnameQueue) > 0 { + for len(queue) > 0 { // Unshift a pathname from the queue (breadth-first traversal of // hierarchy) - pathname, pathnameQueue = pathnameQueue[0], pathnameQueue[1:] + relative, queue = queue[0], queue[1:] + pathname = filepath.Join(dirname, relative) - fi, err := os.Lstat(pathname) + fi, err = os.Lstat(pathname) if err != nil { return nil, errors.Wrap(err, "cannot Lstat") } - mode := fi.Mode() - // Skip file system nodes we are not concerned with - if mode&skipSpecialNodes != 0 { - continue + // We could make our own enum-like data type for encoding the file type, + // but Go's runtime already gives us architecture independent file + // modes, as discussed in `os/types.go`: + // + // Go's runtime FileMode type has same definition on all systems, so + // that information about files can be moved from one system to + // another portably. + var mt os.FileMode + + // We only care about the bits that identify the type of a file system + // node, and can ignore append, exclusive, temporary, setuid, setgid, + // permission bits, and sticky bits, which are coincident to bits which + // declare type of the file system node. + modeType := fi.Mode() & os.ModeType + var shouldSkip bool // skip some types of file system nodes + + switch { + case modeType&os.ModeDir > 0: + mt = os.ModeDir + case modeType&os.ModeSymlink > 0: + mt = os.ModeSymlink + case modeType&os.ModeNamedPipe > 0: + mt = os.ModeNamedPipe + shouldSkip = true + case modeType&os.ModeSocket > 0: + mt = os.ModeSocket + shouldSkip = true + case modeType&os.ModeDevice > 0: + mt = os.ModeDevice + shouldSkip = true } - // Write the prefix-stripped pathname to hash because the hash is as - // much a function of the relative names of the files and directories as - // it is their contents. Added benefit is that even empty directories - // and symbolic links will affect final hash value. Use - // `filepath.ToSlash` to ensure relative pathname is os-agnostic. 
- writeBytesWithNull(h, []byte(filepath.ToSlash(pathname[prefixLength:]))) + // Write the relative pathname to hash because the hash is a function of + // the node names, node types, and node contents. Added benefit is that + // empty directories, named pipes, sockets, devices, and symbolic links + // will affect final hash value. Use `filepath.ToSlash` to ensure + // relative pathname is os-agnostic. + writeBytesWithNull(h, []byte(filepath.ToSlash(relative))) + + binary.LittleEndian.PutUint32(modeBytes, uint32(mt)) // encode the type of mode + writeBytesWithNull(h, modeBytes) // and write to hash + + if shouldSkip { + // There is nothing more to do for some of the node types. + continue + } - if mode&os.ModeSymlink != 0 { - referent, err := os.Readlink(pathname) + if mt == os.ModeSymlink { // okay to check for equivalence because we set to this value + relative, err = os.Readlink(pathname) // read the symlink referent if err != nil { return nil, errors.Wrap(err, "cannot Readlink") } // Write the os-agnostic referent to the hash and proceed to the // next pathname in the queue. - writeBytesWithNull(h, []byte(filepath.ToSlash(referent))) + writeBytesWithNull(h, []byte(filepath.ToSlash(relative))) // and write it to hash continue } @@ -105,10 +148,10 @@ func DigestFromPathname(prefix, pathname string) ([]byte, error) { return nil, errors.Wrap(err, "cannot Open") } - if fi.IsDir() { + if mt == os.ModeDir { childrenNames, err := sortedListOfDirectoryChildrenFromFileHandle(fh) if err != nil { - _ = fh.Close() // already have an error reading directory; ignore Close result. 
+ _ = fh.Close() // ignore close error because we already have an error reading directory return nil, errors.Wrap(err, "cannot get list of directory children") } for _, childName := range childrenNames { @@ -116,17 +159,17 @@ func DigestFromPathname(prefix, pathname string) ([]byte, error) { case ".", "..", "vendor", ".bzr", ".git", ".hg", ".svn": // skip default: - pathnameQueue = append(pathnameQueue, filepath.Join(pathname, childName)) + queue = append(queue, filepath.Join(relative, childName)) } } } else { - written, err = io.Copy(h, newLineEndingReader(fh)) // fast copy of file contents to hash - err = errors.Wrap(err, "cannot Copy") // errors.Wrap only wraps non-nil, so elide guard condition - writeBytesWithNull(h, []byte(strconv.FormatInt(written, 10))) // format file size as base 10 integer + bytesWritten, err = io.Copy(h, newLineEndingReader(fh)) // fast copy of file contents to hash + err = errors.Wrap(err, "cannot Copy") // errors.Wrap only wraps non-nil, so skip extra check + writeBytesWithNull(h, []byte(strconv.FormatInt(bytesWritten, 10))) // format file size as base 10 integer } - // Close the file handle to the open directory without masking possible - // previous error value. + // Close the file handle to the open directory or file without masking + // possible previous error value. 
if er := fh.Close(); err == nil { err = errors.Wrap(er, "cannot Close") } @@ -395,7 +438,7 @@ func VerifyDepTree(vendorRoot string, wantSums map[string][]byte) (map[string]Ve if expectedSum, ok := wantSums[projectNormalized]; ok { ls := EmptyDigestInLock if len(expectedSum) > 0 { - projectSum, err := DigestFromPathname(vendorRoot, projectNormalized) + projectSum, err := DigestFromDirectory(filepath.Join(vendorRoot, projectNormalized)) if err != nil { return nil, errors.Wrap(err, "cannot compute dependency hash") } diff --git a/internal/gps/pkgtree/digest_test.go b/internal/gps/pkgtree/digest_test.go index 88e59eb2c6..51b9fe783d 100644 --- a/internal/gps/pkgtree/digest_test.go +++ b/internal/gps/pkgtree/digest_test.go @@ -78,46 +78,25 @@ func getTestdataVerifyRoot(t *testing.T) string { return filepath.Join(filepath.Dir(cwd), "testdata_digest") } -func TestDigestFromPathnameWithFile(t *testing.T) { - relativePathname := "github.com/alice/match/match.go" - want := []byte{0x43, 0xe6, 0x3, 0x53, 0xda, 0x4, 0x18, 0xf6, 0xbd, 0x98, 0xf4, 0x6c, 0x6d, 0xb8, 0xc1, 0x8d, 0xa2, 0x78, 0x16, 0x45, 0xf7, 0xca, 0xc, 0xec, 0xcf, 0x2e, 0xa1, 0x64, 0x55, 0x69, 0xbf, 0x8f} - - // NOTE: Create the hash using both an absolute and a relative pathname to - // ensure hash ignores prefix. 
- - t.Run("AbsolutePrefix", func(t *testing.T) { - prefix := getTestdataVerifyRoot(t) - got, err := DigestFromPathname(prefix, relativePathname) - if err != nil { - t.Fatal(err) - } - if !bytes.Equal(got, want) { - t.Errorf("\n(GOT):\n\t%#v\n(WNT):\n\t%#v", got, want) - } - }) - - t.Run("RelativePrefix", func(t *testing.T) { - prefix := "../testdata_digest" - got, err := DigestFromPathname(prefix, relativePathname) - if err != nil { - t.Fatal(err) - } - if !bytes.Equal(got, want) { - t.Errorf("\n(GOT):\n\t%#v\n(WNT):\n\t%#v", got, want) - } - }) +func TestDigestFromDirectoryBailsUnlessDirectory(t *testing.T) { + prefix := getTestdataVerifyRoot(t) + relativePathname := "launchpad.net/match" + _, err := DigestFromDirectory(filepath.Join(prefix, relativePathname)) + if got, want := err, error(nil); got != want { + t.Errorf("\n(GOT): %v; (WNT): %v", got, want) + } } -func TestDigestFromPathnameWithDirectory(t *testing.T) { +func TestDigestFromDirectory(t *testing.T) { relativePathname := "launchpad.net/match" - want := []byte{0x74, 0xe, 0x17, 0x87, 0xd4, 0x8e, 0x56, 0x2e, 0x7e, 0x32, 0x4e, 0x80, 0x3a, 0x5f, 0x3a, 0x10, 0x33, 0x43, 0x2c, 0x24, 0x8e, 0xf7, 0x1a, 0x37, 0x5e, 0x76, 0xf4, 0x6, 0x2b, 0xf3, 0xfd, 0x91} + want := []byte{0x7e, 0x10, 0x6, 0x2f, 0x8, 0x3, 0x3c, 0x76, 0xae, 0xbc, 0xa4, 0xc9, 0xec, 0x73, 0x67, 0x15, 0x70, 0x2b, 0x0, 0x89, 0x27, 0xbb, 0x61, 0x9d, 0xc7, 0xc3, 0x39, 0x46, 0x3, 0x91, 0xb7, 0x3b} // NOTE: Create the hash using both an absolute and a relative pathname to // ensure hash ignores prefix. 
t.Run("AbsolutePrefix", func(t *testing.T) { prefix := getTestdataVerifyRoot(t) - got, err := DigestFromPathname(prefix, relativePathname) + got, err := DigestFromDirectory(filepath.Join(prefix, relativePathname)) if err != nil { t.Fatal(err) } @@ -128,7 +107,7 @@ func TestDigestFromPathnameWithDirectory(t *testing.T) { t.Run("RelativePrefix", func(t *testing.T) { prefix := "../testdata_digest" - got, err := DigestFromPathname(prefix, relativePathname) + got, err := DigestFromDirectory(filepath.Join(prefix, relativePathname)) if err != nil { t.Fatal(err) } @@ -142,13 +121,13 @@ func TestVerifyDepTree(t *testing.T) { vendorRoot := getTestdataVerifyRoot(t) wantSums := map[string][]byte{ - "github.com/alice/match": []byte{0x87, 0x4, 0x53, 0x5f, 0xb8, 0xca, 0xe9, 0x52, 0x40, 0x41, 0xcd, 0x93, 0x2e, 0x42, 0xfb, 0x14, 0x77, 0x57, 0x9c, 0x3, 0x6b, 0xe1, 0x15, 0xe7, 0xfa, 0xc1, 0xf0, 0x98, 0x4b, 0x61, 0x9f, 0x48}, + "github.com/alice/match": []byte{0x7e, 0x10, 0x6, 0x2f, 0x8, 0x3, 0x3c, 0x76, 0xae, 0xbc, 0xa4, 0xc9, 0xec, 0x73, 0x67, 0x15, 0x70, 0x2b, 0x0, 0x89, 0x27, 0xbb, 0x61, 0x9d, 0xc7, 0xc3, 0x39, 0x46, 0x3, 0x91, 0xb7, 0x3b}, "github.com/alice/mismatch": []byte("some non-matching digest"), "github.com/bob/emptyDigest": nil, // empty hash result - "github.com/bob/match": []byte{0x6a, 0x11, 0xf9, 0x46, 0xcc, 0xf3, 0x44, 0xdb, 0x2c, 0x6b, 0xcc, 0xb4, 0x0, 0x71, 0xe5, 0xc4, 0xee, 0x9a, 0x26, 0x71, 0x9d, 0xab, 0xe2, 0x40, 0xb7, 0xbf, 0x2a, 0xd9, 0x4, 0xcf, 0xc9, 0x46}, + "github.com/bob/match": []byte{0x7e, 0x10, 0x6, 0x2f, 0x8, 0x3, 0x3c, 0x76, 0xae, 0xbc, 0xa4, 0xc9, 0xec, 0x73, 0x67, 0x15, 0x70, 0x2b, 0x0, 0x89, 0x27, 0xbb, 0x61, 0x9d, 0xc7, 0xc3, 0x39, 0x46, 0x3, 0x91, 0xb7, 0x3b}, "github.com/charlie/notInTree": nil, // not in tree result ought to superseede empty digest result // matching result at seldomly found directory level - "launchpad.net/match": []byte{0x74, 0xe, 0x17, 0x87, 0xd4, 0x8e, 0x56, 0x2e, 0x7e, 0x32, 0x4e, 0x80, 0x3a, 0x5f, 0x3a, 0x10, 0x33, 0x43, 
0x2c, 0x24, 0x8e, 0xf7, 0x1a, 0x37, 0x5e, 0x76, 0xf4, 0x6, 0x2b, 0xf3, 0xfd, 0x91}, + "launchpad.net/match": []byte{0x7e, 0x10, 0x6, 0x2f, 0x8, 0x3, 0x3c, 0x76, 0xae, 0xbc, 0xa4, 0xc9, 0xec, 0x73, 0x67, 0x15, 0x70, 0x2b, 0x0, 0x89, 0x27, 0xbb, 0x61, 0x9d, 0xc7, 0xc3, 0x39, 0x46, 0x3, 0x91, 0xb7, 0x3b}, } status, err := VerifyDepTree(vendorRoot, wantSums) @@ -160,7 +139,7 @@ func TestVerifyDepTree(t *testing.T) { // digest keys. if false { for k, want := range wantSums { - got, err := DigestFromPathname(vendorRoot, k) + got, err := DigestFromDirectory(filepath.Join(vendorRoot, k)) if err != nil { t.Error(err) } From 29b1709b375706941297ae3ef99847eed8a7b8bb Mon Sep 17 00:00:00 2001 From: "Karrick S. McDermott" Date: Sun, 13 Aug 2017 07:17:20 -0400 Subject: [PATCH 25/36] sets element in backing array to nil after popping off queue --- internal/gps/pkgtree/digest.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/gps/pkgtree/digest.go b/internal/gps/pkgtree/digest.go index 9b6682cf9f..7ed2d10fa4 100644 --- a/internal/gps/pkgtree/digest.go +++ b/internal/gps/pkgtree/digest.go @@ -429,7 +429,7 @@ func VerifyDepTree(vendorRoot string, wantSums map[string][]byte) (map[string]Ve for len(queue) > 0 { // pop node from the queue (depth first traversal, reverse lexicographical order inside a directory) lq1 := len(queue) - 1 - currentNode, queue = queue[lq1], queue[:lq1] + currentNode, queue[lq1], queue = queue[lq1], nil, queue[:lq1] // Chop off the vendor root prefix, including the path separator, then // normalize. 
@@ -494,7 +494,7 @@ func VerifyDepTree(vendorRoot string, wantSums map[string][]byte) (map[string]Ve for len(nodes) > 1 { // pop off right-most node from slice of nodes ln1 := len(nodes) - 1 - currentNode, nodes = nodes[ln1], nodes[:ln1] + currentNode, nodes[ln1], nodes = nodes[ln1], nil, nodes[:ln1] if !currentNode.isRequiredAncestor && nodes[currentNode.parentIndex].isRequiredAncestor { status[filepath.ToSlash(currentNode.pathname[prefixLength:])] = NotInLock From 17e7c9d4bec533dd17dabe4073bc9935097dd98c Mon Sep 17 00:00:00 2001 From: "Karrick S. McDermott" Date: Sun, 13 Aug 2017 11:15:18 -0400 Subject: [PATCH 26/36] comments updated --- internal/gps/pkgtree/digest.go | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/internal/gps/pkgtree/digest.go b/internal/gps/pkgtree/digest.go index 7ed2d10fa4..3ec2d5caae 100644 --- a/internal/gps/pkgtree/digest.go +++ b/internal/gps/pkgtree/digest.go @@ -365,7 +365,8 @@ func sortedListOfDirectoryChildrenFromFileHandle(fh *os.File) ([]string, error) // The vendor root will be converted to os-specific pathname for processing, and // the map of project names to their expected digests are required to have the // solidus character, `/`, as their path separator. For example, -// "github.com/alice/alice1". +// "github.com/alice/alice1", even on platforms where the file system path +// separator is a different character. func VerifyDepTree(vendorRoot string, wantSums map[string][]byte) (map[string]VendorStatus, error) { vendorRoot = filepath.Clean(vendorRoot) + pathSeparator @@ -401,13 +402,16 @@ func VerifyDepTree(vendorRoot string, wantSums map[string][]byte) (map[string]Ve // launchpad.net/nifty/n1.go // // 1) If only the `alice1` and `alice2` projects were in the lock file, we'd - // prefer the output to state that `github.com/bob` is `NotInLock`. + // prefer the output to state that `github.com/bob` is `NotInLock`, and + // `launchpad.net/nifty` is `NotInLock`. 
// // 2) If `alice1`, `alice2`, and `bob1` were in the lock file, we'd want to - // report `github.com/bob/bob2` as `NotInLock`. + // report `github.com/bob/bob2` as `NotInLock`, and `launchpad.net/nifty` is + // `NotInLock`. // // 3) If none of `alice1`, `alice2`, `bob1`, or `bob2` were in the lock - // file, the entire `github.com` directory would be reported as `NotInLock`. + // file, the entire `github.com` directory would be reported as `NotInLock`, + // along with `launchpad.net/nifty` is `NotInLock`. // // Each node in our tree has the slice index of its parent node, so once we // can categorically state a particular directory is required because it is @@ -418,9 +422,9 @@ func VerifyDepTree(vendorRoot string, wantSums map[string][]byte) (map[string]Ve nodes := []*fsnode{currentNode} // Mark directories of expected projects as required. When the respective - // project is found in the vendor root hierarchy, its status will be updated - // to reflect whether its digest is empty, or whether or not it matches the - // expected digest. + // project is later found while traversing the vendor root hierarchy, its + // status will be updated to reflect whether its digest is empty, or whether + // or not it matches the expected digest. status := make(map[string]VendorStatus) for pathname := range wantSums { status[pathname] = NotInTree From 0df696c399a51b3168ac78cbae86dade5a22e509 Mon Sep 17 00:00:00 2001 From: "Karrick S. McDermott" Date: Sun, 13 Aug 2017 13:55:27 -0400 Subject: [PATCH 27/36] code cleanup and documentation updates * In line with the original document describing hungarian notation, all file system path variable names have been prefixed with either `slash` or `os` to differentiate whether the variable holds a slash delimited pathname or a pathname delimited by an os-specific pathname delimiter. This is to make it much easier to visually see whether we are using the wrong pathname "type" than desired. 
--- internal/gps/pkgtree/digest.go | 417 +++++++++++++++++---------------- 1 file changed, 216 insertions(+), 201 deletions(-) diff --git a/internal/gps/pkgtree/digest.go b/internal/gps/pkgtree/digest.go index 3ec2d5caae..969f86fe7d 100644 --- a/internal/gps/pkgtree/digest.go +++ b/internal/gps/pkgtree/digest.go @@ -8,7 +8,6 @@ import ( "bytes" "crypto/sha256" "encoding/binary" - "fmt" "hash" "io" "os" @@ -20,13 +19,145 @@ import ( ) const ( - pathSeparator = string(filepath.Separator) + osPathSeparator = string(filepath.Separator) // when walking vendor root hierarchy, ignore file system nodes of the // following types. skipSpecialNodes = os.ModeDevice | os.ModeNamedPipe | os.ModeSocket | os.ModeCharDevice ) +// lineEndingReader is a `io.Reader` that converts CRLF sequences to LF. +// +// When cloning or checking out repositories, some Version Control Systems, +// VCSs, on some supported Go Operating System architectures, GOOS, will +// automatically convert line endings that end in a single line feed byte, LF, +// to line endings that end in a two byte sequence of carriage return, CR, +// followed by LF. This LF to CRLF conversion would cause otherwise identical +// versioned files to have different on disk contents simply based on which VCS +// and GOOS are involved. Different file contents for the same file would cause +// the resultant hashes to differ. In order to ensure file contents normalize +// and produce the same hash, this structure wraps an io.Reader that modifies +// the file's contents when it is read, translating all CRLF sequences to LF. +type lineEndingReader struct { + src io.Reader // source io.Reader from which this reads + prevReadEndedCR bool // used to track whether final byte of previous Read was CR +} + +// newLineEndingReader returns a new lineEndingReader that reads from the +// specified source io.Reader. 
+func newLineEndingReader(src io.Reader) *lineEndingReader { + return &lineEndingReader{src: src} +} + +var crlf = []byte("\r\n") + +// Read consumes bytes from the structure's source io.Reader to fill the +// specified slice of bytes. It converts all CRLF byte sequences to LF, and +// handles cases where CR and LF straddle across two Read operations. +func (f *lineEndingReader) Read(buf []byte) (int, error) { + buflen := len(buf) + if f.prevReadEndedCR { + // Read one fewer bytes so we have room if the first byte of the + // upcoming Read is not a LF, in which case we will need to insert + // trailing CR from previous read. + buflen-- + } + nr, er := f.src.Read(buf[:buflen]) + if nr > 0 { + if f.prevReadEndedCR && buf[0] != '\n' { + // Having a CRLF split across two Read operations is rare, so the + // performance impact of copying entire buffer to the right by one + // byte, while suboptimal, will at least will not happen very + // often. This negative performance impact is mitigated somewhat on + // many Go compilation architectures, GOARCH, because the `copy` + // builtin uses a machine opcode for performing the memory copy on + // possibly overlapping regions of memory. This machine opcodes is + // not instantaneous and does require multiple CPU cycles to + // complete, but is significantly faster than the application + // looping through bytes. + copy(buf[1:nr+1], buf[:nr]) // shift data to right one byte + buf[0] = '\r' // insert the previous skipped CR byte at start of buf + nr++ // pretend we read one more byte + } + + // Remove any CRLF sequences in the buffer using `bytes.Index` because, + // like the `copy` builtin on many GOARCHs, it also takes advantage of a + // machine opcode to search for byte patterns. + var searchOffset int // index within buffer from whence the search will commence for each loop; set to the index of the end of the previous loop. 
+ var shiftCount int // each subsequenct shift operation needs to shift bytes to the left by one more position than the shift that preceded it. + previousIndex := -1 // index of previously found CRLF; -1 means no previous index + for { + index := bytes.Index(buf[searchOffset:nr], crlf) + if index == -1 { + break + } + index += searchOffset // convert relative index to absolute + if previousIndex != -1 { + // shift substring between previous index and this index + copy(buf[previousIndex-shiftCount:], buf[previousIndex+1:index]) + shiftCount++ // next shift needs to be 1 byte to the left + } + previousIndex = index + searchOffset = index + 2 // start next search after len(crlf) + } + if previousIndex != -1 { + // handle final shift + copy(buf[previousIndex-shiftCount:], buf[previousIndex+1:nr]) + shiftCount++ + } + nr -= shiftCount // shorten byte read count by number of shifts executed + + // When final byte from a read operation is CR, do not emit it until + // ensure first byte on next read is not LF. + if f.prevReadEndedCR = buf[nr-1] == '\r'; f.prevReadEndedCR { + nr-- // pretend byte was never read from source + } + } else if f.prevReadEndedCR { + // Reading from source returned nothing, but this struct is sitting on a + // trailing CR from previous Read, so let's give it to client now. + buf[0] = '\r' + nr = 1 + er = nil + f.prevReadEndedCR = false // prevent infinite loop + } + return nr, er +} + +// sortedChildrenFromDirname returns a lexicographically sorted list of child +// nodes for the specified directory. +func sortedChildrenFromDirname(osDirname string) ([]string, error) { + fh, err := os.Open(osDirname) + if err != nil { + return nil, errors.Wrap(err, "cannot Open") + } + osChildrenNames, err := sortedChildrenFromDirectorFileHandle(fh) + // Close the file handle to the open directory without masking possible + // previous error value. 
+ if er := fh.Close(); err == nil { + err = errors.Wrap(er, "cannot Close") + } + return osChildrenNames, err +} + +// sortedChildrenFromDirectorFileHandle returns a lexicographically sorted list +// of child nodes for the specified file handle to a directory. +func sortedChildrenFromDirectorFileHandle(fh *os.File) ([]string, error) { + osChildrenNames, err := fh.Readdirnames(0) // 0: read names of all children + if err != nil { + return nil, errors.Wrap(err, "cannot Readdirnames") + } + sort.Strings(osChildrenNames) + return osChildrenNames, nil +} + +// writeBytesWithNull appends the specified data to the specified hash, followed by +// the NULL byte, in order to make accidental hash collisions less likely. +func writeBytesWithNull(h hash.Hash, data []byte) { + // Ignore return values from writing to the hash, because hash write always + // returns nil error. + _, _ = h.Write(append(data, 0)) +} + // DigestFromDirectory returns a hash of the specified directory contents, which // will match the hash computed for any directory on any supported Go platform // whose contents exactly match the specified directory. @@ -45,16 +176,16 @@ const ( // While filepath.Walk could have been used, that standard library function // skips symbolic links, and for now, we want the hash to include the symbolic // link referents. 
-func DigestFromDirectory(dirname string) ([]byte, error) { - dirname = filepath.Clean(dirname) +func DigestFromDirectory(osDirname string) ([]byte, error) { + osDirname = filepath.Clean(osDirname) // Ensure parameter is a directory - fi, err := os.Stat(dirname) + fi, err := os.Stat(osDirname) if err != nil { return nil, errors.Wrap(err, "cannot Stat") } if !fi.IsDir() { - return nil, errors.Errorf("cannot verify non directory: %q", dirname) + return nil, errors.Errorf("cannot verify non directory: %q", osDirname) } // Create a single hash instance for the entire operation, rather than a new @@ -65,8 +196,7 @@ func DigestFromDirectory(dirname string) ([]byte, error) { // starting directory itself. queue := []string{""} - var relative string // pathname relative to dirname - var pathname string // formed by combining dirname with relative for each node + var osRelative string // os-specific relative pathname under dirname var bytesWritten int64 modeBytes := make([]byte, 4) // scratch place to store encoded os.FileMode (uint32) @@ -75,10 +205,10 @@ func DigestFromDirectory(dirname string) ([]byte, error) { for len(queue) > 0 { // Unshift a pathname from the queue (breadth-first traversal of // hierarchy) - relative, queue = queue[0], queue[1:] - pathname = filepath.Join(dirname, relative) + osRelative, queue = queue[0], queue[1:] + osPathname := filepath.Join(osDirname, osRelative) - fi, err = os.Lstat(pathname) + fi, err = os.Lstat(osPathname) if err != nil { return nil, errors.Wrap(err, "cannot Lstat") } @@ -118,54 +248,51 @@ func DigestFromDirectory(dirname string) ([]byte, error) { // Write the relative pathname to hash because the hash is a function of // the node names, node types, and node contents. Added benefit is that // empty directories, named pipes, sockets, devices, and symbolic links - // will affect final hash value. Use `filepath.ToSlash` to ensure + // will also affect final hash value. 
Use `filepath.ToSlash` to ensure // relative pathname is os-agnostic. - writeBytesWithNull(h, []byte(filepath.ToSlash(relative))) + writeBytesWithNull(h, []byte(filepath.ToSlash(osRelative))) binary.LittleEndian.PutUint32(modeBytes, uint32(mt)) // encode the type of mode writeBytesWithNull(h, modeBytes) // and write to hash if shouldSkip { - // There is nothing more to do for some of the node types. - continue + continue // nothing more to do for some of the node types } if mt == os.ModeSymlink { // okay to check for equivalence because we set to this value - relative, err = os.Readlink(pathname) // read the symlink referent + osRelative, err = os.Readlink(osPathname) // read the symlink referent if err != nil { return nil, errors.Wrap(err, "cannot Readlink") } - // Write the os-agnostic referent to the hash and proceed to the - // next pathname in the queue. - writeBytesWithNull(h, []byte(filepath.ToSlash(relative))) // and write it to hash - continue + writeBytesWithNull(h, []byte(filepath.ToSlash(osRelative))) // write referent to hash + continue // proceed to next node in queue } // For both directories and regular files, we must create a file system // handle in order to read their contents. 
- fh, err := os.Open(pathname) + fh, err := os.Open(osPathname) if err != nil { return nil, errors.Wrap(err, "cannot Open") } if mt == os.ModeDir { - childrenNames, err := sortedListOfDirectoryChildrenFromFileHandle(fh) + osChildrenNames, err := sortedChildrenFromDirectorFileHandle(fh) if err != nil { _ = fh.Close() // ignore close error because we already have an error reading directory return nil, errors.Wrap(err, "cannot get list of directory children") } - for _, childName := range childrenNames { - switch childName { + for _, osChildName := range osChildrenNames { + switch osChildName { case ".", "..", "vendor", ".bzr", ".git", ".hg", ".svn": // skip default: - queue = append(queue, filepath.Join(relative, childName)) + queue = append(queue, filepath.Join(osRelative, osChildName)) } } } else { bytesWritten, err = io.Copy(h, newLineEndingReader(fh)) // fast copy of file contents to hash err = errors.Wrap(err, "cannot Copy") // errors.Wrap only wraps non-nil, so skip extra check - writeBytesWithNull(h, []byte(strconv.FormatInt(bytesWritten, 10))) // format file size as base 10 integer + writeBytesWithNull(h, []byte(strconv.FormatInt(bytesWritten, 10))) // 10: format file size as base 10 integer } // Close the file handle to the open directory or file without masking @@ -181,103 +308,8 @@ func DigestFromDirectory(dirname string) ([]byte, error) { return h.Sum(nil), nil } -// lineEndingReader is a `io.Reader` that converts CRLF sequences to LF. -// -// Some VCS systems automatically convert LF line endings to CRLF on some OS -// platforms. This would cause the a file checked out on those platforms to have -// a different digest than the same file on platforms that do not perform this -// translation. In order to ensure file contents normalize and hash the same, -// this struct satisfies the io.Reader interface by providing a Read method that -// modifies the file's contents when it is read, translating all CRLF sequences -// to LF. 
-type lineEndingReader struct { - src io.Reader // source io.Reader from which this reads - prevReadEndedCR bool // used to track whether final byte of previous Read was CR -} - -// newLineEndingReader returns a new lineEndingReader that reads from the -// specified source io.Reader. -func newLineEndingReader(src io.Reader) *lineEndingReader { - return &lineEndingReader{src: src} -} - -var crlf = []byte("\r\n") - -// Read consumes bytes from the structure's source io.Reader to fill the -// specified slice of bytes. It converts all CRLF byte sequences to LF, and -// handles cases where CR and LF straddle across two Read operations. -func (f *lineEndingReader) Read(buf []byte) (int, error) { - buflen := len(buf) - if f.prevReadEndedCR { - // Read one less byte in case we need to insert CR in there - buflen-- - } - nr, er := f.src.Read(buf[:buflen]) - if nr > 0 { - if f.prevReadEndedCR && buf[0] != '\n' { - // Having a CRLF split across two Read operations is rare, so - // ignoring performance impact of copying entire buffer by one - // byte. Plus, `copy` builtin likely uses machine opcode for - // performing the memory copy. - copy(buf[1:nr+1], buf[:nr]) // shift data to right one byte - buf[0] = '\r' // insert the previous skipped CR byte at start of buf - nr++ // pretend we read one more byte - } - - // Remove any CRLF sequences in buf, using `bytes.Index` because it - // takes advantage of machine opcodes that search for byte patterns on - // many architectures; and, using builtin `copy` which also takes - // advantage of machine opcodes to quickly move overlapping regions of - // bytes in memory. 
- var searchOffset, shiftCount int - previousIndex := -1 // index of previous CRLF index; -1 means no previous index known - for { - index := bytes.Index(buf[searchOffset:nr], crlf) - if index == -1 { - break - } - index += searchOffset // convert relative index to absolute - if previousIndex != -1 { - // shift substring between previous index and this index - copy(buf[previousIndex-shiftCount:], buf[previousIndex+1:index]) - shiftCount++ // next shift needs to be 1 byte to the left - } - previousIndex = index - searchOffset = index + 2 // start next search after len(crlf) - } - if previousIndex != -1 { - // handle final shift - copy(buf[previousIndex-shiftCount:], buf[previousIndex+1:nr]) - shiftCount++ - } - nr -= shiftCount // shorten byte read count by number of shifts executed - - // When final byte from a read operation is CR, do not emit it until - // ensure first byte on next read is not LF. - if f.prevReadEndedCR = buf[nr-1] == '\r'; f.prevReadEndedCR { - nr-- // pretend byte was never read from source - } - } else if f.prevReadEndedCR { - // Reading from source returned nothing, but this struct is sitting on a - // trailing CR from previous Read, so let's give it to client now. - buf[0] = '\r' - nr = 1 - er = nil - f.prevReadEndedCR = false // prevent infinite loop - } - return nr, er -} - -// writeBytesWithNull appends the specified data to the specified hash, followed by -// the NULL byte, in order to make accidental hash collisions less likely. -func writeBytesWithNull(h hash.Hash, data []byte) { - // Ignore return values from writing to the hash, because hash write always - // returns nil error. - _, _ = h.Write(append(data, 0)) -} - -// VendorStatus represents one of a handful of possible statuses of a particular -// subdirectory under vendor. +// VendorStatus represents one of a handful of possible status conditions for a +// particular file sytem node in the vendor directory tree. 
type VendorStatus uint8 const ( @@ -319,70 +351,44 @@ func (ls VendorStatus) String() string { return "unknown" } +// fsnode is used to track which file system nodes are required by the lock +// file. When a directory is found whose name matches one of the declared +// projects in the lock file, e.g., "github.com/alice/alice1", an fsnode is +// created for that directory, but not for any of its children. All other file +// system nodes encountered will result in a fsnode created to represent it. type fsnode struct { - pathname string - isRequiredAncestor bool - myIndex, parentIndex int -} - -func (n fsnode) String() string { - return fmt.Sprintf("[%d:%d %q %t]", n.myIndex, n.parentIndex, n.pathname, n.isRequiredAncestor) -} - -// sortedListOfDirectoryChildrenFromPathname returns a lexicographical sorted -// list of child nodes for the specified directory. -func sortedListOfDirectoryChildrenFromPathname(pathname string) ([]string, error) { - fh, err := os.Open(pathname) - if err != nil { - return nil, errors.Wrap(err, "cannot Open") - } - childrenNames, err := sortedListOfDirectoryChildrenFromFileHandle(fh) - // Close the file handle to the open directory without masking possible - // previous error value. - if er := fh.Close(); err == nil { - err = errors.Wrap(er, "cannot Close") - } - return childrenNames, err -} - -// sortedListOfDirectoryChildrenFromPathname returns a lexicographical sorted -// list of child nodes for the specified open file handle to a directory. This -// function is written once to avoid writing the logic in two places. 
-func sortedListOfDirectoryChildrenFromFileHandle(fh *os.File) ([]string, error) { - childrenNames, err := fh.Readdirnames(0) // 0: read names of all children - if err != nil { - return nil, errors.Wrap(err, "cannot Readdirnames") - } - sort.Strings(childrenNames) - return childrenNames, nil + osRelative string // os-specific relative path of a resource under vendor root + isRequiredAncestor bool // true iff this node or one of its descendants is in the lock file + myIndex, parentIndex int // index of this node and its parent in the tree's slice } // VerifyDepTree verifies a dependency tree according to expected digest sums, // and returns an associative array of file system nodes and their respective -// vendor status, in accordance with the provided expected digest sums -// parameter. +// vendor status conditions. // -// The vendor root will be converted to os-specific pathname for processing, and -// the map of project names to their expected digests are required to have the -// solidus character, `/`, as their path separator. For example, -// "github.com/alice/alice1", even on platforms where the file system path -// separator is a different character. -func VerifyDepTree(vendorRoot string, wantSums map[string][]byte) (map[string]VendorStatus, error) { - vendorRoot = filepath.Clean(vendorRoot) + pathSeparator - - // NOTE: Ensure top level pathname is a directory - fi, err := os.Stat(vendorRoot) +// The keys to the expected digest sums associative array represent the +// project's dependencies, and each is required to be expressed using the +// solidus character, `/`, as its path separator. For example, even on a GOOS +// platform where the file system path separator is a character other than +// solidus, one particular dependency would be represented as +// "github.com/alice/alice1". 
+func VerifyDepTree(osDirname string, wantSums map[string][]byte) (map[string]VendorStatus, error) { + osDirname = filepath.Clean(osDirname) + + // Ensure top level pathname is a directory + fi, err := os.Stat(osDirname) if err != nil { return nil, errors.Wrap(err, "cannot Stat") } if !fi.IsDir() { - return nil, errors.Errorf("cannot verify non directory: %q", vendorRoot) + return nil, errors.Errorf("cannot verify non directory: %q", osDirname) } - var otherNode *fsnode - currentNode := &fsnode{pathname: vendorRoot, parentIndex: -1, isRequiredAncestor: true} + // Initialize work queue with a node representing the specified directory + // name by declaring its relative pathname under the directory name as the + // empty string. + currentNode := &fsnode{osRelative: "", parentIndex: -1, isRequiredAncestor: true} queue := []*fsnode{currentNode} // queue of directories that must be inspected - prefixLength := len(vendorRoot) // In order to identify all file system nodes that are not in the lock file, // represented by the specified expected sums parameter, and in order to @@ -421,28 +427,30 @@ func VerifyDepTree(vendorRoot string, wantSums map[string][]byte) (map[string]Ve // `NotInLock`. nodes := []*fsnode{currentNode} - // Mark directories of expected projects as required. When the respective + // Create associative array to store the results of calling this function. + slashStatus := make(map[string]VendorStatus) + + // Mark directories of expected projects as required. When each respective // project is later found while traversing the vendor root hierarchy, its - // status will be updated to reflect whether its digest is empty, or whether - // or not it matches the expected digest. - status := make(map[string]VendorStatus) - for pathname := range wantSums { - status[pathname] = NotInTree + // status will be updated to reflect whether its digest is empty, or, + // whether or not it matches the expected digest. 
+ for slashPathname := range wantSums { + slashStatus[slashPathname] = NotInTree } for len(queue) > 0 { - // pop node from the queue (depth first traversal, reverse lexicographical order inside a directory) + // Pop node from the top of queue (depth first traversal, reverse + // lexicographical order inside a directory), clearing the value stored + // in the slice's backing array as we proceed. lq1 := len(queue) - 1 currentNode, queue[lq1], queue = queue[lq1], nil, queue[:lq1] + slashPathname := filepath.ToSlash(currentNode.osRelative) + osPathname := filepath.Join(osDirname, currentNode.osRelative) - // Chop off the vendor root prefix, including the path separator, then - // normalize. - projectNormalized := filepath.ToSlash(currentNode.pathname[prefixLength:]) - - if expectedSum, ok := wantSums[projectNormalized]; ok { + if expectedSum, ok := wantSums[slashPathname]; ok { ls := EmptyDigestInLock if len(expectedSum) > 0 { - projectSum, err := DigestFromDirectory(filepath.Join(vendorRoot, projectNormalized)) + projectSum, err := DigestFromDirectory(osPathname) if err != nil { return nil, errors.Wrap(err, "cannot compute dependency hash") } @@ -452,29 +460,35 @@ func VerifyDepTree(vendorRoot string, wantSums map[string][]byte) (map[string]Ve ls = DigestMismatchInLock } } - status[projectNormalized] = ls + slashStatus[slashPathname] = ls - // NOTE: Mark current nodes and all parents: required. + // Mark current nodes and all its parents as required. for i := currentNode.myIndex; i != -1; i = nodes[i].parentIndex { nodes[i].isRequiredAncestor = true } - continue // do not need to process directory's contents + // Do not need to process this directory's contents because we + // already accounted for its contents while calculating its digest. 
+ continue } - childrenNames, err := sortedListOfDirectoryChildrenFromPathname(currentNode.pathname) + osChildrenNames, err := sortedChildrenFromDirname(osPathname) if err != nil { return nil, errors.Wrap(err, "cannot get sorted list of directory children") } - for _, childName := range childrenNames { - switch childName { + for _, osChildName := range osChildrenNames { + switch osChildName { case ".", "..", "vendor", ".bzr", ".git", ".hg", ".svn": // skip default: - childPathname := filepath.Join(currentNode.pathname, childName) - otherNode = &fsnode{pathname: childPathname, myIndex: len(nodes), parentIndex: currentNode.myIndex} + osChildRelative := filepath.Join(currentNode.osRelative, osChildName) + osChildPathname := filepath.Join(osDirname, osChildRelative) + + // Create a new fsnode for this file system node, with a parent + // index set to the index of the current node. + otherNode := &fsnode{osRelative: osChildRelative, myIndex: len(nodes), parentIndex: currentNode.myIndex} - fi, err := os.Stat(childPathname) + fi, err := os.Stat(osChildPathname) if err != nil { return nil, errors.Wrap(err, "cannot Stat") } @@ -482,8 +496,8 @@ func VerifyDepTree(vendorRoot string, wantSums map[string][]byte) (map[string]Ve if fi.Mode()&skipSpecialNodes != 0 { continue } - // Keep track of all regular files and directories, but do not - // need to visit files. + // Track all file system nodes, but only need to add directories + // to the work queue. nodes = append(nodes, otherNode) if fi.IsDir() { queue = append(queue, otherNode) @@ -496,15 +510,16 @@ func VerifyDepTree(vendorRoot string, wantSums map[string][]byte) (map[string]Ve // the current node is not required, but its parent is required, then the // current node ought to be marked as `NotInLock`. for len(nodes) > 1 { - // pop off right-most node from slice of nodes + // Pop node from top of queue, clearing the value stored in the slice's + // backing array as we proceed. 
ln1 := len(nodes) - 1 currentNode, nodes[ln1], nodes = nodes[ln1], nil, nodes[:ln1] if !currentNode.isRequiredAncestor && nodes[currentNode.parentIndex].isRequiredAncestor { - status[filepath.ToSlash(currentNode.pathname[prefixLength:])] = NotInLock + slashStatus[filepath.ToSlash(currentNode.osRelative)] = NotInLock } } currentNode, nodes = nil, nil - return status, nil + return slashStatus, nil } From f02d479891f193ba108714ee22c04357b652b8ba Mon Sep 17 00:00:00 2001 From: "Karrick S. McDermott" Date: Mon, 14 Aug 2017 14:38:59 -0400 Subject: [PATCH 28/36] VerifyDepTree reports all NotInLock file system nodes * device files, named pipes, sockets found in a vendor root tree are also reported, and will result in a status of `NotInLock`, because those node types are generally not in source control. --- internal/gps/pkgtree/digest.go | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/internal/gps/pkgtree/digest.go b/internal/gps/pkgtree/digest.go index 969f86fe7d..cff24adc1e 100644 --- a/internal/gps/pkgtree/digest.go +++ b/internal/gps/pkgtree/digest.go @@ -492,15 +492,9 @@ func VerifyDepTree(osDirname string, wantSums map[string][]byte) (map[string]Ven if err != nil { return nil, errors.Wrap(err, "cannot Stat") } - // Skip non-interesting file system nodes - if fi.Mode()&skipSpecialNodes != 0 { - continue - } - // Track all file system nodes, but only need to add directories - // to the work queue. - nodes = append(nodes, otherNode) + nodes = append(nodes, otherNode) // Track all file system nodes... if fi.IsDir() { - queue = append(queue, otherNode) + queue = append(queue, otherNode) // but only need to add directories to the work queue. } } } From e9ff6b28248e6ce3b65315bfa1a21da09074937a Mon Sep 17 00:00:00 2001 From: "Karrick S. 
McDermott" Date: Mon, 14 Aug 2017 22:15:01 -0500 Subject: [PATCH 29/36] additional test cases --- internal/gps/pkgtree/digest_test.go | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/internal/gps/pkgtree/digest_test.go b/internal/gps/pkgtree/digest_test.go index 51b9fe783d..53a9ec2202 100644 --- a/internal/gps/pkgtree/digest_test.go +++ b/internal/gps/pkgtree/digest_test.go @@ -52,6 +52,28 @@ func TestLineEndingReader(t *testing.T) { {[]string{"\rnow is the time"}, "\rnow is the time"}, // CR not followed by LF ought to convey {[]string{"\rnow is the time\r"}, "\rnow is the time\r"}, // CR not followed by LF ought to convey + // no line splits + {[]string{"first", "second", "third"}, "firstsecondthird"}, + + // 1->2 and 2->3 both break across a CRLF + {[]string{"first\r", "\nsecond\r", "\nthird"}, "first\nsecond\nthird"}, + + // 1->2 breaks across CRLF and 2->3 does not + {[]string{"first\r", "\nsecond", "third"}, "first\nsecondthird"}, + + // 1->2 breaks across CRLF and 2 ends in CR but 3 does not begin LF + {[]string{"first\r", "\nsecond\r", "third"}, "first\nsecond\rthird"}, + + // 1 ends in CR but 2 does not begin LF, and 2->3 breaks across CRLF + {[]string{"first\r", "second\r", "\nthird"}, "first\rsecond\nthird"}, + + // 1 ends in CR but 2 does not begin LF, and 2->3 does not break across CRLF + {[]string{"first\r", "second\r", "\nthird"}, "first\rsecond\nthird"}, + + // 1->2 and 2->3 both break across a CRLF, but 3->4 does not + {[]string{"first\r", "\nsecond\r", "\nthird\r", "fourth"}, "first\nsecond\nthird\rfourth"}, + {[]string{"first\r", "\nsecond\r", "\nthird\n", "fourth"}, "first\nsecond\nthird\nfourth"}, + {[]string{"this is the result\r\nfrom the first read\r", "\nthis is the result\r\nfrom the second read\r"}, "this is the result\nfrom the first read\nthis is the result\nfrom the second read\r"}, {[]string{"now is the time\r\nfor all good engineers\r\nto improve their test coverage!\r\n"}, From 
fbe5d64f1197ec688114918c40c92cffabca1c72 Mon Sep 17 00:00:00 2001 From: "Karrick S. McDermott" Date: Mon, 14 Aug 2017 22:28:47 -0500 Subject: [PATCH 30/36] VerifyDepTree test displays actual hash values when got status does not match want status --- internal/gps/pkgtree/digest_test.go | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/internal/gps/pkgtree/digest_test.go b/internal/gps/pkgtree/digest_test.go index 53a9ec2202..7bf13fc0dd 100644 --- a/internal/gps/pkgtree/digest_test.go +++ b/internal/gps/pkgtree/digest_test.go @@ -157,20 +157,6 @@ func TestVerifyDepTree(t *testing.T) { t.Fatal(err) } - // NOTE: When true, display the digests of the directories specified by the - // digest keys. - if false { - for k, want := range wantSums { - got, err := DigestFromDirectory(filepath.Join(vendorRoot, k)) - if err != nil { - t.Error(err) - } - if !bytes.Equal(got, want) { - t.Errorf("%q\n(GOT):\n\t%#v\n(WNT):\n\t%#v", k, got, want) - } - } - } - if got, want := len(status), 7; got != want { t.Errorf("\n(GOT): %v; (WNT): %v", got, want) } @@ -193,6 +179,18 @@ func TestVerifyDepTree(t *testing.T) { checkStatus(t, status, "github.com/bob/emptyDigest", EmptyDigestInLock) checkStatus(t, status, "github.com/charlie/notInTree", NotInTree) checkStatus(t, status, "launchpad.net/match", NoMismatch) + + if t.Failed() { + for k, want := range wantSums { + got, err := DigestFromDirectory(filepath.Join(vendorRoot, k)) + if err != nil { + t.Error(err) + } + if !bytes.Equal(got, want) { + t.Errorf("%q\n(GOT):\n\t%#v\n(WNT):\n\t%#v", k, got, want) + } + } + } } func BenchmarkVerifyDepTree(b *testing.B) { From 48cd6644856470d7be9d74a8b0126933781f8144 Mon Sep 17 00:00:00 2001 From: "Karrick S. 
McDermott" Date: Tue, 15 Aug 2017 02:12:34 -0400 Subject: [PATCH 31/36] DirWalk --- internal/gps/pkgtree/digest.go | 108 ++++++++++++++++++++++++++++++++ internal/gps/pkgtree/dirwalk.go | 105 +++++++++++++++++++++++++++++++ 2 files changed, 213 insertions(+) create mode 100644 internal/gps/pkgtree/dirwalk.go diff --git a/internal/gps/pkgtree/digest.go b/internal/gps/pkgtree/digest.go index cff24adc1e..b2ded627e8 100644 --- a/internal/gps/pkgtree/digest.go +++ b/internal/gps/pkgtree/digest.go @@ -10,6 +10,7 @@ import ( "encoding/binary" "hash" "io" + // "log" "os" "path/filepath" "sort" @@ -178,6 +179,113 @@ func writeBytesWithNull(h hash.Hash, data []byte) { // link referents. func DigestFromDirectory(osDirname string) ([]byte, error) { osDirname = filepath.Clean(osDirname) + dirlen := len(osDirname) + len(osPathSeparator) + // log.Printf("DirWalkFunc: osDirname: %s", osDirname) + // log.Printf("DirWalkFunc: dirlen: %d", dirlen) + + // Create a single hash instance for the entire operation, rather than a new + // hash for each node we encounter. 
+ h := sha256.New() + var bytesWritten int64 + modeBytes := make([]byte, 4) // scratch place to store encoded os.FileMode (uint32) + + err := DirWalk(osDirname, func(osPathname string, fi os.FileInfo, err error) error { + // log.Printf("DirWalkFunc: osPathname: %s", osPathname) + var osRelative string + if len(osPathname) > dirlen { + osRelative = osPathname[dirlen:] + // } else { + // osRelative = osPathname + } + // log.Printf("DirWalkFunc: osRelative: %s", osRelative) + + switch osRelative { + case "vendor", ".bzr", ".git", ".hg", ".svn": + return filepath.SkipDir // skip + } + + // We could make our own enum-like data type for encoding the file type, + // but Go's runtime already gives us architecture independent file + // modes, as discussed in `os/types.go`: + // + // Go's runtime FileMode type has same definition on all systems, so + // that information about files can be moved from one system to + // another portably. + var mt os.FileMode + + // We only care about the bits that identify the type of a file system + // node, and can ignore append, exclusive, temporary, setuid, setgid, + // permission bits, and sticky bits, which are coincident to bits which + // declare type of the file system node. + modeType := fi.Mode() & os.ModeType + var shouldSkip bool // skip some types of file system nodes + + switch { + case modeType&os.ModeDir > 0: + mt = os.ModeDir + // Walk function itself does not need to enumerate children, because + // DirWalk will do that for us. + shouldSkip = true + case modeType&os.ModeSymlink > 0: + mt = os.ModeSymlink + case modeType&os.ModeNamedPipe > 0: + mt = os.ModeNamedPipe + shouldSkip = true + case modeType&os.ModeSocket > 0: + mt = os.ModeSocket + shouldSkip = true + case modeType&os.ModeDevice > 0: + mt = os.ModeDevice + shouldSkip = true + } + + // Write the relative pathname to hash because the hash is a function of + // the node names, node types, and node contents. 
Added benefit is that + // empty directories, named pipes, sockets, devices, and symbolic links + // will also affect final hash value. Use `filepath.ToSlash` to ensure + // relative pathname is os-agnostic. + writeBytesWithNull(h, []byte(filepath.ToSlash(osRelative))) + + binary.LittleEndian.PutUint32(modeBytes, uint32(mt)) // encode the type of mode + writeBytesWithNull(h, modeBytes) // and write to hash + + if shouldSkip { + return nil // nothing more to do for some of the node types + } + + if mt == os.ModeSymlink { // okay to check for equivalence because we set to this value + osRelative, err = os.Readlink(osPathname) // read the symlink referent + if err != nil { + return errors.Wrap(err, "cannot Readlink") + } + writeBytesWithNull(h, []byte(filepath.ToSlash(osRelative))) // write referent to hash + return nil // proceed to next node in queue + } + + fh, err := os.Open(osPathname) + if err != nil { + return errors.Wrap(err, "cannot Open") + } + + bytesWritten, err = io.Copy(h, newLineEndingReader(fh)) // fast copy of file contents to hash + err = errors.Wrap(err, "cannot Copy") // errors.Wrap only wraps non-nil, so skip extra check + writeBytesWithNull(h, []byte(strconv.FormatInt(bytesWritten, 10))) // 10: format file size as base 10 integer + + // Close the file handle to the open file without masking + // possible previous error value. + if er := fh.Close(); err == nil { + err = errors.Wrap(er, "cannot Close") + } + return err + }) + if err != nil { + return nil, err + } + return h.Sum(nil), nil +} + +func DigestFromDirectoryGood(osDirname string) ([]byte, error) { + osDirname = filepath.Clean(osDirname) // Ensure parameter is a directory fi, err := os.Stat(osDirname) diff --git a/internal/gps/pkgtree/dirwalk.go b/internal/gps/pkgtree/dirwalk.go new file mode 100644 index 0000000000..0fb47d503f --- /dev/null +++ b/internal/gps/pkgtree/dirwalk.go @@ -0,0 +1,105 @@ +// Copyright 2017 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pkgtree + +import ( + "log" + "os" + "path/filepath" + "strings" + + "github.com/pkg/errors" +) + +// DirWalk is the type of the function called for each file system node visited +// by DirWalk. The path argument contains the argument to DirWalk as a prefix; +// that is, if DirWalk is called with "dir", which is a directory containing the +// file "a", the walk function will be called with the argument "dir/a", using +// the correct os.PathSeparator for the Go Operating System architecture, +// GOOS. The info argument is the os.FileInfo for the named path. +// +// If there was a problem walking to the file or directory named by path, the +// incoming error will describe the problem and the function can decide how to +// handle that error (and DirWalk will not descend into that directory). If an +// error is returned, processing stops. The sole exception is when the function +// returns the special value filepath.SkipDir. If the function returns +// filepath.SkipDir when invoked on a directory, DirWalk skips the directory's +// contents entirely. If the function returns filepath.SkipDir when invoked on a +// non-directory file system node, DirWalk skips the remaining files in the +// containing directory. +type DirWalkFunc func(osPathname string, info os.FileInfo, err error) error + +// DirWalk walks the file tree rooted at osDirname, calling for each file system +// node in the tree, including root. All errors that arise visiting nodes are +// filtered by walkFn. The nodes are walked in lexical order, which makes the +// output deterministic but means that for very large directories DirWalk can be +// inefficient. Unlike filepath.Walk, DirWalk does follow symbolic links. 
+func DirWalk(osDirname string, walkFn DirWalkFunc) error { + osDirname = filepath.Clean(osDirname) + + // Ensure parameter is a directory + fi, err := os.Stat(osDirname) + if err != nil { + return errors.Wrap(err, "cannot Stat") + } + if !fi.IsDir() { + return errors.Errorf("cannot verify non directory: %q", osDirname) + } + + // Initialize a work queue with the empty string, which signifies the + // starting directory itself. + queue := []string{""} + + var osRelative string // os-specific relative pathname under dirname + + // As we enumerate over the queue and encounter a directory, its children + // will be added to the work queue. + for len(queue) > 0 { + // Unshift a pathname from the queue (breadth-first traversal of + // hierarchy) + osRelative, queue = queue[0], queue[1:] + osPathname := filepath.Join(osDirname, osRelative) + + fi, err = os.Lstat(osPathname) + err = walkFn(osPathname, fi, errors.Wrap(err, "cannot Lstat")) + if err != nil { + if err == filepath.SkipDir { + // We have lstat, and we need stat right now + fi, err = os.Stat(osPathname) + if err != nil { + return errors.Wrap(err, "cannot Stat") + } + if !fi.IsDir() { + // Consume items from queue while they have the same parent + // as the current item. + osParent := filepath.Dir(osPathname) + for len(queue) > 0 && strings.HasPrefix(queue[0], osParent) { + log.Printf("skipping sibling: %s", queue[0]) + queue = queue[1:] + } + } + + continue + } + return errors.Wrap(err, "DirWalkFunction") + } + + if fi.IsDir() { + osChildrenNames, err := sortedChildrenFromDirname(osPathname) + if err != nil { + return errors.Wrap(err, "cannot get list of directory children") + } + for _, osChildName := range osChildrenNames { + switch osChildName { + case ".", "..": + // skip + default: + queue = append(queue, filepath.Join(osRelative, osChildName)) + } + } + } + } + return nil +} From 845c95ebecb8a6b1504f8770c754331426e081c3 Mon Sep 17 00:00:00 2001 From: "Karrick S. 
McDermott" Date: Tue, 15 Aug 2017 04:37:18 -0400 Subject: [PATCH 32/36] repeatedly uses io.CopyBuffer with pre-allocated buffer --- internal/gps/pkgtree/digest.go | 116 ++-------------------------- internal/gps/pkgtree/digest_test.go | 15 +++- 2 files changed, 19 insertions(+), 112 deletions(-) diff --git a/internal/gps/pkgtree/digest.go b/internal/gps/pkgtree/digest.go index b2ded627e8..2cebf93292 100644 --- a/internal/gps/pkgtree/digest.go +++ b/internal/gps/pkgtree/digest.go @@ -10,7 +10,6 @@ import ( "encoding/binary" "hash" "io" - // "log" "os" "path/filepath" "sort" @@ -179,113 +178,6 @@ func writeBytesWithNull(h hash.Hash, data []byte) { // link referents. func DigestFromDirectory(osDirname string) ([]byte, error) { osDirname = filepath.Clean(osDirname) - dirlen := len(osDirname) + len(osPathSeparator) - // log.Printf("DirWalkFunc: osDirname: %s", osDirname) - // log.Printf("DirWalkFunc: dirlen: %d", dirlen) - - // Create a single hash instance for the entire operation, rather than a new - // hash for each node we encounter. - h := sha256.New() - var bytesWritten int64 - modeBytes := make([]byte, 4) // scratch place to store encoded os.FileMode (uint32) - - err := DirWalk(osDirname, func(osPathname string, fi os.FileInfo, err error) error { - // log.Printf("DirWalkFunc: osPathname: %s", osPathname) - var osRelative string - if len(osPathname) > dirlen { - osRelative = osPathname[dirlen:] - // } else { - // osRelative = osPathname - } - // log.Printf("DirWalkFunc: osRelative: %s", osRelative) - - switch osRelative { - case "vendor", ".bzr", ".git", ".hg", ".svn": - return filepath.SkipDir // skip - } - - // We could make our own enum-like data type for encoding the file type, - // but Go's runtime already gives us architecture independent file - // modes, as discussed in `os/types.go`: - // - // Go's runtime FileMode type has same definition on all systems, so - // that information about files can be moved from one system to - // another portably. 
- var mt os.FileMode - - // We only care about the bits that identify the type of a file system - // node, and can ignore append, exclusive, temporary, setuid, setgid, - // permission bits, and sticky bits, which are coincident to bits which - // declare type of the file system node. - modeType := fi.Mode() & os.ModeType - var shouldSkip bool // skip some types of file system nodes - - switch { - case modeType&os.ModeDir > 0: - mt = os.ModeDir - // Walk function itself does not need to enumerate children, because - // DirWalk will do that for us. - shouldSkip = true - case modeType&os.ModeSymlink > 0: - mt = os.ModeSymlink - case modeType&os.ModeNamedPipe > 0: - mt = os.ModeNamedPipe - shouldSkip = true - case modeType&os.ModeSocket > 0: - mt = os.ModeSocket - shouldSkip = true - case modeType&os.ModeDevice > 0: - mt = os.ModeDevice - shouldSkip = true - } - - // Write the relative pathname to hash because the hash is a function of - // the node names, node types, and node contents. Added benefit is that - // empty directories, named pipes, sockets, devices, and symbolic links - // will also affect final hash value. Use `filepath.ToSlash` to ensure - // relative pathname is os-agnostic. 
- writeBytesWithNull(h, []byte(filepath.ToSlash(osRelative))) - - binary.LittleEndian.PutUint32(modeBytes, uint32(mt)) // encode the type of mode - writeBytesWithNull(h, modeBytes) // and write to hash - - if shouldSkip { - return nil // nothing more to do for some of the node types - } - - if mt == os.ModeSymlink { // okay to check for equivalence because we set to this value - osRelative, err = os.Readlink(osPathname) // read the symlink referent - if err != nil { - return errors.Wrap(err, "cannot Readlink") - } - writeBytesWithNull(h, []byte(filepath.ToSlash(osRelative))) // write referent to hash - return nil // proceed to next node in queue - } - - fh, err := os.Open(osPathname) - if err != nil { - return errors.Wrap(err, "cannot Open") - } - - bytesWritten, err = io.Copy(h, newLineEndingReader(fh)) // fast copy of file contents to hash - err = errors.Wrap(err, "cannot Copy") // errors.Wrap only wraps non-nil, so skip extra check - writeBytesWithNull(h, []byte(strconv.FormatInt(bytesWritten, 10))) // 10: format file size as base 10 integer - - // Close the file handle to the open file without masking - // possible previous error value. - if er := fh.Close(); err == nil { - err = errors.Wrap(er, "cannot Close") - } - return err - }) - if err != nil { - return nil, err - } - return h.Sum(nil), nil -} - -func DigestFromDirectoryGood(osDirname string) ([]byte, error) { - osDirname = filepath.Clean(osDirname) // Ensure parameter is a directory fi, err := os.Stat(osDirname) @@ -300,6 +192,8 @@ func DigestFromDirectoryGood(osDirname string) ([]byte, error) { // hash for each node we encounter. h := sha256.New() + someCopyBufer := make([]byte, 32*1024) // same size as what io.Copy uses + // Initialize a work queue with the empty string, which signifies the // starting directory itself. 
queue := []string{""} @@ -398,9 +292,9 @@ func DigestFromDirectoryGood(osDirname string) ([]byte, error) { } } } else { - bytesWritten, err = io.Copy(h, newLineEndingReader(fh)) // fast copy of file contents to hash - err = errors.Wrap(err, "cannot Copy") // errors.Wrap only wraps non-nil, so skip extra check - writeBytesWithNull(h, []byte(strconv.FormatInt(bytesWritten, 10))) // 10: format file size as base 10 integer + bytesWritten, err = io.CopyBuffer(h, newLineEndingReader(fh), someCopyBufer) // fast copy of file contents to hash + err = errors.Wrap(err, "cannot Copy") // errors.Wrap only wraps non-nil, so skip extra check + writeBytesWithNull(h, []byte(strconv.FormatInt(bytesWritten, 10))) // 10: format file size as base 10 integer } // Close the file handle to the open directory or file without masking diff --git a/internal/gps/pkgtree/digest_test.go b/internal/gps/pkgtree/digest_test.go index 7bf13fc0dd..bfad0ef13c 100644 --- a/internal/gps/pkgtree/digest_test.go +++ b/internal/gps/pkgtree/digest_test.go @@ -193,8 +193,21 @@ func TestVerifyDepTree(t *testing.T) { } } +func BenchmarkDigestFromDirectory(b *testing.B) { + b.Skip("Eliding benchmark of user's Go source directory") + + prefix := filepath.Join(os.Getenv("GOPATH"), "src") + + for i := 0; i < b.N; i++ { + _, err := DigestFromDirectory(prefix) + if err != nil { + b.Fatal(err) + } + } +} + func BenchmarkVerifyDepTree(b *testing.B) { - // b.Skip("Eliding benchmark of user's Go source directory") + b.Skip("Eliding benchmark of user's Go source directory") prefix := filepath.Join(os.Getenv("GOPATH"), "src") From 9a883e870ae97457e0fc5b29c5866a2a8fa6ef69 Mon Sep 17 00:00:00 2001 From: "Karrick S. 
McDermott" Date: Tue, 15 Aug 2017 04:46:06 -0400 Subject: [PATCH 33/36] DigestFromDirectory uses DirWalk --- internal/gps/pkgtree/digest.go | 139 +++++++++++--------------------- internal/gps/pkgtree/dirwalk.go | 64 +++++++++++---- 2 files changed, 95 insertions(+), 108 deletions(-) diff --git a/internal/gps/pkgtree/digest.go b/internal/gps/pkgtree/digest.go index 2cebf93292..46d14b71a6 100644 --- a/internal/gps/pkgtree/digest.go +++ b/internal/gps/pkgtree/digest.go @@ -12,7 +12,6 @@ import ( "io" "os" "path/filepath" - "sort" "strconv" "github.com/pkg/errors" @@ -123,33 +122,6 @@ func (f *lineEndingReader) Read(buf []byte) (int, error) { return nr, er } -// sortedChildrenFromDirname returns a lexicographically sorted list of child -// nodes for the specified directory. -func sortedChildrenFromDirname(osDirname string) ([]string, error) { - fh, err := os.Open(osDirname) - if err != nil { - return nil, errors.Wrap(err, "cannot Open") - } - osChildrenNames, err := sortedChildrenFromDirectorFileHandle(fh) - // Close the file handle to the open directory without masking possible - // previous error value. - if er := fh.Close(); err == nil { - err = errors.Wrap(er, "cannot Close") - } - return osChildrenNames, err -} - -// sortedChildrenFromDirectorFileHandle returns a lexicographically sorted list -// of child nodes for the specified file handle to a directory. -func sortedChildrenFromDirectorFileHandle(fh *os.File) ([]string, error) { - osChildrenNames, err := fh.Readdirnames(0) // 0: read names of all children - if err != nil { - return nil, errors.Wrap(err, "cannot Readdirnames") - } - sort.Strings(osChildrenNames) - return osChildrenNames, nil -} - // writeBytesWithNull appends the specified data to the specified hash, followed by // the NULL byte, in order to make accidental hash collisions less likely. 
func writeBytesWithNull(h hash.Hash, data []byte) { @@ -158,6 +130,15 @@ func writeBytesWithNull(h hash.Hash, data []byte) { _, _ = h.Write(append(data, 0)) } +// dirWalkClosure is used to reduce number of allocation involved in closing +// over these variables. +type dirWalkClosure struct { + someCopyBufer []byte // allocate once and reuse for each file copy + someModeBytes []byte // allocate once and reuse for each node + someDirLen int + someHash hash.Hash +} + // DigestFromDirectory returns a hash of the specified directory contents, which // will match the hash computed for any directory on any supported Go platform // whose contents exactly match the specified directory. @@ -179,40 +160,25 @@ func writeBytesWithNull(h hash.Hash, data []byte) { func DigestFromDirectory(osDirname string) ([]byte, error) { osDirname = filepath.Clean(osDirname) - // Ensure parameter is a directory - fi, err := os.Stat(osDirname) - if err != nil { - return nil, errors.Wrap(err, "cannot Stat") - } - if !fi.IsDir() { - return nil, errors.Errorf("cannot verify non directory: %q", osDirname) - } - // Create a single hash instance for the entire operation, rather than a new // hash for each node we encounter. - h := sha256.New() - - someCopyBufer := make([]byte, 32*1024) // same size as what io.Copy uses - - // Initialize a work queue with the empty string, which signifies the - // starting directory itself. 
- queue := []string{""} - var osRelative string // os-specific relative pathname under dirname - var bytesWritten int64 - modeBytes := make([]byte, 4) // scratch place to store encoded os.FileMode (uint32) + closure := dirWalkClosure{ + someCopyBufer: make([]byte, 4*1024), // only allocate a single page + someModeBytes: make([]byte, 4), // scratch place to store encoded os.FileMode (uint32) + someDirLen: len(osDirname) + len(osPathSeparator), + someHash: sha256.New(), + } - // As we enumerate over the queue and encounter a directory, its children - // will be added to the work queue. - for len(queue) > 0 { - // Unshift a pathname from the queue (breadth-first traversal of - // hierarchy) - osRelative, queue = queue[0], queue[1:] - osPathname := filepath.Join(osDirname, osRelative) + err := DirWalk(osDirname, func(osPathname string, info os.FileInfo, err error) error { + var osRelative string + if len(osPathname) > closure.someDirLen { + osRelative = osPathname[closure.someDirLen:] + } - fi, err = os.Lstat(osPathname) - if err != nil { - return nil, errors.Wrap(err, "cannot Lstat") + switch filepath.Base(osRelative) { + case "vendor", ".bzr", ".git", ".hg", ".svn": + return filepath.SkipDir } // We could make our own enum-like data type for encoding the file type, @@ -228,12 +194,15 @@ func DigestFromDirectory(osDirname string) ([]byte, error) { // node, and can ignore append, exclusive, temporary, setuid, setgid, // permission bits, and sticky bits, which are coincident to bits which // declare type of the file system node. - modeType := fi.Mode() & os.ModeType + modeType := info.Mode() & os.ModeType var shouldSkip bool // skip some types of file system nodes switch { case modeType&os.ModeDir > 0: mt = os.ModeDir + // DirWalkFunc itself does not need to enumerate children, because + // DirWalk will do that for us. 
+ shouldSkip = true case modeType&os.ModeSymlink > 0: mt = os.ModeSymlink case modeType&os.ModeNamedPipe > 0: @@ -252,62 +221,46 @@ func DigestFromDirectory(osDirname string) ([]byte, error) { // empty directories, named pipes, sockets, devices, and symbolic links // will also affect final hash value. Use `filepath.ToSlash` to ensure // relative pathname is os-agnostic. - writeBytesWithNull(h, []byte(filepath.ToSlash(osRelative))) + writeBytesWithNull(closure.someHash, []byte(filepath.ToSlash(osRelative))) - binary.LittleEndian.PutUint32(modeBytes, uint32(mt)) // encode the type of mode - writeBytesWithNull(h, modeBytes) // and write to hash + binary.LittleEndian.PutUint32(closure.someModeBytes, uint32(mt)) // encode the type of mode + writeBytesWithNull(closure.someHash, closure.someModeBytes) // and write to hash if shouldSkip { - continue // nothing more to do for some of the node types + return nil // nothing more to do for some of the node types } if mt == os.ModeSymlink { // okay to check for equivalence because we set to this value osRelative, err = os.Readlink(osPathname) // read the symlink referent if err != nil { - return nil, errors.Wrap(err, "cannot Readlink") + return errors.Wrap(err, "cannot Readlink") } - writeBytesWithNull(h, []byte(filepath.ToSlash(osRelative))) // write referent to hash - continue // proceed to next node in queue + writeBytesWithNull(closure.someHash, []byte(filepath.ToSlash(osRelative))) // write referent to hash + return nil // proceed to next node in queue } - // For both directories and regular files, we must create a file system - // handle in order to read their contents. + // If we get here, node is a regular file. 
fh, err := os.Open(osPathname) if err != nil { - return nil, errors.Wrap(err, "cannot Open") + return errors.Wrap(err, "cannot Open") } - if mt == os.ModeDir { - osChildrenNames, err := sortedChildrenFromDirectorFileHandle(fh) - if err != nil { - _ = fh.Close() // ignore close error because we already have an error reading directory - return nil, errors.Wrap(err, "cannot get list of directory children") - } - for _, osChildName := range osChildrenNames { - switch osChildName { - case ".", "..", "vendor", ".bzr", ".git", ".hg", ".svn": - // skip - default: - queue = append(queue, filepath.Join(osRelative, osChildName)) - } - } - } else { - bytesWritten, err = io.CopyBuffer(h, newLineEndingReader(fh), someCopyBufer) // fast copy of file contents to hash - err = errors.Wrap(err, "cannot Copy") // errors.Wrap only wraps non-nil, so skip extra check - writeBytesWithNull(h, []byte(strconv.FormatInt(bytesWritten, 10))) // 10: format file size as base 10 integer - } + var bytesWritten int64 + bytesWritten, err = io.CopyBuffer(closure.someHash, newLineEndingReader(fh), closure.someCopyBufer) // fast copy of file contents to hash + err = errors.Wrap(err, "cannot Copy") // errors.Wrap only wraps non-nil, so skip extra check + writeBytesWithNull(closure.someHash, []byte(strconv.FormatInt(bytesWritten, 10))) // 10: format file size as base 10 integer - // Close the file handle to the open directory or file without masking + // Close the file handle to the open file without masking // possible previous error value. 
if er := fh.Close(); err == nil { err = errors.Wrap(er, "cannot Close") } - if err != nil { - return nil, err // early termination iff error - } + return err + }) + if err != nil { + return nil, err } - - return h.Sum(nil), nil + return closure.someHash.Sum(nil), nil } // VendorStatus represents one of a handful of possible status conditions for a diff --git a/internal/gps/pkgtree/dirwalk.go b/internal/gps/pkgtree/dirwalk.go index 0fb47d503f..cafd4b7bd9 100644 --- a/internal/gps/pkgtree/dirwalk.go +++ b/internal/gps/pkgtree/dirwalk.go @@ -5,9 +5,9 @@ package pkgtree import ( - "log" "os" "path/filepath" + "sort" "strings" "github.com/pkg/errors" @@ -42,17 +42,17 @@ func DirWalk(osDirname string, walkFn DirWalkFunc) error { // Ensure parameter is a directory fi, err := os.Stat(osDirname) if err != nil { - return errors.Wrap(err, "cannot Stat") + return errors.Wrap(err, "cannot read node") } if !fi.IsDir() { - return errors.Errorf("cannot verify non directory: %q", osDirname) + return errors.Errorf("cannot walk non directory: %q", osDirname) } // Initialize a work queue with the empty string, which signifies the // starting directory itself. queue := []string{""} - var osRelative string // os-specific relative pathname under dirname + var osRelative string // os-specific relative pathname under directory name // As we enumerate over the queue and encounter a directory, its children // will be added to the work queue. @@ -62,28 +62,40 @@ func DirWalk(osDirname string, walkFn DirWalkFunc) error { osRelative, queue = queue[0], queue[1:] osPathname := filepath.Join(osDirname, osRelative) + // walkFn needs to choose how to handle symbolic links, therefore obtain + // lstat rather than stat. 
fi, err = os.Lstat(osPathname) - err = walkFn(osPathname, fi, errors.Wrap(err, "cannot Lstat")) + if err == nil { + err = walkFn(osPathname, fi, nil) + } else { + err = walkFn(osPathname, nil, errors.Wrap(err, "cannot read node")) + } + if err != nil { if err == filepath.SkipDir { - // We have lstat, and we need stat right now - fi, err = os.Stat(osPathname) - if err != nil { - return errors.Wrap(err, "cannot Stat") + if fi.Mode()&os.ModeSymlink > 0 { + // Resolve symbolic link referent to determine whether node + // is directory or not. + fi, err = os.Stat(osPathname) + if err != nil { + return errors.Wrap(err, "cannot visit node") + } } + // If current node is directory, then skip this + // directory. Otherwise, skip all nodes in the same parent + // directory. if !fi.IsDir() { - // Consume items from queue while they have the same parent - // as the current item. - osParent := filepath.Dir(osPathname) + // Consume nodes from queue while they have the same parent + // as the current node. + osParent := filepath.Dir(osPathname) + osPathSeparator for len(queue) > 0 && strings.HasPrefix(queue[0], osParent) { - log.Printf("skipping sibling: %s", queue[0]) - queue = queue[1:] + queue = queue[1:] // drop sibling from queue } } continue } - return errors.Wrap(err, "DirWalkFunction") + return errors.Wrap(err, "DirWalkFunction") // wrap error returned by walkFn } if fi.IsDir() { @@ -103,3 +115,25 @@ func DirWalk(osDirname string, walkFn DirWalkFunc) error { } return nil } + +// sortedChildrenFromDirname returns a lexicographically sorted list of child +// nodes for the specified directory. 
+func sortedChildrenFromDirname(osDirname string) ([]string, error) { + fh, err := os.Open(osDirname) + if err != nil { + return nil, errors.Wrap(err, "cannot Open") + } + + osChildrenNames, err := fh.Readdirnames(0) // 0: read names of all children + if err != nil { + return nil, errors.Wrap(err, "cannot Readdirnames") + } + sort.Strings(osChildrenNames) + + // Close the file handle to the open directory without masking possible + // previous error value. + if er := fh.Close(); err == nil { + err = errors.Wrap(er, "cannot Close") + } + return osChildrenNames, err +} From a252c366eb0f2bf2ecd20f6efcbd009c061edb95 Mon Sep 17 00:00:00 2001 From: "Karrick S. McDermott" Date: Tue, 15 Aug 2017 09:45:33 -0400 Subject: [PATCH 34/36] public type has properly formatted documentation --- internal/gps/pkgtree/dirwalk.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/internal/gps/pkgtree/dirwalk.go b/internal/gps/pkgtree/dirwalk.go index cafd4b7bd9..350c1606c3 100644 --- a/internal/gps/pkgtree/dirwalk.go +++ b/internal/gps/pkgtree/dirwalk.go @@ -13,12 +13,12 @@ import ( "github.com/pkg/errors" ) -// DirWalk is the type of the function called for each file system node visited -// by DirWalk. The path argument contains the argument to DirWalk as a prefix; -// that is, if DirWalk is called with "dir", which is a directory containing the -// file "a", the walk function will be called with the argument "dir/a", using -// the correct os.PathSeparator for the Go Operating System architecture, -// GOOS. The info argument is the os.FileInfo for the named path. +// DirWalkFunc is the type of the function called for each file system node +// visited by DirWalk. 
The path argument contains the argument to DirWalk as a +// prefix; that is, if DirWalk is called with "dir", which is a directory +// containing the file "a", the walk function will be called with the argument +// "dir/a", using the correct os.PathSeparator for the Go Operating System +// architecture, GOOS. The info argument is the os.FileInfo for the named path. // // If there was a problem walking to the file or directory named by path, the // incoming error will describe the problem and the function can decide how to From 83ad7c6d47cee285ab6bddbf033aad10e609ec4b Mon Sep 17 00:00:00 2001 From: "Karrick S. McDermott" Date: Tue, 15 Aug 2017 09:46:01 -0400 Subject: [PATCH 35/36] DigestFromDirectory handles Lstat error from DirWalk --- internal/gps/pkgtree/digest.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/internal/gps/pkgtree/digest.go b/internal/gps/pkgtree/digest.go index 46d14b71a6..7427fb590b 100644 --- a/internal/gps/pkgtree/digest.go +++ b/internal/gps/pkgtree/digest.go @@ -171,6 +171,10 @@ func DigestFromDirectory(osDirname string) ([]byte, error) { } err := DirWalk(osDirname, func(osPathname string, info os.FileInfo, err error) error { + if err != nil { + return err // DirWalk received an error during initial Lstat + } + var osRelative string if len(osPathname) > closure.someDirLen { osRelative = osPathname[closure.someDirLen:] From a9bb8cd30f207506113661735c0ca4b9ea87eea0 Mon Sep 17 00:00:00 2001 From: "Karrick S. 
McDermott" Date: Tue, 15 Aug 2017 09:57:13 -0400 Subject: [PATCH 36/36] removed constant no longer being used * thanks megacheck :) --- internal/gps/pkgtree/digest.go | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/internal/gps/pkgtree/digest.go b/internal/gps/pkgtree/digest.go index 7427fb590b..e2b9116cf0 100644 --- a/internal/gps/pkgtree/digest.go +++ b/internal/gps/pkgtree/digest.go @@ -17,13 +17,7 @@ import ( "github.com/pkg/errors" ) -const ( - osPathSeparator = string(filepath.Separator) - - // when walking vendor root hierarchy, ignore file system nodes of the - // following types. - skipSpecialNodes = os.ModeDevice | os.ModeNamedPipe | os.ModeSocket | os.ModeCharDevice -) +const osPathSeparator = string(filepath.Separator) // lineEndingReader is a `io.Reader` that converts CRLF sequences to LF. //