From e8df27f8eec1b12ab39be3342f88ed6e4ece680c Mon Sep 17 00:00:00 2001 From: "Wladimir J. van der Laan" Date: Tue, 14 Mar 2017 07:33:43 +0100 Subject: [PATCH] Merge #9984: devtools: Make github-merge compute SHA512 from git, instead of worktree a327e8e devtools: Make github-merge compute SHA512 from git, instead of worktree (Wladimir J. van der Laan) Tree-SHA512: 22ec7712876be4ab361015a2dd75a09628ec59105ffe3260126f899d8f3ff8666351b65b9a4dfe83f78eb777730442cd0352b155d7f573424f7fc1c4dbc0ddd2 --- contrib/devtools/github-merge.py | 45 ++++++++++++++++++++++++++------ 1 file changed, 37 insertions(+), 8 deletions(-) diff --git a/contrib/devtools/github-merge.py b/contrib/devtools/github-merge.py index f1b6a12fd058e..3fee39143dad7 100755 --- a/contrib/devtools/github-merge.py +++ b/contrib/devtools/github-merge.py @@ -78,24 +78,53 @@ def get_symlink_files(): ret.append(f.decode('utf-8').split("\t")[1]) return ret -def tree_sha512sum(): - files = sorted(subprocess.check_output([GIT, 'ls-tree', '--full-tree', '-r', '--name-only', 'HEAD']).splitlines()) +def tree_sha512sum(commit='HEAD'): + # request metadata for entire tree, recursively + files = [] + blob_by_name = {} + for line in subprocess.check_output([GIT, 'ls-tree', '--full-tree', '-r', commit]).splitlines(): + name_sep = line.index(b'\t') + metadata = line[:name_sep].split() # perms, 'blob', blobid + assert(metadata[1] == b'blob') + name = line[name_sep+1:] + files.append(name) + blob_by_name[name] = metadata[2] + + files.sort() + # open connection to git-cat-file in batch mode to request data for all blobs + # this is much faster than launching it per file + p = subprocess.Popen([GIT, 'cat-file', '--batch'], stdout=subprocess.PIPE, stdin=subprocess.PIPE) overall = hashlib.sha512() for f in files: + blob = blob_by_name[f] + # request blob + p.stdin.write(blob + b'\n') + p.stdin.flush() + # read header: blob, "blob", size + reply = p.stdout.readline().split() + assert(reply[0] == blob and reply[1] == b'blob') + size = int(reply[2]) + # hash the blob data intern = hashlib.sha512() - fi = open(f, 'rb') - while True: - piece = fi.read(65536) - if piece: + ptr = 0 + while ptr < size: + bs = min(65536, size - ptr) + piece = p.stdout.read(bs) + if len(piece) == bs: intern.update(piece) else: - break - fi.close() + raise IOError('Premature EOF reading git cat-file output') + ptr += bs dig = intern.hexdigest() + assert(p.stdout.read(1) == b'\n') # ignore LF that follows blob data + # update overall hash with file hash overall.update(dig.encode("utf-8")) overall.update(" ".encode("utf-8")) overall.update(f) overall.update("\n".encode("utf-8")) + p.stdin.close() + if p.wait(): + raise IOError('Non-zero return value executing git cat-file') return overall.hexdigest()