Skip to content

Commit

Permalink
devtools: Make github-merge compute SHA512 from git, instead of worktree
Browse files Browse the repository at this point in the history
This changes tree_sha512sum() to requests the objects for hashing from
git instead of from the working tree.

The change should make the process more deterministic (it hashes what
will be pushed) and hopefully avoids the frequent miscomputed SHA512's
that happen now.
  • Loading branch information
laanwj committed Mar 13, 2017
1 parent 8040ae6 commit a327e8e
Showing 1 changed file with 37 additions and 8 deletions.
45 changes: 37 additions & 8 deletions contrib/devtools/github-merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,24 +78,53 @@ def get_symlink_files():
ret.append(f.decode('utf-8').split("\t")[1])
return ret

def tree_sha512sum():
files = sorted(subprocess.check_output([GIT, 'ls-tree', '--full-tree', '-r', '--name-only', 'HEAD']).splitlines())
def tree_sha512sum(commit='HEAD'):
# request metadata for entire tree, recursively
files = []
blob_by_name = {}
for line in subprocess.check_output([GIT, 'ls-tree', '--full-tree', '-r', commit]).splitlines():
name_sep = line.index(b'\t')
metadata = line[:name_sep].split() # perms, 'blob', blobid
assert(metadata[1] == b'blob')
name = line[name_sep+1:]
files.append(name)
blob_by_name[name] = metadata[2]

files.sort()
# open connection to git-cat-file in batch mode to request data for all blobs
# this is much faster than launching it per file
p = subprocess.Popen([GIT, 'cat-file', '--batch'], stdout=subprocess.PIPE, stdin=subprocess.PIPE)
overall = hashlib.sha512()
for f in files:
blob = blob_by_name[f]
# request blob
p.stdin.write(blob + b'\n')
p.stdin.flush()
# read header: blob, "blob", size
reply = p.stdout.readline().split()
assert(reply[0] == blob and reply[1] == b'blob')
size = int(reply[2])
# hash the blob data
intern = hashlib.sha512()
fi = open(f, 'rb')
while True:
piece = fi.read(65536)
if piece:
ptr = 0
while ptr < size:
bs = min(65536, size - ptr)
piece = p.stdout.read(bs)
if len(piece) == bs:
intern.update(piece)
else:
break
fi.close()
raise IOError('Premature EOF reading git cat-file output')
ptr += bs
dig = intern.hexdigest()
assert(p.stdout.read(1) == b'\n') # ignore LF that follows blob data
# update overall hash with file hash
overall.update(dig.encode("utf-8"))
overall.update(" ".encode("utf-8"))
overall.update(f)
overall.update("\n".encode("utf-8"))
p.stdin.close()
if p.wait():
raise IOError('Non-zero return value executing git cat-file')
return overall.hexdigest()


Expand Down

0 comments on commit a327e8e

Please sign in to comment.