Skip to content

Commit

Permalink
Merge pull request #4248 from Rovanion/reproducible-git-tarballs
Browse files: browse the repository at this point in the history
change `tar` command used in `get_source_tarball_from_git` to get reproducible tarballs
  • Loading branch information
lexming committed Mar 18, 2024
2 parents 6203780 + e1f95b1 commit db2ab3a
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 40 deletions.
32 changes: 24 additions & 8 deletions easybuild/tools/filetools.py
Original file line number Diff line number Diff line change
Expand Up @@ -2540,12 +2540,12 @@ def copy(paths, target_path, force_in_dry_run=False, **kwargs):
raise EasyBuildError("Specified path to copy is not an existing file or directory: %s", path)


def get_source_tarball_from_git(filename, targetdir, git_config):
def get_source_tarball_from_git(filename, target_dir, git_config):
"""
Downloads a git repository, at a specific tag or commit, recursively or not, and make an archive with it
:param filename: name of the archive to save the code to (must be .tar.gz)
:param targetdir: target directory where to save the archive to
:param target_dir: target directory where to save the archive to
:param git_config: dictionary containing url, repo_name, recursive, and one of tag or commit
"""
# sanity check on git_config value being passed
Expand Down Expand Up @@ -2584,8 +2584,7 @@ def get_source_tarball_from_git(filename, targetdir, git_config):
raise EasyBuildError("git_config currently only supports filename ending in .tar.gz")

# prepare target directory and clone repository
mkdir(targetdir, parents=True)
targetpath = os.path.join(targetdir, filename)
mkdir(target_dir, parents=True)

# compose 'git clone' command, and run it
if extra_config_params:
Expand Down Expand Up @@ -2668,17 +2667,34 @@ def get_source_tarball_from_git(filename, targetdir, git_config):
for cmd in cmds:
run_shell_cmd(cmd, work_dir=work_dir, hidden=True, verbose_dry_run=True)

# create an archive and delete the git repo directory
# Create archive
archive_path = os.path.join(target_dir, filename)

if keep_git_dir:
tar_cmd = ['tar', 'cfvz', targetpath, repo_name]
# create archive of git repo including .git directory
tar_cmd = ['tar', 'cfvz', archive_path, repo_name]
else:
tar_cmd = ['tar', 'cfvz', targetpath, '--exclude', '.git', repo_name]
# create reproducible archive
# see https://reproducible-builds.org/docs/archives/
tar_cmd = [
# print names of all files and folders excluding .git directory
'find', repo_name, '-name ".git"', '-prune', '-o', '-print0',
# reset access and modification timestamps
'-exec', 'touch', '-t 197001010100', '{}', r'\;', '|',
# sort file list
'LC_ALL=C', 'sort', '--zero-terminated', '|',
# create tarball in GNU format with ownership reset
'tar', '--create', '--no-recursion', '--owner=0', '--group=0', '--numeric-owner', '--format=gnu',
'--null', '--files-from', '-', '|',
# compress tarball with gzip without original file name and timestamp
'gzip', '--no-name', '>', archive_path
]
run_shell_cmd(' '.join(tar_cmd), work_dir=tmpdir, hidden=True, verbose_dry_run=True)

# cleanup (repo_name dir does not exist in dry run mode)
remove(tmpdir)

return targetpath
return archive_path


def move_file(path, target_path, force_in_dry_run=False):
Expand Down
72 changes: 40 additions & 32 deletions test/framework/filetools.py
Original file line number Diff line number Diff line change
Expand Up @@ -2798,42 +2798,51 @@ def run_check():
'url': 'git@github.com:easybuilders',
'tag': 'tag_for_tests',
}
git_repo = {'git_repo': 'git@github.com:easybuilders/testrepository.git'} # Just to make the below shorter
string_args = {
'git_repo': 'git@github.com:easybuilders/testrepository.git',
'test_prefix': self.test_prefix,
}
reprod_tar_cmd_pattern = (
r' running shell command "find {} -name \".git\" -prune -o -print0 -exec touch -t 197001010100 {{}} \; |'
r' LC_ALL=C sort --zero-terminated | tar --create --no-recursion --owner=0 --group=0 --numeric-owner'
r' --format=gnu --null --files-from - | gzip --no-name > %(test_prefix)s/target/test.tar.gz'
)

expected = '\n'.join([
r' running shell command "git clone --depth 1 --branch tag_for_tests %(git_repo)s"',
r" \(in /.*\)",
r' running shell command "tar cfvz .*/target/test.tar.gz --exclude .git testrepository"',
r" \(in /.*\)",
]) % git_repo
r" \(in .*/tmp.*\)",
reprod_tar_cmd_pattern.format("testrepository"),
r" \(in .*/tmp.*\)",
]) % string_args
run_check()

git_config['clone_into'] = 'test123'
expected = '\n'.join([
r' running shell command "git clone --depth 1 --branch tag_for_tests %(git_repo)s test123"',
r" \(in /.*\)",
r' running shell command "tar cfvz .*/target/test.tar.gz --exclude .git test123"',
r" \(in /.*\)",
]) % git_repo
r" \(in .*/tmp.*\)",
reprod_tar_cmd_pattern.format("test123"),
r" \(in .*/tmp.*\)",
]) % string_args
run_check()
del git_config['clone_into']

git_config['recursive'] = True
expected = '\n'.join([
r' running shell command "git clone --depth 1 --branch tag_for_tests --recursive %(git_repo)s"',
r" \(in /.*\)",
r' running shell command "tar cfvz .*/target/test.tar.gz --exclude .git testrepository"',
r" \(in /.*\)",
]) % git_repo
r" \(in .*/tmp.*\)",
reprod_tar_cmd_pattern.format("testrepository"),
r" \(in .*/tmp.*\)",
]) % string_args
run_check()

git_config['recurse_submodules'] = ['!vcflib', '!sdsl-lite']
expected = '\n'.join([
' running shell command "git clone --depth 1 --branch tag_for_tests --recursive'
+ ' --recurse-submodules=\'!vcflib\' --recurse-submodules=\'!sdsl-lite\' %(git_repo)s"',
r" \(in .*/tmp.*\)",
r' running shell command "tar cfvz .*/target/test.tar.gz --exclude .git testrepository"',
reprod_tar_cmd_pattern.format("testrepository"),
r" \(in .*/tmp.*\)",
]) % git_repo
]) % string_args
run_check()

git_config['extra_config_params'] = [
Expand All @@ -2845,45 +2854,44 @@ def run_check():
+ ' clone --depth 1 --branch tag_for_tests --recursive'
+ ' --recurse-submodules=\'!vcflib\' --recurse-submodules=\'!sdsl-lite\' %(git_repo)s"',
r" \(in .*/tmp.*\)",
r' running shell command "tar cfvz .*/target/test.tar.gz --exclude .git testrepository"',
reprod_tar_cmd_pattern.format("testrepository"),
r" \(in .*/tmp.*\)",
]) % git_repo
]) % string_args
run_check()
del git_config['recurse_submodules']
del git_config['extra_config_params']

git_config['keep_git_dir'] = True
expected = '\n'.join([
r' running shell command "git clone --branch tag_for_tests --recursive %(git_repo)s"',
r" \(in /.*\)",
r" \(in .*/tmp.*\)",
r' running shell command "tar cfvz .*/target/test.tar.gz testrepository"',
r" \(in /.*\)",
]) % git_repo
r" \(in .*/tmp.*\)",
]) % string_args
run_check()
del git_config['keep_git_dir']

del git_config['tag']
git_config['commit'] = '8456f86'
expected = '\n'.join([
r' running shell command "git clone --no-checkout %(git_repo)s"',
r" \(in /.*\)",
r" \(in .*/tmp.*\)",
r' running shell command "git checkout 8456f86 && git submodule update --init --recursive"',
r" \(in /.*/testrepository\)",
r' running shell command "tar cfvz .*/target/test.tar.gz --exclude .git testrepository"',
r" \(in /.*\)",
]) % git_repo
r" \(in testrepository\)",
reprod_tar_cmd_pattern.format("testrepository"),
r" \(in .*/tmp.*\)",
]) % string_args
run_check()

git_config['recurse_submodules'] = ['!vcflib', '!sdsl-lite']
expected = '\n'.join([
r' running shell command "git clone --no-checkout %(git_repo)s"',
r" \(in .*/tmp.*\)",
' running shell command "git checkout 8456f86 && git submodule update --init --recursive'
+ ' --recurse-submodules=\'!vcflib\' --recurse-submodules=\'!sdsl-lite\'"',
r" \(in /.*/testrepository\)",
r' running shell command "tar cfvz .*/target/test.tar.gz --exclude .git testrepository"',
r' running shell command "git checkout 8456f86"',
r" \(in testrepository\)",
reprod_tar_cmd_pattern.format("testrepository"),
r" \(in .*/tmp.*\)",
]) % git_repo
]) % string_args
run_check()

del git_config['recursive']
Expand All @@ -2893,9 +2901,9 @@ def run_check():
r" \(in /.*\)",
r' running shell command "git checkout 8456f86"',
r" \(in /.*/testrepository\)",
r' running shell command "tar cfvz .*/target/test.tar.gz --exclude .git testrepository"',
reprod_tar_cmd_pattern.format("testrepository"),
r" \(in /.*\)",
]) % git_repo
]) % string_args
run_check()

# Test with real data.
Expand Down

0 comments on commit db2ab3a

Please sign in to comment.