Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

change tar command used in get_source_tarball_from_git to get reproducible tarballs #4248

Merged
merged 9 commits into from
Mar 18, 2024
32 changes: 24 additions & 8 deletions easybuild/tools/filetools.py
Original file line number Diff line number Diff line change
Expand Up @@ -2540,12 +2540,12 @@ def copy(paths, target_path, force_in_dry_run=False, **kwargs):
raise EasyBuildError("Specified path to copy is not an existing file or directory: %s", path)


def get_source_tarball_from_git(filename, targetdir, git_config):
def get_source_tarball_from_git(filename, target_dir, git_config):
"""
Downloads a git repository, at a specific tag or commit, recursively or not, and makes an archive with it

:param filename: name of the archive to save the code to (must be .tar.gz)
:param targetdir: target directory where to save the archive to
:param target_dir: target directory where to save the archive to
:param git_config: dictionary containing url, repo_name, recursive, and one of tag or commit
"""
# sanity check on git_config value being passed
Expand Down Expand Up @@ -2584,8 +2584,7 @@ def get_source_tarball_from_git(filename, targetdir, git_config):
raise EasyBuildError("git_config currently only supports filename ending in .tar.gz")

# prepare target directory and clone repository
mkdir(targetdir, parents=True)
targetpath = os.path.join(targetdir, filename)
mkdir(target_dir, parents=True)

# compose 'git clone' command, and run it
if extra_config_params:
Expand Down Expand Up @@ -2668,17 +2667,34 @@ def get_source_tarball_from_git(filename, targetdir, git_config):
for cmd in cmds:
run_shell_cmd(cmd, work_dir=work_dir, hidden=True, verbose_dry_run=True)

# create an archive and delete the git repo directory
# Create archive
archive_path = os.path.join(target_dir, filename)

if keep_git_dir:
tar_cmd = ['tar', 'cfvz', targetpath, repo_name]
# create archive of git repo including .git directory
tar_cmd = ['tar', 'cfvz', archive_path, repo_name]
else:
tar_cmd = ['tar', 'cfvz', targetpath, '--exclude', '.git', repo_name]
# create reproducible archive
# see https://reproducible-builds.org/docs/archives/
tar_cmd = [
# print names of all files and folders excluding .git directory
'find', repo_name, '-name ".git"', '-prune', '-o', '-print0',
# reset access and modification timestamps
'-exec', 'touch', '-t 197001010100', '{}', r'\;', '|',
# sort file list
'LC_ALL=C', 'sort', '--zero-terminated', '|',
# create tarball in GNU format with ownership reset
'tar', '--create', '--no-recursion', '--owner=0', '--group=0', '--numeric-owner', '--format=gnu',
'--null', '--files-from', '-', '|',
# compress tarball with gzip without original file name and timestamp
'gzip', '--no-name', '>', archive_path
]
run_shell_cmd(' '.join(tar_cmd), work_dir=tmpdir, hidden=True, verbose_dry_run=True)

# cleanup (repo_name dir does not exist in dry run mode)
remove(tmpdir)

return targetpath
return archive_path


def move_file(path, target_path, force_in_dry_run=False):
Expand Down
72 changes: 40 additions & 32 deletions test/framework/filetools.py
Original file line number Diff line number Diff line change
Expand Up @@ -2798,42 +2798,51 @@ def run_check():
'url': 'git@github.com:easybuilders',
'tag': 'tag_for_tests',
}
git_repo = {'git_repo': 'git@github.com:easybuilders/testrepository.git'} # Just to make the below shorter
string_args = {
'git_repo': 'git@github.com:easybuilders/testrepository.git',
'test_prefix': self.test_prefix,
}
reprod_tar_cmd_pattern = (
r' running shell command "find {} -name \".git\" -prune -o -print0 -exec touch -t 197001010100 {{}} \; |'
r' LC_ALL=C sort --zero-terminated | tar --create --no-recursion --owner=0 --group=0 --numeric-owner'
r' --format=gnu --null --files-from - | gzip --no-name > %(test_prefix)s/target/test.tar.gz'
)

expected = '\n'.join([
r' running shell command "git clone --depth 1 --branch tag_for_tests %(git_repo)s"',
r" \(in /.*\)",
r' running shell command "tar cfvz .*/target/test.tar.gz --exclude .git testrepository"',
r" \(in /.*\)",
]) % git_repo
r" \(in .*/tmp.*\)",
reprod_tar_cmd_pattern.format("testrepository"),
r" \(in .*/tmp.*\)",
]) % string_args
run_check()

git_config['clone_into'] = 'test123'
expected = '\n'.join([
r' running shell command "git clone --depth 1 --branch tag_for_tests %(git_repo)s test123"',
r" \(in /.*\)",
r' running shell command "tar cfvz .*/target/test.tar.gz --exclude .git test123"',
r" \(in /.*\)",
]) % git_repo
r" \(in .*/tmp.*\)",
reprod_tar_cmd_pattern.format("test123"),
r" \(in .*/tmp.*\)",
]) % string_args
run_check()
del git_config['clone_into']

git_config['recursive'] = True
expected = '\n'.join([
r' running shell command "git clone --depth 1 --branch tag_for_tests --recursive %(git_repo)s"',
r" \(in /.*\)",
r' running shell command "tar cfvz .*/target/test.tar.gz --exclude .git testrepository"',
r" \(in /.*\)",
]) % git_repo
r" \(in .*/tmp.*\)",
reprod_tar_cmd_pattern.format("testrepository"),
r" \(in .*/tmp.*\)",
]) % string_args
run_check()

git_config['recurse_submodules'] = ['!vcflib', '!sdsl-lite']
expected = '\n'.join([
' running shell command "git clone --depth 1 --branch tag_for_tests --recursive'
+ ' --recurse-submodules=\'!vcflib\' --recurse-submodules=\'!sdsl-lite\' %(git_repo)s"',
r" \(in .*/tmp.*\)",
r' running shell command "tar cfvz .*/target/test.tar.gz --exclude .git testrepository"',
reprod_tar_cmd_pattern.format("testrepository"),
r" \(in .*/tmp.*\)",
]) % git_repo
]) % string_args
run_check()

git_config['extra_config_params'] = [
Expand All @@ -2845,45 +2854,44 @@ def run_check():
+ ' clone --depth 1 --branch tag_for_tests --recursive'
+ ' --recurse-submodules=\'!vcflib\' --recurse-submodules=\'!sdsl-lite\' %(git_repo)s"',
r" \(in .*/tmp.*\)",
r' running shell command "tar cfvz .*/target/test.tar.gz --exclude .git testrepository"',
reprod_tar_cmd_pattern.format("testrepository"),
r" \(in .*/tmp.*\)",
]) % git_repo
]) % string_args
run_check()
del git_config['recurse_submodules']
del git_config['extra_config_params']

git_config['keep_git_dir'] = True
expected = '\n'.join([
r' running shell command "git clone --branch tag_for_tests --recursive %(git_repo)s"',
r" \(in /.*\)",
r" \(in .*/tmp.*\)",
r' running shell command "tar cfvz .*/target/test.tar.gz testrepository"',
r" \(in /.*\)",
]) % git_repo
r" \(in .*/tmp.*\)",
]) % string_args
run_check()
del git_config['keep_git_dir']

del git_config['tag']
git_config['commit'] = '8456f86'
expected = '\n'.join([
r' running shell command "git clone --no-checkout %(git_repo)s"',
r" \(in /.*\)",
r" \(in .*/tmp.*\)",
r' running shell command "git checkout 8456f86 && git submodule update --init --recursive"',
r" \(in /.*/testrepository\)",
r' running shell command "tar cfvz .*/target/test.tar.gz --exclude .git testrepository"',
r" \(in /.*\)",
]) % git_repo
r" \(in testrepository\)",
reprod_tar_cmd_pattern.format("testrepository"),
r" \(in .*/tmp.*\)",
]) % string_args
run_check()

git_config['recurse_submodules'] = ['!vcflib', '!sdsl-lite']
expected = '\n'.join([
r' running shell command "git clone --no-checkout %(git_repo)s"',
r" \(in .*/tmp.*\)",
' running shell command "git checkout 8456f86 && git submodule update --init --recursive'
+ ' --recurse-submodules=\'!vcflib\' --recurse-submodules=\'!sdsl-lite\'"',
r" \(in /.*/testrepository\)",
r' running shell command "tar cfvz .*/target/test.tar.gz --exclude .git testrepository"',
r' running shell command "git checkout 8456f86"',
r" \(in testrepository\)",
reprod_tar_cmd_pattern.format("testrepository"),
r" \(in .*/tmp.*\)",
]) % git_repo
]) % string_args
run_check()

del git_config['recursive']
Expand All @@ -2893,9 +2901,9 @@ def run_check():
r" \(in /.*\)",
r' running shell command "git checkout 8456f86"',
r" \(in /.*/testrepository\)",
r' running shell command "tar cfvz .*/target/test.tar.gz --exclude .git testrepository"',
reprod_tar_cmd_pattern.format("testrepository"),
r" \(in /.*\)",
]) % git_repo
]) % string_args
run_check()

# Test with real data.
Expand Down