Skip to content

Commit

Permalink
ARROW-14408: [Packaging][Crossbow] Option for skipping artifact pattern validation
Browse files Browse the repository at this point in the history

Only download the artifacts belonging to the `manylinux` tasks but without validating the expected artifact names:

```
archery crossbow download-artifacts --no-fetch release-6.0.0-rc1-0 --task-filter "*manylinux*" --skip-pattern-validation
```

`--task-filter`: useful to restrict the download script to certain tasks
`--skip-pattern-validation`: occasionally useful, like in the current case where the manylinux2014 wheel names now contain two platform tags

cc @kou

Closes #11490 from kszucs/crossbow-pattern-validation

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
  • Loading branch information
kszucs committed Oct 21, 2021
1 parent 47f9176 commit eee80f1
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 15 deletions.
23 changes: 18 additions & 5 deletions dev/archery/archery/crossbow/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,14 +210,18 @@ def highlight(code):
@click.argument('job-name', required=True)
@click.option('--fetch/--no-fetch', default=True,
help='Fetch references (branches and tags) from the remote')
@click.option('--task-filter', '-f', 'task_filters', multiple=True,
help='Glob pattern for filtering relevant tasks')
@click.pass_obj
def status(obj, job_name, fetch):
def status(obj, job_name, fetch, task_filters):
output = obj['output']
queue = obj['queue']
if fetch:
queue.fetch()
job = queue.get(job_name)
ConsoleReport(job).show(output)

report = ConsoleReport(job, task_filters=task_filters)
report.show(output)


@crossbow.command()
Expand Down Expand Up @@ -306,8 +310,13 @@ def report(obj, job_name, sender_name, sender_email, recipient_email,
help='Just display process, don\'t download anything')
@click.option('--fetch/--no-fetch', default=True,
help='Fetch references (branches and tags) from the remote')
@click.option('--task-filter', '-f', 'task_filters', multiple=True,
help='Glob pattern for filtering relevant tasks')
@click.option('--validate-patterns/--skip-pattern-validation', default=True,
help='Whether to validate artifact name patterns or not')
@click.pass_obj
def download_artifacts(obj, job_name, target_dir, dry_run, fetch):
def download_artifacts(obj, job_name, target_dir, dry_run, fetch,
validate_patterns, task_filters):
"""Download build artifacts from GitHub releases"""
output = obj['output']

Expand Down Expand Up @@ -335,8 +344,12 @@ def asset_callback(task_name, task, asset):
click.echo('Destination directory is {}'.format(target_dir))
click.echo()

report = ConsoleReport(job)
report.show(output, asset_callback=asset_callback)
report = ConsoleReport(job, task_filters=task_filters)
report.show(
output,
asset_callback=asset_callback,
validate_patterns=validate_patterns
)


@crossbow.command()
Expand Down
14 changes: 10 additions & 4 deletions dev/archery/archery/crossbow/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -788,12 +788,13 @@ def status(self, force_query=False):
self._status = TaskStatus(github_commit)
return self._status

def assets(self, force_query=False):
def assets(self, force_query=False, validate_patterns=True):
_assets = getattr(self, '_assets', None)
if force_query or _assets is None:
github_release = self._queue.github_release(self.tag)
self._assets = TaskAssets(github_release,
artifact_patterns=self.artifacts)
artifact_patterns=self.artifacts,
validate_patterns=validate_patterns)
return self._assets


Expand Down Expand Up @@ -874,7 +875,8 @@ def __init__(self, commit):

class TaskAssets(dict):

def __init__(self, github_release, artifact_patterns):
def __init__(self, github_release, artifact_patterns,
validate_patterns=True):
# HACK(kszucs): don't expect uploaded assets if no artifacts were
# defined for the tasks in order to spare a bit of github rate limit
if not artifact_patterns:
Expand All @@ -885,9 +887,13 @@ def __init__(self, github_release, artifact_patterns):
else:
github_assets = {a.name: a for a in github_release.assets()}

if not validate_patterns:
# shortcut to avoid pattern validation and just set all artifacts
return self.update(github_assets)

for pattern in artifact_patterns:
# artifact can be a regex pattern
compiled = re.compile(pattern)
compiled = re.compile(f"^{pattern}$")
matches = list(
filter(None, map(compiled.match, github_assets.keys()))
)
Expand Down
25 changes: 19 additions & 6 deletions dev/archery/archery/crossbow/reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import click
import collections
import operator
import fnmatch
import functools
from io import StringIO
import textwrap
Expand All @@ -26,9 +27,23 @@
# TODO(kszucs): use archery.report.JinjaReport instead
class Report:

def __init__(self, job):
def __init__(self, job, task_filters=None):
self.job = job

tasks = sorted(job.tasks.items())
if task_filters is not None:
filtered = set()
for pattern in task_filters:
filtered |= set(fnmatch.filter(job.tasks.keys(), pattern))

tasks = [(name, task) for name, task in tasks if name in filtered]

self._tasks = dict(tasks)

@property
def tasks(self):
return self._tasks

def show(self):
raise NotImplementedError()

Expand Down Expand Up @@ -88,19 +103,17 @@ def artifact(self, state, pattern, asset):
)
return name_ + state_

def show(self, outstream, asset_callback=None):
def show(self, outstream, asset_callback=None, validate_patterns=True):
echo = functools.partial(click.echo, file=outstream)

# write table's header
echo(self.header())

# write table's body
for task_name, task in sorted(self.job.tasks.items()):
# if not task_name.startswith("test-debian-10-python-3"):
# continue
for task_name, task in self.tasks.items():
# write summary of the uploaded vs total assets
status = task.status()
assets = task.assets()
assets = task.assets(validate_patterns=validate_patterns)

# mapping of artifact pattern to asset or None if not uploaded
n_expected = len(task.artifacts)
Expand Down
1 change: 1 addition & 0 deletions dev/release/03-binary-submit.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ release_tag="apache-arrow-${version}"
# are jobs submitted with the same prefix (the integer at the end is auto
# incremented)
archery crossbow submit \
--no-fetch \
--job-prefix ${crossbow_job_prefix} \
--arrow-version ${version_with_rc} \
--arrow-remote "https://github.com/${ARROW_REPOSITORY}" \
Expand Down

0 comments on commit eee80f1

Please sign in to comment.