Skip to content

Commit

Permalink
ARROW-8371: [Crossbow] Implement and exercise sanity checks for tasks…
Browse files Browse the repository at this point in the history
….yml

- implement `python crossbow.py check-config`
- also run the check before job submission
- a bit more verbose error messages for template rendering issues
- run these sanity checks if anything changes under dev/tasks

Closes #6875 from kszucs/crossbow-check-config

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
  • Loading branch information
kszucs committed Apr 8, 2020
1 parent 9422f4d commit f396de5
Show file tree
Hide file tree
Showing 15 changed files with 151 additions and 100 deletions.
15 changes: 10 additions & 5 deletions .github/workflows/archery.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,23 +15,25 @@
# specific language governing permissions and limitations
# under the License.

name: Archery
name: Archery & Crossbow

on:
push:
paths:
- '.github/workflows/archery.yml'
- 'dev/archery/**'
- 'dev/tasks/**'
pull_request:
paths:
- '.github/workflows/archery.yml'
- 'dev/archery/**'
- 'dev/tasks/**'

jobs:

archery:
test:
if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
name: Archery Unittests
name: Archery Unittests and Crossbow Check Config
runs-on: ubuntu-latest
steps:
- name: Checkout Arrow
Expand All @@ -47,7 +49,10 @@ jobs:
python-version: '3.7'
- name: Install
working-directory: dev/archery
run: pip install pytest responses ruamel.yaml -e .
- name: Test
run: pip install pytest responses ruamel.yaml toolz jinja2 -e .
- name: Archery Unittests
working-directory: dev/archery
run: pytest -v archery
- name: Crossbow Check Config
working-directory: dev/tasks
run: python crossbow.py check-config
2 changes: 1 addition & 1 deletion dev/tasks/conda-recipes/azure.linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ jobs:
- script: |
python arrow/dev/tasks/crossbow.py \
--queue-path . \
--queue-remote {{ queue.remote_url }} \
--queue-remote {{ queue_remote_url }} \
upload-artifacts \
--sha {{ task.branch }} \
--tag {{ task.tag }} \
Expand Down
2 changes: 1 addition & 1 deletion dev/tasks/conda-recipes/azure.osx.yml
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ jobs:
conda install -y click github3.py jinja2 jira pygit2 ruamel.yaml setuptools_scm toolz
python arrow/dev/tasks/crossbow.py \
--queue-path . \
--queue-remote {{ queue.remote_url }} \
--queue-remote {{ queue_remote_url }} \
upload-artifacts \
--sha {{ task.branch }} \
--tag {{ task.tag }} \
Expand Down
2 changes: 1 addition & 1 deletion dev/tasks/conda-recipes/azure.win.yml
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ jobs:
- script: |
python arrow/dev/tasks/crossbow.py ^
--queue-path . ^
--queue-remote {{ queue.remote_url }} ^
--queue-remote {{ queue_remote_url }} ^
upload-artifacts ^
--sha {{ task.branch }} ^
--tag {{ task.tag }} ^
Expand Down
186 changes: 133 additions & 53 deletions dev/tasks/crossbow.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,7 @@

import click
import toolz
from setuptools_scm.git import parse as parse_git_version
from setuptools_scm.version import guess_next_version

from ruamel.yaml import YAML

try:
Expand Down Expand Up @@ -340,6 +339,12 @@ def fetch(self):

def push(self, refs=None, github_token=None):
github_token = github_token or self.github_token
if github_token is None:
raise click.ClickException(
'Could not determine GitHub token. Please set the '
'CROSSBOW_GITHUB_TOKEN environment variable to a '
'valid GitHub access token or pass one to --github-token.'
)
callbacks = GitRemoteCallbacks(github_token)
refs = refs or []
try:
Expand Down Expand Up @@ -745,7 +750,8 @@ def put(self, job, prefix='build'):
# adding CI's name to the end of the branch in order to use skip
# patterns on travis and circleci
task.branch = '{}-{}-{}'.format(job.branch, task.ci, task_name)
files = task.render_files(job=job, arrow=job.target, queue=self)
files = task.render_files(arrow=job.target,
queue_remote_url=self.remote_url)
branch = self.create_branch(task.branch, files=files)
self.create_tag(task.tag, branch.target)
task.commit = str(branch.target)
Expand All @@ -759,8 +765,12 @@ def get_version(root, **kwargs):
Parse function for setuptools_scm that ignores tags for non-C++
subprojects, e.g. apache-arrow-js-XXX tags.
"""
kwargs['describe_command'] =\
from setuptools_scm.git import parse as parse_git_version
from setuptools_scm.version import guess_next_version

kwargs['describe_command'] = (
'git describe --dirty --tags --long --match "apache-arrow-[0-9].*"'
)
version = parse_git_version(root, **kwargs)
return version.format_next_version(guess_next_version)

Expand Down Expand Up @@ -840,10 +850,20 @@ def __init__(self, platform, ci, template, artifacts=None, params=None):

def render_files(self, **extra_params):
from jinja2 import Template, StrictUndefined
from jinja2.exceptions import TemplateError

path = CWD / self.template
params = toolz.merge(self.params, extra_params)
template = Template(path.read_text(), undefined=StrictUndefined)
rendered = template.render(task=self, **params)
try:
rendered = template.render(task=self, **params)
except TemplateError as e:
raise RuntimeError(
'Failed to render template `{}` with {}: {}'.format(
path, e.__class__.__name__, str(e)
)
)

tree = toolz.merge(_default_tree, {self.filename: rendered})
return unflatten_tree(tree)

Expand Down Expand Up @@ -962,47 +982,13 @@ def from_config(cls, config, target, tasks=None, groups=None):
click.ClickException
If invalid groups or tasks has been passed.
"""
config_groups = dict(config['groups'])
config_tasks = dict(config['tasks'])
valid_groups = set(config_groups.keys())
valid_tasks = set(config_tasks.keys())
group_whitelist = list(groups or [])
task_whitelist = list(tasks or [])

# validate that the passed groups are defined in the config
requested_groups = set(group_whitelist)
invalid_groups = requested_groups - valid_groups
if invalid_groups:
msg = 'Invalid group(s) {!r}. Must be one of {!r}'.format(
invalid_groups, valid_groups
)
raise click.ClickException(msg)

# merge the tasks defined in the selected groups
task_patterns = [list(config_groups[name]) for name in group_whitelist]
task_patterns = set(sum(task_patterns, task_whitelist))

# treat the task names as glob patterns to select tasks more easily
requested_tasks = set(
toolz.concat(
fnmatch.filter(valid_tasks, p) for p in task_patterns
)
)

# validate that the passed and matched tasks are defined in the config
invalid_tasks = requested_tasks - valid_tasks
if invalid_tasks:
msg = 'Invalid task(s) {!r}. Must be one of {!r}'.format(
invalid_tasks, valid_tasks
)
raise click.ClickException(msg)
task_definitions = config.select(tasks, groups=groups)

# instantiate the tasks
tasks = {}
versions = {'version': target.version,
'no_rc_version': target.no_rc_version}
for task_name in requested_tasks:
task = config_tasks[task_name]
for task_name, task in task_definitions.items():
artifacts = task.pop('artifacts', None) or [] # because of yaml
artifacts = [fn.format(**versions) for fn in artifacts]
tasks[task_name] = Task(artifacts=artifacts, **task)
Expand Down Expand Up @@ -1059,6 +1045,96 @@ def query_assets(self, max_workers=None, ignore_prefix=None):
yield (task_name, task, status.result(), assets.result())


class Config(dict):
    """Crossbow task configuration, a dict with ``groups`` and ``tasks`` keys.

    ``tasks`` maps task names to task definitions (the keyword arguments
    passed to ``Task``), while ``groups`` maps group names to lists of glob
    patterns which are matched against the task names.
    """

    @classmethod
    def load_yaml(cls, path):
        """Load the configuration from the yaml file located at `path`."""
        # NOTE(review): `yaml` is expected to be the module level ruamel YAML
        # instance, its definition is outside of this block -- confirm.
        with Path(path).open() as fp:
            return cls(yaml.load(fp))

    def select(self, tasks=None, groups=None):
        """Select task definitions by task name patterns and group names.

        Parameters
        ----------
        tasks : iterable of str, default None
            Glob patterns (fnmatch syntax) matched against the task names.
        groups : iterable of str, default None
            Names of the groups whose task patterns should be included.

        Returns
        -------
        selected : dict
            Mapping of task name to task definition for every matched task.

        Raises
        ------
        ValueError
            If a requested group is not defined in the configuration or if
            an explicitly requested task pattern doesn't match any task.
        """
        config_groups = dict(self['groups'])
        config_tasks = dict(self['tasks'])
        valid_groups = set(config_groups.keys())
        valid_tasks = set(config_tasks.keys())
        group_whitelist = list(groups or [])
        task_whitelist = list(tasks or [])

        # validate that the passed groups are defined in the config
        requested_groups = set(group_whitelist)
        invalid_groups = requested_groups - valid_groups
        if invalid_groups:
            msg = 'Invalid group(s) {!r}. Must be one of {!r}'.format(
                invalid_groups, valid_groups
            )
            raise ValueError(msg)

        # validate that each explicitly requested task matches something;
        # the matched names are always taken from `valid_tasks`, so comparing
        # them to `valid_tasks` afterwards could never detect a typo
        invalid_tasks = {
            pattern for pattern in task_whitelist
            if not fnmatch.filter(valid_tasks, pattern)
        }
        if invalid_tasks:
            msg = 'Invalid task(s) {!r}. Must be one of {!r}'.format(
                invalid_tasks, valid_tasks
            )
            raise ValueError(msg)

        # merge the explicit patterns with the ones defined in the selected
        # groups
        task_patterns = set(task_whitelist)
        for name in group_whitelist:
            task_patterns.update(config_groups[name])

        # treat the task names as glob patterns to select tasks more easily
        requested_tasks = set()
        for pattern in task_patterns:
            requested_tasks.update(fnmatch.filter(valid_tasks, pattern))

        # sorted to make the order of the result deterministic
        return {
            task_name: config_tasks[task_name]
            for task_name in sorted(requested_tasks)
        }

    def validate(self):
        """Run sanity checks on the configuration.

        Checks that every group matches at least one task, that a ``Task``
        can be constructed from every task definition and that every task's
        template renders with dummy data.

        Raises
        ------
        ValueError
            If any of the checks above fails. Errors raised by
            ``Task.render_files`` itself are propagated unchanged.
        """
        # validate that the task groups are properly referencing the tasks
        for group in self['groups']:
            if not self.select(groups=[group]):
                raise ValueError(
                    "The patterns defined for task group `{}` are not "
                    "matching any of the tasks defined in the configuration "
                    "file.".format(group)
                )

        # validate that the tasks are constructible, keep the instances to
        # reuse them for the rendering check below
        tasks = {}
        for task_name, definition in self['tasks'].items():
            try:
                tasks[task_name] = Task(**definition)
            except Exception as e:
                # chain the original exception to preserve its traceback
                raise ValueError(
                    'Unable to construct a task object from the '
                    'definition of task `{}`. The original error message '
                    'is: `{}`'.format(task_name, str(e))
                ) from e

        # validate that the defined tasks are renderable, in order to do that
        # define the required object with dummy data
        target = Target(
            head='e279a7e06e61c14868ca7d71dea795420aea6539',
            branch='master',
            remote='https://github.com/apache/arrow',
            version='1.0.0dev123',
            email='dummy@example.ltd'
        )

        for task_name, task in tasks.items():
            files = task.render_files(
                arrow=target,
                queue_remote_url='https://github.com/org/crossbow'
            )
            if not files:
                raise ValueError('No files have been rendered for task `{}`'
                                 .format(task_name))


class Report:

def __init__(self, job):
Expand Down Expand Up @@ -1355,11 +1431,11 @@ def render(self):
@click.option('--github-token', '-t', default=None,
help='OAuth token for GitHub authentication')
@click.option('--arrow-path', '-a',
type=click.Path(exists=True), default=str(DEFAULT_ARROW_PATH),
type=click.Path(), default=str(DEFAULT_ARROW_PATH),
help='Arrow\'s repository path. Defaults to the repository of '
'this script')
@click.option('--queue-path', '-q',
type=click.Path(exists=True), default=str(DEFAULT_QUEUE_PATH),
type=click.Path(), default=str(DEFAULT_QUEUE_PATH),
help='The repository path used for scheduling the tasks. '
'Defaults to crossbow directory placed next to arrow')
@click.option('--queue-remote', '-qr', default=None,
Expand All @@ -1370,13 +1446,6 @@ def render(self):
@click.pass_context
def crossbow(ctx, github_token, arrow_path, queue_path, queue_remote,
output_file):
if github_token is None:
raise click.ClickException(
'Could not determine GitHub token. Please set the '
'CROSSBOW_GITHUB_TOKEN environment variable to a '
'valid GitHub access token or pass one to --github-token.'
)

ctx.ensure_object(dict)
ctx.obj['output'] = output_file
ctx.obj['arrow'] = Repo(arrow_path)
Expand Down Expand Up @@ -1416,6 +1485,16 @@ def changelog(obj, changelog_path, arrow_version, is_website, jira_username,
'changes')


@crossbow.command()
@click.option('--config-path', '-c',
              type=click.Path(exists=True), default=DEFAULT_CONFIG_PATH,
              help='Task configuration yml. Defaults to tasks.yml')
def check_config(config_path):
    # Sanity check for the task configuration: load the yaml file and run
    # Config.validate() on it. Nothing is printed here, a problem surfaces as
    # the exception raised by the validation.

    # load available tasks configuration and groups from yaml
    config = Config.load_yaml(config_path)
    config.validate()


@crossbow.command()
@click.argument('tasks', nargs=-1, required=False)
@click.option('--group', '-g', 'groups', multiple=True,
Expand Down Expand Up @@ -1447,8 +1526,8 @@ def submit(obj, tasks, groups, job_prefix, config_path, arrow_version,
queue, arrow = obj['queue'], obj['arrow']

# load available tasks configuration and groups from yaml
with Path(config_path).open() as fp:
config = yaml.load(fp)
config = Config.load_yaml(config_path)
config.validate()

# Override the detected repo url / remote, branch and sha - this aims to
# make release procedure a bit simpler.
Expand All @@ -1461,7 +1540,8 @@ def submit(obj, tasks, groups, job_prefix, config_path, arrow_version,
head=arrow_sha, version=arrow_version)

# instantiate the job object
job = Job.from_config(config, target=target, tasks=tasks, groups=groups)
job = Job.from_config(config=config, target=target, tasks=tasks,
groups=groups)

if dry_run:
yaml.dump(job, output)
Expand Down
4 changes: 0 additions & 4 deletions dev/tasks/gandiva-jars/travis.linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,3 @@ deploy:
skip_cleanup: true
on:
tags: true

notifications:
email:
- {{ job.email }}
6 changes: 1 addition & 5 deletions dev/tasks/gandiva-jars/travis.osx.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,8 @@ script:
- >
python3 dev/tasks/crossbow.py
--queue-path $TRAVIS_BUILD_DIR
--queue-remote {{ queue.remote_url }}
--queue-remote {{ queue_remote_url }}
upload-artifacts
--sha {{ task.branch }}
--tag {{ task.tag }}
--pattern "dist/*.jar"
notifications:
email:
- {{ job.email }}
4 changes: 0 additions & 4 deletions dev/tasks/homebrew-formulae/travis.osx.r.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,3 @@ script:
after_failure:
# If there's a build failure, it's probably in this log
- cat arrow.Rcheck/00install.out

notifications:
email:
- {{ job.email }}
4 changes: 0 additions & 4 deletions dev/tasks/homebrew-formulae/travis.osx.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,3 @@ before_script:
script:
- brew install -v --build-from-source --HEAD $ARROW_FORMULA
- brew test $ARROW_FORMULA

notifications:
email:
- {{ job.email }}
2 changes: 1 addition & 1 deletion dev/tasks/linux-packages/github.linux.arm64.yml
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ jobs:
run: |
python arrow/dev/tasks/crossbow.py \
--queue-path . \
--queue-remote {{ queue.remote_url }} \
--queue-remote {{ queue_remote_url }} \
upload-artifacts \
{%- for extension in upload_extensions %}
--pattern "**/*{{ extension }}" \
Expand Down
Loading

0 comments on commit f396de5

Please sign in to comment.