Have collect target script return the artifact search path, and use t…

…hat information Summary: IN the past, we specified a static artifact search path for every kind of job steps. The problem with this is that with Bazel, the path is not static, but (at the time of this writing) the hash of the workspace path. The solution is, during the collect targets step, also return the artifact search path. This diff does that and also propagate the returned path to the expanded job steps. Test Plan: unit tests Reviewers: anupc Reviewed By: anupc Subscribers: changesbot, kylec Differential Revision: https://tails.corp.dropbox.com/D224447
dropbox · Aug 31, 2016 · 445684d · 445684d
1 parent 453622a
commit 445684d
Show file tree

Hide file tree

Showing 9 changed files with 329 additions and 108 deletions.
diff --git a/changes/buildsteps/default.py b/changes/buildsteps/default.py
@@ -43,7 +43,10 @@
 # We only want to copy certain attributes from a jobstep (basically, only
 # static state, not things that change after jobstep creation), so we
 # have an explicit list of attributes we'll copy
-JOBSTEP_DATA_COPY_WHITELIST = ('release', 'cpus', 'memory', 'weight', 'tests', 'shard_count')
+JOBSTEP_DATA_COPY_WHITELIST = (
+    'release', 'cpus', 'memory', 'weight', 'tests', 'shard_count',
+    'artifact_search_path',
+)
 
 
 class DefaultBuildStep(BuildStep):
@@ -484,7 +487,7 @@ def _image_for_job_id(self, job_id):
 
     def get_allocation_params(self, jobstep):
         params = {
-            'artifact-search-path': self.artifact_search_path,
+            'artifact-search-path': jobstep.data.get('artifact_search_path', self.artifact_search_path),
             'artifacts-server': current_app.config['ARTIFACTS_SERVER'],
             'adapter': self.get_client_adapter(),
             'server': build_internal_uri('/api/0/'),

diff --git a/changes/expanders/tests.py b/changes/expanders/tests.py
@@ -49,11 +49,17 @@ def expand(self, max_executors, test_stats_from=None):
                 env=self.data.get('env'),
                 artifacts=self.data.get('artifacts'),
             )
+            artifact_search_path = self.data.get('artifact_search_path')
+            artifact_search_path = artifact_search_path if artifact_search_path else None
             future_jobstep = FutureJobStep(
                 label=self.data.get('label') or future_command.label,
                 commands=[future_command],
-                data={'weight': weight, 'tests': test_list,
-                      'shard_count': len(groups)},
+                data={
+                    'weight': weight,
+                    'tests': test_list,
+                    'shard_count': len(groups),
+                    'artifact_search_path': artifact_search_path,
+                },
             )
             yield future_jobstep
 

diff --git a/changes/models/jobplan.py b/changes/models/jobplan.py
@@ -1,7 +1,6 @@
 from __future__ import absolute_import
 
 import logging
-import os
 import uuid
 
 from collections import defaultdict
@@ -227,8 +226,6 @@ def get_build_step_for_job(cls, job_id):
                         bazel_exclude_tags=bazel_exclude_tags,
                         max_jobs=2 * bazel_cpus), 'type': 'collect_tests'},
                 ],
-                artifacts=['*.xml'],
-                artifact_search_path=os.path.join(current_app.config['BAZEL_ROOT_PATH'], current_app.config['BAZEL_TEST_OUTPUT_RELATIVE_PATH']),
                 cpus=bazel_cpus,
                 memory=bazel_memory,
             )

diff --git a/changes/utils/bazel_setup.py b/changes/utils/bazel_setup.py
@@ -7,13 +7,13 @@
 # Clean up any existing apt sources
 sudo rm -rf /etc/apt/sources.list.d
 # Overwrite apt sources
-echo "%(apt_spec)s" | sudo tee /etc/apt/sources.list
+echo "{apt_spec}" | sudo tee /etc/apt/sources.list
 
 # apt-get update, and try again if it fails first time
 sudo apt-get -y update || sudo apt-get -y update
-sudo apt-get install -y --force-yes %(bazel_apt_pkgs)s
+sudo apt-get install -y --force-yes {bazel_apt_pkgs}
 
-/usr/bin/bazel --nomaster_blazerc --blazerc=/dev/null --output_user_root=%(bazel_root)s --batch version
+/usr/bin/bazel --nomaster_blazerc --blazerc=/dev/null --output_user_root={bazel_root} --batch version
 """.strip()
 
 # We run setup again because changes does not run setup before collecting tests, but we need bazel
@@ -24,25 +24,23 @@
 # Clean up any existing apt sources
 sudo rm -rf /etc/apt/sources.list.d >/dev/null 2>&1
 # Overwrite apt sources
-(echo "%(apt_spec)s" | sudo tee /etc/apt/sources.list) >/dev/null 2>&1
+(echo "{apt_spec}" | sudo tee /etc/apt/sources.list) >/dev/null 2>&1
 
 # apt-get update, and try again if it fails first time
 (sudo apt-get -y update || sudo apt-get -y update) >/dev/null 2>&1
-sudo apt-get install -y --force-yes %(bazel_apt_pkgs)s python >/dev/null 2>&1
+sudo apt-get install -y --force-yes {bazel_apt_pkgs} python >/dev/null 2>&1
 
-(/usr/bin/bazel --nomaster_blazerc --blazerc=/dev/null --output_user_root=%(bazel_root)s --batch query \
-    'let t = tests(%(bazel_targets)s) in ($t %(exclusion_subquery)s)' | \
-    python -c "%(jsonify_script)s") 2> /dev/null
+python -c "{script}" "{bazel_root}" "{bazel_targets}" "{bazel_exclude_tags}" "{max_jobs}" 2> /dev/null
 """.strip()
 
 
 SYNC_ENCAP_PKG = """
-sudo /usr/bin/rsync -a --delete %(encap_rsync_url)s%(pkg)s %(encap_dir)s
+sudo /usr/bin/rsync -a --delete {encap_rsync_url}{pkg} {encap_dir}
 """.strip()
 
 
 def get_bazel_setup():
-    return BASH_BAZEL_SETUP % dict(
+    return BASH_BAZEL_SETUP.format(
         apt_spec=current_app.config['APT_SPEC'],
         bazel_apt_pkgs=' '.join(current_app.config['BAZEL_APT_PKGS']),
         bazel_root=current_app.config['BAZEL_ROOT_PATH'],
@@ -65,36 +63,15 @@ def collect_bazel_targets(bazel_targets, bazel_exclude_tags, max_jobs):
     package_dir = os.path.dirname(__file__)
     bazel_target_py = os.path.join(package_dir, "collect_bazel_targets.py")
 
-    # Please use https://www.bazel.io/docs/query.html as a reference for Bazel query syntax
-    #
-    # To exclude targets matching a tag, we construct a query as follows:
-    #   let t = test(bazel_targets)                     ## Collect list of test targets in $t
-    #   return $t except attr("tags", $expression, $t)  ## Return all targets in t except those where "tags" matches an expression
-    #
-    # Multiple exclusions a performed by adding further "except" clauses
-    #   return $t except attr(1) expect attr(2) except attr(3) ...
-    #
-    # Examples of "tags" attribute:
-    #   []                                              ## No tags
-    #   [flaky]                                         ## Single tag named flaky
-    #   [flaky, manual]                                 ## Two tags named flaky and manual
-    #
-    # Tags are delimited on the left by an opening bracket or space, and on the right by a comma or closing bracket.
-    #
-    # Hence, $expression =>
-    #   (\[| )                                          ## Starts with an opening bracket or space
-    #   tag_name                                        ## Match actual tag name
-    #   (\]|,)                                          ## Ends with a closing bracket or comma
-    exclusion_subquery = ' '.join(["""except (attr("tags", "(\[| )%s(\]|,)", $t))""" % (tag) for tag in bazel_exclude_tags])
-
-    with open(bazel_target_py, 'r') as jsonify_script:
-        return COLLECT_BAZEL_TARGETS % dict(
+    with open(bazel_target_py, 'r') as script:
+        return COLLECT_BAZEL_TARGETS.format(
             apt_spec=current_app.config['APT_SPEC'],
             bazel_apt_pkgs=' '.join(current_app.config['BAZEL_APT_PKGS']),
             bazel_root=current_app.config['BAZEL_ROOT_PATH'],
-            bazel_targets=' + '.join(bazel_targets),
-            jsonify_script=jsonify_script.read() % dict(max_jobs=max_jobs, bazel_root=current_app.config['BAZEL_ROOT_PATH']),
-            exclusion_subquery=exclusion_subquery,
+            bazel_targets=','.join(bazel_targets),
+            script=script.read().replace(r'"', r'\"').replace(r'$', r'\$').replace(r'`', r'\`'),
+            bazel_exclude_tags=','.join(bazel_exclude_tags),
+            max_jobs=max_jobs,
         )
 
 
@@ -103,7 +80,7 @@ def sync_encap_pkgs(project_config, encap_dir='/usr/local/encap/'):
     encap_pkgs = dependencies.get('encap', [])
 
     def sync_pkg(pkg):
-        return SYNC_ENCAP_PKG % dict(
+        return SYNC_ENCAP_PKG.format(
             encap_rsync_url=current_app.config['ENCAP_RSYNC_URL'],
             pkg=pkg,
             encap_dir=encap_dir,

diff --git a/changes/utils/collect_bazel_targets.py b/changes/utils/collect_bazel_targets.py
@@ -1,10 +1,79 @@
-import sys
 import json
+import subprocess
+import sys
+
+
+BAZEL_PATH = '/usr/bin/bazel'
+
+BAZEL_FLAGS = ['--nomaster_blazerc', '--blazerc=/dev/null']
+
+
+def main(argv):
+    """Collect bazel targets. This runs a query against Bazel to determine
+    the list of test targets to run. It also queries Bazel for the artifact
+    search path, which is passed back to Changes.
+
+    `argv` contains the command line arguments. There must be exactly 4, in
+    this order:
+    - bazel root output path
+    - comma-separated list of bazel targets
+    - comma-separated list of bazel exclude tags
+    - the maximum number of jobs to run per command
+    """
+    bazel_root = argv[0]
+    bazel_targets = [x for x in argv[1].split(',') if x != '']
+    bazel_exclude_tags = [x for x in argv[2].split(',') if x != '']
+    max_jobs = argv[3]
+
+    # Please use https://www.bazel.io/docs/query.html as a reference for Bazel query syntax
+    #
+    # To exclude targets matching a tag, we construct a query as follows:
+    #   let t = test(bazel_targets)                     ## Collect list of test targets in $t
+    #   return $t except attr("tags", $expression, $t)  ## Return all targets in t except those where "tags" matches an expression
+    #
+    # Multiple exclusions a performed by adding further "except" clauses
+    #   return $t except attr(1) expect attr(2) except attr(3) ...
+    #
+    # Examples of "tags" attribute:
+    #   []                                              ## No tags
+    #   [flaky]                                         ## Single tag named flaky
+    #   [flaky, manual]                                 ## Two tags named flaky and manual
+    #
+    # Tags are delimited on the left by an opening bracket or space, and on the right by a comma or closing bracket.
+    #
+    # Hence, $expression =>
+    #   (\[| )                                          ## Starts with an opening bracket or space
+    #   tag_name                                        ## Match actual tag name
+    #   (\]|,)                                          ## Ends with a closing bracket or comma
+    exclusion_subquery = ' '.join(["""except (attr("tags", "(\[| ){}(\]|,)", $t))""".format(
+        tag) for tag in bazel_exclude_tags])
+
+    targets = subprocess.check_output(
+        [BAZEL_PATH] + BAZEL_FLAGS + [
+            '--output_user_root={}'.format(bazel_root),
+            '--batch',
+            'query',
+            'let t = tests({targets}) in ($t {exclusion_subquery})'.format(
+                targets=' + '.join(bazel_targets),
+                exclusion_subquery=exclusion_subquery
+            ),
+        ]).strip().splitlines()
 
+    artifact_search_path = subprocess.check_output(
+        [BAZEL_PATH] + BAZEL_FLAGS + [
+            '--output_user_root={}'.format(bazel_root),
+            '--batch',
+            'info',
+            'bazel-testlog',
+        ]).strip()
+    out = {
+        'cmd': '/usr/bin/bazel --output_user_root={bazel_root} test --jobs={max_jobs} {{test_names}}'.format(bazel_root=bazel_root, max_jobs=max_jobs),
+        'tests': targets,
+        'artifact_search_path': artifact_search_path,
+        'artifacts': ['*.xml'],
+    }
+    json.dump(out, sys.stdout)
+    return 0
 
-targets = sys.stdin.read().splitlines()
-out = {
-    'cmd': '/usr/bin/bazel --output_user_root=%(bazel_root)s test --jobs=%(max_jobs)s {test_names}',
-    'tests': targets,
-}
-json.dump(out, sys.stdout)
+if __name__ == '__main__':
+    sys.exit(main(sys.argv[1:]))
diff --git a/tests/changes/buildsteps/test_default.py b/tests/changes/buildsteps/test_default.py
@@ -588,6 +588,31 @@ def test_get_allocation_params_with_artifact_search_path(self):
             'dist': 'ubuntu',
         }
 
+    def test_get_allocation_params_with_artifact_search_path_from_jobstep(self):
+        project = self.create_project()
+        build = self.create_build(project)
+        job = self.create_job(build)
+        jobphase = self.create_jobphase(job)
+        jobstep = self.create_jobstep(jobphase, data={
+            'artifact_search_path': '/out'
+        })
+
+        buildstep = self.get_buildstep(repo_path='source', path='tests')
+        result = buildstep.get_allocation_params(jobstep)
+        assert result == {
+            'adapter': 'basic',
+            'server': 'http://changes-int.example.com/api/0/',
+            'jobstep_id': jobstep.id.hex,
+            'release': 'precise',
+            's3-bucket': 'snapshot-bucket',
+            'pre-launch': 'echo pre',
+            'post-launch': 'echo post',
+            'artifacts-server': 'http://localhost:1234',
+            'artifact-search-path': '/out',
+            'use-external-env': 'false',
+            'dist': 'ubuntu',
+        }
+
     def test_get_allocation_params_for_snapshotting(self):
         project = self.create_project()
         build = self.create_build(project)

diff --git a/tests/changes/expanders/test_tests.py b/tests/changes/expanders/test_tests.py
@@ -109,12 +109,92 @@ def test_expand(self, mock_get_test_stats):
         assert results[0].data['weight'] == 201
         assert set(results[0].data['tests']) == set(['foo.bar.test_buz'])
         assert results[0].data['shard_count'] == 2
+        assert results[0].data['artifact_search_path'] is None
         assert results[0].commands[0].label == results[0].label
         assert results[0].commands[0].script == results[0].label
 
         assert results[1].label == 'py.test --junit=junit.xml foo/bar.py foo/baz.py foo.bar.test_biz'
         assert results[1].data['weight'] == 78
         assert set(results[1].data['tests']) == set(['foo/bar.py', 'foo/baz.py', 'foo.bar.test_biz'])
         assert results[1].data['shard_count'] == 2
+        assert results[1].data['artifact_search_path'] is None
+        assert results[1].commands[0].label == results[1].label
+        assert results[1].commands[0].script == results[1].label
+
+    @patch.object(TestsExpander, 'get_test_stats')
+    def test_expand_with_artifact_search_path(self, mock_get_test_stats):
+        mock_get_test_stats.return_value = {
+            ('foo', 'bar'): 50,
+            ('foo', 'baz'): 15,
+            ('foo', 'bar', 'test_biz'): 10,
+            ('foo', 'bar', 'test_buz'): 200,
+        }, 68
+
+        results = list(self.get_expander({
+            'cmd': 'py.test --junit=junit.xml {test_names}',
+            'tests': [
+                'foo/bar.py',
+                'foo/baz.py',
+                'foo.bar.test_biz',
+                'foo.bar.test_buz',
+            ],
+            'artifact_search_path': '/tmp/artifacts',
+        }).expand(max_executors=2))
+
+        results.sort(key=lambda x: x.data['weight'], reverse=True)
+
+        assert len(results) == 2
+        assert results[0].label == 'py.test --junit=junit.xml foo.bar.test_buz'
+        assert results[0].data['weight'] == 201
+        assert set(results[0].data['tests']) == set(['foo.bar.test_buz'])
+        assert results[0].data['shard_count'] == 2
+        assert results[0].data['artifact_search_path'] == '/tmp/artifacts'
+        assert results[0].commands[0].label == results[0].label
+        assert results[0].commands[0].script == results[0].label
+
+        assert results[1].label == 'py.test --junit=junit.xml foo/bar.py foo/baz.py foo.bar.test_biz'
+        assert results[1].data['weight'] == 78
+        assert set(results[1].data['tests']) == set(['foo/bar.py', 'foo/baz.py', 'foo.bar.test_biz'])
+        assert results[1].data['shard_count'] == 2
+        assert results[1].data['artifact_search_path'] == '/tmp/artifacts'
+        assert results[1].commands[0].label == results[1].label
+        assert results[1].commands[0].script == results[1].label
+
+    @patch.object(TestsExpander, 'get_test_stats')
+    def test_expand_with_artifact_search_path_empty(self, mock_get_test_stats):
+        mock_get_test_stats.return_value = {
+            ('foo', 'bar'): 50,
+            ('foo', 'baz'): 15,
+            ('foo', 'bar', 'test_biz'): 10,
+            ('foo', 'bar', 'test_buz'): 200,
+        }, 68
+
+        results = list(self.get_expander({
+            'cmd': 'py.test --junit=junit.xml {test_names}',
+            'tests': [
+                'foo/bar.py',
+                'foo/baz.py',
+                'foo.bar.test_biz',
+                'foo.bar.test_buz',
+            ],
+            'artifact_search_path': '',
+        }).expand(max_executors=2))
+
+        results.sort(key=lambda x: x.data['weight'], reverse=True)
+
+        assert len(results) == 2
+        assert results[0].label == 'py.test --junit=junit.xml foo.bar.test_buz'
+        assert results[0].data['weight'] == 201
+        assert set(results[0].data['tests']) == set(['foo.bar.test_buz'])
+        assert results[0].data['shard_count'] == 2
+        assert results[0].data['artifact_search_path'] is None
+        assert results[0].commands[0].label == results[0].label
+        assert results[0].commands[0].script == results[0].label
+
+        assert results[1].label == 'py.test --junit=junit.xml foo/bar.py foo/baz.py foo.bar.test_biz'
+        assert results[1].data['weight'] == 78
+        assert set(results[1].data['tests']) == set(['foo/bar.py', 'foo/baz.py', 'foo.bar.test_biz'])
+        assert results[1].data['shard_count'] == 2
+        assert results[1].data['artifact_search_path'] is None
         assert results[1].commands[0].label == results[1].label
         assert results[1].commands[0].script == results[1].label