First check-in of configuration overhead measurement tool

i.e. first check-in of [Measuring Configuration Overhead](https://docs.google.com/document/d/10ZxO2wZdKJATnYBqAm22xT1k5r4Vp6QX96TkqSUIhs0/edit). This just establishes supporting structure. The tool is not yet functional. Specifically: - `types.py`: defines data structures for "configuration" and "configured target" - `bazel_api.py`: API to translate `bazel cquery` and `bazel config` calls into the above data structures - `bazel_api_test.py`: tests - `ctexplain.py`: stub of an entry point The tests utilize an existing Python test framework for invoking Bazel (`//src/test/py/bazel:test_base`). Work towards bazelbuild#10613 Closes bazelbuild#11511. PiperOrigin-RevId: 315479921 Change-Id: I00898a4579e20483c8c4c3f5250afc22ee1e1695
gregestren · Jul 14, 2020 · 7095653 · 7095653
1 parent 3bb3162
commit 7095653
Show file tree

Hide file tree

Showing 8 changed files with 547 additions and 0 deletions.
diff --git a/src/test/py/bazel/BUILD b/src/test/py/bazel/BUILD
@@ -29,6 +29,7 @@ py_library(
         "//third_party/def_parser:__pkg__",
         "//tools/android:__pkg__",
         "//tools/build_rules:__pkg__",
+        "//tools/ctexplain:__pkg__",
         "//tools/python:__pkg__",
     ],
 )

diff --git a/tools/BUILD b/tools/BUILD
@@ -15,6 +15,7 @@ filegroup(
         "//tools/build_rules:srcs",
         "//tools/config:srcs",
         "//tools/coverage:srcs",
+        "//tools/ctexplain:srcs",
         "//tools/distributions:srcs",
         "//tools/java:srcs",
         "//tools/jdk:srcs",

diff --git a/tools/ctexplain/BUILD b/tools/ctexplain/BUILD
@@ -0,0 +1,60 @@
+# Description:
+#   Tool for measuring how configuration transitions affect build graph size.
+load("//tools/python:private/defs.bzl", "py_binary", "py_library")
+
+package(default_visibility = ["//visibility:public"])
+
+licenses(["notice"])  # Apache 2.0
+
+# Command-line entry point for the tool.
+py_binary(
+    name = "ctexplain",
+    srcs = ["ctexplain.py"],
+    python_version = "PY3",
+    deps = [":bazel_api"],
+)
+
+# Wrapper that turns `bazel cquery` / `bazel config` output into the data
+# structures provided by :base.
+py_library(
+    name = "bazel_api",
+    srcs = ["bazel_api.py"],
+    srcs_version = "PY3ONLY",
+    deps = [":base"],
+)
+
+# Exercises :bazel_api against Bazel via the shared Python test framework.
+py_test(
+    name = "bazel_api_test",
+    size = "small",
+    srcs = ["bazel_api_test.py"],
+    python_version = "PY3",
+    deps = [
+        ":bazel_api",
+        "//src/test/py/bazel:test_base",
+    ],
+)
+
+# Shared data structures (types.py) used by the rest of the tool.
+py_library(
+    name = "base",
+    srcs = [
+        "types.py",
+    ],
+    srcs_version = "PY3ONLY",
+    deps = [
+        "//third_party/py/dataclasses",  # Backport for Python < 3.7.
+        "//third_party/py/frozendict",
+    ],
+)
+
+# Unit tests for the data structures in :base.
+py_test(
+    name = "types_test",
+    size = "small",
+    srcs = ["types_test.py"],
+    python_version = "PY3",
+    deps = [
+        ":base",
+        "//third_party/py/frozendict",
+    ],
+)
+
+# Every file in this package; referenced from the "//tools/ctexplain:srcs"
+# entry added to tools/BUILD.
+filegroup(
+    name = "srcs",
+    srcs = glob(["*"]),
+)
diff --git a/tools/ctexplain/bazel_api.py b/tools/ctexplain/bazel_api.py
@@ -0,0 +1,158 @@
+# Lint as: python3
+# Copyright 2020 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""API for Bazel calls for config, cquery, and required fragment info.
+
+There's no Python Bazel API so we invoke Bazel as a subprocess.
+"""
+import json
+import os
+import subprocess
+from typing import Callable
+from typing import List
+from typing import Optional
+from typing import Tuple
+# Do not edit this line. Copybara replaces it with PY2 migration helper.
+from frozendict import frozendict
+from tools.ctexplain.types import Configuration
+from tools.ctexplain.types import ConfiguredTarget
+from tools.ctexplain.types import HostConfiguration
+from tools.ctexplain.types import NullConfiguration
+
+
+def run_bazel_in_client(args: List[str]) -> Tuple[int, List[str], List[str]]:
+  """Calls bazel within the current workspace.
+
+  For production use. Tests use an alternative invoker that goes through test
+  infrastructure.
+
+  Args:
+    args: the arguments to call Bazel with
+
+  Returns:
+    Tuple of (return code, stdout, stderr)
+  """
+  result = subprocess.run(
+      ["bazel"] + args,
+      cwd=os.getcwd(),
+      stdout=subprocess.PIPE,
+      stderr=subprocess.PIPE,
+      check=True)
+  return (result.returncode, result.stdout.decode("utf-8").split(os.linesep),
+          result.stderr)
+
+
+class BazelApi():
+  """API that accepts injectable Bazel invocation logic."""
+
+  def __init__(self,
+               run_bazel: Callable[[List[str]],
+                                   Tuple[int, List[str],
+                                         List[str]]] = run_bazel_in_client):
+    self.run_bazel = run_bazel
+
+  def cquery(self,
+             args: List[str]) -> Tuple[bool, str, Tuple[ConfiguredTarget, ...]]:
+    """Calls cquery with the given arguments.
+
+    Args:
+      args: A list of cquery command-line arguments, one argument per entry.
+
+    Returns:
+      (success, stderr, cts), where success is True iff the query succeeded,
+      stderr contains the query's stderr (regardless of success value), and cts
+      is the configured targets found by the query if successful, empty
+      otherwise.
+    """
+    base_args = ["cquery", "--show_config_fragments=transitive"]
+    (returncode, stdout, stderr) = self.run_bazel(base_args + args)
+    if returncode != 0:
+      return (False, stderr, ())
+
+    cts = set()
+    for line in stdout:
+      ctinfo = _parse_cquery_result_line(line)
+      if ctinfo is not None:
+        cts.add(ctinfo)
+
+    return (True, stderr, tuple(cts))
+
+  def get_config(self, config_hash: str) -> Configuration:
+    """Calls "bazel config" with the given config hash.
+
+    Args:
+      config_hash: A config hash as reported by "bazel cquery".
+
+    Returns:
+      The matching configuration or None if no match is found.
+
+    Raises:
+      ValueError on any parsing problems.
+    """
+    if config_hash == "HOST":
+      return HostConfiguration()
+    elif config_hash == "null":
+      return NullConfiguration()
+
+    base_args = ["config", "--output=json"]
+    (returncode, stdout, stderr) = self.run_bazel(base_args + [config_hash])
+    if returncode != 0:
+      raise ValueError("Could not get config: " + stderr)
+    config_json = json.loads(os.linesep.join(stdout))
+    fragments = [
+        fragment["name"].split(".")[-1] for fragment in config_json["fragments"]
+    ]
+    options = frozendict({
+        entry["name"].split(".")[-1]: frozendict(entry["options"])
+        for entry in config_json["fragmentOptions"]
+    })
+    return Configuration(fragments, options)
+
+
+# TODO(gregce): have cquery --output=jsonproto support --show_config_fragments
+# so we can replace all this regex parsing with JSON reads.
+def _parse_cquery_result_line(line: str) -> ConfiguredTarget:
+  """Converts a cquery output line to a ConfiguredTarget.
+
+  Expected input is:
+
+      "<label> (<config hash>) [configFragment1, configFragment2, ...]"
+
+  or:
+      "<label> (null)"
+
+  Args:
+    line: The expected input.
+
+  Returns:
+    Corresponding ConfiguredTarget if the line matches else None.
+  """
+  tokens = line.split(maxsplit=2)
+  label = tokens[0]
+  if tokens[1][0] != "(" or tokens[1][-1] != ")":
+    raise ValueError(f"{tokens[1]} in {line} not surrounded by parentheses")
+  config_hash = tokens[1][1:-1]
+  if config_hash == "null":
+    fragments = ()
+  else:
+    if tokens[2][0] != "[" or tokens[2][-1] != "]":
+      raise ValueError(f"{tokens[2]} in {line} not surrounded by [] brackets")
+    # The fragments list looks like '[Fragment1, Fragment2, ...]'. Split the
+    # whole line on ' [' to get just this list, then remove the final ']', then
+    # split again on ', ' to convert it to a structured tuple.
+    fragments = tuple(line.split(" [")[1][0:-1].split(", "))
+  return ConfiguredTarget(
+      label=label,
+      config=None,  # Not yet available: we'll need `bazel config` to get this.
+      config_hash=config_hash,
+      transitive_fragments=fragments)
diff --git a/tools/ctexplain/bazel_api_test.py b/tools/ctexplain/bazel_api_test.py
@@ -0,0 +1,147 @@
+# Lint as: python3
+# Copyright 2020 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for bazel_api.py."""
+import os
+import unittest
+from src.test.py.bazel import test_base
+from tools.ctexplain.bazel_api import BazelApi
+from tools.ctexplain.types import HostConfiguration
+from tools.ctexplain.types import NullConfiguration
+
+
+class BazelApiTest(test_base.TestBase):
+
+  _bazel_api: BazelApi = None
+
+  def setUp(self):
+    test_base.TestBase.setUp(self)
+    self._bazel_api = BazelApi(self.RunBazel)
+    self.ScratchFile('WORKSPACE')
+    self.CreateWorkspaceWithDefaultRepos('repo/WORKSPACE')
+
+  def tearDown(self):
+    test_base.TestBase.tearDown(self)
+
+  def testBasicCquery(self):
+    self.ScratchFile('testapp/BUILD', [
+        'filegroup(name = "fg", srcs = ["a.file"])',
+    ])
+    res = self._bazel_api.cquery(['//testapp:all'])
+    success = res[0]
+    cts = res[2]
+    self.assertTrue(success)
+    self.assertEqual(len(cts), 1)
+    self.assertEqual(cts[0].label, '//testapp:fg')
+    self.assertIsNone(cts[0].config)
+    self.assertGreater(len(cts[0].config_hash), 10)
+    self.assertIn('PlatformConfiguration', cts[0].transitive_fragments)
+
+  def testFailedCquery(self):
+    self.ScratchFile('testapp/BUILD', [
+        'filegroup(name = "fg", srcs = ["a.file"])',
+    ])
+    (success, stderr, cts) = self._bazel_api.cquery(['//testapp:typo'])
+    self.assertFalse(success)
+    self.assertEqual(len(cts), 0)
+    self.assertIn("target 'typo' not declared in package 'testapp'",
+                  os.linesep.join(stderr))
+
+  def testTransitiveFragmentsAccuracy(self):
+    self.ScratchFile('testapp/BUILD', [
+        'filegroup(name = "fg", srcs = ["a.file"])',
+        'filegroup(name = "ccfg", srcs = [":ccbin"])',
+        'cc_binary(name = "ccbin", srcs = ["ccbin.cc"])'
+    ])
+    cts1 = self._bazel_api.cquery(['//testapp:fg'])[2]
+    self.assertNotIn('CppConfiguration', cts1[0].transitive_fragments)
+    cts2 = self._bazel_api.cquery(['//testapp:ccfg'])[2]
+    self.assertIn('CppConfiguration', cts2[0].transitive_fragments)
+
+  def testGetTargetConfig(self):
+    self.ScratchFile('testapp/BUILD', [
+        'filegroup(name = "fg", srcs = ["a.file"])',
+    ])
+    cts = self._bazel_api.cquery(['//testapp:fg'])[2]
+    config = self._bazel_api.get_config(cts[0].config_hash)
+    expected_fragments = ['PlatformConfiguration', 'JavaConfiguration']
+    for exp in expected_fragments:
+      self.assertIn(exp, config.fragments)
+    core_options = config.options['CoreOptions']
+    self.assertIsNotNone(core_options)
+    self.assertIn(('stamp', 'false'), core_options.items())
+
+  def testGetHostConfig(self):
+    self.ScratchFile('testapp/BUILD', [
+        'genrule(',
+        '    name = "g",',
+        '    srcs = [],',
+        '    cmd = "",',
+        '    outs = ["g.out"],',
+        '    tools = [":fg"])',
+        'filegroup(name = "fg", srcs = ["a.file"])',
+    ])
+    query = ['//testapp:fg', '--universe_scope=//testapp:g']
+    cts = self._bazel_api.cquery(query)[2]
+    config = self._bazel_api.get_config(cts[0].config_hash)
+    self.assertIsInstance(config, HostConfiguration)
+    # We don't currently populate or read a host configuration's details.
+    self.assertEqual(len(config.fragments), 0)
+    self.assertEqual(len(config.options), 0)
+
+  def testGetNullConfig(self):
+    self.ScratchFile('testapp/BUILD', [
+        'filegroup(name = "fg", srcs = ["a.file"])',
+    ])
+    cts = self._bazel_api.cquery(['//testapp:a.file'])[2]
+    config = self._bazel_api.get_config(cts[0].config_hash)
+    self.assertIsInstance(config, NullConfiguration)
+    # Null configurations have no information by definition.
+    self.assertEqual(len(config.fragments), 0)
+    self.assertEqual(len(config.options), 0)
+
+  def testConfigWithDefines(self):
+    self.ScratchFile('testapp/BUILD', [
+        'filegroup(name = "fg", srcs = ["a.file"])',
+    ])
+    cquery_args = ['//testapp:fg', '--define', 'a=b']
+    cts = self._bazel_api.cquery(cquery_args)[2]
+    config = self._bazel_api.get_config(cts[0].config_hash)
+    user_defined_options = config.options['user-defined']
+    self.assertIsNotNone(user_defined_options)
+    self.assertDictEqual(user_defined_options._dict, {'--define:a': 'b'})
+
+  def testConfigWithStarlarkFlags(self):
+    self.ScratchFile('testapp/defs.bzl', [
+        'def _flag_impl(settings, attr):', '  pass', 'string_flag = rule(',
+        '  implementation = _flag_impl,',
+        '  build_setting = config.string(flag = True)'
+        ')'
+    ])
+    self.ScratchFile('testapp/BUILD', [
+        'load(":defs.bzl", "string_flag")',
+        'string_flag(name = "my_flag", build_setting_default = "nada")',
+        'filegroup(name = "fg", srcs = ["a.file"])',
+    ])
+    cquery_args = ['//testapp:fg', '--//testapp:my_flag', 'algo']
+    cts = self._bazel_api.cquery(cquery_args)[2]
+    config = self._bazel_api.get_config(cts[0].config_hash)
+    user_defined_options = config.options['user-defined']
+    self.assertIsNotNone(user_defined_options)
+    self.assertDictEqual(user_defined_options._dict,
+                         {'//testapp:my_flag': 'algo'})
+
+
+if __name__ == '__main__':
+  unittest.main()