Skip to content

Commit

Permalink
First check-in of configuration overhead measurement tool
Browse files Browse the repository at this point in the history
i.e. first check-in of [Measuring Configuration Overhead](https://docs.google.com/document/d/10ZxO2wZdKJATnYBqAm22xT1k5r4Vp6QX96TkqSUIhs0/edit).

This just establishes supporting structure. The tool is not yet functional.

Specifically:
- `types.py`: defines data structures for "configuration" and "configured target"
- `bazel_api.py`: API to translate `bazel cquery` and `bazel config` calls into the above data structures
- `bazel_api_test.py`: tests
- `ctexplain.py`: stub of an entry point

The tests utilize an existing Python test framework for invoking Bazel (`//src/test/py/bazel:test_base`).

Work towards bazelbuild#10613

Closes bazelbuild#11511.

PiperOrigin-RevId: 315479921
Change-Id: I00898a4579e20483c8c4c3f5250afc22ee1e1695
  • Loading branch information
gregestren committed Jul 14, 2020
1 parent 3bb3162 commit 7095653
Show file tree
Hide file tree
Showing 8 changed files with 547 additions and 0 deletions.
1 change: 1 addition & 0 deletions src/test/py/bazel/BUILD
Expand Up @@ -29,6 +29,7 @@ py_library(
"//third_party/def_parser:__pkg__",
"//tools/android:__pkg__",
"//tools/build_rules:__pkg__",
"//tools/ctexplain:__pkg__",
"//tools/python:__pkg__",
],
)
Expand Down
1 change: 1 addition & 0 deletions tools/BUILD
Expand Up @@ -15,6 +15,7 @@ filegroup(
"//tools/build_rules:srcs",
"//tools/config:srcs",
"//tools/coverage:srcs",
"//tools/ctexplain:srcs",
"//tools/distributions:srcs",
"//tools/java:srcs",
"//tools/jdk:srcs",
Expand Down
60 changes: 60 additions & 0 deletions tools/ctexplain/BUILD
@@ -0,0 +1,60 @@
# Description:
# Tool for measuring how configuration transitions affect build graph size.
load("//tools/python:private/defs.bzl", "py_binary", "py_library")

package(default_visibility = ["//visibility:public"])

licenses(["notice"]) # Apache 2.0

# Entry point for the tool. Depends only on the Bazel invocation API below.
py_binary(
name = "ctexplain",
srcs = ["ctexplain.py"],
python_version = "PY3",
deps = [":bazel_api"],
)

# API that wraps `bazel cquery` and `bazel config` calls and converts their
# output into the data structures defined in :base.
py_library(
name = "bazel_api",
srcs = ["bazel_api.py"],
srcs_version = "PY3ONLY",
deps = [":base"],
)

# Tests for :bazel_api. These invoke Bazel through the shared Python test
# framework in //src/test/py/bazel:test_base.
py_test(
name = "bazel_api_test",
size = "small",
srcs = ["bazel_api_test.py"],
python_version = "PY3",
deps = [
":bazel_api",
"//src/test/py/bazel:test_base",
],
)

# Core data structures ("configuration" and "configured target") shared by the
# rest of the tool.
py_library(
name = "base",
srcs = [
"types.py",
],
srcs_version = "PY3ONLY",
deps = [
"//third_party/py/dataclasses", # Backport for Python < 3.7.
"//third_party/py/frozendict",
],
)

# Tests for the data structures in :base.
py_test(
name = "types_test",
size = "small",
srcs = ["types_test.py"],
python_version = "PY3",
deps = [
":base",
"//third_party/py/frozendict",
],
)

# Every file in this package; referenced from the //tools:srcs filegroup.
filegroup(
name = "srcs",
srcs = glob(["*"]),
)
158 changes: 158 additions & 0 deletions tools/ctexplain/bazel_api.py
@@ -0,0 +1,158 @@
# Lint as: python3
# Copyright 2020 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""API for Bazel calls for config, cquery, and required fragment info.
There's no Python Bazel API so we invoke Bazel as a subprocess.
"""
import json
import os
import subprocess
from typing import Callable
from typing import List
from typing import Optional
from typing import Tuple
# Do not edit this line. Copybara replaces it with PY2 migration helper.
from frozendict import frozendict
from tools.ctexplain.types import Configuration
from tools.ctexplain.types import ConfiguredTarget
from tools.ctexplain.types import HostConfiguration
from tools.ctexplain.types import NullConfiguration


def run_bazel_in_client(args: List[str]) -> Tuple[int, List[str], List[str]]:
  """Calls bazel within the current workspace.

  For production use. Tests use an alternative invoker that goes through test
  infrastructure.

  Args:
    args: the arguments to call Bazel with

  Returns:
    Tuple of (return code, stdout, stderr). stdout and stderr are the decoded
    output streams, each split into a list of lines.
  """
  # check=False: a failing Bazel command must be reported through the return
  # code (which callers inspect alongside stderr), not by raising
  # subprocess.CalledProcessError.
  result = subprocess.run(
      ["bazel"] + args,
      cwd=os.getcwd(),
      stdout=subprocess.PIPE,
      stderr=subprocess.PIPE,
      check=False)
  # Decode both streams the same way so the result matches the declared
  # Tuple[int, List[str], List[str]] contract.
  return (result.returncode,
          result.stdout.decode("utf-8").split(os.linesep),
          result.stderr.decode("utf-8").split(os.linesep))


class BazelApi():
  """API that accepts injectable Bazel invocation logic."""

  def __init__(self,
               run_bazel: Callable[[List[str]],
                                   Tuple[int, List[str],
                                         List[str]]] = run_bazel_in_client):
    """Creates the API around a Bazel invoker.

    Args:
      run_bazel: Callable that takes a list of Bazel command-line arguments and
        returns (return code, stdout lines, stderr lines).
    """
    self.run_bazel = run_bazel

  def cquery(
      self, args: List[str]
  ) -> Tuple[bool, List[str], Tuple[ConfiguredTarget, ...]]:
    """Calls cquery with the given arguments.

    Args:
      args: A list of cquery command-line arguments, one argument per entry.

    Returns:
      (success, stderr, cts), where success is True iff the query succeeded,
      stderr contains the query's stderr lines (regardless of success value),
      and cts is the de-duplicated configured targets found by the query if
      successful, empty otherwise.

    Raises:
      ValueError: if an output line is malformed (raised by the line parser).
    """
    base_args = ["cquery", "--show_config_fragments=transitive"]
    (returncode, stdout, stderr) = self.run_bazel(base_args + args)
    if returncode != 0:
      return (False, stderr, ())

    cts = set()
    for line in stdout:
      # Splitting the output on line separators leaves blank entries (e.g.
      # after the final newline). They carry no result, so skip them.
      if not line.strip():
        continue
      ctinfo = _parse_cquery_result_line(line)
      if ctinfo is not None:
        cts.add(ctinfo)

    return (True, stderr, tuple(cts))

  def get_config(self, config_hash: str) -> Configuration:
    """Calls "bazel config" with the given config hash.

    Args:
      config_hash: A config hash as reported by "bazel cquery".

    Returns:
      The matching configuration. The special hashes "HOST" and "null" map to
      HostConfiguration and NullConfiguration without invoking Bazel.

    Raises:
      ValueError: if Bazel reports failure (e.g. no matching configuration) or
        its output cannot be parsed.
    """
    if config_hash == "HOST":
      return HostConfiguration()
    if config_hash == "null":
      return NullConfiguration()

    base_args = ["config", "--output=json"]
    (returncode, stdout, stderr) = self.run_bazel(base_args + [config_hash])
    if returncode != 0:
      # stderr is a list of lines: join it before building the message.
      raise ValueError("Could not get config: " + os.linesep.join(stderr))
    # json.JSONDecodeError is a ValueError subclass, so malformed JSON already
    # satisfies the documented contract.
    config_json = json.loads(os.linesep.join(stdout))
    try:
      # Names are fully qualified; keep only the part after the last ".".
      fragments = [
          fragment["name"].split(".")[-1]
          for fragment in config_json["fragments"]
      ]
      options = frozendict({
          entry["name"].split(".")[-1]: frozendict(entry["options"])
          for entry in config_json["fragmentOptions"]
      })
    except (KeyError, TypeError) as e:
      raise ValueError(
          f"Unexpected `bazel config` output for {config_hash}") from e
    return Configuration(fragments, options)


# TODO(gregce): have cquery --output=jsonproto support --show_config_fragments
# so we can replace all this regex parsing with JSON reads.
def _parse_cquery_result_line(line: str) -> Optional[ConfiguredTarget]:
  """Converts a cquery output line to a ConfiguredTarget.

  Expected input is:

      "<label> (<config hash>) [configFragment1, configFragment2, ...]"

  or:
      "<label> (null)"

  Args:
    line: The expected input.

  Returns:
    Corresponding ConfiguredTarget, or None if the line is blank.

  Raises:
    ValueError: if the line is not blank but doesn't match the expected format.
  """
  # At most three tokens: label, "(<hash>)", and the whole "[...]" list (which
  # itself contains spaces).
  tokens = line.split(maxsplit=2)
  if not tokens:
    return None
  if len(tokens) < 2:
    raise ValueError(f"{line} is missing a (<config hash>) component")
  label = tokens[0]
  if tokens[1][0] != "(" or tokens[1][-1] != ")":
    raise ValueError(f"{tokens[1]} in {line} not surrounded by parentheses")
  config_hash = tokens[1][1:-1]
  if config_hash == "null":
    fragments = ()
  else:
    if len(tokens) < 3:
      raise ValueError(f"{line} is missing a [config fragments] list")
    # The last token keeps any trailing whitespace from the line; drop it
    # before checking the brackets.
    fragment_list = tokens[2].rstrip()
    if fragment_list[0] != "[" or fragment_list[-1] != "]":
      raise ValueError(f"{tokens[2]} in {line} not surrounded by [] brackets")
    # The fragments list looks like '[Fragment1, Fragment2, ...]'. Strip the
    # brackets, then split on ', ' to convert it to a structured tuple. An
    # empty list ('[]') means no fragments, not one empty-named fragment.
    inner = fragment_list[1:-1]
    fragments = tuple(inner.split(", ")) if inner else ()
  return ConfiguredTarget(
      label=label,
      config=None,  # Not yet available: we'll need `bazel config` to get this.
      config_hash=config_hash,
      transitive_fragments=fragments)
147 changes: 147 additions & 0 deletions tools/ctexplain/bazel_api_test.py
@@ -0,0 +1,147 @@
# Lint as: python3
# Copyright 2020 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for bazel_api.py."""
import os
import unittest
from src.test.py.bazel import test_base
from tools.ctexplain.bazel_api import BazelApi
from tools.ctexplain.types import HostConfiguration
from tools.ctexplain.types import NullConfiguration


class BazelApiTest(test_base.TestBase):
  """Tests BazelApi against real Bazel invocations in a scratch workspace."""

  _bazel_api: BazelApi = None

  def setUp(self):
    super().setUp()
    # Route Bazel calls through the test infrastructure's invoker.
    self._bazel_api = BazelApi(self.RunBazel)
    self.ScratchFile('WORKSPACE')
    self.CreateWorkspaceWithDefaultRepos('repo/WORKSPACE')

  def testBasicCquery(self):
    self.ScratchFile('testapp/BUILD', [
        'filegroup(name = "fg", srcs = ["a.file"])',
    ])
    (success, _, cts) = self._bazel_api.cquery(['//testapp:all'])
    self.assertTrue(success)
    self.assertEqual(len(cts), 1)
    self.assertEqual(cts[0].label, '//testapp:fg')
    # cquery alone doesn't populate the config: only its hash.
    self.assertIsNone(cts[0].config)
    self.assertGreater(len(cts[0].config_hash), 10)
    self.assertIn('PlatformConfiguration', cts[0].transitive_fragments)

  def testFailedCquery(self):
    self.ScratchFile('testapp/BUILD', [
        'filegroup(name = "fg", srcs = ["a.file"])',
    ])
    (success, stderr, cts) = self._bazel_api.cquery(['//testapp:typo'])
    self.assertFalse(success)
    self.assertEqual(len(cts), 0)
    self.assertIn("target 'typo' not declared in package 'testapp'",
                  os.linesep.join(stderr))

  def testTransitiveFragmentsAccuracy(self):
    self.ScratchFile('testapp/BUILD', [
        'filegroup(name = "fg", srcs = ["a.file"])',
        'filegroup(name = "ccfg", srcs = [":ccbin"])',
        'cc_binary(name = "ccbin", srcs = ["ccbin.cc"])',
    ])
    cts1 = self._bazel_api.cquery(['//testapp:fg'])[2]
    self.assertNotIn('CppConfiguration', cts1[0].transitive_fragments)
    cts2 = self._bazel_api.cquery(['//testapp:ccfg'])[2]
    self.assertIn('CppConfiguration', cts2[0].transitive_fragments)

  def testGetTargetConfig(self):
    self.ScratchFile('testapp/BUILD', [
        'filegroup(name = "fg", srcs = ["a.file"])',
    ])
    cts = self._bazel_api.cquery(['//testapp:fg'])[2]
    config = self._bazel_api.get_config(cts[0].config_hash)
    expected_fragments = ['PlatformConfiguration', 'JavaConfiguration']
    for exp in expected_fragments:
      self.assertIn(exp, config.fragments)
    core_options = config.options['CoreOptions']
    self.assertIsNotNone(core_options)
    self.assertIn(('stamp', 'false'), core_options.items())

  def testGetHostConfig(self):
    self.ScratchFile('testapp/BUILD', [
        'genrule(',
        ' name = "g",',
        ' srcs = [],',
        ' cmd = "",',
        ' outs = ["g.out"],',
        ' tools = [":fg"])',
        'filegroup(name = "fg", srcs = ["a.file"])',
    ])
    query = ['//testapp:fg', '--universe_scope=//testapp:g']
    cts = self._bazel_api.cquery(query)[2]
    config = self._bazel_api.get_config(cts[0].config_hash)
    self.assertIsInstance(config, HostConfiguration)
    # We don't currently populate or read a host configuration's details.
    self.assertEqual(len(config.fragments), 0)
    self.assertEqual(len(config.options), 0)

  def testGetNullConfig(self):
    self.ScratchFile('testapp/BUILD', [
        'filegroup(name = "fg", srcs = ["a.file"])',
    ])
    cts = self._bazel_api.cquery(['//testapp:a.file'])[2]
    config = self._bazel_api.get_config(cts[0].config_hash)
    self.assertIsInstance(config, NullConfiguration)
    # Null configurations have no information by definition.
    self.assertEqual(len(config.fragments), 0)
    self.assertEqual(len(config.options), 0)

  def testConfigWithDefines(self):
    self.ScratchFile('testapp/BUILD', [
        'filegroup(name = "fg", srcs = ["a.file"])',
    ])
    cquery_args = ['//testapp:fg', '--define', 'a=b']
    cts = self._bazel_api.cquery(cquery_args)[2]
    config = self._bazel_api.get_config(cts[0].config_hash)
    user_defined_options = config.options['user-defined']
    self.assertIsNotNone(user_defined_options)
    # Compare through the public mapping interface rather than frozendict's
    # private _dict attribute.
    self.assertDictEqual(dict(user_defined_options), {'--define:a': 'b'})

  def testConfigWithStarlarkFlags(self):
    # One list entry per line of defs.bzl: every entry needs its own comma,
    # otherwise adjacent string literals are silently concatenated.
    self.ScratchFile('testapp/defs.bzl', [
        'def _flag_impl(settings, attr):',
        ' pass',
        'string_flag = rule(',
        ' implementation = _flag_impl,',
        ' build_setting = config.string(flag = True)',
        ')',
    ])
    self.ScratchFile('testapp/BUILD', [
        'load(":defs.bzl", "string_flag")',
        'string_flag(name = "my_flag", build_setting_default = "nada")',
        'filegroup(name = "fg", srcs = ["a.file"])',
    ])
    cquery_args = ['//testapp:fg', '--//testapp:my_flag', 'algo']
    cts = self._bazel_api.cquery(cquery_args)[2]
    config = self._bazel_api.get_config(cts[0].config_hash)
    user_defined_options = config.options['user-defined']
    self.assertIsNotNone(user_defined_options)
    self.assertDictEqual(
        dict(user_defined_options), {'//testapp:my_flag': 'algo'})


if __name__ == '__main__':
unittest.main()

0 comments on commit 7095653

Please sign in to comment.