Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add attributes to control header and external source inclusion #44

Merged
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
98 changes: 89 additions & 9 deletions refresh.template.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@


import concurrent.futures
import dataclasses
dave-hagedorn marked this conversation as resolved.
Show resolved Hide resolved
import functools
import itertools
import json
Expand All @@ -38,7 +39,7 @@
# Downside: We could miss headers conditionally included, e.g., by platform.
# Implementation: skip source files we've already seen in _get_files, shortcutting a bunch of slow preprocessor runs in _get_headers and output. We'd need a threadsafe set, or one set per thread, because header finding is already multithreaded for speed (same magnitudespeed win as single-threaded set).
# Anticipated speedup: ~2x (30s to 15s.)
# A better one would be to cache include information.
# A better one would be to cache include information.
# We could check to see if Bazel has cached .d files listing the dependencies and use those instead of preprocessing everything to regenerate them.
# If all the files listed in the .d file have older last-modified dates than the .d file itself, this should be safe. We'd want to check that bazel isn't 0-timestamping generated files, though.
# We could also write .d files when needed, saving work between runs.
Expand All @@ -50,7 +51,7 @@ def _print_header_finding_warning_once():
# Shared between platforms

# Just log once; subsequent messages wouldn't add anything.
if _print_header_finding_warning_once.has_logged: return
if _print_header_finding_warning_once.has_logged: return
_print_header_finding_warning_once.has_logged = True

print("""\033[0;33m>>> While locating the headers you use, we encountered a compiler warning or error.
Expand Down Expand Up @@ -194,6 +195,60 @@ def _get_headers_msvc(compile_args: typing.List[str], source_path: str):
return headers


def _is_relative_to(sub: pathlib.PurePath, parent: pathlib.PurePath):
"""Helper to determine if one path is relative to another"""
try:
# Note: Python 3.9 has .is_relative_to()
dave-hagedorn marked this conversation as resolved.
Show resolved Hide resolved
sub.relative_to(parent)
return True
except ValueError:
return False


def _is_file_in_external_workspace(file_str: str):
"""Returns True if the file is in the current workspace's external workspace.
Works with relative and absolute paths
"""

workspace = pathlib.PurePath(os.environ["BUILD_WORKSPACE_DIRECTORY"])
external_rel = pathlib.PurePath("external")
external_abs = workspace / external_rel
header = pathlib.PurePath(file_str)


dave-hagedorn marked this conversation as resolved.
Show resolved Hide resolved
# Relative path to a file in external workspace
# "external/..."
if _is_relative_to(header, external_rel): return True
# Absolte path to a file in external workspace
dave-hagedorn marked this conversation as resolved.
Show resolved Hide resolved
# "/some/workspace/external/..."
if _is_relative_to(header, external_abs): return True

dave-hagedorn marked this conversation as resolved.
Show resolved Hide resolved
return False


def _is_file_in_workspace(file_str: str):
"""Returns True if the file is in the current workspace.
If the file is in an workspace this return False
Works with relative and absolute paths
"""

workspace = pathlib.PurePath(os.environ["BUILD_WORKSPACE_DIRECTORY"])
header = pathlib.PurePath(file_str)

# Anything in the workspace/external/... symlink is technically in an external workspace
if _is_file_in_external_workspace(file_str): return False
# Assume anything relative is in the workspace
elif not header.is_absolute(): return True
# Otherwise check the path is relative to the workspace dir
elif _is_relative_to(header, workspace): return True

return False

def _is_file_outside_workspace(file_str: str):
"""Convenience wrapper - returns True if the file is not in any workspace"""
return not _is_file_in_workspace(file_str) and not _is_file_in_external_workspace(file_str)


def _get_headers(compile_args: typing.List[str], source_path: str):
"""Gets the headers used by a particular compile command.

Expand All @@ -205,7 +260,7 @@ def _get_headers(compile_args: typing.List[str], source_path: str):
# As an alternative approach, you might consider trying to get the headers by inspecing the Middlemen actions in the aquery output, but I don't see a way to get just the ones actually #included--or an easy way to get the system headers--without invoking the preprocessor's header search logic.
# For more on this, see https://github.com/hedronvision/bazel-compile-commands-extractor/issues/5#issuecomment-1031148373

# Rather than print a scary compiler error, warn gently
# Rather than print a scary compiler error, warn gently
if not os.path.isfile(source_path):
if not _get_headers.has_logged_missing_file_error: # Just log once; subsequent messages wouldn't add anything.
_get_headers.has_logged_missing_file_error = True
Expand All @@ -216,14 +271,29 @@ def _get_headers(compile_args: typing.List[str], source_path: str):
That way everything is generated, browsable and indexed for autocomplete.
However, if you have *already* built your code, and generated the missing file...
Please make sure you're supplying this tool with the same flags you use to build.
You can either use a refresh_compile_commands rule or the special -- syntax. Please see the README.
You can either use a refresh_compile_commands rule or the special -- syntax. Please see the README.
[Supplying flags normally won't work. That just causes this tool to be built with those flags.]
Continuing gracefully...\033[0m""", file=sys.stderr)
return set()


if compile_args[0].endswith('cl.exe'): # cl.exe and also clang-cl.exe
return _get_headers_msvc(compile_args, source_path)
return _get_headers_gcc(compile_args, source_path)
headers = _get_headers_msvc(compile_args, source_path)
else:
headers = _get_headers_gcc(compile_args, source_path)

if {exclude_headers} == "all":
headers = set()
elif {exclude_headers} == "external":
headers = {header for header in headers if not _is_file_in_external_workspace(header)}
elif {exclude_headers} == "system":
headers = {header for header in headers if not _is_file_outside_workspace(header)}
elif {exclude_headers} == "external_and_system":
headers = {header for header in headers if not _is_file_outside_workspace(header) and not _is_file_in_external_workspace(header)}

return headers


dave-hagedorn marked this conversation as resolved.
Show resolved Hide resolved
_get_headers.has_logged_missing_file_error = False


Expand Down Expand Up @@ -364,12 +434,21 @@ def _convert_compile_commands(aquery_output):
Crucially, this de-Bazels the compile commands it takes as input, leaving something clangd can understand. The result is a command that could be run from the workspace root directly, with no bazel-specific environment variables, etc.
"""

if {exclude_external_sources}:
targets = {target.id : target.label for target in aquery_output.targets}
# For each action, find its target
# If the target starts with @, it's an action for a target in an external workspace
# If the target can't be found (not sure if this can happen), just keep the action
actions = (action for action in aquery_output.actions if not targets.get(action.targetId, "").startswith("@"))
dave-hagedorn marked this conversation as resolved.
Show resolved Hide resolved
else:
actions = aquery_output.actions

# Process each action from Bazelisms -> file paths and their clang commands
# Threads instead of processes because most of the execution time is farmed out to subprocesses. No need to sidestep the GIL. Might change after https://github.com/clangd/clangd/issues/123 resolved
with concurrent.futures.ThreadPoolExecutor(
max_workers=min(32, (os.cpu_count() or 1) + 4) # Backport. Default in MIN_PY=3.8. See "using very large resources implicitly on many-core machines" in https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor
) as threadpool:
outputs = threadpool.map(_get_cpp_command_for_files, aquery_output.actions)
outputs = threadpool.map(_get_cpp_command_for_files, actions)

# Yield as compile_commands.json entries
header_files_already_written = set()
Expand Down Expand Up @@ -476,7 +555,7 @@ def _ensure_external_workspaces_link_exists():
except OSError:
print("\033[0;31m>>> //external already exists, but it isn't a symlink or Windows junction. //external is reserved by Bazel and needed for this tool. Please rename or delete your existing //external and rerun. More details in the README if you want them.\033[0m", file=sys.stderr) # Don't auto delete in case the user has something important there.
exit(1)

# Normalize the path for matching
# First, workaround a gross case where Windows readlink returns extended path, starting with \\?\, causing the match to fail
if is_windows and current_dest.startswith('\\\\?\\'):
Expand Down Expand Up @@ -516,7 +595,7 @@ def _ensure_gitignore_entries():
('/.cache/', "# Directory where clangd puts its indexing work"),
]

# Separate operations because Python doesn't have a built in mode for read/write, don't truncate, create, allow seek to beginning of file.
# Separate operations because Python doesn't have a built in mode for read/write, don't truncate, create, allow seek to beginning of file.
open('.gitignore', 'a').close() # Ensure .gitignore exists
with open('.gitignore') as gitignore:
lines = [l.rstrip() for l in gitignore]
Expand Down Expand Up @@ -549,6 +628,7 @@ def _ensure_gitignore_entries():
{target_flag_pairs}
# End: template filled by Bazel
]

compile_command_entries = []
for (target, flags) in target_flag_pairs:
compile_command_entries.extend(_get_commands(target, flags))
Expand Down
60 changes: 40 additions & 20 deletions refresh_compile_commands.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -23,61 +23,81 @@ refresh_compile_commands(
# Or a dict of targets and any flags required to build:
# (No need to add flags already in .bazelrc. They're automatically picked up.)
# targets = {
# "//:my_output_1": "--important_flag1 --important_flag2=true",
# "//:my_output_1": "--important_flag1 --important_flag2=true",
# "//:my_output_2": "",
# },
# If you don't specify a target, that's fine (if it works for you); compile_commands.json will default to containing commands used in building all possible targets. But in that case, just bazel run @hedron_compile_commands//:refresh_all
# Wildcard target patterns (..., *, :all) patterns *are* allowed, like in bazel build
# For more, see https://docs.bazel.build/versions/main/guide.html#specifying-targets-to-build

# Optional attributes :

# To omit entries for sources from external workspaces (dependencies)
# This defaults to False
exclude_external_sources = False

# To omit entries for some or all headers
# Defaults to None (always include all headers)
# Some tools such as ccls work better without header entries, whereas others such as clangd require these (see https://github.com/clangd/clangd/issues/123 for why)
# all - will omit entires for any and all header files
# system - will omit system headers (/usr/include, etc.), but keep entries for headers in your workspace, and from any external workspaces (dependencies)
# external - will omit any external headers, but keep system and your workspace's
# external_and_system - will keep just your workspace's headers
#
# Note that any generated files from your workspace in bazel-out, etc. will still be included - these are considered part of your workspace's headers
exclude_external_workspaces = "all" | "external" | "system" | "external_and_system"
dave-hagedorn marked this conversation as resolved.
Show resolved Hide resolved
```
"""


dave-hagedorn marked this conversation as resolved.
Show resolved Hide resolved
########################################
# Implementation

load("@bazel_tools//tools/cpp:toolchain_utils.bzl", "find_cpp_toolchain")


def refresh_compile_commands(name, targets = None,
**kwargs): # For the other common attributes. Tags, compatible_with, etc. https://docs.bazel.build/versions/main/be/common-definitions.html#common-attributes.

def refresh_compile_commands(
name,
targets = None,
exclude_headers = None,
exclude_external_sources = False,
**kwargs): # For the other common attributes. Tags, compatible_with, etc. https://docs.bazel.build/versions/main/be/common-definitions.html#common-attributes.
# Convert the various, acceptable target shorthands into the dictionary format
if not targets: # Default to all targets in main workspace
if not targets: # Default to all targets in main workspace
targets = {"@//...": ""}
elif type(targets) == "list": # Allow specifying a list of targets w/o arguments
elif type(targets) == "list": # Allow specifying a list of targets w/o arguments
targets = {target: "" for target in targets}
elif type(targets) != "dict": # Assume they've supplied a single string/label and wrap it
elif type(targets) != "dict": # Assume they've supplied a single string/label and wrap it
targets = {targets: ""}

# Generate runnable python script from template
script_name = name + ".py"
_expand_template(name = script_name, labels_to_flags = targets, **kwargs)
_expand_template(name = script_name, labels_to_flags = targets, exclude_headers = exclude_headers, exclude_external_sources = exclude_external_sources, **kwargs)
native.py_binary(name = name, srcs = [script_name], **kwargs)


def _expand_template_impl(ctx):
"""Inject targets of interest into refresh.template.py, and set it up to be run."""
script = ctx.actions.declare_file(ctx.attr.name)
ctx.actions.expand_template(
output = script,
is_executable = True,
template = ctx.file._script_template,
substitutions = { # Note, don't delete whitespace. Correctly doing multiline indenting.
" {target_flag_pairs}":
"\n".join([" {},".format(pair) for pair in ctx.attr.labels_to_flags.items()]),
" {windows_default_include_paths}":
"\n".join([" %r," % path for path in find_cpp_toolchain(ctx).built_in_include_directories]), # find_cpp_toolchain is from https://docs.bazel.build/versions/main/integrating-with-rules-cc.html
}
substitutions = {
# Note, don't delete whitespace. Correctly doing multiline indenting.
" {target_flag_pairs}": "\n".join([" {},".format(pair) for pair in ctx.attr.labels_to_flags.items()]),
" {windows_default_include_paths}": "\n".join([" %r," % path for path in find_cpp_toolchain(ctx).built_in_include_directories]), # find_cpp_toolchain is from https://docs.bazel.build/versions/main/integrating-with-rules-cc.html
"{exclude_headers}": '"' + str(ctx.attr.exclude_headers) + '"',
"{exclude_external_sources}": str(ctx.attr.exclude_external_sources),
},
)
return DefaultInfo(files = depset([script]))

_expand_template = rule(
attrs = {
"labels_to_flags": attr.string_dict(mandatory = True), # string keys instead of label_keyed because Bazel doesn't support parsing wildcard target patterns (..., *, :all) in BUILD attributes.
"labels_to_flags": attr.string_dict(mandatory = True), # string keys instead of label_keyed because Bazel doesn't support parsing wildcard target patterns (..., *, :all) in BUILD attributes.
"exclude_external_sources": attr.bool(default = False),
"exclude_headers": attr.string(values = ["all", "system", "external", "external_and_system"]),
"_script_template": attr.label(allow_single_file = True, default = "refresh.template.py"),
"_cc_toolchain": attr.label(default = "@bazel_tools//tools/cpp:current_cc_toolchain"), # For Windows INCLUDE. If this were eliminated, for example by the resolution of https://github.com/clangd/clangd/issues/123, we'd be able to just use a macro and skylib's expand_template rule: https://github.com/bazelbuild/bazel-skylib/pull/330
"_cc_toolchain": attr.label(default = "@bazel_tools//tools/cpp:current_cc_toolchain"), # For Windows INCLUDE. If this were eliminated, for example by the resolution of https://github.com/clangd/clangd/issues/123, we'd be able to just use a macro and skylib's expand_template rule: https://github.com/bazelbuild/bazel-skylib/pull/330
},
toolchains = ["@bazel_tools//tools/cpp:toolchain_type"], # Needed for find_cpp_toolchain with --incompatible_enable_cc_toolchain_resolution
toolchains = ["@bazel_tools//tools/cpp:toolchain_type"], # Needed for find_cpp_toolchain with --incompatible_enable_cc_toolchain_resolution
dave-hagedorn marked this conversation as resolved.
Show resolved Hide resolved
implementation = _expand_template_impl,
)