Skip to content

Commit

Permalink
Switch back off rules_python.
Browse files Browse the repository at this point in the history
Mostly reverts 0e5b1aa

Tracking restoration at #168

Please see
- #163
- bazelbuild/rules_python#1732
- #165
- (rules_python issue to come)
- #166
- bazelbuild/rules_python#1169
  • Loading branch information
cpsauer committed Feb 1, 2024
1 parent 40a51d6 commit 0b821b7
Show file tree
Hide file tree
Showing 11 changed files with 143 additions and 103 deletions.
2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ repos:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-ast
exclude: ^check_python_version\.template\.py$ # Template for bazel creates syntax error
- id: debug-statements
exclude: ^check_python_version\.template\.py$ # Template for bazel creates syntax error
- id: mixed-line-ending
- id: check-case-conflict
- id: fix-byte-order-marker
Expand Down
12 changes: 1 addition & 11 deletions BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,6 @@ filegroup(
visibility = ["//visibility:public"],
srcs = glob(["**/*.bzl"]) + [
"@bazel_tools//tools:bzl_srcs",
"@hedron_compile_commands_pip//:requirements.bzl",
"@python_3_11//:defs.bzl",
"@rules_python//:bzl",
],
)

Expand All @@ -28,17 +25,10 @@ filegroup(
# Implementation:
# If you are looking into the implementation, start with the overview in ImplementationReadme.md.

exports_files(["refresh.template.py"]) # For implicit use by the refresh_compile_commands macro, not direct use.
exports_files(["refresh.template.py", "check_python_version.template.py"]) # For implicit use by the refresh_compile_commands macro, not direct use.

cc_binary(
name = "print_args",
srcs = ["print_args.cpp"],
visibility = ["//visibility:public"],
)

# Quick test for https://github.com/bazelbuild/rules_python/issues/1732#issuecomment-1918268343. Delete when resolved.
load("@python_3_11//:defs.bzl", "py_binary")
py_binary(
name = "nvcc_clang_diff",
srcs = ["nvcc_clang_diff.py"],
)
16 changes: 0 additions & 16 deletions MODULE.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,3 @@ p = use_extension("//:workspace_setup.bzl", "hedron_compile_commands_extension")
pt = use_extension("//:workspace_setup_transitive.bzl", "hedron_compile_commands_extension")
ptt = use_extension("//:workspace_setup_transitive_transitive.bzl", "hedron_compile_commands_extension")
pttt = use_extension("//:workspace_setup_transitive_transitive_transitive.bzl", "hedron_compile_commands_extension")


bazel_dep(name = "rules_python", version = "0.29.0")
python = use_extension("@rules_python//python/extensions:python.bzl", "python")
python.toolchain(
python_version = "3.11",
)
use_repo(python, "python_3_11")
pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip")
pip.parse(
hub_name = "hedron_compile_commands_pip",
# Available versions are listed in @rules_python//python:versions.bzl.
python_version = "3.11",
requirements_lock = "//:requirements.txt",
)
use_repo(pip, "hedron_compile_commands_pip")
15 changes: 15 additions & 0 deletions check_python_version.template.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""
Print a nice error message if the user is running too old of a version of python.
Why not just put this block at the top of refresh.template.py?
Python versions introduce constructs that don't parse in older versions, leading to an error before the version check is executed, since python parses files eagerly.
For examples of this issue, see https://github.com/hedronvision/bazel-compile-commands-extractor/issues/119 and https://github.com/hedronvision/bazel-compile-commands-extractor/issues/95
This seems common enough that hopefully bazel will support it someday. We've filed a request: https://github.com/bazelbuild/bazel/issues/18389
"""

import sys
if sys.version_info < (3,6):
sys.exit("\n\033[31mFATAL ERROR:\033[0m Python 3.6 or later is required. Please update!")

# Only import -> parse once we're sure we have the required python version
import {to_run}
{to_run}.main()
104 changes: 68 additions & 36 deletions refresh.template.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,20 @@
- Crucially, this output is de-Bazeled; The result is a command that could be run from the workspace root directly, with no Bazel-specific requirements, environment variables, etc.
"""

# This file requires python 3.6, which is enforced by check_python_version.template.py
# 3.6 backwards compatibility required by @zhanyong-wan in https://github.com/hedronvision/bazel-compile-commands-extractor/issues/111.
# 3.7 backwards compatibility required by @lummax in https://github.com/hedronvision/bazel-compile-commands-extractor/pull/27.
# ^ Try to contact before upgrading.
# When adding things could be cleaner if we had a higher minimum version, please add a comment with MIN_PY=3.<v>.
# Similarly, when upgrading, please search for that MIN_PY= tag.


import concurrent.futures
import enum
import functools
import functools # MIN_PY=3.9: Replace `functools.lru_cache(maxsize=None)` with `functools.cache`.
import itertools
import json
import locale
import orjson # orjson is much faster than the standard library's json module (1.9 seconds vs 6.6 seconds for a ~140 MB file). See https://github.com/hedronvision/bazel-compile-commands-extractor/pull/118
import os
import pathlib
import re
Expand All @@ -28,7 +34,7 @@
import tempfile
import time
import types
import typing
import typing # MIN_PY=3.9: Switch e.g. typing.List[str] -> List[str]


@enum.unique
Expand Down Expand Up @@ -89,7 +95,7 @@ def _print_header_finding_warning_once():
_print_header_finding_warning_once.has_logged = False


@functools.lru_cache
@functools.lru_cache(maxsize=None)
def _get_bazel_version():
"""Gets the Bazel version as a tuple of (major, minor, patch).
Expand All @@ -99,7 +105,9 @@ def _get_bazel_version():
"""
bazel_version_process = subprocess.run(
['bazel', 'version'],
capture_output=True,
# MIN_PY=3.7: Replace PIPEs with capture_output.
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
encoding=locale.getpreferredencoding(),
check=True, # Should always succeed.
)
Expand All @@ -118,12 +126,14 @@ def _get_bazel_version():
return tuple(int(match.group(i)) for i in range(1, 4))


@functools.lru_cache
@functools.lru_cache(maxsize=None)
def _get_bazel_cached_action_keys():
"""Gets the set of actionKeys cached in bazel-out."""
action_cache_process = subprocess.run(
['bazel', 'dump', '--action_cache'],
capture_output=True,
# MIN_PY=3.7: Replace PIPEs with capture_output.
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
encoding=locale.getpreferredencoding(),
check=True, # Should always succeed.
)
Expand Down Expand Up @@ -177,7 +187,7 @@ def _parse_headers_from_makefile_deps(d_file_content: str, source_path_for_sanit
return set(headers)


@functools.lru_cache
@functools.lru_cache(maxsize=None)
def _get_cached_modified_time(path: str):
"""Returns 0 if the file doesn't exist.
Expand Down Expand Up @@ -222,7 +232,7 @@ def _is_nvcc(path: str):
return os.path.basename(path).startswith('nvcc')


def _get_headers_gcc(compile_args: list[str], source_path: str, action_key: str):
def _get_headers_gcc(compile_args: typing.List[str], source_path: str, action_key: str):
"""Gets the headers used by a particular compile command that uses gcc arguments formatting (including clang.)
Relatively slow. Requires running the C preprocessor if we can't hit Bazel's cache.
Expand Down Expand Up @@ -277,7 +287,9 @@ def _get_headers_gcc(compile_args: list[str], source_path: str, action_key: str)

header_search_process = _subprocess_run_spilling_over_to_param_file_if_needed( # Note: gcc/clang can be run from Windows, too.
header_cmd,
capture_output=True,
# MIN_PY=3.7: Replace PIPEs with capture_output.
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
encoding=locale.getpreferredencoding(),
check=False, # We explicitly ignore errors and carry on.
)
Expand Down Expand Up @@ -368,7 +380,7 @@ def windows_list2cmdline(seq):
return ''.join(result)


def _subprocess_run_spilling_over_to_param_file_if_needed(command: list[str], **kwargs):
def _subprocess_run_spilling_over_to_param_file_if_needed(command: typing.List[str], **kwargs):
"""Same as subprocess.run, but it handles the case where the command line length is exceeded on Windows and we need a param file."""

# On non-Windows, we have to run directly via a special case.
Expand Down Expand Up @@ -396,7 +408,7 @@ def _subprocess_run_spilling_over_to_param_file_if_needed(command: list[str], **
raise


def _get_headers_msvc(compile_args: list[str], source_path: str):
def _get_headers_msvc(compile_args: typing.List[str], source_path: str):
"""Gets the headers used by a particular compile command that uses msvc argument formatting (including clang-cl.)
Relatively slow. Requires running the C preprocessor.
Expand Down Expand Up @@ -471,13 +483,23 @@ def _get_headers_msvc(compile_args: list[str], source_path: str):
return headers, should_cache


def _is_relative_to(sub: pathlib.PurePath, parent: pathlib.PurePath):
"""Determine if one path is relative to another."""
# MIN_PY=3.9: Eliminate helper in favor of `PurePath.is_relative_to()`.
try:
sub.relative_to(parent)
except ValueError:
return False
return True


def _file_is_in_main_workspace_and_not_external(file_str: str):
    """Determine whether `file_str` refers to a file in the main workspace (vs. an external repository).

    Absolute paths are accepted only if they fall under BUILD_WORKSPACE_DIRECTORY, and are
    relativized to it; relative paths are assumed to already be workspace-relative.
    """
    file_path = pathlib.PurePath(file_str)
    if file_path.is_absolute():
        workspace_absolute = pathlib.PurePath(os.environ["BUILD_WORKSPACE_DIRECTORY"])
        if not _is_relative_to(file_path, workspace_absolute):
            return False
        # BUG FIX: the original assigned the *boolean* result of _is_relative_to() to
        # file_path, clobbering the path the subsequent checks rely on. We need the
        # relativized path itself here.
        file_path = file_path.relative_to(workspace_absolute)
    # You can now assume that the path is relative to the workspace.
    # [Already assuming that relative paths are relative to the main workspace.]

Expand Down Expand Up @@ -540,7 +562,7 @@ def _get_headers(compile_action, source_path: str):
cache_last_modified = os.path.getmtime(cache_file_path) # Do before opening just as a basic hedge against concurrent write, even though we won't handle the concurrent delete case perfectly.
try:
with open(cache_file_path) as cache_file:
action_key, cached_headers = orjson.loads(cache_file.read())
action_key, cached_headers = json.load(cache_file)
except json.JSONDecodeError:
# Corrupted cache, which can happen if, for example, the user kills the program, since writes aren't atomic.
# But if it is the result of a bug, we want to print it before it's overwritten, so it can be reported
Expand Down Expand Up @@ -569,11 +591,13 @@ def _get_headers(compile_action, source_path: str):
# Cache for future use
if output_file and should_cache:
os.makedirs(os.path.dirname(cache_file_path), exist_ok=True)
with open(cache_file_path, 'wb') as cache_file:
cache_file.write(orjson.dumps(
with open(cache_file_path, 'w') as cache_file:
json.dump(
(compile_action.actionKey, list(headers)),
option=orjson.OPT_INDENT_2,
))
cache_file,
indent=2,
check_circular=False,
)
elif not headers and cached_headers: # If we failed to get headers, we'll fall back on a stale cache.
headers = set(cached_headers)

Expand Down Expand Up @@ -689,7 +713,7 @@ def _get_files(compile_action):
_get_files.extensions_to_language_args = {ext : flag for exts, flag in _get_files.extensions_to_language_args.items() for ext in exts} # Flatten map for easier use


@functools.lru_cache
@functools.lru_cache(maxsize=None)
def _get_apple_SDKROOT(SDK_name: str):
"""Get path to xcode-select'd root for the given OS."""
SDKROOT_maybe_versioned = subprocess.check_output(
Expand All @@ -707,7 +731,7 @@ def _get_apple_SDKROOT(SDK_name: str):
# Traditionally stored in SDKROOT environment variable, but not provided by Bazel. See https://github.com/bazelbuild/bazel/issues/12852


def _get_apple_platform(compile_args: list[str]):
def _get_apple_platform(compile_args: typing.List[str]):
"""Figure out which Apple platform a command is for.
Is the name used by Xcode in the SDK files, not the marketing name.
Expand All @@ -721,15 +745,15 @@ def _get_apple_platform(compile_args: list[str]):
return None


@functools.lru_cache(maxsize=None)
def _get_apple_DEVELOPER_DIR():
    """Get path to xcode-select'd developer directory."""
    # Unless xcode-select has been invoked (like for a beta) we'd expect, e.g., '/Applications/Xcode.app/Contents/Developer' or '/Library/Developer/CommandLineTools'.
    # Traditionally stored in DEVELOPER_DIR environment variable, but not provided by Bazel. See https://github.com/bazelbuild/bazel/issues/12852
    developer_dir = subprocess.check_output(
        ('xcode-select', '--print-path'),
        encoding=locale.getpreferredencoding(),
    )
    return developer_dir.rstrip()


def _apple_platform_patch(compile_args: list[str]):
def _apple_platform_patch(compile_args: typing.List[str]):
"""De-Bazel the command into something clangd can parse.
This function has fixes specific to Apple platforms, but you should call it on all platforms. It'll determine whether the fixes should be applied or not.
Expand Down Expand Up @@ -803,7 +827,9 @@ def get_workspace_root(path_from_execroot: pathlib.PurePath):
# On Windows, it fails to spawn the subprocess when the path uses forward slashes as a separator.
# Here, we convert emcc driver path to use the native path separator.
[str(emcc_driver)] + compile_action.arguments[1:],
capture_output=True,
# MIN_PY=3.7: Replace PIPEs with capture_output.
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
env=environment,
encoding=locale.getpreferredencoding(),
check=False, # We explicitly ignore errors and carry on.
Expand All @@ -817,7 +843,7 @@ def get_workspace_root(path_from_execroot: pathlib.PurePath):
end_args_idx = lines.index(END_ARGS_MARKER, begin_args_idx + 1)
args = lines[begin_args_idx + 1:end_args_idx]
clang_driver = pathlib.PurePath(args[0])
if clang_driver.is_relative_to(workspace_absolute):
if _is_relative_to(clang_driver, workspace_absolute):
args[0] = clang_driver.relative_to(workspace_absolute).as_posix()
return args

Expand All @@ -826,7 +852,7 @@ def get_workspace_root(path_from_execroot: pathlib.PurePath):
END_ARGS_MARKER = '===HEDRON_COMPILE_COMMANDS_END_ARGS==='


def _all_platform_patch(compile_args: list[str]):
def _all_platform_patch(compile_args: typing.List[str]):
"""Apply de-Bazeling fixes to the compile command that are shared across target platforms."""
# clangd writes module cache files to the wrong place
# Without this fix, you get tons of module caches dumped into the VSCode root folder.
Expand Down Expand Up @@ -865,7 +891,7 @@ def _all_platform_patch(compile_args: list[str]):

# Discover compilers that are actually symlinks to ccache--and replace them with the underlying compiler
if os.path.islink(compile_args[0]):
compiler_path = os.readlink(compile_args[0])
compiler_path = os.readlink(compile_args[0]) # MIN_PY=3.9 Switch to pathlib path.readlink()
if os.path.basename(compiler_path) == "ccache":
compiler = os.path.basename(compile_args[0])
real_compiler_path = shutil.which(compiler)
Expand All @@ -877,7 +903,7 @@ def _all_platform_patch(compile_args: list[str]):
return compile_args


def _nvcc_patch(compile_args: list[str]) -> list[str]:
def _nvcc_patch(compile_args: typing.List[str]) -> typing.List[str]:
"""Apply fixes to args to nvcc.
Basically remove everything that's an nvcc arg that is not also a clang arg, converting what we can.
Expand Down Expand Up @@ -1120,7 +1146,9 @@ def _convert_compile_commands(aquery_output):

# Process each action from Bazelisms -> file paths and their clang commands
# Threads instead of processes because most of the execution time is farmed out to subprocesses. No need to sidestep the GIL. Might change after https://github.com/clangd/clangd/issues/123 resolved
with concurrent.futures.ThreadPoolExecutor() as threadpool:
with concurrent.futures.ThreadPoolExecutor(
max_workers=min(32, (os.cpu_count() or 1) + 4) # Backport. Default in MIN_PY=3.8. See "using very large resources implicitly on many-core machines" in https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor
) as threadpool:
outputs = threadpool.map(_get_cpp_command_for_files, aquery_output.actions)

# Yield as compile_commands.json entries
Expand Down Expand Up @@ -1210,7 +1238,9 @@ def _get_commands(target: str, flags: str):

aquery_process = subprocess.run(
aquery_args,
capture_output=True,
# MIN_PY=3.7: Replace PIPEs with capture_output.
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
encoding=locale.getpreferredencoding(),
check=False, # We explicitly ignore errors from `bazel aquery` and carry on.
)
Expand Down Expand Up @@ -1273,7 +1303,7 @@ def _ensure_external_workspaces_link_exists():
# This seemed to be the cleanest way to detect both.
# Note that os.path.islink doesn't detect junctions.
try:
current_dest = source.readlink()
current_dest = source.readlink() # MIN_PY=3.9 source.readlink()
except OSError:
log_error(f">>> //external already exists, but it isn't a {'junction' if is_windows else 'symlink'}. //external is reserved by Bazel and needed for this tool. Please rename or delete your existing //external and rerun. More details in the README if you want them.") # Don't auto delete in case the user has something important there.
sys.exit(1)
Expand Down Expand Up @@ -1368,7 +1398,7 @@ def _ensure_cwd_is_workspace_root():
os.chdir(workspace_root)


if __name__ == '__main__':
def main():
_ensure_cwd_is_workspace_root()
_ensure_gitignore_entries_exist()
_ensure_external_workspaces_link_exists()
Expand All @@ -1389,8 +1419,10 @@ def _ensure_cwd_is_workspace_root():
sys.exit(1)

# Chain output into compile_commands.json
with open('compile_commands.json', 'wb') as output_file:
output_file.write(orjson.dumps(
with open('compile_commands.json', 'w') as output_file:
json.dump(
compile_command_entries,
option=orjson.OPT_INDENT_2,
))
output_file,
indent=2, # Yay, human readability!
check_circular=False # For speed.
)

0 comments on commit 0b821b7

Please sign in to comment.