From 1d18f5faae058879b1a1ff40678c950b94fe8dbf Mon Sep 17 00:00:00 2001 From: Remco Vermeulen Date: Wed, 5 Jul 2023 23:36:08 -0700 Subject: [PATCH 1/4] Address incorrect inclusion of dependencies to dependency graph --- codeql_bundle/helpers/bundle.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codeql_bundle/helpers/bundle.py b/codeql_bundle/helpers/bundle.py index a31b360..68d54e4 100644 --- a/codeql_bundle/helpers/bundle.py +++ b/codeql_bundle/helpers/bundle.py @@ -229,7 +229,7 @@ def add_to_graph(pack: ResolvedCodeQLPack, processed_packs: set[ResolvedCodeQLPa logger.debug(f"Adding stdlib dependency {std_lib_dep.config.name}@{str(std_lib_dep.config.version)} to {pack.config.name}@{str(pack.config.version)}") pack.dependencies.append(std_lib_dep) logger.debug(f"Adding pack {pack.config.name}@{str(pack.config.version)} to dependency graph") - pack_sorter.add(pack, *pack.dependencies) + pack_sorter.add(pack) for dep in pack.dependencies: if dep not in processed_packs: add_to_graph(dep, processed_packs, std_lib_deps) From 6a4f7c6874c2e076d39766c9da16e0acd9ab30d5 Mon Sep 17 00:00:00 2001 From: Remco Vermeulen Date: Wed, 5 Jul 2023 23:37:52 -0700 Subject: [PATCH 2/4] Address superfluous packs due to scope and codeql dir collision --- codeql_bundle/helpers/bundle.py | 1 + 1 file changed, 1 insertion(+) diff --git a/codeql_bundle/helpers/bundle.py b/codeql_bundle/helpers/bundle.py index 68d54e4..a8a5595 100644 --- a/codeql_bundle/helpers/bundle.py +++ b/codeql_bundle/helpers/bundle.py @@ -277,6 +277,7 @@ def bundle_customization_pack(customization_pack: ResolvedCodeQLPack): def copy_pack(pack: ResolvedCodeQLPack) -> ResolvedCodeQLPack: pack_copy_dir = ( Path(self.tmp_dir.name) + / "temp" # Add a temp path segment because the standard library packs have scope 'codeql' that collides with the 'codeql' directory in the bundle that is extracted to the temporary directory. 
/ cast(str, pack.config.get_scope()) / pack.config.get_pack_name() / str(pack.config.version) From a5a722c082882b35794174f25e70e205467af17b Mon Sep 17 00:00:00 2001 From: Remco Vermeulen Date: Thu, 6 Jul 2023 16:22:17 -0700 Subject: [PATCH 3/4] Bump dependency version to test with newer bundle --- tests/workspace/cpp/aa/qlpack.yml | 2 +- tests/workspace/cpp/foo-customizations/qlpack.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/workspace/cpp/aa/qlpack.yml b/tests/workspace/cpp/aa/qlpack.yml index 05668e3..62dcead 100644 --- a/tests/workspace/cpp/aa/qlpack.yml +++ b/tests/workspace/cpp/aa/qlpack.yml @@ -4,4 +4,4 @@ warnOnImplicitThis: false name: test/aa version: 0.0.1 dependencies: - "codeql/cpp-all": "0.7.3" + "codeql/cpp-all": "0.7.4" diff --git a/tests/workspace/cpp/foo-customizations/qlpack.yml b/tests/workspace/cpp/foo-customizations/qlpack.yml index 8f01e4e..2af68a3 100644 --- a/tests/workspace/cpp/foo-customizations/qlpack.yml +++ b/tests/workspace/cpp/foo-customizations/qlpack.yml @@ -2,4 +2,4 @@ library: True name: foo/cpp-customizations version: 0.0.1 dependencies: - "codeql/cpp-all": "0.7.3" \ No newline at end of file + "codeql/cpp-all": "0.7.4" \ No newline at end of file From 7ad165f3cef07a59617d30ba301757c476ab5ed6 Mon Sep 17 00:00:00 2001 From: Remco Vermeulen Date: Thu, 6 Jul 2023 16:24:23 -0700 Subject: [PATCH 4/4] Add support for platform specific bundles If the source bundle supports multiple platforms, then we can now build platform specific bundles to reduce the size of the bundle. 
--- codeql_bundle/cli.py | 39 ++++--- codeql_bundle/helpers/bundle.py | 174 +++++++++++++++++++++++++++++--- codeql_bundle/helpers/codeql.py | 7 ++ tests/bundle-diff.py | 50 +++++++++ 4 files changed, 245 insertions(+), 25 deletions(-) create mode 100644 tests/bundle-diff.py diff --git a/codeql_bundle/cli.py b/codeql_bundle/cli.py index 8e8799b..3937e12 100644 --- a/codeql_bundle/cli.py +++ b/codeql_bundle/cli.py @@ -10,7 +10,7 @@ import click from pathlib import Path from codeql_bundle.helpers.codeql import CodeQLException -from codeql_bundle.helpers.bundle import CustomBundle, BundleException +from codeql_bundle.helpers.bundle import CustomBundle, BundleException, BundlePlatform from typing import List import sys import logging @@ -30,7 +30,7 @@ "-o", "--output", required=True, - help="Path to store the custom CodeQL bundle. Can be a directory or a non-existing archive ending with the extension '.tar.gz'", + help="Path to store the custom CodeQL bundle. Can be a directory or a non-existing archive ending with the extension '.tar.gz' if there is only a single bundle", type=click.Path(path_type=Path), ) @click.option( @@ -49,12 +49,14 @@ ), default="WARNING", ) +@click.option("-p", "--platform", multiple=True, type=click.Choice(["linux64", "osx64", "win64"], case_sensitive=False), help="Target platform for the bundle") @click.argument("packs", nargs=-1, required=True) def main( bundle_path: Path, output: Path, workspace: Path, loglevel: str, + platform: List[str], packs: List[str], ) -> None: @@ -73,15 +75,27 @@ def main( workspace = workspace.parent logger.info( - f"Creating custom bundle of {bundle_path} using CodeQL packs in workspace {workspace}" + f"Creating custom bundle of {bundle_path} using CodeQL pack(s) in workspace {workspace}" ) try: bundle = CustomBundle(bundle_path, workspace) + + unsupported_platforms = list(filter(lambda p: not bundle.supports_platform(BundlePlatform.from_string(p)), platform)) + if len(unsupported_platforms) > 0: + logger.fatal( + 
f"The provided bundle supports the platform(s) {', '.join(map(str, bundle.platforms))}, but doesn't support the following platform(s): {', '.join(unsupported_platforms)}" + ) + sys.exit(1) + logger.info(f"Looking for CodeQL packs in workspace {workspace}") - packs_in_workspace = bundle.getCodeQLPacks() + packs_in_workspace = bundle.get_workspace_packs() logger.info( - f"Found the CodeQL packs: {','.join(map(lambda p: p.config.name, packs_in_workspace))}" + f"Found the CodeQL pack(s): {','.join(map(lambda p: p.config.name, packs_in_workspace))}" + ) + + logger.info( + f"Considering the following CodeQL pack(s) for inclusion in the custom bundle: {','.join(packs)}" ) if len(packs) > 0: @@ -93,23 +107,22 @@ def main( else: selected_packs = packs_in_workspace - logger.info( - f"Considering the following CodeQL packs for inclusion in the custom bundle: {','.join(map(lambda p: p.config.name, selected_packs))}" - ) + missing_packs = set(packs) - {pack.config.name for pack in selected_packs} if len(missing_packs) > 0: logger.fatal( - f"The provided CodeQL workspace doesn't contain the provided packs '{','.join(missing_packs)}'", + f"The provided CodeQL workspace doesn't contain the provided pack(s) '{','.join(missing_packs)}'", ) sys.exit(1) logger.info( - f"Adding the packs {','.join(map(lambda p: p.config.name, selected_packs))} and its workspace dependencies to the custom bundle." + f"Adding the pack(s) {','.join(map(lambda p: p.config.name, selected_packs))} and its workspace dependencies to the custom bundle." 
) bundle.add_packs(*selected_packs) - logger.info(f"Bundling custom bundle at {output}") - bundle.bundle(output) - logger.info(f"Completed building of custom bundle.") + logger.info(f"Bundling custom bundle(s) at {output}") + platforms = set(map(BundlePlatform.from_string, platform)) + bundle.bundle(output, platforms) + logger.info(f"Completed building of custom bundle(s).") except CodeQLException as e: logger.fatal(f"Failed executing CodeQL command with reason: '{e}'") sys.exit(1) diff --git a/codeql_bundle/helpers/bundle.py b/codeql_bundle/helpers/bundle.py index a8a5595..c12ce07 100644 --- a/codeql_bundle/helpers/bundle.py +++ b/codeql_bundle/helpers/bundle.py @@ -6,18 +6,21 @@ from pathlib import Path from tempfile import TemporaryDirectory import tarfile -from typing import List, cast, Callable +from typing import List, cast, Callable, Optional from collections import defaultdict import shutil import yaml import dataclasses import logging -from enum import Enum +from enum import Enum, verify, UNIQUE from dataclasses import dataclass from graphlib import TopologicalSorter +import platform +import concurrent.futures logger = logging.getLogger(__name__) +@verify(UNIQUE) class CodeQLPackKind(Enum): QUERY_PACK = 1 LIBRARY_PACK = 2 @@ -49,6 +52,9 @@ def get_dependencies_path(self) -> Path: def get_cache_path(self) -> Path: return self.path.parent / ".cache" + def is_stdlib_module(self) -> bool: + return self.config.get_scope() == "codeql" + class BundleException(Exception): pass @@ -96,7 +102,7 @@ def inner(pack_to_be_resolved: CodeQLPack) -> ResolvedCodeQLPack: resolved_dep = inner(candidate_pack) if not resolved_dep: - raise PackResolverException(f"Could not resolve dependency {dep_name} for pack {pack_to_be_resolved.config.name}!") + raise PackResolverException(f"Could not resolve dependency {dep_name}@{dep_version} for pack {pack_to_be_resolved.config.name}@{str(pack_to_be_resolved.config.version)}!") resolved_deps.append(resolved_dep) @@ -108,6 +114,33 @@ def 
inner(pack_to_be_resolved: CodeQLPack) -> ResolvedCodeQLPack: return builder() +@verify(UNIQUE) +class BundlePlatform(Enum): + LINUX = 1 + WINDOWS = 2 + OSX = 3 + + @staticmethod + def from_string(platform: str) -> "BundlePlatform": + if platform.lower() == "linux" or platform.lower() == "linux64": + return BundlePlatform.LINUX + elif platform.lower() == "windows" or platform.lower() == "win64": + return BundlePlatform.WINDOWS + elif platform.lower() == "osx" or platform.lower() == "osx64": + return BundlePlatform.OSX + else: + raise BundleException(f"Invalid platform {platform}") + + def __str__(self): + if self == BundlePlatform.LINUX: + return "linux64" + elif self == BundlePlatform.WINDOWS: + return "win64" + elif self == BundlePlatform.OSX: + return "osx64" + else: + raise BundleException(f"Invalid platform {self}") + class Bundle: def __init__(self, bundle_path: Path) -> None: self.tmp_dir = TemporaryDirectory() @@ -127,6 +160,36 @@ def __init__(self, bundle_path: Path) -> None: else: raise BundleException("Invalid CodeQL bundle path") + def supports_linux() -> set[BundlePlatform]: + if (self.bundle_path / "cpp" / "tools" / "linux64").exists(): + return {BundlePlatform.LINUX} + else: + return set() + + def supports_macos() -> set[BundlePlatform]: + if (self.bundle_path / "cpp" / "tools" / "osx64").exists(): + return {BundlePlatform.OSX} + else: + return set() + + def supports_windows() -> set[BundlePlatform]: + if (self.bundle_path / "cpp" / "tools" / "win64").exists(): + return {BundlePlatform.WINDOWS} + else: + return set() + + self.platforms: set[BundlePlatform] = supports_linux() | supports_macos() | supports_windows() + + current_system = platform.system() + if not current_system in ["Linux", "Darwin", "Windows"]: + raise BundleException(f"Unsupported system: {current_system}") + if current_system == "Linux" and BundlePlatform.LINUX not in self.platforms: + raise BundleException("Bundle doesn't support Linux!") + elif current_system == "Darwin" and 
BundlePlatform.OSX not in self.platforms: + raise BundleException("Bundle doesn't support OSX!") + elif current_system == "Windows" and BundlePlatform.WINDOWS not in self.platforms: + raise BundleException("Bundle doesn't support Windows!") + self.codeql = CodeQL(self.bundle_path / "codeql") try: logging.info(f"Validating the CodeQL CLI version part of the bundle.") @@ -141,10 +204,11 @@ def __init__(self, bundle_path: Path) -> None: self.bundle_packs: list[ResolvedCodeQLPack] = [resolve(pack) for pack in packs] + self.languages = self.codeql.resolve_languages() + except CodeQLException: raise BundleException("Cannot determine CodeQL version!") - def __del__(self) -> None: if self.tmp_dir: logging.info( @@ -152,9 +216,12 @@ def __del__(self) -> None: ) self.tmp_dir.cleanup() - def getCodeQLPacks(self) -> List[ResolvedCodeQLPack]: + def get_bundle_packs(self) -> List[ResolvedCodeQLPack]: return self.bundle_packs + def supports_platform(self, platform: BundlePlatform) -> bool: + return platform in self.platforms + class CustomBundle(Bundle): def __init__(self, bundle_path: Path, workspace_path: Path = Path.cwd()) -> None: Bundle.__init__(self, bundle_path) @@ -184,7 +251,7 @@ def __init__(self, bundle_path: Path, workspace_path: Path = Path.cwd()) -> None f"Bundle doesn't have an associated temporary directory, created {self.tmp_dir.name} for building a custom bundle." 
) - def getCodeQLPacks(self) -> List[ResolvedCodeQLPack]: + def get_workspace_packs(self) -> List[ResolvedCodeQLPack]: return self.workspace_packs def add_packs(self, *packs: ResolvedCodeQLPack): @@ -481,10 +548,93 @@ def bundle_query_pack(pack: ResolvedCodeQLPack): elif pack.kind == CodeQLPackKind.QUERY_PACK: bundle_query_pack(pack) - def bundle(self, output_path: Path): - if output_path.is_dir(): - output_path = output_path / "codeql-bundle.tar.gz" + def bundle(self, output_path: Path, platforms: set[BundlePlatform] = set()): + if len(platforms) == 0: + if output_path.is_dir(): + output_path = output_path / "codeql-bundle.tar.gz" + + logging.debug(f"Bundling custom bundle to {output_path}.") + with tarfile.open(output_path, mode="w:gz") as bundle_archive: + bundle_archive.add(self.bundle_path, arcname="codeql") + else: + if not output_path.is_dir(): + raise BundleException( + f"Output path {output_path} must be a directory when bundling for multiple platforms." + ) + + unsupported_platforms = platforms - self.platforms + if len(unsupported_platforms) > 0: + raise BundleException( + f"Unsupported platform(s) {', '.join(map(str,unsupported_platforms))} specified. Use the platform agnostic bundle to bundle for different platforms." 
+ ) + + def create_bundle_for_platform(bundle_output_path:Path, platform: BundlePlatform) -> None: + """Create a bundle for a single platform.""" + def filter_for_platform(platform: BundlePlatform) -> Callable[[tarfile.TarInfo], Optional[tarfile.TarInfo]]: + """Create a filter function that will only include files for the specified platform.""" + relative_tools_paths = [Path(lang) / "tools" for lang in self.languages] + [Path("tools")] + + def get_nonplatform_tool_paths(platform: BundlePlatform) -> List[Path]: + """Get a list of paths to tools that are not for the specified platform relative to the root of a bundle.""" + specialize_path : Optional[Callable[[Path], List[Path]]] = None + linux64_subpaths = [Path("linux64"), Path("linux")] + osx64_subpaths = [Path("osx64"), Path("macos")] + win64_subpaths = [Path("win64"), Path("windows")] + if platform == BundlePlatform.LINUX: + specialize_path = lambda p: [p / subpath for subpath in osx64_subpaths + win64_subpaths] + elif platform == BundlePlatform.WINDOWS: + specialize_path = lambda p: [p / subpath for subpath in osx64_subpaths + linux64_subpaths] + elif platform == BundlePlatform.OSX: + specialize_path = lambda p: [p / subpath for subpath in linux64_subpaths + win64_subpaths] + else: + raise BundleException(f"Unsupported platform {platform}.") + + return [candidate for candidates in map(specialize_path, relative_tools_paths) for candidate in candidates] + + def filter(tarinfo: tarfile.TarInfo) -> Optional[tarfile.TarInfo]: + tarfile_path = Path(tarinfo.name) + + exclusion_paths = get_nonplatform_tool_paths(platform) + + # Manual exclusions based on diffing the contents of the platform specific bundles and the generated platform specific bundles. 
+ if platform != BundlePlatform.WINDOWS: + exclusion_paths.append(Path("codeql.exe")) + else: + exclusion_paths.append(Path("swift/qltest")) + exclusion_paths.append(Path("swift/resource-dir")) + + if platform == BundlePlatform.LINUX: + exclusion_paths.append(Path("swift/qltest/osx64")) + exclusion_paths.append(Path("swift/resource-dir/osx64")) + + if platform == BundlePlatform.OSX: + exclusion_paths.append(Path("swift/qltest/linux64")) + exclusion_paths.append(Path("swift/resource-dir/linux64")) + + + tarfile_path_root = Path(tarfile_path.parts[0]) + exclusion_paths = [tarfile_path_root / path for path in exclusion_paths] + + if any(tarfile_path.is_relative_to(path) for path in exclusion_paths): + return None + + return tarinfo + + return filter + logging.debug(f"Bundling custom bundle for {platform} to {bundle_output_path}.") + with tarfile.open(bundle_output_path, mode="w:gz") as bundle_archive: + bundle_archive.add( + self.bundle_path, arcname="codeql", filter=filter_for_platform(platform) + ) + + with concurrent.futures.ThreadPoolExecutor(max_workers=len(platforms)) as executor: + future_to_platform = {executor.submit(create_bundle_for_platform, output_path / f"codeql-bundle-{platform}.tar.gz", platform): platform for platform in platforms} + for future in concurrent.futures.as_completed(future_to_platform): + platform = future_to_platform[future] + try: + future.result() + except Exception as exc: + raise BundleException(f"Failed to create bundle for platform {platform} with exception: {exc}.") + + - logging.debug(f"Bundling custom bundle to {output_path}.") - with tarfile.open(output_path, mode="w:gz") as bundle_archive: - bundle_archive.add(self.bundle_path, arcname="codeql") diff --git a/codeql_bundle/helpers/codeql.py b/codeql_bundle/helpers/codeql.py index b5bc9ed..085d2a5 100644 --- a/codeql_bundle/helpers/codeql.py +++ b/codeql_bundle/helpers/codeql.py @@ -164,3 +164,10 @@ def pack_create( if cp.returncode != 0: raise CodeQLException(f"Failed to run 
{cp.args} command! {cp.stderr}") + + def resolve_languages(self) -> set[str]: + cp = self._exec("resolve", "languages", "--format=json") + if cp.returncode == 0: + return set(json.loads(cp.stdout).keys()) + else: + raise CodeQLException(f"Failed to run {cp.args} command! {cp.stderr}") diff --git a/tests/bundle-diff.py b/tests/bundle-diff.py new file mode 100644 index 0000000..ad6e4cb --- /dev/null +++ b/tests/bundle-diff.py @@ -0,0 +1,50 @@ +from pathlib import Path + +def main(argv : list[str]) -> int: + if len(argv[1:]) != 2: + print("Usage: bundle-diff.py ") + return 1 + + added : set[Path] = set() + removed : set[Path] = set() + + bundle1 = Path(argv[1]) + if not bundle1.is_dir(): + print(f"Error: {bundle1} is not a directory") + return 1 + bundle2 = Path(argv[2]) + if not bundle2.is_dir(): + print(f"Error: {bundle2} is not a directory") + return 1 + + bundle1 = bundle1.absolute() + bundle2 = bundle2.absolute() + + for p in bundle1.glob("**/*"): + subpath = p.absolute().relative_to(bundle1) + #print(subpath) + if not set(subpath.parents).isdisjoint(removed): + continue + path_in_bundle2 = bundle2 / subpath + #print(path_in_bundle2) + if not path_in_bundle2.exists(): + removed.add(subpath) + + for p in bundle2.glob("**/*"): + subpath = p.absolute().relative_to(bundle2) + if not set(subpath.parents).isdisjoint(added): + continue + path_in_bundle1 = bundle1 / subpath + if not path_in_bundle1.exists(): + added.add(subpath) + + for p in sorted(added): + print(f"+ {p}") + + for p in sorted(removed): + print(f"- {p}") + return 0 + +if __name__ == "__main__": + import sys + sys.exit(main(sys.argv)) \ No newline at end of file