From c638789f3e4a1b26c27174e128fa9d6ffca48533 Mon Sep 17 00:00:00 2001 From: Alex Denisov Date: Mon, 12 Sep 2022 10:17:45 +0200 Subject: [PATCH 1/4] Swift: open(2) interception --- misc/bazel/workspace.bzl | 9 +++ swift/extractor/BUILD.bazel | 1 + swift/extractor/SwiftExtractor.cpp | 1 - swift/extractor/SwiftOutputRewrite.cpp | 2 +- swift/extractor/SwiftOutputRewrite.h | 1 + swift/extractor/main.cpp | 20 +++-- swift/extractor/remapping/BUILD.bazel | 23 ++++++ .../remapping/SwiftOpenInterception.Linux.cpp | 8 ++ .../remapping/SwiftOpenInterception.h | 11 +++ .../remapping/SwiftOpenInterception.macOS.cpp | 79 +++++++++++++++++++ swift/tools/fishhook/BUILD.bazel | 0 swift/tools/fishhook/BUILD.fishhook.bazel | 9 +++ 12 files changed, 151 insertions(+), 13 deletions(-) create mode 100644 swift/extractor/remapping/BUILD.bazel create mode 100644 swift/extractor/remapping/SwiftOpenInterception.Linux.cpp create mode 100644 swift/extractor/remapping/SwiftOpenInterception.h create mode 100644 swift/extractor/remapping/SwiftOpenInterception.macOS.cpp create mode 100644 swift/tools/fishhook/BUILD.bazel create mode 100644 swift/tools/fishhook/BUILD.fishhook.bazel diff --git a/misc/bazel/workspace.bzl b/misc/bazel/workspace.bzl index 7b89bff5693d..082cc68a9228 100644 --- a/misc/bazel/workspace.bzl +++ b/misc/bazel/workspace.bzl @@ -1,5 +1,6 @@ load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") load("@bazel_tools//tools/build_defs/repo:utils.bzl", "maybe") +load("@bazel_tools//tools/build_defs/repo:git.bzl", "new_git_repository") _swift_prebuilt_version = "swift-5.6-RELEASE.42271.54" _swift_sha_map = { @@ -55,3 +56,11 @@ def codeql_workspace(repository_name = "codeql"): "https://github.com/bazelbuild/rules_python/archive/refs/tags/0.8.1.tar.gz", ], ) + + new_git_repository( + name = "fishhook", + commit = "aadc161ac3b80db07a9908851839a17ba63a9eb1", + shallow_since = "1634071885 -0400", + build_file = "//swift/tools/fishhook:BUILD.fishhook.bazel", + remote = 
"https://github.com/facebook/fishhook", + ) diff --git a/swift/extractor/BUILD.bazel b/swift/extractor/BUILD.bazel index bf5c9e65d520..28d89d920de4 100644 --- a/swift/extractor/BUILD.bazel +++ b/swift/extractor/BUILD.bazel @@ -10,6 +10,7 @@ swift_cc_binary( deps = [ "//swift/extractor/infra", "//swift/extractor/visitors", + "//swift/extractor/remapping", "//swift/tools/prebuilt:swift-llvm-support", ], ) diff --git a/swift/extractor/SwiftExtractor.cpp b/swift/extractor/SwiftExtractor.cpp index c6473eca4153..4d0ec6794263 100644 --- a/swift/extractor/SwiftExtractor.cpp +++ b/swift/extractor/SwiftExtractor.cpp @@ -184,7 +184,6 @@ void codeql::extractSwiftFiles(const SwiftExtractorConfiguration& config, while (!todo.empty()) { auto module = todo.back(); todo.pop_back(); - llvm::errs() << "processing module " << module->getName() << '\n'; bool isFromSourceFile = false; std::unordered_set encounteredModules; for (auto file : module->getFiles()) { diff --git a/swift/extractor/SwiftOutputRewrite.cpp b/swift/extractor/SwiftOutputRewrite.cpp index b09a558d1873..95315776aa0c 100644 --- a/swift/extractor/SwiftOutputRewrite.cpp +++ b/swift/extractor/SwiftOutputRewrite.cpp @@ -1,4 +1,4 @@ -#include "SwiftOutputRewrite.h" +#include "swift/extractor/SwiftOutputRewrite.h" #include "swift/extractor/SwiftExtractorConfiguration.h" #include "swift/extractor/TargetTrapFile.h" diff --git a/swift/extractor/SwiftOutputRewrite.h b/swift/extractor/SwiftOutputRewrite.h index 94f1eeb6aaab..e8a61254a54c 100644 --- a/swift/extractor/SwiftOutputRewrite.h +++ b/swift/extractor/SwiftOutputRewrite.h @@ -32,4 +32,5 @@ std::vector collectVFSFiles(const SwiftExtractorConfiguration& conf // Creates empty trap files for output swiftmodule files void lockOutputSwiftModuleTraps(const SwiftExtractorConfiguration& config, const std::unordered_map& remapping); + } // namespace codeql diff --git a/swift/extractor/main.cpp b/swift/extractor/main.cpp index 5f9afef4e812..b5b4aab36a64 100644 --- 
a/swift/extractor/main.cpp +++ b/swift/extractor/main.cpp @@ -9,8 +9,9 @@ #include #include -#include "SwiftExtractor.h" -#include "SwiftOutputRewrite.h" +#include "swift/extractor/SwiftExtractor.h" +#include "swift/extractor/SwiftOutputRewrite.h" +#include "swift/extractor/remapping/SwiftOpenInterception.h" using namespace std::string_literals; @@ -21,14 +22,6 @@ class Observer : public swift::FrontendObserver { public: explicit Observer(const codeql::SwiftExtractorConfiguration& config) : config{config} {} - void parsedArgs(swift::CompilerInvocation& invocation) override { - auto& overlays = invocation.getSearchPathOptions().VFSOverlayFiles; - auto vfsFiles = codeql::collectVFSFiles(config); - for (auto& vfsFile : vfsFiles) { - overlays.push_back(vfsFile); - } - } - void performedSemanticAnalysis(swift::CompilerInstance& compiler) override { codeql::extractSwiftFiles(config, compiler); } @@ -49,6 +42,7 @@ int main(int argc, char** argv) { // TODO: print usage return 1; } + // Required by Swift/LLVM PROGRAM_START(argc, argv); INITIALIZE_LLVM(); @@ -58,6 +52,8 @@ int main(int argc, char** argv) { configuration.sourceArchiveDir = getenv_or("CODEQL_EXTRACTOR_SWIFT_SOURCE_ARCHIVE_DIR", "."); configuration.scratchDir = getenv_or("CODEQL_EXTRACTOR_SWIFT_SCRATCH_DIR", "."); + codeql::initInterception(configuration.getTempArtifactDir()); + configuration.frontendOptions.reserve(argc - 1); for (int i = 1; i < argc; i++) { configuration.frontendOptions.push_back(argv[i]); @@ -67,7 +63,6 @@ int main(int argc, char** argv) { auto remapping = codeql::rewriteOutputsInPlace(configuration, configuration.patchedFrontendOptions); codeql::ensureDirectoriesForNewPathsExist(remapping); - codeql::storeRemappingForVFS(configuration, remapping); codeql::lockOutputSwiftModuleTraps(configuration, remapping); std::vector args; @@ -77,5 +72,8 @@ int main(int argc, char** argv) { Observer observer(configuration); int frontend_rc = swift::performFrontend(args, "swift-extractor", (void*)main, 
&observer); + + codeql::remapArtifacts(remapping); + return frontend_rc; } diff --git a/swift/extractor/remapping/BUILD.bazel b/swift/extractor/remapping/BUILD.bazel new file mode 100644 index 000000000000..7a74b4213ec1 --- /dev/null +++ b/swift/extractor/remapping/BUILD.bazel @@ -0,0 +1,23 @@ +load("//swift:rules.bzl", "swift_cc_library") + +swift_cc_library( + name = "remapping", + srcs = select({ + "@platforms//os:linux": [ + "SwiftOpenInterception.Linux.cpp", + ], + "@platforms//os:macos": [ + "SwiftOpenInterception.macOS.cpp", + ], + }), + hdrs = glob(["*.h"]), + visibility = ["//swift:__subpackages__"], + deps = [ + "//swift/tools/prebuilt:swift-llvm-support", + ] + select({ + "@platforms//os:linux": [], + "@platforms//os:macos": [ + "@fishhook//:fishhook", + ], + }), +) diff --git a/swift/extractor/remapping/SwiftOpenInterception.Linux.cpp b/swift/extractor/remapping/SwiftOpenInterception.Linux.cpp new file mode 100644 index 000000000000..9ed799e4ad63 --- /dev/null +++ b/swift/extractor/remapping/SwiftOpenInterception.Linux.cpp @@ -0,0 +1,8 @@ +#include "swift/extractor/remapping/SwiftOpenInterception.h" + +namespace codeql { +// TBD +void remapArtifacts(const std::unordered_map& mapping) {} +void initInterception(const std::string& dir) {} + +} // namespace codeql diff --git a/swift/extractor/remapping/SwiftOpenInterception.h b/swift/extractor/remapping/SwiftOpenInterception.h new file mode 100644 index 000000000000..6130c145aa2f --- /dev/null +++ b/swift/extractor/remapping/SwiftOpenInterception.h @@ -0,0 +1,11 @@ +#pragma once + +#include +#include + +namespace codeql { + +void initInterception(const std::string& dir); +void remapArtifacts(const std::unordered_map& mapping); + +} // namespace codeql diff --git a/swift/extractor/remapping/SwiftOpenInterception.macOS.cpp b/swift/extractor/remapping/SwiftOpenInterception.macOS.cpp new file mode 100644 index 000000000000..28a112c9fe85 --- /dev/null +++ 
b/swift/extractor/remapping/SwiftOpenInterception.macOS.cpp @@ -0,0 +1,79 @@ +#include "swift/extractor/remapping/SwiftOpenInterception.h" +#include +#include +#include +#include +#include +#include + +namespace codeql { + +static std::string scratchDir; + +static int (*original_open)(const char*, int, ...) = nullptr; + +static std::string fileHash(const std::string& filename) { + int fd = original_open(filename.c_str(), O_RDONLY); + if (fd == -1) { + return {}; + } + auto maybeMD5 = llvm::sys::fs::md5_contents(fd); + close(fd); + if (!maybeMD5) { + return {}; + } + return maybeMD5->digest().str().str(); +} + +static int codeql_open(const char* path, int oflag, ...) { + va_list ap = {0}; + mode_t mode = 0; + if ((oflag & O_CREAT) != 0) { + // mode only applies to O_CREAT + va_start(ap, oflag); + mode = va_arg(ap, int); + va_end(ap); + } + + std::string newPath(path); + + if (llvm::sys::fs::exists(newPath)) { + // TODO: check file magic instead + if (llvm::StringRef(newPath).endswith(".swiftmodule")) { + auto hash = fileHash(newPath); + auto hashed = scratchDir + "/" + hash; + if (!hash.empty() && llvm::sys::fs::exists(hashed)) { + newPath = hashed; + } + } + } + + return original_open(newPath.c_str(), oflag, mode); +} + +void remapArtifacts(const std::unordered_map& mapping) { + for (auto& [original, patched] : mapping) { + // TODO: Check file magic instead + if (!llvm::StringRef(original).endswith(".swiftmodule")) { + continue; + } + auto hash = fileHash(original); + auto hashed = scratchDir + "/" + hash; + if (!hash.empty() && llvm::sys::fs::exists(patched)) { + if (std::error_code ec = llvm::sys::fs::create_link(/* from */ patched, /* to */ hashed)) { + llvm::errs() << "Cannot remap file '" << patched << "' -> '" << hashed + << "': " << ec.message() << "\n"; + } + } + } +} + +void initInterception(const std::string& dir) { + scratchDir = dir; + + struct rebinding binding[] = { + {"open", reinterpret_cast(codeql_open), reinterpret_cast(&original_open)}}; + 
rebind_symbols(binding, 1); +} + +} // namespace codeql diff --git a/swift/tools/fishhook/BUILD.bazel b/swift/tools/fishhook/BUILD.bazel new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/swift/tools/fishhook/BUILD.fishhook.bazel b/swift/tools/fishhook/BUILD.fishhook.bazel new file mode 100644 index 000000000000..ffb5e0d1dec4 --- /dev/null +++ b/swift/tools/fishhook/BUILD.fishhook.bazel @@ -0,0 +1,9 @@ +load("@//swift:rules.bzl", "swift_cc_library") + +swift_cc_library( + name = "fishhook", + srcs = glob(["*.c"]), + hdrs = glob(["*.h"]), + strip_include_prefix = ".", + visibility = ["@//swift:__subpackages__"], +) From d6d8480b2acb49861d8e28106da5841a2c0e524c Mon Sep 17 00:00:00 2001 From: Alex Denisov Date: Fri, 16 Sep 2022 12:59:43 +0200 Subject: [PATCH 2/4] Swift: fix internal builds --- misc/bazel/workspace.bzl | 2 +- swift/tools/fishhook/BUILD.fishhook.bazel | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/misc/bazel/workspace.bzl b/misc/bazel/workspace.bzl index 082cc68a9228..d9af835afee4 100644 --- a/misc/bazel/workspace.bzl +++ b/misc/bazel/workspace.bzl @@ -61,6 +61,6 @@ def codeql_workspace(repository_name = "codeql"): name = "fishhook", commit = "aadc161ac3b80db07a9908851839a17ba63a9eb1", shallow_since = "1634071885 -0400", - build_file = "//swift/tools/fishhook:BUILD.fishhook.bazel", + build_file = "@%s//swift/tools/fishhook:BUILD.fishhook.bazel" % repository_name, remote = "https://github.com/facebook/fishhook", ) diff --git a/swift/tools/fishhook/BUILD.fishhook.bazel b/swift/tools/fishhook/BUILD.fishhook.bazel index ffb5e0d1dec4..5919f608d65f 100644 --- a/swift/tools/fishhook/BUILD.fishhook.bazel +++ b/swift/tools/fishhook/BUILD.fishhook.bazel @@ -1,9 +1,7 @@ -load("@//swift:rules.bzl", "swift_cc_library") - -swift_cc_library( +cc_library( name = "fishhook", srcs = glob(["*.c"]), hdrs = glob(["*.h"]), strip_include_prefix = ".", - visibility = ["@//swift:__subpackages__"], + visibility = ["//visibility:public"], 
) From 3c12644ab180e553e0a04be17e610ddaa1d31144 Mon Sep 17 00:00:00 2001 From: Alex Denisov Date: Mon, 19 Sep 2022 10:37:26 +0200 Subject: [PATCH 3/4] Swift: add a guard around hashing to avoid use-after-destructor --- swift/extractor/main.cpp | 4 ++-- .../extractor/remapping/SwiftOpenInterception.Linux.cpp | 4 ++-- swift/extractor/remapping/SwiftOpenInterception.h | 4 ++-- .../extractor/remapping/SwiftOpenInterception.macOS.cpp | 9 ++++++--- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/swift/extractor/main.cpp b/swift/extractor/main.cpp index b5b4aab36a64..d65b093a5cf1 100644 --- a/swift/extractor/main.cpp +++ b/swift/extractor/main.cpp @@ -52,7 +52,7 @@ int main(int argc, char** argv) { configuration.sourceArchiveDir = getenv_or("CODEQL_EXTRACTOR_SWIFT_SOURCE_ARCHIVE_DIR", "."); configuration.scratchDir = getenv_or("CODEQL_EXTRACTOR_SWIFT_SCRATCH_DIR", "."); - codeql::initInterception(configuration.getTempArtifactDir()); + codeql::initRemapping(configuration.getTempArtifactDir()); configuration.frontendOptions.reserve(argc - 1); for (int i = 1; i < argc; i++) { @@ -73,7 +73,7 @@ int main(int argc, char** argv) { Observer observer(configuration); int frontend_rc = swift::performFrontend(args, "swift-extractor", (void*)main, &observer); - codeql::remapArtifacts(remapping); + codeql::finalizeRemapping(remapping); return frontend_rc; } diff --git a/swift/extractor/remapping/SwiftOpenInterception.Linux.cpp b/swift/extractor/remapping/SwiftOpenInterception.Linux.cpp index 9ed799e4ad63..5afe28efe362 100644 --- a/swift/extractor/remapping/SwiftOpenInterception.Linux.cpp +++ b/swift/extractor/remapping/SwiftOpenInterception.Linux.cpp @@ -2,7 +2,7 @@ namespace codeql { // TBD -void remapArtifacts(const std::unordered_map& mapping) {} -void initInterception(const std::string& dir) {} +void initRemapping(const std::string& dir) {} +void finalizeRemapping(const std::unordered_map& mapping) {} } // namespace codeql diff --git
a/swift/extractor/remapping/SwiftOpenInterception.h b/swift/extractor/remapping/SwiftOpenInterception.h index 6130c145aa2f..1b22d31ca394 100644 --- a/swift/extractor/remapping/SwiftOpenInterception.h +++ b/swift/extractor/remapping/SwiftOpenInterception.h @@ -5,7 +5,7 @@ namespace codeql { -void initInterception(const std::string& dir); -void remapArtifacts(const std::unordered_map& mapping); +void initRemapping(const std::string& dir); +void finalizeRemapping(const std::unordered_map& mapping); } // namespace codeql diff --git a/swift/extractor/remapping/SwiftOpenInterception.macOS.cpp b/swift/extractor/remapping/SwiftOpenInterception.macOS.cpp index 28a112c9fe85..a0f306aa4f3a 100644 --- a/swift/extractor/remapping/SwiftOpenInterception.macOS.cpp +++ b/swift/extractor/remapping/SwiftOpenInterception.macOS.cpp @@ -9,6 +9,7 @@ namespace codeql { static std::string scratchDir; +static bool interceptionEnabled = false; static int (*original_open)(const char*, int, ...) = nullptr; @@ -37,7 +38,7 @@ static int codeql_open(const char* path, int oflag, ...) { std::string newPath(path); - if (llvm::sys::fs::exists(newPath)) { + if (interceptionEnabled && llvm::sys::fs::exists(newPath)) { // TODO: check file magic instead if (llvm::StringRef(newPath).endswith(".swiftmodule")) { auto hash = fileHash(newPath); @@ -51,7 +52,7 @@ static int codeql_open(const char* path, int oflag, ...) 
{ return original_open(newPath.c_str(), oflag, mode); } -void remapArtifacts(const std::unordered_map& mapping) { +void finalizeRemapping(const std::unordered_map& mapping) { for (auto& [original, patched] : mapping) { // TODO: Check file magic instead if (!llvm::StringRef(original).endswith(".swiftmodule")) { @@ -66,14 +67,16 @@ void remapArtifacts(const std::unordered_map& mapping) } } } + interceptionEnabled = false; } -void initInterception(const std::string& dir) { +void initRemapping(const std::string& dir) { scratchDir = dir; struct rebinding binding[] = { {"open", reinterpret_cast(codeql_open), reinterpret_cast(&original_open)}}; rebind_symbols(binding, 1); + interceptionEnabled = true; } } // namespace codeql From 9401eda8da6ff68933a0731dd28af0c149733e53 Mon Sep 17 00:00:00 2001 From: Alex Denisov Date: Tue, 20 Sep 2022 08:38:27 +0200 Subject: [PATCH 4/4] Swift: use http_archive instead of new_git_repository since it's faster --- misc/bazel/workspace.bzl | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/misc/bazel/workspace.bzl b/misc/bazel/workspace.bzl index d9af835afee4..3231cd4eaa6e 100644 --- a/misc/bazel/workspace.bzl +++ b/misc/bazel/workspace.bzl @@ -1,6 +1,5 @@ load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") load("@bazel_tools//tools/build_defs/repo:utils.bzl", "maybe") -load("@bazel_tools//tools/build_defs/repo:git.bzl", "new_git_repository") _swift_prebuilt_version = "swift-5.6-RELEASE.42271.54" _swift_sha_map = { @@ -27,6 +26,14 @@ def codeql_workspace(repository_name = "codeql"): sha256 = sha256, ) + http_archive( + name = "fishhook", + url = "https://github.com/facebook/fishhook/archive/aadc161ac3b80db07a9908851839a17ba63a9eb1.zip", + build_file = "@%s//swift/tools/fishhook:BUILD.fishhook.bazel" % repository_name, + strip_prefix = "fishhook-aadc161ac3b80db07a9908851839a17ba63a9eb1", + sha256 = "9f2cdee6dcc2039d4c47d25ab5141fe0678ce6ed27ef482cab17fe9fa38a30ce", + ) + maybe( repo_rule = 
http_archive, name = "rules_pkg", @@ -56,11 +63,3 @@ def codeql_workspace(repository_name = "codeql"): "https://github.com/bazelbuild/rules_python/archive/refs/tags/0.8.1.tar.gz", ], ) - - new_git_repository( - name = "fishhook", - commit = "aadc161ac3b80db07a9908851839a17ba63a9eb1", - shallow_since = "1634071885 -0400", - build_file = "@%s//swift/tools/fishhook:BUILD.fishhook.bazel" % repository_name, - remote = "https://github.com/facebook/fishhook", - )