Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions misc/bazel/workspace.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,14 @@ def codeql_workspace(repository_name = "codeql"):
sha256 = sha256,
)

http_archive(
name = "fishhook",
url = "https://github.com/facebook/fishhook/archive/aadc161ac3b80db07a9908851839a17ba63a9eb1.zip",
build_file = "@%s//swift/tools/fishhook:BUILD.fishhook.bazel" % repository_name,
strip_prefix = "fishhook-aadc161ac3b80db07a9908851839a17ba63a9eb1",
sha256 = "9f2cdee6dcc2039d4c47d25ab5141fe0678ce6ed27ef482cab17fe9fa38a30ce",
)

maybe(
repo_rule = http_archive,
name = "rules_pkg",
Expand Down
1 change: 1 addition & 0 deletions swift/extractor/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ swift_cc_binary(
deps = [
"//swift/extractor/infra",
"//swift/extractor/visitors",
"//swift/extractor/remapping",
"//swift/tools/prebuilt:swift-llvm-support",
],
)
1 change: 0 additions & 1 deletion swift/extractor/SwiftExtractor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,6 @@ void codeql::extractSwiftFiles(const SwiftExtractorConfiguration& config,
while (!todo.empty()) {
auto module = todo.back();
todo.pop_back();
llvm::errs() << "processing module " << module->getName() << '\n';
bool isFromSourceFile = false;
std::unordered_set<swift::ModuleDecl*> encounteredModules;
for (auto file : module->getFiles()) {
Expand Down
2 changes: 1 addition & 1 deletion swift/extractor/SwiftOutputRewrite.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#include "SwiftOutputRewrite.h"
#include "swift/extractor/SwiftOutputRewrite.h"
#include "swift/extractor/SwiftExtractorConfiguration.h"
#include "swift/extractor/TargetTrapFile.h"

Expand Down
1 change: 1 addition & 0 deletions swift/extractor/SwiftOutputRewrite.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,5 @@ std::vector<std::string> collectVFSFiles(const SwiftExtractorConfiguration& conf
// Creates empty trap files for output swiftmodule files
void lockOutputSwiftModuleTraps(const SwiftExtractorConfiguration& config,
const std::unordered_map<std::string, std::string>& remapping);

} // namespace codeql
20 changes: 9 additions & 11 deletions swift/extractor/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@
#include <swift/Basic/LLVMInitialize.h>
#include <swift/FrontendTool/FrontendTool.h>

#include "SwiftExtractor.h"
#include "SwiftOutputRewrite.h"
#include "swift/extractor/SwiftExtractor.h"
#include "swift/extractor/SwiftOutputRewrite.h"
#include "swift/extractor/remapping/SwiftOpenInterception.h"

using namespace std::string_literals;

Expand All @@ -21,14 +22,6 @@ class Observer : public swift::FrontendObserver {
public:
explicit Observer(const codeql::SwiftExtractorConfiguration& config) : config{config} {}

void parsedArgs(swift::CompilerInvocation& invocation) override {
auto& overlays = invocation.getSearchPathOptions().VFSOverlayFiles;
auto vfsFiles = codeql::collectVFSFiles(config);
for (auto& vfsFile : vfsFiles) {
overlays.push_back(vfsFile);
}
}

void performedSemanticAnalysis(swift::CompilerInstance& compiler) override {
codeql::extractSwiftFiles(config, compiler);
}
Expand All @@ -49,6 +42,7 @@ int main(int argc, char** argv) {
// TODO: print usage
return 1;
}

// Required by Swift/LLVM
PROGRAM_START(argc, argv);
INITIALIZE_LLVM();
Expand All @@ -58,6 +52,8 @@ int main(int argc, char** argv) {
configuration.sourceArchiveDir = getenv_or("CODEQL_EXTRACTOR_SWIFT_SOURCE_ARCHIVE_DIR", ".");
configuration.scratchDir = getenv_or("CODEQL_EXTRACTOR_SWIFT_SCRATCH_DIR", ".");

codeql::initRemapping(configuration.getTempArtifactDir());

configuration.frontendOptions.reserve(argc - 1);
for (int i = 1; i < argc; i++) {
configuration.frontendOptions.push_back(argv[i]);
Expand All @@ -67,7 +63,6 @@ int main(int argc, char** argv) {
auto remapping =
codeql::rewriteOutputsInPlace(configuration, configuration.patchedFrontendOptions);
codeql::ensureDirectoriesForNewPathsExist(remapping);
codeql::storeRemappingForVFS(configuration, remapping);
codeql::lockOutputSwiftModuleTraps(configuration, remapping);

std::vector<const char*> args;
Expand All @@ -77,5 +72,8 @@ int main(int argc, char** argv) {

Observer observer(configuration);
int frontend_rc = swift::performFrontend(args, "swift-extractor", (void*)main, &observer);

codeql::finalizeRemapping(remapping);

return frontend_rc;
}
23 changes: 23 additions & 0 deletions swift/extractor/remapping/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
load("//swift:rules.bzl", "swift_cc_library")

# Library providing `initRemapping`/`finalizeRemapping` (declared in SwiftOpenInterception.h).
# Exactly one platform-specific implementation file is compiled in.
swift_cc_library(
    name = "remapping",
    # NOTE(review): neither `select` below has a `//conditions:default` branch, so only
    # Linux and macOS targets can be configured -- confirm this is intended.
    srcs = select({
        "@platforms//os:linux": [
            "SwiftOpenInterception.Linux.cpp",
        ],
        "@platforms//os:macos": [
            "SwiftOpenInterception.macOS.cpp",
        ],
    }),
    hdrs = glob(["*.h"]),
    visibility = ["//swift:__subpackages__"],
    deps = [
        "//swift/tools/prebuilt:swift-llvm-support",
    ] + select({
        "@platforms//os:linux": [],
        # fishhook is only needed by the macOS implementation, which includes <fishhook.h>.
        "@platforms//os:macos": [
            "@fishhook//:fishhook",
        ],
    }),
)
8 changes: 8 additions & 0 deletions swift/extractor/remapping/SwiftOpenInterception.Linux.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#include "swift/extractor/remapping/SwiftOpenInterception.h"

namespace codeql {
// Linux implementation of the remapping interface: not implemented yet (TBD).
// Both entry points are intentionally empty, so calling them has no effect on this platform.
void initRemapping(const std::string& dir) {}
void finalizeRemapping(const std::unordered_map<std::string, std::string>& mapping) {}

} // namespace codeql
11 changes: 11 additions & 0 deletions swift/extractor/remapping/SwiftOpenInterception.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#pragma once

#include <string>
#include <unordered_map>

namespace codeql {

// Sets up interception of file opening, with `dir` as the directory holding remapped artifacts.
// The macOS implementation stores `dir` and rebinds `open` through fishhook; the Linux
// implementation is currently an empty stub.
void initRemapping(const std::string& dir);

// Takes a map from original output paths to their patched counterparts.
// On macOS, for each entry whose original path ends in `.swiftmodule`, the patched file is linked
// into the directory given to `initRemapping` under the hash of the original file, and
// interception is switched off afterwards. On Linux this is currently an empty stub.
void finalizeRemapping(const std::unordered_map<std::string, std::string>& mapping);

} // namespace codeql
82 changes: 82 additions & 0 deletions swift/extractor/remapping/SwiftOpenInterception.macOS.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
#include "swift/extractor/remapping/SwiftOpenInterception.h"
#include <fishhook.h>
#include <llvm/Support/raw_ostream.h>
#include <llvm/Support/FileSystem.h>
#include <llvm/Support/Path.h>
#include <fcntl.h>
#include <unistd.h>

namespace codeql {

// Directory in which hash-named links to remapped `.swiftmodule` files are created and looked up.
// Assigned by initRemapping(); read by finalizeRemapping() and, only while interceptionEnabled is
// true, by codeql_open().
static std::string scratchDir;
Copy link
Contributor

@redsun82 redsun82 Sep 16, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think there might be a use-after-destructor problem with this, in case open is called after exit by some other destructor of an object with static storage duration. Even though this is not happening now, we cannot really guarantee this won't happen. For example, what if the swift library initializes some non-POD static global that writes down diagnostics at the end of the program?

We could either:

  • undo the interception more or less at the time of calling remapArtifacts, putting original_open back to open
  • use a static const char* for the scratchDir, making codeql_open automatically fall back to original_open if scratchDir == nullptr, and set it back to nullptr in remapArtifacts. The SwiftExtractorConfiguration object owning the scratch dir string will be alive between calls to initInterception and remapArtifacts

(or maybe both for maximum cleanness)

I'm always wary of using non-const globals (which is probably unavoidable here because of having to interface with C via free functions), but the wariness doubles when using non-POD globals (const or non const) because of the subtle bugs that can happen during end of program destructor calls. So if there is a way to avoid that I would rather take it, and go for the second option above.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a very good point and it would've been a nightmare to catch/debug afterwards!

However, SwiftExtractorConfiguration doesn't really own the scratch dir as we changed tempArtifactDir to be a function returning a new string every time it's called 😅. We can easily work around this limitation, but IMO having a char* is way too fragile.

I'll see what's the best way to rebind the functions back.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, good point on the const char lifetime!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you go for a RAII class you could use the lifetime of that class to make sure the raw pointer is alive while required.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I played a bit with the options and the best suggestion I have is to add a boolean flag, though as I'm writing I'm wondering if it should be an atomic boolean? 🤔

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good question, does llvm or the swift frontend fire up threads? Even if they do, are they still not joined when we execute our code?

Another option could be maybe to make scratchDir indestructible. Declare it as

static const std::string* scratchDir = nullptr;

and initialize it with

scratchDir = new std::string(dir);

and just fuggedaboutit... but I'm ok with this solution. We will probably need to come back to this code if we generalize the interception to Linux.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, all the available options are more like "which footgun shall we pick" 😄

// Gate for the path rewriting done in codeql_open(): enabled by initRemapping() and cleared at the
// end of finalizeRemapping(). While false, codeql_open() does not touch scratchDir.
static bool interceptionEnabled = false;

// Pointer used by this file to reach the non-intercepted `open`. Its address is handed to
// fishhook's rebind_symbols() in initRemapping() as the slot for the replaced function; it is
// null until then.
static int (*original_open)(const char*, int, ...) = nullptr;

// Returns the MD5 digest (as text) of the contents of `filename`, or an empty string when the
// file cannot be opened or its contents cannot be hashed.
//
// The file is opened through `original_open` so that hashing bypasses `codeql_open`. When
// `original_open` has not been filled in (no rebinding took place), the plain `open` is used
// instead of calling through a null function pointer.
static std::string fileHash(const std::string& filename) {
  const int fd = original_open != nullptr ? original_open(filename.c_str(), O_RDONLY)
                                          : ::open(filename.c_str(), O_RDONLY);
  if (fd == -1) {
    return {};
  }
  auto maybeMD5 = llvm::sys::fs::md5_contents(fd);
  // The descriptor is only needed for hashing; release it before inspecting the result.
  close(fd);
  if (!maybeMD5) {
    return {};
  }
  return maybeMD5->digest().str().str();
}

// Replacement for `open` that is installed over the real one by initRemapping().
//
// While interception is enabled, opening an existing file whose name ends in `.swiftmodule` is
// redirected to `<scratchDir>/<hash of the file contents>` when such a file exists. In every other
// case the call is forwarded unchanged to `original_open`.
//
// path:  path handed to `open` by the caller; forwarded as-is when null.
// oflag: open flags; the optional third (mode) argument is only read when O_CREAT is set.
// Returns whatever `original_open` returns.
static int codeql_open(const char* path, int oflag, ...) {
  mode_t mode = 0;
  if ((oflag & O_CREAT) != 0) {
    // mode only applies to O_CREAT
    va_list ap;
    va_start(ap, oflag);
    mode = va_arg(ap, int);
    va_end(ap);
  }

  // A null path must reach the real `open` untouched (constructing a std::string from it would be
  // undefined behaviour), and with interception disabled there is nothing to rewrite.
  if (path == nullptr || !interceptionEnabled) {
    return original_open(path, oflag, mode);
  }

  std::string newPath(path);

  // TODO: check file magic instead
  if (llvm::sys::fs::exists(newPath) && llvm::StringRef(newPath).endswith(".swiftmodule")) {
    auto hash = fileHash(newPath);
    if (!hash.empty()) {
      auto hashed = scratchDir + "/" + hash;
      if (llvm::sys::fs::exists(hashed)) {
        newPath = hashed;
      }
    }
  }

  return original_open(newPath.c_str(), oflag, mode);
}

// Publishes the patched `.swiftmodule` outputs under content-hash names and then turns the
// interception off.
//
// mapping: original output path -> patched output path. Entries whose original path does not end
// in `.swiftmodule` are ignored. For the remaining ones, the original file is hashed and, if the
// hash is available and the patched file exists, the patched file is linked to
// `<scratchDir>/<hash>`. A failure to create the link is reported on stderr and otherwise ignored.
void finalizeRemapping(const std::unordered_map<std::string, std::string>& mapping) {
  for (const auto& [source, redirected] : mapping) {
    // TODO: Check file magic instead
    const bool isSwiftModule = llvm::StringRef(source).endswith(".swiftmodule");
    if (!isSwiftModule) {
      continue;
    }

    const auto digest = fileHash(source);
    const auto linkPath = scratchDir + "/" + digest;
    if (digest.empty() || !llvm::sys::fs::exists(redirected)) {
      continue;
    }

    std::error_code ec = llvm::sys::fs::create_link(/* from */ redirected, /* to */ linkPath);
    if (ec) {
      llvm::errs() << "Cannot remap file '" << redirected << "' -> '" << linkPath
                   << "': " << ec.message() << "\n";
    }
  }
  // From here on codeql_open() forwards every call untouched.
  interceptionEnabled = false;
}

// Installs the `open` interception.
//
// dir: directory in which hash-named links to remapped files live; it is copied into `scratchDir`.
//
// `open` is rebound to `codeql_open` via fishhook, with `original_open` receiving the replaced
// function. Interception is only switched on when the rebinding reported success and
// `original_open` was actually filled in; otherwise it stays off.
void initRemapping(const std::string& dir) {
  scratchDir = dir;

  struct rebinding binding[] = {
      {"open", reinterpret_cast<void*>(codeql_open), reinterpret_cast<void**>(&original_open)}};
  // rebind_symbols() returns 0 on success; do not ignore a failure.
  const bool rebound = rebind_symbols(binding, 1) == 0;
  interceptionEnabled = rebound && original_open != nullptr;
}

} // namespace codeql
Empty file.
7 changes: 7 additions & 0 deletions swift/tools/fishhook/BUILD.fishhook.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Build definition for the fishhook sources, used as the `build_file` of the `fishhook`
# http_archive: every C source and header at the archive root goes into one public library.
cc_library(
    name = "fishhook",
    srcs = glob(["*.c"]),
    hdrs = glob(["*.h"]),
    # Lets dependents include the header as <fishhook.h>.
    strip_include_prefix = ".",
    visibility = ["//visibility:public"],
)