diff --git a/swift/extractor/BUILD.bazel b/swift/extractor/BUILD.bazel index b22841ca819b..bf5c9e65d520 100644 --- a/swift/extractor/BUILD.bazel +++ b/swift/extractor/BUILD.bazel @@ -2,14 +2,10 @@ load("//swift:rules.bzl", "swift_cc_binary") swift_cc_binary( name = "extractor", - srcs = [ - "SwiftOutputRewrite.cpp", - "SwiftOutputRewrite.h", - "SwiftExtractor.cpp", - "SwiftExtractor.h", - "SwiftExtractorConfiguration.h", - "main.cpp", - ], + srcs = glob([ + "*.h", + "*.cpp", + ]), visibility = ["//swift:__pkg__"], deps = [ "//swift/extractor/infra", diff --git a/swift/extractor/SwiftExtractor.cpp b/swift/extractor/SwiftExtractor.cpp index 8ad2fe128ac4..22f4f01a470a 100644 --- a/swift/extractor/SwiftExtractor.cpp +++ b/swift/extractor/SwiftExtractor.cpp @@ -14,7 +14,7 @@ #include "swift/extractor/trap/generated/TrapClasses.h" #include "swift/extractor/trap/TrapDomain.h" #include "swift/extractor/visitors/SwiftVisitor.h" -#include "swift/extractor/infra/TargetFile.h" +#include "swift/extractor/TargetTrapFile.h" using namespace codeql; using namespace std::string_literals; @@ -56,8 +56,10 @@ static std::string getFilename(swift::ModuleDecl& module, swift::SourceFile* pri } // PCM clang module if (module.isNonSwiftModule()) { - // Several modules with different name might come from .pcm (clang module) files + // Several modules with different names might come from .pcm (clang module) files // In this case we want to differentiate them + // Moreover, pcm files may come from caches located in different directories, but are + // unambiguously identified by the base file name, so we can discard the absolute directory std::string filename = "/pcms/"s + llvm::sys::path::filename(module.getModuleFilename()).str(); filename += "-"; filename += module.getName().str(); @@ -78,20 +80,6 @@ static llvm::SmallVector getTopLevelDecls(swift::ModuleDecl& modul return ret; } -static void dumpArgs(TargetFile& out, const SwiftExtractorConfiguration& config) { - out << "/* extractor-args:\n"; - for (const auto& opt : config.frontendOptions) { - out << " " << std::quoted(opt) << " \\\n"; - } - out << "\n*/\n"; - - out << "/* swift-frontend-args:\n"; - for (const auto& opt : config.patchedFrontendOptions) { - out << " " << std::quoted(opt) << " \\\n"; - } - out << "\n*/\n"; -} - static void extractDeclarations(const SwiftExtractorConfiguration& config, swift::CompilerInstance& compiler, swift::ModuleDecl& module, @@ -101,12 +89,11 @@ static void extractDeclarations(const SwiftExtractorConfiguration& config, // The extractor can be called several times from different processes with // the same input file(s). Using `TargetFile` the first process will win, and the following // will just skip the work - auto trapTarget = TargetFile::create(filename + ".trap", config.trapDir, config.getTempTrapDir()); + auto trapTarget = createTargetTrapFile(config, filename); if (!trapTarget) { // another process arrived first, nothing to do for us return; } - dumpArgs(*trapTarget, config); TrapDomain trap{*trapTarget}; // TODO: remove this and recreate it with IPA when we have that @@ -171,21 +158,21 @@ void codeql::extractSwiftFiles(const SwiftExtractorConfiguration& config, auto modules = collectModules(compiler); for (auto& module : modules) { - // We only extract system and builtin modules here as the other "user" modules can be built - // during the build process and then re-used at a later stage. In this case, we extract the - // user code twice: once during the module build in a form of a source file, and then as - // a pre-built module during building of the dependent source files. - if (module->isSystemModule() || module->isBuiltinModule()) { - extractDeclarations(config, compiler, *module); - } else { - for (auto file : module->getFiles()) { - auto sourceFile = llvm::dyn_cast(file); - if (!sourceFile || inputFiles.count(sourceFile->getFilename().str()) == 0) { - continue; - } - archiveFile(config, *sourceFile); - extractDeclarations(config, compiler, *module, sourceFile); + bool isFromSourceFile = false; + for (auto file : module->getFiles()) { + auto sourceFile = llvm::dyn_cast(file); + if (!sourceFile) { + continue; + } + isFromSourceFile = true; + if (inputFiles.count(sourceFile->getFilename().str()) == 0) { + continue; } + archiveFile(config, *sourceFile); + extractDeclarations(config, compiler, *module, sourceFile); + } + if (!isFromSourceFile) { + extractDeclarations(config, compiler, *module); } } } diff --git a/swift/extractor/SwiftExtractorConfiguration.h b/swift/extractor/SwiftExtractorConfiguration.h index 3365da5f268b..cd4ed51cdcd4 100644 --- a/swift/extractor/SwiftExtractorConfiguration.h +++ b/swift/extractor/SwiftExtractorConfiguration.h @@ -3,6 +3,8 @@ #include #include +#include "swift/extractor/infra/TargetFile.h" + namespace codeql { struct SwiftExtractorConfiguration { // The location for storing TRAP files to be imported by CodeQL engine. @@ -33,4 +35,5 @@ struct SwiftExtractorConfiguration { // overall extraction process. std::string getTempArtifactDir() const { return scratchDir + "/swift-extraction-artifacts"; } }; + } // namespace codeql diff --git a/swift/extractor/SwiftOutputRewrite.cpp b/swift/extractor/SwiftOutputRewrite.cpp index 35a38512ff8f..b09a558d1873 100644 --- a/swift/extractor/SwiftOutputRewrite.cpp +++ b/swift/extractor/SwiftOutputRewrite.cpp @@ -1,5 +1,6 @@ #include "SwiftOutputRewrite.h" #include "swift/extractor/SwiftExtractorConfiguration.h" +#include "swift/extractor/TargetTrapFile.h" #include #include @@ -163,7 +164,7 @@ static std::vector computeModuleAliases(llvm::StringRef modulePath, namespace codeql { std::unordered_map rewriteOutputsInPlace( - SwiftExtractorConfiguration& config, + const SwiftExtractorConfiguration& config, std::vector& CLIArgs) { std::unordered_map remapping; @@ -324,4 +325,17 @@ std::vector collectVFSFiles(const SwiftExtractorConfiguration& conf return overlays; } +void lockOutputSwiftModuleTraps(const SwiftExtractorConfiguration& config, + const std::unordered_map& remapping) { + for (const auto& [oldPath, newPath] : remapping) { + if (llvm::StringRef(oldPath).endswith(".swiftmodule")) { + if (auto target = createTargetTrapFile(config, oldPath)) { + *target << "// trap file deliberately empty\n" + "// this swiftmodule was created during the build, so its entities must have" + " been extracted directly from source files"; + } + } + } +} + } // namespace codeql diff --git a/swift/extractor/SwiftOutputRewrite.h b/swift/extractor/SwiftOutputRewrite.h index b7ee7fa38293..94f1eeb6aaab 100644 --- a/swift/extractor/SwiftOutputRewrite.h +++ b/swift/extractor/SwiftOutputRewrite.h @@ -13,7 +13,7 @@ struct SwiftExtractorConfiguration; // artifacts produced by the actual Swift compiler. // Returns the map containing remapping oldpath -> newPath. std::unordered_map rewriteOutputsInPlace( - SwiftExtractorConfiguration& config, + const SwiftExtractorConfiguration& config, std::vector& CLIArgs); // Create directories for all the redirected new paths as the Swift compiler expects them to exist. @@ -29,4 +29,7 @@ void storeRemappingForVFS(const SwiftExtractorConfiguration& config, // This is separate from storeRemappingForVFS as we also collect files produced by other processes. std::vector collectVFSFiles(const SwiftExtractorConfiguration& config); +// Creates empty trap files for output swiftmodule files +void lockOutputSwiftModuleTraps(const SwiftExtractorConfiguration& config, + const std::unordered_map& remapping); } // namespace codeql diff --git a/swift/extractor/TargetTrapFile.cpp b/swift/extractor/TargetTrapFile.cpp new file mode 100644 index 000000000000..2275575ecfa7 --- /dev/null +++ b/swift/extractor/TargetTrapFile.cpp @@ -0,0 +1,23 @@ +#include "swift/extractor/TargetTrapFile.h" +#include +namespace codeql { +std::optional createTargetTrapFile(const SwiftExtractorConfiguration& configuration, + std::string_view target) { + std::string trap{target}; + trap += ".trap"; + auto ret = TargetFile::create(trap, configuration.trapDir, configuration.getTempTrapDir()); + if (ret) { + *ret << "/* extractor-args:\n"; + for (const auto& opt : configuration.frontendOptions) { + *ret << " " << std::quoted(opt) << " \\\n"; + } + *ret << "\n*/\n" + "/* swift-frontend-args:\n"; + for (const auto& opt : configuration.patchedFrontendOptions) { + *ret << " " << std::quoted(opt) << " \\\n"; + } + *ret << "\n*/\n"; + } + return ret; +} +} // namespace codeql diff --git a/swift/extractor/TargetTrapFile.h b/swift/extractor/TargetTrapFile.h new file mode 100644 index 000000000000..eb8de4c206f5 --- /dev/null +++ b/swift/extractor/TargetTrapFile.h @@ -0,0 +1,11 @@ +#pragma once + +#include "swift/extractor/infra/TargetFile.h" +#include "swift/extractor/SwiftExtractorConfiguration.h" + +namespace codeql { + +std::optional createTargetTrapFile(const SwiftExtractorConfiguration& configuration, + std::string_view target); + +} // namespace codeql diff --git a/swift/extractor/main.cpp b/swift/extractor/main.cpp index bde37b0ccb5e..5f9afef4e812 100644 --- a/swift/extractor/main.cpp +++ b/swift/extractor/main.cpp @@ -68,6 +68,7 @@ int main(int argc, char** argv) { codeql::rewriteOutputsInPlace(configuration, configuration.patchedFrontendOptions); codeql::ensureDirectoriesForNewPathsExist(remapping); codeql::storeRemappingForVFS(configuration, remapping); + codeql::lockOutputSwiftModuleTraps(configuration, remapping); std::vector args; for (auto& arg : configuration.patchedFrontendOptions) { diff --git a/swift/integration-tests/posix-only/partial-modules/Modules.expected b/swift/integration-tests/posix-only/partial-modules/Modules.expected index 4c738975f343..3cdcff9b9804 100644 --- a/swift/integration-tests/posix-only/partial-modules/Modules.expected +++ b/swift/integration-tests/posix-only/partial-modules/Modules.expected @@ -1,4 +1,5 @@ | file://:0:0:0:0 | A | | file://:0:0:0:0 | B | +| file://:0:0:0:0 | PackageDescription | | file://:0:0:0:0 | main | | file://:0:0:0:0 | partial_modules | diff --git a/swift/integration-tests/runner.py b/swift/integration-tests/runner.py index b9e39325fd96..77a62bfb81ba 100755 --- a/swift/integration-tests/runner.py +++ b/swift/integration-tests/runner.py @@ -62,6 +62,8 @@ def main(opts): ] if opts.check_databases: cmd.append("--check-databases") + else: + cmd.append("--no-check-databases") if opts.learn: cmd.append("--learn") cmd.extend(str(t.parent) for t in succesful_db_creation)