From 0151acd02f7490a3a1e0d074bdd9535ffa147795 Mon Sep 17 00:00:00 2001
From: Krzysztof Modras
Date: Tue, 21 Apr 2026 11:26:14 +0200
Subject: [PATCH] Add validate-dnr-rules CLI tool with cross-platform CI

Standalone tool for validating Declarative Net Request rulesets against
WebKit's content extension translator. Ships prebuilt Linux x64 and
macOS arm64 binaries from a GitHub Actions release workflow so Ghostery
engineers can vet rulesets without a full WebKit build.
---
 .github/workflows/validate-dnr-rules.yml   | 133 ++++++++++++++
 CMakeLists.txt                             |   7 +
 Tools/Scripts/validate-dnr-rules           | 195 +++++++++++++++++++++
 ghostery/.gitignore                        |   1 +
 ghostery/validate-dnr-rules/CMakeLists.txt |  64 +++++++
 ghostery/validate-dnr-rules/src/config.h   |   6 +
 ghostery/validate-dnr-rules/src/main.cpp   | 155 ++++++++++++++++
 ghostery/validate-dnr-rules/src/stubs.cpp  |  49 ++++++
 8 files changed, 610 insertions(+)
 create mode 100644 .github/workflows/validate-dnr-rules.yml
 create mode 100755 Tools/Scripts/validate-dnr-rules
 create mode 100644 ghostery/.gitignore
 create mode 100644 ghostery/validate-dnr-rules/CMakeLists.txt
 create mode 100644 ghostery/validate-dnr-rules/src/config.h
 create mode 100644 ghostery/validate-dnr-rules/src/main.cpp
 create mode 100644 ghostery/validate-dnr-rules/src/stubs.cpp

diff --git a/.github/workflows/validate-dnr-rules.yml b/.github/workflows/validate-dnr-rules.yml
new file mode 100644
index 000000000000..a97a3b3ce9eb
--- /dev/null
+++ b/.github/workflows/validate-dnr-rules.yml
@@ -0,0 +1,133 @@
+name: Build validate-dnr-rules
+
+on:
+  push:
+    branches: [ghostery]
+  pull_request:
+    branches: [ghostery]
+  workflow_dispatch:
+
+jobs:
+  build:
+    strategy:
+      fail-fast: false  # let the other platform finish even if one fails
+      matrix:
+        include:
+          - os: ubuntu-24.04
+            name: linux-x64
+            deps: cmake ninja-build pkg-config ruby unifdef libicu-dev g++ perl python3
+          - os: macos-15
+            name: macos-arm64
+            deps: cmake ninja icu4c pkg-config
+    runs-on: ${{ matrix.os }}
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 1  # shallow clone
+
+      - name: Install dependencies (Linux)
+        if: runner.os == 'Linux'
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y --no-install-recommends ${{ matrix.deps }}
+
+      - name: Install dependencies (macOS)
+        if: runner.os == 'macOS'
+        run: brew install ${{ matrix.deps }}
+
+      - name: Cache CMake build
+        uses: actions/cache@v4
+        with:
+          path: build
+          key: cmake-${{ matrix.name }}-${{ hashFiles('Source/WTF/**', 'Source/WebCore/contentextensions/**', 'ghostery/validate-dnr-rules/**') }}
+          restore-keys: |
+            cmake-${{ matrix.name }}-
+
+      - name: Configure
+        run: |
+          cmake -B build -G Ninja \
+            -DCMAKE_BUILD_TYPE=Release \
+            -DPORT=JSCOnly \
+            -DUSE_SYSTEM_MALLOC=ON \
+            .
+        env:
+          CMAKE_PREFIX_PATH: ${{ runner.os == 'macOS' && '/opt/homebrew/opt/icu4c' || '' }}
+
+      - name: Build
+        run: cmake --build build --target validate-dnr-rules
+
+      - name: Test
+        run: |
+          cat > /tmp/valid-rules.json << 'RULES'
+          [
+            {"id":1,"priority":1,"action":{"type":"block"},"condition":{"regexFilter":"ads\\.example\\.com"}},
+            {"id":2,"priority":1,"action":{"type":"block"},"condition":{"urlFilter":"||tracker.example.com^"}}
+          ]
+          RULES
+          ./build/bin/validate-dnr-rules /tmp/valid-rules.json
+
+          cat > /tmp/invalid-rules.json << 'RULES'
+          [
+            {"id":1,"priority":1,"action":{"type":"block"},"condition":{"regexFilter":"ad[0-9]{2}\\.js"}},
+            {"id":2,"priority":1,"action":{"type":"block"},"condition":{"regexFilter":"(?:ads|tracking)\\.com"}}
+          ]
+          RULES
+          if ./build/bin/validate-dnr-rules /tmp/invalid-rules.json; then
+            echo "Expected validator to exit non-zero for invalid rules"
+            exit 1
+          fi
+
+      - name: Sign binary (macOS)
+        if: runner.os == 'macOS'
+        run: codesign --sign - --force build/bin/validate-dnr-rules  # "-" requests an ad-hoc signature
+
+      - name: Prepare artifact
+        run: |
+          cp build/bin/validate-dnr-rules validate-dnr-rules-${{ matrix.name }}
+          chmod +x validate-dnr-rules-${{ matrix.name }}
+
+      - name: Upload artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: validate-dnr-rules-${{ matrix.name }}
+          path: validate-dnr-rules-${{ matrix.name }}
+
+  release:
+    needs: build
+    if: github.event_name == 'push' && github.ref == 'refs/heads/ghostery'
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write  # required to create and delete releases
+
+    steps:
+      - name: Download all artifacts
+        uses: actions/download-artifact@v4
+        with:
+          path: artifacts
+
+      - name: Create release
+        env:
+          GH_TOKEN: ${{ github.token }}
+          GH_REPO: ${{ github.repository }}
+        run: |
+          TAG="validate-dnr-rules-$(date +%Y%m%d)-${GITHUB_SHA::8}"
+
+          # Delete existing release with same tag if re-running
+          gh release delete "$TAG" --yes 2>/dev/null || true
+          # Tag the commit that was built; without --target gh tags the default branch tip.
+          gh release create "$TAG" --target "$GITHUB_SHA" \
+            --title "validate-dnr-rules $(date +%Y-%m-%d)" \
+            --notes "Automated build from commit ${GITHUB_SHA::8}.
+
+          ## Downloads
+          - **Linux x64**: \`validate-dnr-rules-linux-x64\`
+          - **macOS arm64**: \`validate-dnr-rules-macos-arm64\` (Intel Macs: run via Rosetta)
+
+          ## Usage
+          \`\`\`
+          chmod +x validate-dnr-rules-*
+          ./validate-dnr-rules-linux-x64 path/to/dnr-rules.json
+          \`\`\`" \
+            artifacts/validate-dnr-rules-linux-x64/validate-dnr-rules-linux-x64 \
+            artifacts/validate-dnr-rules-macos-arm64/validate-dnr-rules-macos-arm64
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e3a915b46e63..31ac73a37c32 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -53,6 +53,13 @@ if (DEVELOPER_MODE)
     add_subdirectory(PerformanceTests)
 endif ()
 
+# -----------------------------------------------------------------------------
+# Ghostery tools
+# -----------------------------------------------------------------------------
+if (EXISTS "${CMAKE_SOURCE_DIR}/ghostery/validate-dnr-rules/CMakeLists.txt")
+    add_subdirectory(ghostery/validate-dnr-rules)
+endif ()
+
 # -----------------------------------------------------------------------------
 # Print the features list last, for maximum visibility.
# -----------------------------------------------------------------------------
diff --git a/Tools/Scripts/validate-dnr-rules b/Tools/Scripts/validate-dnr-rules
new file mode 100755
index 000000000000..6c13622ed42c
--- /dev/null
+++ b/Tools/Scripts/validate-dnr-rules
@@ -0,0 +1,195 @@
+#!/bin/bash
+# validate-dnr-rules - Validate Declarative Net Request rulesets using WebKit's translator
+#
+# Usage: validate-dnr-rules [--compile] [ ...]
+#
+# Runs DNR rule files through WebKit's translation pipeline and reports errors.
+# With --compile, also compiles translated rules to content blocker bytecode.
+#
+# Requires: WebKit built with Tools/Scripts/build-webkit --debug
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+SOURCE_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
+
+COMPILE=0
+FILES=()
+
+for arg in "$@"; do
+    case "$arg" in
+        --compile) COMPILE=1 ;;
+        --help|-h)
+            echo "Usage: validate-dnr-rules [--compile] [ ...]"
+            echo ""
+            echo "Validates DNR rulesets using WebKit's native translator pipeline."
+            echo "Reports translation errors (unsupported regex, invalid rules, etc)."
+            echo ""
+            echo "Options:"
+            echo " --compile Also compile translated rules to content blocker bytecode"
+            echo " --help Show this help"
+            exit 0
+            ;;
+        *) FILES+=("$arg") ;;
+    esac
+done
+
+if [ ${#FILES[@]} -eq 0 ]; then
+    echo "Error: No input files specified. Use --help for usage." >&2
+    exit 1
+fi
+
+BUILD_DIR="$("$SCRIPT_DIR/webkit-build-directory" --configuration=Debug --top-level 2>/dev/null || true)"
+if [ -z "$BUILD_DIR" ]; then
+    BUILD_DIR="$SOURCE_ROOT/WebKitBuild"
+fi
+
+FRAMEWORK_DIR="$BUILD_DIR/Debug"
+
+if [ ! -d "$FRAMEWORK_DIR/WebKit.framework" ]; then
+    echo "Error: WebKit.framework not found at $FRAMEWORK_DIR" >&2
+    echo "Build WebKit first: Tools/Scripts/build-webkit --debug" >&2
+    exit 1
+fi
+
+# BSD/macOS mktemp only substitutes trailing X's, so keep the suffixed names in a private temp dir.
+TOOL_DIR="$(mktemp -d /tmp/validate-dnr-XXXXXX)"
+trap 'rm -rf "$TOOL_DIR"' EXIT
+TOOL_SRC="$TOOL_DIR/validate-dnr.mm"
+TOOL_BIN="$TOOL_DIR/validate-dnr"
+cat > "$TOOL_SRC" << 'OBJC_SOURCE'
+#import
+#import
+#import
+#import
+#import
+
+int main(int argc, const char *argv[]) {
+    @autoreleasepool {
+        BOOL doCompile = NO;
+        NSMutableArray *files = [NSMutableArray array];
+
+        for (int i = 1; i < argc; i++) {
+            NSString *arg = [NSString stringWithUTF8String:argv[i]];
+            if ([arg isEqualToString:@"--compile"])
+                doCompile = YES;
+            else
+                [files addObject:arg];
+        }
+
+        int totalErrors = 0;
+
+        for (NSString *filePath in files) {
+            NSData *data = [NSData dataWithContentsOfFile:filePath];
+            if (!data) {
+                fprintf(stderr, "ERROR: Cannot read file: %s\n", filePath.UTF8String);
+                totalErrors++;
+                continue;
+            }
+
+            NSString *rulesetID = [[filePath lastPathComponent] stringByDeletingPathExtension];
+            NSDictionary *jsonDataDict = @{ rulesetID: data };
+
+            printf("=== %s ===\n", filePath.UTF8String);
+            printf("File size: %lu bytes\n", (unsigned long)data.length);
+
+            NSArray *jsonErrors = nil;
+            NSDictionary *allJSONObjects = [_WKWebExtensionDeclarativeNetRequestTranslator jsonObjectsFromData:jsonDataDict errorStrings:&jsonErrors];
+
+            if (jsonErrors.count > 0) {
+                printf("JSON deserialization errors: %lu\n", (unsigned long)jsonErrors.count);
+                for (NSString *error in jsonErrors) {
+                    printf(" ERROR: %s\n", error.UTF8String);
+                    totalErrors++;
+                }
+            }
+
+            NSUInteger ruleCount = 0;
+            for (NSString *key in allJSONObjects)
+                ruleCount += [allJSONObjects[key] count];
+            printf("Rules parsed: %lu\n", (unsigned long)ruleCount);
+
+            NSArray *translationErrors = nil;
+            NSArray *convertedRules = [_WKWebExtensionDeclarativeNetRequestTranslator translateRules:allJSONObjects errorStrings:&translationErrors];
+
+            printf("Rules translated: %lu\n", (unsigned long)convertedRules.count);
+
+            if (translationErrors.count > 0) {
+                printf("Translation errors: %lu\n", (unsigned long)translationErrors.count);
+                for (NSString *error in translationErrors) {
+                    printf(" ERROR: %s\n", error.UTF8String);
+                    totalErrors++;
+                }
+            }
+
+            if (doCompile && convertedRules.count > 0) {
+                NSError *jsonSerializationError = nil;
+                NSData *jsonData = [NSJSONSerialization dataWithJSONObject:convertedRules options:0 error:&jsonSerializationError];
+                if (jsonSerializationError) {
+                    printf(" ERROR: JSON serialization failed: %s\n", jsonSerializationError.localizedDescription.UTF8String);
+                    totalErrors++;
+                } else {
+                    NSString *jsonString = [[NSString alloc] initWithData:jsonData encoding:NSUTF8StringEncoding];
+                    printf("Content blocker JSON: %lu bytes\n", (unsigned long)jsonData.length);
+                    printf("Compiling...");
+                    fflush(stdout);
+
+                    __block BOOL done = NO;
+                    __block BOOL success = NO;
+                    __block NSString *compilationError = nil;
+
+                    NSDate *startTime = [NSDate date];
+
+                    [[WKContentRuleListStore defaultStore] compileContentRuleListForIdentifier:rulesetID encodedContentRuleList:jsonString completionHandler:^(WKContentRuleList *ruleList, NSError *error) {
+                        success = (ruleList != nil);
+                        if (error)
+                            compilationError = error.localizedDescription;
+                        done = YES;
+                    }];
+
+                    while (!done)
+                        [[NSRunLoop currentRunLoop] runMode:NSDefaultRunLoopMode beforeDate:[NSDate dateWithTimeIntervalSinceNow:0.1]];
+
+                    double elapsed = -[startTime timeIntervalSinceNow];
+
+                    if (success)
+                        printf(" OK (%.1f seconds)\n", elapsed);
+                    else {
+                        printf(" FAILED (%.1f seconds): %s\n", elapsed, compilationError.UTF8String);
+                        totalErrors++;
+                    }
+
+                    [[WKContentRuleListStore defaultStore] removeContentRuleListForIdentifier:rulesetID completionHandler:^(NSError *error) {}];
+                }
+            }
+
+            printf("\n");
+        }
+
+        if (totalErrors)
+            printf("FAILED: %d error(s) found.\n", totalErrors);
+        else
+            printf("OK: All rules validated successfully.\n");
+
+        return totalErrors ? 1 : 0;
+    }
+}
+OBJC_SOURCE
+
+clang -ObjC++ -std=c++20 -fobjc-arc -w \
+    -F"$FRAMEWORK_DIR" \
+    -framework WebKit -framework Foundation \
+    -lc++ \
+    -Wl,-rpath,"$FRAMEWORK_DIR" \
+    -o "$TOOL_BIN" "$TOOL_SRC"
+
+ARGS=()
+if [ "$COMPILE" -eq 1 ]; then
+    ARGS+=(--compile)
+fi
+
+for f in "${FILES[@]}"; do
+    ARGS+=("$(cd "$(dirname "$f")" && pwd)/$(basename "$f")")
+done
+
+DYLD_FRAMEWORK_PATH="$FRAMEWORK_DIR" "$TOOL_BIN" "${ARGS[@]}"
diff --git a/ghostery/.gitignore b/ghostery/.gitignore
new file mode 100644
index 000000000000..629c2cfebf23
--- /dev/null
+++ b/ghostery/.gitignore
@@ -0,0 +1 @@
+validate-dnr-rules/build/
diff --git a/ghostery/validate-dnr-rules/CMakeLists.txt b/ghostery/validate-dnr-rules/CMakeLists.txt
new file mode 100644
index 000000000000..6e461ed55de8
--- /dev/null
+++ b/ghostery/validate-dnr-rules/CMakeLists.txt
@@ -0,0 +1,64 @@
+# Included from the WebKit root CMakeLists.txt; requires the WTF target.
+# Build: cmake -B build -G Ninja -DPORT=JSCOnly -DUSE_SYSTEM_MALLOC=ON .
+#        cmake --build build --target validate-dnr-rules
+
+set(CE_DIR "${CMAKE_SOURCE_DIR}/Source/WebCore/contentextensions")
+set(JSC_DIR "${CMAKE_SOURCE_DIR}/Source/JavaScriptCore")
+set(TOOL_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
+
+set(CE_SOURCES
+    ${CE_DIR}/URLFilterParser.cpp
+    ${CE_DIR}/CombinedURLFilters.cpp
+    ${CE_DIR}/CombinedFiltersAlphabet.cpp
+    ${CE_DIR}/NFA.cpp
+)
+
+set(TOOL_SOURCES
+    ${TOOL_DIR}/src/main.cpp
+    ${TOOL_DIR}/src/stubs.cpp
+)
+
+add_executable(validate-dnr-rules ${TOOL_SOURCES} ${CE_SOURCES})
+
+target_compile_definitions(validate-dnr-rules PRIVATE
+    WEBCORE_EXPORT=
+    STATICALLY_LINKED_WITH_WTF=1
+)
+
+target_include_directories(validate-dnr-rules PRIVATE
+    ${TOOL_DIR}/src
+    ${CE_DIR}
+    ${WTF_DIR}
+    ${WTF_DIR}/wtf
+    ${CMAKE_BINARY_DIR}
+    ${WTF_DERIVED_SOURCES_DIR}
+)
+
+# Create an include bridge so WebCore/... and JavaScriptCore/... style includes resolve.
+# WebKit headers use flat include paths but the files live in subdirs.
+set(INCLUDE_BRIDGE_DIR "${CMAKE_BINARY_DIR}/validate-dnr-includes")
+file(MAKE_DIRECTORY "${INCLUDE_BRIDGE_DIR}/WebCore")
+file(MAKE_DIRECTORY "${INCLUDE_BRIDGE_DIR}/JavaScriptCore")
+
+file(GLOB CE_HEADERS "${CE_DIR}/*.h")
+foreach(H ${CE_HEADERS})
+    get_filename_component(HNAME ${H} NAME)
+    file(CREATE_LINK "${H}" "${INCLUDE_BRIDGE_DIR}/WebCore/${HNAME}" SYMBOLIC)
+endforeach()
+
+file(GLOB_RECURSE JSC_HEADERS "${JSC_DIR}/*.h")
+foreach(H ${JSC_HEADERS})
+    get_filename_component(HNAME ${H} NAME)
+    if(NOT EXISTS "${INCLUDE_BRIDGE_DIR}/JavaScriptCore/${HNAME}")
+        file(CREATE_LINK "${H}" "${INCLUDE_BRIDGE_DIR}/JavaScriptCore/${HNAME}" SYMBOLIC)
+    endif()
+endforeach()
+
+target_include_directories(validate-dnr-rules SYSTEM PRIVATE
+    ${INCLUDE_BRIDGE_DIR}
+)
+
+target_link_libraries(validate-dnr-rules PRIVATE WTF)
+
+find_package(ICU REQUIRED COMPONENTS uc)
+target_link_libraries(validate-dnr-rules PRIVATE ICU::uc)
diff --git a/ghostery/validate-dnr-rules/src/config.h b/ghostery/validate-dnr-rules/src/config.h
new file mode 100644
index 000000000000..a743a60e7255
--- /dev/null
+++ b/ghostery/validate-dnr-rules/src/config.h
@@ -0,0 +1,6 @@
+#pragma once
+
+#include
+
+#undef ENABLE_CONTENT_EXTENSIONS
+#define ENABLE_CONTENT_EXTENSIONS 1
diff --git a/ghostery/validate-dnr-rules/src/main.cpp b/ghostery/validate-dnr-rules/src/main.cpp
new file mode 100644
index 000000000000..86caa20d5d88
--- /dev/null
+++ b/ghostery/validate-dnr-rules/src/main.cpp
@@ -0,0 +1,167 @@
+#include "config.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+using namespace WebCore::ContentExtensions;
+
+// Mirrors -[_WKWebExtensionDeclarativeNetRequestRule _regexURLFilterForChromeURLFilter:]:
+// strip `||`/`|` anchors, escape regex metachars, expand `*` and `^`, re-apply anchors.
+static String regexFromURLFilter(String urlFilter)
+{
+    bool hasDomainAnchor = urlFilter.startsWith("||"_s);
+    if (hasDomainAnchor)
+        urlFilter = urlFilter.substring(2);
+
+    bool hasStartAnchor = !hasDomainAnchor && urlFilter.startsWith('|');
+    if (hasStartAnchor)
+        urlFilter = urlFilter.substring(1);
+
+    bool hasEndAnchor = urlFilter.endsWith('|');
+    if (hasEndAnchor)
+        urlFilter = urlFilter.left(urlFilter.length() - 1);
+
+    StringBuilder escaped;
+    for (unsigned i = 0; i < urlFilter.length(); ++i) {
+        char16_t c = urlFilter[i];
+        switch (c) {
+        case '?': case '+': case '[': case '(': case ')':
+        case '{': case '}': case '$': case '|': case '\\': case '.':
+            escaped.append('\\');
+        }
+        escaped.append(c);
+    }
+    String regex = escaped.toString();
+    regex = makeStringByReplacingAll(regex, '*', ".*"_s);
+    regex = makeStringByReplacingAll(regex, '^', "[^a-zA-Z0-9_.%-]"_s);
+
+    if (hasDomainAnchor)
+        regex = makeString("^[^:]+://+([^:/]+\\.)?"_s, regex);
+    if (hasStartAnchor)
+        regex = makeString('^', regex);
+    if (hasEndAnchor)
+        regex = makeString(regex, '$');
+
+    return regex;
+}
+
+// Validates every rule in the DNR ruleset at |path|, printing one line per rejected pattern.
+// Returns the number of rejected rules; an unreadable or malformed file counts as one error.
+static int validateFile(const char* path)
+{
+    FILE* fp = fopen(path, "rb");
+    if (!fp) {
+        fprintf(stderr, "ERROR: Cannot open file: %s\n", path);
+        return 1;
+    }
+
+    // Read until EOF instead of sizing the buffer with fseek()/ftell(): ftell()
+    // returns -1 for pipes and other non-seekable inputs, and a short or failed
+    // fread() would otherwise be reported as "Invalid JSON".
+    std::string contents;
+    char buffer[65536];
+    size_t bytesRead;
+    while ((bytesRead = fread(buffer, 1, sizeof(buffer), fp)) > 0)
+        contents.append(buffer, bytesRead);
+    bool readFailed = ferror(fp);
+    fclose(fp);
+    if (readFailed) {
+        fprintf(stderr, "ERROR: Cannot read file: %s\n", path);
+        return 1;
+    }
+
+    auto jsonString = String::fromUTF8(std::span(reinterpret_cast(contents.data()), contents.size()));
+    auto jsonValue = JSON::Value::parseJSON(jsonString);
+    if (!jsonValue) {
+        fprintf(stderr, "ERROR: Invalid JSON: %s\n", path);
+        return 1;
+    }
+
+    auto rulesArray = jsonValue->asArray();
+    if (!rulesArray) {
+        fprintf(stderr, "ERROR: JSON is not an array: %s\n", path);
+        return 1;
+    }
+
+    printf("=== %s ===\nRules: %zu\n", path, rulesArray->length());
+
+    int errors = 0;
+    int valid = 0;
+    int total = 0;
+
+    for (size_t i = 0; i < rulesArray->length(); ++i) {
+        // Non-object entries are skipped and not counted in the total.
+        auto ruleObject = rulesArray->get(i)->asObject();
+        if (!ruleObject)
+            continue;
+
+        total++;
+
+        auto conditionObject = ruleObject->getObject("condition"_s);
+        if (!conditionObject) {
+            valid++;
+            continue;
+        }
+
+        String pattern = conditionObject->getString("regexFilter"_s);
+        const char* kind = "regex";
+        String original = pattern;
+        if (pattern.isEmpty()) {
+            original = conditionObject->getString("urlFilter"_s);
+            if (original.isEmpty()) {
+                valid++;
+                continue;
+            }
+            pattern = regexFromURLFilter(original);
+            kind = "url";
+        }
+
+        bool caseSensitive = conditionObject->getBoolean("isUrlFilterCaseSensitive"_s).value_or(false);
+
+        // Constructed per rule so each pattern is parsed into its own filter set.
+        CombinedURLFilters combinedFilters;
+        URLFilterParser parser(combinedFilters);
+        auto status = parser.addPattern(pattern, caseSensitive, 0);
+
+        if (status == URLFilterParser::Ok || status == URLFilterParser::MatchesEverything) {
+            valid++;
+            continue;
+        }
+
+        errors++;
+        const int ruleId = ruleObject->getInteger("id"_s).value_or(-1);
+        printf(" ERROR: Rule %d [%s]: %s — %s\n", ruleId, kind,
+            URLFilterParser::statusString(status).characters(),
+            original.utf8().data());
+    }
+
+    printf("Valid: %d/%d\n\n", valid, total);
+    return errors;
+}
+
+// Validates each file named on the command line; exits 1 if any error was found, else 0.
+int main(int argc, const char* argv[])
+{
+    if (argc < 2) {
+        fprintf(stderr, "Usage: validate-dnr-rules [...]\n");
+        return 1;
+    }
+
+    int totalErrors = 0;
+    for (int i = 1; i < argc; i++)
+        totalErrors += validateFile(argv[i]);
+
+    if (totalErrors)
+        printf("FAILED: %d error(s) found.\n", totalErrors);
+    else
+        printf("OK: All rules validated successfully.\n");
+
+    return totalErrors ? 1 : 0;
+}
diff --git a/ghostery/validate-dnr-rules/src/stubs.cpp b/ghostery/validate-dnr-rules/src/stubs.cpp
new file mode 100644
index 000000000000..ed07bc7ff36a
--- /dev/null
+++ b/ghostery/validate-dnr-rules/src/stubs.cpp
@@ -0,0 +1,49 @@
+#include "config.h"
+#include
+#include
+#include
+
+// Gigacage/bmalloc stubs — not needed when using system malloc
+#include
+#include
+#include
+
+namespace Gigacage {
+void ensureGigacage() { }
+}
+
+extern "C" __attribute__((visibility("default"))) bool disablePrimitiveGigacageRequested = false;
+
+namespace bmalloc::api {
+void commitAlignedPhysical(void*, size_t, HeapKind) { }
+void decommitAlignedPhysical(void*, size_t, HeapKind) { }
+void disableScavenger() { }
+void enableMiniMode(bool) { }
+void forceEnablePGM(unsigned short) { }
+void freeLargeVirtual(void*, size_t, HeapKind) { }
+bool isEnabled(HeapKind) { return false; }
+void scavenge() { }
+void scavengeThisThread() { }
+void* tryLargeZeroedMemalignVirtual(size_t, size_t, CompactAllocationMode, HeapKind) { return nullptr; }
+}
+
+// YARR Unicode property stubs — only needed for \p{} property
+// escapes which DNR regexFilter patterns never use.
+namespace JSC::Yarr {
+
+bool characterClassMayContainStrings(BuiltInCharacterClassID)
+{
+    return false;
+}
+
+std::optional unicodeMatchProperty(WTF::String, CompileMode)
+{
+    return std::nullopt;
+}
+
+std::optional unicodeMatchPropertyValue(WTF::String, WTF::String)
+{
+    return std::nullopt;
+}
+
+} // namespace JSC::Yarr