Add a pull-request Semgrep scan that runs the repository's own rules
(.semgrep/) on changed documentation files, plus a local helper script
(tools/semgrep-repo-rules) that runs the same scan through Docker.

Review notes on this revision of the patch:
  * [skip semgrep] is now detected from the PR head commit via git, because
    pull_request event payloads carry no head_commit object.
  * The month pattern in .semgrep/dates-in-docs.yaml is a real alternation
    (it previously used "\|", a literal pipe, and omitted October).
  * The duplicated --include "*.mdx" flag now covers "*.md" and "*.mdx".
  * tools/semgrep-repo-rules exits (instead of "return") when pushd/popd fail.
  * The blob hashes on the "index" lines are carried over from the original
    patch and do not reflect the edited contents.

diff --git a/.github/workflows/semgrep.yml b/.github/workflows/semgrep.yml
index ae5ca2fc1b8e009..b3f7e8a5edc29f6 100644
--- a/.github/workflows/semgrep.yml
+++ b/.github/workflows/semgrep.yml
@@ -2,12 +2,15 @@ on:
   workflow_dispatch: {}
   schedule:
     - cron: "0 4 * * *"
+  pull_request: {}
+
 name: Semgrep config
 permissions:
   contents: read
+
 jobs:
   semgrep:
-    name: semgrep/ci
+    name: semgrep
     runs-on: ubuntu-latest
     env:
       SEMGREP_APP_TOKEN: ${{ secrets.SEMGREP_APP_TOKEN }}
@@ -18,4 +21,45 @@ jobs:
       image: semgrep/semgrep
     steps:
       - uses: actions/checkout@v4
-      - run: semgrep ci
+        with:
+          # Fetch full history so the merge base with the PR's base branch can be computed.
+          fetch-depth: 0
+
+      # Scheduled (cron) and manually dispatched runs:
+      # scan with the managed rules at cloudflare.semgrep.dev.
+      - name: Semgrep CI Rules (Managed rules at cloudflare.semgrep.dev)
+        if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
+        run: semgrep ci
+
+      # Pull request runs: scan changed files with the rules in .semgrep/.
+      # Findings are reported but do not fail the job (pass --error to change that).
+      # Put [skip semgrep] in the PR's head commit message to skip the scan.
+      - name: Semgrep Repo Rules (Custom rules found in .semgrep/)
+        if: github.event_name == 'pull_request'
+        env:
+          # pull_request payloads carry no head_commit, so the commit message
+          # is looked up from git using the PR head SHA instead.
+          PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
+        run: |
+          git config --global --add safe.directory "$PWD"
+
+          if git log -1 --format=%B "$PR_HEAD_SHA" | grep -qF '[skip semgrep]'; then
+            echo "Skipping scan: [skip semgrep] found in head commit message."
+            exit 0
+          fi
+
+          base_commit=$(git merge-base HEAD "origin/$GITHUB_BASE_REF")
+          git diff "$base_commit"... --diff-filter=ACMRT --name-only | grep -E '\.(htm|html|yaml|yml|md|mdx)$' > tools/relevant_changed_files.txt || true
+
+          # Only invoke Semgrep when at least one relevant file changed.
+          if [ -s tools/relevant_changed_files.txt ]; then
+            # $list_of_files is expanded unquoted below so each path becomes
+            # its own argument (paths containing whitespace are not supported).
+            list_of_files=$(tr '\n' ' ' < tools/relevant_changed_files.txt)
+            semgrep scan \
+              --config .semgrep --metrics=off \
+              --include "*.md" --include "*.mdx" \
+              $list_of_files
+          else
+            echo "No relevant files changed."
+          fi
diff --git a/.gitignore b/.gitignore
index a62898367df8949..8dfacd8c570751a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -30,3 +30,5 @@ pnpm-debug.log*
 /worker/functions/
 
 .idea
+
+tools/relevant_changed_files.txt
diff --git a/.semgrep/dates-in-docs.yaml b/.semgrep/dates-in-docs.yaml
new file mode 100644
index 000000000000000..fe7be10339392af
--- /dev/null
+++ b/.semgrep/dates-in-docs.yaml
@@ -0,0 +1,43 @@
+rules:
+  - id: coming-soon
+    languages: [generic]
+    message: "Found forbidden string 'coming soon'. Too often we set expectations unfairly by attaching this phrase to a feature that may not actually arrive soon."
+    severity: MEDIUM
+    paths:
+      include:
+        - "*.htm"
+        - "*.html"
+        - "*.md"
+        - "*.mdx"
+        - "*.yaml"
+        - "*.yml"
+      exclude:
+        - "/src/content/changelog/**"
+        - "/src/content/release-notes/**"
+        - "/.semgrep/**"
+        - "/.github/**"
+    patterns:
+      - pattern-regex: "[Cc]oming [Ss]oon"
+
+  - id: potential-date
+    languages: [generic]
+    message: "Potential date found. Documentation should strive to represent universal truth, not something time-bound."
+    severity: MEDIUM
+    paths:
+      include:
+        - "*.htm"
+        - "*.html"
+        - "*.md"
+        - "*.mdx"
+        - "*.yaml"
+        - "*.yml"
+      exclude:
+        - "/src/content/changelog/**"
+        - "/src/content/release-notes/**"
+        - "/.semgrep/**"
+        - "/.github/**"
+    pattern-either:
+      # Month names, abbreviated or in full, matched as whole words.
+      - pattern-regex: '\b(Jan(uary)?|Feb(ruary)?|Mar(ch)?|Apr(il)?|May|June?|July?|Aug(ust)?|Sep(t(ember)?)?|Oct(ober)?|Nov(ember)?|Dec(ember)?)\b'
+      # A space followed by a four-digit number starting with 20 (for example " 2024").
+      - pattern-regex: '\ 20[0-9][0-9]'
diff --git a/tools/semgrep-repo-rules b/tools/semgrep-repo-rules
new file mode 100755
index 000000000000000..b7eeac6a76526a9
--- /dev/null
+++ b/tools/semgrep-repo-rules
@@ -0,0 +1,32 @@
+#! /bin/bash
+#
+# Runs the repository's Semgrep rules (.semgrep/) in Docker against files
+# changed relative to origin/production, plus changes in the working tree.
+
+repo_root_dir="$(git rev-parse --show-toplevel)"
+
+# This script is executed, not sourced, so bail out with exit rather than return.
+pushd "${repo_root_dir}" > /dev/null || exit 1
+
+base_commit=$(git merge-base HEAD origin/production)
+git diff "$base_commit"... --diff-filter=ACMRT --name-only | grep -E '\.(htm|html|yaml|yml|md|mdx)$' > tools/relevant_changed_files.txt || true
+
+# Also pick up changes in the working tree (CI only sees committed changes).
+git diff --diff-filter=ACMRT --name-only | grep -E '\.(htm|html|yaml|yml|md|mdx)$' >> tools/relevant_changed_files.txt || true
+
+if [ -s tools/relevant_changed_files.txt ]; then
+  # $list_of_files is expanded unquoted below so each path becomes its own
+  # argument (paths containing whitespace are not supported).
+  list_of_files=$(tr '\n' ' ' < tools/relevant_changed_files.txt)
+
+  docker run --rm -v "${PWD}:/src" semgrep/semgrep \
+    semgrep scan \
+    --config .semgrep --metrics=off \
+    --include "*.md" --include "*.mdx" \
+    --force-color \
+    $list_of_files
+else
+  echo "No relevant files changed."
+fi
+
+popd > /dev/null || exit 1