From 62977dd6d96d926cc6a9eb94cf8b248d13bfdc6d Mon Sep 17 00:00:00 2001 From: Cefan Rubin Date: Thu, 20 Nov 2025 11:26:09 +0000 Subject: [PATCH 01/14] Try out an example custom semgrep stored in repo for scanning against pull requests --- .github/workflows/semgrep.yml | 19 +++++++++++++++++-- .semgrep/dates-in-docs.yaml | 18 ++++++++++++++++++ 2 files changed, 35 insertions(+), 2 deletions(-) create mode 100644 .semgrep/dates-in-docs.yaml diff --git a/.github/workflows/semgrep.yml b/.github/workflows/semgrep.yml index ae5ca2fc1b8e009..7e6c1a5496bcb1e 100644 --- a/.github/workflows/semgrep.yml +++ b/.github/workflows/semgrep.yml @@ -2,12 +2,15 @@ on: workflow_dispatch: {} schedule: - cron: "0 4 * * *" + pull_request: {} + name: Semgrep config permissions: contents: read + jobs: semgrep: - name: semgrep/ci + name: semgrep runs-on: ubuntu-latest env: SEMGREP_APP_TOKEN: ${{ secrets.SEMGREP_APP_TOKEN }} @@ -18,4 +21,16 @@ jobs: image: semgrep/semgrep steps: - uses: actions/checkout@v4 - - run: semgrep ci + + # Semgrep CI to run on Schedule (Cron) or Manual Dispatch + # scans using managed rules at cloudflare.semgrep.dev + - name: Semgrep CI (Managed at cloudflare.semgrep.dev) + if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' + run: semgrep ci + + # Semgrep Scan to run on Pull Request events + # scans using rules inside the .semgrep/ folder and fails on error + - name: Semgrep Local (Custom rules found in .semgrep) + if: github.event_name == 'pull_request' + # add '--error' below to return error code to workflow + run: semgrep scan --config .semgrep --include "*.mdx" --include "*.mdx" diff --git a/.semgrep/dates-in-docs.yaml b/.semgrep/dates-in-docs.yaml new file mode 100644 index 000000000000000..4bb6acbdba187d3 --- /dev/null +++ b/.semgrep/dates-in-docs.yaml @@ -0,0 +1,18 @@ +rules: + - id: coming-soon + languages: [html, yaml, generic] + message: "Found the forbidden string 'coming soon'" + severity: ERROR + paths: + include: + - "*.htm" + - "*.html" + - "*.md" + - "*.mdx" + - "*.yaml" + - "*.yml" + exclude: + - "src/content/changelog/**" + - "src/content/release-notes/**" + patterns: + - pattern-regex: "[Cc]oming [Ss]oon" From e21c4e162c0df37d288c9eb4643bb82b068812fc Mon Sep 17 00:00:00 2001 From: Cefan Rubin Date: Thu, 20 Nov 2025 11:36:15 +0000 Subject: [PATCH 02/14] disable metrics and root path to avoid warnings --- .github/workflows/semgrep.yml | 2 +- .semgrep/dates-in-docs.yaml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/semgrep.yml b/.github/workflows/semgrep.yml index 7e6c1a5496bcb1e..a42010eabbf7793 100644 --- a/.github/workflows/semgrep.yml +++ b/.github/workflows/semgrep.yml @@ -33,4 +33,4 @@ jobs: - name: Semgrep Local (Custom rules found in .semgrep) if: github.event_name == 'pull_request' # add '--error' below to return error code to workflow - run: semgrep scan --config .semgrep --include "*.mdx" --include "*.mdx" + run: semgrep scan --config .semgrep --metrics=off --include "*.mdx" --include "*.mdx" diff --git a/.semgrep/dates-in-docs.yaml b/.semgrep/dates-in-docs.yaml index 4bb6acbdba187d3..31f19f6467e5e48 100644 --- a/.semgrep/dates-in-docs.yaml +++ b/.semgrep/dates-in-docs.yaml @@ -12,7 +12,7 @@ rules: - "*.yaml" - "*.yml" exclude: - - "src/content/changelog/**" - - "src/content/release-notes/**" + - "/src/content/changelog/**" + - "/src/content/release-notes/**" patterns: - pattern-regex: "[Cc]oming [Ss]oon" From 159cbccd18a755b6d352e80b03d6898a68f3cbff Mon Sep 17 00:00:00 2001 From: Cefan Rubin Date: Thu, 20 Nov 2025 11:43:08 +0000 Subject: [PATCH 03/14] shifting this rule to only use the generic parser --- .semgrep/dates-in-docs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.semgrep/dates-in-docs.yaml b/.semgrep/dates-in-docs.yaml index 31f19f6467e5e48..f6683a7bb640a59 100644 --- a/.semgrep/dates-in-docs.yaml +++ b/.semgrep/dates-in-docs.yaml @@ -1,6 +1,6 @@ rules: - id: coming-soon - languages: [html, yaml, generic] + languages: [generic] message: "Found the forbidden string 'coming soon'" severity: ERROR paths: From f5fbe9fef2a532c9daa7782ff4d5cb82c6f59c11 Mon Sep 17 00:00:00 2001 From: Cefan Rubin Date: Thu, 20 Nov 2025 12:15:01 +0000 Subject: [PATCH 04/14] include a way to skip the local semgrep scan by including [skip semgrep] in commit message --- .github/workflows/semgrep.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/semgrep.yml b/.github/workflows/semgrep.yml index a42010eabbf7793..98a9649ac07e996 100644 --- a/.github/workflows/semgrep.yml +++ b/.github/workflows/semgrep.yml @@ -30,7 +30,8 @@ jobs: # Semgrep Scan to run on Pull Request events # scans using rules inside the .semgrep/ folder and fails on error + # include [skip semgrep] in top-most commit message to skip scan - name: Semgrep Local (Custom rules found in .semgrep) - if: github.event_name == 'pull_request' + if: github.event_name == 'pull_request' && !contains(github.event.head_commit.message, '[skip semgrep]') # add '--error' below to return error code to workflow run: semgrep scan --config .semgrep --metrics=off --include "*.mdx" --include "*.mdx" From fd30430d05e668f89cb70c270b8b25d8b7cdc35c Mon Sep 17 00:00:00 2001 From: Cefan Rubin Date: Thu, 20 Nov 2025 15:21:16 +0000 Subject: [PATCH 05/14] Trying to restrict the checks to the changes made for this PR only --- .github/workflows/semgrep.yml | 21 +++++++++++++++++---- .semgrep/dates-in-docs.yaml | 23 +++++++++++++++++++++-- 2 files changed, 38 insertions(+), 6 deletions(-) diff --git a/.github/workflows/semgrep.yml b/.github/workflows/semgrep.yml index 98a9649ac07e996..7bc8480cfa15ded 100644 --- a/.github/workflows/semgrep.yml +++ b/.github/workflows/semgrep.yml @@ -24,14 +24,27 @@ jobs: # Semgrep CI to run on Schedule (Cron) or Manual Dispatch # scans using managed rules at cloudflare.semgrep.dev - - name: Semgrep CI (Managed at cloudflare.semgrep.dev) + - name: Semgrep CI Rules (Managed rules at cloudflare.semgrep.dev) if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' run: semgrep ci # Semgrep Scan to run on Pull Request events # scans using rules inside the .semgrep/ folder and fails on error # include [skip semgrep] in top-most commit message to skip scan - - name: Semgrep Local (Custom rules found in .semgrep) + - name: Semgrep Repo Rules (Custom rules found in .semgrep/) if: github.event_name == 'pull_request' && !contains(github.event.head_commit.message, '[skip semgrep]') - # add '--error' below to return error code to workflow - run: semgrep scan --config .semgrep --metrics=off --include "*.mdx" --include "*.mdx" + run: | + git fetch origin ${{ github.base_ref }} + + git diff --name-only origin/${{ github.base_ref }} HEAD --diff-filter=ACMRT | grep -E '\.(htm|html|yaml|yml|md|mdx)$' > changed_files.txt || true + + # Check if file list is empty to prevent errors + if [ -s changed_files.txt ]; then + semgrep scan --config .semgrep --metrics=off \ + --include "*.mdx" --include "*.mdx" \ + --targets changed_files.txt + # add '--error' to return error code to workflow + semgrep scan --config .semgrep --error --targets changed_files.txt + else + echo "No relevant files changed." + fi diff --git a/.semgrep/dates-in-docs.yaml b/.semgrep/dates-in-docs.yaml index f6683a7bb640a59..c53bab0d5a22d27 100644 --- a/.semgrep/dates-in-docs.yaml +++ b/.semgrep/dates-in-docs.yaml @@ -1,8 +1,8 @@ rules: - id: coming-soon languages: [generic] - message: "Found the forbidden string 'coming soon'" - severity: ERROR + message: "Found forbidden string 'coming soon'. Too often we set expectations unfairly by attaching this phrase to a feature that is cancelled or a plan that changes." + severity: MEDIUM paths: include: - "*.htm" @@ -16,3 +16,22 @@ rules: - "/src/content/release-notes/**" patterns: - pattern-regex: "[Cc]oming [Ss]oon" + + - id: potential-date + languages: [generic] + message: "Potential date found. Documentation should strive to represent universal truth, not something time-bound." + severity: MEDIUM + paths: + include: + - "*.htm" + - "*.html" + - "*.md" + - "*.mdx" + - "*.yaml" + - "*.yml" + exclude: + - "/src/content/changelog/**" + - "/src/content/release-notes/**" + pattern-either: + - pattern-regex: Jan\| Feb\| Mar\| Apr\| May\| Jun\| Jul\| Aug\| Sep\| Nov\| Dec + - pattern-regex: \ 20[0-9][0-9] From 7c822f99d92667a1178cdb590dbeee480edef0cf Mon Sep 17 00:00:00 2001 From: Cefan Rubin Date: Thu, 20 Nov 2025 15:28:05 +0000 Subject: [PATCH 06/14] Trying again but with fetch-depth of 0 to get all of the history --- .github/workflows/semgrep.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/semgrep.yml b/.github/workflows/semgrep.yml index 7bc8480cfa15ded..17aad48e3769973 100644 --- a/.github/workflows/semgrep.yml +++ b/.github/workflows/semgrep.yml @@ -21,6 +21,9 @@ jobs: image: semgrep/semgrep steps: - uses: actions/checkout@v4 + with: + # fetch full history so Semgrep can compare against the base branch + fetch-depth: 0 # Semgrep CI to run on Schedule (Cron) or Manual Dispatch # scans using managed rules at cloudflare.semgrep.dev From d3a2ea5c5b6af3fe633478272a400e2dace33310 Mon Sep 17 00:00:00 2001 From: Cefan Rubin Date: Thu, 20 Nov 2025 15:31:56 +0000 Subject: [PATCH 07/14] Checkout path seems to be wrong, tryig without the fetch --- .github/workflows/semgrep.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/semgrep.yml b/.github/workflows/semgrep.yml index 17aad48e3769973..5c43b80ee461300 100644 --- a/.github/workflows/semgrep.yml +++ b/.github/workflows/semgrep.yml @@ -37,7 +37,7 @@ jobs: - name: Semgrep Repo Rules (Custom rules found in .semgrep/) if: github.event_name == 'pull_request' && !contains(github.event.head_commit.message, '[skip semgrep]') run: | - git fetch origin ${{ github.base_ref }} + #git fetch origin ${{ github.base_ref }} git diff --name-only origin/${{ github.base_ref }} HEAD --diff-filter=ACMRT | grep -E '\.(htm|html|yaml|yml|md|mdx)$' > changed_files.txt || true From 42500bda33071df268f65360ad67ae1ab9927030 Mon Sep 17 00:00:00 2001 From: Cefan Rubin Date: Mon, 24 Nov 2025 11:18:07 +0000 Subject: [PATCH 08/14] Parity between local script and github script --- .github/workflows/semgrep.yml | 9 +++++---- .gitignore | 2 ++ .semgrep/dates-in-docs.yaml | 2 ++ tools/semgrep-repo-rules | 17 +++++++++++++++++ 4 files changed, 26 insertions(+), 4 deletions(-) create mode 100755 tools/semgrep-repo-rules diff --git a/.github/workflows/semgrep.yml b/.github/workflows/semgrep.yml index 5c43b80ee461300..4a7ba73d3f279b3 100644 --- a/.github/workflows/semgrep.yml +++ b/.github/workflows/semgrep.yml @@ -39,15 +39,16 @@ jobs: run: | #git fetch origin ${{ github.base_ref }} - git diff --name-only origin/${{ github.base_ref }} HEAD --diff-filter=ACMRT | grep -E '\.(htm|html|yaml|yml|md|mdx)$' > changed_files.txt || true + git diff --name-only origin/production HEAD --diff-filter=ACMRT | grep -E '\.(htm|html|yaml|yml|md|mdx)$' > changed_files.txt || true + list_of_files=$(cat tools/changed_files.txt | tr '\n' ' ') # Check if file list is empty to prevent errors if [ -s changed_files.txt ]; then - semgrep scan --config .semgrep --metrics=off \ + semgrep scan \ + --config .semgrep --metrics=off \ --include "*.mdx" --include "*.mdx" \ - --targets changed_files.txt + $list_of_files # add '--error' to return error code to workflow - semgrep scan --config .semgrep --error --targets changed_files.txt else echo "No relevant files changed." fi diff --git a/.gitignore b/.gitignore index a62898367df8949..ff7de176d326dd6 100644 --- a/.gitignore +++ b/.gitignore @@ -30,3 +30,5 @@ pnpm-debug.log* /worker/functions/ .idea + +tools/changed_files.txt diff --git a/.semgrep/dates-in-docs.yaml b/.semgrep/dates-in-docs.yaml index c53bab0d5a22d27..a456d0b6ac0e2a0 100644 --- a/.semgrep/dates-in-docs.yaml +++ b/.semgrep/dates-in-docs.yaml @@ -14,6 +14,7 @@ rules: exclude: - "/src/content/changelog/**" - "/src/content/release-notes/**" + - "/.semgrep/**" patterns: - pattern-regex: "[Cc]oming [Ss]oon" @@ -32,6 +33,7 @@ rules: exclude: - "/src/content/changelog/**" - "/src/content/release-notes/**" + - "/.semgrep/**" pattern-either: - pattern-regex: Jan\| Feb\| Mar\| Apr\| May\| Jun\| Jul\| Aug\| Sep\| Nov\| Dec - pattern-regex: \ 20[0-9][0-9] diff --git a/tools/semgrep-repo-rules b/tools/semgrep-repo-rules new file mode 100755 index 000000000000000..4cf9a7c9a2ad1f7 --- /dev/null +++ b/tools/semgrep-repo-rules @@ -0,0 +1,17 @@ +#! /bin/bash + +repo_root_dir="$(git rev-parse --show-toplevel)" + +pushd "${repo_root_dir}" > /dev/null || return + +git diff --name-only origin/production HEAD --diff-filter=ACMRT | grep -E '\.(htm|html|yaml|yml|md|mdx)$' > tools/changed_files.txt || true + +list_of_files=$(cat tools/changed_files.txt | tr '\n' ' ') + +docker run --rm -v "${PWD}:/src" semgrep/semgrep \ + semgrep scan \ + --config .semgrep --metrics=off \ + --include "*.mdx" --include "*.mdx" \ + $list_of_files + +popd > /dev/null || return From 16053627ef89dacda15bed44c119df4d2f6d05e0 Mon Sep 17 00:00:00 2001 From: Cefan Rubin Date: Mon, 24 Nov 2025 11:26:03 +0000 Subject: [PATCH 09/14] Trying to fix directory path used by github action semgrep scan --- .github/workflows/semgrep.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/semgrep.yml b/.github/workflows/semgrep.yml index 4a7ba73d3f279b3..af8474cee4adfa9 100644 --- a/.github/workflows/semgrep.yml +++ b/.github/workflows/semgrep.yml @@ -39,6 +39,7 @@ jobs: run: | #git fetch origin ${{ github.base_ref }} + cd "$GITHUB_WORKSPACE" git diff --name-only origin/production HEAD --diff-filter=ACMRT | grep -E '\.(htm|html|yaml|yml|md|mdx)$' > changed_files.txt || true list_of_files=$(cat tools/changed_files.txt | tr '\n' ' ') From 2bf151916bf03ae53bcf784f72def2b7e3a67c47 Mon Sep 17 00:00:00 2001 From: Cefan Rubin Date: Mon, 24 Nov 2025 11:31:50 +0000 Subject: [PATCH 10/14] Show git details and PWD of workflow run --- .github/workflows/semgrep.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/semgrep.yml b/.github/workflows/semgrep.yml index af8474cee4adfa9..b7e1d82e86e4d83 100644 --- a/.github/workflows/semgrep.yml +++ b/.github/workflows/semgrep.yml @@ -40,6 +40,9 @@ jobs: #git fetch origin ${{ github.base_ref }} cd "$GITHUB_WORKSPACE" + echo "$PWD" + git show + git diff --name-only origin/production HEAD --diff-filter=ACMRT | grep -E '\.(htm|html|yaml|yml|md|mdx)$' > changed_files.txt || true list_of_files=$(cat tools/changed_files.txt | tr '\n' ' ') From ee027ea0dc5f47abcd7057e234e0bc72a9bde437 Mon Sep 17 00:00:00 2001 From: Cefan Rubin Date: Mon, 24 Nov 2025 11:35:58 +0000 Subject: [PATCH 11/14] Testing use of the currently working command but with new workflow file structure --- .github/workflows/semgrep.yml | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/.github/workflows/semgrep.yml b/.github/workflows/semgrep.yml index b7e1d82e86e4d83..8aa669f6b186710 100644 --- a/.github/workflows/semgrep.yml +++ b/.github/workflows/semgrep.yml @@ -37,22 +37,23 @@ jobs: - name: Semgrep Repo Rules (Custom rules found in .semgrep/) if: github.event_name == 'pull_request' && !contains(github.event.head_commit.message, '[skip semgrep]') run: | + semgrep ci #git fetch origin ${{ github.base_ref }} - cd "$GITHUB_WORKSPACE" - echo "$PWD" - git show + #cd "$GITHUB_WORKSPACE" + #echo "$PWD" + #git show - git diff --name-only origin/production HEAD --diff-filter=ACMRT | grep -E '\.(htm|html|yaml|yml|md|mdx)$' > changed_files.txt || true - list_of_files=$(cat tools/changed_files.txt | tr '\n' ' ') + #git diff --name-only origin/production HEAD --diff-filter=ACMRT | grep -E '\.(htm|html|yaml|yml|md|mdx)$' > changed_files.txt || true + #list_of_files=$(cat tools/changed_files.txt | tr '\n' ' ') # Check if file list is empty to prevent errors - if [ -s changed_files.txt ]; then - semgrep scan \ - --config .semgrep --metrics=off \ - --include "*.mdx" --include "*.mdx" \ - $list_of_files + #if [ -s changed_files.txt ]; then + # semgrep scan \ + # --config .semgrep --metrics=off \ + # --include "*.mdx" --include "*.mdx" \ + # $list_of_files # add '--error' to return error code to workflow - else - echo "No relevant files changed." - fi + #else + # echo "No relevant files changed." + #fi From 5201d96a67f25a69a5527802fa359ba398e1315b Mon Sep 17 00:00:00 2001 From: Cefan Rubin Date: Mon, 24 Nov 2025 12:48:41 +0000 Subject: [PATCH 12/14] Not exactly parity sought, in CI we compare commited changes made but when run during development we want to consider all changes made to the working directory --- .github/workflows/semgrep.yml | 27 +++++++++++---------------- .semgrep/dates-in-docs.yaml | 2 ++ tools/semgrep-repo-rules | 22 +++++++++++++++------- 3 files changed, 28 insertions(+), 23 deletions(-) diff --git a/.github/workflows/semgrep.yml b/.github/workflows/semgrep.yml index 8aa669f6b186710..93afd34675acb90 100644 --- a/.github/workflows/semgrep.yml +++ b/.github/workflows/semgrep.yml @@ -37,23 +37,18 @@ jobs: - name: Semgrep Repo Rules (Custom rules found in .semgrep/) if: github.event_name == 'pull_request' && !contains(github.event.head_commit.message, '[skip semgrep]') run: | - semgrep ci - #git fetch origin ${{ github.base_ref }} - #cd "$GITHUB_WORKSPACE" - #echo "$PWD" - #git show - - #git diff --name-only origin/production HEAD --diff-filter=ACMRT | grep -E '\.(htm|html|yaml|yml|md|mdx)$' > changed_files.txt || true - #list_of_files=$(cat tools/changed_files.txt | tr '\n' ' ') + base_commit=$(git merge-base HEAD origin/$GITHUB_BASE_REF) + git diff $base_commit... --diff-filter=ACMRT --name-only | grep -E '\.(htm|html|yaml|yml|md|mdx)$' > changed_files.txt || true # Check if file list is empty to prevent errors - #if [ -s changed_files.txt ]; then - # semgrep scan \ - # --config .semgrep --metrics=off \ - # --include "*.mdx" --include "*.mdx" \ - # $list_of_files + if [ -s changed_files.txt ]; then + list_of_files=$(cat tools/changed_files.txt | tr '\n' ' ') + semgrep scan \ + --config .semgrep --metrics=off \ + --include "*.mdx" --include "*.mdx" \ + $list_of_files # add '--error' to return error code to workflow - #else - # echo "No relevant files changed." - #fi + else + echo "No relevant files changed." + fi diff --git a/.semgrep/dates-in-docs.yaml b/.semgrep/dates-in-docs.yaml index a456d0b6ac0e2a0..d610eabc9a4229c 100644 --- a/.semgrep/dates-in-docs.yaml +++ b/.semgrep/dates-in-docs.yaml @@ -15,6 +15,7 @@ rules: - "/src/content/changelog/**" - "/src/content/release-notes/**" - "/.semgrep/**" + - "/.github/**" patterns: - pattern-regex: "[Cc]oming [Ss]oon" @@ -34,6 +35,7 @@ rules: - "/src/content/changelog/**" - "/src/content/release-notes/**" - "/.semgrep/**" + - "/.github/**" pattern-either: - pattern-regex: Jan\| Feb\| Mar\| Apr\| May\| Jun\| Jul\| Aug\| Sep\| Nov\| Dec - pattern-regex: \ 20[0-9][0-9] diff --git a/tools/semgrep-repo-rules b/tools/semgrep-repo-rules index 4cf9a7c9a2ad1f7..2a2c4ef40b45e31 100755 --- a/tools/semgrep-repo-rules +++ b/tools/semgrep-repo-rules @@ -4,14 +4,22 @@ repo_root_dir="$(git rev-parse --show-toplevel)" pushd "${repo_root_dir}" > /dev/null || return -git diff --name-only origin/production HEAD --diff-filter=ACMRT | grep -E '\.(htm|html|yaml|yml|md|mdx)$' > tools/changed_files.txt || true +# this file wants to match all changes in working dir, not just commited changes (in CI this is not the case) +#base_commit=$(git merge-base HEAD origin/production) -list_of_files=$(cat tools/changed_files.txt | tr '\n' ' ') +git diff --diff-filter=ACMRT --name-only | grep -E '\.(htm|html|yaml|yml|md|mdx)$' > tools/changed_files.txt || true -docker run --rm -v "${PWD}:/src" semgrep/semgrep \ - semgrep scan \ - --config .semgrep --metrics=off \ - --include "*.mdx" --include "*.mdx" \ - $list_of_files +if [ -s tools/changed_files.txt ]; then + list_of_files=$(cat tools/changed_files.txt | tr '\n' ' ') + + docker run --rm -v "${PWD}:/src" semgrep/semgrep \ + semgrep scan \ + --config .semgrep --metrics=off \ + --include "*.mdx" --include "*.mdx" \ + --force-color \ + $list_of_files +else + echo "No relevant files changed." +fi popd > /dev/null || return From dd14216e018aba338ea3d90a02349d50a296d72c Mon Sep 17 00:00:00 2001 From: Cefan Rubin Date: Mon, 24 Nov 2025 13:17:58 +0000 Subject: [PATCH 13/14] Improved warning message for coming soon and included both committed and uncommitted changes in the local semgrep check --- .github/workflows/semgrep.yml | 6 +++--- .gitignore | 2 +- .semgrep/dates-in-docs.yaml | 2 +- tools/semgrep-repo-rules | 11 ++++++----- 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/.github/workflows/semgrep.yml b/.github/workflows/semgrep.yml index 93afd34675acb90..e2cc1d2f0257e64 100644 --- a/.github/workflows/semgrep.yml +++ b/.github/workflows/semgrep.yml @@ -39,11 +39,11 @@ jobs: run: | base_commit=$(git merge-base HEAD origin/$GITHUB_BASE_REF) - git diff $base_commit... --diff-filter=ACMRT --name-only | grep -E '\.(htm|html|yaml|yml|md|mdx)$' > changed_files.txt || true + git diff $base_commit... --diff-filter=ACMRT --name-only | grep -E '\.(htm|html|yaml|yml|md|mdx)$' > tools/relevant_changed_files.txt || true # Check if file list is empty to prevent errors - if [ -s changed_files.txt ]; then - list_of_files=$(cat tools/changed_files.txt | tr '\n' ' ') + if [ -s tools/relevant_changed_files.txt ]; then + list_of_files=$(cat tools/relevant_changed_files.txt | tr '\n' ' ') semgrep scan \ --config .semgrep --metrics=off \ --include "*.mdx" --include "*.mdx" \ diff --git a/.gitignore b/.gitignore index ff7de176d326dd6..8dfacd8c570751a 100644 --- a/.gitignore +++ b/.gitignore @@ -31,4 +31,4 @@ pnpm-debug.log* .idea -tools/changed_files.txt +tools/relevant_changed_files.txt diff --git a/.semgrep/dates-in-docs.yaml b/.semgrep/dates-in-docs.yaml index d610eabc9a4229c..fe7be10339392af 100644 --- a/.semgrep/dates-in-docs.yaml +++ b/.semgrep/dates-in-docs.yaml @@ -1,7 +1,7 @@ rules: - id: coming-soon languages: [generic] - message: "Found forbidden string 'coming soon'. Too often we set expectations unfairly by attaching this phrase to a feature that is cancelled or a plan that changes." + message: "Found forbidden string 'coming soon'. Too often we set expectations unfairly by attaching this phrase to a feature that may not actually arrive soon." severity: MEDIUM paths: include: diff --git a/tools/semgrep-repo-rules b/tools/semgrep-repo-rules index 2a2c4ef40b45e31..b7eeac6a76526a9 100755 --- a/tools/semgrep-repo-rules +++ b/tools/semgrep-repo-rules @@ -4,13 +4,14 @@ repo_root_dir="$(git rev-parse --show-toplevel)" pushd "${repo_root_dir}" > /dev/null || return -# this file wants to match all changes in working dir, not just commited changes (in CI this is not the case) -#base_commit=$(git merge-base HEAD origin/production) +base_commit=$(git merge-base HEAD origin/production) +git diff $base_commit... --diff-filter=ACMRT --name-only | grep -E '\.(htm|html|yaml|yml|md|mdx)$' > tools/relevant_changed_files.txt || true -git diff --diff-filter=ACMRT --name-only | grep -E '\.(htm|html|yaml|yml|md|mdx)$' > tools/changed_files.txt || true +# this file wants to also match uncommitted changes, not just commited changes (in CI this is not the case) +git diff --diff-filter=ACMRT --name-only | grep -E '\.(htm|html|yaml|yml|md|mdx)$' >> tools/relevant_changed_files.txt || true -if [ -s tools/changed_files.txt ]; then - list_of_files=$(cat tools/changed_files.txt | tr '\n' ' ') +if [ -s tools/relevant_changed_files.txt ]; then + list_of_files=$(cat tools/relevant_changed_files.txt | tr '\n' ' ') docker run --rm -v "${PWD}:/src" semgrep/semgrep \ semgrep scan \ From 4c8ec30decbb0aa157a13bcefd4942f91af29480 Mon Sep 17 00:00:00 2001 From: Cefan Rubin Date: Mon, 24 Nov 2025 13:22:53 +0000 Subject: [PATCH 14/14] Avoid fatal git error on ownership within CLI working directory --- .github/workflows/semgrep.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/semgrep.yml b/.github/workflows/semgrep.yml index e2cc1d2f0257e64..b3f7e8a5edc29f6 100644 --- a/.github/workflows/semgrep.yml +++ b/.github/workflows/semgrep.yml @@ -38,6 +38,7 @@ jobs: if: github.event_name == 'pull_request' && !contains(github.event.head_commit.message, '[skip semgrep]') run: | + git config --global --add safe.directory $PWD base_commit=$(git merge-base HEAD origin/$GITHUB_BASE_REF) git diff $base_commit... --diff-filter=ACMRT --name-only | grep -E '\.(htm|html|yaml|yml|md|mdx)$' > tools/relevant_changed_files.txt || true