From f5014b962944de35d41a7e12e3fd5e67506a27c8 Mon Sep 17 00:00:00 2001
From: Tejas Kashinath <tkashina@amazon.com>
Date: Mon, 18 May 2026 12:43:16 -0400
Subject: [PATCH 1/7] ci: add Claude Code /security-review workflow on PRs

---
 .github/workflows/pr-security-review.yml | 167 +++++++++++++++++++++++
 1 file changed, 167 insertions(+)
 create mode 100644 .github/workflows/pr-security-review.yml

diff --git a/.github/workflows/pr-security-review.yml b/.github/workflows/pr-security-review.yml
new file mode 100644
index 000000000..f0610f90f
--- /dev/null
+++ b/.github/workflows/pr-security-review.yml
@@ -0,0 +1,167 @@
+name: Claude Security Review
+
+on:
+  pull_request_target:
+    types: [opened, reopened, synchronize]
+  workflow_dispatch:
+    inputs:
+      pr_number:
+        description: 'PR number to review'
+        required: true
+        type: string
+
+permissions:
+  id-token: write
+  pull-requests: write
+  issues: write
+  contents: read
+
+concurrency:
+  group: pr-security-review-${{ github.event.pull_request.number || inputs.pr_number }}
+  cancel-in-progress: true
+
+jobs:
+  authorize:
+    runs-on: ubuntu-latest
+    outputs:
+      authorized: ${{ steps.auth.outputs.authorized || steps.dispatch-auth.outputs.authorized }}
+    steps:
+      - name: Check authorization
+        id: auth
+        if: github.event_name == 'pull_request_target'
+        uses: actions/github-script@v9
+        with:
+          script: |
+            const user = context.payload.pull_request.user.login;
+            try {
+              await github.rest.teams.getMembershipForUserInOrg({
+                org: context.repo.owner,
+                team_slug: 'agentcore-cli-devs',
+                username: user,
+              });
+              console.log(`${user} is a member of agentcore-cli-devs`);
+              core.setOutput('authorized', 'true');
+            } catch (teamError) {
+              try {
+                const { data } = await github.rest.repos.getCollaboratorPermissionLevel({
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  username: user,
+                });
+                const hasWriteAccess = ['write', 'admin'].includes(data.permission);
+                if (hasWriteAccess) {
+                  console.log(`${user} has write access (${data.permission})`);
+                  core.setOutput('authorized', 'true');
+                } else {
+                  console.log(`${user} does not have write access (${data.permission}) — skipping review`);
+                  core.setOutput('authorized', 'false');
+                }
+              } catch (collabError) {
+                console.log(`${user} authorization check failed (${collabError.status}) — skipping review`);
+                core.setOutput('authorized', 'false');
+              }
+            }
+
+      - name: Auto-authorize workflow_dispatch
+        id: dispatch-auth
+        if: github.event_name == 'workflow_dispatch'
+        run: echo "authorized=true" >> "$GITHUB_OUTPUT"
+
+  security-review:
+    needs: authorize
+    if: needs.authorize.outputs.authorized == 'true'
+    runs-on: ubuntu-latest
+    env:
+      AWS_REGION: us-west-2
+    steps:
+      - name: Resolve PR number
+        id: pr
+        uses: actions/github-script@v9
+        with:
+          script: |
+            const num = context.eventName === 'workflow_dispatch'
+              ? parseInt('${{ inputs.pr_number }}')
+              : context.payload.pull_request.number;
+            const { data: pr } = await github.rest.pulls.get({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              pull_number: num,
+            });
+            core.setOutput('number', num);
+            core.setOutput('head_sha', pr.head.sha);
+            core.setOutput('base_ref', pr.base.ref);
+
+      - name: Add claude-security-reviewing label
+        uses: actions/github-script@v9
+        with:
+          script: |
+            const prNumber = parseInt('${{ steps.pr.outputs.number }}');
+            try {
+              await github.rest.issues.getLabel({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                name: 'claude-security-reviewing',
+              });
+            } catch (e) {
+              if (e.status === 404) {
+                await github.rest.issues.createLabel({
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  name: 'claude-security-reviewing',
+                  color: 'D73A4A',
+                  description: 'Claude Code /security-review in progress',
+                });
+              }
+            }
+            await github.rest.issues.addLabels({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: prNumber,
+              labels: ['claude-security-reviewing'],
+            });
+
+      - name: Checkout PR head
+        uses: actions/checkout@v6
+        with:
+          ref: ${{ steps.pr.outputs.head_sha }}
+          fetch-depth: 0
+
+      - name: Generate GitHub App token
+        id: app-token
+        uses: actions/create-github-app-token@v1
+        with:
+          app-id: ${{ vars.APP_ID }}
+          private-key: ${{ secrets.APP_PRIVATE_KEY }}
+
+      - name: Configure AWS credentials (OIDC)
+        uses: aws-actions/configure-aws-credentials@v6
+        with:
+          role-to-assume: ${{ secrets.BEDROCK_SECURITY_REVIEW_ROLE_ARN }}
+          aws-region: us-west-2
+
+      - name: Run Claude Code /security-review
+        uses: anthropics/claude-code-action@v1
+        with:
+          github_token: ${{ steps.app-token.outputs.token }}
+          use_bedrock: "true"
+          prompt: "/security-review pull request #${{ steps.pr.outputs.number }} (base: ${{ steps.pr.outputs.base_ref }}, head: ${{ steps.pr.outputs.head_sha }}). Post the findings as a single PR review comment on this PR."
+          claude_args: >-
+            --model us.anthropic.claude-opus-4-7
+            --max-turns 20
+
+      - name: Remove claude-security-reviewing label
+        if: always()
+        uses: actions/github-script@v9
+        with:
+          script: |
+            const prNumber = parseInt('${{ steps.pr.outputs.number }}');
+            try {
+              await github.rest.issues.removeLabel({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: prNumber,
+                name: 'claude-security-reviewing',
+              });
+            } catch (error) {
+              console.log('Label removal failed (may not exist):', error.message);
+            }

From 1fa4dafc7df61ba71fe9640b22a06b7a8ca9dab1 Mon Sep 17 00:00:00 2001
From: Tejas Kashinath <tkashina@amazon.com>
Date: Mon, 18 May 2026 12:50:20 -0400
Subject: [PATCH 2/7] ci(security-review): add safe-to-review label trigger and
 document workflow

---
 .github/workflows/pr-security-review.yml | 25 ++++++++++++++++++------
 CONTRIBUTING.md                          |  7 +++++++
 2 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/pr-security-review.yml b/.github/workflows/pr-security-review.yml
index f0610f90f..fcd7473b2 100644
--- a/.github/workflows/pr-security-review.yml
+++ b/.github/workflows/pr-security-review.yml
@@ -2,7 +2,7 @@ name: Claude Security Review
 
 on:
   pull_request_target:
-    types: [opened, reopened, synchronize]
+    types: [opened, reopened, synchronize, labeled]
   workflow_dispatch:
     inputs:
       pr_number:
@@ -23,6 +23,12 @@ concurrency:
 jobs:
   authorize:
     runs-on: ubuntu-latest
+    # On labeled events, only proceed when the label is exactly 'safe-to-review'.
+    # Other labels are ignored entirely so we don't run the auth API calls on every label change.
+    if: |
+      github.event_name != 'pull_request_target' ||
+      github.event.action != 'labeled' ||
+      github.event.label.name == 'safe-to-review'
     outputs:
       authorized: ${{ steps.auth.outputs.authorized || steps.dispatch-auth.outputs.authorized }}
     steps:
@@ -32,14 +38,21 @@ jobs:
         uses: actions/github-script@v9
         with:
           script: |
-            const user = context.payload.pull_request.user.login;
+            // For 'labeled' events the gate is the maintainer who applied the label (sender),
+            // not the PR author. For other events the gate is the PR author.
+            const user = context.payload.action === 'labeled'
+              ? context.payload.sender.login
+              : context.payload.pull_request.user.login;
+            const reason = context.payload.action === 'labeled'
+              ? `labeler ${user}`
+              : `PR author ${user}`;
             try {
               await github.rest.teams.getMembershipForUserInOrg({
                 org: context.repo.owner,
                 team_slug: 'agentcore-cli-devs',
                 username: user,
               });
-              console.log(`${user} is a member of agentcore-cli-devs`);
+              console.log(`${reason} is a member of agentcore-cli-devs`);
               core.setOutput('authorized', 'true');
             } catch (teamError) {
               try {
@@ -50,14 +63,14 @@ jobs:
                 });
                 const hasWriteAccess = ['write', 'admin'].includes(data.permission);
                 if (hasWriteAccess) {
-                  console.log(`${user} has write access (${data.permission})`);
+                  console.log(`${reason} has write access (${data.permission})`);
                   core.setOutput('authorized', 'true');
                 } else {
-                  console.log(`${user} does not have write access (${data.permission}) — skipping review`);
+                  console.log(`${reason} does not have write access (${data.permission}) — skipping review`);
                   core.setOutput('authorized', 'false');
                 }
               } catch (collabError) {
-                console.log(`${user} authorization check failed (${collabError.status}) — skipping review`);
+                console.log(`${reason} authorization check failed (${collabError.status}) — skipping review`);
                 core.setOutput('authorized', 'false');
               }
             }
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index edb4e6a7d..4b01d8e25 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -48,6 +48,13 @@ Looking at the existing issues is a great way to find something to contribute on
 default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help
 wanted' issues is a great place to start.
 
+## Maintainer notes: triggering Claude security review on community PRs
+
+PRs from non-collaborators are skipped by the `Claude Security Review` workflow to avoid running automation against
+unreviewed code. Once a maintainer has manually reviewed a community PR and confirmed the diff is safe to send to the
+automated reviewer, apply the `safe-to-review` label to the PR. The workflow will run automatically. The label can also
+be re-applied (remove and add) to re-trigger a review after new commits land.
+
 ## Code of Conduct
 
 This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). For more

From 0b32025b66d7e6eea7a695a748e1ea3353c28264 Mon Sep 17 00:00:00 2001
From: Tejas Kashinath <tkashina@amazon.com>
Date: Mon, 18 May 2026 12:54:55 -0400
Subject: [PATCH 3/7] ci(security-review): address review feedback (drop
 synchronize, fix script injection, add timeout, inline comments)

---
 .github/workflows/pr-security-review.yml | 31 ++++++++++++++++--------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/pr-security-review.yml b/.github/workflows/pr-security-review.yml
index fcd7473b2..5e474c4ad 100644
--- a/.github/workflows/pr-security-review.yml
+++ b/.github/workflows/pr-security-review.yml
@@ -2,7 +2,7 @@ name: Claude Security Review
 
 on:
   pull_request_target:
-    types: [opened, reopened, synchronize, labeled]
+    types: [opened, reopened, labeled]
   workflow_dispatch:
     inputs:
       pr_number:
@@ -84,17 +84,21 @@ jobs:
     needs: authorize
     if: needs.authorize.outputs.authorized == 'true'
     runs-on: ubuntu-latest
+    timeout-minutes: 30
     env:
       AWS_REGION: us-west-2
     steps:
       - name: Resolve PR number
         id: pr
         uses: actions/github-script@v9
+        env:
+          PR_NUMBER_INPUT: ${{ inputs.pr_number }}
         with:
           script: |
-            const num = context.eventName === 'workflow_dispatch'
-              ? parseInt('${{ inputs.pr_number }}')
-              : context.payload.pull_request.number;
+            const num =
+              context.eventName === 'workflow_dispatch'
+                ? parseInt(process.env.PR_NUMBER_INPUT, 10)
+                : context.payload.pull_request.number;
             const { data: pr } = await github.rest.pulls.get({
               owner: context.repo.owner,
               repo: context.repo.repo,
@@ -106,9 +110,11 @@ jobs:
 
       - name: Add claude-security-reviewing label
         uses: actions/github-script@v9
+        env:
+          PR_NUMBER: ${{ steps.pr.outputs.number }}
         with:
           script: |
-            const prNumber = parseInt('${{ steps.pr.outputs.number }}');
+            const prNumber = parseInt(process.env.PR_NUMBER, 10);
             try {
               await github.rest.issues.getLabel({
                 owner: context.repo.owner,
@@ -156,18 +162,23 @@ jobs:
         uses: anthropics/claude-code-action@v1
         with:
           github_token: ${{ steps.app-token.outputs.token }}
-          use_bedrock: "true"
-          prompt: "/security-review pull request #${{ steps.pr.outputs.number }} (base: ${{ steps.pr.outputs.base_ref }}, head: ${{ steps.pr.outputs.head_sha }}). Post the findings as a single PR review comment on this PR."
+          use_bedrock: 'true'
+          prompt:
+            '/security-review pull request #${{ steps.pr.outputs.number }} (base: ${{ steps.pr.outputs.base_ref }},
+            head: ${{ steps.pr.outputs.head_sha }}). For each finding, post an inline review comment on the exact file
+            and line where the issue lives (use a pull request review with line-level comments, not a single summary
+            comment). If you have no findings, post a single short top-level comment saying so.'
           claude_args: >-
-            --model us.anthropic.claude-opus-4-7
-            --max-turns 20
+            --model us.anthropic.claude-opus-4-7 --max-turns 20
 
       - name: Remove claude-security-reviewing label
         if: always()
         uses: actions/github-script@v9
+        env:
+          PR_NUMBER: ${{ steps.pr.outputs.number }}
         with:
           script: |
-            const prNumber = parseInt('${{ steps.pr.outputs.number }}');
+            const prNumber = parseInt(process.env.PR_NUMBER, 10);
             try {
               await github.rest.issues.removeLabel({
                 owner: context.repo.owner,

From 98ec786a45ba25909480c0c2c2a373f5ba8ff834 Mon Sep 17 00:00:00 2001
From: Tejas Kashinath <tkashina@amazon.com>
Date: Mon, 18 May 2026 12:56:58 -0400
Subject: [PATCH 4/7] ci(security-review): make prompt re-review aware and
 consider other reviewers' comments

---
 .github/workflows/pr-security-review.yml | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/pr-security-review.yml b/.github/workflows/pr-security-review.yml
index 5e474c4ad..32b9b6dcf 100644
--- a/.github/workflows/pr-security-review.yml
+++ b/.github/workflows/pr-security-review.yml
@@ -165,9 +165,16 @@ jobs:
           use_bedrock: 'true'
           prompt:
             '/security-review pull request #${{ steps.pr.outputs.number }} (base: ${{ steps.pr.outputs.base_ref }},
-            head: ${{ steps.pr.outputs.head_sha }}). For each finding, post an inline review comment on the exact file
-            and line where the issue lives (use a pull request review with line-level comments, not a single summary
-            comment). If you have no findings, post a single short top-level comment saying so.'
+            head: ${{ steps.pr.outputs.head_sha }}). Before reviewing, fetch the existing PR review comments and issue
+            comments via the GitHub API. If you (the same bot identity that posts these reviews) have already commented
+            on this PR, treat this as a re-review: for each of your prior findings, check the latest diff and explicitly
+            confirm whether it has been addressed, is still outstanding, or is no longer applicable, and skip re-posting
+            findings that are already resolved. Also read review comments left by other reviewers (humans or other bots)
+            and factor their context into your analysis — do not contradict an accepted resolution and do not duplicate
+            a finding another reviewer has already raised. For each new or still-outstanding finding, post an inline
+            review comment on the exact file and line where the issue lives (use a pull request review with line-level
+            comments, not a single summary comment). If you have no new findings, post a single short top-level comment
+            saying so, including a brief status of any prior findings (resolved / still outstanding).'
           claude_args: >-
             --model us.anthropic.claude-opus-4-7 --max-turns 20
 

From 7ba10e450cc367f845ec172668713b7f7db8d7c1 Mon Sep 17 00:00:00 2001
From: Tejas Kashinath <tkashina@amazon.com>
Date: Mon, 18 May 2026 13:01:32 -0400
Subject: [PATCH 5/7] ci(security-review): trigger on approving review instead
 of safe-to-review label

---
 .github/workflows/pr-security-review.yml | 33 +++++++++++++-----------
 CONTRIBUTING.md                          | 12 +++++----
 2 files changed, 25 insertions(+), 20 deletions(-)

diff --git a/.github/workflows/pr-security-review.yml b/.github/workflows/pr-security-review.yml
index 32b9b6dcf..be1cfab3a 100644
--- a/.github/workflows/pr-security-review.yml
+++ b/.github/workflows/pr-security-review.yml
@@ -2,7 +2,9 @@ name: Claude Security Review
 
 on:
   pull_request_target:
-    types: [opened, reopened, labeled]
+    types: [opened, reopened]
+  pull_request_review:
+    types: [submitted]
   workflow_dispatch:
     inputs:
       pr_number:
@@ -17,35 +19,36 @@ permissions:
   contents: read
 
 concurrency:
-  group: pr-security-review-${{ github.event.pull_request.number || inputs.pr_number }}
+  group:
+    pr-security-review-${{ github.event.pull_request.number || github.event.review.pull_request_url || inputs.pr_number
+    }}
   cancel-in-progress: true
 
 jobs:
   authorize:
     runs-on: ubuntu-latest
-    # On labeled events, only proceed when the label is exactly 'safe-to-review'.
-    # Other labels are ignored entirely so we don't run the auth API calls on every label change.
+    # On pull_request_review events, only proceed when the review state is 'approved'.
+    # Comment/changes-requested reviews are ignored to avoid running the workflow on every review.
     if: |
-      github.event_name != 'pull_request_target' ||
-      github.event.action != 'labeled' ||
-      github.event.label.name == 'safe-to-review'
+      github.event_name != 'pull_request_review' ||
+      github.event.review.state == 'approved'
     outputs:
       authorized: ${{ steps.auth.outputs.authorized || steps.dispatch-auth.outputs.authorized }}
     steps:
       - name: Check authorization
         id: auth
-        if: github.event_name == 'pull_request_target'
+        if: github.event_name == 'pull_request_target' || github.event_name == 'pull_request_review'
         uses: actions/github-script@v9
         with:
           script: |
-            // For 'labeled' events the gate is the maintainer who applied the label (sender),
-            // not the PR author. For other events the gate is the PR author.
-            const user = context.payload.action === 'labeled'
-              ? context.payload.sender.login
+            // pull_request_target: gate on the PR author (maintainer-authored PRs auto-run on open/reopen).
+            // pull_request_review: gate on the reviewer who approved (so a maintainer approving a community PR
+            //                      authorizes the security review).
+            const isApproval = context.eventName === 'pull_request_review';
+            const user = isApproval
+              ? context.payload.review.user.login
               : context.payload.pull_request.user.login;
-            const reason = context.payload.action === 'labeled'
-              ? `labeler ${user}`
-              : `PR author ${user}`;
+            const reason = isApproval ? `approver ${user}` : `PR author ${user}`;
             try {
               await github.rest.teams.getMembershipForUserInOrg({
                 org: context.repo.owner,
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 4b01d8e25..3994b91fc 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -48,12 +48,14 @@ Looking at the existing issues is a great way to find something to contribute on
 default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help
 wanted' issues is a great place to start.
 
-## Maintainer notes: triggering Claude security review on community PRs
+## Maintainer notes: Claude security review on community PRs
 
-PRs from non-collaborators are skipped by the `Claude Security Review` workflow to avoid running automation against
-unreviewed code. Once a maintainer has manually reviewed a community PR and confirmed the diff is safe to send to the
-automated reviewer, apply the `safe-to-review` label to the PR. The workflow will run automatically. The label can also
-be re-applied (remove and add) to re-trigger a review after new commits land.
+The `Claude Security Review` workflow runs automatically on maintainer-authored PRs (opened/reopened) and on community
+PRs as soon as a maintainer submits an **approving review**. PRs from non-collaborators are otherwise skipped — the
+approval is the gate, so a maintainer must manually review the diff before the automated reviewer runs.
+
+To re-run the review on a later commit, submit another approving review (resolves to a fresh workflow run), or trigger
+the `Claude Security Review` workflow manually from the Actions tab with the PR number.
 
 ## Code of Conduct
 

From 9e529f6eb6e8a8c6af33d5e5ed137045380d176c Mon Sep 17 00:00:00 2001
From: Tejas Kashinath <tkashina@amazon.com>
Date: Mon, 18 May 2026 14:02:16 -0400
Subject: [PATCH 6/7] ci(security-review): inline /security-review prompt and
 template git context

The Claude Code Action runs the Agent SDK, which doesn't load the /security-review skill bundled with the local CLI binary. Resolving '/security-review' as a prompt was a no-op (num_turns: 0). Inline the bundled prompt into .github/prompts/security-review.md, gather git status/diff in a workflow step, template them in, and feed the result to the action via prompt:. Bumps --max-turns to 30 to accommodate the skill's sub-task fan-out.
---
 .github/prompts/security-review.md       | 252 +++++++++++++++++++++++
 .github/workflows/pr-security-review.yml |  59 ++++--
 2 files changed, 297 insertions(+), 14 deletions(-)
 create mode 100644 .github/prompts/security-review.md

diff --git a/.github/prompts/security-review.md b/.github/prompts/security-review.md
new file mode 100644
index 000000000..3b00e75f9
--- /dev/null
+++ b/.github/prompts/security-review.md
@@ -0,0 +1,252 @@
+You are a senior security engineer conducting a focused security review of the changes on a GitHub pull request.
+
+PR: #{{PR_NUMBER}} (base: `{{BASE_REF}}`, head: `{{HEAD_SHA}}`)
+
+GIT STATUS:
+
+```
+{{GIT_STATUS}}
+```
+
+FILES MODIFIED:
+
+```
+{{FILES_MODIFIED}}
+```
+
+COMMITS:
+
+```
+{{COMMITS}}
+```
+
+DIFF CONTENT:
+
+```
+{{DIFF_CONTENT}}
+```
+
+Review the complete diff above. This contains all code changes in the PR.
+
+OBJECTIVE: Perform a security-focused code review to identify HIGH-CONFIDENCE security vulnerabilities that could have
+real exploitation potential. This is not a general code review — focus ONLY on security implications newly added by this
+PR. Do not comment on existing security concerns.
+
+CRITICAL INSTRUCTIONS:
+
+1. MINIMIZE FALSE POSITIVES: Only flag issues where you're >80% confident of actual exploitability
+2. AVOID NOISE: Skip theoretical issues, style concerns, or low-impact findings
+3. FOCUS ON IMPACT: Prioritize vulnerabilities that could lead to unauthorized access, data breaches, or system
+   compromise
+4. EXCLUSIONS: Do NOT report the following issue types:
+   - Denial of Service (DOS) vulnerabilities, even if they allow service disruption
+   - Secrets or sensitive data stored on disk (these are handled by other processes)
+   - Rate limiting or resource exhaustion issues
+
+SECURITY CATEGORIES TO EXAMINE:
+
+**Input Validation Vulnerabilities:**
+
+- SQL injection via unsanitized user input
+- Command injection in system calls or subprocesses
+- XXE injection in XML parsing
+- Template injection in templating engines
+- NoSQL injection in database queries
+- Path traversal in file operations
+
+**Authentication & Authorization Issues:**
+
+- Authentication bypass logic
+- Privilege escalation paths
+- Session management flaws
+- JWT token vulnerabilities
+- Authorization logic bypasses
+
+**Crypto & Secrets Management:**
+
+- Hardcoded API keys, passwords, or tokens
+- Weak cryptographic algorithms or implementations
+- Improper key storage or management
+- Cryptographic randomness issues
+- Certificate validation bypasses
+
+**Injection & Code Execution:**
+
+- Remote code execution via deserialization
+- Pickle injection in Python
+- YAML deserialization vulnerabilities
+- Eval injection in dynamic code execution
+- XSS vulnerabilities in web applications (reflected, stored, DOM-based)
+
+**Data Exposure:**
+
+- Sensitive data logging or storage
+- PII handling violations
+- API endpoint data leakage
+- Debug information exposure
+
+Additional notes:
+
+- Even if something is only exploitable from the local network, it can still be a HIGH severity issue
+
+ANALYSIS METHODOLOGY:
+
+Phase 1 — Repository Context Research (use file search tools — Read, Glob, Grep, LS):
+
+- Identify existing security frameworks and libraries in use
+- Look for established secure coding patterns in the codebase
+- Examine existing sanitization and validation patterns
+- Understand the project's security model and threat model
+
+Phase 2 — Comparative Analysis:
+
+- Compare new code changes against existing security patterns
+- Identify deviations from established secure practices
+- Look for inconsistent security implementations
+- Flag code that introduces new attack surfaces
+
+Phase 3 — Vulnerability Assessment:
+
+- Examine each modified file for security implications
+- Trace data flow from user inputs to sensitive operations
+- Look for privilege boundaries being crossed unsafely
+- Identify injection points and unsafe deserialization
+
+SEVERITY GUIDELINES:
+
+- **HIGH**: Directly exploitable vulnerabilities leading to RCE, data breach, or authentication bypass
+- **MEDIUM**: Vulnerabilities requiring specific conditions but with significant impact
+- **LOW**: Defense-in-depth issues or lower-impact vulnerabilities
+
+CONFIDENCE SCORING:
+
+- 0.9–1.0: Certain exploit path identified, tested if possible
+- 0.8–0.9: Clear vulnerability pattern with known exploitation methods
+- 0.7–0.8: Suspicious pattern requiring specific conditions to exploit
+- Below 0.7: Don't report (too speculative)
+
+FINAL REMINDER: Focus on HIGH and MEDIUM findings only. Better to miss some theoretical issues than flood the report
+with false positives. Each finding should be something a security engineer would confidently raise in a PR review.
+
+FALSE POSITIVE FILTERING:
+
+> You do not need to run commands to reproduce the vulnerability — just read the code to determine if it is a real
+> vulnerability. Do not write to any files.
+>
+> HARD EXCLUSIONS — Automatically exclude findings matching these patterns:
+>
+> 1. Denial of Service (DOS) vulnerabilities or resource exhaustion attacks.
+> 2. Secrets or credentials stored on disk if they are otherwise secured.
+> 3. Rate limiting concerns or service overload scenarios.
+> 4. Memory consumption or CPU exhaustion issues.
+> 5. Lack of input validation on non-security-critical fields without proven security impact.
+> 6. Input sanitization concerns for GitHub Action workflows unless they are clearly triggerable via untrusted input.
+> 7. A lack of hardening measures. Code is not expected to implement all security best practices, only flag concrete
+>    vulnerabilities.
+> 8. Race conditions or timing attacks that are theoretical rather than practical issues. Only report a race condition
+>    if it is concretely problematic.
+> 9. Vulnerabilities related to outdated third-party libraries. These are managed separately and should not be reported
+>    here.
+> 10. Memory safety issues such as buffer overflows or use-after-free vulnerabilities are impossible in Rust. Do not
+>     report memory safety issues in Rust or any other memory-safe languages.
+> 11. Files that are only unit tests or only used as part of running tests.
+> 12. Log spoofing concerns. Outputting unsanitized user input to logs is not a vulnerability.
+> 13. SSRF vulnerabilities that only control the path. SSRF is only a concern if it can control the host or protocol.
+> 14. Including user-controlled content in AI system prompts is not a vulnerability.
+> 15. Regex injection. Injecting untrusted content into a regex is not a vulnerability.
+> 16. Regex DOS concerns.
+> 17. Insecure documentation. Do not report any findings in documentation files such as markdown files.
+> 18. A lack of audit logs is not a vulnerability.
+>
+> PRECEDENTS:
+>
+> 1. Logging high-value secrets in plaintext is a vulnerability. Logging URLs is assumed to be safe.
+> 2. UUIDs can be assumed to be unguessable and do not need to be validated.
+> 3. Environment variables and CLI flags are trusted values. Attackers are generally not able to modify them in a secure
+>    environment. Any attack that relies on controlling an environment variable is invalid.
+> 4. Resource management issues such as memory or file descriptor leaks are not valid.
+> 5. Subtle or low-impact web vulnerabilities such as tabnabbing, XS-Leaks, prototype pollution, and open redirects
+>    should not be reported unless they are extremely high confidence.
+> 6. React and Angular are generally secure against XSS. These frameworks do not need to sanitize or escape user input
+>    unless they are using `dangerouslySetInnerHTML`, `bypassSecurityTrustHtml`, or similar methods. Do not report XSS
+>    vulnerabilities in React or Angular components or `.tsx` files unless they are using unsafe methods.
+> 7. Most vulnerabilities in GitHub Action workflows are not exploitable in practice. Before validating a GitHub Action
+>    workflow vulnerability ensure it is concrete and has a very specific attack path.
+> 8. A lack of permission checking or authentication in client-side JS/TS code is not a vulnerability. Client-side code
+>    is not trusted and does not need to implement these checks; they are handled on the server side. The same applies
+>    to all flows that send untrusted data to the backend — the backend is responsible for validating and sanitizing all
+>    inputs.
+> 9. Only include MEDIUM findings if they are obvious and concrete issues.
+> 10. Most vulnerabilities in IPython notebooks (`*.ipynb` files) are not exploitable in practice. Before validating a
+>     notebook vulnerability ensure it is concrete and has a very specific attack path where untrusted input can trigger
+>     the vulnerability.
+> 11. Logging non-PII data is not a vulnerability even if the data may be sensitive. Only report logging vulnerabilities
+>     if they expose sensitive information such as secrets, passwords, or PII.
+> 12. Command injection vulnerabilities in shell scripts are generally not exploitable in practice since shell scripts
+>     generally do not run with untrusted user input. Only report command injection vulnerabilities in shell scripts if
+>     they are concrete and have a very specific attack path for untrusted input.
+>
+> SIGNAL QUALITY CRITERIA — for remaining findings, assess:
+>
+> 1. Is there a concrete, exploitable vulnerability with a clear attack path?
+> 2. Does this represent a real security risk vs. theoretical best practice?
+> 3. Are there specific code locations and reproduction steps?
+> 4. Would this finding be actionable for a security team?
+>
+> For each finding, assign a confidence score from 1–10:
+>
+> - 1–3: Low confidence, likely false positive or noise
+> - 4–6: Medium confidence, needs investigation
+> - 7–10: High confidence, likely true vulnerability
+
+WORKFLOW (executed in 3 steps):
+
+1. Use a sub-task to identify vulnerabilities. Use the repository exploration tools to understand the codebase context,
+   then analyze the PR changes for security implications. In the prompt for this sub-task, include all of the above.
+2. Then for each vulnerability identified by the above sub-task, create a new sub-task to filter out false positives.
+   Launch these sub-tasks as parallel sub-tasks. In the prompt for these sub-tasks, include everything in the "FALSE
+   POSITIVE FILTERING" instructions.
+3. Filter out any vulnerabilities where the sub-task reported a confidence less than 8.
+
+POSTING RESULTS — read this section CAREFULLY before posting anything:
+
+A. **Re-review awareness.** Before posting, fetch existing PR review comments and issue comments via the GitHub API
+(`GET /repos/{owner}/{repo}/issues/{number}/comments` and `GET /repos/{owner}/{repo}/pulls/{number}/comments`). If your
+own bot identity has already commented on this PR, treat this run as a re-review:
+
+- For each of your prior findings, examine the current diff and explicitly determine whether the issue is now resolved,
+  still outstanding, or no longer applicable.
+- Skip re-posting findings that are already resolved.
+- Skip re-posting findings that are still on a line you've already commented on (don't duplicate yourself).
+
+B. **Other reviewers.** Read review comments left by other reviewers (humans or other bots) on this PR. Factor their
+context into your analysis: do not contradict an accepted resolution, and do not duplicate a finding another reviewer
+has already raised. If a reviewer has already flagged the same issue, skip it.
+
+C. **Inline review comments.** For each new or still-outstanding finding, post an **inline review comment on the exact
+file and line where the issue lives**. Use a pull request review with line-level comments
+(`POST /repos/{owner}/{repo}/pulls/{number}/reviews` with a `comments` array, each entry having `path`, `line`/`side`,
+and `body`). DO NOT post a single summary comment that lists all findings.
+
+Each inline comment body MUST follow this format:
+
+```
+**Severity:** <HIGH|MEDIUM>
+**Category:** <e.g. command_injection, auth_bypass>
+**Confidence:** <0.7-1.0>
+
+<one-paragraph description of the vulnerability>
+
+**Exploit scenario:** <how it is exploited>
+
+**Recommendation:** <concrete fix>
+```
+
+D. **No-findings case.** If after filtering you have no new findings to post, post a single short top-level issue
+comment (`POST /repos/{owner}/{repo}/issues/{number}/comments`) saying the security review found no new issues, plus a
+brief status of any prior findings (resolved / still outstanding / no longer applicable).
+
+E. **Review event type.** When submitting the review, use `event: COMMENT` (not `APPROVE` or `REQUEST_CHANGES`). The
+human reviewers own the merge gate; this bot is advisory.
+
+START ANALYSIS now.
diff --git a/.github/workflows/pr-security-review.yml b/.github/workflows/pr-security-review.yml
index be1cfab3a..8ceb08070 100644
--- a/.github/workflows/pr-security-review.yml
+++ b/.github/workflows/pr-security-review.yml
@@ -148,6 +148,36 @@ jobs:
           ref: ${{ steps.pr.outputs.head_sha }}
           fetch-depth: 0
 
+      - name: Build security review prompt
+        id: build-prompt
+        env:
+          PR_NUMBER: ${{ steps.pr.outputs.number }}
+          BASE_REF: ${{ steps.pr.outputs.base_ref }}
+          HEAD_SHA: ${{ steps.pr.outputs.head_sha }}
+        run: |
+          set -euo pipefail
+          # Ensure we have the base branch locally so origin/<base> resolves.
+          git fetch --no-tags origin "$BASE_REF"
+          BASE="origin/$BASE_REF"
+
+          export GIT_STATUS="$(git status)"
+          export FILES_MODIFIED="$(git diff --name-only "$BASE"...)"
+          export COMMITS="$(git log --no-decorate "$BASE"...)"
+          # Cap diff at ~512KB to keep the prompt under model context limits.
+          export DIFF_CONTENT="$(git diff "$BASE"... | head -c 524288)"
+
+          OUT="${RUNNER_TEMP}/security-review-prompt.md"
+          python3 - > "$OUT" <<'PY'
+          import os, sys
+          tpl = open(".github/prompts/security-review.md").read()
+          for k in ("PR_NUMBER","BASE_REF","HEAD_SHA","GIT_STATUS","FILES_MODIFIED","COMMITS","DIFF_CONTENT"):
+              tpl = tpl.replace("{{"+k+"}}", os.environ.get(k, ""))
+          sys.stdout.write(tpl)
+          PY
+
+          echo "prompt_file=$OUT" >> "$GITHUB_OUTPUT"
+          echo "Prompt size: $(wc -c < "$OUT") bytes"
+
       - name: Generate GitHub App token
         id: app-token
         uses: actions/create-github-app-token@v1
@@ -161,25 +191,26 @@ jobs:
           role-to-assume: ${{ secrets.BEDROCK_SECURITY_REVIEW_ROLE_ARN }}
           aws-region: us-west-2
 
-      - name: Run Claude Code /security-review
+      - name: Read prompt file
+        id: prompt
+        env:
+          PROMPT_FILE: ${{ steps.build-prompt.outputs.prompt_file }}
+        run: |
+          set -euo pipefail
+          {
+            echo "prompt<<__SECREVIEW_EOF__"
+            cat "$PROMPT_FILE"
+            echo "__SECREVIEW_EOF__"
+          } >> "$GITHUB_OUTPUT"
+
+      - name: Run Claude Code security review
         uses: anthropics/claude-code-action@v1
         with:
           github_token: ${{ steps.app-token.outputs.token }}
           use_bedrock: 'true'
-          prompt:
-            '/security-review pull request #${{ steps.pr.outputs.number }} (base: ${{ steps.pr.outputs.base_ref }},
-            head: ${{ steps.pr.outputs.head_sha }}). Before reviewing, fetch the existing PR review comments and issue
-            comments via the GitHub API. If you (the same bot identity that posts these reviews) have already commented
-            on this PR, treat this as a re-review: for each of your prior findings, check the latest diff and explicitly
-            confirm whether it has been addressed, is still outstanding, or is no longer applicable, and skip re-posting
-            findings that are already resolved. Also read review comments left by other reviewers (humans or other bots)
-            and factor their context into your analysis — do not contradict an accepted resolution and do not duplicate
-            a finding another reviewer has already raised. For each new or still-outstanding finding, post an inline
-            review comment on the exact file and line where the issue lives (use a pull request review with line-level
-            comments, not a single summary comment). If you have no new findings, post a single short top-level comment
-            saying so, including a brief status of any prior findings (resolved / still outstanding).'
+          prompt: ${{ steps.prompt.outputs.prompt }}
           claude_args: >-
-            --model us.anthropic.claude-opus-4-7 --max-turns 20
+            --model us.anthropic.claude-opus-4-7 --max-turns 30
 
       - name: Remove claude-security-reviewing label
         if: always()

From 8eb573e6ed3ee8c102fc06d810944314f884632a Mon Sep 17 00:00:00 2001
From: Tejas Kashinath <tkashina@amazon.com>
Date: Mon, 18 May 2026 14:19:56 -0400
Subject: [PATCH 7/7] ci(security-review): use action's MCP tools for inline
 comments
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Claude Code Action provides bundled MCP servers (github_inline_comment, github_comment) that buffer comments and post them via the action's post-step. Direct REST/octokit/curl calls are not available as tools and would not work even if they were. Per anthropics/claude-code-action source:
- The github_inline_comment MCP server only registers when triggered by a PR-context event AND when its tool name appears in --allowedTools.
- Comments are buffered to /tmp/inline-comments-buffer.jsonl and flushed by post-buffered-inline-comments.ts.

Changes:
- Allow-list mcp__github_inline_comment__create_inline_comment and mcp__github_comment__update_claude_comment via claude_args.
- Rewrite POSTING RESULTS section of the prompt to call those MCP tools by name with the correct schema (path, line, body), and to use update_claude_comment for the summary instead of an issue-comment REST call.
- Document that workflow_dispatch will not post inline comments — use only for prompt-plumbing smoke tests.
- Enable show_full_output so the action transcript is visible in run logs.
---
 .github/prompts/security-review.md       | 78 +++++++++++++++++++-----
 .github/workflows/pr-security-review.yml | 12 +++-
 2 files changed, 72 insertions(+), 18 deletions(-)

diff --git a/.github/prompts/security-review.md b/.github/prompts/security-review.md
index 3b00e75f9..397e15dd8 100644
--- a/.github/prompts/security-review.md
+++ b/.github/prompts/security-review.md
@@ -210,25 +210,54 @@ WORKFLOW (executed in 3 steps):
 
 POSTING RESULTS — read this section CAREFULLY before posting anything:
 
-A. **Re-review awareness.** Before posting, fetch existing PR review comments and issue comments via the GitHub API
-(`GET /repos/{owner}/{repo}/issues/{number}/comments` and `GET /repos/{owner}/{repo}/pulls/{number}/comments`). If your
-own bot identity has already commented on this PR, treat this run as a re-review:
+You have access to two MCP tools provided by the `anthropics/claude-code-action` GitHub Action. These are the ONLY
+correct way to publish findings — do NOT call the GitHub REST API (`gh`, `octokit`, `curl`) and do NOT attempt to use
+`POST /pulls/{n}/reviews` directly. Direct REST calls are not available as tools in this environment.
+
+**Tool A — `mcp__github_inline_comment__create_inline_comment`.** Posts an inline review comment on a specific file and
+line. Each call buffers one comment; the action's post-step posts them all to the PR after this session ends. Call it
+once per finding.
+
+Parameters (all you need for almost every case):
+
+- `path` (string, required): repo-relative file path, e.g. `".github/workflows/pr-security-review.yml"`.
+- `line` (number, required for single-line findings): the diff line number (RIGHT side / new file) where the issue
+  lives.
+- `startLine` (number, optional): pair with `line` for a multi-line range (`startLine` ≤ `line`).
+- `body` (string, required): markdown comment body (see format below).
+- `side` (`"LEFT"` | `"RIGHT"`, default `"RIGHT"`): leave at default unless commenting on a deleted line.
+- Do NOT pass `confirmed`. Letting it default lets the action's classifier filter test/probe-style comments.
+
+**Tool B — `mcp__github_comment__update_claude_comment`.** Updates a single sticky top-level comment on the PR. Only
+parameter is `body` (string). Use this for the no-findings summary or the re-review status summary (see D below).
+
+A. **Re-review awareness.** Before posting, READ the existing PR comments included in the diff/git context above. If
+your own bot identity has already commented on this PR (look for review comments authored by the bot whose token is
+running this workflow), treat this run as a re-review:
 
 - For each of your prior findings, examine the current diff and explicitly determine whether the issue is now resolved,
   still outstanding, or no longer applicable.
 - Skip re-posting findings that are already resolved.
 - Skip re-posting findings that are still on a line you've already commented on (don't duplicate yourself).
 
-B. **Other reviewers.** Read review comments left by other reviewers (humans or other bots) on this PR. Factor their
-context into your analysis: do not contradict an accepted resolution, and do not duplicate a finding another reviewer
-has already raised. If a reviewer has already flagged the same issue, skip it.
+If you cannot directly read prior comments from the diff context, default to assuming this is a fresh review and post
+all current findings.
+
+B. **Other reviewers.** Where review comments left by other reviewers are visible in the context, factor them into your
+analysis: do not contradict an accepted resolution, and do not duplicate a finding another reviewer has already raised.
+
+C. **Inline review comments.** For each new or still-outstanding finding, call
+`mcp__github_inline_comment__create_inline_comment` once with:
 
-C. **Inline review comments.** For each new or still-outstanding finding, post an **inline review comment on the exact
-file and line where the issue lives**. Use a pull request review with line-level comments
-(`POST /repos/{owner}/{repo}/pulls/{number}/reviews` with a `comments` array, each entry having `path`, `line`/`side`,
-and `body`). DO NOT post a single summary comment that lists all findings.
+```json
+{
+  "path": "<file path>",
+  "line": <line number on the new file>,
+  "body": "<comment body in the format below>"
+}
+```
 
-Each inline comment body MUST follow this format:
+The `body` MUST follow this format exactly:
 
 ```
 **Severity:** <HIGH|MEDIUM>
@@ -242,11 +271,28 @@ Each inline comment body MUST follow this format:
 **Recommendation:** <concrete fix>
 ```
 
-D. **No-findings case.** If after filtering you have no new findings to post, post a single short top-level issue
-comment (`POST /repos/{owner}/{repo}/issues/{number}/comments`) saying the security review found no new issues, plus a
-brief status of any prior findings (resolved / still outstanding / no longer applicable).
+When suggesting a code fix, optionally include a GitHub suggestion block at the end of the body:
+
+````
+```suggestion
+<replacement code spanning the entire startLine..line range>
+```
+````
+
+DO NOT post a single summary comment that lists all findings inline. Each finding gets its own `create_inline_comment`
+call.
+
+D. **Summary / no-findings case.** After all `create_inline_comment` calls (or if there are none), call
+`mcp__github_comment__update_claude_comment` ONCE with a short summary:
+
+- If you posted N inline findings, the body should be: `Security review complete. Posted N inline finding(s).` plus a
+  one-line status of any prior findings (resolved / still outstanding / no longer applicable) if this is a re-review.
+- If you posted zero inline findings, the body should be: `Security review complete. No new high-confidence findings.`
+  plus the same prior-findings status line if applicable.
+
+DO NOT post the full per-finding markdown into this summary comment — the inline comments carry the detail.
 
-E. **Review event type.** When submitting the review, use `event: COMMENT` (not `APPROVE` or `REQUEST_CHANGES`). The
-human reviewers own the merge gate; this bot is advisory.
+E. **Tool ordering.** Call `create_inline_comment` for every finding first, then call `update_claude_comment` LAST. Do
+not interleave.
 
 START ANALYSIS now.
diff --git a/.github/workflows/pr-security-review.yml b/.github/workflows/pr-security-review.yml
index 8ceb08070..8ca70b55d 100644
--- a/.github/workflows/pr-security-review.yml
+++ b/.github/workflows/pr-security-review.yml
@@ -8,7 +8,10 @@ on:
   workflow_dispatch:
     inputs:
       pr_number:
-        description: 'PR number to review'
+        description:
+          'PR number to review (note: workflow_dispatch will NOT post inline comments — the action only attaches the
+          inline-comment MCP server on PR-context events. Use this only for end-to-end smoke-testing the prompt
+          plumbing.)'
         required: true
         type: string
 
@@ -209,8 +212,13 @@ jobs:
           github_token: ${{ steps.app-token.outputs.token }}
           use_bedrock: 'true'
           prompt: ${{ steps.prompt.outputs.prompt }}
+          show_full_output: 'true'
+          # Allow-listing these MCP tool names is what tells the action to register
+          # the corresponding MCP servers (github_inline_comment + github_comment).
+          # See anthropics/claude-code-action src/mcp/install-mcp-server.ts.
           claude_args: >-
-            --model us.anthropic.claude-opus-4-7 --max-turns 30
+            --model us.anthropic.claude-opus-4-7 --max-turns 30 --allowedTools
+            mcp__github_inline_comment__create_inline_comment,mcp__github_comment__update_claude_comment
 
       - name: Remove claude-security-reviewing label
         if: always()