From d79da466002616ea5ac734a26a655d83470c45a2 Mon Sep 17 00:00:00 2001 From: David Cramer Date: Sun, 17 May 2026 20:58:37 -0700 Subject: [PATCH 1/2] ref(evals): Move eval harness into workspace package Move the eval harness, fixtures, and scaffold command into a private workspace package so they stay out of the production package surface. Backfill source provenance and license support files for captured fixtures so scenarios can be reproduced at the recorded upstream SHA. Co-Authored-By: GPT-5 Codex --- .github/workflows/evals.yml | 14 +- .npmignore | 3 +- .oxlintrc.json | 7 +- AGENTS.md | 12 +- README.md | 2 +- .../robots-prefix-blocks-public-metadata.json | 15 -- ...y-autofix-settings-get-project-access.json | 15 -- ...ry-group-search-view-visit-visibility.json | 15 -- ...od-size-analysis-base-artifact-access.json | 15 -- ...entry-preprod-snapshot-project-access.json | 15 -- ...elease-threshold-empty-project-filter.json | 15 -- ...-replay-count-project-scope-overwrite.json | 15 -- .../sentry-replay-delete-read-scope.json | 15 -- ...ntry-workflow-connect-workflows-authz.json | 15 -- ...-workflow-open-periods-project-access.json | 15 -- package.json | 22 +- {evals => packages/evals}/INTERNAL.md | 41 ++-- {evals => packages/evals}/README.md | 63 ++++-- .../eval-optional-assertion-rationale.json | 16 +- .../robots-prefix-blocks-public-metadata.json | 30 +++ ...y-vitest-evals-duration-sixty-seconds.json | 13 +- ...evals-github-reporter-positional-json.json | 13 +- .../evals}/eval-bug-detection.yaml | 0 {evals => packages/evals}/eval-precision.yaml | 0 .../evals}/eval-security-scanning.yaml | 0 .../LICENSE.md | 105 +++++++++ .../harness.ts | 0 .../fixtures/ignores-style-issues/utils.ts | 0 .../evals}/fixtures/missing-await/cache.ts | 0 .../fixtures/null-property-access/handler.ts | 0 .../evals}/fixtures/off-by-one/paginator.ts | 0 .../LICENSE.md | 105 +++++++++ .../__static_test.ts | 0 .../LICENSE.md | 105 +++++++++ ...rganization_autofix_automation_settings.py | 0 
.../LICENSE.md | 105 +++++++++ .../group_search_view_visit.py | 0 .../LICENSE.md | 105 +++++++++ .../organization_preprod_size_analysis.py | 0 .../LICENSE.md | 105 +++++++++ .../preprod_artifact_snapshot.py | 0 .../LICENSE.md | 105 +++++++++ .../release_threshold_index.py | 0 .../LICENSE.md | 105 +++++++++ .../replay_counts.py | 0 .../LICENSE.md | 105 +++++++++ .../project_replay_details.py | 0 .../LICENSE.md | 105 +++++++++ .../options_load.py | 0 .../github/getsentry/vitest-evals/LICENSE | 201 ++++++++++++++++++ .../packages/github-reporter/src/utils.ts | 0 .../github/getsentry/vitest-evals/LICENSE | 201 ++++++++++++++++++ .../packages/github-reporter/src/cli.ts | 0 .../LICENSE.md | 105 +++++++++ .../workflow_validator_utils.py | 0 .../LICENSE.md | 105 +++++++++ .../organization_open_periods.py | 0 .../evals}/fixtures/sql-injection/api.ts | 0 .../evals}/fixtures/stale-closure/counter.tsx | 0 .../fixtures/wrong-comparison/validator.ts | 0 .../evals}/fixtures/xss-reflected/server.ts | 0 packages/evals/package.json | 24 +++ .../evals/scripts}/scaffold-eval.ts | 20 +- .../evals}/security-review/BASELINE.md | 0 ...y-autofix-settings-get-project-access.json | 30 +++ ...ry-group-search-view-visit-visibility.json | 30 +++ ...od-size-analysis-base-artifact-access.json | 30 +++ ...entry-preprod-snapshot-project-access.json | 30 +++ ...elease-threshold-empty-project-filter.json | 30 +++ ...-replay-count-project-scope-overwrite.json | 30 +++ .../sentry-replay-delete-read-scope.json | 30 +++ ...try-slack-options-load-unscoped-group.json | 17 +- ...ntry-workflow-connect-workflows-authz.json | 30 +++ ...-workflow-open-periods-project-access.json | 30 +++ .../evals}/skills/bug-detection.md | 0 {evals => packages/evals}/skills/precision.md | 0 .../evals}/skills/security-scanning.md | 0 .../evals/src}/code-review.eval.ts | 2 +- {src/evals => packages/evals/src}/e2e.eval.ts | 0 .../evals/src}/fixtures.test.ts | 0 {src/evals => packages/evals/src}/fixtures.ts | 0 
.../evals/src}/harness.test.ts | 4 +- {src/evals => packages/evals/src}/harness.ts | 4 +- .../evals/src}/index.test.ts | 91 +++++++- {src/evals => packages/evals/src}/index.ts | 30 ++- {src/evals => packages/evals/src}/judge.ts | 10 +- {src/evals => packages/evals/src}/names.ts | 0 .../evals/src}/runner.test.ts | 18 +- {src/evals => packages/evals/src}/runner.ts | 30 ++- .../evals/src}/scaffold.test.ts | 61 ++++++ {src/evals => packages/evals/src}/scaffold.ts | 80 ++++++- .../evals/src}/security-review.eval.ts | 2 +- {src/evals => packages/evals/src}/setup.ts | 2 +- .../evals/src}/types.test.ts | 0 {src/evals => packages/evals/src}/types.ts | 24 ++- {src/evals => packages/evals/src}/usage.ts | 2 +- .../evals/src}/verify.eval.ts | 2 +- .../evals/src}/verify.test.ts | 0 {src/evals => packages/evals/src}/verify.ts | 20 +- packages/evals/tsconfig.json | 20 ++ ...prod-size-analysis-base-artifact-keep.json | 15 ++ .../slack-signature-stub-reject.json | 15 ++ ...flow-open-periods-project-access-keep.json | 15 ++ .../evals/vitest.config.ts | 4 +- packages/evals/vitest.unit.config.ts | 8 + pnpm-lock.yaml | 38 +++- scripts/update-pricing.ts | 7 +- tsconfig.build.json | 4 +- vitest.integration.config.ts | 3 +- warden.toml | 4 +- 110 files changed, 2591 insertions(+), 303 deletions(-) delete mode 100644 evals/code-review/robots-prefix-blocks-public-metadata.json delete mode 100644 evals/security-review/sentry-autofix-settings-get-project-access.json delete mode 100644 evals/security-review/sentry-group-search-view-visit-visibility.json delete mode 100644 evals/security-review/sentry-preprod-size-analysis-base-artifact-access.json delete mode 100644 evals/security-review/sentry-preprod-snapshot-project-access.json delete mode 100644 evals/security-review/sentry-release-threshold-empty-project-filter.json delete mode 100644 evals/security-review/sentry-replay-count-project-scope-overwrite.json delete mode 100644 evals/security-review/sentry-replay-delete-read-scope.json delete 
mode 100644 evals/security-review/sentry-workflow-connect-workflows-authz.json delete mode 100644 evals/security-review/sentry-workflow-open-periods-project-access.json rename {evals => packages/evals}/INTERNAL.md (63%) rename {evals => packages/evals}/README.md (80%) rename {evals => packages/evals}/code-review/eval-optional-assertion-rationale.json (66%) create mode 100644 packages/evals/code-review/robots-prefix-blocks-public-metadata.json rename {evals => packages/evals}/code-review/sentry-vitest-evals-duration-sixty-seconds.json (67%) rename {evals => packages/evals}/code-review/sentry-vitest-evals-github-reporter-positional-json.json (72%) rename {evals => packages/evals}/eval-bug-detection.yaml (100%) rename {evals => packages/evals}/eval-precision.yaml (100%) rename {evals => packages/evals}/eval-security-scanning.yaml (100%) create mode 100644 packages/evals/fixtures/eval-optional-assertion-rationale/LICENSE.md rename {evals => packages/evals}/fixtures/eval-optional-assertion-rationale/harness.ts (100%) rename {evals => packages/evals}/fixtures/ignores-style-issues/utils.ts (100%) rename {evals => packages/evals}/fixtures/missing-await/cache.ts (100%) rename {evals => packages/evals}/fixtures/null-property-access/handler.ts (100%) rename {evals => packages/evals}/fixtures/off-by-one/paginator.ts (100%) create mode 100644 packages/evals/fixtures/robots-prefix-blocks-public-metadata/LICENSE.md rename {evals => packages/evals}/fixtures/robots-prefix-blocks-public-metadata/__static_test.ts (100%) create mode 100644 packages/evals/fixtures/sentry-autofix-settings-get-project-access/LICENSE.md rename {evals => packages/evals}/fixtures/sentry-autofix-settings-get-project-access/organization_autofix_automation_settings.py (100%) create mode 100644 packages/evals/fixtures/sentry-group-search-view-visit-visibility/LICENSE.md rename {evals => packages/evals}/fixtures/sentry-group-search-view-visit-visibility/group_search_view_visit.py (100%) create mode 100644 
packages/evals/fixtures/sentry-preprod-size-analysis-base-artifact-access/LICENSE.md rename {evals => packages/evals}/fixtures/sentry-preprod-size-analysis-base-artifact-access/organization_preprod_size_analysis.py (100%) create mode 100644 packages/evals/fixtures/sentry-preprod-snapshot-project-access/LICENSE.md rename {evals => packages/evals}/fixtures/sentry-preprod-snapshot-project-access/preprod_artifact_snapshot.py (100%) create mode 100644 packages/evals/fixtures/sentry-release-threshold-empty-project-filter/LICENSE.md rename {evals => packages/evals}/fixtures/sentry-release-threshold-empty-project-filter/release_threshold_index.py (100%) create mode 100644 packages/evals/fixtures/sentry-replay-count-project-scope-overwrite/LICENSE.md rename {evals => packages/evals}/fixtures/sentry-replay-count-project-scope-overwrite/replay_counts.py (100%) create mode 100644 packages/evals/fixtures/sentry-replay-delete-read-scope/LICENSE.md rename {evals => packages/evals}/fixtures/sentry-replay-delete-read-scope/project_replay_details.py (100%) create mode 100644 packages/evals/fixtures/sentry-slack-options-load-unscoped-group/LICENSE.md rename {evals => packages/evals}/fixtures/sentry-slack-options-load-unscoped-group/options_load.py (100%) create mode 100644 packages/evals/fixtures/sentry-vitest-evals-duration-sixty-seconds/github/getsentry/vitest-evals/LICENSE rename {evals => packages/evals}/fixtures/sentry-vitest-evals-duration-sixty-seconds/github/getsentry/vitest-evals/packages/github-reporter/src/utils.ts (100%) create mode 100644 packages/evals/fixtures/sentry-vitest-evals-github-reporter-positional-json/github/getsentry/vitest-evals/LICENSE rename {evals => packages/evals}/fixtures/sentry-vitest-evals-github-reporter-positional-json/github/getsentry/vitest-evals/packages/github-reporter/src/cli.ts (100%) create mode 100644 packages/evals/fixtures/sentry-workflow-connect-workflows-authz/LICENSE.md rename {evals => 
packages/evals}/fixtures/sentry-workflow-connect-workflows-authz/workflow_validator_utils.py (100%) create mode 100644 packages/evals/fixtures/sentry-workflow-open-periods-project-access/LICENSE.md rename {evals => packages/evals}/fixtures/sentry-workflow-open-periods-project-access/organization_open_periods.py (100%) rename {evals => packages/evals}/fixtures/sql-injection/api.ts (100%) rename {evals => packages/evals}/fixtures/stale-closure/counter.tsx (100%) rename {evals => packages/evals}/fixtures/wrong-comparison/validator.ts (100%) rename {evals => packages/evals}/fixtures/xss-reflected/server.ts (100%) create mode 100644 packages/evals/package.json rename {scripts => packages/evals/scripts}/scaffold-eval.ts (80%) rename {evals => packages/evals}/security-review/BASELINE.md (100%) create mode 100644 packages/evals/security-review/sentry-autofix-settings-get-project-access.json create mode 100644 packages/evals/security-review/sentry-group-search-view-visit-visibility.json create mode 100644 packages/evals/security-review/sentry-preprod-size-analysis-base-artifact-access.json create mode 100644 packages/evals/security-review/sentry-preprod-snapshot-project-access.json create mode 100644 packages/evals/security-review/sentry-release-threshold-empty-project-filter.json create mode 100644 packages/evals/security-review/sentry-replay-count-project-scope-overwrite.json create mode 100644 packages/evals/security-review/sentry-replay-delete-read-scope.json rename {evals => packages/evals}/security-review/sentry-slack-options-load-unscoped-group.json (54%) create mode 100644 packages/evals/security-review/sentry-workflow-connect-workflows-authz.json create mode 100644 packages/evals/security-review/sentry-workflow-open-periods-project-access.json rename {evals => packages/evals}/skills/bug-detection.md (100%) rename {evals => packages/evals}/skills/precision.md (100%) rename {evals => packages/evals}/skills/security-scanning.md (100%) rename {src/evals => 
packages/evals/src}/code-review.eval.ts (95%) rename {src/evals => packages/evals/src}/e2e.eval.ts (100%) rename {src/evals => packages/evals/src}/fixtures.test.ts (100%) rename {src/evals => packages/evals/src}/fixtures.ts (100%) rename {src/evals => packages/evals/src}/harness.test.ts (97%) rename {src/evals => packages/evals/src}/harness.ts (97%) rename {src/evals => packages/evals/src}/index.test.ts (78%) rename {src/evals => packages/evals/src}/index.ts (88%) rename {src/evals => packages/evals/src}/judge.ts (95%) rename {src/evals => packages/evals/src}/names.ts (100%) rename {src/evals => packages/evals/src}/runner.test.ts (88%) rename {src/evals => packages/evals/src}/runner.ts (87%) rename {src/evals => packages/evals/src}/scaffold.test.ts (70%) rename {src/evals => packages/evals/src}/scaffold.ts (79%) rename {src/evals => packages/evals/src}/security-review.eval.ts (94%) rename {src/evals => packages/evals/src}/setup.ts (90%) rename {src/evals => packages/evals/src}/types.test.ts (100%) rename {src/evals => packages/evals/src}/types.ts (86%) rename {src/evals => packages/evals/src}/usage.ts (93%) rename {src/evals => packages/evals/src}/verify.eval.ts (95%) rename {src/evals => packages/evals/src}/verify.test.ts (100%) rename {src/evals => packages/evals/src}/verify.ts (93%) create mode 100644 packages/evals/tsconfig.json rename {evals => packages/evals}/verification/preprod-size-analysis-base-artifact-keep.json (60%) rename {evals => packages/evals}/verification/slack-signature-stub-reject.json (59%) rename {evals => packages/evals}/verification/workflow-open-periods-project-access-keep.json (61%) rename vitest.evals.config.ts => packages/evals/vitest.config.ts (88%) create mode 100644 packages/evals/vitest.unit.config.ts diff --git a/.github/workflows/evals.yml b/.github/workflows/evals.yml index c2ed6b40..36952b72 100644 --- a/.github/workflows/evals.yml +++ b/.github/workflows/evals.yml @@ -11,13 +11,10 @@ on: push: branches: [main] paths: - - 
"evals/**" - - "scripts/scaffold-eval.ts" - - "src/evals/**" + - "packages/evals/**" - ".github/workflows/evals.yml" - "package.json" - "pnpm-lock.yaml" - - "vitest.evals.config.ts" pull_request: types: [opened, synchronize, reopened, labeled] @@ -33,13 +30,10 @@ jobs: with: script: | const evalPaths = [ - 'evals/', - 'scripts/scaffold-eval.ts', - 'src/evals/', + 'packages/evals/', '.github/workflows/evals.yml', 'package.json', 'pnpm-lock.yaml', - 'vitest.evals.config.ts', ]; function setRun(run, reason) { @@ -113,8 +107,8 @@ jobs: - name: Run evals env: - VITEST_EVALS_JSON: eval-results.json - VITEST_EVALS_JUNIT: eval-results.junit.xml + VITEST_EVALS_JSON: ../../eval-results.json + VITEST_EVALS_JUNIT: ../../eval-results.junit.xml run: | set +e pnpm evals diff --git a/.npmignore b/.npmignore index ac5482f2..4073c678 100644 --- a/.npmignore +++ b/.npmignore @@ -15,6 +15,7 @@ action.yml *.test.d.ts **/*.test.* vitest*.config.ts +coverage/ # Environment and secrets (.npmignore overrides .gitignore) .env @@ -24,6 +25,7 @@ vitest*.config.ts .github/ .agents/ .codex/ +.cursor/ .warden/ .claude/ .dex/ @@ -34,7 +36,6 @@ pnpm-lock.yaml pnpm-workspace.yaml # Evals and dev scripts -evals/ /scripts/ superwarden-bench/ diff --git a/.oxlintrc.json b/.oxlintrc.json index f8748612..d4a26718 100644 --- a/.oxlintrc.json +++ b/.oxlintrc.json @@ -12,10 +12,11 @@ "ignorePatterns": [ "dist/**", "node_modules/**", + "coverage/**", "*.js", "examples/**", - "scripts/**", - "vitest*.config.ts" + "packages/docs/**/*.astro", + "packages/evals/fixtures/**" ], "rules": { "constructor-super": "error", @@ -166,4 +167,4 @@ } } ] -} \ No newline at end of file +} diff --git a/AGENTS.md b/AGENTS.md index f40f3aa3..574387e0 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -23,6 +23,7 @@ TELEMETRY.md # Sentry telemetry investigation map; points to Junior's packages/ ├── docs/ # Astro docs site (dex-docs), deployed via Vercel +├── evals/ # Private eval package: runner, scenarios, fixtures, test skills src/ # 
@sentry/warden core (root package) ├── index.ts # Library entry point @@ -37,17 +38,8 @@ src/ # @sentry/warden core (root package) ├── cli/ # CLI entry and commands │ └── output/ # CLI output formatting ├── action/ # GitHub Action entry -├── evals/ # Eval runner, judge, and types ├── utils/ # Shared utilities └── examples/ # Example configurations - -evals/ # Eval specs, fixtures, and test skills (see evals/README.md) -├── eval-*.yaml # Harness smoke suites using eval-* test skills -├── code-review/ # Code-review benchmark scenarios -├── security-review/ # Security-review benchmark scenarios -├── verification/ # Verifier-only eval scenarios -├── skills/ # Test skills used as eval vehicles -└── fixtures/ # Source code with known issues ``` ## Key Conventions @@ -132,7 +124,7 @@ Skills define **what to look for**, not how to respond to findings: ## Evals -End-to-end evals for the full pipeline plus verifier-only evals. The Vitest entrypoints are split as `src/evals/e2e.eval.ts`, `src/evals/code-review.eval.ts`, `src/evals/security-review.eval.ts`, and `src/evals/verify.eval.ts`. See [`evals/INTERNAL.md`](evals/INTERNAL.md) for maintainer workflow and [`evals/README.md`](evals/README.md) for schemas. Run with `pnpm evals`; scaffold PR fixtures with `pnpm evals:scaffold `. +End-to-end evals for the full pipeline plus verifier-only evals live in `packages/evals/`. See [`packages/evals/INTERNAL.md`](packages/evals/INTERNAL.md) for maintainer workflow and [`packages/evals/README.md`](packages/evals/README.md) for schemas. Run with `pnpm evals`; scaffold PR fixtures with `pnpm evals:scaffold `. ## Voice diff --git a/README.md b/README.md index 7295a81f..1020acbc 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ pnpm test:coverage # unit tests with LCOV coverage pnpm evals # end-to-end evals (requires API key) ``` -See [`evals/README.md`](evals/README.md) for the eval framework. +See [`packages/evals/README.md`](packages/evals/README.md) for the eval framework. 
## License diff --git a/evals/code-review/robots-prefix-blocks-public-metadata.json b/evals/code-review/robots-prefix-blocks-public-metadata.json deleted file mode 100644 index 02700ac8..00000000 --- a/evals/code-review/robots-prefix-blocks-public-metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "given": "a robots.txt golden test disallows a prefix that also blocks a documented public metadata endpoint", - "files": [ - "fixtures/robots-prefix-blocks-public-metadata/__static_test.ts" - ], - "should_find": [ - { - "finding": "robots.txt Disallow /mcp is a prefix rule that blocks the public /mcp.json metadata endpoint from crawlers", - "severity": "high" - } - ], - "should_not_find": [ - "the test file being test-only makes the finding low severity" - ] -} diff --git a/evals/security-review/sentry-autofix-settings-get-project-access.json b/evals/security-review/sentry-autofix-settings-get-project-access.json deleted file mode 100644 index 50ec2020..00000000 --- a/evals/security-review/sentry-autofix-settings-get-project-access.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "given": "Autofix automation settings GET lists every project in the organization instead of using OrganizationEndpoint.get_projects", - "files": [ - "fixtures/sentry-autofix-settings-get-project-access/organization_autofix_automation_settings.py" - ], - "should_find": [ - { - "finding": "Autofix automation settings GET queries all organization projects directly and returns settings for projects the caller cannot access" - } - ], - "should_not_find": [ - "repository provider validation", - "audit log contents" - ] -} diff --git a/evals/security-review/sentry-group-search-view-visit-visibility.json b/evals/security-review/sentry-group-search-view-visit-visibility.json deleted file mode 100644 index 8bbf5105..00000000 --- a/evals/security-review/sentry-group-search-view-visit-visibility.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "given": "group search view visit endpoint updates private views by ID without 
applying the existing object permission or visibility checks", - "files": [ - "fixtures/sentry-group-search-view-visit-visibility/group_search_view_visit.py" - ], - "should_find": [ - { - "finding": "visit endpoint loads a GroupSearchView by organization and id only, allowing any org member to mark private views as visited and learn or mutate metadata without object permission checks" - } - ], - "should_not_find": [ - "missing authentication as the primary issue", - "timezone usage" - ] -} diff --git a/evals/security-review/sentry-preprod-size-analysis-base-artifact-access.json b/evals/security-review/sentry-preprod-size-analysis-base-artifact-access.json deleted file mode 100644 index d272fd02..00000000 --- a/evals/security-review/sentry-preprod-size-analysis-base-artifact-access.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "given": "public size-analysis endpoint accepts a baseArtifactId and loads the base artifact by organization only", - "files": [ - "fixtures/sentry-preprod-size-analysis-base-artifact-access/organization_preprod_size_analysis.py" - ], - "should_find": [ - { - "finding": "baseArtifactId lets a caller attach an inaccessible project artifact as the comparison base because the lookup checks organization_id but not project access" - } - ], - "should_not_find": [ - "integer parsing error handling", - "response field naming" - ] -} diff --git a/evals/security-review/sentry-preprod-snapshot-project-access.json b/evals/security-review/sentry-preprod-snapshot-project-access.json deleted file mode 100644 index 3d1a6ac2..00000000 --- a/evals/security-review/sentry-preprod-snapshot-project-access.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "given": "preprod snapshot endpoint fetches artifacts by organization only and returns or deletes project artifacts without checking project membership", - "files": [ - "fixtures/sentry-preprod-snapshot-project-access/preprod_artifact_snapshot.py" - ], - "should_find": [ - { - "finding": "preprod snapshot GET and DELETE load 
artifacts by organization_id only and do not call has_project_access before exposing manifests or deleting artifacts" - } - ], - "should_not_find": [ - "exception logging", - "missing rate limiting" - ] -} diff --git a/evals/security-review/sentry-release-threshold-empty-project-filter.json b/evals/security-review/sentry-release-threshold-empty-project-filter.json deleted file mode 100644 index fdafd26a..00000000 --- a/evals/security-review/sentry-release-threshold-empty-project-filter.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "given": "release threshold endpoint builds an empty ORM filter when the caller has no accessible projects", - "files": [ - "fixtures/sentry-release-threshold-empty-project-filter/release_threshold_index.py" - ], - "should_find": [ - { - "finding": "ReleaseThreshold query omits project and organization scoping when get_projects returns an empty list, allowing cross-project or cross-organization threshold disclosure" - } - ], - "should_not_find": [ - "generic missing serializer validation", - "missing rate limiting" - ] -} diff --git a/evals/security-review/sentry-replay-count-project-scope-overwrite.json b/evals/security-review/sentry-replay-count-project-scope-overwrite.json deleted file mode 100644 index 5d608477..00000000 --- a/evals/security-review/sentry-replay-count-project-scope-overwrite.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "given": "replay count helper replaces request-scoped projects with projects from issue IDs resolved across the organization", - "files": [ - "fixtures/sentry-replay-count-project-scope-overwrite/replay_counts.py" - ], - "should_find": [ - { - "finding": "issue.id replay count path overwrites snuba_params.projects with projects from all matching organization groups, bypassing the caller's project access scope" - } - ], - "should_not_find": [ - "query string parsing as the primary issue", - "missing pagination" - ] -} diff --git a/evals/security-review/sentry-replay-delete-read-scope.json 
b/evals/security-review/sentry-replay-delete-read-scope.json deleted file mode 100644 index 3883e02e..00000000 --- a/evals/security-review/sentry-replay-delete-read-scope.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "given": "replay detail endpoint grants DELETE to project:read and then enqueues destructive replay deletion", - "files": [ - "fixtures/sentry-replay-delete-read-scope/project_replay_details.py" - ], - "should_find": [ - { - "finding": "DELETE accepts project:read scope, so read-only project users can permanently delete replay data" - } - ], - "should_not_find": [ - "missing UUID validation", - "missing feature flag check" - ] -} diff --git a/evals/security-review/sentry-workflow-connect-workflows-authz.json b/evals/security-review/sentry-workflow-connect-workflows-authz.json deleted file mode 100644 index 70f9e6bf..00000000 --- a/evals/security-review/sentry-workflow-connect-workflows-authz.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "given": "detector-to-workflow connection validates target workflows exist in the organization but does not check permission to modify those workflows", - "files": [ - "fixtures/sentry-workflow-connect-workflows-authz/workflow_validator_utils.py" - ], - "should_find": [ - { - "finding": "connect_detectors_to_workflows permits linking a detector to arbitrary organization workflows because validate_workflows_exist performs no authorization check" - } - ], - "should_not_find": [ - "detector ID enumeration", - "bulk operation performance" - ] -} diff --git a/evals/security-review/sentry-workflow-open-periods-project-access.json b/evals/security-review/sentry-workflow-open-periods-project-access.json deleted file mode 100644 index 573a6d52..00000000 --- a/evals/security-review/sentry-workflow-open-periods-project-access.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "given": "workflow open-period endpoint resolves detectorId or groupId by organization only and returns issue activity without project permission checks", - "files": [ - 
"fixtures/sentry-workflow-open-periods-project-access/organization_open_periods.py" - ], - "should_find": [ - { - "finding": "open-period lookup accepts detectorId or groupId from any project in the organization and returns group activity without checking the caller's project access" - } - ], - "should_not_find": [ - "date parsing", - "missing detectorId format validation" - ] -} diff --git a/package.json b/package.json index e05d8d6b..60ccd7b7 100644 --- a/package.json +++ b/package.json @@ -15,15 +15,15 @@ "build": "pnpm run clean:dist && tsc -p tsconfig.build.json", "build:action": "rm -rf dist/action && ncc build src/action/main.ts -o dist/action --no-source-map-register --license licenses.txt", "dev": "tsc --watch", - "lint": "oxlint src", - "lint:fix": "oxlint --fix src", + "lint": "oxlint .", + "lint:fix": "oxlint --fix .", "test": "vitest run", "test:coverage": "vitest run --coverage", "test:watch": "vitest", "test:examples": "vitest run --config vitest.integration.config.ts", - "evals": "vitest run --config vitest.evals.config.ts", - "evals:scaffold": "tsx scripts/scaffold-eval.ts", - "typecheck": "tsc --noEmit", + "evals": "pnpm -C packages/evals evals", + "evals:scaffold": "pnpm -C packages/evals scaffold", + "typecheck": "tsc --noEmit && pnpm -C packages/evals typecheck", "update-pricing": "tsx scripts/update-pricing.ts", "generate:jsonl-schema": "tsx scripts/generate-jsonl-schema.ts", "docs": "pnpm --filter dex-docs dev", @@ -36,6 +36,15 @@ "src/**/*.ts": [ "oxlint --fix" ], + "packages/evals/src/**/*.ts": [ + "oxlint --fix" + ], + "packages/evals/scripts/**/*.ts": [ + "oxlint --fix" + ], + "packages/evals/*.ts": [ + "oxlint --fix" + ], "packages/docs/**/*.astro": [ "pnpm -C packages/docs build" ] @@ -89,8 +98,7 @@ "tinyrainbow": "^3.0.3", "tsx": "^4.19.0", "typescript": "^5.9.3", - "vitest": "^4.1.6", - "vitest-evals": "0.9.0-beta.3" + "vitest": "^4.1.6" }, "engines": { "node": ">=20.0.0" diff --git a/evals/INTERNAL.md b/packages/evals/INTERNAL.md 
similarity index 63% rename from evals/INTERNAL.md rename to packages/evals/INTERNAL.md index a771f720..2f46e283 100644 --- a/evals/INTERNAL.md +++ b/packages/evals/INTERNAL.md @@ -13,10 +13,10 @@ pnpm evals -t verification pnpm evals:scaffold https://github.com/getsentry/sentry/pull/12345 ``` -CI runs evals for changes under `evals/`, `src/evals/`, the eval workflow, or -the eval config. Add the `run-evals` label to a same-repository PR to force a -run when runtime or prompt changes need benchmark coverage. Fork PRs do not get -eval secrets. +CI runs evals for changes under `packages/evals/`, the eval workflow, package +metadata, or the lockfile. Add the `run-evals` label to a same-repository PR to +force a run when runtime or prompt changes need benchmark coverage. Fork PRs do +not get eval secrets. The raw Vitest eval command can exit non-zero when individual evals miss. CI still records the JSON and JUnit reports, publishes JUnit annotations and a job @@ -32,17 +32,17 @@ source finding is not actually a reachable bug under the target skill. ## Eval Layers -- `evals/*.yaml`: small full-pipeline suites using test skills. -- `evals/code-review/*.json`: one full-pipeline code-review scenario per file. -- `evals/security-review/*.json`: one full-pipeline security-review scenario per file. -- `evals/verification/*.json`: one candidate finding sent directly to `verifyFindings`. -- `evals/fixtures/*`: checked-in fixture source code. Eval runs copy these files +- `packages/evals/*.yaml`: small full-pipeline suites using test skills. +- `packages/evals/code-review/*.json`: one full-pipeline code-review scenario per file. +- `packages/evals/security-review/*.json`: one full-pipeline security-review scenario per file. +- `packages/evals/verification/*.json`: one candidate finding sent directly to `verifyFindings`. +- `packages/evals/fixtures/*`: checked-in fixture source code. 
Eval runs copy these files into temporary git repos under the OS temp directory, preserving paths under - `evals/fixtures/`. -- `src/evals/e2e.eval.ts`: generic YAML full-pipeline suites. -- `src/evals/code-review.eval.ts`: code-review correctness benchmark scenarios. -- `src/evals/security-review.eval.ts`: security-review benchmark scenarios. -- `src/evals/verify.eval.ts`: verifier-only scenarios. + `packages/evals/fixtures/`. +- `packages/evals/src/e2e.eval.ts`: generic YAML full-pipeline suites. +- `packages/evals/src/code-review.eval.ts`: code-review correctness benchmark scenarios. +- `packages/evals/src/security-review.eval.ts`: security-review benchmark scenarios. +- `packages/evals/src/verify.eval.ts`: verifier-only scenarios. Eval names should read as `/`. Runtime and model belong to the suite configuration, not the case identity. Avoid category names that hide the @@ -50,12 +50,17 @@ real skill under test. ## Adding Full-Pipeline Evals -1. Add or scaffold a scenario JSON file under `evals//`. -2. Add focused, checked-in fixture files under `evals/fixtures//`. +1. Add or scaffold a scenario JSON file under `packages/evals//`. +2. Add focused, checked-in fixture files under `packages/evals/fixtures//`. GitHub scaffolds use - `evals/fixtures//github///` to + `packages/evals/fixtures//github///` to preserve source context while eval output uses `/`. Scaffolded source repositories are still passed to prompts as repository context. + They also record `notes.repository`, `notes.source_ref`, and + `notes.source_files` so the original source state can be reproduced with + `git clone`, `git checkout`, and `git show :`. + The scaffolder also copies the root LICENSE-like file into `supporting_files` + so source provenance travels with the fixture without entering the eval diff. 3. Write a specific `should_find` assertion for the verified bug, not for the current Warden output, and add useful `should_not_find` guards. 4. 
Run the narrow case first with `pnpm evals -t `. @@ -71,7 +76,7 @@ tighten it before committing. Use verifier-only evals when discovery found a real candidate but verification dropped it, or when verification must reject a known false positive. -Each `evals/verification/*.json` file contains: +Each `packages/evals/verification/*.json` file contains: - `files`: repo context for the verifier to inspect - `candidate`: the exact finding object to verify diff --git a/evals/README.md b/packages/evals/README.md similarity index 80% rename from evals/README.md rename to packages/evals/README.md index 56e3bb5e..a9235362 100644 --- a/evals/README.md +++ b/packages/evals/README.md @@ -24,10 +24,10 @@ real. ## Eval Formats -Small suites can use YAML files at the top level of `evals/`. These are mostly +Small suites can use YAML files at the top level of `packages/evals/`. These are mostly generic harness smoke suites that use `eval-*` test skills. Product-facing benchmark suites should prefer one JSON file per scenario under the real skill -name, such as `evals/code-review/` or `evals/security-review/`. +name, such as `packages/evals/code-review/` or `packages/evals/security-review/`. ```yaml skill: skills/bug-detection.md @@ -78,7 +78,7 @@ case. ## Eval Structure ``` -evals/ +packages/evals/ ├── README.md ├── eval-bug-detection.yaml # Harness smoke suite using eval-bug-detection ├── eval-security-scanning.yaml # Harness smoke suite using eval-security-scanning @@ -124,10 +124,10 @@ over multiple runtimes or models without changing case identity. The Vitest entrypoints are intentionally split by eval layer: -- `src/evals/e2e.eval.ts`: generic YAML full-pipeline suites. -- `src/evals/code-review.eval.ts`: code-review correctness benchmark scenarios. -- `src/evals/security-review.eval.ts`: security-review benchmark scenarios. -- `src/evals/verify.eval.ts`: verifier-only scenarios from `evals/verification/`. +- `packages/evals/src/e2e.eval.ts`: generic YAML full-pipeline suites. 
+- `packages/evals/src/code-review.eval.ts`: code-review correctness benchmark scenarios. +- `packages/evals/src/security-review.eval.ts`: security-review benchmark scenarios. +- `packages/evals/src/verify.eval.ts`: verifier-only scenarios from `packages/evals/verification/`. ## YAML Schema @@ -135,7 +135,7 @@ The Vitest entrypoints are intentionally split by eval layer: | Field | Required | Description | |-------|----------|-------------| -| `skill` | Yes | Path to test skill, relative to `evals/` | +| `skill` | Yes | Path to test skill, relative to `packages/evals/` | | `runtime` | No | Default runtime for all evals: `claude` or `pi` (default: `claude`) | | `model` | No | Default model for all evals (default: `claude-sonnet-4-6`; Pi models must use provider/model format, e.g. `anthropic/claude-sonnet-4-6`) | | `evals` | Yes | List of eval scenarios (at least one) | @@ -146,7 +146,8 @@ The Vitest entrypoints are intentionally split by eval layer: |-------|----------|-------------| | `name` | Yes | Scenario name (used in test output) | | `given` | Yes | What code/situation the eval sets up (BDD "given") | -| `files` | Yes | Fixture files, relative to `evals/` | +| `files` | Yes | Fixture files, relative to `packages/evals/` | +| `supporting_files` | No | Context files, relative to `packages/evals/`, copied into the temp repo before the eval diff | | `model` | No | Model override for this scenario | | `runtime` | No | Runtime override for this scenario | | `should_find` | Yes | What the pipeline should detect (at least one) | @@ -155,13 +156,16 @@ The Vitest entrypoints are intentionally split by eval layer: | `should_find[].required` | No | If true (default), eval fails when not found | | `should_not_find` | No | Things the pipeline should NOT report (precision) | | `notes` | No | Maintainer-only provenance, ignored by eval execution | +| `notes.repository` | No | Source repository for GitHub-captured fixtures, e.g. 
`getsentry/sentry` | +| `notes.source_ref` | No | Exact source commit SHA to check out for reproducing GitHub-captured fixture state | +| `notes.source_files` | No | Mapping from checked-in fixture files to source repository paths at `notes.source_ref` | Standalone JSON scenario files may omit `name`; it defaults to the JSON filename without `.json`. ## Verification Evals -Verifier-only evals live in `evals/verification/`. They feed one candidate +Verifier-only evals live in `packages/evals/verification/`. They feed one candidate finding directly into Warden's verification pass and assert whether it should be kept or rejected. Use them when a full pipeline eval finds the right issue and a later verification pass drops it, or when the verifier must reject a known false @@ -237,15 +241,15 @@ to improve discovery, verification, merging, or judging later. ## Adding a New Eval -1. Pick an existing skill directory, or create `evals/<skill>/` -2. Add a YAML scenario entry for harness smoke suites or create `evals/<skill>/<scenario>.json` -3. Create checked-in fixture files under `evals/fixtures/<scenario>/` +1. Pick an existing skill directory, or create `packages/evals/<skill>/` +2. Add a YAML scenario entry for harness smoke suites or create `packages/evals/<skill>/<scenario>.json` +3. Create checked-in fixture files under `packages/evals/fixtures/<scenario>/` 4. Run `pnpm evals` to verify -If a new category needs a different test skill, add it to `evals/skills/`. +If a new category needs a different test skill, add it to `packages/evals/skills/`. To exercise a built-in directory-format skill, point `skill` at its `SKILL.md` -relative to `evals/`, for example -`../src/builtin-skills/security-review/SKILL.md`. +relative to `packages/evals/`, for example +`../../src/builtin-skills/security-review/SKILL.md`. ### Scaffolding From GitHub @@ -264,14 +268,30 @@ The scaffold writes a `TODO` `should_find` assertion. That stub is expected to fail until you replace it with the exact expected finding, and it should not be committed as-is.
-Scaffolded GitHub fixtures include source context in their paths: -`evals/fixtures/<scenario>/github/<owner>/<repo>/<path>`. +Source-captured fixtures include source context in their paths: +`packages/evals/fixtures/<scenario>/github/<owner>/<repo>/<path>`. Eval runs copy them into the temp repo as `<scenario>/<path>` so test output stays focused on the case and original source file. The source repository is still included in prompt context and `notes.repository`. +Scaffolded and backfilled source fixtures also record `notes.source_ref` and +`notes.source_files`, so maintainers can recreate the captured source state +with: + +```bash +git clone https://github.com/<owner>/<repo>.git +git -C <repo> checkout <source_ref> +git -C <repo> show <source_ref>:<sourcePath> +``` + Hand-written fixtures can stay shorter when the source repository path is not useful. +Scaffolded GitHub fixtures also copy the source repository's root LICENSE-like +file into `supporting_files`. Supporting files are available in the temp repo +for provenance and context, but are committed before the eval branch so they do +not appear in the review diff. Scaffolding fails when no root license file can +be found at the captured ref. + When a scaffold skips files, it records them in `notes.skipped_files` and prints them in CLI output. Review that list before committing the eval. @@ -288,11 +308,12 @@ them in CLI output. Review that list before committing the eval. ## How It Works -1. **Discovery**: Scan `evals/` for YAML suites and JSON scenario directories +1. **Discovery**: Scan `packages/evals/` for YAML suites and JSON scenario directories 2. **Loading**: Parse YAML/JSON, validate with Zod, resolve paths 3. **Git repo**: Copy checked-in fixtures into a temp repo, preserving paths - under `evals/fixtures/`, and commit them on an `eval` branch (empty `main` - as base), so the agent has a real repo to explore + under `packages/evals/fixtures/`, copy supporting files onto `main`, and + commit fixture files on an `eval` branch, so the agent has a real repo to + explore 4.
**Context**: Build `EventContext` from real `git diff main...eval` 5. **Execution**: Run the skill via `runSkill()` with the real SDK pipeline; the agent operates in the temp repo with Read/Grep tools diff --git a/evals/code-review/eval-optional-assertion-rationale.json b/packages/evals/code-review/eval-optional-assertion-rationale.json similarity index 66% rename from evals/code-review/eval-optional-assertion-rationale.json rename to packages/evals/code-review/eval-optional-assertion-rationale.json index 47a5f08a..166ee789 100644 --- a/evals/code-review/eval-optional-assertion-rationale.json +++ b/packages/evals/code-review/eval-optional-assertion-rationale.json @@ -15,6 +15,18 @@ ], "notes": { "source": "https://github.com/getsentry/warden/pull/327", - "body": "Regression seed from Warden code-review finding: optional should_find assertions were allowed to miss for score calculation but still appeared as failure reasons in eval reports." - } + "body": "Regression seed from Warden code-review finding: optional should_find assertions were allowed to miss for score calculation but still appeared as failure reasons in eval reports.", + "repository": "getsentry/warden", + "source_ref": "5a8d78ffe5c7efc4678cfd1ae4cd9cc8126f6031", + "source_files": [ + { + "fixturePath": "fixtures/eval-optional-assertion-rationale/harness.ts", + "sourcePath": "src/evals/harness.ts", + "ref": "5a8d78ffe5c7efc4678cfd1ae4cd9cc8126f6031" + } + ] + }, + "supporting_files": [ + "fixtures/eval-optional-assertion-rationale/LICENSE.md" + ] } diff --git a/packages/evals/code-review/robots-prefix-blocks-public-metadata.json b/packages/evals/code-review/robots-prefix-blocks-public-metadata.json new file mode 100644 index 00000000..aabf32ab --- /dev/null +++ b/packages/evals/code-review/robots-prefix-blocks-public-metadata.json @@ -0,0 +1,30 @@ +{ + "given": "a robots.txt golden test disallows a prefix that also blocks a documented public metadata endpoint", + "files": [ + 
"fixtures/robots-prefix-blocks-public-metadata/__static_test.ts" + ], + "should_find": [ + { + "finding": "robots.txt Disallow /mcp is a prefix rule that blocks the public /mcp.json metadata endpoint from crawlers", + "severity": "high" + } + ], + "should_not_find": [ + "the test file being test-only makes the finding low severity" + ], + "supporting_files": [ + "fixtures/robots-prefix-blocks-public-metadata/LICENSE.md" + ], + "notes": { + "source": "https://github.com/getsentry/warden/issues/317", + "repository": "getsentry/warden", + "source_ref": "69ea9ccd221a02cdcf39e7811609a2f83f2998c5", + "source_files": [ + { + "fixturePath": "fixtures/robots-prefix-blocks-public-metadata/__static_test.ts", + "sourcePath": "evals/fixtures/robots-prefix-blocks-public-metadata/__static_test.ts", + "ref": "69ea9ccd221a02cdcf39e7811609a2f83f2998c5" + } + ] + } +} diff --git a/evals/code-review/sentry-vitest-evals-duration-sixty-seconds.json b/packages/evals/code-review/sentry-vitest-evals-duration-sixty-seconds.json similarity index 67% rename from evals/code-review/sentry-vitest-evals-duration-sixty-seconds.json rename to packages/evals/code-review/sentry-vitest-evals-duration-sixty-seconds.json index 7a5f9cdc..05216684 100644 --- a/evals/code-review/sentry-vitest-evals-duration-sixty-seconds.json +++ b/packages/evals/code-review/sentry-vitest-evals-duration-sixty-seconds.json @@ -3,6 +3,9 @@ "files": [ "fixtures/sentry-vitest-evals-duration-sixty-seconds/github/getsentry/vitest-evals/packages/github-reporter/src/utils.ts" ], + "supporting_files": [ + "fixtures/sentry-vitest-evals-duration-sixty-seconds/github/getsentry/vitest-evals/LICENSE" + ], "should_find": [ { "finding": "formatDuration rounds display seconds without normalizing carry into the next unit, so values near minute boundaries can render as 60s or 1m 60s instead of 1m 0s or 2m 0s" @@ -17,7 +20,15 @@ "notes": { "source": "https://github.com/getsentry/vitest-evals/pull/56#discussion_r3255240096", "repository": 
"getsentry/vitest-evals", + "source_ref": "72d64e16efcfa5c50c2f1091b24b2dfacbcf1b3d", "side": "head", - "body": "Regression seed from Cursor Bugbot. Verified real: rounding seconds without carrying into the next unit can produce unnormalized duration strings at minute boundaries, including the reported Xm 60s case." + "body": "Regression seed from Cursor Bugbot. Verified real: rounding seconds without carrying into the next unit can produce unnormalized duration strings at minute boundaries, including the reported Xm 60s case.", + "source_files": [ + { + "fixturePath": "fixtures/sentry-vitest-evals-duration-sixty-seconds/github/getsentry/vitest-evals/packages/github-reporter/src/utils.ts", + "sourcePath": "packages/github-reporter/src/utils.ts", + "ref": "72d64e16efcfa5c50c2f1091b24b2dfacbcf1b3d" + } + ] } } diff --git a/evals/code-review/sentry-vitest-evals-github-reporter-positional-json.json b/packages/evals/code-review/sentry-vitest-evals-github-reporter-positional-json.json similarity index 72% rename from evals/code-review/sentry-vitest-evals-github-reporter-positional-json.json rename to packages/evals/code-review/sentry-vitest-evals-github-reporter-positional-json.json index 22827791..db0f5732 100644 --- a/evals/code-review/sentry-vitest-evals-github-reporter-positional-json.json +++ b/packages/evals/code-review/sentry-vitest-evals-github-reporter-positional-json.json @@ -3,6 +3,9 @@ "files": [ "fixtures/sentry-vitest-evals-github-reporter-positional-json/github/getsentry/vitest-evals/packages/github-reporter/src/cli.ts" ], + "supporting_files": [ + "fixtures/sentry-vitest-evals-github-reporter-positional-json/github/getsentry/vitest-evals/LICENSE" + ], "should_find": [ { "finding": "parseArgs initializes jsonPath from VITEST_EVALS_JSON_REPORT or vitest-results.json before argument parsing, so the default-case branch intended to accept a bare positional JSON report path is unreachable and vitest-evals-github-report my-results.json throws Unknown argument 
instead of using that file" @@ -17,7 +20,15 @@ "notes": { "source": "https://github.com/getsentry/vitest-evals/pull/56#discussion_r3255238970", "repository": "getsentry/vitest-evals", + "source_ref": "72d64e16efcfa5c50c2f1091b24b2dfacbcf1b3d", "side": "head", - "body": "Regression seed from Sentry-disclosed potential bug. Verified real: parseArgs defaults jsonPath before checking the positional fallback, making a bare report path unreachable. Severity is intentionally not asserted; Sentry labeled it low, while Warden may treat shipped CLI argument failures under its published-interface rubric." + "body": "Regression seed from Sentry-disclosed potential bug. Verified real: parseArgs defaults jsonPath before checking the positional fallback, making a bare report path unreachable. Severity is intentionally not asserted; Sentry labeled it low, while Warden may treat shipped CLI argument failures under its published-interface rubric.", + "source_files": [ + { + "fixturePath": "fixtures/sentry-vitest-evals-github-reporter-positional-json/github/getsentry/vitest-evals/packages/github-reporter/src/cli.ts", + "sourcePath": "packages/github-reporter/src/cli.ts", + "ref": "72d64e16efcfa5c50c2f1091b24b2dfacbcf1b3d" + } + ] } } diff --git a/evals/eval-bug-detection.yaml b/packages/evals/eval-bug-detection.yaml similarity index 100% rename from evals/eval-bug-detection.yaml rename to packages/evals/eval-bug-detection.yaml diff --git a/evals/eval-precision.yaml b/packages/evals/eval-precision.yaml similarity index 100% rename from evals/eval-precision.yaml rename to packages/evals/eval-precision.yaml diff --git a/evals/eval-security-scanning.yaml b/packages/evals/eval-security-scanning.yaml similarity index 100% rename from evals/eval-security-scanning.yaml rename to packages/evals/eval-security-scanning.yaml diff --git a/packages/evals/fixtures/eval-optional-assertion-rationale/LICENSE.md b/packages/evals/fixtures/eval-optional-assertion-rationale/LICENSE.md new file mode 100644 
index 00000000..506811fc --- /dev/null +++ b/packages/evals/fixtures/eval-optional-assertion-rationale/LICENSE.md @@ -0,0 +1,105 @@ +# Functional Source License, Version 1.1, ALv2 Future License + +## Abbreviation + +FSL-1.1-ALv2 + +## Notice + +Copyright 2025 Functional Software, Inc. + +## Terms and Conditions + +### Licensor ("We") + +The party offering the Software under these Terms and Conditions. + +### The Software + +The "Software" is each version of the software that we make available under +these Terms and Conditions, as indicated by our inclusion of these Terms and +Conditions with the Software. + +### License Grant + +Subject to your compliance with this License Grant and the Patents, +Redistribution and Trademark clauses below, we hereby grant you the right to +use, copy, modify, create derivative works, publicly perform, publicly display +and redistribute the Software for any Permitted Purpose identified below. + +### Permitted Purpose + +A Permitted Purpose is any purpose other than a Competing Use. A Competing Use +means making the Software available to others in a commercial product or +service that: + +1. substitutes for the Software; + +2. substitutes for any other product or service we offer using the Software + that exists as of the date we make the Software available; or + +3. offers the same or substantially similar functionality as the Software. + +Permitted Purposes specifically include using the Software: + +1. for your internal use and access; + +2. for non-commercial education; + +3. for non-commercial research; and + +4. in connection with professional services that you provide to a licensee + using the Software in accordance with these Terms and Conditions. + +### Patents + +To the extent your use for a Permitted Purpose would necessarily infringe our +patents, the license grant above includes a license under our patents. 
If you +make a claim against any party that the Software infringes or contributes to +the infringement of any patent, then your patent license to the Software ends +immediately. + +### Redistribution + +The Terms and Conditions apply to all copies, modifications and derivatives of +the Software. + +If you redistribute any copies, modifications or derivatives of the Software, +you must include a copy of or a link to these Terms and Conditions and not +remove any copyright notices provided in or with the Software. + +### Disclaimer + +THE SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTIES OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING WITHOUT LIMITATION WARRANTIES OF FITNESS FOR A PARTICULAR +PURPOSE, MERCHANTABILITY, TITLE OR NON-INFRINGEMENT. + +IN NO EVENT WILL WE HAVE ANY LIABILITY TO YOU ARISING OUT OF OR RELATED TO THE +SOFTWARE, INCLUDING INDIRECT, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES, +EVEN IF WE HAVE BEEN INFORMED OF THEIR POSSIBILITY IN ADVANCE. + +### Trademarks + +Except for displaying the License Details and identifying us as the origin of +the Software, you have no right under these Terms and Conditions to use our +trademarks, trade names, service marks or product names. + +## Grant of Future License + +We hereby irrevocably grant you an additional license to use the Software under +the Apache License, Version 2.0 that is effective on the second anniversary of +the date we make the Software available. On or after that date, you may use the +Software under the Apache License, Version 2.0, in which case the following +will apply: + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use +this file except in compliance with the License. 
+ +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed +under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. diff --git a/evals/fixtures/eval-optional-assertion-rationale/harness.ts b/packages/evals/fixtures/eval-optional-assertion-rationale/harness.ts similarity index 100% rename from evals/fixtures/eval-optional-assertion-rationale/harness.ts rename to packages/evals/fixtures/eval-optional-assertion-rationale/harness.ts diff --git a/evals/fixtures/ignores-style-issues/utils.ts b/packages/evals/fixtures/ignores-style-issues/utils.ts similarity index 100% rename from evals/fixtures/ignores-style-issues/utils.ts rename to packages/evals/fixtures/ignores-style-issues/utils.ts diff --git a/evals/fixtures/missing-await/cache.ts b/packages/evals/fixtures/missing-await/cache.ts similarity index 100% rename from evals/fixtures/missing-await/cache.ts rename to packages/evals/fixtures/missing-await/cache.ts diff --git a/evals/fixtures/null-property-access/handler.ts b/packages/evals/fixtures/null-property-access/handler.ts similarity index 100% rename from evals/fixtures/null-property-access/handler.ts rename to packages/evals/fixtures/null-property-access/handler.ts diff --git a/evals/fixtures/off-by-one/paginator.ts b/packages/evals/fixtures/off-by-one/paginator.ts similarity index 100% rename from evals/fixtures/off-by-one/paginator.ts rename to packages/evals/fixtures/off-by-one/paginator.ts diff --git a/packages/evals/fixtures/robots-prefix-blocks-public-metadata/LICENSE.md b/packages/evals/fixtures/robots-prefix-blocks-public-metadata/LICENSE.md new file mode 100644 index 00000000..506811fc --- /dev/null +++ b/packages/evals/fixtures/robots-prefix-blocks-public-metadata/LICENSE.md 
@@ -0,0 +1,105 @@ +# Functional Source License, Version 1.1, ALv2 Future License + +## Abbreviation + +FSL-1.1-ALv2 + +## Notice + +Copyright 2025 Functional Software, Inc. + +## Terms and Conditions + +### Licensor ("We") + +The party offering the Software under these Terms and Conditions. + +### The Software + +The "Software" is each version of the software that we make available under +these Terms and Conditions, as indicated by our inclusion of these Terms and +Conditions with the Software. + +### License Grant + +Subject to your compliance with this License Grant and the Patents, +Redistribution and Trademark clauses below, we hereby grant you the right to +use, copy, modify, create derivative works, publicly perform, publicly display +and redistribute the Software for any Permitted Purpose identified below. + +### Permitted Purpose + +A Permitted Purpose is any purpose other than a Competing Use. A Competing Use +means making the Software available to others in a commercial product or +service that: + +1. substitutes for the Software; + +2. substitutes for any other product or service we offer using the Software + that exists as of the date we make the Software available; or + +3. offers the same or substantially similar functionality as the Software. + +Permitted Purposes specifically include using the Software: + +1. for your internal use and access; + +2. for non-commercial education; + +3. for non-commercial research; and + +4. in connection with professional services that you provide to a licensee + using the Software in accordance with these Terms and Conditions. + +### Patents + +To the extent your use for a Permitted Purpose would necessarily infringe our +patents, the license grant above includes a license under our patents. If you +make a claim against any party that the Software infringes or contributes to +the infringement of any patent, then your patent license to the Software ends +immediately. 
+ +### Redistribution + +The Terms and Conditions apply to all copies, modifications and derivatives of +the Software. + +If you redistribute any copies, modifications or derivatives of the Software, +you must include a copy of or a link to these Terms and Conditions and not +remove any copyright notices provided in or with the Software. + +### Disclaimer + +THE SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTIES OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING WITHOUT LIMITATION WARRANTIES OF FITNESS FOR A PARTICULAR +PURPOSE, MERCHANTABILITY, TITLE OR NON-INFRINGEMENT. + +IN NO EVENT WILL WE HAVE ANY LIABILITY TO YOU ARISING OUT OF OR RELATED TO THE +SOFTWARE, INCLUDING INDIRECT, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES, +EVEN IF WE HAVE BEEN INFORMED OF THEIR POSSIBILITY IN ADVANCE. + +### Trademarks + +Except for displaying the License Details and identifying us as the origin of +the Software, you have no right under these Terms and Conditions to use our +trademarks, trade names, service marks or product names. + +## Grant of Future License + +We hereby irrevocably grant you an additional license to use the Software under +the Apache License, Version 2.0 that is effective on the second anniversary of +the date we make the Software available. On or after that date, you may use the +Software under the Apache License, Version 2.0, in which case the following +will apply: + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use +this file except in compliance with the License. + +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed +under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. 
diff --git a/evals/fixtures/robots-prefix-blocks-public-metadata/__static_test.ts b/packages/evals/fixtures/robots-prefix-blocks-public-metadata/__static_test.ts similarity index 100% rename from evals/fixtures/robots-prefix-blocks-public-metadata/__static_test.ts rename to packages/evals/fixtures/robots-prefix-blocks-public-metadata/__static_test.ts diff --git a/packages/evals/fixtures/sentry-autofix-settings-get-project-access/LICENSE.md b/packages/evals/fixtures/sentry-autofix-settings-get-project-access/LICENSE.md new file mode 100644 index 00000000..f97b1d1f --- /dev/null +++ b/packages/evals/fixtures/sentry-autofix-settings-get-project-access/LICENSE.md @@ -0,0 +1,105 @@ +# Functional Source License, Version 1.1, Apache 2.0 Future License + +## Abbreviation + +FSL-1.1-Apache-2.0 + +## Notice + +Copyright 2008-2024 Functional Software, Inc. dba Sentry + +## Terms and Conditions + +### Licensor ("We") + +The party offering the Software under these Terms and Conditions. + +### The Software + +The "Software" is each version of the software that we make available under +these Terms and Conditions, as indicated by our inclusion of these Terms and +Conditions with the Software. + +### License Grant + +Subject to your compliance with this License Grant and the Patents, +Redistribution and Trademark clauses below, we hereby grant you the right to +use, copy, modify, create derivative works, publicly perform, publicly display +and redistribute the Software for any Permitted Purpose identified below. + +### Permitted Purpose + +A Permitted Purpose is any purpose other than a Competing Use. A Competing Use +means making the Software available to others in a commercial product or +service that: + +1. substitutes for the Software; + +2. substitutes for any other product or service we offer using the Software + that exists as of the date we make the Software available; or + +3. offers the same or substantially similar functionality as the Software. 
+ +Permitted Purposes specifically include using the Software: + +1. for your internal use and access; + +2. for non-commercial education; + +3. for non-commercial research; and + +4. in connection with professional services that you provide to a licensee + using the Software in accordance with these Terms and Conditions. + +### Patents + +To the extent your use for a Permitted Purpose would necessarily infringe our +patents, the license grant above includes a license under our patents. If you +make a claim against any party that the Software infringes or contributes to +the infringement of any patent, then your patent license to the Software ends +immediately. + +### Redistribution + +The Terms and Conditions apply to all copies, modifications and derivatives of +the Software. + +If you redistribute any copies, modifications or derivatives of the Software, +you must include a copy of or a link to these Terms and Conditions and not +remove any copyright notices provided in or with the Software. + +### Disclaimer + +THE SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTIES OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING WITHOUT LIMITATION WARRANTIES OF FITNESS FOR A PARTICULAR +PURPOSE, MERCHANTABILITY, TITLE OR NON-INFRINGEMENT. + +IN NO EVENT WILL WE HAVE ANY LIABILITY TO YOU ARISING OUT OF OR RELATED TO THE +SOFTWARE, INCLUDING INDIRECT, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES, +EVEN IF WE HAVE BEEN INFORMED OF THEIR POSSIBILITY IN ADVANCE. + +### Trademarks + +Except for displaying the License Details and identifying us as the origin of +the Software, you have no right under these Terms and Conditions to use our +trademarks, trade names, service marks or product names. + +## Grant of Future License + +We hereby irrevocably grant you an additional license to use the Software under +the Apache License, Version 2.0 that is effective on the second anniversary of +the date we make the Software available. 
On or after that date, you may use the +Software under the Apache License, Version 2.0, in which case the following +will apply: + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use +this file except in compliance with the License. + +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed +under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. diff --git a/evals/fixtures/sentry-autofix-settings-get-project-access/organization_autofix_automation_settings.py b/packages/evals/fixtures/sentry-autofix-settings-get-project-access/organization_autofix_automation_settings.py similarity index 100% rename from evals/fixtures/sentry-autofix-settings-get-project-access/organization_autofix_automation_settings.py rename to packages/evals/fixtures/sentry-autofix-settings-get-project-access/organization_autofix_automation_settings.py diff --git a/packages/evals/fixtures/sentry-group-search-view-visit-visibility/LICENSE.md b/packages/evals/fixtures/sentry-group-search-view-visit-visibility/LICENSE.md new file mode 100644 index 00000000..f97b1d1f --- /dev/null +++ b/packages/evals/fixtures/sentry-group-search-view-visit-visibility/LICENSE.md @@ -0,0 +1,105 @@ +# Functional Source License, Version 1.1, Apache 2.0 Future License + +## Abbreviation + +FSL-1.1-Apache-2.0 + +## Notice + +Copyright 2008-2024 Functional Software, Inc. dba Sentry + +## Terms and Conditions + +### Licensor ("We") + +The party offering the Software under these Terms and Conditions. + +### The Software + +The "Software" is each version of the software that we make available under +these Terms and Conditions, as indicated by our inclusion of these Terms and +Conditions with the Software. 
+ +### License Grant + +Subject to your compliance with this License Grant and the Patents, +Redistribution and Trademark clauses below, we hereby grant you the right to +use, copy, modify, create derivative works, publicly perform, publicly display +and redistribute the Software for any Permitted Purpose identified below. + +### Permitted Purpose + +A Permitted Purpose is any purpose other than a Competing Use. A Competing Use +means making the Software available to others in a commercial product or +service that: + +1. substitutes for the Software; + +2. substitutes for any other product or service we offer using the Software + that exists as of the date we make the Software available; or + +3. offers the same or substantially similar functionality as the Software. + +Permitted Purposes specifically include using the Software: + +1. for your internal use and access; + +2. for non-commercial education; + +3. for non-commercial research; and + +4. in connection with professional services that you provide to a licensee + using the Software in accordance with these Terms and Conditions. + +### Patents + +To the extent your use for a Permitted Purpose would necessarily infringe our +patents, the license grant above includes a license under our patents. If you +make a claim against any party that the Software infringes or contributes to +the infringement of any patent, then your patent license to the Software ends +immediately. + +### Redistribution + +The Terms and Conditions apply to all copies, modifications and derivatives of +the Software. + +If you redistribute any copies, modifications or derivatives of the Software, +you must include a copy of or a link to these Terms and Conditions and not +remove any copyright notices provided in or with the Software. 
+ +### Disclaimer + +THE SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTIES OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING WITHOUT LIMITATION WARRANTIES OF FITNESS FOR A PARTICULAR +PURPOSE, MERCHANTABILITY, TITLE OR NON-INFRINGEMENT. + +IN NO EVENT WILL WE HAVE ANY LIABILITY TO YOU ARISING OUT OF OR RELATED TO THE +SOFTWARE, INCLUDING INDIRECT, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES, +EVEN IF WE HAVE BEEN INFORMED OF THEIR POSSIBILITY IN ADVANCE. + +### Trademarks + +Except for displaying the License Details and identifying us as the origin of +the Software, you have no right under these Terms and Conditions to use our +trademarks, trade names, service marks or product names. + +## Grant of Future License + +We hereby irrevocably grant you an additional license to use the Software under +the Apache License, Version 2.0 that is effective on the second anniversary of +the date we make the Software available. On or after that date, you may use the +Software under the Apache License, Version 2.0, in which case the following +will apply: + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use +this file except in compliance with the License. + +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed +under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. 
diff --git a/evals/fixtures/sentry-group-search-view-visit-visibility/group_search_view_visit.py b/packages/evals/fixtures/sentry-group-search-view-visit-visibility/group_search_view_visit.py similarity index 100% rename from evals/fixtures/sentry-group-search-view-visit-visibility/group_search_view_visit.py rename to packages/evals/fixtures/sentry-group-search-view-visit-visibility/group_search_view_visit.py diff --git a/packages/evals/fixtures/sentry-preprod-size-analysis-base-artifact-access/LICENSE.md b/packages/evals/fixtures/sentry-preprod-size-analysis-base-artifact-access/LICENSE.md new file mode 100644 index 00000000..f97b1d1f --- /dev/null +++ b/packages/evals/fixtures/sentry-preprod-size-analysis-base-artifact-access/LICENSE.md @@ -0,0 +1,105 @@ +# Functional Source License, Version 1.1, Apache 2.0 Future License + +## Abbreviation + +FSL-1.1-Apache-2.0 + +## Notice + +Copyright 2008-2024 Functional Software, Inc. dba Sentry + +## Terms and Conditions + +### Licensor ("We") + +The party offering the Software under these Terms and Conditions. + +### The Software + +The "Software" is each version of the software that we make available under +these Terms and Conditions, as indicated by our inclusion of these Terms and +Conditions with the Software. + +### License Grant + +Subject to your compliance with this License Grant and the Patents, +Redistribution and Trademark clauses below, we hereby grant you the right to +use, copy, modify, create derivative works, publicly perform, publicly display +and redistribute the Software for any Permitted Purpose identified below. + +### Permitted Purpose + +A Permitted Purpose is any purpose other than a Competing Use. A Competing Use +means making the Software available to others in a commercial product or +service that: + +1. substitutes for the Software; + +2. substitutes for any other product or service we offer using the Software + that exists as of the date we make the Software available; or + +3. 
offers the same or substantially similar functionality as the Software. + +Permitted Purposes specifically include using the Software: + +1. for your internal use and access; + +2. for non-commercial education; + +3. for non-commercial research; and + +4. in connection with professional services that you provide to a licensee + using the Software in accordance with these Terms and Conditions. + +### Patents + +To the extent your use for a Permitted Purpose would necessarily infringe our +patents, the license grant above includes a license under our patents. If you +make a claim against any party that the Software infringes or contributes to +the infringement of any patent, then your patent license to the Software ends +immediately. + +### Redistribution + +The Terms and Conditions apply to all copies, modifications and derivatives of +the Software. + +If you redistribute any copies, modifications or derivatives of the Software, +you must include a copy of or a link to these Terms and Conditions and not +remove any copyright notices provided in or with the Software. + +### Disclaimer + +THE SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTIES OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING WITHOUT LIMITATION WARRANTIES OF FITNESS FOR A PARTICULAR +PURPOSE, MERCHANTABILITY, TITLE OR NON-INFRINGEMENT. + +IN NO EVENT WILL WE HAVE ANY LIABILITY TO YOU ARISING OUT OF OR RELATED TO THE +SOFTWARE, INCLUDING INDIRECT, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES, +EVEN IF WE HAVE BEEN INFORMED OF THEIR POSSIBILITY IN ADVANCE. + +### Trademarks + +Except for displaying the License Details and identifying us as the origin of +the Software, you have no right under these Terms and Conditions to use our +trademarks, trade names, service marks or product names. 
+ +## Grant of Future License + +We hereby irrevocably grant you an additional license to use the Software under +the Apache License, Version 2.0 that is effective on the second anniversary of +the date we make the Software available. On or after that date, you may use the +Software under the Apache License, Version 2.0, in which case the following +will apply: + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use +this file except in compliance with the License. + +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed +under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. diff --git a/evals/fixtures/sentry-preprod-size-analysis-base-artifact-access/organization_preprod_size_analysis.py b/packages/evals/fixtures/sentry-preprod-size-analysis-base-artifact-access/organization_preprod_size_analysis.py similarity index 100% rename from evals/fixtures/sentry-preprod-size-analysis-base-artifact-access/organization_preprod_size_analysis.py rename to packages/evals/fixtures/sentry-preprod-size-analysis-base-artifact-access/organization_preprod_size_analysis.py diff --git a/packages/evals/fixtures/sentry-preprod-snapshot-project-access/LICENSE.md b/packages/evals/fixtures/sentry-preprod-snapshot-project-access/LICENSE.md new file mode 100644 index 00000000..f97b1d1f --- /dev/null +++ b/packages/evals/fixtures/sentry-preprod-snapshot-project-access/LICENSE.md @@ -0,0 +1,105 @@ +# Functional Source License, Version 1.1, Apache 2.0 Future License + +## Abbreviation + +FSL-1.1-Apache-2.0 + +## Notice + +Copyright 2008-2024 Functional Software, Inc. 
dba Sentry + +## Terms and Conditions + +### Licensor ("We") + +The party offering the Software under these Terms and Conditions. + +### The Software + +The "Software" is each version of the software that we make available under +these Terms and Conditions, as indicated by our inclusion of these Terms and +Conditions with the Software. + +### License Grant + +Subject to your compliance with this License Grant and the Patents, +Redistribution and Trademark clauses below, we hereby grant you the right to +use, copy, modify, create derivative works, publicly perform, publicly display +and redistribute the Software for any Permitted Purpose identified below. + +### Permitted Purpose + +A Permitted Purpose is any purpose other than a Competing Use. A Competing Use +means making the Software available to others in a commercial product or +service that: + +1. substitutes for the Software; + +2. substitutes for any other product or service we offer using the Software + that exists as of the date we make the Software available; or + +3. offers the same or substantially similar functionality as the Software. + +Permitted Purposes specifically include using the Software: + +1. for your internal use and access; + +2. for non-commercial education; + +3. for non-commercial research; and + +4. in connection with professional services that you provide to a licensee + using the Software in accordance with these Terms and Conditions. + +### Patents + +To the extent your use for a Permitted Purpose would necessarily infringe our +patents, the license grant above includes a license under our patents. If you +make a claim against any party that the Software infringes or contributes to +the infringement of any patent, then your patent license to the Software ends +immediately. + +### Redistribution + +The Terms and Conditions apply to all copies, modifications and derivatives of +the Software. 
+ +If you redistribute any copies, modifications or derivatives of the Software, +you must include a copy of or a link to these Terms and Conditions and not +remove any copyright notices provided in or with the Software. + +### Disclaimer + +THE SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTIES OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING WITHOUT LIMITATION WARRANTIES OF FITNESS FOR A PARTICULAR +PURPOSE, MERCHANTABILITY, TITLE OR NON-INFRINGEMENT. + +IN NO EVENT WILL WE HAVE ANY LIABILITY TO YOU ARISING OUT OF OR RELATED TO THE +SOFTWARE, INCLUDING INDIRECT, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES, +EVEN IF WE HAVE BEEN INFORMED OF THEIR POSSIBILITY IN ADVANCE. + +### Trademarks + +Except for displaying the License Details and identifying us as the origin of +the Software, you have no right under these Terms and Conditions to use our +trademarks, trade names, service marks or product names. + +## Grant of Future License + +We hereby irrevocably grant you an additional license to use the Software under +the Apache License, Version 2.0 that is effective on the second anniversary of +the date we make the Software available. On or after that date, you may use the +Software under the Apache License, Version 2.0, in which case the following +will apply: + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use +this file except in compliance with the License. + +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed +under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. 
diff --git a/evals/fixtures/sentry-preprod-snapshot-project-access/preprod_artifact_snapshot.py b/packages/evals/fixtures/sentry-preprod-snapshot-project-access/preprod_artifact_snapshot.py similarity index 100% rename from evals/fixtures/sentry-preprod-snapshot-project-access/preprod_artifact_snapshot.py rename to packages/evals/fixtures/sentry-preprod-snapshot-project-access/preprod_artifact_snapshot.py diff --git a/packages/evals/fixtures/sentry-release-threshold-empty-project-filter/LICENSE.md b/packages/evals/fixtures/sentry-release-threshold-empty-project-filter/LICENSE.md new file mode 100644 index 00000000..f97b1d1f --- /dev/null +++ b/packages/evals/fixtures/sentry-release-threshold-empty-project-filter/LICENSE.md @@ -0,0 +1,105 @@ +# Functional Source License, Version 1.1, Apache 2.0 Future License + +## Abbreviation + +FSL-1.1-Apache-2.0 + +## Notice + +Copyright 2008-2024 Functional Software, Inc. dba Sentry + +## Terms and Conditions + +### Licensor ("We") + +The party offering the Software under these Terms and Conditions. + +### The Software + +The "Software" is each version of the software that we make available under +these Terms and Conditions, as indicated by our inclusion of these Terms and +Conditions with the Software. + +### License Grant + +Subject to your compliance with this License Grant and the Patents, +Redistribution and Trademark clauses below, we hereby grant you the right to +use, copy, modify, create derivative works, publicly perform, publicly display +and redistribute the Software for any Permitted Purpose identified below. + +### Permitted Purpose + +A Permitted Purpose is any purpose other than a Competing Use. A Competing Use +means making the Software available to others in a commercial product or +service that: + +1. substitutes for the Software; + +2. substitutes for any other product or service we offer using the Software + that exists as of the date we make the Software available; or + +3. 
offers the same or substantially similar functionality as the Software. + +Permitted Purposes specifically include using the Software: + +1. for your internal use and access; + +2. for non-commercial education; + +3. for non-commercial research; and + +4. in connection with professional services that you provide to a licensee + using the Software in accordance with these Terms and Conditions. + +### Patents + +To the extent your use for a Permitted Purpose would necessarily infringe our +patents, the license grant above includes a license under our patents. If you +make a claim against any party that the Software infringes or contributes to +the infringement of any patent, then your patent license to the Software ends +immediately. + +### Redistribution + +The Terms and Conditions apply to all copies, modifications and derivatives of +the Software. + +If you redistribute any copies, modifications or derivatives of the Software, +you must include a copy of or a link to these Terms and Conditions and not +remove any copyright notices provided in or with the Software. + +### Disclaimer + +THE SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTIES OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING WITHOUT LIMITATION WARRANTIES OF FITNESS FOR A PARTICULAR +PURPOSE, MERCHANTABILITY, TITLE OR NON-INFRINGEMENT. + +IN NO EVENT WILL WE HAVE ANY LIABILITY TO YOU ARISING OUT OF OR RELATED TO THE +SOFTWARE, INCLUDING INDIRECT, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES, +EVEN IF WE HAVE BEEN INFORMED OF THEIR POSSIBILITY IN ADVANCE. + +### Trademarks + +Except for displaying the License Details and identifying us as the origin of +the Software, you have no right under these Terms and Conditions to use our +trademarks, trade names, service marks or product names. 
+ +## Grant of Future License + +We hereby irrevocably grant you an additional license to use the Software under +the Apache License, Version 2.0 that is effective on the second anniversary of +the date we make the Software available. On or after that date, you may use the +Software under the Apache License, Version 2.0, in which case the following +will apply: + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use +this file except in compliance with the License. + +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed +under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. diff --git a/evals/fixtures/sentry-release-threshold-empty-project-filter/release_threshold_index.py b/packages/evals/fixtures/sentry-release-threshold-empty-project-filter/release_threshold_index.py similarity index 100% rename from evals/fixtures/sentry-release-threshold-empty-project-filter/release_threshold_index.py rename to packages/evals/fixtures/sentry-release-threshold-empty-project-filter/release_threshold_index.py diff --git a/packages/evals/fixtures/sentry-replay-count-project-scope-overwrite/LICENSE.md b/packages/evals/fixtures/sentry-replay-count-project-scope-overwrite/LICENSE.md new file mode 100644 index 00000000..f97b1d1f --- /dev/null +++ b/packages/evals/fixtures/sentry-replay-count-project-scope-overwrite/LICENSE.md @@ -0,0 +1,105 @@ +# Functional Source License, Version 1.1, Apache 2.0 Future License + +## Abbreviation + +FSL-1.1-Apache-2.0 + +## Notice + +Copyright 2008-2024 Functional Software, Inc. dba Sentry + +## Terms and Conditions + +### Licensor ("We") + +The party offering the Software under these Terms and Conditions. 
+ +### The Software + +The "Software" is each version of the software that we make available under +these Terms and Conditions, as indicated by our inclusion of these Terms and +Conditions with the Software. + +### License Grant + +Subject to your compliance with this License Grant and the Patents, +Redistribution and Trademark clauses below, we hereby grant you the right to +use, copy, modify, create derivative works, publicly perform, publicly display +and redistribute the Software for any Permitted Purpose identified below. + +### Permitted Purpose + +A Permitted Purpose is any purpose other than a Competing Use. A Competing Use +means making the Software available to others in a commercial product or +service that: + +1. substitutes for the Software; + +2. substitutes for any other product or service we offer using the Software + that exists as of the date we make the Software available; or + +3. offers the same or substantially similar functionality as the Software. + +Permitted Purposes specifically include using the Software: + +1. for your internal use and access; + +2. for non-commercial education; + +3. for non-commercial research; and + +4. in connection with professional services that you provide to a licensee + using the Software in accordance with these Terms and Conditions. + +### Patents + +To the extent your use for a Permitted Purpose would necessarily infringe our +patents, the license grant above includes a license under our patents. If you +make a claim against any party that the Software infringes or contributes to +the infringement of any patent, then your patent license to the Software ends +immediately. + +### Redistribution + +The Terms and Conditions apply to all copies, modifications and derivatives of +the Software. + +If you redistribute any copies, modifications or derivatives of the Software, +you must include a copy of or a link to these Terms and Conditions and not +remove any copyright notices provided in or with the Software. 
+ +### Disclaimer + +THE SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTIES OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING WITHOUT LIMITATION WARRANTIES OF FITNESS FOR A PARTICULAR +PURPOSE, MERCHANTABILITY, TITLE OR NON-INFRINGEMENT. + +IN NO EVENT WILL WE HAVE ANY LIABILITY TO YOU ARISING OUT OF OR RELATED TO THE +SOFTWARE, INCLUDING INDIRECT, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES, +EVEN IF WE HAVE BEEN INFORMED OF THEIR POSSIBILITY IN ADVANCE. + +### Trademarks + +Except for displaying the License Details and identifying us as the origin of +the Software, you have no right under these Terms and Conditions to use our +trademarks, trade names, service marks or product names. + +## Grant of Future License + +We hereby irrevocably grant you an additional license to use the Software under +the Apache License, Version 2.0 that is effective on the second anniversary of +the date we make the Software available. On or after that date, you may use the +Software under the Apache License, Version 2.0, in which case the following +will apply: + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use +this file except in compliance with the License. + +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed +under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. 
diff --git a/evals/fixtures/sentry-replay-count-project-scope-overwrite/replay_counts.py b/packages/evals/fixtures/sentry-replay-count-project-scope-overwrite/replay_counts.py similarity index 100% rename from evals/fixtures/sentry-replay-count-project-scope-overwrite/replay_counts.py rename to packages/evals/fixtures/sentry-replay-count-project-scope-overwrite/replay_counts.py diff --git a/packages/evals/fixtures/sentry-replay-delete-read-scope/LICENSE.md b/packages/evals/fixtures/sentry-replay-delete-read-scope/LICENSE.md new file mode 100644 index 00000000..f97b1d1f --- /dev/null +++ b/packages/evals/fixtures/sentry-replay-delete-read-scope/LICENSE.md @@ -0,0 +1,105 @@ +# Functional Source License, Version 1.1, Apache 2.0 Future License + +## Abbreviation + +FSL-1.1-Apache-2.0 + +## Notice + +Copyright 2008-2024 Functional Software, Inc. dba Sentry + +## Terms and Conditions + +### Licensor ("We") + +The party offering the Software under these Terms and Conditions. + +### The Software + +The "Software" is each version of the software that we make available under +these Terms and Conditions, as indicated by our inclusion of these Terms and +Conditions with the Software. + +### License Grant + +Subject to your compliance with this License Grant and the Patents, +Redistribution and Trademark clauses below, we hereby grant you the right to +use, copy, modify, create derivative works, publicly perform, publicly display +and redistribute the Software for any Permitted Purpose identified below. + +### Permitted Purpose + +A Permitted Purpose is any purpose other than a Competing Use. A Competing Use +means making the Software available to others in a commercial product or +service that: + +1. substitutes for the Software; + +2. substitutes for any other product or service we offer using the Software + that exists as of the date we make the Software available; or + +3. offers the same or substantially similar functionality as the Software. 
+ +Permitted Purposes specifically include using the Software: + +1. for your internal use and access; + +2. for non-commercial education; + +3. for non-commercial research; and + +4. in connection with professional services that you provide to a licensee + using the Software in accordance with these Terms and Conditions. + +### Patents + +To the extent your use for a Permitted Purpose would necessarily infringe our +patents, the license grant above includes a license under our patents. If you +make a claim against any party that the Software infringes or contributes to +the infringement of any patent, then your patent license to the Software ends +immediately. + +### Redistribution + +The Terms and Conditions apply to all copies, modifications and derivatives of +the Software. + +If you redistribute any copies, modifications or derivatives of the Software, +you must include a copy of or a link to these Terms and Conditions and not +remove any copyright notices provided in or with the Software. + +### Disclaimer + +THE SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTIES OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING WITHOUT LIMITATION WARRANTIES OF FITNESS FOR A PARTICULAR +PURPOSE, MERCHANTABILITY, TITLE OR NON-INFRINGEMENT. + +IN NO EVENT WILL WE HAVE ANY LIABILITY TO YOU ARISING OUT OF OR RELATED TO THE +SOFTWARE, INCLUDING INDIRECT, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES, +EVEN IF WE HAVE BEEN INFORMED OF THEIR POSSIBILITY IN ADVANCE. + +### Trademarks + +Except for displaying the License Details and identifying us as the origin of +the Software, you have no right under these Terms and Conditions to use our +trademarks, trade names, service marks or product names. + +## Grant of Future License + +We hereby irrevocably grant you an additional license to use the Software under +the Apache License, Version 2.0 that is effective on the second anniversary of +the date we make the Software available. 
On or after that date, you may use the +Software under the Apache License, Version 2.0, in which case the following +will apply: + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use +this file except in compliance with the License. + +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed +under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. diff --git a/evals/fixtures/sentry-replay-delete-read-scope/project_replay_details.py b/packages/evals/fixtures/sentry-replay-delete-read-scope/project_replay_details.py similarity index 100% rename from evals/fixtures/sentry-replay-delete-read-scope/project_replay_details.py rename to packages/evals/fixtures/sentry-replay-delete-read-scope/project_replay_details.py diff --git a/packages/evals/fixtures/sentry-slack-options-load-unscoped-group/LICENSE.md b/packages/evals/fixtures/sentry-slack-options-load-unscoped-group/LICENSE.md new file mode 100644 index 00000000..f97b1d1f --- /dev/null +++ b/packages/evals/fixtures/sentry-slack-options-load-unscoped-group/LICENSE.md @@ -0,0 +1,105 @@ +# Functional Source License, Version 1.1, Apache 2.0 Future License + +## Abbreviation + +FSL-1.1-Apache-2.0 + +## Notice + +Copyright 2008-2024 Functional Software, Inc. dba Sentry + +## Terms and Conditions + +### Licensor ("We") + +The party offering the Software under these Terms and Conditions. + +### The Software + +The "Software" is each version of the software that we make available under +these Terms and Conditions, as indicated by our inclusion of these Terms and +Conditions with the Software. 
+ +### License Grant + +Subject to your compliance with this License Grant and the Patents, +Redistribution and Trademark clauses below, we hereby grant you the right to +use, copy, modify, create derivative works, publicly perform, publicly display +and redistribute the Software for any Permitted Purpose identified below. + +### Permitted Purpose + +A Permitted Purpose is any purpose other than a Competing Use. A Competing Use +means making the Software available to others in a commercial product or +service that: + +1. substitutes for the Software; + +2. substitutes for any other product or service we offer using the Software + that exists as of the date we make the Software available; or + +3. offers the same or substantially similar functionality as the Software. + +Permitted Purposes specifically include using the Software: + +1. for your internal use and access; + +2. for non-commercial education; + +3. for non-commercial research; and + +4. in connection with professional services that you provide to a licensee + using the Software in accordance with these Terms and Conditions. + +### Patents + +To the extent your use for a Permitted Purpose would necessarily infringe our +patents, the license grant above includes a license under our patents. If you +make a claim against any party that the Software infringes or contributes to +the infringement of any patent, then your patent license to the Software ends +immediately. + +### Redistribution + +The Terms and Conditions apply to all copies, modifications and derivatives of +the Software. + +If you redistribute any copies, modifications or derivatives of the Software, +you must include a copy of or a link to these Terms and Conditions and not +remove any copyright notices provided in or with the Software. 
+ +### Disclaimer + +THE SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTIES OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING WITHOUT LIMITATION WARRANTIES OF FITNESS FOR A PARTICULAR +PURPOSE, MERCHANTABILITY, TITLE OR NON-INFRINGEMENT. + +IN NO EVENT WILL WE HAVE ANY LIABILITY TO YOU ARISING OUT OF OR RELATED TO THE +SOFTWARE, INCLUDING INDIRECT, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES, +EVEN IF WE HAVE BEEN INFORMED OF THEIR POSSIBILITY IN ADVANCE. + +### Trademarks + +Except for displaying the License Details and identifying us as the origin of +the Software, you have no right under these Terms and Conditions to use our +trademarks, trade names, service marks or product names. + +## Grant of Future License + +We hereby irrevocably grant you an additional license to use the Software under +the Apache License, Version 2.0 that is effective on the second anniversary of +the date we make the Software available. On or after that date, you may use the +Software under the Apache License, Version 2.0, in which case the following +will apply: + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use +this file except in compliance with the License. + +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed +under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. 
diff --git a/evals/fixtures/sentry-slack-options-load-unscoped-group/options_load.py b/packages/evals/fixtures/sentry-slack-options-load-unscoped-group/options_load.py similarity index 100% rename from evals/fixtures/sentry-slack-options-load-unscoped-group/options_load.py rename to packages/evals/fixtures/sentry-slack-options-load-unscoped-group/options_load.py diff --git a/packages/evals/fixtures/sentry-vitest-evals-duration-sixty-seconds/github/getsentry/vitest-evals/LICENSE b/packages/evals/fixtures/sentry-vitest-evals-duration-sixty-seconds/github/getsentry/vitest-evals/LICENSE new file mode 100644 index 00000000..261eeb9e --- /dev/null +++ b/packages/evals/fixtures/sentry-vitest-evals-duration-sixty-seconds/github/getsentry/vitest-evals/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/evals/fixtures/sentry-vitest-evals-duration-sixty-seconds/github/getsentry/vitest-evals/packages/github-reporter/src/utils.ts b/packages/evals/fixtures/sentry-vitest-evals-duration-sixty-seconds/github/getsentry/vitest-evals/packages/github-reporter/src/utils.ts similarity index 100% rename from evals/fixtures/sentry-vitest-evals-duration-sixty-seconds/github/getsentry/vitest-evals/packages/github-reporter/src/utils.ts rename to packages/evals/fixtures/sentry-vitest-evals-duration-sixty-seconds/github/getsentry/vitest-evals/packages/github-reporter/src/utils.ts diff --git a/packages/evals/fixtures/sentry-vitest-evals-github-reporter-positional-json/github/getsentry/vitest-evals/LICENSE b/packages/evals/fixtures/sentry-vitest-evals-github-reporter-positional-json/github/getsentry/vitest-evals/LICENSE new file mode 100644 index 00000000..261eeb9e --- /dev/null +++ b/packages/evals/fixtures/sentry-vitest-evals-github-reporter-positional-json/github/getsentry/vitest-evals/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/evals/fixtures/sentry-vitest-evals-github-reporter-positional-json/github/getsentry/vitest-evals/packages/github-reporter/src/cli.ts b/packages/evals/fixtures/sentry-vitest-evals-github-reporter-positional-json/github/getsentry/vitest-evals/packages/github-reporter/src/cli.ts similarity index 100% rename from evals/fixtures/sentry-vitest-evals-github-reporter-positional-json/github/getsentry/vitest-evals/packages/github-reporter/src/cli.ts rename to packages/evals/fixtures/sentry-vitest-evals-github-reporter-positional-json/github/getsentry/vitest-evals/packages/github-reporter/src/cli.ts diff --git a/packages/evals/fixtures/sentry-workflow-connect-workflows-authz/LICENSE.md b/packages/evals/fixtures/sentry-workflow-connect-workflows-authz/LICENSE.md new file mode 100644 index 00000000..f97b1d1f --- /dev/null +++ b/packages/evals/fixtures/sentry-workflow-connect-workflows-authz/LICENSE.md @@ -0,0 +1,105 @@ +# Functional Source License, Version 1.1, Apache 2.0 Future License + +## Abbreviation + +FSL-1.1-Apache-2.0 + +## Notice + +Copyright 2008-2024 Functional Software, Inc. dba Sentry + +## Terms and Conditions + +### Licensor ("We") + +The party offering the Software under these Terms and Conditions. + +### The Software + +The "Software" is each version of the software that we make available under +these Terms and Conditions, as indicated by our inclusion of these Terms and +Conditions with the Software. + +### License Grant + +Subject to your compliance with this License Grant and the Patents, +Redistribution and Trademark clauses below, we hereby grant you the right to +use, copy, modify, create derivative works, publicly perform, publicly display +and redistribute the Software for any Permitted Purpose identified below. + +### Permitted Purpose + +A Permitted Purpose is any purpose other than a Competing Use. A Competing Use +means making the Software available to others in a commercial product or +service that: + +1. 
substitutes for the Software; + +2. substitutes for any other product or service we offer using the Software + that exists as of the date we make the Software available; or + +3. offers the same or substantially similar functionality as the Software. + +Permitted Purposes specifically include using the Software: + +1. for your internal use and access; + +2. for non-commercial education; + +3. for non-commercial research; and + +4. in connection with professional services that you provide to a licensee + using the Software in accordance with these Terms and Conditions. + +### Patents + +To the extent your use for a Permitted Purpose would necessarily infringe our +patents, the license grant above includes a license under our patents. If you +make a claim against any party that the Software infringes or contributes to +the infringement of any patent, then your patent license to the Software ends +immediately. + +### Redistribution + +The Terms and Conditions apply to all copies, modifications and derivatives of +the Software. + +If you redistribute any copies, modifications or derivatives of the Software, +you must include a copy of or a link to these Terms and Conditions and not +remove any copyright notices provided in or with the Software. + +### Disclaimer + +THE SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTIES OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING WITHOUT LIMITATION WARRANTIES OF FITNESS FOR A PARTICULAR +PURPOSE, MERCHANTABILITY, TITLE OR NON-INFRINGEMENT. + +IN NO EVENT WILL WE HAVE ANY LIABILITY TO YOU ARISING OUT OF OR RELATED TO THE +SOFTWARE, INCLUDING INDIRECT, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES, +EVEN IF WE HAVE BEEN INFORMED OF THEIR POSSIBILITY IN ADVANCE. + +### Trademarks + +Except for displaying the License Details and identifying us as the origin of +the Software, you have no right under these Terms and Conditions to use our +trademarks, trade names, service marks or product names. 
+ +## Grant of Future License + +We hereby irrevocably grant you an additional license to use the Software under +the Apache License, Version 2.0 that is effective on the second anniversary of +the date we make the Software available. On or after that date, you may use the +Software under the Apache License, Version 2.0, in which case the following +will apply: + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use +this file except in compliance with the License. + +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed +under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. diff --git a/evals/fixtures/sentry-workflow-connect-workflows-authz/workflow_validator_utils.py b/packages/evals/fixtures/sentry-workflow-connect-workflows-authz/workflow_validator_utils.py similarity index 100% rename from evals/fixtures/sentry-workflow-connect-workflows-authz/workflow_validator_utils.py rename to packages/evals/fixtures/sentry-workflow-connect-workflows-authz/workflow_validator_utils.py diff --git a/packages/evals/fixtures/sentry-workflow-open-periods-project-access/LICENSE.md b/packages/evals/fixtures/sentry-workflow-open-periods-project-access/LICENSE.md new file mode 100644 index 00000000..f97b1d1f --- /dev/null +++ b/packages/evals/fixtures/sentry-workflow-open-periods-project-access/LICENSE.md @@ -0,0 +1,105 @@ +# Functional Source License, Version 1.1, Apache 2.0 Future License + +## Abbreviation + +FSL-1.1-Apache-2.0 + +## Notice + +Copyright 2008-2024 Functional Software, Inc. dba Sentry + +## Terms and Conditions + +### Licensor ("We") + +The party offering the Software under these Terms and Conditions. 
+ +### The Software + +The "Software" is each version of the software that we make available under +these Terms and Conditions, as indicated by our inclusion of these Terms and +Conditions with the Software. + +### License Grant + +Subject to your compliance with this License Grant and the Patents, +Redistribution and Trademark clauses below, we hereby grant you the right to +use, copy, modify, create derivative works, publicly perform, publicly display +and redistribute the Software for any Permitted Purpose identified below. + +### Permitted Purpose + +A Permitted Purpose is any purpose other than a Competing Use. A Competing Use +means making the Software available to others in a commercial product or +service that: + +1. substitutes for the Software; + +2. substitutes for any other product or service we offer using the Software + that exists as of the date we make the Software available; or + +3. offers the same or substantially similar functionality as the Software. + +Permitted Purposes specifically include using the Software: + +1. for your internal use and access; + +2. for non-commercial education; + +3. for non-commercial research; and + +4. in connection with professional services that you provide to a licensee + using the Software in accordance with these Terms and Conditions. + +### Patents + +To the extent your use for a Permitted Purpose would necessarily infringe our +patents, the license grant above includes a license under our patents. If you +make a claim against any party that the Software infringes or contributes to +the infringement of any patent, then your patent license to the Software ends +immediately. + +### Redistribution + +The Terms and Conditions apply to all copies, modifications and derivatives of +the Software. + +If you redistribute any copies, modifications or derivatives of the Software, +you must include a copy of or a link to these Terms and Conditions and not +remove any copyright notices provided in or with the Software. 
+ +### Disclaimer + +THE SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTIES OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING WITHOUT LIMITATION WARRANTIES OF FITNESS FOR A PARTICULAR +PURPOSE, MERCHANTABILITY, TITLE OR NON-INFRINGEMENT. + +IN NO EVENT WILL WE HAVE ANY LIABILITY TO YOU ARISING OUT OF OR RELATED TO THE +SOFTWARE, INCLUDING INDIRECT, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES, +EVEN IF WE HAVE BEEN INFORMED OF THEIR POSSIBILITY IN ADVANCE. + +### Trademarks + +Except for displaying the License Details and identifying us as the origin of +the Software, you have no right under these Terms and Conditions to use our +trademarks, trade names, service marks or product names. + +## Grant of Future License + +We hereby irrevocably grant you an additional license to use the Software under +the Apache License, Version 2.0 that is effective on the second anniversary of +the date we make the Software available. On or after that date, you may use the +Software under the Apache License, Version 2.0, in which case the following +will apply: + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use +this file except in compliance with the License. + +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed +under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. 
diff --git a/evals/fixtures/sentry-workflow-open-periods-project-access/organization_open_periods.py b/packages/evals/fixtures/sentry-workflow-open-periods-project-access/organization_open_periods.py similarity index 100% rename from evals/fixtures/sentry-workflow-open-periods-project-access/organization_open_periods.py rename to packages/evals/fixtures/sentry-workflow-open-periods-project-access/organization_open_periods.py diff --git a/evals/fixtures/sql-injection/api.ts b/packages/evals/fixtures/sql-injection/api.ts similarity index 100% rename from evals/fixtures/sql-injection/api.ts rename to packages/evals/fixtures/sql-injection/api.ts diff --git a/evals/fixtures/stale-closure/counter.tsx b/packages/evals/fixtures/stale-closure/counter.tsx similarity index 100% rename from evals/fixtures/stale-closure/counter.tsx rename to packages/evals/fixtures/stale-closure/counter.tsx diff --git a/evals/fixtures/wrong-comparison/validator.ts b/packages/evals/fixtures/wrong-comparison/validator.ts similarity index 100% rename from evals/fixtures/wrong-comparison/validator.ts rename to packages/evals/fixtures/wrong-comparison/validator.ts diff --git a/evals/fixtures/xss-reflected/server.ts b/packages/evals/fixtures/xss-reflected/server.ts similarity index 100% rename from evals/fixtures/xss-reflected/server.ts rename to packages/evals/fixtures/xss-reflected/server.ts diff --git a/packages/evals/package.json b/packages/evals/package.json new file mode 100644 index 00000000..e087777b --- /dev/null +++ b/packages/evals/package.json @@ -0,0 +1,24 @@ +{ + "name": "warden-evals", + "private": true, + "type": "module", + "scripts": { + "evals": "vitest run --config vitest.config.ts", + "scaffold": "tsx scripts/scaffold-eval.ts", + "test": "vitest run --config vitest.unit.config.ts", + "typecheck": "tsc --noEmit" + }, + "dependencies": { + "@octokit/rest": "^22.0.1", + "dotenv": "^17.2.3", + "vitest-evals": "0.9.0-beta.3", + "yaml": "^2.8.2", + "zod": "^4.3.6" + }, + 
"devDependencies": { + "@types/node": "^25.0.10", + "tsx": "^4.19.0", + "typescript": "^5.9.3", + "vitest": "^4.1.6" + } +} diff --git a/scripts/scaffold-eval.ts b/packages/evals/scripts/scaffold-eval.ts similarity index 80% rename from scripts/scaffold-eval.ts rename to packages/evals/scripts/scaffold-eval.ts index bc53b6cd..b587c8bd 100644 --- a/scripts/scaffold-eval.ts +++ b/packages/evals/scripts/scaffold-eval.ts @@ -1,6 +1,9 @@ #!/usr/bin/env tsx import { resolve } from 'node:path'; -import { scaffoldEvalFromGitHubPullRequest } from '../src/evals/scaffold.js'; +import { + scaffoldEvalFromGitHubPullRequest, + type ScaffoldEvalOptions, +} from '../src/scaffold.js'; interface Args { url?: string; @@ -20,7 +23,7 @@ function usage(exitCode = 2): never { ' --category Eval category directory (default: security-review)', ' --side Which PR side to copy fixtures from (default: base)', ' --name Scenario name (default: slugified PR title)', - ' --evals-dir Evals directory (default: ./evals)', + ' --evals-dir Evals package root (default: packages/evals)', ' --force Overwrite existing generated files', ].join('\n'); @@ -32,11 +35,11 @@ function usage(exitCode = 2): never { process.exit(exitCode); } -function parseArgs(argv: string[]): Args { +function parseArgs(argv: string[]): ScaffoldEvalOptions { const args: Args = { category: 'security-review', side: 'base', - evalsDir: resolve(process.cwd(), 'evals'), + evalsDir: resolve(process.cwd()), force: false, }; @@ -73,18 +76,25 @@ function parseArgs(argv: string[]): Args { usage(); } - return args; + return { ...args, url: args.url }; } const args = parseArgs(process.argv.slice(2)); const result = await scaffoldEvalFromGitHubPullRequest(args); console.log(`Created eval: ${result.name}`); +console.log(`Source: ${result.repository}@${result.sourceRef}`); console.log(`Scenario: ${result.scenarioPath}`); console.log('Fixtures:'); for (const file of result.files) { console.log(` ${file.fixturePath} <- 
${file.sourcePath}@${file.ref}`); } +if (result.supportingFiles.length > 0) { + console.log('Supporting fixtures:'); + for (const file of result.supportingFiles) { + console.log(` ${file.fixturePath} <- ${file.sourcePath}@${file.ref}`); + } +} if (result.skippedFiles.length > 0) { console.log('Skipped:'); for (const file of result.skippedFiles) { diff --git a/evals/security-review/BASELINE.md b/packages/evals/security-review/BASELINE.md similarity index 100% rename from evals/security-review/BASELINE.md rename to packages/evals/security-review/BASELINE.md diff --git a/packages/evals/security-review/sentry-autofix-settings-get-project-access.json b/packages/evals/security-review/sentry-autofix-settings-get-project-access.json new file mode 100644 index 00000000..6c6767cf --- /dev/null +++ b/packages/evals/security-review/sentry-autofix-settings-get-project-access.json @@ -0,0 +1,30 @@ +{ + "given": "Autofix automation settings GET lists every project in the organization instead of using OrganizationEndpoint.get_projects", + "files": [ + "fixtures/sentry-autofix-settings-get-project-access/organization_autofix_automation_settings.py" + ], + "should_find": [ + { + "finding": "Autofix automation settings GET queries all organization projects directly and returns settings for projects the caller cannot access" + } + ], + "should_not_find": [ + "repository provider validation", + "audit log contents" + ], + "supporting_files": [ + "fixtures/sentry-autofix-settings-get-project-access/LICENSE.md" + ], + "notes": { + "source": "https://github.com/getsentry/sentry/pull/114977", + "repository": "getsentry/sentry", + "source_ref": "841e962b7e9af1f442a7c9da6eb6ec7e2a01843a", + "source_files": [ + { + "fixturePath": "fixtures/sentry-autofix-settings-get-project-access/organization_autofix_automation_settings.py", + "sourcePath": "src/sentry/seer/endpoints/organization_autofix_automation_settings.py", + "ref": "841e962b7e9af1f442a7c9da6eb6ec7e2a01843a" + } + ] + } +} diff --git 
a/packages/evals/security-review/sentry-group-search-view-visit-visibility.json b/packages/evals/security-review/sentry-group-search-view-visit-visibility.json new file mode 100644 index 00000000..6078fbde --- /dev/null +++ b/packages/evals/security-review/sentry-group-search-view-visit-visibility.json @@ -0,0 +1,30 @@ +{ + "given": "group search view visit endpoint updates private views by ID without applying the existing object permission or visibility checks", + "files": [ + "fixtures/sentry-group-search-view-visit-visibility/group_search_view_visit.py" + ], + "should_find": [ + { + "finding": "visit endpoint loads a GroupSearchView by organization and id only, allowing any org member to mark private views as visited and learn or mutate metadata without object permission checks" + } + ], + "should_not_find": [ + "missing authentication as the primary issue", + "timezone usage" + ], + "supporting_files": [ + "fixtures/sentry-group-search-view-visit-visibility/LICENSE.md" + ], + "notes": { + "source": "https://github.com/getsentry/sentry/blob/d12e82ea6554d6794b1a490369ec6baa75aeaa15/src/sentry/issues/endpoints/organization_group_search_view_visit.py", + "repository": "getsentry/sentry", + "source_ref": "d12e82ea6554d6794b1a490369ec6baa75aeaa15", + "source_files": [ + { + "fixturePath": "fixtures/sentry-group-search-view-visit-visibility/group_search_view_visit.py", + "sourcePath": "src/sentry/issues/endpoints/organization_group_search_view_visit.py", + "ref": "d12e82ea6554d6794b1a490369ec6baa75aeaa15" + } + ] + } +} diff --git a/packages/evals/security-review/sentry-preprod-size-analysis-base-artifact-access.json b/packages/evals/security-review/sentry-preprod-size-analysis-base-artifact-access.json new file mode 100644 index 00000000..972f0a04 --- /dev/null +++ b/packages/evals/security-review/sentry-preprod-size-analysis-base-artifact-access.json @@ -0,0 +1,30 @@ +{ + "given": "public size-analysis endpoint accepts a baseArtifactId and loads the base artifact by 
organization only", + "files": [ + "fixtures/sentry-preprod-size-analysis-base-artifact-access/organization_preprod_size_analysis.py" + ], + "should_find": [ + { + "finding": "baseArtifactId lets a caller attach an inaccessible project artifact as the comparison base because the lookup checks organization_id but not project access" + } + ], + "should_not_find": [ + "integer parsing error handling", + "response field naming" + ], + "supporting_files": [ + "fixtures/sentry-preprod-size-analysis-base-artifact-access/LICENSE.md" + ], + "notes": { + "source": "https://github.com/getsentry/sentry/blob/d12e82ea6554d6794b1a490369ec6baa75aeaa15/src/sentry/preprod/api/endpoints/public/organization_preprod_size_analysis.py", + "repository": "getsentry/sentry", + "source_ref": "d12e82ea6554d6794b1a490369ec6baa75aeaa15", + "source_files": [ + { + "fixturePath": "fixtures/sentry-preprod-size-analysis-base-artifact-access/organization_preprod_size_analysis.py", + "sourcePath": "src/sentry/preprod/api/endpoints/public/organization_preprod_size_analysis.py", + "ref": "d12e82ea6554d6794b1a490369ec6baa75aeaa15" + } + ] + } +} diff --git a/packages/evals/security-review/sentry-preprod-snapshot-project-access.json b/packages/evals/security-review/sentry-preprod-snapshot-project-access.json new file mode 100644 index 00000000..7f751cec --- /dev/null +++ b/packages/evals/security-review/sentry-preprod-snapshot-project-access.json @@ -0,0 +1,30 @@ +{ + "given": "preprod snapshot endpoint fetches artifacts by organization only and returns or deletes project artifacts without checking project membership", + "files": [ + "fixtures/sentry-preprod-snapshot-project-access/preprod_artifact_snapshot.py" + ], + "should_find": [ + { + "finding": "preprod snapshot GET and DELETE load artifacts by organization_id only and do not call has_project_access before exposing manifests or deleting artifacts" + } + ], + "should_not_find": [ + "exception logging", + "missing rate limiting" + ], + 
"supporting_files": [ + "fixtures/sentry-preprod-snapshot-project-access/LICENSE.md" + ], + "notes": { + "source": "https://github.com/getsentry/sentry/pull/114169", + "repository": "getsentry/sentry", + "source_ref": "004819f53b2634ebe0faa1e9c59575c5203ff37b", + "source_files": [ + { + "fixturePath": "fixtures/sentry-preprod-snapshot-project-access/preprod_artifact_snapshot.py", + "sourcePath": "src/sentry/preprod/api/endpoints/preprod_artifact_snapshot.py", + "ref": "004819f53b2634ebe0faa1e9c59575c5203ff37b" + } + ] + } +} diff --git a/packages/evals/security-review/sentry-release-threshold-empty-project-filter.json b/packages/evals/security-review/sentry-release-threshold-empty-project-filter.json new file mode 100644 index 00000000..4a5587fe --- /dev/null +++ b/packages/evals/security-review/sentry-release-threshold-empty-project-filter.json @@ -0,0 +1,30 @@ +{ + "given": "release threshold endpoint builds an empty ORM filter when the caller has no accessible projects", + "files": [ + "fixtures/sentry-release-threshold-empty-project-filter/release_threshold_index.py" + ], + "should_find": [ + { + "finding": "ReleaseThreshold query omits project and organization scoping when get_projects returns an empty list, allowing cross-project or cross-organization threshold disclosure" + } + ], + "should_not_find": [ + "generic missing serializer validation", + "missing rate limiting" + ], + "supporting_files": [ + "fixtures/sentry-release-threshold-empty-project-filter/LICENSE.md" + ], + "notes": { + "source": "https://github.com/getsentry/sentry/pull/114049", + "repository": "getsentry/sentry", + "source_ref": "aebfdffc6afec83f0c66830e93fd872aa4c85c43", + "source_files": [ + { + "fixturePath": "fixtures/sentry-release-threshold-empty-project-filter/release_threshold_index.py", + "sourcePath": "src/sentry/api/endpoints/release_thresholds/release_threshold_index.py", + "ref": "aebfdffc6afec83f0c66830e93fd872aa4c85c43" + } + ] + } +} diff --git 
a/packages/evals/security-review/sentry-replay-count-project-scope-overwrite.json b/packages/evals/security-review/sentry-replay-count-project-scope-overwrite.json new file mode 100644 index 00000000..f0444306 --- /dev/null +++ b/packages/evals/security-review/sentry-replay-count-project-scope-overwrite.json @@ -0,0 +1,30 @@ +{ + "given": "replay count helper replaces request-scoped projects with projects from issue IDs resolved across the organization", + "files": [ + "fixtures/sentry-replay-count-project-scope-overwrite/replay_counts.py" + ], + "should_find": [ + { + "finding": "issue.id replay count path overwrites snuba_params.projects with projects from all matching organization groups, bypassing the caller's project access scope" + } + ], + "should_not_find": [ + "query string parsing as the primary issue", + "missing pagination" + ], + "supporting_files": [ + "fixtures/sentry-replay-count-project-scope-overwrite/LICENSE.md" + ], + "notes": { + "source": "https://github.com/getsentry/sentry/blob/d12e82ea6554d6794b1a490369ec6baa75aeaa15/src/sentry/replays/usecases/replay_counts.py", + "repository": "getsentry/sentry", + "source_ref": "d12e82ea6554d6794b1a490369ec6baa75aeaa15", + "source_files": [ + { + "fixturePath": "fixtures/sentry-replay-count-project-scope-overwrite/replay_counts.py", + "sourcePath": "src/sentry/replays/usecases/replay_counts.py", + "ref": "d12e82ea6554d6794b1a490369ec6baa75aeaa15" + } + ] + } +} diff --git a/packages/evals/security-review/sentry-replay-delete-read-scope.json b/packages/evals/security-review/sentry-replay-delete-read-scope.json new file mode 100644 index 00000000..acedbee9 --- /dev/null +++ b/packages/evals/security-review/sentry-replay-delete-read-scope.json @@ -0,0 +1,30 @@ +{ + "given": "replay detail endpoint grants DELETE to project:read and then enqueues destructive replay deletion", + "files": [ + "fixtures/sentry-replay-delete-read-scope/project_replay_details.py" + ], + "should_find": [ + { + "finding": "DELETE 
accepts project:read scope, so read-only project users can permanently delete replay data" + } + ], + "should_not_find": [ + "missing UUID validation", + "missing feature flag check" + ], + "supporting_files": [ + "fixtures/sentry-replay-delete-read-scope/LICENSE.md" + ], + "notes": { + "source": "https://github.com/getsentry/sentry/pull/114159", + "repository": "getsentry/sentry", + "source_ref": "8e848834d2efde23ae32f1c93957842460b12ea4", + "source_files": [ + { + "fixturePath": "fixtures/sentry-replay-delete-read-scope/project_replay_details.py", + "sourcePath": "src/sentry/replays/endpoints/project_replay_details.py", + "ref": "8e848834d2efde23ae32f1c93957842460b12ea4" + } + ] + } +} diff --git a/evals/security-review/sentry-slack-options-load-unscoped-group.json b/packages/evals/security-review/sentry-slack-options-load-unscoped-group.json similarity index 54% rename from evals/security-review/sentry-slack-options-load-unscoped-group.json rename to packages/evals/security-review/sentry-slack-options-load-unscoped-group.json index d6d5124a..949f1498 100644 --- a/evals/security-review/sentry-slack-options-load-unscoped-group.json +++ b/packages/evals/security-review/sentry-slack-options-load-unscoped-group.json @@ -12,5 +12,20 @@ "Slack signature validation stub or validate() returning True as an unauthenticated access vulnerability", "timing-safe token comparison", "regex performance" - ] + ], + "supporting_files": [ + "fixtures/sentry-slack-options-load-unscoped-group/LICENSE.md" + ], + "notes": { + "source": "https://github.com/getsentry/sentry/pull/114185", + "repository": "getsentry/sentry", + "source_ref": "b0c31079134ae8f755161842361ae28ace2baef1", + "source_files": [ + { + "fixturePath": "fixtures/sentry-slack-options-load-unscoped-group/options_load.py", + "sourcePath": "src/sentry/integrations/slack/webhooks/options_load.py", + "ref": "b0c31079134ae8f755161842361ae28ace2baef1" + } + ] + } } diff --git 
a/packages/evals/security-review/sentry-workflow-connect-workflows-authz.json b/packages/evals/security-review/sentry-workflow-connect-workflows-authz.json new file mode 100644 index 00000000..d2f11a20 --- /dev/null +++ b/packages/evals/security-review/sentry-workflow-connect-workflows-authz.json @@ -0,0 +1,30 @@ +{ + "given": "detector-to-workflow connection validates target workflows exist in the organization but does not check permission to modify those workflows", + "files": [ + "fixtures/sentry-workflow-connect-workflows-authz/workflow_validator_utils.py" + ], + "should_find": [ + { + "finding": "connect_detectors_to_workflows permits linking a detector to arbitrary organization workflows because validate_workflows_exist performs no authorization check" + } + ], + "should_not_find": [ + "detector ID enumeration", + "bulk operation performance" + ], + "supporting_files": [ + "fixtures/sentry-workflow-connect-workflows-authz/LICENSE.md" + ], + "notes": { + "source": "https://github.com/getsentry/sentry/blob/d12e82ea6554d6794b1a490369ec6baa75aeaa15/src/sentry/workflow_engine/endpoints/validators/utils.py", + "repository": "getsentry/sentry", + "source_ref": "d12e82ea6554d6794b1a490369ec6baa75aeaa15", + "source_files": [ + { + "fixturePath": "fixtures/sentry-workflow-connect-workflows-authz/workflow_validator_utils.py", + "sourcePath": "src/sentry/workflow_engine/endpoints/validators/utils.py", + "ref": "d12e82ea6554d6794b1a490369ec6baa75aeaa15" + } + ] + } +} diff --git a/packages/evals/security-review/sentry-workflow-open-periods-project-access.json b/packages/evals/security-review/sentry-workflow-open-periods-project-access.json new file mode 100644 index 00000000..83eeef50 --- /dev/null +++ b/packages/evals/security-review/sentry-workflow-open-periods-project-access.json @@ -0,0 +1,30 @@ +{ + "given": "workflow open-period endpoint resolves detectorId or groupId by organization only and returns issue activity without project permission checks", + "files": [ + 
"fixtures/sentry-workflow-open-periods-project-access/organization_open_periods.py" + ], + "should_find": [ + { + "finding": "open-period lookup accepts detectorId or groupId from any project in the organization and returns group activity without checking the caller's project access" + } + ], + "should_not_find": [ + "date parsing", + "missing detectorId format validation" + ], + "supporting_files": [ + "fixtures/sentry-workflow-open-periods-project-access/LICENSE.md" + ], + "notes": { + "source": "https://github.com/getsentry/sentry/blob/d12e82ea6554d6794b1a490369ec6baa75aeaa15/src/sentry/workflow_engine/endpoints/organization_open_periods.py", + "repository": "getsentry/sentry", + "source_ref": "d12e82ea6554d6794b1a490369ec6baa75aeaa15", + "source_files": [ + { + "fixturePath": "fixtures/sentry-workflow-open-periods-project-access/organization_open_periods.py", + "sourcePath": "src/sentry/workflow_engine/endpoints/organization_open_periods.py", + "ref": "d12e82ea6554d6794b1a490369ec6baa75aeaa15" + } + ] + } +} diff --git a/evals/skills/bug-detection.md b/packages/evals/skills/bug-detection.md similarity index 100% rename from evals/skills/bug-detection.md rename to packages/evals/skills/bug-detection.md diff --git a/evals/skills/precision.md b/packages/evals/skills/precision.md similarity index 100% rename from evals/skills/precision.md rename to packages/evals/skills/precision.md diff --git a/evals/skills/security-scanning.md b/packages/evals/skills/security-scanning.md similarity index 100% rename from evals/skills/security-scanning.md rename to packages/evals/skills/security-scanning.md diff --git a/src/evals/code-review.eval.ts b/packages/evals/src/code-review.eval.ts similarity index 95% rename from src/evals/code-review.eval.ts rename to packages/evals/src/code-review.eval.ts index 590bc70c..bfe7f026 100644 --- a/src/evals/code-review.eval.ts +++ b/packages/evals/src/code-review.eval.ts @@ -11,7 +11,7 @@ import { formatEvalId, formatEvalTestName } from 
'./names.js'; const apiKey = process.env['ANTHROPIC_API_KEY'] ?? ''; const evals = discoverEvalScenarios({ category: 'code-review', - skill: '../src/builtin-skills/code-review/SKILL.md', + skill: '../../src/builtin-skills/code-review/SKILL.md', runtime: 'pi', model: 'anthropic/claude-sonnet-4-6', }); diff --git a/src/evals/e2e.eval.ts b/packages/evals/src/e2e.eval.ts similarity index 100% rename from src/evals/e2e.eval.ts rename to packages/evals/src/e2e.eval.ts diff --git a/src/evals/fixtures.test.ts b/packages/evals/src/fixtures.test.ts similarity index 100% rename from src/evals/fixtures.test.ts rename to packages/evals/src/fixtures.test.ts diff --git a/src/evals/fixtures.ts b/packages/evals/src/fixtures.ts similarity index 100% rename from src/evals/fixtures.ts rename to packages/evals/src/fixtures.ts diff --git a/src/evals/harness.test.ts b/packages/evals/src/harness.test.ts similarity index 97% rename from src/evals/harness.test.ts rename to packages/evals/src/harness.test.ts index e92b5940..e1fc7f1b 100644 --- a/src/evals/harness.test.ts +++ b/packages/evals/src/harness.test.ts @@ -3,9 +3,9 @@ import type { JudgeContext } from 'vitest-evals'; import { createWardenEvalJudge } from './harness.js'; import { runJudge } from './judge.js'; import { DEFAULT_EVAL_MODEL, DEFAULT_EVAL_RUNTIME } from './types.js'; -import { emptyUsage } from '../sdk/usage.js'; +import { emptyUsage } from '../../../src/sdk/usage.js'; import type { EvalMeta } from './types.js'; -import type { Finding } from '../types/index.js'; +import type { Finding } from '../../../src/types/index.js'; vi.mock('./judge.js', () => ({ runJudge: vi.fn(), diff --git a/src/evals/harness.ts b/packages/evals/src/harness.ts similarity index 97% rename from src/evals/harness.ts rename to packages/evals/src/harness.ts index a6959ef7..4cba9909 100644 --- a/src/evals/harness.ts +++ b/packages/evals/src/harness.ts @@ -11,8 +11,8 @@ import { runJudge } from './judge.js'; import { runEvalSkill, type RunEvalOptions } 
from './runner.js'; import { evalPassed, type EvalMeta, type JudgeResponse } from './types.js'; import { usageToSummary } from './usage.js'; -import { FindingSchema } from '../types/index.js'; -import type { Finding, SkillReport, UsageStats } from '../types/index.js'; +import { FindingSchema } from '../../../src/types/index.js'; +import type { Finding, SkillReport, UsageStats } from '../../../src/types/index.js'; export const WardenEvalOutputSchema = z.object({ name: z.string(), diff --git a/src/evals/index.test.ts b/packages/evals/src/index.test.ts similarity index 78% rename from src/evals/index.test.ts rename to packages/evals/src/index.test.ts index 10c6601f..11c7ef48 100644 --- a/src/evals/index.test.ts +++ b/packages/evals/src/index.test.ts @@ -1,4 +1,5 @@ import { describe, it, expect } from 'vitest'; +import { readFileSync } from 'node:fs'; import { join } from 'node:path'; import { discoverEvalFiles, @@ -18,7 +19,7 @@ import { EvalScenarioSchema, } from './types.js'; -const evalsDir = join(import.meta.dirname, '..', '..', 'evals'); +const evalsDir = join(import.meta.dirname, '..'); describe('discoverEvalFiles', () => { it('returns array of YAML file paths', () => { @@ -177,7 +178,7 @@ describe('standalone scenario files', () => { const scenario = loadEvalScenarioFile(file); const meta = resolveEvalScenarioMeta(scenario, file, { category: 'security-review', - skill: '../src/builtin-skills/security-review/SKILL.md', + skill: '../../src/builtin-skills/security-review/SKILL.md', runtime: 'pi', model: 'anthropic/claude-sonnet-4-6', baseDir: evalsDir, @@ -195,7 +196,7 @@ describe('standalone scenario files', () => { it('discovers all standalone scenarios for a category', () => { const metas = discoverEvalScenarios({ category: 'security-review', - skill: '../src/builtin-skills/security-review/SKILL.md', + skill: '../../src/builtin-skills/security-review/SKILL.md', runtime: 'pi', model: 'anthropic/claude-sonnet-4-6', baseDir: evalsDir, @@ -208,7 +209,7 @@ 
describe('standalone scenario files', () => { it('discovers standalone code-review scenarios', () => { const metas = discoverEvalScenarios({ category: 'code-review', - skill: '../src/builtin-skills/code-review/SKILL.md', + skill: '../../src/builtin-skills/code-review/SKILL.md', runtime: 'pi', model: 'anthropic/claude-sonnet-4-6', baseDir: evalsDir, @@ -224,6 +225,84 @@ describe('standalone scenario files', () => { expect(metas[0]?.skillPath).toContain('src/builtin-skills/code-review/SKILL.md'); }); + it('requires repro metadata for source-captured fixtures', () => { + const licenseFilePattern = /\/(?:LICENSE(?:\.(?:md|txt))?|LICENCE(?:\.md)?|COPYING(?:\.md)?)$/; + const scenarioFiles = [ + ...discoverEvalScenarioFiles('code-review', evalsDir), + ...discoverEvalScenarioFiles('security-review', evalsDir), + ...discoverEvalScenarioFiles('verification', evalsDir), + ]; + const missingMetadata: string[] = []; + + for (const file of scenarioFiles) { + const scenario = JSON.parse(readFileSync(file, 'utf-8')) as { + files?: string[]; + supporting_files?: string[]; + notes?: { + repository?: string; + source_ref?: string; + source_files?: { + fixturePath: string; + sourcePath: string; + ref?: string; + }[]; + }; + }; + const fixtureFiles = scenario.files ?? []; + const supportingFiles = scenario.supporting_files ?? []; + const sourceFiles = scenario.notes?.source_files ?? 
[]; + const fixtureRoots = new Set(); + const repositories = new Set(); + let sourceCaptured = Boolean(scenario.notes?.repository); + for (const fixture of fixtureFiles) { + if (fixture.startsWith('fixtures/sentry-') || fixture.includes('/github/')) { + sourceCaptured = true; + } + const segments = fixture.split('/'); + const githubIndex = segments.indexOf('github'); + if (githubIndex !== -1 && segments[githubIndex + 1] && segments[githubIndex + 2]) { + fixtureRoots.add(segments.slice(0, githubIndex + 3).join('/')); + repositories.add(`${segments[githubIndex + 1]}/${segments[githubIndex + 2]}`); + } + } + + for (const root of fixtureRoots) { + const hasLicense = [...fixtureFiles, ...supportingFiles].some( + (supportingFile) => + supportingFile.startsWith(`${root}/`) && + licenseFilePattern.test(`/${supportingFile}`) + ); + if (!hasLicense) { + missingMetadata.push(`${file}: ${root}/LICENSE`); + } + } + for (const repository of repositories) { + if (scenario.notes?.repository !== repository || !scenario.notes.source_ref) { + missingMetadata.push(`${file}: ${repository}@`); + } + } + if (sourceCaptured) { + if (!scenario.notes?.repository || !scenario.notes.source_ref) { + missingMetadata.push(`${file}: `); + } + const hasLicense = [...fixtureFiles, ...supportingFiles].some((fixture) => + licenseFilePattern.test(`/${fixture}`) + ); + if (!hasLicense) { + missingMetadata.push(`${file}: `); + } + for (const fixture of fixtureFiles) { + const sourceFile = sourceFiles.find((entry) => entry.fixturePath === fixture); + if (!sourceFile?.sourcePath) { + missingMetadata.push(`${file}: ${fixture} -> `); + } + } + } + } + + expect(missingMetadata).toEqual([]); + }); + it('throws when a standalone scenario fixture file does not exist', () => { const scenario = EvalScenarioFileSchema.parse({ given: 'an eval with a missing fixture', @@ -233,7 +312,7 @@ describe('standalone scenario files', () => { expect(() => resolveEvalScenarioMeta(scenario, join(evalsDir, 'security-review', 
'missing-fixture.json'), { category: 'security-review', - skill: '../src/builtin-skills/security-review/SKILL.md', + skill: '../../src/builtin-skills/security-review/SKILL.md', baseDir: evalsDir, })).toThrow('Eval fixture not found for security-review/missing-fixture'); }); @@ -310,7 +389,7 @@ describe('EvalFileSchema', () => { it('accepts a Pi runtime with provider-qualified model', () => { const valid = { - skill: '../src/builtin-skills/security-review/SKILL.md', + skill: '../../src/builtin-skills/security-review/SKILL.md', runtime: 'pi', model: 'anthropic/claude-sonnet-4-6', evals: [{ diff --git a/src/evals/index.ts b/packages/evals/src/index.ts similarity index 88% rename from src/evals/index.ts rename to packages/evals/src/index.ts index 6d9fb05d..5c1913aa 100644 --- a/src/evals/index.ts +++ b/packages/evals/src/index.ts @@ -8,20 +8,20 @@ import { EvalScenarioFileSchema, } from './types.js'; import type { EvalFile, EvalMeta, EvalScenarioFile } from './types.js'; -import type { RuntimeName } from '../sdk/runtimes/types.js'; +import type { RuntimeName } from '../../../src/sdk/runtimes/types.js'; export type { EvalMeta }; export interface EvalScenarioSetOptions { - /** Category/suite name, and directory under evals/ containing JSON scenarios. */ + /** Category/suite name, and directory under the eval package root containing JSON scenarios. */ category: string; - /** Skill to run, relative to evals/ directory. */ + /** Skill to run, relative to the eval package root. */ skill: string; /** Default runtime for all scenarios in this set. */ runtime?: RuntimeName; /** Default model for all scenarios in this set. */ model?: string; - /** Optional evals directory override for tests. */ + /** Optional eval package root override for tests. */ baseDir?: string; } @@ -29,7 +29,7 @@ export interface EvalScenarioSetOptions { * Get the default evals directory path. 
*/ function getEvalsDir(): string { - return join(import.meta.dirname, '..', '..', 'evals'); + return join(import.meta.dirname, '..'); } function fallbackSkillName(skillPath: string): string { @@ -100,7 +100,7 @@ export function discoverEvalFiles(baseDir?: string): string[] { } /** - * Discover standalone JSON scenario files in evals//. + * Discover standalone JSON scenario files in //. */ export function discoverEvalScenarioFiles(category: string, baseDir?: string): string[] { const scenarioDir = join(baseDir ?? getEvalsDir(), category); @@ -186,7 +186,7 @@ export function loadEvalScenarioFile(filePath: string): EvalScenarioFile { /** * Resolve all eval scenarios from a YAML file into executable EvalMeta objects. - * Resolves relative paths for skills and fixtures against the evals directory. + * Resolves relative paths for skills and fixtures against the eval package root. */ export function resolveEvalMetas(evalFile: EvalFile, yamlPath: string): EvalMeta[] { const evalsDir = join(yamlPath, '..'); @@ -207,6 +207,13 @@ export function resolveEvalMetas(evalFile: EvalFile, yamlPath: string): EvalMeta } return filePath; }); + const supportingFilePaths = scenario.supporting_files.map((file) => { + const filePath = join(evalsDir, file); + if (!existsSync(filePath)) { + throw new Error(`Eval supporting fixture not found for ${category}/${scenario.name}: ${file}`); + } + return filePath; + }); return { name: scenario.name, @@ -215,6 +222,7 @@ export function resolveEvalMetas(evalFile: EvalFile, yamlPath: string): EvalMeta given: scenario.given, skillPath, filePaths, + supportingFilePaths, model: scenario.model ?? evalFile.model, runtime: scenario.runtime ?? 
evalFile.runtime, should_find: scenario.should_find, @@ -246,6 +254,13 @@ export function resolveEvalScenarioMeta( } return filePath; }); + const supportingFilePaths = scenario.supporting_files.map((file) => { + const filePath = join(evalsDir, file); + if (!existsSync(filePath)) { + throw new Error(`Eval supporting fixture not found for ${options.category}/${name}: ${file}`); + } + return filePath; + }); return { name, @@ -254,6 +269,7 @@ export function resolveEvalScenarioMeta( given: scenario.given, skillPath, filePaths, + supportingFilePaths, model: scenario.model ?? options.model ?? DEFAULT_EVAL_MODEL, runtime: scenario.runtime ?? options.runtime ?? DEFAULT_EVAL_RUNTIME, should_find: scenario.should_find, diff --git a/src/evals/judge.ts b/packages/evals/src/judge.ts similarity index 95% rename from src/evals/judge.ts rename to packages/evals/src/judge.ts index 6f691ca9..5ddd2807 100644 --- a/src/evals/judge.ts +++ b/packages/evals/src/judge.ts @@ -1,11 +1,11 @@ import Anthropic from '@anthropic-ai/sdk'; -import type { Finding } from '../types/index.js'; -import { apiUsageToStats } from '../sdk/pricing.js'; -import { emptyUsage } from '../sdk/usage.js'; -import { extractJson } from '../sdk/haiku.js'; +import type { Finding } from '../../../src/types/index.js'; +import { apiUsageToStats } from '../../../src/sdk/pricing.js'; +import { emptyUsage } from '../../../src/sdk/usage.js'; +import { extractJson } from '../../../src/sdk/haiku.js'; import type { EvalMeta, JudgeResponse } from './types.js'; import { DEFAULT_EVAL_MODEL, JudgeResponseSchema } from './types.js'; -import type { UsageStats } from '../types/index.js'; +import type { UsageStats } from '../../../src/types/index.js'; const JUDGE_MODEL = DEFAULT_EVAL_MODEL; const JUDGE_MAX_TOKENS = 4096; diff --git a/src/evals/names.ts b/packages/evals/src/names.ts similarity index 100% rename from src/evals/names.ts rename to packages/evals/src/names.ts diff --git a/src/evals/runner.test.ts 
b/packages/evals/src/runner.test.ts similarity index 88% rename from src/evals/runner.test.ts rename to packages/evals/src/runner.test.ts index cf02f463..223552c9 100644 --- a/src/evals/runner.test.ts +++ b/packages/evals/src/runner.test.ts @@ -6,8 +6,8 @@ import { describe, expect, it } from 'vitest'; import { setupEvalRepo } from './runner.js'; import type { EvalMeta } from './types.js'; -const evalsDir = join(import.meta.dirname, '..', '..', 'evals'); -const repoRoot = join(import.meta.dirname, '..', '..'); +const evalsDir = join(import.meta.dirname, '..'); +const repoRoot = join(import.meta.dirname, '..', '..', '..'); function git(cwd: string, args: string[]): string { return execFileSync('git', args, { cwd, encoding: 'utf8' }); @@ -67,8 +67,19 @@ describe('setupEvalRepo', () => { 'api', 'endpoint.py', ); + const licensePath = join( + tempRoot, + 'evals', + 'fixtures', + 'source-context', + 'github', + 'getsentry', + 'sentry', + 'LICENSE', + ); mkdirSync(dirname(fixturePath), { recursive: true }); writeFileSync(fixturePath, 'def endpoint():\n pass\n'); + writeFileSync(licensePath, 'source license\n'); const meta: EvalMeta = { name: 'source-context', @@ -77,6 +88,7 @@ describe('setupEvalRepo', () => { given: 'fixture source path carries repository context', skillPath: join(repoRoot, 'src', 'builtin-skills', 'security-review', 'SKILL.md'), filePaths: [fixturePath], + supportingFilePaths: [licensePath], model: 'anthropic/claude-sonnet-4-6', runtime: 'pi', should_find: [{ finding: 'source path context', required: true }], @@ -99,6 +111,8 @@ describe('setupEvalRepo', () => { .toBe('https://github.com/getsentry/sentry.git'); expect(existsSync(join(repoDir, 'source-context', 'src', 'sentry', 'api', 'endpoint.py'))) .toBe(true); + expect(git(repoDir, ['cat-file', '-e', 'main:source-context/LICENSE'])) + .toBe(''); } finally { if (repoDir) { rmSync(repoDir, { recursive: true, force: true }); diff --git a/src/evals/runner.ts b/packages/evals/src/runner.ts similarity 
index 87% rename from src/evals/runner.ts rename to packages/evals/src/runner.ts index c09e333b..7e6b1141 100644 --- a/src/evals/runner.ts +++ b/packages/evals/src/runner.ts @@ -1,16 +1,16 @@ import { basename, join, dirname } from 'node:path'; import { copyFileSync, cpSync, mkdirSync, mkdtempSync, rmSync, existsSync } from 'node:fs'; import { tmpdir } from 'node:os'; -import { execGitNonInteractive } from '../utils/exec.js'; -import { buildLocalEventContext } from '../cli/context.js'; -import { resolveSkillAsync } from '../skills/loader.js'; -import { runSkill } from '../sdk/runner.js'; +import { execGitNonInteractive } from '../../../src/utils/exec.js'; +import { buildLocalEventContext } from '../../../src/cli/context.js'; +import { resolveSkillAsync } from '../../../src/skills/loader.js'; +import { runSkill } from '../../../src/sdk/runner.js'; import { evalFixtureRepoPath, singleEvalFixtureSourceRepository } from './fixtures.js'; import { formatEvalId } from './names.js'; import type { EvalMeta } from './types.js'; -import type { Finding, SkillReport } from '../types/index.js'; -import type { FindingProcessingEvent } from '../sdk/runner.js'; -import type { RuntimeName } from '../sdk/runtimes/types.js'; +import type { Finding, SkillReport } from '../../../src/types/index.js'; +import type { FindingProcessingEvent } from '../../../src/sdk/runner.js'; +import type { RuntimeName } from '../../../src/sdk/runtimes/types.js'; export interface RunEvalOptions { /** Anthropic API key */ @@ -36,6 +36,12 @@ export interface EvalSkillRunResult { durationMs: number; } +function copyFixtureIntoRepo(srcPath: string, repoDir: string): void { + const destPath = join(repoDir, ...evalFixtureRepoPath(srcPath).split('/')); + mkdirSync(dirname(destPath), { recursive: true }); + copyFileSync(srcPath, destPath); +} + /** * Set up a temporary git repository for an eval scenario. 
* @@ -75,15 +81,17 @@ export function setupEvalRepo(meta: EvalMeta, log: (msg: string) => void): strin copyFileSync(meta.skillPath, join(skillDestDir, basename(meta.skillPath))); } + for (const srcPath of meta.supportingFilePaths ?? []) { + copyFixtureIntoRepo(srcPath, tmpDir); + } + git(['add', '.']); git(['commit', '-m', 'install eval skill']); git(['checkout', '-b', 'eval']); - // Copy fixture files, preserving their path under evals/fixtures. + // Copy fixture files, preserving their path under the eval package fixtures. for (const srcPath of meta.filePaths) { - const destPath = join(tmpDir, ...evalFixtureRepoPath(srcPath).split('/')); - mkdirSync(dirname(destPath), { recursive: true }); - copyFileSync(srcPath, destPath); + copyFixtureIntoRepo(srcPath, tmpDir); } git(['add', '.']); diff --git a/src/evals/scaffold.test.ts b/packages/evals/src/scaffold.test.ts similarity index 70% rename from src/evals/scaffold.test.ts rename to packages/evals/src/scaffold.test.ts index c4d4a053..c495b739 100644 --- a/src/evals/scaffold.test.ts +++ b/packages/evals/src/scaffold.test.ts @@ -94,6 +94,8 @@ describe('scaffoldEvalFromGitHubPullRequest', () => { }); expect(result.name).toBe('fix-project-access-bypass'); + expect(result.repository).toBe('getsentry/sentry'); + expect(result.sourceRef).toBe('base-sha'); expect(result.files.map((file) => file.sourcePath)).toEqual([ 'src/api.py', 'src/previous.py', @@ -106,11 +108,16 @@ describe('scaffoldEvalFromGitHubPullRequest', () => { 'fixtures/fix-project-access-bypass/github/getsentry/sentry/src/api.py', 'fixtures/fix-project-access-bypass/github/getsentry/sentry/src/previous.py', ]); + expect(result.supportingFiles.map((file) => file.fixturePath)).toEqual([ + 'fixtures/fix-project-access-bypass/github/getsentry/sentry/LICENSE', + ]); expect(readFileSync(join(tempDir, 'fixtures/fix-project-access-bypass/github/getsentry/sentry/src/api.py'), 'utf-8')) .toBe('src/api.py@base-sha\n'); expect(readFileSync(join(tempDir, 
'fixtures/fix-project-access-bypass/github/getsentry/sentry/src/previous.py'), 'utf-8')) .toBe('src/previous.py@base-sha\n'); + expect(readFileSync(join(tempDir, 'fixtures/fix-project-access-bypass/github/getsentry/sentry/LICENSE'), 'utf-8')) + .toBe('LICENSE@base-sha\n'); const scenario = JSON.parse( readFileSync(join(tempDir, 'security-review/fix-project-access-bypass.json'), 'utf-8') @@ -121,12 +128,28 @@ describe('scaffoldEvalFromGitHubPullRequest', () => { 'fixtures/fix-project-access-bypass/github/getsentry/sentry/src/api.py', 'fixtures/fix-project-access-bypass/github/getsentry/sentry/src/previous.py', ], + supporting_files: [ + 'fixtures/fix-project-access-bypass/github/getsentry/sentry/LICENSE', + ], should_find: [{ finding: 'TODO: describe the vulnerability fixed by https://github.com/getsentry/sentry/pull/12345', }], notes: { source: 'https://github.com/getsentry/sentry/pull/12345', repository: 'getsentry/sentry', + source_ref: 'base-sha', + source_files: [ + { + fixturePath: 'fixtures/fix-project-access-bypass/github/getsentry/sentry/src/api.py', + sourcePath: 'src/api.py', + ref: 'base-sha', + }, + { + fixturePath: 'fixtures/fix-project-access-bypass/github/getsentry/sentry/src/previous.py', + sourcePath: 'src/previous.py', + ref: 'base-sha', + }, + ], side: 'base', skipped_files: [{ sourcePath: 'src/new.py', @@ -139,11 +162,49 @@ describe('scaffoldEvalFromGitHubPullRequest', () => { join(tempDir, 'security-review/fix-project-access-bypass.json') ); expect(validatedScenario.notes?.source).toBe('https://github.com/getsentry/sentry/pull/12345'); + expect(validatedScenario.notes?.source_ref).toBe('base-sha'); + expect(validatedScenario.notes?.source_files).toEqual([ + { + fixturePath: 'fixtures/fix-project-access-bypass/github/getsentry/sentry/src/api.py', + sourcePath: 'src/api.py', + ref: 'base-sha', + }, + { + fixturePath: 'fixtures/fix-project-access-bypass/github/getsentry/sentry/src/previous.py', + sourcePath: 'src/previous.py', + ref: 'base-sha', + 
}, + ]); expect(validatedScenario.notes?.side).toBe('base'); expect(validatedScenario.notes?.skipped_files).toEqual([{ sourcePath: 'src/new.py', reason: 'added file has no base-side content', }]); + expect(validatedScenario.supporting_files).toEqual([ + 'fixtures/fix-project-access-bypass/github/getsentry/sentry/LICENSE', + ]); + }); + + it('requires a source repository license when scaffolding fixtures', async () => { + const missingLicense = new Error('not found') as Error & { status: number }; + missingLicense.status = 404; + octokitMocks.getContent.mockImplementation(async ({ path, ref }: { path: string; ref: string }) => { + if (['LICENSE', 'LICENSE.md', 'LICENSE.txt', 'LICENCE', 'LICENCE.md', 'COPYING', 'COPYING.md'].includes(path)) { + throw missingLicense; + } + return { + data: { + type: 'file', + content: Buffer.from(`${path}@${ref}\n`).toString('base64'), + }, + }; + }); + + await expect(scaffoldEvalFromGitHubPullRequest({ + url: 'https://github.com/getsentry/sentry/pull/12345', + category: 'security-review', + evalsDir: tempDir, + })).rejects.toThrow('No root LICENSE file could be scaffolded'); }); it('rejects unsafe category and scenario names', async () => { diff --git a/src/evals/scaffold.ts b/packages/evals/src/scaffold.ts similarity index 79% rename from src/evals/scaffold.ts rename to packages/evals/src/scaffold.ts index bbb71990..9803647b 100644 --- a/src/evals/scaffold.ts +++ b/packages/evals/src/scaffold.ts @@ -33,8 +33,11 @@ export interface SkippedScaffoldFile { export interface ScaffoldedEval { name: string; + repository: string; + sourceRef: string; scenarioPath: string; files: ScaffoldedEvalFile[]; + supportingFiles: ScaffoldedEvalFile[]; skippedFiles: SkippedScaffoldFile[]; } @@ -50,6 +53,15 @@ interface PullFile { } const SAFE_PATH_SEGMENT = /^[a-zA-Z0-9._-]+$/; +const LICENSE_FILE_CANDIDATES = [ + 'LICENSE', + 'LICENSE.md', + 'LICENSE.txt', + 'LICENCE', + 'LICENCE.md', + 'COPYING', + 'COPYING.md', +]; function 
requireSafePathSegment(value: string, label: string): string { if (!SAFE_PATH_SEGMENT.test(value) || value === '.' || value === '..') { @@ -200,18 +212,35 @@ async function fetchFileContent( } } +async function fetchLicenseFileContent( + octokit: Octokit, + pull: GitHubPullRequestRef, + ref: string, +): Promise<(GitHubFileContent & { sourcePath: string }) | undefined> { + for (const sourcePath of LICENSE_FILE_CANDIDATES) { + const content = await fetchFileContent(octokit, pull, sourcePath, ref); + if (content) { + return { ...content, sourcePath }; + } + } + return undefined; +} + function scenarioJson(args: { title: string; body?: string | null; - files: string[]; + files: ScaffoldedEvalFile[]; + supportingFiles: string[]; url: string; repository: string; + sourceRef: string; side: PullRequestSide; skippedFiles: SkippedScaffoldFile[]; }): string { return `${JSON.stringify({ given: args.title, - files: args.files, + files: args.files.map((file) => file.fixturePath), + supporting_files: args.supportingFiles, should_find: [{ finding: `TODO: describe the vulnerability fixed by ${args.url}`, }], @@ -219,6 +248,12 @@ function scenarioJson(args: { notes: { source: args.url, repository: args.repository, + source_ref: args.sourceRef, + source_files: args.files.map((file) => ({ + fixturePath: file.fixturePath, + sourcePath: file.sourcePath, + ref: file.ref, + })), side: args.side, skipped_files: args.skippedFiles.length > 0 ? 
args.skippedFiles : undefined, body: args.body || undefined, @@ -254,8 +289,10 @@ export async function scaffoldEvalFromGitHubPullRequest( const scenarioPath = join(options.evalsDir, category, `${name}.json`); const seenFixturePaths = new Set(); const copiedFiles: ScaffoldedEvalFile[] = []; + const supportingFiles: ScaffoldedEvalFile[] = []; const skippedFiles: SkippedScaffoldFile[] = []; const contents: (ScaffoldedEvalFile & { content: string })[] = []; + const supportingContents: (ScaffoldedEvalFile & { content: string })[] = []; if (!options.force && existsSync(scenarioPath)) { throw new Error(`Eval scenario already exists: ${scenarioPath}`); @@ -297,10 +334,40 @@ export async function scaffoldEvalFromGitHubPullRequest( throw new Error(`No ${side}-side files could be scaffolded from ${options.url}`); } + const license = await fetchLicenseFileContent(octokit, pull, ref); + if (!license) { + throw new Error( + `No root LICENSE file could be scaffolded from ${pull.owner}/${pull.repo} at ${side} ref ${ref}` + ); + } + + if (!copiedFiles.some((file) => file.sourcePath === license.sourcePath)) { + const fixturePath = posix.join( + 'fixtures', + name, + fixturePathForSource(pull, license.sourcePath, seenFixturePaths), + ); + const fullFixturePath = fromEvalsPath(options.evalsDir, fixturePath); + if (!options.force && existsSync(fullFixturePath)) { + throw new Error(`Eval fixture already exists: ${fullFixturePath}`); + } + supportingContents.push({ + sourcePath: license.sourcePath, + fixturePath, + ref: license.ref, + content: license.content, + }); + supportingFiles.push({ + sourcePath: license.sourcePath, + fixturePath, + ref: license.ref, + }); + } + mkdirSync(fixtureDir, { recursive: true }); mkdirSync(join(options.evalsDir, category), { recursive: true }); - for (const content of contents) { + for (const content of [...contents, ...supportingContents]) { const fullFixturePath = fromEvalsPath(options.evalsDir, content.fixturePath); 
mkdirSync(dirname(fullFixturePath), { recursive: true }); writeFileSync( @@ -315,9 +382,11 @@ export async function scaffoldEvalFromGitHubPullRequest( scenarioJson({ title: pr.title, body: pr.body, - files: copiedFiles.map((file) => file.fixturePath), + files: copiedFiles, + supportingFiles: supportingFiles.map((file) => file.fixturePath), url: options.url, repository: `${pull.owner}/${pull.repo}`, + sourceRef: ref, side, skippedFiles, }), @@ -326,8 +395,11 @@ export async function scaffoldEvalFromGitHubPullRequest( return { name, + repository: `${pull.owner}/${pull.repo}`, + sourceRef: ref, scenarioPath, files: copiedFiles, + supportingFiles, skippedFiles, }; } diff --git a/src/evals/security-review.eval.ts b/packages/evals/src/security-review.eval.ts similarity index 94% rename from src/evals/security-review.eval.ts rename to packages/evals/src/security-review.eval.ts index 93d240f6..b120e72e 100644 --- a/src/evals/security-review.eval.ts +++ b/packages/evals/src/security-review.eval.ts @@ -11,7 +11,7 @@ import { formatEvalId, formatEvalTestName } from './names.js'; const apiKey = process.env['ANTHROPIC_API_KEY'] ?? ''; const evals = discoverEvalScenarios({ category: 'security-review', - skill: '../src/builtin-skills/security-review/SKILL.md', + skill: '../../src/builtin-skills/security-review/SKILL.md', runtime: 'pi', model: 'anthropic/claude-sonnet-4-6', }); diff --git a/src/evals/setup.ts b/packages/evals/src/setup.ts similarity index 90% rename from src/evals/setup.ts rename to packages/evals/src/setup.ts index 42c62476..22a7bb0e 100644 --- a/src/evals/setup.ts +++ b/packages/evals/src/setup.ts @@ -10,7 +10,7 @@ import { config as dotenvConfig } from 'dotenv'; * 3. 
.env.test (test-specific overrides) */ function loadTestEnv(): void { - const root = join(import.meta.dirname, '..', '..'); + const root = join(import.meta.dirname, '..', '..', '..'); const envFiles = ['.env', '.env.local', '.env.test']; diff --git a/src/evals/types.test.ts b/packages/evals/src/types.test.ts similarity index 100% rename from src/evals/types.test.ts rename to packages/evals/src/types.test.ts diff --git a/src/evals/types.ts b/packages/evals/src/types.ts similarity index 86% rename from src/evals/types.ts rename to packages/evals/src/types.ts index 2b032ff6..acd3c53b 100644 --- a/src/evals/types.ts +++ b/packages/evals/src/types.ts @@ -1,8 +1,8 @@ import { z } from 'zod'; -import { RuntimeNameSchema } from '../sdk/runtimes/types.js'; -import { SeveritySchema } from '../types/index.js'; -import type { Finding } from '../types/index.js'; -import type { RuntimeName } from '../sdk/runtimes/types.js'; +import { RuntimeNameSchema } from '../../../src/sdk/runtimes/types.js'; +import { SeveritySchema } from '../../../src/types/index.js'; +import type { Finding } from '../../../src/types/index.js'; +import type { RuntimeName } from '../../../src/sdk/runtimes/types.js'; /** Default model for eval skill execution and judging. */ export const DEFAULT_EVAL_MODEL = 'claude-sonnet-4-6'; @@ -31,6 +31,14 @@ export const EvalScenarioNotesSchema = z.object({ source: z.string().optional(), /** Source repository for scaffolded fixtures, for example "getsentry/sentry". */ repository: z.string().optional(), + /** Exact source commit SHA to checkout when reproducing the captured fixture state. */ + source_ref: z.string().optional(), + /** Mapping from checked-in fixture paths to source repository paths. */ + source_files: z.array(z.object({ + fixturePath: z.string(), + sourcePath: z.string(), + ref: z.string().optional(), + })).optional(), /** Which source side was captured when scaffolded from a PR. 
*/ side: z.string().optional(), /** Files intentionally skipped while scaffolding, with maintainer-facing reasons. */ @@ -50,8 +58,10 @@ export const EvalScenarioSchema = z.object({ name: z.string(), /** What this eval tests (BDD "given" / description) */ given: z.string(), - /** Fixture files to use, relative to evals/ directory */ + /** Fixture files to use, relative to the eval package root */ files: z.array(z.string()).min(1), + /** Supporting fixture files copied into the eval repo without being part of the diff */ + supporting_files: z.array(z.string()).default([]), /** Model override for this specific scenario */ model: z.string().optional(), /** Runtime override for this specific scenario */ @@ -89,7 +99,7 @@ export type EvalScenarioFile = z.infer; * severity: high */ export const EvalFileSchema = z.object({ - /** Skill to run, relative to evals/ directory */ + /** Skill to run, relative to the eval package root */ skill: z.string(), /** Default runtime for all evals in this file */ runtime: RuntimeNameSchema.default(DEFAULT_EVAL_RUNTIME), @@ -117,6 +127,8 @@ export interface EvalMeta { skillPath: string; /** Resolved absolute paths to fixture files */ filePaths: string[]; + /** Resolved absolute paths to supporting files copied before the eval diff */ + supportingFilePaths?: string[]; /** Model to use for skill execution */ model: string; /** Runtime to use for skill execution */ diff --git a/src/evals/usage.ts b/packages/evals/src/usage.ts similarity index 93% rename from src/evals/usage.ts rename to packages/evals/src/usage.ts index a8db6901..1da10002 100644 --- a/src/evals/usage.ts +++ b/packages/evals/src/usage.ts @@ -1,5 +1,5 @@ import { normalizeMetadata, type UsageSummary } from 'vitest-evals/harness'; -import type { UsageStats } from '../types/index.js'; +import type { UsageStats } from '../../../src/types/index.js'; export interface EvalUsageSummaryInput { provider: string; diff --git a/src/evals/verify.eval.ts b/packages/evals/src/verify.eval.ts 
similarity index 95% rename from src/evals/verify.eval.ts rename to packages/evals/src/verify.eval.ts index 3fb8b849..929321dc 100644 --- a/src/evals/verify.eval.ts +++ b/packages/evals/src/verify.eval.ts @@ -10,7 +10,7 @@ import { formatEvalTestName } from './names.js'; const apiKey = process.env['ANTHROPIC_API_KEY'] ?? ''; const evals = discoverVerificationEvalScenarios({ category: 'verification', - skill: '../src/builtin-skills/security-review/SKILL.md', + skill: '../../src/builtin-skills/security-review/SKILL.md', runtime: 'pi', model: 'anthropic/claude-sonnet-4-6', }); diff --git a/src/evals/verify.test.ts b/packages/evals/src/verify.test.ts similarity index 100% rename from src/evals/verify.test.ts rename to packages/evals/src/verify.test.ts diff --git a/src/evals/verify.ts b/packages/evals/src/verify.ts similarity index 93% rename from src/evals/verify.ts rename to packages/evals/src/verify.ts index 0264349b..7d6d4835 100644 --- a/src/evals/verify.ts +++ b/packages/evals/src/verify.ts @@ -9,10 +9,10 @@ import { toJsonValue, type Harness, } from 'vitest-evals/harness'; -import { resolveSkillAsync } from '../skills/loader.js'; -import { verifyFindings } from '../sdk/verify.js'; -import { FindingSchema, type Finding, type UsageStats } from '../types/index.js'; -import { RuntimeNameSchema, type RuntimeName } from '../sdk/runtimes/types.js'; +import { resolveSkillAsync } from '../../../src/skills/loader.js'; +import { verifyFindings } from '../../../src/sdk/verify.js'; +import { FindingSchema, type Finding, type UsageStats } from '../../../src/types/index.js'; +import { RuntimeNameSchema, type RuntimeName } from '../../../src/sdk/runtimes/types.js'; import { discoverEvalScenarioFiles, resolveEvalSkillName } from './index.js'; import { evalFixtureRepoPath, singleEvalFixtureSourceRepository } from './fixtures.js'; import { formatEvalId } from './names.js'; @@ -33,6 +33,7 @@ const VerificationScenarioFileSchema = z.object({ name: z.string().optional(), given: 
z.string(), files: z.array(z.string()).min(1), + supporting_files: z.array(z.string()).default([]), candidate: FindingSchema, expect: VerificationExpectationSchema, model: z.string().optional(), @@ -57,6 +58,7 @@ export interface VerificationEvalMeta { given: string; skillPath: string; filePaths: string[]; + supportingFilePaths?: string[]; candidate: Finding; expectedVerdict: z.infer; model: string; @@ -89,7 +91,7 @@ interface VerificationEvalRunResult { } function getEvalsDir(): string { - return join(import.meta.dirname, '..', '..', 'evals'); + return join(import.meta.dirname, '..'); } function loadVerificationScenario(filePath: string) { @@ -130,6 +132,13 @@ export function resolveVerificationEvalMeta( } return filePath; }); + const supportingFilePaths = scenario.supporting_files.map((file) => { + const filePath = join(evalsDir, file); + if (!existsSync(filePath)) { + throw new Error(`Verification eval supporting fixture not found for ${options.category}/${name}: ${file}`); + } + return filePath; + }); return { name, @@ -138,6 +147,7 @@ export function resolveVerificationEvalMeta( given: scenario.given, skillPath, filePaths, + supportingFilePaths, candidate: scenario.candidate, expectedVerdict: scenario.expect.verdict, model: scenario.model ?? options.model ?? 
DEFAULT_VERIFICATION_MODEL, diff --git a/packages/evals/tsconfig.json b/packages/evals/tsconfig.json new file mode 100644 index 00000000..2a0a218c --- /dev/null +++ b/packages/evals/tsconfig.json @@ -0,0 +1,20 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "NodeNext", + "moduleResolution": "NodeNext", + "lib": ["ES2022"], + "strict": true, + "jsx": "react-jsx", + "esModuleInterop": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true, + "resolveJsonModule": true, + "isolatedModules": true, + "noUncheckedIndexedAccess": true, + "noImplicitOverride": true, + "noPropertyAccessFromIndexSignature": true, + "types": ["node", "vitest"] + }, + "include": ["src/**/*", "scripts/**/*"] +} diff --git a/evals/verification/preprod-size-analysis-base-artifact-keep.json b/packages/evals/verification/preprod-size-analysis-base-artifact-keep.json similarity index 60% rename from evals/verification/preprod-size-analysis-base-artifact-keep.json rename to packages/evals/verification/preprod-size-analysis-base-artifact-keep.json index c8e924a4..84e97ebb 100644 --- a/evals/verification/preprod-size-analysis-base-artifact-keep.json +++ b/packages/evals/verification/preprod-size-analysis-base-artifact-keep.json @@ -17,5 +17,20 @@ }, "expect": { "verdict": "keep" + }, + "supporting_files": [ + "fixtures/sentry-preprod-size-analysis-base-artifact-access/LICENSE.md" + ], + "notes": { + "source": "https://github.com/getsentry/sentry/blob/d12e82ea6554d6794b1a490369ec6baa75aeaa15/src/sentry/preprod/api/endpoints/public/organization_preprod_size_analysis.py", + "repository": "getsentry/sentry", + "source_ref": "d12e82ea6554d6794b1a490369ec6baa75aeaa15", + "source_files": [ + { + "fixturePath": "fixtures/sentry-preprod-size-analysis-base-artifact-access/organization_preprod_size_analysis.py", + "sourcePath": "src/sentry/preprod/api/endpoints/public/organization_preprod_size_analysis.py", + "ref": "d12e82ea6554d6794b1a490369ec6baa75aeaa15" + } + ] } } diff --git 
a/evals/verification/slack-signature-stub-reject.json b/packages/evals/verification/slack-signature-stub-reject.json similarity index 59% rename from evals/verification/slack-signature-stub-reject.json rename to packages/evals/verification/slack-signature-stub-reject.json index fa13f732..9fa875b3 100644 --- a/evals/verification/slack-signature-stub-reject.json +++ b/packages/evals/verification/slack-signature-stub-reject.json @@ -17,5 +17,20 @@ }, "expect": { "verdict": "reject" + }, + "supporting_files": [ + "fixtures/sentry-slack-options-load-unscoped-group/LICENSE.md" + ], + "notes": { + "source": "https://github.com/getsentry/sentry/pull/114185", + "repository": "getsentry/sentry", + "source_ref": "b0c31079134ae8f755161842361ae28ace2baef1", + "source_files": [ + { + "fixturePath": "fixtures/sentry-slack-options-load-unscoped-group/options_load.py", + "sourcePath": "src/sentry/integrations/slack/webhooks/options_load.py", + "ref": "b0c31079134ae8f755161842361ae28ace2baef1" + } + ] } } diff --git a/evals/verification/workflow-open-periods-project-access-keep.json b/packages/evals/verification/workflow-open-periods-project-access-keep.json similarity index 61% rename from evals/verification/workflow-open-periods-project-access-keep.json rename to packages/evals/verification/workflow-open-periods-project-access-keep.json index c5f49014..92b38bd7 100644 --- a/evals/verification/workflow-open-periods-project-access-keep.json +++ b/packages/evals/verification/workflow-open-periods-project-access-keep.json @@ -17,5 +17,20 @@ }, "expect": { "verdict": "keep" + }, + "supporting_files": [ + "fixtures/sentry-workflow-open-periods-project-access/LICENSE.md" + ], + "notes": { + "source": "https://github.com/getsentry/sentry/blob/d12e82ea6554d6794b1a490369ec6baa75aeaa15/src/sentry/workflow_engine/endpoints/organization_open_periods.py", + "repository": "getsentry/sentry", + "source_ref": "d12e82ea6554d6794b1a490369ec6baa75aeaa15", + "source_files": [ + { + "fixturePath": 
"fixtures/sentry-workflow-open-periods-project-access/organization_open_periods.py", + "sourcePath": "src/sentry/workflow_engine/endpoints/organization_open_periods.py", + "ref": "d12e82ea6554d6794b1a490369ec6baa75aeaa15" + } + ] } } diff --git a/vitest.evals.config.ts b/packages/evals/vitest.config.ts similarity index 88% rename from vitest.evals.config.ts rename to packages/evals/vitest.config.ts index cb0a4039..f339dfb3 100644 --- a/vitest.evals.config.ts +++ b/packages/evals/vitest.config.ts @@ -6,10 +6,10 @@ const junitOutputFile = process.env['VITEST_EVALS_JUNIT']; export default defineConfig({ test: { // Only run eval suites. - include: ['src/evals/**/*.eval.ts'], + include: ['src/**/*.eval.ts'], exclude: ['**/node_modules/**', '**/dist/**'], // Load .env, .env.local, .env.test for API keys - setupFiles: ['./src/evals/setup.ts'], + setupFiles: ['./src/setup.ts'], reporters: [ ['vitest-evals/reporter', { toolDetails: false }], ...(jsonOutputFile ? [['json']] : []), diff --git a/packages/evals/vitest.unit.config.ts b/packages/evals/vitest.unit.config.ts new file mode 100644 index 00000000..e357d9a6 --- /dev/null +++ b/packages/evals/vitest.unit.config.ts @@ -0,0 +1,8 @@ +import { defineConfig } from 'vitest/config'; + +export default defineConfig({ + test: { + include: ['src/**/*.test.ts'], + exclude: ['**/node_modules/**', '**/dist/**'], + }, +}); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 79e40aca..2aaca0aa 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -105,9 +105,6 @@ importers: vitest: specifier: ^4.1.6 version: 4.1.6(@opentelemetry/api@1.9.0)(@types/node@25.0.10)(@vitest/coverage-v8@4.1.6)(vite@7.3.1(@types/node@25.0.10)(jiti@2.7.0)(tsx@4.21.0)(yaml@2.8.3)) - vitest-evals: - specifier: 0.9.0-beta.3 - version: 0.9.0-beta.3(tinyrainbow@3.0.3)(vitest@4.1.6)(zod@4.3.6) packages/docs: dependencies: @@ -140,6 +137,37 @@ importers: specifier: ^2.0.0 version: 2.2.13(@aws-sdk/credential-provider-web-identity@3.972.38) + packages/evals: + 
dependencies: + '@octokit/rest': + specifier: ^22.0.1 + version: 22.0.1 + dotenv: + specifier: ^17.2.3 + version: 17.2.3 + vitest-evals: + specifier: 0.9.0-beta.3 + version: 0.9.0-beta.3(tinyrainbow@3.1.0)(vitest@4.1.6)(zod@4.3.6) + yaml: + specifier: ^2.8.2 + version: 2.8.3 + zod: + specifier: ^4.3.6 + version: 4.3.6 + devDependencies: + '@types/node': + specifier: ^25.0.10 + version: 25.0.10 + tsx: + specifier: ^4.19.0 + version: 4.21.0 + typescript: + specifier: ^5.9.3 + version: 5.9.3 + vitest: + specifier: ^4.1.6 + version: 4.1.6(@opentelemetry/api@1.9.0)(@types/node@25.0.10)(@vitest/coverage-v8@4.1.6)(vite@7.3.1(@types/node@25.0.10)(jiti@2.7.0)(tsx@4.21.0)(yaml@2.8.3)) + packages: '@alcalzone/ansi-tokenize@0.1.3': @@ -9118,9 +9146,9 @@ snapshots: optionalDependencies: vite: 6.4.1(@types/node@25.0.10)(jiti@2.7.0)(tsx@4.21.0)(yaml@2.8.3) - vitest-evals@0.9.0-beta.3(tinyrainbow@3.0.3)(vitest@4.1.6)(zod@4.3.6): + vitest-evals@0.9.0-beta.3(tinyrainbow@3.1.0)(vitest@4.1.6)(zod@4.3.6): dependencies: - tinyrainbow: 3.0.3 + tinyrainbow: 3.1.0 vitest: 4.1.6(@opentelemetry/api@1.9.0)(@types/node@25.0.10)(@vitest/coverage-v8@4.1.6)(vite@7.3.1(@types/node@25.0.10)(jiti@2.7.0)(tsx@4.21.0)(yaml@2.8.3)) optionalDependencies: zod: 4.3.6 diff --git a/scripts/update-pricing.ts b/scripts/update-pricing.ts index d704a2e4..d60490c7 100644 --- a/scripts/update-pricing.ts +++ b/scripts/update-pricing.ts @@ -52,7 +52,7 @@ const PRICE_FALLBACKS: Record = { 'claude-sonnet-4-6': 'claude-sonnet-4-5', }; -function hasPrice(record: ModelPricingRecord | undefined): boolean { +function hasPrice(record: ModelPricingRecord | undefined): record is ModelPricingRecord { return record !== undefined && ( record.inputPerMTok > 0 || record.outputPerMTok > 0 || @@ -64,10 +64,11 @@ function hasPrice(record: ModelPricingRecord | undefined): boolean { function fillPricingFallbacks(pricing: Record): void { for (const [target, source] of Object.entries(PRICE_FALLBACKS)) { - if (hasPrice(pricing[target]) || 
!hasPrice(pricing[source])) { + const sourcePricing = pricing[source]; + if (hasPrice(pricing[target]) || !hasPrice(sourcePricing)) { continue; } - pricing[target] = { ...pricing[source]! }; + pricing[target] = { ...sourcePricing }; } } diff --git a/tsconfig.build.json b/tsconfig.build.json index 37105381..6e0067cd 100644 --- a/tsconfig.build.json +++ b/tsconfig.build.json @@ -5,9 +5,7 @@ "dist", "src/**/*.test.ts", "src/**/*.test.tsx", - "src/**/*.eval.ts", "src/**/*.integration.test.ts", - "src/**/*.integration.test.tsx", - "src/evals/**" + "src/**/*.integration.test.tsx" ] } diff --git a/vitest.integration.config.ts b/vitest.integration.config.ts index 6be302f1..ed0ee42c 100644 --- a/vitest.integration.config.ts +++ b/vitest.integration.config.ts @@ -5,7 +5,6 @@ export default defineConfig({ // Only run integration tests include: ['**/*.integration.test.ts'], exclude: ['**/node_modules/**', '**/dist/**'], - // Load .env, .env.local, .env.test - setupFiles: ['./src/evals/setup.ts'], + passWithNoTests: true, }, }); diff --git a/warden.toml b/warden.toml index 3e32b997..f021ebd8 100644 --- a/warden.toml +++ b/warden.toml @@ -7,8 +7,8 @@ model = "anthropic/claude-sonnet-4-6" failOn = "high" # Show annotations for medium+ severity findings reportOn = "medium" -# Exclude build output from all skills -ignorePaths = ["dist/**", "evals/**"] +# Exclude build output and internal eval fixtures from all skills +ignorePaths = ["dist/**", "packages/evals/**"] [[skills]] name = "security-review" From f3f9e9ddaa58bb407cd02b7a832c70358fd462c4 Mon Sep 17 00:00:00 2001 From: David Cramer Date: Sun, 17 May 2026 21:13:16 -0700 Subject: [PATCH 2/2] fix(evals): Preserve verifier support files Pass verifier supporting fixture files through to eval repo setup so source licenses and other support context are copied before verification runs. Add a regression test covering the verifier path. 
Co-Authored-By: GPT-5 Codex --- packages/evals/src/verify.test.ts | 76 +++++++++++++++++++++++++++++-- packages/evals/src/verify.ts | 1 + 2 files changed, 74 insertions(+), 3 deletions(-) diff --git a/packages/evals/src/verify.test.ts b/packages/evals/src/verify.test.ts index ee4bfcee..bb678e01 100644 --- a/packages/evals/src/verify.test.ts +++ b/packages/evals/src/verify.test.ts @@ -1,12 +1,36 @@ -import { mkdtempSync, rmSync, writeFileSync } from 'node:fs'; +import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from 'node:fs'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; -import { afterEach, describe, expect, it } from 'vitest'; -import { resolveVerificationEvalMeta } from './verify.js'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { resolveVerificationEvalMeta, runVerificationEval } from './verify.js'; +import type { EvalMeta } from './types.js'; +import type { Finding } from '../../../src/types/index.js'; + +const mocks = vi.hoisted(() => ({ + resolveSkillAsync: vi.fn(), + setupEvalRepo: vi.fn(), + verifyFindings: vi.fn(), +})); + +vi.mock('../../../src/skills/loader.js', () => ({ + resolveSkillAsync: mocks.resolveSkillAsync, +})); + +vi.mock('../../../src/sdk/verify.js', () => ({ + verifyFindings: mocks.verifyFindings, +})); + +vi.mock('./runner.js', () => ({ + setupEvalRepo: mocks.setupEvalRepo, +})); describe('resolveVerificationEvalMeta', () => { let tempDir: string | undefined; + beforeEach(() => { + vi.resetAllMocks(); + }); + afterEach(() => { if (tempDir) { rmSync(tempDir, { recursive: true, force: true }); @@ -25,4 +49,50 @@ describe('resolveVerificationEvalMeta', () => { baseDir: tempDir, })).toThrow(`Invalid verification eval ${scenarioPath}`); }); + + it('keeps supporting files when setting up verification eval repos', async () => { + tempDir = mkdtempSync(join(tmpdir(), 'warden-verify-eval-')); + const repoDir = join(tempDir, 'repo'); + const skillPath = join(tempDir, 'skills', 
'security-review', 'SKILL.md'); + const fixturePath = join(tempDir, 'fixtures', 'endpoint.py'); + const licensePath = join(tempDir, 'fixtures', 'LICENSE'); + mkdirSync(join(tempDir, 'skills', 'security-review'), { recursive: true }); + mkdirSync(join(tempDir, 'fixtures'), { recursive: true }); + writeFileSync(skillPath, '---\nname: security-review\n---\n'); + writeFileSync(fixturePath, 'def endpoint():\n pass\n'); + writeFileSync(licensePath, 'source license\n'); + + const candidate: Finding = { + id: 'candidate', + severity: 'high', + title: 'candidate finding', + description: 'candidate description', + }; + mocks.setupEvalRepo.mockReturnValue(repoDir); + mocks.resolveSkillAsync.mockResolvedValue({ name: 'security-review' }); + mocks.verifyFindings.mockResolvedValue({ findings: [] }); + + await runVerificationEval({ + name: 'license-context', + category: 'verification', + skillName: 'security-review', + given: 'license context should be present', + skillPath, + filePaths: [fixturePath], + supportingFilePaths: [licensePath], + candidate, + expectedVerdict: 'reject', + model: 'anthropic/claude-sonnet-4-6', + runtime: 'pi', + }, { + apiKey: 'test-api-key', + }); + + expect(mocks.setupEvalRepo).toHaveBeenCalledWith( + expect.objectContaining>({ + supportingFilePaths: [licensePath], + }), + expect.any(Function), + ); + }); }); diff --git a/packages/evals/src/verify.ts b/packages/evals/src/verify.ts index 7d6d4835..989d628b 100644 --- a/packages/evals/src/verify.ts +++ b/packages/evals/src/verify.ts @@ -182,6 +182,7 @@ export async function runVerificationEval( given: meta.given, skillPath: meta.skillPath, filePaths: meta.filePaths, + supportingFilePaths: meta.supportingFilePaths, model: meta.model, runtime: meta.runtime, should_find: [{ finding: meta.given, required: true }],