diff --git a/.github/workflows/evals.yml b/.github/workflows/evals.yml index c2ed6b40..36952b72 100644 --- a/.github/workflows/evals.yml +++ b/.github/workflows/evals.yml @@ -11,13 +11,10 @@ on: push: branches: [main] paths: - - "evals/**" - - "scripts/scaffold-eval.ts" - - "src/evals/**" + - "packages/evals/**" - ".github/workflows/evals.yml" - "package.json" - "pnpm-lock.yaml" - - "vitest.evals.config.ts" pull_request: types: [opened, synchronize, reopened, labeled] @@ -33,13 +30,10 @@ jobs: with: script: | const evalPaths = [ - 'evals/', - 'scripts/scaffold-eval.ts', - 'src/evals/', + 'packages/evals/', '.github/workflows/evals.yml', 'package.json', 'pnpm-lock.yaml', - 'vitest.evals.config.ts', ]; function setRun(run, reason) { @@ -113,8 +107,8 @@ jobs: - name: Run evals env: - VITEST_EVALS_JSON: eval-results.json - VITEST_EVALS_JUNIT: eval-results.junit.xml + VITEST_EVALS_JSON: ../../eval-results.json + VITEST_EVALS_JUNIT: ../../eval-results.junit.xml run: | set +e pnpm evals diff --git a/.npmignore b/.npmignore index ac5482f2..4073c678 100644 --- a/.npmignore +++ b/.npmignore @@ -15,6 +15,7 @@ action.yml *.test.d.ts **/*.test.* vitest*.config.ts +coverage/ # Environment and secrets (.npmignore overrides .gitignore) .env @@ -24,6 +25,7 @@ vitest*.config.ts .github/ .agents/ .codex/ +.cursor/ .warden/ .claude/ .dex/ @@ -34,7 +36,6 @@ pnpm-lock.yaml pnpm-workspace.yaml # Evals and dev scripts -evals/ /scripts/ superwarden-bench/ diff --git a/.oxlintrc.json b/.oxlintrc.json index f8748612..d4a26718 100644 --- a/.oxlintrc.json +++ b/.oxlintrc.json @@ -12,10 +12,11 @@ "ignorePatterns": [ "dist/**", "node_modules/**", + "coverage/**", "*.js", "examples/**", - "scripts/**", - "vitest*.config.ts" + "packages/docs/**/*.astro", + "packages/evals/fixtures/**" ], "rules": { "constructor-super": "error", @@ -166,4 +167,4 @@ } } ] -} \ No newline at end of file +} diff --git a/AGENTS.md b/AGENTS.md index f40f3aa3..574387e0 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -23,6 
+23,7 @@ TELEMETRY.md # Sentry telemetry investigation map; points to Junior's packages/ ├── docs/ # Astro docs site (dex-docs), deployed via Vercel +├── evals/ # Private eval package: runner, scenarios, fixtures, test skills src/ # @sentry/warden core (root package) ├── index.ts # Library entry point @@ -37,17 +38,8 @@ src/ # @sentry/warden core (root package) ├── cli/ # CLI entry and commands │ └── output/ # CLI output formatting ├── action/ # GitHub Action entry -├── evals/ # Eval runner, judge, and types ├── utils/ # Shared utilities └── examples/ # Example configurations - -evals/ # Eval specs, fixtures, and test skills (see evals/README.md) -├── eval-*.yaml # Harness smoke suites using eval-* test skills -├── code-review/ # Code-review benchmark scenarios -├── security-review/ # Security-review benchmark scenarios -├── verification/ # Verifier-only eval scenarios -├── skills/ # Test skills used as eval vehicles -└── fixtures/ # Source code with known issues ``` ## Key Conventions @@ -132,7 +124,7 @@ Skills define **what to look for**, not how to respond to findings: ## Evals -End-to-end evals for the full pipeline plus verifier-only evals. The Vitest entrypoints are split as `src/evals/e2e.eval.ts`, `src/evals/code-review.eval.ts`, `src/evals/security-review.eval.ts`, and `src/evals/verify.eval.ts`. See [`evals/INTERNAL.md`](evals/INTERNAL.md) for maintainer workflow and [`evals/README.md`](evals/README.md) for schemas. Run with `pnpm evals`; scaffold PR fixtures with `pnpm evals:scaffold `. +End-to-end evals for the full pipeline plus verifier-only evals live in `packages/evals/`. See [`packages/evals/INTERNAL.md`](packages/evals/INTERNAL.md) for maintainer workflow and [`packages/evals/README.md`](packages/evals/README.md) for schemas. Run with `pnpm evals`; scaffold PR fixtures with `pnpm evals:scaffold `. 
## Voice diff --git a/README.md b/README.md index 7295a81f..1020acbc 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ pnpm test:coverage # unit tests with LCOV coverage pnpm evals # end-to-end evals (requires API key) ``` -See [`evals/README.md`](evals/README.md) for the eval framework. +See [`packages/evals/README.md`](packages/evals/README.md) for the eval framework. ## License diff --git a/evals/code-review/robots-prefix-blocks-public-metadata.json b/evals/code-review/robots-prefix-blocks-public-metadata.json deleted file mode 100644 index 02700ac8..00000000 --- a/evals/code-review/robots-prefix-blocks-public-metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "given": "a robots.txt golden test disallows a prefix that also blocks a documented public metadata endpoint", - "files": [ - "fixtures/robots-prefix-blocks-public-metadata/__static_test.ts" - ], - "should_find": [ - { - "finding": "robots.txt Disallow /mcp is a prefix rule that blocks the public /mcp.json metadata endpoint from crawlers", - "severity": "high" - } - ], - "should_not_find": [ - "the test file being test-only makes the finding low severity" - ] -} diff --git a/evals/security-review/sentry-autofix-settings-get-project-access.json b/evals/security-review/sentry-autofix-settings-get-project-access.json deleted file mode 100644 index 50ec2020..00000000 --- a/evals/security-review/sentry-autofix-settings-get-project-access.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "given": "Autofix automation settings GET lists every project in the organization instead of using OrganizationEndpoint.get_projects", - "files": [ - "fixtures/sentry-autofix-settings-get-project-access/organization_autofix_automation_settings.py" - ], - "should_find": [ - { - "finding": "Autofix automation settings GET queries all organization projects directly and returns settings for projects the caller cannot access" - } - ], - "should_not_find": [ - "repository provider validation", - "audit log contents" - ] -} diff --git 
a/evals/security-review/sentry-group-search-view-visit-visibility.json b/evals/security-review/sentry-group-search-view-visit-visibility.json deleted file mode 100644 index 8bbf5105..00000000 --- a/evals/security-review/sentry-group-search-view-visit-visibility.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "given": "group search view visit endpoint updates private views by ID without applying the existing object permission or visibility checks", - "files": [ - "fixtures/sentry-group-search-view-visit-visibility/group_search_view_visit.py" - ], - "should_find": [ - { - "finding": "visit endpoint loads a GroupSearchView by organization and id only, allowing any org member to mark private views as visited and learn or mutate metadata without object permission checks" - } - ], - "should_not_find": [ - "missing authentication as the primary issue", - "timezone usage" - ] -} diff --git a/evals/security-review/sentry-preprod-size-analysis-base-artifact-access.json b/evals/security-review/sentry-preprod-size-analysis-base-artifact-access.json deleted file mode 100644 index d272fd02..00000000 --- a/evals/security-review/sentry-preprod-size-analysis-base-artifact-access.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "given": "public size-analysis endpoint accepts a baseArtifactId and loads the base artifact by organization only", - "files": [ - "fixtures/sentry-preprod-size-analysis-base-artifact-access/organization_preprod_size_analysis.py" - ], - "should_find": [ - { - "finding": "baseArtifactId lets a caller attach an inaccessible project artifact as the comparison base because the lookup checks organization_id but not project access" - } - ], - "should_not_find": [ - "integer parsing error handling", - "response field naming" - ] -} diff --git a/evals/security-review/sentry-preprod-snapshot-project-access.json b/evals/security-review/sentry-preprod-snapshot-project-access.json deleted file mode 100644 index 3d1a6ac2..00000000 --- 
a/evals/security-review/sentry-preprod-snapshot-project-access.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "given": "preprod snapshot endpoint fetches artifacts by organization only and returns or deletes project artifacts without checking project membership", - "files": [ - "fixtures/sentry-preprod-snapshot-project-access/preprod_artifact_snapshot.py" - ], - "should_find": [ - { - "finding": "preprod snapshot GET and DELETE load artifacts by organization_id only and do not call has_project_access before exposing manifests or deleting artifacts" - } - ], - "should_not_find": [ - "exception logging", - "missing rate limiting" - ] -} diff --git a/evals/security-review/sentry-release-threshold-empty-project-filter.json b/evals/security-review/sentry-release-threshold-empty-project-filter.json deleted file mode 100644 index fdafd26a..00000000 --- a/evals/security-review/sentry-release-threshold-empty-project-filter.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "given": "release threshold endpoint builds an empty ORM filter when the caller has no accessible projects", - "files": [ - "fixtures/sentry-release-threshold-empty-project-filter/release_threshold_index.py" - ], - "should_find": [ - { - "finding": "ReleaseThreshold query omits project and organization scoping when get_projects returns an empty list, allowing cross-project or cross-organization threshold disclosure" - } - ], - "should_not_find": [ - "generic missing serializer validation", - "missing rate limiting" - ] -} diff --git a/evals/security-review/sentry-replay-count-project-scope-overwrite.json b/evals/security-review/sentry-replay-count-project-scope-overwrite.json deleted file mode 100644 index 5d608477..00000000 --- a/evals/security-review/sentry-replay-count-project-scope-overwrite.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "given": "replay count helper replaces request-scoped projects with projects from issue IDs resolved across the organization", - "files": [ - 
"fixtures/sentry-replay-count-project-scope-overwrite/replay_counts.py" - ], - "should_find": [ - { - "finding": "issue.id replay count path overwrites snuba_params.projects with projects from all matching organization groups, bypassing the caller's project access scope" - } - ], - "should_not_find": [ - "query string parsing as the primary issue", - "missing pagination" - ] -} diff --git a/evals/security-review/sentry-replay-delete-read-scope.json b/evals/security-review/sentry-replay-delete-read-scope.json deleted file mode 100644 index 3883e02e..00000000 --- a/evals/security-review/sentry-replay-delete-read-scope.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "given": "replay detail endpoint grants DELETE to project:read and then enqueues destructive replay deletion", - "files": [ - "fixtures/sentry-replay-delete-read-scope/project_replay_details.py" - ], - "should_find": [ - { - "finding": "DELETE accepts project:read scope, so read-only project users can permanently delete replay data" - } - ], - "should_not_find": [ - "missing UUID validation", - "missing feature flag check" - ] -} diff --git a/evals/security-review/sentry-workflow-connect-workflows-authz.json b/evals/security-review/sentry-workflow-connect-workflows-authz.json deleted file mode 100644 index 70f9e6bf..00000000 --- a/evals/security-review/sentry-workflow-connect-workflows-authz.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "given": "detector-to-workflow connection validates target workflows exist in the organization but does not check permission to modify those workflows", - "files": [ - "fixtures/sentry-workflow-connect-workflows-authz/workflow_validator_utils.py" - ], - "should_find": [ - { - "finding": "connect_detectors_to_workflows permits linking a detector to arbitrary organization workflows because validate_workflows_exist performs no authorization check" - } - ], - "should_not_find": [ - "detector ID enumeration", - "bulk operation performance" - ] -} diff --git 
a/evals/security-review/sentry-workflow-open-periods-project-access.json b/evals/security-review/sentry-workflow-open-periods-project-access.json deleted file mode 100644 index 573a6d52..00000000 --- a/evals/security-review/sentry-workflow-open-periods-project-access.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "given": "workflow open-period endpoint resolves detectorId or groupId by organization only and returns issue activity without project permission checks", - "files": [ - "fixtures/sentry-workflow-open-periods-project-access/organization_open_periods.py" - ], - "should_find": [ - { - "finding": "open-period lookup accepts detectorId or groupId from any project in the organization and returns group activity without checking the caller's project access" - } - ], - "should_not_find": [ - "date parsing", - "missing detectorId format validation" - ] -} diff --git a/package.json b/package.json index e05d8d6b..60ccd7b7 100644 --- a/package.json +++ b/package.json @@ -15,15 +15,15 @@ "build": "pnpm run clean:dist && tsc -p tsconfig.build.json", "build:action": "rm -rf dist/action && ncc build src/action/main.ts -o dist/action --no-source-map-register --license licenses.txt", "dev": "tsc --watch", - "lint": "oxlint src", - "lint:fix": "oxlint --fix src", + "lint": "oxlint .", + "lint:fix": "oxlint --fix .", "test": "vitest run", "test:coverage": "vitest run --coverage", "test:watch": "vitest", "test:examples": "vitest run --config vitest.integration.config.ts", - "evals": "vitest run --config vitest.evals.config.ts", - "evals:scaffold": "tsx scripts/scaffold-eval.ts", - "typecheck": "tsc --noEmit", + "evals": "pnpm -C packages/evals evals", + "evals:scaffold": "pnpm -C packages/evals scaffold", + "typecheck": "tsc --noEmit && pnpm -C packages/evals typecheck", "update-pricing": "tsx scripts/update-pricing.ts", "generate:jsonl-schema": "tsx scripts/generate-jsonl-schema.ts", "docs": "pnpm --filter dex-docs dev", @@ -36,6 +36,15 @@ "src/**/*.ts": [ "oxlint --fix" ], + 
"packages/evals/src/**/*.ts": [ + "oxlint --fix" + ], + "packages/evals/scripts/**/*.ts": [ + "oxlint --fix" + ], + "packages/evals/*.ts": [ + "oxlint --fix" + ], "packages/docs/**/*.astro": [ "pnpm -C packages/docs build" ] @@ -89,8 +98,7 @@ "tinyrainbow": "^3.0.3", "tsx": "^4.19.0", "typescript": "^5.9.3", - "vitest": "^4.1.6", - "vitest-evals": "0.9.0-beta.3" + "vitest": "^4.1.6" }, "engines": { "node": ">=20.0.0" diff --git a/evals/INTERNAL.md b/packages/evals/INTERNAL.md similarity index 63% rename from evals/INTERNAL.md rename to packages/evals/INTERNAL.md index a771f720..2f46e283 100644 --- a/evals/INTERNAL.md +++ b/packages/evals/INTERNAL.md @@ -13,10 +13,10 @@ pnpm evals -t verification pnpm evals:scaffold https://github.com/getsentry/sentry/pull/12345 ``` -CI runs evals for changes under `evals/`, `src/evals/`, the eval workflow, or -the eval config. Add the `run-evals` label to a same-repository PR to force a -run when runtime or prompt changes need benchmark coverage. Fork PRs do not get -eval secrets. +CI runs evals for changes under `packages/evals/`, the eval workflow, package +metadata, or the lockfile. Add the `run-evals` label to a same-repository PR to +force a run when runtime or prompt changes need benchmark coverage. Fork PRs do +not get eval secrets. The raw Vitest eval command can exit non-zero when individual evals miss. CI still records the JSON and JUnit reports, publishes JUnit annotations and a job @@ -32,17 +32,17 @@ source finding is not actually a reachable bug under the target skill. ## Eval Layers -- `evals/*.yaml`: small full-pipeline suites using test skills. -- `evals/code-review/*.json`: one full-pipeline code-review scenario per file. -- `evals/security-review/*.json`: one full-pipeline security-review scenario per file. -- `evals/verification/*.json`: one candidate finding sent directly to `verifyFindings`. -- `evals/fixtures/*`: checked-in fixture source code. 
Eval runs copy these files +- `packages/evals/*.yaml`: small full-pipeline suites using test skills. +- `packages/evals/code-review/*.json`: one full-pipeline code-review scenario per file. +- `packages/evals/security-review/*.json`: one full-pipeline security-review scenario per file. +- `packages/evals/verification/*.json`: one candidate finding sent directly to `verifyFindings`. +- `packages/evals/fixtures/*`: checked-in fixture source code. Eval runs copy these files into temporary git repos under the OS temp directory, preserving paths under - `evals/fixtures/`. -- `src/evals/e2e.eval.ts`: generic YAML full-pipeline suites. -- `src/evals/code-review.eval.ts`: code-review correctness benchmark scenarios. -- `src/evals/security-review.eval.ts`: security-review benchmark scenarios. -- `src/evals/verify.eval.ts`: verifier-only scenarios. + `packages/evals/fixtures/`. +- `packages/evals/src/e2e.eval.ts`: generic YAML full-pipeline suites. +- `packages/evals/src/code-review.eval.ts`: code-review correctness benchmark scenarios. +- `packages/evals/src/security-review.eval.ts`: security-review benchmark scenarios. +- `packages/evals/src/verify.eval.ts`: verifier-only scenarios. Eval names should read as `/`. Runtime and model belong to the suite configuration, not the case identity. Avoid category names that hide the @@ -50,12 +50,17 @@ real skill under test. ## Adding Full-Pipeline Evals -1. Add or scaffold a scenario JSON file under `evals//`. -2. Add focused, checked-in fixture files under `evals/fixtures//`. +1. Add or scaffold a scenario JSON file under `packages/evals//`. +2. Add focused, checked-in fixture files under `packages/evals/fixtures//`. GitHub scaffolds use - `evals/fixtures//github///` to + `packages/evals/fixtures//github///` to preserve source context while eval output uses `/`. Scaffolded source repositories are still passed to prompts as repository context. 
+ They also record `notes.repository`, `notes.source_ref`, and + `notes.source_files` so the original source state can be reproduced with + `git clone`, `git checkout`, and `git show :`. + The scaffolder also copies the root LICENSE-like file into `supporting_files` + so source provenance travels with the fixture without entering the eval diff. 3. Write a specific `should_find` assertion for the verified bug, not for the current Warden output, and add useful `should_not_find` guards. 4. Run the narrow case first with `pnpm evals -t `. @@ -71,7 +76,7 @@ tighten it before committing. Use verifier-only evals when discovery found a real candidate but verification dropped it, or when verification must reject a known false positive. -Each `evals/verification/*.json` file contains: +Each `packages/evals/verification/*.json` file contains: - `files`: repo context for the verifier to inspect - `candidate`: the exact finding object to verify diff --git a/evals/README.md b/packages/evals/README.md similarity index 80% rename from evals/README.md rename to packages/evals/README.md index 56e3bb5e..a9235362 100644 --- a/evals/README.md +++ b/packages/evals/README.md @@ -24,10 +24,10 @@ real. ## Eval Formats -Small suites can use YAML files at the top level of `evals/`. These are mostly +Small suites can use YAML files at the top level of `packages/evals/`. These are mostly generic harness smoke suites that use `eval-*` test skills. Product-facing benchmark suites should prefer one JSON file per scenario under the real skill -name, such as `evals/code-review/` or `evals/security-review/`. +name, such as `packages/evals/code-review/` or `packages/evals/security-review/`. ```yaml skill: skills/bug-detection.md @@ -78,7 +78,7 @@ case. 
## Eval Structure ``` -evals/ +packages/evals/ ├── README.md ├── eval-bug-detection.yaml # Harness smoke suite using eval-bug-detection ├── eval-security-scanning.yaml # Harness smoke suite using eval-security-scanning @@ -124,10 +124,10 @@ over multiple runtimes or models without changing case identity. The Vitest entrypoints are intentionally split by eval layer: -- `src/evals/e2e.eval.ts`: generic YAML full-pipeline suites. -- `src/evals/code-review.eval.ts`: code-review correctness benchmark scenarios. -- `src/evals/security-review.eval.ts`: security-review benchmark scenarios. -- `src/evals/verify.eval.ts`: verifier-only scenarios from `evals/verification/`. +- `packages/evals/src/e2e.eval.ts`: generic YAML full-pipeline suites. +- `packages/evals/src/code-review.eval.ts`: code-review correctness benchmark scenarios. +- `packages/evals/src/security-review.eval.ts`: security-review benchmark scenarios. +- `packages/evals/src/verify.eval.ts`: verifier-only scenarios from `packages/evals/verification/`. ## YAML Schema @@ -135,7 +135,7 @@ The Vitest entrypoints are intentionally split by eval layer: | Field | Required | Description | |-------|----------|-------------| -| `skill` | Yes | Path to test skill, relative to `evals/` | +| `skill` | Yes | Path to test skill, relative to `packages/evals/` | | `runtime` | No | Default runtime for all evals: `claude` or `pi` (default: `claude`) | | `model` | No | Default model for all evals (default: `claude-sonnet-4-6`; Pi models must use provider/model format, e.g. 
`anthropic/claude-sonnet-4-6`) | | `evals` | Yes | List of eval scenarios (at least one) | @@ -146,7 +146,8 @@ The Vitest entrypoints are intentionally split by eval layer: |-------|----------|-------------| | `name` | Yes | Scenario name (used in test output) | | `given` | Yes | What code/situation the eval sets up (BDD "given") | -| `files` | Yes | Fixture files, relative to `evals/` | +| `files` | Yes | Fixture files, relative to `packages/evals/` | +| `supporting_files` | No | Context files, relative to `packages/evals/`, copied into the temp repo before the eval diff | | `model` | No | Model override for this scenario | | `runtime` | No | Runtime override for this scenario | | `should_find` | Yes | What the pipeline should detect (at least one) | @@ -155,13 +156,16 @@ The Vitest entrypoints are intentionally split by eval layer: | `should_find[].required` | No | If true (default), eval fails when not found | | `should_not_find` | No | Things the pipeline should NOT report (precision) | | `notes` | No | Maintainer-only provenance, ignored by eval execution | +| `notes.repository` | No | Source repository for GitHub-captured fixtures, e.g. `getsentry/sentry` | +| `notes.source_ref` | No | Exact source commit SHA to checkout for reproducing GitHub-captured fixture state | +| `notes.source_files` | No | Mapping from checked-in fixture files to source repository paths at `notes.source_ref` | Standalone JSON scenario files may omit `name`; it defaults to the JSON filename without `.json`. ## Verification Evals -Verifier-only evals live in `evals/verification/`. They feed one candidate +Verifier-only evals live in `packages/evals/verification/`. They feed one candidate finding directly into Warden's verification pass and assert whether it should be kept or rejected. 
Use them when a full pipeline eval finds the right issue and a later verification pass drops it, or when the verifier must reject a known false @@ -237,15 +241,15 @@ to improve discovery, verification, merging, or judging later. ## Adding a New Eval -1. Pick an existing skill directory, or create `evals//` -2. Add a YAML scenario entry for harness smoke suites or create `evals//.json` -3. Create checked-in fixture files under `evals/fixtures//` +1. Pick an existing skill directory, or create `packages/evals//` +2. Add a YAML scenario entry for harness smoke suites or create `packages/evals//.json` +3. Create checked-in fixture files under `packages/evals/fixtures//` 4. Run `pnpm evals` to verify -If a new category needs a different test skill, add it to `evals/skills/`. +If a new category needs a different test skill, add it to `packages/evals/skills/`. To exercise a built-in directory-format skill, point `skill` at its `SKILL.md` -relative to `evals/`, for example -`../src/builtin-skills/security-review/SKILL.md`. +relative to `packages/evals/`, for example +`../../src/builtin-skills/security-review/SKILL.md`. ### Scaffolding From GitHub @@ -264,14 +268,30 @@ The scaffold writes a `TODO` `should_find` assertion. That stub is expected to fail until you replace it with the exact expected finding, and it should not be committed as-is. -Scaffolded GitHub fixtures include source context in their paths: -`evals/fixtures//github///`. +Source-captured fixtures include source context in their paths: +`packages/evals/fixtures//github///`. Eval runs copy them into the temp repo as `/` so test output stays focused on the case and original source file. The source repository is still included in prompt context and `notes.repository`. 
+Scaffolded and backfilled source fixtures also record `notes.source_ref` and +`notes.source_files`, so maintainers can recreate the captured source state +with: + +```bash +git clone https://github.com/<owner>/<repo>.git +git -C <repo> checkout <source_ref> +git -C <repo> show <source_ref>:<sourcePath> +``` + Hand-written fixtures can stay shorter when the source repository path is not useful. +Scaffolded GitHub fixtures also copy the source repository's root LICENSE-like +file into `supporting_files`. Supporting files are available in the temp repo +for provenance and context, but are committed before the eval branch so they do +not appear in the review diff. Scaffolding fails when no root license file can +be found at the captured ref. + When a scaffold skips files, it records them in `notes.skipped_files` and prints them in CLI output. Review that list before committing the eval. @@ -288,11 +308,12 @@ them in CLI output. Review that list before committing the eval. ## How It Works -1. **Discovery**: Scan `evals/` for YAML suites and JSON scenario directories +1. **Discovery**: Scan `packages/evals/` for YAML suites and JSON scenario directories 2. **Loading**: Parse YAML/JSON, validate with Zod, resolve paths 3. **Git repo**: Copy checked-in fixtures into a temp repo, preserving paths - under `evals/fixtures/`, and commit them on an `eval` branch (empty `main` - as base), so the agent has a real repo to explore + under `packages/evals/fixtures/`, copy supporting files onto `main`, and + commit fixture files on an `eval` branch, so the agent has a real repo to + explore 4. **Context**: Build `EventContext` from real `git diff main...eval` 5.
**Execution**: Run the skill via `runSkill()` with the real SDK pipeline; the agent operates in the temp repo with Read/Grep tools diff --git a/evals/code-review/eval-optional-assertion-rationale.json b/packages/evals/code-review/eval-optional-assertion-rationale.json similarity index 66% rename from evals/code-review/eval-optional-assertion-rationale.json rename to packages/evals/code-review/eval-optional-assertion-rationale.json index 47a5f08a..166ee789 100644 --- a/evals/code-review/eval-optional-assertion-rationale.json +++ b/packages/evals/code-review/eval-optional-assertion-rationale.json @@ -15,6 +15,18 @@ ], "notes": { "source": "https://github.com/getsentry/warden/pull/327", - "body": "Regression seed from Warden code-review finding: optional should_find assertions were allowed to miss for score calculation but still appeared as failure reasons in eval reports." - } + "body": "Regression seed from Warden code-review finding: optional should_find assertions were allowed to miss for score calculation but still appeared as failure reasons in eval reports.", + "repository": "getsentry/warden", + "source_ref": "5a8d78ffe5c7efc4678cfd1ae4cd9cc8126f6031", + "source_files": [ + { + "fixturePath": "fixtures/eval-optional-assertion-rationale/harness.ts", + "sourcePath": "src/evals/harness.ts", + "ref": "5a8d78ffe5c7efc4678cfd1ae4cd9cc8126f6031" + } + ] + }, + "supporting_files": [ + "fixtures/eval-optional-assertion-rationale/LICENSE.md" + ] } diff --git a/packages/evals/code-review/robots-prefix-blocks-public-metadata.json b/packages/evals/code-review/robots-prefix-blocks-public-metadata.json new file mode 100644 index 00000000..aabf32ab --- /dev/null +++ b/packages/evals/code-review/robots-prefix-blocks-public-metadata.json @@ -0,0 +1,30 @@ +{ + "given": "a robots.txt golden test disallows a prefix that also blocks a documented public metadata endpoint", + "files": [ + "fixtures/robots-prefix-blocks-public-metadata/__static_test.ts" + ], + "should_find": [ + { + 
"finding": "robots.txt Disallow /mcp is a prefix rule that blocks the public /mcp.json metadata endpoint from crawlers", + "severity": "high" + } + ], + "should_not_find": [ + "the test file being test-only makes the finding low severity" + ], + "supporting_files": [ + "fixtures/robots-prefix-blocks-public-metadata/LICENSE.md" + ], + "notes": { + "source": "https://github.com/getsentry/warden/issues/317", + "repository": "getsentry/warden", + "source_ref": "69ea9ccd221a02cdcf39e7811609a2f83f2998c5", + "source_files": [ + { + "fixturePath": "fixtures/robots-prefix-blocks-public-metadata/__static_test.ts", + "sourcePath": "evals/fixtures/robots-prefix-blocks-public-metadata/__static_test.ts", + "ref": "69ea9ccd221a02cdcf39e7811609a2f83f2998c5" + } + ] + } +} diff --git a/evals/code-review/sentry-vitest-evals-duration-sixty-seconds.json b/packages/evals/code-review/sentry-vitest-evals-duration-sixty-seconds.json similarity index 67% rename from evals/code-review/sentry-vitest-evals-duration-sixty-seconds.json rename to packages/evals/code-review/sentry-vitest-evals-duration-sixty-seconds.json index 7a5f9cdc..05216684 100644 --- a/evals/code-review/sentry-vitest-evals-duration-sixty-seconds.json +++ b/packages/evals/code-review/sentry-vitest-evals-duration-sixty-seconds.json @@ -3,6 +3,9 @@ "files": [ "fixtures/sentry-vitest-evals-duration-sixty-seconds/github/getsentry/vitest-evals/packages/github-reporter/src/utils.ts" ], + "supporting_files": [ + "fixtures/sentry-vitest-evals-duration-sixty-seconds/github/getsentry/vitest-evals/LICENSE" + ], "should_find": [ { "finding": "formatDuration rounds display seconds without normalizing carry into the next unit, so values near minute boundaries can render as 60s or 1m 60s instead of 1m 0s or 2m 0s" @@ -17,7 +20,15 @@ "notes": { "source": "https://github.com/getsentry/vitest-evals/pull/56#discussion_r3255240096", "repository": "getsentry/vitest-evals", + "source_ref": "72d64e16efcfa5c50c2f1091b24b2dfacbcf1b3d", "side": 
"head", - "body": "Regression seed from Cursor Bugbot. Verified real: rounding seconds without carrying into the next unit can produce unnormalized duration strings at minute boundaries, including the reported Xm 60s case." + "body": "Regression seed from Cursor Bugbot. Verified real: rounding seconds without carrying into the next unit can produce unnormalized duration strings at minute boundaries, including the reported Xm 60s case.", + "source_files": [ + { + "fixturePath": "fixtures/sentry-vitest-evals-duration-sixty-seconds/github/getsentry/vitest-evals/packages/github-reporter/src/utils.ts", + "sourcePath": "packages/github-reporter/src/utils.ts", + "ref": "72d64e16efcfa5c50c2f1091b24b2dfacbcf1b3d" + } + ] } } diff --git a/evals/code-review/sentry-vitest-evals-github-reporter-positional-json.json b/packages/evals/code-review/sentry-vitest-evals-github-reporter-positional-json.json similarity index 72% rename from evals/code-review/sentry-vitest-evals-github-reporter-positional-json.json rename to packages/evals/code-review/sentry-vitest-evals-github-reporter-positional-json.json index 22827791..db0f5732 100644 --- a/evals/code-review/sentry-vitest-evals-github-reporter-positional-json.json +++ b/packages/evals/code-review/sentry-vitest-evals-github-reporter-positional-json.json @@ -3,6 +3,9 @@ "files": [ "fixtures/sentry-vitest-evals-github-reporter-positional-json/github/getsentry/vitest-evals/packages/github-reporter/src/cli.ts" ], + "supporting_files": [ + "fixtures/sentry-vitest-evals-github-reporter-positional-json/github/getsentry/vitest-evals/LICENSE" + ], "should_find": [ { "finding": "parseArgs initializes jsonPath from VITEST_EVALS_JSON_REPORT or vitest-results.json before argument parsing, so the default-case branch intended to accept a bare positional JSON report path is unreachable and vitest-evals-github-report my-results.json throws Unknown argument instead of using that file" @@ -17,7 +20,15 @@ "notes": { "source": 
"https://github.com/getsentry/vitest-evals/pull/56#discussion_r3255238970", "repository": "getsentry/vitest-evals", + "source_ref": "72d64e16efcfa5c50c2f1091b24b2dfacbcf1b3d", "side": "head", - "body": "Regression seed from Sentry-disclosed potential bug. Verified real: parseArgs defaults jsonPath before checking the positional fallback, making a bare report path unreachable. Severity is intentionally not asserted; Sentry labeled it low, while Warden may treat shipped CLI argument failures under its published-interface rubric." + "body": "Regression seed from Sentry-disclosed potential bug. Verified real: parseArgs defaults jsonPath before checking the positional fallback, making a bare report path unreachable. Severity is intentionally not asserted; Sentry labeled it low, while Warden may treat shipped CLI argument failures under its published-interface rubric.", + "source_files": [ + { + "fixturePath": "fixtures/sentry-vitest-evals-github-reporter-positional-json/github/getsentry/vitest-evals/packages/github-reporter/src/cli.ts", + "sourcePath": "packages/github-reporter/src/cli.ts", + "ref": "72d64e16efcfa5c50c2f1091b24b2dfacbcf1b3d" + } + ] } } diff --git a/evals/eval-bug-detection.yaml b/packages/evals/eval-bug-detection.yaml similarity index 100% rename from evals/eval-bug-detection.yaml rename to packages/evals/eval-bug-detection.yaml diff --git a/evals/eval-precision.yaml b/packages/evals/eval-precision.yaml similarity index 100% rename from evals/eval-precision.yaml rename to packages/evals/eval-precision.yaml diff --git a/evals/eval-security-scanning.yaml b/packages/evals/eval-security-scanning.yaml similarity index 100% rename from evals/eval-security-scanning.yaml rename to packages/evals/eval-security-scanning.yaml diff --git a/packages/evals/fixtures/eval-optional-assertion-rationale/LICENSE.md b/packages/evals/fixtures/eval-optional-assertion-rationale/LICENSE.md new file mode 100644 index 00000000..506811fc --- /dev/null +++ 
b/packages/evals/fixtures/eval-optional-assertion-rationale/LICENSE.md @@ -0,0 +1,105 @@ +# Functional Source License, Version 1.1, ALv2 Future License + +## Abbreviation + +FSL-1.1-ALv2 + +## Notice + +Copyright 2025 Functional Software, Inc. + +## Terms and Conditions + +### Licensor ("We") + +The party offering the Software under these Terms and Conditions. + +### The Software + +The "Software" is each version of the software that we make available under +these Terms and Conditions, as indicated by our inclusion of these Terms and +Conditions with the Software. + +### License Grant + +Subject to your compliance with this License Grant and the Patents, +Redistribution and Trademark clauses below, we hereby grant you the right to +use, copy, modify, create derivative works, publicly perform, publicly display +and redistribute the Software for any Permitted Purpose identified below. + +### Permitted Purpose + +A Permitted Purpose is any purpose other than a Competing Use. A Competing Use +means making the Software available to others in a commercial product or +service that: + +1. substitutes for the Software; + +2. substitutes for any other product or service we offer using the Software + that exists as of the date we make the Software available; or + +3. offers the same or substantially similar functionality as the Software. + +Permitted Purposes specifically include using the Software: + +1. for your internal use and access; + +2. for non-commercial education; + +3. for non-commercial research; and + +4. in connection with professional services that you provide to a licensee + using the Software in accordance with these Terms and Conditions. + +### Patents + +To the extent your use for a Permitted Purpose would necessarily infringe our +patents, the license grant above includes a license under our patents. 
If you +make a claim against any party that the Software infringes or contributes to +the infringement of any patent, then your patent license to the Software ends +immediately. + +### Redistribution + +The Terms and Conditions apply to all copies, modifications and derivatives of +the Software. + +If you redistribute any copies, modifications or derivatives of the Software, +you must include a copy of or a link to these Terms and Conditions and not +remove any copyright notices provided in or with the Software. + +### Disclaimer + +THE SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTIES OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING WITHOUT LIMITATION WARRANTIES OF FITNESS FOR A PARTICULAR +PURPOSE, MERCHANTABILITY, TITLE OR NON-INFRINGEMENT. + +IN NO EVENT WILL WE HAVE ANY LIABILITY TO YOU ARISING OUT OF OR RELATED TO THE +SOFTWARE, INCLUDING INDIRECT, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES, +EVEN IF WE HAVE BEEN INFORMED OF THEIR POSSIBILITY IN ADVANCE. + +### Trademarks + +Except for displaying the License Details and identifying us as the origin of +the Software, you have no right under these Terms and Conditions to use our +trademarks, trade names, service marks or product names. + +## Grant of Future License + +We hereby irrevocably grant you an additional license to use the Software under +the Apache License, Version 2.0 that is effective on the second anniversary of +the date we make the Software available. On or after that date, you may use the +Software under the Apache License, Version 2.0, in which case the following +will apply: + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use +this file except in compliance with the License. 
+ +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed +under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. diff --git a/evals/fixtures/eval-optional-assertion-rationale/harness.ts b/packages/evals/fixtures/eval-optional-assertion-rationale/harness.ts similarity index 100% rename from evals/fixtures/eval-optional-assertion-rationale/harness.ts rename to packages/evals/fixtures/eval-optional-assertion-rationale/harness.ts diff --git a/evals/fixtures/ignores-style-issues/utils.ts b/packages/evals/fixtures/ignores-style-issues/utils.ts similarity index 100% rename from evals/fixtures/ignores-style-issues/utils.ts rename to packages/evals/fixtures/ignores-style-issues/utils.ts diff --git a/evals/fixtures/missing-await/cache.ts b/packages/evals/fixtures/missing-await/cache.ts similarity index 100% rename from evals/fixtures/missing-await/cache.ts rename to packages/evals/fixtures/missing-await/cache.ts diff --git a/evals/fixtures/null-property-access/handler.ts b/packages/evals/fixtures/null-property-access/handler.ts similarity index 100% rename from evals/fixtures/null-property-access/handler.ts rename to packages/evals/fixtures/null-property-access/handler.ts diff --git a/evals/fixtures/off-by-one/paginator.ts b/packages/evals/fixtures/off-by-one/paginator.ts similarity index 100% rename from evals/fixtures/off-by-one/paginator.ts rename to packages/evals/fixtures/off-by-one/paginator.ts diff --git a/packages/evals/fixtures/robots-prefix-blocks-public-metadata/LICENSE.md b/packages/evals/fixtures/robots-prefix-blocks-public-metadata/LICENSE.md new file mode 100644 index 00000000..506811fc --- /dev/null +++ b/packages/evals/fixtures/robots-prefix-blocks-public-metadata/LICENSE.md 
@@ -0,0 +1,105 @@ +# Functional Source License, Version 1.1, ALv2 Future License + +## Abbreviation + +FSL-1.1-ALv2 + +## Notice + +Copyright 2025 Functional Software, Inc. + +## Terms and Conditions + +### Licensor ("We") + +The party offering the Software under these Terms and Conditions. + +### The Software + +The "Software" is each version of the software that we make available under +these Terms and Conditions, as indicated by our inclusion of these Terms and +Conditions with the Software. + +### License Grant + +Subject to your compliance with this License Grant and the Patents, +Redistribution and Trademark clauses below, we hereby grant you the right to +use, copy, modify, create derivative works, publicly perform, publicly display +and redistribute the Software for any Permitted Purpose identified below. + +### Permitted Purpose + +A Permitted Purpose is any purpose other than a Competing Use. A Competing Use +means making the Software available to others in a commercial product or +service that: + +1. substitutes for the Software; + +2. substitutes for any other product or service we offer using the Software + that exists as of the date we make the Software available; or + +3. offers the same or substantially similar functionality as the Software. + +Permitted Purposes specifically include using the Software: + +1. for your internal use and access; + +2. for non-commercial education; + +3. for non-commercial research; and + +4. in connection with professional services that you provide to a licensee + using the Software in accordance with these Terms and Conditions. + +### Patents + +To the extent your use for a Permitted Purpose would necessarily infringe our +patents, the license grant above includes a license under our patents. If you +make a claim against any party that the Software infringes or contributes to +the infringement of any patent, then your patent license to the Software ends +immediately. 
+ +### Redistribution + +The Terms and Conditions apply to all copies, modifications and derivatives of +the Software. + +If you redistribute any copies, modifications or derivatives of the Software, +you must include a copy of or a link to these Terms and Conditions and not +remove any copyright notices provided in or with the Software. + +### Disclaimer + +THE SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTIES OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING WITHOUT LIMITATION WARRANTIES OF FITNESS FOR A PARTICULAR +PURPOSE, MERCHANTABILITY, TITLE OR NON-INFRINGEMENT. + +IN NO EVENT WILL WE HAVE ANY LIABILITY TO YOU ARISING OUT OF OR RELATED TO THE +SOFTWARE, INCLUDING INDIRECT, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES, +EVEN IF WE HAVE BEEN INFORMED OF THEIR POSSIBILITY IN ADVANCE. + +### Trademarks + +Except for displaying the License Details and identifying us as the origin of +the Software, you have no right under these Terms and Conditions to use our +trademarks, trade names, service marks or product names. + +## Grant of Future License + +We hereby irrevocably grant you an additional license to use the Software under +the Apache License, Version 2.0 that is effective on the second anniversary of +the date we make the Software available. On or after that date, you may use the +Software under the Apache License, Version 2.0, in which case the following +will apply: + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use +this file except in compliance with the License. + +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed +under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. 
diff --git a/evals/fixtures/robots-prefix-blocks-public-metadata/__static_test.ts b/packages/evals/fixtures/robots-prefix-blocks-public-metadata/__static_test.ts similarity index 100% rename from evals/fixtures/robots-prefix-blocks-public-metadata/__static_test.ts rename to packages/evals/fixtures/robots-prefix-blocks-public-metadata/__static_test.ts diff --git a/packages/evals/fixtures/sentry-autofix-settings-get-project-access/LICENSE.md b/packages/evals/fixtures/sentry-autofix-settings-get-project-access/LICENSE.md new file mode 100644 index 00000000..f97b1d1f --- /dev/null +++ b/packages/evals/fixtures/sentry-autofix-settings-get-project-access/LICENSE.md @@ -0,0 +1,105 @@ +# Functional Source License, Version 1.1, Apache 2.0 Future License + +## Abbreviation + +FSL-1.1-Apache-2.0 + +## Notice + +Copyright 2008-2024 Functional Software, Inc. dba Sentry + +## Terms and Conditions + +### Licensor ("We") + +The party offering the Software under these Terms and Conditions. + +### The Software + +The "Software" is each version of the software that we make available under +these Terms and Conditions, as indicated by our inclusion of these Terms and +Conditions with the Software. + +### License Grant + +Subject to your compliance with this License Grant and the Patents, +Redistribution and Trademark clauses below, we hereby grant you the right to +use, copy, modify, create derivative works, publicly perform, publicly display +and redistribute the Software for any Permitted Purpose identified below. + +### Permitted Purpose + +A Permitted Purpose is any purpose other than a Competing Use. A Competing Use +means making the Software available to others in a commercial product or +service that: + +1. substitutes for the Software; + +2. substitutes for any other product or service we offer using the Software + that exists as of the date we make the Software available; or + +3. offers the same or substantially similar functionality as the Software. 
+ +Permitted Purposes specifically include using the Software: + +1. for your internal use and access; + +2. for non-commercial education; + +3. for non-commercial research; and + +4. in connection with professional services that you provide to a licensee + using the Software in accordance with these Terms and Conditions. + +### Patents + +To the extent your use for a Permitted Purpose would necessarily infringe our +patents, the license grant above includes a license under our patents. If you +make a claim against any party that the Software infringes or contributes to +the infringement of any patent, then your patent license to the Software ends +immediately. + +### Redistribution + +The Terms and Conditions apply to all copies, modifications and derivatives of +the Software. + +If you redistribute any copies, modifications or derivatives of the Software, +you must include a copy of or a link to these Terms and Conditions and not +remove any copyright notices provided in or with the Software. + +### Disclaimer + +THE SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTIES OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING WITHOUT LIMITATION WARRANTIES OF FITNESS FOR A PARTICULAR +PURPOSE, MERCHANTABILITY, TITLE OR NON-INFRINGEMENT. + +IN NO EVENT WILL WE HAVE ANY LIABILITY TO YOU ARISING OUT OF OR RELATED TO THE +SOFTWARE, INCLUDING INDIRECT, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES, +EVEN IF WE HAVE BEEN INFORMED OF THEIR POSSIBILITY IN ADVANCE. + +### Trademarks + +Except for displaying the License Details and identifying us as the origin of +the Software, you have no right under these Terms and Conditions to use our +trademarks, trade names, service marks or product names. + +## Grant of Future License + +We hereby irrevocably grant you an additional license to use the Software under +the Apache License, Version 2.0 that is effective on the second anniversary of +the date we make the Software available. 
On or after that date, you may use the +Software under the Apache License, Version 2.0, in which case the following +will apply: + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use +this file except in compliance with the License. + +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed +under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. diff --git a/evals/fixtures/sentry-autofix-settings-get-project-access/organization_autofix_automation_settings.py b/packages/evals/fixtures/sentry-autofix-settings-get-project-access/organization_autofix_automation_settings.py similarity index 100% rename from evals/fixtures/sentry-autofix-settings-get-project-access/organization_autofix_automation_settings.py rename to packages/evals/fixtures/sentry-autofix-settings-get-project-access/organization_autofix_automation_settings.py diff --git a/packages/evals/fixtures/sentry-group-search-view-visit-visibility/LICENSE.md b/packages/evals/fixtures/sentry-group-search-view-visit-visibility/LICENSE.md new file mode 100644 index 00000000..f97b1d1f --- /dev/null +++ b/packages/evals/fixtures/sentry-group-search-view-visit-visibility/LICENSE.md @@ -0,0 +1,105 @@ +# Functional Source License, Version 1.1, Apache 2.0 Future License + +## Abbreviation + +FSL-1.1-Apache-2.0 + +## Notice + +Copyright 2008-2024 Functional Software, Inc. dba Sentry + +## Terms and Conditions + +### Licensor ("We") + +The party offering the Software under these Terms and Conditions. + +### The Software + +The "Software" is each version of the software that we make available under +these Terms and Conditions, as indicated by our inclusion of these Terms and +Conditions with the Software. 
+ +### License Grant + +Subject to your compliance with this License Grant and the Patents, +Redistribution and Trademark clauses below, we hereby grant you the right to +use, copy, modify, create derivative works, publicly perform, publicly display +and redistribute the Software for any Permitted Purpose identified below. + +### Permitted Purpose + +A Permitted Purpose is any purpose other than a Competing Use. A Competing Use +means making the Software available to others in a commercial product or +service that: + +1. substitutes for the Software; + +2. substitutes for any other product or service we offer using the Software + that exists as of the date we make the Software available; or + +3. offers the same or substantially similar functionality as the Software. + +Permitted Purposes specifically include using the Software: + +1. for your internal use and access; + +2. for non-commercial education; + +3. for non-commercial research; and + +4. in connection with professional services that you provide to a licensee + using the Software in accordance with these Terms and Conditions. + +### Patents + +To the extent your use for a Permitted Purpose would necessarily infringe our +patents, the license grant above includes a license under our patents. If you +make a claim against any party that the Software infringes or contributes to +the infringement of any patent, then your patent license to the Software ends +immediately. + +### Redistribution + +The Terms and Conditions apply to all copies, modifications and derivatives of +the Software. + +If you redistribute any copies, modifications or derivatives of the Software, +you must include a copy of or a link to these Terms and Conditions and not +remove any copyright notices provided in or with the Software. 
+ +### Disclaimer + +THE SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTIES OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING WITHOUT LIMITATION WARRANTIES OF FITNESS FOR A PARTICULAR +PURPOSE, MERCHANTABILITY, TITLE OR NON-INFRINGEMENT. + +IN NO EVENT WILL WE HAVE ANY LIABILITY TO YOU ARISING OUT OF OR RELATED TO THE +SOFTWARE, INCLUDING INDIRECT, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES, +EVEN IF WE HAVE BEEN INFORMED OF THEIR POSSIBILITY IN ADVANCE. + +### Trademarks + +Except for displaying the License Details and identifying us as the origin of +the Software, you have no right under these Terms and Conditions to use our +trademarks, trade names, service marks or product names. + +## Grant of Future License + +We hereby irrevocably grant you an additional license to use the Software under +the Apache License, Version 2.0 that is effective on the second anniversary of +the date we make the Software available. On or after that date, you may use the +Software under the Apache License, Version 2.0, in which case the following +will apply: + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use +this file except in compliance with the License. + +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed +under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. 
diff --git a/evals/fixtures/sentry-group-search-view-visit-visibility/group_search_view_visit.py b/packages/evals/fixtures/sentry-group-search-view-visit-visibility/group_search_view_visit.py similarity index 100% rename from evals/fixtures/sentry-group-search-view-visit-visibility/group_search_view_visit.py rename to packages/evals/fixtures/sentry-group-search-view-visit-visibility/group_search_view_visit.py diff --git a/packages/evals/fixtures/sentry-preprod-size-analysis-base-artifact-access/LICENSE.md b/packages/evals/fixtures/sentry-preprod-size-analysis-base-artifact-access/LICENSE.md new file mode 100644 index 00000000..f97b1d1f --- /dev/null +++ b/packages/evals/fixtures/sentry-preprod-size-analysis-base-artifact-access/LICENSE.md @@ -0,0 +1,105 @@ +# Functional Source License, Version 1.1, Apache 2.0 Future License + +## Abbreviation + +FSL-1.1-Apache-2.0 + +## Notice + +Copyright 2008-2024 Functional Software, Inc. dba Sentry + +## Terms and Conditions + +### Licensor ("We") + +The party offering the Software under these Terms and Conditions. + +### The Software + +The "Software" is each version of the software that we make available under +these Terms and Conditions, as indicated by our inclusion of these Terms and +Conditions with the Software. + +### License Grant + +Subject to your compliance with this License Grant and the Patents, +Redistribution and Trademark clauses below, we hereby grant you the right to +use, copy, modify, create derivative works, publicly perform, publicly display +and redistribute the Software for any Permitted Purpose identified below. + +### Permitted Purpose + +A Permitted Purpose is any purpose other than a Competing Use. A Competing Use +means making the Software available to others in a commercial product or +service that: + +1. substitutes for the Software; + +2. substitutes for any other product or service we offer using the Software + that exists as of the date we make the Software available; or + +3. 
offers the same or substantially similar functionality as the Software. + +Permitted Purposes specifically include using the Software: + +1. for your internal use and access; + +2. for non-commercial education; + +3. for non-commercial research; and + +4. in connection with professional services that you provide to a licensee + using the Software in accordance with these Terms and Conditions. + +### Patents + +To the extent your use for a Permitted Purpose would necessarily infringe our +patents, the license grant above includes a license under our patents. If you +make a claim against any party that the Software infringes or contributes to +the infringement of any patent, then your patent license to the Software ends +immediately. + +### Redistribution + +The Terms and Conditions apply to all copies, modifications and derivatives of +the Software. + +If you redistribute any copies, modifications or derivatives of the Software, +you must include a copy of or a link to these Terms and Conditions and not +remove any copyright notices provided in or with the Software. + +### Disclaimer + +THE SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTIES OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING WITHOUT LIMITATION WARRANTIES OF FITNESS FOR A PARTICULAR +PURPOSE, MERCHANTABILITY, TITLE OR NON-INFRINGEMENT. + +IN NO EVENT WILL WE HAVE ANY LIABILITY TO YOU ARISING OUT OF OR RELATED TO THE +SOFTWARE, INCLUDING INDIRECT, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES, +EVEN IF WE HAVE BEEN INFORMED OF THEIR POSSIBILITY IN ADVANCE. + +### Trademarks + +Except for displaying the License Details and identifying us as the origin of +the Software, you have no right under these Terms and Conditions to use our +trademarks, trade names, service marks or product names. 
+ +## Grant of Future License + +We hereby irrevocably grant you an additional license to use the Software under +the Apache License, Version 2.0 that is effective on the second anniversary of +the date we make the Software available. On or after that date, you may use the +Software under the Apache License, Version 2.0, in which case the following +will apply: + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use +this file except in compliance with the License. + +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed +under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. diff --git a/evals/fixtures/sentry-preprod-size-analysis-base-artifact-access/organization_preprod_size_analysis.py b/packages/evals/fixtures/sentry-preprod-size-analysis-base-artifact-access/organization_preprod_size_analysis.py similarity index 100% rename from evals/fixtures/sentry-preprod-size-analysis-base-artifact-access/organization_preprod_size_analysis.py rename to packages/evals/fixtures/sentry-preprod-size-analysis-base-artifact-access/organization_preprod_size_analysis.py diff --git a/packages/evals/fixtures/sentry-preprod-snapshot-project-access/LICENSE.md b/packages/evals/fixtures/sentry-preprod-snapshot-project-access/LICENSE.md new file mode 100644 index 00000000..f97b1d1f --- /dev/null +++ b/packages/evals/fixtures/sentry-preprod-snapshot-project-access/LICENSE.md @@ -0,0 +1,105 @@ +# Functional Source License, Version 1.1, Apache 2.0 Future License + +## Abbreviation + +FSL-1.1-Apache-2.0 + +## Notice + +Copyright 2008-2024 Functional Software, Inc. 
dba Sentry + +## Terms and Conditions + +### Licensor ("We") + +The party offering the Software under these Terms and Conditions. + +### The Software + +The "Software" is each version of the software that we make available under +these Terms and Conditions, as indicated by our inclusion of these Terms and +Conditions with the Software. + +### License Grant + +Subject to your compliance with this License Grant and the Patents, +Redistribution and Trademark clauses below, we hereby grant you the right to +use, copy, modify, create derivative works, publicly perform, publicly display +and redistribute the Software for any Permitted Purpose identified below. + +### Permitted Purpose + +A Permitted Purpose is any purpose other than a Competing Use. A Competing Use +means making the Software available to others in a commercial product or +service that: + +1. substitutes for the Software; + +2. substitutes for any other product or service we offer using the Software + that exists as of the date we make the Software available; or + +3. offers the same or substantially similar functionality as the Software. + +Permitted Purposes specifically include using the Software: + +1. for your internal use and access; + +2. for non-commercial education; + +3. for non-commercial research; and + +4. in connection with professional services that you provide to a licensee + using the Software in accordance with these Terms and Conditions. + +### Patents + +To the extent your use for a Permitted Purpose would necessarily infringe our +patents, the license grant above includes a license under our patents. If you +make a claim against any party that the Software infringes or contributes to +the infringement of any patent, then your patent license to the Software ends +immediately. + +### Redistribution + +The Terms and Conditions apply to all copies, modifications and derivatives of +the Software. 
+ +If you redistribute any copies, modifications or derivatives of the Software, +you must include a copy of or a link to these Terms and Conditions and not +remove any copyright notices provided in or with the Software. + +### Disclaimer + +THE SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTIES OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING WITHOUT LIMITATION WARRANTIES OF FITNESS FOR A PARTICULAR +PURPOSE, MERCHANTABILITY, TITLE OR NON-INFRINGEMENT. + +IN NO EVENT WILL WE HAVE ANY LIABILITY TO YOU ARISING OUT OF OR RELATED TO THE +SOFTWARE, INCLUDING INDIRECT, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES, +EVEN IF WE HAVE BEEN INFORMED OF THEIR POSSIBILITY IN ADVANCE. + +### Trademarks + +Except for displaying the License Details and identifying us as the origin of +the Software, you have no right under these Terms and Conditions to use our +trademarks, trade names, service marks or product names. + +## Grant of Future License + +We hereby irrevocably grant you an additional license to use the Software under +the Apache License, Version 2.0 that is effective on the second anniversary of +the date we make the Software available. On or after that date, you may use the +Software under the Apache License, Version 2.0, in which case the following +will apply: + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use +this file except in compliance with the License. + +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed +under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. 
diff --git a/evals/fixtures/sentry-preprod-snapshot-project-access/preprod_artifact_snapshot.py b/packages/evals/fixtures/sentry-preprod-snapshot-project-access/preprod_artifact_snapshot.py similarity index 100% rename from evals/fixtures/sentry-preprod-snapshot-project-access/preprod_artifact_snapshot.py rename to packages/evals/fixtures/sentry-preprod-snapshot-project-access/preprod_artifact_snapshot.py diff --git a/packages/evals/fixtures/sentry-release-threshold-empty-project-filter/LICENSE.md b/packages/evals/fixtures/sentry-release-threshold-empty-project-filter/LICENSE.md new file mode 100644 index 00000000..f97b1d1f --- /dev/null +++ b/packages/evals/fixtures/sentry-release-threshold-empty-project-filter/LICENSE.md @@ -0,0 +1,105 @@ +# Functional Source License, Version 1.1, Apache 2.0 Future License + +## Abbreviation + +FSL-1.1-Apache-2.0 + +## Notice + +Copyright 2008-2024 Functional Software, Inc. dba Sentry + +## Terms and Conditions + +### Licensor ("We") + +The party offering the Software under these Terms and Conditions. + +### The Software + +The "Software" is each version of the software that we make available under +these Terms and Conditions, as indicated by our inclusion of these Terms and +Conditions with the Software. + +### License Grant + +Subject to your compliance with this License Grant and the Patents, +Redistribution and Trademark clauses below, we hereby grant you the right to +use, copy, modify, create derivative works, publicly perform, publicly display +and redistribute the Software for any Permitted Purpose identified below. + +### Permitted Purpose + +A Permitted Purpose is any purpose other than a Competing Use. A Competing Use +means making the Software available to others in a commercial product or +service that: + +1. substitutes for the Software; + +2. substitutes for any other product or service we offer using the Software + that exists as of the date we make the Software available; or + +3. 
offers the same or substantially similar functionality as the Software. + +Permitted Purposes specifically include using the Software: + +1. for your internal use and access; + +2. for non-commercial education; + +3. for non-commercial research; and + +4. in connection with professional services that you provide to a licensee + using the Software in accordance with these Terms and Conditions. + +### Patents + +To the extent your use for a Permitted Purpose would necessarily infringe our +patents, the license grant above includes a license under our patents. If you +make a claim against any party that the Software infringes or contributes to +the infringement of any patent, then your patent license to the Software ends +immediately. + +### Redistribution + +The Terms and Conditions apply to all copies, modifications and derivatives of +the Software. + +If you redistribute any copies, modifications or derivatives of the Software, +you must include a copy of or a link to these Terms and Conditions and not +remove any copyright notices provided in or with the Software. + +### Disclaimer + +THE SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTIES OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING WITHOUT LIMITATION WARRANTIES OF FITNESS FOR A PARTICULAR +PURPOSE, MERCHANTABILITY, TITLE OR NON-INFRINGEMENT. + +IN NO EVENT WILL WE HAVE ANY LIABILITY TO YOU ARISING OUT OF OR RELATED TO THE +SOFTWARE, INCLUDING INDIRECT, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES, +EVEN IF WE HAVE BEEN INFORMED OF THEIR POSSIBILITY IN ADVANCE. + +### Trademarks + +Except for displaying the License Details and identifying us as the origin of +the Software, you have no right under these Terms and Conditions to use our +trademarks, trade names, service marks or product names. 
+ +## Grant of Future License + +We hereby irrevocably grant you an additional license to use the Software under +the Apache License, Version 2.0 that is effective on the second anniversary of +the date we make the Software available. On or after that date, you may use the +Software under the Apache License, Version 2.0, in which case the following +will apply: + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use +this file except in compliance with the License. + +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed +under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. diff --git a/evals/fixtures/sentry-release-threshold-empty-project-filter/release_threshold_index.py b/packages/evals/fixtures/sentry-release-threshold-empty-project-filter/release_threshold_index.py similarity index 100% rename from evals/fixtures/sentry-release-threshold-empty-project-filter/release_threshold_index.py rename to packages/evals/fixtures/sentry-release-threshold-empty-project-filter/release_threshold_index.py diff --git a/packages/evals/fixtures/sentry-replay-count-project-scope-overwrite/LICENSE.md b/packages/evals/fixtures/sentry-replay-count-project-scope-overwrite/LICENSE.md new file mode 100644 index 00000000..f97b1d1f --- /dev/null +++ b/packages/evals/fixtures/sentry-replay-count-project-scope-overwrite/LICENSE.md @@ -0,0 +1,105 @@ +# Functional Source License, Version 1.1, Apache 2.0 Future License + +## Abbreviation + +FSL-1.1-Apache-2.0 + +## Notice + +Copyright 2008-2024 Functional Software, Inc. dba Sentry + +## Terms and Conditions + +### Licensor ("We") + +The party offering the Software under these Terms and Conditions. 
+ +### The Software + +The "Software" is each version of the software that we make available under +these Terms and Conditions, as indicated by our inclusion of these Terms and +Conditions with the Software. + +### License Grant + +Subject to your compliance with this License Grant and the Patents, +Redistribution and Trademark clauses below, we hereby grant you the right to +use, copy, modify, create derivative works, publicly perform, publicly display +and redistribute the Software for any Permitted Purpose identified below. + +### Permitted Purpose + +A Permitted Purpose is any purpose other than a Competing Use. A Competing Use +means making the Software available to others in a commercial product or +service that: + +1. substitutes for the Software; + +2. substitutes for any other product or service we offer using the Software + that exists as of the date we make the Software available; or + +3. offers the same or substantially similar functionality as the Software. + +Permitted Purposes specifically include using the Software: + +1. for your internal use and access; + +2. for non-commercial education; + +3. for non-commercial research; and + +4. in connection with professional services that you provide to a licensee + using the Software in accordance with these Terms and Conditions. + +### Patents + +To the extent your use for a Permitted Purpose would necessarily infringe our +patents, the license grant above includes a license under our patents. If you +make a claim against any party that the Software infringes or contributes to +the infringement of any patent, then your patent license to the Software ends +immediately. + +### Redistribution + +The Terms and Conditions apply to all copies, modifications and derivatives of +the Software. + +If you redistribute any copies, modifications or derivatives of the Software, +you must include a copy of or a link to these Terms and Conditions and not +remove any copyright notices provided in or with the Software. 
+ +### Disclaimer + +THE SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTIES OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING WITHOUT LIMITATION WARRANTIES OF FITNESS FOR A PARTICULAR +PURPOSE, MERCHANTABILITY, TITLE OR NON-INFRINGEMENT. + +IN NO EVENT WILL WE HAVE ANY LIABILITY TO YOU ARISING OUT OF OR RELATED TO THE +SOFTWARE, INCLUDING INDIRECT, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES, +EVEN IF WE HAVE BEEN INFORMED OF THEIR POSSIBILITY IN ADVANCE. + +### Trademarks + +Except for displaying the License Details and identifying us as the origin of +the Software, you have no right under these Terms and Conditions to use our +trademarks, trade names, service marks or product names. + +## Grant of Future License + +We hereby irrevocably grant you an additional license to use the Software under +the Apache License, Version 2.0 that is effective on the second anniversary of +the date we make the Software available. On or after that date, you may use the +Software under the Apache License, Version 2.0, in which case the following +will apply: + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use +this file except in compliance with the License. + +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed +under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. 
diff --git a/evals/fixtures/sentry-replay-count-project-scope-overwrite/replay_counts.py b/packages/evals/fixtures/sentry-replay-count-project-scope-overwrite/replay_counts.py similarity index 100% rename from evals/fixtures/sentry-replay-count-project-scope-overwrite/replay_counts.py rename to packages/evals/fixtures/sentry-replay-count-project-scope-overwrite/replay_counts.py diff --git a/packages/evals/fixtures/sentry-replay-delete-read-scope/LICENSE.md b/packages/evals/fixtures/sentry-replay-delete-read-scope/LICENSE.md new file mode 100644 index 00000000..f97b1d1f --- /dev/null +++ b/packages/evals/fixtures/sentry-replay-delete-read-scope/LICENSE.md @@ -0,0 +1,105 @@ +# Functional Source License, Version 1.1, Apache 2.0 Future License + +## Abbreviation + +FSL-1.1-Apache-2.0 + +## Notice + +Copyright 2008-2024 Functional Software, Inc. dba Sentry + +## Terms and Conditions + +### Licensor ("We") + +The party offering the Software under these Terms and Conditions. + +### The Software + +The "Software" is each version of the software that we make available under +these Terms and Conditions, as indicated by our inclusion of these Terms and +Conditions with the Software. + +### License Grant + +Subject to your compliance with this License Grant and the Patents, +Redistribution and Trademark clauses below, we hereby grant you the right to +use, copy, modify, create derivative works, publicly perform, publicly display +and redistribute the Software for any Permitted Purpose identified below. + +### Permitted Purpose + +A Permitted Purpose is any purpose other than a Competing Use. A Competing Use +means making the Software available to others in a commercial product or +service that: + +1. substitutes for the Software; + +2. substitutes for any other product or service we offer using the Software + that exists as of the date we make the Software available; or + +3. offers the same or substantially similar functionality as the Software. 
+ +Permitted Purposes specifically include using the Software: + +1. for your internal use and access; + +2. for non-commercial education; + +3. for non-commercial research; and + +4. in connection with professional services that you provide to a licensee + using the Software in accordance with these Terms and Conditions. + +### Patents + +To the extent your use for a Permitted Purpose would necessarily infringe our +patents, the license grant above includes a license under our patents. If you +make a claim against any party that the Software infringes or contributes to +the infringement of any patent, then your patent license to the Software ends +immediately. + +### Redistribution + +The Terms and Conditions apply to all copies, modifications and derivatives of +the Software. + +If you redistribute any copies, modifications or derivatives of the Software, +you must include a copy of or a link to these Terms and Conditions and not +remove any copyright notices provided in or with the Software. + +### Disclaimer + +THE SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTIES OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING WITHOUT LIMITATION WARRANTIES OF FITNESS FOR A PARTICULAR +PURPOSE, MERCHANTABILITY, TITLE OR NON-INFRINGEMENT. + +IN NO EVENT WILL WE HAVE ANY LIABILITY TO YOU ARISING OUT OF OR RELATED TO THE +SOFTWARE, INCLUDING INDIRECT, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES, +EVEN IF WE HAVE BEEN INFORMED OF THEIR POSSIBILITY IN ADVANCE. + +### Trademarks + +Except for displaying the License Details and identifying us as the origin of +the Software, you have no right under these Terms and Conditions to use our +trademarks, trade names, service marks or product names. + +## Grant of Future License + +We hereby irrevocably grant you an additional license to use the Software under +the Apache License, Version 2.0 that is effective on the second anniversary of +the date we make the Software available. 
On or after that date, you may use the +Software under the Apache License, Version 2.0, in which case the following +will apply: + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use +this file except in compliance with the License. + +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed +under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. diff --git a/evals/fixtures/sentry-replay-delete-read-scope/project_replay_details.py b/packages/evals/fixtures/sentry-replay-delete-read-scope/project_replay_details.py similarity index 100% rename from evals/fixtures/sentry-replay-delete-read-scope/project_replay_details.py rename to packages/evals/fixtures/sentry-replay-delete-read-scope/project_replay_details.py diff --git a/packages/evals/fixtures/sentry-slack-options-load-unscoped-group/LICENSE.md b/packages/evals/fixtures/sentry-slack-options-load-unscoped-group/LICENSE.md new file mode 100644 index 00000000..f97b1d1f --- /dev/null +++ b/packages/evals/fixtures/sentry-slack-options-load-unscoped-group/LICENSE.md @@ -0,0 +1,105 @@ +# Functional Source License, Version 1.1, Apache 2.0 Future License + +## Abbreviation + +FSL-1.1-Apache-2.0 + +## Notice + +Copyright 2008-2024 Functional Software, Inc. dba Sentry + +## Terms and Conditions + +### Licensor ("We") + +The party offering the Software under these Terms and Conditions. + +### The Software + +The "Software" is each version of the software that we make available under +these Terms and Conditions, as indicated by our inclusion of these Terms and +Conditions with the Software. 
+ +### License Grant + +Subject to your compliance with this License Grant and the Patents, +Redistribution and Trademark clauses below, we hereby grant you the right to +use, copy, modify, create derivative works, publicly perform, publicly display +and redistribute the Software for any Permitted Purpose identified below. + +### Permitted Purpose + +A Permitted Purpose is any purpose other than a Competing Use. A Competing Use +means making the Software available to others in a commercial product or +service that: + +1. substitutes for the Software; + +2. substitutes for any other product or service we offer using the Software + that exists as of the date we make the Software available; or + +3. offers the same or substantially similar functionality as the Software. + +Permitted Purposes specifically include using the Software: + +1. for your internal use and access; + +2. for non-commercial education; + +3. for non-commercial research; and + +4. in connection with professional services that you provide to a licensee + using the Software in accordance with these Terms and Conditions. + +### Patents + +To the extent your use for a Permitted Purpose would necessarily infringe our +patents, the license grant above includes a license under our patents. If you +make a claim against any party that the Software infringes or contributes to +the infringement of any patent, then your patent license to the Software ends +immediately. + +### Redistribution + +The Terms and Conditions apply to all copies, modifications and derivatives of +the Software. + +If you redistribute any copies, modifications or derivatives of the Software, +you must include a copy of or a link to these Terms and Conditions and not +remove any copyright notices provided in or with the Software. 
+ +### Disclaimer + +THE SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTIES OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING WITHOUT LIMITATION WARRANTIES OF FITNESS FOR A PARTICULAR +PURPOSE, MERCHANTABILITY, TITLE OR NON-INFRINGEMENT. + +IN NO EVENT WILL WE HAVE ANY LIABILITY TO YOU ARISING OUT OF OR RELATED TO THE +SOFTWARE, INCLUDING INDIRECT, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES, +EVEN IF WE HAVE BEEN INFORMED OF THEIR POSSIBILITY IN ADVANCE. + +### Trademarks + +Except for displaying the License Details and identifying us as the origin of +the Software, you have no right under these Terms and Conditions to use our +trademarks, trade names, service marks or product names. + +## Grant of Future License + +We hereby irrevocably grant you an additional license to use the Software under +the Apache License, Version 2.0 that is effective on the second anniversary of +the date we make the Software available. On or after that date, you may use the +Software under the Apache License, Version 2.0, in which case the following +will apply: + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use +this file except in compliance with the License. + +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed +under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. 
diff --git a/evals/fixtures/sentry-slack-options-load-unscoped-group/options_load.py b/packages/evals/fixtures/sentry-slack-options-load-unscoped-group/options_load.py similarity index 100% rename from evals/fixtures/sentry-slack-options-load-unscoped-group/options_load.py rename to packages/evals/fixtures/sentry-slack-options-load-unscoped-group/options_load.py diff --git a/packages/evals/fixtures/sentry-vitest-evals-duration-sixty-seconds/github/getsentry/vitest-evals/LICENSE b/packages/evals/fixtures/sentry-vitest-evals-duration-sixty-seconds/github/getsentry/vitest-evals/LICENSE new file mode 100644 index 00000000..261eeb9e --- /dev/null +++ b/packages/evals/fixtures/sentry-vitest-evals-duration-sixty-seconds/github/getsentry/vitest-evals/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/evals/fixtures/sentry-vitest-evals-duration-sixty-seconds/github/getsentry/vitest-evals/packages/github-reporter/src/utils.ts b/packages/evals/fixtures/sentry-vitest-evals-duration-sixty-seconds/github/getsentry/vitest-evals/packages/github-reporter/src/utils.ts similarity index 100% rename from evals/fixtures/sentry-vitest-evals-duration-sixty-seconds/github/getsentry/vitest-evals/packages/github-reporter/src/utils.ts rename to packages/evals/fixtures/sentry-vitest-evals-duration-sixty-seconds/github/getsentry/vitest-evals/packages/github-reporter/src/utils.ts diff --git a/packages/evals/fixtures/sentry-vitest-evals-github-reporter-positional-json/github/getsentry/vitest-evals/LICENSE b/packages/evals/fixtures/sentry-vitest-evals-github-reporter-positional-json/github/getsentry/vitest-evals/LICENSE new file mode 100644 index 00000000..261eeb9e --- /dev/null +++ b/packages/evals/fixtures/sentry-vitest-evals-github-reporter-positional-json/github/getsentry/vitest-evals/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/evals/fixtures/sentry-vitest-evals-github-reporter-positional-json/github/getsentry/vitest-evals/packages/github-reporter/src/cli.ts b/packages/evals/fixtures/sentry-vitest-evals-github-reporter-positional-json/github/getsentry/vitest-evals/packages/github-reporter/src/cli.ts similarity index 100% rename from evals/fixtures/sentry-vitest-evals-github-reporter-positional-json/github/getsentry/vitest-evals/packages/github-reporter/src/cli.ts rename to packages/evals/fixtures/sentry-vitest-evals-github-reporter-positional-json/github/getsentry/vitest-evals/packages/github-reporter/src/cli.ts diff --git a/packages/evals/fixtures/sentry-workflow-connect-workflows-authz/LICENSE.md b/packages/evals/fixtures/sentry-workflow-connect-workflows-authz/LICENSE.md new file mode 100644 index 00000000..f97b1d1f --- /dev/null +++ b/packages/evals/fixtures/sentry-workflow-connect-workflows-authz/LICENSE.md @@ -0,0 +1,105 @@ +# Functional Source License, Version 1.1, Apache 2.0 Future License + +## Abbreviation + +FSL-1.1-Apache-2.0 + +## Notice + +Copyright 2008-2024 Functional Software, Inc. dba Sentry + +## Terms and Conditions + +### Licensor ("We") + +The party offering the Software under these Terms and Conditions. + +### The Software + +The "Software" is each version of the software that we make available under +these Terms and Conditions, as indicated by our inclusion of these Terms and +Conditions with the Software. + +### License Grant + +Subject to your compliance with this License Grant and the Patents, +Redistribution and Trademark clauses below, we hereby grant you the right to +use, copy, modify, create derivative works, publicly perform, publicly display +and redistribute the Software for any Permitted Purpose identified below. + +### Permitted Purpose + +A Permitted Purpose is any purpose other than a Competing Use. A Competing Use +means making the Software available to others in a commercial product or +service that: + +1. 
substitutes for the Software; + +2. substitutes for any other product or service we offer using the Software + that exists as of the date we make the Software available; or + +3. offers the same or substantially similar functionality as the Software. + +Permitted Purposes specifically include using the Software: + +1. for your internal use and access; + +2. for non-commercial education; + +3. for non-commercial research; and + +4. in connection with professional services that you provide to a licensee + using the Software in accordance with these Terms and Conditions. + +### Patents + +To the extent your use for a Permitted Purpose would necessarily infringe our +patents, the license grant above includes a license under our patents. If you +make a claim against any party that the Software infringes or contributes to +the infringement of any patent, then your patent license to the Software ends +immediately. + +### Redistribution + +The Terms and Conditions apply to all copies, modifications and derivatives of +the Software. + +If you redistribute any copies, modifications or derivatives of the Software, +you must include a copy of or a link to these Terms and Conditions and not +remove any copyright notices provided in or with the Software. + +### Disclaimer + +THE SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTIES OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING WITHOUT LIMITATION WARRANTIES OF FITNESS FOR A PARTICULAR +PURPOSE, MERCHANTABILITY, TITLE OR NON-INFRINGEMENT. + +IN NO EVENT WILL WE HAVE ANY LIABILITY TO YOU ARISING OUT OF OR RELATED TO THE +SOFTWARE, INCLUDING INDIRECT, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES, +EVEN IF WE HAVE BEEN INFORMED OF THEIR POSSIBILITY IN ADVANCE. + +### Trademarks + +Except for displaying the License Details and identifying us as the origin of +the Software, you have no right under these Terms and Conditions to use our +trademarks, trade names, service marks or product names. 
+ +## Grant of Future License + +We hereby irrevocably grant you an additional license to use the Software under +the Apache License, Version 2.0 that is effective on the second anniversary of +the date we make the Software available. On or after that date, you may use the +Software under the Apache License, Version 2.0, in which case the following +will apply: + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use +this file except in compliance with the License. + +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed +under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. diff --git a/evals/fixtures/sentry-workflow-connect-workflows-authz/workflow_validator_utils.py b/packages/evals/fixtures/sentry-workflow-connect-workflows-authz/workflow_validator_utils.py similarity index 100% rename from evals/fixtures/sentry-workflow-connect-workflows-authz/workflow_validator_utils.py rename to packages/evals/fixtures/sentry-workflow-connect-workflows-authz/workflow_validator_utils.py diff --git a/packages/evals/fixtures/sentry-workflow-open-periods-project-access/LICENSE.md b/packages/evals/fixtures/sentry-workflow-open-periods-project-access/LICENSE.md new file mode 100644 index 00000000..f97b1d1f --- /dev/null +++ b/packages/evals/fixtures/sentry-workflow-open-periods-project-access/LICENSE.md @@ -0,0 +1,105 @@ +# Functional Source License, Version 1.1, Apache 2.0 Future License + +## Abbreviation + +FSL-1.1-Apache-2.0 + +## Notice + +Copyright 2008-2024 Functional Software, Inc. dba Sentry + +## Terms and Conditions + +### Licensor ("We") + +The party offering the Software under these Terms and Conditions. 
+ +### The Software + +The "Software" is each version of the software that we make available under +these Terms and Conditions, as indicated by our inclusion of these Terms and +Conditions with the Software. + +### License Grant + +Subject to your compliance with this License Grant and the Patents, +Redistribution and Trademark clauses below, we hereby grant you the right to +use, copy, modify, create derivative works, publicly perform, publicly display +and redistribute the Software for any Permitted Purpose identified below. + +### Permitted Purpose + +A Permitted Purpose is any purpose other than a Competing Use. A Competing Use +means making the Software available to others in a commercial product or +service that: + +1. substitutes for the Software; + +2. substitutes for any other product or service we offer using the Software + that exists as of the date we make the Software available; or + +3. offers the same or substantially similar functionality as the Software. + +Permitted Purposes specifically include using the Software: + +1. for your internal use and access; + +2. for non-commercial education; + +3. for non-commercial research; and + +4. in connection with professional services that you provide to a licensee + using the Software in accordance with these Terms and Conditions. + +### Patents + +To the extent your use for a Permitted Purpose would necessarily infringe our +patents, the license grant above includes a license under our patents. If you +make a claim against any party that the Software infringes or contributes to +the infringement of any patent, then your patent license to the Software ends +immediately. + +### Redistribution + +The Terms and Conditions apply to all copies, modifications and derivatives of +the Software. + +If you redistribute any copies, modifications or derivatives of the Software, +you must include a copy of or a link to these Terms and Conditions and not +remove any copyright notices provided in or with the Software. 
+ +### Disclaimer + +THE SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTIES OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING WITHOUT LIMITATION WARRANTIES OF FITNESS FOR A PARTICULAR +PURPOSE, MERCHANTABILITY, TITLE OR NON-INFRINGEMENT. + +IN NO EVENT WILL WE HAVE ANY LIABILITY TO YOU ARISING OUT OF OR RELATED TO THE +SOFTWARE, INCLUDING INDIRECT, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES, +EVEN IF WE HAVE BEEN INFORMED OF THEIR POSSIBILITY IN ADVANCE. + +### Trademarks + +Except for displaying the License Details and identifying us as the origin of +the Software, you have no right under these Terms and Conditions to use our +trademarks, trade names, service marks or product names. + +## Grant of Future License + +We hereby irrevocably grant you an additional license to use the Software under +the Apache License, Version 2.0 that is effective on the second anniversary of +the date we make the Software available. On or after that date, you may use the +Software under the Apache License, Version 2.0, in which case the following +will apply: + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use +this file except in compliance with the License. + +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed +under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. 
diff --git a/evals/fixtures/sentry-workflow-open-periods-project-access/organization_open_periods.py b/packages/evals/fixtures/sentry-workflow-open-periods-project-access/organization_open_periods.py similarity index 100% rename from evals/fixtures/sentry-workflow-open-periods-project-access/organization_open_periods.py rename to packages/evals/fixtures/sentry-workflow-open-periods-project-access/organization_open_periods.py diff --git a/evals/fixtures/sql-injection/api.ts b/packages/evals/fixtures/sql-injection/api.ts similarity index 100% rename from evals/fixtures/sql-injection/api.ts rename to packages/evals/fixtures/sql-injection/api.ts diff --git a/evals/fixtures/stale-closure/counter.tsx b/packages/evals/fixtures/stale-closure/counter.tsx similarity index 100% rename from evals/fixtures/stale-closure/counter.tsx rename to packages/evals/fixtures/stale-closure/counter.tsx diff --git a/evals/fixtures/wrong-comparison/validator.ts b/packages/evals/fixtures/wrong-comparison/validator.ts similarity index 100% rename from evals/fixtures/wrong-comparison/validator.ts rename to packages/evals/fixtures/wrong-comparison/validator.ts diff --git a/evals/fixtures/xss-reflected/server.ts b/packages/evals/fixtures/xss-reflected/server.ts similarity index 100% rename from evals/fixtures/xss-reflected/server.ts rename to packages/evals/fixtures/xss-reflected/server.ts diff --git a/packages/evals/package.json b/packages/evals/package.json new file mode 100644 index 00000000..e087777b --- /dev/null +++ b/packages/evals/package.json @@ -0,0 +1,24 @@ +{ + "name": "warden-evals", + "private": true, + "type": "module", + "scripts": { + "evals": "vitest run --config vitest.config.ts", + "scaffold": "tsx scripts/scaffold-eval.ts", + "test": "vitest run --config vitest.unit.config.ts", + "typecheck": "tsc --noEmit" + }, + "dependencies": { + "@octokit/rest": "^22.0.1", + "dotenv": "^17.2.3", + "vitest-evals": "0.9.0-beta.3", + "yaml": "^2.8.2", + "zod": "^4.3.6" + }, + 
"devDependencies": { + "@types/node": "^25.0.10", + "tsx": "^4.19.0", + "typescript": "^5.9.3", + "vitest": "^4.1.6" + } +} diff --git a/scripts/scaffold-eval.ts b/packages/evals/scripts/scaffold-eval.ts similarity index 80% rename from scripts/scaffold-eval.ts rename to packages/evals/scripts/scaffold-eval.ts index bc53b6cd..b587c8bd 100644 --- a/scripts/scaffold-eval.ts +++ b/packages/evals/scripts/scaffold-eval.ts @@ -1,6 +1,9 @@ #!/usr/bin/env tsx import { resolve } from 'node:path'; -import { scaffoldEvalFromGitHubPullRequest } from '../src/evals/scaffold.js'; +import { + scaffoldEvalFromGitHubPullRequest, + type ScaffoldEvalOptions, +} from '../src/scaffold.js'; interface Args { url?: string; @@ -20,7 +23,7 @@ function usage(exitCode = 2): never { ' --category Eval category directory (default: security-review)', ' --side Which PR side to copy fixtures from (default: base)', ' --name Scenario name (default: slugified PR title)', - ' --evals-dir Evals directory (default: ./evals)', + ' --evals-dir Evals package root (default: packages/evals)', ' --force Overwrite existing generated files', ].join('\n'); @@ -32,11 +35,11 @@ function usage(exitCode = 2): never { process.exit(exitCode); } -function parseArgs(argv: string[]): Args { +function parseArgs(argv: string[]): ScaffoldEvalOptions { const args: Args = { category: 'security-review', side: 'base', - evalsDir: resolve(process.cwd(), 'evals'), + evalsDir: resolve(process.cwd()), force: false, }; @@ -73,18 +76,25 @@ function parseArgs(argv: string[]): Args { usage(); } - return args; + return { ...args, url: args.url }; } const args = parseArgs(process.argv.slice(2)); const result = await scaffoldEvalFromGitHubPullRequest(args); console.log(`Created eval: ${result.name}`); +console.log(`Source: ${result.repository}@${result.sourceRef}`); console.log(`Scenario: ${result.scenarioPath}`); console.log('Fixtures:'); for (const file of result.files) { console.log(` ${file.fixturePath} <- 
${file.sourcePath}@${file.ref}`); } +if (result.supportingFiles.length > 0) { + console.log('Supporting fixtures:'); + for (const file of result.supportingFiles) { + console.log(` ${file.fixturePath} <- ${file.sourcePath}@${file.ref}`); + } +} if (result.skippedFiles.length > 0) { console.log('Skipped:'); for (const file of result.skippedFiles) { diff --git a/evals/security-review/BASELINE.md b/packages/evals/security-review/BASELINE.md similarity index 100% rename from evals/security-review/BASELINE.md rename to packages/evals/security-review/BASELINE.md diff --git a/packages/evals/security-review/sentry-autofix-settings-get-project-access.json b/packages/evals/security-review/sentry-autofix-settings-get-project-access.json new file mode 100644 index 00000000..6c6767cf --- /dev/null +++ b/packages/evals/security-review/sentry-autofix-settings-get-project-access.json @@ -0,0 +1,30 @@ +{ + "given": "Autofix automation settings GET lists every project in the organization instead of using OrganizationEndpoint.get_projects", + "files": [ + "fixtures/sentry-autofix-settings-get-project-access/organization_autofix_automation_settings.py" + ], + "should_find": [ + { + "finding": "Autofix automation settings GET queries all organization projects directly and returns settings for projects the caller cannot access" + } + ], + "should_not_find": [ + "repository provider validation", + "audit log contents" + ], + "supporting_files": [ + "fixtures/sentry-autofix-settings-get-project-access/LICENSE.md" + ], + "notes": { + "source": "https://github.com/getsentry/sentry/pull/114977", + "repository": "getsentry/sentry", + "source_ref": "841e962b7e9af1f442a7c9da6eb6ec7e2a01843a", + "source_files": [ + { + "fixturePath": "fixtures/sentry-autofix-settings-get-project-access/organization_autofix_automation_settings.py", + "sourcePath": "src/sentry/seer/endpoints/organization_autofix_automation_settings.py", + "ref": "841e962b7e9af1f442a7c9da6eb6ec7e2a01843a" + } + ] + } +} diff --git 
a/packages/evals/security-review/sentry-group-search-view-visit-visibility.json b/packages/evals/security-review/sentry-group-search-view-visit-visibility.json new file mode 100644 index 00000000..6078fbde --- /dev/null +++ b/packages/evals/security-review/sentry-group-search-view-visit-visibility.json @@ -0,0 +1,30 @@ +{ + "given": "group search view visit endpoint updates private views by ID without applying the existing object permission or visibility checks", + "files": [ + "fixtures/sentry-group-search-view-visit-visibility/group_search_view_visit.py" + ], + "should_find": [ + { + "finding": "visit endpoint loads a GroupSearchView by organization and id only, allowing any org member to mark private views as visited and learn or mutate metadata without object permission checks" + } + ], + "should_not_find": [ + "missing authentication as the primary issue", + "timezone usage" + ], + "supporting_files": [ + "fixtures/sentry-group-search-view-visit-visibility/LICENSE.md" + ], + "notes": { + "source": "https://github.com/getsentry/sentry/blob/d12e82ea6554d6794b1a490369ec6baa75aeaa15/src/sentry/issues/endpoints/organization_group_search_view_visit.py", + "repository": "getsentry/sentry", + "source_ref": "d12e82ea6554d6794b1a490369ec6baa75aeaa15", + "source_files": [ + { + "fixturePath": "fixtures/sentry-group-search-view-visit-visibility/group_search_view_visit.py", + "sourcePath": "src/sentry/issues/endpoints/organization_group_search_view_visit.py", + "ref": "d12e82ea6554d6794b1a490369ec6baa75aeaa15" + } + ] + } +} diff --git a/packages/evals/security-review/sentry-preprod-size-analysis-base-artifact-access.json b/packages/evals/security-review/sentry-preprod-size-analysis-base-artifact-access.json new file mode 100644 index 00000000..972f0a04 --- /dev/null +++ b/packages/evals/security-review/sentry-preprod-size-analysis-base-artifact-access.json @@ -0,0 +1,30 @@ +{ + "given": "public size-analysis endpoint accepts a baseArtifactId and loads the base artifact by 
organization only", + "files": [ + "fixtures/sentry-preprod-size-analysis-base-artifact-access/organization_preprod_size_analysis.py" + ], + "should_find": [ + { + "finding": "baseArtifactId lets a caller attach an inaccessible project artifact as the comparison base because the lookup checks organization_id but not project access" + } + ], + "should_not_find": [ + "integer parsing error handling", + "response field naming" + ], + "supporting_files": [ + "fixtures/sentry-preprod-size-analysis-base-artifact-access/LICENSE.md" + ], + "notes": { + "source": "https://github.com/getsentry/sentry/blob/d12e82ea6554d6794b1a490369ec6baa75aeaa15/src/sentry/preprod/api/endpoints/public/organization_preprod_size_analysis.py", + "repository": "getsentry/sentry", + "source_ref": "d12e82ea6554d6794b1a490369ec6baa75aeaa15", + "source_files": [ + { + "fixturePath": "fixtures/sentry-preprod-size-analysis-base-artifact-access/organization_preprod_size_analysis.py", + "sourcePath": "src/sentry/preprod/api/endpoints/public/organization_preprod_size_analysis.py", + "ref": "d12e82ea6554d6794b1a490369ec6baa75aeaa15" + } + ] + } +} diff --git a/packages/evals/security-review/sentry-preprod-snapshot-project-access.json b/packages/evals/security-review/sentry-preprod-snapshot-project-access.json new file mode 100644 index 00000000..7f751cec --- /dev/null +++ b/packages/evals/security-review/sentry-preprod-snapshot-project-access.json @@ -0,0 +1,30 @@ +{ + "given": "preprod snapshot endpoint fetches artifacts by organization only and returns or deletes project artifacts without checking project membership", + "files": [ + "fixtures/sentry-preprod-snapshot-project-access/preprod_artifact_snapshot.py" + ], + "should_find": [ + { + "finding": "preprod snapshot GET and DELETE load artifacts by organization_id only and do not call has_project_access before exposing manifests or deleting artifacts" + } + ], + "should_not_find": [ + "exception logging", + "missing rate limiting" + ], + 
"supporting_files": [ + "fixtures/sentry-preprod-snapshot-project-access/LICENSE.md" + ], + "notes": { + "source": "https://github.com/getsentry/sentry/pull/114169", + "repository": "getsentry/sentry", + "source_ref": "004819f53b2634ebe0faa1e9c59575c5203ff37b", + "source_files": [ + { + "fixturePath": "fixtures/sentry-preprod-snapshot-project-access/preprod_artifact_snapshot.py", + "sourcePath": "src/sentry/preprod/api/endpoints/preprod_artifact_snapshot.py", + "ref": "004819f53b2634ebe0faa1e9c59575c5203ff37b" + } + ] + } +} diff --git a/packages/evals/security-review/sentry-release-threshold-empty-project-filter.json b/packages/evals/security-review/sentry-release-threshold-empty-project-filter.json new file mode 100644 index 00000000..4a5587fe --- /dev/null +++ b/packages/evals/security-review/sentry-release-threshold-empty-project-filter.json @@ -0,0 +1,30 @@ +{ + "given": "release threshold endpoint builds an empty ORM filter when the caller has no accessible projects", + "files": [ + "fixtures/sentry-release-threshold-empty-project-filter/release_threshold_index.py" + ], + "should_find": [ + { + "finding": "ReleaseThreshold query omits project and organization scoping when get_projects returns an empty list, allowing cross-project or cross-organization threshold disclosure" + } + ], + "should_not_find": [ + "generic missing serializer validation", + "missing rate limiting" + ], + "supporting_files": [ + "fixtures/sentry-release-threshold-empty-project-filter/LICENSE.md" + ], + "notes": { + "source": "https://github.com/getsentry/sentry/pull/114049", + "repository": "getsentry/sentry", + "source_ref": "aebfdffc6afec83f0c66830e93fd872aa4c85c43", + "source_files": [ + { + "fixturePath": "fixtures/sentry-release-threshold-empty-project-filter/release_threshold_index.py", + "sourcePath": "src/sentry/api/endpoints/release_thresholds/release_threshold_index.py", + "ref": "aebfdffc6afec83f0c66830e93fd872aa4c85c43" + } + ] + } +} diff --git 
a/packages/evals/security-review/sentry-replay-count-project-scope-overwrite.json b/packages/evals/security-review/sentry-replay-count-project-scope-overwrite.json new file mode 100644 index 00000000..f0444306 --- /dev/null +++ b/packages/evals/security-review/sentry-replay-count-project-scope-overwrite.json @@ -0,0 +1,30 @@ +{ + "given": "replay count helper replaces request-scoped projects with projects from issue IDs resolved across the organization", + "files": [ + "fixtures/sentry-replay-count-project-scope-overwrite/replay_counts.py" + ], + "should_find": [ + { + "finding": "issue.id replay count path overwrites snuba_params.projects with projects from all matching organization groups, bypassing the caller's project access scope" + } + ], + "should_not_find": [ + "query string parsing as the primary issue", + "missing pagination" + ], + "supporting_files": [ + "fixtures/sentry-replay-count-project-scope-overwrite/LICENSE.md" + ], + "notes": { + "source": "https://github.com/getsentry/sentry/blob/d12e82ea6554d6794b1a490369ec6baa75aeaa15/src/sentry/replays/usecases/replay_counts.py", + "repository": "getsentry/sentry", + "source_ref": "d12e82ea6554d6794b1a490369ec6baa75aeaa15", + "source_files": [ + { + "fixturePath": "fixtures/sentry-replay-count-project-scope-overwrite/replay_counts.py", + "sourcePath": "src/sentry/replays/usecases/replay_counts.py", + "ref": "d12e82ea6554d6794b1a490369ec6baa75aeaa15" + } + ] + } +} diff --git a/packages/evals/security-review/sentry-replay-delete-read-scope.json b/packages/evals/security-review/sentry-replay-delete-read-scope.json new file mode 100644 index 00000000..acedbee9 --- /dev/null +++ b/packages/evals/security-review/sentry-replay-delete-read-scope.json @@ -0,0 +1,30 @@ +{ + "given": "replay detail endpoint grants DELETE to project:read and then enqueues destructive replay deletion", + "files": [ + "fixtures/sentry-replay-delete-read-scope/project_replay_details.py" + ], + "should_find": [ + { + "finding": "DELETE 
accepts project:read scope, so read-only project users can permanently delete replay data" + } + ], + "should_not_find": [ + "missing UUID validation", + "missing feature flag check" + ], + "supporting_files": [ + "fixtures/sentry-replay-delete-read-scope/LICENSE.md" + ], + "notes": { + "source": "https://github.com/getsentry/sentry/pull/114159", + "repository": "getsentry/sentry", + "source_ref": "8e848834d2efde23ae32f1c93957842460b12ea4", + "source_files": [ + { + "fixturePath": "fixtures/sentry-replay-delete-read-scope/project_replay_details.py", + "sourcePath": "src/sentry/replays/endpoints/project_replay_details.py", + "ref": "8e848834d2efde23ae32f1c93957842460b12ea4" + } + ] + } +} diff --git a/evals/security-review/sentry-slack-options-load-unscoped-group.json b/packages/evals/security-review/sentry-slack-options-load-unscoped-group.json similarity index 54% rename from evals/security-review/sentry-slack-options-load-unscoped-group.json rename to packages/evals/security-review/sentry-slack-options-load-unscoped-group.json index d6d5124a..949f1498 100644 --- a/evals/security-review/sentry-slack-options-load-unscoped-group.json +++ b/packages/evals/security-review/sentry-slack-options-load-unscoped-group.json @@ -12,5 +12,20 @@ "Slack signature validation stub or validate() returning True as an unauthenticated access vulnerability", "timing-safe token comparison", "regex performance" - ] + ], + "supporting_files": [ + "fixtures/sentry-slack-options-load-unscoped-group/LICENSE.md" + ], + "notes": { + "source": "https://github.com/getsentry/sentry/pull/114185", + "repository": "getsentry/sentry", + "source_ref": "b0c31079134ae8f755161842361ae28ace2baef1", + "source_files": [ + { + "fixturePath": "fixtures/sentry-slack-options-load-unscoped-group/options_load.py", + "sourcePath": "src/sentry/integrations/slack/webhooks/options_load.py", + "ref": "b0c31079134ae8f755161842361ae28ace2baef1" + } + ] + } } diff --git 
a/packages/evals/security-review/sentry-workflow-connect-workflows-authz.json b/packages/evals/security-review/sentry-workflow-connect-workflows-authz.json new file mode 100644 index 00000000..d2f11a20 --- /dev/null +++ b/packages/evals/security-review/sentry-workflow-connect-workflows-authz.json @@ -0,0 +1,30 @@ +{ + "given": "detector-to-workflow connection validates target workflows exist in the organization but does not check permission to modify those workflows", + "files": [ + "fixtures/sentry-workflow-connect-workflows-authz/workflow_validator_utils.py" + ], + "should_find": [ + { + "finding": "connect_detectors_to_workflows permits linking a detector to arbitrary organization workflows because validate_workflows_exist performs no authorization check" + } + ], + "should_not_find": [ + "detector ID enumeration", + "bulk operation performance" + ], + "supporting_files": [ + "fixtures/sentry-workflow-connect-workflows-authz/LICENSE.md" + ], + "notes": { + "source": "https://github.com/getsentry/sentry/blob/d12e82ea6554d6794b1a490369ec6baa75aeaa15/src/sentry/workflow_engine/endpoints/validators/utils.py", + "repository": "getsentry/sentry", + "source_ref": "d12e82ea6554d6794b1a490369ec6baa75aeaa15", + "source_files": [ + { + "fixturePath": "fixtures/sentry-workflow-connect-workflows-authz/workflow_validator_utils.py", + "sourcePath": "src/sentry/workflow_engine/endpoints/validators/utils.py", + "ref": "d12e82ea6554d6794b1a490369ec6baa75aeaa15" + } + ] + } +} diff --git a/packages/evals/security-review/sentry-workflow-open-periods-project-access.json b/packages/evals/security-review/sentry-workflow-open-periods-project-access.json new file mode 100644 index 00000000..83eeef50 --- /dev/null +++ b/packages/evals/security-review/sentry-workflow-open-periods-project-access.json @@ -0,0 +1,30 @@ +{ + "given": "workflow open-period endpoint resolves detectorId or groupId by organization only and returns issue activity without project permission checks", + "files": [ + 
"fixtures/sentry-workflow-open-periods-project-access/organization_open_periods.py" + ], + "should_find": [ + { + "finding": "open-period lookup accepts detectorId or groupId from any project in the organization and returns group activity without checking the caller's project access" + } + ], + "should_not_find": [ + "date parsing", + "missing detectorId format validation" + ], + "supporting_files": [ + "fixtures/sentry-workflow-open-periods-project-access/LICENSE.md" + ], + "notes": { + "source": "https://github.com/getsentry/sentry/blob/d12e82ea6554d6794b1a490369ec6baa75aeaa15/src/sentry/workflow_engine/endpoints/organization_open_periods.py", + "repository": "getsentry/sentry", + "source_ref": "d12e82ea6554d6794b1a490369ec6baa75aeaa15", + "source_files": [ + { + "fixturePath": "fixtures/sentry-workflow-open-periods-project-access/organization_open_periods.py", + "sourcePath": "src/sentry/workflow_engine/endpoints/organization_open_periods.py", + "ref": "d12e82ea6554d6794b1a490369ec6baa75aeaa15" + } + ] + } +} diff --git a/evals/skills/bug-detection.md b/packages/evals/skills/bug-detection.md similarity index 100% rename from evals/skills/bug-detection.md rename to packages/evals/skills/bug-detection.md diff --git a/evals/skills/precision.md b/packages/evals/skills/precision.md similarity index 100% rename from evals/skills/precision.md rename to packages/evals/skills/precision.md diff --git a/evals/skills/security-scanning.md b/packages/evals/skills/security-scanning.md similarity index 100% rename from evals/skills/security-scanning.md rename to packages/evals/skills/security-scanning.md diff --git a/src/evals/code-review.eval.ts b/packages/evals/src/code-review.eval.ts similarity index 95% rename from src/evals/code-review.eval.ts rename to packages/evals/src/code-review.eval.ts index 590bc70c..bfe7f026 100644 --- a/src/evals/code-review.eval.ts +++ b/packages/evals/src/code-review.eval.ts @@ -11,7 +11,7 @@ import { formatEvalId, formatEvalTestName } from 
'./names.js'; const apiKey = process.env['ANTHROPIC_API_KEY'] ?? ''; const evals = discoverEvalScenarios({ category: 'code-review', - skill: '../src/builtin-skills/code-review/SKILL.md', + skill: '../../src/builtin-skills/code-review/SKILL.md', runtime: 'pi', model: 'anthropic/claude-sonnet-4-6', }); diff --git a/src/evals/e2e.eval.ts b/packages/evals/src/e2e.eval.ts similarity index 100% rename from src/evals/e2e.eval.ts rename to packages/evals/src/e2e.eval.ts diff --git a/src/evals/fixtures.test.ts b/packages/evals/src/fixtures.test.ts similarity index 100% rename from src/evals/fixtures.test.ts rename to packages/evals/src/fixtures.test.ts diff --git a/src/evals/fixtures.ts b/packages/evals/src/fixtures.ts similarity index 100% rename from src/evals/fixtures.ts rename to packages/evals/src/fixtures.ts diff --git a/src/evals/harness.test.ts b/packages/evals/src/harness.test.ts similarity index 97% rename from src/evals/harness.test.ts rename to packages/evals/src/harness.test.ts index e92b5940..e1fc7f1b 100644 --- a/src/evals/harness.test.ts +++ b/packages/evals/src/harness.test.ts @@ -3,9 +3,9 @@ import type { JudgeContext } from 'vitest-evals'; import { createWardenEvalJudge } from './harness.js'; import { runJudge } from './judge.js'; import { DEFAULT_EVAL_MODEL, DEFAULT_EVAL_RUNTIME } from './types.js'; -import { emptyUsage } from '../sdk/usage.js'; +import { emptyUsage } from '../../../src/sdk/usage.js'; import type { EvalMeta } from './types.js'; -import type { Finding } from '../types/index.js'; +import type { Finding } from '../../../src/types/index.js'; vi.mock('./judge.js', () => ({ runJudge: vi.fn(), diff --git a/src/evals/harness.ts b/packages/evals/src/harness.ts similarity index 97% rename from src/evals/harness.ts rename to packages/evals/src/harness.ts index a6959ef7..4cba9909 100644 --- a/src/evals/harness.ts +++ b/packages/evals/src/harness.ts @@ -11,8 +11,8 @@ import { runJudge } from './judge.js'; import { runEvalSkill, type RunEvalOptions } 
from './runner.js'; import { evalPassed, type EvalMeta, type JudgeResponse } from './types.js'; import { usageToSummary } from './usage.js'; -import { FindingSchema } from '../types/index.js'; -import type { Finding, SkillReport, UsageStats } from '../types/index.js'; +import { FindingSchema } from '../../../src/types/index.js'; +import type { Finding, SkillReport, UsageStats } from '../../../src/types/index.js'; export const WardenEvalOutputSchema = z.object({ name: z.string(), diff --git a/src/evals/index.test.ts b/packages/evals/src/index.test.ts similarity index 78% rename from src/evals/index.test.ts rename to packages/evals/src/index.test.ts index 10c6601f..11c7ef48 100644 --- a/src/evals/index.test.ts +++ b/packages/evals/src/index.test.ts @@ -1,4 +1,5 @@ import { describe, it, expect } from 'vitest'; +import { readFileSync } from 'node:fs'; import { join } from 'node:path'; import { discoverEvalFiles, @@ -18,7 +19,7 @@ import { EvalScenarioSchema, } from './types.js'; -const evalsDir = join(import.meta.dirname, '..', '..', 'evals'); +const evalsDir = join(import.meta.dirname, '..'); describe('discoverEvalFiles', () => { it('returns array of YAML file paths', () => { @@ -177,7 +178,7 @@ describe('standalone scenario files', () => { const scenario = loadEvalScenarioFile(file); const meta = resolveEvalScenarioMeta(scenario, file, { category: 'security-review', - skill: '../src/builtin-skills/security-review/SKILL.md', + skill: '../../src/builtin-skills/security-review/SKILL.md', runtime: 'pi', model: 'anthropic/claude-sonnet-4-6', baseDir: evalsDir, @@ -195,7 +196,7 @@ describe('standalone scenario files', () => { it('discovers all standalone scenarios for a category', () => { const metas = discoverEvalScenarios({ category: 'security-review', - skill: '../src/builtin-skills/security-review/SKILL.md', + skill: '../../src/builtin-skills/security-review/SKILL.md', runtime: 'pi', model: 'anthropic/claude-sonnet-4-6', baseDir: evalsDir, @@ -208,7 +209,7 @@ 
describe('standalone scenario files', () => { it('discovers standalone code-review scenarios', () => { const metas = discoverEvalScenarios({ category: 'code-review', - skill: '../src/builtin-skills/code-review/SKILL.md', + skill: '../../src/builtin-skills/code-review/SKILL.md', runtime: 'pi', model: 'anthropic/claude-sonnet-4-6', baseDir: evalsDir, @@ -224,6 +225,84 @@ describe('standalone scenario files', () => { expect(metas[0]?.skillPath).toContain('src/builtin-skills/code-review/SKILL.md'); }); + it('requires repro metadata for source-captured fixtures', () => { + const licenseFilePattern = /\/(?:LICENSE(?:\.(?:md|txt))?|LICENCE(?:\.md)?|COPYING(?:\.md)?)$/; + const scenarioFiles = [ + ...discoverEvalScenarioFiles('code-review', evalsDir), + ...discoverEvalScenarioFiles('security-review', evalsDir), + ...discoverEvalScenarioFiles('verification', evalsDir), + ]; + const missingMetadata: string[] = []; + + for (const file of scenarioFiles) { + const scenario = JSON.parse(readFileSync(file, 'utf-8')) as { + files?: string[]; + supporting_files?: string[]; + notes?: { + repository?: string; + source_ref?: string; + source_files?: { + fixturePath: string; + sourcePath: string; + ref?: string; + }[]; + }; + }; + const fixtureFiles = scenario.files ?? []; + const supportingFiles = scenario.supporting_files ?? []; + const sourceFiles = scenario.notes?.source_files ?? 
[]; + const fixtureRoots = new Set(); + const repositories = new Set(); + let sourceCaptured = Boolean(scenario.notes?.repository); + for (const fixture of fixtureFiles) { + if (fixture.startsWith('fixtures/sentry-') || fixture.includes('/github/')) { + sourceCaptured = true; + } + const segments = fixture.split('/'); + const githubIndex = segments.indexOf('github'); + if (githubIndex !== -1 && segments[githubIndex + 1] && segments[githubIndex + 2]) { + fixtureRoots.add(segments.slice(0, githubIndex + 3).join('/')); + repositories.add(`${segments[githubIndex + 1]}/${segments[githubIndex + 2]}`); + } + } + + for (const root of fixtureRoots) { + const hasLicense = [...fixtureFiles, ...supportingFiles].some( + (supportingFile) => + supportingFile.startsWith(`${root}/`) && + licenseFilePattern.test(`/${supportingFile}`) + ); + if (!hasLicense) { + missingMetadata.push(`${file}: ${root}/LICENSE`); + } + } + for (const repository of repositories) { + if (scenario.notes?.repository !== repository || !scenario.notes.source_ref) { + missingMetadata.push(`${file}: ${repository}@`); + } + } + if (sourceCaptured) { + if (!scenario.notes?.repository || !scenario.notes.source_ref) { + missingMetadata.push(`${file}: `); + } + const hasLicense = [...fixtureFiles, ...supportingFiles].some((fixture) => + licenseFilePattern.test(`/${fixture}`) + ); + if (!hasLicense) { + missingMetadata.push(`${file}: `); + } + for (const fixture of fixtureFiles) { + const sourceFile = sourceFiles.find((entry) => entry.fixturePath === fixture); + if (!sourceFile?.sourcePath) { + missingMetadata.push(`${file}: ${fixture} -> `); + } + } + } + } + + expect(missingMetadata).toEqual([]); + }); + it('throws when a standalone scenario fixture file does not exist', () => { const scenario = EvalScenarioFileSchema.parse({ given: 'an eval with a missing fixture', @@ -233,7 +312,7 @@ describe('standalone scenario files', () => { expect(() => resolveEvalScenarioMeta(scenario, join(evalsDir, 'security-review', 
'missing-fixture.json'), { category: 'security-review', - skill: '../src/builtin-skills/security-review/SKILL.md', + skill: '../../src/builtin-skills/security-review/SKILL.md', baseDir: evalsDir, })).toThrow('Eval fixture not found for security-review/missing-fixture'); }); @@ -310,7 +389,7 @@ describe('EvalFileSchema', () => { it('accepts a Pi runtime with provider-qualified model', () => { const valid = { - skill: '../src/builtin-skills/security-review/SKILL.md', + skill: '../../src/builtin-skills/security-review/SKILL.md', runtime: 'pi', model: 'anthropic/claude-sonnet-4-6', evals: [{ diff --git a/src/evals/index.ts b/packages/evals/src/index.ts similarity index 88% rename from src/evals/index.ts rename to packages/evals/src/index.ts index 6d9fb05d..5c1913aa 100644 --- a/src/evals/index.ts +++ b/packages/evals/src/index.ts @@ -8,20 +8,20 @@ import { EvalScenarioFileSchema, } from './types.js'; import type { EvalFile, EvalMeta, EvalScenarioFile } from './types.js'; -import type { RuntimeName } from '../sdk/runtimes/types.js'; +import type { RuntimeName } from '../../../src/sdk/runtimes/types.js'; export type { EvalMeta }; export interface EvalScenarioSetOptions { - /** Category/suite name, and directory under evals/ containing JSON scenarios. */ + /** Category/suite name, and directory under the eval package root containing JSON scenarios. */ category: string; - /** Skill to run, relative to evals/ directory. */ + /** Skill to run, relative to the eval package root. */ skill: string; /** Default runtime for all scenarios in this set. */ runtime?: RuntimeName; /** Default model for all scenarios in this set. */ model?: string; - /** Optional evals directory override for tests. */ + /** Optional eval package root override for tests. */ baseDir?: string; } @@ -29,7 +29,7 @@ export interface EvalScenarioSetOptions { * Get the default evals directory path. 
*/ function getEvalsDir(): string { - return join(import.meta.dirname, '..', '..', 'evals'); + return join(import.meta.dirname, '..'); } function fallbackSkillName(skillPath: string): string { @@ -100,7 +100,7 @@ export function discoverEvalFiles(baseDir?: string): string[] { } /** - * Discover standalone JSON scenario files in evals//. + * Discover standalone JSON scenario files in //. */ export function discoverEvalScenarioFiles(category: string, baseDir?: string): string[] { const scenarioDir = join(baseDir ?? getEvalsDir(), category); @@ -186,7 +186,7 @@ export function loadEvalScenarioFile(filePath: string): EvalScenarioFile { /** * Resolve all eval scenarios from a YAML file into executable EvalMeta objects. - * Resolves relative paths for skills and fixtures against the evals directory. + * Resolves relative paths for skills and fixtures against the eval package root. */ export function resolveEvalMetas(evalFile: EvalFile, yamlPath: string): EvalMeta[] { const evalsDir = join(yamlPath, '..'); @@ -207,6 +207,13 @@ export function resolveEvalMetas(evalFile: EvalFile, yamlPath: string): EvalMeta } return filePath; }); + const supportingFilePaths = scenario.supporting_files.map((file) => { + const filePath = join(evalsDir, file); + if (!existsSync(filePath)) { + throw new Error(`Eval supporting fixture not found for ${category}/${scenario.name}: ${file}`); + } + return filePath; + }); return { name: scenario.name, @@ -215,6 +222,7 @@ export function resolveEvalMetas(evalFile: EvalFile, yamlPath: string): EvalMeta given: scenario.given, skillPath, filePaths, + supportingFilePaths, model: scenario.model ?? evalFile.model, runtime: scenario.runtime ?? 
evalFile.runtime, should_find: scenario.should_find, @@ -246,6 +254,13 @@ export function resolveEvalScenarioMeta( } return filePath; }); + const supportingFilePaths = scenario.supporting_files.map((file) => { + const filePath = join(evalsDir, file); + if (!existsSync(filePath)) { + throw new Error(`Eval supporting fixture not found for ${options.category}/${name}: ${file}`); + } + return filePath; + }); return { name, @@ -254,6 +269,7 @@ export function resolveEvalScenarioMeta( given: scenario.given, skillPath, filePaths, + supportingFilePaths, model: scenario.model ?? options.model ?? DEFAULT_EVAL_MODEL, runtime: scenario.runtime ?? options.runtime ?? DEFAULT_EVAL_RUNTIME, should_find: scenario.should_find, diff --git a/src/evals/judge.ts b/packages/evals/src/judge.ts similarity index 95% rename from src/evals/judge.ts rename to packages/evals/src/judge.ts index 6f691ca9..5ddd2807 100644 --- a/src/evals/judge.ts +++ b/packages/evals/src/judge.ts @@ -1,11 +1,11 @@ import Anthropic from '@anthropic-ai/sdk'; -import type { Finding } from '../types/index.js'; -import { apiUsageToStats } from '../sdk/pricing.js'; -import { emptyUsage } from '../sdk/usage.js'; -import { extractJson } from '../sdk/haiku.js'; +import type { Finding } from '../../../src/types/index.js'; +import { apiUsageToStats } from '../../../src/sdk/pricing.js'; +import { emptyUsage } from '../../../src/sdk/usage.js'; +import { extractJson } from '../../../src/sdk/haiku.js'; import type { EvalMeta, JudgeResponse } from './types.js'; import { DEFAULT_EVAL_MODEL, JudgeResponseSchema } from './types.js'; -import type { UsageStats } from '../types/index.js'; +import type { UsageStats } from '../../../src/types/index.js'; const JUDGE_MODEL = DEFAULT_EVAL_MODEL; const JUDGE_MAX_TOKENS = 4096; diff --git a/src/evals/names.ts b/packages/evals/src/names.ts similarity index 100% rename from src/evals/names.ts rename to packages/evals/src/names.ts diff --git a/src/evals/runner.test.ts 
b/packages/evals/src/runner.test.ts similarity index 88% rename from src/evals/runner.test.ts rename to packages/evals/src/runner.test.ts index cf02f463..223552c9 100644 --- a/src/evals/runner.test.ts +++ b/packages/evals/src/runner.test.ts @@ -6,8 +6,8 @@ import { describe, expect, it } from 'vitest'; import { setupEvalRepo } from './runner.js'; import type { EvalMeta } from './types.js'; -const evalsDir = join(import.meta.dirname, '..', '..', 'evals'); -const repoRoot = join(import.meta.dirname, '..', '..'); +const evalsDir = join(import.meta.dirname, '..'); +const repoRoot = join(import.meta.dirname, '..', '..', '..'); function git(cwd: string, args: string[]): string { return execFileSync('git', args, { cwd, encoding: 'utf8' }); @@ -67,8 +67,19 @@ describe('setupEvalRepo', () => { 'api', 'endpoint.py', ); + const licensePath = join( + tempRoot, + 'evals', + 'fixtures', + 'source-context', + 'github', + 'getsentry', + 'sentry', + 'LICENSE', + ); mkdirSync(dirname(fixturePath), { recursive: true }); writeFileSync(fixturePath, 'def endpoint():\n pass\n'); + writeFileSync(licensePath, 'source license\n'); const meta: EvalMeta = { name: 'source-context', @@ -77,6 +88,7 @@ describe('setupEvalRepo', () => { given: 'fixture source path carries repository context', skillPath: join(repoRoot, 'src', 'builtin-skills', 'security-review', 'SKILL.md'), filePaths: [fixturePath], + supportingFilePaths: [licensePath], model: 'anthropic/claude-sonnet-4-6', runtime: 'pi', should_find: [{ finding: 'source path context', required: true }], @@ -99,6 +111,8 @@ describe('setupEvalRepo', () => { .toBe('https://github.com/getsentry/sentry.git'); expect(existsSync(join(repoDir, 'source-context', 'src', 'sentry', 'api', 'endpoint.py'))) .toBe(true); + expect(git(repoDir, ['cat-file', '-e', 'main:source-context/LICENSE'])) + .toBe(''); } finally { if (repoDir) { rmSync(repoDir, { recursive: true, force: true }); diff --git a/src/evals/runner.ts b/packages/evals/src/runner.ts similarity 
index 87% rename from src/evals/runner.ts rename to packages/evals/src/runner.ts index c09e333b..7e6b1141 100644 --- a/src/evals/runner.ts +++ b/packages/evals/src/runner.ts @@ -1,16 +1,16 @@ import { basename, join, dirname } from 'node:path'; import { copyFileSync, cpSync, mkdirSync, mkdtempSync, rmSync, existsSync } from 'node:fs'; import { tmpdir } from 'node:os'; -import { execGitNonInteractive } from '../utils/exec.js'; -import { buildLocalEventContext } from '../cli/context.js'; -import { resolveSkillAsync } from '../skills/loader.js'; -import { runSkill } from '../sdk/runner.js'; +import { execGitNonInteractive } from '../../../src/utils/exec.js'; +import { buildLocalEventContext } from '../../../src/cli/context.js'; +import { resolveSkillAsync } from '../../../src/skills/loader.js'; +import { runSkill } from '../../../src/sdk/runner.js'; import { evalFixtureRepoPath, singleEvalFixtureSourceRepository } from './fixtures.js'; import { formatEvalId } from './names.js'; import type { EvalMeta } from './types.js'; -import type { Finding, SkillReport } from '../types/index.js'; -import type { FindingProcessingEvent } from '../sdk/runner.js'; -import type { RuntimeName } from '../sdk/runtimes/types.js'; +import type { Finding, SkillReport } from '../../../src/types/index.js'; +import type { FindingProcessingEvent } from '../../../src/sdk/runner.js'; +import type { RuntimeName } from '../../../src/sdk/runtimes/types.js'; export interface RunEvalOptions { /** Anthropic API key */ @@ -36,6 +36,12 @@ export interface EvalSkillRunResult { durationMs: number; } +function copyFixtureIntoRepo(srcPath: string, repoDir: string): void { + const destPath = join(repoDir, ...evalFixtureRepoPath(srcPath).split('/')); + mkdirSync(dirname(destPath), { recursive: true }); + copyFileSync(srcPath, destPath); +} + /** * Set up a temporary git repository for an eval scenario. 
* @@ -75,15 +81,17 @@ export function setupEvalRepo(meta: EvalMeta, log: (msg: string) => void): strin copyFileSync(meta.skillPath, join(skillDestDir, basename(meta.skillPath))); } + for (const srcPath of meta.supportingFilePaths ?? []) { + copyFixtureIntoRepo(srcPath, tmpDir); + } + git(['add', '.']); git(['commit', '-m', 'install eval skill']); git(['checkout', '-b', 'eval']); - // Copy fixture files, preserving their path under evals/fixtures. + // Copy fixture files, preserving their path under the eval package fixtures. for (const srcPath of meta.filePaths) { - const destPath = join(tmpDir, ...evalFixtureRepoPath(srcPath).split('/')); - mkdirSync(dirname(destPath), { recursive: true }); - copyFileSync(srcPath, destPath); + copyFixtureIntoRepo(srcPath, tmpDir); } git(['add', '.']); diff --git a/src/evals/scaffold.test.ts b/packages/evals/src/scaffold.test.ts similarity index 70% rename from src/evals/scaffold.test.ts rename to packages/evals/src/scaffold.test.ts index c4d4a053..c495b739 100644 --- a/src/evals/scaffold.test.ts +++ b/packages/evals/src/scaffold.test.ts @@ -94,6 +94,8 @@ describe('scaffoldEvalFromGitHubPullRequest', () => { }); expect(result.name).toBe('fix-project-access-bypass'); + expect(result.repository).toBe('getsentry/sentry'); + expect(result.sourceRef).toBe('base-sha'); expect(result.files.map((file) => file.sourcePath)).toEqual([ 'src/api.py', 'src/previous.py', @@ -106,11 +108,16 @@ describe('scaffoldEvalFromGitHubPullRequest', () => { 'fixtures/fix-project-access-bypass/github/getsentry/sentry/src/api.py', 'fixtures/fix-project-access-bypass/github/getsentry/sentry/src/previous.py', ]); + expect(result.supportingFiles.map((file) => file.fixturePath)).toEqual([ + 'fixtures/fix-project-access-bypass/github/getsentry/sentry/LICENSE', + ]); expect(readFileSync(join(tempDir, 'fixtures/fix-project-access-bypass/github/getsentry/sentry/src/api.py'), 'utf-8')) .toBe('src/api.py@base-sha\n'); expect(readFileSync(join(tempDir, 
'fixtures/fix-project-access-bypass/github/getsentry/sentry/src/previous.py'), 'utf-8')) .toBe('src/previous.py@base-sha\n'); + expect(readFileSync(join(tempDir, 'fixtures/fix-project-access-bypass/github/getsentry/sentry/LICENSE'), 'utf-8')) + .toBe('LICENSE@base-sha\n'); const scenario = JSON.parse( readFileSync(join(tempDir, 'security-review/fix-project-access-bypass.json'), 'utf-8') @@ -121,12 +128,28 @@ describe('scaffoldEvalFromGitHubPullRequest', () => { 'fixtures/fix-project-access-bypass/github/getsentry/sentry/src/api.py', 'fixtures/fix-project-access-bypass/github/getsentry/sentry/src/previous.py', ], + supporting_files: [ + 'fixtures/fix-project-access-bypass/github/getsentry/sentry/LICENSE', + ], should_find: [{ finding: 'TODO: describe the vulnerability fixed by https://github.com/getsentry/sentry/pull/12345', }], notes: { source: 'https://github.com/getsentry/sentry/pull/12345', repository: 'getsentry/sentry', + source_ref: 'base-sha', + source_files: [ + { + fixturePath: 'fixtures/fix-project-access-bypass/github/getsentry/sentry/src/api.py', + sourcePath: 'src/api.py', + ref: 'base-sha', + }, + { + fixturePath: 'fixtures/fix-project-access-bypass/github/getsentry/sentry/src/previous.py', + sourcePath: 'src/previous.py', + ref: 'base-sha', + }, + ], side: 'base', skipped_files: [{ sourcePath: 'src/new.py', @@ -139,11 +162,49 @@ describe('scaffoldEvalFromGitHubPullRequest', () => { join(tempDir, 'security-review/fix-project-access-bypass.json') ); expect(validatedScenario.notes?.source).toBe('https://github.com/getsentry/sentry/pull/12345'); + expect(validatedScenario.notes?.source_ref).toBe('base-sha'); + expect(validatedScenario.notes?.source_files).toEqual([ + { + fixturePath: 'fixtures/fix-project-access-bypass/github/getsentry/sentry/src/api.py', + sourcePath: 'src/api.py', + ref: 'base-sha', + }, + { + fixturePath: 'fixtures/fix-project-access-bypass/github/getsentry/sentry/src/previous.py', + sourcePath: 'src/previous.py', + ref: 'base-sha', + 
}, + ]); expect(validatedScenario.notes?.side).toBe('base'); expect(validatedScenario.notes?.skipped_files).toEqual([{ sourcePath: 'src/new.py', reason: 'added file has no base-side content', }]); + expect(validatedScenario.supporting_files).toEqual([ + 'fixtures/fix-project-access-bypass/github/getsentry/sentry/LICENSE', + ]); + }); + + it('requires a source repository license when scaffolding fixtures', async () => { + const missingLicense = new Error('not found') as Error & { status: number }; + missingLicense.status = 404; + octokitMocks.getContent.mockImplementation(async ({ path, ref }: { path: string; ref: string }) => { + if (['LICENSE', 'LICENSE.md', 'LICENSE.txt', 'LICENCE', 'LICENCE.md', 'COPYING', 'COPYING.md'].includes(path)) { + throw missingLicense; + } + return { + data: { + type: 'file', + content: Buffer.from(`${path}@${ref}\n`).toString('base64'), + }, + }; + }); + + await expect(scaffoldEvalFromGitHubPullRequest({ + url: 'https://github.com/getsentry/sentry/pull/12345', + category: 'security-review', + evalsDir: tempDir, + })).rejects.toThrow('No root LICENSE file could be scaffolded'); }); it('rejects unsafe category and scenario names', async () => { diff --git a/src/evals/scaffold.ts b/packages/evals/src/scaffold.ts similarity index 79% rename from src/evals/scaffold.ts rename to packages/evals/src/scaffold.ts index bbb71990..9803647b 100644 --- a/src/evals/scaffold.ts +++ b/packages/evals/src/scaffold.ts @@ -33,8 +33,11 @@ export interface SkippedScaffoldFile { export interface ScaffoldedEval { name: string; + repository: string; + sourceRef: string; scenarioPath: string; files: ScaffoldedEvalFile[]; + supportingFiles: ScaffoldedEvalFile[]; skippedFiles: SkippedScaffoldFile[]; } @@ -50,6 +53,15 @@ interface PullFile { } const SAFE_PATH_SEGMENT = /^[a-zA-Z0-9._-]+$/; +const LICENSE_FILE_CANDIDATES = [ + 'LICENSE', + 'LICENSE.md', + 'LICENSE.txt', + 'LICENCE', + 'LICENCE.md', + 'COPYING', + 'COPYING.md', +]; function 
requireSafePathSegment(value: string, label: string): string { if (!SAFE_PATH_SEGMENT.test(value) || value === '.' || value === '..') { @@ -200,18 +212,35 @@ async function fetchFileContent( } } +async function fetchLicenseFileContent( + octokit: Octokit, + pull: GitHubPullRequestRef, + ref: string, +): Promise<(GitHubFileContent & { sourcePath: string }) | undefined> { + for (const sourcePath of LICENSE_FILE_CANDIDATES) { + const content = await fetchFileContent(octokit, pull, sourcePath, ref); + if (content) { + return { ...content, sourcePath }; + } + } + return undefined; +} + function scenarioJson(args: { title: string; body?: string | null; - files: string[]; + files: ScaffoldedEvalFile[]; + supportingFiles: string[]; url: string; repository: string; + sourceRef: string; side: PullRequestSide; skippedFiles: SkippedScaffoldFile[]; }): string { return `${JSON.stringify({ given: args.title, - files: args.files, + files: args.files.map((file) => file.fixturePath), + supporting_files: args.supportingFiles, should_find: [{ finding: `TODO: describe the vulnerability fixed by ${args.url}`, }], @@ -219,6 +248,12 @@ function scenarioJson(args: { notes: { source: args.url, repository: args.repository, + source_ref: args.sourceRef, + source_files: args.files.map((file) => ({ + fixturePath: file.fixturePath, + sourcePath: file.sourcePath, + ref: file.ref, + })), side: args.side, skipped_files: args.skippedFiles.length > 0 ? 
args.skippedFiles : undefined, body: args.body || undefined, @@ -254,8 +289,10 @@ export async function scaffoldEvalFromGitHubPullRequest( const scenarioPath = join(options.evalsDir, category, `${name}.json`); const seenFixturePaths = new Set(); const copiedFiles: ScaffoldedEvalFile[] = []; + const supportingFiles: ScaffoldedEvalFile[] = []; const skippedFiles: SkippedScaffoldFile[] = []; const contents: (ScaffoldedEvalFile & { content: string })[] = []; + const supportingContents: (ScaffoldedEvalFile & { content: string })[] = []; if (!options.force && existsSync(scenarioPath)) { throw new Error(`Eval scenario already exists: ${scenarioPath}`); @@ -297,10 +334,40 @@ export async function scaffoldEvalFromGitHubPullRequest( throw new Error(`No ${side}-side files could be scaffolded from ${options.url}`); } + const license = await fetchLicenseFileContent(octokit, pull, ref); + if (!license) { + throw new Error( + `No root LICENSE file could be scaffolded from ${pull.owner}/${pull.repo} at ${side} ref ${ref}` + ); + } + + if (!copiedFiles.some((file) => file.sourcePath === license.sourcePath)) { + const fixturePath = posix.join( + 'fixtures', + name, + fixturePathForSource(pull, license.sourcePath, seenFixturePaths), + ); + const fullFixturePath = fromEvalsPath(options.evalsDir, fixturePath); + if (!options.force && existsSync(fullFixturePath)) { + throw new Error(`Eval fixture already exists: ${fullFixturePath}`); + } + supportingContents.push({ + sourcePath: license.sourcePath, + fixturePath, + ref: license.ref, + content: license.content, + }); + supportingFiles.push({ + sourcePath: license.sourcePath, + fixturePath, + ref: license.ref, + }); + } + mkdirSync(fixtureDir, { recursive: true }); mkdirSync(join(options.evalsDir, category), { recursive: true }); - for (const content of contents) { + for (const content of [...contents, ...supportingContents]) { const fullFixturePath = fromEvalsPath(options.evalsDir, content.fixturePath); 
mkdirSync(dirname(fullFixturePath), { recursive: true }); writeFileSync( @@ -315,9 +382,11 @@ export async function scaffoldEvalFromGitHubPullRequest( scenarioJson({ title: pr.title, body: pr.body, - files: copiedFiles.map((file) => file.fixturePath), + files: copiedFiles, + supportingFiles: supportingFiles.map((file) => file.fixturePath), url: options.url, repository: `${pull.owner}/${pull.repo}`, + sourceRef: ref, side, skippedFiles, }), @@ -326,8 +395,11 @@ export async function scaffoldEvalFromGitHubPullRequest( return { name, + repository: `${pull.owner}/${pull.repo}`, + sourceRef: ref, scenarioPath, files: copiedFiles, + supportingFiles, skippedFiles, }; } diff --git a/src/evals/security-review.eval.ts b/packages/evals/src/security-review.eval.ts similarity index 94% rename from src/evals/security-review.eval.ts rename to packages/evals/src/security-review.eval.ts index 93d240f6..b120e72e 100644 --- a/src/evals/security-review.eval.ts +++ b/packages/evals/src/security-review.eval.ts @@ -11,7 +11,7 @@ import { formatEvalId, formatEvalTestName } from './names.js'; const apiKey = process.env['ANTHROPIC_API_KEY'] ?? ''; const evals = discoverEvalScenarios({ category: 'security-review', - skill: '../src/builtin-skills/security-review/SKILL.md', + skill: '../../src/builtin-skills/security-review/SKILL.md', runtime: 'pi', model: 'anthropic/claude-sonnet-4-6', }); diff --git a/src/evals/setup.ts b/packages/evals/src/setup.ts similarity index 90% rename from src/evals/setup.ts rename to packages/evals/src/setup.ts index 42c62476..22a7bb0e 100644 --- a/src/evals/setup.ts +++ b/packages/evals/src/setup.ts @@ -10,7 +10,7 @@ import { config as dotenvConfig } from 'dotenv'; * 3. 
.env.test (test-specific overrides) */ function loadTestEnv(): void { - const root = join(import.meta.dirname, '..', '..'); + const root = join(import.meta.dirname, '..', '..', '..'); const envFiles = ['.env', '.env.local', '.env.test']; diff --git a/src/evals/types.test.ts b/packages/evals/src/types.test.ts similarity index 100% rename from src/evals/types.test.ts rename to packages/evals/src/types.test.ts diff --git a/src/evals/types.ts b/packages/evals/src/types.ts similarity index 86% rename from src/evals/types.ts rename to packages/evals/src/types.ts index 2b032ff6..acd3c53b 100644 --- a/src/evals/types.ts +++ b/packages/evals/src/types.ts @@ -1,8 +1,8 @@ import { z } from 'zod'; -import { RuntimeNameSchema } from '../sdk/runtimes/types.js'; -import { SeveritySchema } from '../types/index.js'; -import type { Finding } from '../types/index.js'; -import type { RuntimeName } from '../sdk/runtimes/types.js'; +import { RuntimeNameSchema } from '../../../src/sdk/runtimes/types.js'; +import { SeveritySchema } from '../../../src/types/index.js'; +import type { Finding } from '../../../src/types/index.js'; +import type { RuntimeName } from '../../../src/sdk/runtimes/types.js'; /** Default model for eval skill execution and judging. */ export const DEFAULT_EVAL_MODEL = 'claude-sonnet-4-6'; @@ -31,6 +31,14 @@ export const EvalScenarioNotesSchema = z.object({ source: z.string().optional(), /** Source repository for scaffolded fixtures, for example "getsentry/sentry". */ repository: z.string().optional(), + /** Exact source commit SHA to checkout when reproducing the captured fixture state. */ + source_ref: z.string().optional(), + /** Mapping from checked-in fixture paths to source repository paths. */ + source_files: z.array(z.object({ + fixturePath: z.string(), + sourcePath: z.string(), + ref: z.string().optional(), + })).optional(), /** Which source side was captured when scaffolded from a PR. 
*/ side: z.string().optional(), /** Files intentionally skipped while scaffolding, with maintainer-facing reasons. */ @@ -50,8 +58,10 @@ export const EvalScenarioSchema = z.object({ name: z.string(), /** What this eval tests (BDD "given" / description) */ given: z.string(), - /** Fixture files to use, relative to evals/ directory */ + /** Fixture files to use, relative to the eval package root */ files: z.array(z.string()).min(1), + /** Supporting fixture files copied into the eval repo without being part of the diff */ + supporting_files: z.array(z.string()).default([]), /** Model override for this specific scenario */ model: z.string().optional(), /** Runtime override for this specific scenario */ @@ -89,7 +99,7 @@ export type EvalScenarioFile = z.infer; * severity: high */ export const EvalFileSchema = z.object({ - /** Skill to run, relative to evals/ directory */ + /** Skill to run, relative to the eval package root */ skill: z.string(), /** Default runtime for all evals in this file */ runtime: RuntimeNameSchema.default(DEFAULT_EVAL_RUNTIME), @@ -117,6 +127,8 @@ export interface EvalMeta { skillPath: string; /** Resolved absolute paths to fixture files */ filePaths: string[]; + /** Resolved absolute paths to supporting files copied before the eval diff */ + supportingFilePaths?: string[]; /** Model to use for skill execution */ model: string; /** Runtime to use for skill execution */ diff --git a/src/evals/usage.ts b/packages/evals/src/usage.ts similarity index 93% rename from src/evals/usage.ts rename to packages/evals/src/usage.ts index a8db6901..1da10002 100644 --- a/src/evals/usage.ts +++ b/packages/evals/src/usage.ts @@ -1,5 +1,5 @@ import { normalizeMetadata, type UsageSummary } from 'vitest-evals/harness'; -import type { UsageStats } from '../types/index.js'; +import type { UsageStats } from '../../../src/types/index.js'; export interface EvalUsageSummaryInput { provider: string; diff --git a/src/evals/verify.eval.ts b/packages/evals/src/verify.eval.ts 
similarity index 95% rename from src/evals/verify.eval.ts rename to packages/evals/src/verify.eval.ts index 3fb8b849..929321dc 100644 --- a/src/evals/verify.eval.ts +++ b/packages/evals/src/verify.eval.ts @@ -10,7 +10,7 @@ import { formatEvalTestName } from './names.js'; const apiKey = process.env['ANTHROPIC_API_KEY'] ?? ''; const evals = discoverVerificationEvalScenarios({ category: 'verification', - skill: '../src/builtin-skills/security-review/SKILL.md', + skill: '../../src/builtin-skills/security-review/SKILL.md', runtime: 'pi', model: 'anthropic/claude-sonnet-4-6', }); diff --git a/packages/evals/src/verify.test.ts b/packages/evals/src/verify.test.ts new file mode 100644 index 00000000..bb678e01 --- /dev/null +++ b/packages/evals/src/verify.test.ts @@ -0,0 +1,98 @@ +import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { resolveVerificationEvalMeta, runVerificationEval } from './verify.js'; +import type { EvalMeta } from './types.js'; +import type { Finding } from '../../../src/types/index.js'; + +const mocks = vi.hoisted(() => ({ + resolveSkillAsync: vi.fn(), + setupEvalRepo: vi.fn(), + verifyFindings: vi.fn(), +})); + +vi.mock('../../../src/skills/loader.js', () => ({ + resolveSkillAsync: mocks.resolveSkillAsync, +})); + +vi.mock('../../../src/sdk/verify.js', () => ({ + verifyFindings: mocks.verifyFindings, +})); + +vi.mock('./runner.js', () => ({ + setupEvalRepo: mocks.setupEvalRepo, +})); + +describe('resolveVerificationEvalMeta', () => { + let tempDir: string | undefined; + + beforeEach(() => { + vi.resetAllMocks(); + }); + + afterEach(() => { + if (tempDir) { + rmSync(tempDir, { recursive: true, force: true }); + tempDir = undefined; + } + }); + + it('includes the file path when verification scenario JSON is malformed', () => { + tempDir = mkdtempSync(join(tmpdir(), 
'warden-verify-eval-')); + const scenarioPath = join(tempDir, 'bad.json'); + writeFileSync(scenarioPath, '{ bad json'); + + expect(() => resolveVerificationEvalMeta(scenarioPath, { + category: 'verification', + skill: 'skills/security-review.md', + baseDir: tempDir, + })).toThrow(`Invalid verification eval ${scenarioPath}`); + }); + + it('keeps supporting files when setting up verification eval repos', async () => { + tempDir = mkdtempSync(join(tmpdir(), 'warden-verify-eval-')); + const repoDir = join(tempDir, 'repo'); + const skillPath = join(tempDir, 'skills', 'security-review', 'SKILL.md'); + const fixturePath = join(tempDir, 'fixtures', 'endpoint.py'); + const licensePath = join(tempDir, 'fixtures', 'LICENSE'); + mkdirSync(join(tempDir, 'skills', 'security-review'), { recursive: true }); + mkdirSync(join(tempDir, 'fixtures'), { recursive: true }); + writeFileSync(skillPath, '---\nname: security-review\n---\n'); + writeFileSync(fixturePath, 'def endpoint():\n pass\n'); + writeFileSync(licensePath, 'source license\n'); + + const candidate: Finding = { + id: 'candidate', + severity: 'high', + title: 'candidate finding', + description: 'candidate description', + }; + mocks.setupEvalRepo.mockReturnValue(repoDir); + mocks.resolveSkillAsync.mockResolvedValue({ name: 'security-review' }); + mocks.verifyFindings.mockResolvedValue({ findings: [] }); + + await runVerificationEval({ + name: 'license-context', + category: 'verification', + skillName: 'security-review', + given: 'license context should be present', + skillPath, + filePaths: [fixturePath], + supportingFilePaths: [licensePath], + candidate, + expectedVerdict: 'reject', + model: 'anthropic/claude-sonnet-4-6', + runtime: 'pi', + }, { + apiKey: 'test-api-key', + }); + + expect(mocks.setupEvalRepo).toHaveBeenCalledWith( + expect.objectContaining>({ + supportingFilePaths: [licensePath], + }), + expect.any(Function), + ); + }); +}); diff --git a/src/evals/verify.ts b/packages/evals/src/verify.ts similarity index 92% 
rename from src/evals/verify.ts rename to packages/evals/src/verify.ts index 0264349b..989d628b 100644 --- a/src/evals/verify.ts +++ b/packages/evals/src/verify.ts @@ -9,10 +9,10 @@ import { toJsonValue, type Harness, } from 'vitest-evals/harness'; -import { resolveSkillAsync } from '../skills/loader.js'; -import { verifyFindings } from '../sdk/verify.js'; -import { FindingSchema, type Finding, type UsageStats } from '../types/index.js'; -import { RuntimeNameSchema, type RuntimeName } from '../sdk/runtimes/types.js'; +import { resolveSkillAsync } from '../../../src/skills/loader.js'; +import { verifyFindings } from '../../../src/sdk/verify.js'; +import { FindingSchema, type Finding, type UsageStats } from '../../../src/types/index.js'; +import { RuntimeNameSchema, type RuntimeName } from '../../../src/sdk/runtimes/types.js'; import { discoverEvalScenarioFiles, resolveEvalSkillName } from './index.js'; import { evalFixtureRepoPath, singleEvalFixtureSourceRepository } from './fixtures.js'; import { formatEvalId } from './names.js'; @@ -33,6 +33,7 @@ const VerificationScenarioFileSchema = z.object({ name: z.string().optional(), given: z.string(), files: z.array(z.string()).min(1), + supporting_files: z.array(z.string()).default([]), candidate: FindingSchema, expect: VerificationExpectationSchema, model: z.string().optional(), @@ -57,6 +58,7 @@ export interface VerificationEvalMeta { given: string; skillPath: string; filePaths: string[]; + supportingFilePaths?: string[]; candidate: Finding; expectedVerdict: z.infer; model: string; @@ -89,7 +91,7 @@ interface VerificationEvalRunResult { } function getEvalsDir(): string { - return join(import.meta.dirname, '..', '..', 'evals'); + return join(import.meta.dirname, '..'); } function loadVerificationScenario(filePath: string) { @@ -130,6 +132,13 @@ export function resolveVerificationEvalMeta( } return filePath; }); + const supportingFilePaths = scenario.supporting_files.map((file) => { + const filePath = join(evalsDir, 
file); + if (!existsSync(filePath)) { + throw new Error(`Verification eval supporting fixture not found for ${options.category}/${name}: ${file}`); + } + return filePath; + }); return { name, @@ -138,6 +147,7 @@ export function resolveVerificationEvalMeta( given: scenario.given, skillPath, filePaths, + supportingFilePaths, candidate: scenario.candidate, expectedVerdict: scenario.expect.verdict, model: scenario.model ?? options.model ?? DEFAULT_VERIFICATION_MODEL, @@ -172,6 +182,7 @@ export async function runVerificationEval( given: meta.given, skillPath: meta.skillPath, filePaths: meta.filePaths, + supportingFilePaths: meta.supportingFilePaths, model: meta.model, runtime: meta.runtime, should_find: [{ finding: meta.given, required: true }], diff --git a/packages/evals/tsconfig.json b/packages/evals/tsconfig.json new file mode 100644 index 00000000..2a0a218c --- /dev/null +++ b/packages/evals/tsconfig.json @@ -0,0 +1,20 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "NodeNext", + "moduleResolution": "NodeNext", + "lib": ["ES2022"], + "strict": true, + "jsx": "react-jsx", + "esModuleInterop": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true, + "resolveJsonModule": true, + "isolatedModules": true, + "noUncheckedIndexedAccess": true, + "noImplicitOverride": true, + "noPropertyAccessFromIndexSignature": true, + "types": ["node", "vitest"] + }, + "include": ["src/**/*", "scripts/**/*"] +} diff --git a/evals/verification/preprod-size-analysis-base-artifact-keep.json b/packages/evals/verification/preprod-size-analysis-base-artifact-keep.json similarity index 60% rename from evals/verification/preprod-size-analysis-base-artifact-keep.json rename to packages/evals/verification/preprod-size-analysis-base-artifact-keep.json index c8e924a4..84e97ebb 100644 --- a/evals/verification/preprod-size-analysis-base-artifact-keep.json +++ b/packages/evals/verification/preprod-size-analysis-base-artifact-keep.json @@ -17,5 +17,20 @@ }, "expect": { 
"verdict": "keep" + }, + "supporting_files": [ + "fixtures/sentry-preprod-size-analysis-base-artifact-access/LICENSE.md" + ], + "notes": { + "source": "https://github.com/getsentry/sentry/blob/d12e82ea6554d6794b1a490369ec6baa75aeaa15/src/sentry/preprod/api/endpoints/public/organization_preprod_size_analysis.py", + "repository": "getsentry/sentry", + "source_ref": "d12e82ea6554d6794b1a490369ec6baa75aeaa15", + "source_files": [ + { + "fixturePath": "fixtures/sentry-preprod-size-analysis-base-artifact-access/organization_preprod_size_analysis.py", + "sourcePath": "src/sentry/preprod/api/endpoints/public/organization_preprod_size_analysis.py", + "ref": "d12e82ea6554d6794b1a490369ec6baa75aeaa15" + } + ] } } diff --git a/evals/verification/slack-signature-stub-reject.json b/packages/evals/verification/slack-signature-stub-reject.json similarity index 59% rename from evals/verification/slack-signature-stub-reject.json rename to packages/evals/verification/slack-signature-stub-reject.json index fa13f732..9fa875b3 100644 --- a/evals/verification/slack-signature-stub-reject.json +++ b/packages/evals/verification/slack-signature-stub-reject.json @@ -17,5 +17,20 @@ }, "expect": { "verdict": "reject" + }, + "supporting_files": [ + "fixtures/sentry-slack-options-load-unscoped-group/LICENSE.md" + ], + "notes": { + "source": "https://github.com/getsentry/sentry/pull/114185", + "repository": "getsentry/sentry", + "source_ref": "b0c31079134ae8f755161842361ae28ace2baef1", + "source_files": [ + { + "fixturePath": "fixtures/sentry-slack-options-load-unscoped-group/options_load.py", + "sourcePath": "src/sentry/integrations/slack/webhooks/options_load.py", + "ref": "b0c31079134ae8f755161842361ae28ace2baef1" + } + ] } } diff --git a/evals/verification/workflow-open-periods-project-access-keep.json b/packages/evals/verification/workflow-open-periods-project-access-keep.json similarity index 61% rename from evals/verification/workflow-open-periods-project-access-keep.json rename to 
packages/evals/verification/workflow-open-periods-project-access-keep.json index c5f49014..92b38bd7 100644 --- a/evals/verification/workflow-open-periods-project-access-keep.json +++ b/packages/evals/verification/workflow-open-periods-project-access-keep.json @@ -17,5 +17,20 @@ }, "expect": { "verdict": "keep" + }, + "supporting_files": [ + "fixtures/sentry-workflow-open-periods-project-access/LICENSE.md" + ], + "notes": { + "source": "https://github.com/getsentry/sentry/blob/d12e82ea6554d6794b1a490369ec6baa75aeaa15/src/sentry/workflow_engine/endpoints/organization_open_periods.py", + "repository": "getsentry/sentry", + "source_ref": "d12e82ea6554d6794b1a490369ec6baa75aeaa15", + "source_files": [ + { + "fixturePath": "fixtures/sentry-workflow-open-periods-project-access/organization_open_periods.py", + "sourcePath": "src/sentry/workflow_engine/endpoints/organization_open_periods.py", + "ref": "d12e82ea6554d6794b1a490369ec6baa75aeaa15" + } + ] } } diff --git a/vitest.evals.config.ts b/packages/evals/vitest.config.ts similarity index 88% rename from vitest.evals.config.ts rename to packages/evals/vitest.config.ts index cb0a4039..f339dfb3 100644 --- a/vitest.evals.config.ts +++ b/packages/evals/vitest.config.ts @@ -6,10 +6,10 @@ const junitOutputFile = process.env['VITEST_EVALS_JUNIT']; export default defineConfig({ test: { // Only run eval suites. - include: ['src/evals/**/*.eval.ts'], + include: ['src/**/*.eval.ts'], exclude: ['**/node_modules/**', '**/dist/**'], // Load .env, .env.local, .env.test for API keys - setupFiles: ['./src/evals/setup.ts'], + setupFiles: ['./src/setup.ts'], reporters: [ ['vitest-evals/reporter', { toolDetails: false }], ...(jsonOutputFile ? 
[['json']] : []), diff --git a/packages/evals/vitest.unit.config.ts b/packages/evals/vitest.unit.config.ts new file mode 100644 index 00000000..e357d9a6 --- /dev/null +++ b/packages/evals/vitest.unit.config.ts @@ -0,0 +1,8 @@ +import { defineConfig } from 'vitest/config'; + +export default defineConfig({ + test: { + include: ['src/**/*.test.ts'], + exclude: ['**/node_modules/**', '**/dist/**'], + }, +}); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 79e40aca..2aaca0aa 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -105,9 +105,6 @@ importers: vitest: specifier: ^4.1.6 version: 4.1.6(@opentelemetry/api@1.9.0)(@types/node@25.0.10)(@vitest/coverage-v8@4.1.6)(vite@7.3.1(@types/node@25.0.10)(jiti@2.7.0)(tsx@4.21.0)(yaml@2.8.3)) - vitest-evals: - specifier: 0.9.0-beta.3 - version: 0.9.0-beta.3(tinyrainbow@3.0.3)(vitest@4.1.6)(zod@4.3.6) packages/docs: dependencies: @@ -140,6 +137,37 @@ importers: specifier: ^2.0.0 version: 2.2.13(@aws-sdk/credential-provider-web-identity@3.972.38) + packages/evals: + dependencies: + '@octokit/rest': + specifier: ^22.0.1 + version: 22.0.1 + dotenv: + specifier: ^17.2.3 + version: 17.2.3 + vitest-evals: + specifier: 0.9.0-beta.3 + version: 0.9.0-beta.3(tinyrainbow@3.1.0)(vitest@4.1.6)(zod@4.3.6) + yaml: + specifier: ^2.8.2 + version: 2.8.3 + zod: + specifier: ^4.3.6 + version: 4.3.6 + devDependencies: + '@types/node': + specifier: ^25.0.10 + version: 25.0.10 + tsx: + specifier: ^4.19.0 + version: 4.21.0 + typescript: + specifier: ^5.9.3 + version: 5.9.3 + vitest: + specifier: ^4.1.6 + version: 4.1.6(@opentelemetry/api@1.9.0)(@types/node@25.0.10)(@vitest/coverage-v8@4.1.6)(vite@7.3.1(@types/node@25.0.10)(jiti@2.7.0)(tsx@4.21.0)(yaml@2.8.3)) + packages: '@alcalzone/ansi-tokenize@0.1.3': @@ -9118,9 +9146,9 @@ snapshots: optionalDependencies: vite: 6.4.1(@types/node@25.0.10)(jiti@2.7.0)(tsx@4.21.0)(yaml@2.8.3) - vitest-evals@0.9.0-beta.3(tinyrainbow@3.0.3)(vitest@4.1.6)(zod@4.3.6): + 
vitest-evals@0.9.0-beta.3(tinyrainbow@3.1.0)(vitest@4.1.6)(zod@4.3.6): dependencies: - tinyrainbow: 3.0.3 + tinyrainbow: 3.1.0 vitest: 4.1.6(@opentelemetry/api@1.9.0)(@types/node@25.0.10)(@vitest/coverage-v8@4.1.6)(vite@7.3.1(@types/node@25.0.10)(jiti@2.7.0)(tsx@4.21.0)(yaml@2.8.3)) optionalDependencies: zod: 4.3.6 diff --git a/scripts/update-pricing.ts b/scripts/update-pricing.ts index d704a2e4..d60490c7 100644 --- a/scripts/update-pricing.ts +++ b/scripts/update-pricing.ts @@ -52,7 +52,7 @@ const PRICE_FALLBACKS: Record = { 'claude-sonnet-4-6': 'claude-sonnet-4-5', }; -function hasPrice(record: ModelPricingRecord | undefined): boolean { +function hasPrice(record: ModelPricingRecord | undefined): record is ModelPricingRecord { return record !== undefined && ( record.inputPerMTok > 0 || record.outputPerMTok > 0 || @@ -64,10 +64,11 @@ function hasPrice(record: ModelPricingRecord | undefined): boolean { function fillPricingFallbacks(pricing: Record): void { for (const [target, source] of Object.entries(PRICE_FALLBACKS)) { - if (hasPrice(pricing[target]) || !hasPrice(pricing[source])) { + const sourcePricing = pricing[source]; + if (hasPrice(pricing[target]) || !hasPrice(sourcePricing)) { continue; } - pricing[target] = { ...pricing[source]! 
}; + pricing[target] = { ...sourcePricing }; } } diff --git a/src/evals/verify.test.ts b/src/evals/verify.test.ts deleted file mode 100644 index ee4bfcee..00000000 --- a/src/evals/verify.test.ts +++ /dev/null @@ -1,28 +0,0 @@ -import { mkdtempSync, rmSync, writeFileSync } from 'node:fs'; -import { tmpdir } from 'node:os'; -import { join } from 'node:path'; -import { afterEach, describe, expect, it } from 'vitest'; -import { resolveVerificationEvalMeta } from './verify.js'; - -describe('resolveVerificationEvalMeta', () => { - let tempDir: string | undefined; - - afterEach(() => { - if (tempDir) { - rmSync(tempDir, { recursive: true, force: true }); - tempDir = undefined; - } - }); - - it('includes the file path when verification scenario JSON is malformed', () => { - tempDir = mkdtempSync(join(tmpdir(), 'warden-verify-eval-')); - const scenarioPath = join(tempDir, 'bad.json'); - writeFileSync(scenarioPath, '{ bad json'); - - expect(() => resolveVerificationEvalMeta(scenarioPath, { - category: 'verification', - skill: 'skills/security-review.md', - baseDir: tempDir, - })).toThrow(`Invalid verification eval ${scenarioPath}`); - }); -}); diff --git a/tsconfig.build.json b/tsconfig.build.json index 37105381..6e0067cd 100644 --- a/tsconfig.build.json +++ b/tsconfig.build.json @@ -5,9 +5,7 @@ "dist", "src/**/*.test.ts", "src/**/*.test.tsx", - "src/**/*.eval.ts", "src/**/*.integration.test.ts", - "src/**/*.integration.test.tsx", - "src/evals/**" + "src/**/*.integration.test.tsx" ] } diff --git a/vitest.integration.config.ts b/vitest.integration.config.ts index 6be302f1..ed0ee42c 100644 --- a/vitest.integration.config.ts +++ b/vitest.integration.config.ts @@ -5,7 +5,6 @@ export default defineConfig({ // Only run integration tests include: ['**/*.integration.test.ts'], exclude: ['**/node_modules/**', '**/dist/**'], - // Load .env, .env.local, .env.test - setupFiles: ['./src/evals/setup.ts'], + passWithNoTests: true, }, }); diff --git a/warden.toml b/warden.toml index 
3e32b997..f021ebd8 100644 --- a/warden.toml +++ b/warden.toml @@ -7,8 +7,8 @@ model = "anthropic/claude-sonnet-4-6" failOn = "high" # Show annotations for medium+ severity findings reportOn = "medium" -# Exclude build output from all skills -ignorePaths = ["dist/**", "evals/**"] +# Exclude build output and internal eval fixtures from all skills +ignorePaths = ["dist/**", "packages/evals/**"] [[skills]] name = "security-review"