From 2fb34e2bd0c0503cc003dc83f53f114e0caebdba Mon Sep 17 00:00:00 2001 From: Henry Avila Date: Wed, 3 Jun 2026 22:09:54 -0300 Subject: [PATCH 1/2] feat(analyze): exclude vendor & generated dirs from broad file scope MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --all and path() on a directory scanned EVERY .php under the working dir, including vendor/ — on a real Laravel app (Arch) --emit produced 55k+ units and a 1.5GB work order, dominated by dependency code. FileScopeResolver now excludes vendor, node_modules, storage and bootstrap/cache from broad scans (relative to each scanned root, so an explicit --path=vendor/foo still works). --- src/Analyze/FileScopeResolver.php | 12 ++++++- tests/Unit/Analyze/FileScopeResolverTest.php | 33 ++++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/src/Analyze/FileScopeResolver.php b/src/Analyze/FileScopeResolver.php index e9a1bf7..2f23130 100644 --- a/src/Analyze/FileScopeResolver.php +++ b/src/Analyze/FileScopeResolver.php @@ -19,6 +19,15 @@ */ final class FileScopeResolver { + /** + * Directories never reviewed by a broad scan — dependencies and generated + * output. Excluded relative to each scanned root, so an explicit + * `--path=vendor/foo` still works; only `--all` / directory scans skip them. 
+ * + * @var list<string> + */ + private const EXCLUDED_DIRS = ['vendor', 'node_modules', 'storage', 'bootstrap/cache']; + public function __construct( private readonly CommandExecutor $executor, private readonly string $workingDirectory, @@ -110,7 +119,8 @@ private function toExistingPhpFiles(array $files): array private function phpFilesIn(string $dir): array { $files = []; - foreach (Finder::create()->files()->in($dir)->name('*.php')->sortByName() as $file) { + $finder = Finder::create()->files()->in($dir)->name('*.php')->exclude(self::EXCLUDED_DIRS)->sortByName(); + foreach ($finder as $file) { $files[] = $file->getRealPath() ?: $file->getPathname(); } diff --git a/tests/Unit/Analyze/FileScopeResolverTest.php b/tests/Unit/Analyze/FileScopeResolverTest.php index 6d2e24b..b90dd50 100644 --- a/tests/Unit/Analyze/FileScopeResolverTest.php +++ b/tests/Unit/Analyze/FileScopeResolverTest.php @@ -123,3 +123,36 @@ function fsrExecutor(string $gitOutput): FakeCommandExecutor fsrCleanup($base); } }); + +it('excludes vendor, node_modules and generated dirs from --all', function (): void { + $base = fsrBase(); + + try { + fsrWrite($base, 'app/Real.php'); + fsrWrite($base, 'vendor/acme/lib/Dep.php'); + fsrWrite($base, 'node_modules/pkg/index.php'); + fsrWrite($base, 'storage/framework/views/cached.php'); + fsrWrite($base, 'bootstrap/cache/packages.php'); + $resolver = new FileScopeResolver(fsrExecutor(''), $base); + + $all = $resolver->all(); + + expect($all)->toHaveCount(1) + ->and($all[0])->toContain('Real.php'); + } finally { + fsrCleanup($base); + } +}); + +it('still scans an explicitly-requested vendor subtree (exclusion is for broad scans)', function (): void { + $base = fsrBase(); + + try { + fsrWrite($base, 'vendor/acme/lib/Dep.php'); + $resolver = new FileScopeResolver(fsrExecutor(''), $base); + + expect($resolver->path('vendor/acme/lib'))->toHaveCount(1); + } finally { + fsrCleanup($base); + } +}); From b6a78d6306a57d0a7413dd193844e96475d72ced Mon Sep 17 00:00:00 2001
From: Henry Avila Date: Wed, 3 Jun 2026 22:09:54 -0300 Subject: [PATCH 2/2] feat(analyze): fall back to context-emit when no LLM driver is configured MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit context-emit (--emit → /codeguard-review → --ingest) is the supported, subscription-based transport; an API driver is an optional seam that ships empty (NullLlmClient). So when no driver is bound, codeguard:analyze now informs and emits a work order instead of printing a dead-end 'not configured' notice and adjudicating nothing. The synchronous LlmClient path runs only when a real driver replaces NullLlmClient. Also adds a --ingest file-not-found error test. --- src/Commands/CodeguardAnalyzeCommand.php | 24 +++++++---- tests/Feature/CodeguardAnalyzeCommandTest.php | 43 +++++++++++++++---- 2 files changed, 49 insertions(+), 18 deletions(-) diff --git a/src/Commands/CodeguardAnalyzeCommand.php b/src/Commands/CodeguardAnalyzeCommand.php index 48e7b71..b639303 100644 --- a/src/Commands/CodeguardAnalyzeCommand.php +++ b/src/Commands/CodeguardAnalyzeCommand.php @@ -8,6 +8,7 @@ use Henryavila\Codeguard\Analyze\AnalyzeResult; use Henryavila\Codeguard\Analyze\AnalyzeRunner; use Henryavila\Codeguard\Analyze\FileScopeResolver; +use Henryavila\Codeguard\Analyze\LlmClient; use Henryavila\Codeguard\Analyze\Severity; use Henryavila\Codeguard\Telemetry\EventName; use Henryavila\Codeguard\Telemetry\EventStatus; @@ -40,6 +41,7 @@ public function handle( FileScopeResolver $scope, Recorder $recorder, AnalyzeBaseline $baseline, + LlmClient $llm, ): int { if ((bool) $this->option('emit')) { return $this->handleEmit($config, $runner, $scope); @@ -50,6 +52,19 @@ public function handle( return $this->handleIngest($config, $runner, $scope, $recorder, $baseline, $ingest); } + // No real adjudicating driver → context-emit is the supported transport. + // Inform and fall back to writing a work order for /codeguard-review, + // instead of a dead-end notice. 
The synchronous path below runs only + // when a driver (e.g. an API client) is bound in place of NullLlmClient. + if (! $llm->isConfigured()) { + $this->components->info( + 'No LLM driver configured — emitting a work order for context-emit review ' + .'(run /codeguard-review, or --ingest its findings). Uses your Claude Code subscription, no metered API.', + ); + + return $this->handleEmit($config, $runner, $scope); + } + $context = $this->resolveContext(); $failOn = $this->resolveFailOn(); @@ -67,15 +82,6 @@ public function handle( $files = $this->resolveFiles($scope); $result = $runner->run($files, $config->enabledPresets, $failOn, $context); - if (! $result->adjudicated) { - $this->components->warn( - 'LLM driver not configured — set config(\'codeguard.patterns.driver\'). No patterns adjudicated.', - ); - $this->emitCommandEnd($recorder, self::SUCCESS, $startHrtime); - - return self::SUCCESS; - } - $this->maybeAccept($baseline, $result); $this->renderFindings($result); diff --git a/tests/Feature/CodeguardAnalyzeCommandTest.php b/tests/Feature/CodeguardAnalyzeCommandTest.php index cb023f9..0cc9828 100644 --- a/tests/Feature/CodeguardAnalyzeCommandTest.php +++ b/tests/Feature/CodeguardAnalyzeCommandTest.php @@ -189,25 +189,32 @@ function analyzeReadEvents(string $path): array } }); -it('does not adjudicate or fake a clean repo when no driver is configured', function (): void { +it('falls back to context-emit (work order) when no LLM driver is configured', function (): void { $telemetry = analyzeTelemetryPath(); $file = analyzeFixtureFile(); + $out = sys_get_temp_dir().DIRECTORY_SEPARATOR.'codeguard-fallback-'.uniqid().'.json'; $fake = new FakeLlmClient(analyzeFindingHandler('critical'), configured: false); analyzeBind($telemetry, $fake); try { - $exit = Artisan::call('codeguard:analyze', ['--path' => $file, '--context' => 'ci']); - - $events = analyzeReadEvents($telemetry); - $analyzeEnded = array_values(array_filter( - $events, - static fn (array $event): bool => 
($event['event'] ?? '') === 'analyze.ended', - )); + $exit = Artisan::call('codeguard:analyze', ['--path' => $file, '--out' => $out, '--context' => 'ci']); + $output = Artisan::output(); + // No synchronous adjudication is attempted, and instead of a dead-end + // notice the command emits a work order for the context-emit review path. expect($exit)->toBe(0) ->and($fake->calls)->toHaveCount(0) - ->and($analyzeEnded[0]['status'] ?? null)->toBe('skip'); + ->and(is_file($out))->toBeTrue() + ->and($output)->toContain('context-emit'); + + $decoded = json_decode((string) file_get_contents($out), true); + $units = (is_array($decoded) && is_array($decoded['units'] ?? null)) ? $decoded['units'] : []; + + expect($units)->toHaveCount(1); } finally { + if (is_file($out)) { + unlink($out); + } analyzeCleanup($file, $telemetry); } }); @@ -236,6 +243,24 @@ function analyzeReadEvents(string $path): array } }); +it('fails with a clear error when the --ingest findings file does not exist', function (): void { + $telemetry = analyzeTelemetryPath(); + $file = analyzeFixtureFile(); + $missing = sys_get_temp_dir().DIRECTORY_SEPARATOR.'codeguard-missing-'.uniqid().'.json'; + $fake = new FakeLlmClient(fn (AnalysisUnit $unit): array => []); + analyzeBind($telemetry, $fake); + + try { + $exit = Artisan::call('codeguard:analyze', ['--ingest' => $missing, '--path' => $file, '--context' => 'ci']); + + expect($exit)->toBe(1) + ->and(Artisan::output())->toContain('not found') + ->and($fake->calls)->toHaveCount(0); + } finally { + analyzeCleanup($file, $telemetry); + } +}); + it('emits a work order JSON with units and prompt-ready patterns', function (): void { $file = analyzeFixtureFile(); $out = sys_get_temp_dir().DIRECTORY_SEPARATOR.'codeguard-workorder-'.uniqid().'.json';