From 0d185744215d4acc20b0802b3b59c765807b20dc Mon Sep 17 00:00:00 2001 From: Leo Di Donato <120051+leodido@users.noreply.github.com> Date: Mon, 10 Nov 2025 15:33:31 +0000 Subject: [PATCH 1/2] Avoid re-uploading downloaded artifacts in sign-cache The sign-cache command was re-uploading ALL artifacts, including those that were already in the remote cache. This caused issues: 1. Unnecessary network traffic and storage costs 2. Potential digest mismatches if artifacts were re-compressed 3. Overwriting of existing artifacts that may have been downloaded This fix adds a HasFile() method to the RemoteCache interface to check if an artifact already exists before uploading. The sign-cache command now: - Checks if artifact exists in remote cache - Skips upload if it already exists (downloaded artifacts) - Uploads only new artifacts (locally built) - Always uploads attestation files (they're new) This aligns with the build command behavior, which only uploads newly built packages via GetNewPackagesForCache(). 
Changes: - Add HasFile() method to RemoteCache interface - Implement HasFile() in S3Cache, GSUtilCache, NoRemoteCache - Update UploadArtifactWithAttestation to check before uploading - Add tests for skip/upload behavior - Update wrapper types (pushOnly/pullOnly) to implement HasFile() Benefits: - Prevents re-uploading downloaded artifacts - Reduces network traffic and costs - Avoids potential digest mismatches - Maintains SLSA attestation integrity Co-authored-by: Ona --- cmd/build.go | 8 +++ pkg/leeway/cache/remote/gsutil.go | 21 ++++++++ pkg/leeway/cache/remote/no_cache.go | 5 ++ pkg/leeway/cache/remote/s3.go | 24 +++++++++ pkg/leeway/cache/types.go | 4 ++ pkg/leeway/signing/attestation_test.go | 4 ++ pkg/leeway/signing/upload.go | 29 +++++++++-- pkg/leeway/signing/upload_test.go | 68 ++++++++++++++++++++++++++ 8 files changed, 158 insertions(+), 5 deletions(-) diff --git a/cmd/build.go b/cmd/build.go index d5d4f88..81627a7 100644 --- a/cmd/build.go +++ b/cmd/build.go @@ -414,6 +414,10 @@ func (c *pushOnlyRemoteCache) UploadFile(ctx context.Context, filePath string, k return c.C.UploadFile(ctx, filePath, key) } +func (c *pushOnlyRemoteCache) HasFile(ctx context.Context, key string) (bool, error) { + return c.C.HasFile(ctx, key) +} + type pullOnlyRemoteCache struct { C cache.RemoteCache } @@ -434,6 +438,10 @@ func (c *pullOnlyRemoteCache) UploadFile(ctx context.Context, filePath string, k return nil } +func (c *pullOnlyRemoteCache) HasFile(ctx context.Context, key string) (bool, error) { + return c.C.HasFile(ctx, key) +} + func getRemoteCacheFromEnv() cache.RemoteCache { return getRemoteCache(nil) } diff --git a/pkg/leeway/cache/remote/gsutil.go b/pkg/leeway/cache/remote/gsutil.go index db8adb7..98ac6ba 100644 --- a/pkg/leeway/cache/remote/gsutil.go +++ b/pkg/leeway/cache/remote/gsutil.go @@ -192,6 +192,27 @@ func (rs *GSUtilCache) UploadFile(ctx context.Context, filePath string, key stri return nil } +// HasFile checks if a file exists in the remote cache with 
the given key +func (rs *GSUtilCache) HasFile(ctx context.Context, key string) (bool, error) { + target := fmt.Sprintf("gs://%s/%s", rs.BucketName, key) + + cmd := exec.CommandContext(ctx, "gsutil", "stat", target) + if err := cmd.Run(); err != nil { + // gsutil stat returns non-zero exit code if file doesn't exist + if exitErr, ok := err.(*exec.ExitError); ok { + if status, ok := exitErr.Sys().(syscall.WaitStatus); ok { + if status.ExitStatus() == 1 { + // File doesn't exist + return false, nil + } + } + } + return false, fmt.Errorf("failed to check if file exists at %s: %w", target, err) + } + + return true, nil +} + func parseGSUtilStatOutput(reader io.Reader) map[string]struct{} { exists := make(map[string]struct{}) scanner := bufio.NewScanner(reader) diff --git a/pkg/leeway/cache/remote/no_cache.go b/pkg/leeway/cache/remote/no_cache.go index a1187d2..1ef925b 100644 --- a/pkg/leeway/cache/remote/no_cache.go +++ b/pkg/leeway/cache/remote/no_cache.go @@ -33,3 +33,8 @@ func (NoRemoteCache) Upload(ctx context.Context, src cache.LocalCache, pkgs []ca func (NoRemoteCache) UploadFile(ctx context.Context, filePath string, key string) error { return nil } + +// HasFile checks if a file exists in the remote cache with the given key +func (NoRemoteCache) HasFile(ctx context.Context, key string) (bool, error) { + return false, nil +} diff --git a/pkg/leeway/cache/remote/s3.go b/pkg/leeway/cache/remote/s3.go index 0e48b04..e25c21d 100644 --- a/pkg/leeway/cache/remote/s3.go +++ b/pkg/leeway/cache/remote/s3.go @@ -964,6 +964,30 @@ func (s *S3Cache) UploadFile(ctx context.Context, filePath string, key string) e return nil } +// HasFile checks if a file exists in the remote cache with the given key +func (s *S3Cache) HasFile(ctx context.Context, key string) (bool, error) { + // Wait for rate limiter permission + if err := s.waitForRateLimit(ctx); err != nil { + log.WithError(err).Debug("Rate limiter error during file existence check") + return false, fmt.Errorf("rate limiter 
error: %w", err) + } + + // Use timeout for the check operation + timeoutCtx, cancel := context.WithTimeout(ctx, 30*time.Second) + defer cancel() + + exists, err := s.storage.HasObject(timeoutCtx, key) + if err != nil { + log.WithFields(log.Fields{ + "key": key, + "error": err, + }).Debug("failed to check file existence in remote cache") + return false, fmt.Errorf("failed to check if file exists: %w", err) + } + + return exists, nil +} + // s3ClientAPI is a subset of the S3 client interface we need type s3ClientAPI interface { HeadObject(ctx context.Context, params *s3.HeadObjectInput, optFns ...func(*s3.Options)) (*s3.HeadObjectOutput, error) diff --git a/pkg/leeway/cache/types.go b/pkg/leeway/cache/types.go index b52c80e..1e2853f 100644 --- a/pkg/leeway/cache/types.go +++ b/pkg/leeway/cache/types.go @@ -56,6 +56,10 @@ type RemoteCache interface { // UploadFile uploads a single file to the remote cache with the given key // This is useful for uploading individual files like attestations without Package abstraction UploadFile(ctx context.Context, filePath string, key string) error + + // HasFile checks if a file exists in the remote cache with the given key + // This is useful for checking if artifacts need to be uploaded + HasFile(ctx context.Context, key string) (bool, error) } // ObjectStorage represents a generic object storage interface diff --git a/pkg/leeway/signing/attestation_test.go b/pkg/leeway/signing/attestation_test.go index b121231..0fc811d 100644 --- a/pkg/leeway/signing/attestation_test.go +++ b/pkg/leeway/signing/attestation_test.go @@ -914,6 +914,10 @@ func (m *mockRemoteCache) UploadFile(ctx context.Context, filePath string, key s return nil } +func (m *mockRemoteCache) HasFile(ctx context.Context, key string) (bool, error) { + return false, nil +} + // TestGetEnvOrDefault tests the environment variable helper // TestValidateSigstoreEnvironment tests Sigstore environment validation func TestValidateSigstoreEnvironment(t *testing.T) { diff --git 
a/pkg/leeway/signing/upload.go b/pkg/leeway/signing/upload.go index f0d0809..fe28976 100644 --- a/pkg/leeway/signing/upload.go +++ b/pkg/leeway/signing/upload.go @@ -86,9 +86,28 @@ func (u *ArtifactUploader) UploadArtifactWithAttestation(ctx context.Context, ar return fmt.Errorf("context cancelled before upload: %w", err) } - // Upload artifact first - if err := u.remoteCache.UploadFile(ctx, artifactPath, artifactKey); err != nil { - return fmt.Errorf("failed to upload artifact: %w", err) + // Check if artifact already exists in remote cache + // Only upload if it doesn't exist (avoid re-uploading downloaded artifacts) + artifactExists, err := u.remoteCache.HasFile(ctx, artifactKey) + if err != nil { + log.WithError(err).WithField("key", artifactKey).Warn("Failed to check if artifact exists, will attempt upload") + artifactExists = false // Assume it doesn't exist and try to upload + } + + if artifactExists { + log.WithFields(log.Fields{ + "artifact": artifactPath, + "artifact_key": artifactKey, + }).Info("Artifact already exists in remote cache, skipping upload") + } else { + // Upload artifact only if it doesn't exist + if err := u.remoteCache.UploadFile(ctx, artifactPath, artifactKey); err != nil { + return fmt.Errorf("failed to upload artifact: %w", err) + } + log.WithFields(log.Fields{ + "artifact": artifactPath, + "artifact_key": artifactKey, + }).Info("Successfully uploaded artifact to remote cache") } // Check context between uploads @@ -96,7 +115,7 @@ func (u *ArtifactUploader) UploadArtifactWithAttestation(ctx context.Context, ar return fmt.Errorf("context cancelled between uploads: %w", err) } - // Upload attestation file + // Always upload attestation file (it's new) if err := u.remoteCache.UploadFile(ctx, attestationPath, attestationKey); err != nil { return fmt.Errorf("failed to upload .att file: %w", err) } @@ -105,7 +124,7 @@ func (u *ArtifactUploader) UploadArtifactWithAttestation(ctx context.Context, ar "artifact": artifactPath, "artifact_key": 
artifactKey, "att_key": attestationKey, - }).Info("Successfully uploaded artifact and .att file") + }).Info("Successfully uploaded attestation file") return nil } diff --git a/pkg/leeway/signing/upload_test.go b/pkg/leeway/signing/upload_test.go index ab28d62..94c2e0c 100644 --- a/pkg/leeway/signing/upload_test.go +++ b/pkg/leeway/signing/upload_test.go @@ -83,6 +83,18 @@ func (m *mockRemoteCacheUpload) UploadFile(ctx context.Context, filePath string, return nil } +func (m *mockRemoteCacheUpload) HasFile(ctx context.Context, key string) (bool, error) { + m.mu.Lock() + defer m.mu.Unlock() + + if m.uploadedFiles == nil { + return false, nil + } + + _, exists := m.uploadedFiles[key] + return exists, nil +} + // Test helper to create a test artifact file func createTestArtifactFile(t *testing.T, dir, name, content string) string { path := filepath.Join(dir, name) @@ -361,3 +373,59 @@ func TestArtifactUploader_ConcurrentUploads(t *testing.T) { assert.NoError(t, err) } } + +// TestArtifactUploader_SkipsExistingArtifacts tests that existing artifacts are not re-uploaded +func TestArtifactUploader_SkipsExistingArtifacts(t *testing.T) { + tmpDir := t.TempDir() + artifactPath := filepath.Join(tmpDir, "existing.tar.gz") + artifactContent := []byte("existing artifact content") + err := os.WriteFile(artifactPath, artifactContent, 0644) + require.NoError(t, err) + + mockCache := &mockRemoteCacheUpload{ + uploadedFiles: make(map[string][]byte), + } + + // Pre-populate the cache with the artifact (simulating it already exists) + mockCache.uploadedFiles["existing.tar.gz"] = artifactContent + + uploader := NewArtifactUploader(mockCache) + attestation := []byte(`{"test":"attestation"}`) + + err = uploader.UploadArtifactWithAttestation(context.Background(), artifactPath, attestation) + require.NoError(t, err) + + // Verify that the artifact was NOT re-uploaded (content should be unchanged) + assert.Equal(t, artifactContent, mockCache.uploadedFiles["existing.tar.gz"], "Artifact should 
not be re-uploaded") + + // Verify that the attestation WAS uploaded + assert.Contains(t, mockCache.uploadedFiles, "existing.tar.gz.att", "Attestation should be uploaded") + assert.Equal(t, attestation, mockCache.uploadedFiles["existing.tar.gz.att"], "Attestation content should match") +} + +// TestArtifactUploader_UploadsNewArtifacts tests that new artifacts are uploaded +func TestArtifactUploader_UploadsNewArtifacts(t *testing.T) { + tmpDir := t.TempDir() + artifactPath := filepath.Join(tmpDir, "new.tar.gz") + artifactContent := []byte("new artifact content") + err := os.WriteFile(artifactPath, artifactContent, 0644) + require.NoError(t, err) + + mockCache := &mockRemoteCacheUpload{ + uploadedFiles: make(map[string][]byte), + } + + uploader := NewArtifactUploader(mockCache) + attestation := []byte(`{"test":"attestation"}`) + + err = uploader.UploadArtifactWithAttestation(context.Background(), artifactPath, attestation) + require.NoError(t, err) + + // Verify that the artifact WAS uploaded + assert.Contains(t, mockCache.uploadedFiles, "new.tar.gz", "New artifact should be uploaded") + assert.Equal(t, artifactContent, mockCache.uploadedFiles["new.tar.gz"], "Artifact content should match") + + // Verify that the attestation WAS uploaded + assert.Contains(t, mockCache.uploadedFiles, "new.tar.gz.att", "Attestation should be uploaded") + assert.Equal(t, attestation, mockCache.uploadedFiles["new.tar.gz.att"], "Attestation content should match") +} From 23a00f7c9c903d666a256b3c10930c873b5d36af Mon Sep 17 00:00:00 2001 From: Leo Di Donato <120051+leodido@users.noreply.github.com> Date: Mon, 10 Nov 2025 15:38:29 +0000 Subject: [PATCH 2/2] Add comprehensive tests for artifact upload behavior Improve test coverage to prevent regression of the feature that avoids re-uploading downloaded artifacts: 1. Track upload call counts in mock to verify behavior - Add uploadCalls map to mockRemoteCacheUpload - Increment counter on each UploadFile call 2. 
Enhance existing tests with call count assertions - TestArtifactUploader_SkipsExistingArtifacts: Verify uploadCalls = 0 - TestArtifactUploader_UploadsNewArtifacts: Verify uploadCalls = 1 3. Add workflow simulation tests - TestArtifactUploader_SimulatesDownloadedArtifactWorkflow: Simulates build job uploading, sign job downloading, and verifies artifact is NOT re-uploaded (uploadCalls stays at 1) - TestArtifactUploader_SimulatesLocallyBuiltArtifactWorkflow: Simulates locally built artifact and verifies both artifact and attestation are uploaded These tests ensure the feature cannot regress by explicitly verifying that UploadFile is NOT called for existing artifacts. Co-authored-by: Ona --- pkg/leeway/signing/upload_test.go | 115 ++++++++++++++++++++++++++++-- 1 file changed, 109 insertions(+), 6 deletions(-) diff --git a/pkg/leeway/signing/upload_test.go b/pkg/leeway/signing/upload_test.go index 94c2e0c..402fe11 100644 --- a/pkg/leeway/signing/upload_test.go +++ b/pkg/leeway/signing/upload_test.go @@ -18,6 +18,7 @@ import ( type mockRemoteCacheUpload struct { uploadedFiles map[string][]byte uploadErrors map[string]error + uploadCalls map[string]int // Track number of times each key was uploaded mu sync.Mutex } @@ -62,6 +63,12 @@ func (m *mockRemoteCacheUpload) UploadFile(ctx context.Context, filePath string, m.mu.Lock() defer m.mu.Unlock() + // Track upload calls + if m.uploadCalls == nil { + m.uploadCalls = make(map[string]int) + } + m.uploadCalls[key]++ + // Check if this key should fail if err, exists := m.uploadErrors[key]; exists { return err @@ -384,6 +391,7 @@ func TestArtifactUploader_SkipsExistingArtifacts(t *testing.T) { mockCache := &mockRemoteCacheUpload{ uploadedFiles: make(map[string][]byte), + uploadCalls: make(map[string]int), } // Pre-populate the cache with the artifact (simulating it already exists) @@ -395,11 +403,12 @@ func TestArtifactUploader_SkipsExistingArtifacts(t *testing.T) { err = 
uploader.UploadArtifactWithAttestation(context.Background(), artifactPath, attestation) require.NoError(t, err) - // Verify that the artifact was NOT re-uploaded (content should be unchanged) - assert.Equal(t, artifactContent, mockCache.uploadedFiles["existing.tar.gz"], "Artifact should not be re-uploaded") + // CRITICAL: Verify that UploadFile was NOT called for the artifact + assert.Equal(t, 0, mockCache.uploadCalls["existing.tar.gz"], "Artifact should not be uploaded when it already exists") // Verify that the attestation WAS uploaded - assert.Contains(t, mockCache.uploadedFiles, "existing.tar.gz.att", "Attestation should be uploaded") + assert.Equal(t, 1, mockCache.uploadCalls["existing.tar.gz.att"], "Attestation should be uploaded exactly once") + assert.Contains(t, mockCache.uploadedFiles, "existing.tar.gz.att", "Attestation should be in cache") assert.Equal(t, attestation, mockCache.uploadedFiles["existing.tar.gz.att"], "Attestation content should match") } @@ -413,6 +422,7 @@ func TestArtifactUploader_UploadsNewArtifacts(t *testing.T) { mockCache := &mockRemoteCacheUpload{ uploadedFiles: make(map[string][]byte), + uploadCalls: make(map[string]int), } uploader := NewArtifactUploader(mockCache) @@ -421,11 +431,104 @@ func TestArtifactUploader_UploadsNewArtifacts(t *testing.T) { err = uploader.UploadArtifactWithAttestation(context.Background(), artifactPath, attestation) require.NoError(t, err) - // Verify that the artifact WAS uploaded - assert.Contains(t, mockCache.uploadedFiles, "new.tar.gz", "New artifact should be uploaded") + // Verify that UploadFile was called for the artifact + assert.Equal(t, 1, mockCache.uploadCalls["new.tar.gz"], "New artifact should be uploaded exactly once") + assert.Contains(t, mockCache.uploadedFiles, "new.tar.gz", "New artifact should be in cache") assert.Equal(t, artifactContent, mockCache.uploadedFiles["new.tar.gz"], "Artifact content should match") // Verify that the attestation WAS uploaded - assert.Contains(t, 
mockCache.uploadedFiles, "new.tar.gz.att", "Attestation should be uploaded") + assert.Equal(t, 1, mockCache.uploadCalls["new.tar.gz.att"], "Attestation should be uploaded exactly once") + assert.Contains(t, mockCache.uploadedFiles, "new.tar.gz.att", "Attestation should be in cache") assert.Equal(t, attestation, mockCache.uploadedFiles["new.tar.gz.att"], "Attestation content should match") } + +// TestArtifactUploader_SimulatesDownloadedArtifactWorkflow tests the complete workflow +// where an artifact is downloaded from remote cache and then signed +func TestArtifactUploader_SimulatesDownloadedArtifactWorkflow(t *testing.T) { + tmpDir := t.TempDir() + + // Simulate the workflow: + // 1. Build job uploads artifact to S3 + // 2. Sign job downloads artifact from S3 + // 3. Sign job creates attestation + // 4. Sign job should NOT re-upload artifact, only upload attestation + + mockCache := &mockRemoteCacheUpload{ + uploadedFiles: make(map[string][]byte), + uploadCalls: make(map[string]int), + } + + // Step 1: Simulate build job uploading artifact + buildArtifactContent := []byte("artifact built by build job") + mockCache.uploadedFiles["downloaded.tar.gz"] = buildArtifactContent + mockCache.uploadCalls["downloaded.tar.gz"] = 1 // Track that build job uploaded it + + // Step 2: Simulate sign job downloading artifact (creates local file) + downloadedArtifactPath := filepath.Join(tmpDir, "downloaded.tar.gz") + err := os.WriteFile(downloadedArtifactPath, buildArtifactContent, 0644) + require.NoError(t, err) + + // Step 3 & 4: Sign job creates attestation and uploads + uploader := NewArtifactUploader(mockCache) + attestation := []byte(`{"test":"attestation for downloaded artifact"}`) + + err = uploader.UploadArtifactWithAttestation(context.Background(), downloadedArtifactPath, attestation) + require.NoError(t, err) + + // CRITICAL VERIFICATION: Artifact should NOT be re-uploaded + // uploadCalls should still be 1 (from build job), not 2 + assert.Equal(t, 1, 
mockCache.uploadCalls["downloaded.tar.gz"], + "Downloaded artifact should NOT be re-uploaded by sign job") + + // Attestation should be uploaded + assert.Equal(t, 1, mockCache.uploadCalls["downloaded.tar.gz.att"], + "Attestation should be uploaded exactly once") + assert.Contains(t, mockCache.uploadedFiles, "downloaded.tar.gz.att", + "Attestation should be in cache") + assert.Equal(t, attestation, mockCache.uploadedFiles["downloaded.tar.gz.att"], + "Attestation content should match") + + // Artifact content should remain unchanged (not overwritten) + assert.Equal(t, buildArtifactContent, mockCache.uploadedFiles["downloaded.tar.gz"], + "Artifact content should remain unchanged from build job") +} + +// TestArtifactUploader_SimulatesLocallyBuiltArtifactWorkflow tests the workflow +// where an artifact is built locally and needs to be uploaded +func TestArtifactUploader_SimulatesLocallyBuiltArtifactWorkflow(t *testing.T) { + tmpDir := t.TempDir() + + // Simulate the workflow: + // 1. Build job builds artifact locally (not in remote cache) + // 2. Sign job creates attestation + // 3. 
Sign job should upload BOTH artifact and attestation + + mockCache := &mockRemoteCacheUpload{ + uploadedFiles: make(map[string][]byte), + uploadCalls: make(map[string]int), + } + + // Step 1: Simulate locally built artifact (not in remote cache yet) + localArtifactPath := filepath.Join(tmpDir, "local.tar.gz") + localArtifactContent := []byte("artifact built locally") + err := os.WriteFile(localArtifactPath, localArtifactContent, 0644) + require.NoError(t, err) + + // Step 2 & 3: Sign job creates attestation and uploads + uploader := NewArtifactUploader(mockCache) + attestation := []byte(`{"test":"attestation for local artifact"}`) + + err = uploader.UploadArtifactWithAttestation(context.Background(), localArtifactPath, attestation) + require.NoError(t, err) + + // VERIFICATION: Both artifact and attestation should be uploaded + assert.Equal(t, 1, mockCache.uploadCalls["local.tar.gz"], + "Locally built artifact should be uploaded") + assert.Equal(t, localArtifactContent, mockCache.uploadedFiles["local.tar.gz"], + "Artifact content should match") + + assert.Equal(t, 1, mockCache.uploadCalls["local.tar.gz.att"], + "Attestation should be uploaded") + assert.Equal(t, attestation, mockCache.uploadedFiles["local.tar.gz.att"], + "Attestation content should match") +}