diff --git a/cmd/build.go b/cmd/build.go index d5d4f88..81627a7 100644 --- a/cmd/build.go +++ b/cmd/build.go @@ -414,6 +414,10 @@ func (c *pushOnlyRemoteCache) UploadFile(ctx context.Context, filePath string, k return c.C.UploadFile(ctx, filePath, key) } +func (c *pushOnlyRemoteCache) HasFile(ctx context.Context, key string) (bool, error) { + return c.C.HasFile(ctx, key) +} + type pullOnlyRemoteCache struct { C cache.RemoteCache } @@ -434,6 +438,10 @@ func (c *pullOnlyRemoteCache) UploadFile(ctx context.Context, filePath string, k return nil } +func (c *pullOnlyRemoteCache) HasFile(ctx context.Context, key string) (bool, error) { + return c.C.HasFile(ctx, key) +} + func getRemoteCacheFromEnv() cache.RemoteCache { return getRemoteCache(nil) } diff --git a/pkg/leeway/cache/remote/gsutil.go b/pkg/leeway/cache/remote/gsutil.go index db8adb7..98ac6ba 100644 --- a/pkg/leeway/cache/remote/gsutil.go +++ b/pkg/leeway/cache/remote/gsutil.go @@ -192,6 +192,27 @@ func (rs *GSUtilCache) UploadFile(ctx context.Context, filePath string, key stri return nil } +// HasFile checks if a file exists in the remote cache with the given key +func (rs *GSUtilCache) HasFile(ctx context.Context, key string) (bool, error) { + target := fmt.Sprintf("gs://%s/%s", rs.BucketName, key) + + cmd := exec.CommandContext(ctx, "gsutil", "stat", target) + if err := cmd.Run(); err != nil { + // gsutil stat returns non-zero exit code if file doesn't exist + if exitErr, ok := err.(*exec.ExitError); ok { + if status, ok := exitErr.Sys().(syscall.WaitStatus); ok { + if status.ExitStatus() == 1 { + // File doesn't exist + return false, nil + } + } + } + return false, fmt.Errorf("failed to check if file exists at %s: %w", target, err) + } + + return true, nil +} + func parseGSUtilStatOutput(reader io.Reader) map[string]struct{} { exists := make(map[string]struct{}) scanner := bufio.NewScanner(reader) diff --git a/pkg/leeway/cache/remote/no_cache.go b/pkg/leeway/cache/remote/no_cache.go index a1187d2..1ef925b 
100644 --- a/pkg/leeway/cache/remote/no_cache.go +++ b/pkg/leeway/cache/remote/no_cache.go @@ -33,3 +33,8 @@ func (NoRemoteCache) Upload(ctx context.Context, src cache.LocalCache, pkgs []ca func (NoRemoteCache) UploadFile(ctx context.Context, filePath string, key string) error { return nil } + +// HasFile checks if a file exists in the remote cache with the given key +func (NoRemoteCache) HasFile(ctx context.Context, key string) (bool, error) { + return false, nil +} diff --git a/pkg/leeway/cache/remote/s3.go b/pkg/leeway/cache/remote/s3.go index 0e48b04..e25c21d 100644 --- a/pkg/leeway/cache/remote/s3.go +++ b/pkg/leeway/cache/remote/s3.go @@ -964,6 +964,30 @@ func (s *S3Cache) UploadFile(ctx context.Context, filePath string, key string) e return nil } +// HasFile checks if a file exists in the remote cache with the given key +func (s *S3Cache) HasFile(ctx context.Context, key string) (bool, error) { + // Wait for rate limiter permission + if err := s.waitForRateLimit(ctx); err != nil { + log.WithError(err).Debug("Rate limiter error during file existence check") + return false, fmt.Errorf("rate limiter error: %w", err) + } + + // Use timeout for the check operation + timeoutCtx, cancel := context.WithTimeout(ctx, 30*time.Second) + defer cancel() + + exists, err := s.storage.HasObject(timeoutCtx, key) + if err != nil { + log.WithFields(log.Fields{ + "key": key, + "error": err, + }).Debug("failed to check file existence in remote cache") + return false, fmt.Errorf("failed to check if file exists: %w", err) + } + + return exists, nil +} + // s3ClientAPI is a subset of the S3 client interface we need type s3ClientAPI interface { HeadObject(ctx context.Context, params *s3.HeadObjectInput, optFns ...func(*s3.Options)) (*s3.HeadObjectOutput, error) diff --git a/pkg/leeway/cache/types.go b/pkg/leeway/cache/types.go index b52c80e..1e2853f 100644 --- a/pkg/leeway/cache/types.go +++ b/pkg/leeway/cache/types.go @@ -56,6 +56,10 @@ type RemoteCache interface { // UploadFile 
uploads a single file to the remote cache with the given key // This is useful for uploading individual files like attestations without Package abstraction UploadFile(ctx context.Context, filePath string, key string) error + + // HasFile checks if a file exists in the remote cache with the given key + // This is useful for checking if artifacts need to be uploaded + HasFile(ctx context.Context, key string) (bool, error) } // ObjectStorage represents a generic object storage interface diff --git a/pkg/leeway/signing/attestation_test.go b/pkg/leeway/signing/attestation_test.go index b121231..0fc811d 100644 --- a/pkg/leeway/signing/attestation_test.go +++ b/pkg/leeway/signing/attestation_test.go @@ -914,6 +914,10 @@ func (m *mockRemoteCache) UploadFile(ctx context.Context, filePath string, key s return nil } +func (m *mockRemoteCache) HasFile(ctx context.Context, key string) (bool, error) { + return false, nil +} + // TestGetEnvOrDefault tests the environment variable helper // TestValidateSigstoreEnvironment tests Sigstore environment validation func TestValidateSigstoreEnvironment(t *testing.T) { diff --git a/pkg/leeway/signing/upload.go b/pkg/leeway/signing/upload.go index f0d0809..fe28976 100644 --- a/pkg/leeway/signing/upload.go +++ b/pkg/leeway/signing/upload.go @@ -86,9 +86,28 @@ func (u *ArtifactUploader) UploadArtifactWithAttestation(ctx context.Context, ar return fmt.Errorf("context cancelled before upload: %w", err) } - // Upload artifact first - if err := u.remoteCache.UploadFile(ctx, artifactPath, artifactKey); err != nil { - return fmt.Errorf("failed to upload artifact: %w", err) + // Check if artifact already exists in remote cache + // Only upload if it doesn't exist (avoid re-uploading downloaded artifacts) + artifactExists, err := u.remoteCache.HasFile(ctx, artifactKey) + if err != nil { + log.WithError(err).WithField("key", artifactKey).Warn("Failed to check if artifact exists, will attempt upload") + artifactExists = false // Assume it doesn't 
// mockRemoteCacheUpload is a thread-safe in-memory stand-in for the remote
// cache used by the upload tests. It records every uploaded file and counts
// how many times each key was uploaded so tests can assert on skip behavior.
type mockRemoteCacheUpload struct {
	uploadedFiles map[string][]byte // key -> uploaded file content
	uploadErrors  map[string]error  // keys that should fail on upload
	uploadCalls   map[string]int    // Track number of times each key was uploaded
	mu            sync.Mutex        // guards all maps above
}
:= m.uploadErrors[key]; exists { return err @@ -83,6 +90,18 @@ func (m *mockRemoteCacheUpload) UploadFile(ctx context.Context, filePath string, return nil } +func (m *mockRemoteCacheUpload) HasFile(ctx context.Context, key string) (bool, error) { + m.mu.Lock() + defer m.mu.Unlock() + + if m.uploadedFiles == nil { + return false, nil + } + + _, exists := m.uploadedFiles[key] + return exists, nil +} + // Test helper to create a test artifact file func createTestArtifactFile(t *testing.T, dir, name, content string) string { path := filepath.Join(dir, name) @@ -361,3 +380,155 @@ func TestArtifactUploader_ConcurrentUploads(t *testing.T) { assert.NoError(t, err) } } + +// TestArtifactUploader_SkipsExistingArtifacts tests that existing artifacts are not re-uploaded +func TestArtifactUploader_SkipsExistingArtifacts(t *testing.T) { + tmpDir := t.TempDir() + artifactPath := filepath.Join(tmpDir, "existing.tar.gz") + artifactContent := []byte("existing artifact content") + err := os.WriteFile(artifactPath, artifactContent, 0644) + require.NoError(t, err) + + mockCache := &mockRemoteCacheUpload{ + uploadedFiles: make(map[string][]byte), + uploadCalls: make(map[string]int), + } + + // Pre-populate the cache with the artifact (simulating it already exists) + mockCache.uploadedFiles["existing.tar.gz"] = artifactContent + + uploader := NewArtifactUploader(mockCache) + attestation := []byte(`{"test":"attestation"}`) + + err = uploader.UploadArtifactWithAttestation(context.Background(), artifactPath, attestation) + require.NoError(t, err) + + // CRITICAL: Verify that UploadFile was NOT called for the artifact + assert.Equal(t, 0, mockCache.uploadCalls["existing.tar.gz"], "Artifact should not be uploaded when it already exists") + + // Verify that the attestation WAS uploaded + assert.Equal(t, 1, mockCache.uploadCalls["existing.tar.gz.att"], "Attestation should be uploaded exactly once") + assert.Contains(t, mockCache.uploadedFiles, "existing.tar.gz.att", "Attestation should be in 
cache") + assert.Equal(t, attestation, mockCache.uploadedFiles["existing.tar.gz.att"], "Attestation content should match") +} + +// TestArtifactUploader_UploadsNewArtifacts tests that new artifacts are uploaded +func TestArtifactUploader_UploadsNewArtifacts(t *testing.T) { + tmpDir := t.TempDir() + artifactPath := filepath.Join(tmpDir, "new.tar.gz") + artifactContent := []byte("new artifact content") + err := os.WriteFile(artifactPath, artifactContent, 0644) + require.NoError(t, err) + + mockCache := &mockRemoteCacheUpload{ + uploadedFiles: make(map[string][]byte), + uploadCalls: make(map[string]int), + } + + uploader := NewArtifactUploader(mockCache) + attestation := []byte(`{"test":"attestation"}`) + + err = uploader.UploadArtifactWithAttestation(context.Background(), artifactPath, attestation) + require.NoError(t, err) + + // Verify that UploadFile was called for the artifact + assert.Equal(t, 1, mockCache.uploadCalls["new.tar.gz"], "New artifact should be uploaded exactly once") + assert.Contains(t, mockCache.uploadedFiles, "new.tar.gz", "New artifact should be in cache") + assert.Equal(t, artifactContent, mockCache.uploadedFiles["new.tar.gz"], "Artifact content should match") + + // Verify that the attestation WAS uploaded + assert.Equal(t, 1, mockCache.uploadCalls["new.tar.gz.att"], "Attestation should be uploaded exactly once") + assert.Contains(t, mockCache.uploadedFiles, "new.tar.gz.att", "Attestation should be in cache") + assert.Equal(t, attestation, mockCache.uploadedFiles["new.tar.gz.att"], "Attestation content should match") +} + +// TestArtifactUploader_SimulatesDownloadedArtifactWorkflow tests the complete workflow +// where an artifact is downloaded from remote cache and then signed +func TestArtifactUploader_SimulatesDownloadedArtifactWorkflow(t *testing.T) { + tmpDir := t.TempDir() + + // Simulate the workflow: + // 1. Build job uploads artifact to S3 + // 2. Sign job downloads artifact from S3 + // 3. Sign job creates attestation + // 4. 
Sign job should NOT re-upload artifact, only upload attestation + + mockCache := &mockRemoteCacheUpload{ + uploadedFiles: make(map[string][]byte), + uploadCalls: make(map[string]int), + } + + // Step 1: Simulate build job uploading artifact + buildArtifactContent := []byte("artifact built by build job") + mockCache.uploadedFiles["downloaded.tar.gz"] = buildArtifactContent + mockCache.uploadCalls["downloaded.tar.gz"] = 1 // Track that build job uploaded it + + // Step 2: Simulate sign job downloading artifact (creates local file) + downloadedArtifactPath := filepath.Join(tmpDir, "downloaded.tar.gz") + err := os.WriteFile(downloadedArtifactPath, buildArtifactContent, 0644) + require.NoError(t, err) + + // Step 3 & 4: Sign job creates attestation and uploads + uploader := NewArtifactUploader(mockCache) + attestation := []byte(`{"test":"attestation for downloaded artifact"}`) + + err = uploader.UploadArtifactWithAttestation(context.Background(), downloadedArtifactPath, attestation) + require.NoError(t, err) + + // CRITICAL VERIFICATION: Artifact should NOT be re-uploaded + // uploadCalls should still be 1 (from build job), not 2 + assert.Equal(t, 1, mockCache.uploadCalls["downloaded.tar.gz"], + "Downloaded artifact should NOT be re-uploaded by sign job") + + // Attestation should be uploaded + assert.Equal(t, 1, mockCache.uploadCalls["downloaded.tar.gz.att"], + "Attestation should be uploaded exactly once") + assert.Contains(t, mockCache.uploadedFiles, "downloaded.tar.gz.att", + "Attestation should be in cache") + assert.Equal(t, attestation, mockCache.uploadedFiles["downloaded.tar.gz.att"], + "Attestation content should match") + + // Artifact content should remain unchanged (not overwritten) + assert.Equal(t, buildArtifactContent, mockCache.uploadedFiles["downloaded.tar.gz"], + "Artifact content should remain unchanged from build job") +} + +// TestArtifactUploader_SimulatesLocallyBuiltArtifactWorkflow tests the workflow +// where an artifact is built locally and 
needs to be uploaded +func TestArtifactUploader_SimulatesLocallyBuiltArtifactWorkflow(t *testing.T) { + tmpDir := t.TempDir() + + // Simulate the workflow: + // 1. Build job builds artifact locally (not in remote cache) + // 2. Sign job creates attestation + // 3. Sign job should upload BOTH artifact and attestation + + mockCache := &mockRemoteCacheUpload{ + uploadedFiles: make(map[string][]byte), + uploadCalls: make(map[string]int), + } + + // Step 1: Simulate locally built artifact (not in remote cache yet) + localArtifactPath := filepath.Join(tmpDir, "local.tar.gz") + localArtifactContent := []byte("artifact built locally") + err := os.WriteFile(localArtifactPath, localArtifactContent, 0644) + require.NoError(t, err) + + // Step 2 & 3: Sign job creates attestation and uploads + uploader := NewArtifactUploader(mockCache) + attestation := []byte(`{"test":"attestation for local artifact"}`) + + err = uploader.UploadArtifactWithAttestation(context.Background(), localArtifactPath, attestation) + require.NoError(t, err) + + // VERIFICATION: Both artifact and attestation should be uploaded + assert.Equal(t, 1, mockCache.uploadCalls["local.tar.gz"], + "Locally built artifact should be uploaded") + assert.Equal(t, localArtifactContent, mockCache.uploadedFiles["local.tar.gz"], + "Artifact content should match") + + assert.Equal(t, 1, mockCache.uploadCalls["local.tar.gz.att"], + "Attestation should be uploaded") + assert.Equal(t, attestation, mockCache.uploadedFiles["local.tar.gz.att"], + "Attestation content should match") +}