diff --git a/pkg/blobmanager/s3accesspoint/backend.go b/pkg/blobmanager/s3accesspoint/backend.go index 1897eeb92..6c30e663a 100644 --- a/pkg/blobmanager/s3accesspoint/backend.go +++ b/pkg/blobmanager/s3accesspoint/backend.go @@ -30,6 +30,7 @@ import ( "github.com/aws/aws-sdk-go-v2/service/s3" s3types "github.com/aws/aws-sdk-go-v2/service/s3/types" "github.com/aws/aws-sdk-go-v2/service/sts" + ststypes "github.com/aws/aws-sdk-go-v2/service/sts/types" "github.com/aws/smithy-go" pb "github.com/chainloop-dev/chainloop/app/artifact-cas/api/cas/v1" robotaccount "github.com/chainloop-dev/chainloop/internal/robotaccount/cas" @@ -47,6 +48,13 @@ const ( // be useless against an AP policy condition. var ErrMissingRequestingOrg = errors.New("s3accesspoint: requesting org missing from claims") +// stsAssumer is the subset of *sts.Client that the credentials provider +// actually uses. Keeping the dependency at interface-level lets tests +// inject a fake without spinning up a real AWS config. +type stsAssumer interface { + AssumeRole(ctx context.Context, in *sts.AssumeRoleInput, optFns ...func(*sts.Options)) (*sts.AssumeRoleOutput, error) +} + // Backend is the per-tenant uploader/downloader. One *Backend instance is // bound to one access point; the actual AWS credentials are minted // per-request via STS using the org UUID found in the request context. @@ -56,7 +64,7 @@ type Backend struct { // stsClient is built once at construction using the pod's ambient // IAM identity. The credential chain (IRSA → IMDS → env → // ~/.aws/credentials) picks up the identity automatically. - stsClient *sts.Client + stsClient stsAssumer // s3Client uses a custom CredentialsProvider that mints a scoped // session per request (cached in-process per requesting-org so back- @@ -261,7 +269,7 @@ func (b *Backend) CheckWritePermissions(ctx context.Context) error { // reusing the temporary credentials across consecutive calls until the // expiration window approaches. type sessionCredentialsProvider struct { - stsClient *sts.Client + stsClient stsAssumer // ambientCreds is the SDK-default credentials provider captured from // awsCfg at construction time. Only consulted when @@ -296,21 +304,34 @@ func (p *sessionCredentialsProvider) Retrieve(ctx context.Context) (aws.Credenti return p.ambientCreds.Retrieve(ctx) } - // Session policy intersects with the base role's permissions and - // pins this session to the caller's AP ARN. Cross-tenant defense - // against a tampered AccessPointARN in the secret blob lives in the - // AP's resource policy (aws:userid StringEquals on the role session - // name minted from the request-context org UUID), not here — keeping - // the inline policy small leaves headroom in STS's packed-policy - // budget for tags inherited from the caller principal. - sessionPolicy := buildSessionPolicy(p.creds.AccessPointARN) - - out, err := p.stsClient.AssumeRole(ctx, &sts.AssumeRoleInput{ + // The session policy intersects with the base role's permissions and + // pins this session to the caller's AP. Cross-tenant defense against + // a tampered AccessPointARN in the secret blob lives in the AP's + // resource policy (aws:userid StringEquals on the role session name + // minted from the request-context org UUID), not here. + // + // When the operator has provisioned a managed IAM policy and + // recorded its ARN in SessionPolicyARN, reference it via PolicyArns + // instead of inlining a JSON document. Only the ARN counts against + // STS's packed-policy budget that way, leaving more headroom for + // session tags inherited from the caller principal (IRSA / Pod + // Identity). When SessionPolicyARN is empty we fall back to the + // inline default — a missing ARN must NOT degrade to an unscoped + // session that inherits the full BaseRoleARN permissions. + input := &sts.AssumeRoleInput{ RoleArn: aws.String(p.creds.BaseRoleARN), RoleSessionName: aws.String(roleSessionName(info.OrgID)), - Policy: aws.String(sessionPolicy), DurationSeconds: aws.Int32(int32(SessionDuration.Seconds())), - }) + } + if p.creds.SessionPolicyARN != "" { + input.PolicyArns = []ststypes.PolicyDescriptorType{ + {Arn: aws.String(p.creds.SessionPolicyARN)}, + } + } else { + input.Policy = aws.String(buildSessionPolicy(p.creds.AccessPointARN)) + } + + out, err := p.stsClient.AssumeRole(ctx, input) if err != nil { return aws.Credentials{}, fmt.Errorf("sts:AssumeRole for org %s: %w", info.OrgID, err) } diff --git a/pkg/blobmanager/s3accesspoint/backend_test.go b/pkg/blobmanager/s3accesspoint/backend_test.go index 8aeb12e42..57cbd34a8 100644 --- a/pkg/blobmanager/s3accesspoint/backend_test.go +++ b/pkg/blobmanager/s3accesspoint/backend_test.go @@ -19,8 +19,11 @@ import ( "bytes" "context" "testing" + "time" "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/service/sts" + ststypes "github.com/aws/aws-sdk-go-v2/service/sts/types" pb "github.com/chainloop-dev/chainloop/app/artifact-cas/api/cas/v1" robotaccount "github.com/chainloop-dev/chainloop/internal/robotaccount/cas" jwtmiddleware "github.com/go-kratos/kratos/v2/middleware/auth/jwt" @@ -174,6 +177,83 @@ func (c *countingCredsProvider) Retrieve(_ context.Context) (aws.Credentials, er return c.creds, nil } +// fakeSTSAssumer captures the AssumeRoleInput passed by the credentials +// provider so tests can lock down the AssumeRole call shape (inline +// policy vs PolicyArns) without making a real AWS call. +type fakeSTSAssumer struct { + lastInput *sts.AssumeRoleInput +} + +func (f *fakeSTSAssumer) AssumeRole(_ context.Context, in *sts.AssumeRoleInput, _ ...func(*sts.Options)) (*sts.AssumeRoleOutput, error) { + f.lastInput = in + return &sts.AssumeRoleOutput{ + Credentials: &ststypes.Credentials{ + AccessKeyId: aws.String("AKFAKE"), + SecretAccessKey: aws.String("secret"), + SessionToken: aws.String("token"), + Expiration: aws.Time(time.Now().Add(time.Hour)), + }, + }, nil +} + +// TestAssumeRoleInput_DefaultsToInlinePolicy locks down the fallback +// path: with an empty SessionPolicyARN the AssumeRole call must carry an +// inline Policy and must NOT pass PolicyArns — otherwise an upgrade +// that forgets to set the new field would silently degrade to a session +// scoped only by the BaseRoleARN's identity policies. +func TestAssumeRoleInput_DefaultsToInlinePolicy(t *testing.T) { + t.Parallel() + + fake := &fakeSTSAssumer{} + p := &sessionCredentialsProvider{ + stsClient: fake, + creds: &Credentials{ + AccessPointARN: "arn:aws:s3:us-east-1:111:accesspoint/ap-a", + BaseRoleARN: "arn:aws:iam::111:role/r", + }, + } + ctx := jwtmiddleware.NewContext(context.Background(), + &robotaccount.Claims{OrgID: "org-A", StoredSecretID: "foo", BackendType: "BT", Role: robotaccount.Uploader}) + + _, err := p.Retrieve(ctx) + require.NoError(t, err) + require.NotNil(t, fake.lastInput, "stub STS must have been invoked") + + require.NotNil(t, fake.lastInput.Policy, "empty SessionPolicyARN must produce an inline Policy") + assert.Contains(t, *fake.lastInput.Policy, "arn:aws:s3:us-east-1:111:accesspoint/ap-a/object/*") + assert.Empty(t, fake.lastInput.PolicyArns, "inline path must not also pass PolicyArns") +} + +// TestAssumeRoleInput_UsesPolicyArnsWhenSet locks down the opt-in path: +// a configured SessionPolicyARN must reach STS via PolicyArns, and the +// inline Policy must be omitted so we don't double-count against the +// packed-policy budget. +func TestAssumeRoleInput_UsesPolicyArnsWhenSet(t *testing.T) { + t.Parallel() + + const managedARN = "arn:aws:iam::111:policy/chainloop-cas-session" + fake := &fakeSTSAssumer{} + p := &sessionCredentialsProvider{ + stsClient: fake, + creds: &Credentials{ + AccessPointARN: "arn:aws:s3:us-east-1:111:accesspoint/ap-a", + BaseRoleARN: "arn:aws:iam::111:role/r", + SessionPolicyARN: managedARN, + }, + } + ctx := jwtmiddleware.NewContext(context.Background(), + &robotaccount.Claims{OrgID: "org-A", StoredSecretID: "foo", BackendType: "BT", Role: robotaccount.Uploader}) + + _, err := p.Retrieve(ctx) + require.NoError(t, err) + require.NotNil(t, fake.lastInput, "stub STS must have been invoked") + + assert.Nil(t, fake.lastInput.Policy, "PolicyArns path must NOT also send an inline Policy") + require.Len(t, fake.lastInput.PolicyArns, 1, "expected exactly one PolicyArn descriptor") + require.NotNil(t, fake.lastInput.PolicyArns[0].Arn) + assert.Equal(t, managedARN, *fake.lastInput.PolicyArns[0].Arn) +} + // --- helpers ----------------------------------------------------------- // newTestBackend constructs a fully wired *Backend that uses static dummy diff --git a/pkg/blobmanager/s3accesspoint/provider.go b/pkg/blobmanager/s3accesspoint/provider.go index 378ae9a89..69fefc21e 100644 --- a/pkg/blobmanager/s3accesspoint/provider.go +++ b/pkg/blobmanager/s3accesspoint/provider.go @@ -34,6 +34,13 @@ // (${apARN}/object/*), keeping the inline document small so STS's // packed-policy budget isn't consumed by chainloop before tags // inherited from the caller principal are even accounted for. +// 4. Optionally, a customer-managed IAM policy referenced via +// Credentials.SessionPolicyARN. When set, the per-request AssumeRole +// call passes that ARN through PolicyArns instead of an inline Policy +// document; only the ~60-char ARN counts against the packed-policy +// budget, leaving more headroom for caller-inherited session tags. +// The managed policy is the action allowlist in that mode, so it +// MUST be at least as restrictive as the inline default. // // The session name MUST come from the request context, not from the secret // blob: a secrets-store compromise alone must not let an attacker reroute @@ -49,6 +56,7 @@ import ( "strings" "time" + "github.com/aws/aws-sdk-go-v2/aws/arn" backend "github.com/chainloop-dev/chainloop/pkg/blobmanager" "github.com/chainloop-dev/chainloop/pkg/credentials" ) @@ -113,6 +121,15 @@ type Credentials struct { // accounts without a config change. Required unless DevModeEnvVar is // set on the running binary. BaseRoleARN string + // SessionPolicyARN is optional. When non-empty, the per-request + // AssumeRole call passes this ARN via PolicyArns instead of an + // inline Policy document, trimming chainloop's contribution to + // STS's packed-policy budget down to the ARN string itself. The + // IAM policy at this ARN MUST be at least as restrictive as the + // inline default (s3:GetObject + s3:PutObject scoped to the AP's + // /object/* prefix). When empty, the backend falls back to the + // inline session policy. + SessionPolicyARN string } func (c *Credentials) Validate() error { @@ -136,6 +153,21 @@ func (c *Credentials) Validate() error { return fmt.Errorf("%w: base_role_arn %q is not a valid IAM role ARN", backend.ErrValidation, c.BaseRoleARN) } } + // SessionPolicyARN is optional. When set it must be a syntactically + // valid IAM managed policy ARN — anything else (an S3 ARN, a role + // ARN, a policy ARN with no name, a role ARN that happens to embed + // ":policy/" in its path) would be silently rejected by STS at + // request time, surfacing as opaque errors deep in the upload path. + // Fail loudly here instead. + if c.SessionPolicyARN != "" { + a, err := arn.Parse(c.SessionPolicyARN) + if err != nil || + a.Service != "iam" || + !strings.HasPrefix(a.Resource, "policy/") || + a.Resource == "policy/" { + return fmt.Errorf("%w: session_policy_arn %q is not a valid IAM managed policy ARN", backend.ErrValidation, c.SessionPolicyARN) + } + } return nil } diff --git a/pkg/blobmanager/s3accesspoint/provider_test.go b/pkg/blobmanager/s3accesspoint/provider_test.go index ccc1d9488..013bcbe25 100644 --- a/pkg/blobmanager/s3accesspoint/provider_test.go +++ b/pkg/blobmanager/s3accesspoint/provider_test.go @@ -67,6 +67,58 @@ func TestCredentials_Validate(t *testing.T) { mutate: func(c *Credentials) { c.BaseRoleARN = "not-an-arn" }, wantErr: "not a valid IAM role ARN", }, + { + name: "session policy arn empty is allowed", + mutate: func(c *Credentials) { c.SessionPolicyARN = "" }, + wantErr: "", + }, + { + name: "session policy arn valid", + mutate: func(c *Credentials) { c.SessionPolicyARN = "arn:aws:iam::123456789012:policy/chainloop-cas-session" }, + wantErr: "", + }, + { + name: "session policy arn pointing at s3 ARN rejected", + mutate: func(c *Credentials) { c.SessionPolicyARN = "arn:aws:s3:::some-bucket" }, + wantErr: "not a valid IAM managed policy ARN", + }, + { + name: "session policy arn pointing at role rejected", + mutate: func(c *Credentials) { c.SessionPolicyARN = "arn:aws:iam::123456789012:role/some-role" }, + wantErr: "not a valid IAM managed policy ARN", + }, + { + name: "session policy arn garbage rejected", + mutate: func(c *Credentials) { c.SessionPolicyARN = "not-an-arn-at-all" }, + wantErr: "not a valid IAM managed policy ARN", + }, + { + name: "session policy arn for AWS-managed policy accepted", + mutate: func(c *Credentials) { c.SessionPolicyARN = "arn:aws:iam::aws:policy/AdministratorAccess" }, + wantErr: "", + }, + { + // previously the loose substring check would accept this because + // the string both starts with arn:aws:iam:: and contains :policy/ + name: "session policy arn embedded in role path rejected", + mutate: func(c *Credentials) { c.SessionPolicyARN = "arn:aws:iam::123456789012:role/foo:policy/bar" }, + wantErr: "not a valid IAM managed policy ARN", + }, + { + name: "session policy arn with empty name rejected", + mutate: func(c *Credentials) { c.SessionPolicyARN = "arn:aws:iam::123456789012:policy/" }, + wantErr: "not a valid IAM managed policy ARN", + }, + { + name: "session policy arn missing service rejected", + mutate: func(c *Credentials) { c.SessionPolicyARN = "arn:aws::us-east-1:123456789012:policy/foo" }, + wantErr: "not a valid IAM managed policy ARN", + }, + { + name: "session policy arn structurally malformed rejected", + mutate: func(c *Credentials) { c.SessionPolicyARN = "arn:aws:iam" }, + wantErr: "not a valid IAM managed policy ARN", + }, } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) {