Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions changes/44111-scep-autorenew-fail

This file was deleted.

28 changes: 7 additions & 21 deletions server/datastore/mysql/apple_mdm.go
Original file line number Diff line number Diff line change
Expand Up @@ -3539,34 +3539,20 @@ func (ds *Datastore) UpdateOrDeleteHostMDMAppleProfile(ctx context.Context, prof
}

// Check whether we want to set a install operation as 'verifying' for an iOS/iPadOS device.
// For non-managed-cert profiles, iOS/iPadOS short-circuits to 'verified' because there is
// no osquery available to drive the standard verifying -> verified transition. Managed-cert
// profiles instead use CertificateList ingestion (updateHostMDMManagedCertDetailsDB) as
// their verification trigger; leaving them at 'verifying' here closes the renewal-cron race
// where 'verified' would arrive before fresh cert metadata had been ingested. See #44111.
var iOSAckCheck struct {
IsIOS bool `db:"is_ios"`
IsManagedCert bool `db:"is_managed_cert"`
}
var isIOSIPadOSInstallVerifiying bool
if profile.OperationType == fleet.MDMOperationTypeInstall && profile.Status != nil && *profile.Status == fleet.MDMDeliveryVerifying {
if err := ds.writer(ctx).GetContext(ctx, &iOSAckCheck, `
SELECT
(h.platform = 'ios' OR h.platform = 'ipados') AS is_ios,
EXISTS(SELECT 1 FROM host_mdm_managed_certificates WHERE host_uuid = h.uuid AND profile_uuid = ?) AS is_managed_cert
FROM hosts h
WHERE h.uuid = ?`,
profile.ProfileUUID, profile.HostUUID,
if err := ds.writer(ctx).GetContext(ctx, &isIOSIPadOSInstallVerifiying, `
SELECT platform = 'ios' OR platform = 'ipados' FROM hosts WHERE uuid = ?`,
profile.HostUUID,
); err != nil {
return err
}
}

status := profile.Status
if iOSAckCheck.IsIOS && !iOSAckCheck.IsManagedCert {
// iOS/iPadOS devices do not have osquery, thus they go from 'pending'
// straight to 'verified'. Managed-cert profiles are the exception and
// transition via updateHostMDMManagedCertDetailsDB once fresh metadata
// arrives.
if isIOSIPadOSInstallVerifiying {
// iOS/iPadOS devices do not have osquery,
// thus they go from 'pending' straight to 'verified'
status = &fleet.MDMDeliveryVerified
}

Expand Down
308 changes: 0 additions & 308 deletions server/datastore/mysql/apple_mdm_test.go

Large diffs are not rendered by default.

131 changes: 0 additions & 131 deletions server/datastore/mysql/challenges_test.go

This file was deleted.

19 changes: 0 additions & 19 deletions server/datastore/mysql/host_certificates.go
Original file line number Diff line number Diff line change
Expand Up @@ -520,25 +520,6 @@ func updateHostMDMManagedCertDetailsDB(ctx context.Context, tx sqlx.ExtContext,
if _, err := tx.ExecContext(ctx, stmt, args...); err != nil {
return ctxerr.Wrap(ctx, err, "updating host mdm managed certificates")
}

// Fresh cert metadata is the verification signal for iOS/iPadOS managed-cert
// profiles (which UpdateOrDeleteHostMDMAppleProfile parks at 'verifying' on
// MDM ack instead of short-circuiting to 'verified'). Flip the install row
// here so the renewal cron's IN ('verified', 'failed') filter only matches
// once cert metadata is in sync. For macOS this is redundant with
// VerifyHostMDMProfiles but idempotent. See issue #44111.
flipStmt := `
UPDATE host_mdm_apple_profiles
SET status = ?
WHERE host_uuid = ? AND profile_uuid = ?
AND status = ? AND operation_type = ?`
if _, err := tx.ExecContext(ctx, flipStmt,
fleet.MDMDeliveryVerified,
certToUpdate.HostUUID, certToUpdate.ProfileUUID,
fleet.MDMDeliveryVerifying, fleet.MDMOperationTypeInstall,
); err != nil {
return ctxerr.Wrap(ctx, err, "flipping iOS managed cert profile to verified")
}
}
return nil
}
69 changes: 10 additions & 59 deletions server/datastore/mysql/mdm.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,31 +14,10 @@ import (
"github.com/fleetdm/fleet/v4/server/mdm"
"github.com/fleetdm/fleet/v4/server/mdm/apple/mobileconfig"
microsoft_mdm "github.com/fleetdm/fleet/v4/server/mdm/microsoft"
common_mysql "github.com/fleetdm/fleet/v4/server/platform/mysql"
"github.com/google/go-cmp/cmp"
"github.com/jmoiron/sqlx"
)

// renewalFailedRetryBackoff bounds how often RenewMDMManagedCertificates re-triggers
// renewal for a managed-cert profile sitting in the 'failed' state. Without this gate,
// a permanent failure (CA deleted, IDP variables missing, license downgraded — anything
// that fails at profile-render time via fleet.MarkProfilesFailed) would loop every cron
// tick: cron flips status to NULL, reconcile re-renders and immediately fails again,
// status returns to 'failed', repeat. The backoff is set well above the cron interval
// (1h) so transient SCEP-server outages still recover within a day, but permanent
// failures don't churn nano commands and profile renders hourly. See issue #44111.
const renewalFailedRetryBackoff = 24 * time.Hour

var mdmCommandsAllowedOrderKeys = common_mysql.OrderKeyAllowlist{
"command_uuid": "command_uuid",
"request_type": "request_type",
"status": "status",
"updated_at": "updated_at",
"hostname": "hostname",
"host_uuid": "host_uuid",
"name": "name",
}

func (ds *Datastore) GetMDMCommandPlatform(ctx context.Context, commandUUID string) (string, error) {
stmt := `
SELECT CASE
Expand Down Expand Up @@ -2788,14 +2767,6 @@ func (ds *Datastore) BulkUpsertMDMManagedCertificates(ctx context.Context, paylo
}

executeUpsertBatch := func(valuePart string, args []any) error {
// Cert metadata columns (not_valid_before, not_valid_after, serial) use COALESCE so a
// nil incoming value preserves the previously stored value. The reconcile re-render
// path (e.g. ReplaceCustomSCEPProxyURLVariable, NDES/Smallstep handlers) upserts with
// those fields nil — they aren't known until the device completes the SCEP handshake
// and osquery reports the issued cert via updateHostMDMManagedCertDetailsDB. Without
// COALESCE, a renewal trigger silently wipes cert metadata, which then disables the
// renewal cron itself (its HAVING clause requires validity_period IS NOT NULL). See
// issue #44111.
stmt := fmt.Sprintf(`
INSERT INTO host_mdm_managed_certificates (
host_uuid,
Expand All @@ -2810,11 +2781,11 @@ func (ds *Datastore) BulkUpsertMDMManagedCertificates(ctx context.Context, paylo
VALUES %s
ON DUPLICATE KEY UPDATE
challenge_retrieved_at = VALUES(challenge_retrieved_at),
not_valid_before = COALESCE(VALUES(not_valid_before), not_valid_before),
not_valid_after = COALESCE(VALUES(not_valid_after), not_valid_after),
not_valid_before = VALUES(not_valid_before),
not_valid_after = VALUES(not_valid_after),
type = VALUES(type),
ca_name = VALUES(ca_name),
serial = COALESCE(VALUES(serial), serial)`,
serial = VALUES(serial)`,
strings.TrimSuffix(valuePart, ","),
)

Expand Down Expand Up @@ -2873,33 +2844,18 @@ func (ds *Datastore) RenewMDMManagedCertificates(ctx context.Context) error {
)
continue
}
// This will trigger a resend next time profiles are checked. Restrict to settled
// statuses ('verified', 'failed') to avoid re-firing renewal while a previous
// delivery is still in flight ('pending', 'verifying') — that would generate a
// fresh challenge and InstallProfile command every cron tick for any host that
// hasn't yet picked up the renewal (e.g. offline laptop), creating orphan nano
// commands and challenge rows hourly. See issue #44111.
//
// 'failed' rows are additionally gated on hp.updated_at being older than
// renewalFailedRetryBackoff. Without that gate, a permanent failure (CA deleted,
// IDP variables missing, license downgraded — anything that fails at profile-render
// time via fleet.MarkProfilesFailed) would loop indefinitely: cron flips status to
// NULL, reconcile re-renders and immediately fails again, status returns to 'failed',
// next cron tick repeats. Pre-fix this was masked by bug #1's metadata wipe acting
// as an accidental circuit breaker; once metadata is preserved (COALESCE), the loop
// becomes visible. The backoff gives transient SCEP failures a chance to recover
// without spamming a fresh challenge + nano command per cron tick.
updateQuery := `UPDATE ` + table + ` SET status = NULL WHERE status IN (?, ?) AND operation_type = ? AND (`
// This will trigger a resend next time profiles are checked
updateQuery := `UPDATE ` + table + ` SET status = NULL WHERE status IS NOT NULL AND operation_type = ? AND (`
hostProfileClause := ``
values := []any{fleet.MDMDeliveryVerified, fleet.MDMDeliveryFailed, fleet.MDMOperationTypeInstall}
values := []any{fleet.MDMOperationTypeInstall}
hostCertsToRenew := []struct {
HostUUID string `db:"host_uuid"`
ProfileUUID string `db:"profile_uuid"`
NotValidAfter time.Time `db:"not_valid_after"`
ValidityPeriod int `db:"validity_period"`
}{}
// Fetch all MDM Managed certificates of the given type that are in a settled
// status ('verified' or 'failed') and which
// Fetch all MDM Managed certificates of the given type that aren't already queued for
// resend(hmap.status=null) and which
// * Have a validity period > 30 days and are expiring in the next 30 days
// * Have a validity period <= 30 days and are within half the validity period of expiration
// nb: we SELECT not_valid_after and validity_period here so we can use them in the HAVING clause, but
Expand All @@ -2916,17 +2872,12 @@ func (ds *Datastore) RenewMDMManagedCertificates(ctx context.Context) error {
`+table+` hp
ON hmmc.host_uuid = hp.host_uuid AND hmmc.profile_uuid = hp.profile_uuid
WHERE
hmmc.type = ?
AND hp.operation_type = ?
AND (
hp.status = ?
OR (hp.status = ? AND hp.updated_at < DATE_SUB(NOW(), INTERVAL ? SECOND))
)
hmmc.type = ? AND hp.status IS NOT NULL AND hp.operation_type = ?
HAVING
validity_period IS NOT NULL AND
((validity_period > 30 AND not_valid_after < DATE_ADD(NOW(), INTERVAL 30 DAY)) OR
(validity_period <= 30 AND not_valid_after < DATE_ADD(NOW(), INTERVAL validity_period/2 DAY)))
LIMIT ?`, hostCertType, fleet.MDMOperationTypeInstall, fleet.MDMDeliveryVerified, fleet.MDMDeliveryFailed, int(renewalFailedRetryBackoff.Seconds()), limit)
LIMIT ?`, hostCertType, fleet.MDMOperationTypeInstall, limit)
if err != nil {
return ctxerr.Wrap(ctx, err, "retrieving mdm managed certificates to renew")
}
Expand Down
8 changes: 2 additions & 6 deletions server/fleet/mdm.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,12 +85,8 @@ const (
FleetVarSmallstepSCEPProxyURLPrefix FleetVarName = "SMALLSTEP_SCEP_PROXY_URL_"
FleetVarSCEPWindowsCertificateID FleetVarName = "SCEP_WINDOWS_CERTIFICATE_ID" // nolint:gosec // G101: Potential hardcoded credentials

// OneTimeChallengeTTL is the time to live for one-time challenges. The challenge is
// generated at profile-render time but consumed when the device makes its SCEP request,
// which can be hours or days later if the device is offline (asleep, on a plane, etc.).
// 7 days covers a typical absence without being unbounded; once consumed, the challenge
// is deleted immediately regardless of TTL. See issue #44111.
OneTimeChallengeTTL = 7 * 24 * time.Hour
// OneTimeChallengeTTL is the time to live for one-time challenges.
OneTimeChallengeTTL = 1 * time.Hour
)

// HasCAVariables returns true if any of the given Fleet variable names
Expand Down
Loading
Loading