Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changes/35805-zero-host-counts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
* Fixed dead rows accumulating in software host counts tables by using an atomic table swap instead of in-place updates during the sync process.
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package tables

import (
"database/sql"
)

func init() {
MigrationClient.AddMigration(Up_20260223000000, Down_20260223000000)
}

func Up_20260223000000(tx *sql.Tx) error {
// Delete any accumulated zero-count rows from software_host_counts and software_titles_host_counts.
// After this migration, the sync process uses an atomic swap table pattern that never produces zero-count rows.
// Add CHECK constraints to prevent zero-count rows from being inserted in the future.
// Constraints are unnamed because the sync process uses CREATE TABLE ... LIKE to create swap tables,
// which copies CHECK constraints with auto-generated names. Named constraints would drift after each swap.

return withSteps([]migrationStep{
basicMigrationStep(
`DELETE FROM software_host_counts WHERE hosts_count = 0`,
"deleting zero-count rows from software_host_counts",
),
basicMigrationStep(
`ALTER TABLE software_host_counts ADD CHECK (hosts_count > 0)`,
"adding CHECK constraint to software_host_counts",
),
basicMigrationStep(
`DELETE FROM software_titles_host_counts WHERE hosts_count = 0`,
"deleting zero-count rows from software_titles_host_counts",
),
basicMigrationStep(
`ALTER TABLE software_titles_host_counts ADD CHECK (hosts_count > 0)`,
"adding CHECK constraint to software_titles_host_counts",
),
}, tx)
}

func Down_20260223000000(tx *sql.Tx) error {
return nil
}
10 changes: 6 additions & 4 deletions server/datastore/mysql/schema.sql

Large diffs are not rendered by default.

97 changes: 43 additions & 54 deletions server/datastore/mysql/software.go
Original file line number Diff line number Diff line change
Expand Up @@ -1678,28 +1678,21 @@ func buildOptimizedListSoftwareSQL(opts fleet.SoftwareListOptions) (string, []in
// This allows MySQL to read from the index without accessing the table,
// which is critical for performance.
// IMPORTANT: Update this query if modifying idx_software_host_counts_team_global_hosts_desc index.
// PERFORMANCE ISSUE: The hosts_count > 0 filter causes ASC queries to read the entire index
// instead of stopping after LIMIT rows like DESC queries do. While both ASC and DESC
// use the index, DESC can stop early but ASC with a range condition must read all matching rows.
// RECOMMENDED SOLUTION: Eliminate zero-count rows from this table entirely.
// This would allow removing the hosts_count > 0 filter, making ASC queries as fast as DESC
// without requiring a second index. Related issue: https://github.com/fleetdm/fleet/issues/35805
innerSQL := `
SELECT
shc.software_id,
shc.hosts_count
FROM software_host_counts shc
WHERE shc.hosts_count > 0
`

// Apply team filtering with global_stats
switch {
case opts.TeamID == nil:
innerSQL += " AND shc.team_id = 0 AND shc.global_stats = 1"
innerSQL += " WHERE shc.team_id = 0 AND shc.global_stats = 1"
case *opts.TeamID == 0:
innerSQL += " AND shc.team_id = 0 AND shc.global_stats = 0"
innerSQL += " WHERE shc.team_id = 0 AND shc.global_stats = 0"
default:
innerSQL += " AND shc.team_id = ? AND shc.global_stats = 0"
innerSQL += " WHERE shc.team_id = ? AND shc.global_stats = 0"
args = append(args, *opts.TeamID)
}

Expand Down Expand Up @@ -2154,8 +2147,6 @@ func countSoftwareDB(
countSQL += ` INNER JOIN cve_meta c ON c.cve = scv.cve`
}

countSQL += ` WHERE shc.hosts_count > 0`

var args []interface{}
var whereClauses []string
// Apply team filtering with global_stats
Expand Down Expand Up @@ -2191,8 +2182,8 @@ func countSoftwareDB(
}

// Add all WHERE clauses
for _, clause := range whereClauses {
countSQL += " AND " + clause
if len(whereClauses) > 0 {
countSQL += " WHERE " + strings.Join(whereClauses, " AND ")
}

var count int
Expand Down Expand Up @@ -2545,7 +2536,7 @@ func (ds *Datastore) SoftwareByID(ctx context.Context, id uint, teamID *uint, in
// However, it is possible that the software was deleted from all hosts after the last host count update.
q = q.Where(
goqu.L(
"EXISTS (SELECT 1 FROM software_host_counts WHERE software_id = ? AND team_id = ? AND hosts_count > 0 AND global_stats = 0)", id, *teamID,
"EXISTS (SELECT 1 FROM software_host_counts WHERE software_id = ? AND team_id = ? AND global_stats = 0)", id, *teamID,
),
)
}
Expand Down Expand Up @@ -2620,9 +2611,8 @@ func (ds *Datastore) SoftwareByID(ctx context.Context, id uint, teamID *uint, in
// on removed hosts, software uninstalled on hosts, etc.)
func (ds *Datastore) SyncHostsSoftware(ctx context.Context, updatedAt time.Time) error {
const (
resetStmt = `
UPDATE software_host_counts
SET hosts_count = 0, updated_at = ?`
swapTable = "software_host_counts_swap"
swapTableCreate = "CREATE TABLE IF NOT EXISTS " + swapTable + " LIKE software_host_counts"

// team_id is added to the select list to have the same structure as
// the teamCountsStmt, making it easier to use a common implementation
Expand All @@ -2649,7 +2639,7 @@ func (ds *Datastore) SyncHostsSoftware(ctx context.Context, updatedAt time.Time)
GROUP BY hs.software_id`

insertStmt = `
INSERT INTO software_host_counts
INSERT INTO ` + swapTable + `
(software_id, hosts_count, team_id, global_stats, updated_at)
VALUES
%s
Expand All @@ -2668,33 +2658,19 @@ func (ds *Datastore) SyncHostsSoftware(ctx context.Context, updatedAt time.Time)
LEFT JOIN software_host_counts shc
ON s.id = shc.software_id
WHERE
(shc.software_id IS NULL OR
(shc.team_id = 0 AND shc.hosts_count = 0)) AND
NOT EXISTS (SELECT 1 FROM host_software hsw WHERE hsw.software_id = s.id)
shc.software_id IS NULL AND
NOT EXISTS (SELECT 1 FROM host_software hsw WHERE hsw.software_id = s.id)
`

cleanupOrphanedStmt = `
DELETE shc
FROM
software_host_counts shc
LEFT JOIN software s ON s.id = shc.software_id
WHERE
s.id IS NULL
`

cleanupTeamStmt = `
DELETE shc
FROM software_host_counts shc
LEFT JOIN teams t
ON t.id = shc.team_id
WHERE
shc.team_id > 0 AND
t.id IS NULL`
)

// first, reset all counts to 0
if _, err := ds.writer(ctx).ExecContext(ctx, resetStmt, updatedAt); err != nil {
return ctxerr.Wrap(ctx, err, "reset all software_host_counts to 0")
// Create a fresh swap table to populate with new counts. If a previous run left a partial swap table, drop it first.
w := ds.writer(ctx)
if _, err := w.ExecContext(ctx, "DROP TABLE IF EXISTS "+swapTable); err != nil {
return ctxerr.Wrap(ctx, err, "drop existing swap table")
}
// CREATE TABLE ... LIKE copies structure including CHECK constraints (with auto-generated names).
if _, err := w.ExecContext(ctx, swapTableCreate); err != nil {
return ctxerr.Wrap(ctx, err, "create swap table")
}

db := ds.reader(ctx)
Expand Down Expand Up @@ -2772,19 +2748,32 @@ func (ds *Datastore) SyncHostsSoftware(ctx context.Context, updatedAt time.Time)

}

// remove any unused software (global counts = 0)
if _, err := ds.writer(ctx).ExecContext(ctx, cleanupSoftwareStmt); err != nil {
return ctxerr.Wrap(ctx, err, "delete unused software")
}

// remove any software count row for software that don't exist anymore
if _, err := ds.writer(ctx).ExecContext(ctx, cleanupOrphanedStmt); err != nil {
return ctxerr.Wrap(ctx, err, "delete software_host_counts for non-existing software")
// Atomic table swap: rename the swap table to the real table, drop the old one.
// Also drop any leftover old table from a previous failed swap.
if err := ds.withRetryTxx(ctx, func(tx sqlx.ExtContext) error {
_, err := tx.ExecContext(ctx, "DROP TABLE IF EXISTS software_host_counts_old")
if err != nil {
return ctxerr.Wrap(ctx, err, "drop leftover old table")
}
_, err = tx.ExecContext(ctx, `
RENAME TABLE
software_host_counts TO software_host_counts_old,
`+swapTable+` TO software_host_counts`)
if err != nil {
return ctxerr.Wrap(ctx, err, "atomic table swap")
}
_, err = tx.ExecContext(ctx, "DROP TABLE IF EXISTS software_host_counts_old")
if err != nil {
return ctxerr.Wrap(ctx, err, "drop old table after swap")
}
return nil
}); err != nil {
return err
}

// remove any software count row for teams that don't exist anymore
if _, err := ds.writer(ctx).ExecContext(ctx, cleanupTeamStmt); err != nil {
return ctxerr.Wrap(ctx, err, "delete software_host_counts for non-existing teams")
// Remove any unused software (those not in host_software).
if _, err := ds.writer(ctx).ExecContext(ctx, cleanupSoftwareStmt); err != nil {
return ctxerr.Wrap(ctx, err, "delete unused software")
}
return nil
}
Expand Down
6 changes: 3 additions & 3 deletions server/datastore/mysql/software_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1300,7 +1300,7 @@ func testSoftwareSyncHostsSoftware(t *testing.T, ds *Datastore) {
cmpNameVersionCount(want, team1Counts)

// composite pk (software_id, team_id, global_stats), so we expect more rows
checkTableTotalCount(11)
checkTableTotalCount(10)

soft1ByID, err := ds.SoftwareByID(context.Background(), host1.HostSoftware.Software[0].ID, &team1.ID, false, nil)
require.NoError(t, err)
Expand Down Expand Up @@ -1346,7 +1346,7 @@ func testSoftwareSyncHostsSoftware(t *testing.T, ds *Datastore) {
}
cmpNameVersionCount(want, team2Counts)

checkTableTotalCount(9)
checkTableTotalCount(8)

// update host4 (team2), remove all software and delete team
software4 = []fleet.Software{}
Expand Down Expand Up @@ -1384,7 +1384,7 @@ func testSoftwareSyncHostsSoftware(t *testing.T, ds *Datastore) {
cmpNameVersionCount(want, team1Counts)

listSoftwareCheckCount(t, ds, 0, 0, team2Opts, false)
checkTableTotalCount(8)
checkTableTotalCount(7)
}

// softwareChecksumComputedColumn computes the checksum for a software entry
Expand Down
72 changes: 34 additions & 38 deletions server/datastore/mysql/software_titles.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,13 +65,13 @@ SELECT
vap.icon_url AS icon_url
FROM software_titles st
%s
LEFT JOIN software_titles_host_counts sthc ON sthc.software_title_id = st.id AND sthc.hosts_count > 0 AND (%s)
LEFT JOIN software_titles_host_counts sthc ON sthc.software_title_id = st.id AND (%s)
LEFT JOIN software_installers si ON si.title_id = st.id AND si.is_active = TRUE AND %s
LEFT JOIN vpp_apps vap ON vap.title_id = st.id
LEFT JOIN vpp_apps_teams vat ON vat.adam_id = vap.adam_id AND vat.platform = vap.platform AND %s
LEFT JOIN in_house_apps iha ON iha.title_id = st.id AND %s
WHERE st.id = ? AND
(sthc.hosts_count > 0 OR vat.adam_id IS NOT NULL OR si.id IS NOT NULL OR iha.title_id IS NOT NULL)
(sthc.software_title_id IS NOT NULL OR vat.adam_id IS NOT NULL OR si.id IS NOT NULL OR iha.title_id IS NOT NULL)
GROUP BY
st.id,
st.name,
Expand Down Expand Up @@ -587,7 +587,7 @@ WHERE
{{with $defFilter := yesNo (hasTeamID .) "(si.id IS NOT NULL OR vat.adam_id IS NOT NULL OR iha.id IS NOT NULL)" "FALSE"}}
-- add software installed for hosts if we're not filtering for "available for install" only
{{if not $.AvailableForInstall}}
{{$defFilter = $defFilter | printf " ( %s OR sthc.hosts_count > 0 ) "}}
{{$defFilter = $defFilter | printf " ( %s OR sthc.software_title_id IS NOT NULL ) "}}
{{ end }}
{{if and $.SelfServiceOnly (hasTeamID $)}}
{{$defFilter = $defFilter | printf "%s AND ( si.self_service = 1 OR vat.self_service = 1 OR iha.self_service = 1 ) "}}
Expand Down Expand Up @@ -822,7 +822,6 @@ LEFT JOIN software_host_counts shc ON shc.software_id = s.id AND %s
LEFT JOIN software_cve scve ON shc.software_id = scve.software_id
WHERE s.title_id IN (?)
AND %s
AND shc.hosts_count > 0
GROUP BY s.id`

extraSelect := ""
Expand Down Expand Up @@ -854,9 +853,8 @@ GROUP BY s.id`
// table.
func (ds *Datastore) SyncHostsSoftwareTitles(ctx context.Context, updatedAt time.Time) error {
const (
resetStmt = `
UPDATE software_titles_host_counts
SET hosts_count = 0, updated_at = ?`
swapTable = "software_titles_host_counts_swap"
swapTableCreate = "CREATE TABLE IF NOT EXISTS " + swapTable + " LIKE software_titles_host_counts"

globalCountsStmt = `
SELECT
Expand Down Expand Up @@ -896,7 +894,7 @@ func (ds *Datastore) SyncHostsSoftwareTitles(ctx context.Context, updatedAt time
GROUP BY st.id`

insertStmt = `
INSERT INTO software_titles_host_counts
INSERT INTO ` + swapTable + `
(software_title_id, hosts_count, team_id, global_stats, updated_at)
VALUES
%s
Expand All @@ -905,30 +903,19 @@ func (ds *Datastore) SyncHostsSoftwareTitles(ctx context.Context, updatedAt time
updated_at = VALUES(updated_at)`

valuesPart = `(?, ?, ?, ?, ?),`

cleanupOrphanedStmt = `
DELETE sthc
FROM
software_titles_host_counts sthc
LEFT JOIN software_titles st ON st.id = sthc.software_title_id
WHERE
st.id IS NULL`

cleanupTeamStmt = `
DELETE sthc
FROM software_titles_host_counts sthc
LEFT JOIN teams t ON t.id = sthc.team_id
WHERE
sthc.team_id > 0 AND
t.id IS NULL`
)

// first, reset all counts to 0
if _, err := ds.writer(ctx).ExecContext(ctx, resetStmt, updatedAt); err != nil {
return ctxerr.Wrap(ctx, err, "reset all software_titles_host_counts to 0")
// Create a fresh swap table to populate with new counts. If a previous run left a partial swap table, drop it first.
w := ds.writer(ctx)
if _, err := w.ExecContext(ctx, "DROP TABLE IF EXISTS "+swapTable); err != nil {
return ctxerr.Wrap(ctx, err, "drop existing swap table")
}
// CREATE TABLE ... LIKE copies structure including CHECK constraints (with auto-generated names).
if _, err := w.ExecContext(ctx, swapTableCreate); err != nil {
return ctxerr.Wrap(ctx, err, "create swap table")
}

// next get a cursor for the global and team counts for each software
// Get a cursor for the global and team counts for each software title.
stmtLabel := []string{"global", "team", "no_team"}
for i, countStmt := range []string{globalCountsStmt, teamCountsStmt, noTeamCountsStmt} {
rows, err := ds.reader(ctx).QueryContext(ctx, countStmt)
Expand Down Expand Up @@ -980,16 +967,25 @@ func (ds *Datastore) SyncHostsSoftwareTitles(ctx context.Context, updatedAt time
rows.Close()
}

// remove any software count row for software that don't exist anymore
if _, err := ds.writer(ctx).ExecContext(ctx, cleanupOrphanedStmt); err != nil {
return ctxerr.Wrap(ctx, err, "delete software_titles_host_counts for non-existing software")
}

// remove any software count row for teams that don't exist anymore
if _, err := ds.writer(ctx).ExecContext(ctx, cleanupTeamStmt); err != nil {
return ctxerr.Wrap(ctx, err, "delete software_titles_host_counts for non-existing teams")
}
return nil
// Atomic table swap: rename the swap table to the real table, drop the old one.
return ds.withRetryTxx(ctx, func(tx sqlx.ExtContext) error {
_, err := tx.ExecContext(ctx, "DROP TABLE IF EXISTS software_titles_host_counts_old")
if err != nil {
return ctxerr.Wrap(ctx, err, "drop leftover old table")
}
_, err = tx.ExecContext(ctx, `
RENAME TABLE
software_titles_host_counts TO software_titles_host_counts_old,
`+swapTable+` TO software_titles_host_counts`)
if err != nil {
return ctxerr.Wrap(ctx, err, "atomic table swap")
}
_, err = tx.ExecContext(ctx, "DROP TABLE IF EXISTS software_titles_host_counts_old")
if err != nil {
return ctxerr.Wrap(ctx, err, "drop old table after swap")
}
return nil
})
}

func (ds *Datastore) UpdateSoftwareTitleAutoUpdateConfig(ctx context.Context, titleID uint, teamID uint, config fleet.SoftwareAutoUpdateConfig) error {
Expand Down
Binary file not shown.
Loading