Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 27 additions & 3 deletions app/controlplane/cmd/main.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//
// Copyright 2024 The Chainloop Authors.
// Copyright 2024-2025 The Chainloop Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand All @@ -18,6 +18,7 @@ package main
import (
"context"
"fmt"
"math/rand"
_ "net/http/pprof"
"os"
"time"
Expand Down Expand Up @@ -167,9 +168,28 @@ func main() {
}
}()

// Start the background CAS Backend checker
// Start the background CAS Backend checker for DEFAULT backends (every 30 minutes)
if app.casBackendChecker != nil {
go app.casBackendChecker.Start(ctx, &biz.CASBackendCheckerOpts{CheckInterval: 30 * time.Minute})
// Calculate initial delay: 1 minute base + 0-5 minutes jitter
// This protects boot phase and spreads validation across pods
baseDelay := 1 * time.Minute
// #nosec G404 - using math/rand for jitter is acceptable, cryptographic randomness not required
jitter := time.Duration(rand.Intn(5*60)) * time.Second
initialDelay := baseDelay + jitter

go app.casBackendChecker.Start(ctx, &biz.CASBackendCheckerOpts{
CheckInterval: 30 * time.Minute,
InitialDelay: initialDelay,
OnlyDefaults: toPtr(true),
})

// Start the background CAS Backend checker for ALL backends (every 24 hours)
// Start around 24h mark to avoid overlap with default checker
go app.casBackendChecker.Start(ctx, &biz.CASBackendCheckerOpts{
CheckInterval: 24 * time.Hour,
InitialDelay: 24 * time.Hour,
OnlyDefaults: toPtr(false),
})
}

// start and wait for stop signal
Expand All @@ -178,6 +198,10 @@ func main() {
}
}

// toPtr returns a pointer to a freshly allocated copy of v.
// It is handy for populating optional pointer-typed fields from literals.
func toPtr[T any](v T) *T {
	p := new(T)
	*p = v
	return p
}

type app struct {
*kratos.App
// Periodic job that expires unfinished attestation processes older than a given threshold
Expand Down
50 changes: 28 additions & 22 deletions app/controlplane/pkg/biz/casbackend_checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,13 @@ type CASBackendChecker struct {

type CASBackendCheckerOpts struct {
// Whether to check only default backends or all backends
OnlyDefaults bool
OnlyDefaults *bool
// Interval between checks, defaults to 30 minutes
CheckInterval time.Duration
// Timeout for each individual backend validation, defaults to 10 seconds
ValidationTimeout time.Duration
// Initial delay before first validation (includes jitter). If not set, runs immediately.
InitialDelay time.Duration
}

// NewCASBackendChecker creates a new CAS backend checker that will periodically validate
Expand All @@ -65,44 +67,56 @@ func (c *CASBackendChecker) Start(ctx context.Context, opts *CASBackendCheckerOp
}

onlyDefaults := true
if opts != nil {
onlyDefaults = opts.OnlyDefaults
if opts != nil && opts.OnlyDefaults != nil {
onlyDefaults = *opts.OnlyDefaults
}

// Apply validation timeout from options if provided
if opts != nil && opts.ValidationTimeout > 0 {
c.validationTimeout = opts.ValidationTimeout
}

ticker := time.NewTicker(interval)
defer ticker.Stop()
// Apply initial delay from options if provided
var initialDelay = 0 * time.Second
if opts != nil && opts.InitialDelay > 0 {
initialDelay = opts.InitialDelay
}

c.logger.Infow("msg", "CAS backend checker configured", "initialDelay", initialDelay, "interval", interval, "allBackends", !onlyDefaults, "timeout", c.validationTimeout)

select {
case <-ctx.Done():
c.logger.Info("CAS backend checker stopping due to context cancellation before initial check")
return
case <-time.After(initialDelay):
// Continue to first check
}

// Run one check immediately
if err := c.CheckAllBackends(ctx, onlyDefaults); err != nil {
// Run first check
if err := c.checkBackends(ctx, onlyDefaults); err != nil {
c.logger.Errorf("initial CAS backend check failed: %v", err)
}

c.logger.Infof("CAS backend checker started with interval %s, checking %s, timeout %s",
interval,
conditionalString(onlyDefaults, "only default backends", "all backends"),
c.validationTimeout)
// Start periodic checks
ticker := time.NewTicker(interval)
defer ticker.Stop()

for {
select {
case <-ctx.Done():
c.logger.Info("CAS backend checker stopping due to context cancellation")
return
case <-ticker.C:
if err := c.CheckAllBackends(ctx, onlyDefaults); err != nil {
if err := c.checkBackends(ctx, onlyDefaults); err != nil {
c.logger.Errorf("periodic CAS backend check failed: %v", err)
}
}
}
}

// CheckAllBackends validates all CAS backends (or just default ones based on configuration)
// checkBackends validates all CAS backends (or just default ones based on configuration)
// using a worker pool for parallel processing with timeouts
func (c *CASBackendChecker) CheckAllBackends(ctx context.Context, onlyDefaults bool) error {
func (c *CASBackendChecker) checkBackends(ctx context.Context, onlyDefaults bool) error {
c.logger.Debug("starting CAS backend validation check")

backends, err := c.casBackendRepo.ListBackends(ctx, onlyDefaults)
Expand Down Expand Up @@ -138,11 +152,3 @@ func (c *CASBackendChecker) CheckAllBackends(ctx context.Context, onlyDefaults b
c.logger.Debug("all CAS backend validations completed")
return nil
}

// conditionalString selects one of two strings: trueStr when condition holds,
// falseStr otherwise.
func conditionalString(condition bool, trueStr, falseStr string) string {
	chosen := falseStr
	if condition {
		chosen = trueStr
	}
	return chosen
}
Loading