Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -1138,14 +1138,17 @@ func getCPUTargeterConfig(v *viper.Viper) (tracker.TargeterConfig, error) {
}
}

func getDiskSpaceConfig(v *viper.Viper) (requiredAvailableDiskSpace uint64, warningThresholdAvailableDiskSpace uint64, err error) {
func getDiskSpaceConfig(v *viper.Viper) (requiredAvailableDiskSpace uint64, warningThresholdAvailableDiskSpace uint64, warningThresholdAvailableDiskSpacePercentage uint64, err error) {
requiredAvailableDiskSpace = v.GetUint64(SystemTrackerRequiredAvailableDiskSpaceKey)
warningThresholdAvailableDiskSpace = v.GetUint64(SystemTrackerWarningThresholdAvailableDiskSpaceKey)
warningThresholdAvailableDiskSpacePercentage = v.GetUint64(SystemTrackerWarnThreshAvailDiskSpacePercentageKey)
switch {
case warningThresholdAvailableDiskSpacePercentage > 50:
return 0, 0, 0, fmt.Errorf("%q (%d) must be in [0, 50]", SystemTrackerWarnThreshAvailDiskSpacePercentageKey, warningThresholdAvailableDiskSpacePercentage)
case warningThresholdAvailableDiskSpace < requiredAvailableDiskSpace:
return 0, 0, fmt.Errorf("%q (%d) < %q (%d)", SystemTrackerWarningThresholdAvailableDiskSpaceKey, warningThresholdAvailableDiskSpace, SystemTrackerRequiredAvailableDiskSpaceKey, requiredAvailableDiskSpace)
return 0, 0, 0, fmt.Errorf("%q (%d) < %q (%d)", SystemTrackerWarningThresholdAvailableDiskSpaceKey, warningThresholdAvailableDiskSpace, SystemTrackerRequiredAvailableDiskSpaceKey, requiredAvailableDiskSpace)
default:
return requiredAvailableDiskSpace, warningThresholdAvailableDiskSpace, nil
return requiredAvailableDiskSpace, warningThresholdAvailableDiskSpace, warningThresholdAvailableDiskSpacePercentage, nil
}
}

Expand Down Expand Up @@ -1400,7 +1403,7 @@ func GetNodeConfig(v *viper.Viper) (node.Config, error) {
nodeConfig.SystemTrackerCPUHalflife = v.GetDuration(SystemTrackerCPUHalflifeKey)
nodeConfig.SystemTrackerDiskHalflife = v.GetDuration(SystemTrackerDiskHalflifeKey)

nodeConfig.RequiredAvailableDiskSpace, nodeConfig.WarningThresholdAvailableDiskSpace, err = getDiskSpaceConfig(v)
nodeConfig.RequiredAvailableDiskSpace, nodeConfig.WarningThresholdAvailableDiskSpace, nodeConfig.WarningThresholdAvailableDiskSpacePercentage, err = getDiskSpaceConfig(v)
if err != nil {
return node.Config{}, err
}
Expand Down
1 change: 1 addition & 0 deletions config/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,7 @@ func addNodeFlags(fs *pflag.FlagSet) {
fs.Duration(SystemTrackerDiskHalflifeKey, time.Minute, "Halflife to use for the disk tracker. Larger halflife --> disk usage metrics change more slowly")
fs.Uint64(SystemTrackerRequiredAvailableDiskSpaceKey, 10*units.GiB, "Minimum number of available bytes on disk, under which the node will shutdown.")
fs.Uint64(SystemTrackerWarningThresholdAvailableDiskSpaceKey, 200*units.GiB, fmt.Sprintf("Warning threshold for the number of available bytes on disk, under which the node will be considered unhealthy. Must be >= [%s]", SystemTrackerRequiredAvailableDiskSpaceKey))
fs.Uint64(SystemTrackerWarnThreshAvailDiskSpacePercentageKey, 3, "Warning threshold for the percentage (between 0 and 50) of available disk space, under which the node will be considered unhealthy.")

// CPU management
fs.Float64(CPUVdrAllocKey, float64(runtime.NumCPU()), "Maximum number of CPUs to allocate for use by validators. Value should be in range [0, total core count]")
Expand Down
1 change: 1 addition & 0 deletions config/keys.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,7 @@ const (
SystemTrackerDiskHalflifeKey = "system-tracker-disk-halflife"
SystemTrackerRequiredAvailableDiskSpaceKey = "system-tracker-disk-required-available-space"
SystemTrackerWarningThresholdAvailableDiskSpaceKey = "system-tracker-disk-warning-threshold-available-space"
SystemTrackerWarnThreshAvailDiskSpacePercentageKey = "system-tracker-disk-warning-threshold-available-space-percentage"
DiskVdrAllocKey = "throttler-inbound-disk-validator-alloc"
DiskMaxNonVdrUsageKey = "throttler-inbound-disk-max-non-validator-usage"
DiskMaxNonVdrNodeUsageKey = "throttler-inbound-disk-max-non-validator-node-usage"
Expand Down
5 changes: 3 additions & 2 deletions config/node/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -205,8 +205,9 @@ type Config struct {

DiskTargeterConfig tracker.TargeterConfig `json:"diskTargeterConfig"`

RequiredAvailableDiskSpace uint64 `json:"requiredAvailableDiskSpace"`
WarningThresholdAvailableDiskSpace uint64 `json:"warningThresholdAvailableDiskSpace"`
RequiredAvailableDiskSpace uint64 `json:"requiredAvailableDiskSpace"`
WarningThresholdAvailableDiskSpace uint64 `json:"warningThresholdAvailableDiskSpace"`
WarningThresholdAvailableDiskSpacePercentage uint64 `json:"warningThresholdAvailableDiskSpacePercentage"`

TraceConfig trace.Config `json:"traceConfig"`

Expand Down
19 changes: 15 additions & 4 deletions node/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -1457,20 +1457,31 @@ func (n *Node) initHealthAPI() error {
// if there is too little disk space remaining, first report unhealthy and then shutdown the node

availableDiskBytes := n.resourceTracker.DiskTracker().AvailableDiskBytes()
availableDiskPercentage := n.resourceTracker.DiskTracker().AvailableDiskPercentage()

var err error
var diskSpaceErrors []error
if availableDiskBytes < n.Config.RequiredAvailableDiskSpace {
n.Log.Fatal("low on disk space. Shutting down...",
zap.Uint64("remainingDiskBytes", availableDiskBytes),
)
go n.Shutdown(1)
err = fmt.Errorf("remaining available disk space (%d) is below minimum required available space (%d)", availableDiskBytes, n.Config.RequiredAvailableDiskSpace)
err := fmt.Errorf("remaining available disk space (%d) is below minimum required available space (%d)", availableDiskBytes, n.Config.RequiredAvailableDiskSpace)
diskSpaceErrors = append(diskSpaceErrors, err)
} else if availableDiskBytes < n.Config.WarningThresholdAvailableDiskSpace {
err = fmt.Errorf("remaining available disk space (%d) is below the warning threshold of disk space (%d)", availableDiskBytes, n.Config.WarningThresholdAvailableDiskSpace)
err := fmt.Errorf("remaining available disk space (%d) is below the warning threshold of disk space (%d)", availableDiskBytes, n.Config.WarningThresholdAvailableDiskSpace)
diskSpaceErrors = append(diskSpaceErrors, err)
}

if availableDiskPercentage < n.Config.WarningThresholdAvailableDiskSpacePercentage {
err := fmt.Errorf("remaining available disk space percentage (%d%%) is below minimum required available space percentage (%d%%)", availableDiskPercentage, n.Config.WarningThresholdAvailableDiskSpacePercentage)
diskSpaceErrors = append(diskSpaceErrors, err)
}

err = errors.Join(diskSpaceErrors...)

return map[string]interface{}{
"availableDiskBytes": availableDiskBytes,
"availableDiskBytes": availableDiskBytes,
"availableDiskPercentage": availableDiskPercentage,
}, err
})

Expand Down
27 changes: 22 additions & 5 deletions snow/networking/tracker/resource_tracker.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ type Tracker interface {
// DiskTracker is a Tracker that additionally reports how much disk space is
// currently available.
type DiskTracker interface {
Tracker
// AvailableDiskBytes returns the available disk space, in bytes.
AvailableDiskBytes() uint64
// AvailableDiskPercentage returns the available disk space as a percentage.
AvailableDiskPercentage() uint64
}

// ResourceTracker is an interface for tracking peers' usage of resources
Expand Down Expand Up @@ -150,6 +151,16 @@ func (t *diskResourceTracker) AvailableDiskBytes() uint64 {
return bytesAvailable
}

// AvailableDiskPercentage returns the available-disk percentage reported by
// the underlying resource manager and publishes the same value to the
// diskPercentageAvailable gauge.
//
// The tracker lock is held for the whole call.
func (t *diskResourceTracker) AvailableDiskPercentage() uint64 {
	t.t.lock.Lock()
	defer t.t.lock.Unlock()

	available := t.t.resources.AvailableDiskPercentage()
	t.t.metrics.diskPercentageAvailable.Set(float64(available))
	return available
}

func (t *diskResourceTracker) TotalUsage() float64 {
realReadUsage, _ := t.t.resources.DiskUsage()
return realReadUsage
Expand Down Expand Up @@ -286,11 +297,12 @@ func (rt *resourceTracker) prune(now time.Time) {
}

// trackerMetrics groups the Prometheus gauges that the resource tracker
// registers and updates.
type trackerMetrics struct {
	processingTimeMetric prometheus.Gauge
	cpuMetric            prometheus.Gauge
	diskReadsMetric      prometheus.Gauge
	diskWritesMetric     prometheus.Gauge
	// diskSpaceAvailable is the available space remaining (bytes) on the
	// database volume.
	diskSpaceAvailable prometheus.Gauge
	// diskPercentageAvailable is the percentage of the database volume that
	// is available.
	diskPercentageAvailable prometheus.Gauge
}

func newCPUTrackerMetrics(reg prometheus.Registerer) (*trackerMetrics, error) {
Expand All @@ -315,13 +327,18 @@ func newCPUTrackerMetrics(reg prometheus.Registerer) (*trackerMetrics, error) {
Name: "disk_available_space",
Help: "Available space remaining (bytes) on the database volume",
}),
diskPercentageAvailable: prometheus.NewGauge(prometheus.GaugeOpts{
Name: "disk_available_percentage",
Help: "Percentage of database volume available",
}),
}
err := errors.Join(
reg.Register(m.processingTimeMetric),
reg.Register(m.cpuMetric),
reg.Register(m.diskReadsMetric),
reg.Register(m.diskWritesMetric),
reg.Register(m.diskSpaceAvailable),
reg.Register(m.diskPercentageAvailable),
)
return m, err
}
4 changes: 4 additions & 0 deletions utils/resource/no_usage.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ var NoUsage User = noUsage{}

type noUsage struct{}

// AvailableDiskPercentage always reports 100.
func (noUsage) AvailableDiskPercentage() uint64 {
	const allAvailable uint64 = 100
	return allAvailable
}

// CPUUsage always reports 0.
func (noUsage) CPUUsage() float64 {
	const none float64 = 0
	return none
}
Expand Down
14 changes: 14 additions & 0 deletions utils/resource/resourcemock/user.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 14 additions & 1 deletion utils/resource/usage.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ type DiskUser interface {

// returns number of bytes available in the db volume
AvailableDiskBytes() uint64

// returns percentage free in the db volume
AvailableDiskPercentage() uint64
}

type User interface {
Expand Down Expand Up @@ -82,6 +85,8 @@ type manager struct {

availableDiskBytes uint64

availableDiskPercent uint64

closeOnce sync.Once
onClose chan struct{}
}
Expand Down Expand Up @@ -132,6 +137,13 @@ func (m *manager) AvailableDiskBytes() uint64 {
return m.availableDiskBytes
}

// AvailableDiskPercentage returns the most recently recorded percentage of
// free space in the db volume. The value is read while holding usageLock's
// read lock.
func (m *manager) AvailableDiskPercentage() uint64 {
	m.usageLock.RLock()
	percent := m.availableDiskPercent
	m.usageLock.RUnlock()

	return percent
}

func (m *manager) TrackProcess(pid int) {
p, err := process.NewProcess(int32(pid))
if err != nil {
Expand Down Expand Up @@ -174,7 +186,7 @@ func (m *manager) update(diskPath string, frequency, cpuHalflife, diskHalflife t
currentScaledReadUsage := newDiskWeight * currentReadUsage
currentScaledWriteUsage := newDiskWeight * currentWriteUsage

availableBytes, getBytesErr := storage.AvailableBytes(diskPath)
availableBytes, availablePercentage, getBytesErr := storage.AvailableBytes(diskPath)
if getBytesErr != nil {
m.log.Verbo("failed to lookup resource",
zap.String("resource", "system disk"),
Expand All @@ -190,6 +202,7 @@ func (m *manager) update(diskPath string, frequency, cpuHalflife, diskHalflife t

if getBytesErr == nil {
m.availableDiskBytes = availableBytes
m.availableDiskPercent = availablePercentage
}

m.usageLock.Unlock()
Expand Down
19 changes: 14 additions & 5 deletions utils/storage/storage_openbsd.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,23 @@

package storage

import "syscall"
import (
"errors"
"syscall"
)

func AvailableBytes(storagePath string) (uint64, error) {
var errZeroAvailableBytes = errors.New("available blocks is reported as 0")

func AvailableBytes(storagePath string) (uint64, uint64, error) {
var stat syscall.Statfs_t
err := syscall.Statfs(storagePath, &stat)
if err != nil {
return 0, err
return 0, 0, err
}
if stat.F_blocks == 0 {
return 0, 0, errZeroAvailableBytes
}
avail := uint64(stat.F_bavail) * uint64(stat.F_bsize)
return avail, nil
avail := stat.F_bavail * uint64(stat.F_bsize)
percentage := stat.F_bavail * 100 / stat.F_blocks
return avail, percentage, nil
}
17 changes: 13 additions & 4 deletions utils/storage/storage_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,23 @@

package storage

import "syscall"
import (
"errors"
"syscall"
)

// errZeroAvailableBytes is returned when the filesystem reports a total block
// count of zero, which would make the percentage computation divide by zero.
var errZeroAvailableBytes = errors.New("available blocks is reported as 0")

// AvailableBytes queries the filesystem that contains storagePath.
//
// It returns the number of available bytes, the available space as an integer
// percentage (rounded down) of the filesystem's total blocks, and any error
// from the statfs call. On error both numeric results are zero.
func AvailableBytes(storagePath string) (uint64, uint64, error) {
	var stat syscall.Statfs_t
	if err := syscall.Statfs(storagePath, &stat); err != nil {
		return 0, 0, err
	}
	if stat.Blocks == 0 {
		return 0, 0, errZeroAvailableBytes
	}

	avail := stat.Bavail * uint64(stat.Bsize)
	percentage := stat.Bavail * 100 / stat.Blocks
	return avail, percentage, nil
}
Loading