Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(dfdaemon): add disk usage metrics #2912

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 36 additions & 1 deletion client/daemon/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ var (
Namespace: types.MetricsNamespace,
Subsystem: types.DfdaemonMetricsName,
Name: "piece_task_total",
Help: "Counter of the total failed piece tasks.",
Help: "Counter of the total piece tasks.",
})

PieceTaskFailedCount = promauto.NewCounter(prometheus.CounterOpts{
Expand Down Expand Up @@ -171,6 +171,41 @@ var (
Name: "version",
Help: "Version info of the service.",
}, []string{"major", "minor", "git_version", "git_commit", "platform", "build_time", "go_version", "go_tags", "go_gcflags"})

// DataDiskUsage is a gauge of the used space on the disk holding the data
// directory. The storage manager sets it from the Used field of the disk
// usage stat it reads for the data path (presumably bytes; confirm).
// NOTE(review): the `_total` suffix is conventionally used for counters in
// Prometheus naming; consider a unit suffix such as `_bytes` before release.
DataDiskUsage = promauto.NewGauge(prometheus.GaugeOpts{
	Namespace: types.MetricsNamespace,
	Subsystem: types.DfdaemonMetricsName,
	Name:      "data_disk_usage_total",
	Help:      "Gauge of the disk usage of data directory.",
})

// DataDiskCapacity is a gauge of the total capacity of the disk holding the
// data directory. The storage manager sets it from the Total field of the
// disk usage stat it reads for the data path (presumably bytes; confirm).
// NOTE(review): the `_total` suffix is conventionally used for counters in
// Prometheus naming; consider a unit suffix such as `_bytes` before release.
DataDiskCapacity = promauto.NewGauge(prometheus.GaugeOpts{
	Namespace: types.MetricsNamespace,
	Subsystem: types.DfdaemonMetricsName,
	Name:      "data_disk_capacity_total",
	Help:      "Gauge of disk capacity of data directory.",
})

// DataUnReclaimedUsage is a gauge of the data size that has not been marked
// for reclaim. The storage manager sets it from the total not-marked size it
// computes during its GC pass (presumably bytes; confirm).
// NOTE(review): the `_total` suffix is conventionally used for counters in
// Prometheus naming; consider a unit suffix such as `_bytes` before release.
DataUnReclaimedUsage = promauto.NewGauge(prometheus.GaugeOpts{
	Namespace: types.MetricsNamespace,
	Subsystem: types.DfdaemonMetricsName,
	Name:      "data_unreclaimed_usage_total",
	Help:      "Gauge of unreclaimed data usage of data directory.",
})

// DataDiskGCThreshold is a gauge exposing the configured disk GC threshold.
// The storage manager sets it from its DiskGCThreshold store option
// (presumably bytes; confirm).
// NOTE(review): the `_total` suffix is conventionally used for counters in
// Prometheus naming; consider a unit suffix such as `_bytes` before release.
DataDiskGCThreshold = promauto.NewGauge(prometheus.GaugeOpts{
	Namespace: types.MetricsNamespace,
	Subsystem: types.DfdaemonMetricsName,
	Name:      "data_disk_gc_threshold_total",
	Help:      "Gauge of disk gc threshold of data directory.",
})

// DataDiskGCThresholdPercent is a gauge exposing the configured disk GC
// threshold percentage. The storage manager sets it from its
// DiskGCThresholdPercent store option.
DataDiskGCThresholdPercent = promauto.NewGauge(prometheus.GaugeOpts{
	Namespace: types.MetricsNamespace,
	Subsystem: types.DfdaemonMetricsName,
	Name:      "data_disk_gc_threshold_percent",
	Help:      "Gauge of disk gc threshold percent of data directory.",
})
)

func New(addr string) *http.Server {
Expand Down
30 changes: 24 additions & 6 deletions client/daemon/storage/storage_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import (
"context"
"d7y.io/dragonfly/v2/client/daemon/metrics"

Check failure on line 23 in client/daemon/storage/storage_manager.go

View workflow job for this annotation

GitHub Actions / Lint

File is not `gci`-ed with --skip-generated -s standard,default,prefix(d7y.io/api),prefix(d7y.io/dragonfly/v2) (gci)
"encoding/base64"
"encoding/json"
"errors"
Expand All @@ -43,7 +44,7 @@
commonv1 "d7y.io/api/v2/pkg/apis/common/v1"

"d7y.io/dragonfly/v2/client/config"
"d7y.io/dragonfly/v2/client/daemon/gc"

Check failure on line 47 in client/daemon/storage/storage_manager.go

View workflow job for this annotation

GitHub Actions / Lint

File is not `gci`-ed with --skip-generated -s standard,default,prefix(d7y.io/api),prefix(d7y.io/dragonfly/v2) (gci)
"d7y.io/dragonfly/v2/client/util"
logger "d7y.io/dragonfly/v2/internal/dflog"
nethttp "d7y.io/dragonfly/v2/pkg/net/http"
Expand Down Expand Up @@ -806,10 +807,20 @@
}
return true
})

quotaBytesExceed := totalNotMarkedSize - int64(s.storeOption.DiskGCThreshold)
quotaExceed := s.storeOption.DiskGCThreshold > 0 && quotaBytesExceed > 0
usageExceed, usageBytesExceed := s.diskUsageExceed()

metrics.DataUnReclaimedUsage.Set(float64(totalNotMarkedSize))
metrics.DataDiskGCThreshold.Set(float64(s.storeOption.DiskGCThreshold))
metrics.DataDiskGCThresholdPercent.Set(s.storeOption.DiskGCThresholdPercent)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

s.storeOption.DiskGCThresholdPercent and s.storeOption.DiskGCThreshold are static, so we should not update them every time.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this is used to record the threshold so that we can display it on the monitor. Or is there a better way to achieve that?

usage := s.diskUsage()
if usage != nil {
metrics.DataDiskUsage.Set(float64(usage.Used))
metrics.DataDiskCapacity.Set(float64(usage.Total))
}

usageExceed, usageBytesExceed := s.diskUsageExceed(usage)

if quotaExceed || usageExceed {
var bytesExceed int64
Expand Down Expand Up @@ -941,13 +952,20 @@
return true, nil
}

func (s *storageManager) diskUsageExceed() (exceed bool, bytes int64) {
if s.storeOption.DiskGCThresholdPercent <= 0 {
return false, 0
}
// diskUsage queries the usage stat of the disk backing the configured data
// path. It returns the stat on success; on failure it logs a warning and
// returns nil, so callers must nil-check the result.
func (s *storageManager) diskUsage() *disk.UsageStat {
	stat, err := disk.Usage(s.storeOption.DataPath)
	if err == nil {
		return stat
	}
	// Best effort: a failed lookup is only logged, never propagated.
	logger.Warnf("get %s disk usage error: %s", s.storeOption.DataPath, err)
	return nil
}

func (s *storageManager) diskUsageExceed(usage *disk.UsageStat) (exceed bool, bytes int64) {
if s.storeOption.DiskGCThresholdPercent <= 0 {
return false, 0
}
if usage == nil {
return false, 0
}
logger.Debugf("disk usage: %+v", usage)
Expand Down
Loading