Skip to content

Commit

Permalink
MB-49381 support for sigar cgroup in projector process
Browse files Browse the repository at this point in the history
With this change, the total memory and current memory used by
the container are reported from container's cgroup info rather
than reading system limits. Similarly, the projector's maxCpuPercent
is adjusted based on the cores available to the container rather
than the cores available in the system.

Change-Id: I27682d3ef2e4127f4476ea1713a5c567187b4e03
  • Loading branch information
varunv-cb committed Jan 7, 2022
1 parent 7629b83 commit e8ecd95
Show file tree
Hide file tree
Showing 5 changed files with 100 additions and 23 deletions.
2 changes: 2 additions & 0 deletions secondary/common/settings.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ import (
)

const MAX_METAKV_RETRIES = 100
// SIGAR_INIT_RETRIES is the count the projector passes to NewRetryHelper when
// opening the sigar system-stats handle (presumably the maximum number of
// attempts -- confirm against NewRetryHelper).
const SIGAR_INIT_RETRIES = 100
// SIGAR_CGROUP_SUPPORTED is the value callers compare the control-group info's
// Supported field against to decide whether cgroup figures should be used
// instead of system-wide ones.
const SIGAR_CGROUP_SUPPORTED = 1

var maxMetaKVRetries = int32(MAX_METAKV_RETRIES)

Expand Down
37 changes: 25 additions & 12 deletions secondary/projector/memmanager/collect_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"sync/atomic"
"time"

"github.com/couchbase/indexing/secondary/common"
"github.com/couchbase/indexing/secondary/logging"
)

Expand Down Expand Up @@ -39,19 +40,31 @@ func (mgr *MemManager) runStatsCollection() {
}
mgr.updateRSS(rss)

total, err := mgr.stats.TotalMem()
if err != nil {
logging.Debugf("Fail to get total memory. Err=%v", err)
continue
}
mgr.updateMemTotal(total)

free, err := mgr.stats.ActualFreeMem()
if err != nil {
logging.Debugf("Fail to get free memory. Err=%v", err)
continue
cgroupInfo := mgr.stats.GetControlGroupInfo()

var total, free uint64
if cgroupInfo.Supported == common.SIGAR_CGROUP_SUPPORTED {
total = cgroupInfo.MemoryMax
mgr.updateMemTotal(total)

used := cgroupInfo.MemoryCurrent
free = total - used
mgr.updateMemFree(free)
} else {
total, err = mgr.stats.TotalMem()
if err != nil {
logging.Debugf("Fail to get total memory. Err=%v", err)
continue
}
mgr.updateMemTotal(total)

free, err = mgr.stats.ActualFreeMem()
if err != nil {
logging.Debugf("Fail to get free memory. Err=%v", err)
continue
}
mgr.updateMemFree(free)
}
mgr.updateMemFree(free)

count++
if count > 10 {
Expand Down
12 changes: 3 additions & 9 deletions secondary/projector/memmanager/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,19 +60,12 @@ type MemManager struct {
stats *system.SystemStats
}

func Init(statsCollectionInterval int64) error {
func Init(statsCollectionInterval int64, stats *system.SystemStats) error {
memMgr = &MemManager{
ms: &runtime.MemStats{},
}
memMgr.recentSamples = common.NewSample(4)
memMgr.olderSamples = common.NewSample(12)

// open sigar for stats
stats, err := system.NewSystemStats()
if err != nil {
logging.Errorf("Fail to start system stat collector. Err=%v", err)
return err
}
memMgr.stats = stats

// skip the first one
Expand Down Expand Up @@ -132,7 +125,8 @@ func (memMgr *MemManager) monitorMemUsage() {
currRSS, currFreeMem = rssBef, freeMemBef
}

throttleLevel := computeThrottleLevel(currRSS, currFreeMem, memMgr.memTotal)
memTotal := GetMemTotal()
throttleLevel := computeThrottleLevel(currRSS, currFreeMem, memTotal)
if throttleLevel > memThrottler.THROTTLE_LEVEL_10 {
throttleLevel = memThrottler.THROTTLE_LEVEL_10
}
Expand Down
43 changes: 41 additions & 2 deletions secondary/projector/projector.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"expvar"
"fmt"
"io"
"math"
"net/http"
"os"
"path/filepath"
Expand All @@ -28,6 +29,7 @@ import (
"github.com/couchbase/indexing/secondary/projector/memmanager"
protobuf "github.com/couchbase/indexing/secondary/protobuf/projector"
"github.com/couchbase/indexing/secondary/security"
"github.com/couchbase/indexing/secondary/system"
"github.com/golang/protobuf/proto"
)

Expand Down Expand Up @@ -87,8 +89,13 @@ func NewProjector(maxvbs int, config c.Config, certFile, keyFile, caFile string)
statsStopCh: make(chan bool, 1),
}

sysStats, err := initSystemStatsHandler()
c.CrashOnError(err)

updateMaxCpuPercent(sysStats, config)

// Setup dynamic configuration propagation
config, err := c.GetSettingsConfig(config)
config, err = c.GetSettingsConfig(config)
c.CrashOnError(err)

pconfig := config.SectionConfig("projector.", true /*trim*/)
Expand Down Expand Up @@ -116,7 +123,7 @@ func NewProjector(maxvbs int, config c.Config, certFile, keyFile, caFile string)
p.cinfoProvider = cip

systemStatsCollectionInterval := int64(config["projector.systemStatsCollectionInterval"].Int())
memmanager.Init(systemStatsCollectionInterval) // Initialize memory manager
memmanager.Init(systemStatsCollectionInterval, sysStats) // Initialize memory manager

p.stats = NewProjectorStats()
p.statsMgr = NewStatsManager(p.statsCmdCh, p.statsStopCh, config)
Expand Down Expand Up @@ -1149,3 +1156,35 @@ func (p *Projector) getNodeUUID() (string, error) {
func GetNodeUUID() string {
return nodeUUID
}

// updateMaxCpuPercent overrides the "projector.maxCpuPercent" setting in
// config when sigar reports control-group (cgroup) information.
//
// The cgroup CPU allowance (NumCpuPrc) is read from stats and the setting is
// written as 25% of that value, with a lower bound of 400. When cgroup
// information is not supported, config is left untouched and only an
// informational message is logged.
func updateMaxCpuPercent(stats *system.SystemStats, config c.Config) {
	cgroupInfo := stats.GetControlGroupInfo()

	if cgroupInfo.Supported != common.SIGAR_CGROUP_SUPPORTED {
		logging.Infof("Projector::updateMaxCpuPercent: Sigar CGroupInfo not supported")
		return
	}

	maxCpu := cgroupInfo.NumCpuPrc

	// Take a quarter of the container's CPU allowance, but never go below 400.
	cpuPercent := int(math.Max(400.0, float64(maxCpu)*0.25))
	config.SetValue("projector.maxCpuPercent", cpuPercent)

	logging.Infof("Projector::updateMaxCpuPercent: Updating projector max cpu percent to: %v "+
		"as cores available for this container are: %v", cpuPercent, maxCpu)
}

// initSystemStatsHandler opens the sigar-backed system stats collector,
// retrying through common.NewRetryHelper (SIGAR_INIT_RETRIES, starting
// interval of 3 seconds, factor 1) until system.NewSystemStats succeeds.
//
// It returns the handle produced by the last attempt together with the
// error reported by the retry helper.
func initSystemStatsHandler() (*system.SystemStats, error) {
	var handle *system.SystemStats

	// One attempt at opening sigar; the arguments supplied by the retry
	// helper are not consulted.
	openSigar := func(_ int, _ error) error {
		var openErr error
		handle, openErr = system.NewSystemStats()
		if openErr == nil {
			return nil
		}
		logging.Errorf("initSystemStatsHandler: Fail to start system stat collector. Err=%v", openErr)
		return openErr
	}

	retrier := common.NewRetryHelper(int(common.SIGAR_INIT_RETRIES), time.Second*3, 1, openSigar)

	// Run the retries before reading handle so the returned value reflects
	// the final attempt.
	runErr := retrier.Run()
	return handle, runErr
}
29 changes: 29 additions & 0 deletions secondary/system/systemStats.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package system

//#cgo LDFLAGS: -lsigar
//#include <sigar.h>
//#include <sigar_control_group.h>
import "C"

import (
Expand Down Expand Up @@ -143,3 +144,31 @@ func (h *SystemStats) SigarCpuGet() (*SigarCpuT, error) {
Total: uint64(cpu.total),
}, nil
}

// SigarControlGroupInfo is the Go-side copy of the control group (cgroup)
// information reported by sigar. It is returned by
// SystemStats.GetControlGroupInfo.
type SigarControlGroupInfo struct {
Supported uint8 // "1" if cgroup info is supported, "0" otherwise
Version uint8 // "1" for cgroup v1, "2" for cgroup v2

// The number of CPUs available in the cgroup (in % where 100% represents 1 full core)
// Derived from (cpu.cfs_quota_us/cpu.cfs_period_us) or COUCHBASE_CPU_COUNT env variable
NumCpuPrc uint16

// Maximum memory available in the group. Derived from memory.max
MemoryMax uint64

// Current memory usage by this cgroup. Derived from memory.usage_in_bytes
MemoryCurrent uint64
}

// GetControlGroupInfo asks sigar for the current control group information
// and returns it as a freshly allocated SigarControlGroupInfo.
//
// Every call invokes sigar_get_control_group_info; nothing is cached and the
// receiver's state is not consulted.
func (h *SystemStats) GetControlGroupInfo() *SigarControlGroupInfo {
	var raw C.sigar_control_group_info_t
	C.sigar_get_control_group_info(&raw)

	// Copy each C field into its fixed-width Go counterpart.
	cg := new(SigarControlGroupInfo)
	cg.Supported = uint8(raw.supported)
	cg.Version = uint8(raw.version)
	cg.NumCpuPrc = uint16(raw.num_cpu_prc)
	cg.MemoryMax = uint64(raw.memory_max)
	cg.MemoryCurrent = uint64(raw.memory_current)
	return cg
}

0 comments on commit e8ecd95

Please sign in to comment.