Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

balloons: fix empty cpuset of a balloon that has containers #907

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -214,8 +214,14 @@ func (p *balloons) AllocateResources(c cache.Container) error {
// Resize selected balloon to fit the new container, unless it
// uses the ReservedResources CPUs, which is a fixed set.
reqMilliCpus := p.containerRequestedMilliCpus(c.GetCacheID()) + p.requestedMilliCpus(bln)
if bln.AvailMilliCpus() < reqMilliCpus {
p.resizeBalloon(bln, reqMilliCpus)
// Even if all containers in a balloon request 0 mCPU in
// total (all are BestEffort, for example), force the size of
// the balloon to be enough for at least 1 mCPU
// request. Otherwise the balloon's cpuset would become empty,
// which would mean no CPU pinning, and the balloon's
// containers would run on any CPUs.
if bln.AvailMilliCpus() < max(1, reqMilliCpus) {
p.resizeBalloon(bln, max(1, reqMilliCpus))
}
p.assignContainer(c, bln)
if log.DebugEnabled() {
Expand All @@ -232,12 +238,16 @@ func (p *balloons) ReleaseResources(c cache.Container) error {
if log.DebugEnabled() {
log.Debug(p.dumpBalloon(bln))
}
// Deflate the balloon, even down to 0 mCPUs before
// possibly freeing it.
p.resizeBalloon(bln, p.requestedMilliCpus(bln))
if bln.ContainerCount() == 0 {
// Deflate the balloon completely before
// freeing it.
p.resizeBalloon(bln, 0)
log.Debug("all containers removed, free balloon allocation %s", bln.PrettyName())
p.freeBalloon(bln)
} else {
// Make sure that the balloon will have at
// least 1 CPU to run remaining containers.
p.resizeBalloon(bln, max(1, p.requestedMilliCpus(bln)))
}
} else {
log.Debug("ReleaseResources: balloon-less container %s, nothing to release", c.PrettyName())
Expand Down Expand Up @@ -1113,6 +1123,13 @@ func removeString(strings []string, element string) []string {
return strings
}

// max returns the larger of the two integers a and b.
func max(a, b int) int {
	if b > a {
		return b
	}
	return a
}

// Register us as a policy implementation.
func init() {
policy.Register(PolicyName, PolicyDescription, CreateBalloonsPolicy)
Expand Down
@@ -0,0 +1,21 @@
# Balloons policy test configuration: a 16-CPU system with one CPU
# reserved for kube-system, and a single balloon type whose size is
# capped at 4 CPUs (NUMA-node size -- TODO confirm node topology in
# the test environment).
policy:
  Active: balloons
  AvailableResources:
    CPU: cpuset:0-15
  # Reserve one of our CPUs (cpu15) for kube-system tasks.
  ReservedResources:
    CPU: 1
  balloons:
    PinCPU: true
    PinMemory: true
    BalloonTypes:
      - Name: fit-in-numa
        # All (non-system) containers are assigned to this balloon
        # type
        Namespaces:
          - "*"
        # Prevent a balloon from being inflated larger than a NUMA node
        MinCPUs: 0
        MaxCPUs: 4
        AllocationPriority: 0
        PreferNewBalloons: false
@@ -0,0 +1,99 @@
# NOTE(review): end-to-end test for the balloons policy fix that keeps
# a balloon's cpuset non-empty as long as it still hosts containers
# (even when all of them are BestEffort and request 0 mCPU in total).
# The helpers used below (terminate, launch, create, report, verify,
# and the balloons-busybox template) are presumably provided by the
# e2e test framework -- confirm before running this file standalone.
# Restart cri-resmgr with the NUMA-sized balloons configuration.
terminate cri-resmgr
cri_resmgr_cfg=${TEST_DIR}/balloons-numa.cfg launch cri-resmgr

# pod0: besteffort, make sure it still gets at least 1 CPU
CPUREQ="" CPULIM="" MEMREQ="" MEMLIM=""
CONTCOUNT=1 create balloons-busybox
report allowed
verify 'len(cpus["pod0c0"]) == 1'

# pod1: guaranteed, make sure it gets the CPU it requested.
# The configuration does not prefer creating new balloons,
# so pod0 and pod1 should be placed in the same balloon.
# Sum of their CPU requests is 1, so they should actually
# run on the same CPU.
CPUREQ="1" CPULIM="1" MEMREQ="50M" MEMLIM="50M"
CONTCOUNT=1 create balloons-busybox
report allowed
verify 'len(cpus["pod0c0"]) == 1' \
'len(cpus["pod1c0"]) == 1' \
'cpus["pod0c0"] == cpus["pod1c0"]'

# pod2: guaranteed, make sure it gets the CPU it requested.
# The shared balloon inflates to 2 CPUs (verified below).
CPUREQ="1" CPULIM="1" MEMREQ="50M" MEMLIM="50M"
CONTCOUNT=1 create balloons-busybox
report allowed
verify 'len(cpus["pod0c0"]) == 2' \
'len(cpus["pod1c0"]) == 2' \
'len(cpus["pod2c0"]) == 2' \
'cpus["pod0c0"] == cpus["pod1c0"] == cpus["pod2c0"]'

# pod3: guaranteed, make sure it gets the CPU it requested.
CPUREQ="1" CPULIM="1" MEMREQ="50M" MEMLIM="50M"
CONTCOUNT=1 create balloons-busybox
report allowed
verify 'len(cpus["pod0c0"]) == 3' \
'len(cpus["pod1c0"]) == 3' \
'len(cpus["pod2c0"]) == 3' \
'len(cpus["pod3c0"]) == 3' \
'cpus["pod0c0"] == cpus["pod1c0"] == cpus["pod2c0"] == cpus["pod3c0"]'

# pod4: guaranteed, fill up a balloon to the MaxCPU
CPUREQ="1" CPULIM="1" MEMREQ="50M" MEMLIM="50M"
CONTCOUNT=1 create balloons-busybox
report allowed
verify 'len(cpus["pod0c0"]) == 4' \
'len(cpus["pod1c0"]) == 4' \
'len(cpus["pod2c0"]) == 4' \
'len(cpus["pod3c0"]) == 4' \
'len(cpus["pod4c0"]) == 4' \
'cpus["pod0c0"] == cpus["pod1c0"] == cpus["pod2c0"] == cpus["pod3c0"] == cpus["pod4c0"]'

# pod5: besteffort, no CPU request, should fit into the full balloon
CPUREQ="" CPULIM="" MEMREQ="" MEMLIM=""
CONTCOUNT=1 create balloons-busybox
report allowed
verify 'len(cpus["pod0c0"]) == 4' \
'len(cpus["pod1c0"]) == 4' \
'len(cpus["pod2c0"]) == 4' \
'len(cpus["pod3c0"]) == 4' \
'len(cpus["pod4c0"]) == 4' \
'len(cpus["pod5c0"]) == 4' \
'cpus["pod0c0"] == cpus["pod1c0"] == cpus["pod2c0"] == cpus["pod3c0"] == cpus["pod4c0"] == cpus["pod5c0"]'

# pod6: guaranteed, start filling new balloon
# (the first balloon is at MaxCPUs=4 with 4000 mCPU requested, so a
# disjoint balloon must be created for pod6).
CPUREQ="1" CPULIM="1" MEMREQ="50M" MEMLIM="50M"
CONTCOUNT=1 create balloons-busybox
report allowed
verify 'len(cpus["pod0c0"]) == 4' \
'len(cpus["pod1c0"]) == 4' \
'len(cpus["pod2c0"]) == 4' \
'len(cpus["pod3c0"]) == 4' \
'len(cpus["pod4c0"]) == 4' \
'len(cpus["pod5c0"]) == 4' \
'len(cpus["pod6c0"]) == 1' \
'cpus["pod0c0"] == cpus["pod1c0"] == cpus["pod2c0"] == cpus["pod3c0"] == cpus["pod4c0"]' \
'disjoint_sets(cpus["pod0c0"], cpus["pod6c0"])'

# Leave only one guaranteed container to the first balloon.
kubectl delete pods pod1 pod2 pod3 --now
report allowed
verify 'len(cpus["pod0c0"]) == 1' \
'len(cpus["pod4c0"]) == 1' \
'len(cpus["pod5c0"]) == 1' \
'len(cpus["pod6c0"]) == 1' \
'cpus["pod0c0"] == cpus["pod4c0"] == cpus["pod5c0"]' \
'disjoint_sets(cpus["pod0c0"], cpus["pod6c0"])'

# Leave only bestefforts to the first balloon. Make sure they still
# have a CPU.
# (This is the regression scenario: total request drops to 0 mCPU but
# the balloon's cpuset must not become empty.)
kubectl delete pods pod4 --now
report allowed
verify 'len(cpus["pod0c0"]) == 1' \
'len(cpus["pod5c0"]) == 1' \
'len(cpus["pod6c0"]) == 1' \
'cpus["pod0c0"] == cpus["pod5c0"]' \
'disjoint_sets(cpus["pod0c0"], cpus["pod6c0"])'

# Restart cri-resmgr without the custom configuration (back to the
# default) for any subsequent tests.
terminate cri-resmgr
launch cri-resmgr