fix: request/limits mismatch with many containers in a pod (#21)
* fix apply command for oomer.yaml

* wip: container index fix

* add simple helper to retrieve podSpecIndex

This is used to map the correct memory limits/requests to the relevant container that was OOMKilled

* update docstring

* alter usage of plugin integration tests to suit pkg usage

* remove redundant use of index in .Status.ContainerStatuses

* add wip test for checking resources are matching

* add helper function for retrieving deployment manifest resource lim/request

* add test for matching plugin.Run func result to manifest mem req/lim

* update comment for hardcoded container index

* add docstring for purpose of TearDownSuite with RequiresClusterTests

* add knownIndex assignment for explanation with getMemoryRequestAndLimitFromDeploymentManifest func

* fix typo in README development section
jdockerty authored Dec 10, 2022
1 parent 5d665f4 commit 1ab0cef
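The bug being fixed: `Spec.Containers` was indexed with the loop index taken from `Status.ContainerStatuses`, and those two slices are not guaranteed to share an ordering when a pod runs several containers, so the reported memory request/limit could belong to the wrong container. Below is a minimal, self-contained sketch of the name-based lookup the commit introduces, written against the standard `k8s.io/api/core/v1` and `k8s.io/apimachinery` resource types; `specIndexByName` and the example pod are illustrative stand-ins, not code from the repository.

```go
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

// specIndexByName mirrors the idea behind the new getPodSpecIndex helper:
// resolve a container's position in the pod spec by name instead of trusting
// the ordering of the status slice.
func specIndexByName(name string, pod v1.Pod) (int, error) {
	for i, c := range pod.Spec.Containers {
		if c.Name == name {
			return i, nil
		}
	}
	return -1, fmt.Errorf("no container %q in pod spec", name)
}

func main() {
	// A hypothetical pod whose status slice is ordered differently from its spec.
	pod := v1.Pod{
		Spec: v1.PodSpec{
			Containers: []v1.Container{
				{Name: "sidecar", Resources: v1.ResourceRequirements{
					Limits: v1.ResourceList{v1.ResourceMemory: resource.MustParse("64Mi")},
				}},
				{Name: "app", Resources: v1.ResourceRequirements{
					Limits: v1.ResourceList{v1.ResourceMemory: resource.MustParse("128Mi")},
				}},
			},
		},
		Status: v1.PodStatus{
			ContainerStatuses: []v1.ContainerStatus{
				{Name: "app"},     // index 0 in the status slice...
				{Name: "sidecar"}, // ...but index 1 in the spec.
			},
		},
	}

	for i, cs := range pod.Status.ContainerStatuses {
		specIdx, _ := specIndexByName(cs.Name, pod)
		// Old behaviour: reuse the status index. New behaviour: look up by name.
		buggy := pod.Spec.Containers[i].Resources.Limits.Memory().String()
		fixed := pod.Spec.Containers[specIdx].Resources.Limits.Memory().String()
		fmt.Printf("%s: index-based=%s name-based=%s\n", cs.Name, buggy, fixed)
	}
}
```

With the status slice ordered differently from the spec, the index-based lookup attributes the sidecar's 64Mi limit to the `app` container, while the name-based lookup returns the expected 128Mi: the request/limits mismatch described in the commit title.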
Showing 4 changed files with 137 additions and 32 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -86,10 +86,10 @@ jaeger-agent-4k845 jaeger-agent 100Mi 100Mi 2022-11-11 21:06:3

### Development

If you wish to force some `OOMKilled` for testing purposes, you can use [`oomer`](https://github.com/jdockerty/oomer)
If you wish to force some `OOMKilled` pods for testing purposes, you can use [`oomer`](https://github.com/jdockerty/oomer)
or simply run

kubectl apply -f https://github.com/jdockerty/oomer/blob/main/oomer.yaml
kubectl apply -f https://raw.githubusercontent.com/jdockerty/oomer/main/oomer.yaml

This will create the `oomkilled` namespace and a `Deployment` with pods that continually exit with code `137`,
in order to be picked up by `oomd`.
1 change: 0 additions & 1 deletion cmd/plugin/cli/root.go
@@ -34,7 +34,6 @@ var (
// Only 'time' is supported currently.
sortField string


// Formatting for table output, similar to other kubectl commands.
t = tabwriter.NewWriter(os.Stdout, 10, 1, 5, ' ', 0)
)
35 changes: 26 additions & 9 deletions pkg/plugin/plugin.go
@@ -59,6 +59,20 @@ func getK8sClientAndConfig(configFlags *genericclioptions.ConfigFlags) (*kuberne
return clientset, config, nil
}

// getPodSpecIndex is a helper function to return the index of a container
// within the containers list of the pod specification. This is used as the
// index, as the index which appears within the containerStatus field is not
// guaranteed to be the same.
func getPodSpecIndex(name string, pod v1.Pod) (int, error) {

for i, c := range pod.Spec.Containers {
if name == c.Name {
return i, nil
}
}
return -1, fmt.Errorf("unable to retrieve pod spec index for %s", name)
}

// GetNamespace will retrieve the current namespace from the provided namespace or kubeconfig file of the caller
// or handle the return of the all namespaces shortcut when the flag is set.
func GetNamespace(configFlags *genericclioptions.ConfigFlags, all bool, givenNamespace string) (string, error) {
@@ -105,13 +119,12 @@ func BuildTerminatedPodsInfo(client *kubernetes.Clientset, namespace string) (Te
return nil, fmt.Errorf("failed to list pods: %w", err)
}

terminatedPods := TerminatedPodsFilter(pods.Items)

var terminatedPodsInfo []TerminatedPodInfo

for _, pod := range terminatedPods {
terminatedPods := TerminatedPodsFilter(pods.Items)

for i, containerStatus := range pod.Status.ContainerStatuses {
for _, pod := range terminatedPods {
for _, containerStatus := range pod.Status.ContainerStatuses {

// Not every container within the pod will be in a terminated state, we skip these ones.
// This also means we can use the relevant index to directly access the container,
@@ -120,8 +133,13 @@ func BuildTerminatedPodsInfo(client *kubernetes.Clientset, namespace string) (Te
continue
}

containerStartTime := pod.Status.ContainerStatuses[i].LastTerminationState.Terminated.StartedAt.String()
containerTerminatedTime := pod.Status.ContainerStatuses[i].LastTerminationState.Terminated.FinishedAt
containerStartTime := containerStatus.LastTerminationState.Terminated.StartedAt.String()
containerTerminatedTime := containerStatus.LastTerminationState.Terminated.FinishedAt

podSpecIndex, err := getPodSpecIndex(containerStatus.Name, pod)
if err != nil {
return nil, err
}

// Build our terminated pod info struct
info := TerminatedPodInfo{
@@ -131,11 +149,10 @@ func BuildTerminatedPodsInfo(client *kubernetes.Clientset, namespace string) (Te
terminatedTime: containerTerminatedTime.Time,
TerminatedTime: containerTerminatedTime.String(),
Memory: MemoryInfo{
Limit: pod.Spec.Containers[i].Resources.Limits.Memory().String(),
Request: pod.Spec.Containers[i].Resources.Requests.Memory().String(),
Limit: pod.Spec.Containers[podSpecIndex].Resources.Limits.Memory().String(),
Request: pod.Spec.Containers[podSpecIndex].Resources.Requests.Memory().String(),
},
}

// TODO: Since we know all pods here have been in the "terminated state", can we
// achieve this same result in an elegant way?
terminatedPodsInfo = append(terminatedPodsInfo, info)
129 changes: 109 additions & 20 deletions pkg/plugin/plugin_test.go
@@ -1,15 +1,23 @@
package plugin

import (
"bufio"
"fmt"
"io"
"net/http"
"os"
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/suite"
appsv1 "k8s.io/api/apps/v1"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
k8syaml "k8s.io/apimachinery/pkg/util/yaml"
"k8s.io/cli-runtime/pkg/genericclioptions"
"k8s.io/kubectl/pkg/cmd"
"k8s.io/kubectl/pkg/scheme"
)

var KubernetesConfigFlags = genericclioptions.NewConfigFlags(false)
@@ -19,23 +27,15 @@ const (
// The namespace to use with tests that do not require access to a working cluster.
testNamespace = "test-namespace"

// The namespace defined for use with the `oomer` manifest.
integrationTestNamespace = "oomkilled"

// A manifest which contains the `oomkilled` namespace and `oomer` deployment for testing purposes.
forceOOMKilledManifest = "https://raw.githubusercontent.com/jdockerty/oomer/main/oomer.yaml"
)

func setupIntegrationTestDependencies(t *testing.T) {
type RequiresClusterTests struct {
suite.Suite

err := applyOrDeleteOOMKilledManifest(false)
if err != nil {
t.Fatalf("unable to apply OOMKilled manifest: %s", err)
}
defer applyOrDeleteOOMKilledManifest(true)

t.Log("Waiting 20 seconds for pods to start being OOMKilled...")
time.Sleep(20 * time.Second)
// The namespace defined for use with the `oomer` manifest.
IntegrationTestNamespace string
}

// applyOrDeleteOOMKilledManifest is a rather hacky way to utilise `kubectl` within
@@ -56,21 +56,110 @@ func applyOrDeleteOOMKilledManifest(runDelete bool) error {
return nil
}

// getMemoryRequestAndLimitFromDeploymentManifest is a helper function for retrieving the
// string representation of a container's memory limit and request, such as "128Mi", from
// its own manifest.
func getMemoryRequestAndLimitFromDeploymentManifest(r io.Reader, containerIndex int) (string, string, error) {

multir := k8syaml.NewYAMLReader(bufio.NewReader(r))

for {

buf, err := multir.Read()
if err != nil {
if err == io.EOF {
break
}
return "", "", err
}

obj, gvk, err := scheme.Codecs.UniversalDeserializer().Decode(buf, nil, nil)
if err != nil {
return "", "", err
}

switch gvk.Kind {
case "Deployment":
d := obj.(*appsv1.Deployment)
request := d.Spec.Template.Spec.Containers[containerIndex].Resources.Requests["memory"]
limit := d.Spec.Template.Spec.Containers[containerIndex].Resources.Limits["memory"]
return request.String(), limit.String(), nil

}
}

return "", "", fmt.Errorf("unable to get Kind=Deployment from manifest")
}

func (rc *RequiresClusterTests) SetupTest() {
rc.IntegrationTestNamespace = "oomkilled"
}

func (rc *RequiresClusterTests) SetupSuite() {
err := applyOrDeleteOOMKilledManifest(false)
if err != nil {
rc.T().Fatalf("unable to apply OOMKilled manifest: %s", err)
}

rc.T().Log("Waiting 30 seconds for pods to start being OOMKilled...")
time.Sleep(30 * time.Second)
}

// TearDownSuite runs the deletion process against the Namespace and Deployment
// created as part of the testing process.
func (rc *RequiresClusterTests) TearDownSuite() {
applyOrDeleteOOMKilledManifest(true)
}

// TestRunPlugin tests against an initialised cluster with OOMKilled pods that
// the plugin's functionality works as expected.
func TestRunPlugin(t *testing.T) {
func (rc *RequiresClusterTests) TestRunPlugin() {
pods, err := Run(KubernetesConfigFlags, rc.IntegrationTestNamespace)
assert.Nil(rc.T(), err)

if testing.Short() {
t.Skipf("skipping %s which requires running cluster", t.Name())
}
assert.Greater(rc.T(), len(pods), 0, "expected number of failed pods to be greater than 0, got %d", len(pods))
}

setupIntegrationTestDependencies(t)
func (rc *RequiresClusterTests) TestCorrectResources() {
res, err := http.Get(forceOOMKilledManifest)
if err != nil {
// TODO Wrap these functions in a retry as they have reliance on external factors
// which could cause a failure, e.g. here an issue with GitHub would cause an
// error with getting the manifest.
rc.T().Skipf("Skipping %s as could not perform GET request for the manifest", rc.T().Name())
}
defer res.Body.Close()

// We can pass containerIndex 0 here as I control the manifest we are using, so it is
// okay to hardcode it, since I know it is the first index.
knownIndex := 0
manifestReq, manifestLim, err := getMemoryRequestAndLimitFromDeploymentManifest(res.Body, knownIndex)
assert.Nil(rc.T(), err) // We don't skip this on failure, as if we got the manifest it should be a Deployment.

pods, _ := Run(KubernetesConfigFlags, rc.IntegrationTestNamespace)

fmt.Println(manifestReq, manifestLim)
podMemoryRequest := pods[knownIndex].Pod.Spec.Containers[knownIndex].Resources.Requests["memory"]
podMemoryLimit := pods[knownIndex].Pod.Spec.Containers[knownIndex].Resources.Limits["memory"]

assert.Equal(rc.T(), podMemoryRequest.String(), manifestReq)
assert.Equal(rc.T(), podMemoryLimit.String(), manifestLim)
rc.T().Logf(
"\nMemory:\n\tManifest Request: %s\n\tManifest Limit: %s\n\tPod Request: %s\n\tPod Limit: %s\n",
manifestReq,
manifestLim,
&podMemoryRequest,
&podMemoryLimit,
)

pods, err := Run(KubernetesConfigFlags, integrationTestNamespace)
assert.Nil(t, err)
}

assert.Greater(t, len(pods), 0, "expected number of failed pods to be greater than 0, got %d", len(pods))
func TestRequiresClusterSuite(t *testing.T) {
if testing.Short() {
t.Skipf("skipping %s which requires running cluster", t.Name())
}

suite.Run(t, new(RequiresClusterTests))
}

func TestGetNamespace(t *testing.T) {