Skip to content

Commit

Permalink
[YUNIKORN-521] Placeholder pods are not cleaned when the job is delet…
Browse files Browse the repository at this point in the history
…ed (#232)

Add an owner reference to the placeholder pods so that they are cleaned up properly.
The owner reference is copied from the owner reference of the app's first pod.
If the first pod doesn't have an owner reference set, the pod itself is used as the owner.
  • Loading branch information
kingamarton authored and yangwwei committed Mar 16, 2021
1 parent 8f276d2 commit 1a17b30
Show file tree
Hide file tree
Showing 7 changed files with 160 additions and 59 deletions.
33 changes: 28 additions & 5 deletions pkg/appmgmt/general/general.go
Expand Up @@ -19,8 +19,11 @@
package general

import (
"reflect"

"go.uber.org/zap"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/selection"
k8sCache "k8s.io/client-go/tools/cache"
Expand Down Expand Up @@ -123,20 +126,40 @@ func (os *Manager) getAppMetadata(pod *v1.Pod) (interfaces.ApplicationMetadata,
if err != nil {
log.Logger().Error("unable to get taskGroups by given pod", zap.Error(err))
}
ownerReferences := getOwnerReferences(pod)

placeholderTimeout, err := utils.GetPlaceholderTimeoutParam(pod)
if err != nil {
log.Logger().Warn("unable to get placeholder timeout by given pod.", zap.Error(err))
}
return interfaces.ApplicationMetadata{
ApplicationID: appId,
QueueName: utils.GetQueueNameFromPod(pod),
User: user,
Tags: tags,
TaskGroups: taskGroups,
ApplicationID: appId,
QueueName: utils.GetQueueNameFromPod(pod),
User: user,
Tags: tags,
TaskGroups: taskGroups,
PlaceholderTimeoutInSec: placeholderTimeout,
OwnerReferences: ownerReferences,
}, true
}

// getOwnerReferences returns the owner references to record in the
// application metadata for the given pod.
//
// If the pod has owner references of its own, a deep copy of them is
// returned. The copy is deliberate: the returned references are later
// modified through their Controller pointer when placeholder pods are built,
// and that must not change the pod passed in here (which may be a shared
// object, e.g. one served from an informer cache — TODO confirm with caller).
//
// If the pod has no owner references, a single reference pointing at the pod
// itself is returned, with Controller=false and BlockOwnerDeletion=true.
func getOwnerReferences(pod *v1.Pod) []metav1.OwnerReference {
	if len(pod.OwnerReferences) > 0 {
		refs := make([]metav1.OwnerReference, len(pod.OwnerReferences))
		for i := range pod.OwnerReferences {
			// DeepCopyInto also duplicates the *bool fields, so the
			// result shares no memory with the pod.
			pod.OwnerReferences[i].DeepCopyInto(&refs[i])
		}
		return refs
	}
	controller := false
	blockOwnerDeletion := true
	ref := metav1.OwnerReference{
		APIVersion:         v1.SchemeGroupVersion.String(),
		Kind:               reflect.TypeOf(v1.Pod{}).Name(),
		Name:               pod.Name,
		UID:                pod.UID,
		Controller:         &controller,
		BlockOwnerDeletion: &blockOwnerDeletion,
	}
	return []metav1.OwnerReference{ref}
}

// filter pods by scheduler name and state
func (os *Manager) filterPods(obj interface{}) bool {
switch obj.(type) {
Expand Down
28 changes: 28 additions & 0 deletions pkg/appmgmt/general/general_test.go
Expand Up @@ -498,3 +498,31 @@ func TestGetExistingAllocation(t *testing.T) {
assert.Equal(t, alloc.UUID, string(pod.UID))
assert.Equal(t, alloc.NodeID, "allocated-node")
}

// TestGetOwnerReferences checks both branches of getOwnerReferences: a pod
// that already has an owner reference gets that reference back, and a pod
// without one gets a single reference that points at the pod itself.
func TestGetOwnerReferences(t *testing.T) {
	ownerRef := apis.OwnerReference{
		APIVersion: apis.SchemeGroupVersion.String(),
		Name:       "owner ref",
	}
	podWithOwnerRef := &v1.Pod{
		ObjectMeta: apis.ObjectMeta{
			OwnerReferences: []apis.OwnerReference{ownerRef},
		},
	}
	podWithNoOwnerRef := &v1.Pod{
		ObjectMeta: apis.ObjectMeta{
			Name: "pod",
			UID:  "uid",
		},
	}

	// The pod's own owner reference is handed back.
	returnedOwnerRefs := getOwnerReferences(podWithOwnerRef)
	assert.Assert(t, len(returnedOwnerRefs) == 1, "Only one owner reference is expected")
	assert.DeepEqual(t, ownerRef, returnedOwnerRefs[0])

	// Without an owner reference, the pod itself becomes the owner.
	returnedOwnerRefs = getOwnerReferences(podWithNoOwnerRef)
	assert.Assert(t, len(returnedOwnerRefs) == 1, "Only one owner reference is expected")
	assert.Equal(t, returnedOwnerRefs[0].Name, podWithNoOwnerRef.Name, "Unexpected owner reference name")
	assert.Equal(t, returnedOwnerRefs[0].UID, podWithNoOwnerRef.UID, "Unexpected owner reference UID")
	assert.Equal(t, returnedOwnerRefs[0].Kind, "Pod", "Unexpected owner reference Kind")
	assert.Equal(t, returnedOwnerRefs[0].APIVersion, v1.SchemeGroupVersion.String(), "Unexpected owner reference APIVersion")
}
12 changes: 7 additions & 5 deletions pkg/appmgmt/interfaces/amprotocol.go
Expand Up @@ -20,6 +20,7 @@ package interfaces

import (
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"github.com/apache/incubator-yunikorn-k8shim/pkg/apis/yunikorn.apache.org/v1alpha1"
)
Expand Down Expand Up @@ -69,12 +70,13 @@ type AddTaskRequest struct {
}

// ApplicationMetadata carries the application-level attributes that are
// derived from a pod (see getAppMetadata in the general app manager) and
// handed to the cache when an application is added.
//
// PlaceholderTimeoutInSec and OwnerReferences are applied to the cached
// application via SetPlaceholderTimeout and setOwnReferences respectively.
type ApplicationMetadata struct {
	ApplicationID           string
	QueueName               string
	User                    string
	Tags                    map[string]string
	TaskGroups              []v1alpha1.TaskGroup
	PlaceholderTimeoutInSec int64
	OwnerReferences         []metav1.OwnerReference
}

type TaskMetadata struct {
Expand Down
34 changes: 21 additions & 13 deletions pkg/cache/application.go
Expand Up @@ -26,6 +26,7 @@ import (
"github.com/looplab/fsm"
"go.uber.org/zap"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"github.com/apache/incubator-yunikorn-core/pkg/api"
"github.com/apache/incubator-yunikorn-k8shim/pkg/apis/yunikorn.apache.org/v1alpha1"
Expand All @@ -41,19 +42,20 @@ import (
)

// Application is the cached state of a single application: its identity and
// queue placement, its tasks and task groups, the state machine driving it,
// and the settings used when creating its placeholder pods.
//
// The lock field is acquired by the accessor methods on this type (for
// example addTask and setOwnReferences) before they touch the other fields.
type Application struct {
	applicationID              string
	queue                      string
	partition                  string
	user                       string
	taskMap                    map[string]*Task
	tags                       map[string]string
	schedulingPolicy           v1alpha1.SchedulingPolicy
	taskGroups                 []v1alpha1.TaskGroup
	placeholderOwnerReferences []metav1.OwnerReference // owner references set on the placeholder pods
	sm                         *fsm.FSM
	lock                       *sync.RWMutex
	schedulerAPI               api.SchedulerAPI
	placeholderAsk             *si.Resource // total placeholder request for the app (all task groups)
	placeholderTimeoutInSec    int64
}

func (app *Application) String() string {
Expand Down Expand Up @@ -225,6 +227,12 @@ func (app *Application) getTaskGroups() []v1alpha1.TaskGroup {
return app.taskGroups
}

// setOwnReferences stores the owner references that will be attached to the
// placeholder pods of this application.
//
// The field is written, so the exclusive lock is required: a read lock can be
// held by several goroutines at once and would not protect the assignment.
func (app *Application) setOwnReferences(ref []metav1.OwnerReference) {
	app.lock.Lock()
	defer app.lock.Unlock()
	app.placeholderOwnerReferences = ref
}

func (app *Application) addTask(task *Task) {
app.lock.Lock()
defer app.lock.Unlock()
Expand Down
1 change: 1 addition & 0 deletions pkg/cache/context.go
Expand Up @@ -500,6 +500,7 @@ func (ctx *Context) AddApplication(request *interfaces.AddApplicationRequest) in
ctx.apiProvider.GetAPIs().SchedulerAPI)
app.setTaskGroups(request.Metadata.TaskGroups)
app.SetPlaceholderTimeout(request.Metadata.PlaceholderTimeoutInSec)
app.setOwnReferences(request.Metadata.OwnerReferences)

// add into cache
ctx.applications[app.applicationID] = app
Expand Down
12 changes: 12 additions & 0 deletions pkg/cache/placeholder.go
Expand Up @@ -46,6 +46,17 @@ type Placeholder struct {
}

func newPlaceholder(placeholderName string, app *Application, taskGroup v1alpha1.TaskGroup) *Placeholder {
ownerRefs := app.placeholderOwnerReferences
// we need to set the controller field to false, because since we don't know what exactly the controller will do,
// we might have some unexpected behaviour.
// For example if it is a replication controller, some pods (placeholders and/or real pods) might be deleted
// in order to meet the requested replication factor.
// Since we need the owner reference only for having the placeholders garbage collected,
// we can just set the controller field = false, so we can avoid any kind of side effects.
controller := false
for _, r := range ownerRefs {
*r.Controller = controller
}
placeholderPod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: placeholderName,
Expand All @@ -58,6 +69,7 @@ func newPlaceholder(placeholderName string, app *Application, taskGroup v1alpha1
constants.AnnotationPlaceholderFlag: "true",
constants.AnnotationTaskGroupName: taskGroup.Name,
}),
OwnerReferences: ownerRefs,
},
Spec: v1.PodSpec{
SecurityContext: &v1.PodSecurityContext{
Expand Down
99 changes: 63 additions & 36 deletions pkg/cache/placeholder_manager_test.go
Expand Up @@ -36,12 +36,69 @@ import (
"github.com/apache/incubator-yunikorn-k8shim/pkg/common/constants"
)

// Fixture values shared by the placeholder manager tests in this file:
// appID and queue are passed to NewApplication, and namespace is used as the
// value of the application's namespace tag.
const (
appID = "app01"
queue = "root.default"
namespace = "test"
)

// TestCreateAppPlaceholders creates the placeholders for an application that
// has no owner references and checks that none of the created pods carries
// one. It then makes the creation of a single placeholder fail and checks
// that the failure is reported by createAppPlaceholders.
func TestCreateAppPlaceholders(t *testing.T) {
	// appID, queue and namespace come from the package-level test constants;
	// no local copies are needed here.
	app := createAppWIthTaskGroupForTest()
	mockedAPIProvider := client.NewMockedAPIProvider()
	createdPods := createAndCheckPlaceholderCreate(mockedAPIProvider, app, t)
	for _, pod := range createdPods {
		assert.Assert(t, len(pod.OwnerReferences) == 0, "By default the pod should not have owner reference set")
	}

	// simulate placeholder creation failures
	// failed to create one placeholder
	mockedAPIProvider.MockCreateFn(func(pod *v1.Pod) (*v1.Pod, error) {
		if pod.Name == "tg-test-group-2-app01-15" {
			return nil, fmt.Errorf("failed to create pod %s", pod.Name)
		}
		return pod, nil
	})
	// placeholderMgr was set up by createAndCheckPlaceholderCreate above.
	err := placeholderMgr.createAppPlaceholders(app)
	assert.Error(t, err, "failed to create pod tg-test-group-2-app01-15")
}

// createAndCheckPlaceholderCreate installs a pod-create mock that records
// every pod it is asked to create, points the package-level placeholderMgr at
// the mocked clients, and creates the placeholders for app.
//
// It asserts that creation succeeds and that exactly 30 pods were created,
// then returns the recorded pods keyed by pod name.
func createAndCheckPlaceholderCreate(mockedAPIProvider *client.MockedAPIProvider, app *Application, t *testing.T) map[string]*v1.Pod {
	// Report assertion failures at the calling test, not inside this helper.
	t.Helper()

	createdPods := make(map[string]*v1.Pod)
	mockedAPIProvider.MockCreateFn(func(pod *v1.Pod) (*v1.Pod, error) {
		createdPods[pod.Name] = pod
		return pod, nil
	})
	placeholderMgr = &PlaceholderManager{
		clients: mockedAPIProvider.GetAPIs(),
		RWMutex: sync.RWMutex{},
	}

	err := placeholderMgr.createAppPlaceholders(app)
	assert.NilError(t, err, "create app placeholders should be successful")
	assert.Equal(t, len(createdPods), 30)
	return createdPods
}

func TestCreateAppPlaceholdersWithOwnReference(t *testing.T) {
app := createAppWIthTaskGroupForTest()
controller := true
ownRef := apis.OwnerReference{
Name: "JobId",
UID: "JobUid",
Controller: &controller,
}
app.setOwnReferences([]apis.OwnerReference{ownRef})
mockedAPIProvider := client.NewMockedAPIProvider()
pods := createAndCheckPlaceholderCreate(mockedAPIProvider, app, t)
for _, pod := range pods {
assert.Assert(t, len(pod.OwnerReferences) == 1, "The pod should have exactly one owner reference set")
assert.Assert(t, *pod.OwnerReferences[0].Controller == false, "The owner reference should not be a controller")
assert.Equal(t, pod.OwnerReferences[0].Name, ownRef.Name, "The owner reference name does not match")
assert.Equal(t, pod.OwnerReferences[0].UID, ownRef.UID, "The owner reference UID does not match")
}
}

func createAppWIthTaskGroupForTest() *Application {
mockedSchedulerAPI := newMockSchedulerAPI()
app := NewApplication(appID, queue,
"bob", map[string]string{constants.AppTagNamespace: namespace}, mockedSchedulerAPI)
Expand All @@ -63,40 +120,10 @@ func TestCreateAppPlaceholders(t *testing.T) {
},
},
})

createdPods := make(map[string]*v1.Pod)
mockedAPIProvider := client.NewMockedAPIProvider()
mockedAPIProvider.MockCreateFn(func(pod *v1.Pod) (*v1.Pod, error) {
createdPods[pod.Name] = pod
return pod, nil
})
placeholderMgr := &PlaceholderManager{
clients: mockedAPIProvider.GetAPIs(),
RWMutex: sync.RWMutex{},
}

err := placeholderMgr.createAppPlaceholders(app)
assert.NilError(t, err, "create app placeholders should be successful")
assert.Equal(t, len(createdPods), 30)

// simulate placeholder creation failures
// failed to create one placeholder
mockedAPIProvider.MockCreateFn(func(pod *v1.Pod) (*v1.Pod, error) {
if pod.Name == "tg-test-group-2-app01-15" {
return nil, fmt.Errorf("failed to create pod %s", pod.Name)
}
return pod, nil
})
err = placeholderMgr.createAppPlaceholders(app)
assert.Error(t, err, "failed to create pod tg-test-group-2-app01-15")
return app
}

func TestCleanUp(t *testing.T) {
const (
appID = "app01"
queue = "root.default"
namespace = "test"
)
mockedContext := initContextForTest()
mockedSchedulerAPI := newMockSchedulerAPI()
app := NewApplication(appID, queue,
Expand Down

0 comments on commit 1a17b30

Please sign in to comment.