From 6eba8fd8b81756b0dcbed35fe600568d46b55088 Mon Sep 17 00:00:00 2001
From: Weiwei Yang <wwei@apache.org>
Date: Tue, 27 Oct 2020 13:54:28 -0700
Subject: [PATCH 01/15] [YUNIKORN-421]  Define app gang scheduling info in API
 package. (#200)

This PR updates the app CRD types to include more info about gangs, such as
node-selector and tolerations.
---
 pkg/apis/yunikorn.apache.org/v1alpha1/type.go | 33 ++++++++++---------
 .../v1alpha1/zz_generated.deepcopy.go         | 33 ++++++++++++-------
 .../application/app_controller_test.go        | 10 +++---
 test/e2e/app/app_test.go                      | 12 +++----
 test/e2e/testdata/application.yaml            |  4 +--
 test/e2e/testdata/application_error.yaml      |  8 ++---
 6 files changed, 56 insertions(+), 44 deletions(-)

diff --git a/pkg/apis/yunikorn.apache.org/v1alpha1/type.go b/pkg/apis/yunikorn.apache.org/v1alpha1/type.go
index bd20f8b32..a21497823 100644
--- a/pkg/apis/yunikorn.apache.org/v1alpha1/type.go
+++ b/pkg/apis/yunikorn.apache.org/v1alpha1/type.go
@@ -19,6 +19,7 @@
 package v1alpha1
 
 import (
+	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
@@ -37,29 +38,31 @@ type Application struct {
 // Spec part
 
 type ApplicationSpec struct {
-	Policy    SchedulePolicy `json:"schedulingPolicy"`
-	Queue     string         `json:"queue"`
-	TaskGroup []Task         `json:"taskGroups"`
+	SchedulingPolicy SchedulingPolicy `json:"schedulingPolicy"`
+	Queue            string           `json:"queue"`
+	TaskGroups       []TaskGroup      `json:"taskGroups"`
 }
 
-type SchedulePolicy struct {
-	Policy     SchedulingPolicy  `json:"name"`
-	Parameters map[string]string `json:"parameters,omitempty"`
+type SchedulingPolicy struct {
+	Type       SchedulingPolicyType `json:"type"`
+	Parameters map[string]string    `json:"parameters,omitempty"`
 }
 
-type SchedulingPolicy string
+type SchedulingPolicyType string
 
 const (
-	TryOnce    SchedulingPolicy = "TryOnce"
-	MaxRetry   SchedulingPolicy = "MaxRetry"
-	TryReserve SchedulingPolicy = "TryReserve"
-	TryPreempt SchedulingPolicy = "TryPreempt"
+	TryOnce    SchedulingPolicyType = "TryOnce"
+	MaxRetry   SchedulingPolicyType = "MaxRetry"
+	TryReserve SchedulingPolicyType = "TryReserve"
+	TryPreempt SchedulingPolicyType = "TryPreempt"
 )
 
-type Task struct {
-	GroupName   string                       `json:"groupName"`
-	MinMember   int32                        `json:"minMember"`
-	MinResource map[string]resource.Quantity `json:"minResource"`
+type TaskGroup struct {
+	Name         string                       `json:"name"`
+	MinMember    int32                        `json:"minMember"`
+	MinResource  map[string]resource.Quantity `json:"minResource"`
+	NodeSelector metav1.LabelSelector         `json:"nodeSelector,omitempty"`
+	Tolerations  []v1.Toleration              `json:"tolerations,omitempty"`
 }
 
 // Status part
diff --git a/pkg/apis/yunikorn.apache.org/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/yunikorn.apache.org/v1alpha1/zz_generated.deepcopy.go
index 25f88eb17..bebfec861 100644
--- a/pkg/apis/yunikorn.apache.org/v1alpha1/zz_generated.deepcopy.go
+++ b/pkg/apis/yunikorn.apache.org/v1alpha1/zz_generated.deepcopy.go
@@ -22,6 +22,7 @@
 package v1alpha1
 
 import (
+	v1 "k8s.io/api/core/v1"
 	resource "k8s.io/apimachinery/pkg/api/resource"
 	runtime "k8s.io/apimachinery/pkg/runtime"
 )
@@ -90,10 +91,10 @@ func (in *ApplicationList) DeepCopyObject() runtime.Object {
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *ApplicationSpec) DeepCopyInto(out *ApplicationSpec) {
 	*out = *in
-	in.Policy.DeepCopyInto(&out.Policy)
-	if in.TaskGroup != nil {
-		in, out := &in.TaskGroup, &out.TaskGroup
-		*out = make([]Task, len(*in))
+	in.SchedulingPolicy.DeepCopyInto(&out.SchedulingPolicy)
+	if in.TaskGroups != nil {
+		in, out := &in.TaskGroups, &out.TaskGroups
+		*out = make([]TaskGroup, len(*in))
 		for i := range *in {
 			(*in)[i].DeepCopyInto(&(*out)[i])
 		}
@@ -129,7 +130,7 @@ func (in *ApplicationStatus) DeepCopy() *ApplicationStatus {
 }
 
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *SchedulePolicy) DeepCopyInto(out *SchedulePolicy) {
+func (in *SchedulingPolicy) DeepCopyInto(out *SchedulingPolicy) {
 	*out = *in
 	if in.Parameters != nil {
 		in, out := &in.Parameters, &out.Parameters
@@ -141,18 +142,18 @@ func (in *SchedulePolicy) DeepCopyInto(out *SchedulePolicy) {
 	return
 }
 
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulePolicy.
-func (in *SchedulePolicy) DeepCopy() *SchedulePolicy {
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingPolicy.
+func (in *SchedulingPolicy) DeepCopy() *SchedulingPolicy {
 	if in == nil {
 		return nil
 	}
-	out := new(SchedulePolicy)
+	out := new(SchedulingPolicy)
 	in.DeepCopyInto(out)
 	return out
 }
 
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *Task) DeepCopyInto(out *Task) {
+func (in *TaskGroup) DeepCopyInto(out *TaskGroup) {
 	*out = *in
 	if in.MinResource != nil {
 		in, out := &in.MinResource, &out.MinResource
@@ -161,15 +162,23 @@ func (in *Task) DeepCopyInto(out *Task) {
 			(*out)[key] = val.DeepCopy()
 		}
 	}
+	in.NodeSelector.DeepCopyInto(&out.NodeSelector)
+	if in.Tolerations != nil {
+		in, out := &in.Tolerations, &out.Tolerations
+		*out = make([]v1.Toleration, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
 	return
 }
 
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Task.
-func (in *Task) DeepCopy() *Task {
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TaskGroup.
+func (in *TaskGroup) DeepCopy() *TaskGroup {
 	if in == nil {
 		return nil
 	}
-	out := new(Task)
+	out := new(TaskGroup)
 	in.DeepCopyInto(out)
 	return out
 }
diff --git a/pkg/controller/application/app_controller_test.go b/pkg/controller/application/app_controller_test.go
index 5b84da579..49aa1a791 100644
--- a/pkg/controller/application/app_controller_test.go
+++ b/pkg/controller/application/app_controller_test.go
@@ -165,13 +165,13 @@ func createApp(name string, namespace string, queue string) appv1.Application {
 			UID:       "UID-APP-00001",
 		},
 		Spec: appv1.ApplicationSpec{
-			Policy: appv1.SchedulePolicy{
-				Policy: "TryOnce",
-			},
 			Queue: queue,
-			TaskGroup: []appv1.Task{
+			SchedulingPolicy: appv1.SchedulingPolicy{
+				Type: appv1.TryReserve,
+			},
+			TaskGroups: []appv1.TaskGroup{
 				{
-					GroupName: "test-task-001",
+					Name:      "test-task-001",
 					MinMember: 0,
 				},
 			},
diff --git a/test/e2e/app/app_test.go b/test/e2e/app/app_test.go
index 6d4ff994d..f3b7b8075 100644
--- a/test/e2e/app/app_test.go
+++ b/test/e2e/app/app_test.go
@@ -74,14 +74,14 @@ var _ = ginkgo.Describe("App", func() {
 			gomega.Ω(appCRD.Spec.Queue).To(gomega.Equal("root.default"))
 			gomega.Ω(appCRD.ObjectMeta.Name).To(gomega.Equal("example"))
 			gomega.Ω(appCRD.ObjectMeta.Namespace).To(gomega.Equal(dev))
-			policy := appCRD.Spec.Policy.Policy
-			gomega.Ω(string(policy)).To(gomega.Equal("TryOnce"))
-			gomega.Ω(appCRD.Spec.TaskGroup[0].GroupName).To(gomega.Equal("test-task-0001"))
-			gomega.Ω(appCRD.Spec.TaskGroup[0].MinMember).To(gomega.Equal(int32(1)))
+			policy := appCRD.Spec.SchedulingPolicy
+			gomega.Ω(policy.Type).To(gomega.Equal(v1alpha1.TryOnce))
+			gomega.Ω(appCRD.Spec.TaskGroups[0].Name).To(gomega.Equal("test-task-0001"))
+			gomega.Ω(appCRD.Spec.TaskGroups[0].MinMember).To(gomega.Equal(int32(1)))
 			anscpu := resource.MustParse("300m")
 			ansmem := resource.MustParse("128Mi")
-			gomega.Ω(appCRD.Spec.TaskGroup[0].MinResource["cpu"]).To(gomega.Equal(anscpu))
-			gomega.Ω(appCRD.Spec.TaskGroup[0].MinResource["memory"]).To(gomega.Equal(ansmem))
+			gomega.Ω(appCRD.Spec.TaskGroups[0].MinResource["cpu"]).To(gomega.Equal(anscpu))
+			gomega.Ω(appCRD.Spec.TaskGroups[0].MinResource["memory"]).To(gomega.Equal(ansmem))
 		})
 
 		ginkgo.AfterSuite(func() {
diff --git a/test/e2e/testdata/application.yaml b/test/e2e/testdata/application.yaml
index dcdc26ca1..ef6345517 100644
--- a/test/e2e/testdata/application.yaml
+++ b/test/e2e/testdata/application.yaml
@@ -21,10 +21,10 @@ metadata:
   name: example
 spec:
   schedulingPolicy:
-    name: TryOnce
+    type: TryOnce
   queue: root.default
   taskGroups:
-    - groupName: "test-task-0001"
+    - name: "test-task-0001"
       minMember: 1
       minResource:
         cpu: "300m"
diff --git a/test/e2e/testdata/application_error.yaml b/test/e2e/testdata/application_error.yaml
index 32da826a4..5be8d6e83 100644
--- a/test/e2e/testdata/application_error.yaml
+++ b/test/e2e/testdata/application_error.yaml
@@ -23,11 +23,11 @@ metadata:
 spec:
   queue: //////
   schedulingPolicy:
-    name: errorName
+    type: TryReserve
   taskGroups:
-    - groupName: 123
+    - name: "test-task-0001"
       minMember: "string"
       minResource:
-        cpu: "ABC"
-        memory: "ABC"
+        cpu: "300m"
+        memory: "128Mi"
   
\ No newline at end of file

From 1dd3c285c823ac4aa0c96f800a680c27e91aecef Mon Sep 17 00:00:00 2001
From: Ting-Yao Huang <61864060+HuangTing-Yao@users.noreply.github.com>
Date: Wed, 28 Oct 2020 23:47:14 +0800
Subject: [PATCH 02/15] [YUNIKORN-448] Create a docker image to simulate app
 that requires gang scheduling (#201)

This PR adds the docker files and scripts to build docker images for the simulated job,
such a job requires to get a configurable number of instances before it is able to execute.
These images will be used in e2e testing and performance evaluation scenarios.
---
 Makefile                                      |  29 +++++
 deployments/examples/README.md                |   8 ++
 deployments/examples/gang/README.md           |  28 +++++
 deployments/examples/gang/cmd/gangDeploy.sh   | 100 ++++++++++++++++++
 .../examples/gang/gang-coordinator.yaml       |  46 ++++++++
 deployments/examples/gang/gang-job.yaml       |  44 ++++++++
 deployments/image/gang/gangclient/dockerfile  |  21 ++++
 deployments/image/gang/webserver/dockerfile   |  22 ++++
 go.mod                                        |   1 +
 pkg/simulation/gang/gangclient/gangclient.go  |  97 +++++++++++++++++
 pkg/simulation/gang/webserver/handlers.go     |  58 ++++++++++
 pkg/simulation/gang/webserver/routes.go       |  46 ++++++++
 pkg/simulation/gang/webserver/webserver.go    |  26 +++++
 pkg/simulation/gang/webserver/webservice.go   |  76 +++++++++++++
 14 files changed, 602 insertions(+)
 create mode 100644 deployments/examples/gang/README.md
 create mode 100755 deployments/examples/gang/cmd/gangDeploy.sh
 create mode 100644 deployments/examples/gang/gang-coordinator.yaml
 create mode 100644 deployments/examples/gang/gang-job.yaml
 create mode 100644 deployments/image/gang/gangclient/dockerfile
 create mode 100644 deployments/image/gang/webserver/dockerfile
 create mode 100644 pkg/simulation/gang/gangclient/gangclient.go
 create mode 100644 pkg/simulation/gang/webserver/handlers.go
 create mode 100644 pkg/simulation/gang/webserver/routes.go
 create mode 100644 pkg/simulation/gang/webserver/webserver.go
 create mode 100644 pkg/simulation/gang/webserver/webservice.go

diff --git a/Makefile b/Makefile
index d8e6160a6..285b671d5 100644
--- a/Makefile
+++ b/Makefile
@@ -30,6 +30,9 @@ OUTPUT=_output
 RELEASE_BIN_DIR=${OUTPUT}/bin
 ADMISSION_CONTROLLER_BIN_DIR=${OUTPUT}/admission-controllers/
 POD_ADMISSION_CONTROLLER_BINARY=scheduler-admission-controller
+GANG_BIN_DIR=${OUTPUT}/gang
+GANG_CLIENT_BINARY=simulation-gang-worker
+GANG_SERVER_BINARY=simulation-gang-coordinator
 LOCAL_CONF=conf
 CONF_FILE=queues.yaml
 REPO=github.com/apache/incubator-yunikorn-k8shim/pkg
@@ -153,6 +156,32 @@ adm_image: admission
 	docker build ./deployments/image/admission -t ${REGISTRY}/yunikorn:admission-${VERSION}
 	@rm -f ./deployments/image/admission/${POD_ADMISSION_CONTROLLER_BINARY}
 
+# Build gang web server and client binary in a production ready version
+.PHONY: simulation
+simulation:
+	@echo "building gang web client binary"
+	CGO_ENABLED=0 GOOS=linux GOARCH=amd64 \
+	go build -a -o=${GANG_BIN_DIR}/${GANG_CLIENT_BINARY} -ldflags \
+	'-extldflags "-static" -X main.version=${VERSION} -X main.date=${DATE}' \
+	-tags netgo -installsuffix netgo \
+	./pkg/simulation/gang/gangclient
+	@echo "building gang web server binary"
+	go build -a -o=${GANG_BIN_DIR}/${GANG_SERVER_BINARY} -ldflags \
+	'-extldflags "-static" -X main.version=${VERSION} -X main.date=${DATE}' \
+	-tags netgo -installsuffix netgo \
+	./pkg/simulation/gang/webserver
+
+# Build gang test images based on the production ready version
+.PHONY: simulation_image
+simulation_image: simulation
+	@echo "building gang test docker images"
+	@cp ${GANG_BIN_DIR}/${GANG_CLIENT_BINARY} ./deployments/image/gang/gangclient
+	@cp ${GANG_BIN_DIR}/${GANG_SERVER_BINARY} ./deployments/image/gang/webserver
+	docker build ./deployments/image/gang/gangclient -t ${REGISTRY}/yunikorn:simulation-gang-worker-latest
+	docker build ./deployments/image/gang/webserver -t ${REGISTRY}/yunikorn:simulation-gang-coordinator-latest
+	@rm -f ./deployments/image/gang/gangclient/${GANG_CLIENT_BINARY}
+	@rm -f ./deployments/image/gang/webserver/${GANG_SERVER_BINARY}
+	
 # Build all images based on the production ready version
 .PHONY: image
 image: sched_image adm_image
diff --git a/deployments/examples/README.md b/deployments/examples/README.md
index c1281a128..639c51d47 100644
--- a/deployments/examples/README.md
+++ b/deployments/examples/README.md
@@ -49,6 +49,14 @@ Deployment files for the driver and executor:
 A simple example that runs a [kubeflow/tensorflow](./tfjob/tf-job-mnist.yaml) job.
 In this example it will run a distributed mnist model for e2e test, for full more detail see the [dist-mnist](https://github.com/kubeflow/tf-operator/tree/master/examples/v1/dist-mnist) section.
 
+## gang
+A sample application which implement gang in application level.
+Start via the [gangDeploy.sh](./gang/cmd/gangDeploy.sh) script, for full more detail see the [gang](./gang/README.md) section.
+
+Deployment file for gang-coordinator and gang-job:
+* [gang-coordinator](./gang/gang-coordinator.yaml)
+* [gang-job](./gang/gang-job.yaml)
+
 ## volumes
 The volumes directory contains three examples:
 1. [local volumes](#local-volume)
diff --git a/deployments/examples/gang/README.md b/deployments/examples/gang/README.md
new file mode 100644
index 000000000..a0a9579b4
--- /dev/null
+++ b/deployments/examples/gang/README.md
@@ -0,0 +1,28 @@
+<!--
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*      http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+-->
+
+# Gang in application level
+
+The following script runs a given number of jobs, and each one is a simulated job that requires gang scheduling support. Where the job only starts to execute its tasks when the cluster has the min member of instances running.
+
+```shell script
+./cmd/gangDeploy.sh <job number> <gang member> <task run time(sec)>
+```
+Note: if you prefer to manually launch such jobs, please refer to the `gang-coordinator.yaml` `gang-job.yaml`. Where:
+* [gang-coordinator.yaml](./gang-coordinator.yaml): the coordinator service used to coordinate the execution of each job's tasks.
+* [gang-job](./gang-job.yaml): the simulated job that requires gang members to be started before executing its tasks.
diff --git a/deployments/examples/gang/cmd/gangDeploy.sh b/deployments/examples/gang/cmd/gangDeploy.sh
new file mode 100755
index 000000000..42f4f2855
--- /dev/null
+++ b/deployments/examples/gang/cmd/gangDeploy.sh
@@ -0,0 +1,100 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+#limitations under the License.
+#
+
+# gangDeploy.sh <job amount> <pod amount> <gang member> <task run time(sec)>
+set -o errexit
+set -o nounset
+set -o pipefail
+
+JOBAMOUNT=$1
+GANGMEMBER=$2
+RUNTIMESEC=$3
+
+# create service
+kubectl create -f <(cat << EOF
+apiVersion: v1
+kind: Service
+metadata:
+  name: gangservice
+  labels:
+    app: gang
+spec:
+  selector:
+    app: gang
+  type: ClusterIP
+  ports:
+  - protocol: TCP
+    port: 8863
+    targetPort: 8863
+EOF) 
+# create job counter web server
+kubectl create -f <(cat << EOF
+apiVersion: v1
+kind: Pod
+metadata:
+  name: gangweb
+  labels:
+    app: gang
+    queue: root.sandbox
+spec:
+  schedulerName: yunikorn
+  containers:
+    - name: gangweb
+      image: apache/yunikorn:simulation-gang-coordinator-latest
+      imagePullPolicy: Never
+      ports:
+        - containerPort: 8863
+EOF)
+# wait for web server to be running
+until grep 'Running' <(kubectl get pod gangweb -o=jsonpath='{.status.phase}'); do
+  sleep 1
+done
+# create gang jobs
+for i in $(seq "$JOBAMOUNT"); do
+  kubectl create -f <(cat << EOF
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: gang-job-$i
+  labels: 
+    app: gang
+    queue: root.sandbox
+spec:
+  completions: $GANGMEMBER
+  parallelism: $GANGMEMBER
+  template:
+    spec:
+      containers:
+      - name: gang
+        image: apache/yunikorn:simulation-gang-worker-latest
+        imagePullPolicy: Never
+        env:
+        - name: JOB_ID
+          value: gang-job-$i
+        - name: SERVICE_NAME
+          value: gangservice
+        - name: MEMBER_AMOUNT
+          value: "$GANGMEMBER"
+        - name: TASK_EXECUTION_SECONDS
+          value: "$RUNTIMESEC"
+      restartPolicy: Never
+      schedulerName: yunikorn
+EOF)
+done
\ No newline at end of file
diff --git a/deployments/examples/gang/gang-coordinator.yaml b/deployments/examples/gang/gang-coordinator.yaml
new file mode 100644
index 000000000..d44776087
--- /dev/null
+++ b/deployments/examples/gang/gang-coordinator.yaml
@@ -0,0 +1,46 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+apiVersion: v1
+kind: Service
+metadata:
+  name: gangservice
+  labels:
+    app: gang
+spec:
+  selector:
+    app: gang
+  type: ClusterIP
+  ports:
+  - protocol: TCP
+    port: 8863
+    targetPort: 8863
+---
+apiVersion: v1
+kind: Pod
+metadata:
+  name: gangweb
+  labels:
+    app: gang
+    queue: root.sandbox
+spec:
+  schedulerName: yunikorn
+  containers:
+    - name: gangweb
+      image: apache/yunikorn:simulation-gang-coordinator-latest
+      imagePullPolicy: Never
+      ports:
+        - containerPort: 8863
\ No newline at end of file
diff --git a/deployments/examples/gang/gang-job.yaml b/deployments/examples/gang/gang-job.yaml
new file mode 100644
index 000000000..852e90ba0
--- /dev/null
+++ b/deployments/examples/gang/gang-job.yaml
@@ -0,0 +1,44 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: gang-job-1
+  labels: 
+    app: gang
+    queue: root.sandbox
+spec:
+  completions: 10 # The pod number will create
+  parallelism: 10 # The pod number will create
+  template:
+    spec:
+      containers:
+      - name: gang
+        image: apache/yunikorn:simulation-gang-worker-latest
+        imagePullPolicy: Never
+        env:
+        - name: JOB_ID
+          value: gang-job-1 # This job's name
+        - name: SERVICE_NAME
+          value: gangservice # The service name
+        - name: MEMBER_AMOUNT
+          value: "10" # The gang member that you hope, must small than pod number.must be string.
+        - name: TASK_EXECUTION_SECONDS
+          value: "60" # The task execution time (sec), it will start to countdown when the gang member amount be satisfied. must be string.
+      restartPolicy: Never
+      schedulerName: yunikorn
\ No newline at end of file
diff --git a/deployments/image/gang/gangclient/dockerfile b/deployments/image/gang/gangclient/dockerfile
new file mode 100644
index 000000000..f5e03c720
--- /dev/null
+++ b/deployments/image/gang/gangclient/dockerfile
@@ -0,0 +1,21 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+FROM golang:1.15.2
+WORKDIR /gang/client
+ADD . /gang/client 
+ENTRYPOINT ["./simulation-gang-worker"]
\ No newline at end of file
diff --git a/deployments/image/gang/webserver/dockerfile b/deployments/image/gang/webserver/dockerfile
new file mode 100644
index 000000000..a519d7514
--- /dev/null
+++ b/deployments/image/gang/webserver/dockerfile
@@ -0,0 +1,22 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+FROM golang:1.15.2
+WORKDIR /gang/server
+ADD . /gang/server
+EXPOSE 8863
+ENTRYPOINT ["./simulation-gang-coordinator"]
\ No newline at end of file
diff --git a/go.mod b/go.mod
index 8f5b45a31..6cc1aa5ce 100644
--- a/go.mod
+++ b/go.mod
@@ -25,6 +25,7 @@ require (
 	github.com/apache/incubator-yunikorn-core v0.0.0-20200909001345-c2699e725c8a
 	github.com/apache/incubator-yunikorn-scheduler-interface v0.9.1-0.20200901200728-b9033558f319
 	github.com/google/uuid v1.1.1
+	github.com/gorilla/mux v1.7.3
 	github.com/looplab/fsm v0.1.0
 	github.com/onsi/ginkgo v1.11.0
 	github.com/onsi/gomega v1.7.0
diff --git a/pkg/simulation/gang/gangclient/gangclient.go b/pkg/simulation/gang/gangclient/gangclient.go
new file mode 100644
index 000000000..31951f479
--- /dev/null
+++ b/pkg/simulation/gang/gangclient/gangclient.go
@@ -0,0 +1,97 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package main
+
+import (
+	"encoding/json"
+	"errors"
+	"log"
+	"net/http"
+	"os"
+	"strconv"
+	"strings"
+	"time"
+)
+
+func main() {
+	jobID := os.Getenv("JOB_ID")
+	if jobID == "" {
+		err := errors.New("can't get the jobID from env")
+		log.Fatal(err)
+	}
+	serviceName := os.Getenv("SERVICE_NAME")
+	if serviceName == "" {
+		err := errors.New("can't get the service name from env")
+		log.Fatal(err)
+	}
+	memberAmount, err := strconv.Atoi(os.Getenv("MEMBER_AMOUNT"))
+	if err != nil {
+		err = errors.New("can't get the member amount from env")
+		log.Fatal(err)
+	}
+	taskExecutionSeconds, err := strconv.Atoi(os.Getenv("TASK_EXECUTION_SECONDS"))
+	if err != nil {
+		err = errors.New("can't get the task execution seconds from env")
+		log.Fatal(err)
+	}
+	serviceName = strings.ToUpper(serviceName) + "_SERVICE_HOST"
+	serviceIP := os.Getenv(serviceName)
+	err = addRequest(serviceIP, jobID)
+	if err != nil {
+		log.Fatal(err)
+	}
+	// Check if we satisfy gang minMember or not every 2 second
+	for {
+		number, err := checkRequest(serviceIP, jobID)
+		if err != nil {
+			log.Fatal(err)
+		}
+		if number >= memberAmount {
+			log.Printf("satisfy gang minMember.")
+			log.Printf("start to run task.")
+			log.Printf("Task will run for %d min and %d sec.", taskExecutionSeconds/60, taskExecutionSeconds-((taskExecutionSeconds/60)*60))
+			time.Sleep(time.Duration(taskExecutionSeconds) * time.Second)
+			break
+		}
+		log.Printf("The ready member and expected member: %d/%d", number, memberAmount)
+		time.Sleep(2 * time.Second)
+	}
+}
+
+func addRequest(ip string, jobID string) error {
+	_, err := http.Get("http://" + ip + ":8863" + "/ws/v1/add/" + jobID)
+	if err != nil {
+		return err
+	}
+	return nil
+}
+
+func checkRequest(ip string, jobID string) (int, error) {
+	resp, err := http.Get("http://" + ip + ":8863" + "/ws/v1/check/" + jobID)
+	var value int
+	if err != nil {
+		return -1, err
+	}
+	defer resp.Body.Close()
+	err = json.NewDecoder(resp.Body).Decode(&value)
+	if err != nil {
+		return -1, err
+	}
+	return value, nil
+}
diff --git a/pkg/simulation/gang/webserver/handlers.go b/pkg/simulation/gang/webserver/handlers.go
new file mode 100644
index 000000000..e394735e7
--- /dev/null
+++ b/pkg/simulation/gang/webserver/handlers.go
@@ -0,0 +1,58 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package main
+
+import (
+	"encoding/json"
+	"log"
+	"net/http"
+
+	"github.com/gorilla/mux"
+)
+
+func writeHeader(w http.ResponseWriter) {
+	w.Header().Set("Content-Type", "application/json; charset=UTF-8")
+	w.Header().Set("Access-Control-Allow-Origin", "*")
+	w.Header().Set("Access-Control-Allow-Credentials", "true")
+	w.Header().Set("Access-Control-Allow-Methods", "GET,POST,HEAD,OPTIONS")
+	w.Header().Set("Access-Control-Allow-Headers", "X-Requested-With,Content-Type,Accept,Origin")
+}
+
+func setTaskReady(w http.ResponseWriter, r *http.Request) {
+	writeHeader(w)
+	lock.Lock()
+	defer lock.Unlock()
+	vars := mux.Vars(r)
+	jobID := vars["jobID"]
+	jobMember[jobID]++
+	if err := json.NewEncoder(w).Encode(jobMember); err != nil {
+		log.Printf("Add task %s fail.", jobID)
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+	}
+}
+
+func checkJobReady(w http.ResponseWriter, r *http.Request) {
+	writeHeader(w)
+	vars := mux.Vars(r)
+	jobID := vars["jobID"]
+	if err := json.NewEncoder(w).Encode(jobMember[jobID]); err != nil {
+		log.Printf("Check job %s Member fail", jobID)
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+	}
+}
diff --git a/pkg/simulation/gang/webserver/routes.go b/pkg/simulation/gang/webserver/routes.go
new file mode 100644
index 000000000..9f902a8ea
--- /dev/null
+++ b/pkg/simulation/gang/webserver/routes.go
@@ -0,0 +1,46 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package main
+
+import (
+	"net/http"
+)
+
+type route struct {
+	Name        string
+	Method      string
+	Pattern     string
+	HandlerFunc http.HandlerFunc
+}
+type routes []route
+
+var webRoutes = routes{
+	route{
+		"Counter",
+		"GET",
+		"/ws/v1/add/{jobID}",
+		setTaskReady,
+	},
+	route{
+		"Counter",
+		"GET",
+		"/ws/v1/check/{jobID}",
+		checkJobReady,
+	},
+}
diff --git a/pkg/simulation/gang/webserver/webserver.go b/pkg/simulation/gang/webserver/webserver.go
new file mode 100644
index 000000000..f1c6f92fa
--- /dev/null
+++ b/pkg/simulation/gang/webserver/webserver.go
@@ -0,0 +1,26 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package main
+
+var jobMember = make(map[string]int)
+
+func main() {
+	webapp := NewWebApp()
+	webapp.StartWebApp()
+}
diff --git a/pkg/simulation/gang/webserver/webservice.go b/pkg/simulation/gang/webserver/webservice.go
new file mode 100644
index 000000000..075683189
--- /dev/null
+++ b/pkg/simulation/gang/webserver/webservice.go
@@ -0,0 +1,76 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package main
+
+import (
+	"context"
+	"log"
+	"net/http"
+	"sync"
+	"time"
+
+	"github.com/gorilla/mux"
+)
+
+var lock sync.RWMutex
+
+type WebService struct {
+	httpServer *http.Server
+}
+
+func newRouter() *mux.Router {
+	router := mux.NewRouter().StrictSlash(true)
+	for _, webRoute := range webRoutes {
+		handler := loggingHandler(webRoute.HandlerFunc, webRoute.Name)
+		router.Methods(webRoute.Method).Path(webRoute.Pattern).Name(webRoute.Name).Handler(handler)
+	}
+	return router
+}
+
+func loggingHandler(inner http.Handler, name string) http.Handler {
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		start := time.Now()
+		inner.ServeHTTP(w, r)
+		log.Printf("%s %s %s %s", r.Method, r.RequestURI, name, time.Since(start))
+	})
+}
+
+func (m *WebService) StartWebApp() {
+	router := newRouter()
+	m.httpServer = &http.Server{Addr: ":8863", Handler: router}
+	log.Println("web-app started with port 8863")
+	httpError := m.httpServer.ListenAndServe()
+	if httpError != nil && httpError != http.ErrServerClosed {
+		log.Println("HTTP server error.")
+	}
+}
+
+func NewWebApp() *WebService {
+	m := &WebService{}
+	return m
+}
+
+func (m *WebService) StopWebApp() error {
+	if m.httpServer != nil {
+		ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+		defer cancel()
+		return m.httpServer.Shutdown(ctx)
+	}
+	return nil
+}

From e2ce94840eec5f69403be20a3edad2ce4896b2c0 Mon Sep 17 00:00:00 2001
From: Weiwei Yang <wwei@apache.org>
Date: Thu, 29 Oct 2020 23:28:43 -0700
Subject: [PATCH 03/15] [YUNIKORN-453] add taskGroup info to apps/tasks (#203)

* [YUNIKORN-453] add taskGroup info to apps/tasks

This PR addresses the following tasks:
  - Add taskGroups to apps/tasks
  - Add Placeholder and PlaceholderManager to manage placeholders
  - Add Reserving state in apps to handle taskGroups
  - Related UTs
---
 go.sum                                |   3 +
 pkg/cache/application.go              | 189 +++++++++++++++++++-------
 pkg/cache/application_events.go       |  27 ++++
 pkg/cache/application_test.go         | 186 ++++++++++++++++++++++++-
 pkg/cache/placeholder.go              |  85 ++++++++++++
 pkg/cache/placeholder_manager.go      |  84 ++++++++++++
 pkg/cache/placeholder_manager_test.go |  88 ++++++++++++
 pkg/cache/placeholder_test.go         |  67 +++++++++
 pkg/cache/task.go                     |  31 +++++
 pkg/cache/task_test.go                |  24 +++-
 pkg/client/apifactory_mock.go         |   6 +
 pkg/client/interfaces.go              |   3 +
 pkg/client/kubeclient.go              |   4 +
 pkg/client/kubeclient_mock.go         |  14 ++
 pkg/common/constants/constants.go     |   8 +-
 pkg/common/events/events.go           |   2 +
 pkg/common/events/states.go           |   2 +
 pkg/common/utils/gang_utils.go        | 144 ++++++++++++++++++++
 pkg/common/utils/gang_utils_test.go   | 184 +++++++++++++++++++++++++
 19 files changed, 1095 insertions(+), 56 deletions(-)
 create mode 100644 pkg/cache/placeholder.go
 create mode 100644 pkg/cache/placeholder_manager.go
 create mode 100644 pkg/cache/placeholder_manager_test.go
 create mode 100644 pkg/cache/placeholder_test.go
 create mode 100644 pkg/common/utils/gang_utils.go
 create mode 100644 pkg/common/utils/gang_utils_test.go

diff --git a/go.sum b/go.sum
index 16ecc537f..af265d71d 100644
--- a/go.sum
+++ b/go.sum
@@ -140,6 +140,7 @@ github.com/docker/go-connections v0.3.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5Xh
 github.com/docker/go-units v0.3.3/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
 github.com/docker/libnetwork v0.0.0-20180830151422-a9cd636e3789/go.mod h1:93m0aTqz6z+g32wla4l4WxTrdtvBRmVzYRkYvasA5Z8=
 github.com/docker/spdystream v0.0.0-20160310174837-449fdfce4d96/go.mod h1:Qh8CwZgvJUkLughtfhJv5dyTYa91l1fOUCrgjqmcifM=
+github.com/docker/spdystream v0.0.0-20181023171402-6480d4af844c h1:ZfSZ3P3BedhKGUhzj7BQlPSU4OvT6tfOKe3DVHzOA7s=
 github.com/docker/spdystream v0.0.0-20181023171402-6480d4af844c/go.mod h1:Qh8CwZgvJUkLughtfhJv5dyTYa91l1fOUCrgjqmcifM=
 github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
 github.com/elazarl/goproxy v0.0.0-20170405201442-c4fc26588b6e/go.mod h1:/Zj4wYkgs4iZTTu3o/KG3Itv/qCCa8VVMlb3i9OVuzc=
@@ -888,6 +889,7 @@ k8s.io/code-generator v0.16.13 h1:zjla2EwAmLsgRBDfudNIMd8OBw2YtTGBQCW5jLruEQk=
 k8s.io/code-generator v0.16.13/go.mod h1:J2H4yLa80/c3JcoMtjfRQ/cqZNWeJnNhrCtedYrPJ+k=
 k8s.io/component-base v0.16.13 h1:wHmQb3b/QCTODA/MGih9x7XZQmhdDlA8AxJnAID6T2A=
 k8s.io/component-base v0.16.13/go.mod h1:cNe9ZU2A6tqBG0gPQ4/T/KolI9Cv2NA1+7uvmkA7Cyc=
+k8s.io/cri-api v0.16.13 h1:djfY5bALowwzMJr8NvGpoeMCE6QHybM0CV5nES52Y/8=
 k8s.io/cri-api v0.16.13/go.mod h1:W6aMMPN5fmxcRGaHnb6BEfoTeS82OsJcsUJyKf+EWYc=
 k8s.io/csi-translation-lib v0.16.13 h1:707nr7tiLwHAxiJOgEdr1cILuLWUyQw/lVQsbXsGfog=
 k8s.io/csi-translation-lib v0.16.13/go.mod h1:Cx8LaBQZN7e5BVZ/1YCddS5RjouZiqfBDA+gkYpx3us=
@@ -911,6 +913,7 @@ k8s.io/kube-openapi v0.0.0-20200410163147-594e756bea31/go.mod h1:1TqjTSzOxsLGIKf
 k8s.io/kube-proxy v0.16.13/go.mod h1:bYKw8vvDTHlbpZjlBbOwK9LCtbZ32VhzIDXaUqi0Pr0=
 k8s.io/kube-scheduler v0.16.13 h1:ZgdD/1l5INVMWc0+uAXNzGC8p1QXk9AoUT2fHN5BI7U=
 k8s.io/kube-scheduler v0.16.13/go.mod h1:3mhAfNFJ+4V6wEQnCmQ8onxuOpZgjPvy8SWXbdHhjy8=
+k8s.io/kubectl v0.16.13 h1:7xEbxDmCOs5AbahIpXTkpRGU7Bj5rwPMwBDJzgroaxo=
 k8s.io/kubectl v0.16.13/go.mod h1:OG3VmGfy+jjyRmdUqnPuu+Y2PimdjR5Fme5WjYsie5k=
 k8s.io/kubelet v0.16.13/go.mod h1:+9VmFnhaHwmLRjoOz6oFkUEzKtvueDDuTRuX3g2FiL4=
 k8s.io/kubernetes v1.16.2/go.mod h1:SmhGgKfQ30imqjFVj8AI+iW+zSyFsswNErKYeTfgoH0=
diff --git a/pkg/cache/application.go b/pkg/cache/application.go
index d30abab18..18cf63d5a 100644
--- a/pkg/cache/application.go
+++ b/pkg/cache/application.go
@@ -28,9 +28,11 @@ import (
 	v1 "k8s.io/api/core/v1"
 
 	"github.com/apache/incubator-yunikorn-core/pkg/api"
+	"github.com/apache/incubator-yunikorn-k8shim/pkg/apis/yunikorn.apache.org/v1alpha1"
 	"github.com/apache/incubator-yunikorn-k8shim/pkg/appmgmt/interfaces"
 	"github.com/apache/incubator-yunikorn-k8shim/pkg/common/constants"
 	"github.com/apache/incubator-yunikorn-k8shim/pkg/common/events"
+	"github.com/apache/incubator-yunikorn-k8shim/pkg/common/utils"
 	"github.com/apache/incubator-yunikorn-k8shim/pkg/conf"
 	"github.com/apache/incubator-yunikorn-k8shim/pkg/dispatcher"
 	"github.com/apache/incubator-yunikorn-k8shim/pkg/log"
@@ -38,15 +40,17 @@ import (
 )
 
 type Application struct {
-	applicationID string
-	queue         string
-	partition     string
-	user          string
-	taskMap       map[string]*Task
-	tags          map[string]string
-	sm            *fsm.FSM
-	lock          *sync.RWMutex
-	schedulerAPI  api.SchedulerAPI
+	applicationID    string
+	queue            string
+	partition        string
+	user             string
+	taskMap          map[string]*Task
+	tags             map[string]string
+	schedulingPolicy v1alpha1.SchedulingPolicy
+	taskGroups       []v1alpha1.TaskGroup
+	sm               *fsm.FSM
+	lock             *sync.RWMutex
+	schedulerAPI     api.SchedulerAPI
 }
 
 func (app *Application) String() string {
@@ -58,14 +62,16 @@ func (app *Application) String() string {
 func NewApplication(appID, queueName, user string, tags map[string]string, scheduler api.SchedulerAPI) *Application {
 	taskMap := make(map[string]*Task)
 	app := &Application{
-		applicationID: appID,
-		queue:         queueName,
-		partition:     constants.DefaultPartition,
-		user:          user,
-		taskMap:       taskMap,
-		tags:          tags,
-		lock:          &sync.RWMutex{},
-		schedulerAPI:  scheduler,
+		applicationID:    appID,
+		queue:            queueName,
+		partition:        constants.DefaultPartition,
+		user:             user,
+		taskMap:          taskMap,
+		tags:             tags,
+		schedulingPolicy: v1alpha1.SchedulingPolicy{},
+		taskGroups:       make([]v1alpha1.TaskGroup, 0),
+		lock:             &sync.RWMutex{},
+		schedulerAPI:     scheduler,
 	}
 
 	var states = events.States().Application
@@ -81,8 +87,14 @@ func NewApplication(appID, queueName, user string, tags map[string]string, sched
 			{Name: string(events.AcceptApplication),
 				Src: []string{states.Submitted, states.Recovering},
 				Dst: states.Accepted},
+			{Name: string(events.TryReserve),
+				Src: []string{states.Accepted},
+				Dst: states.Reserving},
+			{Name: string(events.UpdateReservation),
+				Src: []string{states.Reserving},
+				Dst: states.Reserving},
 			{Name: string(events.RunApplication),
-				Src: []string{states.Accepted, states.Running},
+				Src: []string{states.Accepted, states.Reserving, states.Running},
 				Dst: states.Running},
 			{Name: string(events.CompleteApplication),
 				Src: []string{states.Running},
@@ -101,11 +113,14 @@ func NewApplication(appID, queueName, user string, tags map[string]string, sched
 				Dst: states.Killed},
 		},
 		fsm.Callbacks{
-			string(events.SubmitApplication):   app.handleSubmitApplicationEvent,
-			string(events.RecoverApplication):  app.handleRecoverApplicationEvent,
-			string(events.RejectApplication):   app.handleRejectApplicationEvent,
-			string(events.CompleteApplication): app.handleCompleteApplicationEvent,
-			events.EnterState:                  app.enterState,
+			string(events.SubmitApplication):      app.handleSubmitApplicationEvent,
+			string(events.RecoverApplication):     app.handleRecoverApplicationEvent,
+			string(events.RejectApplication):      app.handleRejectApplicationEvent,
+			string(events.CompleteApplication):    app.handleCompleteApplicationEvent,
+			string(events.UpdateReservation):      app.onReservationStateChange,
+			events.States().Application.Accepted:  app.postAppAccepted,
+			events.States().Application.Reserving: app.onReserving,
+			events.EnterState:                     app.enterState,
 		},
 	)
 
@@ -166,6 +181,30 @@ func (app *Application) GetUser() string {
 	return app.user
 }
 
+func (app *Application) setSchedulingPolicy(policy v1alpha1.SchedulingPolicy) {
+	app.lock.Lock()
+	defer app.lock.Unlock()
+	app.schedulingPolicy = policy
+}
+
+func (app *Application) getSchedulingPolicy() v1alpha1.SchedulingPolicy {
+	app.lock.RLock()
+	defer app.lock.RUnlock()
+	return app.schedulingPolicy
+}
+
+func (app *Application) setTaskGroups(taskGroups []v1alpha1.TaskGroup) {
+	app.lock.Lock()
+	defer app.lock.Unlock()
+	app.taskGroups = taskGroups
+}
+
+func (app *Application) getTaskGroups() []v1alpha1.TaskGroup {
+	app.lock.RLock()
+	defer app.lock.RUnlock()
+	return app.taskGroups
+}
+
 func (app *Application) addTask(task *Task) {
 	app.lock.Lock()
 	defer app.lock.Unlock()
@@ -269,35 +308,14 @@ func (app *Application) Schedule() {
 			log.Logger().Warn("failed to handle SUBMIT app event",
 				zap.Error(err))
 		}
-	case states.Accepted:
-		ev := NewRunApplicationEvent(app.GetApplicationID())
-		if err := app.handle(ev); err != nil {
-			log.Logger().Warn("failed to handle RUN app event",
-				zap.Error(err))
-		}
+	case states.Reserving:
+		app.scheduleTasks(func(t *Task) bool {
+			return t.placeholder
+		})
 	case states.Running:
-		if len(app.GetNewTasks()) > 0 {
-			for _, task := range app.GetNewTasks() {
-				// for each new task, we do a sanity check before moving the state to Pending_Schedule
-				if err := task.sanityCheckBeforeScheduling(); err == nil {
-					// note, if we directly trigger submit task event, it may spawn too many duplicate
-					// events, because a task might be submitted multiple times before its state transits to PENDING.
-					if handleErr := task.handle(
-						NewSimpleTaskEvent(task.applicationID, task.taskID, events.InitTask)); handleErr != nil {
-						// something goes wrong when transit task to PENDING state,
-						// this should not happen because we already checked the state
-						// before calling the transition. Nowhere to go, just log the error.
-						log.Logger().Warn("init task failed", zap.Error(err))
-					}
-				} else {
-					events.GetRecorder().Event(task.GetTaskPod(), v1.EventTypeWarning, "FailedScheduling", err.Error())
-					log.Logger().Debug("task is not ready for scheduling",
-						zap.String("appID", task.applicationID),
-						zap.String("taskID", task.taskID),
-						zap.Error(err))
-				}
-			}
-		}
+		app.scheduleTasks(func(t *Task) bool {
+			return !t.placeholder
+		})
 	default:
 		log.Logger().Debug("skipping scheduling application",
 			zap.String("appID", app.GetApplicationID()),
@@ -305,6 +323,31 @@ func (app *Application) Schedule() {
 	}
 }
 
+func (app *Application) scheduleTasks(taskScheduleCondition func(t *Task) bool) {
+	for _, task := range app.GetNewTasks() {
+		if taskScheduleCondition(task) {
+			// for each new task, we do a sanity check before moving the state to Pending_Schedule
+			if err := task.sanityCheckBeforeScheduling(); err == nil {
+				// note, if we directly trigger submit task event, it may spawn too many duplicate
+				// events, because a task might be submitted multiple times before its state transits to PENDING.
+				if handleErr := task.handle(
+					NewSimpleTaskEvent(task.applicationID, task.taskID, events.InitTask)); handleErr != nil {
+					// something goes wrong when transit task to PENDING state,
+					// this should not happen because we already checked the state
+					// before calling the transition. Nowhere to go, just log the error.
+					log.Logger().Warn("init task failed", zap.Error(err))
+				}
+			} else {
+				events.GetRecorder().Event(task.GetTaskPod(), v1.EventTypeWarning, "FailedScheduling", err.Error())
+				log.Logger().Debug("task is not ready for scheduling",
+					zap.String("appID", task.applicationID),
+					zap.String("taskID", task.taskID),
+					zap.Error(err))
+			}
+		}
+	}
+}
+
 func (app *Application) handleSubmitApplicationEvent(event *fsm.Event) {
 	log.Logger().Info("handle app submission",
 		zap.String("app", app.String()),
@@ -359,6 +402,50 @@ func (app *Application) handleRecoverApplicationEvent(event *fsm.Event) {
 	}
 }
 
+func (app *Application) postAppAccepted(event *fsm.Event) {
+	// if app has taskGroups defined, it goes to the Reserving state before getting to Running
+	var ev events.SchedulingEvent
+	if len(app.taskGroups) != 0 {
+		ev = NewSimpleApplicationEvent(app.applicationID, events.TryReserve)
+		dispatcher.Dispatch(ev)
+	} else {
+		ev = NewRunApplicationEvent(app.applicationID)
+	}
+	dispatcher.Dispatch(ev)
+}
+
+func (app *Application) onReserving(event *fsm.Event) {
+	go func() {
+		// while doing reserving
+		if err := GetPlaceholderManager().createAppPlaceholders(app); err != nil {
+			// creating placeholder failed
+			// put the app into recycling queue and turn the app to running state
+			GetPlaceholderManager().Recycle(app.applicationID)
+			ev := NewRunApplicationEvent(app.applicationID)
+			dispatcher.Dispatch(ev)
+		}
+	}()
+}
+
+func (app *Application) onReservationStateChange(event *fsm.Event) {
+	// this event is called when there is a add or release of placeholders
+	desireCounts := utils.NewTaskGroupInstanceCountMap()
+	for _, tg := range app.taskGroups {
+		desireCounts.Add(tg.Name, tg.MinMember)
+	}
+
+	actualCounts := utils.NewTaskGroupInstanceCountMap()
+	for _, t := range app.getTasks(events.States().Task.Bound) {
+		actualCounts.AddOne(t.taskGroupName)
+	}
+
+	// min member all satisfied
+	if desireCounts.Equals(actualCounts) {
+		ev := NewRunApplicationEvent(app.GetApplicationID())
+		dispatcher.Dispatch(ev)
+	}
+}
+
 func (app *Application) handleRejectApplicationEvent(event *fsm.Event) {
 	log.Logger().Info("app is rejected by scheduler", zap.String("appID", app.applicationID))
 	// for rejected apps, we directly move them to failed state
diff --git a/pkg/cache/application_events.go b/pkg/cache/application_events.go
index 046641a20..5a8961cb0 100644
--- a/pkg/cache/application_events.go
+++ b/pkg/cache/application_events.go
@@ -160,3 +160,30 @@ func (fe FailApplicationEvent) GetArgs() []interface{} {
 func (fe FailApplicationEvent) GetApplicationID() string {
 	return fe.applicationID
 }
+
+// ------------------------
+// Reservation Update Event
+// ------------------------
+type UpdateApplicationReservationEvent struct {
+	applicationID string
+	event         events.ApplicationEventType
+}
+
+func NewUpdateApplicationReservationEvent(appID string) UpdateApplicationReservationEvent {
+	return UpdateApplicationReservationEvent{
+		applicationID: appID,
+		event:         events.UpdateReservation,
+	}
+}
+
+func (ue UpdateApplicationReservationEvent) GetEvent() events.ApplicationEventType {
+	return ue.event
+}
+
+func (ue UpdateApplicationReservationEvent) GetArgs() []interface{} {
+	return nil
+}
+
+func (ue UpdateApplicationReservationEvent) GetApplicationID() string {
+	return ue.applicationID
+}
diff --git a/pkg/cache/application_test.go b/pkg/cache/application_test.go
index 643db5443..aa757f347 100644
--- a/pkg/cache/application_test.go
+++ b/pkg/cache/application_test.go
@@ -19,17 +19,26 @@
 package cache
 
 import (
+	"sort"
+	"strings"
+	"sync"
 	"testing"
 	"time"
 
 	"gotest.tools/assert"
 	is "gotest.tools/assert/cmp"
 	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
 	apis "k8s.io/apimachinery/pkg/apis/meta/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 
 	"github.com/apache/incubator-yunikorn-core/pkg/api"
+	"github.com/apache/incubator-yunikorn-k8shim/pkg/apis/yunikorn.apache.org/v1alpha1"
+	"github.com/apache/incubator-yunikorn-k8shim/pkg/client"
 	"github.com/apache/incubator-yunikorn-k8shim/pkg/common/constants"
 	"github.com/apache/incubator-yunikorn-k8shim/pkg/common/events"
+	"github.com/apache/incubator-yunikorn-k8shim/pkg/common/utils"
+	"github.com/apache/incubator-yunikorn-k8shim/pkg/dispatcher"
 	"github.com/apache/incubator-yunikorn-scheduler-interface/lib/go/si"
 )
 
@@ -181,7 +190,7 @@ func TestGetNonTerminatedTaskAlias(t *testing.T) {
 	assert.Assert(t, is.Contains(res, "/test-00001"))
 	assert.Assert(t, is.Contains(res, "/test-00002"))
 
-	//set two tasks to terminated states
+	// set two tasks to terminated states
 	task1.sm.SetState(events.States().Task.Rejected)
 	task2.sm.SetState(events.States().Task.Rejected)
 	// check the tasks both in terminated states
@@ -189,7 +198,7 @@ func TestGetNonTerminatedTaskAlias(t *testing.T) {
 	res = app.getNonTerminatedTaskAlias()
 	assert.Equal(t, len(res), 0)
 
-	//set two tasks to one is terminated, another is non-terminated
+	// set two tasks to one is terminated, another is non-terminated
 	task1.sm.SetState(events.States().Task.Rejected)
 	task2.sm.SetState(events.States().Task.Allocated)
 	// check the task, should only return task2's alias
@@ -197,3 +206,176 @@ func TestGetNonTerminatedTaskAlias(t *testing.T) {
 	assert.Equal(t, len(res), 1)
 	assert.Equal(t, res[0], "/test-00002")
 }
+
+func TestSetTaskGroupsAndSchedulingPolicy(t *testing.T) {
+	app := NewApplication("app01", "root.a", "test-user", map[string]string{}, newMockSchedulerAPI())
+	assert.Assert(t, app.getSchedulingPolicy().Type == "")
+	assert.Equal(t, len(app.getTaskGroups()), 0)
+
+	app.setSchedulingPolicy(v1alpha1.SchedulingPolicy{
+		Type: v1alpha1.TryReserve,
+		Parameters: map[string]string{
+			"option-1": "value-1",
+			"option-2": "value-2",
+		},
+	})
+
+	assert.Equal(t, app.getSchedulingPolicy().Type, v1alpha1.TryReserve)
+	assert.Equal(t, len(app.getSchedulingPolicy().Parameters), 2)
+	assert.Equal(t, app.getSchedulingPolicy().Parameters["option-1"], "value-1", "incorrect parameter value")
+	assert.Equal(t, app.getSchedulingPolicy().Parameters["option-2"], "value-2", "incorrect parameter value")
+
+	duration := int64(3000)
+	app.setTaskGroups([]v1alpha1.TaskGroup{
+		{
+			Name:      "test-group-1",
+			MinMember: 10,
+			MinResource: map[string]resource.Quantity{
+				v1.ResourceCPU.String():    resource.MustParse("500m"),
+				v1.ResourceMemory.String(): resource.MustParse("500Mi"),
+			},
+		},
+		{
+			Name:      "test-group-2",
+			MinMember: 20,
+			MinResource: map[string]resource.Quantity{
+				v1.ResourceCPU.String():    resource.MustParse("1000m"),
+				v1.ResourceMemory.String(): resource.MustParse("1000Mi"),
+			},
+			NodeSelector: metav1.LabelSelector{
+				MatchExpressions: []metav1.LabelSelectorRequirement{
+					{
+						Key:      "security",
+						Operator: metav1.LabelSelectorOpIn,
+						Values:   []string{"S1", "value2"},
+					},
+				},
+			},
+			Tolerations: []v1.Toleration{
+				{
+					Key:               "nodeType",
+					Operator:          v1.TolerationOpEqual,
+					Value:             "infra",
+					Effect:            v1.TaintEffectNoSchedule,
+					TolerationSeconds: &duration,
+				},
+			},
+		},
+	})
+
+	assert.Assert(t, app.getTaskGroups() != nil)
+	assert.Equal(t, len(app.getTaskGroups()), 2)
+
+	// sort the slice to give us a stable order
+	sort.Slice(app.getTaskGroups(), func(i, j int) bool {
+		return strings.Compare(app.getTaskGroups()[i].Name, app.getTaskGroups()[j].Name) < 0
+	})
+
+	tg1 := app.getTaskGroups()[0]
+	assert.Equal(t, tg1.Name, "test-group-1")
+	assert.Equal(t, tg1.MinMember, int32(10))
+	assert.Equal(t, tg1.MinResource[v1.ResourceCPU.String()], resource.MustParse("500m"))
+	assert.Equal(t, tg1.MinResource[v1.ResourceMemory.String()], resource.MustParse("500Mi"))
+
+	tg2 := app.getTaskGroups()[1]
+	assert.Equal(t, tg2.Name, "test-group-2")
+	assert.Equal(t, tg2.MinMember, int32(20))
+	assert.Equal(t, len(tg2.Tolerations), 1)
+	assert.Equal(t, tg2.Tolerations[0].Key, "nodeType")
+	assert.Equal(t, tg2.Tolerations[0].Value, "infra")
+	assert.Equal(t, tg2.Tolerations[0].Operator, v1.TolerationOpEqual)
+	assert.Equal(t, tg2.Tolerations[0].Effect, v1.TaintEffectNoSchedule)
+	assert.Equal(t, tg2.Tolerations[0].TolerationSeconds, &duration)
+}
+
+type threadSafePodsMap struct {
+	pods map[string]*v1.Pod
+	sync.RWMutex
+}
+
+func newThreadSafePodsMap() *threadSafePodsMap {
+	return &threadSafePodsMap{
+		pods: make(map[string]*v1.Pod),
+	}
+}
+
+func (t *threadSafePodsMap) add(pod *v1.Pod) {
+	t.Lock()
+	defer t.Unlock()
+	t.pods[pod.Name] = pod
+}
+
+func (t *threadSafePodsMap) count() int {
+	t.RLock()
+	defer t.RUnlock()
+	return len(t.pods)
+}
+
+func TestTryReserve(t *testing.T) {
+	context := initContextForTest()
+	dispatcher.RegisterEventHandler(dispatcher.EventTypeApp, context.ApplicationEventHandler())
+	dispatcher.Start()
+	defer dispatcher.Stop()
+
+	// inject the mocked clients to the placeholder manager
+	createdPods := newThreadSafePodsMap()
+	mockedAPIProvider := client.NewMockedAPIProvider()
+	mockedAPIProvider.MockCreateFn(func(pod *v1.Pod) (*v1.Pod, error) {
+		createdPods.add(pod)
+		return pod, nil
+	})
+	GetPlaceholderManager().setMockedClients(mockedAPIProvider.GetAPIs())
+
+	// create a new app
+	app := NewApplication("app00001", "root.abc", "test-user",
+		map[string]string{}, mockedAPIProvider.GetAPIs().SchedulerAPI)
+	context.applications[app.applicationID] = app
+
+	// set app scheduling policy
+	app.setSchedulingPolicy(v1alpha1.SchedulingPolicy{
+		Type: v1alpha1.TryReserve,
+		Parameters: map[string]string{
+			"option-1": "value-1",
+			"option-2": "value-2",
+		},
+	})
+
+	// set taskGroups
+	app.setTaskGroups([]v1alpha1.TaskGroup{
+		{
+			Name:      "test-group-1",
+			MinMember: 10,
+			MinResource: map[string]resource.Quantity{
+				v1.ResourceCPU.String():    resource.MustParse("500m"),
+				v1.ResourceMemory.String(): resource.MustParse("500Mi"),
+			},
+		},
+		{
+			Name:      "test-group-2",
+			MinMember: 20,
+			MinResource: map[string]resource.Quantity{
+				v1.ResourceCPU.String():    resource.MustParse("1000m"),
+				v1.ResourceMemory.String(): resource.MustParse("1000Mi"),
+			},
+		},
+	})
+
+	// submit the app
+	err := app.handle(NewSubmitApplicationEvent(app.applicationID))
+	assert.NilError(t, err)
+	assertAppState(t, app, events.States().Application.Submitted, 3*time.Second)
+
+	// accepted the app
+	err = app.handle(NewSimpleApplicationEvent(app.GetApplicationID(), events.AcceptApplication))
+	assert.NilError(t, err)
+
+	// since this app has taskGroups defined,
+	// once the app is accepted, it is expected to see this app goes to Reserving state
+	assertAppState(t, app, events.States().Application.Reserving, 3*time.Second)
+
+	// under Reserving state, the app will need to acquire all the placeholders it asks for
+	err = utils.WaitForCondition(func() bool {
+		return createdPods.count() == 30
+	}, 100*time.Millisecond, 3*time.Second)
+	assert.NilError(t, err, "placeholders are not created")
+}
diff --git a/pkg/cache/placeholder.go b/pkg/cache/placeholder.go
new file mode 100644
index 000000000..fb238a873
--- /dev/null
+++ b/pkg/cache/placeholder.go
@@ -0,0 +1,85 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package cache
+
+import (
+	"fmt"
+
+	v1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+
+	"github.com/apache/incubator-yunikorn-k8shim/pkg/apis/yunikorn.apache.org/v1alpha1"
+	"github.com/apache/incubator-yunikorn-k8shim/pkg/common/constants"
+	"github.com/apache/incubator-yunikorn-k8shim/pkg/common/utils"
+)
+
+type Placeholder struct {
+	appID         string
+	taskGroupName string
+	pod           *v1.Pod
+	stage         PlaceholderState
+}
+
+type PlaceholderState string
+
+const (
+	Acquiring PlaceholderState = "Acquiring"
+	Acquired  PlaceholderState = "Acquired"
+)
+
+func newPlaceholder(placeholderName string, app *Application, taskGroup v1alpha1.TaskGroup) *Placeholder {
+	placeholderPod := &v1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      placeholderName,
+			Namespace: app.tags[constants.AppTagNamespace],
+			Labels: map[string]string{
+				constants.LabelApplicationID: app.GetApplicationID(),
+				constants.LabelQueueName:     app.GetQueue(),
+			},
+			Annotations: map[string]string{
+				constants.AnnotationTaskGroupName: taskGroup.Name,
+			},
+		},
+		Spec: v1.PodSpec{
+			Containers: []v1.Container{
+				{
+					Name:  constants.PlaceholderContainerName,
+					Image: constants.PlaceholderContainerImage,
+					Resources: v1.ResourceRequirements{
+						Requests: utils.GetPlaceholderResourceRequest(taskGroup.MinResource),
+					},
+				},
+			},
+			RestartPolicy: constants.PlaceholderPodRestartPolicy,
+			SchedulerName: constants.SchedulerName,
+		},
+	}
+
+	return &Placeholder{
+		appID:         app.GetApplicationID(),
+		taskGroupName: taskGroup.Name,
+		pod:           placeholderPod,
+		stage:         Acquiring,
+	}
+}
+
+func (p *Placeholder) String() string {
+	return fmt.Sprintf("appID: %s, taskGroup: %s, podName: %s/%s, stage: %s",
+		p.appID, p.taskGroupName, p.pod.Namespace, p.pod.Name, p.stage)
+}
diff --git a/pkg/cache/placeholder_manager.go b/pkg/cache/placeholder_manager.go
new file mode 100644
index 000000000..3d1c106bb
--- /dev/null
+++ b/pkg/cache/placeholder_manager.go
@@ -0,0 +1,84 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package cache
+
+import (
+	"sync"
+
+	"go.uber.org/zap"
+
+	"github.com/apache/incubator-yunikorn-k8shim/pkg/client"
+	"github.com/apache/incubator-yunikorn-k8shim/pkg/common/utils"
+	"github.com/apache/incubator-yunikorn-k8shim/pkg/log"
+)
+
+// placeholder manager is a service to manage the lifecycle of app placeholders
+type PlaceholderManager struct {
+	clients *client.Clients
+	sync.RWMutex
+}
+
+var placeholderMgr *PlaceholderManager
+var once sync.Once
+
+func GetPlaceholderManager() *PlaceholderManager {
+	once.Do(func() {
+		placeholderMgr = &PlaceholderManager{}
+	})
+	return placeholderMgr
+}
+
+func (mgr *PlaceholderManager) createAppPlaceholders(app *Application) error {
+	mgr.Lock()
+	defer mgr.Unlock()
+
+	// iterate all task groups, create placeholders for all the min members
+	for _, tg := range app.getTaskGroups() {
+		for i := int32(0); i < tg.MinMember; i++ {
+			placeholderName := utils.GeneratePlaceholderName(tg.Name, app.GetApplicationID(), i)
+			placeholder := newPlaceholder(placeholderName, app, tg)
+			// create the placeholder on K8s
+			_, err := mgr.clients.KubeClient.Create(placeholder.pod)
+			if err != nil {
+				// if failed to create the place holder pod
+				// caller should handle this error
+				log.Logger().Error("failed to create placeholder pod",
+					zap.Error(err))
+				return err
+			}
+			log.Logger().Info("placeholder created",
+				zap.String("placeholder", placeholder.String()))
+		}
+	}
+
+	return nil
+}
+
+// recycle all the placeholders for an application
+func (mgr *PlaceholderManager) Recycle(appID string) {
+	log.Logger().Info("start to recycle app placeholders",
+		zap.String("appID", appID))
+}
+
+// this is only used in testing
+func (mgr *PlaceholderManager) setMockedClients(mockedClients *client.Clients) {
+	mgr.Lock()
+	defer mgr.Unlock()
+	mgr.clients = mockedClients
+}
diff --git a/pkg/cache/placeholder_manager_test.go b/pkg/cache/placeholder_manager_test.go
new file mode 100644
index 000000000..46a0bc596
--- /dev/null
+++ b/pkg/cache/placeholder_manager_test.go
@@ -0,0 +1,88 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package cache
+
+import (
+	"fmt"
+	"sync"
+	"testing"
+
+	"gotest.tools/assert"
+	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
+
+	"github.com/apache/incubator-yunikorn-k8shim/pkg/apis/yunikorn.apache.org/v1alpha1"
+	"github.com/apache/incubator-yunikorn-k8shim/pkg/client"
+	"github.com/apache/incubator-yunikorn-k8shim/pkg/common/constants"
+)
+
+func TestCreateAppPlaceholders(t *testing.T) {
+	const (
+		appID     = "app01"
+		queue     = "root.default"
+		namespace = "test"
+	)
+	mockedSchedulerAPI := newMockSchedulerAPI()
+	app := NewApplication(appID, queue,
+		"bob", map[string]string{constants.AppTagNamespace: namespace}, mockedSchedulerAPI)
+	app.setTaskGroups([]v1alpha1.TaskGroup{
+		{
+			Name:      "test-group-1",
+			MinMember: 10,
+			MinResource: map[string]resource.Quantity{
+				"cpu":    resource.MustParse("500m"),
+				"memory": resource.MustParse("1024M"),
+			},
+		},
+		{
+			Name:      "test-group-2",
+			MinMember: 20,
+			MinResource: map[string]resource.Quantity{
+				"cpu":    resource.MustParse("1000m"),
+				"memory": resource.MustParse("2048M"),
+			},
+		},
+	})
+
+	createdPods := make(map[string]*v1.Pod)
+	mockedAPIProvider := client.NewMockedAPIProvider()
+	mockedAPIProvider.MockCreateFn(func(pod *v1.Pod) (*v1.Pod, error) {
+		createdPods[pod.Name] = pod
+		return pod, nil
+	})
+	placeholderMgr := &PlaceholderManager{
+		clients: mockedAPIProvider.GetAPIs(),
+		RWMutex: sync.RWMutex{},
+	}
+
+	err := placeholderMgr.createAppPlaceholders(app)
+	assert.NilError(t, err, "create app placeholders should be successful")
+	assert.Equal(t, len(createdPods), 30)
+
+	// simulate placeholder creation failures
+	// failed to create one placeholder
+	mockedAPIProvider.MockCreateFn(func(pod *v1.Pod) (*v1.Pod, error) {
+		if pod.Name == "tg-test-group-2-app01-15" {
+			return nil, fmt.Errorf("failed to create pod %s", pod.Name)
+		}
+		return pod, nil
+	})
+	err = placeholderMgr.createAppPlaceholders(app)
+	assert.Error(t, err, "failed to create pod tg-test-group-2-app01-15")
+}
diff --git a/pkg/cache/placeholder_test.go b/pkg/cache/placeholder_test.go
new file mode 100644
index 000000000..c4908a3d1
--- /dev/null
+++ b/pkg/cache/placeholder_test.go
@@ -0,0 +1,67 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package cache
+
+import (
+	"testing"
+
+	"gotest.tools/assert"
+	"k8s.io/apimachinery/pkg/api/resource"
+
+	"github.com/apache/incubator-yunikorn-k8shim/pkg/apis/yunikorn.apache.org/v1alpha1"
+	"github.com/apache/incubator-yunikorn-k8shim/pkg/common"
+	"github.com/apache/incubator-yunikorn-k8shim/pkg/common/constants"
+)
+
+func TestNewPlaceholder(t *testing.T) {
+	const (
+		appID     = "app01"
+		queue     = "root.default"
+		namespace = "test"
+	)
+	mockedSchedulerAPI := newMockSchedulerAPI()
+	app := NewApplication(appID, queue,
+		"bob", map[string]string{constants.AppTagNamespace: namespace}, mockedSchedulerAPI)
+	app.setTaskGroups([]v1alpha1.TaskGroup{
+		{
+			Name:      "test-group-1",
+			MinMember: 10,
+			MinResource: map[string]resource.Quantity{
+				"cpu":    resource.MustParse("500m"),
+				"memory": resource.MustParse("1024M"),
+			},
+		},
+	})
+
+	holder := newPlaceholder("ph-name", app, app.taskGroups[0])
+	assert.Equal(t, holder.appID, appID)
+	assert.Equal(t, holder.stage, Acquiring)
+	assert.Equal(t, holder.taskGroupName, app.taskGroups[0].Name)
+	assert.Equal(t, holder.pod.Spec.SchedulerName, constants.SchedulerName)
+	assert.Equal(t, holder.pod.Name, "ph-name")
+	assert.Equal(t, holder.pod.Namespace, namespace)
+	assert.Equal(t, len(holder.pod.Labels), 2)
+	assert.Equal(t, holder.pod.Labels[constants.LabelApplicationID], appID)
+	assert.Equal(t, holder.pod.Labels[constants.LabelQueueName], queue)
+	assert.Equal(t, len(holder.pod.Annotations), 1)
+	assert.Equal(t, holder.pod.Annotations[constants.AnnotationTaskGroupName], app.taskGroups[0].Name)
+	assert.Equal(t, common.GetPodResource(holder.pod).Resources[constants.CPU].Value, int64(500))
+	assert.Equal(t, common.GetPodResource(holder.pod).Resources[constants.Memory].Value, int64(1024))
+	assert.Equal(t, holder.String(), "appID: app01, taskGroup: test-group-1, podName: test/ph-name, stage: Acquiring")
+}
diff --git a/pkg/cache/task.go b/pkg/cache/task.go
index b856d7e82..8b34f68cb 100644
--- a/pkg/cache/task.go
+++ b/pkg/cache/task.go
@@ -28,6 +28,7 @@ import (
 
 	"github.com/apache/incubator-yunikorn-k8shim/pkg/common"
 	"github.com/apache/incubator-yunikorn-k8shim/pkg/common/events"
+	"github.com/apache/incubator-yunikorn-k8shim/pkg/common/utils"
 	"github.com/apache/incubator-yunikorn-k8shim/pkg/dispatcher"
 	"github.com/apache/incubator-yunikorn-k8shim/pkg/log"
 	"github.com/apache/incubator-yunikorn-scheduler-interface/lib/go/si"
@@ -47,6 +48,8 @@ type Task struct {
 	context        *Context
 	nodeName       string
 	createTime     time.Time
+	taskGroupName  string
+	placeholder    bool
 	sm             *fsm.FSM
 	lock           *sync.RWMutex
 }
@@ -77,6 +80,7 @@ func createTaskInternal(tid string, app *Application, resource *si.Resource,
 		pod:           pod,
 		resource:      resource,
 		createTime:    pod.GetCreationTimestamp().Time,
+		placeholder:   false,
 		context:       ctx,
 		lock:          &sync.RWMutex{},
 	}
@@ -121,10 +125,15 @@ func createTaskInternal(tid string, app *Application, resource *si.Resource,
 			states.Rejected:                 task.postTaskRejected,
 			beforeHook(events.CompleteTask): task.beforeTaskCompleted,
 			states.Failed:                   task.postTaskFailed,
+			states.Bound:                    task.postTaskBound,
 			events.EnterState:               task.enterState,
 		},
 	)
 
+	if tgName := utils.GetTaskGroupFromPodSpec(pod); tgName != "" {
+		task.taskGroupName = tgName
+	}
+
 	return task
 }
 
@@ -167,6 +176,18 @@ func (task *Task) GetTaskState() string {
 	return task.sm.Current()
 }
 
+func (task *Task) setTaskGroupName(groupName string) {
+	task.lock.Lock()
+	defer task.lock.Unlock()
+	task.taskGroupName = groupName
+}
+
+func (task *Task) getTaskGroupName() string {
+	task.lock.RLock()
+	defer task.lock.RUnlock()
+	return task.taskGroupName
+}
+
 func (task *Task) getTaskAllocationUUID() string {
 	task.lock.RLock()
 	defer task.lock.RUnlock()
@@ -296,6 +317,16 @@ func (task *Task) postTaskAllocated(event *fsm.Event) {
 	}(event)
 }
 
+func (task *Task) postTaskBound(event *fsm.Event) {
+	if task.placeholder {
+		log.Logger().Info("placeholder is bound",
+			zap.String("appID", task.applicationID),
+			zap.String("taskName", task.alias),
+			zap.String("taskGroupName", task.taskGroupName))
+		dispatcher.Dispatch(NewUpdateApplicationReservationEvent(task.applicationID))
+	}
+}
+
 func (task *Task) postTaskRejected(event *fsm.Event) {
 	// currently, once task is rejected by scheduler, we directly move task to failed state.
 	// so this function simply triggers the state transition when it is rejected.
diff --git a/pkg/cache/task_test.go b/pkg/cache/task_test.go
index 966b79dfb..a2e138bae 100644
--- a/pkg/cache/task_test.go
+++ b/pkg/cache/task_test.go
@@ -413,13 +413,33 @@ func TestIsTerminated(t *testing.T) {
 		},
 	}
 	task := NewTask("task01", app, mockedContext, pod)
-	//set task states to non-terminated
+	// set task states to non-terminated
 	task.sm.SetState(events.States().Task.Pending)
 	res := task.isTerminated()
 	assert.Equal(t, res, false)
 
-	//set task states to terminated
+	// set task states to terminated
 	task.sm.SetState(events.States().Task.Failed)
 	res = task.isTerminated()
 	assert.Equal(t, res, true)
 }
+
+func TestSetTaskGroup(t *testing.T) {
+	mockedContext := initContextForTest()
+	mockedSchedulerAPI := newMockSchedulerAPI()
+	app := NewApplication("app01", "root.default",
+		"bob", map[string]string{}, mockedSchedulerAPI)
+	pod := &v1.Pod{
+		TypeMeta: apis.TypeMeta{
+			Kind:       "Pod",
+			APIVersion: "v1",
+		},
+		ObjectMeta: apis.ObjectMeta{
+			Name: "pod-01",
+			UID:  "UID-01",
+		},
+	}
+	task := NewTask("task01", app, mockedContext, pod)
+	task.setTaskGroupName("test-group")
+	assert.Equal(t, task.getTaskGroupName(), "test-group")
+}
diff --git a/pkg/client/apifactory_mock.go b/pkg/client/apifactory_mock.go
index 0ae0095d4..5bb6f122c 100644
--- a/pkg/client/apifactory_mock.go
+++ b/pkg/client/apifactory_mock.go
@@ -103,6 +103,12 @@ func (m *MockedAPIProvider) MockDeleteFn(dfn func(pod *v1.Pod) error) {
 	}
 }
 
+func (m *MockedAPIProvider) MockCreateFn(cfn func(pod *v1.Pod) (*v1.Pod, error)) {
+	if mock, ok := m.clients.KubeClient.(*KubeClientMock); ok {
+		mock.createFn = cfn
+	}
+}
+
 func (m *MockedAPIProvider) SetNodeLister(lister corev1.NodeLister) {
 	informer := m.clients.NodeInformer
 	if i, ok := informer.(*test.MockedNodeInformer); ok {
diff --git a/pkg/client/interfaces.go b/pkg/client/interfaces.go
index 87e6bad43..f60580fc5 100644
--- a/pkg/client/interfaces.go
+++ b/pkg/client/interfaces.go
@@ -28,6 +28,9 @@ type KubeClient interface {
 	// bind a pod to a specific host
 	Bind(pod *v1.Pod, hostID string) error
 
+	// Create a pod
+	Create(pod *v1.Pod) (*v1.Pod, error)
+
 	// Delete a pod from a host
 	Delete(pod *v1.Pod) error
 
diff --git a/pkg/client/kubeclient.go b/pkg/client/kubeclient.go
index 3f253bd7f..f54c1a01d 100644
--- a/pkg/client/kubeclient.go
+++ b/pkg/client/kubeclient.go
@@ -100,6 +100,10 @@ func (nc SchedulerKubeClient) Bind(pod *v1.Pod, hostID string) error {
 	return nil
 }
 
+func (nc SchedulerKubeClient) Create(pod *v1.Pod) (*v1.Pod, error) {
+	return nc.clientSet.CoreV1().Pods(pod.Namespace).Create(pod)
+}
+
 func (nc SchedulerKubeClient) Delete(pod *v1.Pod) error {
 	// TODO make this configurable for pods
 	gracefulSeconds := int64(3)
diff --git a/pkg/client/kubeclient_mock.go b/pkg/client/kubeclient_mock.go
index af8e9ba17..c289f61b1 100644
--- a/pkg/client/kubeclient_mock.go
+++ b/pkg/client/kubeclient_mock.go
@@ -32,6 +32,7 @@ import (
 type KubeClientMock struct {
 	bindFn    func(pod *v1.Pod, hostID string) error
 	deleteFn  func(pod *v1.Pod) error
+	createFn  func(pod *v1.Pod) (*v1.Pod, error)
 	clientSet kubernetes.Interface
 }
 
@@ -47,6 +48,11 @@ func NewKubeClientMock() *KubeClientMock {
 				zap.String("PodName", pod.Name))
 			return nil
 		},
+		createFn: func(pod *v1.Pod) (*v1.Pod, error) {
+			log.Logger().Info("pod created",
+				zap.String("PodName", pod.Name))
+			return pod, nil
+		},
 		clientSet: fake.NewSimpleClientset(),
 	}
 }
@@ -59,10 +65,18 @@ func (c *KubeClientMock) MockDeleteFn(dfn func(pod *v1.Pod) error) {
 	c.deleteFn = dfn
 }
 
+func (c *KubeClientMock) MockCreateFn(cfn func(pod *v1.Pod) (*v1.Pod, error)) {
+	c.createFn = cfn
+}
+
 func (c *KubeClientMock) Bind(pod *v1.Pod, hostID string) error {
 	return c.bindFn(pod, hostID)
 }
 
+func (c *KubeClientMock) Create(pod *v1.Pod) (*v1.Pod, error) {
+	return c.createFn(pod)
+}
+
 func (c *KubeClientMock) Delete(pod *v1.Pod) error {
 	return c.deleteFn(pod)
 }
diff --git a/pkg/common/constants/constants.go b/pkg/common/constants/constants.go
index 7559ab040..928febacd 100644
--- a/pkg/common/constants/constants.go
+++ b/pkg/common/constants/constants.go
@@ -48,5 +48,11 @@ const SparkLabelRoleDriver = "driver"
 const DefaultConfigMapName = "yunikorn-configs"
 const SchedulerName = "yunikorn"
 
-//Application crd
+// Application crd
 const AppManagerHandlerName = "yunikorn-app"
+const AnnotationTaskGroupName = "yunikorn.apache.org/task-group-name"
+
+// Gang scheduling
+const PlaceholderContainerImage = "k8s.gcr.io/pause"
+const PlaceholderContainerName = "pause"
+const PlaceholderPodRestartPolicy = "Never"
diff --git a/pkg/common/events/events.go b/pkg/common/events/events.go
index 9b85b53c6..31443f125 100644
--- a/pkg/common/events/events.go
+++ b/pkg/common/events/events.go
@@ -36,6 +36,8 @@ const (
 	SubmitApplication   ApplicationEventType = "SubmitApplication"
 	RecoverApplication  ApplicationEventType = "RecoverApplication"
 	AcceptApplication   ApplicationEventType = "AcceptApplication"
+	TryReserve          ApplicationEventType = "TryReserve"
+	UpdateReservation   ApplicationEventType = "UpdateReservation"
 	RunApplication      ApplicationEventType = "RunApplication"
 	RejectApplication   ApplicationEventType = "RejectApplication"
 	CompleteApplication ApplicationEventType = "CompleteApplication"
diff --git a/pkg/common/events/states.go b/pkg/common/events/states.go
index 0af94d76b..991bdcad4 100644
--- a/pkg/common/events/states.go
+++ b/pkg/common/events/states.go
@@ -44,6 +44,7 @@ type ApplicationStates struct {
 	Recovering string
 	Submitted  string
 	Accepted   string
+	Reserving  string
 	Running    string
 	Rejected   string
 	Completed  string
@@ -84,6 +85,7 @@ func States() *AllStates {
 				Recovering: "Recovering",
 				Submitted:  "Submitted",
 				Accepted:   "Accepted",
+				Reserving:  "Reserving",
 				Running:    "Running",
 				Rejected:   "Rejected",
 				Completed:  "Completed",
diff --git a/pkg/common/utils/gang_utils.go b/pkg/common/utils/gang_utils.go
new file mode 100644
index 000000000..f3f9e8ba1
--- /dev/null
+++ b/pkg/common/utils/gang_utils.go
@@ -0,0 +1,144 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package utils
+
+import (
+	"fmt"
+	"sync"
+
+	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
+
+	"github.com/apache/incubator-yunikorn-k8shim/pkg/apis/yunikorn.apache.org/v1alpha1"
+	"github.com/apache/incubator-yunikorn-k8shim/pkg/common/constants"
+)
+
+func FindAppTaskGroup(appTaskGroups []*v1alpha1.TaskGroup, groupName string) (*v1alpha1.TaskGroup, error) {
+	if groupName == "" {
+		// task has no group defined
+		return nil, nil
+	}
+
+	// app has no taskGroups associated
+	if len(appTaskGroups) == 0 {
+		return nil, nil
+	}
+
+	// task group defined in app, return the corresponding taskGroup
+	for _, tg := range appTaskGroups {
+		if tg.Name == groupName {
+			return tg, nil
+		}
+	}
+
+	// task group name specified, but could not find a mapping value in app taskGroups
+	return nil, fmt.Errorf("taskGroup %s is not defined in the application", groupName)
+}
+
+// the placeholder name is the pod name, pod name can not be longer than 63 chars
+// taskGroup name and appID will be truncated if they go over 20/28 chars respectively
+func GeneratePlaceholderName(taskGroupName, appID string, index int32) string {
+	// taskGroup name no longer than 20 chars
+	// appID no longer than 35 chars
+	// total length no longer than 20 + 28 + 5 + 10 = 63
+	shortTaskGroupName := fmt.Sprintf("%.20s", taskGroupName)
+	shortAppID := fmt.Sprintf("%.28s", appID)
+	return "tg-" + shortTaskGroupName + "-" + shortAppID + fmt.Sprintf("-%d", index)
+}
+
+func GetPlaceholderResourceRequest(resources map[string]resource.Quantity) v1.ResourceList {
+	resourceReq := v1.ResourceList{}
+	for k, v := range resources {
+		resourceReq[v1.ResourceName(k)] = v
+	}
+	return resourceReq
+}
+
+func GetTaskGroupFromPodSpec(pod *v1.Pod) string {
+	if value, ok := pod.Annotations[constants.AnnotationTaskGroupName]; ok {
+		return value
+	}
+	return ""
+}
+
+type TaskGroupInstanceCountMap struct {
+	counts map[string]int32
+	sync.RWMutex
+}
+
+func NewTaskGroupInstanceCountMap() *TaskGroupInstanceCountMap {
+	return &TaskGroupInstanceCountMap{
+		counts: make(map[string]int32),
+	}
+}
+
+func (t *TaskGroupInstanceCountMap) Add(taskGroupName string, num int32) {
+	t.update(taskGroupName, num)
+}
+
+func (t *TaskGroupInstanceCountMap) AddOne(taskGroupName string) {
+	t.update(taskGroupName, 1)
+}
+
+func (t *TaskGroupInstanceCountMap) DeleteOne(taskGroupName string) {
+	t.update(taskGroupName, -1)
+}
+
+func (t *TaskGroupInstanceCountMap) update(taskGroupName string, delta int32) {
+	t.Lock()
+	defer t.Unlock()
+	if v, ok := t.counts[taskGroupName]; ok {
+		t.counts[taskGroupName] = v + delta
+	} else {
+		t.counts[taskGroupName] = delta
+	}
+}
+
+func (t *TaskGroupInstanceCountMap) Size() int {
+	t.RLock()
+	defer t.RUnlock()
+	return len(t.counts)
+}
+
+func (t *TaskGroupInstanceCountMap) GetTaskGroupInstanceCount(groupName string) int32 {
+	t.RLock()
+	defer t.RUnlock()
+	return t.counts[groupName]
+}
+
+func (t *TaskGroupInstanceCountMap) Equals(target *TaskGroupInstanceCountMap) bool {
+	t.RLock()
+	defer t.RUnlock()
+
+	if target == nil {
+		return false
+	}
+
+	if t.Size() != target.Size() {
+		return false
+	}
+
+	for k, v := range t.counts {
+		if target.counts[k] != v {
+			return false
+		}
+	}
+
+	return true
+}
diff --git a/pkg/common/utils/gang_utils_test.go b/pkg/common/utils/gang_utils_test.go
new file mode 100644
index 000000000..33d7ddc2e
--- /dev/null
+++ b/pkg/common/utils/gang_utils_test.go
@@ -0,0 +1,184 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package utils
+
+import (
+	"math"
+	"testing"
+
+	"gotest.tools/assert"
+	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+
+	"github.com/apache/incubator-yunikorn-k8shim/pkg/apis/yunikorn.apache.org/v1alpha1"
+	"github.com/apache/incubator-yunikorn-k8shim/pkg/common/constants"
+)
+
+func TestFindAppTaskGroup(t *testing.T) {
+	taskGroups := []*v1alpha1.TaskGroup{
+		{
+			Name:      "test-group-0",
+			MinMember: 1,
+			MinResource: map[string]resource.Quantity{
+				"cpu": resource.MustParse("500m"),
+			},
+		},
+		{
+			Name:      "test-group-1",
+			MinMember: 2,
+			MinResource: map[string]resource.Quantity{
+				"cpu": resource.MustParse("500m"),
+			},
+		},
+	}
+
+	tg, err := FindAppTaskGroup(taskGroups, "")
+	assert.NilError(t, err)
+	assert.Assert(t, tg == nil)
+
+	tg, err = FindAppTaskGroup(nil, "")
+	assert.NilError(t, err)
+	assert.Assert(t, tg == nil)
+
+	tg, err = FindAppTaskGroup(nil, "test-group-0")
+	assert.NilError(t, err)
+	assert.Assert(t, tg == nil)
+
+	tg, err = FindAppTaskGroup(taskGroups, "test-group-3")
+	assert.Error(t, err, "taskGroup test-group-3 is not defined in the application")
+	assert.Assert(t, tg == nil)
+
+	tg, err = FindAppTaskGroup(taskGroups, "test-group-1")
+	assert.NilError(t, err)
+	assert.Equal(t, tg.Name, "test-group-1")
+	assert.Equal(t, tg.MinMember, int32(2))
+}
+
+func TestGeneratePlaceholderName(t *testing.T) {
+	name := GeneratePlaceholderName("my-group", "app0001", 100)
+	assert.Equal(t, name, "tg-my-group-app0001-100")
+
+	name = GeneratePlaceholderName("my-group",
+		"app00000000000000000000000000000000000000000001", 100)
+	assert.Equal(t, name, "tg-my-group-app0000000000000000000000000-100")
+	assert.Assert(t, len(name) < 63)
+
+	name = GeneratePlaceholderName("a-very-long-task-group-name------------------------------------------",
+		"a-very-long-app-ID-----------------------------------------------------------------", 100)
+	assert.Equal(t, name, "tg-a-very-long-task-gro-a-very-long-app-ID-----------100")
+	assert.Assert(t, len(name) < 63)
+
+	name = GeneratePlaceholderName("a-very-long-task-group-name------------------------------------------",
+		"a-very-long-app-ID-----------------------------------------------------------------", math.MaxInt32)
+	assert.Equal(t, name, "tg-a-very-long-task-gro-a-very-long-app-ID-----------2147483647")
+	assert.Assert(t, len(name) == 63)
+}
+
+func TestGetTaskGroupFromPodSpec(t *testing.T) {
+	pod := &v1.Pod{
+		TypeMeta: metav1.TypeMeta{
+			Kind:       "Pod",
+			APIVersion: "v1",
+		},
+		ObjectMeta: metav1.ObjectMeta{
+			Name: "pod-01",
+			UID:  "UID-01",
+			Annotations: map[string]string{
+				constants.AnnotationTaskGroupName: "test-task-group",
+			},
+		},
+	}
+
+	assert.Equal(t, GetTaskGroupFromPodSpec(pod), "test-task-group")
+
+	pod = &v1.Pod{
+		TypeMeta: metav1.TypeMeta{
+			Kind:       "Pod",
+			APIVersion: "v1",
+		},
+		ObjectMeta: metav1.ObjectMeta{
+			Name: "pod-01",
+			UID:  "UID-01",
+		},
+	}
+
+	assert.Equal(t, GetTaskGroupFromPodSpec(pod), "")
+}
+
+func TestTaskGroupInstanceCountMap(t *testing.T) {
+	counts := NewTaskGroupInstanceCountMap()
+	assert.Equal(t, counts.Size(), 0)
+	assert.Equal(t, counts.GetTaskGroupInstanceCount("g1"), int32(0))
+	assert.Equal(t, counts.GetTaskGroupInstanceCount("g2"), int32(0))
+
+	counts = NewTaskGroupInstanceCountMap()
+	counts.AddOne("g1")
+	counts.AddOne("g1")
+	counts.AddOne("g1")
+	counts.AddOne("g2")
+	counts.AddOne("g2")
+	assert.Equal(t, counts.Size(), 2)
+	assert.Equal(t, counts.GetTaskGroupInstanceCount("g1"), int32(3))
+	assert.Equal(t, counts.GetTaskGroupInstanceCount("g2"), int32(2))
+
+	counts.DeleteOne("g1")
+	counts.DeleteOne("g2")
+	assert.Equal(t, counts.Size(), 2)
+	assert.Equal(t, counts.GetTaskGroupInstanceCount("g1"), int32(2))
+	assert.Equal(t, counts.GetTaskGroupInstanceCount("g2"), int32(1))
+
+	counts1 := NewTaskGroupInstanceCountMap()
+	counts2 := NewTaskGroupInstanceCountMap()
+	assert.Equal(t, counts1.Equals(counts2), true)
+	counts1.Add("g1", 101)
+	counts2.Add("g1", 101)
+	assert.Equal(t, counts1.Equals(counts2), true)
+	counts1.Add("g1", 100)
+	counts2.Add("g1", 101)
+	assert.Equal(t, counts1.Equals(counts2), false)
+
+	counts1 = NewTaskGroupInstanceCountMap()
+	counts2 = NewTaskGroupInstanceCountMap()
+	counts1.AddOne("g1")
+	counts1.AddOne("g2")
+	counts1.AddOne("g3")
+	counts1.AddOne("g4")
+	counts1.AddOne("g5")
+	counts2.AddOne("g5")
+	counts2.AddOne("g4")
+	counts2.AddOne("g3")
+	counts2.AddOne("g2")
+	counts2.AddOne("g1")
+	assert.Equal(t, counts1.Equals(counts2), true)
+
+	counts1 = NewTaskGroupInstanceCountMap()
+	counts2 = NewTaskGroupInstanceCountMap()
+	counts1.AddOne("g1")
+	counts1.AddOne("g2")
+	counts2.AddOne("g1")
+	assert.Equal(t, counts1.Equals(counts2), false)
+
+	counts1 = NewTaskGroupInstanceCountMap()
+	counts2 = NewTaskGroupInstanceCountMap()
+	counts1.AddOne("g1")
+	counts2.AddOne("g2")
+	counts2.AddOne("g1")
+	assert.Equal(t, counts1.Equals(counts2), false)
+}

From e78ce072c2a9ab6bfad82c56622c2d704d7e3ef5 Mon Sep 17 00:00:00 2001
From: Ting-Yao Huang <61864060+HuangTing-Yao@users.noreply.github.com>
Date: Sat, 31 Oct 2020 07:59:40 +0800
Subject: [PATCH 04/15] [YUNIKORN-439] Parse taskGroup info from pod annotation
 (#204)

Co-authored-by: Weiwei Yang <wyang@cloudera.com>
---
 pkg/apis/yunikorn.apache.org/v1alpha1/type.go |   2 +-
 .../v1alpha1/zz_generated.deepcopy.go         |   8 +-
 pkg/cache/application_test.go                 |  11 +-
 pkg/common/constants/constants.go             |   1 +
 pkg/common/utils/utils.go                     |  28 ++++
 pkg/common/utils/utils_test.go                | 157 ++++++++++++++++++
 6 files changed, 196 insertions(+), 11 deletions(-)

diff --git a/pkg/apis/yunikorn.apache.org/v1alpha1/type.go b/pkg/apis/yunikorn.apache.org/v1alpha1/type.go
index a21497823..f06701e28 100644
--- a/pkg/apis/yunikorn.apache.org/v1alpha1/type.go
+++ b/pkg/apis/yunikorn.apache.org/v1alpha1/type.go
@@ -61,7 +61,7 @@ type TaskGroup struct {
 	Name         string                       `json:"name"`
 	MinMember    int32                        `json:"minMember"`
 	MinResource  map[string]resource.Quantity `json:"minResource"`
-	NodeSelector metav1.LabelSelector         `json:"nodeSelector,omitempty"`
+	NodeSelector map[string]string            `json:"nodeSelector,omitempty"`
 	Tolerations  []v1.Toleration              `json:"tolerations,omitempty"`
 }
 
diff --git a/pkg/apis/yunikorn.apache.org/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/yunikorn.apache.org/v1alpha1/zz_generated.deepcopy.go
index bebfec861..3149d782c 100644
--- a/pkg/apis/yunikorn.apache.org/v1alpha1/zz_generated.deepcopy.go
+++ b/pkg/apis/yunikorn.apache.org/v1alpha1/zz_generated.deepcopy.go
@@ -162,7 +162,13 @@ func (in *TaskGroup) DeepCopyInto(out *TaskGroup) {
 			(*out)[key] = val.DeepCopy()
 		}
 	}
-	in.NodeSelector.DeepCopyInto(&out.NodeSelector)
+	if in.NodeSelector != nil {
+		in, out := &in.NodeSelector, &out.NodeSelector
+		*out = make(map[string]string, len(*in))
+		for key, val := range *in {
+			(*out)[key] = val
+		}
+	}
 	if in.Tolerations != nil {
 		in, out := &in.Tolerations, &out.Tolerations
 		*out = make([]v1.Toleration, len(*in))
diff --git a/pkg/cache/application_test.go b/pkg/cache/application_test.go
index aa757f347..327c74077 100644
--- a/pkg/cache/application_test.go
+++ b/pkg/cache/application_test.go
@@ -30,7 +30,6 @@ import (
 	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	apis "k8s.io/apimachinery/pkg/apis/meta/v1"
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 
 	"github.com/apache/incubator-yunikorn-core/pkg/api"
 	"github.com/apache/incubator-yunikorn-k8shim/pkg/apis/yunikorn.apache.org/v1alpha1"
@@ -242,14 +241,8 @@ func TestSetTaskGroupsAndSchedulingPolicy(t *testing.T) {
 				v1.ResourceCPU.String():    resource.MustParse("1000m"),
 				v1.ResourceMemory.String(): resource.MustParse("1000Mi"),
 			},
-			NodeSelector: metav1.LabelSelector{
-				MatchExpressions: []metav1.LabelSelectorRequirement{
-					{
-						Key:      "security",
-						Operator: metav1.LabelSelectorOpIn,
-						Values:   []string{"S1", "value2"},
-					},
-				},
+			NodeSelector: map[string]string{
+				"locate": "west",
 			},
 			Tolerations: []v1.Toleration{
 				{
diff --git a/pkg/common/constants/constants.go b/pkg/common/constants/constants.go
index 928febacd..580ed4721 100644
--- a/pkg/common/constants/constants.go
+++ b/pkg/common/constants/constants.go
@@ -51,6 +51,7 @@ const SchedulerName = "yunikorn"
 // Application crd
 const AppManagerHandlerName = "yunikorn-app"
 const AnnotationTaskGroupName = "yunikorn.apache.org/task-group-name"
+const AnnotationTaskGroups = "yunikorn.apache.org/task-groups"
 
 // Gang scheduling
 const PlaceholderContainerImage = "k8s.gcr.io/pause"
diff --git a/pkg/common/utils/utils.go b/pkg/common/utils/utils.go
index afe8f799a..4aa6e4cd8 100644
--- a/pkg/common/utils/utils.go
+++ b/pkg/common/utils/utils.go
@@ -15,9 +15,11 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 */
+
 package utils
 
 import (
+	"encoding/json"
 	"fmt"
 	"strings"
 	"time"
@@ -27,6 +29,7 @@ import (
 	apis "k8s.io/apimachinery/pkg/apis/meta/v1"
 	podv1 "k8s.io/kubernetes/pkg/api/v1/pod"
 
+	"github.com/apache/incubator-yunikorn-k8shim/pkg/apis/yunikorn.apache.org/v1alpha1"
 	"github.com/apache/incubator-yunikorn-k8shim/pkg/common"
 	"github.com/apache/incubator-yunikorn-k8shim/pkg/common/constants"
 	"github.com/apache/incubator-yunikorn-scheduler-interface/lib/go/si"
@@ -106,6 +109,31 @@ func GetApplicationIDFromPod(pod *v1.Pod) (string, error) {
 		pod.Spec.String())
 }
 
+func GetTaskGroupsFromAnnotation(pod *v1.Pod) ([]v1alpha1.TaskGroup, error) {
+	taskGroupInfo, ok := pod.Annotations[constants.AnnotationTaskGroups]
+	if !ok {
+		return nil, fmt.Errorf("unable to extract taskGroups info from pod annotations, %s",
+			pod.Annotations)
+	}
+	taskGroups := []v1alpha1.TaskGroup{}
+	err := json.Unmarshal([]byte(taskGroupInfo), &taskGroups)
+	if err != nil {
+		return nil, err
+	}
+	// json.Unmarchal won't return error if name or MinMember is empty, but will return error if MinResource is empty or error format.
+	for _, taskGroup := range taskGroups {
+		if taskGroup.Name == "" {
+			return nil, fmt.Errorf("can't get taskGroup Name from pod annotation, %s",
+				pod.Annotations[constants.AnnotationTaskGroups])
+		}
+		if taskGroup.MinMember == int32(0) {
+			return nil, fmt.Errorf("can't get taskGroup MinMember from pod annotation, %s",
+				pod.Annotations[constants.AnnotationTaskGroups])
+		}
+	}
+	return taskGroups, nil
+}
+
 // compare the existing pod condition with the given one, return true if the pod condition remains not changed.
 // return false if pod has no condition set yet, or condition has changed.
 func PodUnderCondition(pod *v1.Pod, condition *v1.PodCondition) bool {
diff --git a/pkg/common/utils/utils_test.go b/pkg/common/utils/utils_test.go
index e296b803c..28dcc7b92 100644
--- a/pkg/common/utils/utils_test.go
+++ b/pkg/common/utils/utils_test.go
@@ -25,6 +25,7 @@ import (
 
 	"gotest.tools/assert"
 	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 
 	"github.com/apache/incubator-yunikorn-k8shim/pkg/common"
@@ -331,3 +332,159 @@ func TestGetApplicationIDFromPod(t *testing.T) {
 		})
 	}
 }
+
+// nolint: funlen
+func TestGetTaskGroupFromAnnotation(t *testing.T) {
+	// correct json
+	testGroup := `
+	[
+		{
+			"name": "test-group-1",
+			"minMember": 10,
+			"minResource": {
+				"cpu": 1,
+				"memory": "2Gi"
+			},
+			"nodeSelector": {
+				"test": "testnode",
+				"locate": "west"
+			},
+			"tolerations": [
+				{
+					"key": "key",
+					"operator": "Equal",
+					"value": "value",
+					"effect": "NoSchedule"
+				}
+			]
+		},
+		{
+			"name": "test-group-2",
+			"minMember": 5,
+			"minResource": {
+				"cpu": 2,
+				"memory": "4Gi"
+			}
+		}
+	]`
+	testGroup2 := `
+	[
+		{
+			"name": "test-group-3",
+			"minMember": 3,
+			"minResource": {
+				"cpu": 2,
+				"memory": "1Gi"
+			}
+		}
+	]`
+	// Error json
+	testGroupErr := `
+	[
+		{
+			"name": "test-group-err-1",
+			"minMember": "ERR",
+			"minResource": {
+				"cpu": "ERR",
+				"memory": "ERR"
+			},
+		}
+	]`
+	// without name
+	testGroupErr2 := `
+	[
+		{
+			"minMember": 3,
+			"minResource": {
+				"cpu": 2,
+				"memory": "1Gi"
+			}
+		}
+	]`
+	// without minMember
+	testGroupErr3 := `
+	[
+		{
+			"name": "test-group-err-2",
+			"minResource": {
+				"cpu": 2,
+				"memory": "1Gi"
+			}
+		}
+	]`
+	// withot minResource
+	testGroupErr4 := `
+	[
+		{
+			"name": "test-group-err-3",
+			"minMember": 3,
+		}
+	]`
+	// Insert task group info to pod annotation
+	pod := &v1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "test-pod-err",
+			Namespace: "test",
+			UID:       "test-pod-UID-err",
+		},
+		Spec: v1.PodSpec{},
+		Status: v1.PodStatus{
+			Phase: v1.PodPending,
+		},
+	}
+	// Empty case
+	taskGroupEmpty, err := GetTaskGroupsFromAnnotation(pod)
+	assert.Assert(t, taskGroupEmpty == nil)
+	assert.Assert(t, err != nil)
+	// Error case
+	pod.Annotations = map[string]string{constants.AnnotationTaskGroups: testGroupErr}
+	taskGroupErr, err := GetTaskGroupsFromAnnotation(pod)
+	assert.Assert(t, taskGroupErr == nil)
+	assert.Assert(t, err != nil)
+	pod.Annotations = map[string]string{constants.AnnotationTaskGroups: testGroupErr2}
+	taskGroupErr2, err := GetTaskGroupsFromAnnotation(pod)
+	assert.Assert(t, taskGroupErr2 == nil)
+	assert.Assert(t, err != nil)
+	pod.Annotations = map[string]string{constants.AnnotationTaskGroups: testGroupErr3}
+	taskGroupErr3, err := GetTaskGroupsFromAnnotation(pod)
+	assert.Assert(t, taskGroupErr3 == nil)
+	assert.Assert(t, err != nil)
+	pod.Annotations = map[string]string{constants.AnnotationTaskGroups: testGroupErr4}
+	taskGroupErr4, err := GetTaskGroupsFromAnnotation(pod)
+	assert.Assert(t, taskGroupErr4 == nil)
+	assert.Assert(t, err != nil)
+	// Correct case
+	pod.Annotations = map[string]string{constants.AnnotationTaskGroups: testGroup}
+	taskGroups, err := GetTaskGroupsFromAnnotation(pod)
+	assert.NilError(t, err)
+	// Group value check
+	assert.Equal(t, taskGroups[0].Name, "test-group-1")
+	assert.Equal(t, taskGroups[0].MinMember, int32(10))
+	assert.Equal(t, taskGroups[0].MinResource["cpu"], resource.MustParse("1"))
+	assert.Equal(t, taskGroups[0].MinResource["memory"], resource.MustParse("2Gi"))
+	assert.Equal(t, taskGroups[1].Name, "test-group-2")
+	assert.Equal(t, taskGroups[1].MinMember, int32(5))
+	assert.Equal(t, taskGroups[1].MinResource["cpu"], resource.MustParse("2"))
+	assert.Equal(t, taskGroups[1].MinResource["memory"], resource.MustParse("4Gi"))
+	// NodeSelector check
+	assert.Equal(t, taskGroups[0].NodeSelector["test"], "testnode")
+	assert.Equal(t, taskGroups[0].NodeSelector["locate"], "west")
+	// Toleration check
+	var tolerations []v1.Toleration
+	toleration := v1.Toleration{
+		Key:      "key",
+		Operator: "Equal",
+		Value:    "value",
+		Effect:   "NoSchedule",
+	}
+	tolerations = append(tolerations, toleration)
+	assert.DeepEqual(t, taskGroups[0].Tolerations, tolerations)
+
+	pod.Annotations = map[string]string{constants.AnnotationTaskGroups: testGroup2}
+	taskGroups2, err := GetTaskGroupsFromAnnotation(pod)
+	assert.NilError(t, err)
+	assert.Equal(t, taskGroups2[0].Name, "test-group-3")
+	assert.Equal(t, taskGroups2[0].MinMember, int32(3))
+	assert.Equal(t, taskGroups2[0].MinResource["cpu"], resource.MustParse("2"))
+	assert.Equal(t, taskGroups2[0].MinResource["memory"], resource.MustParse("1Gi"))
+}

From 65b69defdf4c86f2e2b7dac9504b7072233739e7 Mon Sep 17 00:00:00 2001
From: Ting-Yao Huang <61864060+HuangTing-Yao@users.noreply.github.com>
Date: Tue, 10 Nov 2020 02:01:27 +0800
Subject: [PATCH 05/15] [YUNIKORN-451] Fix application CRD definition (#205)

---
 .../application-definition.yaml               | 26 +++++++++--
 .../sample-yunikorn-app.yaml                  | 13 ++++--
 test/e2e/app/app_test.go                      | 45 +++++++++++++++----
 test/e2e/framework/helpers/k8s/k8s_utils.go   | 17 +++++++
 test/e2e/testdata/{ => app}/application.yaml  |  7 +++
 .../app/application_err_minmember.yaml        | 32 +++++++++++++
 .../app/application_err_minresource.yaml      | 32 +++++++++++++
 .../testdata/app/application_err_name.yaml    | 32 +++++++++++++
 .../app/application_err_nodeselector.yaml     | 33 ++++++++++++++
 .../application_err_queue.yaml}               |  4 +-
 .../app/application_err_tolerations.yaml      | 36 +++++++++++++++
 11 files changed, 261 insertions(+), 16 deletions(-)
 rename test/e2e/testdata/{ => app}/application.yaml (86%)
 create mode 100644 test/e2e/testdata/app/application_err_minmember.yaml
 create mode 100644 test/e2e/testdata/app/application_err_minresource.yaml
 create mode 100644 test/e2e/testdata/app/application_err_name.yaml
 create mode 100644 test/e2e/testdata/app/application_err_nodeselector.yaml
 rename test/e2e/testdata/{application_error.yaml => app/application_err_queue.yaml} (95%)
 create mode 100644 test/e2e/testdata/app/application_err_tolerations.yaml

diff --git a/deployments/yunikorn-application/application-definition.yaml b/deployments/yunikorn-application/application-definition.yaml
index 1b0ee81c2..7d78c37fc 100644
--- a/deployments/yunikorn-application/application-definition.yaml
+++ b/deployments/yunikorn-application/application-definition.yaml
@@ -50,10 +50,10 @@ spec:
         spec:
           type: object
           properties:
-            policy:
+            schedulingPolicy:
               type: object
               properties:
-                policy:
+                type:
                   type: string
                 parameters:
                   type: object
@@ -67,7 +67,7 @@ spec:
               items:
                 type: object
                 properties:
-                  groupName: 
+                  name: 
                     type: string
                   minMember:
                     type: integer
@@ -79,6 +79,26 @@ spec:
                       - type: string
                       pattern: '^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$'
                       x-kubernetes-int-or-string: true 
+                  nodeSelector:
+                    type: object
+                    additionalProperties:
+                      type: string
+                  tolerations:
+                    type: array
+                    items:
+                      type: object
+                      properties:
+                        effect:
+                          type: string
+                        key:
+                          type: string
+                        operator:
+                          type: string
+                        value:
+                          type: string
+                        tolerationSeconds:
+                          format: int64
+                          type: integer             
         status:
           type: object
           properties:
diff --git a/deployments/yunikorn-application/sample-yunikorn-app.yaml b/deployments/yunikorn-application/sample-yunikorn-app.yaml
index 5f581a682..618aca6e9 100644
--- a/deployments/yunikorn-application/sample-yunikorn-app.yaml
+++ b/deployments/yunikorn-application/sample-yunikorn-app.yaml
@@ -21,18 +21,25 @@ metadata:
   name: example
 spec:
   schedulingPolicy:
-    name: TryOnce
+    type: TryOnce
     parameters:
       timeout: "60"
       retrySecond: "0"
   queue: root.default
   taskGroups:
-    - groupName: "test-task-0001"
+    - name: "test-task-0001"
       minMember: 1
       minResource:
         cpu: "300m"
         memory: "128Mi"
-    - groupName: "test-task-0002"
+      nodeSelector:
+        locate: west
+      tolerations:
+        - key: "key"
+          operator: "Equal"
+          value: "value"
+          effect: "NoSchedule"
+    - name: "test-task-0002"
       minMember: 2
       minResource:
         cpu: "600m"
diff --git a/test/e2e/app/app_test.go b/test/e2e/app/app_test.go
index f3b7b8075..ac426225b 100644
--- a/test/e2e/app/app_test.go
+++ b/test/e2e/app/app_test.go
@@ -34,7 +34,7 @@ import (
 var _ = ginkgo.Describe("App", func() {
 	var kClient k8s.KubeCtl
 	var appClient *crdclientset.Clientset
-	var appCRDDef string
+	// var appCRDDef string
 	var appCRD *v1alpha1.Application
 	var dev = "apptest"
 
@@ -51,17 +51,41 @@ var _ = ginkgo.Describe("App", func() {
 		appClient, err = yunikorn.NewApplicationClient()
 		gomega.Ω(err).NotTo(gomega.HaveOccurred())
 		// error test case
-		apperrDef, err := common.GetAbsPath("../testdata/application_error.yaml")
+		// error queue format
+		appCRDDef, err := common.GetAbsPath("../testdata/app/application_err_queue.yaml")
 		gomega.Ω(err).NotTo(gomega.HaveOccurred())
-		_, err = yunikorn.GetApplicationObj(apperrDef)
+		err = k8s.ApplyYamlWithKubectl(appCRDDef, dev)
 		gomega.Ω(err).To(gomega.HaveOccurred())
-		// correct test case
-		appCRDDef, err = common.GetAbsPath("../testdata/application.yaml")
+		// error gang name format
+		appCRDDef, err = common.GetAbsPath("../testdata/app/application_err_name.yaml")
+		gomega.Ω(err).NotTo(gomega.HaveOccurred())
+		err = k8s.ApplyYamlWithKubectl(appCRDDef, dev)
+		gomega.Ω(err).To(gomega.HaveOccurred())
+		// error minMember format
+		appCRDDef, err = common.GetAbsPath("../testdata/app/application_err_minmember.yaml")
+		gomega.Ω(err).NotTo(gomega.HaveOccurred())
+		err = k8s.ApplyYamlWithKubectl(appCRDDef, dev)
+		gomega.Ω(err).To(gomega.HaveOccurred())
+		// error minResource format
+		appCRDDef, err = common.GetAbsPath("../testdata/app/application_err_minresource.yaml")
+		gomega.Ω(err).NotTo(gomega.HaveOccurred())
+		err = k8s.ApplyYamlWithKubectl(appCRDDef, dev)
+		gomega.Ω(err).To(gomega.HaveOccurred())
+		// error NodeSelector format
+		appCRDDef, err = common.GetAbsPath("../testdata/app/application_err_nodeselector.yaml")
+		gomega.Ω(err).NotTo(gomega.HaveOccurred())
+		err = k8s.ApplyYamlWithKubectl(appCRDDef, dev)
+		gomega.Ω(err).To(gomega.HaveOccurred())
+		// error tolerations format
+		appCRDDef, err = common.GetAbsPath("../testdata/app/application_err_tolerations.yaml")
 		gomega.Ω(err).NotTo(gomega.HaveOccurred())
-		appCRDObj, err := yunikorn.GetApplicationObj(appCRDDef)
+		err = k8s.ApplyYamlWithKubectl(appCRDDef, dev)
+		gomega.Ω(err).To(gomega.HaveOccurred())
+
+		// correct test case
+		appCRDDef, err = common.GetAbsPath("../testdata/app/application.yaml")
 		gomega.Ω(err).NotTo(gomega.HaveOccurred())
-		appCRDObj.Namespace = dev
-		err = yunikorn.CreateApplication(appClient, appCRDObj, dev)
+		err = k8s.ApplyYamlWithKubectl(appCRDDef, dev)
 		gomega.Ω(err).NotTo(gomega.HaveOccurred())
 		appCRD, err = yunikorn.GetApplication(appClient, dev, "example")
 		gomega.Ω(err).NotTo(gomega.HaveOccurred())
@@ -82,6 +106,11 @@ var _ = ginkgo.Describe("App", func() {
 			ansmem := resource.MustParse("128Mi")
 			gomega.Ω(appCRD.Spec.TaskGroups[0].MinResource["cpu"]).To(gomega.Equal(anscpu))
 			gomega.Ω(appCRD.Spec.TaskGroups[0].MinResource["memory"]).To(gomega.Equal(ansmem))
+			gomega.Ω(appCRD.Spec.TaskGroups[0].NodeSelector["locate"]).To(gomega.Equal("west"))
+			gomega.Ω(appCRD.Spec.TaskGroups[0].Tolerations[0].Key).To(gomega.Equal("key"))
+			gomega.Ω(appCRD.Spec.TaskGroups[0].Tolerations[0].Operator).To(gomega.Equal(v1.TolerationOpEqual))
+			gomega.Ω(appCRD.Spec.TaskGroups[0].Tolerations[0].Value).To(gomega.Equal("value"))
+			gomega.Ω(appCRD.Spec.TaskGroups[0].Tolerations[0].Effect).To(gomega.Equal(v1.TaintEffectNoSchedule))
 		})
 
 		ginkgo.AfterSuite(func() {
diff --git a/test/e2e/framework/helpers/k8s/k8s_utils.go b/test/e2e/framework/helpers/k8s/k8s_utils.go
index 399a59e17..9d19d45cd 100644
--- a/test/e2e/framework/helpers/k8s/k8s_utils.go
+++ b/test/e2e/framework/helpers/k8s/k8s_utils.go
@@ -17,9 +17,11 @@
 package k8s
 
 import (
+	"bytes"
 	"errors"
 	"fmt"
 	"os"
+	"os/exec"
 	"path/filepath"
 	"time"
 
@@ -472,3 +474,18 @@ func (k *KubeCtl) RemoveYunikornSchedulerPodAnnotation(annotation string) error
 	_, err = k.DeletePodAnnotation(&schedPod, configmanager.YuniKornTestConfig.YkNamespace, annotation)
 	return err
 }
+
+func ApplyYamlWithKubectl(path, namespace string) error {
+	cmd := exec.Command("kubectl", "apply", "-f", path, "-n", namespace)
+	var stderr bytes.Buffer
+	cmd.Stderr = &stderr
+	// if err != nil, isn't represent yaml format error.
+	// it only represent the cmd.Run() fail.
+	err := cmd.Run()
+	// if yaml format error, errStr will show the detail
+	errStr := stderr.String()
+	if err != nil && errStr != "" {
+		return fmt.Errorf("apply fail with %s", errStr)
+	}
+	return nil
+}
diff --git a/test/e2e/testdata/application.yaml b/test/e2e/testdata/app/application.yaml
similarity index 86%
rename from test/e2e/testdata/application.yaml
rename to test/e2e/testdata/app/application.yaml
index ef6345517..4d473548a 100644
--- a/test/e2e/testdata/application.yaml
+++ b/test/e2e/testdata/app/application.yaml
@@ -29,4 +29,11 @@ spec:
       minResource:
         cpu: "300m"
         memory: "128Mi"
+      nodeSelector:
+        locate: west
+      tolerations:
+        - key: "key"
+          operator: "Equal"
+          value: "value"
+          effect: "NoSchedule"
   
\ No newline at end of file
diff --git a/test/e2e/testdata/app/application_err_minmember.yaml b/test/e2e/testdata/app/application_err_minmember.yaml
new file mode 100644
index 000000000..f6ef954cc
--- /dev/null
+++ b/test/e2e/testdata/app/application_err_minmember.yaml
@@ -0,0 +1,32 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# 
+#The error application example for error case
+apiVersion: "yunikorn.apache.org/v1alpha1"
+kind: Application
+metadata:
+  name: error-minmember
+spec:
+  queue: root.default
+  schedulingPolicy:
+    type: TryReserve
+  taskGroups:
+    - name: "test-task-0001"
+      minMember: "string"
+      minResource:
+        cpu: "300m"
+        memory: "128Mi"
\ No newline at end of file
diff --git a/test/e2e/testdata/app/application_err_minresource.yaml b/test/e2e/testdata/app/application_err_minresource.yaml
new file mode 100644
index 000000000..510f4897b
--- /dev/null
+++ b/test/e2e/testdata/app/application_err_minresource.yaml
@@ -0,0 +1,32 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# 
+#The error application example for error case
+apiVersion: "yunikorn.apache.org/v1alpha1"
+kind: Application
+metadata:
+  name: error-minResource
+spec:
+  queue: root.default
+  schedulingPolicy:
+    type: TryReserve
+  taskGroups:
+    - name: "test-task-0001"
+      minMember: 1
+      minResource:
+        cpu: "300A"
+        memory: "128Mi"
\ No newline at end of file
diff --git a/test/e2e/testdata/app/application_err_name.yaml b/test/e2e/testdata/app/application_err_name.yaml
new file mode 100644
index 000000000..eaad3af15
--- /dev/null
+++ b/test/e2e/testdata/app/application_err_name.yaml
@@ -0,0 +1,32 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# 
+#The error application example for error case
+apiVersion: "yunikorn.apache.org/v1alpha1"
+kind: Application
+metadata:
+  name: error-name
+spec:
+  queue: root.default
+  schedulingPolicy:
+    type: TryReserve
+  taskGroups:
+    - name: 100
+      minMember: 1
+      minResource:
+        cpu: "300m"
+        memory: "128Mi"
\ No newline at end of file
diff --git a/test/e2e/testdata/app/application_err_nodeselector.yaml b/test/e2e/testdata/app/application_err_nodeselector.yaml
new file mode 100644
index 000000000..0a025988b
--- /dev/null
+++ b/test/e2e/testdata/app/application_err_nodeselector.yaml
@@ -0,0 +1,33 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# The error application example for error case
+apiVersion: "yunikorn.apache.org/v1alpha1"
+kind: Application
+metadata:
+  name: error-nodeselector
+spec:
+  schedulingPolicy:
+    type: TryOnce
+  queue: root.default
+  taskGroups:
+    - name: "test-task-0001"
+      minMember: 1
+      minResource:
+        cpu: "300m"
+        memory: "128Mi"
+      nodeSelector:
+        locate: 1
\ No newline at end of file
diff --git a/test/e2e/testdata/application_error.yaml b/test/e2e/testdata/app/application_err_queue.yaml
similarity index 95%
rename from test/e2e/testdata/application_error.yaml
rename to test/e2e/testdata/app/application_err_queue.yaml
index 5be8d6e83..5aa8afa6e 100644
--- a/test/e2e/testdata/application_error.yaml
+++ b/test/e2e/testdata/app/application_err_queue.yaml
@@ -19,14 +19,14 @@
 apiVersion: "yunikorn.apache.org/v1alpha1"
 kind: Application
 metadata:
-  name: example_test
+  name: error-queue
 spec:
   queue: //////
   schedulingPolicy:
     type: TryReserve
   taskGroups:
     - name: "test-task-0001"
-      minMember: "string"
+      minMember: 1
       minResource:
         cpu: "300m"
         memory: "128Mi"
diff --git a/test/e2e/testdata/app/application_err_tolerations.yaml b/test/e2e/testdata/app/application_err_tolerations.yaml
new file mode 100644
index 000000000..9d8ac1ee5
--- /dev/null
+++ b/test/e2e/testdata/app/application_err_tolerations.yaml
@@ -0,0 +1,36 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# The error application example for error case
+apiVersion: "yunikorn.apache.org/v1alpha1"
+kind: Application
+metadata:
+  name: error-tolerations
+spec:
+  schedulingPolicy:
+    type: TryOnce
+  queue: root.default
+  taskGroups:
+    - name: "test-task-0001"
+      minMember: 1
+      minResource:
+        cpu: "300m"
+        memory: "128Mi"
+      tolerations:
+        - key: 1
+          operator: "ERR"
+          value: "value"
+          effect: "ERR"
\ No newline at end of file

From f5fb65f0caad93bed622300ce09f352b51a3746b Mon Sep 17 00:00:00 2001
From: Ting-Yao Huang <61864060+HuangTing-Yao@users.noreply.github.com>
Date: Sat, 21 Nov 2020 07:34:37 +0800
Subject: [PATCH 06/15] [YUNIKORN-458]Retrieve app taskGroups from pod spec
 (#206)

---
 pkg/appmgmt/general/general.go       |  11 +-
 pkg/appmgmt/general/general_test.go  |  29 +++++
 pkg/appmgmt/interfaces/amprotocol.go |   4 +
 pkg/common/utils/gang_utils.go       |  25 +++++
 pkg/common/utils/gang_utils_test.go  | 156 ++++++++++++++++++++++++++
 pkg/common/utils/utils.go            |  27 -----
 pkg/common/utils/utils_test.go       | 157 ---------------------------
 7 files changed, 224 insertions(+), 185 deletions(-)

diff --git a/pkg/appmgmt/general/general.go b/pkg/appmgmt/general/general.go
index adf7e18cd..39fb2fca6 100644
--- a/pkg/appmgmt/general/general.go
+++ b/pkg/appmgmt/general/general.go
@@ -20,7 +20,7 @@ package general
 
 import (
 	"go.uber.org/zap"
-	"k8s.io/api/core/v1"
+	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/apimachinery/pkg/selection"
 	k8sCache "k8s.io/client-go/tools/cache"
@@ -88,10 +88,13 @@ func (os *Manager) getTaskMetadata(pod *v1.Pod) (interfaces.TaskMetadata, bool)
 		return interfaces.TaskMetadata{}, false
 	}
 
+	taskGroupName := utils.GetTaskGroupFromPodSpec(pod)
+
 	return interfaces.TaskMetadata{
 		ApplicationID: appId,
 		TaskID:        string(pod.UID),
 		Pod:           pod,
+		TaskGroupName: taskGroupName,
 	}, true
 }
 
@@ -114,11 +117,17 @@ func (os *Manager) getAppMetadata(pod *v1.Pod) (interfaces.ApplicationMetadata,
 	// get the application owner (this is all that is available as far as we can find)
 	user := pod.Spec.ServiceAccountName
 
+	taskGroups, err := utils.GetTaskGroupsFromAnnotation(pod)
+	if err != nil {
+		log.Logger().Error("unable to get taskGroups by given pod", zap.Error(err))
+	}
+
 	return interfaces.ApplicationMetadata{
 		ApplicationID: appId,
 		QueueName:     utils.GetQueueNameFromPod(pod),
 		User:          user,
 		Tags:          tags,
+		TaskGroups:    taskGroups,
 	}, true
 }
 
diff --git a/pkg/appmgmt/general/general_test.go b/pkg/appmgmt/general/general_test.go
index a2fdeb03d..9c0b48d80 100644
--- a/pkg/appmgmt/general/general_test.go
+++ b/pkg/appmgmt/general/general_test.go
@@ -23,6 +23,7 @@ import (
 
 	"gotest.tools/assert"
 	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
 	apis "k8s.io/apimachinery/pkg/apis/meta/v1"
 
 	"github.com/apache/incubator-yunikorn-k8shim/pkg/cache"
@@ -31,6 +32,18 @@ import (
 	"github.com/apache/incubator-yunikorn-k8shim/pkg/common/events"
 )
 
+const taskGroupInfo = `
+[
+	{
+		"name": "test-group-1",
+		"minMember": 3,
+		"minResource": {
+			"cpu": 2,
+			"memory": "1Gi"
+		}
+	}
+]`
+
 func TestGetAppMetadata(t *testing.T) {
 	am := NewManager(cache.NewMockedAMProtocol(), client.NewMockedAPIProvider())
 
@@ -47,6 +60,9 @@ func TestGetAppMetadata(t *testing.T) {
 				"applicationId": "app00001",
 				"queue":         "root.a",
 			},
+			Annotations: map[string]string{
+				constants.AnnotationTaskGroups: taskGroupInfo,
+			},
 		},
 		Spec: v1.PodSpec{SchedulerName: constants.SchedulerName},
 		Status: v1.PodStatus{
@@ -60,6 +76,10 @@ func TestGetAppMetadata(t *testing.T) {
 	assert.Equal(t, app.QueueName, "root.a")
 	assert.Equal(t, app.User, "")
 	assert.DeepEqual(t, app.Tags, map[string]string{"namespace": "default"})
+	assert.Equal(t, app.TaskGroups[0].Name, "test-group-1")
+	assert.Equal(t, app.TaskGroups[0].MinMember, int32(3))
+	assert.Equal(t, app.TaskGroups[0].MinResource["cpu"], resource.MustParse("2"))
+	assert.Equal(t, app.TaskGroups[0].MinResource["memory"], resource.MustParse("1Gi"))
 
 	pod = v1.Pod{
 		TypeMeta: apis.TypeMeta{
@@ -90,6 +110,7 @@ func TestGetAppMetadata(t *testing.T) {
 	assert.Equal(t, app.QueueName, "root.b")
 	assert.Equal(t, app.User, "bob")
 	assert.DeepEqual(t, app.Tags, map[string]string{"namespace": "app-namespace-01"})
+	assert.DeepEqual(t, len(app.TaskGroups), 0)
 
 	pod = v1.Pod{
 		TypeMeta: apis.TypeMeta{
@@ -130,6 +151,9 @@ func TestGetTaskMetadata(t *testing.T) {
 				"applicationId": "app00001",
 				"queue":         "root.a",
 			},
+			Annotations: map[string]string{
+				constants.AnnotationTaskGroupName: "test-group-01",
+			},
 		},
 		Spec: v1.PodSpec{SchedulerName: constants.SchedulerName},
 		Status: v1.PodStatus{
@@ -141,6 +165,11 @@ func TestGetTaskMetadata(t *testing.T) {
 	assert.Equal(t, ok, true)
 	assert.Equal(t, task.ApplicationID, "app00001")
 	assert.Equal(t, task.TaskID, "UID-POD-00001")
+	assert.Equal(t, task.TaskGroupName, "test-group-01")
+	pod.Annotations = map[string]string{}
+	task, ok = am.getTaskMetadata(&pod)
+	assert.Equal(t, ok, true)
+	assert.Equal(t, task.TaskGroupName, "")
 
 	pod = v1.Pod{
 		TypeMeta: apis.TypeMeta{
diff --git a/pkg/appmgmt/interfaces/amprotocol.go b/pkg/appmgmt/interfaces/amprotocol.go
index 67c824f9a..5a521931a 100644
--- a/pkg/appmgmt/interfaces/amprotocol.go
+++ b/pkg/appmgmt/interfaces/amprotocol.go
@@ -20,6 +20,8 @@ package interfaces
 
 import (
 	v1 "k8s.io/api/core/v1"
+
+	"github.com/apache/incubator-yunikorn-k8shim/pkg/apis/yunikorn.apache.org/v1alpha1"
 )
 
 // app management protocol defines all the APIs needed for app management,
@@ -72,10 +74,12 @@ type ApplicationMetadata struct {
 	QueueName     string
 	User          string
 	Tags          map[string]string
+	TaskGroups    []v1alpha1.TaskGroup
 }
 
 type TaskMetadata struct {
 	ApplicationID string
 	TaskID        string
 	Pod           *v1.Pod
+	TaskGroupName string
 }
diff --git a/pkg/common/utils/gang_utils.go b/pkg/common/utils/gang_utils.go
index f3f9e8ba1..e8615093a 100644
--- a/pkg/common/utils/gang_utils.go
+++ b/pkg/common/utils/gang_utils.go
@@ -19,6 +19,7 @@
 package utils
 
 import (
+	"encoding/json"
 	"fmt"
 	"sync"
 
@@ -77,6 +78,30 @@ func GetTaskGroupFromPodSpec(pod *v1.Pod) string {
 	return ""
 }
 
+func GetTaskGroupsFromAnnotation(pod *v1.Pod) ([]v1alpha1.TaskGroup, error) {
+	taskGroupInfo, ok := pod.Annotations[constants.AnnotationTaskGroups]
+	if !ok {
+		return nil, nil
+	}
+	taskGroups := []v1alpha1.TaskGroup{}
+	err := json.Unmarshal([]byte(taskGroupInfo), &taskGroups)
+	if err != nil {
+		return nil, err
+	}
+	// json.Unmarchal won't return error if name or MinMember is empty, but will return error if MinResource is empty or error format.
+	for _, taskGroup := range taskGroups {
+		if taskGroup.Name == "" {
+			return nil, fmt.Errorf("can't get taskGroup Name from pod annotation, %s",
+				pod.Annotations[constants.AnnotationTaskGroups])
+		}
+		if taskGroup.MinMember == int32(0) {
+			return nil, fmt.Errorf("can't get taskGroup MinMember from pod annotation, %s",
+				pod.Annotations[constants.AnnotationTaskGroups])
+		}
+	}
+	return taskGroups, nil
+}
+
 type TaskGroupInstanceCountMap struct {
 	counts map[string]int32
 	sync.RWMutex
diff --git a/pkg/common/utils/gang_utils_test.go b/pkg/common/utils/gang_utils_test.go
index 33d7ddc2e..20deff34c 100644
--- a/pkg/common/utils/gang_utils_test.go
+++ b/pkg/common/utils/gang_utils_test.go
@@ -182,3 +182,159 @@ func TestTaskGroupInstanceCountMap(t *testing.T) {
 	counts2.AddOne("g1")
 	assert.Equal(t, counts1.Equals(counts2), false)
 }
+
+// nolint: funlen
+func TestGetTaskGroupFromAnnotation(t *testing.T) {
+	// correct json
+	testGroup := `
+	[
+		{
+			"name": "test-group-1",
+			"minMember": 10,
+			"minResource": {
+				"cpu": 1,
+				"memory": "2Gi"
+			},
+			"nodeSelector": {
+				"test": "testnode",
+				"locate": "west"
+			},
+			"tolerations": [
+				{
+					"key": "key",
+					"operator": "Equal",
+					"value": "value",
+					"effect": "NoSchedule"
+				}
+			]
+		},
+		{
+			"name": "test-group-2",
+			"minMember": 5,
+			"minResource": {
+				"cpu": 2,
+				"memory": "4Gi"
+			}
+		}
+	]`
+	testGroup2 := `
+	[
+		{
+			"name": "test-group-3",
+			"minMember": 3,
+			"minResource": {
+				"cpu": 2,
+				"memory": "1Gi"
+			}
+		}
+	]`
+	// Error json
+	testGroupErr := `
+	[
+		{
+			"name": "test-group-err-1",
+			"minMember": "ERR",
+			"minResource": {
+				"cpu": "ERR",
+				"memory": "ERR"
+			},
+		}
+	]`
+	// without name
+	testGroupErr2 := `
+	[
+		{
+			"minMember": 3,
+			"minResource": {
+				"cpu": 2,
+				"memory": "1Gi"
+			}
+		}
+	]`
+	// without minMember
+	testGroupErr3 := `
+	[
+		{
+			"name": "test-group-err-2",
+			"minResource": {
+				"cpu": 2,
+				"memory": "1Gi"
+			}
+		}
+	]`
+	// withot minResource
+	testGroupErr4 := `
+	[
+		{
+			"name": "test-group-err-3",
+			"minMember": 3,
+		}
+	]`
+	// Insert task group info to pod annotation
+	pod := &v1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "test-pod-err",
+			Namespace: "test",
+			UID:       "test-pod-UID-err",
+		},
+		Spec: v1.PodSpec{},
+		Status: v1.PodStatus{
+			Phase: v1.PodPending,
+		},
+	}
+	// Empty case
+	taskGroupEmpty, err := GetTaskGroupsFromAnnotation(pod)
+	assert.Assert(t, taskGroupEmpty == nil)
+	assert.Assert(t, err == nil)
+	// Error case
+	pod.Annotations = map[string]string{constants.AnnotationTaskGroups: testGroupErr}
+	taskGroupErr, err := GetTaskGroupsFromAnnotation(pod)
+	assert.Assert(t, taskGroupErr == nil)
+	assert.Assert(t, err != nil)
+	pod.Annotations = map[string]string{constants.AnnotationTaskGroups: testGroupErr2}
+	taskGroupErr2, err := GetTaskGroupsFromAnnotation(pod)
+	assert.Assert(t, taskGroupErr2 == nil)
+	assert.Assert(t, err != nil)
+	pod.Annotations = map[string]string{constants.AnnotationTaskGroups: testGroupErr3}
+	taskGroupErr3, err := GetTaskGroupsFromAnnotation(pod)
+	assert.Assert(t, taskGroupErr3 == nil)
+	assert.Assert(t, err != nil)
+	pod.Annotations = map[string]string{constants.AnnotationTaskGroups: testGroupErr4}
+	taskGroupErr4, err := GetTaskGroupsFromAnnotation(pod)
+	assert.Assert(t, taskGroupErr4 == nil)
+	assert.Assert(t, err != nil)
+	// Correct case
+	pod.Annotations = map[string]string{constants.AnnotationTaskGroups: testGroup}
+	taskGroups, err := GetTaskGroupsFromAnnotation(pod)
+	assert.NilError(t, err)
+	// Group value check
+	assert.Equal(t, taskGroups[0].Name, "test-group-1")
+	assert.Equal(t, taskGroups[0].MinMember, int32(10))
+	assert.Equal(t, taskGroups[0].MinResource["cpu"], resource.MustParse("1"))
+	assert.Equal(t, taskGroups[0].MinResource["memory"], resource.MustParse("2Gi"))
+	assert.Equal(t, taskGroups[1].Name, "test-group-2")
+	assert.Equal(t, taskGroups[1].MinMember, int32(5))
+	assert.Equal(t, taskGroups[1].MinResource["cpu"], resource.MustParse("2"))
+	assert.Equal(t, taskGroups[1].MinResource["memory"], resource.MustParse("4Gi"))
+	// NodeSelector check
+	assert.Equal(t, taskGroups[0].NodeSelector["test"], "testnode")
+	assert.Equal(t, taskGroups[0].NodeSelector["locate"], "west")
+	// Toleration check
+	var tolerations []v1.Toleration
+	toleration := v1.Toleration{
+		Key:      "key",
+		Operator: "Equal",
+		Value:    "value",
+		Effect:   "NoSchedule",
+	}
+	tolerations = append(tolerations, toleration)
+	assert.DeepEqual(t, taskGroups[0].Tolerations, tolerations)
+
+	pod.Annotations = map[string]string{constants.AnnotationTaskGroups: testGroup2}
+	taskGroups2, err := GetTaskGroupsFromAnnotation(pod)
+	assert.NilError(t, err)
+	assert.Equal(t, taskGroups2[0].Name, "test-group-3")
+	assert.Equal(t, taskGroups2[0].MinMember, int32(3))
+	assert.Equal(t, taskGroups2[0].MinResource["cpu"], resource.MustParse("2"))
+	assert.Equal(t, taskGroups2[0].MinResource["memory"], resource.MustParse("1Gi"))
+}
diff --git a/pkg/common/utils/utils.go b/pkg/common/utils/utils.go
index 4aa6e4cd8..c62ba128e 100644
--- a/pkg/common/utils/utils.go
+++ b/pkg/common/utils/utils.go
@@ -19,7 +19,6 @@
 package utils
 
 import (
-	"encoding/json"
 	"fmt"
 	"strings"
 	"time"
@@ -29,7 +28,6 @@ import (
 	apis "k8s.io/apimachinery/pkg/apis/meta/v1"
 	podv1 "k8s.io/kubernetes/pkg/api/v1/pod"
 
-	"github.com/apache/incubator-yunikorn-k8shim/pkg/apis/yunikorn.apache.org/v1alpha1"
 	"github.com/apache/incubator-yunikorn-k8shim/pkg/common"
 	"github.com/apache/incubator-yunikorn-k8shim/pkg/common/constants"
 	"github.com/apache/incubator-yunikorn-scheduler-interface/lib/go/si"
@@ -109,31 +107,6 @@ func GetApplicationIDFromPod(pod *v1.Pod) (string, error) {
 		pod.Spec.String())
 }
 
-func GetTaskGroupsFromAnnotation(pod *v1.Pod) ([]v1alpha1.TaskGroup, error) {
-	taskGroupInfo, ok := pod.Annotations[constants.AnnotationTaskGroups]
-	if !ok {
-		return nil, fmt.Errorf("unable to extract taskGroups info from pod annotations, %s",
-			pod.Annotations)
-	}
-	taskGroups := []v1alpha1.TaskGroup{}
-	err := json.Unmarshal([]byte(taskGroupInfo), &taskGroups)
-	if err != nil {
-		return nil, err
-	}
-	// json.Unmarchal won't return error if name or MinMember is empty, but will return error if MinResource is empty or error format.
-	for _, taskGroup := range taskGroups {
-		if taskGroup.Name == "" {
-			return nil, fmt.Errorf("can't get taskGroup Name from pod annotation, %s",
-				pod.Annotations[constants.AnnotationTaskGroups])
-		}
-		if taskGroup.MinMember == int32(0) {
-			return nil, fmt.Errorf("can't get taskGroup MinMember from pod annotation, %s",
-				pod.Annotations[constants.AnnotationTaskGroups])
-		}
-	}
-	return taskGroups, nil
-}
-
 // compare the existing pod condition with the given one, return true if the pod condition remains not changed.
 // return false if pod has no condition set yet, or condition has changed.
 func PodUnderCondition(pod *v1.Pod, condition *v1.PodCondition) bool {
diff --git a/pkg/common/utils/utils_test.go b/pkg/common/utils/utils_test.go
index 28dcc7b92..e296b803c 100644
--- a/pkg/common/utils/utils_test.go
+++ b/pkg/common/utils/utils_test.go
@@ -25,7 +25,6 @@ import (
 
 	"gotest.tools/assert"
 	v1 "k8s.io/api/core/v1"
-	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 
 	"github.com/apache/incubator-yunikorn-k8shim/pkg/common"
@@ -332,159 +331,3 @@ func TestGetApplicationIDFromPod(t *testing.T) {
 		})
 	}
 }
-
-// nolint: funlen
-func TestGetTaskGroupFromAnnotation(t *testing.T) {
-	// correct json
-	testGroup := `
-	[
-		{
-			"name": "test-group-1",
-			"minMember": 10,
-			"minResource": {
-				"cpu": 1,
-				"memory": "2Gi"
-			},
-			"nodeSelector": {
-				"test": "testnode",
-				"locate": "west"
-			},
-			"tolerations": [
-				{
-					"key": "key",
-					"operator": "Equal",
-					"value": "value",
-					"effect": "NoSchedule"
-				}
-			]
-		},
-		{
-			"name": "test-group-2",
-			"minMember": 5,
-			"minResource": {
-				"cpu": 2,
-				"memory": "4Gi"
-			}
-		}
-	]`
-	testGroup2 := `
-	[
-		{
-			"name": "test-group-3",
-			"minMember": 3,
-			"minResource": {
-				"cpu": 2,
-				"memory": "1Gi"
-			}
-		}
-	]`
-	// Error json
-	testGroupErr := `
-	[
-		{
-			"name": "test-group-err-1",
-			"minMember": "ERR",
-			"minResource": {
-				"cpu": "ERR",
-				"memory": "ERR"
-			},
-		}
-	]`
-	// without name
-	testGroupErr2 := `
-	[
-		{
-			"minMember": 3,
-			"minResource": {
-				"cpu": 2,
-				"memory": "1Gi"
-			}
-		}
-	]`
-	// without minMember
-	testGroupErr3 := `
-	[
-		{
-			"name": "test-group-err-2",
-			"minResource": {
-				"cpu": 2,
-				"memory": "1Gi"
-			}
-		}
-	]`
-	// withot minResource
-	testGroupErr4 := `
-	[
-		{
-			"name": "test-group-err-3",
-			"minMember": 3,
-		}
-	]`
-	// Insert task group info to pod annotation
-	pod := &v1.Pod{
-		ObjectMeta: metav1.ObjectMeta{
-			Name:      "test-pod-err",
-			Namespace: "test",
-			UID:       "test-pod-UID-err",
-		},
-		Spec: v1.PodSpec{},
-		Status: v1.PodStatus{
-			Phase: v1.PodPending,
-		},
-	}
-	// Empty case
-	taskGroupEmpty, err := GetTaskGroupsFromAnnotation(pod)
-	assert.Assert(t, taskGroupEmpty == nil)
-	assert.Assert(t, err != nil)
-	// Error case
-	pod.Annotations = map[string]string{constants.AnnotationTaskGroups: testGroupErr}
-	taskGroupErr, err := GetTaskGroupsFromAnnotation(pod)
-	assert.Assert(t, taskGroupErr == nil)
-	assert.Assert(t, err != nil)
-	pod.Annotations = map[string]string{constants.AnnotationTaskGroups: testGroupErr2}
-	taskGroupErr2, err := GetTaskGroupsFromAnnotation(pod)
-	assert.Assert(t, taskGroupErr2 == nil)
-	assert.Assert(t, err != nil)
-	pod.Annotations = map[string]string{constants.AnnotationTaskGroups: testGroupErr3}
-	taskGroupErr3, err := GetTaskGroupsFromAnnotation(pod)
-	assert.Assert(t, taskGroupErr3 == nil)
-	assert.Assert(t, err != nil)
-	pod.Annotations = map[string]string{constants.AnnotationTaskGroups: testGroupErr4}
-	taskGroupErr4, err := GetTaskGroupsFromAnnotation(pod)
-	assert.Assert(t, taskGroupErr4 == nil)
-	assert.Assert(t, err != nil)
-	// Correct case
-	pod.Annotations = map[string]string{constants.AnnotationTaskGroups: testGroup}
-	taskGroups, err := GetTaskGroupsFromAnnotation(pod)
-	assert.NilError(t, err)
-	// Group value check
-	assert.Equal(t, taskGroups[0].Name, "test-group-1")
-	assert.Equal(t, taskGroups[0].MinMember, int32(10))
-	assert.Equal(t, taskGroups[0].MinResource["cpu"], resource.MustParse("1"))
-	assert.Equal(t, taskGroups[0].MinResource["memory"], resource.MustParse("2Gi"))
-	assert.Equal(t, taskGroups[1].Name, "test-group-2")
-	assert.Equal(t, taskGroups[1].MinMember, int32(5))
-	assert.Equal(t, taskGroups[1].MinResource["cpu"], resource.MustParse("2"))
-	assert.Equal(t, taskGroups[1].MinResource["memory"], resource.MustParse("4Gi"))
-	// NodeSelector check
-	assert.Equal(t, taskGroups[0].NodeSelector["test"], "testnode")
-	assert.Equal(t, taskGroups[0].NodeSelector["locate"], "west")
-	// Toleration check
-	var tolerations []v1.Toleration
-	toleration := v1.Toleration{
-		Key:      "key",
-		Operator: "Equal",
-		Value:    "value",
-		Effect:   "NoSchedule",
-	}
-	tolerations = append(tolerations, toleration)
-	assert.DeepEqual(t, taskGroups[0].Tolerations, tolerations)
-
-	pod.Annotations = map[string]string{constants.AnnotationTaskGroups: testGroup2}
-	taskGroups2, err := GetTaskGroupsFromAnnotation(pod)
-	assert.NilError(t, err)
-	assert.Equal(t, taskGroups2[0].Name, "test-group-3")
-	assert.Equal(t, taskGroups2[0].MinMember, int32(3))
-	assert.Equal(t, taskGroups2[0].MinResource["cpu"], resource.MustParse("2"))
-	assert.Equal(t, taskGroups2[0].MinResource["memory"], resource.MustParse("1Gi"))
-}

From 44b4069e11e588cc45e2f09c130dbff4ca701f50 Mon Sep 17 00:00:00 2001
From: Ting-Yao Huang <61864060+HuangTing-Yao@users.noreply.github.com>
Date: Thu, 26 Nov 2020 06:54:15 +0800
Subject: [PATCH 07/15] [YUNIKORN-459]Implement the placeholder cleanup in
 PlaceholderManager (#207)

---
 pkg/cache/application.go              |  2 +-
 pkg/cache/placeholder_manager.go      | 25 ++++++--
 pkg/cache/placeholder_manager_test.go | 87 +++++++++++++++++++++++++++
 pkg/cache/task.go                     |  6 ++
 4 files changed, 114 insertions(+), 6 deletions(-)

diff --git a/pkg/cache/application.go b/pkg/cache/application.go
index 18cf63d5a..794ae9510 100644
--- a/pkg/cache/application.go
+++ b/pkg/cache/application.go
@@ -420,7 +420,7 @@ func (app *Application) onReserving(event *fsm.Event) {
 		if err := GetPlaceholderManager().createAppPlaceholders(app); err != nil {
 			// creating placeholder failed
 			// put the app into recycling queue and turn the app to running state
-			GetPlaceholderManager().Recycle(app.applicationID)
+			GetPlaceholderManager().CleanUp(app)
 			ev := NewRunApplicationEvent(app.applicationID)
 			dispatcher.Dispatch(ev)
 		}
diff --git a/pkg/cache/placeholder_manager.go b/pkg/cache/placeholder_manager.go
index 3d1c106bb..80c4c0579 100644
--- a/pkg/cache/placeholder_manager.go
+++ b/pkg/cache/placeholder_manager.go
@@ -22,6 +22,7 @@ import (
 	"sync"
 
 	"go.uber.org/zap"
+	v1 "k8s.io/api/core/v1"
 
 	"github.com/apache/incubator-yunikorn-k8shim/pkg/client"
 	"github.com/apache/incubator-yunikorn-k8shim/pkg/common/utils"
@@ -30,7 +31,8 @@ import (
 
 // placeholder manager is a service to manage the lifecycle of app placeholders
 type PlaceholderManager struct {
-	clients *client.Clients
+	clients   *client.Clients
+	orphanPod map[string]*v1.Pod
 	sync.RWMutex
 }
 
@@ -70,10 +72,23 @@ func (mgr *PlaceholderManager) createAppPlaceholders(app *Application) error {
 	return nil
 }
 
-// recycle all the placeholders for an application
-func (mgr *PlaceholderManager) Recycle(appID string) {
-	log.Logger().Info("start to recycle app placeholders",
-		zap.String("appID", appID))
+// clean up all the placeholders for an application
+func (mgr *PlaceholderManager) CleanUp(app *Application) {
+	log.Logger().Info("start to clean up app placeholders",
+		zap.String("appID", app.GetApplicationID()))
+	for taskID, task := range app.taskMap {
+		if task.GetTaskPlaceholder() {
+			// remove pod
+			err := mgr.clients.KubeClient.Delete(task.pod)
+			if err != nil {
+				log.Logger().Error("failed to clean up placeholder pod",
+					zap.Error(err))
+				mgr.orphanPod[taskID] = task.pod
+			}
+		}
+	}
+	log.Logger().Info("finish to clean up app placeholders",
+		zap.String("appID", app.GetApplicationID()))
 }
 
 // this is only used in testing
diff --git a/pkg/cache/placeholder_manager_test.go b/pkg/cache/placeholder_manager_test.go
index 46a0bc596..b43bb7e17 100644
--- a/pkg/cache/placeholder_manager_test.go
+++ b/pkg/cache/placeholder_manager_test.go
@@ -24,8 +24,10 @@ import (
 	"testing"
 
 	"gotest.tools/assert"
+	is "gotest.tools/assert/cmp"
 	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
+	apis "k8s.io/apimachinery/pkg/apis/meta/v1"
 
 	"github.com/apache/incubator-yunikorn-k8shim/pkg/apis/yunikorn.apache.org/v1alpha1"
 	"github.com/apache/incubator-yunikorn-k8shim/pkg/client"
@@ -86,3 +88,88 @@ func TestCreateAppPlaceholders(t *testing.T) {
 	err = placeholderMgr.createAppPlaceholders(app)
 	assert.Error(t, err, "failed to create pod tg-test-group-2-app01-15")
 }
+
+func TestCleanUp(t *testing.T) {
+	const (
+		appID     = "app01"
+		queue     = "root.default"
+		namespace = "test"
+	)
+	mockedContext := initContextForTest()
+	mockedSchedulerAPI := newMockSchedulerAPI()
+	app := NewApplication(appID, queue,
+		"bob", map[string]string{constants.AppTagNamespace: namespace}, mockedSchedulerAPI)
+	mockedContext.applications[appID] = app
+	res := app.getNonTerminatedTaskAlias()
+	assert.Equal(t, len(res), 0)
+
+	pod1 := &v1.Pod{
+		TypeMeta: apis.TypeMeta{
+			Kind:       "Pod",
+			APIVersion: "v1",
+		},
+		ObjectMeta: apis.ObjectMeta{
+			Name: "pod-01",
+			UID:  "UID-01",
+		},
+	}
+	pod2 := &v1.Pod{
+		TypeMeta: apis.TypeMeta{
+			Kind:       "Pod",
+			APIVersion: "v1",
+		},
+		ObjectMeta: apis.ObjectMeta{
+			Name: "pod-02",
+			UID:  "UID-02",
+		},
+	}
+	pod3 := &v1.Pod{
+		TypeMeta: apis.TypeMeta{
+			Kind:       "Pod",
+			APIVersion: "v1",
+		},
+		ObjectMeta: apis.ObjectMeta{
+			Name: "pod-03",
+			UID:  "UID-03",
+		},
+	}
+	taskID1 := "task01"
+	task1 := NewTask(taskID1, app, mockedContext, pod1)
+	task1.placeholder = true
+	app.taskMap[taskID1] = task1
+	taskID2 := "task02"
+	task2 := NewTask(taskID2, app, mockedContext, pod2)
+	task2.placeholder = true
+	app.taskMap[taskID2] = task2
+	taskID3 := "task03"
+	task3 := NewTask(taskID3, app, mockedContext, pod3)
+	task3.placeholder = false
+	app.taskMap[taskID3] = task3
+	res = app.getNonTerminatedTaskAlias()
+	assert.Equal(t, len(res), 3)
+
+	deletePod := make([]string, 0)
+	mockedAPIProvider := client.NewMockedAPIProvider()
+	mockedAPIProvider.MockDeleteFn(func(pod *v1.Pod) error {
+		deletePod = append(deletePod, pod.Name)
+		return nil
+	})
+	placeholderMgr := &PlaceholderManager{
+		clients:   mockedAPIProvider.GetAPIs(),
+		orphanPod: make(map[string]*v1.Pod),
+		RWMutex:   sync.RWMutex{},
+	}
+	placeholderMgr.CleanUp(app)
+
+	// check both pod-01 and pod-02 in deletePod list and pod-03 isn't contain
+	assert.Assert(t, is.Contains(deletePod, "pod-01"))
+	assert.Assert(t, is.Contains(deletePod, "pod-02"))
+	exist := false
+	for _, item := range deletePod {
+		if item == "pod-03" {
+			exist = true
+		}
+	}
+	assert.Equal(t, exist, false)
+	assert.Equal(t, len(placeholderMgr.orphanPod), 0)
+}
diff --git a/pkg/cache/task.go b/pkg/cache/task.go
index 8b34f68cb..c307c2909 100644
--- a/pkg/cache/task.go
+++ b/pkg/cache/task.go
@@ -171,6 +171,12 @@ func (task *Task) GetTaskID() string {
 	return task.taskID
 }
 
+func (task *Task) GetTaskPlaceholder() bool {
+	task.lock.RLock()
+	defer task.lock.RUnlock()
+	return task.placeholder
+}
+
 func (task *Task) GetTaskState() string {
 	// fsm has its own internal lock, we don't need to hold node's lock here
 	return task.sm.Current()

From 4a9933000af7e26f9a8040429330a629c0dc1278 Mon Sep 17 00:00:00 2001
From: Ting-Yao Huang <61864060+HuangTing-Yao@users.noreply.github.com>
Date: Wed, 9 Dec 2020 13:53:57 +0800
Subject: [PATCH 08/15] [YUNIKORN-469]Implement recycling service in
 PlaceholderManager that cleans up orphan placeholders (#208)

---
 pkg/cache/placeholder_manager.go      | 63 +++++++++++++++++++++++++++
 pkg/cache/placeholder_manager_test.go | 42 ++++++++++++++++++
 2 files changed, 105 insertions(+)

diff --git a/pkg/cache/placeholder_manager.go b/pkg/cache/placeholder_manager.go
index 80c4c0579..2fe768363 100644
--- a/pkg/cache/placeholder_manager.go
+++ b/pkg/cache/placeholder_manager.go
@@ -20,6 +20,8 @@ package cache
 
 import (
 	"sync"
+	"sync/atomic"
+	"time"
 
 	"go.uber.org/zap"
 	v1 "k8s.io/api/core/v1"
@@ -33,6 +35,8 @@ import (
 type PlaceholderManager struct {
 	clients   *client.Clients
 	orphanPod map[string]*v1.Pod
+	stopChan  chan struct{}
+	running   atomic.Value
 	sync.RWMutex
 }
 
@@ -97,3 +101,62 @@ func (mgr *PlaceholderManager) setMockedClients(mockedClients *client.Clients) {
 	defer mgr.Unlock()
 	mgr.clients = mockedClients
 }
+
+func (mgr *PlaceholderManager) cleanOrphanPlaceholders() {
+	mgr.Lock()
+	defer mgr.Unlock()
+	for taskID, pod := range mgr.orphanPod {
+		log.Logger().Debug("start to clean up orphan pod",
+			zap.String("taskID", taskID),
+			zap.String("podName", pod.Name))
+		err := mgr.clients.KubeClient.Delete(pod)
+		if err != nil {
+			log.Logger().Warn("failed to clean up orphan pod", zap.Error(err))
+		} else {
+			delete(mgr.orphanPod, taskID)
+		}
+	}
+}
+
+func (mgr *PlaceholderManager) Start() {
+	log.Logger().Info("starting the Placeholder Manager")
+	mgr.stopChan = make(chan struct{})
+	if mgr.isRunning() {
+		log.Logger().Info("The placeholder manager has been started")
+		return
+	}
+	mgr.setRunning(true)
+	go func() {
+		for {
+			select {
+			case <-mgr.stopChan:
+				log.Logger().Info("PlaceholderManager has been stopped")
+				mgr.setRunning(false)
+				return
+			default:
+				// clean orphan placeholders every 5 seconds
+				log.Logger().Info("clean up orphan pod")
+				mgr.cleanOrphanPlaceholders()
+				time.Sleep(5 * time.Second)
+			}
+		}
+	}()
+}
+
+func (mgr *PlaceholderManager) Stop() {
+	if !mgr.isRunning() {
+		log.Logger().Info("The placeholder manager has been stopped")
+		return
+	}
+	log.Logger().Info("stopping the Placeholder Manager")
+	mgr.stopChan <- struct{}{}
+	time.Sleep(3 * time.Second)
+}
+
+func (mgr *PlaceholderManager) isRunning() bool {
+	return mgr.running.Load().(bool)
+}
+
+func (mgr *PlaceholderManager) setRunning(flag bool) {
+	mgr.running.Store(flag)
+}
diff --git a/pkg/cache/placeholder_manager_test.go b/pkg/cache/placeholder_manager_test.go
index b43bb7e17..93b9cbf0b 100644
--- a/pkg/cache/placeholder_manager_test.go
+++ b/pkg/cache/placeholder_manager_test.go
@@ -21,6 +21,7 @@ package cache
 import (
 	"fmt"
 	"sync"
+	"sync/atomic"
 	"testing"
 
 	"gotest.tools/assert"
@@ -173,3 +174,44 @@ func TestCleanUp(t *testing.T) {
 	assert.Equal(t, exist, false)
 	assert.Equal(t, len(placeholderMgr.orphanPod), 0)
 }
+
+func TestCleanOrphanPlaceholders(t *testing.T) {
+	mockedAPIProvider := client.NewMockedAPIProvider()
+	placeholderMgr := &PlaceholderManager{
+		clients:   mockedAPIProvider.GetAPIs(),
+		orphanPod: make(map[string]*v1.Pod),
+		RWMutex:   sync.RWMutex{},
+	}
+	pod1 := &v1.Pod{
+		TypeMeta: apis.TypeMeta{
+			Kind:       "Pod",
+			APIVersion: "v1",
+		},
+		ObjectMeta: apis.ObjectMeta{
+			Name: "pod-01",
+			UID:  "UID-01",
+		},
+	}
+	placeholderMgr.orphanPod["task01"] = pod1
+	assert.Equal(t, len(placeholderMgr.orphanPod), 1)
+	placeholderMgr.cleanOrphanPlaceholders()
+	assert.Equal(t, len(placeholderMgr.orphanPod), 0)
+}
+
+func TestPlaceholderManagerStartStop(t *testing.T) {
+	mockedAPIProvider := client.NewMockedAPIProvider()
+	placeholderMgr := &PlaceholderManager{
+		clients:   mockedAPIProvider.GetAPIs(),
+		orphanPod: make(map[string]*v1.Pod),
+		running:   atomic.Value{},
+		RWMutex:   sync.RWMutex{},
+	}
+	placeholderMgr.setRunning(false)
+	// start clean up goroutine
+	placeholderMgr.Start()
+	assert.Equal(t, placeholderMgr.isRunning(), true)
+
+	placeholderMgr.Stop()
+	// check orphan pod map is empty
+	assert.Equal(t, placeholderMgr.isRunning(), false)
+}

From 99776beb5f80638365ddfcfa361b15b020fbe7dd Mon Sep 17 00:00:00 2001
From: Weiwei Yang <wyang@cloudera.com>
Date: Wed, 6 Jan 2021 22:01:07 -0800
Subject: [PATCH 09/15] [YUNIKORN-500] Make sure placeholder/taskGroupName are
 passed back to the core.

---
 pkg/appmgmt/general/general.go       |  6 +++
 pkg/appmgmt/interfaces/amprotocol.go |  1 +
 pkg/cache/application.go             | 13 +++--
 pkg/cache/context.go                 |  3 +-
 pkg/cache/placeholder.go             |  1 +
 pkg/cache/placeholder_manager.go     | 14 ++++-
 pkg/cache/task.go                    | 78 +++++++++++++++++-----------
 pkg/common/constants/constants.go    |  1 +
 pkg/common/si_helper.go              | 22 +++++---
 pkg/common/si_helper_test.go         |  2 +-
 pkg/common/utils/gang_utils.go       | 10 ++++
 11 files changed, 108 insertions(+), 43 deletions(-)

diff --git a/pkg/appmgmt/general/general.go b/pkg/appmgmt/general/general.go
index 39fb2fca6..4061ab634 100644
--- a/pkg/appmgmt/general/general.go
+++ b/pkg/appmgmt/general/general.go
@@ -88,12 +88,14 @@ func (os *Manager) getTaskMetadata(pod *v1.Pod) (interfaces.TaskMetadata, bool)
 		return interfaces.TaskMetadata{}, false
 	}
 
+	placeholder := utils.GetPlaceholderFlagFromPodSpec(pod)
 	taskGroupName := utils.GetTaskGroupFromPodSpec(pod)
 
 	return interfaces.TaskMetadata{
 		ApplicationID: appId,
 		TaskID:        string(pod.UID),
 		Pod:           pod,
+		Placeholder:   placeholder,
 		TaskGroupName: taskGroupName,
 	}, true
 }
@@ -303,6 +305,8 @@ func (os *Manager) GetExistingAllocation(pod *v1.Pod) *si.Allocation {
 		// when submit a task, we use pod UID as the allocationKey,
 		// to keep consistent, during recovery, the pod UID is also used
 		// for an Allocation.
+		placeholder := utils.GetPlaceholderFlagFromPodSpec(pod)
+		taskGroupName := utils.GetTaskGroupFromPodSpec(pod)
 		return &si.Allocation{
 			AllocationKey:    string(pod.UID),
 			AllocationTags:   meta.Tags,
@@ -311,6 +315,8 @@ func (os *Manager) GetExistingAllocation(pod *v1.Pod) *si.Allocation {
 			QueueName:        meta.QueueName,
 			NodeID:           pod.Spec.NodeName,
 			ApplicationID:    meta.ApplicationID,
+			Placeholder:      placeholder,
+			TaskGroupName:    taskGroupName,
 			PartitionName:    constants.DefaultPartition,
 		}
 	}
diff --git a/pkg/appmgmt/interfaces/amprotocol.go b/pkg/appmgmt/interfaces/amprotocol.go
index 5a521931a..0476c011a 100644
--- a/pkg/appmgmt/interfaces/amprotocol.go
+++ b/pkg/appmgmt/interfaces/amprotocol.go
@@ -81,5 +81,6 @@ type TaskMetadata struct {
 	ApplicationID string
 	TaskID        string
 	Pod           *v1.Pod
+	Placeholder   bool
 	TaskGroupName string
 }
diff --git a/pkg/cache/application.go b/pkg/cache/application.go
index 1daf9b5d7..8ff251837 100644
--- a/pkg/cache/application.go
+++ b/pkg/cache/application.go
@@ -440,12 +440,14 @@ func (app *Application) onReservationStateChange(event *fsm.Event) {
 
 	actualCounts := utils.NewTaskGroupInstanceCountMap()
 	for _, t := range app.getTasks(events.States().Task.Bound) {
-		actualCounts.AddOne(t.taskGroupName)
+		if t.placeholder {
+			actualCounts.AddOne(t.taskGroupName)
+		}
 	}
 
 	// min member all satisfied
 	if desireCounts.Equals(actualCounts) {
-		ev := NewRunApplicationEvent(app.GetApplicationID())
+		ev := NewRunApplicationEvent(app.applicationID)
 		dispatcher.Dispatch(ev)
 	}
 }
@@ -463,16 +465,19 @@ func (app *Application) handleCompleteApplicationEvent(event *fsm.Event) {
 func (app *Application) handleReleaseAppAllocationEvent(event *fsm.Event) {
 	eventArgs := make([]string, 2)
 	if err := events.GetEventArgsAsStrings(eventArgs, event.Args); err != nil {
-		log.Logger().Error("fail to paser event arg", zap.Error(err))
+		log.Logger().Error("fail to parse event arg", zap.Error(err))
 		return
 	}
 	allocUUID := eventArgs[0]
+	terminationTypeStr := eventArgs[1]
 	log.Logger().Info("try to release pod from application",
 		zap.String("appID", app.applicationID),
-		zap.String("allocationUUID", allocUUID))
+		zap.String("allocationUUID", allocUUID),
+		zap.String("terminationType", terminationTypeStr))
 
 	for _, task := range app.taskMap {
 		if task.allocationUUID == allocUUID {
+			task.setTaskTerminationType(terminationTypeStr)
 			err := task.DeleteTaskPod(task.pod)
 			if err != nil {
 				log.Logger().Error("failed to release allocation from application", zap.Error(err))
diff --git a/pkg/cache/context.go b/pkg/cache/context.go
index 80a738aa3..cb8347f83 100644
--- a/pkg/cache/context.go
+++ b/pkg/cache/context.go
@@ -498,6 +498,7 @@ func (ctx *Context) AddApplication(request *interfaces.AddApplicationRequest) in
 		request.Metadata.User,
 		request.Metadata.Tags,
 		ctx.apiProvider.GetAPIs().SchedulerAPI)
+	app.setTaskGroups(request.Metadata.TaskGroups)
 
 	// add into cache
 	ctx.applications[app.applicationID] = app
@@ -572,7 +573,7 @@ func (ctx *Context) AddTask(request *interfaces.AddTaskRequest) interfaces.Manag
 		if app, valid := managedApp.(*Application); valid {
 			existingTask, err := app.GetTask(request.Metadata.TaskID)
 			if err != nil {
-				task := NewTask(request.Metadata.TaskID, app, ctx, request.Metadata.Pod)
+				task := NewFromTaskMeta(request.Metadata.TaskID, app, ctx, request.Metadata)
 				// in recovery mode, task is considered as allocated
 				if request.Recovery {
 					// in scheduling, allocationUUID is assigned by scheduler-core
diff --git a/pkg/cache/placeholder.go b/pkg/cache/placeholder.go
index fb238a873..0bf4a4c1a 100644
--- a/pkg/cache/placeholder.go
+++ b/pkg/cache/placeholder.go
@@ -53,6 +53,7 @@ func newPlaceholder(placeholderName string, app *Application, taskGroup v1alpha1
 				constants.LabelQueueName:     app.GetQueue(),
 			},
 			Annotations: map[string]string{
+				constants.AnnotationPlaceholderFlag: "true",
 				constants.AnnotationTaskGroupName: taskGroup.Name,
 			},
 		},
diff --git a/pkg/cache/placeholder_manager.go b/pkg/cache/placeholder_manager.go
index 2fe768363..f4b16a0d1 100644
--- a/pkg/cache/placeholder_manager.go
+++ b/pkg/cache/placeholder_manager.go
@@ -43,9 +43,21 @@ type PlaceholderManager struct {
 var placeholderMgr *PlaceholderManager
 var once sync.Once
 
+func NewPlaceholderManager(clients *client.Clients) *PlaceholderManager {
+	var r atomic.Value
+	r.Store(false)
+	placeholderMgr = &PlaceholderManager{
+		clients: clients,
+		running: r,
+	}
+	return placeholderMgr
+}
+
 func GetPlaceholderManager() *PlaceholderManager {
 	once.Do(func() {
-		placeholderMgr = &PlaceholderManager{}
+		if placeholderMgr == nil {
+			log.Logger().Fatal("PlaceholderManager is not initiated")
+		}
 	})
 	return placeholderMgr
 }
diff --git a/pkg/cache/task.go b/pkg/cache/task.go
index 306fefb3a..979546c77 100644
--- a/pkg/cache/task.go
+++ b/pkg/cache/task.go
@@ -23,8 +23,8 @@ import (
 	"sync"
 	"time"
 
+	"github.com/apache/incubator-yunikorn-k8shim/pkg/appmgmt/interfaces"
 	"go.uber.org/zap"
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 
 	"github.com/apache/incubator-yunikorn-k8shim/pkg/common"
 	"github.com/apache/incubator-yunikorn-k8shim/pkg/common/events"
@@ -38,40 +38,44 @@ import (
 )
 
 type Task struct {
-	taskID         string
-	alias          string
-	applicationID  string
-	application    *Application
-	allocationUUID string
-	resource       *si.Resource
-	pod            *v1.Pod
-	context        *Context
-	nodeName       string
-	createTime     time.Time
-	taskGroupName  string
-	placeholder    bool
-	sm             *fsm.FSM
-	lock           *sync.RWMutex
+	taskID          string
+	alias           string
+	applicationID   string
+	application     *Application
+	allocationUUID  string
+	resource        *si.Resource
+	pod             *v1.Pod
+	context         *Context
+	nodeName        string
+	createTime      time.Time
+	taskGroupName   string
+	placeholder     bool
+	terminationType string
+	sm              *fsm.FSM
+	lock            *sync.RWMutex
 }
 
+// TODO to be removed
 func NewTask(tid string, app *Application, ctx *Context, pod *v1.Pod) *Task {
 	taskResource := common.GetPodResource(pod)
-	return createTaskInternal(tid, app, taskResource, pod, ctx)
+	return createTaskInternal(tid, app, taskResource, pod, false, "", ctx)
 }
 
-// test only
-func CreateTaskForTest(tid string, app *Application, resource *si.Resource, ctx *Context) *Task {
-	// for testing purpose, the pod name is same as the taskID
-	taskPod := &v1.Pod{
-		ObjectMeta: metav1.ObjectMeta{
-			Name: tid,
-		},
-	}
-	return createTaskInternal(tid, app, resource, taskPod, ctx)
+func NewFromTaskMeta(tid string, app *Application, ctx *Context, metadata interfaces.TaskMetadata) *Task {
+	taskPod := metadata.Pod
+	taskResource := common.GetPodResource(taskPod)
+	return createTaskInternal(
+		tid,
+		app,
+		taskResource,
+		metadata.Pod,
+		metadata.Placeholder,
+		metadata.TaskGroupName,
+		ctx)
 }
 
 func createTaskInternal(tid string, app *Application, resource *si.Resource,
-	pod *v1.Pod, ctx *Context) *Task {
+	pod *v1.Pod, placeholder bool, taskGroupName string, ctx *Context) *Task {
 	task := &Task{
 		taskID:        tid,
 		alias:         fmt.Sprintf("%s/%s", pod.Namespace, pod.Name),
@@ -80,7 +84,8 @@ func createTaskInternal(tid string, app *Application, resource *si.Resource,
 		pod:           pod,
 		resource:      resource,
 		createTime:    pod.GetCreationTimestamp().Time,
-		placeholder:   false,
+		placeholder:   placeholder,
+		taskGroupName: taskGroupName,
 		context:       ctx,
 		lock:          &sync.RWMutex{},
 	}
@@ -188,6 +193,12 @@ func (task *Task) setTaskGroupName(groupName string) {
 	task.taskGroupName = groupName
 }
 
+func (task *Task) setTaskTerminationType(terminationTyp string) {
+	task.lock.Lock()
+	defer task.lock.Unlock()
+	task.terminationType = terminationTyp
+}
+
 func (task *Task) getTaskGroupName() string {
 	task.lock.RLock()
 	defer task.lock.RUnlock()
@@ -240,7 +251,13 @@ func (task *Task) handleSubmitTaskEvent(event *fsm.Event) {
 	log.Logger().Debug("scheduling pod",
 		zap.String("podName", task.pod.Name))
 	// convert the request
-	rr := common.CreateUpdateRequestForTask(task.applicationID, task.taskID, task.resource, task.pod)
+	rr := common.CreateUpdateRequestForTask(
+		task.applicationID,
+		task.taskID,
+		task.resource,
+		task.placeholder,
+		task.taskGroupName,
+		task.pod)
 	log.Logger().Debug("send update request", zap.String("request", rr.String()))
 	if err := task.context.apiProvider.GetAPIs().SchedulerAPI.Update(&rr); err != nil {
 		log.Logger().Debug("failed to send scheduling request to scheduler", zap.Error(err))
@@ -378,7 +395,8 @@ func (task *Task) releaseAllocation() {
 			zap.String("taskID", task.taskID),
 			zap.String("taskAlias", task.alias),
 			zap.String("allocationUUID", task.allocationUUID),
-			zap.String("task", task.GetTaskState()))
+			zap.String("task", task.GetTaskState()),
+			zap.String("terminationType", task.terminationType))
 
 		// depends on current task state, generate requests accordingly.
 		// if task is already allocated, which means the scheduler core already,
@@ -406,7 +424,7 @@ func (task *Task) releaseAllocation() {
 				return
 			}
 			releaseRequest = common.CreateReleaseAllocationRequestForTask(
-				task.applicationID, task.allocationUUID, task.application.partition)
+				task.applicationID, task.allocationUUID, task.application.partition, task.terminationType)
 		}
 
 		if releaseRequest.Releases != nil {
diff --git a/pkg/common/constants/constants.go b/pkg/common/constants/constants.go
index 580ed4721..09ef14f3c 100644
--- a/pkg/common/constants/constants.go
+++ b/pkg/common/constants/constants.go
@@ -50,6 +50,7 @@ const SchedulerName = "yunikorn"
 
 // Application crd
 const AppManagerHandlerName = "yunikorn-app"
+const AnnotationPlaceholderFlag = "yunikorn.apache.org/placeholder"
 const AnnotationTaskGroupName = "yunikorn.apache.org/task-group-name"
 const AnnotationTaskGroups = "yunikorn.apache.org/task-groups"
 
diff --git a/pkg/common/si_helper.go b/pkg/common/si_helper.go
index cf8405f70..f1956ddb3 100644
--- a/pkg/common/si_helper.go
+++ b/pkg/common/si_helper.go
@@ -43,13 +43,15 @@ func createTagsForTask(pod *v1.Pod) map[string]string {
 	return tags
 }
 
-func CreateUpdateRequestForTask(appID, taskID string, resource *si.Resource, pod *v1.Pod) si.UpdateRequest {
+func CreateUpdateRequestForTask(appID, taskID string, resource *si.Resource, placeholder bool, taskGroupName string, pod *v1.Pod) si.UpdateRequest {
 	ask := si.AllocationAsk{
 		AllocationKey:  taskID,
 		ResourceAsk:    resource,
 		ApplicationID:  appID,
 		MaxAllocations: 1,
 		Tags:           createTagsForTask(pod),
+		Placeholder:    placeholder,
+		TaskGroupName:  taskGroupName,
 	}
 
 	result := si.UpdateRequest{
@@ -84,13 +86,21 @@ func CreateReleaseAskRequestForTask(appID, taskId, partition string) si.UpdateRe
 	return result
 }
 
-func CreateReleaseAllocationRequestForTask(appID, allocUUID, partition string) si.UpdateRequest {
+func GetTerminationTypeFromString(terminationTypeStr string) si.AllocationRelease_TerminationType{
+	if v, ok := si.AllocationRelease_TerminationType_value[terminationTypeStr]; ok {
+		return si.AllocationRelease_TerminationType(v)
+	}
+	return si.AllocationRelease_STOPPED_BY_RM
+}
+
+func CreateReleaseAllocationRequestForTask(appID, allocUUID, partition, terminationType string) si.UpdateRequest {
 	toReleases := make([]*si.AllocationRelease, 0)
 	toReleases = append(toReleases, &si.AllocationRelease{
-		ApplicationID: appID,
-		UUID:          allocUUID,
-		PartitionName: partition,
-		Message:       "task completed",
+		ApplicationID:   appID,
+		UUID:            allocUUID,
+		PartitionName:   partition,
+		TerminationType: GetTerminationTypeFromString(terminationType),
+		Message:         "task completed",
 	})
 
 	releaseRequest := si.AllocationReleasesRequest{
diff --git a/pkg/common/si_helper_test.go b/pkg/common/si_helper_test.go
index ff06b280e..225328a55 100644
--- a/pkg/common/si_helper_test.go
+++ b/pkg/common/si_helper_test.go
@@ -83,7 +83,7 @@ func TestCreateUpdateRequestForTask(t *testing.T) {
 		},
 	}
 
-	updateRequest := CreateUpdateRequestForTask("appId1", "taskId1", res, pod)
+	updateRequest := CreateUpdateRequestForTask("appId1", "taskId1", res, false, "", pod)
 	asks := updateRequest.Asks
 	assert.Equal(t, len(asks), 1)
 	allocAsk := asks[0]
diff --git a/pkg/common/utils/gang_utils.go b/pkg/common/utils/gang_utils.go
index e8615093a..78de99942 100644
--- a/pkg/common/utils/gang_utils.go
+++ b/pkg/common/utils/gang_utils.go
@@ -21,6 +21,7 @@ package utils
 import (
 	"encoding/json"
 	"fmt"
+	"strconv"
 	"sync"
 
 	v1 "k8s.io/api/core/v1"
@@ -71,6 +72,15 @@ func GetPlaceholderResourceRequest(resources map[string]resource.Quantity) v1.Re
 	return resourceReq
 }
 
+func GetPlaceholderFlagFromPodSpec(pod *v1.Pod) bool {
+	if value, ok := pod.Annotations[constants.AnnotationPlaceholderFlag]; ok {
+		if v, err := strconv.ParseBool(value); err == nil {
+			return v
+		}
+	}
+	return false
+}
+
 func GetTaskGroupFromPodSpec(pod *v1.Pod) string {
 	if value, ok := pod.Annotations[constants.AnnotationTaskGroupName]; ok {
 		return value

From 132fdae007aa63aa18ff1b16a6846ec92d7d56d0 Mon Sep 17 00:00:00 2001
From: Weiwei Yang <wyang@cloudera.com>
Date: Wed, 6 Jan 2021 22:39:40 -0800
Subject: [PATCH 10/15] [YUNIKORN-500] fix UT failures

---
 go.mod                        | 2 +-
 go.sum                        | 3 +++
 pkg/cache/application_test.go | 4 +++-
 pkg/cache/placeholder_test.go | 2 +-
 pkg/common/si_helper_test.go  | 2 +-
 pkg/shim/scheduler.go         | 4 ++++
 6 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/go.mod b/go.mod
index 24afc3071..0400f636b 100644
--- a/go.mod
+++ b/go.mod
@@ -23,7 +23,7 @@ go 1.12
 require (
 	github.com/GoogleCloudPlatform/spark-on-k8s-operator v0.0.0-20200817155620-c19d2b8660d8
 	github.com/apache/incubator-yunikorn-core v0.0.0-20210106054125-dcf631d5c6b5
-	github.com/apache/incubator-yunikorn-scheduler-interface v0.9.1-0.20201215141356-df4d86d2197b
+	github.com/apache/incubator-yunikorn-scheduler-interface v0.9.1-0.20210106054514-49c4f33ed27b
 	github.com/google/uuid v1.1.1
 	github.com/gorilla/mux v1.7.3
 	github.com/looplab/fsm v0.1.0
diff --git a/go.sum b/go.sum
index dabdda435..b836a5951 100644
--- a/go.sum
+++ b/go.sum
@@ -50,10 +50,13 @@ github.com/apache/incubator-yunikorn-core v0.0.0-20201218082332-5471d84cd619 h1:
 github.com/apache/incubator-yunikorn-core v0.0.0-20201218082332-5471d84cd619/go.mod h1:d/fn47kdGd094NHiysHAY+0d4evpYRFQ6q5b9eaBONo=
 github.com/apache/incubator-yunikorn-core v0.0.0-20210106054125-dcf631d5c6b5 h1:1cDvDjyqKce4U+Zvj7bmPSxU2ok1e+ynJsarFcp+JwQ=
 github.com/apache/incubator-yunikorn-core v0.0.0-20210106054125-dcf631d5c6b5/go.mod h1:d/fn47kdGd094NHiysHAY+0d4evpYRFQ6q5b9eaBONo=
+github.com/apache/incubator-yunikorn-scheduler-interface v0.0.0-20210106054514-49c4f33ed27b/go.mod h1:ObMs03XFbnmpGD81jYvdUDEVZbHvz8W6dWH5nGDCjc0=
 github.com/apache/incubator-yunikorn-scheduler-interface v0.9.1-0.20200901200728-b9033558f319 h1:I12nCcXdHe6W4oysVejFHfQDy0Ix0r+ZHdfooyEoldo=
 github.com/apache/incubator-yunikorn-scheduler-interface v0.9.1-0.20200901200728-b9033558f319/go.mod h1:ObMs03XFbnmpGD81jYvdUDEVZbHvz8W6dWH5nGDCjc0=
 github.com/apache/incubator-yunikorn-scheduler-interface v0.9.1-0.20201215141356-df4d86d2197b h1:dxncLtkTwtqUB6pgsVVw52+Ni3ZXG2BPnLrSrubcRZA=
 github.com/apache/incubator-yunikorn-scheduler-interface v0.9.1-0.20201215141356-df4d86d2197b/go.mod h1:ObMs03XFbnmpGD81jYvdUDEVZbHvz8W6dWH5nGDCjc0=
+github.com/apache/incubator-yunikorn-scheduler-interface v0.9.1-0.20210106054514-49c4f33ed27b h1:er61fRzIZuWejT9vUsHvb3ZyilJt6QCY7ZFdlAmVm7w=
+github.com/apache/incubator-yunikorn-scheduler-interface v0.9.1-0.20210106054514-49c4f33ed27b/go.mod h1:ObMs03XFbnmpGD81jYvdUDEVZbHvz8W6dWH5nGDCjc0=
 github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o=
 github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8=
 github.com/asaskevich/govalidator v0.0.0-20180720115003-f9ffefc3facf/go.mod h1:lB+ZfQJz7igIIfQNfa7Ml4HSf2uFQQRzpGGRXenZAgY=
diff --git a/pkg/cache/application_test.go b/pkg/cache/application_test.go
index 4bb88a485..ecbeb541d 100644
--- a/pkg/cache/application_test.go
+++ b/pkg/cache/application_test.go
@@ -362,7 +362,9 @@ func TestTryReserve(t *testing.T) {
 		createdPods.add(pod)
 		return pod, nil
 	})
-	GetPlaceholderManager().setMockedClients(mockedAPIProvider.GetAPIs())
+	mgr := NewPlaceholderManager(mockedAPIProvider.GetAPIs())
+	mgr.Start()
+	defer mgr.Stop()
 
 	// create a new app
 	app := NewApplication("app00001", "root.abc", "test-user",
diff --git a/pkg/cache/placeholder_test.go b/pkg/cache/placeholder_test.go
index c4908a3d1..42bf58c80 100644
--- a/pkg/cache/placeholder_test.go
+++ b/pkg/cache/placeholder_test.go
@@ -59,7 +59,7 @@ func TestNewPlaceholder(t *testing.T) {
 	assert.Equal(t, len(holder.pod.Labels), 2)
 	assert.Equal(t, holder.pod.Labels[constants.LabelApplicationID], appID)
 	assert.Equal(t, holder.pod.Labels[constants.LabelQueueName], queue)
-	assert.Equal(t, len(holder.pod.Annotations), 1)
+	assert.Equal(t, len(holder.pod.Annotations), 2)
 	assert.Equal(t, holder.pod.Annotations[constants.AnnotationTaskGroupName], app.taskGroups[0].Name)
 	assert.Equal(t, common.GetPodResource(holder.pod).Resources[constants.CPU].Value, int64(500))
 	assert.Equal(t, common.GetPodResource(holder.pod).Resources[constants.Memory].Value, int64(1024))
diff --git a/pkg/common/si_helper_test.go b/pkg/common/si_helper_test.go
index 225328a55..5f01a2556 100644
--- a/pkg/common/si_helper_test.go
+++ b/pkg/common/si_helper_test.go
@@ -27,7 +27,7 @@ import (
 )
 
 func TestCreateReleaseAllocationRequest(t *testing.T) {
-	request := CreateReleaseAllocationRequestForTask("app01", "alloc01", "default")
+	request := CreateReleaseAllocationRequestForTask("app01", "alloc01", "default", "STOPPED_BY_RM")
 	assert.Assert(t, request.Releases != nil)
 	assert.Assert(t, request.Releases.AllocationsToRelease != nil)
 	assert.Assert(t, request.Releases.AllocationAsksToRelease == nil)
diff --git a/pkg/shim/scheduler.go b/pkg/shim/scheduler.go
index 63f8565ba..e21340de6 100644
--- a/pkg/shim/scheduler.go
+++ b/pkg/shim/scheduler.go
@@ -267,6 +267,10 @@ func (ss *KubernetesShim) run() {
 	}
 
 	ss.apiFactory.Start()
+
+	// run the placeholder manager
+	placeholderMgr := cache.NewPlaceholderManager(ss.apiFactory.GetAPIs())
+	placeholderMgr.Start()
 }
 
 func (ss *KubernetesShim) enterState(event *fsm.Event) {

From 396d0b1c017c60050bdf439c2c4be37e6b7f4aed Mon Sep 17 00:00:00 2001
From: Weiwei Yang <wwei@apache.org>
Date: Wed, 13 Jan 2021 21:55:55 -0800
Subject: [PATCH 11/15] [YUNIKORN-501] Include node-selector and tolerations in
 the placeholder's pod spec. (#217)

---
 pkg/cache/application_events.go  |  2 +-
 pkg/cache/placeholder.go         | 17 +++-----
 pkg/cache/placeholder_manager.go |  7 ---
 pkg/cache/placeholder_test.go    | 73 +++++++++++++++++++++++++++++++-
 pkg/cache/task.go                |  3 +-
 pkg/common/si_helper.go          |  2 +-
 6 files changed, 79 insertions(+), 25 deletions(-)

diff --git a/pkg/cache/application_events.go b/pkg/cache/application_events.go
index 23a89e014..d3617e050 100644
--- a/pkg/cache/application_events.go
+++ b/pkg/cache/application_events.go
@@ -223,4 +223,4 @@ func (re ReleaseAppAllocationEvent) GetArgs() []interface{} {
 
 func (re ReleaseAppAllocationEvent) GetEvent() events.ApplicationEventType {
 	return re.event
-}
\ No newline at end of file
+}
diff --git a/pkg/cache/placeholder.go b/pkg/cache/placeholder.go
index 0bf4a4c1a..d048630e6 100644
--- a/pkg/cache/placeholder.go
+++ b/pkg/cache/placeholder.go
@@ -33,16 +33,8 @@ type Placeholder struct {
 	appID         string
 	taskGroupName string
 	pod           *v1.Pod
-	stage         PlaceholderState
 }
 
-type PlaceholderState string
-
-const (
-	Acquiring PlaceholderState = "Acquiring"
-	Acquired  PlaceholderState = "Acquired"
-)
-
 func newPlaceholder(placeholderName string, app *Application, taskGroup v1alpha1.TaskGroup) *Placeholder {
 	placeholderPod := &v1.Pod{
 		ObjectMeta: metav1.ObjectMeta{
@@ -54,7 +46,7 @@ func newPlaceholder(placeholderName string, app *Application, taskGroup v1alpha1
 			},
 			Annotations: map[string]string{
 				constants.AnnotationPlaceholderFlag: "true",
-				constants.AnnotationTaskGroupName: taskGroup.Name,
+				constants.AnnotationTaskGroupName:   taskGroup.Name,
 			},
 		},
 		Spec: v1.PodSpec{
@@ -69,6 +61,8 @@ func newPlaceholder(placeholderName string, app *Application, taskGroup v1alpha1
 			},
 			RestartPolicy: constants.PlaceholderPodRestartPolicy,
 			SchedulerName: constants.SchedulerName,
+			NodeSelector:  taskGroup.NodeSelector,
+			Tolerations:   taskGroup.Tolerations,
 		},
 	}
 
@@ -76,11 +70,10 @@ func newPlaceholder(placeholderName string, app *Application, taskGroup v1alpha1
 		appID:         app.GetApplicationID(),
 		taskGroupName: taskGroup.Name,
 		pod:           placeholderPod,
-		stage:         Acquiring,
 	}
 }
 
 func (p *Placeholder) String() string {
-	return fmt.Sprintf("appID: %s, taskGroup: %s, podName: %s/%s, stage: %s",
-		p.appID, p.taskGroupName, p.pod.Namespace, p.pod.Name, p.stage)
+	return fmt.Sprintf("appID: %s, taskGroup: %s, podName: %s/%s",
+		p.appID, p.taskGroupName, p.pod.Namespace, p.pod.Name)
 }
diff --git a/pkg/cache/placeholder_manager.go b/pkg/cache/placeholder_manager.go
index f4b16a0d1..b25b97c2a 100644
--- a/pkg/cache/placeholder_manager.go
+++ b/pkg/cache/placeholder_manager.go
@@ -107,13 +107,6 @@ func (mgr *PlaceholderManager) CleanUp(app *Application) {
 		zap.String("appID", app.GetApplicationID()))
 }
 
-// this is only used in testing
-func (mgr *PlaceholderManager) setMockedClients(mockedClients *client.Clients) {
-	mgr.Lock()
-	defer mgr.Unlock()
-	mgr.clients = mockedClients
-}
-
 func (mgr *PlaceholderManager) cleanOrphanPlaceholders() {
 	mgr.Lock()
 	defer mgr.Unlock()
diff --git a/pkg/cache/placeholder_test.go b/pkg/cache/placeholder_test.go
index 42bf58c80..e9d27864b 100644
--- a/pkg/cache/placeholder_test.go
+++ b/pkg/cache/placeholder_test.go
@@ -22,6 +22,7 @@ import (
 	"testing"
 
 	"gotest.tools/assert"
+	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 
 	"github.com/apache/incubator-yunikorn-k8shim/pkg/apis/yunikorn.apache.org/v1alpha1"
@@ -51,7 +52,6 @@ func TestNewPlaceholder(t *testing.T) {
 
 	holder := newPlaceholder("ph-name", app, app.taskGroups[0])
 	assert.Equal(t, holder.appID, appID)
-	assert.Equal(t, holder.stage, Acquiring)
 	assert.Equal(t, holder.taskGroupName, app.taskGroups[0].Name)
 	assert.Equal(t, holder.pod.Spec.SchedulerName, constants.SchedulerName)
 	assert.Equal(t, holder.pod.Name, "ph-name")
@@ -63,5 +63,74 @@ func TestNewPlaceholder(t *testing.T) {
 	assert.Equal(t, holder.pod.Annotations[constants.AnnotationTaskGroupName], app.taskGroups[0].Name)
 	assert.Equal(t, common.GetPodResource(holder.pod).Resources[constants.CPU].Value, int64(500))
 	assert.Equal(t, common.GetPodResource(holder.pod).Resources[constants.Memory].Value, int64(1024))
-	assert.Equal(t, holder.String(), "appID: app01, taskGroup: test-group-1, podName: test/ph-name, stage: Acquiring")
+	assert.Equal(t, len(holder.pod.Spec.NodeSelector), 0)
+	assert.Equal(t, len(holder.pod.Spec.Tolerations), 0)
+	assert.Equal(t, holder.String(), "appID: app01, taskGroup: test-group-1, podName: test/ph-name")
+}
+
+func TestNewPlaceholderWithNodeSelectors(t *testing.T) {
+	const (
+		appID     = "app01"
+		queue     = "root.default"
+		namespace = "test"
+	)
+	mockedSchedulerAPI := newMockSchedulerAPI()
+	app := NewApplication(appID, queue,
+		"bob", map[string]string{constants.AppTagNamespace: namespace}, mockedSchedulerAPI)
+	app.setTaskGroups([]v1alpha1.TaskGroup{
+		{
+			Name:      "test-group-1",
+			MinMember: 10,
+			MinResource: map[string]resource.Quantity{
+				"cpu":    resource.MustParse("500m"),
+				"memory": resource.MustParse("1024M"),
+			},
+			NodeSelector: map[string]string{
+				"nodeType":  "test",
+				"nodeState": "healthy",
+			},
+		},
+	})
+
+	holder := newPlaceholder("ph-name", app, app.taskGroups[0])
+	assert.Equal(t, len(holder.pod.Spec.NodeSelector), 2)
+	assert.Equal(t, holder.pod.Spec.NodeSelector["nodeType"], "test")
+	assert.Equal(t, holder.pod.Spec.NodeSelector["nodeState"], "healthy")
+}
+
+func TestNewPlaceholderWithTolerations(t *testing.T) {
+	const (
+		appID     = "app01"
+		queue     = "root.default"
+		namespace = "test"
+	)
+	mockedSchedulerAPI := newMockSchedulerAPI()
+	app := NewApplication(appID, queue,
+		"bob", map[string]string{constants.AppTagNamespace: namespace}, mockedSchedulerAPI)
+	app.setTaskGroups([]v1alpha1.TaskGroup{
+		{
+			Name:      "test-group-1",
+			MinMember: 10,
+			MinResource: map[string]resource.Quantity{
+				"cpu":    resource.MustParse("500m"),
+				"memory": resource.MustParse("1024M"),
+			},
+			Tolerations: []v1.Toleration{
+				{
+					Key:      "key1",
+					Operator: v1.TolerationOpEqual,
+					Value:    "value1",
+					Effect:   v1.TaintEffectNoSchedule,
+				},
+			},
+		},
+	})
+
+	holder := newPlaceholder("ph-name", app, app.taskGroups[0])
+	assert.Equal(t, len(holder.pod.Spec.Tolerations), 1)
+	tlr := holder.pod.Spec.Tolerations[0]
+	assert.Equal(t, tlr.Key, "key1")
+	assert.Equal(t, tlr.Value, "value1")
+	assert.Equal(t, tlr.Operator, v1.TolerationOpEqual)
+	assert.Equal(t, tlr.Effect, v1.TaintEffectNoSchedule)
 }
diff --git a/pkg/cache/task.go b/pkg/cache/task.go
index 979546c77..8fa50c7eb 100644
--- a/pkg/cache/task.go
+++ b/pkg/cache/task.go
@@ -23,9 +23,9 @@ import (
 	"sync"
 	"time"
 
-	"github.com/apache/incubator-yunikorn-k8shim/pkg/appmgmt/interfaces"
 	"go.uber.org/zap"
 
+	"github.com/apache/incubator-yunikorn-k8shim/pkg/appmgmt/interfaces"
 	"github.com/apache/incubator-yunikorn-k8shim/pkg/common"
 	"github.com/apache/incubator-yunikorn-k8shim/pkg/common/events"
 	"github.com/apache/incubator-yunikorn-k8shim/pkg/common/utils"
@@ -55,7 +55,6 @@ type Task struct {
 	lock            *sync.RWMutex
 }
 
-// TODO to be removed
 func NewTask(tid string, app *Application, ctx *Context, pod *v1.Pod) *Task {
 	taskResource := common.GetPodResource(pod)
 	return createTaskInternal(tid, app, taskResource, pod, false, "", ctx)
diff --git a/pkg/common/si_helper.go b/pkg/common/si_helper.go
index f1956ddb3..5cb37b07f 100644
--- a/pkg/common/si_helper.go
+++ b/pkg/common/si_helper.go
@@ -86,7 +86,7 @@ func CreateReleaseAskRequestForTask(appID, taskId, partition string) si.UpdateRe
 	return result
 }
 
-func GetTerminationTypeFromString(terminationTypeStr string) si.AllocationRelease_TerminationType{
+func GetTerminationTypeFromString(terminationTypeStr string) si.AllocationRelease_TerminationType {
 	if v, ok := si.AllocationRelease_TerminationType_value[terminationTypeStr]; ok {
 		return si.AllocationRelease_TerminationType(v)
 	}

From 368300d53f130d1aab7930d15e05994dede7c4c2 Mon Sep 17 00:00:00 2001
From: Weiwei Yang <wyang@cloudera.com>
Date: Thu, 14 Jan 2021 00:21:24 -0800
Subject: [PATCH 12/15] fix dependency

---
 go.sum | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/go.sum b/go.sum
index 4f9a9650a..fe3762d86 100644
--- a/go.sum
+++ b/go.sum
@@ -52,6 +52,8 @@ github.com/apache/incubator-yunikorn-core v0.0.0-20210106054125-dcf631d5c6b5 h1:
 github.com/apache/incubator-yunikorn-core v0.0.0-20210106054125-dcf631d5c6b5/go.mod h1:d/fn47kdGd094NHiysHAY+0d4evpYRFQ6q5b9eaBONo=
 github.com/apache/incubator-yunikorn-core v0.0.0-20210113182138-361d455c7f28 h1:7NO76ZYnYtrdC8CT+ZjBKsezSzviYwDjW9DzVu090/M=
 github.com/apache/incubator-yunikorn-core v0.0.0-20210113182138-361d455c7f28/go.mod h1:IhX7VztC5z9LfPVKTIkbkuisl+2CLum0neLVMF+hfXM=
+github.com/apache/incubator-yunikorn-core v0.0.0-20210113193345-bd9ed16dd49f h1:GTWn7W/KdjvihGHBIcjd+Q5FRNJl9Ycj4a+NWfQ8kD8=
+github.com/apache/incubator-yunikorn-core v0.0.0-20210113193345-bd9ed16dd49f/go.mod h1:IhX7VztC5z9LfPVKTIkbkuisl+2CLum0neLVMF+hfXM=
 github.com/apache/incubator-yunikorn-scheduler-interface v0.0.0-20210106054514-49c4f33ed27b/go.mod h1:ObMs03XFbnmpGD81jYvdUDEVZbHvz8W6dWH5nGDCjc0=
 github.com/apache/incubator-yunikorn-scheduler-interface v0.9.1-0.20200901200728-b9033558f319 h1:I12nCcXdHe6W4oysVejFHfQDy0Ix0r+ZHdfooyEoldo=
 github.com/apache/incubator-yunikorn-scheduler-interface v0.9.1-0.20200901200728-b9033558f319/go.mod h1:ObMs03XFbnmpGD81jYvdUDEVZbHvz8W6dWH5nGDCjc0=

From ec73314571802ea5b0aeab983fbcb7a166be0392 Mon Sep 17 00:00:00 2001
From: Weiwei Yang <wyang@cloudera.com>
Date: Fri, 15 Jan 2021 11:43:09 -0800
Subject: [PATCH 13/15] [YUNIKORN-2] fix review comments

---
 pkg/cache/placeholder_manager.go    | 7 ++++++-
 pkg/common/utils/gang_utils.go      | 4 ++++
 pkg/common/utils/gang_utils_test.go | 7 +++++++
 3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/pkg/cache/placeholder_manager.go b/pkg/cache/placeholder_manager.go
index b25b97c2a..a42f1401b 100644
--- a/pkg/cache/placeholder_manager.go
+++ b/pkg/cache/placeholder_manager.go
@@ -33,7 +33,10 @@ import (
 
 // placeholder manager is a service to manage the lifecycle of app placeholders
 type PlaceholderManager struct {
-	clients   *client.Clients
+	clients *client.Clients
+	// when the placeholder manager is unable to delete a pod,
+	// this pod becomes to be an "orphan" pod. We add them to a map
+	// and keep retrying deleting them in order to avoid wasting resources.
 	orphanPod map[string]*v1.Pod
 	stopChan  chan struct{}
 	running   atomic.Value
@@ -90,6 +93,8 @@ func (mgr *PlaceholderManager) createAppPlaceholders(app *Application) error {
 
 // clean up all the placeholders for an application
 func (mgr *PlaceholderManager) CleanUp(app *Application) {
+	mgr.Lock()
+	defer mgr.Unlock()
 	log.Logger().Info("start to clean up app placeholders",
 		zap.String("appID", app.GetApplicationID()))
 	for taskID, task := range app.taskMap {
diff --git a/pkg/common/utils/gang_utils.go b/pkg/common/utils/gang_utils.go
index 78de99942..226e10af7 100644
--- a/pkg/common/utils/gang_utils.go
+++ b/pkg/common/utils/gang_utils.go
@@ -158,6 +158,10 @@ func (t *TaskGroupInstanceCountMap) GetTaskGroupInstanceCount(groupName string)
 }
 
 func (t *TaskGroupInstanceCountMap) Equals(target *TaskGroupInstanceCountMap) bool {
+	if t == nil {
+		return t == target
+	}
+
 	t.RLock()
 	defer t.RUnlock()
 
diff --git a/pkg/common/utils/gang_utils_test.go b/pkg/common/utils/gang_utils_test.go
index 20deff34c..43215b9ef 100644
--- a/pkg/common/utils/gang_utils_test.go
+++ b/pkg/common/utils/gang_utils_test.go
@@ -181,6 +181,13 @@ func TestTaskGroupInstanceCountMap(t *testing.T) {
 	counts2.AddOne("g2")
 	counts2.AddOne("g1")
 	assert.Equal(t, counts1.Equals(counts2), false)
+
+	var nilOne *TaskGroupInstanceCountMap
+	var nilTwo *TaskGroupInstanceCountMap
+	assert.Equal(t, nilOne.Equals(nilTwo), true)
+
+	empty := NewTaskGroupInstanceCountMap()
+	assert.Equal(t, nilOne.Equals(empty), false)
 }
 
 // nolint: funlen

From d0a5e7b4d9e9916781ef0cedfa1064b4003d72df Mon Sep 17 00:00:00 2001
From: Weiwei Yang <wyang@cloudera.com>
Date: Mon, 18 Jan 2021 14:17:29 -0800
Subject: [PATCH 14/15] [YUNIKORN-2] continue to fix review comments

---
 pkg/cache/placeholder_manager.go    |  2 +-
 pkg/common/utils/gang_utils.go      | 11 ++++++++---
 pkg/common/utils/gang_utils_test.go | 14 +++++++++++++-
 pkg/shim/scheduler.go               |  5 +++--
 4 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/pkg/cache/placeholder_manager.go b/pkg/cache/placeholder_manager.go
index a42f1401b..9ac19b04a 100644
--- a/pkg/cache/placeholder_manager.go
+++ b/pkg/cache/placeholder_manager.go
@@ -129,12 +129,12 @@ func (mgr *PlaceholderManager) cleanOrphanPlaceholders() {
 }
 
 func (mgr *PlaceholderManager) Start() {
-	log.Logger().Info("starting the Placeholder Manager")
 	mgr.stopChan = make(chan struct{})
 	if mgr.isRunning() {
 		log.Logger().Info("The placeholder manager has been started")
 		return
 	}
+	log.Logger().Info("starting the Placeholder Manager")
 	mgr.setRunning(true)
 	go func() {
 		for {
diff --git a/pkg/common/utils/gang_utils.go b/pkg/common/utils/gang_utils.go
index 226e10af7..6a8174eb6 100644
--- a/pkg/common/utils/gang_utils.go
+++ b/pkg/common/utils/gang_utils.go
@@ -53,11 +53,12 @@ func FindAppTaskGroup(appTaskGroups []*v1alpha1.TaskGroup, groupName string) (*v
 	return nil, fmt.Errorf("taskGroup %s is not defined in the application", groupName)
 }
 
-// the placeholder name is the pod name, pod name can not be longer than 63 chars
-// taskGroup name and appID will be truncated if they go over 20/28 chars respectively
+// the placeholder name is the pod name, pod name can not be longer than 63 chars,
+// taskGroup name and appID will be truncated if they go over 20/28 chars respectively,
+// each taskGroup is assigned with an incremental index starting from 0.
 func GeneratePlaceholderName(taskGroupName, appID string, index int32) string {
 	// taskGroup name no longer than 20 chars
-	// appID no longer than 35 chars
+	// appID no longer than 28 chars
 	// total length no longer than 20 + 28 + 5 + 10 = 63
 	shortTaskGroupName := fmt.Sprintf("%.20s", taskGroupName)
 	shortAppID := fmt.Sprintf("%.28s", appID)
@@ -108,6 +109,10 @@ func GetTaskGroupsFromAnnotation(pod *v1.Pod) ([]v1alpha1.TaskGroup, error) {
 			return nil, fmt.Errorf("can't get taskGroup MinMember from pod annotation, %s",
 				pod.Annotations[constants.AnnotationTaskGroups])
 		}
+		if taskGroup.MinMember < int32(0) {
+			return nil, fmt.Errorf("minMember cannot be negative, %s",
+				pod.Annotations[constants.AnnotationTaskGroups])
+		}
 	}
 	return taskGroups, nil
 }
diff --git a/pkg/common/utils/gang_utils_test.go b/pkg/common/utils/gang_utils_test.go
index 43215b9ef..fec7d145e 100644
--- a/pkg/common/utils/gang_utils_test.go
+++ b/pkg/common/utils/gang_utils_test.go
@@ -273,10 +273,18 @@ func TestGetTaskGroupFromAnnotation(t *testing.T) {
 	testGroupErr4 := `
 	[
 		{
-			"name": "test-group-err-3",
+			"name": "test-group-err-4",
 			"minMember": 3,
 		}
 	]`
+	// negative minMember
+	testGroupErr5 := `
+	[
+		{
+			"name": "test-group-err-5",
+			"minMember": -100,
+		}
+	]`
 	// Insert task group info to pod annotation
 	pod := &v1.Pod{
 		ObjectMeta: metav1.ObjectMeta{
@@ -310,6 +318,10 @@ func TestGetTaskGroupFromAnnotation(t *testing.T) {
 	taskGroupErr4, err := GetTaskGroupsFromAnnotation(pod)
 	assert.Assert(t, taskGroupErr4 == nil)
 	assert.Assert(t, err != nil)
+	pod.Annotations = map[string]string{constants.AnnotationTaskGroups: testGroupErr5}
+	taskGroupErr5, err := GetTaskGroupsFromAnnotation(pod)
+	assert.Assert(t, taskGroupErr5 == nil)
+	assert.Assert(t, err != nil)
 	// Correct case
 	pod.Annotations = map[string]string{constants.AnnotationTaskGroups: testGroup}
 	taskGroups, err := GetTaskGroupsFromAnnotation(pod)
diff --git a/pkg/shim/scheduler.go b/pkg/shim/scheduler.go
index e21340de6..8d0e9e026 100644
--- a/pkg/shim/scheduler.go
+++ b/pkg/shim/scheduler.go
@@ -269,8 +269,7 @@ func (ss *KubernetesShim) run() {
 	ss.apiFactory.Start()
 
 	// run the placeholder manager
-	placeholderMgr := cache.NewPlaceholderManager(ss.apiFactory.GetAPIs())
-	placeholderMgr.Start()
+	cache.NewPlaceholderManager(ss.apiFactory.GetAPIs()).Start()
 }
 
 func (ss *KubernetesShim) enterState(event *fsm.Event) {
@@ -288,6 +287,8 @@ func (ss *KubernetesShim) stop() {
 		dispatcher.Stop()
 		// stop the app manager
 		ss.appManager.Stop()
+		// stop the placeholder manager
+		cache.GetPlaceholderManager().Stop()
 	default:
 		log.Logger().Info("scheduler is already stopped")
 	}

From 12ecf1165635957e25018fb4faab3b9734e1c10d Mon Sep 17 00:00:00 2001
From: Weiwei Yang <wyang@cloudera.com>
Date: Tue, 19 Jan 2021 12:19:10 -0800
Subject: [PATCH 15/15] [YUNIKORN-2] small fix for placeholder start function

---
 pkg/cache/placeholder_manager.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/cache/placeholder_manager.go b/pkg/cache/placeholder_manager.go
index 9ac19b04a..4ac47ce4f 100644
--- a/pkg/cache/placeholder_manager.go
+++ b/pkg/cache/placeholder_manager.go
@@ -129,12 +129,12 @@ func (mgr *PlaceholderManager) cleanOrphanPlaceholders() {
 }
 
 func (mgr *PlaceholderManager) Start() {
-	mgr.stopChan = make(chan struct{})
 	if mgr.isRunning() {
 		log.Logger().Info("The placeholder manager has been started")
 		return
 	}
 	log.Logger().Info("starting the Placeholder Manager")
+	mgr.stopChan = make(chan struct{})
 	mgr.setRunning(true)
 	go func() {
 		for {