Merge branch 'master' into avoid_node_on_retry
JamesMurkin committed Apr 13, 2023
2 parents 269126b + ec82b5c commit 4f34365
Showing 18 changed files with 713 additions and 227 deletions.
4 changes: 2 additions & 2 deletions client/python/pyproject.toml

```diff
@@ -9,9 +9,9 @@ license = { text = "Apache Software License" }
 authors = [{ name = "G-Research Open Source Software", email = "armada@armadaproject.io" }]

 [project.optional-dependencies]
-format = ["black==23.1.0", "flake8==6.0.0", "pylint==2.16.3"]
+format = ["black==23.3.0", "flake8==6.0.0", "pylint==2.17.2"]
 docs = ["sphinx", "sphinx-jekyll-builder", "sphinx-toolbox==3.2.0b1"]
-test = ["pytest==7.2.1", "coverage>=6.5.0"]
+test = ["pytest==7.2.2", "coverage>=6.5.0"]

 [build-system]
 requires = ["setuptools"]
```
13 changes: 9 additions & 4 deletions cmd/armadactl/README.md

````diff
@@ -57,13 +57,18 @@ armadactl version [flags]
 ```bash
 armadactl watch [deployment_name] [flags]
 ```
-- getQueueSchedulingReport : This subcommand retrieves a report of the current scheduling status of all queues in the Armada cluster.
+- queue-report : This subcommand retrieves a report of the current scheduling status of all queues in the Armada cluster.
 ```bash
-armadactl getQueueSchedulingReport
+armadactl queue-report
 ```
-- getJobSchedulingReport : This subcommand retrieves a report of the current scheduling status of all jobs in the Armada cluster.
+- job-report : This subcommand retrieves a report of the current scheduling status of all jobs in the Armada cluster.
 ```bash
-armadactl getJobSchedulingReport
+armadactl job-report
 ```
+
+- scheduling-report : This subcommand retrieves a report of the current scheduling status in the Armada cluster.
+```bash
+armadactl scheduling-report
+```

 For a full list of subcommands and options, you can run **armadactl --help**.
````
5 changes: 4 additions & 1 deletion config/executor/config.yaml

```diff
@@ -46,7 +46,10 @@ kubernetes:
   maxTerminatedPods: 1000 # Should be lower than kube-controller-manager's terminated-pod-gc-threshold (default 12500)
   stuckTerminatingPodExpiry: 1m
   podKillTimeout: 5m
-  nodeReservedResourcesPriority: 2000001000 # same priority as system-node-critical
+  minimumResourcesMarkedAllocatedToNonArmadaPodsPerNode:
+    cpu: 100m
+    memory: 50Mi
+  minimumResourcesMarkedAllocatedToNonArmadaPodsPerNodePriority: 2000001000 # same priority as system-node-critical
   podDefaults:
     ingress:
       hostnameSuffix: "svc"
```
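
For operators carrying existing executor config through this change, the rename is mechanical. A sketch of the before/after mapping — the old `nodeReservedResources` key spelling is an assumption inferred from the removed Go field, since only `nodeReservedResourcesPriority` appears in this diff:

```yaml
# Before (hypothetical key spelling, inferred from the removed Go fields):
kubernetes:
  nodeReservedResources:
    cpu: 100m
    memory: 50Mi
  nodeReservedResourcesPriority: 2000001000

# After (keys as shown in this diff):
kubernetes:
  minimumResourcesMarkedAllocatedToNonArmadaPodsPerNode:
    cpu: 100m
    memory: 50Mi
  minimumResourcesMarkedAllocatedToNonArmadaPodsPerNodePriority: 2000001000
```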
16 changes: 16 additions & 0 deletions config/jobservice/config.yaml

```diff
@@ -2,7 +2,23 @@ grpcPort: 60003
 httpPort: 8090
 purgeJobSetTime: 1000
 subscribeJobSetTime: 100
+# databaseType can be either 'postgres' or 'sqlite'
+databaseType: "postgres"
+# databasePath specifies the location of the back-end
+# storage file when using database type 'sqlite'
 databasePath: "/var/jobservice.db"
+# Connection details when using database type 'postgres'
+postgresConfig:
+  maxOpenConns: 50
+  maxIdleConns: 10
+  connMaxLifetime: 30m
+  connection:
+    host: postgres
+    port: 5432
+    user: postgres
+    password: psw
+    dbname: postgres
+    sslmode: disable
 grpc:
   keepaliveParams:
     maxConnectionIdle: 5m
```
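
The comments above also describe a sqlite variant. A minimal sketch of such a config, assuming (as the comments imply) that sqlite needs only `databasePath` and no `postgresConfig` block:

```yaml
grpcPort: 60003
httpPort: 8090
purgeJobSetTime: 1000
subscribeJobSetTime: 100
databaseType: "sqlite"
# With sqlite, job-set state lives in this single local file; the
# postgresConfig block is only consulted for databaseType 'postgres'.
databasePath: "/var/jobservice.db"
```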
13 changes: 12 additions & 1 deletion e2e/setup/jobservice.yaml

```diff
@@ -1,6 +1,17 @@
-databasePath: "/tmp/jobservice.db"
 subscribeJobSetTime: 60
 purgeJobSetTime: 10000
+databaseType: "postgres"
+postgresConfig:
+  maxOpenConns: 50
+  maxIdleConns: 10
+  connMaxLifetime: 30m
+  connection:
+    host: postgres
+    port: 5432
+    user: postgres
+    password: psw
+    dbname: postgres
+    sslmode: disable
 apiConnection:
   armadaUrl: "server:50051"
   forceNoTls: true
```
1 change: 0 additions & 1 deletion internal/armada/server/lease.go

```diff
@@ -376,7 +376,6 @@ func (q *AggregatedQueueServer) getJobs(ctx context.Context, req *api.StreamingL
         for _, job := range jobs {
             nodeIdByJobId[job.Id] = node.Id
         }
-
         nodes = append(nodes, node)
     }
     indexedResources := q.schedulingConfig.IndexedResources
```
9 changes: 4 additions & 5 deletions internal/executor/application.go

```diff
@@ -155,8 +155,8 @@ func setupExecutorApiComponents(
         nil,
         config.Kubernetes.TrackedNodeLabels,
         config.Kubernetes.NodeIdLabel,
-        config.Kubernetes.NodeReservedResources,
-        config.Kubernetes.NodeReservedResourcesPriority,
+        config.Kubernetes.MinimumResourcesMarkedAllocatedToNonArmadaPodsPerNode,
+        config.Kubernetes.MinimumResourcesMarkedAllocatedToNonArmadaPodsPerNodePriority,
     )

     eventReporter, stopReporter := reporter.NewJobEventReporter(
@@ -261,8 +261,8 @@ func setupServerApiComponents(
         usageClient,
         config.Kubernetes.TrackedNodeLabels,
         config.Kubernetes.NodeIdLabel,
-        config.Kubernetes.NodeReservedResources,
-        config.Kubernetes.NodeReservedResourcesPriority,
+        config.Kubernetes.MinimumResourcesMarkedAllocatedToNonArmadaPodsPerNode,
+        config.Kubernetes.MinimumResourcesMarkedAllocatedToNonArmadaPodsPerNodePriority,
     )

     jobLeaseService := service.NewJobLeaseService(
@@ -286,7 +286,6 @@ func setupServerApiComponents(
         clusterUtilisationService,
         submitter,
         etcdHealthMonitor,
-        config.Kubernetes.NodeReservedResources,
     )

     jobManager := service.NewJobManager(
```
17 changes: 9 additions & 8 deletions internal/executor/configuration/types.go

```diff
@@ -55,14 +55,15 @@ type KubernetesConfiguration struct {
     PodDefaults              *PodDefaults
     PendingPodChecks         *podchecks.Checks
     FatalPodSubmissionErrors []string
-    // NodeReservedResources config is used to factor in reserved resources on each node
-    // when validating whether a job can be scheduled on a node during job submit (i.e., to factor in resources for daemonset pods).
-    NodeReservedResources armadaresource.ComputeResources
-    // NodeReservedResourcesPriority is the priority the reserved resources are reported at.
-    // All pods in Kubernetes have a priority, and we report resources to the Armada API per priority,
-    // so we also need to set a priority for the reserved resources.
-    NodeReservedResourcesPriority int32
-    PodKillTimeout time.Duration
+    // Minimum amount of resources marked as allocated to non-Armada pods on each node.
+    // I.e., if the total resources allocated to non-Armada pods on some node drops below this value,
+    // the executor adds a fictional allocation to make up the difference, such that the total is at least this.
+    // Hence, specifying this value can ensure that, e.g., if a daemonset pod restarts, those resources are not considered for scheduling.
+    MinimumResourcesMarkedAllocatedToNonArmadaPodsPerNode armadaresource.ComputeResources
+    // When adding a fictional allocation to ensure resources allocated to non-Armada pods is at least
+    // MinimumResourcesMarkedAllocatedToNonArmadaPodsPerNode, those resources are marked allocated at this priority.
+    MinimumResourcesMarkedAllocatedToNonArmadaPodsPerNodePriority int32
+    PodKillTimeout time.Duration
 }

 type EtcdConfiguration struct {
```
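
The behaviour these comments describe is easy to illustrate with a sketch. The following is not the executor's actual implementation: it substitutes a simplified `Resources` type for `armadaresource.ComputeResources`, and the function name is hypothetical.

```go
package main

import "fmt"

// Resources maps a resource name (e.g. "cpu", "memory") to an amount in
// integer units (millicores for cpu, bytes for memory).
type Resources map[string]int64

// topUpNonArmadaAllocation returns the fictional allocation to report so
// that the total marked as allocated to non-Armada pods on a node is at
// least the configured minimum.
func topUpNonArmadaAllocation(observed, minimum Resources) Resources {
	fictional := Resources{}
	for name, floor := range minimum {
		if got := observed[name]; got < floor {
			// Make up the difference so that observed + fictional == floor.
			fictional[name] = floor - got
		}
	}
	return fictional
}

func main() {
	// Non-Armada pods are currently using only 40 millicores of cpu and no
	// memory, e.g. because a daemonset pod is restarting.
	observed := Resources{"cpu": 40}
	// Configured floor: 100m cpu, 50Mi memory.
	minimum := Resources{"cpu": 100, "memory": 50 * 1024 * 1024}

	// Prints map[cpu:60 memory:52428800]; the executor would report this
	// extra allocation at the configured priority (2000001000, i.e.
	// system-node-critical) so the scheduler does not treat it as free.
	fmt.Println(topUpNonArmadaAllocation(observed, minimum))
}
```

The floor is enforced per resource, so a node whose daemonset pods are briefly absent still reports at least the configured cpu and memory as taken, and jobs are not scheduled into space that will shortly disappear.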
3 changes: 0 additions & 3 deletions internal/executor/service/cluster_allocation.go

```diff
@@ -123,7 +123,6 @@ type LegacyClusterAllocationService struct {
     clusterContext    executorContext.ClusterContext
     submitter         job.Submitter
     etcdHealthMonitor healthmonitor.EtcdLimitHealthMonitor
-    reserved          armadaresource.ComputeResources
 }

 func NewLegacyClusterAllocationService(
@@ -133,7 +132,6 @@ func NewLegacyClusterAllocationService(
     utilisationService utilisation.UtilisationService,
     submitter job.Submitter,
     etcdHealthMonitor healthmonitor.EtcdLimitHealthMonitor,
-    reserved armadaresource.ComputeResources,
 ) *LegacyClusterAllocationService {
     return &LegacyClusterAllocationService{
         leaseService: leaseService,
@@ -142,7 +140,6 @@ func NewLegacyClusterAllocationService(
         clusterContext:    clusterContext,
         submitter:         submitter,
         etcdHealthMonitor: etcdHealthMonitor,
-        reserved:          reserved,
     }
 }
```