
Commit

Add missing config
severinson committed Jun 28, 2023
1 parent f0422d3 commit 02d0aa6
Showing 6 changed files with 58 additions and 49 deletions.
20 changes: 10 additions & 10 deletions config/armada/config.yaml
@@ -31,6 +31,13 @@ eventsApiRedis:
poolSize: 1000
scheduling:
enableAssertions: true
fairnessModel: "AssetFairness"
dominantResourceFairnessResourcesToConsider:
- "cpu"
- "memory"
- "nvidia.com/gpu"
resourceScarcity:
cpu: 1.0
preemption:
nodeEvictionProbability: 1.0
nodeOversubscriptionEvictionProbability: 1.0
@@ -43,8 +50,8 @@ scheduling:
priority: 1000
preemptible: false
maximumResourceFractionPerQueue:
memory: 0.99
cpu: 0.99
memory: 1.0
cpu: 1.0
armada-preemptible:
priority: 1000
preemptible: true
@@ -54,7 +61,7 @@ scheduling:
maxExtraNodesToConsider: 1
maximumResourceFractionToSchedule:
memory: 1.0
cpu: 1.0
cpu: 1.0
maxJobSchedulingContextsPerExecutor: 10000
lease:
expireAfter: 15m
@@ -69,11 +76,6 @@ scheduling:
value: "true"
effect: "NoSchedule"
defaultJobTolerationsByPriorityClass:
"":
- key: "armadaproject.io/pc-armada-default"
operator: "Equal"
value: "true"
effect: "NoSchedule"
armada-default:
- key: "armadaproject.io/pc-armada-default"
operator: "Equal"
@@ -85,8 +87,6 @@ scheduling:
value: "true"
effect: "NoSchedule"
maxRetries: 5
resourceScarcity:
cpu: 1.0
maxPodSpecSizeBytes: 65535
minJobResources:
memory: 1Mi
18 changes: 9 additions & 9 deletions config/scheduler/config.yaml
@@ -49,6 +49,13 @@ grpc:
scheduling:
executorTimeout: 10m
enableAssertions: true
fairnessModel: "AssetFairness"
dominantResourceFairnessResourcesToConsider:
- "cpu"
- "memory"
- "nvidia.com/gpu"
resourceScarcity:
cpu: 1.0
preemption:
alwaysAttemptScheduling: false
enabled: true
@@ -60,8 +67,8 @@ scheduling:
priority: 1000
preemptible: false
maximumResourceFractionPerQueue:
memory: 0.99
cpu: 0.99
memory: 1.0
cpu: 1.0
armada-preemptible:
priority: 1000
preemptible: true
@@ -85,11 +92,6 @@ scheduling:
value: "true"
effect: "NoSchedule"
defaultJobTolerationsByPriorityClass:
"":
- key: "armadaproject.io/pc-armada-default"
operator: "Equal"
value: "true"
effect: "NoSchedule"
armada-default:
- key: "armadaproject.io/pc-armada-default"
operator: "Equal"
@@ -101,8 +103,6 @@ scheduling:
value: "true"
effect: "NoSchedule"
maxRetries: 5
resourceScarcity:
cpu: 1.0
indexedResources:
- name: "cpu"
resolution: "100m"
23 changes: 7 additions & 16 deletions internal/armada/configuration/types.go
@@ -114,9 +114,9 @@ type SchedulingConfig struct {
// Maximum number of times a job is retried before considered failed.
MaxRetries uint
// Controls how fairness is calculated. Can be either AssetFairness or DominantResourceFairness.
FairnessType FairnessType
// Used to convert one resource into another when using DominantResourceFairness.
FairnessResourceMapping []ResourceMapping
FairnessModel FairnessModel
// List of resource names, e.g., []string{"cpu", "memory"}, to consider when computing DominantResourceFairness.
DominantResourceFairnessResourcesToConsider []string
// Weights used to compute fair share when using AssetFairness.
// Overrides dynamic scarcity calculation if provided.
// Applies to both the new and old scheduler.
@@ -191,7 +191,7 @@ type SchedulingConfig struct {
AlwaysAttemptScheduling bool
}

// FairnessType controls how fairness is computed.
// FairnessModel controls how fairness is computed.
// In particular, each queue has a cost associated with it and the next job to attempt to schedule
// is taken from the queue with the smallest cost associated with it.
//
@@ -202,22 +202,13 @@ type SchedulingConfig struct {
//
// If DominantResourceFairness, the cost associated with a queue is
// max("CPU allocation" / "CPU capacity", "memory allocation" / "mamory capacity", ...).
type FairnessType string
type FairnessModel string

const (
AssertFairness FairnessType = "AssertFairness"
DominantResourceFairness FairnessType = "DominantResourceFairness"
AssetFairness FairnessModel = "AssetFairness"
DominantResourceFairness FairnessModel = "DominantResourceFairness"
)

// ResourceMapping describes a mapping from one resource type to another. Used when computing fairness.
// E.g., ResourceMapping{"nvidia.com/mig-1g.10gb", "nvidia.com/gpu", 1/8}
// indicates 1 unit of "nvidia.com/mig-1g.10gb" should be treated as 1/8 unit of "nvidia.com/gpu".
type ResourceMapping struct {
Source string
Target string
Multiplier float64
}

type IndexedResource struct {
// Resource name. E.g., "cpu", "memory", or "nvidia.com/gpu".
Name string
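For readers unfamiliar with the two fairness models named in the FairnessModel comment above, the cost functions can be sketched as plain Go. This is an illustrative sketch only, not code from this commit: the function names, the float-based resource maps, and the example numbers are assumptions; Armada itself works on resource.Quantity values via the scheduling context.

package main

import "fmt"

// assetFairnessCost sketches the AssetFairness cost: a scarcity-weighted sum of the
// queue's allocation, divided by the queue weight (cf. the resourceScarcity config section).
func assetFairnessCost(allocated, resourceScarcity map[string]float64, weight float64) float64 {
	total := 0.0
	for resourceName, quantity := range allocated {
		total += resourceScarcity[resourceName] * quantity
	}
	return total / weight
}

// dominantResourceFairnessCost sketches the DominantResourceFairness cost: the maximum of
// allocation/capacity over the configured resources (dominantResourceFairnessResourcesToConsider);
// resources with zero capacity are skipped.
func dominantResourceFairnessCost(allocated, capacity map[string]float64, resourcesToConsider []string) float64 {
	cost := 0.0
	for _, resourceName := range resourcesToConsider {
		if capacity[resourceName] == 0 {
			continue
		}
		if fraction := allocated[resourceName] / capacity[resourceName]; fraction > cost {
			cost = fraction
		}
	}
	return cost
}

func main() {
	allocated := map[string]float64{"cpu": 10, "memory": 40}  // one queue's allocation
	capacity := map[string]float64{"cpu": 100, "memory": 800} // total pool capacity
	fmt.Println(assetFairnessCost(allocated, map[string]float64{"cpu": 1.0}, 1.0))            // 10
	fmt.Println(dominantResourceFairnessCost(allocated, capacity, []string{"cpu", "memory"})) // 0.1
}

With these made-up numbers the asset-fairness cost is 10 (only CPU carries a weight), while the dominant-resource cost is 0.1, since the CPU share 10/100 exceeds the memory share 40/800.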
3 changes: 3 additions & 0 deletions internal/armada/server/lease.go
@@ -468,6 +468,9 @@ func (q *AggregatedQueueServer) getJobs(ctx context.Context, req *api.StreamingL
q.schedulingConfig.ResourceScarcity,
schedulerobjects.ResourceList{Resources: totalCapacity},
)
if q.schedulingConfig.FairnessModel == configuration.DominantResourceFairness {
sctx.EnableDominantResourceFairness(q.schedulingConfig.DominantResourceFairnessResourcesToConsider)
}
for queue, priorityFactor := range priorityFactorByQueue {
weight := 1 / priorityFactor
if err := sctx.AddQueueSchedulingContext(queue, weight, allocatedByQueueAndPriorityClassForPool[queue]); err != nil {
40 changes: 26 additions & 14 deletions internal/scheduler/context/context.go
@@ -36,11 +36,10 @@ type SchedulingContext struct {
// Default priority class.
DefaultPriorityClass string
// Determines how fairness is computed.
FairnessType configuration.FairnessType
// Used to convert one resource into another when computing fair share.
// Only applies to DominantResourceFairness.
FairnessResourceMappingBySourceResource map[string]configuration.ResourceMapping
// Weights used when computing total resource usage.
FairnessModel configuration.FairnessModel
// Resources considered when computing DominantResourceFairness.
DominantResourceFairnessResourcesToConsider []string
// Weights used when computing AssetFairness.
ResourceScarcity map[string]float64
// Per-queue scheduling contexts.
QueueSchedulingContexts map[string]*QueueSchedulingContext
@@ -83,7 +82,7 @@ func NewSchedulingContext(
Pool: pool,
PriorityClasses: priorityClasses,
DefaultPriorityClass: defaultPriorityClass,
FairnessType: configuration.AssertFairness,
FairnessModel: configuration.AssetFairness,
ResourceScarcity: resourceScarcity,
QueueSchedulingContexts: make(map[string]*QueueSchedulingContext),
TotalResources: totalResources.DeepCopy(),
@@ -95,6 +94,11 @@
}
}

func (sctx *SchedulingContext) EnableDominantResourceFairness(dominantResourceFairnessResourcesToConsider []string) {
sctx.FairnessModel = configuration.DominantResourceFairness
sctx.DominantResourceFairnessResourcesToConsider = dominantResourceFairnessResourcesToConsider
}

func (sctx *SchedulingContext) SchedulingKeyFromLegacySchedulerJob(job interfaces.LegacySchedulerJob) schedulerobjects.SchedulingKey {
var priority int32
if priorityClass, ok := sctx.PriorityClasses[job.GetPriorityClassName()]; ok {
@@ -521,28 +525,36 @@ func (qctx *QueueSchedulingContext) TotalCostForQueue() float64 {
// TotalCostForQueueWithAllocation returns the cost for which this queue should be penalised when computing fairness,
// if the total allocation of this queue is given by allocated.
func (qctx *QueueSchedulingContext) TotalCostForQueueWithAllocation(allocated schedulerobjects.ResourceList) float64 {
switch qctx.SchedulingContext.FairnessType {
case configuration.AssertFairness:
switch qctx.SchedulingContext.FairnessModel {
case configuration.AssetFairness:
return qctx.assetFairnessCostWithAllocation(allocated)
case configuration.DominantResourceFairness:
return qctx.dominantResourceFairnessCostWithAllocation(allocated)
default:
panic(fmt.Sprintf("unknown fairness type: %s", qctx.SchedulingContext.FairnessType))
panic(fmt.Sprintf("unknown fairness type: %s", qctx.SchedulingContext.FairnessModel))
}
}

func (qctx *QueueSchedulingContext) assetFairnessCostWithAllocation(allocated schedulerobjects.ResourceList) float64 {
if len(qctx.SchedulingContext.ResourceScarcity) == 0 {
panic("ResourceScarcity is not set")
}
return float64(allocated.AsWeightedMillis(qctx.SchedulingContext.ResourceScarcity)) / qctx.Weight
}

func (qctx *QueueSchedulingContext) dominantResourceFairnessCostWithAllocation(allocated schedulerobjects.ResourceList) float64 {
if len(qctx.SchedulingContext.DominantResourceFairnessResourcesToConsider) == 0 {
panic("DominantResourceFairnessResourcesToConsider is not set")
}
var cost float64
for t, q := range allocated.Resources {
totalq := qctx.SchedulingContext.TotalResources.Get(t)
if totalq.Cmp(resource.Quantity{}) == 0 {
totalq.SetMilli(1)
for _, t := range qctx.SchedulingContext.DominantResourceFairnessResourcesToConsider {
capacity := qctx.SchedulingContext.TotalResources.Get(t)
if capacity.Equal(resource.Quantity{}) {
// Ignore any resources with zero capacity.
continue
}
tcost := float64(q.MilliValue()) / float64(totalq.MilliValue())
q := allocated.Get(t)
tcost := float64(q.MilliValue()) / float64(capacity.MilliValue())
if tcost > cost {
cost = tcost
}
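A behavioural difference in the rewritten dominantResourceFairnessCostWithAllocation above is the handling of zero-capacity resources: the old code substituted a capacity of 1 milli-unit, so any allocation of such a resource dominated the cost, whereas the new code skips the resource entirely. A minimal sketch of the difference, using illustrative numbers and plain floats rather than resource.Quantity:

package main

import "fmt"

func main() {
	// A queue holds 2 GPUs, but the pool currently reports zero GPU capacity
	// (e.g., all GPU nodes are cordoned or unregistered).
	allocatedGPUMilli := 2000.0 // 2 GPUs expressed in milli-units
	capacityGPUMilli := 0.0

	// Old behaviour: a zero capacity was bumped to 1 milli-unit,
	// so the GPU term became 2000/1 = 2000 and dwarfed every other resource share.
	oldCapacity := capacityGPUMilli
	if oldCapacity == 0 {
		oldCapacity = 1
	}
	fmt.Println("old GPU cost term:", allocatedGPUMilli/oldCapacity) // 2000

	// New behaviour: zero-capacity resources are ignored, so the queue's cost
	// is determined by the remaining resources in dominantResourceFairnessResourcesToConsider.
	if capacityGPUMilli == 0 {
		fmt.Println("new behaviour: GPU term skipped")
	}
}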
3 changes: 3 additions & 0 deletions internal/scheduler/scheduling_algo.go
@@ -322,6 +322,9 @@ func (l *FairSchedulingAlgo) scheduleOnExecutor(
l.config.ResourceScarcity,
accounting.totalCapacity,
)
if l.config.FairnessModel == configuration.DominantResourceFairness {
sctx.EnableDominantResourceFairness(l.config.DominantResourceFairnessResourcesToConsider)
}
for queue, priorityFactor := range accounting.priorityFactorByQueue {
var allocatedByPriorityClass schedulerobjects.QuantityByTAndResourceType[string]
if allocatedByQueueAndPriorityClass := accounting.allocationByPoolAndQueueAndPriorityClass[executor.Pool]; allocatedByQueueAndPriorityClass != nil {
