Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion api/external/cinder/messages.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,23 @@ func (r ExternalSchedulerRequest) GetTraceLogArgs() []slog.Attr {
slog.String("project", r.Context.ProjectID),
}
}
func (r ExternalSchedulerRequest) FilterHosts(includedHosts map[string]float64) lib.FilterWeigherPipelineRequest {
// Filter returns the request reduced to the hosts that appear as keys in
// includedHosts. Hosts missing from the map are treated as filtered out.
//
// The weights of the returned request are rebuilt from the includedHosts
// values of exactly those hosts that remain in the request, so the weights
// map never references a host that is no longer part of r.Hosts.
func (r ExternalSchedulerRequest) Filter(includedHosts map[string]float64) lib.FilterWeigherPipelineRequest {
	filteredHosts := make([]ExternalSchedulerHost, 0, len(includedHosts))
	filteredWeights := make(map[string]float64, len(includedHosts))
	// Walk the request hosts once and use a single comma-ok lookup to both
	// decide whether the host survives and to fetch its weight. Driving the
	// weights from r.Hosts (rather than from the map keys) guarantees that
	// hosts and weights stay in sync.
	for _, host := range r.Hosts {
		weight, included := includedHosts[host.VolumeHost]
		if !included {
			continue
		}
		filteredHosts = append(filteredHosts, host)
		filteredWeights[host.VolumeHost] = weight
	}
	// r is a value receiver, so these assignments only affect the returned
	// request value.
	r.Hosts = filteredHosts
	r.Weights = filteredWeights
	return r
}

Expand Down
2 changes: 1 addition & 1 deletion api/external/ironcore/messages.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ func (r MachinePipelineRequest) GetWeights() map[string]float64 {
func (r MachinePipelineRequest) GetTraceLogArgs() []slog.Attr {
return []slog.Attr{}
}
func (r MachinePipelineRequest) FilterHosts(includedHosts map[string]float64) lib.FilterWeigherPipelineRequest {
func (r MachinePipelineRequest) Filter(includedHosts map[string]float64) lib.FilterWeigherPipelineRequest {
filteredPools := make([]ironcorev1alpha1.MachinePool, 0, len(includedHosts))
for _, pool := range r.Pools {
if _, exists := includedHosts[pool.Name]; exists {
Expand Down
11 changes: 10 additions & 1 deletion api/external/manila/messages.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,23 @@ func (r ExternalSchedulerRequest) GetTraceLogArgs() []slog.Attr {
slog.String("project", r.Context.ProjectID),
}
}
func (r ExternalSchedulerRequest) FilterHosts(includedHosts map[string]float64) lib.FilterWeigherPipelineRequest {
// Filter returns the request reduced to the hosts that appear as keys in
// includedHosts. Hosts missing from the map are treated as filtered out.
//
// The weights of the returned request are rebuilt from the includedHosts
// values of exactly those hosts that remain in the request, so the weights
// map never references a host that is no longer part of r.Hosts.
func (r ExternalSchedulerRequest) Filter(includedHosts map[string]float64) lib.FilterWeigherPipelineRequest {
	filteredHosts := make([]ExternalSchedulerHost, 0, len(includedHosts))
	filteredWeights := make(map[string]float64, len(includedHosts))
	// Walk the request hosts once and use a single comma-ok lookup to both
	// decide whether the host survives and to fetch its weight. Driving the
	// weights from r.Hosts (rather than from the map keys) guarantees that
	// hosts and weights stay in sync.
	for _, host := range r.Hosts {
		weight, included := includedHosts[host.ShareHost]
		if !included {
			continue
		}
		filteredHosts = append(filteredHosts, host)
		filteredWeights[host.ShareHost] = weight
	}
	// r is a value receiver, so these assignments only affect the returned
	// request value.
	r.Hosts = filteredHosts
	r.Weights = filteredWeights
	return r
}

Expand Down
11 changes: 10 additions & 1 deletion api/external/nova/messages.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,23 @@ func (r ExternalSchedulerRequest) GetTraceLogArgs() []slog.Attr {
slog.String("project", r.Context.ProjectID),
}
}
func (r ExternalSchedulerRequest) FilterHosts(includedHosts map[string]float64) lib.FilterWeigherPipelineRequest {
// Filter returns the request reduced to the hosts that appear as keys in
// includedHosts. Hosts missing from the map are treated as filtered out.
//
// The weights of the returned request are rebuilt from the includedHosts
// values of exactly those hosts that remain in the request, so the weights
// map never references a host that is no longer part of r.Hosts.
func (r ExternalSchedulerRequest) Filter(includedHosts map[string]float64) lib.FilterWeigherPipelineRequest {
	filteredHosts := make([]ExternalSchedulerHost, 0, len(includedHosts))
	filteredWeights := make(map[string]float64, len(includedHosts))
	// Walk the request hosts once and use a single comma-ok lookup to both
	// decide whether the host survives and to fetch its weight. Driving the
	// weights from r.Hosts (rather than from the map keys) guarantees that
	// hosts and weights stay in sync.
	for _, host := range r.Hosts {
		weight, included := includedHosts[host.ComputeHost]
		if !included {
			continue
		}
		filteredHosts = append(filteredHosts, host)
		filteredWeights[host.ComputeHost] = weight
	}
	// r is a value receiver, so these assignments only affect the returned
	// request value.
	r.Hosts = filteredHosts
	r.Weights = filteredWeights
	return r
}

Expand Down
2 changes: 1 addition & 1 deletion api/external/pods/messages.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ func (r PodPipelineRequest) GetWeights() map[string]float64 {
func (r PodPipelineRequest) GetTraceLogArgs() []slog.Attr {
return []slog.Attr{}
}
func (r PodPipelineRequest) FilterHosts(includedHosts map[string]float64) lib.FilterWeigherPipelineRequest {
func (r PodPipelineRequest) Filter(includedHosts map[string]float64) lib.FilterWeigherPipelineRequest {
filteredNodes := make([]corev1.Node, 0, len(includedHosts))
for _, node := range r.Nodes {
if _, exists := includedHosts[node.Name]; exists {
Expand Down
3 changes: 2 additions & 1 deletion cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -622,7 +622,8 @@ func main() {
"reconcileInterval", failoverConfig.ReconcileInterval,
"revalidationInterval", failoverConfig.RevalidationInterval,
"trustHypervisorLocation", failoverConfig.TrustHypervisorLocation,
"maxVMsToProcess", failoverConfig.MaxVMsToProcess)
"maxVMsToProcess", failoverConfig.MaxVMsToProcess,
"vmSelectionRotationInterval", failoverConfig.VMSelectionRotationInterval)
}

// +kubebuilder:scaffold:builder
Expand Down
6 changes: 3 additions & 3 deletions helm/bundles/cortex-cinder/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ apiVersion: v2
name: cortex-cinder
description: A Helm chart deploying Cortex for Cinder.
type: application
version: 0.0.50
version: 0.0.51
appVersion: 0.1.0
dependencies:
# from: file://../../library/cortex-postgres
Expand All @@ -16,12 +16,12 @@ dependencies:
# from: file://../../library/cortex
- name: cortex
repository: oci://ghcr.io/cobaltcore-dev/cortex/charts
version: 0.0.37
version: 0.0.38
alias: cortex-knowledge-controllers
# from: file://../../library/cortex
- name: cortex
repository: oci://ghcr.io/cobaltcore-dev/cortex/charts
version: 0.0.37
version: 0.0.38
alias: cortex-scheduling-controllers

# Owner info adds a configmap to the kubernetes cluster with information on
Expand Down
4 changes: 2 additions & 2 deletions helm/bundles/cortex-crds/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@ apiVersion: v2
name: cortex-crds
description: A Helm chart deploying Cortex CRDs.
type: application
version: 0.0.50
version: 0.0.51
appVersion: 0.1.0
dependencies:
# from: file://../../library/cortex
- name: cortex
repository: oci://ghcr.io/cobaltcore-dev/cortex/charts
version: 0.0.37
version: 0.0.38

# Owner info adds a configmap to the kubernetes cluster with information on
# the service owner. This makes it easier to find out who to contact in case
Expand Down
4 changes: 2 additions & 2 deletions helm/bundles/cortex-ironcore/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@ apiVersion: v2
name: cortex-ironcore
description: A Helm chart deploying Cortex for IronCore.
type: application
version: 0.0.50
version: 0.0.51
appVersion: 0.1.0
dependencies:
# from: file://../../library/cortex
- name: cortex
repository: oci://ghcr.io/cobaltcore-dev/cortex/charts
version: 0.0.37
version: 0.0.38

# Owner info adds a configmap to the kubernetes cluster with information on
# the service owner. This makes it easier to find out who to contact in case
Expand Down
6 changes: 3 additions & 3 deletions helm/bundles/cortex-manila/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ apiVersion: v2
name: cortex-manila
description: A Helm chart deploying Cortex for Manila.
type: application
version: 0.0.50
version: 0.0.51
appVersion: 0.1.0
dependencies:
# from: file://../../library/cortex-postgres
Expand All @@ -16,12 +16,12 @@ dependencies:
# from: file://../../library/cortex
- name: cortex
repository: oci://ghcr.io/cobaltcore-dev/cortex/charts
version: 0.0.37
version: 0.0.38
alias: cortex-knowledge-controllers
# from: file://../../library/cortex
- name: cortex
repository: oci://ghcr.io/cobaltcore-dev/cortex/charts
version: 0.0.37
version: 0.0.38
alias: cortex-scheduling-controllers

# Owner info adds a configmap to the kubernetes cluster with information on
Expand Down
6 changes: 3 additions & 3 deletions helm/bundles/cortex-nova/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ apiVersion: v2
name: cortex-nova
description: A Helm chart deploying Cortex for Nova.
type: application
version: 0.0.50
version: 0.0.51
appVersion: 0.1.0
dependencies:
# from: file://../../library/cortex-postgres
Expand All @@ -16,12 +16,12 @@ dependencies:
# from: file://../../library/cortex
- name: cortex
repository: oci://ghcr.io/cobaltcore-dev/cortex/charts
version: 0.0.37
version: 0.0.38
alias: cortex-knowledge-controllers
# from: file://../../library/cortex
- name: cortex
repository: oci://ghcr.io/cobaltcore-dev/cortex/charts
version: 0.0.37
version: 0.0.38
alias: cortex-scheduling-controllers

# Owner info adds a configmap to the kubernetes cluster with information on
Expand Down
20 changes: 0 additions & 20 deletions helm/bundles/cortex-nova/alerts/nova.alerts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -485,26 +485,6 @@ groups:
CRD retrieval. Limes scrapes may time out, affecting capacity reporting.

# Committed Resource Syncer Alerts
- alert: CortexNovaCommittedResourceSyncerNotRunning
expr: |
increase(cortex_committed_resource_syncer_runs_total{service="cortex-nova-metrics"}[2h]) == 0
or
absent(cortex_committed_resource_syncer_runs_total{service="cortex-nova-metrics"})
for: 5m
labels:
context: committed-resource-syncer
dashboard: cortex/cortex
service: cortex
severity: warning
support_group: workload-management
annotations:
summary: "Committed Resource syncer not running"
description: >
The committed resource syncer has not run in the last 2 hours or the metric is missing.
This indicates that the syncer may have stopped, is encountering errors, or the feature
is not enabled. Check the syncer logs for errors or verify the commitments-sync-task is
in the enabledTasks configuration.

- alert: CortexNovaCommittedResourceSyncerErrorsHigh
expr: increase(cortex_committed_resource_syncer_errors_total{service="cortex-nova-metrics"}[1h]) > 3
for: 5m
Expand Down
6 changes: 1 addition & 5 deletions helm/bundles/cortex-nova/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -218,11 +218,7 @@ cortex-knowledge-controllers:
- datasource-controllers
- knowledge-controllers
- kpis-controller
# How often the commitments syncer reconciles Limes commitments to Reservation CRDs
# 1h = 3600000000000 nanoseconds
committedResourceSyncInterval: 3600000000000
enabledTasks:
- commitments-sync-task
enabledTasks: []

# Custom configuration for the cortex postgres chart.
cortex-postgres:
Expand Down
4 changes: 2 additions & 2 deletions helm/bundles/cortex-pods/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@ apiVersion: v2
name: cortex-pods
description: A Helm chart deploying Cortex for Pods.
type: application
version: 0.0.50
version: 0.0.51
appVersion: 0.1.0
dependencies:
# from: file://../../library/cortex
- name: cortex
repository: oci://ghcr.io/cobaltcore-dev/cortex/charts
version: 0.0.37
version: 0.0.38

# Owner info adds a configmap to the kubernetes cluster with information on
# the service owner. This makes it easier to find out who to contact in case
Expand Down
4 changes: 2 additions & 2 deletions helm/library/cortex/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ apiVersion: v2
name: cortex
description: A Helm chart to distribute cortex.
type: application
version: 0.0.37
appVersion: "sha-665d8dd0"
version: 0.0.38
appVersion: "sha-b3cf6dc8"
icon: "https://example.com/icon.png"
dependencies: []
2 changes: 1 addition & 1 deletion internal/scheduling/lib/filter_weigher_pipeline.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ func (p *filterWeigherPipeline[RequestType]) runFilters(
})
// Mutate the request to only include the remaining hosts.
// Assume the resulting request type is the same as the input type.
filteredRequest = filteredRequest.FilterHosts(result.Activations).(RequestType)
filteredRequest = filteredRequest.Filter(result.Activations).(RequestType)
}
return filteredRequest, stepResults
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@ type FilterWeigherPipelineRequest interface {
// of the request with only the given hosts remaining. This is helpful
// for steps that filter out hosts. Hosts not included in the map
// are considered as filtered out, and won't be reconsidered in later steps.
FilterHosts(includedHosts map[string]float64) FilterWeigherPipelineRequest
// This function should also update the weights of the remaining hosts
// accordingly, so that the weights map always corresponds to the hosts
// that are currently in the request.
Filter(includedHosts map[string]float64) FilterWeigherPipelineRequest
// Get the weights for the hosts.
GetWeights() map[string]float64
// Get logging args to be used in the step's trace log.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,20 @@ func (m mockFilterWeigherPipelineRequest) GetHosts() []string { retu
func (m mockFilterWeigherPipelineRequest) GetWeights() map[string]float64 { return m.Weights }
func (m mockFilterWeigherPipelineRequest) GetPipeline() string { return m.Pipeline }

func (m mockFilterWeigherPipelineRequest) FilterHosts(hosts map[string]float64) FilterWeigherPipelineRequest {
// Filter replaces the mock's hosts with the keys of the given map and its
// weights with a copy of the map's entries, then returns the updated mock.
// Because the hosts are derived from the map keys, their order in the
// resulting slice follows Go's unspecified map iteration order.
func (m mockFilterWeigherPipelineRequest) Filter(hosts map[string]float64) FilterWeigherPipelineRequest {
	filteredHosts := make([]string, 0, len(hosts))
	filteredWeights := make(map[string]float64, len(hosts))
	// A single pass is enough: every key ranged over is by definition present
	// in the map, so no additional membership check is needed.
	for host, weight := range hosts {
		filteredHosts = append(filteredHosts, host)
		filteredWeights[host] = weight
	}
	// m is a value receiver, so these assignments only affect the returned
	// mock value.
	m.Hosts = filteredHosts
	m.Weights = filteredWeights
	return m
}
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,16 @@ func TestCommitmentChangeIntegration(t *testing.T) {
ExpectedReservations: []*TestReservation{},
ExpectedAPIResponse: newAPIResponse("1 commitment(s) failed", "commitment long-long-long-long-long-long-long-long-long-long-long-long-long-: unexpected commitment format"),
},
{
Name: "Planned CR is ignored in validation, no scheduling or capacity reservation",
VMs: []*TestVM{},
Flavors: []*TestFlavor{m1Small},
CommitmentRequest: newCommitmentRequest("az-a", false, 1234,
createCommitment("hw_version_hana_1_ram", "project-A", "uuid-new", "planned", 200),
),
ExpectedReservations: []*TestReservation{},
ExpectedAPIResponse: newAPIResponse(),
},
{
Name: "Invalid CR name - spaces",
VMs: []*TestVM{},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,8 @@ func (m *ReservationManager) newReservation(
}

spec := v1alpha1.ReservationSpec{
Type: v1alpha1.ReservationTypeCommittedResource,
Type: v1alpha1.ReservationTypeCommittedResource,
SchedulingDomain: v1alpha1.SchedulingDomainNova,
Resources: map[hv1.ResourceName]resource.Quantity{
hv1.ResourceMemory: *resource.NewQuantity(
memoryBytes,
Expand Down
7 changes: 6 additions & 1 deletion internal/scheduling/reservations/failover/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package failover
import (
"context"
"fmt"
"math/rand/v2"
"path/filepath"
"slices"
"sort"
Expand Down Expand Up @@ -518,7 +519,11 @@ func (c *FailoverReservationController) selectVMsToProcess(
offset := 0
rotationInterval := *c.Config.VMSelectionRotationInterval
if rotationInterval > 0 && c.reconcileCount%int64(rotationInterval) == 0 {
offset = int(c.reconcileCount) % len(vmsMissingFailover)
offset = rand.IntN(len(vmsMissingFailover)) //nolint:gosec // non-cryptographic randomness is fine for VM selection rotation
logger.Info("applying random rotation offset for VM selection",
"offset", offset,
"totalVMs", len(vmsMissingFailover),
"rotationInterval", rotationInterval)
}

selected = make([]vmFailoverNeed, 0, maxToProcess)
Expand Down
Loading