From e7316862c701204cf4910d2deba9f7499bc129cc Mon Sep 17 00:00:00 2001 From: Philipp Matthes Date: Thu, 25 Sep 2025 09:24:17 +0200 Subject: [PATCH 01/58] Init scaffold computedecision --- decisions/LICENSE | 201 ++++++++++++++ decisions/Makefile | 50 ++++ decisions/PROJECT | 22 ++ decisions/api/LICENSE | 201 ++++++++++++++ decisions/api/go.mod | 27 ++ decisions/api/go.sum | 101 +++++++ .../api/v1alpha1/computedecision_types.go | 68 +++++ decisions/api/v1alpha1/groupversion_info.go | 23 ++ .../api/v1alpha1/zz_generated.deepcopy.go | 166 +++++++++++ decisions/cmd/main.go | 233 ++++++++++++++++ .../decisions.cortex_computedecisions.yaml | 91 ++++++ ...servations.cortex_computereservations.yaml | 118 ++++++++ .../decisions.cortex_computedecisions.yaml | 91 ++++++ decisions/config/crd/kustomization.yaml | 16 ++ decisions/config/crd/kustomizeconfig.yaml | 19 ++ .../default/cert_metrics_manager_patch.yaml | 30 ++ decisions/config/default/kustomization.yaml | 234 ++++++++++++++++ .../config/default/manager_metrics_patch.yaml | 4 + decisions/config/default/metrics_service.yaml | 18 ++ decisions/config/manager/kustomization.yaml | 8 + decisions/config/manager/manager.yaml | 77 ++++++ .../network-policy/allow-metrics-traffic.yaml | 27 ++ .../config/network-policy/kustomization.yaml | 2 + .../config/prometheus/kustomization.yaml | 11 + decisions/config/prometheus/monitor.yaml | 27 ++ .../config/prometheus/monitor_tls_patch.yaml | 19 ++ .../rbac/computereservation_admin_role.yaml | 27 ++ .../rbac/computereservation_editor_role.yaml | 33 +++ .../rbac/computereservation_viewer_role.yaml | 29 ++ decisions/config/rbac/kustomization.yaml | 28 ++ .../config/rbac/leader_election_role.yaml | 40 +++ .../rbac/leader_election_role_binding.yaml | 15 + decisions/config/rbac/metrics_auth_role.yaml | 17 ++ .../rbac/metrics_auth_role_binding.yaml | 12 + .../config/rbac/metrics_reader_role.yaml | 9 + decisions/config/rbac/role.yaml | 32 +++ decisions/config/rbac/role_binding.yaml | 15 + 
decisions/config/rbac/service_account.yaml | 8 + decisions/dist/chart/.helmignore | 25 ++ decisions/dist/chart/Chart.lock | 6 + decisions/dist/chart/Chart.yaml | 14 + .../dist/chart/charts/owner-info-1.0.0.tgz | Bin 0 -> 2139 bytes decisions/dist/chart/templates/_helpers.tpl | 50 ++++ .../templates/certmanager/certificate.yaml | 36 +++ .../decisions.cortex_computedecisions.yaml | 98 +++++++ .../dist/chart/templates/manager/manager.yaml | 107 ++++++++ .../templates/metrics/metrics-service.yaml | 18 ++ .../network-policy/allow-metrics-traffic.yaml | 28 ++ .../chart/templates/prometheus/monitor.yaml | 40 +++ .../rbac/computereservation_admin_role.yaml | 28 ++ .../rbac/computereservation_editor_role.yaml | 34 +++ .../rbac/computereservation_viewer_role.yaml | 30 ++ .../templates/rbac/leader_election_role.yaml | 42 +++ .../rbac/leader_election_role_binding.yaml | 17 ++ .../templates/rbac/metrics_auth_role.yaml | 21 ++ .../rbac/metrics_auth_role_binding.yaml | 16 ++ .../templates/rbac/metrics_reader_role.yaml | 13 + decisions/dist/chart/templates/rbac/role.yaml | 36 +++ .../chart/templates/rbac/role_binding.yaml | 16 ++ .../chart/templates/rbac/service_account.yaml | 15 + decisions/dist/chart/values.yaml | 129 +++++++++ decisions/go.mod | 105 +++++++ decisions/go.sum | 259 ++++++++++++++++++ decisions/hack/boilerplate.go.txt | 2 + decisions/internal/controller/conf.go | 7 + decisions/internal/controller/controller.go | 58 ++++ 66 files changed, 3399 insertions(+) create mode 100644 decisions/LICENSE create mode 100644 decisions/Makefile create mode 100644 decisions/PROJECT create mode 100644 decisions/api/LICENSE create mode 100644 decisions/api/go.mod create mode 100644 decisions/api/go.sum create mode 100644 decisions/api/v1alpha1/computedecision_types.go create mode 100644 decisions/api/v1alpha1/groupversion_info.go create mode 100644 decisions/api/v1alpha1/zz_generated.deepcopy.go create mode 100644 decisions/cmd/main.go create mode 100644 
decisions/config/crd/bases/decisions.cortex_computedecisions.yaml create mode 100644 decisions/config/crd/bases/reservations.cortex_computereservations.yaml create mode 100644 decisions/config/crd/decisions.cortex_computedecisions.yaml create mode 100644 decisions/config/crd/kustomization.yaml create mode 100644 decisions/config/crd/kustomizeconfig.yaml create mode 100644 decisions/config/default/cert_metrics_manager_patch.yaml create mode 100644 decisions/config/default/kustomization.yaml create mode 100644 decisions/config/default/manager_metrics_patch.yaml create mode 100644 decisions/config/default/metrics_service.yaml create mode 100644 decisions/config/manager/kustomization.yaml create mode 100644 decisions/config/manager/manager.yaml create mode 100644 decisions/config/network-policy/allow-metrics-traffic.yaml create mode 100644 decisions/config/network-policy/kustomization.yaml create mode 100644 decisions/config/prometheus/kustomization.yaml create mode 100644 decisions/config/prometheus/monitor.yaml create mode 100644 decisions/config/prometheus/monitor_tls_patch.yaml create mode 100644 decisions/config/rbac/computereservation_admin_role.yaml create mode 100644 decisions/config/rbac/computereservation_editor_role.yaml create mode 100644 decisions/config/rbac/computereservation_viewer_role.yaml create mode 100644 decisions/config/rbac/kustomization.yaml create mode 100644 decisions/config/rbac/leader_election_role.yaml create mode 100644 decisions/config/rbac/leader_election_role_binding.yaml create mode 100644 decisions/config/rbac/metrics_auth_role.yaml create mode 100644 decisions/config/rbac/metrics_auth_role_binding.yaml create mode 100644 decisions/config/rbac/metrics_reader_role.yaml create mode 100644 decisions/config/rbac/role.yaml create mode 100644 decisions/config/rbac/role_binding.yaml create mode 100644 decisions/config/rbac/service_account.yaml create mode 100644 decisions/dist/chart/.helmignore create mode 100644 
decisions/dist/chart/Chart.lock create mode 100644 decisions/dist/chart/Chart.yaml create mode 100644 decisions/dist/chart/charts/owner-info-1.0.0.tgz create mode 100644 decisions/dist/chart/templates/_helpers.tpl create mode 100644 decisions/dist/chart/templates/certmanager/certificate.yaml create mode 100644 decisions/dist/chart/templates/crd/decisions.cortex_computedecisions.yaml create mode 100644 decisions/dist/chart/templates/manager/manager.yaml create mode 100644 decisions/dist/chart/templates/metrics/metrics-service.yaml create mode 100644 decisions/dist/chart/templates/network-policy/allow-metrics-traffic.yaml create mode 100644 decisions/dist/chart/templates/prometheus/monitor.yaml create mode 100644 decisions/dist/chart/templates/rbac/computereservation_admin_role.yaml create mode 100644 decisions/dist/chart/templates/rbac/computereservation_editor_role.yaml create mode 100644 decisions/dist/chart/templates/rbac/computereservation_viewer_role.yaml create mode 100644 decisions/dist/chart/templates/rbac/leader_election_role.yaml create mode 100644 decisions/dist/chart/templates/rbac/leader_election_role_binding.yaml create mode 100644 decisions/dist/chart/templates/rbac/metrics_auth_role.yaml create mode 100644 decisions/dist/chart/templates/rbac/metrics_auth_role_binding.yaml create mode 100644 decisions/dist/chart/templates/rbac/metrics_reader_role.yaml create mode 100644 decisions/dist/chart/templates/rbac/role.yaml create mode 100644 decisions/dist/chart/templates/rbac/role_binding.yaml create mode 100644 decisions/dist/chart/templates/rbac/service_account.yaml create mode 100644 decisions/dist/chart/values.yaml create mode 100644 decisions/go.mod create mode 100644 decisions/go.sum create mode 100644 decisions/hack/boilerplate.go.txt create mode 100644 decisions/internal/controller/conf.go create mode 100644 decisions/internal/controller/controller.go diff --git a/decisions/LICENSE b/decisions/LICENSE new file mode 100644 index 00000000..06c1fb23 --- 
/dev/null +++ b/decisions/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2024 SAP SE or an SAP affiliate company and cobaltcore-dev contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/decisions/Makefile b/decisions/Makefile new file mode 100644 index 00000000..313dd550 --- /dev/null +++ b/decisions/Makefile @@ -0,0 +1,50 @@ +.PHONY: all +all: build + +.PHONY: manifests +manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects. + $(CONTROLLER_GEN) rbac:roleName=manager-role crd:allowDangerousTypes=true webhook paths="./..." output:crd:artifacts:config=config/crd/bases + +.PHONY: generate +generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations. + $(CONTROLLER_GEN) crd:allowDangerousTypes=true object:headerFile="hack/boilerplate.go.txt" paths="./..." 
+ +##@ Build + +.PHONY: build +build: manifests generate + +LOCALBIN ?= $(shell pwd)/bin +$(LOCALBIN): + mkdir -p $(LOCALBIN) +CONTROLLER_GEN ?= $(LOCALBIN)/controller-gen + +CONTROLLER_TOOLS_VERSION ?= v0.17.2 + +.PHONY: controller-gen +controller-gen: $(CONTROLLER_GEN) ## Download controller-gen locally if necessary. +$(CONTROLLER_GEN): $(LOCALBIN) + $(call go-install-tool,$(CONTROLLER_GEN),sigs.k8s.io/controller-tools/cmd/controller-gen,$(CONTROLLER_TOOLS_VERSION)) + +# Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set) +ifeq (,$(shell go env GOBIN)) +GOBIN=$(shell go env GOPATH)/bin +else +GOBIN=$(shell go env GOBIN) +endif + +# go-install-tool will 'go install' any package with custom target and name of binary, if it doesn't exist +# $1 - target path with name of binary +# $2 - package url which can be installed +# $3 - specific version of package +define go-install-tool +@[ -f "$(1)-$(3)" ] || { \ +set -e; \ +package=$(2)@$(3) ;\ +echo "Downloading $${package}" ;\ +rm -f $(1) || true ;\ +GOBIN=$(LOCALBIN) go install $${package} ;\ +mv $(1) $(1)-$(3) ;\ +} ;\ +ln -sf $(1)-$(3) $(1) +endef \ No newline at end of file diff --git a/decisions/PROJECT b/decisions/PROJECT new file mode 100644 index 00000000..40012891 --- /dev/null +++ b/decisions/PROJECT @@ -0,0 +1,22 @@ +# Code generated by tool. DO NOT EDIT. +# This file is used to track the info used to scaffold your project +# and allow the plugins properly work. 
+# More info: https://book.kubebuilder.io/reference/project-config.html +cliVersion: 4.7.1 +domain: cortex +layout: +- go.kubebuilder.io/v4 +plugins: + helm.kubebuilder.io/v1-alpha: {} +projectName: decisions +repo: github.com/cobaltcore-dev/cortex/decisions +resources: +- api: + crdVersion: v1 + controller: true + domain: cortex + group: decisions + kind: ComputeDecision + path: github.com/cobaltcore-dev/cortex/decisions/api/v1alpha1 + version: v1alpha1 +version: "3" diff --git a/decisions/api/LICENSE b/decisions/api/LICENSE new file mode 100644 index 00000000..06c1fb23 --- /dev/null +++ b/decisions/api/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright 2024 SAP SE or an SAP affiliate company and cobaltcore-dev contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/decisions/api/go.mod b/decisions/api/go.mod new file mode 100644 index 00000000..db86f9fe --- /dev/null +++ b/decisions/api/go.mod @@ -0,0 +1,27 @@ +module github.com/cobaltcore-dev/cortex/decisions/api + +go 1.25.0 + +require ( + k8s.io/apimachinery v0.34.1 + sigs.k8s.io/controller-runtime v0.22.1 +) + +require ( + github.com/fxamacker/cbor/v2 v2.9.0 // indirect + github.com/go-logr/logr v1.4.2 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect + github.com/x448/float16 v0.8.4 // indirect + go.yaml.in/yaml/v2 v2.4.2 // indirect + golang.org/x/net v0.38.0 // indirect + golang.org/x/text v0.23.0 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect + k8s.io/klog/v2 v2.130.1 // indirect + k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 // indirect + sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect + sigs.k8s.io/randfill v1.0.0 // indirect + sigs.k8s.io/structured-merge-diff/v6 v6.3.0 // indirect +) diff --git a/decisions/api/go.sum b/decisions/api/go.sum new file mode 100644 index 00000000..edd5a267 --- /dev/null +++ b/decisions/api/go.sum @@ -0,0 +1,101 @@ +github.com/davecgh/go-spew v1.1.0/go.mod 
h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM= +github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ= +github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= +github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= +github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db h1:097atOisP2aRj7vFgYQBbFN4U4JNXUNYpxael3UzMyo= +github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= 
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee h1:W5t00kpgFdJifH4BDsTlE89Zl93FEloxaWZfGcifgq8= +github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/onsi/ginkgo/v2 v2.22.0 h1:Yed107/8DjTr0lKCNt7Dn8yQ6ybuDRQoMGrNFKzMfHg= +github.com/onsi/ginkgo/v2 v2.22.0/go.mod h1:7Du3c42kxCUegi0IImZ1wUQzMBVecgIHjR1C+NkhLQo= +github.com/onsi/gomega v1.36.1 h1:bJDPBO7ibjxcbHMgSCoo4Yj18UWbKDlLwX1x9sybDcw= +github.com/onsi/gomega v1.36.1/go.mod h1:PvZbdDc8J6XJEpDK4HCuRBm8a6Fzp9/DmhC9C7yFlog= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= +github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= +github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI= +go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU= 
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8= +golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik= +golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod 
h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY= +golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.26.0 h1:v/60pFQmzmT9ExmjDv2gGIfi3OqfKoEP6I5+umXlbnQ= +golang.org/x/tools v0.26.0/go.mod h1:TPVVj70c7JJ3WCazhD8OdXcZg/og+b9+tH/KxylGwH0= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= +gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +k8s.io/api v0.34.0 h1:L+JtP2wDbEYPUeNGbeSa/5GwFtIA662EmT2YSLOkAVE= +k8s.io/api v0.34.0/go.mod h1:YzgkIzOOlhl9uwWCZNqpw6RJy9L2FK4dlJeayUoydug= +k8s.io/apimachinery v0.34.1 h1:dTlxFls/eikpJxmAC7MVE8oOeP1zryV7iRyIjB0gky4= 
+k8s.io/apimachinery v0.34.1/go.mod h1:/GwIlEcWuTX9zKIg2mbw0LRFIsXwrfoVxn+ef0X13lw= +k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= +k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= +k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 h1:hwvWFiBzdWw1FhfY1FooPn3kzWuJ8tmbZBHi4zVsl1Y= +k8s.io/utils v0.0.0-20250604170112-4c0f3b243397/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +sigs.k8s.io/controller-runtime v0.22.1 h1:Ah1T7I+0A7ize291nJZdS1CabF/lB4E++WizgV24Eqg= +sigs.k8s.io/controller-runtime v0.22.1/go.mod h1:FwiwRjkRPbiN+zp2QRp7wlTCzbUXxZ/D4OzuQUDwBHY= +sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE= +sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= +sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= +sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= +sigs.k8s.io/structured-merge-diff/v6 v6.3.0 h1:jTijUJbW353oVOd9oTlifJqOGEkUw2jB/fXCbTiQEco= +sigs.k8s.io/structured-merge-diff/v6 v6.3.0/go.mod h1:M3W8sfWvn2HhQDIbGWj3S099YozAsymCo/wrT5ohRUE= +sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs= +sigs.k8s.io/yaml v1.6.0/go.mod h1:796bPqUfzR/0jLAl6XjHl3Ck7MiyVv8dbTdyT3/pMf4= diff --git a/decisions/api/v1alpha1/computedecision_types.go b/decisions/api/v1alpha1/computedecision_types.go new file mode 100644 index 00000000..c282a848 --- /dev/null +++ b/decisions/api/v1alpha1/computedecision_types.go @@ -0,0 +1,68 @@ +// Copyright 2025 SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package v1alpha1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +type ComputeDecisionPipelineOutputSpec struct { + Step string `json:"step"` + Weights map[string]float64 `json:"weights,omitempty"` +} + +type ComputeDecisionPipelineSpec struct { + Name string `json:"name"` + Outputs []ComputeDecisionPipelineOutputSpec 
`json:"outputs,omitempty"` +} + +// ComputeDecisionSpec defines the desired state of ComputeDecision. +type ComputeDecisionSpec struct { + Pipeline ComputeDecisionPipelineSpec `json:"pipeline"` +} + +type ComputeDecisionFactorStatus struct { + Host string `json:"host"` + Expl string `json:"expl"` +} + +// ComputeDecisionStatus defines the observed state of ComputeDecision. +type ComputeDecisionStatus struct { + Description string `json:"description,omitempty"` + Factors []ComputeDecisionFactorStatus `json:"factors,omitempty"` +} + +// +kubebuilder:object:root=true +// +kubebuilder:subresource:status +// +kubebuilder:resource:scope=Cluster,shortName=cdec + +// ComputeDecision is the Schema for the computedecisions API +type ComputeDecision struct { + metav1.TypeMeta `json:",inline"` + + // metadata is a standard object metadata + // +optional + metav1.ObjectMeta `json:"metadata,omitempty,omitzero"` + + // spec defines the desired state of ComputeDecision + // +required + Spec ComputeDecisionSpec `json:"spec"` + + // status defines the observed state of ComputeDecision + // +optional + Status ComputeDecisionStatus `json:"status,omitempty,omitzero"` +} + +// +kubebuilder:object:root=true + +// ComputeDecisionList contains a list of ComputeDecision +type ComputeDecisionList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []ComputeDecision `json:"items"` +} + +func init() { + SchemeBuilder.Register(&ComputeDecision{}, &ComputeDecisionList{}) +} diff --git a/decisions/api/v1alpha1/groupversion_info.go b/decisions/api/v1alpha1/groupversion_info.go new file mode 100644 index 00000000..8d38e963 --- /dev/null +++ b/decisions/api/v1alpha1/groupversion_info.go @@ -0,0 +1,23 @@ +// Copyright 2025 SAP SE +// SPDX-License-Identifier: Apache-2.0 + +// Package v1alpha1 contains API Schema definitions for the decisions v1alpha1 API group. 
+// +kubebuilder:object:generate=true +// +groupName=decisions.cortex +package v1alpha1 + +import ( + "k8s.io/apimachinery/pkg/runtime/schema" + "sigs.k8s.io/controller-runtime/pkg/scheme" +) + +var ( + // GroupVersion is group version used to register these objects. + GroupVersion = schema.GroupVersion{Group: "decisions.cortex", Version: "v1alpha1"} + + // SchemeBuilder is used to add go types to the GroupVersionKind scheme. + SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion} + + // AddToScheme adds the types in this group-version to the given scheme. + AddToScheme = SchemeBuilder.AddToScheme +) diff --git a/decisions/api/v1alpha1/zz_generated.deepcopy.go b/decisions/api/v1alpha1/zz_generated.deepcopy.go new file mode 100644 index 00000000..763790e0 --- /dev/null +++ b/decisions/api/v1alpha1/zz_generated.deepcopy.go @@ -0,0 +1,166 @@ +//go:build !ignore_autogenerated + +// Copyright 2025 SAP SE +// SPDX-License-Identifier: Apache-2.0 + +// Code generated by controller-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + runtime "k8s.io/apimachinery/pkg/runtime" +) + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ComputeDecision) DeepCopyInto(out *ComputeDecision) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ComputeDecision. +func (in *ComputeDecision) DeepCopy() *ComputeDecision { + if in == nil { + return nil + } + out := new(ComputeDecision) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 
+func (in *ComputeDecision) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ComputeDecisionFactorStatus) DeepCopyInto(out *ComputeDecisionFactorStatus) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ComputeDecisionFactorStatus. +func (in *ComputeDecisionFactorStatus) DeepCopy() *ComputeDecisionFactorStatus { + if in == nil { + return nil + } + out := new(ComputeDecisionFactorStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ComputeDecisionList) DeepCopyInto(out *ComputeDecisionList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]ComputeDecision, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ComputeDecisionList. +func (in *ComputeDecisionList) DeepCopy() *ComputeDecisionList { + if in == nil { + return nil + } + out := new(ComputeDecisionList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *ComputeDecisionList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *ComputeDecisionPipelineOutputSpec) DeepCopyInto(out *ComputeDecisionPipelineOutputSpec) { + *out = *in + if in.Weights != nil { + in, out := &in.Weights, &out.Weights + *out = make(map[string]float64, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ComputeDecisionPipelineOutputSpec. +func (in *ComputeDecisionPipelineOutputSpec) DeepCopy() *ComputeDecisionPipelineOutputSpec { + if in == nil { + return nil + } + out := new(ComputeDecisionPipelineOutputSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ComputeDecisionPipelineSpec) DeepCopyInto(out *ComputeDecisionPipelineSpec) { + *out = *in + if in.Outputs != nil { + in, out := &in.Outputs, &out.Outputs + *out = make([]ComputeDecisionPipelineOutputSpec, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ComputeDecisionPipelineSpec. +func (in *ComputeDecisionPipelineSpec) DeepCopy() *ComputeDecisionPipelineSpec { + if in == nil { + return nil + } + out := new(ComputeDecisionPipelineSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ComputeDecisionSpec) DeepCopyInto(out *ComputeDecisionSpec) { + *out = *in + in.Pipeline.DeepCopyInto(&out.Pipeline) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ComputeDecisionSpec. +func (in *ComputeDecisionSpec) DeepCopy() *ComputeDecisionSpec { + if in == nil { + return nil + } + out := new(ComputeDecisionSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. 
in must be non-nil. +func (in *ComputeDecisionStatus) DeepCopyInto(out *ComputeDecisionStatus) { + *out = *in + if in.Factors != nil { + in, out := &in.Factors, &out.Factors + *out = make([]ComputeDecisionFactorStatus, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ComputeDecisionStatus. +func (in *ComputeDecisionStatus) DeepCopy() *ComputeDecisionStatus { + if in == nil { + return nil + } + out := new(ComputeDecisionStatus) + in.DeepCopyInto(out) + return out +} diff --git a/decisions/cmd/main.go b/decisions/cmd/main.go new file mode 100644 index 00000000..25a4c09f --- /dev/null +++ b/decisions/cmd/main.go @@ -0,0 +1,233 @@ +// Copyright 2025 SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package main + +import ( + "crypto/tls" + "flag" + "os" + "path/filepath" + + // Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.) + // to ensure that exec-entrypoint and run can make use of them. + _ "k8s.io/client-go/plugin/pkg/client/auth" + + "k8s.io/apimachinery/pkg/runtime" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/certwatcher" + "sigs.k8s.io/controller-runtime/pkg/healthz" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + "sigs.k8s.io/controller-runtime/pkg/metrics/filters" + metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" + "sigs.k8s.io/controller-runtime/pkg/webhook" + + decisionsv1alpha1 "github.com/cobaltcore-dev/cortex/decisions/api/v1alpha1" + "github.com/cobaltcore-dev/cortex/decisions/internal/controller" + "github.com/cobaltcore-dev/cortex/internal/conf" + // +kubebuilder:scaffold:imports +) + +var ( + scheme = runtime.NewScheme() + setupLog = ctrl.Log.WithName("setup") +) + +func init() { + utilruntime.Must(clientgoscheme.AddToScheme(scheme)) + + utilruntime.Must(decisionsv1alpha1.AddToScheme(scheme)) + 
// +kubebuilder:scaffold:scheme +} + +// nolint:gocyclo +func main() { + var metricsAddr string + var metricsCertPath, metricsCertName, metricsCertKey string + var webhookCertPath, webhookCertName, webhookCertKey string + var enableLeaderElection bool + var probeAddr string + var secureMetrics bool + var enableHTTP2 bool + var tlsOpts []func(*tls.Config) + flag.StringVar(&metricsAddr, "metrics-bind-address", "0", "The address the metrics endpoint binds to. "+ + "Use :8443 for HTTPS or :8080 for HTTP, or leave as 0 to disable the metrics service.") + flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.") + flag.BoolVar(&enableLeaderElection, "leader-elect", false, + "Enable leader election for controller manager. "+ + "Enabling this will ensure there is only one active controller manager.") + flag.BoolVar(&secureMetrics, "metrics-secure", true, + "If set, the metrics endpoint is served securely via HTTPS. Use --metrics-secure=false to use HTTP instead.") + flag.StringVar(&webhookCertPath, "webhook-cert-path", "", "The directory that contains the webhook certificate.") + flag.StringVar(&webhookCertName, "webhook-cert-name", "tls.crt", "The name of the webhook certificate file.") + flag.StringVar(&webhookCertKey, "webhook-cert-key", "tls.key", "The name of the webhook key file.") + flag.StringVar(&metricsCertPath, "metrics-cert-path", "", + "The directory that contains the metrics server certificate.") + flag.StringVar(&metricsCertName, "metrics-cert-name", "tls.crt", "The name of the metrics server certificate file.") + flag.StringVar(&metricsCertKey, "metrics-cert-key", "tls.key", "The name of the metrics server key file.") + flag.BoolVar(&enableHTTP2, "enable-http2", false, + "If set, HTTP/2 will be enabled for the metrics and webhook servers") + opts := zap.Options{ + Development: true, + } + opts.BindFlags(flag.CommandLine) + flag.Parse() + + ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts))) + + // if the 
enable-http2 flag is false (the default), http/2 should be disabled + // due to its vulnerabilities. More specifically, disabling http/2 will + // prevent from being vulnerable to the HTTP/2 Stream Cancellation and + // Rapid Reset CVEs. For more information see: + // - https://github.com/advisories/GHSA-qppj-fm5r-hxr3 + // - https://github.com/advisories/GHSA-4374-p667-p6c8 + disableHTTP2 := func(c *tls.Config) { + setupLog.Info("disabling http/2") + c.NextProtos = []string{"http/1.1"} + } + + if !enableHTTP2 { + tlsOpts = append(tlsOpts, disableHTTP2) + } + + // Create watchers for metrics and webhooks certificates + var metricsCertWatcher, webhookCertWatcher *certwatcher.CertWatcher + + // Initial webhook TLS options + webhookTLSOpts := tlsOpts + + if len(webhookCertPath) > 0 { + setupLog.Info("Initializing webhook certificate watcher using provided certificates", + "webhook-cert-path", webhookCertPath, "webhook-cert-name", webhookCertName, "webhook-cert-key", webhookCertKey) + + var err error + webhookCertWatcher, err = certwatcher.New( + filepath.Join(webhookCertPath, webhookCertName), + filepath.Join(webhookCertPath, webhookCertKey), + ) + if err != nil { + setupLog.Error(err, "Failed to initialize webhook certificate watcher") + os.Exit(1) + } + + webhookTLSOpts = append(webhookTLSOpts, func(config *tls.Config) { + config.GetCertificate = webhookCertWatcher.GetCertificate + }) + } + + webhookServer := webhook.NewServer(webhook.Options{ + TLSOpts: webhookTLSOpts, + }) + + // Metrics endpoint is enabled in 'config/default/kustomization.yaml'. The Metrics options configure the server. 
+ // More info: + // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.21.0/pkg/metrics/server + // - https://book.kubebuilder.io/reference/metrics.html + metricsServerOptions := metricsserver.Options{ + BindAddress: metricsAddr, + SecureServing: secureMetrics, + TLSOpts: tlsOpts, + } + + if secureMetrics { + // FilterProvider is used to protect the metrics endpoint with authn/authz. + // These configurations ensure that only authorized users and service accounts + // can access the metrics endpoint. The RBAC are configured in 'config/rbac/kustomization.yaml'. More info: + // https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.21.0/pkg/metrics/filters#WithAuthenticationAndAuthorization + metricsServerOptions.FilterProvider = filters.WithAuthenticationAndAuthorization + } + + // If the certificate is not specified, controller-runtime will automatically + // generate self-signed certificates for the metrics server. While convenient for development and testing, + // this setup is not recommended for production. + // + // If you enable certManager, uncomment the following lines: + // - [METRICS-WITH-CERTS] at config/default/kustomization.yaml to generate and use certificates + // managed by cert-manager for the metrics server. + // - [PROMETHEUS-WITH-CERTS] at config/prometheus/kustomization.yaml for TLS certification. 
+ if len(metricsCertPath) > 0 { + setupLog.Info("Initializing metrics certificate watcher using provided certificates", + "metrics-cert-path", metricsCertPath, "metrics-cert-name", metricsCertName, "metrics-cert-key", metricsCertKey) + + var err error + metricsCertWatcher, err = certwatcher.New( + filepath.Join(metricsCertPath, metricsCertName), + filepath.Join(metricsCertPath, metricsCertKey), + ) + if err != nil { + setupLog.Error(err, "Failed to initialize metrics certificate watcher") + os.Exit(1) + } + + metricsServerOptions.TLSOpts = append(metricsServerOptions.TLSOpts, func(config *tls.Config) { + config.GetCertificate = metricsCertWatcher.GetCertificate + }) + } + + mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{ + Scheme: scheme, + Metrics: metricsServerOptions, + WebhookServer: webhookServer, + HealthProbeBindAddress: probeAddr, + LeaderElection: enableLeaderElection, + LeaderElectionID: "6fb26449.cortex", + // LeaderElectionReleaseOnCancel defines if the leader should step down voluntarily + // when the Manager ends. This requires the binary to immediately end when the + // Manager is stopped, otherwise, this setting is unsafe. Setting this significantly + // speeds up voluntary leader transitions as the new leader doesn't have to wait + // LeaseDuration time first. + // + // In the default scaffold provided, the program ends immediately after + // the manager stops, so it would be fine to enable this option. However, + // if you are doing, or intend to do, any operation such as performing cleanups + // after the manager stops then its usage might be unsafe. 
+ // LeaderElectionReleaseOnCancel: true, + }) + if err != nil { + setupLog.Error(err, "unable to start manager") + os.Exit(1) + } + + if err := (&controller.ComputeDecisionReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Conf: conf.NewConfig[controller.Config](), + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "ComputeDecision") + os.Exit(1) + } + // +kubebuilder:scaffold:builder + + if metricsCertWatcher != nil { + setupLog.Info("Adding metrics certificate watcher to manager") + if err := mgr.Add(metricsCertWatcher); err != nil { + setupLog.Error(err, "unable to add metrics certificate watcher to manager") + os.Exit(1) + } + } + + if webhookCertWatcher != nil { + setupLog.Info("Adding webhook certificate watcher to manager") + if err := mgr.Add(webhookCertWatcher); err != nil { + setupLog.Error(err, "unable to add webhook certificate watcher to manager") + os.Exit(1) + } + } + + if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { + setupLog.Error(err, "unable to set up health check") + os.Exit(1) + } + if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil { + setupLog.Error(err, "unable to set up ready check") + os.Exit(1) + } + + setupLog.Info("starting manager") + if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil { + setupLog.Error(err, "problem running manager") + os.Exit(1) + } +} diff --git a/decisions/config/crd/bases/decisions.cortex_computedecisions.yaml b/decisions/config/crd/bases/decisions.cortex_computedecisions.yaml new file mode 100644 index 00000000..fcc4e894 --- /dev/null +++ b/decisions/config/crd/bases/decisions.cortex_computedecisions.yaml @@ -0,0 +1,91 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.17.2 + name: computedecisions.decisions.cortex +spec: + group: decisions.cortex + names: + kind: ComputeDecision + listKind: 
ComputeDecisionList + plural: computedecisions + shortNames: + - cdec + singular: computedecision + scope: Cluster + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: ComputeDecision is the Schema for the computedecisions API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: spec defines the desired state of ComputeDecision + properties: + pipeline: + properties: + name: + type: string + outputs: + items: + properties: + step: + type: string + weights: + additionalProperties: + type: number + type: object + required: + - step + type: object + type: array + required: + - name + type: object + required: + - pipeline + type: object + status: + description: status defines the observed state of ComputeDecision + properties: + description: + type: string + factors: + items: + properties: + expl: + type: string + host: + type: string + required: + - expl + - host + type: object + type: array + type: object + required: + - spec + type: object + served: true + storage: true + subresources: + status: {} diff --git a/decisions/config/crd/bases/reservations.cortex_computereservations.yaml b/decisions/config/crd/bases/reservations.cortex_computereservations.yaml new file mode 100644 index 00000000..2dc1e1bd --- /dev/null +++ 
b/decisions/config/crd/bases/reservations.cortex_computereservations.yaml @@ -0,0 +1,118 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.17.2 + name: computereservations.reservations.cortex +spec: + group: reservations.cortex + names: + kind: ComputeReservation + listKind: ComputeReservationList + plural: computereservations + shortNames: + - cres + singular: computereservation + scope: Cluster + versions: + - additionalPrinterColumns: + - jsonPath: .status.host + name: Host + type: string + - jsonPath: .status.phase + name: Phase + type: string + - jsonPath: .status.error + name: Error + type: string + name: v1alpha1 + schema: + openAPIV3Schema: + description: ComputeReservation is the Schema for the computereservations + API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: spec defines the desired state of ComputeReservation + properties: + creator: + description: |- + A remark that can be used to identify the creator of the reservation. + This can be used to clean up reservations synced from external systems + without touching reservations created manually or by other systems. 
+ type: string + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: Resources requested to reserve for this instance. + type: object + scheduler: + description: Specification of the scheduler that will handle the reservation. + properties: + cortexNova: + description: |- + If the type of scheduler is cortex-nova, this field will contain additional + information used by cortex-nova to place the instance. + properties: + domainID: + description: The domain ID to reserve for. + type: string + flavorExtraSpecs: + additionalProperties: + type: string + description: Extra specifications relevant for initial placement + of the instance. + type: object + flavorName: + description: The flavor name of the instance to reserve. + type: string + projectID: + description: The project ID to reserve for. + type: string + type: object + type: object + type: object + status: + description: status defines the observed state of ComputeReservation + properties: + error: + description: An error explaining why the reservation failed, if + applicable. + type: string + host: + description: The name of the compute host that was allocated. + type: string + phase: + description: The current phase of the reservation. 
+ type: string + required: + - host + type: object + required: + - spec + type: object + served: true + storage: true + subresources: + status: {} diff --git a/decisions/config/crd/decisions.cortex_computedecisions.yaml b/decisions/config/crd/decisions.cortex_computedecisions.yaml new file mode 100644 index 00000000..fcc4e894 --- /dev/null +++ b/decisions/config/crd/decisions.cortex_computedecisions.yaml @@ -0,0 +1,91 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.17.2 + name: computedecisions.decisions.cortex +spec: + group: decisions.cortex + names: + kind: ComputeDecision + listKind: ComputeDecisionList + plural: computedecisions + shortNames: + - cdec + singular: computedecision + scope: Cluster + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: ComputeDecision is the Schema for the computedecisions API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: spec defines the desired state of ComputeDecision + properties: + pipeline: + properties: + name: + type: string + outputs: + items: + properties: + step: + type: string + weights: + additionalProperties: + type: number + type: object + required: + - step + type: object + type: array + required: + - name + type: object + required: + - pipeline + type: object + status: + description: status defines the observed state of ComputeDecision + properties: + description: + type: string + factors: + items: + properties: + expl: + type: string + host: + type: string + required: + - expl + - host + type: object + type: array + type: object + required: + - spec + type: object + served: true + storage: true + subresources: + status: {} diff --git a/decisions/config/crd/kustomization.yaml b/decisions/config/crd/kustomization.yaml new file mode 100644 index 00000000..8c62eca2 --- /dev/null +++ b/decisions/config/crd/kustomization.yaml @@ -0,0 +1,16 @@ +# This kustomization.yaml is not intended to be run by itself, +# since it depends on service name and namespace that are out of this kustomize package. +# It should be run by config/default +resources: +- bases/decisions.cortex_computedecisions.yaml +# +kubebuilder:scaffold:crdkustomizeresource + +patches: +# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix. +# patches here are for enabling the conversion webhook for each CRD +# +kubebuilder:scaffold:crdkustomizewebhookpatch + +# [WEBHOOK] To enable webhook, uncomment the following section +# the following config is for teaching kustomize how to do kustomization for CRDs. 
+#configurations: +#- kustomizeconfig.yaml diff --git a/decisions/config/crd/kustomizeconfig.yaml b/decisions/config/crd/kustomizeconfig.yaml new file mode 100644 index 00000000..ec5c150a --- /dev/null +++ b/decisions/config/crd/kustomizeconfig.yaml @@ -0,0 +1,19 @@ +# This file is for teaching kustomize how to substitute name and namespace reference in CRD +nameReference: +- kind: Service + version: v1 + fieldSpecs: + - kind: CustomResourceDefinition + version: v1 + group: apiextensions.k8s.io + path: spec/conversion/webhook/clientConfig/service/name + +namespace: +- kind: CustomResourceDefinition + version: v1 + group: apiextensions.k8s.io + path: spec/conversion/webhook/clientConfig/service/namespace + create: false + +varReference: +- path: metadata/annotations diff --git a/decisions/config/default/cert_metrics_manager_patch.yaml b/decisions/config/default/cert_metrics_manager_patch.yaml new file mode 100644 index 00000000..d9750155 --- /dev/null +++ b/decisions/config/default/cert_metrics_manager_patch.yaml @@ -0,0 +1,30 @@ +# This patch adds the args, volumes, and ports to allow the manager to use the metrics-server certs. 
+ +# Add the volumeMount for the metrics-server certs +- op: add + path: /spec/template/spec/containers/0/volumeMounts/- + value: + mountPath: /tmp/k8s-metrics-server/metrics-certs + name: metrics-certs + readOnly: true + +# Add the --metrics-cert-path argument for the metrics server +- op: add + path: /spec/template/spec/containers/0/args/- + value: --metrics-cert-path=/tmp/k8s-metrics-server/metrics-certs + +# Add the metrics-server certs volume configuration +- op: add + path: /spec/template/spec/volumes/- + value: + name: metrics-certs + secret: + secretName: metrics-server-cert + optional: false + items: + - key: ca.crt + path: ca.crt + - key: tls.crt + path: tls.crt + - key: tls.key + path: tls.key diff --git a/decisions/config/default/kustomization.yaml b/decisions/config/default/kustomization.yaml new file mode 100644 index 00000000..35afcf3b --- /dev/null +++ b/decisions/config/default/kustomization.yaml @@ -0,0 +1,234 @@ +# Adds namespace to all resources. +namespace: cortex-decisions + +# Value of this field is prepended to the +# names of all resources, e.g. a deployment named +# "wordpress" becomes "alices-wordpress". +# Note that it should also match with the prefix (text before '-') of the namespace +# field above. +namePrefix: cortex-decisions- + +# Labels to add to all resources and selectors. +#labels: +#- includeSelectors: true +# pairs: +# someName: someValue + +resources: +- ../crd +- ../rbac +- ../manager +# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in +# crd/kustomization.yaml +#- ../webhook +# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 'WEBHOOK' components are required. +#- ../certmanager +# [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'. +#- ../prometheus +# [METRICS] Expose the controller manager metrics service. 
+- metrics_service.yaml +# [NETWORK POLICY] Protect the /metrics endpoint and Webhook Server with NetworkPolicy. +# Only Pod(s) running a namespace labeled with 'metrics: enabled' will be able to gather the metrics. +# Only CR(s) which requires webhooks and are applied on namespaces labeled with 'webhooks: enabled' will +# be able to communicate with the Webhook Server. +#- ../network-policy + +# Uncomment the patches line if you enable Metrics +patches: +# [METRICS] The following patch will enable the metrics endpoint using HTTPS and the port :8443. +# More info: https://book.kubebuilder.io/reference/metrics +- path: manager_metrics_patch.yaml + target: + kind: Deployment + +# Uncomment the patches line if you enable Metrics and CertManager +# [METRICS-WITH-CERTS] To enable metrics protected with certManager, uncomment the following line. +# This patch will protect the metrics with certManager self-signed certs. +#- path: cert_metrics_manager_patch.yaml +# target: +# kind: Deployment + +# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in +# crd/kustomization.yaml +#- path: manager_webhook_patch.yaml +# target: +# kind: Deployment + +# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER' prefix. +# Uncomment the following replacements to add the cert-manager CA injection annotations +#replacements: +# - source: # Uncomment the following block to enable certificates for metrics +# kind: Service +# version: v1 +# name: controller-manager-metrics-service +# fieldPath: metadata.name +# targets: +# - select: +# kind: Certificate +# group: cert-manager.io +# version: v1 +# name: metrics-certs +# fieldPaths: +# - spec.dnsNames.0 +# - spec.dnsNames.1 +# options: +# delimiter: '.' 
+# index: 0 +# create: true +# - select: # Uncomment the following to set the Service name for TLS config in Prometheus ServiceMonitor +# kind: ServiceMonitor +# group: monitoring.coreos.com +# version: v1 +# name: controller-manager-metrics-monitor +# fieldPaths: +# - spec.endpoints.0.tlsConfig.serverName +# options: +# delimiter: '.' +# index: 0 +# create: true + +# - source: +# kind: Service +# version: v1 +# name: controller-manager-metrics-service +# fieldPath: metadata.namespace +# targets: +# - select: +# kind: Certificate +# group: cert-manager.io +# version: v1 +# name: metrics-certs +# fieldPaths: +# - spec.dnsNames.0 +# - spec.dnsNames.1 +# options: +# delimiter: '.' +# index: 1 +# create: true +# - select: # Uncomment the following to set the Service namespace for TLS in Prometheus ServiceMonitor +# kind: ServiceMonitor +# group: monitoring.coreos.com +# version: v1 +# name: controller-manager-metrics-monitor +# fieldPaths: +# - spec.endpoints.0.tlsConfig.serverName +# options: +# delimiter: '.' +# index: 1 +# create: true + +# - source: # Uncomment the following block if you have any webhook +# kind: Service +# version: v1 +# name: webhook-service +# fieldPath: .metadata.name # Name of the service +# targets: +# - select: +# kind: Certificate +# group: cert-manager.io +# version: v1 +# name: serving-cert +# fieldPaths: +# - .spec.dnsNames.0 +# - .spec.dnsNames.1 +# options: +# delimiter: '.' +# index: 0 +# create: true +# - source: +# kind: Service +# version: v1 +# name: webhook-service +# fieldPath: .metadata.namespace # Namespace of the service +# targets: +# - select: +# kind: Certificate +# group: cert-manager.io +# version: v1 +# name: serving-cert +# fieldPaths: +# - .spec.dnsNames.0 +# - .spec.dnsNames.1 +# options: +# delimiter: '.' 
+# index: 1 +# create: true + +# - source: # Uncomment the following block if you have a ValidatingWebhook (--programmatic-validation) +# kind: Certificate +# group: cert-manager.io +# version: v1 +# name: serving-cert # This name should match the one in certificate.yaml +# fieldPath: .metadata.namespace # Namespace of the certificate CR +# targets: +# - select: +# kind: ValidatingWebhookConfiguration +# fieldPaths: +# - .metadata.annotations.[cert-manager.io/inject-ca-from] +# options: +# delimiter: '/' +# index: 0 +# create: true +# - source: +# kind: Certificate +# group: cert-manager.io +# version: v1 +# name: serving-cert +# fieldPath: .metadata.name +# targets: +# - select: +# kind: ValidatingWebhookConfiguration +# fieldPaths: +# - .metadata.annotations.[cert-manager.io/inject-ca-from] +# options: +# delimiter: '/' +# index: 1 +# create: true + +# - source: # Uncomment the following block if you have a DefaultingWebhook (--defaulting ) +# kind: Certificate +# group: cert-manager.io +# version: v1 +# name: serving-cert +# fieldPath: .metadata.namespace # Namespace of the certificate CR +# targets: +# - select: +# kind: MutatingWebhookConfiguration +# fieldPaths: +# - .metadata.annotations.[cert-manager.io/inject-ca-from] +# options: +# delimiter: '/' +# index: 0 +# create: true +# - source: +# kind: Certificate +# group: cert-manager.io +# version: v1 +# name: serving-cert +# fieldPath: .metadata.name +# targets: +# - select: +# kind: MutatingWebhookConfiguration +# fieldPaths: +# - .metadata.annotations.[cert-manager.io/inject-ca-from] +# options: +# delimiter: '/' +# index: 1 +# create: true + +# - source: # Uncomment the following block if you have a ConversionWebhook (--conversion) +# kind: Certificate +# group: cert-manager.io +# version: v1 +# name: serving-cert +# fieldPath: .metadata.namespace # Namespace of the certificate CR +# targets: # Do not remove or uncomment the following scaffold marker; required to generate code for target CRD. 
+# +kubebuilder:scaffold:crdkustomizecainjectionns +# - source: +# kind: Certificate +# group: cert-manager.io +# version: v1 +# name: serving-cert +# fieldPath: .metadata.name +# targets: # Do not remove or uncomment the following scaffold marker; required to generate code for target CRD. +# +kubebuilder:scaffold:crdkustomizecainjectionname diff --git a/decisions/config/default/manager_metrics_patch.yaml b/decisions/config/default/manager_metrics_patch.yaml new file mode 100644 index 00000000..2aaef653 --- /dev/null +++ b/decisions/config/default/manager_metrics_patch.yaml @@ -0,0 +1,4 @@ +# This patch adds the args to allow exposing the metrics endpoint using HTTPS +- op: add + path: /spec/template/spec/containers/0/args/0 + value: --metrics-bind-address=:8443 diff --git a/decisions/config/default/metrics_service.yaml b/decisions/config/default/metrics_service.yaml new file mode 100644 index 00000000..cd559a2e --- /dev/null +++ b/decisions/config/default/metrics_service.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + control-plane: controller-manager + app.kubernetes.io/name: decisions + app.kubernetes.io/managed-by: kustomize + name: controller-manager-metrics-service + namespace: system +spec: + ports: + - name: https + port: 8443 + protocol: TCP + targetPort: 8443 + selector: + control-plane: controller-manager + app.kubernetes.io/name: decisions diff --git a/decisions/config/manager/kustomization.yaml b/decisions/config/manager/kustomization.yaml new file mode 100644 index 00000000..0df5546d --- /dev/null +++ b/decisions/config/manager/kustomization.yaml @@ -0,0 +1,8 @@ +resources: +- manager.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +images: +- name: controller + newName: example.com/decisions + newTag: v0.0.1 diff --git a/decisions/config/manager/manager.yaml b/decisions/config/manager/manager.yaml new file mode 100644 index 00000000..91f9f3a4 --- /dev/null +++ b/decisions/config/manager/manager.yaml 
@@ -0,0 +1,77 @@ +apiVersion: v1 +kind: Namespace +metadata: + labels: + control-plane: controller-manager + app.kubernetes.io/name: decisions + app.kubernetes.io/managed-by: kustomize + name: system +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: controller-manager + namespace: system + labels: + control-plane: controller-manager + app.kubernetes.io/name: decisions + app.kubernetes.io/managed-by: kustomize +spec: + selector: + matchLabels: + control-plane: controller-manager + app.kubernetes.io/name: decisions + replicas: 1 + template: + metadata: + annotations: + kubectl.kubernetes.io/default-container: manager + labels: + control-plane: controller-manager + app.kubernetes.io/name: decisions + spec: + securityContext: + # Projects are configured by default to adhere to the "restricted" Pod Security Standards. + # This ensures that deployments meet the highest security requirements for Kubernetes. + # For more details, see: https://kubernetes.io/docs/concepts/security/pod-security-standards/#restricted + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault + containers: + - command: + - /manager + args: + - --leader-elect + - --health-probe-bind-address=:8081 + image: controller:latest + name: manager + ports: [] + securityContext: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + capabilities: + drop: + - "ALL" + livenessProbe: + httpGet: + path: /healthz + port: 8081 + initialDelaySeconds: 15 + periodSeconds: 20 + readinessProbe: + httpGet: + path: /readyz + port: 8081 + initialDelaySeconds: 5 + periodSeconds: 10 + resources: + limits: + cpu: 500m + memory: 128Mi + requests: + cpu: 10m + memory: 64Mi + volumeMounts: [] + volumes: [] + serviceAccountName: controller-manager + terminationGracePeriodSeconds: 10 diff --git a/decisions/config/network-policy/allow-metrics-traffic.yaml b/decisions/config/network-policy/allow-metrics-traffic.yaml new file mode 100644 index 00000000..da847f1b --- /dev/null +++ 
b/decisions/config/network-policy/allow-metrics-traffic.yaml @@ -0,0 +1,27 @@ +# This NetworkPolicy allows ingress traffic +# with Pods running on namespaces labeled with 'metrics: enabled'. Only Pods on those +# namespaces are able to gather data from the metrics endpoint. +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + labels: + app.kubernetes.io/name: decisions + app.kubernetes.io/managed-by: kustomize + name: allow-metrics-traffic + namespace: system +spec: + podSelector: + matchLabels: + control-plane: controller-manager + app.kubernetes.io/name: decisions + policyTypes: + - Ingress + ingress: + # This allows ingress traffic from any namespace with the label metrics: enabled + - from: + - namespaceSelector: + matchLabels: + metrics: enabled # Only from namespaces with this label + ports: + - port: 8443 + protocol: TCP diff --git a/decisions/config/network-policy/kustomization.yaml b/decisions/config/network-policy/kustomization.yaml new file mode 100644 index 00000000..ec0fb5e5 --- /dev/null +++ b/decisions/config/network-policy/kustomization.yaml @@ -0,0 +1,2 @@ +resources: +- allow-metrics-traffic.yaml diff --git a/decisions/config/prometheus/kustomization.yaml b/decisions/config/prometheus/kustomization.yaml new file mode 100644 index 00000000..fdc5481b --- /dev/null +++ b/decisions/config/prometheus/kustomization.yaml @@ -0,0 +1,11 @@ +resources: +- monitor.yaml + +# [PROMETHEUS-WITH-CERTS] The following patch configures the ServiceMonitor in ../prometheus +# to securely reference certificates created and managed by cert-manager. +# Additionally, ensure that you uncomment the [METRICS WITH CERTMANAGER] patch under config/default/kustomization.yaml +# to mount the "metrics-server-cert" secret in the Manager Deployment. 
+#patches: +# - path: monitor_tls_patch.yaml +# target: +# kind: ServiceMonitor diff --git a/decisions/config/prometheus/monitor.yaml b/decisions/config/prometheus/monitor.yaml new file mode 100644 index 00000000..bf0a107e --- /dev/null +++ b/decisions/config/prometheus/monitor.yaml @@ -0,0 +1,27 @@ +# Prometheus Monitor Service (Metrics) +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + control-plane: controller-manager + app.kubernetes.io/name: decisions + app.kubernetes.io/managed-by: kustomize + name: controller-manager-metrics-monitor + namespace: system +spec: + endpoints: + - path: /metrics + port: https # Ensure this is the name of the port that exposes HTTPS metrics + scheme: https + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + tlsConfig: + # The option insecureSkipVerify: true is not recommended for production since it disables + # certificate verification, exposing the system to potential man-in-the-middle attacks. + # For production environments, it is recommended to use cert-manager for automatic TLS certificate management. + # To apply this configuration, enable cert-manager and use the patch located at config/prometheus/servicemonitor_tls_patch.yaml, + # which securely references the certificate from the 'metrics-server-cert' secret. 
+ insecureSkipVerify: true + selector: + matchLabels: + control-plane: controller-manager + app.kubernetes.io/name: decisions diff --git a/decisions/config/prometheus/monitor_tls_patch.yaml b/decisions/config/prometheus/monitor_tls_patch.yaml new file mode 100644 index 00000000..5bf84ce0 --- /dev/null +++ b/decisions/config/prometheus/monitor_tls_patch.yaml @@ -0,0 +1,19 @@ +# Patch for Prometheus ServiceMonitor to enable secure TLS configuration +# using certificates managed by cert-manager +- op: replace + path: /spec/endpoints/0/tlsConfig + value: + # SERVICE_NAME and SERVICE_NAMESPACE will be substituted by kustomize + serverName: SERVICE_NAME.SERVICE_NAMESPACE.svc + insecureSkipVerify: false + ca: + secret: + name: metrics-server-cert + key: ca.crt + cert: + secret: + name: metrics-server-cert + key: tls.crt + keySecret: + name: metrics-server-cert + key: tls.key diff --git a/decisions/config/rbac/computereservation_admin_role.yaml b/decisions/config/rbac/computereservation_admin_role.yaml new file mode 100644 index 00000000..3a10d614 --- /dev/null +++ b/decisions/config/rbac/computereservation_admin_role.yaml @@ -0,0 +1,27 @@ +# This rule is not used by the project decisions itself. +# It is provided to allow the cluster admin to help manage permissions for users. +# +# Grants full permissions ('*') over decisions.cortex. +# This role is intended for users authorized to modify roles and bindings within the cluster, +# enabling them to delegate specific permissions to other users or groups as needed. 
+ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: decisions + app.kubernetes.io/managed-by: kustomize + name: computedecision-admin-role +rules: +- apiGroups: + - decisions.cortex + resources: + - computedecisions + verbs: + - '*' +- apiGroups: + - decisions.cortex + resources: + - computedecisions/status + verbs: + - get diff --git a/decisions/config/rbac/computereservation_editor_role.yaml b/decisions/config/rbac/computereservation_editor_role.yaml new file mode 100644 index 00000000..09f6ccd7 --- /dev/null +++ b/decisions/config/rbac/computereservation_editor_role.yaml @@ -0,0 +1,33 @@ +# This rule is not used by the project decisions itself. +# It is provided to allow the cluster admin to help manage permissions for users. +# +# Grants permissions to create, update, and delete resources within the decisions.cortex. +# This role is intended for users who need to manage these resources +# but should not control RBAC or manage permissions for others. + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: decisions + app.kubernetes.io/managed-by: kustomize + name: computedecision-editor-role +rules: +- apiGroups: + - decisions.cortex + resources: + - computedecisions + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - decisions.cortex + resources: + - computedecisions/status + verbs: + - get diff --git a/decisions/config/rbac/computereservation_viewer_role.yaml b/decisions/config/rbac/computereservation_viewer_role.yaml new file mode 100644 index 00000000..cbe78ad4 --- /dev/null +++ b/decisions/config/rbac/computereservation_viewer_role.yaml @@ -0,0 +1,29 @@ +# This rule is not used by the project decisions itself. +# It is provided to allow the cluster admin to help manage permissions for users. +# +# Grants read-only access to decisions.cortex resources. 
+# This role is intended for users who need visibility into these resources +# without permissions to modify them. It is ideal for monitoring purposes and limited-access viewing. + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: decisions + app.kubernetes.io/managed-by: kustomize + name: computedecision-viewer-role +rules: +- apiGroups: + - decisions.cortex + resources: + - computedecisions + verbs: + - get + - list + - watch +- apiGroups: + - decisions.cortex + resources: + - computedecisions/status + verbs: + - get diff --git a/decisions/config/rbac/kustomization.yaml b/decisions/config/rbac/kustomization.yaml new file mode 100644 index 00000000..d3da9c0e --- /dev/null +++ b/decisions/config/rbac/kustomization.yaml @@ -0,0 +1,28 @@ +resources: +# All RBAC will be applied under this service account in +# the deployment namespace. You may comment out this resource +# if your manager will use a service account that exists at +# runtime. Be sure to update RoleBinding and ClusterRoleBinding +# subjects if changing service account names. +- service_account.yaml +- role.yaml +- role_binding.yaml +- leader_election_role.yaml +- leader_election_role_binding.yaml +# The following RBAC configurations are used to protect +# the metrics endpoint with authn/authz. These configurations +# ensure that only authorized users and service accounts +# can access the metrics endpoint. Comment the following +# permissions if you want to disable this protection. +# More info: https://book.kubebuilder.io/reference/metrics.html +- metrics_auth_role.yaml +- metrics_auth_role_binding.yaml +- metrics_reader_role.yaml +# For each CRD, "Admin", "Editor" and "Viewer" roles are scaffolded by +# default, aiding admins in cluster management. Those roles are +# not used by the decisions itself. You can comment the following lines +# if you do not want those helpers be installed with your Project. 
+- computereservation_admin_role.yaml
+- computereservation_editor_role.yaml
+- computereservation_viewer_role.yaml
+
diff --git a/decisions/config/rbac/leader_election_role.yaml b/decisions/config/rbac/leader_election_role.yaml
new file mode 100644
index 00000000..3f1f68b5
--- /dev/null
+++ b/decisions/config/rbac/leader_election_role.yaml
@@ -0,0 +1,40 @@
+# permissions to do leader election.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  labels:
+    app.kubernetes.io/name: decisions
+    app.kubernetes.io/managed-by: kustomize
+  name: leader-election-role
+rules:
+- apiGroups:
+  - ""
+  resources:
+  - configmaps
+  verbs:
+  - get
+  - list
+  - watch
+  - create
+  - update
+  - patch
+  - delete
+- apiGroups:
+  - coordination.k8s.io
+  resources:
+  - leases
+  verbs:
+  - get
+  - list
+  - watch
+  - create
+  - update
+  - patch
+  - delete
+- apiGroups:
+  - ""
+  resources:
+  - events
+  verbs:
+  - create
+  - patch
diff --git a/decisions/config/rbac/leader_election_role_binding.yaml b/decisions/config/rbac/leader_election_role_binding.yaml
new file mode 100644
index 00000000..1f6f5652
--- /dev/null
+++ b/decisions/config/rbac/leader_election_role_binding.yaml
@@ -0,0 +1,15 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  labels:
+    app.kubernetes.io/name: decisions
+    app.kubernetes.io/managed-by: kustomize
+  name: leader-election-rolebinding
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: leader-election-role
+subjects:
+- kind: ServiceAccount
+  name: controller-manager
+  namespace: system
diff --git a/decisions/config/rbac/metrics_auth_role.yaml b/decisions/config/rbac/metrics_auth_role.yaml
new file mode 100644
index 00000000..32d2e4ec
--- /dev/null
+++ b/decisions/config/rbac/metrics_auth_role.yaml
@@ -0,0 +1,17 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: metrics-auth-role
+rules:
+- apiGroups:
+  - authentication.k8s.io
+  resources:
+  - tokenreviews
+  verbs:
+  - create
+- apiGroups:
+ - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create diff --git a/decisions/config/rbac/metrics_auth_role_binding.yaml b/decisions/config/rbac/metrics_auth_role_binding.yaml new file mode 100644 index 00000000..e775d67f --- /dev/null +++ b/decisions/config/rbac/metrics_auth_role_binding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: metrics-auth-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: metrics-auth-role +subjects: +- kind: ServiceAccount + name: controller-manager + namespace: system diff --git a/decisions/config/rbac/metrics_reader_role.yaml b/decisions/config/rbac/metrics_reader_role.yaml new file mode 100644 index 00000000..51a75db4 --- /dev/null +++ b/decisions/config/rbac/metrics_reader_role.yaml @@ -0,0 +1,9 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: metrics-reader +rules: +- nonResourceURLs: + - "/metrics" + verbs: + - get diff --git a/decisions/config/rbac/role.yaml b/decisions/config/rbac/role.yaml new file mode 100644 index 00000000..c249a499 --- /dev/null +++ b/decisions/config/rbac/role.yaml @@ -0,0 +1,32 @@ +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: manager-role +rules: +- apiGroups: + - decisions.cortex + resources: + - computedecisions + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - decisions.cortex + resources: + - computedecisions/finalizers + verbs: + - update +- apiGroups: + - decisions.cortex + resources: + - computedecisions/status + verbs: + - get + - patch + - update diff --git a/decisions/config/rbac/role_binding.yaml b/decisions/config/rbac/role_binding.yaml new file mode 100644 index 00000000..6a27d9e9 --- /dev/null +++ b/decisions/config/rbac/role_binding.yaml @@ -0,0 +1,15 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + 
app.kubernetes.io/name: decisions + app.kubernetes.io/managed-by: kustomize + name: manager-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: manager-role +subjects: +- kind: ServiceAccount + name: controller-manager + namespace: system diff --git a/decisions/config/rbac/service_account.yaml b/decisions/config/rbac/service_account.yaml new file mode 100644 index 00000000..1adb8bd8 --- /dev/null +++ b/decisions/config/rbac/service_account.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/name: decisions + app.kubernetes.io/managed-by: kustomize + name: controller-manager + namespace: system diff --git a/decisions/dist/chart/.helmignore b/decisions/dist/chart/.helmignore new file mode 100644 index 00000000..7d92f7fb --- /dev/null +++ b/decisions/dist/chart/.helmignore @@ -0,0 +1,25 @@ +# Patterns to ignore when building Helm packages. +# Operating system files +.DS_Store + +# Version control directories +.git/ +.gitignore +.bzr/ +.hg/ +.hgignore +.svn/ + +# Backup and temporary files +*.swp +*.tmp +*.bak +*.orig +*~ + +# IDE and editor-related files +.idea/ +.vscode/ + +# Helm chart artifacts +dist/chart/*.tgz diff --git a/decisions/dist/chart/Chart.lock b/decisions/dist/chart/Chart.lock new file mode 100644 index 00000000..db4c5823 --- /dev/null +++ b/decisions/dist/chart/Chart.lock @@ -0,0 +1,6 @@ +dependencies: +- name: owner-info + repository: oci://ghcr.io/sapcc/helm-charts + version: 1.0.0 +digest: sha256:7643f231cc4ebda347fd12ec62fe4445c280e2b71d27eec555f3025290f5038f +generated: "2025-08-26T10:55:05.888651+02:00" diff --git a/decisions/dist/chart/Chart.yaml b/decisions/dist/chart/Chart.yaml new file mode 100644 index 00000000..caab06fb --- /dev/null +++ b/decisions/dist/chart/Chart.yaml @@ -0,0 +1,14 @@ +apiVersion: v2 +name: cortex-decisions +description: A Helm chart to distribute the cortex decisions operator. 
+type: application +version: 0.2.0 +appVersion: "latest" +icon: "https://example.com/icon.png" +dependencies: + # Owner info adds a configmap to the kubernetes cluster with information on + # the service owner. This makes it easier to find out who to contact in case + # of issues. See: https://github.com/sapcc/helm-charts/pkgs/container/helm-charts%2Fowner-info + - name: owner-info + repository: oci://ghcr.io/sapcc/helm-charts + version: 1.0.0 diff --git a/decisions/dist/chart/charts/owner-info-1.0.0.tgz b/decisions/dist/chart/charts/owner-info-1.0.0.tgz new file mode 100644 index 0000000000000000000000000000000000000000..2032ead97387e7374750a71b0ffc726645bc5b7e GIT binary patch literal 2139 zcmV-h2&DHPiwG0|00000|0w_~VMtOiV@ORlOnEsqVl!4SWK%V1T2nbTPgYhoO;>Dc zVQyr3R8em|NM&qo0PI=abKAHP_cQ;B9jP<6Rq;bUN15v7CXH{JwC5({`qGC?Is(b1 zh%*Q<04Rl(?tkw9ASH^j>$>MlE*ay+qy_93_T#rpvEnFw$}N{~Mh>e|9uEpH9!BXVd8%l8IUYftP}3ba6>a&^ghN2(Ag`3hXu#i%LUc9O+oO z^MMEOS_4&3AmJLcfEb}tm;y2MQY?|D$del48idYKIsz*2I4Q6zhPP}*odi)3d0*Oi z-@xj`|B|o@)o+jh?uq}&B%1c)|Jmdz{y)na!XQUh_%`JHO0oeImC`0g35QqL*Kbe$ zaSA_O{Rl2K#FQ^!m7_!x$rLl>?sn6i}*c-y2Ak^j! 
zlxkpd1K}WS)*%cSRT@MFCi<0mAdEqYqGSlGTtIua$2fr3nKy)@Vw#$*&549?$dQ$> zqB;kvRD}v8E{IHxDu#wVK86gDK_<~^MtQ=j6itpQ2(O(m-q5_)q5?*5Fz`BB%;5b8 z&uFHqvJ_JL3n{A7>L&4HMJ1+?2?|k+8xW+D)J+Uoh;GMy{I9Vn8PTZ1#MbzNl#f#k?#ch(olhsd{6C#M;lIzb)@vVV1}BuK_z8k{ z76TmCgpJ8i`00}9B!|Jja;;1C{VWX4FI?zT@PGNm12EJwLKzGiO2N**f_<765mKdW zCX$8-kZ!#hSdxYy@^t%ri>SSX+mrM?@&6t9fAT;4@edD?b&qII)DGlb5=K*7KS-i& zExSwTmPeB{?-Xoo*d$dD)u z{!R)6D{4Bm0f|HjPGVL= zRr*@^?vxYHR3mMv+k$WT?X2N%yMy%_K2|~_1g@uTv@^nbtzvy#vR=czG2n=9uZ3pT zZBOtcL~z28w`YS6FgQJR2llYSdJR7d%3&~u!EJ|N1Da}vhdj+C8Z`iVwf|85gTOcx zzwedyw{2lJmwn*2cgW-W{=*z=2>(RjXxzizXj^ic2Ct< z;u;7Pl_nO;tT4`nxG~7o;up)nHIpL#853lPDV8vLi)?8hxknU4N>ptAqcpet2P@3@6Crof0H>v%;8KHMDBlJe2mt=`V&qgD7!%ebO2=kp*bK7^Dysky* zGh8BTL_47Y2D=G>Q?C})u|&@aN%`V5fY+Jr`I`b{s1aW8graUrQHx3*u+XL+nZql} z7%WhG+xf)Stgi32MFtW^R33QVa0ovsvOv#!k*53F^rxm;IM(gdb-tV9&mp4>XP!5o z&&|8nmDyxg9}Y&WCUHwFg`!%>dIln)voO5DvP2eO<)`RRf`o}G4HPNE#6J2a)7zul z!ZWn?+7UihkW^Bdn%B>ZM<8%$+ zI!8VRlDfH6XN28?k3)lX2A>BO+Yz`8G#=YVXcmSGs`Dxi5>bSTlt~iqcB)WI#IPVr zqYPE8G!`L#B1OsYxCaca34Qo0;RqK3o7YS72`(#hydaz(7Q}{_d+nCB_J`&l&cN3> zLM9j!D+8P|iY#>)g_I={*&PDy`JKWtLd+o~#@56AVsJa*FB1e_y~ zBBM*R6fWMpc?(M_m;>EoyV>4{D{3b47OFLvBeM0J7oe)x6bMB#npg_17+Iah&`fd1 zZBxf|$VhUnu5rYYT z3BIt7EOyb*c55fG&^e@3Vxn1Xc~RcJo;qtbS&VX+2W`p^Tvzsf;Hp))-!FVFq%4iZQ~{~9D2S#BVXTG{WhP|d;%8TlEOno9qp=+)6{Xx*Ciqm=QfZ>?*7T2Jd~J*}^Q R{U-nb|Nj_?q>KP4004X22~_|9 literal 0 HcmV?d00001 diff --git a/decisions/dist/chart/templates/_helpers.tpl b/decisions/dist/chart/templates/_helpers.tpl new file mode 100644 index 00000000..05ce24c6 --- /dev/null +++ b/decisions/dist/chart/templates/_helpers.tpl @@ -0,0 +1,50 @@ +{{- define "chart.name" -}} +{{- if .Chart }} + {{- if .Chart.Name }} + {{- .Chart.Name | trunc 63 | trimSuffix "-" }} + {{- else if .Values.nameOverride }} + {{ .Values.nameOverride | trunc 63 | trimSuffix "-" }} + {{- else }} + decisions + {{- end }} +{{- else }} + decisions +{{- end }} +{{- end }} + + +{{- define "chart.labels" -}} +{{- if .Chart.AppVersion -}} 
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+{{- if .Chart.Version }}
+helm.sh/chart: {{ .Chart.Version | quote }}
+{{- end }}
+app.kubernetes.io/name: {{ include "chart.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+
+{{- define "chart.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "chart.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+
+{{- define "chart.hasMutatingWebhooks" -}}
+{{- $hasMutating := false }}
+{{- range . }}
+  {{- if eq .type "mutating" }}
+    {{- $hasMutating = true }}{{- end }}
+{{- end }}
+{{ $hasMutating }}{{- end }}
+
+
+{{- define "chart.hasValidatingWebhooks" -}}
+{{- $hasValidating := false }}
+{{- range . }}
+  {{- if eq .type "validating" }}
+    {{- $hasValidating = true }}{{- end }}
+{{- end }}
+{{ $hasValidating }}{{- end }}
diff --git a/decisions/dist/chart/templates/certmanager/certificate.yaml b/decisions/dist/chart/templates/certmanager/certificate.yaml
new file mode 100644
index 00000000..b1b42606
--- /dev/null
+++ b/decisions/dist/chart/templates/certmanager/certificate.yaml
@@ -0,0 +1,36 @@
+{{- if .Values.certmanager.enable }}
+# Self-signed Issuer
+apiVersion: cert-manager.io/v1
+kind: Issuer
+metadata:
+  labels:
+    {{- include "chart.labels" . | nindent 4 }}
+  name: selfsigned-issuer
+  namespace: {{ .Release.Namespace }}
+spec:
+  selfSigned: {}
+{{- if .Values.metrics.enable }}
+---
+# Certificate for the metrics
+apiVersion: cert-manager.io/v1
+kind: Certificate
+metadata:
+  annotations:
+    {{- if .Values.crd.keep }}
+    "helm.sh/resource-policy": keep
+    {{- end }}
+  labels:
+    {{- include "chart.labels" . 
| nindent 4 }}
+  name: metrics-certs
+  namespace: {{ .Release.Namespace }}
+spec:
+  dnsNames:
+    - decisions-controller-manager-metrics-service.{{ .Release.Namespace }}.svc
+    - decisions-controller-manager-metrics-service.{{ .Release.Namespace }}.svc.cluster.local
+    - decisions-metrics-service.{{ .Release.Namespace }}.svc
+  issuerRef:
+    kind: Issuer
+    name: selfsigned-issuer
+  secretName: metrics-server-cert
+{{- end }}
+{{- end }}
diff --git a/decisions/dist/chart/templates/crd/decisions.cortex_computedecisions.yaml b/decisions/dist/chart/templates/crd/decisions.cortex_computedecisions.yaml
new file mode 100644
index 00000000..145e45f4
--- /dev/null
+++ b/decisions/dist/chart/templates/crd/decisions.cortex_computedecisions.yaml
@@ -0,0 +1,98 @@
+{{- if .Values.crd.enable }}
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  labels:
+    {{- include "chart.labels" . | nindent 4 }}
+  annotations:
+    {{- if .Values.crd.keep }}
+    "helm.sh/resource-policy": keep
+    {{- end }}
+    controller-gen.kubebuilder.io/version: v0.17.2
+  name: computedecisions.decisions.cortex
+spec:
+  group: decisions.cortex
+  names:
+    kind: ComputeDecision
+    listKind: ComputeDecisionList
+    plural: computedecisions
+    shortNames:
+    - cdec
+    singular: computedecision
+  scope: Cluster
+  versions:
+  - name: v1alpha1
+    schema:
+      openAPIV3Schema:
+        description: ComputeDecision is the Schema for the computedecisions API
+        properties:
+          apiVersion:
+            description: |-
+              APIVersion defines the versioned schema of this representation of an object.
+              Servers should convert recognized schemas to the latest internal value, and
+              may reject unrecognized values.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+            type: string
+          kind:
+            description: |-
+              Kind is a string value representing the REST resource this object represents.
+              Servers may infer this from the endpoint the client submits requests to.
+              Cannot be updated.
+              In CamelCase. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: spec defines the desired state of ComputeDecision + properties: + pipeline: + properties: + name: + type: string + outputs: + items: + properties: + step: + type: string + weights: + additionalProperties: + type: number + type: object + required: + - step + type: object + type: array + required: + - name + type: object + required: + - pipeline + type: object + status: + description: status defines the observed state of ComputeDecision + properties: + description: + type: string + factors: + items: + properties: + expl: + type: string + host: + type: string + required: + - expl + - host + type: object + type: array + type: object + required: + - spec + type: object + served: true + storage: true + subresources: + status: {} +{{- end -}} diff --git a/decisions/dist/chart/templates/manager/manager.yaml b/decisions/dist/chart/templates/manager/manager.yaml new file mode 100644 index 00000000..060d2e6f --- /dev/null +++ b/decisions/dist/chart/templates/manager/manager.yaml @@ -0,0 +1,107 @@ +# This file is safe from kubebuilder edit --plugins=helm/v1-alpha +# If you want to re-generate, add the --force flag. + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: decisions-controller-manager + namespace: {{ .Release.Namespace }} + labels: + {{- include "chart.labels" . | nindent 4 }} + control-plane: controller-manager +spec: + replicas: {{ .Values.controllerManager.replicas }} + selector: + matchLabels: + {{- include "chart.selectorLabels" . | nindent 6 }} + control-plane: controller-manager + template: + metadata: + annotations: + kubectl.kubernetes.io/default-container: manager + labels: + {{- include "chart.labels" . 
| nindent 8 }} + control-plane: controller-manager + {{- if and .Values.controllerManager.pod .Values.controllerManager.pod.labels }} + {{- range $key, $value := .Values.controllerManager.pod.labels }} + {{ $key }}: {{ $value }} + {{- end }} + {{- end }} + spec: + containers: + - name: manager + args: + {{- range .Values.controllerManager.container.args }} + - {{ . }} + {{- end }} + command: + - /manager + image: {{ .Values.controllerManager.container.image.repository }}:{{ .Values.controllerManager.container.image.tag | default .Chart.AppVersion }} + {{- if .Values.controllerManager.container.env }} + env: + {{- range $key, $value := .Values.controllerManager.container.env }} + - name: {{ $key }} + value: {{ $value }} + {{- end }} + {{- end }} + livenessProbe: + {{- toYaml .Values.controllerManager.container.livenessProbe | nindent 12 }} + readinessProbe: + {{- toYaml .Values.controllerManager.container.readinessProbe | nindent 12 }} + resources: + {{- toYaml .Values.controllerManager.container.resources | nindent 12 }} + securityContext: + {{- toYaml .Values.controllerManager.container.securityContext | nindent 12 }} + volumeMounts: + - name: decisions-controller-manager-config-volume + mountPath: /etc/config + - name: decisions-controller-manager-secrets-volume + mountPath: /etc/secrets + readOnly: true + {{- if and .Values.metrics.enable .Values.certmanager.enable }} + - name: metrics-certs + mountPath: /tmp/k8s-metrics-server/metrics-certs + readOnly: true + {{- end }} + securityContext: + {{- toYaml .Values.controllerManager.securityContext | nindent 8 }} + serviceAccountName: {{ .Values.controllerManager.serviceAccountName }} + terminationGracePeriodSeconds: {{ .Values.controllerManager.terminationGracePeriodSeconds }} + volumes: + # Custom values to configure the controller-manager. 
+ - name: decisions-controller-manager-config-volume + configMap: + name: decisions-controller-manager-config + - name: decisions-controller-manager-secrets-volume + secret: + secretName: decisions-controller-manager-secrets + {{- if and .Values.metrics.enable .Values.certmanager.enable }} + - name: metrics-certs + secret: + secretName: metrics-server-cert + {{- end }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: decisions-controller-manager-config +data: + conf.json: |- + {{- if .Values.decisions.conf }} + {{ toJson .Values.decisions.conf }} + {{- else }} + {} + {{- end }} +--- +apiVersion: v1 +kind: Secret +metadata: + name: decisions-controller-manager-secrets +type: Opaque +data: + secrets.json: |- + {{- if .Values.decisions.secrets }} + {{ toJson .Values.decisions.secrets | b64enc }} + {{- else }} + {{ "{}" | b64enc }} + {{- end }} \ No newline at end of file diff --git a/decisions/dist/chart/templates/metrics/metrics-service.yaml b/decisions/dist/chart/templates/metrics/metrics-service.yaml new file mode 100644 index 00000000..818e728d --- /dev/null +++ b/decisions/dist/chart/templates/metrics/metrics-service.yaml @@ -0,0 +1,18 @@ +{{- if .Values.metrics.enable }} +apiVersion: v1 +kind: Service +metadata: + name: decisions-controller-manager-metrics-service + namespace: {{ .Release.Namespace }} + labels: + {{- include "chart.labels" . 
| nindent 4 }} + control-plane: controller-manager +spec: + ports: + - port: 8443 + targetPort: 8443 + protocol: TCP + name: https + selector: + control-plane: controller-manager +{{- end }} diff --git a/decisions/dist/chart/templates/network-policy/allow-metrics-traffic.yaml b/decisions/dist/chart/templates/network-policy/allow-metrics-traffic.yaml new file mode 100644 index 00000000..9d54a550 --- /dev/null +++ b/decisions/dist/chart/templates/network-policy/allow-metrics-traffic.yaml @@ -0,0 +1,28 @@ +{{- if .Values.networkPolicy.enable }} +# This NetworkPolicy allows ingress traffic +# with Pods running on namespaces labeled with 'metrics: enabled'. Only Pods on those +# namespaces are able to gather data from the metrics endpoint. +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + labels: + {{- include "chart.labels" . | nindent 4 }} + name: allow-metrics-traffic + namespace: {{ .Release.Namespace }} +spec: + podSelector: + matchLabels: + control-plane: controller-manager + app.kubernetes.io/name: decisions + policyTypes: + - Ingress + ingress: + # This allows ingress traffic from any namespace with the label metrics: enabled + - from: + - namespaceSelector: + matchLabels: + metrics: enabled # Only from namespaces with this label + ports: + - port: 8443 + protocol: TCP +{{- end -}} diff --git a/decisions/dist/chart/templates/prometheus/monitor.yaml b/decisions/dist/chart/templates/prometheus/monitor.yaml new file mode 100644 index 00000000..1720ee91 --- /dev/null +++ b/decisions/dist/chart/templates/prometheus/monitor.yaml @@ -0,0 +1,40 @@ +# To integrate with Prometheus. +{{- if .Values.prometheus.enable }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + {{- include "chart.labels" . 
| nindent 4 }} + control-plane: controller-manager + name: decisions-controller-manager-metrics-monitor + namespace: {{ .Release.Namespace }} +spec: + endpoints: + - path: /metrics + port: https + scheme: https + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + tlsConfig: + {{- if .Values.certmanager.enable }} + serverName: decisions-controller-manager-metrics-service.{{ .Release.Namespace }}.svc + # Apply secure TLS configuration with cert-manager + insecureSkipVerify: false + ca: + secret: + name: metrics-server-cert + key: ca.crt + cert: + secret: + name: metrics-server-cert + key: tls.crt + keySecret: + name: metrics-server-cert + key: tls.key + {{- else }} + # Development/Test mode (insecure configuration) + insecureSkipVerify: true + {{- end }} + selector: + matchLabels: + control-plane: controller-manager +{{- end }} diff --git a/decisions/dist/chart/templates/rbac/computereservation_admin_role.yaml b/decisions/dist/chart/templates/rbac/computereservation_admin_role.yaml new file mode 100644 index 00000000..fee77c79 --- /dev/null +++ b/decisions/dist/chart/templates/rbac/computereservation_admin_role.yaml @@ -0,0 +1,28 @@ +{{- if .Values.rbac.enable }} +# This rule is not used by the project decisions itself. +# It is provided to allow the cluster admin to help manage permissions for users. +# +# Grants full permissions ('*') over decisions.cortex. +# This role is intended for users authorized to modify roles and bindings within the cluster, +# enabling them to delegate specific permissions to other users or groups as needed. + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + {{- include "chart.labels" . 
| nindent 4 }} + name: computedecision-admin-role +rules: +- apiGroups: + - decisions.cortex + resources: + - computedecisions + verbs: + - '*' +- apiGroups: + - decisions.cortex + resources: + - computedecisions/status + verbs: + - get +{{- end -}} diff --git a/decisions/dist/chart/templates/rbac/computereservation_editor_role.yaml b/decisions/dist/chart/templates/rbac/computereservation_editor_role.yaml new file mode 100644 index 00000000..d435a404 --- /dev/null +++ b/decisions/dist/chart/templates/rbac/computereservation_editor_role.yaml @@ -0,0 +1,34 @@ +{{- if .Values.rbac.enable }} +# This rule is not used by the project decisions itself. +# It is provided to allow the cluster admin to help manage permissions for users. +# +# Grants permissions to create, update, and delete resources within the decisions.cortex. +# This role is intended for users who need to manage these resources +# but should not control RBAC or manage permissions for others. + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + {{- include "chart.labels" . | nindent 4 }} + name: computedecision-editor-role +rules: +- apiGroups: + - decisions.cortex + resources: + - computedecisions + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - decisions.cortex + resources: + - computedecisions/status + verbs: + - get +{{- end -}} diff --git a/decisions/dist/chart/templates/rbac/computereservation_viewer_role.yaml b/decisions/dist/chart/templates/rbac/computereservation_viewer_role.yaml new file mode 100644 index 00000000..879ecb27 --- /dev/null +++ b/decisions/dist/chart/templates/rbac/computereservation_viewer_role.yaml @@ -0,0 +1,30 @@ +{{- if .Values.rbac.enable }} +# This rule is not used by the project decisions itself. +# It is provided to allow the cluster admin to help manage permissions for users. +# +# Grants read-only access to decisions.cortex resources. 
+# This role is intended for users who need visibility into these resources +# without permissions to modify them. It is ideal for monitoring purposes and limited-access viewing. + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + {{- include "chart.labels" . | nindent 4 }} + name: computedecision-viewer-role +rules: +- apiGroups: + - decisions.cortex + resources: + - computedecisions + verbs: + - get + - list + - watch +- apiGroups: + - decisions.cortex + resources: + - computedecisions/status + verbs: + - get +{{- end -}} diff --git a/decisions/dist/chart/templates/rbac/leader_election_role.yaml b/decisions/dist/chart/templates/rbac/leader_election_role.yaml new file mode 100644 index 00000000..5e5e2ded --- /dev/null +++ b/decisions/dist/chart/templates/rbac/leader_election_role.yaml @@ -0,0 +1,42 @@ +{{- if .Values.rbac.enable }} +# permissions to do leader election. +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + labels: + {{- include "chart.labels" . | nindent 4 }} + namespace: {{ .Release.Namespace }} + name: decisions-leader-election-role +rules: +- apiGroups: + - "" + resources: + - configmaps + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - "" + resources: + - events + verbs: + - create + - patch +{{- end -}} diff --git a/decisions/dist/chart/templates/rbac/leader_election_role_binding.yaml b/decisions/dist/chart/templates/rbac/leader_election_role_binding.yaml new file mode 100644 index 00000000..a4be63be --- /dev/null +++ b/decisions/dist/chart/templates/rbac/leader_election_role_binding.yaml @@ -0,0 +1,17 @@ +{{- if .Values.rbac.enable }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + labels: + {{- include "chart.labels" . 
| nindent 4 }} + namespace: {{ .Release.Namespace }} + name: decisions-leader-election-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: decisions-leader-election-role +subjects: +- kind: ServiceAccount + name: {{ .Values.controllerManager.serviceAccountName }} + namespace: {{ .Release.Namespace }} +{{- end -}} diff --git a/decisions/dist/chart/templates/rbac/metrics_auth_role.yaml b/decisions/dist/chart/templates/rbac/metrics_auth_role.yaml new file mode 100644 index 00000000..8ed40055 --- /dev/null +++ b/decisions/dist/chart/templates/rbac/metrics_auth_role.yaml @@ -0,0 +1,21 @@ +{{- if and .Values.rbac.enable .Values.metrics.enable }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + {{- include "chart.labels" . | nindent 4 }} + name: decisions-metrics-auth-role +rules: +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create +{{- end -}} diff --git a/decisions/dist/chart/templates/rbac/metrics_auth_role_binding.yaml b/decisions/dist/chart/templates/rbac/metrics_auth_role_binding.yaml new file mode 100644 index 00000000..d3ca3c7e --- /dev/null +++ b/decisions/dist/chart/templates/rbac/metrics_auth_role_binding.yaml @@ -0,0 +1,16 @@ +{{- if and .Values.rbac.enable .Values.metrics.enable }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + {{- include "chart.labels" . 
| nindent 4 }} + name: decisions-metrics-auth-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: decisions-metrics-auth-role +subjects: +- kind: ServiceAccount + name: {{ .Values.controllerManager.serviceAccountName }} + namespace: {{ .Release.Namespace }} +{{- end -}} diff --git a/decisions/dist/chart/templates/rbac/metrics_reader_role.yaml b/decisions/dist/chart/templates/rbac/metrics_reader_role.yaml new file mode 100644 index 00000000..81f7da70 --- /dev/null +++ b/decisions/dist/chart/templates/rbac/metrics_reader_role.yaml @@ -0,0 +1,13 @@ +{{- if and .Values.rbac.enable .Values.metrics.enable }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + {{- include "chart.labels" . | nindent 4 }} + name: decisions-metrics-reader +rules: +- nonResourceURLs: + - "/metrics" + verbs: + - get +{{- end -}} diff --git a/decisions/dist/chart/templates/rbac/role.yaml b/decisions/dist/chart/templates/rbac/role.yaml new file mode 100644 index 00000000..24e720cf --- /dev/null +++ b/decisions/dist/chart/templates/rbac/role.yaml @@ -0,0 +1,36 @@ +{{- if .Values.rbac.enable }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + {{- include "chart.labels" . 
| nindent 4 }} + name: decisions-manager-role +rules: +- apiGroups: + - decisions.cortex + resources: + - computedecisions + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - decisions.cortex + resources: + - computedecisions/finalizers + verbs: + - update +- apiGroups: + - decisions.cortex + resources: + - computedecisions/status + verbs: + - get + - patch + - update +{{- end -}} diff --git a/decisions/dist/chart/templates/rbac/role_binding.yaml b/decisions/dist/chart/templates/rbac/role_binding.yaml new file mode 100644 index 00000000..09804a2a --- /dev/null +++ b/decisions/dist/chart/templates/rbac/role_binding.yaml @@ -0,0 +1,16 @@ +{{- if .Values.rbac.enable }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + {{- include "chart.labels" . | nindent 4 }} + name: decisions-manager-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: decisions-manager-role +subjects: +- kind: ServiceAccount + name: {{ .Values.controllerManager.serviceAccountName }} + namespace: {{ .Release.Namespace }} +{{- end -}} diff --git a/decisions/dist/chart/templates/rbac/service_account.yaml b/decisions/dist/chart/templates/rbac/service_account.yaml new file mode 100644 index 00000000..93e0a323 --- /dev/null +++ b/decisions/dist/chart/templates/rbac/service_account.yaml @@ -0,0 +1,15 @@ +{{- if .Values.rbac.enable }} +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + {{- include "chart.labels" . 
| nindent 4 }} + {{- if and .Values.controllerManager.serviceAccount .Values.controllerManager.serviceAccount.annotations }} + annotations: + {{- range $key, $value := .Values.controllerManager.serviceAccount.annotations }} + {{ $key }}: {{ $value }} + {{- end }} + {{- end }} + name: {{ .Values.controllerManager.serviceAccountName }} + namespace: {{ .Release.Namespace }} +{{- end -}} diff --git a/decisions/dist/chart/values.yaml b/decisions/dist/chart/values.yaml new file mode 100644 index 00000000..df67c7d0 --- /dev/null +++ b/decisions/dist/chart/values.yaml @@ -0,0 +1,129 @@ +# This file is safe from kubebuilder edit --plugins=helm/v1-alpha +# If you want to re-generate, add the --force flag. + +owner-info: + enabled: true + helm-chart-url: "https://github.com/cobaltcore-dev/cortex/decisions/dist/chart" + maintainers: + - "p.matthes@sap.com" + - "markus.wieland@sap.com" + - "arno.uhlig@sap.com" + support-group: "workload-management" + service: "cortex-decisions" + +# [MANAGER]: Manager Deployment Configurations +controllerManager: + replicas: 1 + container: + image: + repository: ghcr.io/cobaltcore-dev/cortex-decisions-operator + args: + - "--leader-elect" + - "--metrics-bind-address=:8443" + - "--health-probe-bind-address=:8081" + resources: + limits: + cpu: 500m + memory: 512Mi + requests: + cpu: 10m + memory: 64Mi + livenessProbe: + initialDelaySeconds: 15 + periodSeconds: 20 + httpGet: + path: /healthz + port: 8081 + readinessProbe: + initialDelaySeconds: 5 + periodSeconds: 10 + httpGet: + path: /readyz + port: 8081 + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - "ALL" + securityContext: + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault + terminationGracePeriodSeconds: 10 + serviceAccountName: decisions-controller-manager + +# [RBAC]: To enable RBAC (Permissions) configurations +rbac: + enable: true + +# [CRDs]: To enable the CRDs +crd: + # This option determines whether the CRDs are included + # in the 
installation process.
+  enable: true
+
+  # Enabling this option adds the "helm.sh/resource-policy": keep
+  # annotation to the CRD, ensuring it remains installed even when
+  # the Helm release is uninstalled.
+  # NOTE: Removing the CRDs will also remove all cert-manager CR(s)
+  # (Certificates, Issuers, ...) due to garbage collection.
+  keep: true
+
+# [METRICS]: Set to true to generate manifests for exporting metrics.
+# To disable metrics export set false, and ensure that the
+# ControllerManager argument "--metrics-bind-address=:8443" is removed.
+metrics:
+  enable: true
+
+# [PROMETHEUS]: To enable a ServiceMonitor to export metrics to Prometheus set true
+prometheus:
+  enable: true
+
+# [CERT-MANAGER]: To enable cert-manager injection to webhooks set true
+certmanager:
+  enable: false
+
+# [NETWORK POLICIES]: To enable NetworkPolicies set true
+networkPolicy:
+  enable: false
+
+# SSO certificate to use.
+sharedSSOCert: &sharedSSOCert
+  # Certificate "public key". (Optional, remove this key if not needed)
+  cert: |
+    -----BEGIN CERTIFICATE-----
+    Your certificate here
+    -----END CERTIFICATE-----
+  # Certificate private key. (Optional, remove this key if not needed)
+  certKey: |
+    -----BEGIN PRIVATE KEY-----
+    Your private key here
+    -----END PRIVATE KEY-----
+  # Whether the certificate is self-signed.
+  # If true, the certificate is not verified.
+  selfSigned: false
+
+decisions:
+  # Default configuration provided through configmap to the operator.
+  conf:
+    # Which hypervisor types should be handled by the operator.
+    hypervisors:
+      - "QEMU"
+      - "CH"
+    # Not supported:
+    # - "VMware vCenter Server"
+    # - "ironic"
+    endpoints:
+      # The URL of the Nova external scheduler service.
+      novaExternalScheduler: "http://cortex-nova-scheduler:8080/scheduler/nova/external"
+  # Config provided here will override the config provided above.
+  secrets:
+    # Override the endpoints and credentials to your OpenStack. 
+ keystone: + url: https://path-to-keystone/v3 + sso: *sharedSSOCert + username: openstack-user-with-all-project-read-access + password: openstack-user-password + projectName: openstack-project-of-user + userDomainName: openstack-domain-of-user + projectDomainName: openstack-domain-of-project-scoped-to diff --git a/decisions/go.mod b/decisions/go.mod new file mode 100644 index 00000000..7d25ec49 --- /dev/null +++ b/decisions/go.mod @@ -0,0 +1,105 @@ +module github.com/cobaltcore-dev/cortex/decisions + +go 1.25.0 + +replace ( + github.com/cobaltcore-dev/cortex => ../ + github.com/cobaltcore-dev/cortex/decisions/api => ./api +) + +require ( + github.com/cobaltcore-dev/cortex v0.0.0-00010101000000-000000000000 + github.com/cobaltcore-dev/cortex/decisions/api v0.0.0-00010101000000-000000000000 + k8s.io/apimachinery v0.34.1 + k8s.io/client-go v0.34.1 + sigs.k8s.io/controller-runtime v0.22.1 +) + +require ( + github.com/pmezard/go-difflib v1.0.0 // indirect + go.yaml.in/yaml/v2 v2.4.2 // indirect + go.yaml.in/yaml/v3 v3.0.4 // indirect + sigs.k8s.io/structured-merge-diff/v6 v6.3.0 // indirect +) + +require ( + cel.dev/expr v0.24.0 // indirect + github.com/antlr4-go/antlr/v4 v4.13.0 // indirect + github.com/beorn7/perks v1.0.1 // indirect + github.com/blang/semver/v4 v4.0.0 // indirect + github.com/cenkalti/backoff/v4 v4.3.0 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/emicklei/go-restful/v3 v3.12.2 // indirect + github.com/evanphx/json-patch/v5 v5.9.11 // indirect + github.com/felixge/httpsnoop v1.0.4 // indirect + github.com/fsnotify/fsnotify v1.9.0 // indirect + github.com/fxamacker/cbor/v2 v2.9.0 // indirect + github.com/go-logr/logr v1.4.3 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + github.com/go-logr/zapr v1.3.0 // indirect + github.com/go-openapi/jsonpointer v0.21.0 // indirect + github.com/go-openapi/jsonreference v0.20.2 // indirect + github.com/go-openapi/swag v0.23.0 // 
indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/google/btree v1.1.3 // indirect + github.com/google/cel-go v0.26.0 // indirect + github.com/google/gnostic-models v0.7.0 // indirect + github.com/google/go-cmp v0.7.0 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3 // indirect + github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/josharian/intern v1.0.0 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/mailru/easyjson v0.7.7 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/pkg/errors v0.9.1 // indirect + github.com/prometheus/client_golang v1.23.2 // indirect + github.com/prometheus/client_model v0.6.2 // indirect + github.com/prometheus/common v0.66.1 // indirect + github.com/prometheus/procfs v0.17.0 // indirect + github.com/spf13/cobra v1.9.1 // indirect + github.com/spf13/pflag v1.0.6 // indirect + github.com/stoewer/go-strcase v1.3.0 // indirect + github.com/x448/float16 v0.8.4 // indirect + go.opentelemetry.io/auto/sdk v1.1.0 // indirect + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0 // indirect + go.opentelemetry.io/otel v1.37.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.34.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.34.0 // indirect + go.opentelemetry.io/otel/metric v1.37.0 // indirect + go.opentelemetry.io/otel/sdk v1.35.0 // indirect + go.opentelemetry.io/otel/trace v1.37.0 // indirect + go.opentelemetry.io/proto/otlp v1.5.0 // indirect + go.uber.org/multierr v1.11.0 // indirect + go.uber.org/zap v1.27.0 // indirect + golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect + golang.org/x/net v0.44.0 // indirect + golang.org/x/oauth2 
v0.30.0 // indirect + golang.org/x/sync v0.17.0 // indirect + golang.org/x/sys v0.36.0 // indirect + golang.org/x/term v0.35.0 // indirect + golang.org/x/text v0.29.0 // indirect + golang.org/x/time v0.12.0 // indirect + gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20250303144028-a0af3efb3deb // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20250303144028-a0af3efb3deb // indirect + google.golang.org/grpc v1.72.1 // indirect + google.golang.org/protobuf v1.36.8 // indirect + gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect + k8s.io/api v0.34.1 // indirect + k8s.io/apiextensions-apiserver v0.34.0 // indirect + k8s.io/apiserver v0.34.0 // indirect + k8s.io/component-base v0.34.0 // indirect + k8s.io/klog/v2 v2.130.1 // indirect + k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b // indirect + k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 // indirect + sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2 // indirect + sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect + sigs.k8s.io/randfill v1.0.0 // indirect + sigs.k8s.io/yaml v1.6.0 // indirect +) diff --git a/decisions/go.sum b/decisions/go.sum new file mode 100644 index 00000000..bc272d7d --- /dev/null +++ b/decisions/go.sum @@ -0,0 +1,259 @@ +cel.dev/expr v0.24.0 h1:56OvJKSH3hDGL0ml5uSxZmz3/3Pq4tJ+fb1unVLAFcY= +cel.dev/expr v0.24.0/go.mod h1:hLPLo1W4QUmuYdA72RBX06QTs6MXw941piREPl3Yfiw= +github.com/antlr4-go/antlr/v4 v4.13.0 h1:lxCg3LAv+EUK6t1i0y1V6/SLeUi0eKEKdhQAlS8TVTI= +github.com/antlr4-go/antlr/v4 v4.13.0/go.mod h1:pfChB/xh/Unjila75QW7+VU4TSnWnnk9UTnmpPaOR2g= +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= +github.com/blang/semver/v4 
v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ= +github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= +github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/emicklei/go-restful/v3 v3.12.2 h1:DhwDP0vY3k8ZzE0RunuJy8GhNpPL6zqLkDf9B/a0/xU= +github.com/emicklei/go-restful/v3 v3.12.2/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/evanphx/json-patch v0.5.2 h1:xVCHIVMUu1wtM/VkR9jVZ45N3FhZfYMMYGorLCR8P3k= +github.com/evanphx/json-patch v0.5.2/go.mod h1:ZWS5hhDbVDyob71nXKNL0+PWn6ToqBHMikGIFbs31qQ= +github.com/evanphx/json-patch/v5 v5.9.11 h1:/8HVnzMq13/3x9TPvjG08wUGqBTmZBsCWzjTM0wiaDU= +github.com/evanphx/json-patch/v5 v5.9.11/go.mod h1:3j+LviiESTElxA4p3EMKAB9HXj3/XEtnUf6OZxqIQTM= +github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= +github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= +github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k= +github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= +github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM= +github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ= +github.com/go-logr/logr v1.2.2/go.mod 
h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= +github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= +github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg= +github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs= +github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ= +github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY= +github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE= +github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= +github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= +github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE= +github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= +github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= +github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg= +github.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4= 
+github.com/google/cel-go v0.26.0 h1:DPGjXackMpJWH680oGY4lZhYjIameYmR+/6RBdDGmaI= +github.com/google/cel-go v0.26.0/go.mod h1:A9O8OU9rdvrK5MQyrqfIxo1a0u4g3sF8KB6PUIaryMM= +github.com/google/gnostic-models v0.7.0 h1:qwTtogB15McXDaNqTZdzPJRHvaVJlAl+HVQnLmJEJxo= +github.com/google/gnostic-models v0.7.0/go.mod h1:whL5G0m6dmc5cPxKc5bdKdEN3UjI7OUGxBlw57miDrQ= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= +github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db h1:097atOisP2aRj7vFgYQBbFN4U4JNXUNYpxael3UzMyo= +github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3 h1:5ZPtiqj0JL5oKWmcsq4VMaAW5ukBEgSGXEN89zeH1Jo= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3/go.mod h1:ndYquD05frm2vACXE1nsccT4oJzjhw2arTS2cpUD1PI= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= +github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/kisielk/errcheck v1.5.0/go.mod 
h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= +github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= +github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= +github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= +github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= +github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee h1:W5t00kpgFdJifH4BDsTlE89Zl93FEloxaWZfGcifgq8= +github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 
h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/onsi/ginkgo/v2 v2.22.0 h1:Yed107/8DjTr0lKCNt7Dn8yQ6ybuDRQoMGrNFKzMfHg= +github.com/onsi/ginkgo/v2 v2.22.0/go.mod h1:7Du3c42kxCUegi0IImZ1wUQzMBVecgIHjR1C+NkhLQo= +github.com/onsi/gomega v1.36.1 h1:bJDPBO7ibjxcbHMgSCoo4Yj18UWbKDlLwX1x9sybDcw= +github.com/onsi/gomega v1.36.1/go.mod h1:PvZbdDc8J6XJEpDK4HCuRBm8a6Fzp9/DmhC9C7yFlog= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o= +github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg= +github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= +github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= +github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs= +github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA= +github.com/prometheus/procfs v0.17.0 h1:FuLQ+05u4ZI+SS/w9+BWEM2TXiHKsUQ9TADiRH7DuK0= +github.com/prometheus/procfs v0.17.0/go.mod h1:oPQLaDAMRbA+u8H5Pbfq+dl3VDAvHxMUOVhe0wYB2zw= +github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= +github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/spf13/cobra v1.9.1 h1:CXSaggrXdbHK9CF+8ywj8Amf7PBRmPCOJugH954Nnlo= +github.com/spf13/cobra v1.9.1/go.mod 
h1:nDyEzZ8ogv936Cinf6g1RU9MRY64Ir93oCnqb9wxYW0= +github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= +github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stoewer/go-strcase v1.3.0 h1:g0eASXYtp+yvN9fK8sH94oCIk0fau9uV1/ZdJ0AVEzs= +github.com/stoewer/go-strcase v1.3.0/go.mod h1:fAH5hQ5pehh+j3nZfvwdk2RgEgQjAoM8wodgtPmh1xo= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= +github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= +github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= +go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0 
h1:sbiXRNDSWJOTobXh5HyQKjq6wUC5tNybqjIqDpAY4CU= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0/go.mod h1:69uWxva0WgAA/4bu2Yy70SLDBwZXuQ6PbBpbsa5iZrQ= +go.opentelemetry.io/otel v1.37.0 h1:9zhNfelUvx0KBfu/gb+ZgeAfAgtWrfHJZcAqFC228wQ= +go.opentelemetry.io/otel v1.37.0/go.mod h1:ehE/umFRLnuLa/vSccNq9oS1ErUlkkK71gMcN34UG8I= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.34.0 h1:OeNbIYk/2C15ckl7glBlOBp5+WlYsOElzTNmiPW/x60= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.34.0/go.mod h1:7Bept48yIeqxP2OZ9/AqIpYS94h2or0aB4FypJTc8ZM= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.34.0 h1:tgJ0uaNS4c98WRNUEx5U3aDlrDOI5Rs+1Vifcw4DJ8U= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.34.0/go.mod h1:U7HYyW0zt/a9x5J1Kjs+r1f/d4ZHnYFclhYY2+YbeoE= +go.opentelemetry.io/otel/metric v1.37.0 h1:mvwbQS5m0tbmqML4NqK+e3aDiO02vsf/WgbsdpcPoZE= +go.opentelemetry.io/otel/metric v1.37.0/go.mod h1:04wGrZurHYKOc+RKeye86GwKiTb9FKm1WHtO+4EVr2E= +go.opentelemetry.io/otel/sdk v1.35.0 h1:iPctf8iprVySXSKJffSS79eOjl9pvxV9ZqOWT0QejKY= +go.opentelemetry.io/otel/sdk v1.35.0/go.mod h1:+ga1bZliga3DxJ3CQGg3updiaAJoNECOgJREo9KHGQg= +go.opentelemetry.io/otel/sdk/metric v1.35.0 h1:1RriWBmCKgkeHEhM7a2uMjMUfP7MsOF5JpUCaEqEI9o= +go.opentelemetry.io/otel/sdk/metric v1.35.0/go.mod h1:is6XYCUMpcKi+ZsOvfluY5YstFnhW0BidkR+gL+qN+w= +go.opentelemetry.io/otel/trace v1.37.0 h1:HLdcFNbRQBE2imdSEgm/kwqmQj1Or1l/7bW6mxVK7z4= +go.opentelemetry.io/otel/trace v1.37.0/go.mod h1:TlgrlQ+PtQO5XFerSPUYG0JSgGyryXewPGyayAWSBS0= +go.opentelemetry.io/proto/otlp v1.5.0 h1:xJvq7gMzB31/d406fB8U5CBdyQGw4P399D1aQWU/3i4= +go.opentelemetry.io/proto/otlp v1.5.0/go.mod h1:keN8WnHxOy8PG0rQZjJJ5A2ebUoafqWp0eVQ4yIXvJ4= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= +go.uber.org/multierr 
v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= +go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= +go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI= +go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU= +go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= +go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 h1:2dVuKD2vS7b0QIHQbpyTISPd0LeHDbnYEryqj5Q1ug8= +golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.44.0 h1:evd8IRDyfNBMBTTY5XRF1vaZlD+EmWx6x8PkhR04H/I= +golang.org/x/net v0.44.0/go.mod h1:ECOoLqd5U3Lhyeyo/QDCEVQ4sNgYsqvCZ722XogGieY= +golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI= +golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU= +golang.org/x/sync 
v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug= +golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k= +golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/term v0.35.0 h1:bZBVKBudEyhRcajGcNc3jIfWPqV4y/Kt2XcoigOWtDQ= +golang.org/x/term v0.35.0/go.mod h1:TPGtkTLesOwf2DE8CgVYiZinHAOuy5AYUYT1lENIZnA= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk= +golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4= +golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE= +golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod 
h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.36.0 h1:kWS0uv/zsvHEle1LbV5LE8QujrxB3wfQyxHfhOk0Qkg= +golang.org/x/tools v0.36.0/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw= +gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= +google.golang.org/genproto/googleapis/api v0.0.0-20250303144028-a0af3efb3deb h1:p31xT4yrYrSM/G4Sn2+TNUkVhFCbG9y8itM2S6Th950= +google.golang.org/genproto/googleapis/api v0.0.0-20250303144028-a0af3efb3deb/go.mod h1:jbe3Bkdp+Dh2IrslsFCklNhweNTBgSYanP1UXhJDhKg= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250303144028-a0af3efb3deb h1:TLPQVbx1GJ8VKZxz52VAxl1EBgKXXbTiU9Fc5fZeLn4= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250303144028-a0af3efb3deb/go.mod h1:LuRYeWDFV6WOn90g357N17oMCaxpgCnbi/44qJvDn2I= +google.golang.org/grpc v1.72.1 h1:HR03wO6eyZ7lknl75XlxABNVLLFc2PAb6mHlYh756mA= +google.golang.org/grpc v1.72.1/go.mod h1:wH5Aktxcg25y1I3w7H69nHfXdOG3UiadoBtjh3izSDM= +google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc= +google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod 
h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4= +gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= +gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +k8s.io/api v0.34.1 h1:jC+153630BMdlFukegoEL8E/yT7aLyQkIVuwhmwDgJM= +k8s.io/api v0.34.1/go.mod h1:SB80FxFtXn5/gwzCoN6QCtPD7Vbu5w2n1S0J5gFfTYk= +k8s.io/apiextensions-apiserver v0.34.0 h1:B3hiB32jV7BcyKcMU5fDaDxk882YrJ1KU+ZSkA9Qxoc= +k8s.io/apiextensions-apiserver v0.34.0/go.mod h1:hLI4GxE1BDBy9adJKxUxCEHBGZtGfIg98Q+JmTD7+g0= +k8s.io/apimachinery v0.34.1 h1:dTlxFls/eikpJxmAC7MVE8oOeP1zryV7iRyIjB0gky4= +k8s.io/apimachinery v0.34.1/go.mod h1:/GwIlEcWuTX9zKIg2mbw0LRFIsXwrfoVxn+ef0X13lw= +k8s.io/apiserver v0.34.0 h1:Z51fw1iGMqN7uJ1kEaynf2Aec1Y774PqU+FVWCFV3Jg= +k8s.io/apiserver v0.34.0/go.mod h1:52ti5YhxAvewmmpVRqlASvaqxt0gKJxvCeW7ZrwgazQ= +k8s.io/client-go v0.34.1 h1:ZUPJKgXsnKwVwmKKdPfw4tB58+7/Ik3CrjOEhsiZ7mY= +k8s.io/client-go v0.34.1/go.mod h1:kA8v0FP+tk6sZA0yKLRG67LWjqufAoSHA2xVGKw9Of8= +k8s.io/component-base v0.34.0 h1:bS8Ua3zlJzapklsB1dZgjEJuJEeHjj8yTu1gxE2zQX8= +k8s.io/component-base v0.34.0/go.mod h1:RSCqUdvIjjrEm81epPcjQ/DS+49fADvGSCkIP3IC6vg= +k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= +k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= +k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b h1:MloQ9/bdJyIu9lb1PzujOPolHyvO06MXG5TUIj2mNAA= +k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b/go.mod h1:UZ2yyWbFTpuhSbFhv24aGNOdoRdJZgsIObGBUaYVsts= +k8s.io/utils 
v0.0.0-20250604170112-4c0f3b243397 h1:hwvWFiBzdWw1FhfY1FooPn3kzWuJ8tmbZBHi4zVsl1Y= +k8s.io/utils v0.0.0-20250604170112-4c0f3b243397/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2 h1:jpcvIRr3GLoUoEKRkHKSmGjxb6lWwrBlJsXc+eUYQHM= +sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2/go.mod h1:Ve9uj1L+deCXFrPOk1LpFXqTg7LCFzFso6PA48q/XZw= +sigs.k8s.io/controller-runtime v0.22.1 h1:Ah1T7I+0A7ize291nJZdS1CabF/lB4E++WizgV24Eqg= +sigs.k8s.io/controller-runtime v0.22.1/go.mod h1:FwiwRjkRPbiN+zp2QRp7wlTCzbUXxZ/D4OzuQUDwBHY= +sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE= +sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= +sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= +sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= +sigs.k8s.io/structured-merge-diff/v6 v6.3.0 h1:jTijUJbW353oVOd9oTlifJqOGEkUw2jB/fXCbTiQEco= +sigs.k8s.io/structured-merge-diff/v6 v6.3.0/go.mod h1:M3W8sfWvn2HhQDIbGWj3S099YozAsymCo/wrT5ohRUE= +sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs= +sigs.k8s.io/yaml v1.6.0/go.mod h1:796bPqUfzR/0jLAl6XjHl3Ck7MiyVv8dbTdyT3/pMf4= diff --git a/decisions/hack/boilerplate.go.txt b/decisions/hack/boilerplate.go.txt new file mode 100644 index 00000000..0fb88f91 --- /dev/null +++ b/decisions/hack/boilerplate.go.txt @@ -0,0 +1,2 @@ +// Copyright 2025 SAP SE +// SPDX-License-Identifier: Apache-2.0 \ No newline at end of file diff --git a/decisions/internal/controller/conf.go b/decisions/internal/controller/conf.go new file mode 100644 index 00000000..4cf64100 --- /dev/null +++ b/decisions/internal/controller/conf.go @@ -0,0 +1,7 @@ +// Copyright 2025 SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package controller + +// Configuration for the decisions operator. 
+type Config struct{} diff --git a/decisions/internal/controller/controller.go b/decisions/internal/controller/controller.go new file mode 100644 index 00000000..baac2711 --- /dev/null +++ b/decisions/internal/controller/controller.go @@ -0,0 +1,58 @@ +// Copyright 2025 SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package controller + +import ( + "context" + + "k8s.io/apimachinery/pkg/runtime" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + logf "sigs.k8s.io/controller-runtime/pkg/log" + + "github.com/cobaltcore-dev/cortex/decisions/api/v1alpha1" + decisionsv1alpha1 "github.com/cobaltcore-dev/cortex/decisions/api/v1alpha1" +) + +// ComputeDecisionReconciler reconciles a ComputeDecision object +type ComputeDecisionReconciler struct { + // Client for the kubernetes API. + client.Client + // Kubernetes scheme to use for the decisions. + Scheme *runtime.Scheme + // Configuration for the controller. + Conf Config +} + +// +kubebuilder:rbac:groups=decisions.cortex,resources=computedecisions,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=decisions.cortex,resources=computedecisions/status,verbs=get;update;patch +// +kubebuilder:rbac:groups=decisions.cortex,resources=computedecisions/finalizers,verbs=update + +// Reconcile is part of the main kubernetes reconciliation loop which aims to +// move the current state of the cluster closer to the desired state. +func (r *ComputeDecisionReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + _ = logf.FromContext(ctx) + // Fetch the decision object. + var res v1alpha1.ComputeDecision + if err := r.Get(ctx, req.NamespacedName, &res); err != nil { + // Can happen when the resource was just deleted. + return ctrl.Result{}, err + } + + // TODO: Reconciliation logic. + + return ctrl.Result{}, nil // No need to requeue. 
+} + +// SetupWithManager sets up the controller with the Manager. +func (r *ComputeDecisionReconciler) SetupWithManager(mgr ctrl.Manager) error { + return ctrl.NewControllerManagedBy(mgr). + For(&decisionsv1alpha1.ComputeDecision{}). + Named("computedecision"). + WithOptions(controller.Options{ + MaxConcurrentReconciles: 1, // Default + }). + Complete(r) +} From 8f710b6ec09308f87c1a5279dd0830a22f56da92 Mon Sep 17 00:00:00 2001 From: Philipp Matthes Date: Thu, 25 Sep 2025 10:15:34 +0200 Subject: [PATCH 02/58] Remove duplicated --- ...servations.cortex_computereservations.yaml | 118 ------------------ 1 file changed, 118 deletions(-) delete mode 100644 decisions/config/crd/bases/reservations.cortex_computereservations.yaml diff --git a/decisions/config/crd/bases/reservations.cortex_computereservations.yaml b/decisions/config/crd/bases/reservations.cortex_computereservations.yaml deleted file mode 100644 index 2dc1e1bd..00000000 --- a/decisions/config/crd/bases/reservations.cortex_computereservations.yaml +++ /dev/null @@ -1,118 +0,0 @@ ---- -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - annotations: - controller-gen.kubebuilder.io/version: v0.17.2 - name: computedecisions.decisions.cortex -spec: - group: decisions.cortex - names: - kind: ComputeDecision - listKind: ComputeDecisionList - plural: computedecisions - shortNames: - - cres - singular: computedecision - scope: Cluster - versions: - - additionalPrinterColumns: - - jsonPath: .status.host - name: Host - type: string - - jsonPath: .status.phase - name: Phase - type: string - - jsonPath: .status.error - name: Error - type: string - name: v1alpha1 - schema: - openAPIV3Schema: - description: ComputeDecision is the Schema for the computedecisions - API - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. 
- Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: spec defines the desired state of ComputeDecision - properties: - creator: - description: |- - A remark that can be used to identify the creator of the decision. - This can be used to clean up decisions synced from external systems - without touching decisions created manually or by other systems. - type: string - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: Resources requested to reserve for this instance. - type: object - scheduler: - description: Specification of the scheduler that will handle the decision. - properties: - cortexNova: - description: |- - If the type of scheduler is cortex-nova, this field will contain additional - information used by cortex-nova to place the instance. - properties: - domainID: - description: The domain ID to reserve for. - type: string - flavorExtraSpecs: - additionalProperties: - type: string - description: Extra specifications relevant for initial placement - of the instance. - type: object - flavorName: - description: The flavor name of the instance to reserve. - type: string - projectID: - description: The project ID to reserve for. 
- type: string - type: object - type: object - type: object - status: - description: status defines the observed state of ComputeDecision - properties: - error: - description: An error explaining why the decision is failed, if - applicable. - type: string - host: - description: The name of the compute host that was allocated. - type: string - phase: - description: The current phase of the decision. - type: string - required: - - host - type: object - required: - - spec - type: object - served: true - storage: true - subresources: - status: {} From 0692cd8e9009c10b89fe51544576ca6793963a26 Mon Sep 17 00:00:00 2001 From: Philipp Matthes Date: Thu, 25 Sep 2025 10:33:45 +0200 Subject: [PATCH 03/58] Rename to schedulingdecision --- decisions/PROJECT | 2 +- .../api/v1alpha1/computedecision_types.go | 68 --------------- .../api/v1alpha1/schedulingdecision_types.go | 62 +++++++++++++ .../api/v1alpha1/zz_generated.deepcopy.go | 78 +++++++---------- decisions/cmd/main.go | 4 +- ...decisions.cortex_schedulingdecisions.yaml} | 30 ++----- ...decisions.cortex_schedulingdecisions.yaml} | 30 ++----- .../decisions.cortex_computedecisions.yaml | 22 ++--- .../decisions.cortex_schedulingdecisions.yaml | 86 +++++++++++++++++++ decisions/internal/controller/controller.go | 17 ++-- 10 files changed, 213 insertions(+), 186 deletions(-) delete mode 100644 decisions/api/v1alpha1/computedecision_types.go create mode 100644 decisions/api/v1alpha1/schedulingdecision_types.go rename decisions/config/crd/bases/{decisions.cortex_computedecisions.yaml => decisions.cortex_schedulingdecisions.yaml} (74%) rename decisions/config/crd/{decisions.cortex_computedecisions.yaml => decisions.cortex_schedulingdecisions.yaml} (74%) create mode 100644 decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml diff --git a/decisions/PROJECT b/decisions/PROJECT index 40012891..edd787f2 100644 --- a/decisions/PROJECT +++ b/decisions/PROJECT @@ -16,7 +16,7 @@ resources: controller: true domain: 
cortex group: decisions - kind: ComputeDecision + kind: SchedulingDecision path: github.com/cobaltcore-dev/cortex/decisions/api/v1alpha1 version: v1alpha1 version: "3" diff --git a/decisions/api/v1alpha1/computedecision_types.go b/decisions/api/v1alpha1/computedecision_types.go deleted file mode 100644 index c282a848..00000000 --- a/decisions/api/v1alpha1/computedecision_types.go +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2025 SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package v1alpha1 - -import ( - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -type ComputeDecisionPipelineOutputSpec struct { - Step string `json:"step"` - Weights map[string]float64 `json:"weights,omitempty"` -} - -type ComputeDecisionPipelineSpec struct { - Name string `json:"name"` - Outputs []ComputeDecisionPipelineOutputSpec `json:"outputs,omitempty"` -} - -// ComputeDecisionSpec defines the desired state of ComputeDecision. -type ComputeDecisionSpec struct { - Pipeline ComputeDecisionPipelineSpec `json:"pipeline"` -} - -type ComputeDecisionFactorStatus struct { - Host string `json:"host"` - Expl string `json:"expl"` -} - -// ComputeDecisionStatus defines the observed state of ComputeDecision. 
-type ComputeDecisionStatus struct { - Description string `json:"description,omitempty"` - Factors []ComputeDecisionFactorStatus `json:"factors,omitempty"` -} - -// +kubebuilder:object:root=true -// +kubebuilder:subresource:status -// +kubebuilder:resource:scope=Cluster,shortName=cdec - -// ComputeDecision is the Schema for the computedecisions API -type ComputeDecision struct { - metav1.TypeMeta `json:",inline"` - - // metadata is a standard object metadata - // +optional - metav1.ObjectMeta `json:"metadata,omitempty,omitzero"` - - // spec defines the desired state of ComputeDecision - // +required - Spec ComputeDecisionSpec `json:"spec"` - - // status defines the observed state of ComputeDecision - // +optional - Status ComputeDecisionStatus `json:"status,omitempty,omitzero"` -} - -// +kubebuilder:object:root=true - -// ComputeDecisionList contains a list of ComputeDecision -type ComputeDecisionList struct { - metav1.TypeMeta `json:",inline"` - metav1.ListMeta `json:"metadata,omitempty"` - Items []ComputeDecision `json:"items"` -} - -func init() { - SchemeBuilder.Register(&ComputeDecision{}, &ComputeDecisionList{}) -} diff --git a/decisions/api/v1alpha1/schedulingdecision_types.go b/decisions/api/v1alpha1/schedulingdecision_types.go new file mode 100644 index 00000000..1beace65 --- /dev/null +++ b/decisions/api/v1alpha1/schedulingdecision_types.go @@ -0,0 +1,62 @@ +// Copyright 2025 SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package v1alpha1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +type SchedulingDecisionPipelineOutputSpec struct { + Step string `json:"step"` + Weights map[string]float64 `json:"weights,omitempty"` +} + +type SchedulingDecisionPipelineSpec struct { + Name string `json:"name"` + Outputs []SchedulingDecisionPipelineOutputSpec `json:"outputs,omitempty"` +} + +// SchedulingDecisionSpec defines the desired state of SchedulingDecision. 
+type SchedulingDecisionSpec struct { + Pipeline SchedulingDecisionPipelineSpec `json:"pipeline"` +} + +// SchedulingDecisionStatus defines the observed state of SchedulingDecision. +type SchedulingDecisionStatus struct { + Description string `json:"description,omitempty"` +} + +// +kubebuilder:object:root=true +// +kubebuilder:subresource:status +// +kubebuilder:resource:scope=Cluster,shortName=sdecs + +// SchedulingDecision is the Schema for the computedecisions API +type SchedulingDecision struct { + metav1.TypeMeta `json:",inline"` + + // metadata is a standard object metadata + // +optional + metav1.ObjectMeta `json:"metadata,omitempty,omitzero"` + + // spec defines the desired state of SchedulingDecision + // +required + Spec SchedulingDecisionSpec `json:"spec"` + + // status defines the observed state of SchedulingDecision + // +optional + Status SchedulingDecisionStatus `json:"status,omitempty,omitzero"` +} + +// +kubebuilder:object:root=true + +// SchedulingDecisionList contains a list of SchedulingDecision +type SchedulingDecisionList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []SchedulingDecision `json:"items"` +} + +func init() { + SchemeBuilder.Register(&SchedulingDecision{}, &SchedulingDecisionList{}) +} diff --git a/decisions/api/v1alpha1/zz_generated.deepcopy.go b/decisions/api/v1alpha1/zz_generated.deepcopy.go index 763790e0..5d891098 100644 --- a/decisions/api/v1alpha1/zz_generated.deepcopy.go +++ b/decisions/api/v1alpha1/zz_generated.deepcopy.go @@ -12,26 +12,26 @@ import ( ) // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
-func (in *ComputeDecision) DeepCopyInto(out *ComputeDecision) { +func (in *SchedulingDecision) DeepCopyInto(out *SchedulingDecision) { *out = *in out.TypeMeta = in.TypeMeta in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) in.Spec.DeepCopyInto(&out.Spec) - in.Status.DeepCopyInto(&out.Status) + out.Status = in.Status } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ComputeDecision. -func (in *ComputeDecision) DeepCopy() *ComputeDecision { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingDecision. +func (in *SchedulingDecision) DeepCopy() *SchedulingDecision { if in == nil { return nil } - out := new(ComputeDecision) + out := new(SchedulingDecision) in.DeepCopyInto(out) return out } // DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. -func (in *ComputeDecision) DeepCopyObject() runtime.Object { +func (in *SchedulingDecision) DeepCopyObject() runtime.Object { if c := in.DeepCopy(); c != nil { return c } @@ -39,46 +39,31 @@ func (in *ComputeDecision) DeepCopyObject() runtime.Object { } // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *ComputeDecisionFactorStatus) DeepCopyInto(out *ComputeDecisionFactorStatus) { - *out = *in -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ComputeDecisionFactorStatus. -func (in *ComputeDecisionFactorStatus) DeepCopy() *ComputeDecisionFactorStatus { - if in == nil { - return nil - } - out := new(ComputeDecisionFactorStatus) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
-func (in *ComputeDecisionList) DeepCopyInto(out *ComputeDecisionList) { +func (in *SchedulingDecisionList) DeepCopyInto(out *SchedulingDecisionList) { *out = *in out.TypeMeta = in.TypeMeta in.ListMeta.DeepCopyInto(&out.ListMeta) if in.Items != nil { in, out := &in.Items, &out.Items - *out = make([]ComputeDecision, len(*in)) + *out = make([]SchedulingDecision, len(*in)) for i := range *in { (*in)[i].DeepCopyInto(&(*out)[i]) } } } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ComputeDecisionList. -func (in *ComputeDecisionList) DeepCopy() *ComputeDecisionList { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingDecisionList. +func (in *SchedulingDecisionList) DeepCopy() *SchedulingDecisionList { if in == nil { return nil } - out := new(ComputeDecisionList) + out := new(SchedulingDecisionList) in.DeepCopyInto(out) return out } // DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. -func (in *ComputeDecisionList) DeepCopyObject() runtime.Object { +func (in *SchedulingDecisionList) DeepCopyObject() runtime.Object { if c := in.DeepCopy(); c != nil { return c } @@ -86,7 +71,7 @@ func (in *ComputeDecisionList) DeepCopyObject() runtime.Object { } // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *ComputeDecisionPipelineOutputSpec) DeepCopyInto(out *ComputeDecisionPipelineOutputSpec) { +func (in *SchedulingDecisionPipelineOutputSpec) DeepCopyInto(out *SchedulingDecisionPipelineOutputSpec) { *out = *in if in.Weights != nil { in, out := &in.Weights, &out.Weights @@ -97,70 +82,65 @@ func (in *ComputeDecisionPipelineOutputSpec) DeepCopyInto(out *ComputeDecisionPi } } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ComputeDecisionPipelineOutputSpec. 
-func (in *ComputeDecisionPipelineOutputSpec) DeepCopy() *ComputeDecisionPipelineOutputSpec { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingDecisionPipelineOutputSpec. +func (in *SchedulingDecisionPipelineOutputSpec) DeepCopy() *SchedulingDecisionPipelineOutputSpec { if in == nil { return nil } - out := new(ComputeDecisionPipelineOutputSpec) + out := new(SchedulingDecisionPipelineOutputSpec) in.DeepCopyInto(out) return out } // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *ComputeDecisionPipelineSpec) DeepCopyInto(out *ComputeDecisionPipelineSpec) { +func (in *SchedulingDecisionPipelineSpec) DeepCopyInto(out *SchedulingDecisionPipelineSpec) { *out = *in if in.Outputs != nil { in, out := &in.Outputs, &out.Outputs - *out = make([]ComputeDecisionPipelineOutputSpec, len(*in)) + *out = make([]SchedulingDecisionPipelineOutputSpec, len(*in)) for i := range *in { (*in)[i].DeepCopyInto(&(*out)[i]) } } } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ComputeDecisionPipelineSpec. -func (in *ComputeDecisionPipelineSpec) DeepCopy() *ComputeDecisionPipelineSpec { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingDecisionPipelineSpec. +func (in *SchedulingDecisionPipelineSpec) DeepCopy() *SchedulingDecisionPipelineSpec { if in == nil { return nil } - out := new(ComputeDecisionPipelineSpec) + out := new(SchedulingDecisionPipelineSpec) in.DeepCopyInto(out) return out } // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
-func (in *ComputeDecisionSpec) DeepCopyInto(out *ComputeDecisionSpec) { +func (in *SchedulingDecisionSpec) DeepCopyInto(out *SchedulingDecisionSpec) { *out = *in in.Pipeline.DeepCopyInto(&out.Pipeline) } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ComputeDecisionSpec. -func (in *ComputeDecisionSpec) DeepCopy() *ComputeDecisionSpec { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingDecisionSpec. +func (in *SchedulingDecisionSpec) DeepCopy() *SchedulingDecisionSpec { if in == nil { return nil } - out := new(ComputeDecisionSpec) + out := new(SchedulingDecisionSpec) in.DeepCopyInto(out) return out } // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *ComputeDecisionStatus) DeepCopyInto(out *ComputeDecisionStatus) { +func (in *SchedulingDecisionStatus) DeepCopyInto(out *SchedulingDecisionStatus) { *out = *in - if in.Factors != nil { - in, out := &in.Factors, &out.Factors - *out = make([]ComputeDecisionFactorStatus, len(*in)) - copy(*out, *in) - } } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ComputeDecisionStatus. -func (in *ComputeDecisionStatus) DeepCopy() *ComputeDecisionStatus { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingDecisionStatus. 
+func (in *SchedulingDecisionStatus) DeepCopy() *SchedulingDecisionStatus { if in == nil { return nil } - out := new(ComputeDecisionStatus) + out := new(SchedulingDecisionStatus) in.DeepCopyInto(out) return out } diff --git a/decisions/cmd/main.go b/decisions/cmd/main.go index 25a4c09f..102f01e4 100644 --- a/decisions/cmd/main.go +++ b/decisions/cmd/main.go @@ -190,12 +190,12 @@ func main() { os.Exit(1) } - if err := (&controller.ComputeDecisionReconciler{ + if err := (&controller.SchedulingDecisionReconciler{ Client: mgr.GetClient(), Scheme: mgr.GetScheme(), Conf: conf.NewConfig[controller.Config](), }).SetupWithManager(mgr); err != nil { - setupLog.Error(err, "unable to create controller", "controller", "ComputeDecision") + setupLog.Error(err, "unable to create controller", "controller", "SchedulingDecision") os.Exit(1) } // +kubebuilder:scaffold:builder diff --git a/decisions/config/crd/bases/decisions.cortex_computedecisions.yaml b/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml similarity index 74% rename from decisions/config/crd/bases/decisions.cortex_computedecisions.yaml rename to decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml index fcc4e894..426ca823 100644 --- a/decisions/config/crd/bases/decisions.cortex_computedecisions.yaml +++ b/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml @@ -4,22 +4,22 @@ kind: CustomResourceDefinition metadata: annotations: controller-gen.kubebuilder.io/version: v0.17.2 - name: computedecisions.decisions.cortex + name: schedulingdecisions.decisions.cortex spec: group: decisions.cortex names: - kind: ComputeDecision - listKind: ComputeDecisionList - plural: computedecisions + kind: SchedulingDecision + listKind: SchedulingDecisionList + plural: schedulingdecisions shortNames: - - cdec - singular: computedecision + - sdecs + singular: schedulingdecision scope: Cluster versions: - name: v1alpha1 schema: openAPIV3Schema: - description: ComputeDecision is the Schema for 
the computedecisions API + description: SchedulingDecision is the Schema for the computedecisions API properties: apiVersion: description: |- @@ -39,7 +39,7 @@ spec: metadata: type: object spec: - description: spec defines the desired state of ComputeDecision + description: spec defines the desired state of SchedulingDecision properties: pipeline: properties: @@ -65,22 +65,10 @@ spec: - pipeline type: object status: - description: status defines the observed state of ComputeDecision + description: status defines the observed state of SchedulingDecision properties: description: type: string - factors: - items: - properties: - expl: - type: string - host: - type: string - required: - - expl - - host - type: object - type: array type: object required: - spec diff --git a/decisions/config/crd/decisions.cortex_computedecisions.yaml b/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml similarity index 74% rename from decisions/config/crd/decisions.cortex_computedecisions.yaml rename to decisions/config/crd/decisions.cortex_schedulingdecisions.yaml index fcc4e894..426ca823 100644 --- a/decisions/config/crd/decisions.cortex_computedecisions.yaml +++ b/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml @@ -4,22 +4,22 @@ kind: CustomResourceDefinition metadata: annotations: controller-gen.kubebuilder.io/version: v0.17.2 - name: computedecisions.decisions.cortex + name: schedulingdecisions.decisions.cortex spec: group: decisions.cortex names: - kind: ComputeDecision - listKind: ComputeDecisionList - plural: computedecisions + kind: SchedulingDecision + listKind: SchedulingDecisionList + plural: schedulingdecisions shortNames: - - cdec - singular: computedecision + - sdecs + singular: schedulingdecision scope: Cluster versions: - name: v1alpha1 schema: openAPIV3Schema: - description: ComputeDecision is the Schema for the computedecisions API + description: SchedulingDecision is the Schema for the computedecisions API properties: apiVersion: description: 
|- @@ -39,7 +39,7 @@ spec: metadata: type: object spec: - description: spec defines the desired state of ComputeDecision + description: spec defines the desired state of SchedulingDecision properties: pipeline: properties: @@ -65,22 +65,10 @@ spec: - pipeline type: object status: - description: status defines the observed state of ComputeDecision + description: status defines the observed state of SchedulingDecision properties: description: type: string - factors: - items: - properties: - expl: - type: string - host: - type: string - required: - - expl - - host - type: object - type: array type: object required: - spec diff --git a/decisions/dist/chart/templates/crd/decisions.cortex_computedecisions.yaml b/decisions/dist/chart/templates/crd/decisions.cortex_computedecisions.yaml index 145e45f4..430f4fb0 100644 --- a/decisions/dist/chart/templates/crd/decisions.cortex_computedecisions.yaml +++ b/decisions/dist/chart/templates/crd/decisions.cortex_computedecisions.yaml @@ -14,8 +14,8 @@ metadata: spec: group: decisions.cortex names: - kind: ComputeDecision - listKind: ComputeDecisionList + kind: SchedulingDecision + listKind: SchedulingDecisionList plural: computedecisions shortNames: - cdec @@ -25,7 +25,7 @@ spec: - name: v1alpha1 schema: openAPIV3Schema: - description: ComputeDecision is the Schema for the computedecisions API + description: SchedulingDecision is the Schema for the computedecisions API properties: apiVersion: description: |- @@ -45,7 +45,7 @@ spec: metadata: type: object spec: - description: spec defines the desired state of ComputeDecision + description: spec defines the desired state of SchedulingDecision properties: pipeline: properties: @@ -71,22 +71,10 @@ spec: - pipeline type: object status: - description: status defines the observed state of ComputeDecision + description: status defines the observed state of SchedulingDecision properties: description: type: string - factors: - items: - properties: - expl: - type: string - host: - type: 
string - required: - - expl - - host - type: object - type: array type: object required: - spec diff --git a/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml b/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml new file mode 100644 index 00000000..ed90d92b --- /dev/null +++ b/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml @@ -0,0 +1,86 @@ +{{- if .Values.crd.enable }} +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + labels: + {{- include "chart.labels" . | nindent 4 }} + annotations: + {{- if .Values.crd.keep }} + "helm.sh/resource-policy": keep + {{- end }} + controller-gen.kubebuilder.io/version: v0.17.2 + name: schedulingdecisions.decisions.cortex +spec: + group: decisions.cortex + names: + kind: SchedulingDecision + listKind: SchedulingDecisionList + plural: schedulingdecisions + shortNames: + - sdecs + singular: schedulingdecision + scope: Cluster + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: SchedulingDecision is the Schema for the computedecisions API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: spec defines the desired state of SchedulingDecision + properties: + pipeline: + properties: + name: + type: string + outputs: + items: + properties: + step: + type: string + weights: + additionalProperties: + type: number + type: object + required: + - step + type: object + type: array + required: + - name + type: object + required: + - pipeline + type: object + status: + description: status defines the observed state of SchedulingDecision + properties: + description: + type: string + type: object + required: + - spec + type: object + served: true + storage: true + subresources: + status: {} +{{- end -}} diff --git a/decisions/internal/controller/controller.go b/decisions/internal/controller/controller.go index baac2711..bdfb6853 100644 --- a/decisions/internal/controller/controller.go +++ b/decisions/internal/controller/controller.go @@ -16,8 +16,8 @@ import ( decisionsv1alpha1 "github.com/cobaltcore-dev/cortex/decisions/api/v1alpha1" ) -// ComputeDecisionReconciler reconciles a ComputeDecision object -type ComputeDecisionReconciler struct { +// SchedulingDecisionReconciler reconciles a SchedulingDecision object +type SchedulingDecisionReconciler struct { // Client for the kubernetes API. client.Client // Kubernetes scheme to use for the decisions. @@ -32,24 +32,27 @@ type ComputeDecisionReconciler struct { // Reconcile is part of the main kubernetes reconciliation loop which aims to // move the current state of the cluster closer to the desired state. -func (r *ComputeDecisionReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { +func (r *SchedulingDecisionReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { _ = logf.FromContext(ctx) // Fetch the decision object. 
- var res v1alpha1.ComputeDecision + var res v1alpha1.SchedulingDecision if err := r.Get(ctx, req.NamespacedName, &res); err != nil { // Can happen when the resource was just deleted. return ctrl.Result{}, err } - // TODO: Reconciliation logic. + res.Status.Description = "...." + if err := r.Status().Update(ctx, &res); err != nil { + return ctrl.Result{}, err + } return ctrl.Result{}, nil // No need to requeue. } // SetupWithManager sets up the controller with the Manager. -func (r *ComputeDecisionReconciler) SetupWithManager(mgr ctrl.Manager) error { +func (r *SchedulingDecisionReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). - For(&decisionsv1alpha1.ComputeDecision{}). + For(&decisionsv1alpha1.SchedulingDecision{}). Named("computedecision"). WithOptions(controller.Options{ MaxConcurrentReconciles: 1, // Default From 824fae19aacdccaf80ea480fc71680a10953dcc4 Mon Sep 17 00:00:00 2001 From: Philipp Matthes Date: Thu, 25 Sep 2025 10:54:38 +0200 Subject: [PATCH 04/58] Add test for reconciler --- .../api/v1alpha1/schedulingdecision_types.go | 1 + .../internal/controller/controller_test.go | 70 +++++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100644 decisions/internal/controller/controller_test.go diff --git a/decisions/api/v1alpha1/schedulingdecision_types.go b/decisions/api/v1alpha1/schedulingdecision_types.go index 1beace65..4e8f7785 100644 --- a/decisions/api/v1alpha1/schedulingdecision_types.go +++ b/decisions/api/v1alpha1/schedulingdecision_types.go @@ -19,6 +19,7 @@ type SchedulingDecisionPipelineSpec struct { // SchedulingDecisionSpec defines the desired state of SchedulingDecision. 
type SchedulingDecisionSpec struct { + Input map[string]float64 `json:"input,omitempty"` Pipeline SchedulingDecisionPipelineSpec `json:"pipeline"` } diff --git a/decisions/internal/controller/controller_test.go b/decisions/internal/controller/controller_test.go new file mode 100644 index 00000000..40356958 --- /dev/null +++ b/decisions/internal/controller/controller_test.go @@ -0,0 +1,70 @@ +// Copyright 2025 SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package controller + +import ( + "testing" + + "github.com/cobaltcore-dev/cortex/decisions/api/v1alpha1" + "k8s.io/apimachinery/pkg/runtime" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +func TestReconcile(t *testing.T) { + resource := &v1alpha1.SchedulingDecision{ + ObjectMeta: ctrl.ObjectMeta{ + Name: "test-decision", + }, + Spec: v1alpha1.SchedulingDecisionSpec{ + Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ + Name: "test-pipeline", + Outputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ + { + Step: "weigher", + Weights: map[string]float64{ + "host1": 0.5, + "host2": 0.5, + }, + }, + { + Step: "filter", + Weights: map[string]float64{ + "host1": 0.0, + }, + }, + }, + }, + }, + } + + scheme := runtime.NewScheme() + if err := v1alpha1.AddToScheme(scheme); err != nil { + t.Fatalf("Failed to add scheme: %v", err) + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(resource). + WithStatusSubresource(&v1alpha1.SchedulingDecision{}). 
+ Build() + + req := ctrl.Request{ + NamespacedName: client.ObjectKey{ + Name: "test-decision", + }, + } + + reconciler := &SchedulingDecisionReconciler{ + Conf: Config{}, + Client: fakeClient, + } + _, err := reconciler.Reconcile(t.Context(), req) + if err != nil { + t.Fatalf("Reconcile returned an error: %v", err) + } + + t.Logf("Reconcile completed successfully: %v", resource) +} From e4110a2646243660ee77db45baefa027e094fa6e Mon Sep 17 00:00:00 2001 From: Philipp Matthes Date: Thu, 25 Sep 2025 10:59:32 +0200 Subject: [PATCH 05/58] Explicitly init map --- decisions/internal/controller/controller_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/decisions/internal/controller/controller_test.go b/decisions/internal/controller/controller_test.go index 40356958..5d3c8250 100644 --- a/decisions/internal/controller/controller_test.go +++ b/decisions/internal/controller/controller_test.go @@ -19,6 +19,7 @@ func TestReconcile(t *testing.T) { Name: "test-decision", }, Spec: v1alpha1.SchedulingDecisionSpec{ + Input: map[string]float64{}, Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ Name: "test-pipeline", Outputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ From 41fcd9ed0cd5b58f833e0111f289aabf66510be3 Mon Sep 17 00:00:00 2001 From: Philipp Matthes Date: Thu, 25 Sep 2025 11:02:53 +0200 Subject: [PATCH 06/58] Print description directly --- decisions/internal/controller/controller_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/decisions/internal/controller/controller_test.go b/decisions/internal/controller/controller_test.go index 5d3c8250..450a7fc2 100644 --- a/decisions/internal/controller/controller_test.go +++ b/decisions/internal/controller/controller_test.go @@ -67,5 +67,5 @@ func TestReconcile(t *testing.T) { t.Fatalf("Reconcile returned an error: %v", err) } - t.Logf("Reconcile completed successfully: %v", resource) + t.Logf("Reconcile completed successfully: description=%s", resource.Status.Description) } From 
9352171c742ae78675923659148357b4b77a08c6 Mon Sep 17 00:00:00 2001 From: Philipp Matthes Date: Thu, 25 Sep 2025 11:15:00 +0200 Subject: [PATCH 07/58] Add state and optional error to spec --- .../api/v1alpha1/schedulingdecision_types.go | 12 ++++++++++++ .../api/v1alpha1/zz_generated.deepcopy.go | 7 +++++++ .../decisions.cortex_schedulingdecisions.yaml | 18 +++++++++++++++++- .../decisions.cortex_schedulingdecisions.yaml | 18 +++++++++++++++++- .../decisions.cortex_schedulingdecisions.yaml | 18 +++++++++++++++++- 5 files changed, 70 insertions(+), 3 deletions(-) diff --git a/decisions/api/v1alpha1/schedulingdecision_types.go b/decisions/api/v1alpha1/schedulingdecision_types.go index 4e8f7785..03b43205 100644 --- a/decisions/api/v1alpha1/schedulingdecision_types.go +++ b/decisions/api/v1alpha1/schedulingdecision_types.go @@ -23,14 +23,26 @@ type SchedulingDecisionSpec struct { Pipeline SchedulingDecisionPipelineSpec `json:"pipeline"` } +type SchedulingDecisionState string + +const ( + SchedulingDecisionStateResolved SchedulingDecisionState = "resolved" + SchedulingDecisionStateError SchedulingDecisionState = "error" +) + // SchedulingDecisionStatus defines the observed state of SchedulingDecision. type SchedulingDecisionStatus struct { + State SchedulingDecisionState `json:"state,omitempty"` + // Only given if state is "error". 
+ Error string `json:"error,omitempty"` Description string `json:"description,omitempty"` } // +kubebuilder:object:root=true // +kubebuilder:subresource:status // +kubebuilder:resource:scope=Cluster,shortName=sdecs +// +kubebuilder:printcolumn:name="State",type="string",JSONPath=".status.state" +// +kubebuilder:printcolumn:name="Error",type="string",JSONPath=".status.error" // SchedulingDecision is the Schema for the computedecisions API type SchedulingDecision struct { diff --git a/decisions/api/v1alpha1/zz_generated.deepcopy.go b/decisions/api/v1alpha1/zz_generated.deepcopy.go index 5d891098..c6f7bc86 100644 --- a/decisions/api/v1alpha1/zz_generated.deepcopy.go +++ b/decisions/api/v1alpha1/zz_generated.deepcopy.go @@ -117,6 +117,13 @@ func (in *SchedulingDecisionPipelineSpec) DeepCopy() *SchedulingDecisionPipeline // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *SchedulingDecisionSpec) DeepCopyInto(out *SchedulingDecisionSpec) { *out = *in + if in.Input != nil { + in, out := &in.Input, &out.Input + *out = make(map[string]float64, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } in.Pipeline.DeepCopyInto(&out.Pipeline) } diff --git a/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml b/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml index 426ca823..144e33b5 100644 --- a/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml +++ b/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml @@ -16,7 +16,14 @@ spec: singular: schedulingdecision scope: Cluster versions: - - name: v1alpha1 + - additionalPrinterColumns: + - jsonPath: .status.state + name: State + type: string + - jsonPath: .status.error + name: Error + type: string + name: v1alpha1 schema: openAPIV3Schema: description: SchedulingDecision is the Schema for the computedecisions API @@ -41,6 +48,10 @@ spec: spec: description: spec defines the desired 
state of SchedulingDecision properties: + input: + additionalProperties: + type: number + type: object pipeline: properties: name: @@ -69,6 +80,11 @@ spec: properties: description: type: string + error: + description: Only given if state is "error". + type: string + state: + type: string type: object required: - spec diff --git a/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml b/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml index 426ca823..144e33b5 100644 --- a/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml +++ b/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml @@ -16,7 +16,14 @@ spec: singular: schedulingdecision scope: Cluster versions: - - name: v1alpha1 + - additionalPrinterColumns: + - jsonPath: .status.state + name: State + type: string + - jsonPath: .status.error + name: Error + type: string + name: v1alpha1 schema: openAPIV3Schema: description: SchedulingDecision is the Schema for the computedecisions API @@ -41,6 +48,10 @@ spec: spec: description: spec defines the desired state of SchedulingDecision properties: + input: + additionalProperties: + type: number + type: object pipeline: properties: name: @@ -69,6 +80,11 @@ spec: properties: description: type: string + error: + description: Only given if state is "error". 
+ type: string + state: + type: string type: object required: - spec diff --git a/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml b/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml index ed90d92b..c38ed7bf 100644 --- a/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml +++ b/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml @@ -22,7 +22,14 @@ spec: singular: schedulingdecision scope: Cluster versions: - - name: v1alpha1 + - additionalPrinterColumns: + - jsonPath: .status.state + name: State + type: string + - jsonPath: .status.error + name: Error + type: string + name: v1alpha1 schema: openAPIV3Schema: description: SchedulingDecision is the Schema for the computedecisions API @@ -47,6 +54,10 @@ spec: spec: description: spec defines the desired state of SchedulingDecision properties: + input: + additionalProperties: + type: number + type: object pipeline: properties: name: @@ -75,6 +86,11 @@ spec: properties: description: type: string + error: + description: Only given if state is "error". 
+ type: string + state: + type: string type: object required: - spec From 05c740eb37d0ff71801776bc8b58df83a3b5a46c Mon Sep 17 00:00:00 2001 From: Philipp Matthes Date: Thu, 25 Sep 2025 11:18:57 +0200 Subject: [PATCH 08/58] Rename computedecision -> schedulingdecision --- decisions/api/v1alpha1/schedulingdecision_types.go | 2 +- .../decisions.cortex_schedulingdecisions.yaml | 2 +- .../crd/decisions.cortex_schedulingdecisions.yaml | 2 +- decisions/config/crd/kustomization.yaml | 2 +- .../config/rbac/computereservation_admin_role.yaml | 6 +++--- .../rbac/computereservation_editor_role.yaml | 6 +++--- .../rbac/computereservation_viewer_role.yaml | 6 +++--- decisions/config/rbac/kustomization.yaml | 6 +++--- decisions/config/rbac/role.yaml | 6 +++--- .../crd/decisions.cortex_computedecisions.yaml | 8 ++++---- .../crd/decisions.cortex_schedulingdecisions.yaml | 2 +- .../rbac/computereservation_admin_role.yaml | 6 +++--- .../rbac/computereservation_editor_role.yaml | 6 +++--- .../rbac/computereservation_viewer_role.yaml | 6 +++--- decisions/dist/chart/templates/rbac/role.yaml | 6 +++--- decisions/internal/controller/controller.go | 8 ++++---- helm/library/cortex-core/templates/rbac.yaml | 14 ++++++++++++++ 17 files changed, 54 insertions(+), 40 deletions(-) diff --git a/decisions/api/v1alpha1/schedulingdecision_types.go b/decisions/api/v1alpha1/schedulingdecision_types.go index 03b43205..f3d02749 100644 --- a/decisions/api/v1alpha1/schedulingdecision_types.go +++ b/decisions/api/v1alpha1/schedulingdecision_types.go @@ -44,7 +44,7 @@ type SchedulingDecisionStatus struct { // +kubebuilder:printcolumn:name="State",type="string",JSONPath=".status.state" // +kubebuilder:printcolumn:name="Error",type="string",JSONPath=".status.error" -// SchedulingDecision is the Schema for the computedecisions API +// SchedulingDecision is the Schema for the schedulingdecisions API type SchedulingDecision struct { metav1.TypeMeta `json:",inline"` diff --git 
a/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml b/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml index 144e33b5..29d3a7ce 100644 --- a/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml +++ b/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml @@ -26,7 +26,7 @@ spec: name: v1alpha1 schema: openAPIV3Schema: - description: SchedulingDecision is the Schema for the computedecisions API + description: SchedulingDecision is the Schema for the schedulingdecisions API properties: apiVersion: description: |- diff --git a/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml b/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml index 144e33b5..29d3a7ce 100644 --- a/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml +++ b/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml @@ -26,7 +26,7 @@ spec: name: v1alpha1 schema: openAPIV3Schema: - description: SchedulingDecision is the Schema for the computedecisions API + description: SchedulingDecision is the Schema for the schedulingdecisions API properties: apiVersion: description: |- diff --git a/decisions/config/crd/kustomization.yaml b/decisions/config/crd/kustomization.yaml index 8c62eca2..c1caafe2 100644 --- a/decisions/config/crd/kustomization.yaml +++ b/decisions/config/crd/kustomization.yaml @@ -2,7 +2,7 @@ # since it depends on service name and namespace that are out of this kustomize package. 
# It should be run by config/default resources: -- bases/decisions.cortex_computedecisions.yaml +- bases/decisions.cortex_schedulingdecisions.yaml # +kubebuilder:scaffold:crdkustomizeresource patches: diff --git a/decisions/config/rbac/computereservation_admin_role.yaml b/decisions/config/rbac/computereservation_admin_role.yaml index 3a10d614..cd8699f9 100644 --- a/decisions/config/rbac/computereservation_admin_role.yaml +++ b/decisions/config/rbac/computereservation_admin_role.yaml @@ -11,17 +11,17 @@ metadata: labels: app.kubernetes.io/name: decisions app.kubernetes.io/managed-by: kustomize - name: computedecision-admin-role + name: schedulingdecision-admin-role rules: - apiGroups: - decisions.cortex resources: - - computedecisions + - schedulingdecisions verbs: - '*' - apiGroups: - decisions.cortex resources: - - computedecisions/status + - schedulingdecisions/status verbs: - get diff --git a/decisions/config/rbac/computereservation_editor_role.yaml b/decisions/config/rbac/computereservation_editor_role.yaml index 09f6ccd7..864ce9ad 100644 --- a/decisions/config/rbac/computereservation_editor_role.yaml +++ b/decisions/config/rbac/computereservation_editor_role.yaml @@ -11,12 +11,12 @@ metadata: labels: app.kubernetes.io/name: decisions app.kubernetes.io/managed-by: kustomize - name: computedecision-editor-role + name: schedulingdecision-editor-role rules: - apiGroups: - decisions.cortex resources: - - computedecisions + - schedulingdecisions verbs: - create - delete @@ -28,6 +28,6 @@ rules: - apiGroups: - decisions.cortex resources: - - computedecisions/status + - schedulingdecisions/status verbs: - get diff --git a/decisions/config/rbac/computereservation_viewer_role.yaml b/decisions/config/rbac/computereservation_viewer_role.yaml index cbe78ad4..4d62565e 100644 --- a/decisions/config/rbac/computereservation_viewer_role.yaml +++ b/decisions/config/rbac/computereservation_viewer_role.yaml @@ -11,12 +11,12 @@ metadata: labels: app.kubernetes.io/name: decisions 
app.kubernetes.io/managed-by: kustomize - name: computedecision-viewer-role + name: schedulingdecision-viewer-role rules: - apiGroups: - decisions.cortex resources: - - computedecisions + - schedulingdecisions verbs: - get - list @@ -24,6 +24,6 @@ rules: - apiGroups: - decisions.cortex resources: - - computedecisions/status + - schedulingdecisions/status verbs: - get diff --git a/decisions/config/rbac/kustomization.yaml b/decisions/config/rbac/kustomization.yaml index d3da9c0e..9a3976b9 100644 --- a/decisions/config/rbac/kustomization.yaml +++ b/decisions/config/rbac/kustomization.yaml @@ -22,7 +22,7 @@ resources: # default, aiding admins in cluster management. Those roles are # not used by the decisions itself. You can comment the following lines # if you do not want those helpers be installed with your Project. -- computedecision_admin_role.yaml -- computedecision_editor_role.yaml -- computedecision_viewer_role.yaml +- schedulingdecision_admin_role.yaml +- schedulingdecision_editor_role.yaml +- schedulingdecision_viewer_role.yaml diff --git a/decisions/config/rbac/role.yaml b/decisions/config/rbac/role.yaml index c249a499..ee66f8a1 100644 --- a/decisions/config/rbac/role.yaml +++ b/decisions/config/rbac/role.yaml @@ -7,7 +7,7 @@ rules: - apiGroups: - decisions.cortex resources: - - computedecisions + - schedulingdecisions verbs: - create - delete @@ -19,13 +19,13 @@ rules: - apiGroups: - decisions.cortex resources: - - computedecisions/finalizers + - schedulingdecisions/finalizers verbs: - update - apiGroups: - decisions.cortex resources: - - computedecisions/status + - schedulingdecisions/status verbs: - get - patch diff --git a/decisions/dist/chart/templates/crd/decisions.cortex_computedecisions.yaml b/decisions/dist/chart/templates/crd/decisions.cortex_computedecisions.yaml index 430f4fb0..f7104190 100644 --- a/decisions/dist/chart/templates/crd/decisions.cortex_computedecisions.yaml +++ 
b/decisions/dist/chart/templates/crd/decisions.cortex_computedecisions.yaml @@ -10,22 +10,22 @@ metadata: "helm.sh/resource-policy": keep {{- end }} controller-gen.kubebuilder.io/version: v0.17.2 - name: computedecisions.decisions.cortex + name: schedulingdecisions.decisions.cortex spec: group: decisions.cortex names: kind: SchedulingDecision listKind: SchedulingDecisionList - plural: computedecisions + plural: schedulingdecisions shortNames: - cdec - singular: computedecision + singular: schedulingdecision scope: Cluster versions: - name: v1alpha1 schema: openAPIV3Schema: - description: SchedulingDecision is the Schema for the computedecisions API + description: SchedulingDecision is the Schema for the schedulingdecisions API properties: apiVersion: description: |- diff --git a/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml b/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml index c38ed7bf..7a4286bf 100644 --- a/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml +++ b/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml @@ -32,7 +32,7 @@ spec: name: v1alpha1 schema: openAPIV3Schema: - description: SchedulingDecision is the Schema for the computedecisions API + description: SchedulingDecision is the Schema for the schedulingdecisions API properties: apiVersion: description: |- diff --git a/decisions/dist/chart/templates/rbac/computereservation_admin_role.yaml b/decisions/dist/chart/templates/rbac/computereservation_admin_role.yaml index fee77c79..6db64811 100644 --- a/decisions/dist/chart/templates/rbac/computereservation_admin_role.yaml +++ b/decisions/dist/chart/templates/rbac/computereservation_admin_role.yaml @@ -11,18 +11,18 @@ kind: ClusterRole metadata: labels: {{- include "chart.labels" . 
| nindent 4 }} - name: computedecision-admin-role + name: schedulingdecision-admin-role rules: - apiGroups: - decisions.cortex resources: - - computedecisions + - schedulingdecisions verbs: - '*' - apiGroups: - decisions.cortex resources: - - computedecisions/status + - schedulingdecisions/status verbs: - get {{- end -}} diff --git a/decisions/dist/chart/templates/rbac/computereservation_editor_role.yaml b/decisions/dist/chart/templates/rbac/computereservation_editor_role.yaml index d435a404..7a82611c 100644 --- a/decisions/dist/chart/templates/rbac/computereservation_editor_role.yaml +++ b/decisions/dist/chart/templates/rbac/computereservation_editor_role.yaml @@ -11,12 +11,12 @@ kind: ClusterRole metadata: labels: {{- include "chart.labels" . | nindent 4 }} - name: computedecision-editor-role + name: schedulingdecision-editor-role rules: - apiGroups: - decisions.cortex resources: - - computedecisions + - schedulingdecisions verbs: - create - delete @@ -28,7 +28,7 @@ rules: - apiGroups: - decisions.cortex resources: - - computedecisions/status + - schedulingdecisions/status verbs: - get {{- end -}} diff --git a/decisions/dist/chart/templates/rbac/computereservation_viewer_role.yaml b/decisions/dist/chart/templates/rbac/computereservation_viewer_role.yaml index 879ecb27..4375bd65 100644 --- a/decisions/dist/chart/templates/rbac/computereservation_viewer_role.yaml +++ b/decisions/dist/chart/templates/rbac/computereservation_viewer_role.yaml @@ -11,12 +11,12 @@ kind: ClusterRole metadata: labels: {{- include "chart.labels" . 
| nindent 4 }} - name: computedecision-viewer-role + name: schedulingdecision-viewer-role rules: - apiGroups: - decisions.cortex resources: - - computedecisions + - schedulingdecisions verbs: - get - list @@ -24,7 +24,7 @@ rules: - apiGroups: - decisions.cortex resources: - - computedecisions/status + - schedulingdecisions/status verbs: - get {{- end -}} diff --git a/decisions/dist/chart/templates/rbac/role.yaml b/decisions/dist/chart/templates/rbac/role.yaml index 24e720cf..b93e56fc 100644 --- a/decisions/dist/chart/templates/rbac/role.yaml +++ b/decisions/dist/chart/templates/rbac/role.yaml @@ -10,7 +10,7 @@ rules: - apiGroups: - decisions.cortex resources: - - computedecisions + - schedulingdecisions verbs: - create - delete @@ -22,13 +22,13 @@ rules: - apiGroups: - decisions.cortex resources: - - computedecisions/finalizers + - schedulingdecisions/finalizers verbs: - update - apiGroups: - decisions.cortex resources: - - computedecisions/status + - schedulingdecisions/status verbs: - get - patch diff --git a/decisions/internal/controller/controller.go b/decisions/internal/controller/controller.go index bdfb6853..734eaf1f 100644 --- a/decisions/internal/controller/controller.go +++ b/decisions/internal/controller/controller.go @@ -26,9 +26,9 @@ type SchedulingDecisionReconciler struct { Conf Config } -// +kubebuilder:rbac:groups=decisions.cortex,resources=computedecisions,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=decisions.cortex,resources=computedecisions/status,verbs=get;update;patch -// +kubebuilder:rbac:groups=decisions.cortex,resources=computedecisions/finalizers,verbs=update +// +kubebuilder:rbac:groups=decisions.cortex,resources=schedulingdecisions,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=decisions.cortex,resources=schedulingdecisions/status,verbs=get;update;patch +// +kubebuilder:rbac:groups=decisions.cortex,resources=schedulingdecisions/finalizers,verbs=update // Reconcile is part of 
the main kubernetes reconciliation loop which aims to // move the current state of the cluster closer to the desired state. @@ -53,7 +53,7 @@ func (r *SchedulingDecisionReconciler) Reconcile(ctx context.Context, req ctrl.R func (r *SchedulingDecisionReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). For(&decisionsv1alpha1.SchedulingDecision{}). - Named("computedecision"). + Named("schedulingdecision"). WithOptions(controller.Options{ MaxConcurrentReconciles: 1, // Default }). diff --git a/helm/library/cortex-core/templates/rbac.yaml b/helm/library/cortex-core/templates/rbac.yaml index 57903041..688a8d79 100644 --- a/helm/library/cortex-core/templates/rbac.yaml +++ b/helm/library/cortex-core/templates/rbac.yaml @@ -12,6 +12,20 @@ subjects: name: {{ .Release.Namespace }}-{{ include "cortex.fullname" . }} namespace: {{ .Release.Namespace }} --- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ .Release.Namespace }}-{{ include "cortex.fullname" . }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + # From the decisions operator. + name: schedulingdecision-viewer-role +subjects: +- kind: ServiceAccount + name: {{ .Release.Namespace }}-{{ include "cortex.fullname" . 
}} + namespace: {{ .Release.Namespace }} +--- apiVersion: v1 kind: ServiceAccount metadata: From 3b3ab1e9adc4474dc82dcde4959dba617a05edc2 Mon Sep 17 00:00:00 2001 From: Philipp Matthes Date: Thu, 25 Sep 2025 11:19:52 +0200 Subject: [PATCH 09/58] Also rename rbac files --- ...rvation_admin_role.yaml => schedulingdecision_admin_role.yaml} | 0 ...ation_editor_role.yaml => schedulingdecision_editor_role.yaml} | 0 ...ation_viewer_role.yaml => schedulingdecision_viewer_role.yaml} | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename decisions/dist/chart/templates/rbac/{computereservation_admin_role.yaml => schedulingdecision_admin_role.yaml} (100%) rename decisions/dist/chart/templates/rbac/{computereservation_editor_role.yaml => schedulingdecision_editor_role.yaml} (100%) rename decisions/dist/chart/templates/rbac/{computereservation_viewer_role.yaml => schedulingdecision_viewer_role.yaml} (100%) diff --git a/decisions/dist/chart/templates/rbac/computereservation_admin_role.yaml b/decisions/dist/chart/templates/rbac/schedulingdecision_admin_role.yaml similarity index 100% rename from decisions/dist/chart/templates/rbac/computereservation_admin_role.yaml rename to decisions/dist/chart/templates/rbac/schedulingdecision_admin_role.yaml diff --git a/decisions/dist/chart/templates/rbac/computereservation_editor_role.yaml b/decisions/dist/chart/templates/rbac/schedulingdecision_editor_role.yaml similarity index 100% rename from decisions/dist/chart/templates/rbac/computereservation_editor_role.yaml rename to decisions/dist/chart/templates/rbac/schedulingdecision_editor_role.yaml diff --git a/decisions/dist/chart/templates/rbac/computereservation_viewer_role.yaml b/decisions/dist/chart/templates/rbac/schedulingdecision_viewer_role.yaml similarity index 100% rename from decisions/dist/chart/templates/rbac/computereservation_viewer_role.yaml rename to decisions/dist/chart/templates/rbac/schedulingdecision_viewer_role.yaml From 242ac6fe434ef167e95e43f7e73d4bad266e10df Mon 
Sep 17 00:00:00 2001 From: mblos <> Date: Thu, 25 Sep 2025 11:21:19 +0200 Subject: [PATCH 10/58] Check there is at least one host in the input --- decisions/internal/controller/controller.go | 10 +- .../internal/controller/controller_test.go | 93 ++++++++++++++++++- 2 files changed, 100 insertions(+), 3 deletions(-) diff --git a/decisions/internal/controller/controller.go b/decisions/internal/controller/controller.go index 734eaf1f..4d1bd519 100644 --- a/decisions/internal/controller/controller.go +++ b/decisions/internal/controller/controller.go @@ -41,7 +41,15 @@ func (r *SchedulingDecisionReconciler) Reconcile(ctx context.Context, req ctrl.R return ctrl.Result{}, err } - res.Status.Description = "...." + // Validate that there is at least one host in the input + if len(res.Spec.Input) == 0 { + res.Status.State = v1alpha1.SchedulingDecisionStateError + res.Status.Error = "No hosts provided in input" + } else { + res.Status.State = v1alpha1.SchedulingDecisionStateResolved + res.Status.Description = "...." 
+ } + if err := r.Status().Update(ctx, &res); err != nil { return ctrl.Result{}, err } diff --git a/decisions/internal/controller/controller_test.go b/decisions/internal/controller/controller_test.go index 450a7fc2..cd36faa7 100644 --- a/decisions/internal/controller/controller_test.go +++ b/decisions/internal/controller/controller_test.go @@ -19,7 +19,10 @@ func TestReconcile(t *testing.T) { Name: "test-decision", }, Spec: v1alpha1.SchedulingDecisionSpec{ - Input: map[string]float64{}, + Input: map[string]float64{ + "host1": 1.0, + "host2": 2.0, + }, Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ Name: "test-pipeline", Outputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ @@ -67,5 +70,91 @@ func TestReconcile(t *testing.T) { t.Fatalf("Reconcile returned an error: %v", err) } - t.Logf("Reconcile completed successfully: description=%s", resource.Status.Description) + // Fetch the updated resource to check status + var updatedResource v1alpha1.SchedulingDecision + if err := fakeClient.Get(t.Context(), client.ObjectKey{Name: "test-decision"}, &updatedResource); err != nil { + t.Fatalf("Failed to get updated resource: %v", err) + } + + // Verify success state + if updatedResource.Status.State != v1alpha1.SchedulingDecisionStateResolved { + t.Errorf("Expected state '%s', got '%s'", v1alpha1.SchedulingDecisionStateResolved, updatedResource.Status.State) + } + if updatedResource.Status.Error != "" { + t.Errorf("Expected empty error, got '%s'", updatedResource.Status.Error) + } + if updatedResource.Status.Description != "...." 
{ + t.Errorf("Expected description '....', got '%s'", updatedResource.Status.Description) + } + + t.Logf("Reconcile completed successfully: state=%s, description=%s", updatedResource.Status.State, updatedResource.Status.Description) +} + +func TestReconcileEmptyInput(t *testing.T) { + resource := &v1alpha1.SchedulingDecision{ + ObjectMeta: ctrl.ObjectMeta{ + Name: "test-decision-empty-input", + }, + Spec: v1alpha1.SchedulingDecisionSpec{ + Input: map[string]float64{}, // Empty input - no hosts + Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ + Name: "test-pipeline", + Outputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ + { + Step: "weigher", + Weights: map[string]float64{ + "host1": 0.5, + "host2": 0.5, + }, + }, + }, + }, + }, + } + + scheme := runtime.NewScheme() + if err := v1alpha1.AddToScheme(scheme); err != nil { + t.Fatalf("Failed to add scheme: %v", err) + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(resource). + WithStatusSubresource(&v1alpha1.SchedulingDecision{}). 
+ Build() + + req := ctrl.Request{ + NamespacedName: client.ObjectKey{ + Name: "test-decision-empty-input", + }, + } + + reconciler := &SchedulingDecisionReconciler{ + Conf: Config{}, + Client: fakeClient, + } + _, err := reconciler.Reconcile(t.Context(), req) + if err != nil { + t.Fatalf("Reconcile returned an error: %v", err) + } + + // Fetch the updated resource to check status + var updatedResource v1alpha1.SchedulingDecision + if err := fakeClient.Get(t.Context(), client.ObjectKey{Name: "test-decision-empty-input"}, &updatedResource); err != nil { + t.Fatalf("Failed to get updated resource: %v", err) + } + + // Verify error state + if updatedResource.Status.State != v1alpha1.SchedulingDecisionStateError { + t.Errorf("Expected state '%s', got '%s'", v1alpha1.SchedulingDecisionStateError, updatedResource.Status.State) + } + expectedError := "No hosts provided in input" + if updatedResource.Status.Error != expectedError { + t.Errorf("Expected error '%s', got '%s'", expectedError, updatedResource.Status.Error) + } + if updatedResource.Status.Description != "" { + t.Errorf("Expected empty description, got '%s'", updatedResource.Status.Description) + } + + t.Logf("Reconcile completed with error: state=%s, error=%s", updatedResource.Status.State, updatedResource.Status.Error) } From ec3230bbf62d89a61a520ec44a10b91696e6ed2d Mon Sep 17 00:00:00 2001 From: mblos <> Date: Thu, 25 Sep 2025 11:25:15 +0200 Subject: [PATCH 11/58] Check that all hosts in output exist --- decisions/internal/controller/controller.go | 15 ++++ .../internal/controller/controller_test.go | 72 +++++++++++++++++++ 2 files changed, 87 insertions(+) diff --git a/decisions/internal/controller/controller.go b/decisions/internal/controller/controller.go index 4d1bd519..922d0002 100644 --- a/decisions/internal/controller/controller.go +++ b/decisions/internal/controller/controller.go @@ -46,7 +46,22 @@ func (r *SchedulingDecisionReconciler) Reconcile(ctx context.Context, req ctrl.R res.Status.State = 
v1alpha1.SchedulingDecisionStateError res.Status.Error = "No hosts provided in input" } else { + // Validate that all hosts in pipeline outputs exist in input + for _, output := range res.Spec.Pipeline.Outputs { + for hostName := range output.Weights { + if _, exists := res.Spec.Input[hostName]; !exists { + res.Status.State = v1alpha1.SchedulingDecisionStateError + res.Status.Error = "Host '" + hostName + "' in pipeline output not found in input" + if err := r.Status().Update(ctx, &res); err != nil { + return ctrl.Result{}, err + } + return ctrl.Result{}, nil + } + } + } + res.Status.State = v1alpha1.SchedulingDecisionStateResolved + res.Status.Error = "" res.Status.Description = "...." } diff --git a/decisions/internal/controller/controller_test.go b/decisions/internal/controller/controller_test.go index cd36faa7..596fb9e3 100644 --- a/decisions/internal/controller/controller_test.go +++ b/decisions/internal/controller/controller_test.go @@ -158,3 +158,75 @@ func TestReconcileEmptyInput(t *testing.T) { t.Logf("Reconcile completed with error: state=%s, error=%s", updatedResource.Status.State, updatedResource.Status.Error) } + +func TestReconcileHostMismatch(t *testing.T) { + resource := &v1alpha1.SchedulingDecision{ + ObjectMeta: ctrl.ObjectMeta{ + Name: "test-decision-host-mismatch", + }, + Spec: v1alpha1.SchedulingDecisionSpec{ + Input: map[string]float64{ + "host1": 1.0, + "host2": 2.0, + }, // host3 is missing but referenced in pipeline output + Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ + Name: "test-pipeline", + Outputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ + { + Step: "weigher", + Weights: map[string]float64{ + "host1": 0.5, + "host3": 0.3, // host3 doesn't exist in input + }, + }, + }, + }, + }, + } + + scheme := runtime.NewScheme() + if err := v1alpha1.AddToScheme(scheme); err != nil { + t.Fatalf("Failed to add scheme: %v", err) + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(resource). 
+ WithStatusSubresource(&v1alpha1.SchedulingDecision{}). + Build() + + req := ctrl.Request{ + NamespacedName: client.ObjectKey{ + Name: "test-decision-host-mismatch", + }, + } + + reconciler := &SchedulingDecisionReconciler{ + Conf: Config{}, + Client: fakeClient, + } + _, err := reconciler.Reconcile(t.Context(), req) + if err != nil { + t.Fatalf("Reconcile returned an error: %v", err) + } + + // Fetch the updated resource to check status + var updatedResource v1alpha1.SchedulingDecision + if err := fakeClient.Get(t.Context(), client.ObjectKey{Name: "test-decision-host-mismatch"}, &updatedResource); err != nil { + t.Fatalf("Failed to get updated resource: %v", err) + } + + // Verify error state for host mismatch + if updatedResource.Status.State != v1alpha1.SchedulingDecisionStateError { + t.Errorf("Expected state '%s', got '%s'", v1alpha1.SchedulingDecisionStateError, updatedResource.Status.State) + } + expectedError := "Host 'host3' in pipeline output not found in input" + if updatedResource.Status.Error != expectedError { + t.Errorf("Expected error '%s', got '%s'", expectedError, updatedResource.Status.Error) + } + if updatedResource.Status.Description != "" { + t.Errorf("Expected empty description, got '%s'", updatedResource.Status.Description) + } + + t.Logf("Reconcile completed with host mismatch error: state=%s, error=%s", updatedResource.Status.State, updatedResource.Status.Error) +} From 941f700fcdbaef630b938be4837f2ffd2aef687c Mon Sep 17 00:00:00 2001 From: Philipp Matthes Date: Thu, 25 Sep 2025 11:31:12 +0200 Subject: [PATCH 12/58] Rename weights in spec to activations --- .../api/v1alpha1/schedulingdecision_types.go | 5 +-- .../api/v1alpha1/zz_generated.deepcopy.go | 4 +-- .../decisions.cortex_schedulingdecisions.yaml | 11 +++--- .../decisions.cortex_schedulingdecisions.yaml | 11 +++--- .../decisions.cortex_schedulingdecisions.yaml | 11 +++--- .../rbac/computereservation_admin_role.yaml | 28 +++++++++++++++ .../rbac/computereservation_editor_role.yaml | 
34 +++++++++++++++++++ .../rbac/computereservation_viewer_role.yaml | 30 ++++++++++++++++ decisions/internal/controller/controller.go | 2 +- .../internal/controller/controller_test.go | 8 ++--- 10 files changed, 123 insertions(+), 21 deletions(-) create mode 100644 decisions/dist/chart/templates/rbac/computereservation_admin_role.yaml create mode 100644 decisions/dist/chart/templates/rbac/computereservation_editor_role.yaml create mode 100644 decisions/dist/chart/templates/rbac/computereservation_viewer_role.yaml diff --git a/decisions/api/v1alpha1/schedulingdecision_types.go b/decisions/api/v1alpha1/schedulingdecision_types.go index f3d02749..9a1cd356 100644 --- a/decisions/api/v1alpha1/schedulingdecision_types.go +++ b/decisions/api/v1alpha1/schedulingdecision_types.go @@ -8,8 +8,9 @@ import ( ) type SchedulingDecisionPipelineOutputSpec struct { - Step string `json:"step"` - Weights map[string]float64 `json:"weights,omitempty"` + Step string `json:"step"` + // Weights calculated by this step subjected to the activation function. + Activations map[string]float64 `json:"activations,omitempty"` } type SchedulingDecisionPipelineSpec struct { diff --git a/decisions/api/v1alpha1/zz_generated.deepcopy.go b/decisions/api/v1alpha1/zz_generated.deepcopy.go index c6f7bc86..c64d3653 100644 --- a/decisions/api/v1alpha1/zz_generated.deepcopy.go +++ b/decisions/api/v1alpha1/zz_generated.deepcopy.go @@ -73,8 +73,8 @@ func (in *SchedulingDecisionList) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *SchedulingDecisionPipelineOutputSpec) DeepCopyInto(out *SchedulingDecisionPipelineOutputSpec) { *out = *in - if in.Weights != nil { - in, out := &in.Weights, &out.Weights + if in.Activations != nil { + in, out := &in.Activations, &out.Activations *out = make(map[string]float64, len(*in)) for key, val := range *in { (*out)[key] = val diff --git a/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml b/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml index 29d3a7ce..252ddd08 100644 --- a/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml +++ b/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml @@ -26,7 +26,8 @@ spec: name: v1alpha1 schema: openAPIV3Schema: - description: SchedulingDecision is the Schema for the schedulingdecisions API + description: SchedulingDecision is the Schema for the schedulingdecisions + API properties: apiVersion: description: |- @@ -59,12 +60,14 @@ spec: outputs: items: properties: - step: - type: string - weights: + activations: additionalProperties: type: number + description: Weights calculated by this step subjected to + the activation function. 
type: object + step: + type: string required: - step type: object diff --git a/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml b/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml index 29d3a7ce..252ddd08 100644 --- a/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml +++ b/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml @@ -26,7 +26,8 @@ spec: name: v1alpha1 schema: openAPIV3Schema: - description: SchedulingDecision is the Schema for the schedulingdecisions API + description: SchedulingDecision is the Schema for the schedulingdecisions + API properties: apiVersion: description: |- @@ -59,12 +60,14 @@ spec: outputs: items: properties: - step: - type: string - weights: + activations: additionalProperties: type: number + description: Weights calculated by this step subjected to + the activation function. type: object + step: + type: string required: - step type: object diff --git a/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml b/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml index 7a4286bf..02dc9a53 100644 --- a/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml +++ b/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml @@ -32,7 +32,8 @@ spec: name: v1alpha1 schema: openAPIV3Schema: - description: SchedulingDecision is the Schema for the schedulingdecisions API + description: SchedulingDecision is the Schema for the schedulingdecisions + API properties: apiVersion: description: |- @@ -65,12 +66,14 @@ spec: outputs: items: properties: - step: - type: string - weights: + activations: additionalProperties: type: number + description: Weights calculated by this step subjected to + the activation function. 
type: object + step: + type: string required: - step type: object diff --git a/decisions/dist/chart/templates/rbac/computereservation_admin_role.yaml b/decisions/dist/chart/templates/rbac/computereservation_admin_role.yaml new file mode 100644 index 00000000..6db64811 --- /dev/null +++ b/decisions/dist/chart/templates/rbac/computereservation_admin_role.yaml @@ -0,0 +1,28 @@ +{{- if .Values.rbac.enable }} +# This rule is not used by the project decisions itself. +# It is provided to allow the cluster admin to help manage permissions for users. +# +# Grants full permissions ('*') over decisions.cortex. +# This role is intended for users authorized to modify roles and bindings within the cluster, +# enabling them to delegate specific permissions to other users or groups as needed. + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + {{- include "chart.labels" . | nindent 4 }} + name: schedulingdecision-admin-role +rules: +- apiGroups: + - decisions.cortex + resources: + - schedulingdecisions + verbs: + - '*' +- apiGroups: + - decisions.cortex + resources: + - schedulingdecisions/status + verbs: + - get +{{- end -}} diff --git a/decisions/dist/chart/templates/rbac/computereservation_editor_role.yaml b/decisions/dist/chart/templates/rbac/computereservation_editor_role.yaml new file mode 100644 index 00000000..7a82611c --- /dev/null +++ b/decisions/dist/chart/templates/rbac/computereservation_editor_role.yaml @@ -0,0 +1,34 @@ +{{- if .Values.rbac.enable }} +# This rule is not used by the project decisions itself. +# It is provided to allow the cluster admin to help manage permissions for users. +# +# Grants permissions to create, update, and delete resources within the decisions.cortex. +# This role is intended for users who need to manage these resources +# but should not control RBAC or manage permissions for others. + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + {{- include "chart.labels" . 
| nindent 4 }} + name: schedulingdecision-editor-role +rules: +- apiGroups: + - decisions.cortex + resources: + - schedulingdecisions + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - decisions.cortex + resources: + - schedulingdecisions/status + verbs: + - get +{{- end -}} diff --git a/decisions/dist/chart/templates/rbac/computereservation_viewer_role.yaml b/decisions/dist/chart/templates/rbac/computereservation_viewer_role.yaml new file mode 100644 index 00000000..4375bd65 --- /dev/null +++ b/decisions/dist/chart/templates/rbac/computereservation_viewer_role.yaml @@ -0,0 +1,30 @@ +{{- if .Values.rbac.enable }} +# This rule is not used by the project decisions itself. +# It is provided to allow the cluster admin to help manage permissions for users. +# +# Grants read-only access to decisions.cortex resources. +# This role is intended for users who need visibility into these resources +# without permissions to modify them. It is ideal for monitoring purposes and limited-access viewing. + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + {{- include "chart.labels" . 
| nindent 4 }} + name: schedulingdecision-viewer-role +rules: +- apiGroups: + - decisions.cortex + resources: + - schedulingdecisions + verbs: + - get + - list + - watch +- apiGroups: + - decisions.cortex + resources: + - schedulingdecisions/status + verbs: + - get +{{- end -}} diff --git a/decisions/internal/controller/controller.go b/decisions/internal/controller/controller.go index 922d0002..3840cb1d 100644 --- a/decisions/internal/controller/controller.go +++ b/decisions/internal/controller/controller.go @@ -48,7 +48,7 @@ func (r *SchedulingDecisionReconciler) Reconcile(ctx context.Context, req ctrl.R } else { // Validate that all hosts in pipeline outputs exist in input for _, output := range res.Spec.Pipeline.Outputs { - for hostName := range output.Weights { + for hostName := range output.Activations { if _, exists := res.Spec.Input[hostName]; !exists { res.Status.State = v1alpha1.SchedulingDecisionStateError res.Status.Error = "Host '" + hostName + "' in pipeline output not found in input" diff --git a/decisions/internal/controller/controller_test.go b/decisions/internal/controller/controller_test.go index 596fb9e3..ca2833c2 100644 --- a/decisions/internal/controller/controller_test.go +++ b/decisions/internal/controller/controller_test.go @@ -28,14 +28,14 @@ func TestReconcile(t *testing.T) { Outputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ { Step: "weigher", - Weights: map[string]float64{ + Activations: map[string]float64{ "host1": 0.5, "host2": 0.5, }, }, { Step: "filter", - Weights: map[string]float64{ + Activations: map[string]float64{ "host1": 0.0, }, }, @@ -102,7 +102,7 @@ func TestReconcileEmptyInput(t *testing.T) { Outputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ { Step: "weigher", - Weights: map[string]float64{ + Activations: map[string]float64{ "host1": 0.5, "host2": 0.5, }, @@ -174,7 +174,7 @@ func TestReconcileHostMismatch(t *testing.T) { Outputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ { Step: "weigher", - Weights: 
map[string]float64{ + Activations: map[string]float64{ "host1": 0.5, "host3": 0.3, // host3 doesn't exist in input }, From b65e394548f92905bdf7ece54f494a452acce7c7 Mon Sep 17 00:00:00 2001 From: mblos <> Date: Thu, 25 Sep 2025 11:46:59 +0200 Subject: [PATCH 13/58] Get final actions of each host --- .../api/v1alpha1/schedulingdecision_types.go | 4 + .../decisions.cortex_schedulingdecisions.yaml | 12 + .../decisions.cortex_schedulingdecisions.yaml | 12 + decisions/internal/controller/controller.go | 35 ++- .../internal/controller/controller_test.go | 280 +++++++++++++++++- 5 files changed, 339 insertions(+), 4 deletions(-) diff --git a/decisions/api/v1alpha1/schedulingdecision_types.go b/decisions/api/v1alpha1/schedulingdecision_types.go index 9a1cd356..a19f8123 100644 --- a/decisions/api/v1alpha1/schedulingdecision_types.go +++ b/decisions/api/v1alpha1/schedulingdecision_types.go @@ -37,6 +37,10 @@ type SchedulingDecisionStatus struct { // Only given if state is "error". Error string `json:"error,omitempty"` Description string `json:"description,omitempty"` + // Final scores for each host after processing all pipeline steps. + FinalScores map[string]float64 `json:"finalScores,omitempty"` + // Hosts that were deleted during pipeline processing and all steps that attempted to delete them. 
+ DeletedHosts map[string][]string `json:"deletedHosts,omitempty"` } // +kubebuilder:object:root=true diff --git a/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml b/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml index 252ddd08..0f2a8e98 100644 --- a/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml +++ b/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml @@ -81,11 +81,23 @@ spec: status: description: status defines the observed state of SchedulingDecision properties: + deletedHosts: + additionalProperties: + type: string + description: Hosts that were deleted during pipeline processing and + the step that deleted them. + type: object description: type: string error: description: Only given if state is "error". type: string + finalScores: + additionalProperties: + type: number + description: Final scores for each host after processing all pipeline + steps. + type: object state: type: string type: object diff --git a/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml b/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml index 02dc9a53..32117e90 100644 --- a/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml +++ b/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml @@ -87,11 +87,23 @@ spec: status: description: status defines the observed state of SchedulingDecision properties: + deletedHosts: + additionalProperties: + type: string + description: Hosts that were deleted during pipeline processing and + the step that deleted them. + type: object description: type: string error: description: Only given if state is "error". type: string + finalScores: + additionalProperties: + type: number + description: Final scores for each host after processing all pipeline + steps. 
+ type: object state: type: string type: object diff --git a/decisions/internal/controller/controller.go b/decisions/internal/controller/controller.go index 3840cb1d..99c1e800 100644 --- a/decisions/internal/controller/controller.go +++ b/decisions/internal/controller/controller.go @@ -60,9 +60,42 @@ func (r *SchedulingDecisionReconciler) Reconcile(ctx context.Context, req ctrl.R } } + // Calculate final scores for all hosts + finalScores := make(map[string]float64) + deletedHosts := make(map[string][]string) + + // Start with input values as initial scores + for hostName, inputValue := range res.Spec.Input { + finalScores[hostName] = inputValue + } + + // Process each pipeline step sequentially + for _, output := range res.Spec.Pipeline.Outputs { + // Check which hosts will be deleted in this step + for hostName := range finalScores { + if _, exists := output.Activations[hostName]; !exists { + // Host not in this step's activations - will be deleted + deletedHosts[hostName] = append(deletedHosts[hostName], output.Step) + } + } + + // Apply activations and remove hosts not in this step + for hostName := range finalScores { + if activation, exists := output.Activations[hostName]; exists { + // Add activation to current score + finalScores[hostName] = finalScores[hostName] + activation + } else { + // Host not in this step - remove it + delete(finalScores, hostName) + } + } + } + res.Status.State = v1alpha1.SchedulingDecisionStateResolved res.Status.Error = "" - res.Status.Description = "...." 
+ res.Status.FinalScores = finalScores + res.Status.DeletedHosts = deletedHosts + res.Status.Description = "Calculated final scores for hosts" } if err := r.Status().Update(ctx, &res); err != nil { diff --git a/decisions/internal/controller/controller_test.go b/decisions/internal/controller/controller_test.go index ca2833c2..946a7f20 100644 --- a/decisions/internal/controller/controller_test.go +++ b/decisions/internal/controller/controller_test.go @@ -83,11 +83,49 @@ func TestReconcile(t *testing.T) { if updatedResource.Status.Error != "" { t.Errorf("Expected empty error, got '%s'", updatedResource.Status.Error) } - if updatedResource.Status.Description != "...." { - t.Errorf("Expected description '....', got '%s'", updatedResource.Status.Description) + if updatedResource.Status.Description != "Calculated final scores for hosts" { + t.Errorf("Expected description 'Calculated final scores for hosts', got '%s'", updatedResource.Status.Description) } - t.Logf("Reconcile completed successfully: state=%s, description=%s", updatedResource.Status.State, updatedResource.Status.Description) + // Verify final scores calculation + // Expected: host1: 1.0 + 0.5 + 0.0 = 1.5, host2: removed by filter step + expectedFinalScores := map[string]float64{ + "host1": 1.5, + } + if len(updatedResource.Status.FinalScores) != len(expectedFinalScores) { + t.Errorf("Expected %d final scores, got %d", len(expectedFinalScores), len(updatedResource.Status.FinalScores)) + } + for host, expectedScore := range expectedFinalScores { + if actualScore, exists := updatedResource.Status.FinalScores[host]; !exists { + t.Errorf("Expected final score for host '%s', but it was not found", host) + } else if actualScore != expectedScore { + t.Errorf("Expected final score for host '%s' to be %f, got %f", host, expectedScore, actualScore) + } + } + + // Verify deleted hosts tracking + expectedDeletedHosts := map[string][]string{ + "host2": {"filter"}, // host2 was deleted by the filter step + } + if 
len(updatedResource.Status.DeletedHosts) != len(expectedDeletedHosts) { + t.Errorf("Expected %d deleted hosts, got %d", len(expectedDeletedHosts), len(updatedResource.Status.DeletedHosts)) + } + for host, expectedSteps := range expectedDeletedHosts { + if actualSteps, exists := updatedResource.Status.DeletedHosts[host]; !exists { + t.Errorf("Expected deleted host '%s', but it was not found", host) + } else if len(actualSteps) != len(expectedSteps) { + t.Errorf("Expected host '%s' to be deleted by %d steps, got %d", host, len(expectedSteps), len(actualSteps)) + } else { + for i, expectedStep := range expectedSteps { + if actualSteps[i] != expectedStep { + t.Errorf("Expected host '%s' step %d to be '%s', got '%s'", host, i, expectedStep, actualSteps[i]) + } + } + } + } + + t.Logf("Reconcile completed successfully: state=%s, finalScores=%v, deletedHosts=%v", + updatedResource.Status.State, updatedResource.Status.FinalScores, updatedResource.Status.DeletedHosts) } func TestReconcileEmptyInput(t *testing.T) { @@ -230,3 +268,239 @@ func TestReconcileHostMismatch(t *testing.T) { t.Logf("Reconcile completed with host mismatch error: state=%s, error=%s", updatedResource.Status.State, updatedResource.Status.Error) } + +func TestReconcileComplexScoring(t *testing.T) { + resource := &v1alpha1.SchedulingDecision{ + ObjectMeta: ctrl.ObjectMeta{ + Name: "test-decision-complex", + }, + Spec: v1alpha1.SchedulingDecisionSpec{ + Input: map[string]float64{ + "host1": 1.0, + "host2": 2.0, + "host3": 3.0, + "host4": 4.0, + }, + Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ + Name: "complex-pipeline", + Outputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ + { + Step: "weigher1", + Activations: map[string]float64{ + "host1": 0.5, + "host2": 1.0, + "host3": -0.5, + "host4": 2.0, + }, + }, + { + Step: "filter1", + Activations: map[string]float64{ + "host1": 0.2, + "host3": 0.1, // host2 and host4 removed by this step + }, + }, + { + Step: "weigher2", + Activations: 
map[string]float64{ + "host1": -0.3, // host3 removed by this step + }, + }, + }, + }, + }, + } + + scheme := runtime.NewScheme() + if err := v1alpha1.AddToScheme(scheme); err != nil { + t.Fatalf("Failed to add scheme: %v", err) + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(resource). + WithStatusSubresource(&v1alpha1.SchedulingDecision{}). + Build() + + req := ctrl.Request{ + NamespacedName: client.ObjectKey{ + Name: "test-decision-complex", + }, + } + + reconciler := &SchedulingDecisionReconciler{ + Conf: Config{}, + Client: fakeClient, + } + _, err := reconciler.Reconcile(t.Context(), req) + if err != nil { + t.Fatalf("Reconcile returned an error: %v", err) + } + + // Fetch the updated resource to check status + var updatedResource v1alpha1.SchedulingDecision + if err := fakeClient.Get(t.Context(), client.ObjectKey{Name: "test-decision-complex"}, &updatedResource); err != nil { + t.Fatalf("Failed to get updated resource: %v", err) + } + + // Verify success state + if updatedResource.Status.State != v1alpha1.SchedulingDecisionStateResolved { + t.Errorf("Expected state '%s', got '%s'", v1alpha1.SchedulingDecisionStateResolved, updatedResource.Status.State) + } + + // Verify final scores calculation + // Expected: host1: 1.0 + 0.5 + 0.2 + (-0.3) = 1.4 + // host2: removed by filter1, host3: removed by weigher2, host4: removed by filter1 + expectedFinalScores := map[string]float64{ + "host1": 1.4, + } + if len(updatedResource.Status.FinalScores) != len(expectedFinalScores) { + t.Errorf("Expected %d final scores, got %d", len(expectedFinalScores), len(updatedResource.Status.FinalScores)) + } + for host, expectedScore := range expectedFinalScores { + if actualScore, exists := updatedResource.Status.FinalScores[host]; !exists { + t.Errorf("Expected final score for host '%s', but it was not found", host) + } else if actualScore != expectedScore { + t.Errorf("Expected final score for host '%s' to be %f, got %f", host, expectedScore, 
actualScore) + } + } + + // Verify deleted hosts tracking + expectedDeletedHosts := map[string][]string{ + "host2": {"filter1"}, // host2 deleted by filter1 + "host4": {"filter1"}, // host4 deleted by filter1 + "host3": {"weigher2"}, // host3 deleted by weigher2 + } + if len(updatedResource.Status.DeletedHosts) != len(expectedDeletedHosts) { + t.Errorf("Expected %d deleted hosts, got %d", len(expectedDeletedHosts), len(updatedResource.Status.DeletedHosts)) + } + for host, expectedSteps := range expectedDeletedHosts { + if actualSteps, exists := updatedResource.Status.DeletedHosts[host]; !exists { + t.Errorf("Expected deleted host '%s', but it was not found", host) + } else if len(actualSteps) != len(expectedSteps) { + t.Errorf("Expected host '%s' to be deleted by %d steps, got %d", host, len(expectedSteps), len(actualSteps)) + } else { + for i, expectedStep := range expectedSteps { + if actualSteps[i] != expectedStep { + t.Errorf("Expected host '%s' step %d to be '%s', got '%s'", host, i, expectedStep, actualSteps[i]) + } + } + } + } + + t.Logf("Complex scoring completed: finalScores=%v, deletedHosts=%v", + updatedResource.Status.FinalScores, updatedResource.Status.DeletedHosts) +} + +func TestReconcileMultipleDeletionSteps(t *testing.T) { + resource := &v1alpha1.SchedulingDecision{ + ObjectMeta: ctrl.ObjectMeta{ + Name: "test-decision-multiple-deletions", + }, + Spec: v1alpha1.SchedulingDecisionSpec{ + Input: map[string]float64{ + "host1": 1.0, + "host2": 2.0, + "host3": 3.0, + }, + Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ + Name: "multiple-deletion-pipeline", + Outputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ + { + Step: "weigher1", + Activations: map[string]float64{ + "host1": 0.5, + "host2": 1.0, + "host3": -0.5, + }, + }, + { + Step: "filter1", + Activations: map[string]float64{ + "host1": 0.2, + // host2 and host3 removed by this step + }, + }, + { + Step: "filter2", + Activations: map[string]float64{ + // host1 removed by this step + // 
host2 and host3 would be removed again, but they're already gone + }, + }, + }, + }, + }, + } + + scheme := runtime.NewScheme() + if err := v1alpha1.AddToScheme(scheme); err != nil { + t.Fatalf("Failed to add scheme: %v", err) + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(resource). + WithStatusSubresource(&v1alpha1.SchedulingDecision{}). + Build() + + req := ctrl.Request{ + NamespacedName: client.ObjectKey{ + Name: "test-decision-multiple-deletions", + }, + } + + reconciler := &SchedulingDecisionReconciler{ + Conf: Config{}, + Client: fakeClient, + } + _, err := reconciler.Reconcile(t.Context(), req) + if err != nil { + t.Fatalf("Reconcile returned an error: %v", err) + } + + // Fetch the updated resource to check status + var updatedResource v1alpha1.SchedulingDecision + if err := fakeClient.Get(t.Context(), client.ObjectKey{Name: "test-decision-multiple-deletions"}, &updatedResource); err != nil { + t.Fatalf("Failed to get updated resource: %v", err) + } + + // Verify success state + if updatedResource.Status.State != v1alpha1.SchedulingDecisionStateResolved { + t.Errorf("Expected state '%s', got '%s'", v1alpha1.SchedulingDecisionStateResolved, updatedResource.Status.State) + } + + // Verify final scores calculation + // Expected: All hosts should be removed, no final scores + expectedFinalScores := map[string]float64{} + if len(updatedResource.Status.FinalScores) != len(expectedFinalScores) { + t.Errorf("Expected %d final scores, got %d", len(expectedFinalScores), len(updatedResource.Status.FinalScores)) + } + + // Verify deleted hosts tracking + // host2 and host3 deleted by filter1, host1 deleted by filter2 + expectedDeletedHosts := map[string][]string{ + "host2": {"filter1"}, // host2 deleted by filter1 + "host3": {"filter1"}, // host3 deleted by filter1 + "host1": {"filter2"}, // host1 deleted by filter2 + } + if len(updatedResource.Status.DeletedHosts) != len(expectedDeletedHosts) { + t.Errorf("Expected %d deleted 
hosts, got %d", len(expectedDeletedHosts), len(updatedResource.Status.DeletedHosts)) + } + for host, expectedSteps := range expectedDeletedHosts { + if actualSteps, exists := updatedResource.Status.DeletedHosts[host]; !exists { + t.Errorf("Expected deleted host '%s', but it was not found", host) + } else if len(actualSteps) != len(expectedSteps) { + t.Errorf("Expected host '%s' to be deleted by %d steps, got %d", host, len(expectedSteps), len(actualSteps)) + } else { + for i, expectedStep := range expectedSteps { + if actualSteps[i] != expectedStep { + t.Errorf("Expected host '%s' step %d to be '%s', got '%s'", host, i, expectedStep, actualSteps[i]) + } + } + } + } + + t.Logf("Multiple deletion test completed: finalScores=%v, deletedHosts=%v", + updatedResource.Status.FinalScores, updatedResource.Status.DeletedHosts) +} From 858197fe206e098a024639a9d92eaded7b43feec Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Thu, 25 Sep 2025 11:58:45 +0200 Subject: [PATCH 14/58] Update ClusterRoleBinding names for computereservation and schedulingdecision --- helm/library/cortex-core/templates/rbac.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/helm/library/cortex-core/templates/rbac.yaml b/helm/library/cortex-core/templates/rbac.yaml index 688a8d79..d30b5596 100644 --- a/helm/library/cortex-core/templates/rbac.yaml +++ b/helm/library/cortex-core/templates/rbac.yaml @@ -1,7 +1,7 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: - name: {{ .Release.Namespace }}-{{ include "cortex.fullname" . }} + name: {{ .Release.Namespace }}-{{ include "cortex.fullname" . }}-computereservation roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole @@ -15,7 +15,7 @@ subjects: apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: - name: {{ .Release.Namespace }}-{{ include "cortex.fullname" . }} + name: {{ .Release.Namespace }}-{{ include "cortex.fullname" . 
}}-schedulingdescision roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole From 8525025ca1502bc544f75a38239565233adbbdc0 Mon Sep 17 00:00:00 2001 From: mblos <> Date: Thu, 25 Sep 2025 11:59:40 +0200 Subject: [PATCH 15/58] Simple description added in crd status --- decisions/internal/controller/controller.go | 63 +++++- .../internal/controller/controller_test.go | 203 +++++++++++++++++- 2 files changed, 262 insertions(+), 4 deletions(-) diff --git a/decisions/internal/controller/controller.go b/decisions/internal/controller/controller.go index 99c1e800..5fca53e8 100644 --- a/decisions/internal/controller/controller.go +++ b/decisions/internal/controller/controller.go @@ -5,6 +5,8 @@ package controller import ( "context" + "fmt" + "sort" "k8s.io/apimachinery/pkg/runtime" ctrl "sigs.k8s.io/controller-runtime" @@ -93,9 +95,13 @@ func (r *SchedulingDecisionReconciler) Reconcile(ctx context.Context, req ctrl.R res.Status.State = v1alpha1.SchedulingDecisionStateResolved res.Status.Error = "" - res.Status.FinalScores = finalScores + + // Sort finalScores by score (highest to lowest) and generate enhanced description + orderedScores, description := r.generateOrderedScoresAndDescription(finalScores, len(res.Spec.Input)) + + res.Status.FinalScores = orderedScores res.Status.DeletedHosts = deletedHosts - res.Status.Description = "Calculated final scores for hosts" + res.Status.Description = description } if err := r.Status().Update(ctx, &res); err != nil { @@ -105,6 +111,59 @@ func (r *SchedulingDecisionReconciler) Reconcile(ctx context.Context, req ctrl.R return ctrl.Result{}, nil // No need to requeue. 
} +// generateOrderedScoresAndDescription sorts final scores by value (highest to lowest) +// and generates a brief description with highest host, certainty, and host count +func (r *SchedulingDecisionReconciler) generateOrderedScoresAndDescription(finalScores map[string]float64, totalInputHosts int) (map[string]float64, string) { + if len(finalScores) == 0 { + return finalScores, fmt.Sprintf("No hosts remaining after filtering, %d hosts evaluated", totalInputHosts) + } + + // Create a slice of host-score pairs for sorting + type hostScore struct { + host string + score float64 + } + + var sortedHosts []hostScore + for host, score := range finalScores { + sortedHosts = append(sortedHosts, hostScore{host: host, score: score}) + } + + // Sort by score (highest to lowest) + sort.Slice(sortedHosts, func(i, j int) bool { + return sortedHosts[i].score > sortedHosts[j].score + }) + + // Create ordered map (Go maps maintain insertion order as of Go 1.8+) + orderedScores := make(map[string]float64) + for _, hs := range sortedHosts { + orderedScores[hs.host] = hs.score + } + + // Generate description + var description string + if len(sortedHosts) == 1 { + description = fmt.Sprintf("Selected: %s (score: %.2f), certainty: perfect, %d hosts evaluated", + sortedHosts[0].host, sortedHosts[0].score, totalInputHosts) + } else { + // Calculate certainty based on gap between 1st and 2nd place + gap := sortedHosts[0].score - sortedHosts[1].score + var certainty string + if gap >= 0.5 { + certainty = "high" + } else if gap >= 0.2 { + certainty = "medium" + } else { + certainty = "low" + } + + description = fmt.Sprintf("Selected: %s (score: %.2f), certainty: %s (gap: %.2f), %d hosts evaluated", + sortedHosts[0].host, sortedHosts[0].score, certainty, gap, totalInputHosts) + } + + return orderedScores, description +} + // SetupWithManager sets up the controller with the Manager. 
func (r *SchedulingDecisionReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). diff --git a/decisions/internal/controller/controller_test.go b/decisions/internal/controller/controller_test.go index 946a7f20..b330fa57 100644 --- a/decisions/internal/controller/controller_test.go +++ b/decisions/internal/controller/controller_test.go @@ -83,8 +83,9 @@ func TestReconcile(t *testing.T) { if updatedResource.Status.Error != "" { t.Errorf("Expected empty error, got '%s'", updatedResource.Status.Error) } - if updatedResource.Status.Description != "Calculated final scores for hosts" { - t.Errorf("Expected description 'Calculated final scores for hosts', got '%s'", updatedResource.Status.Description) + expectedDescription := "Selected: host1 (score: 1.50), certainty: perfect, 2 hosts evaluated" + if updatedResource.Status.Description != expectedDescription { + t.Errorf("Expected description '%s', got '%s'", expectedDescription, updatedResource.Status.Description) } // Verify final scores calculation @@ -504,3 +505,201 @@ func TestReconcileMultipleDeletionSteps(t *testing.T) { t.Logf("Multiple deletion test completed: finalScores=%v, deletedHosts=%v", updatedResource.Status.FinalScores, updatedResource.Status.DeletedHosts) } + +func TestReconcileCertaintyLevels(t *testing.T) { + tests := []struct { + name string + input map[string]float64 + activations map[string]float64 + expectedWinner string + expectedCertainty string + }{ + { + name: "high-certainty", + input: map[string]float64{ + "host1": 1.0, + "host2": 1.0, + }, + activations: map[string]float64{ + "host1": 1.0, // host1: 2.0, host2: 1.0, gap = 1.0 (high) + "host2": 0.0, + }, + expectedWinner: "host1", + expectedCertainty: "high", + }, + { + name: "medium-certainty", + input: map[string]float64{ + "host1": 1.0, + "host2": 1.0, + }, + activations: map[string]float64{ + "host1": 0.3, // host1: 1.3, host2: 1.0, gap = 0.3 (medium) + "host2": 0.0, + }, + expectedWinner: "host1", 
+ expectedCertainty: "medium", + }, + { + name: "low-certainty", + input: map[string]float64{ + "host1": 1.0, + "host2": 1.0, + }, + activations: map[string]float64{ + "host1": 0.1, // host1: 1.1, host2: 1.0, gap = 0.1 (low) + "host2": 0.0, + }, + expectedWinner: "host1", + expectedCertainty: "low", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + resource := &v1alpha1.SchedulingDecision{ + ObjectMeta: ctrl.ObjectMeta{ + Name: "test-certainty-" + tt.name, + }, + Spec: v1alpha1.SchedulingDecisionSpec{ + Input: tt.input, + Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ + Name: "certainty-test-pipeline", + Outputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ + { + Step: "weigher", + Activations: tt.activations, + }, + }, + }, + }, + } + + scheme := runtime.NewScheme() + if err := v1alpha1.AddToScheme(scheme); err != nil { + t.Fatalf("Failed to add scheme: %v", err) + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(resource). + WithStatusSubresource(&v1alpha1.SchedulingDecision{}). 
+ Build() + + req := ctrl.Request{ + NamespacedName: client.ObjectKey{ + Name: "test-certainty-" + tt.name, + }, + } + + reconciler := &SchedulingDecisionReconciler{ + Conf: Config{}, + Client: fakeClient, + } + _, err := reconciler.Reconcile(t.Context(), req) + if err != nil { + t.Fatalf("Reconcile returned an error: %v", err) + } + + // Fetch the updated resource to check status + var updatedResource v1alpha1.SchedulingDecision + if err := fakeClient.Get(t.Context(), client.ObjectKey{Name: "test-certainty-" + tt.name}, &updatedResource); err != nil { + t.Fatalf("Failed to get updated resource: %v", err) + } + + // Verify the description contains the expected winner and certainty + description := updatedResource.Status.Description + if !contains(description, "Selected: "+tt.expectedWinner) { + t.Errorf("Expected description to contain 'Selected: %s', got '%s'", tt.expectedWinner, description) + } + if !contains(description, "certainty: "+tt.expectedCertainty) { + t.Errorf("Expected description to contain 'certainty: %s', got '%s'", tt.expectedCertainty, description) + } + + t.Logf("Certainty test %s completed: %s", tt.name, description) + }) + } +} + +func TestReconcileNoHostsRemaining(t *testing.T) { + resource := &v1alpha1.SchedulingDecision{ + ObjectMeta: ctrl.ObjectMeta{ + Name: "test-no-hosts-remaining", + }, + Spec: v1alpha1.SchedulingDecisionSpec{ + Input: map[string]float64{ + "host1": 1.0, + "host2": 2.0, + }, + Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ + Name: "filter-all-pipeline", + Outputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ + { + Step: "filter-all", + Activations: map[string]float64{ + // No hosts in activations - all will be filtered out + }, + }, + }, + }, + }, + } + + scheme := runtime.NewScheme() + if err := v1alpha1.AddToScheme(scheme); err != nil { + t.Fatalf("Failed to add scheme: %v", err) + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(resource). 
+ WithStatusSubresource(&v1alpha1.SchedulingDecision{}). + Build() + + req := ctrl.Request{ + NamespacedName: client.ObjectKey{ + Name: "test-no-hosts-remaining", + }, + } + + reconciler := &SchedulingDecisionReconciler{ + Conf: Config{}, + Client: fakeClient, + } + _, err := reconciler.Reconcile(t.Context(), req) + if err != nil { + t.Fatalf("Reconcile returned an error: %v", err) + } + + // Fetch the updated resource to check status + var updatedResource v1alpha1.SchedulingDecision + if err := fakeClient.Get(t.Context(), client.ObjectKey{Name: "test-no-hosts-remaining"}, &updatedResource); err != nil { + t.Fatalf("Failed to get updated resource: %v", err) + } + + // Verify success state but no final scores + if updatedResource.Status.State != v1alpha1.SchedulingDecisionStateResolved { + t.Errorf("Expected state '%s', got '%s'", v1alpha1.SchedulingDecisionStateResolved, updatedResource.Status.State) + } + + if len(updatedResource.Status.FinalScores) != 0 { + t.Errorf("Expected 0 final scores, got %d", len(updatedResource.Status.FinalScores)) + } + + expectedDescription := "No hosts remaining after filtering, 2 hosts evaluated" + if updatedResource.Status.Description != expectedDescription { + t.Errorf("Expected description '%s', got '%s'", expectedDescription, updatedResource.Status.Description) + } + + t.Logf("No hosts remaining test completed: %s", updatedResource.Status.Description) +} + +// Helper function to check if a string contains a substring +func contains(s, substr string) bool { + for i := 0; i <= len(s)-len(substr); i++ { + if s[i:i+len(substr)] == substr { + return true + } + } + return false +} From 109b44851271c58dfcfcabe1698b7895df1ed384 Mon Sep 17 00:00:00 2001 From: Philipp Matthes Date: Thu, 25 Sep 2025 12:00:49 +0200 Subject: [PATCH 16/58] Integrate with scheduler --- go.mod | 2 + internal/scheduler/cinder/api/messages.go | 5 ++ internal/scheduler/manila/api/messages.go | 5 ++ internal/scheduler/nova/api/messages.go | 3 + 
internal/scheduler/pipeline.go | 69 +++++++++++++++++++++++ internal/scheduler/pipeline_test.go | 6 ++ internal/scheduler/request.go | 4 ++ internal/scheduler/request_test.go | 1 + 8 files changed, 95 insertions(+) diff --git a/go.mod b/go.mod index d7eecf1d..0bed916a 100644 --- a/go.mod +++ b/go.mod @@ -4,11 +4,13 @@ go 1.25.0 replace ( github.com/cobaltcore-dev/cortex/commands => ./commands + github.com/cobaltcore-dev/cortex/decisions/api => ./decisions/api github.com/cobaltcore-dev/cortex/reservations/api => ./reservations/api github.com/cobaltcore-dev/cortex/testlib => ./testlib ) require ( + github.com/cobaltcore-dev/cortex/decisions/api v0.0.0-00010101000000-000000000000 github.com/cobaltcore-dev/cortex/reservations/api v0.0.0-00010101000000-000000000000 github.com/dlmiddlecote/sqlstats v1.0.2 github.com/eclipse/paho.mqtt.golang v1.5.1 diff --git a/internal/scheduler/cinder/api/messages.go b/internal/scheduler/cinder/api/messages.go index a6d1414a..3083e3f6 100644 --- a/internal/scheduler/cinder/api/messages.go +++ b/internal/scheduler/cinder/api/messages.go @@ -60,6 +60,11 @@ func (r ExternalSchedulerRequest) WithPipeline(pipeline string) scheduler.Pipeli r.Pipeline = pipeline return r } +func (r ExternalSchedulerRequest) GetResourceID() string { + // TODO: We don't properly unwrap the spec yet, + // so we don't have access to the actual resource ID. + return "" +} // Response generated by cortex for the Cinder scheduler. // Cortex returns an ordered list of hosts that the share should be scheduled on. 
diff --git a/internal/scheduler/manila/api/messages.go b/internal/scheduler/manila/api/messages.go index 5a5d2353..e33c576a 100644 --- a/internal/scheduler/manila/api/messages.go +++ b/internal/scheduler/manila/api/messages.go @@ -60,6 +60,11 @@ func (r ExternalSchedulerRequest) WithPipeline(pipeline string) scheduler.Pipeli r.Pipeline = pipeline return r } +func (r ExternalSchedulerRequest) GetResourceID() string { + // TODO: We don't properly unwrap the spec yet, + // so we don't have access to the actual resource ID. + return "" +} // Response generated by cortex for the Manila scheduler. // Cortex returns an ordered list of hosts that the share should be scheduled on. diff --git a/internal/scheduler/nova/api/messages.go b/internal/scheduler/nova/api/messages.go index 2b8afede..552c751a 100644 --- a/internal/scheduler/nova/api/messages.go +++ b/internal/scheduler/nova/api/messages.go @@ -78,6 +78,9 @@ func (r ExternalSchedulerRequest) WithPipeline(pipeline string) scheduler.Pipeli r.Pipeline = pipeline return r } +func (r ExternalSchedulerRequest) GetResourceID() string { + return r.Spec.Data.InstanceUUID +} // Response generated by cortex for the Nova scheduler. // Cortex returns an ordered list of hosts that the VM should be scheduled on. 
diff --git a/internal/scheduler/pipeline.go b/internal/scheduler/pipeline.go index 1b0ac0cd..cffee1e2 100644 --- a/internal/scheduler/pipeline.go +++ b/internal/scheduler/pipeline.go @@ -4,6 +4,7 @@ package scheduler import ( + "context" "errors" "log/slog" "maps" @@ -16,6 +17,10 @@ import ( "github.com/cobaltcore-dev/cortex/internal/conf" "github.com/cobaltcore-dev/cortex/internal/db" "github.com/cobaltcore-dev/cortex/internal/mqtt" + + "github.com/cobaltcore-dev/cortex/decisions/api/v1alpha1" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" ) type Pipeline[RequestType PipelineRequest] interface { @@ -43,6 +48,9 @@ type pipeline[RequestType PipelineRequest] struct { mqttClient mqtt.Client // MQTT topic to publish telemetry data on when the pipeline is finished. mqttTopic string + + // Kubernetes client to create decision resources. + Client client.Client } type StepWrapper[RequestType PipelineRequest] func(Step[RequestType], conf.SchedulerStepConfig) Step[RequestType] @@ -96,6 +104,16 @@ func NewPipeline[RequestType PipelineRequest]( ) } + var kubernetesClient client.Client + if scheme, err := v1alpha1.SchemeBuilder.Build(); err == nil { + if clientConfig, err := ctrl.GetConfig(); err == nil { + if cl, err := client.New(clientConfig, client.Options{Scheme: scheme}); err == nil { + // Successfully created a client, use it. + kubernetesClient = cl + } + } + } + return &pipeline[RequestType]{ // All steps can be run in parallel. executionOrder: [][]Step[RequestType]{steps}, @@ -103,6 +121,7 @@ func NewPipeline[RequestType PipelineRequest]( monitor: monitor, mqttClient: mqttClient, mqttTopic: mqttTopic, + Client: kubernetesClient, } } @@ -232,5 +251,55 @@ func (p *pipeline[RequestType]) Run(request RequestType) ([]string, error) { Out: outWeights, }) + // Create a new scheduling decision object for this object. 
+ go func() { + if p.Client == nil { + return + } + var existing v1alpha1.SchedulingDecision + if err := p.Client.Get( + context.Background(), + client.ObjectKey{Name: request.GetResourceID()}, + &existing, + ); err == nil { + // Decision already exists, do not create a new one. + // TODO: Add new decisions for the same vm id if this is a migration. + traceLog.Info("scheduler: decision already exists, not creating a new one", "resourceID", request.GetResourceID()) + return + } + outputs := []v1alpha1.SchedulingDecisionPipelineOutputSpec{} + for _, stepKey := range p.applicationOrder { + weights, ok := stepWeights[stepKey] + if !ok { + // This is ok, since steps can be skipped. + continue + } + activations := make(map[string]float64, len(weights)) + for k, v := range weights { + activations[k] = p.ActivationFunction.Norm(v) + } + outputs = append(outputs, v1alpha1.SchedulingDecisionPipelineOutputSpec{ + Step: stepKey, + Activations: activations, + }) + } + decision := &v1alpha1.SchedulingDecision{ + ObjectMeta: ctrl.ObjectMeta{Name: request.GetResourceID()}, + Spec: v1alpha1.SchedulingDecisionSpec{ + Input: inWeights, + Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ + Name: request.GetPipeline(), + Outputs: outputs, + }, + }, + // Status will be filled in by the controller. 
+ } + if err := p.Client.Create(context.Background(), decision); err != nil { + traceLog.Error("scheduler: failed to create decision", "error", err) + return + } + traceLog.Info("scheduler: created decision", "resourceID", request.GetResourceID()) + }() + return subjects, nil } diff --git a/internal/scheduler/pipeline_test.go b/internal/scheduler/pipeline_test.go index b84a0c4c..734b8806 100644 --- a/internal/scheduler/pipeline_test.go +++ b/internal/scheduler/pipeline_test.go @@ -11,6 +11,7 @@ import ( "github.com/cobaltcore-dev/cortex/internal/conf" "github.com/cobaltcore-dev/cortex/internal/db" "github.com/cobaltcore-dev/cortex/testlib/mqtt" + "k8s.io/client-go/rest" ) type mockPipelineStep struct { @@ -227,6 +228,11 @@ func TestNewPipeline(t *testing.T) { database := db.DB{} // Mock or initialize as needed monitor := PipelineMonitor{} // Replace with an actual mock implementation if available mqttClient := &mqtt.MockClient{} + + // Set up kubekonfig for GetConfigOrDie + restConfig := &rest.Config{} + _ = restConfig + supportedSteps := map[string]func() Step[mockPipelineRequest]{ "mock_pipeline_step": func() Step[mockPipelineRequest] { return &mockPipelineStep{ diff --git a/internal/scheduler/request.go b/internal/scheduler/request.go index c3343cce..95dc09b8 100644 --- a/internal/scheduler/request.go +++ b/internal/scheduler/request.go @@ -19,4 +19,8 @@ type PipelineRequest interface { GetPipeline() string // Return a copy of the request with the pipeline name set. WithPipeline(pipeline string) PipelineRequest + + // Get the identifier of the resource that should be placed, + // e.g. the virtual machine id for virtual machines. 
+ GetResourceID() string } diff --git a/internal/scheduler/request_test.go b/internal/scheduler/request_test.go index a4308854..5bfb332f 100644 --- a/internal/scheduler/request_test.go +++ b/internal/scheduler/request_test.go @@ -22,3 +22,4 @@ func (m mockPipelineRequest) WithPipeline(pipeline string) PipelineRequest { m.Pipeline = pipeline return m } +func (m mockPipelineRequest) GetResourceID() string { return "mock-resource-id" } From 169ee9922c8dfd523459a9af151fdd8192f4b882 Mon Sep 17 00:00:00 2001 From: mblos <> Date: Thu, 25 Sep 2025 12:14:57 +0200 Subject: [PATCH 17/58] Compare Cortex decision with input decision --- decisions/internal/controller/controller.go | 63 ++++++++- .../internal/controller/controller_test.go | 123 +++++++++++++++++- 2 files changed, 181 insertions(+), 5 deletions(-) diff --git a/decisions/internal/controller/controller.go b/decisions/internal/controller/controller.go index 5fca53e8..fde10065 100644 --- a/decisions/internal/controller/controller.go +++ b/decisions/internal/controller/controller.go @@ -97,7 +97,7 @@ func (r *SchedulingDecisionReconciler) Reconcile(ctx context.Context, req ctrl.R res.Status.Error = "" // Sort finalScores by score (highest to lowest) and generate enhanced description - orderedScores, description := r.generateOrderedScoresAndDescription(finalScores, len(res.Spec.Input)) + orderedScores, description := r.generateOrderedScoresAndDescription(finalScores, res.Spec.Input) res.Status.FinalScores = orderedScores res.Status.DeletedHosts = deletedHosts @@ -112,8 +112,9 @@ func (r *SchedulingDecisionReconciler) Reconcile(ctx context.Context, req ctrl.R } // generateOrderedScoresAndDescription sorts final scores by value (highest to lowest) -// and generates a brief description with highest host, certainty, and host count -func (r *SchedulingDecisionReconciler) generateOrderedScoresAndDescription(finalScores map[string]float64, totalInputHosts int) (map[string]float64, string) { +// and generates a brief 
description with highest host, certainty, host count, and input comparison +func (r *SchedulingDecisionReconciler) generateOrderedScoresAndDescription(finalScores map[string]float64, inputScores map[string]float64) (map[string]float64, string) { + totalInputHosts := len(inputScores) if len(finalScores) == 0 { return finalScores, fmt.Sprintf("No hosts remaining after filtering, %d hosts evaluated", totalInputHosts) } @@ -140,7 +141,30 @@ func (r *SchedulingDecisionReconciler) generateOrderedScoresAndDescription(final orderedScores[hs.host] = hs.score } - // Generate description + // Sort input scores to determine input-based ranking + var sortedInputHosts []hostScore + for host, score := range inputScores { + sortedInputHosts = append(sortedInputHosts, hostScore{host: host, score: score}) + } + sort.Slice(sortedInputHosts, func(i, j int) bool { + return sortedInputHosts[i].score > sortedInputHosts[j].score + }) + + // Find positions and generate comparison + finalWinner := sortedHosts[0].host + inputWinner := sortedInputHosts[0].host + finalWinnerInputScore := inputScores[finalWinner] + + // Find final winner's position in input ranking + finalWinnerInputPosition := -1 + for i, hs := range sortedInputHosts { + if hs.host == finalWinner { + finalWinnerInputPosition = i + 1 // 1-based position + break + } + } + + // Generate main description var description string if len(sortedHosts) == 1 { description = fmt.Sprintf("Selected: %s (score: %.2f), certainty: perfect, %d hosts evaluated", @@ -161,6 +185,37 @@ func (r *SchedulingDecisionReconciler) generateOrderedScoresAndDescription(final sortedHosts[0].host, sortedHosts[0].score, certainty, gap, totalInputHosts) } + // Add input vs. 
final comparison + var comparison string + if inputWinner == finalWinner { + // Input choice confirmed + comparison = fmt.Sprintf("\nInput choice confirmed: %s (%.2f→%.2f, remained #1).", + finalWinner, finalWinnerInputScore, sortedHosts[0].score) + } else { + // Input winner different from final winner + inputWinnerScore := sortedInputHosts[0].score + + // Check if input winner was filtered out + _, inputWinnerSurvived := finalScores[inputWinner] + if !inputWinnerSurvived { + comparison = fmt.Sprintf("\nInput favored %s (score: %.2f, now filtered), final winner was #%d in input (%.2f→%.2f).", + inputWinner, inputWinnerScore, finalWinnerInputPosition, finalWinnerInputScore, sortedHosts[0].score) + } else { + // Find input winner's position in final ranking + inputWinnerFinalPosition := -1 + for i, hs := range sortedHosts { + if hs.host == inputWinner { + inputWinnerFinalPosition = i + 1 // 1-based position + break + } + } + comparison = fmt.Sprintf("\nInput favored %s (score: %.2f, now #%d with %.2f), final winner was #%d in input (%.2f→%.2f).", + inputWinner, inputWinnerScore, inputWinnerFinalPosition, finalScores[inputWinner], + finalWinnerInputPosition, finalWinnerInputScore, sortedHosts[0].score) + } + } + + description += comparison return orderedScores, description } diff --git a/decisions/internal/controller/controller_test.go b/decisions/internal/controller/controller_test.go index b330fa57..fb153dd8 100644 --- a/decisions/internal/controller/controller_test.go +++ b/decisions/internal/controller/controller_test.go @@ -83,7 +83,7 @@ func TestReconcile(t *testing.T) { if updatedResource.Status.Error != "" { t.Errorf("Expected empty error, got '%s'", updatedResource.Status.Error) } - expectedDescription := "Selected: host1 (score: 1.50), certainty: perfect, 2 hosts evaluated" + expectedDescription := "Selected: host1 (score: 1.50), certainty: perfect, 2 hosts evaluated\nInput favored host2 (score: 2.00, now filtered), final winner was #2 in input 
(1.00→1.50)." if updatedResource.Status.Description != expectedDescription { t.Errorf("Expected description '%s', got '%s'", expectedDescription, updatedResource.Status.Description) } @@ -694,6 +694,127 @@ func TestReconcileNoHostsRemaining(t *testing.T) { t.Logf("No hosts remaining test completed: %s", updatedResource.Status.Description) } +func TestReconcileInputVsFinalComparison(t *testing.T) { + tests := []struct { + name string + input map[string]float64 + activations []map[string]float64 + expectedDescContains []string + }{ + { + name: "input-choice-confirmed", + input: map[string]float64{ + "host1": 3.0, // highest in input + "host2": 2.0, + "host3": 1.0, + }, + activations: []map[string]float64{ + {"host1": 0.5, "host2": 0.3, "host3": 0.1}, // host1 stays winner + }, + expectedDescContains: []string{ + "Selected: host1", + "Input choice confirmed: host1 (3.00→3.50, remained #1)", + }, + }, + { + name: "input-winner-filtered", + input: map[string]float64{ + "host1": 1.0, + "host2": 3.0, // highest in input + "host3": 2.0, + }, + activations: []map[string]float64{ + {"host1": 0.5, "host3": 0.3}, // host2 filtered out, host3 becomes winner + }, + expectedDescContains: []string{ + "Selected: host3", + "Input favored host2 (score: 3.00, now filtered)", + "final winner was #2 in input (2.00→2.30)", + }, + }, + { + name: "input-winner-demoted", + input: map[string]float64{ + "host1": 1.0, + "host2": 3.0, // highest in input + "host3": 2.0, + }, + activations: []map[string]float64{ + {"host1": 2.5, "host2": -0.5, "host3": 0.8}, // host1 becomes winner, host2 demoted to #3 + }, + expectedDescContains: []string{ + "Selected: host1", + "Input favored host2 (score: 3.00, now #3 with 2.50)", + "final winner was #3 in input (1.00→3.50)", + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + resource := &v1alpha1.SchedulingDecision{ + ObjectMeta: ctrl.ObjectMeta{ + Name: "test-input-vs-final-" + tt.name, + }, + Spec: 
v1alpha1.SchedulingDecisionSpec{ + Input: tt.input, + Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ + Name: "input-vs-final-pipeline", + Outputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ + { + Step: "weigher", + Activations: tt.activations[0], + }, + }, + }, + }, + } + + scheme := runtime.NewScheme() + if err := v1alpha1.AddToScheme(scheme); err != nil { + t.Fatalf("Failed to add scheme: %v", err) + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(resource). + WithStatusSubresource(&v1alpha1.SchedulingDecision{}). + Build() + + req := ctrl.Request{ + NamespacedName: client.ObjectKey{ + Name: "test-input-vs-final-" + tt.name, + }, + } + + reconciler := &SchedulingDecisionReconciler{ + Conf: Config{}, + Client: fakeClient, + } + _, err := reconciler.Reconcile(t.Context(), req) + if err != nil { + t.Fatalf("Reconcile returned an error: %v", err) + } + + // Fetch the updated resource to check status + var updatedResource v1alpha1.SchedulingDecision + if err := fakeClient.Get(t.Context(), client.ObjectKey{Name: "test-input-vs-final-" + tt.name}, &updatedResource); err != nil { + t.Fatalf("Failed to get updated resource: %v", err) + } + + // Verify the description contains expected elements + description := updatedResource.Status.Description + for _, expectedContent := range tt.expectedDescContains { + if !contains(description, expectedContent) { + t.Errorf("Expected description to contain '%s', got '%s'", expectedContent, description) + } + } + + t.Logf("Input vs Final test %s completed: %s", tt.name, description) + }) + } +} + // Helper function to check if a string contains a substring func contains(s, substr string) bool { for i := 0; i <= len(s)-len(substr); i++ { From 9e244544a0bb056f420214da4b625059613b603b Mon Sep 17 00:00:00 2001 From: Philipp Matthes Date: Thu, 25 Sep 2025 12:19:16 +0200 Subject: [PATCH 18/58] Role fix + add operator to tiltfile + e2e test fix --- Tiltfile | 11 +++ commands/checks/nova/checks.go | 1 
+ .../decisions.cortex_computedecisions.yaml | 86 ------------------- .../rbac/computereservation_admin_role.yaml | 28 ------ .../rbac/computereservation_editor_role.yaml | 34 -------- .../rbac/computereservation_viewer_role.yaml | 30 ------- helm/library/cortex-core/templates/rbac.yaml | 2 +- 7 files changed, 13 insertions(+), 179 deletions(-) delete mode 100644 decisions/dist/chart/templates/crd/decisions.cortex_computedecisions.yaml delete mode 100644 decisions/dist/chart/templates/rbac/computereservation_admin_role.yaml delete mode 100644 decisions/dist/chart/templates/rbac/computereservation_editor_role.yaml delete mode 100644 decisions/dist/chart/templates/rbac/computereservation_viewer_role.yaml diff --git a/Tiltfile b/Tiltfile index da22cf74..e18c068f 100644 --- a/Tiltfile +++ b/Tiltfile @@ -37,6 +37,16 @@ local('sh helm/sync.sh reservations/dist/chart') k8s_yaml(helm('reservations/dist/chart', name='cortex-reservations', values=[tilt_values])) k8s_resource('reservations-controller-manager', labels=['Reservations']) +########### Decisions Operator & CRDs +docker_build('ghcr.io/cobaltcore-dev/cortex-decisions-operator', '.', + dockerfile='Dockerfile.kubebuilder', + build_args={'GO_MOD_PATH': 'decisions'}, + only=kubebuilder_binary_files('decisions') + ['internal/', 'go.mod', 'go.sum'], +) +local('sh helm/sync.sh decisions/dist/chart') +k8s_yaml(helm('decisions/dist/chart', name='cortex-decisions', values=[tilt_values])) +k8s_resource('decisions-controller-manager', labels=['Decisions']) + ########### Dev Dependencies local('sh helm/sync.sh helm/dev/cortex-prometheus-operator') k8s_yaml(helm('./helm/dev/cortex-prometheus-operator', name='cortex-prometheus-operator')) # Operator @@ -76,6 +86,7 @@ k8s_resource('cortex-plutono', port_forwards=[ docker_build('ghcr.io/cobaltcore-dev/cortex', '.', only=[ 'internal/', 'commands/', 'main.go', 'go.mod', 'go.sum', 'Makefile', 'reservations/api/', # API module of the reservations operator needed for the scheduler. 
+ 'decisions/api/', # API module of the decisions operator needed for the scheduler. ]) docker_build('ghcr.io/cobaltcore-dev/cortex-postgres', 'postgres') diff --git a/commands/checks/nova/checks.go b/commands/checks/nova/checks.go index a0d8916f..11027589 100644 --- a/commands/checks/nova/checks.go +++ b/commands/checks/nova/checks.go @@ -271,6 +271,7 @@ func randomRequest(dc datacenter, seed int) api.ExternalSchedulerRequest { slog.Info("using flavor extra specs", "extraSpecs", extraSpecs) request := api.ExternalSchedulerRequest{ Spec: api.NovaObject[api.NovaSpec]{Data: api.NovaSpec{ + InstanceUUID: "cortex-e2e-tests", AvailabilityZone: az, ProjectID: project.ID, Flavor: api.NovaObject[api.NovaFlavor]{Data: api.NovaFlavor{ diff --git a/decisions/dist/chart/templates/crd/decisions.cortex_computedecisions.yaml b/decisions/dist/chart/templates/crd/decisions.cortex_computedecisions.yaml deleted file mode 100644 index f7104190..00000000 --- a/decisions/dist/chart/templates/crd/decisions.cortex_computedecisions.yaml +++ /dev/null @@ -1,86 +0,0 @@ -{{- if .Values.crd.enable }} ---- -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - labels: - {{- include "chart.labels" . | nindent 4 }} - annotations: - {{- if .Values.crd.keep }} - "helm.sh/resource-policy": keep - {{- end }} - controller-gen.kubebuilder.io/version: v0.17.2 - name: schedulingdecisions.decisions.cortex -spec: - group: decisions.cortex - names: - kind: SchedulingDecision - listKind: SchedulingDecisionList - plural: schedulingdecisions - shortNames: - - cdec - singular: schedulingdecision - scope: Cluster - versions: - - name: v1alpha1 - schema: - openAPIV3Schema: - description: SchedulingDecision is the Schema for the schedulingdecisions API - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. 
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: spec defines the desired state of SchedulingDecision - properties: - pipeline: - properties: - name: - type: string - outputs: - items: - properties: - step: - type: string - weights: - additionalProperties: - type: number - type: object - required: - - step - type: object - type: array - required: - - name - type: object - required: - - pipeline - type: object - status: - description: status defines the observed state of SchedulingDecision - properties: - description: - type: string - type: object - required: - - spec - type: object - served: true - storage: true - subresources: - status: {} -{{- end -}} diff --git a/decisions/dist/chart/templates/rbac/computereservation_admin_role.yaml b/decisions/dist/chart/templates/rbac/computereservation_admin_role.yaml deleted file mode 100644 index 6db64811..00000000 --- a/decisions/dist/chart/templates/rbac/computereservation_admin_role.yaml +++ /dev/null @@ -1,28 +0,0 @@ -{{- if .Values.rbac.enable }} -# This rule is not used by the project decisions itself. -# It is provided to allow the cluster admin to help manage permissions for users. -# -# Grants full permissions ('*') over decisions.cortex. -# This role is intended for users authorized to modify roles and bindings within the cluster, -# enabling them to delegate specific permissions to other users or groups as needed. - -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - labels: - {{- include "chart.labels" . 
| nindent 4 }} - name: schedulingdecision-admin-role -rules: -- apiGroups: - - decisions.cortex - resources: - - schedulingdecisions - verbs: - - '*' -- apiGroups: - - decisions.cortex - resources: - - schedulingdecisions/status - verbs: - - get -{{- end -}} diff --git a/decisions/dist/chart/templates/rbac/computereservation_editor_role.yaml b/decisions/dist/chart/templates/rbac/computereservation_editor_role.yaml deleted file mode 100644 index 7a82611c..00000000 --- a/decisions/dist/chart/templates/rbac/computereservation_editor_role.yaml +++ /dev/null @@ -1,34 +0,0 @@ -{{- if .Values.rbac.enable }} -# This rule is not used by the project decisions itself. -# It is provided to allow the cluster admin to help manage permissions for users. -# -# Grants permissions to create, update, and delete resources within the decisions.cortex. -# This role is intended for users who need to manage these resources -# but should not control RBAC or manage permissions for others. - -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - labels: - {{- include "chart.labels" . | nindent 4 }} - name: schedulingdecision-editor-role -rules: -- apiGroups: - - decisions.cortex - resources: - - schedulingdecisions - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - decisions.cortex - resources: - - schedulingdecisions/status - verbs: - - get -{{- end -}} diff --git a/decisions/dist/chart/templates/rbac/computereservation_viewer_role.yaml b/decisions/dist/chart/templates/rbac/computereservation_viewer_role.yaml deleted file mode 100644 index 4375bd65..00000000 --- a/decisions/dist/chart/templates/rbac/computereservation_viewer_role.yaml +++ /dev/null @@ -1,30 +0,0 @@ -{{- if .Values.rbac.enable }} -# This rule is not used by the project decisions itself. -# It is provided to allow the cluster admin to help manage permissions for users. -# -# Grants read-only access to decisions.cortex resources. 
-# This role is intended for users who need visibility into these resources -# without permissions to modify them. It is ideal for monitoring purposes and limited-access viewing. - -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - labels: - {{- include "chart.labels" . | nindent 4 }} - name: schedulingdecision-viewer-role -rules: -- apiGroups: - - decisions.cortex - resources: - - schedulingdecisions - verbs: - - get - - list - - watch -- apiGroups: - - decisions.cortex - resources: - - schedulingdecisions/status - verbs: - - get -{{- end -}} diff --git a/helm/library/cortex-core/templates/rbac.yaml b/helm/library/cortex-core/templates/rbac.yaml index d30b5596..baca03ab 100644 --- a/helm/library/cortex-core/templates/rbac.yaml +++ b/helm/library/cortex-core/templates/rbac.yaml @@ -20,7 +20,7 @@ roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole # From the decisions operator. - name: schedulingdecision-viewer-role + name: schedulingdecision-editor-role subjects: - kind: ServiceAccount name: {{ .Release.Namespace }}-{{ include "cortex.fullname" . 
}} From dd075a3f5f3f656a8525fc20d8139b6919522b27 Mon Sep 17 00:00:00 2001 From: mblos <> Date: Thu, 25 Sep 2025 12:29:37 +0200 Subject: [PATCH 19/58] Add importance of steps --- decisions/internal/controller/controller.go | 158 ++++++++++++++---- .../internal/controller/controller_test.go | 149 ++++++++++++++++- 2 files changed, 273 insertions(+), 34 deletions(-) diff --git a/decisions/internal/controller/controller.go b/decisions/internal/controller/controller.go index fde10065..494071f0 100644 --- a/decisions/internal/controller/controller.go +++ b/decisions/internal/controller/controller.go @@ -62,42 +62,17 @@ func (r *SchedulingDecisionReconciler) Reconcile(ctx context.Context, req ctrl.R } } - // Calculate final scores for all hosts - finalScores := make(map[string]float64) - deletedHosts := make(map[string][]string) + // Calculate final scores with full pipeline + finalScores, deletedHosts := r.calculateScores(res.Spec.Input, res.Spec.Pipeline.Outputs) - // Start with input values as initial scores - for hostName, inputValue := range res.Spec.Input { - finalScores[hostName] = inputValue - } - - // Process each pipeline step sequentially - for _, output := range res.Spec.Pipeline.Outputs { - // Check which hosts will be deleted in this step - for hostName := range finalScores { - if _, exists := output.Activations[hostName]; !exists { - // Host not in this step's activations - will be deleted - deletedHosts[hostName] = append(deletedHosts[hostName], output.Step) - } - } - - // Apply activations and remove hosts not in this step - for hostName := range finalScores { - if activation, exists := output.Activations[hostName]; exists { - // Add activation to current score - finalScores[hostName] = finalScores[hostName] + activation - } else { - // Host not in this step - remove it - delete(finalScores, hostName) - } - } - } + // Find minimal critical path + criticalSteps, criticalStepCount := r.findCriticalSteps(res.Spec.Input, res.Spec.Pipeline.Outputs, 
finalScores) res.Status.State = v1alpha1.SchedulingDecisionStateResolved res.Status.Error = "" // Sort finalScores by score (highest to lowest) and generate enhanced description - orderedScores, description := r.generateOrderedScoresAndDescription(finalScores, res.Spec.Input) + orderedScores, description := r.generateOrderedScoresAndDescription(finalScores, res.Spec.Input, criticalSteps, criticalStepCount, len(res.Spec.Pipeline.Outputs)) res.Status.FinalScores = orderedScores res.Status.DeletedHosts = deletedHosts @@ -111,9 +86,99 @@ func (r *SchedulingDecisionReconciler) Reconcile(ctx context.Context, req ctrl.R return ctrl.Result{}, nil // No need to requeue. } +// calculateScores processes pipeline outputs and returns final scores and deleted hosts +func (r *SchedulingDecisionReconciler) calculateScores(input map[string]float64, outputs []v1alpha1.SchedulingDecisionPipelineOutputSpec) (map[string]float64, map[string][]string) { + finalScores := make(map[string]float64) + deletedHosts := make(map[string][]string) + + // Start with input values as initial scores + for hostName, inputValue := range input { + finalScores[hostName] = inputValue + } + + // Process each pipeline step sequentially + for _, output := range outputs { + // Check which hosts will be deleted in this step + for hostName := range finalScores { + if _, exists := output.Activations[hostName]; !exists { + // Host not in this step's activations - will be deleted + deletedHosts[hostName] = append(deletedHosts[hostName], output.Step) + } + } + + // Apply activations and remove hosts not in this step + for hostName := range finalScores { + if activation, exists := output.Activations[hostName]; exists { + // Add activation to current score + finalScores[hostName] = finalScores[hostName] + activation + } else { + // Host not in this step - remove it + delete(finalScores, hostName) + } + } + } + + return finalScores, deletedHosts +} + +// findCriticalSteps identifies which pipeline steps are essential 
for the final decision +// using backward elimination approach +func (r *SchedulingDecisionReconciler) findCriticalSteps(input map[string]float64, outputs []v1alpha1.SchedulingDecisionPipelineOutputSpec, baselineFinalScores map[string]float64) ([]string, int) { + if len(outputs) == 0 { + return []string{}, 0 + } + + // Get baseline winner + baselineWinner := "" + maxScore := float64(-999999) + for host, score := range baselineFinalScores { + if score > maxScore { + maxScore = score + baselineWinner = host + } + } + + if baselineWinner == "" { + return []string{}, 0 + } + + criticalSteps := make([]string, 0) + + // Try removing each step one by one + for i, stepToRemove := range outputs { + // Create pipeline without this step + reducedOutputs := make([]v1alpha1.SchedulingDecisionPipelineOutputSpec, 0, len(outputs)-1) + for j, output := range outputs { + if j != i { + reducedOutputs = append(reducedOutputs, output) + } + } + + // Calculate scores without this step + reducedFinalScores, _ := r.calculateScores(input, reducedOutputs) + + // Find winner without this step + reducedWinner := "" + reducedMaxScore := float64(-999999) + for host, score := range reducedFinalScores { + if score > reducedMaxScore { + reducedMaxScore = score + reducedWinner = host + } + } + + // If removing this step changes the winner, it's critical + if reducedWinner != baselineWinner { + criticalSteps = append(criticalSteps, stepToRemove.Step) + } + } + + return criticalSteps, len(criticalSteps) +} + // generateOrderedScoresAndDescription sorts final scores by value (highest to lowest) -// and generates a brief description with highest host, certainty, host count, and input comparison -func (r *SchedulingDecisionReconciler) generateOrderedScoresAndDescription(finalScores map[string]float64, inputScores map[string]float64) (map[string]float64, string) { +// and generates a brief description with highest host, certainty, host count, input comparison, and critical path +func (r 
*SchedulingDecisionReconciler) generateOrderedScoresAndDescription(finalScores map[string]float64, inputScores map[string]float64, criticalSteps []string, criticalStepCount int, totalSteps int) (map[string]float64, string) { totalInputHosts := len(inputScores) if len(finalScores) == 0 { return finalScores, fmt.Sprintf("No hosts remaining after filtering, %d hosts evaluated", totalInputHosts) @@ -215,7 +280,34 @@ func (r *SchedulingDecisionReconciler) generateOrderedScoresAndDescription(final } } - description += comparison + // Add critical path information + var criticalPath string + if totalSteps > 0 { + if criticalStepCount == 0 { + criticalPath = fmt.Sprintf("\nDecision driven by input only (all %d steps are non-critical).", totalSteps) + } else if criticalStepCount == totalSteps { + criticalPath = fmt.Sprintf("\nDecision requires all %d pipeline steps.", totalSteps) + } else { + if criticalStepCount == 1 { + criticalPath = fmt.Sprintf("\nDecision driven by 1/%d pipeline step: %s.", totalSteps, criticalSteps[0]) + } else { + // Join critical steps with commas + stepList := "" + for i, step := range criticalSteps { + if i == len(criticalSteps)-1 { + stepList += step + } else if i == len(criticalSteps)-2 { + stepList += step + " and " + } else { + stepList += step + ", " + } + } + criticalPath = fmt.Sprintf("\nDecision driven by %d/%d pipeline steps: %s.", criticalStepCount, totalSteps, stepList) + } + } + } + + description += comparison + criticalPath return orderedScores, description } diff --git a/decisions/internal/controller/controller_test.go b/decisions/internal/controller/controller_test.go index fb153dd8..967b8088 100644 --- a/decisions/internal/controller/controller_test.go +++ b/decisions/internal/controller/controller_test.go @@ -83,7 +83,7 @@ func TestReconcile(t *testing.T) { if updatedResource.Status.Error != "" { t.Errorf("Expected empty error, got '%s'", updatedResource.Status.Error) } - expectedDescription := "Selected: host1 (score: 1.50), 
certainty: perfect, 2 hosts evaluated\nInput favored host2 (score: 2.00, now filtered), final winner was #2 in input (1.00→1.50)." + expectedDescription := "Selected: host1 (score: 1.50), certainty: perfect, 2 hosts evaluated\nInput favored host2 (score: 2.00, now filtered), final winner was #2 in input (1.00→1.50).\nDecision driven by 1/2 pipeline step: filter." if updatedResource.Status.Description != expectedDescription { t.Errorf("Expected description '%s', got '%s'", expectedDescription, updatedResource.Status.Description) } @@ -815,6 +815,153 @@ func TestReconcileInputVsFinalComparison(t *testing.T) { } } +func TestReconcileCriticalStepElimination(t *testing.T) { + tests := []struct { + name string + input map[string]float64 + pipeline []v1alpha1.SchedulingDecisionPipelineOutputSpec + expectedCriticalMessage string + }{ + { + name: "single-critical-step", + input: map[string]float64{ + "host1": 2.0, // Would win without pipeline + "host2": 1.0, + "host3": 1.5, + }, + pipeline: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ + { + Step: "non-critical-weigher", + Activations: map[string]float64{ + "host1": 0.1, // Small changes don't affect winner + "host2": 0.1, + "host3": 0.1, + }, + }, + { + Step: "critical-filter", + Activations: map[string]float64{ + "host2": 0.0, // host1 and host3 filtered out, host2 becomes winner + "host3": 0.0, + }, + }, + }, + expectedCriticalMessage: "Decision driven by 1/2 pipeline step: critical-filter.", + }, + { + name: "multiple-critical-steps", + input: map[string]float64{ + "host1": 1.0, + "host2": 2.0, // Would win without pipeline + "host3": 1.5, + }, + pipeline: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ + { + Step: "critical-weigher1", + Activations: map[string]float64{ + "host1": 1.5, // Gives host1 strong boost to overtake host2 + "host2": 0.0, + "host3": 0.5, + }, + }, + { + Step: "critical-weigher2", + Activations: map[string]float64{ + "host1": 0.1, // Further secures host1's lead + "host2": 0.0, + "host3": 
0.0, + }, + }, + }, + expectedCriticalMessage: "Decision driven by 1/2 pipeline step: critical-weigher1.", + }, + { + name: "all-non-critical", + input: map[string]float64{ + "host1": 3.0, // Clear winner from input + "host2": 1.0, + "host3": 2.0, + }, + pipeline: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ + { + Step: "non-critical-weigher1", + Activations: map[string]float64{ + "host1": 0.1, // Small changes don't change winner + "host2": 0.1, + "host3": 0.1, + }, + }, + { + Step: "non-critical-weigher2", + Activations: map[string]float64{ + "host1": 0.2, + "host2": 0.0, + "host3": 0.1, + }, + }, + }, + expectedCriticalMessage: "Decision driven by input only (all 2 steps are non-critical).", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + resource := &v1alpha1.SchedulingDecision{ + ObjectMeta: ctrl.ObjectMeta{ + Name: "test-critical-steps-" + tt.name, + }, + Spec: v1alpha1.SchedulingDecisionSpec{ + Input: tt.input, + Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ + Name: "critical-step-test-pipeline", + Outputs: tt.pipeline, + }, + }, + } + + scheme := runtime.NewScheme() + if err := v1alpha1.AddToScheme(scheme); err != nil { + t.Fatalf("Failed to add scheme: %v", err) + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(resource). + WithStatusSubresource(&v1alpha1.SchedulingDecision{}). 
+ Build() + + req := ctrl.Request{ + NamespacedName: client.ObjectKey{ + Name: "test-critical-steps-" + tt.name, + }, + } + + reconciler := &SchedulingDecisionReconciler{ + Conf: Config{}, + Client: fakeClient, + } + _, err := reconciler.Reconcile(t.Context(), req) + if err != nil { + t.Fatalf("Reconcile returned an error: %v", err) + } + + // Fetch the updated resource to check status + var updatedResource v1alpha1.SchedulingDecision + if err := fakeClient.Get(t.Context(), client.ObjectKey{Name: "test-critical-steps-" + tt.name}, &updatedResource); err != nil { + t.Fatalf("Failed to get updated resource: %v", err) + } + + // Verify the description contains the expected critical step message + description := updatedResource.Status.Description + if !contains(description, tt.expectedCriticalMessage) { + t.Errorf("Expected description to contain '%s', got '%s'", tt.expectedCriticalMessage, description) + } + + t.Logf("Critical step test %s completed: %s", tt.name, description) + }) + } +} + // Helper function to check if a string contains a substring func contains(s, substr string) bool { for i := 0; i <= len(s)-len(substr); i++ { From 1d9f808837648644607d4dfb04e77f703248daf8 Mon Sep 17 00:00:00 2001 From: Philipp Matthes Date: Thu, 25 Sep 2025 14:06:42 +0200 Subject: [PATCH 20/58] Cleanup & better make & add printercolumn for creation timestamp --- decisions/Makefile | 10 +++++- .../api/v1alpha1/schedulingdecision_types.go | 3 +- .../api/v1alpha1/zz_generated.deepcopy.go | 25 +++++++++++++- .../decisions.cortex_schedulingdecisions.yaml | 10 ++++-- .../decisions.cortex_schedulingdecisions.yaml | 18 ++++++++++ ...aml => schedulingdecision_admin_role.yaml} | 0 ...ml => schedulingdecision_editor_role.yaml} | 0 ...ml => schedulingdecision_viewer_role.yaml} | 0 .../decisions.cortex_schedulingdecisions.yaml | 10 ++++-- .../rbac/computereservation_admin_role.yaml | 28 +++++++++++++++ .../rbac/computereservation_editor_role.yaml | 34 +++++++++++++++++++ 
.../rbac/computereservation_viewer_role.yaml | 30 ++++++++++++++++ 12 files changed, 161 insertions(+), 7 deletions(-) rename decisions/config/rbac/{computereservation_admin_role.yaml => schedulingdecision_admin_role.yaml} (100%) rename decisions/config/rbac/{computereservation_editor_role.yaml => schedulingdecision_editor_role.yaml} (100%) rename decisions/config/rbac/{computereservation_viewer_role.yaml => schedulingdecision_viewer_role.yaml} (100%) create mode 100644 decisions/dist/chart/templates/rbac/computereservation_admin_role.yaml create mode 100644 decisions/dist/chart/templates/rbac/computereservation_editor_role.yaml create mode 100644 decisions/dist/chart/templates/rbac/computereservation_viewer_role.yaml diff --git a/decisions/Makefile b/decisions/Makefile index 313dd550..4d454b5b 100644 --- a/decisions/Makefile +++ b/decisions/Makefile @@ -9,10 +9,18 @@ manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and Cust generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations. $(CONTROLLER_GEN) crd:allowDangerousTypes=true object:headerFile="hack/boilerplate.go.txt" paths="./..." 
+.PHONY: cleanup +cleanup: + rm -rf ./.github + +.PHONY: dekustomize +dekustomize: + kubebuilder edit --plugins=helm/v1-alpha + ##@ Build .PHONY: build -build: manifests generate +build: manifests generate dekustomize cleanup LOCALBIN ?= $(shell pwd)/bin $(LOCALBIN): diff --git a/decisions/api/v1alpha1/schedulingdecision_types.go b/decisions/api/v1alpha1/schedulingdecision_types.go index a19f8123..6aaa2344 100644 --- a/decisions/api/v1alpha1/schedulingdecision_types.go +++ b/decisions/api/v1alpha1/schedulingdecision_types.go @@ -45,9 +45,10 @@ type SchedulingDecisionStatus struct { // +kubebuilder:object:root=true // +kubebuilder:subresource:status -// +kubebuilder:resource:scope=Cluster,shortName=sdecs +// +kubebuilder:resource:scope=Cluster,shortName=sdec;sdecs // +kubebuilder:printcolumn:name="State",type="string",JSONPath=".status.state" // +kubebuilder:printcolumn:name="Error",type="string",JSONPath=".status.error" +// +kubebuilder:printcolumn:name="Created",type="date",JSONPath=".metadata.creationTimestamp" // SchedulingDecision is the Schema for the schedulingdecisions API type SchedulingDecision struct { diff --git a/decisions/api/v1alpha1/zz_generated.deepcopy.go b/decisions/api/v1alpha1/zz_generated.deepcopy.go index c64d3653..e846dcb0 100644 --- a/decisions/api/v1alpha1/zz_generated.deepcopy.go +++ b/decisions/api/v1alpha1/zz_generated.deepcopy.go @@ -17,7 +17,7 @@ func (in *SchedulingDecision) DeepCopyInto(out *SchedulingDecision) { out.TypeMeta = in.TypeMeta in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) in.Spec.DeepCopyInto(&out.Spec) - out.Status = in.Status + in.Status.DeepCopyInto(&out.Status) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingDecision. @@ -140,6 +140,29 @@ func (in *SchedulingDecisionSpec) DeepCopy() *SchedulingDecisionSpec { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *SchedulingDecisionStatus) DeepCopyInto(out *SchedulingDecisionStatus) { *out = *in + if in.FinalScores != nil { + in, out := &in.FinalScores, &out.FinalScores + *out = make(map[string]float64, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.DeletedHosts != nil { + in, out := &in.DeletedHosts, &out.DeletedHosts + *out = make(map[string][]string, len(*in)) + for key, val := range *in { + var outVal []string + if val == nil { + (*out)[key] = nil + } else { + inVal := (*in)[key] + in, out := &inVal, &outVal + *out = make([]string, len(*in)) + copy(*out, *in) + } + (*out)[key] = outVal + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingDecisionStatus. diff --git a/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml b/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml index 0f2a8e98..d815ebdd 100644 --- a/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml +++ b/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml @@ -12,6 +12,7 @@ spec: listKind: SchedulingDecisionList plural: schedulingdecisions shortNames: + - sdec - sdecs singular: schedulingdecision scope: Cluster @@ -23,6 +24,9 @@ spec: - jsonPath: .status.error name: Error type: string + - jsonPath: .metadata.creationTimestamp + name: Created + type: date name: v1alpha1 schema: openAPIV3Schema: @@ -83,9 +87,11 @@ spec: properties: deletedHosts: additionalProperties: - type: string + items: + type: string + type: array description: Hosts that were deleted during pipeline processing and - the step that deleted them. + all steps that attempted to delete them. 
type: object description: type: string diff --git a/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml b/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml index 252ddd08..d815ebdd 100644 --- a/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml +++ b/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml @@ -12,6 +12,7 @@ spec: listKind: SchedulingDecisionList plural: schedulingdecisions shortNames: + - sdec - sdecs singular: schedulingdecision scope: Cluster @@ -23,6 +24,9 @@ spec: - jsonPath: .status.error name: Error type: string + - jsonPath: .metadata.creationTimestamp + name: Created + type: date name: v1alpha1 schema: openAPIV3Schema: @@ -81,11 +85,25 @@ spec: status: description: status defines the observed state of SchedulingDecision properties: + deletedHosts: + additionalProperties: + items: + type: string + type: array + description: Hosts that were deleted during pipeline processing and + all steps that attempted to delete them. + type: object description: type: string error: description: Only given if state is "error". type: string + finalScores: + additionalProperties: + type: number + description: Final scores for each host after processing all pipeline + steps. 
+ type: object state: type: string type: object diff --git a/decisions/config/rbac/computereservation_admin_role.yaml b/decisions/config/rbac/schedulingdecision_admin_role.yaml similarity index 100% rename from decisions/config/rbac/computereservation_admin_role.yaml rename to decisions/config/rbac/schedulingdecision_admin_role.yaml diff --git a/decisions/config/rbac/computereservation_editor_role.yaml b/decisions/config/rbac/schedulingdecision_editor_role.yaml similarity index 100% rename from decisions/config/rbac/computereservation_editor_role.yaml rename to decisions/config/rbac/schedulingdecision_editor_role.yaml diff --git a/decisions/config/rbac/computereservation_viewer_role.yaml b/decisions/config/rbac/schedulingdecision_viewer_role.yaml similarity index 100% rename from decisions/config/rbac/computereservation_viewer_role.yaml rename to decisions/config/rbac/schedulingdecision_viewer_role.yaml diff --git a/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml b/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml index 32117e90..7d8f6c84 100644 --- a/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml +++ b/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml @@ -18,6 +18,7 @@ spec: listKind: SchedulingDecisionList plural: schedulingdecisions shortNames: + - sdec - sdecs singular: schedulingdecision scope: Cluster @@ -29,6 +30,9 @@ spec: - jsonPath: .status.error name: Error type: string + - jsonPath: .metadata.creationTimestamp + name: Created + type: date name: v1alpha1 schema: openAPIV3Schema: @@ -89,9 +93,11 @@ spec: properties: deletedHosts: additionalProperties: - type: string + items: + type: string + type: array description: Hosts that were deleted during pipeline processing and - the step that deleted them. + all steps that attempted to delete them. 
type: object description: type: string diff --git a/decisions/dist/chart/templates/rbac/computereservation_admin_role.yaml b/decisions/dist/chart/templates/rbac/computereservation_admin_role.yaml new file mode 100644 index 00000000..6db64811 --- /dev/null +++ b/decisions/dist/chart/templates/rbac/computereservation_admin_role.yaml @@ -0,0 +1,28 @@ +{{- if .Values.rbac.enable }} +# This rule is not used by the project decisions itself. +# It is provided to allow the cluster admin to help manage permissions for users. +# +# Grants full permissions ('*') over decisions.cortex. +# This role is intended for users authorized to modify roles and bindings within the cluster, +# enabling them to delegate specific permissions to other users or groups as needed. + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + {{- include "chart.labels" . | nindent 4 }} + name: schedulingdecision-admin-role +rules: +- apiGroups: + - decisions.cortex + resources: + - schedulingdecisions + verbs: + - '*' +- apiGroups: + - decisions.cortex + resources: + - schedulingdecisions/status + verbs: + - get +{{- end -}} diff --git a/decisions/dist/chart/templates/rbac/computereservation_editor_role.yaml b/decisions/dist/chart/templates/rbac/computereservation_editor_role.yaml new file mode 100644 index 00000000..7a82611c --- /dev/null +++ b/decisions/dist/chart/templates/rbac/computereservation_editor_role.yaml @@ -0,0 +1,34 @@ +{{- if .Values.rbac.enable }} +# This rule is not used by the project decisions itself. +# It is provided to allow the cluster admin to help manage permissions for users. +# +# Grants permissions to create, update, and delete resources within the decisions.cortex. +# This role is intended for users who need to manage these resources +# but should not control RBAC or manage permissions for others. + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + {{- include "chart.labels" . 
| nindent 4 }} + name: schedulingdecision-editor-role +rules: +- apiGroups: + - decisions.cortex + resources: + - schedulingdecisions + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - decisions.cortex + resources: + - schedulingdecisions/status + verbs: + - get +{{- end -}} diff --git a/decisions/dist/chart/templates/rbac/computereservation_viewer_role.yaml b/decisions/dist/chart/templates/rbac/computereservation_viewer_role.yaml new file mode 100644 index 00000000..4375bd65 --- /dev/null +++ b/decisions/dist/chart/templates/rbac/computereservation_viewer_role.yaml @@ -0,0 +1,30 @@ +{{- if .Values.rbac.enable }} +# This rule is not used by the project decisions itself. +# It is provided to allow the cluster admin to help manage permissions for users. +# +# Grants read-only access to decisions.cortex resources. +# This role is intended for users who need visibility into these resources +# without permissions to modify them. It is ideal for monitoring purposes and limited-access viewing. + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + {{- include "chart.labels" . 
| nindent 4 }} + name: schedulingdecision-viewer-role +rules: +- apiGroups: + - decisions.cortex + resources: + - schedulingdecisions + verbs: + - get + - list + - watch +- apiGroups: + - decisions.cortex + resources: + - schedulingdecisions/status + verbs: + - get +{{- end -}} From 0db0dc19e65e243d6562686d132e37372b7c8f40 Mon Sep 17 00:00:00 2001 From: Philipp Matthes Date: Thu, 25 Sep 2025 14:22:28 +0200 Subject: [PATCH 21/58] Fix reservations operator and hopefully delete last stale yaml --- Tiltfile | 2 +- .../rbac/computereservation_admin_role.yaml | 28 --- .../rbac/computereservation_editor_role.yaml | 34 --- .../rbac/computereservation_viewer_role.yaml | 30 --- reservations/go.mod | 97 +++++---- reservations/go.sum | 203 ++++++++++-------- 6 files changed, 165 insertions(+), 229 deletions(-) delete mode 100644 decisions/dist/chart/templates/rbac/computereservation_admin_role.yaml delete mode 100644 decisions/dist/chart/templates/rbac/computereservation_editor_role.yaml delete mode 100644 decisions/dist/chart/templates/rbac/computereservation_viewer_role.yaml diff --git a/Tiltfile b/Tiltfile index e18c068f..74157f96 100644 --- a/Tiltfile +++ b/Tiltfile @@ -31,7 +31,7 @@ def kubebuilder_binary_files(path): docker_build('ghcr.io/cobaltcore-dev/cortex-reservations-operator', '.', dockerfile='Dockerfile.kubebuilder', build_args={'GO_MOD_PATH': 'reservations'}, - only=kubebuilder_binary_files('reservations') + ['internal/', 'go.mod', 'go.sum'], + only=kubebuilder_binary_files('reservations') + ['internal/', 'decisions/', 'go.mod', 'go.sum'], ) local('sh helm/sync.sh reservations/dist/chart') k8s_yaml(helm('reservations/dist/chart', name='cortex-reservations', values=[tilt_values])) diff --git a/decisions/dist/chart/templates/rbac/computereservation_admin_role.yaml b/decisions/dist/chart/templates/rbac/computereservation_admin_role.yaml deleted file mode 100644 index 6db64811..00000000 --- a/decisions/dist/chart/templates/rbac/computereservation_admin_role.yaml +++ 
/dev/null @@ -1,28 +0,0 @@ -{{- if .Values.rbac.enable }} -# This rule is not used by the project decisions itself. -# It is provided to allow the cluster admin to help manage permissions for users. -# -# Grants full permissions ('*') over decisions.cortex. -# This role is intended for users authorized to modify roles and bindings within the cluster, -# enabling them to delegate specific permissions to other users or groups as needed. - -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - labels: - {{- include "chart.labels" . | nindent 4 }} - name: schedulingdecision-admin-role -rules: -- apiGroups: - - decisions.cortex - resources: - - schedulingdecisions - verbs: - - '*' -- apiGroups: - - decisions.cortex - resources: - - schedulingdecisions/status - verbs: - - get -{{- end -}} diff --git a/decisions/dist/chart/templates/rbac/computereservation_editor_role.yaml b/decisions/dist/chart/templates/rbac/computereservation_editor_role.yaml deleted file mode 100644 index 7a82611c..00000000 --- a/decisions/dist/chart/templates/rbac/computereservation_editor_role.yaml +++ /dev/null @@ -1,34 +0,0 @@ -{{- if .Values.rbac.enable }} -# This rule is not used by the project decisions itself. -# It is provided to allow the cluster admin to help manage permissions for users. -# -# Grants permissions to create, update, and delete resources within the decisions.cortex. -# This role is intended for users who need to manage these resources -# but should not control RBAC or manage permissions for others. - -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - labels: - {{- include "chart.labels" . 
| nindent 4 }} - name: schedulingdecision-editor-role -rules: -- apiGroups: - - decisions.cortex - resources: - - schedulingdecisions - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - decisions.cortex - resources: - - schedulingdecisions/status - verbs: - - get -{{- end -}} diff --git a/decisions/dist/chart/templates/rbac/computereservation_viewer_role.yaml b/decisions/dist/chart/templates/rbac/computereservation_viewer_role.yaml deleted file mode 100644 index 4375bd65..00000000 --- a/decisions/dist/chart/templates/rbac/computereservation_viewer_role.yaml +++ /dev/null @@ -1,30 +0,0 @@ -{{- if .Values.rbac.enable }} -# This rule is not used by the project decisions itself. -# It is provided to allow the cluster admin to help manage permissions for users. -# -# Grants read-only access to decisions.cortex resources. -# This role is intended for users who need visibility into these resources -# without permissions to modify them. It is ideal for monitoring purposes and limited-access viewing. - -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - labels: - {{- include "chart.labels" . 
| nindent 4 }} - name: schedulingdecision-viewer-role -rules: -- apiGroups: - - decisions.cortex - resources: - - schedulingdecisions - verbs: - - get - - list - - watch -- apiGroups: - - decisions.cortex - resources: - - schedulingdecisions/status - verbs: - - get -{{- end -}} diff --git a/reservations/go.mod b/reservations/go.mod index 9c1b6317..075198cd 100644 --- a/reservations/go.mod +++ b/reservations/go.mod @@ -4,12 +4,13 @@ go 1.25.0 replace ( github.com/cobaltcore-dev/cortex => ../ + github.com/cobaltcore-dev/cortex/decisions/api => ../decisions/api github.com/cobaltcore-dev/cortex/reservations/api => ./api ) require ( - github.com/cobaltcore-dev/cortex v0.0.0-00010101000000-000000000000 - github.com/cobaltcore-dev/cortex/reservations/api v0.0.0-00010101000000-000000000000 + github.com/cobaltcore-dev/cortex v0.0.0-20250925095218-954f58af3880 + github.com/cobaltcore-dev/cortex/reservations/api v0.0.0-20250925095218-954f58af3880 github.com/gophercloud/gophercloud/v2 v2.8.0 k8s.io/apimachinery v0.34.1 k8s.io/client-go v0.34.1 @@ -18,9 +19,22 @@ require ( require ( filippo.io/edwards25519 v1.1.0 // indirect + github.com/cenkalti/backoff/v5 v5.0.3 // indirect + github.com/cobaltcore-dev/cortex/decisions/api v0.0.0-00010101000000-000000000000 // indirect github.com/dlmiddlecote/sqlstats v1.0.2 // indirect github.com/eclipse/paho.mqtt.golang v1.5.1 // indirect github.com/go-gorp/gorp v2.2.0+incompatible // indirect + github.com/go-openapi/swag/cmdutils v0.25.0 // indirect + github.com/go-openapi/swag/conv v0.25.0 // indirect + github.com/go-openapi/swag/fileutils v0.25.0 // indirect + github.com/go-openapi/swag/jsonname v0.25.0 // indirect + github.com/go-openapi/swag/jsonutils v0.25.0 // indirect + github.com/go-openapi/swag/loading v0.25.0 // indirect + github.com/go-openapi/swag/mangling v0.25.0 // indirect + github.com/go-openapi/swag/netutils v0.25.0 // indirect + github.com/go-openapi/swag/stringutils v0.25.0 // indirect + 
github.com/go-openapi/swag/typeutils v0.25.0 // indirect + github.com/go-openapi/swag/yamlutils v0.25.0 // indirect github.com/golang-migrate/migrate/v4 v4.19.0 // indirect github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 // indirect github.com/hashicorp/errwrap v1.1.0 // indirect @@ -28,20 +42,19 @@ require ( github.com/lib/pq v1.10.9 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/sapcc/go-api-declarations v1.17.4 // indirect - go.yaml.in/yaml/v2 v2.4.2 // indirect + go.yaml.in/yaml/v2 v2.4.3 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect sigs.k8s.io/structured-merge-diff/v6 v6.3.0 // indirect ) require ( cel.dev/expr v0.24.0 // indirect - github.com/antlr4-go/antlr/v4 v4.13.0 // indirect + github.com/antlr4-go/antlr/v4 v4.13.1 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/blang/semver/v4 v4.0.0 // indirect - github.com/cenkalti/backoff/v4 v4.3.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect - github.com/emicklei/go-restful/v3 v3.12.2 // indirect + github.com/emicklei/go-restful/v3 v3.13.0 // indirect github.com/evanphx/json-patch/v5 v5.9.11 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect github.com/fsnotify/fsnotify v1.9.0 // indirect @@ -49,69 +62,65 @@ require ( github.com/go-logr/logr v1.4.3 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-logr/zapr v1.3.0 // indirect - github.com/go-openapi/jsonpointer v0.21.0 // indirect - github.com/go-openapi/jsonreference v0.20.2 // indirect - github.com/go-openapi/swag v0.23.0 // indirect + github.com/go-openapi/jsonpointer v0.22.0 // indirect + github.com/go-openapi/jsonreference v0.21.1 // indirect + github.com/go-openapi/swag v0.25.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/google/btree v1.1.3 // indirect - github.com/google/cel-go v0.26.0 // indirect + github.com/google/cel-go v0.26.1 // indirect github.com/google/gnostic-models v0.7.0 
// indirect github.com/google/go-cmp v0.7.0 // indirect github.com/google/uuid v1.6.0 // indirect - github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect - github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect - github.com/mailru/easyjson v0.7.7 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect - github.com/pkg/errors v0.9.1 // indirect github.com/prometheus/client_golang v1.23.2 github.com/prometheus/client_model v0.6.2 github.com/prometheus/common v0.66.1 // indirect github.com/prometheus/procfs v0.17.0 // indirect - github.com/sapcc/go-bits v0.0.0-20250918190459-d63422aef730 - github.com/spf13/cobra v1.9.1 // indirect - github.com/spf13/pflag v1.0.6 // indirect - github.com/stoewer/go-strcase v1.3.0 // indirect + github.com/sapcc/go-bits v0.0.0-20250924092957-bcc75ecf4553 + github.com/spf13/cobra v1.10.1 // indirect + github.com/spf13/pflag v1.0.10 // indirect + github.com/stoewer/go-strcase v1.3.1 // indirect github.com/x448/float16 v0.8.4 // indirect - go.opentelemetry.io/auto/sdk v1.1.0 // indirect - go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0 // indirect - go.opentelemetry.io/otel v1.37.0 // indirect - go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.34.0 // indirect - go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.34.0 // indirect - go.opentelemetry.io/otel/metric v1.37.0 // indirect - go.opentelemetry.io/otel/sdk v1.35.0 // indirect - go.opentelemetry.io/otel/trace v1.37.0 // indirect - go.opentelemetry.io/proto/otlp v1.5.0 // indirect + go.opentelemetry.io/auto/sdk v1.2.1 // indirect + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp 
v0.63.0 // indirect + go.opentelemetry.io/otel v1.38.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0 // indirect + go.opentelemetry.io/otel/metric v1.38.0 // indirect + go.opentelemetry.io/otel/sdk v1.38.0 // indirect + go.opentelemetry.io/otel/trace v1.38.0 // indirect + go.opentelemetry.io/proto/otlp v1.8.0 // indirect go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.27.0 // indirect - golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect + golang.org/x/exp v0.0.0-20250911091902-df9299821621 // indirect golang.org/x/net v0.44.0 // indirect - golang.org/x/oauth2 v0.30.0 // indirect + golang.org/x/oauth2 v0.31.0 // indirect golang.org/x/sync v0.17.0 // indirect golang.org/x/sys v0.36.0 // indirect golang.org/x/term v0.35.0 // indirect golang.org/x/text v0.29.0 // indirect - golang.org/x/time v0.12.0 // indirect - gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20250303144028-a0af3efb3deb // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20250303144028-a0af3efb3deb // indirect - google.golang.org/grpc v1.72.1 // indirect - google.golang.org/protobuf v1.36.8 // indirect - gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect + golang.org/x/time v0.13.0 // indirect + gomodules.xyz/jsonpatch/v2 v2.5.0 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20250922171735-9219d122eba9 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20250922171735-9219d122eba9 // indirect + google.golang.org/grpc v1.75.1 // indirect + google.golang.org/protobuf v1.36.9 // indirect + gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect - gopkg.in/yaml.v3 v3.0.1 // indirect k8s.io/api v0.34.1 // indirect - k8s.io/apiextensions-apiserver v0.34.0 // indirect - k8s.io/apiserver v0.34.0 // indirect - k8s.io/component-base v0.34.0 // indirect + 
k8s.io/apiextensions-apiserver v0.34.1 // indirect + k8s.io/apiserver v0.34.1 // indirect + k8s.io/component-base v0.34.1 // indirect k8s.io/klog/v2 v2.130.1 // indirect - k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b // indirect - k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 // indirect - sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2 // indirect - sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect + k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 // indirect + k8s.io/utils v0.0.0-20250820121507-0af2bda4dd1d // indirect + sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.33.0 // indirect + sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect sigs.k8s.io/randfill v1.0.0 // indirect sigs.k8s.io/yaml v1.6.0 // indirect ) diff --git a/reservations/go.sum b/reservations/go.sum index 4ee1999d..bd6a9807 100644 --- a/reservations/go.sum +++ b/reservations/go.sum @@ -12,8 +12,8 @@ github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuy github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= -github.com/antlr4-go/antlr/v4 v4.13.0 h1:lxCg3LAv+EUK6t1i0y1V6/SLeUi0eKEKdhQAlS8TVTI= -github.com/antlr4-go/antlr/v4 v4.13.0/go.mod h1:pfChB/xh/Unjila75QW7+VU4TSnWnnk9UTnmpPaOR2g= +github.com/antlr4-go/antlr/v4 v4.13.1 h1:SqQKkuVZ+zWkMMNkjy5FZe5mr5WURWnlpmOuzYWrPrQ= +github.com/antlr4-go/antlr/v4 v4.13.1/go.mod h1:GKmUxMtwp6ZgGwZSva4eWPC5mS6vUAmOABFgjdkM7Nw= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/beorn7/perks v1.0.1 
h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= @@ -22,8 +22,8 @@ github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ= github.com/cenkalti/backoff v2.2.1+incompatible h1:tNowT99t7UNflLxfYYSlKYsBpXdEet03Pg2g16Swow4= github.com/cenkalti/backoff v2.2.1+incompatible/go.mod h1:90ReRw6GdpyfrHakVjL/QHaoyV4aDUVVkXQJJJ3NXXM= -github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= -github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= +github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM= +github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= @@ -34,7 +34,6 @@ github.com/containerd/errdefs v1.0.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151Xdx3ZPPE= github.com/containerd/errdefs/pkg v0.3.0/go.mod h1:NJw6s9HwNuRhnjJhM7pylWwMyAkmCQvQ4GpJHEqRLVk= github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= -github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -52,8 +51,8 @@ github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4 github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= 
github.com/eclipse/paho.mqtt.golang v1.5.1 h1:/VSOv3oDLlpqR2Epjn1Q7b2bSTplJIeV2ISgCl2W7nE= github.com/eclipse/paho.mqtt.golang v1.5.1/go.mod h1:1/yJCneuyOoCOzKSsOTUc0AJfpsItBGWvYpBLimhArU= -github.com/emicklei/go-restful/v3 v3.12.2 h1:DhwDP0vY3k8ZzE0RunuJy8GhNpPL6zqLkDf9B/a0/xU= -github.com/emicklei/go-restful/v3 v3.12.2/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/emicklei/go-restful/v3 v3.13.0 h1:C4Bl2xDndpU6nJ4bc1jXd+uTmYPVUwkD6bFY/oTyCes= +github.com/emicklei/go-restful/v3 v3.13.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= github.com/evanphx/json-patch v0.5.2 h1:xVCHIVMUu1wtM/VkR9jVZ45N3FhZfYMMYGorLCR8P3k= github.com/evanphx/json-patch v0.5.2/go.mod h1:ZWS5hhDbVDyob71nXKNL0+PWn6ToqBHMikGIFbs31qQ= github.com/evanphx/json-patch/v5 v5.9.11 h1:/8HVnzMq13/3x9TPvjG08wUGqBTmZBsCWzjTM0wiaDU= @@ -77,14 +76,36 @@ github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg= -github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs= -github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ= -github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY= -github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE= -github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= -github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= -github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE= -github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= +github.com/go-openapi/jsonpointer v0.22.0 
h1:TmMhghgNef9YXxTu1tOopo+0BGEytxA+okbry0HjZsM= +github.com/go-openapi/jsonpointer v0.22.0/go.mod h1:xt3jV88UtExdIkkL7NloURjRQjbeUgcxFblMjq2iaiU= +github.com/go-openapi/jsonreference v0.21.1 h1:bSKrcl8819zKiOgxkbVNRUBIr6Wwj9KYrDbMjRs0cDA= +github.com/go-openapi/jsonreference v0.21.1/go.mod h1:PWs8rO4xxTUqKGu+lEvvCxD5k2X7QYkKAepJyCmSTT8= +github.com/go-openapi/swag v0.25.0 h1:xyZhlgInBg6wOtyTD5b+pzwVqHSOliAvgvKW+POFUts= +github.com/go-openapi/swag v0.25.0/go.mod h1:yhsa7GJvO1JBFZccLq9uh/MawsC0PQd8sNz88VBXQlU= +github.com/go-openapi/swag/cmdutils v0.25.0 h1:iYZ24DEGPEk6L1jO09vw39KfpxbG7KhS+WeQexS8U5A= +github.com/go-openapi/swag/cmdutils v0.25.0/go.mod h1:pdae/AFo6WxLl5L0rq87eRzVPm/XRHM3MoYgRMvG4A0= +github.com/go-openapi/swag/conv v0.25.0 h1:5K+e44HkOgCVE0IJTbivurzHahT62DPr2DEJqR/+4pA= +github.com/go-openapi/swag/conv v0.25.0/go.mod h1:oa1ZZnb1jubNdZlD1iAhGXt6Ic4hHtuO23MwTgAXR88= +github.com/go-openapi/swag/fileutils v0.25.0 h1:t7aQRuRfsP29dY4vfrNvDZv7RurwRHuyjUedtYVDmYY= +github.com/go-openapi/swag/fileutils v0.25.0/go.mod h1:+NXtt5xNZZqmpIpjqcujqojGFek9/w55b3ecmOdtg8M= +github.com/go-openapi/swag/jsonname v0.25.0 h1:+fuNs9gdkb2w10hgsgOBx9jtx0pvtUaDRYxD91BEpEQ= +github.com/go-openapi/swag/jsonname v0.25.0/go.mod h1:71Tekow6UOLBD3wS7XhdT98g5J5GR13NOTQ9/6Q11Zo= +github.com/go-openapi/swag/jsonutils v0.25.0 h1:ELKpJT29T4N/AvmDqMeDFLx2QRZQOYFthzctbIX30+A= +github.com/go-openapi/swag/jsonutils v0.25.0/go.mod h1:KYL8GyGoi6tek9ajpvn0le4BWmKoUVVv8yPxklViIMo= +github.com/go-openapi/swag/jsonutils/fixtures_test v0.25.0 h1:ca9vKxLnJegL2bzqXRWNabKdqVGxBzrnO8/UZnr5W0Y= +github.com/go-openapi/swag/jsonutils/fixtures_test v0.25.0/go.mod h1:kjmweouyPwRUEYMSrbAidoLMGeJ5p6zdHi9BgZiqmsg= +github.com/go-openapi/swag/loading v0.25.0 h1:e9mjE5fJeaK0LTepHMtG0Ief+9ETXLFhWCx7ZfiI6LI= +github.com/go-openapi/swag/loading v0.25.0/go.mod h1:2ZCWXwVY1XYuoue8Bdjbn5GJK4/ufXbCfcvoSPFQJqM= +github.com/go-openapi/swag/mangling v0.25.0 h1:VdTfDWX5lS3yURxYHF5SK7kYelSK69Lv2xEAeudTzM8= 
+github.com/go-openapi/swag/mangling v0.25.0/go.mod h1:CdiMQ6pnfAgyQGSOIYnZkXvqhnnwOn997uXZMAd/7mQ= +github.com/go-openapi/swag/netutils v0.25.0 h1:/e1LPmXfF9fcOYbbaP3+SQgon1fRwe5EZ0FjpR4vAjs= +github.com/go-openapi/swag/netutils v0.25.0/go.mod h1:CAkkvqnUJX8NV96tNhEQvKz8SQo2KF0f7LleiJwIeRE= +github.com/go-openapi/swag/stringutils v0.25.0 h1:iYfCF45GUeI/1Yrh8rQtTFCp5K1ToqWhUdzJZwvXvv8= +github.com/go-openapi/swag/stringutils v0.25.0/go.mod h1:JLdSAq5169HaiDUbTvArA2yQxmgn4D6h4A+4HqVvAYg= +github.com/go-openapi/swag/typeutils v0.25.0 h1:iUTsxu3F3h9v6CBzVFGXKPSBQt6d8XXgYy1YAlu+HJ8= +github.com/go-openapi/swag/typeutils v0.25.0/go.mod h1:9McMC/oCdS4BKwk2shEB7x17P6HmMmA6dQRtAkSnNb8= +github.com/go-openapi/swag/yamlutils v0.25.0 h1:apgy77seWLEM9HKDcieIgW8bG9aSZgH6nQ9THlHYgHA= +github.com/go-openapi/swag/yamlutils v0.25.0/go.mod h1:0JvBRtc0mR02IqHURUeGgS9cG+Dfms4FCGXCnsgnt7c= github.com/go-sql-driver/mysql v1.9.2 h1:4cNKDYQ1I84SXslGddlsrMhc8k4LeDVj6Ad6WRjiHuU= github.com/go-sql-driver/mysql v1.9.2/go.mod h1:qn46aNg1333BRMNU69Lq93t8du/dwxI64Gl8i5p1WMU= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= @@ -102,8 +123,8 @@ github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg= github.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4= -github.com/google/cel-go v0.26.0 h1:DPGjXackMpJWH680oGY4lZhYjIameYmR+/6RBdDGmaI= -github.com/google/cel-go v0.26.0/go.mod h1:A9O8OU9rdvrK5MQyrqfIxo1a0u4g3sF8KB6PUIaryMM= +github.com/google/cel-go v0.26.1 h1:iPbVVEdkhTX++hpe3lzSk7D3G3QSYqLGoHOcEio+UXQ= +github.com/google/cel-go v0.26.1/go.mod h1:A9O8OU9rdvrK5MQyrqfIxo1a0u4g3sF8KB6PUIaryMM= github.com/google/gnostic-models v0.7.0 h1:qwTtogB15McXDaNqTZdzPJRHvaVJlAl+HVQnLmJEJxo= github.com/google/gnostic-models v0.7.0/go.mod 
h1:whL5G0m6dmc5cPxKc5bdKdEN3UjI7OUGxBlw57miDrQ= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= @@ -120,8 +141,8 @@ github.com/gophercloud/gophercloud/v2 v2.8.0 h1:of2+8tT6+FbEYHfYC8GBu8TXJNsXYSNm github.com/gophercloud/gophercloud/v2 v2.8.0/go.mod h1:Ki/ILhYZr/5EPebrPL9Ej+tUg4lqx71/YH2JWVeU+Qk= github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 h1:JeSE6pjso5THxAzdVpqr6/geYxZytqFMBCOtn/ujyeo= github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674/go.mod h1:r4w70xmWCQKmi1ONH4KIaBptdivuRPyosB9RmPlGEwA= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3 h1:5ZPtiqj0JL5oKWmcsq4VMaAW5ukBEgSGXEN89zeH1Jo= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3/go.mod h1:ndYquD05frm2vACXE1nsccT4oJzjhw2arTS2cpUD1PI= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 h1:8Tjv8EJ+pM1xP8mK6egEbD1OgnVTyacbefKhmbLhIhU= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2/go.mod h1:pkJQ2tZHJ0aFOVEEot6oZmaVEZcRme73eIFmhiVuRWs= github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I= github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= @@ -129,8 +150,6 @@ github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+l github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= -github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= -github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= github.com/json-iterator/go v1.1.8/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= 
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= @@ -142,19 +161,16 @@ github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zt github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= -github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= -github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= -github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= -github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= -github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/majewsky/gg v1.3.0 h1:1bBuQ+S1u9wuD8YT2OMdngEUctwv+xx5D6bZTd1lAto= +github.com/majewsky/gg v1.3.0/go.mod h1:KC7qUlln1VBY90OE0jXMNjXW2b9B4jJ1heYQ08OzeAg= github.com/mattn/go-sqlite3 v1.14.6/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU= github.com/mattn/go-sqlite3 v1.14.32 h1:JD12Ag3oLy1zQA+BNn74xRgaBbdhbNIDYvQUEuuErjs= github.com/mattn/go-sqlite3 v1.14.32/go.mod 
h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= @@ -218,25 +234,26 @@ github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsT github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A= github.com/prometheus/procfs v0.17.0 h1:FuLQ+05u4ZI+SS/w9+BWEM2TXiHKsUQ9TADiRH7DuK0= github.com/prometheus/procfs v0.17.0/go.mod h1:oPQLaDAMRbA+u8H5Pbfq+dl3VDAvHxMUOVhe0wYB2zw= -github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= -github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= +github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= +github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/sapcc/go-api-declarations v1.17.4 h1:F4smuE9x1NJ/7NAdytJ1wekeXT3QeRaYu3L/HyWKqqo= github.com/sapcc/go-api-declarations v1.17.4/go.mod h1:MWmLjmvjftgyAugNUfIhsDsHIzXH1pn32cWLZpiluKg= -github.com/sapcc/go-bits v0.0.0-20250918190459-d63422aef730 h1:JZ0b9IxW5A6XdbaTZWTXDD8Mf5blABivAOOwVtVQCSc= -github.com/sapcc/go-bits v0.0.0-20250918190459-d63422aef730/go.mod h1:YfKAF5oNv8UD/zkOjK+dOVft8DUZq5KphjPy2IEI+QU= +github.com/sapcc/go-bits v0.0.0-20250924092957-bcc75ecf4553 h1:CWbbQgtHq+RPAaPjPZC7z2uJJhaRGJMBymnnzXaWFj8= +github.com/sapcc/go-bits v0.0.0-20250924092957-bcc75ecf4553/go.mod h1:+hy4RXW/4ZnFl/Ct7vBl9cnLEA9Lt/BKYYGoxZkwLZY= github.com/sergi/go-diff v1.4.0 h1:n/SP9D5ad1fORl+llWyN+D6qoUETXNZARKjyY2/KVCw= github.com/sergi/go-diff v1.4.0/go.mod h1:A0bzQcvG0E7Rwjx0REVgAGH58e96+X0MeOfepqsbeW4= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3/go.mod 
h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= -github.com/spf13/cobra v1.9.1 h1:CXSaggrXdbHK9CF+8ywj8Amf7PBRmPCOJugH954Nnlo= -github.com/spf13/cobra v1.9.1/go.mod h1:nDyEzZ8ogv936Cinf6g1RU9MRY64Ir93oCnqb9wxYW0= -github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= -github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= -github.com/stoewer/go-strcase v1.3.0 h1:g0eASXYtp+yvN9fK8sH94oCIk0fau9uV1/ZdJ0AVEzs= -github.com/stoewer/go-strcase v1.3.0/go.mod h1:fAH5hQ5pehh+j3nZfvwdk2RgEgQjAoM8wodgtPmh1xo= +github.com/spf13/cobra v1.10.1 h1:lJeBwCfmrnXthfAupyUTzJ/J4Nc1RsHC/mSRU2dll/s= +github.com/spf13/cobra v1.10.1/go.mod h1:7SmJGaTHFVBY0jW4NXGluQoLvhqFQM+6XSKD+P4XaB0= +github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk= +github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stoewer/go-strcase v1.3.1 h1:iS0MdW+kVTxgMoE1LAZyMiYJFKlOzLooE4MxjirtkAs= +github.com/stoewer/go-strcase v1.3.1/go.mod h1:fAH5hQ5pehh+j3nZfvwdk2RgEgQjAoM8wodgtPmh1xo= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= @@ -256,42 +273,42 @@ github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/ziutek/mymysql v1.5.4 h1:GB0qdRGsTwQSBVYuVShFBKaXSnSnYYC2d9knnE1LHFs= github.com/ziutek/mymysql v1.5.4/go.mod h1:LMSpPZ6DbqWFxNCHW77HeMg9I646SAhApZ/wKdgO/C0= -go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= -go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= 
-go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0 h1:sbiXRNDSWJOTobXh5HyQKjq6wUC5tNybqjIqDpAY4CU= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0/go.mod h1:69uWxva0WgAA/4bu2Yy70SLDBwZXuQ6PbBpbsa5iZrQ= -go.opentelemetry.io/otel v1.37.0 h1:9zhNfelUvx0KBfu/gb+ZgeAfAgtWrfHJZcAqFC228wQ= -go.opentelemetry.io/otel v1.37.0/go.mod h1:ehE/umFRLnuLa/vSccNq9oS1ErUlkkK71gMcN34UG8I= -go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.34.0 h1:OeNbIYk/2C15ckl7glBlOBp5+WlYsOElzTNmiPW/x60= -go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.34.0/go.mod h1:7Bept48yIeqxP2OZ9/AqIpYS94h2or0aB4FypJTc8ZM= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.34.0 h1:tgJ0uaNS4c98WRNUEx5U3aDlrDOI5Rs+1Vifcw4DJ8U= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.34.0/go.mod h1:U7HYyW0zt/a9x5J1Kjs+r1f/d4ZHnYFclhYY2+YbeoE= -go.opentelemetry.io/otel/metric v1.37.0 h1:mvwbQS5m0tbmqML4NqK+e3aDiO02vsf/WgbsdpcPoZE= -go.opentelemetry.io/otel/metric v1.37.0/go.mod h1:04wGrZurHYKOc+RKeye86GwKiTb9FKm1WHtO+4EVr2E= -go.opentelemetry.io/otel/sdk v1.35.0 h1:iPctf8iprVySXSKJffSS79eOjl9pvxV9ZqOWT0QejKY= -go.opentelemetry.io/otel/sdk v1.35.0/go.mod h1:+ga1bZliga3DxJ3CQGg3updiaAJoNECOgJREo9KHGQg= -go.opentelemetry.io/otel/sdk/metric v1.35.0 h1:1RriWBmCKgkeHEhM7a2uMjMUfP7MsOF5JpUCaEqEI9o= -go.opentelemetry.io/otel/sdk/metric v1.35.0/go.mod h1:is6XYCUMpcKi+ZsOvfluY5YstFnhW0BidkR+gL+qN+w= -go.opentelemetry.io/otel/trace v1.37.0 h1:HLdcFNbRQBE2imdSEgm/kwqmQj1Or1l/7bW6mxVK7z4= -go.opentelemetry.io/otel/trace v1.37.0/go.mod h1:TlgrlQ+PtQO5XFerSPUYG0JSgGyryXewPGyayAWSBS0= -go.opentelemetry.io/proto/otlp v1.5.0 h1:xJvq7gMzB31/d406fB8U5CBdyQGw4P399D1aQWU/3i4= -go.opentelemetry.io/proto/otlp v1.5.0/go.mod h1:keN8WnHxOy8PG0rQZjJJ5A2ebUoafqWp0eVQ4yIXvJ4= +go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= +go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= 
+go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 h1:RbKq8BG0FI8OiXhBfcRtqqHcZcka+gU3cskNuf05R18= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0/go.mod h1:h06DGIukJOevXaj/xrNjhi/2098RZzcLTbc0jDAUbsg= +go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8= +go.opentelemetry.io/otel v1.38.0/go.mod h1:zcmtmQ1+YmQM9wrNsTGV/q/uyusom3P8RxwExxkZhjM= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 h1:GqRJVj7UmLjCVyVJ3ZFLdPRmhDUp2zFmQe3RHIOsw24= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0/go.mod h1:ri3aaHSmCTVYu2AWv44YMauwAQc0aqI9gHKIcSbI1pU= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0 h1:lwI4Dc5leUqENgGuQImwLo4WnuXFPetmPpkLi2IrX54= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0/go.mod h1:Kz/oCE7z5wuyhPxsXDuaPteSWqjSBD5YaSdbxZYGbGk= +go.opentelemetry.io/otel/metric v1.38.0 h1:Kl6lzIYGAh5M159u9NgiRkmoMKjvbsKtYRwgfrA6WpA= +go.opentelemetry.io/otel/metric v1.38.0/go.mod h1:kB5n/QoRM8YwmUahxvI3bO34eVtQf2i4utNVLr9gEmI= +go.opentelemetry.io/otel/sdk v1.38.0 h1:l48sr5YbNf2hpCUj/FoGhW9yDkl+Ma+LrVl8qaM5b+E= +go.opentelemetry.io/otel/sdk v1.38.0/go.mod h1:ghmNdGlVemJI3+ZB5iDEuk4bWA3GkTpW+DOoZMYBVVg= +go.opentelemetry.io/otel/sdk/metric v1.38.0 h1:aSH66iL0aZqo//xXzQLYozmWrXxyFkBJ6qT5wthqPoM= +go.opentelemetry.io/otel/sdk/metric v1.38.0/go.mod h1:dg9PBnW9XdQ1Hd6ZnRz689CbtrUp0wMMs9iPcgT9EZA= +go.opentelemetry.io/otel/trace v1.38.0 h1:Fxk5bKrDZJUH+AMyyIXGcFAPah0oRcT+LuNtJrmcNLE= +go.opentelemetry.io/otel/trace v1.38.0/go.mod h1:j1P9ivuFsTceSWe1oY+EeW3sc+Pp42sO++GHkg4wwhs= +go.opentelemetry.io/proto/otlp v1.8.0 h1:fRAZQDcAFHySxpJ1TwlA1cJ4tvcrw7nXl9xWWC8N5CE= +go.opentelemetry.io/proto/otlp v1.8.0/go.mod h1:tIeYOeNBU4cvmPqpaji1P+KbB4Oloai8wN4rWzRrFF0= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.uber.org/multierr v1.11.0 
h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= -go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI= -go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU= +go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0= +go.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8= go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 h1:2dVuKD2vS7b0QIHQbpyTISPd0LeHDbnYEryqj5Q1ug8= -golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY= +golang.org/x/exp v0.0.0-20250911091902-df9299821621 h1:2id6c1/gto0kaHYyrixvknJ8tUK/Qs5IsmBtrc+FtgU= +golang.org/x/exp v0.0.0-20250911091902-df9299821621/go.mod h1:TwQYMMnGpvZyc+JpB/UAuTNIsVJifOlSkrZkhcvpVUk= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -302,8 +319,8 @@ golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net 
v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.44.0 h1:evd8IRDyfNBMBTTY5XRF1vaZlD+EmWx6x8PkhR04H/I= golang.org/x/net v0.44.0/go.mod h1:ECOoLqd5U3Lhyeyo/QDCEVQ4sNgYsqvCZ722XogGieY= -golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI= -golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU= +golang.org/x/oauth2 v0.31.0 h1:8Fq0yVZLh4j4YA47vHKFTa9Ew5XIrCP8LC6UeNZnLxo= +golang.org/x/oauth2 v0.31.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -326,34 +343,36 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk= golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4= -golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE= -golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= +golang.org/x/time v0.13.0 h1:eUlYslOIt32DgYD6utsuUeHs4d7AsEYLuIAdg7FlYgI= +golang.org/x/time v0.13.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools 
v0.36.0 h1:kWS0uv/zsvHEle1LbV5LE8QujrxB3wfQyxHfhOk0Qkg= -golang.org/x/tools v0.36.0/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s= +golang.org/x/tools v0.37.0 h1:DVSRzp7FwePZW356yEAChSdNcQo6Nsp+fex1SUW09lE= +golang.org/x/tools v0.37.0/go.mod h1:MBN5QPQtLMHVdvsbtarmTNukZDdgwdwlO5qGacAzF0w= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw= -gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= -google.golang.org/genproto/googleapis/api v0.0.0-20250303144028-a0af3efb3deb h1:p31xT4yrYrSM/G4Sn2+TNUkVhFCbG9y8itM2S6Th950= -google.golang.org/genproto/googleapis/api v0.0.0-20250303144028-a0af3efb3deb/go.mod h1:jbe3Bkdp+Dh2IrslsFCklNhweNTBgSYanP1UXhJDhKg= -google.golang.org/genproto/googleapis/rpc v0.0.0-20250303144028-a0af3efb3deb h1:TLPQVbx1GJ8VKZxz52VAxl1EBgKXXbTiU9Fc5fZeLn4= -google.golang.org/genproto/googleapis/rpc v0.0.0-20250303144028-a0af3efb3deb/go.mod h1:LuRYeWDFV6WOn90g357N17oMCaxpgCnbi/44qJvDn2I= -google.golang.org/grpc v1.72.1 h1:HR03wO6eyZ7lknl75XlxABNVLLFc2PAb6mHlYh756mA= -google.golang.org/grpc v1.72.1/go.mod h1:wH5Aktxcg25y1I3w7H69nHfXdOG3UiadoBtjh3izSDM= -google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc= -google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU= +gomodules.xyz/jsonpatch/v2 v2.5.0 h1:JELs8RLM12qJGXU4u/TO3V25KW8GreMKl9pdkk14RM0= +gomodules.xyz/jsonpatch/v2 v2.5.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= +gonum.org/v1/gonum v0.16.0 
h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= +gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= +google.golang.org/genproto/googleapis/api v0.0.0-20250922171735-9219d122eba9 h1:jm6v6kMRpTYKxBRrDkYAitNJegUeO1Mf3Kt80obv0gg= +google.golang.org/genproto/googleapis/api v0.0.0-20250922171735-9219d122eba9/go.mod h1:LmwNphe5Afor5V3R5BppOULHOnt2mCIf+NxMd4XiygE= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250922171735-9219d122eba9 h1:V1jCN2HBa8sySkR5vLcCSqJSTMv093Rw9EJefhQGP7M= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250922171735-9219d122eba9/go.mod h1:HSkG/KdJWusxU1F6CNrwNDjBMgisKxGnc5dAZfT0mjQ= +google.golang.org/grpc v1.75.1 h1:/ODCNEuf9VghjgO3rqLcfg8fiOP0nSluljWFlDxELLI= +google.golang.org/grpc v1.75.1/go.mod h1:JtPAzKiq4v1xcAB2hydNlWI2RnF85XXcV0mhKXr2ecQ= +google.golang.org/protobuf v1.36.9 h1:w2gp2mA27hUeUzj9Ex9FBjsBm40zfaDtEWow293U7Iw= +google.golang.org/protobuf v1.36.9/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= -gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4= -gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= +gopkg.in/evanphx/json-patch.v4 v4.13.0 h1:czT3CmqEaQ1aanPc5SdlgQrrEIb8w/wwCvWWnfEbYzo= +gopkg.in/evanphx/json-patch.v4 v4.13.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= @@ 
-363,28 +382,28 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= k8s.io/api v0.34.1 h1:jC+153630BMdlFukegoEL8E/yT7aLyQkIVuwhmwDgJM= k8s.io/api v0.34.1/go.mod h1:SB80FxFtXn5/gwzCoN6QCtPD7Vbu5w2n1S0J5gFfTYk= -k8s.io/apiextensions-apiserver v0.34.0 h1:B3hiB32jV7BcyKcMU5fDaDxk882YrJ1KU+ZSkA9Qxoc= -k8s.io/apiextensions-apiserver v0.34.0/go.mod h1:hLI4GxE1BDBy9adJKxUxCEHBGZtGfIg98Q+JmTD7+g0= +k8s.io/apiextensions-apiserver v0.34.1 h1:NNPBva8FNAPt1iSVwIE0FsdrVriRXMsaWFMqJbII2CI= +k8s.io/apiextensions-apiserver v0.34.1/go.mod h1:hP9Rld3zF5Ay2Of3BeEpLAToP+l4s5UlxiHfqRaRcMc= k8s.io/apimachinery v0.34.1 h1:dTlxFls/eikpJxmAC7MVE8oOeP1zryV7iRyIjB0gky4= k8s.io/apimachinery v0.34.1/go.mod h1:/GwIlEcWuTX9zKIg2mbw0LRFIsXwrfoVxn+ef0X13lw= -k8s.io/apiserver v0.34.0 h1:Z51fw1iGMqN7uJ1kEaynf2Aec1Y774PqU+FVWCFV3Jg= -k8s.io/apiserver v0.34.0/go.mod h1:52ti5YhxAvewmmpVRqlASvaqxt0gKJxvCeW7ZrwgazQ= +k8s.io/apiserver v0.34.1 h1:U3JBGdgANK3dfFcyknWde1G6X1F4bg7PXuvlqt8lITA= +k8s.io/apiserver v0.34.1/go.mod h1:eOOc9nrVqlBI1AFCvVzsob0OxtPZUCPiUJL45JOTBG0= k8s.io/client-go v0.34.1 h1:ZUPJKgXsnKwVwmKKdPfw4tB58+7/Ik3CrjOEhsiZ7mY= k8s.io/client-go v0.34.1/go.mod h1:kA8v0FP+tk6sZA0yKLRG67LWjqufAoSHA2xVGKw9Of8= -k8s.io/component-base v0.34.0 h1:bS8Ua3zlJzapklsB1dZgjEJuJEeHjj8yTu1gxE2zQX8= -k8s.io/component-base v0.34.0/go.mod h1:RSCqUdvIjjrEm81epPcjQ/DS+49fADvGSCkIP3IC6vg= +k8s.io/component-base v0.34.1 h1:v7xFgG+ONhytZNFpIz5/kecwD+sUhVE6HU7qQUiRM4A= +k8s.io/component-base v0.34.1/go.mod h1:mknCpLlTSKHzAQJJnnHVKqjxR7gBeHRv0rPXA7gdtQ0= k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= -k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b h1:MloQ9/bdJyIu9lb1PzujOPolHyvO06MXG5TUIj2mNAA= -k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b/go.mod h1:UZ2yyWbFTpuhSbFhv24aGNOdoRdJZgsIObGBUaYVsts= 
-k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 h1:hwvWFiBzdWw1FhfY1FooPn3kzWuJ8tmbZBHi4zVsl1Y= -k8s.io/utils v0.0.0-20250604170112-4c0f3b243397/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= -sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2 h1:jpcvIRr3GLoUoEKRkHKSmGjxb6lWwrBlJsXc+eUYQHM= -sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2/go.mod h1:Ve9uj1L+deCXFrPOk1LpFXqTg7LCFzFso6PA48q/XZw= +k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 h1:Y3gxNAuB0OBLImH611+UDZcmKS3g6CthxToOb37KgwE= +k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912/go.mod h1:kdmbQkyfwUagLfXIad1y2TdrjPFWp2Q89B3qkRwf/pQ= +k8s.io/utils v0.0.0-20250820121507-0af2bda4dd1d h1:wAhiDyZ4Tdtt7e46e9M5ZSAJ/MnPGPs+Ki1gHw4w1R0= +k8s.io/utils v0.0.0-20250820121507-0af2bda4dd1d/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.33.0 h1:qPrZsv1cwQiFeieFlRqT627fVZ+tyfou/+S5S0H5ua0= +sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.33.0/go.mod h1:Ve9uj1L+deCXFrPOk1LpFXqTg7LCFzFso6PA48q/XZw= sigs.k8s.io/controller-runtime v0.22.1 h1:Ah1T7I+0A7ize291nJZdS1CabF/lB4E++WizgV24Eqg= sigs.k8s.io/controller-runtime v0.22.1/go.mod h1:FwiwRjkRPbiN+zp2QRp7wlTCzbUXxZ/D4OzuQUDwBHY= -sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE= -sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= +sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 h1:IpInykpT6ceI+QxKBbEflcR5EXP7sU1kvOlxwZh5txg= +sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= sigs.k8s.io/structured-merge-diff/v6 v6.3.0 h1:jTijUJbW353oVOd9oTlifJqOGEkUw2jB/fXCbTiQEco= From d32ec8eebb23c0b817b922f5e06b5a70fb1cc25d Mon Sep 17 00:00:00 
2001 From: Philipp Matthes Date: Thu, 25 Sep 2025 14:25:07 +0200 Subject: [PATCH 22/58] Add description printer column --- decisions/api/v1alpha1/schedulingdecision_types.go | 1 + .../config/crd/bases/decisions.cortex_schedulingdecisions.yaml | 3 +++ decisions/config/crd/decisions.cortex_schedulingdecisions.yaml | 3 +++ .../templates/crd/decisions.cortex_schedulingdecisions.yaml | 3 +++ 4 files changed, 10 insertions(+) diff --git a/decisions/api/v1alpha1/schedulingdecision_types.go b/decisions/api/v1alpha1/schedulingdecision_types.go index 6aaa2344..e3d21546 100644 --- a/decisions/api/v1alpha1/schedulingdecision_types.go +++ b/decisions/api/v1alpha1/schedulingdecision_types.go @@ -49,6 +49,7 @@ type SchedulingDecisionStatus struct { // +kubebuilder:printcolumn:name="State",type="string",JSONPath=".status.state" // +kubebuilder:printcolumn:name="Error",type="string",JSONPath=".status.error" // +kubebuilder:printcolumn:name="Created",type="date",JSONPath=".metadata.creationTimestamp" +// +kubebuilder:printcolumn:name="Description",type="string",JSONPath=".status.description" // SchedulingDecision is the Schema for the schedulingdecisions API type SchedulingDecision struct { diff --git a/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml b/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml index d815ebdd..7090714c 100644 --- a/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml +++ b/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml @@ -27,6 +27,9 @@ spec: - jsonPath: .metadata.creationTimestamp name: Created type: date + - jsonPath: .status.description + name: Description + type: string name: v1alpha1 schema: openAPIV3Schema: diff --git a/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml b/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml index d815ebdd..7090714c 100644 --- a/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml +++ 
b/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml @@ -27,6 +27,9 @@ spec: - jsonPath: .metadata.creationTimestamp name: Created type: date + - jsonPath: .status.description + name: Description + type: string name: v1alpha1 schema: openAPIV3Schema: diff --git a/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml b/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml index 7d8f6c84..195b43ad 100644 --- a/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml +++ b/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml @@ -33,6 +33,9 @@ spec: - jsonPath: .metadata.creationTimestamp name: Created type: date + - jsonPath: .status.description + name: Description + type: string name: v1alpha1 schema: openAPIV3Schema: From 1c1406c2fff2d803c26b396a8baadf19eb27c97d Mon Sep 17 00:00:00 2001 From: mblos <> Date: Thu, 25 Sep 2025 14:25:31 +0200 Subject: [PATCH 23/58] Update step impact --- decisions/internal/controller/controller.go | 268 +++++++++++++++++- .../internal/controller/controller_test.go | 14 +- 2 files changed, 262 insertions(+), 20 deletions(-) diff --git a/decisions/internal/controller/controller.go b/decisions/internal/controller/controller.go index 494071f0..8a341167 100644 --- a/decisions/internal/controller/controller.go +++ b/decisions/internal/controller/controller.go @@ -65,6 +65,9 @@ func (r *SchedulingDecisionReconciler) Reconcile(ctx context.Context, req ctrl.R // Calculate final scores with full pipeline finalScores, deletedHosts := r.calculateScores(res.Spec.Input, res.Spec.Pipeline.Outputs) + // Calculate step-by-step impact for the winner + stepImpacts := r.calculateStepImpacts(res.Spec.Input, res.Spec.Pipeline.Outputs, finalScores) + // Find minimal critical path criticalSteps, criticalStepCount := r.findCriticalSteps(res.Spec.Input, res.Spec.Pipeline.Outputs, finalScores) @@ -72,7 +75,7 @@ func (r *SchedulingDecisionReconciler) Reconcile(ctx 
context.Context, req ctrl.R res.Status.Error = "" // Sort finalScores by score (highest to lowest) and generate enhanced description - orderedScores, description := r.generateOrderedScoresAndDescription(finalScores, res.Spec.Input, criticalSteps, criticalStepCount, len(res.Spec.Pipeline.Outputs)) + orderedScores, description := r.generateOrderedScoresAndDescription(finalScores, res.Spec.Input, criticalSteps, criticalStepCount, len(res.Spec.Pipeline.Outputs), stepImpacts) res.Status.FinalScores = orderedScores res.Status.DeletedHosts = deletedHosts @@ -176,9 +179,112 @@ func (r *SchedulingDecisionReconciler) findCriticalSteps(input map[string]float6 return criticalSteps, len(criticalSteps) } +// StepImpact represents the impact of a single pipeline step on the winning host +type StepImpact struct { + Step string + ScoreBefore float64 + ScoreAfter float64 + ScoreDelta float64 + CompetitorsRemoved int + PromotedToFirst bool +} + +// calculateStepImpacts tracks how each pipeline step affects the final winner +func (r *SchedulingDecisionReconciler) calculateStepImpacts(input map[string]float64, outputs []v1alpha1.SchedulingDecisionPipelineOutputSpec, finalScores map[string]float64) []StepImpact { + if len(finalScores) == 0 || len(outputs) == 0 { + return []StepImpact{} + } + + // Find the final winner + finalWinner := "" + maxScore := float64(-999999) + for host, score := range finalScores { + if score > maxScore { + maxScore = score + finalWinner = host + } + } + + if finalWinner == "" { + return []StepImpact{} + } + + stepImpacts := make([]StepImpact, 0, len(outputs)) + currentScores := make(map[string]float64) + + // Start with input values as initial scores + for hostName, inputValue := range input { + currentScores[hostName] = inputValue + } + + // Track score before first step + scoreBefore := currentScores[finalWinner] + + // Process each pipeline step and track the winner's evolution + for _, output := range outputs { + // Count how many competitors will be 
removed in this step + competitorsRemoved := 0 + for hostName := range currentScores { + if hostName != finalWinner { + if _, exists := output.Activations[hostName]; !exists { + competitorsRemoved++ + } + } + } + + // Check if winner was #1 before this step + wasFirst := true + winnerScoreBefore := currentScores[finalWinner] + for host, score := range currentScores { + if host != finalWinner && score > winnerScoreBefore { + wasFirst = false + break + } + } + + // Apply activations and remove hosts not in this step + newScores := make(map[string]float64) + for hostName, score := range currentScores { + if activation, exists := output.Activations[hostName]; exists { + newScores[hostName] = score + activation + } + // Hosts not in activations are removed (don't copy to newScores) + } + + // Get winner's score after this step + scoreAfter := newScores[finalWinner] + + // Check if winner became #1 after this step + isFirstAfter := true + for host, score := range newScores { + if host != finalWinner && score > scoreAfter { + isFirstAfter = false + break + } + } + + promotedToFirst := !wasFirst && isFirstAfter + + stepImpacts = append(stepImpacts, StepImpact{ + Step: output.Step, + ScoreBefore: scoreBefore, + ScoreAfter: scoreAfter, + ScoreDelta: scoreAfter - scoreBefore, + CompetitorsRemoved: competitorsRemoved, + PromotedToFirst: promotedToFirst, + }) + + // Update for next iteration + currentScores = newScores + scoreBefore = scoreAfter + } + + return stepImpacts +} + // generateOrderedScoresAndDescription sorts final scores by value (highest to lowest) -// and generates a brief description with highest host, certainty, host count, input comparison, and critical path -func (r *SchedulingDecisionReconciler) generateOrderedScoresAndDescription(finalScores map[string]float64, inputScores map[string]float64, criticalSteps []string, criticalStepCount int, totalSteps int) (map[string]float64, string) { +// and generates a brief description with highest host, certainty, host 
count, input comparison, step impacts, and critical path +func (r *SchedulingDecisionReconciler) generateOrderedScoresAndDescription(finalScores map[string]float64, inputScores map[string]float64, criticalSteps []string, criticalStepCount int, totalSteps int, stepImpacts []StepImpact) (map[string]float64, string) { totalInputHosts := len(inputScores) if len(finalScores) == 0 { return finalScores, fmt.Sprintf("No hosts remaining after filtering, %d hosts evaluated", totalInputHosts) @@ -232,7 +338,7 @@ func (r *SchedulingDecisionReconciler) generateOrderedScoresAndDescription(final // Generate main description var description string if len(sortedHosts) == 1 { - description = fmt.Sprintf("Selected: %s (score: %.2f), certainty: perfect, %d hosts evaluated", + description = fmt.Sprintf("Selected: %s (score: %.2f), certainty: perfect, %d hosts evaluated.", sortedHosts[0].host, sortedHosts[0].score, totalInputHosts) } else { // Calculate certainty based on gap between 1st and 2nd place @@ -246,7 +352,7 @@ func (r *SchedulingDecisionReconciler) generateOrderedScoresAndDescription(final certainty = "low" } - description = fmt.Sprintf("Selected: %s (score: %.2f), certainty: %s (gap: %.2f), %d hosts evaluated", + description = fmt.Sprintf("Selected: %s (score: %.2f), certainty: %s (gap: %.2f), %d hosts evaluated.", sortedHosts[0].host, sortedHosts[0].score, certainty, gap, totalInputHosts) } @@ -254,7 +360,7 @@ func (r *SchedulingDecisionReconciler) generateOrderedScoresAndDescription(final var comparison string if inputWinner == finalWinner { // Input choice confirmed - comparison = fmt.Sprintf("\nInput choice confirmed: %s (%.2f→%.2f, remained #1).", + comparison = fmt.Sprintf(" Input choice confirmed: %s (%.2f→%.2f, remained #1).", finalWinner, finalWinnerInputScore, sortedHosts[0].score) } else { // Input winner different from final winner @@ -263,7 +369,7 @@ func (r *SchedulingDecisionReconciler) generateOrderedScoresAndDescription(final // Check if input winner was 
filtered out _, inputWinnerSurvived := finalScores[inputWinner] if !inputWinnerSurvived { - comparison = fmt.Sprintf("\nInput favored %s (score: %.2f, now filtered), final winner was #%d in input (%.2f→%.2f).", + comparison = fmt.Sprintf(" Input favored %s (score: %.2f, now filtered), final winner was #%d in input (%.2f→%.2f).", inputWinner, inputWinnerScore, finalWinnerInputPosition, finalWinnerInputScore, sortedHosts[0].score) } else { // Find input winner's position in final ranking @@ -274,22 +380,28 @@ func (r *SchedulingDecisionReconciler) generateOrderedScoresAndDescription(final break } } - comparison = fmt.Sprintf("\nInput favored %s (score: %.2f, now #%d with %.2f), final winner was #%d in input (%.2f→%.2f).", + comparison = fmt.Sprintf(" Input favored %s (score: %.2f, now #%d with %.2f), final winner was #%d in input (%.2f→%.2f).", inputWinner, inputWinnerScore, inputWinnerFinalPosition, finalScores[inputWinner], finalWinnerInputPosition, finalWinnerInputScore, sortedHosts[0].score) } } + // Add step impact analysis for the winner using multi-line format + var stepImpactInfo string + if len(stepImpacts) > 0 { + stepImpactInfo = r.formatStepImpactsMultiLine(stepImpacts) + } + // Add critical path information var criticalPath string if totalSteps > 0 { if criticalStepCount == 0 { - criticalPath = fmt.Sprintf("\nDecision driven by input only (all %d steps are non-critical).", totalSteps) + criticalPath = fmt.Sprintf(" Decision driven by input only (all %d steps are non-critical).", totalSteps) } else if criticalStepCount == totalSteps { - criticalPath = fmt.Sprintf("\nDecision requires all %d pipeline steps.", totalSteps) + criticalPath = fmt.Sprintf(" Decision requires all %d pipeline steps.", totalSteps) } else { if criticalStepCount == 1 { - criticalPath = fmt.Sprintf("\nDecision driven by 1/%d pipeline step: %s.", totalSteps, criticalSteps[0]) + criticalPath = fmt.Sprintf(" Decision driven by 1/%d pipeline step: %s.", totalSteps, criticalSteps[0]) } 
else { // Join critical steps with commas stepList := "" @@ -302,15 +414,145 @@ func (r *SchedulingDecisionReconciler) generateOrderedScoresAndDescription(final stepList += step + ", " } } - criticalPath = fmt.Sprintf("\nDecision driven by %d/%d pipeline steps: %s.", criticalStepCount, totalSteps, stepList) + criticalPath = fmt.Sprintf(" Decision driven by %d/%d pipeline steps: %s.", criticalStepCount, totalSteps, stepList) } } } - description += comparison + criticalPath + description += comparison + criticalPath + stepImpactInfo return orderedScores, description } +// formatStepImpactsMultiLine formats step impacts in a simple delta-ordered format +// without confusing terminology, ordered by absolute impact magnitude +func (r *SchedulingDecisionReconciler) formatStepImpactsMultiLine(stepImpacts []StepImpact) string { + if len(stepImpacts) == 0 { + return "" + } + + // Create a copy of impacts for sorting + sortedImpacts := make([]StepImpact, len(stepImpacts)) + copy(sortedImpacts, stepImpacts) + + // Sort by absolute delta impact (highest first), with promotions taking priority for ties + sort.Slice(sortedImpacts, func(i, j int) bool { + absI := sortedImpacts[i].ScoreDelta + if absI < 0 { + absI = -absI + } + absJ := sortedImpacts[j].ScoreDelta + if absJ < 0 { + absJ = -absJ + } + + // First priority: higher absolute delta + if absI != absJ { + return absI > absJ + } + + // Tie-breaking: promotions come first + if sortedImpacts[i].PromotedToFirst != sortedImpacts[j].PromotedToFirst { + return sortedImpacts[i].PromotedToFirst + } + + // Final tie-breaking: maintain original pipeline order (use step name for consistency) + return sortedImpacts[i].Step < sortedImpacts[j].Step + }) + + var lines []string + + for _, impact := range sortedImpacts { + var stepDesc string + + if impact.PromotedToFirst { + // Step promoted winner to first place + if impact.ScoreDelta != 0 { + stepDesc = fmt.Sprintf("%s %+.2f→#1", impact.Step, impact.ScoreDelta) + } else { + // Zero delta 
but promoted (must have removed competitors) + stepDesc = fmt.Sprintf("%s +0.00→#1", impact.Step) + } + } else if impact.ScoreDelta != 0 { + // Step changed winner's score but didn't promote to #1 + stepDesc = fmt.Sprintf("%s %+.2f", impact.Step, impact.ScoreDelta) + } else if impact.CompetitorsRemoved > 0 { + // Step removed competitors but didn't change winner's score or promote + stepDesc = fmt.Sprintf("%s +0.00 (removed %d)", impact.Step, impact.CompetitorsRemoved) + } else { + // Step had no measurable impact + stepDesc = fmt.Sprintf("%s +0.00", impact.Step) + } + + lines = append(lines, fmt.Sprintf("• %s", stepDesc)) + } + + if len(lines) == 0 { + return "" + } + + // Join with newlines and add initial label + return fmt.Sprintf(" Step impacts:\n%s", joinLines(lines)) +} + +// joinStepList joins step descriptions with appropriate separators +func joinStepList(steps []string) string { + if len(steps) == 0 { + return "" + } + if len(steps) == 1 { + return steps[0] + } + if len(steps) == 2 { + return steps[0] + ", " + steps[1] + } + + result := "" + for i, step := range steps { + if i < len(steps)-1 { + result += step + ", " + } else { + result += step + } + } + return result +} + +// joinLines joins multiple lines with newlines and proper indentation +func joinLines(lines []string) string { + result := "" + for i, line := range lines { + if i < len(lines)-1 { + result += line + "\n" + } else { + result += line + } + } + return result + "." 
+} + +// joinImpacts joins step impact descriptions with appropriate separators (kept for compatibility) +func joinImpacts(impacts []string) string { + if len(impacts) == 0 { + return "" + } + if len(impacts) == 1 { + return impacts[0] + } + if len(impacts) == 2 { + return impacts[0] + ", " + impacts[1] + } + + result := "" + for i, impact := range impacts { + if i == len(impacts)-1 { + result += impact + } else { + result += impact + ", " + } + } + return result +} + // SetupWithManager sets up the controller with the Manager. func (r *SchedulingDecisionReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). diff --git a/decisions/internal/controller/controller_test.go b/decisions/internal/controller/controller_test.go index 967b8088..7f2c579b 100644 --- a/decisions/internal/controller/controller_test.go +++ b/decisions/internal/controller/controller_test.go @@ -83,7 +83,7 @@ func TestReconcile(t *testing.T) { if updatedResource.Status.Error != "" { t.Errorf("Expected empty error, got '%s'", updatedResource.Status.Error) } - expectedDescription := "Selected: host1 (score: 1.50), certainty: perfect, 2 hosts evaluated\nInput favored host2 (score: 2.00, now filtered), final winner was #2 in input (1.00→1.50).\nDecision driven by 1/2 pipeline step: filter." + expectedDescription := "Selected: host1 (score: 1.50), certainty: perfect, 2 hosts evaluated. Input favored host2 (score: 2.00, now filtered), final winner was #2 in input (1.00→1.50). Decision driven by 1/2 pipeline step: filter. Step impacts:\n• weigher +0.50\n• filter +0.00→#1." 
if updatedResource.Status.Description != expectedDescription { t.Errorf("Expected description '%s', got '%s'", expectedDescription, updatedResource.Status.Description) } @@ -852,28 +852,28 @@ func TestReconcileCriticalStepElimination(t *testing.T) { name: "multiple-critical-steps", input: map[string]float64{ "host1": 1.0, - "host2": 2.0, // Would win without pipeline - "host3": 1.5, + "host2": 3.0, // Strong initial winner + "host3": 2.0, }, pipeline: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ { Step: "critical-weigher1", Activations: map[string]float64{ - "host1": 1.5, // Gives host1 strong boost to overtake host2 - "host2": 0.0, + "host1": 1.0, // host1: 2.0, host2: 2.5, host3: 2.5 (ties host2 and host3) + "host2": -0.5, "host3": 0.5, }, }, { Step: "critical-weigher2", Activations: map[string]float64{ - "host1": 0.1, // Further secures host1's lead + "host1": 1.0, // host1: 3.0, host2: 2.5, host3: 2.5 (host1 becomes winner) "host2": 0.0, "host3": 0.0, }, }, }, - expectedCriticalMessage: "Decision driven by 1/2 pipeline step: critical-weigher1.", + expectedCriticalMessage: "Decision requires all 2 pipeline steps.", }, { name: "all-non-critical", From 2cffc96be394a5318a20625bb74ccfb90967cf9f Mon Sep 17 00:00:00 2001 From: Philipp Matthes Date: Thu, 25 Sep 2025 14:36:56 +0200 Subject: [PATCH 24/58] Don't reconcile in terminal states --- decisions/internal/controller/controller.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/decisions/internal/controller/controller.go b/decisions/internal/controller/controller.go index 8a341167..487057b0 100644 --- a/decisions/internal/controller/controller.go +++ b/decisions/internal/controller/controller.go @@ -43,6 +43,11 @@ func (r *SchedulingDecisionReconciler) Reconcile(ctx context.Context, req ctrl.R return ctrl.Result{}, err } + // If the decision is already resolved or in error state, do nothing. 
+ if res.Status.State == v1alpha1.SchedulingDecisionStateResolved || res.Status.State == v1alpha1.SchedulingDecisionStateError { + return ctrl.Result{}, nil + } + // Validate that there is at least one host in the input if len(res.Spec.Input) == 0 { res.Status.State = v1alpha1.SchedulingDecisionStateError From 884381f8bdf5f5d2b31d8cec793a95bfd0e3b5a0 Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Thu, 25 Sep 2025 14:37:14 +0200 Subject: [PATCH 25/58] Add scheduling decisions endpoint to cortex nova scheduler --- internal/scheduler/nova/api/http/api.go | 73 +++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/internal/scheduler/nova/api/http/api.go b/internal/scheduler/nova/api/http/api.go index 14b5b027..5f160b15 100644 --- a/internal/scheduler/nova/api/http/api.go +++ b/internal/scheduler/nova/api/http/api.go @@ -15,6 +15,7 @@ import ( "strings" "time" + "github.com/cobaltcore-dev/cortex/decisions/api/v1alpha1" "github.com/cobaltcore-dev/cortex/internal/conf" "github.com/cobaltcore-dev/cortex/internal/db" "github.com/cobaltcore-dev/cortex/internal/monitoring" @@ -26,6 +27,9 @@ import ( "github.com/majewsky/gg/option" "github.com/sapcc/go-api-declarations/liquid" "github.com/sapcc/go-bits/jobloop" + "sigs.k8s.io/controller-runtime/pkg/client" + + ctrl "sigs.k8s.io/controller-runtime" ) type HTTPAPI interface { @@ -40,6 +44,9 @@ type httpAPI struct { // Database connection to load specific objects during the scheduling process. 
DB db.DB + + // Kubernetes client + Client client.Client } func NewAPI(config conf.SchedulerConfig, registry *monitoring.Registry, db db.DB, mqttClient mqtt.Client) HTTPAPI { @@ -53,11 +60,26 @@ func NewAPI(config conf.SchedulerConfig, registry *monitoring.Registry, db db.DB pipelineConf, db, monitor.SubPipeline("nova-"+pipelineConf.Name), mqttClient, ) } + + scheme, err := v1alpha1.SchemeBuilder.Build() + if err != nil { + panic(err) + } + clientConfig, err := ctrl.GetConfig() + if err != nil { + panic(err) + } + cl, err := client.New(clientConfig, client.Options{Scheme: scheme}) + if err != nil { + panic(err) + } + return &httpAPI{ pipelines: pipelines, config: config, monitor: scheduler.NewSchedulerMonitor(registry), DB: db, + Client: cl, // TODO } } @@ -69,6 +91,7 @@ func (httpAPI *httpAPI) Init(mux *http.ServeMux) { } mux.HandleFunc("/scheduler/nova/external", httpAPI.NovaExternalScheduler) mux.HandleFunc("/scheduler/nova/commitments/change", httpAPI.HandleCommitmentChangeRequest) + mux.HandleFunc("/scheduler/nova/scheduling-decisions", httpAPI.HandleListSchedulingDecisions) } // Check if the scheduler can run based on the request data. @@ -408,3 +431,53 @@ func (httpAPI *httpAPI) HandleCommitmentChangeRequest(w http.ResponseWriter, r * } callback.Respond(http.StatusOK, nil, "") } + +// List all scheduling decisions. +func (httpAPI *httpAPI) HandleListSchedulingDecisions(w http.ResponseWriter, r *http.Request) { + callback := httpAPI.monitor.Callback(w, r, "/scheduler/nova/scheduling-decisions") + + // Exit early if the request method is not GET. + if r.Method != http.MethodGet { + internalErr := fmt.Errorf("invalid request method: %s", r.Method) + callback.Respond(http.StatusMethodNotAllowed, internalErr, "invalid request method") + return + } + + // Check if a specific vm id is requested. + vmID := r.URL.Query().Get("vm_id") + + // If no specific vm id is requested, list all scheduling decisions. 
+ if vmID == "" { + var decisions v1alpha1.SchedulingDecisionList + if err := httpAPI.Client.List(r.Context(), &decisions); err != nil { + callback.Respond(http.StatusInternalServerError, err, "failed to list scheduling decisions") + return + } + + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(decisions); err != nil { + callback.Respond(http.StatusInternalServerError, err, "failed to encode response") + return + } + return + } + + var decision v1alpha1.SchedulingDecision + nn := client.ObjectKey{Name: vmID} + if err := httpAPI.Client.Get(r.Context(), nn, &decision); err != nil { + if client.IgnoreNotFound(err) != nil { + callback.Respond(http.StatusInternalServerError, err, "failed to get scheduling decision") + return + } + // Not found + callback.Respond(http.StatusNotFound, err, "scheduling decision not found") + return + } + + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(decision); err != nil { + callback.Respond(http.StatusInternalServerError, err, "failed to encode response") + return + } + callback.Respond(http.StatusOK, nil, "Success") +} From bb411c9d6db701bccdb672f897e8b8f2de1d78e4 Mon Sep 17 00:00:00 2001 From: mblos <> Date: Fri, 26 Sep 2025 08:57:19 +0200 Subject: [PATCH 26/58] Fix reconcile loop --- decisions/internal/controller/controller.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/decisions/internal/controller/controller.go b/decisions/internal/controller/controller.go index 487057b0..2efe587f 100644 --- a/decisions/internal/controller/controller.go +++ b/decisions/internal/controller/controller.go @@ -13,6 +13,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller" logf "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/predicate" "github.com/cobaltcore-dev/cortex/decisions/api/v1alpha1" decisionsv1alpha1 "github.com/cobaltcore-dev/cortex/decisions/api/v1alpha1" @@ -566,5 
+567,6 @@ func (r *SchedulingDecisionReconciler) SetupWithManager(mgr ctrl.Manager) error WithOptions(controller.Options{ MaxConcurrentReconciles: 1, // Default }). + WithEventFilter(predicate.GenerationChangedPredicate{}). Complete(r) } From abbc95c0114b4f2c82e439c429bb830ce0c171af Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Fri, 26 Sep 2025 09:00:33 +0200 Subject: [PATCH 27/58] Enhance scheduling decision functionality with new Flavor type and update CRDs --- .../api/v1alpha1/schedulingdecision_types.go | 15 +- .../api/v1alpha1/zz_generated.deepcopy.go | 16 + .../decisions.cortex_schedulingdecisions.yaml | 25 ++ .../decisions.cortex_schedulingdecisions.yaml | 25 ++ .../decisions.cortex_schedulingdecisions.yaml | 25 ++ internal/scheduler/nova/api/http/api.go | 13 +- internal/scheduler/pipeline.go | 18 +- visualizer/Dockerfile | 2 + visualizer/favicon.ico | Bin 0 -> 15406 bytes visualizer/manila.html | 337 +++++++++--------- visualizer/nova.html | 42 ++- visualizer/scheduling-decisions.html | 239 +++++++++++++ 12 files changed, 580 insertions(+), 177 deletions(-) create mode 100644 visualizer/favicon.ico create mode 100644 visualizer/scheduling-decisions.html diff --git a/decisions/api/v1alpha1/schedulingdecision_types.go b/decisions/api/v1alpha1/schedulingdecision_types.go index e3d21546..9c08b819 100644 --- a/decisions/api/v1alpha1/schedulingdecision_types.go +++ b/decisions/api/v1alpha1/schedulingdecision_types.go @@ -18,9 +18,22 @@ type SchedulingDecisionPipelineSpec struct { Outputs []SchedulingDecisionPipelineOutputSpec `json:"outputs,omitempty"` } +type Flavor struct { + Name string `json:"name"` + VCPUs int `json:"vcpus"` + RAM int `json:"memory_mb"` + Disk int `json:"disk"` +} + // SchedulingDecisionSpec defines the desired state of SchedulingDecision. 
type SchedulingDecisionSpec struct { - Input map[string]float64 `json:"input,omitempty"` + Input map[string]float64 `json:"input,omitempty"` + AvailabilityZone string `json:"availbility_zone,omitempty"` + VMware bool `json:"vmware,omitempty"` + Live bool `json:"live,omitempty"` + Resize bool `json:"resize,omitempty"` + Flavor Flavor `json:"flavor"` + Pipeline SchedulingDecisionPipelineSpec `json:"pipeline"` } diff --git a/decisions/api/v1alpha1/zz_generated.deepcopy.go b/decisions/api/v1alpha1/zz_generated.deepcopy.go index e846dcb0..c7f73fd4 100644 --- a/decisions/api/v1alpha1/zz_generated.deepcopy.go +++ b/decisions/api/v1alpha1/zz_generated.deepcopy.go @@ -11,6 +11,21 @@ import ( runtime "k8s.io/apimachinery/pkg/runtime" ) +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Flavor) DeepCopyInto(out *Flavor) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Flavor. +func (in *Flavor) DeepCopy() *Flavor { + if in == nil { + return nil + } + out := new(Flavor) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *SchedulingDecision) DeepCopyInto(out *SchedulingDecision) { *out = *in @@ -124,6 +139,7 @@ func (in *SchedulingDecisionSpec) DeepCopyInto(out *SchedulingDecisionSpec) { (*out)[key] = val } } + out.Flavor = in.Flavor in.Pipeline.DeepCopyInto(&out.Pipeline) } diff --git a/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml b/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml index 7090714c..7e1b876d 100644 --- a/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml +++ b/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml @@ -56,10 +56,30 @@ spec: spec: description: spec defines the desired state of SchedulingDecision properties: + availbility_zone: + type: string + flavor: + properties: + disk: + type: integer + memory_mb: + type: integer + name: + type: string + vcpus: + type: integer + required: + - disk + - memory_mb + - name + - vcpus + type: object input: additionalProperties: type: number type: object + live: + type: boolean pipeline: properties: name: @@ -82,7 +102,12 @@ spec: required: - name type: object + resize: + type: boolean + vmware: + type: boolean required: + - flavor - pipeline type: object status: diff --git a/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml b/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml index 7090714c..7e1b876d 100644 --- a/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml +++ b/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml @@ -56,10 +56,30 @@ spec: spec: description: spec defines the desired state of SchedulingDecision properties: + availbility_zone: + type: string + flavor: + properties: + disk: + type: integer + memory_mb: + type: integer + name: + type: string + vcpus: + type: integer + required: + - disk + - memory_mb + - name + - vcpus + type: object input: additionalProperties: type: number type: object + live: + type: boolean pipeline: properties: name: @@ -82,7 +102,12 @@ spec: 
required: - name type: object + resize: + type: boolean + vmware: + type: boolean required: + - flavor - pipeline type: object status: diff --git a/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml b/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml index 195b43ad..7b6cc994 100644 --- a/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml +++ b/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml @@ -62,10 +62,30 @@ spec: spec: description: spec defines the desired state of SchedulingDecision properties: + availbility_zone: + type: string + flavor: + properties: + disk: + type: integer + memory_mb: + type: integer + name: + type: string + vcpus: + type: integer + required: + - disk + - memory_mb + - name + - vcpus + type: object input: additionalProperties: type: number type: object + live: + type: boolean pipeline: properties: name: @@ -88,7 +108,12 @@ spec: required: - name type: object + resize: + type: boolean + vmware: + type: boolean required: + - flavor - pipeline type: object status: diff --git a/internal/scheduler/nova/api/http/api.go b/internal/scheduler/nova/api/http/api.go index 5f160b15..14986bdb 100644 --- a/internal/scheduler/nova/api/http/api.go +++ b/internal/scheduler/nova/api/http/api.go @@ -434,6 +434,16 @@ func (httpAPI *httpAPI) HandleCommitmentChangeRequest(w http.ResponseWriter, r * // List all scheduling decisions. 
func (httpAPI *httpAPI) HandleListSchedulingDecisions(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Access-Control-Allow-Origin", "http://localhost:4000") + w.Header().Set("Access-Control-Allow-Methods", "GET, POST, OPTIONS") + w.Header().Set("Access-Control-Allow-Headers", "Content-Type") + + // Handle preflight OPTIONS request + if r.Method == http.MethodOptions { + w.WriteHeader(http.StatusOK) + return + } + callback := httpAPI.monitor.Callback(w, r, "/scheduler/nova/scheduling-decisions") // Exit early if the request method is not GET. @@ -453,8 +463,8 @@ func (httpAPI *httpAPI) HandleListSchedulingDecisions(w http.ResponseWriter, r * callback.Respond(http.StatusInternalServerError, err, "failed to list scheduling decisions") return } - w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(decisions); err != nil { callback.Respond(http.StatusInternalServerError, err, "failed to encode response") return @@ -475,6 +485,7 @@ func (httpAPI *httpAPI) HandleListSchedulingDecisions(w http.ResponseWriter, r * } w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(decision); err != nil { callback.Respond(http.StatusInternalServerError, err, "failed to encode response") return diff --git a/internal/scheduler/pipeline.go b/internal/scheduler/pipeline.go index cffee1e2..7dd4f7d5 100644 --- a/internal/scheduler/pipeline.go +++ b/internal/scheduler/pipeline.go @@ -283,10 +283,26 @@ func (p *pipeline[RequestType]) Run(request RequestType) ([]string, error) { Activations: activations, }) } + + // Need to check if nova request -> circular dependency + // Move to nova pipeline? + // -> Missing data + // omit? 
+ decision := &v1alpha1.SchedulingDecision{ ObjectMeta: ctrl.ObjectMeta{Name: request.GetResourceID()}, Spec: v1alpha1.SchedulingDecisionSpec{ - Input: inWeights, + Input: inWeights, + AvailabilityZone: "TODO", + Flavor: v1alpha1.Flavor{ + Name: "TODO", + VCPUs: 0, + RAM: 0, + Disk: 0, + }, + VMware: false, + Live: false, + Resize: false, Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ Name: request.GetPipeline(), Outputs: outputs, diff --git a/visualizer/Dockerfile b/visualizer/Dockerfile index 5ab7a8ee..ab76e720 100644 --- a/visualizer/Dockerfile +++ b/visualizer/Dockerfile @@ -4,5 +4,7 @@ FROM nginx COPY vendor/mqtt.min.js /usr/share/nginx/html/mqtt.min.js COPY nova.html /usr/share/nginx/html/nova.html +COPY scheduling-decisions.html /usr/share/nginx/html/scheduling-decisions.html COPY manila.html /usr/share/nginx/html/manila.html COPY shared.css /usr/share/nginx/html/shared.css +COPY favicon.ico /usr/share/nginx/html/favicon.ico diff --git a/visualizer/favicon.ico b/visualizer/favicon.ico new file mode 100644 index 0000000000000000000000000000000000000000..b4f9d5fb8c202c1528e8458073ffeffdb530c143 GIT binary patch literal 15406 zcmeI21&~xn6UP^KcR%by3kgo}0)l&RmjVi>un^o`fkfyMDL@cXti${k>o3 zo2z$wyLar}?g2%n>g{{`-bhbRPxrsO$KeR)h~S7D*J0iL9Bm>x9Qhp%ho9d+|1-yP zIA&Y#a^?E}_y0H?|4rm@WVY&9O{^CGyti&Hi-i2M->UBS((t_#M&mf^#P%g~`i<)@#1(tb{!Jn3K0)dRd5 zJb17?d-hB})1K?psZ&DRUcGt=JaSiR*RCyN$Bqru9$J`3xpL*CUAuPfnoHZZZIwU8 zix-!L3m59xbLY;L>eZ{u*|TTm!Gj0Ewx{#kw{I(*)2C0D(W6I8-@bjNVZ(;ne{kbN zf77N-<^1{cI+szSMyU)OKYm=`Pf+a{W81cEa{c;sog=(M4*T`%r#!iL@187Lw8-uJ znPZCiS>a!P``eCY1y)+@*O$azI}U8O#*#; z^X84LUcFlP!@GCy0)4l=cw~F_^zorfrQpF!^o*Z%m9Q@Kle=OOw$)SOyjb78((&RT zA?=F&a`oy}ZT|T2V>xr?j6UDJdsmP>zQ20)O7FC-q>u%n>sj~X_-+#-a zN1ltq?%lhE{a(3pWtH241q(`(CQW4Ez=66C(V_YC=T|zaRjVdfu3V7;0|sdQ@4ov^ zX3m_cdbw=bvi@{?!Uyy^Ivt(+`|rQ&UPa#X<$ zI>Wj=dGbV%v7SA9O8@@-)gFX|$DY!S{9*T!Cr>W1W5rYW9?88fDoFJE3Ufv5eutAFjeLb|RxIUTu3Wi-#5OKFw_Y+nZPEM4)QJ-(0#tQ3{?@HqRZh~T zO{)!DCvoG(mFCTxD;;M1i4!ME=FFL8)v8rOea1&G;ZvYDLp#2i 
zG`N@b$d@l)fbsdm-bCKmBj{;+@z(h80b7Gz$8W;sp!>iD%a<<~bYiJer6f+AIEtsi znd}2^jnACG$>8jC>C&l8Llf(fEn7C})Txv5AD;+1vS-gO_#L4gpZTFnx^?R&DO09Y z`}+L(bHzO98e|>0Krfs1KX&Yx;_LnU_lJ6X>^yTpw}Np@LFX}^-R_w2O&#u`o&Wv& z_tkI2ceWSC$M#UrHTY7gQl$!{Cy|q&d>eb8_Gj=Hyxna1-L8rmAD@Q3$@=35+RGn* z{2};f_}-ot~Rv-*x#)k(}rc4RY{o%uhr9_DmDi_Xt3sCU`{(zlo z)~qSfqD7PMzyDr&O*8xWJX64Cty{NN`^TOC-YQ_q9654GiWDg{?t%Sh?p$E)apT76 ze3>`%_eH_3fiTiwM4zLfXGNvsg9qp5WT6DR3rt<#f_xK<$@`mxop>t{;4nKN_vA_|xe zJOS=9x{W*kP5hZUU@+_f-*|6-HtncGUtH87R>egz?Mz*7Fc%t#hY%+Q{}PiyM`J7b z4_4(n@iS}(J`Hg!unRZ~oPu33cm@1HUtGN3yLYdQ88b$CK`w!Kmow%G%{=g*iD^xm zG)a~%Tc$B*Vqa;}q)}fIybl%y3xK)sO?&j{A$981QT~%-03R6p4=yLh3`WB*0>73n zU0OD7+^86x__rOuhK6?9!cY896SG84;8E7BS#`ZxdvF7I4f=~)F+4CPem-mGihg{5 z>XWCTzge?psULtZX2;{9rJeTVQoj1?EA1D0kYBJxmMmG6PsnkC1PK%aK|k?6Xit(P ziN-ix(a(AkgJcb$fAQkQI`_PJ^L|jy$&-1J$0}O1sM3WDk@vtahUSqYM@swl?IlBo z44Mmp{_fqoD}KU%A|Hah^UR&NA@e3TL|=0(jLW|H=9_OczF4thMUA_`CvO$_gTGO- zWJ!TP_}uvNaN)wK zu4S(q8x|J4{LnFw74o>mk0L~fpn8+I3i(Xv$Jgf~evBOoxZjK74q! zS;R-kzcUv@KRT6|8~NRY2@~pi5EB9`7~B(<^MT*UHF^-c3r!t5bWnPb@rzcj(3yVT zfeBa-bS7~R;!4;G;%eyVuqfav;(o|@#E20!22GwHn{0|Z`ptK8pV$Rti9XV&Pp|T4 z@59vf)*YOTjR&(K$Jkr)c<3AF5)}P*K9h3+Yk=+8H-!rq)^#OD=I!#=Uw>)Nl6Vf- z?b4-7Zifhpey}XDaQI;gSQ`ui9w9%$`i3MoZTfS*6K@0WfP(LpU`|))D8ef3;Grli4lsGBt0mk(u z{rD{C4`=|tGbX%5mvfH6oOQGFpR)txj<8)|Ap9-bnD{xm20z;5*L+RC+1tc*p#j>- zL)!DHcKXpH_$lam>@fZTzKo&Y^k?p2ML*-A3puw!?w$*qOtI6?e{3@OcFy`?ufcQZ zNLR8Kmh>Y_U}Z25SQ%Yt&hjB!U?b`fLxFziHGU`hjCDhY;**ijW=+GAet2sNa!l@+ zvpY$XCe>Uu_5zu&RH>5W%$ZYtWv~YFPizI<$vFh_9_SzPS|5XcXu>yUpK|sRAJ$+Q z&O@Rf$-Sbph#NsaJjbpOXUUi`qv9#f&ia~u4#3%o7^RKf5eCp8cQ*>aEFiRO3tS^`@F8`x3~2*{n$)=4t$S7 zg$e~ae`2Q}U&RzK6aF!0#L@rAnybRu1YhAFd~*tZBeb9^T@}NB=trlrPl*+>CSZJG zop$_e?*skHxomWow+iFqU!c?R6Aeyq*T0>9`o^!Svx|ox5A;!u%c=Qu{!CCH{ z>wEGayMwQV9%RpQCx*Z{=p*N1r=K;zHZu?B!aU#wXN{ - - Cortex Manila Visualizer - - - - - - - - - - - -
- -
-
Waiting for mqtt data to arrive...
-
- -
- - - -
- - + + +
+ +
+
Waiting for mqtt data to arrive...
+
+ +
+ + + +
+ + - + } + + + \ No newline at end of file diff --git a/visualizer/nova.html b/visualizer/nova.html index 7f22ca58..0c481eae 100644 --- a/visualizer/nova.html +++ b/visualizer/nova.html @@ -7,6 +7,8 @@ Cortex Nova Visualizer + + @@ -38,6 +40,7 @@ + +
Hire me on: CSS-is-my-passion.com
+ +
+ +
+
Waiting for Scheduling Decisions
+
+ + + + + + \ No newline at end of file From 7d4d5478327b7f38e602d36f983caf3d3fc086cd Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Fri, 26 Sep 2025 09:05:40 +0200 Subject: [PATCH 28/58] Remove promotional "Hire me" text from scheduling decisions page --- visualizer/scheduling-decisions.html | 2 -- 1 file changed, 2 deletions(-) diff --git a/visualizer/scheduling-decisions.html b/visualizer/scheduling-decisions.html index 6a8875d8..dfa8faea 100644 --- a/visualizer/scheduling-decisions.html +++ b/visualizer/scheduling-decisions.html @@ -176,8 +176,6 @@ -
Hire me on: CSS-is-my-passion.com
-
From 3585149be4204f42d9626552f0e170a6abfe434f Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Fri, 26 Sep 2025 09:31:10 +0200 Subject: [PATCH 29/58] Refactor SchedulingDecision CRD: update property names for consistency and clarity --- decisions/api/v1alpha1/schedulingdecision_types.go | 4 ++-- .../crd/bases/decisions.cortex_schedulingdecisions.yaml | 8 ++++---- .../config/crd/decisions.cortex_schedulingdecisions.yaml | 8 ++++---- .../crd/decisions.cortex_schedulingdecisions.yaml | 8 ++++---- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/decisions/api/v1alpha1/schedulingdecision_types.go b/decisions/api/v1alpha1/schedulingdecision_types.go index 9c08b819..c64cce3e 100644 --- a/decisions/api/v1alpha1/schedulingdecision_types.go +++ b/decisions/api/v1alpha1/schedulingdecision_types.go @@ -21,14 +21,14 @@ type SchedulingDecisionPipelineSpec struct { type Flavor struct { Name string `json:"name"` VCPUs int `json:"vcpus"` - RAM int `json:"memory_mb"` + RAM int `json:"ram"` Disk int `json:"disk"` } // SchedulingDecisionSpec defines the desired state of SchedulingDecision. 
type SchedulingDecisionSpec struct { Input map[string]float64 `json:"input,omitempty"` - AvailabilityZone string `json:"availbility_zone,omitempty"` + AvailabilityZone string `json:"availbilityZone,omitempty"` VMware bool `json:"vmware,omitempty"` Live bool `json:"live,omitempty"` Resize bool `json:"resize,omitempty"` diff --git a/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml b/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml index 7e1b876d..baa25f3b 100644 --- a/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml +++ b/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml @@ -56,22 +56,22 @@ spec: spec: description: spec defines the desired state of SchedulingDecision properties: - availbility_zone: + availbilityZone: type: string flavor: properties: disk: type: integer - memory_mb: - type: integer name: type: string + ram: + type: integer vcpus: type: integer required: - disk - - memory_mb - name + - ram - vcpus type: object input: diff --git a/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml b/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml index 7e1b876d..baa25f3b 100644 --- a/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml +++ b/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml @@ -56,22 +56,22 @@ spec: spec: description: spec defines the desired state of SchedulingDecision properties: - availbility_zone: + availbilityZone: type: string flavor: properties: disk: type: integer - memory_mb: - type: integer name: type: string + ram: + type: integer vcpus: type: integer required: - disk - - memory_mb - name + - ram - vcpus type: object input: diff --git a/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml b/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml index 7b6cc994..eab224f3 100644 --- a/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml +++ 
b/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml @@ -62,22 +62,22 @@ spec: spec: description: spec defines the desired state of SchedulingDecision properties: - availbility_zone: + availbilityZone: type: string flavor: properties: disk: type: integer - memory_mb: - type: integer name: type: string + ram: + type: integer vcpus: type: integer required: - disk - - memory_mb - name + - ram - vcpus type: object input: From c0b3b3a7e309c740845e1d6a35107ad84c4f92fa Mon Sep 17 00:00:00 2001 From: Philipp Matthes Date: Fri, 26 Sep 2025 09:45:52 +0200 Subject: [PATCH 30/58] Use schedulingdecision consumer --- .../scheduler/cinder/api/http/api_test.go | 12 ++ internal/scheduler/cinder/api/messages.go | 5 - .../scheduler/manila/api/http/api_test.go | 12 ++ internal/scheduler/manila/api/messages.go | 5 - internal/scheduler/nova/api/http/api_test.go | 25 +++++ internal/scheduler/nova/api/messages.go | 3 - internal/scheduler/nova/pipeline.go | 99 ++++++++++++++++- internal/scheduler/pipeline.go | 104 ++++-------------- internal/scheduler/request.go | 4 - internal/scheduler/request_test.go | 1 - 10 files changed, 168 insertions(+), 102 deletions(-) diff --git a/internal/scheduler/cinder/api/http/api_test.go b/internal/scheduler/cinder/api/http/api_test.go index dcf1b251..2c1e8cb2 100644 --- a/internal/scheduler/cinder/api/http/api_test.go +++ b/internal/scheduler/cinder/api/http/api_test.go @@ -21,6 +21,18 @@ type mockPipeline struct { runFunc func(api.ExternalSchedulerRequest) ([]string, error) } +func (p *mockPipeline) SetConsumer(consumer scheduler.SchedulingDecisionConsumer[api.ExternalSchedulerRequest]) { + +} + +func (p *mockPipeline) Consume( + request api.ExternalSchedulerRequest, + applicationOrder []string, + inWeights map[string]float64, + stepWeights map[string]map[string]float64, +) { +} + func (m *mockPipeline) Run(req api.ExternalSchedulerRequest) ([]string, error) { return m.runFunc(req) } diff --git 
a/internal/scheduler/cinder/api/messages.go b/internal/scheduler/cinder/api/messages.go index 3083e3f6..a6d1414a 100644 --- a/internal/scheduler/cinder/api/messages.go +++ b/internal/scheduler/cinder/api/messages.go @@ -60,11 +60,6 @@ func (r ExternalSchedulerRequest) WithPipeline(pipeline string) scheduler.Pipeli r.Pipeline = pipeline return r } -func (r ExternalSchedulerRequest) GetResourceID() string { - // TODO: We don't properly unwrap the spec yet, - // so we don't have access to the actual resource ID. - return "" -} // Response generated by cortex for the Cinder scheduler. // Cortex returns an ordered list of hosts that the share should be scheduled on. diff --git a/internal/scheduler/manila/api/http/api_test.go b/internal/scheduler/manila/api/http/api_test.go index 6a839216..2c85c6b2 100644 --- a/internal/scheduler/manila/api/http/api_test.go +++ b/internal/scheduler/manila/api/http/api_test.go @@ -21,6 +21,18 @@ type mockPipeline struct { runFunc func(api.ExternalSchedulerRequest) ([]string, error) } +func (p *mockPipeline) SetConsumer(consumer scheduler.SchedulingDecisionConsumer[api.ExternalSchedulerRequest]) { + +} + +func (p *mockPipeline) Consume( + request api.ExternalSchedulerRequest, + applicationOrder []string, + inWeights map[string]float64, + stepWeights map[string]map[string]float64, +) { +} + func (m *mockPipeline) Run(req api.ExternalSchedulerRequest) ([]string, error) { return m.runFunc(req) } diff --git a/internal/scheduler/manila/api/messages.go b/internal/scheduler/manila/api/messages.go index e33c576a..5a5d2353 100644 --- a/internal/scheduler/manila/api/messages.go +++ b/internal/scheduler/manila/api/messages.go @@ -60,11 +60,6 @@ func (r ExternalSchedulerRequest) WithPipeline(pipeline string) scheduler.Pipeli r.Pipeline = pipeline return r } -func (r ExternalSchedulerRequest) GetResourceID() string { - // TODO: We don't properly unwrap the spec yet, - // so we don't have access to the actual resource ID. 
- return "" -} // Response generated by cortex for the Manila scheduler. // Cortex returns an ordered list of hosts that the share should be scheduled on. diff --git a/internal/scheduler/nova/api/http/api_test.go b/internal/scheduler/nova/api/http/api_test.go index 29c69688..5f0f879b 100644 --- a/internal/scheduler/nova/api/http/api_test.go +++ b/internal/scheduler/nova/api/http/api_test.go @@ -30,6 +30,19 @@ func (m *mockExternalSchedulerPipeline) Run(request api.ExternalSchedulerRequest return []string{"host1"}, nil } +func (m *mockExternalSchedulerPipeline) SetConsumer(consumer scheduler.SchedulingDecisionConsumer[api.ExternalSchedulerRequest]) { + // Do nothing +} + +func (m *mockExternalSchedulerPipeline) Consume( + request api.ExternalSchedulerRequest, + applicationOrder []string, + inWeights map[string]float64, + stepWeights map[string]map[string]float64, +) { + // Do nothing +} + func TestCanRunScheduler(t *testing.T) { httpAPI := &httpAPI{ pipelines: map[string]scheduler.Pipeline[api.ExternalSchedulerRequest]{ @@ -255,6 +268,18 @@ type mockCommitmentsPipeline struct { shouldError bool } +func (p *mockCommitmentsPipeline) SetConsumer(consumer scheduler.SchedulingDecisionConsumer[api.ExternalSchedulerRequest]) { + +} + +func (p *mockCommitmentsPipeline) Consume( + request api.ExternalSchedulerRequest, + applicationOrder []string, + inWeights map[string]float64, + stepWeights map[string]map[string]float64, +) { +} + func (p *mockCommitmentsPipeline) Run(request api.ExternalSchedulerRequest) ([]string, error) { if p.shouldError { return nil, errors.New("mock error") diff --git a/internal/scheduler/nova/api/messages.go b/internal/scheduler/nova/api/messages.go index 552c751a..2b8afede 100644 --- a/internal/scheduler/nova/api/messages.go +++ b/internal/scheduler/nova/api/messages.go @@ -78,9 +78,6 @@ func (r ExternalSchedulerRequest) WithPipeline(pipeline string) scheduler.Pipeli r.Pipeline = pipeline return r } -func (r ExternalSchedulerRequest) GetResourceID() 
string { - return r.Spec.Data.InstanceUUID -} // Response generated by cortex for the Nova scheduler. // Cortex returns an ordered list of hosts that the VM should be scheduled on. diff --git a/internal/scheduler/nova/pipeline.go b/internal/scheduler/nova/pipeline.go index b9f1f2d5..61dfc381 100644 --- a/internal/scheduler/nova/pipeline.go +++ b/internal/scheduler/nova/pipeline.go @@ -4,9 +4,12 @@ package nova import ( + "context" "errors" "log/slog" + "math" + "github.com/cobaltcore-dev/cortex/decisions/api/v1alpha1" "github.com/cobaltcore-dev/cortex/internal/conf" "github.com/cobaltcore-dev/cortex/internal/db" "github.com/cobaltcore-dev/cortex/internal/mqtt" @@ -16,6 +19,8 @@ import ( "github.com/cobaltcore-dev/cortex/internal/scheduler/nova/plugins/shared" "github.com/cobaltcore-dev/cortex/internal/scheduler/nova/plugins/vmware" "github.com/cobaltcore-dev/cortex/internal/sync/openstack/nova" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" ) type NovaStep = scheduler.Step[api.ExternalSchedulerRequest] @@ -59,6 +64,96 @@ type novaPipeline struct { preselectAllHosts bool } +type novaPipelineConsumer struct { + // Kubernetes client to create decision resources. + Client client.Client +} + +func NewNovaPipelineConsumer() *novaPipelineConsumer { + var kubernetesClient client.Client + if scheme, err := v1alpha1.SchemeBuilder.Build(); err == nil { + if clientConfig, err := ctrl.GetConfig(); err == nil { + if cl, err := client.New(clientConfig, client.Options{Scheme: scheme}); err == nil { + // Successfully created a client, use it. 
+ kubernetesClient = cl + } + } + } + return &novaPipelineConsumer{ + Client: kubernetesClient, + } +} + +func (c *novaPipelineConsumer) Consume( + request api.ExternalSchedulerRequest, + applicationOrder []string, + inWeights map[string]float64, + stepWeights map[string]map[string]float64, +) { + if c.Client == nil { + return + } + var existing v1alpha1.SchedulingDecision + if err := c.Client.Get( + context.Background(), + client.ObjectKey{Name: request.Spec.Data.InstanceUUID}, + &existing, + ); err == nil { + // Decision already exists, do not create a new one. + // TODO: Add new decisions for the same vm id if this is a migration. + slog.Info("scheduler: decision already exists, not creating a new one", "resourceID", request.Spec.Data.InstanceUUID) + return + } + outputs := []v1alpha1.SchedulingDecisionPipelineOutputSpec{} + for _, stepKey := range applicationOrder { + weights, ok := stepWeights[stepKey] + if !ok { + // This is ok, since steps can be skipped. + continue + } + activations := make(map[string]float64, len(weights)) + for k, v := range weights { + activations[k] = math.Tanh(v) + } + outputs = append(outputs, v1alpha1.SchedulingDecisionPipelineOutputSpec{ + Step: stepKey, + Activations: activations, + }) + } + + // Need to check if nova request -> circular dependency + // Move to nova pipeline? + // -> Missing data + // omit? + + decision := &v1alpha1.SchedulingDecision{ + ObjectMeta: ctrl.ObjectMeta{Name: request.Spec.Data.InstanceUUID}, + Spec: v1alpha1.SchedulingDecisionSpec{ + Input: inWeights, + AvailabilityZone: "TODO", + Flavor: v1alpha1.Flavor{ + Name: "TODO", + VCPUs: 0, + RAM: 0, + Disk: 0, + }, + VMware: false, + Live: false, + Resize: false, + Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ + Name: request.GetPipeline(), + Outputs: outputs, + }, + }, + // Status will be filled in by the controller. 
+ } + if err := c.Client.Create(context.Background(), decision); err != nil { + slog.Error("scheduler: failed to create decision", "error", err) + return + } + slog.Info("scheduler: created decision", "resourceID", request.Spec.Data.InstanceUUID) +} + // Create a new Nova scheduler pipeline. func NewPipeline( config conf.NovaSchedulerPipelineConfig, @@ -89,7 +184,9 @@ func NewPipeline( supportedSteps, config.Plugins, wrappers, db, monitor, mqttClient, TopicFinished, ) - return &novaPipeline{pipeline, db, config.PreselectAllHosts} + wrapped := &novaPipeline{pipeline, db, config.PreselectAllHosts} + wrapped.SetConsumer(NewNovaPipelineConsumer()) + return wrapped } // If needed, modify the request before sending it off to the pipeline. diff --git a/internal/scheduler/pipeline.go b/internal/scheduler/pipeline.go index 7dd4f7d5..24b97b9e 100644 --- a/internal/scheduler/pipeline.go +++ b/internal/scheduler/pipeline.go @@ -4,7 +4,6 @@ package scheduler import ( - "context" "errors" "log/slog" "maps" @@ -17,15 +16,14 @@ import ( "github.com/cobaltcore-dev/cortex/internal/conf" "github.com/cobaltcore-dev/cortex/internal/db" "github.com/cobaltcore-dev/cortex/internal/mqtt" - - "github.com/cobaltcore-dev/cortex/decisions/api/v1alpha1" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" ) type Pipeline[RequestType PipelineRequest] interface { // Run the scheduling pipeline with the given request. Run(request RequestType) ([]string, error) + + // Set the consumer that will receive the decisions. + SetConsumer(consumer SchedulingDecisionConsumer[RequestType]) } type Premodifier[RequestType PipelineRequest] interface { @@ -49,8 +47,12 @@ type pipeline[RequestType PipelineRequest] struct { // MQTT topic to publish telemetry data on when the pipeline is finished. mqttTopic string - // Kubernetes client to create decision resources. - Client client.Client + // Optional consumer to listen for the decisions. 
+ Consumer SchedulingDecisionConsumer[RequestType] +} + +func (p *pipeline[RequestType]) SetConsumer(consumer SchedulingDecisionConsumer[RequestType]) { + p.Consumer = consumer } type StepWrapper[RequestType PipelineRequest] func(Step[RequestType], conf.SchedulerStepConfig) Step[RequestType] @@ -104,16 +106,6 @@ func NewPipeline[RequestType PipelineRequest]( ) } - var kubernetesClient client.Client - if scheme, err := v1alpha1.SchemeBuilder.Build(); err == nil { - if clientConfig, err := ctrl.GetConfig(); err == nil { - if cl, err := client.New(clientConfig, client.Options{Scheme: scheme}); err == nil { - // Successfully created a client, use it. - kubernetesClient = cl - } - } - } - return &pipeline[RequestType]{ // All steps can be run in parallel. executionOrder: [][]Step[RequestType]{steps}, @@ -121,7 +113,6 @@ func NewPipeline[RequestType PipelineRequest]( monitor: monitor, mqttClient: mqttClient, mqttTopic: mqttTopic, - Client: kubernetesClient, } } @@ -212,6 +203,15 @@ type TelemetryMessage[RequestType PipelineRequest] struct { Out map[string]float64 `json:"out"` } +type SchedulingDecisionConsumer[RequestType PipelineRequest] interface { + Consume( + request RequestType, + applicationOrder []string, + inWeights map[string]float64, + stepWeights map[string]map[string]float64, + ) +} + // Evaluate the pipeline and return a list of subjects in order of preference. func (p *pipeline[RequestType]) Run(request RequestType) ([]string, error) { slogArgs := request.GetTraceLogArgs() @@ -251,71 +251,9 @@ func (p *pipeline[RequestType]) Run(request RequestType) ([]string, error) { Out: outWeights, }) - // Create a new scheduling decision object for this object. - go func() { - if p.Client == nil { - return - } - var existing v1alpha1.SchedulingDecision - if err := p.Client.Get( - context.Background(), - client.ObjectKey{Name: request.GetResourceID()}, - &existing, - ); err == nil { - // Decision already exists, do not create a new one. 
- // TODO: Add new decisions for the same vm id if this is a migration. - traceLog.Info("scheduler: decision already exists, not creating a new one", "resourceID", request.GetResourceID()) - return - } - outputs := []v1alpha1.SchedulingDecisionPipelineOutputSpec{} - for _, stepKey := range p.applicationOrder { - weights, ok := stepWeights[stepKey] - if !ok { - // This is ok, since steps can be skipped. - continue - } - activations := make(map[string]float64, len(weights)) - for k, v := range weights { - activations[k] = p.ActivationFunction.Norm(v) - } - outputs = append(outputs, v1alpha1.SchedulingDecisionPipelineOutputSpec{ - Step: stepKey, - Activations: activations, - }) - } - - // Need to check if nova request -> circular dependency - // Move to nova pipeline? - // -> Missing data - // omit? - - decision := &v1alpha1.SchedulingDecision{ - ObjectMeta: ctrl.ObjectMeta{Name: request.GetResourceID()}, - Spec: v1alpha1.SchedulingDecisionSpec{ - Input: inWeights, - AvailabilityZone: "TODO", - Flavor: v1alpha1.Flavor{ - Name: "TODO", - VCPUs: 0, - RAM: 0, - Disk: 0, - }, - VMware: false, - Live: false, - Resize: false, - Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ - Name: request.GetPipeline(), - Outputs: outputs, - }, - }, - // Status will be filled in by the controller. - } - if err := p.Client.Create(context.Background(), decision); err != nil { - traceLog.Error("scheduler: failed to create decision", "error", err) - return - } - traceLog.Info("scheduler: created decision", "resourceID", request.GetResourceID()) - }() + if p.Consumer != nil { + go p.Consumer.Consume(request, p.applicationOrder, inWeights, stepWeights) + } return subjects, nil } diff --git a/internal/scheduler/request.go b/internal/scheduler/request.go index 95dc09b8..c3343cce 100644 --- a/internal/scheduler/request.go +++ b/internal/scheduler/request.go @@ -19,8 +19,4 @@ type PipelineRequest interface { GetPipeline() string // Return a copy of the request with the pipeline name set. 
WithPipeline(pipeline string) PipelineRequest - - // Get the identifier of the resource that should be placed, - // e.g. the virtual machine id for virtual machines. - GetResourceID() string } diff --git a/internal/scheduler/request_test.go b/internal/scheduler/request_test.go index 5bfb332f..a4308854 100644 --- a/internal/scheduler/request_test.go +++ b/internal/scheduler/request_test.go @@ -22,4 +22,3 @@ func (m mockPipelineRequest) WithPipeline(pipeline string) PipelineRequest { m.Pipeline = pipeline return m } -func (m mockPipelineRequest) GetResourceID() string { return "mock-resource-id" } From 4ec4f02c81fba2f23e19c559295c437f18a3b95e Mon Sep 17 00:00:00 2001 From: Philipp Matthes Date: Fri, 26 Sep 2025 09:49:41 +0200 Subject: [PATCH 31/58] Fill in data in spec --- internal/scheduler/nova/pipeline.go | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/internal/scheduler/nova/pipeline.go b/internal/scheduler/nova/pipeline.go index 61dfc381..76f482cc 100644 --- a/internal/scheduler/nova/pipeline.go +++ b/internal/scheduler/nova/pipeline.go @@ -121,25 +121,21 @@ func (c *novaPipelineConsumer) Consume( }) } - // Need to check if nova request -> circular dependency - // Move to nova pipeline? - // -> Missing data - // omit? 
- + flavor := request.Spec.Data.Flavor decision := &v1alpha1.SchedulingDecision{ ObjectMeta: ctrl.ObjectMeta{Name: request.Spec.Data.InstanceUUID}, Spec: v1alpha1.SchedulingDecisionSpec{ Input: inWeights, - AvailabilityZone: "TODO", + AvailabilityZone: request.Spec.Data.AvailabilityZone, Flavor: v1alpha1.Flavor{ - Name: "TODO", - VCPUs: 0, - RAM: 0, - Disk: 0, + Name: flavor.Data.Name, + VCPUs: int(flavor.Data.VCPUs), // assume this is safe + RAM: int(flavor.Data.MemoryMB), + Disk: int(flavor.Data.RootGB), }, - VMware: false, - Live: false, - Resize: false, + VMware: request.VMware, + Live: request.Live, + Resize: request.Resize, Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ Name: request.GetPipeline(), Outputs: outputs, From e7c1b7f789542b59d6c156272b2a6eb23a89d993 Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Fri, 26 Sep 2025 10:14:21 +0200 Subject: [PATCH 32/58] final final polish --- .../api/v1alpha1/schedulingdecision_types.go | 6 +- .../decisions.cortex_schedulingdecisions.yaml | 3 + .../decisions.cortex_schedulingdecisions.yaml | 3 + .../decisions.cortex_schedulingdecisions.yaml | 3 + visualizer/nova.html | 122 +++++++++++++---- visualizer/scheduling-decisions.html | 6 + visualizer/shared.css | 124 ++++++++++++++---- 7 files changed, 212 insertions(+), 55 deletions(-) diff --git a/decisions/api/v1alpha1/schedulingdecision_types.go b/decisions/api/v1alpha1/schedulingdecision_types.go index c64cce3e..a1ff506c 100644 --- a/decisions/api/v1alpha1/schedulingdecision_types.go +++ b/decisions/api/v1alpha1/schedulingdecision_types.go @@ -29,9 +29,9 @@ type Flavor struct { type SchedulingDecisionSpec struct { Input map[string]float64 `json:"input,omitempty"` AvailabilityZone string `json:"availbilityZone,omitempty"` - VMware bool `json:"vmware,omitempty"` - Live bool `json:"live,omitempty"` - Resize bool `json:"resize,omitempty"` + VMware bool `json:"vmware"` + Live bool `json:"live"` + Resize bool `json:"resize"` Flavor Flavor `json:"flavor"` Pipeline 
SchedulingDecisionPipelineSpec `json:"pipeline"` diff --git a/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml b/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml index baa25f3b..960a3b70 100644 --- a/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml +++ b/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml @@ -108,7 +108,10 @@ spec: type: boolean required: - flavor + - live - pipeline + - resize + - vmware type: object status: description: status defines the observed state of SchedulingDecision diff --git a/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml b/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml index baa25f3b..960a3b70 100644 --- a/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml +++ b/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml @@ -108,7 +108,10 @@ spec: type: boolean required: - flavor + - live - pipeline + - resize + - vmware type: object status: description: status defines the observed state of SchedulingDecision diff --git a/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml b/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml index eab224f3..24eb0d07 100644 --- a/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml +++ b/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml @@ -114,7 +114,10 @@ spec: type: boolean required: - flavor + - live - pipeline + - resize + - vmware type: object status: description: status defines the observed state of SchedulingDecision diff --git a/visualizer/nova.html b/visualizer/nova.html index 0c481eae..2b47b0c5 100644 --- a/visualizer/nova.html +++ b/visualizer/nova.html @@ -104,35 +104,77 @@ return utilizations } + function convertSchedulingDecisionToPipeline(schedulingDecision) { + const steps = {} + const order = [] + + for (const step of schedulingDecision.spec.pipeline.outputs) { 
+ steps[step.step] = step.activations + order.push(step.step) + } + + + return { + request: { + spec: { + "nova_object.data": { + weights: schedulingDecision.spec.input, + availability_zone: schedulingDecision.spec.availbilityZone, + flavor: { + "nova_object.data": { + name: schedulingDecision.spec.flavor.name, + memory_mb: schedulingDecision.spec.flavor.ram, + vcpus: schedulingDecision.spec.flavor.vcpus, + disk: schedulingDecision.spec.flavor.disk, + } + }, + } + }, + vmware: schedulingDecision.spec.vmware, + live: schedulingDecision.spec.live, + resize: schedulingDecision.spec.resize, + weights: schedulingDecision.spec.input, + }, + steps: schedulingDecision.status.steps, + in: schedulingDecision.spec.input, + out: schedulingDecision.status.finalScores, + steps, + order + } + } + async function redraw() { - if (Object.keys(state.pipeline).length === 0) return + if (Object.keys(state.pipeline).length === 0 && state.schedulingDecision === null) return - // TODO use data of scheduling decision + let data = state.pipeline + if (state.schedulingDecision !== null) { + data = convertSchedulingDecisionToPipeline(state.schedulingDecision) + } const greyout = 'filter: grayscale(50%); opacity: 0.25' - const spec = state.pipeline.request.spec["nova_object.data"] + const spec = data.request.spec["nova_object.data"] const flavor = spec.flavor["nova_object.data"] const utilizations = getUtilizationByHost() const hostnames = Object.keys(utilizations).sort((a, b) => { - if (state.pipeline.in[a] === undefined) return 1 - if (state.pipeline.in[b] === undefined) return -1 + if (data.in[a] === undefined) return 1 + if (data.in[b] === undefined) return -1 // If both values are equal (e.g. both undefined) compare by the name. 
- if (state.pipeline.out[a] === state.pipeline.out[b]) return a.localeCompare(b) - if (state.pipeline.out[a] === undefined) return 1 - if (state.pipeline.out[b] === undefined) return -1 - return state.pipeline.out[b] - state.pipeline.out[a] + if (data.out[a] === data.out[b]) return a.localeCompare(b) + if (data.out[a] === undefined) return 1 + if (data.out[b] === undefined) return -1 + return data.out[b] - data.out[a] }) // Also include the nova input weights, scaled weights, and output weights. steps = { - ...state.pipeline.steps, - 'nova input': state.pipeline.request.weights, - 'scaled weights': state.pipeline.in, - 'output weights': state.pipeline.out, + ...data.steps, + 'nova input': data.request.weights, + 'scaled weights': data.in, + 'output weights': data.out, } - order = ['nova input', 'scaled weights', ...state.pipeline.order, 'output weights'] + order = ['nova input', 'scaled weights', ...data.order, 'output weights'] highlights = ['nova input', 'output weights'] let table = '' @@ -176,7 +218,7 @@ // Add hosts to the table. table += '' for (const host of hostnames) { - const hasWeight = state.pipeline.out[host] !== undefined + const hasWeight = data.out[host] !== undefined table += ` ' + + const navData = [ + { + label: "Cortex Nova", + value: "Scheduled VM:", + }, + { + label: "VMware", + value: data.request.vmware, + }, + { + label: "Live Migration", + value: data.request.live, + }, + { + label: "Resize", + value: data.request.resize, + }, + { + label: "Availability Zone", + value: spec.availability_zone, + }, + { + label: "Flavor", + value: `${flavor.name} (MB: ${flavor.memory_mb}, vCPUs: ${flavor.vcpus})`, + }, + ] + + if (state.schedulingDecision !== null) { + navData.push({ + label: "Decision", + value: state.schedulingDecision.status.description, + }) + } + + + const nav = ` ` document.querySelector('#app').innerHTML = `
- ${table} ${nav} + ${table}
` } diff --git a/visualizer/scheduling-decisions.html b/visualizer/scheduling-decisions.html index dfa8faea..c6d402af 100644 --- a/visualizer/scheduling-decisions.html +++ b/visualizer/scheduling-decisions.html @@ -214,6 +214,11 @@

Scheduling Decisions

header.textContent = `Scheduling Decision for VM ID: ${decision.metadata.name}`; div.appendChild(header); + const description = document.createElement('p'); + description.textContent = decision.status.description; + div.appendChild(description); + + /*const pre = document.createElement('pre'); pre.textContent = JSON.stringify(decision, null, 2); div.appendChild(pre);*/ @@ -225,6 +230,7 @@

Scheduling Decisions

timestamp.textContent = `Created at: ${date.toLocaleString()}`; div.appendChild(timestamp); + details.appendChild(div); details.onclick = function () { // Redirect to nova.html with vm_id parameter diff --git a/visualizer/shared.css b/visualizer/shared.css index c04e4a45..3c4f80f2 100644 --- a/visualizer/shared.css +++ b/visualizer/shared.css @@ -24,60 +24,72 @@ body { /* Nice animated progress bar on top of the page. */ .progress { position: fixed; - top: 0; left: 0; right: 0; + top: 0; + left: 0; + right: 0; height: 0.5em; background: var(--color-primary); z-index: 1000; } + .progress::before { content: ''; position: absolute; - top: 0; left: 0; right: 0; + top: 0; + left: 0; + right: 0; height: 0.5em; background: var(--color-secondary); animation: progress 2s infinite; } + @keyframes progress { - 0% { left: -100%; right: 100%; } - 100% { left: 100%; right: -100%; } + 0% { + left: -100%; + right: 100%; + } + + 100% { + left: 100%; + right: -100%; + } } + .progress-text { position: fixed; - top: 2em; left: 0; right: 0; + top: 2em; + left: 0; + right: 0; text-align: center; font-weight: bold; } /* Navbar that shows information. */ nav { - position: fixed; - top: 0; left: 0; right: 0; padding-left: 0.25em; background: var(--color-surface); box-shadow: 0 0 1em rgba(0, 0, 0, 0.1); z-index: 1; } + nav div.element { display: inline-block; - padding-top: 1em; padding-bottom: 2em; - padding-left: 1em; padding-right: 1em; + padding-top: 1em; + padding-bottom: 2em; + padding-left: 1em; + padding-right: 1em; margin: 0; background: var(--color-surface); color: var(--color-on-surface); border-right: 2px solid var(--color-background); font-size: 1em; } + nav div.element p.highlight { font-size: 1.25em; font-weight: bold; } -main { - /* Space for the description */ - padding-top: 9em; - padding-left: 0.5em; -} - table { /* Revert the default spacing used by the browser. 
*/ border-spacing: 0; @@ -89,25 +101,39 @@ td.weight { position: relative; animation: weightAnimation 0.25s ease-in-out; } + td.weight div { border-radius: 0.5em; padding: 0.5em; margin: 0.5em; border: 2px solid var(--color-surface); } + /* Backdrop white for the weight cells */ td.weight::after { content: ''; position: absolute; - --m: 0.6em; top: var(--m); bottom: var(--m); left: var(--m); right: var(--m); + --m: 0.6em; + top: var(--m); + bottom: var(--m); + left: var(--m); + right: var(--m); border-radius: 0.5em; background: var(--color-surface); z-index: -1; } + /* Animation for weights when they first appear */ @keyframes weightAnimation { - 0% { opacity: 0; transform: scale(0.5); } - 100% { opacity: 1; transform: scale(1); } + 0% { + opacity: 0; + transform: scale(0.5); + } + + 100% { + opacity: 1; + transform: scale(1); + } } /* Table cell showing the hostname/name. */ @@ -115,6 +141,7 @@ th.hostname { text-align: center; position: relative; } + th.hostname div { position: relative; padding: 0.1em; @@ -131,10 +158,12 @@ th.metainfo { text-align: center; position: relative; } + th.metainfo div p { width: 6em; overflow: hidden; } + th.metainfo div p.issue { color: var(--color-tertiary); border-radius: 0.5em; @@ -162,12 +191,16 @@ td.chart { position: relative; height: 24em; } + td.chart div.barsbefore, td.chart div.barsafter, td.chart div.backdrop, td.chart div.stats { position: absolute; - top: 0; left: 0; right: 0; bottom: 0; + top: 0; + left: 0; + right: 0; + bottom: 0; display: flex; margin-top: 1.5em; margin-bottom: 0.5em; @@ -177,6 +210,7 @@ td.chart div.stats { justify-content: center; align-items: flex-end; } + td.chart div.barsbefore p, td.chart div.barsafter p, td.chart div.backdrop p, @@ -186,44 +220,73 @@ td.chart div.stats p { display: flex; border-radius: 0.2em; } + td.chart div.backdrop p { height: 100%; border-radius: 0.2em; border: 1px solid rgba(0, 0, 0, 0.05); background: white; } + td.chart div.stats { text-align: center; display: flex; 
justify-content: center; align-items: flex-start; } + td.chart div.stats p { writing-mode: vertical-lr; text-orientation: mixed; display: flex; font-size: 1em; font-weight: bold; - margin-left: 0.1em; margin-right: 0.1em; + margin-left: 0.1em; + margin-right: 0.1em; justify-content: center; align-items: center; } + /* Animation for chart bars */ td.chart div.barsafter p, td.chart div.barsbefore p { animation: barAnim 0.25s ease-in-out; overflow: hidden; } + @keyframes barAnim { - 0% { transform: scaleY(0); } - 100% { transform: scaleY(1); } + 0% { + transform: scaleY(0); + } + + 100% { + transform: scaleY(1); + } +} + +td.chart div.barsafter p.cpu { + background: var(--color-primary); +} + +td.chart div.barsafter p.mem { + background: var(--color-primary); +} + +td.chart div.barsafter p.disk { + background: var(--color-primary); +} + +td.chart div.barsbefore p.cpu { + background: var(--color-secondary); +} + +td.chart div.barsbefore p.mem { + background: var(--color-secondary); +} + +td.chart div.barsbefore p.disk { + background: var(--color-secondary); } -td.chart div.barsafter p.cpu { background: var(--color-primary); } -td.chart div.barsafter p.mem { background: var(--color-primary); } -td.chart div.barsafter p.disk { background: var(--color-primary); } -td.chart div.barsbefore p.cpu { background: var(--color-secondary); } -td.chart div.barsbefore p.mem { background: var(--color-secondary); } -td.chart div.barsbefore p.disk { background: var(--color-secondary); } /* Style for the input and button */ @@ -233,6 +296,7 @@ td.chart div.barsbefore p.disk { background: var(--color-secondary); } align-items: center; margin: 1em; } + .mqtt-url-input input { padding: 0.5em; font-size: 1em; @@ -241,6 +305,7 @@ td.chart div.barsbefore p.disk { background: var(--color-secondary); } margin-right: 0.5em; min-width: 12em; } + .mqtt-url-input button { padding: 0.5em 1em; margin-right: 0.5em; @@ -251,7 +316,8 @@ td.chart div.barsbefore p.disk { background: 
var(--color-secondary); } border-radius: 0.25em; cursor: pointer; } + .mqtt-url-input button:hover { background: var(--color-secondary); color: var(--color-on-secondary); -} +} \ No newline at end of file From f694f2a8db02911c16cc3e22ff6de0bca999aeda Mon Sep 17 00:00:00 2001 From: mblos Date: Fri, 26 Sep 2025 12:12:04 +0200 Subject: [PATCH 33/58] Refactor string ops --- decisions/internal/controller/controller.go | 81 +++------------------ 1 file changed, 11 insertions(+), 70 deletions(-) diff --git a/decisions/internal/controller/controller.go b/decisions/internal/controller/controller.go index 2efe587f..382a681b 100644 --- a/decisions/internal/controller/controller.go +++ b/decisions/internal/controller/controller.go @@ -7,6 +7,7 @@ import ( "context" "fmt" "sort" + "strings" "k8s.io/apimachinery/pkg/runtime" ctrl "sigs.k8s.io/controller-runtime" @@ -409,16 +410,15 @@ func (r *SchedulingDecisionReconciler) generateOrderedScoresAndDescription(final if criticalStepCount == 1 { criticalPath = fmt.Sprintf(" Decision driven by 1/%d pipeline step: %s.", totalSteps, criticalSteps[0]) } else { - // Join critical steps with commas - stepList := "" - for i, step := range criticalSteps { - if i == len(criticalSteps)-1 { - stepList += step - } else if i == len(criticalSteps)-2 { - stepList += step + " and " - } else { - stepList += step + ", " - } + // Join critical steps with proper separators + var stepList string + if len(criticalSteps) == 2 { + stepList = strings.Join(criticalSteps, " and ") + } else { + // For 3+ steps: "step1, step2, and step3" + lastStep := criticalSteps[len(criticalSteps)-1] + otherSteps := criticalSteps[:len(criticalSteps)-1] + stepList = strings.Join(otherSteps, ", ") + " and " + lastStep } criticalPath = fmt.Sprintf(" Decision driven by %d/%d pipeline steps: %s.", criticalStepCount, totalSteps, stepList) } @@ -497,66 +497,7 @@ func (r *SchedulingDecisionReconciler) formatStepImpactsMultiLine(stepImpacts [] } // Join with newlines and add initial 
label - return fmt.Sprintf(" Step impacts:\n%s", joinLines(lines)) -} - -// joinStepList joins step descriptions with appropriate separators -func joinStepList(steps []string) string { - if len(steps) == 0 { - return "" - } - if len(steps) == 1 { - return steps[0] - } - if len(steps) == 2 { - return steps[0] + ", " + steps[1] - } - - result := "" - for i, step := range steps { - if i < len(steps)-1 { - result += step + ", " - } else { - result += step - } - } - return result -} - -// joinLines joins multiple lines with newlines and proper indentation -func joinLines(lines []string) string { - result := "" - for i, line := range lines { - if i < len(lines)-1 { - result += line + "\n" - } else { - result += line - } - } - return result + "." -} - -// joinImpacts joins step impact descriptions with appropriate separators (kept for compatibility) -func joinImpacts(impacts []string) string { - if len(impacts) == 0 { - return "" - } - if len(impacts) == 1 { - return impacts[0] - } - if len(impacts) == 2 { - return impacts[0] + ", " + impacts[1] - } - - result := "" - for i, impact := range impacts { - if i == len(impacts)-1 { - result += impact - } else { - result += impact + ", " - } - } - return result + return fmt.Sprintf(" Step impacts:\n%s.", strings.Join(lines, "\n")) } // SetupWithManager sets up the controller with the Manager. 
From ab854efc7a8a715b701549fb65e75f5099646e30 Mon Sep 17 00:00:00 2001 From: mblos Date: Fri, 26 Sep 2025 14:46:18 +0200 Subject: [PATCH 34/58] Refactoring --- decisions/internal/controller/controller.go | 353 ++++++++++---------- 1 file changed, 179 insertions(+), 174 deletions(-) diff --git a/decisions/internal/controller/controller.go b/decisions/internal/controller/controller.go index 382a681b..f1aac3cd 100644 --- a/decisions/internal/controller/controller.go +++ b/decisions/internal/controller/controller.go @@ -6,6 +6,7 @@ package controller import ( "context" "fmt" + "math" "sort" "strings" @@ -20,6 +21,70 @@ import ( decisionsv1alpha1 "github.com/cobaltcore-dev/cortex/decisions/api/v1alpha1" ) +const ( + // MinScoreValue represents the minimum possible score value + MinScoreValue = -999999 + + // String format templates for descriptions + selectedPerfectFmt = "Selected: %s (score: %.2f), certainty: perfect, %d hosts evaluated." + selectedCertaintyFmt = "Selected: %s (score: %.2f), certainty: %s (gap: %.2f), %d hosts evaluated." + noHostsRemainingFmt = "No hosts remaining after filtering, %d hosts evaluated" + inputConfirmedFmt = " Input choice confirmed: %s (%.2f→%.2f, remained #1)." + inputFilteredFmt = " Input favored %s (score: %.2f, now filtered), final winner was #%d in input (%.2f→%.2f)." + inputDemotedFmt = " Input favored %s (score: %.2f, now #%d with %.2f), final winner was #%d in input (%.2f→%.2f)." 
+) + +// certaintyLevel represents a threshold and its corresponding certainty level +type certaintyLevel struct { + threshold float64 + level string +} + +// certaintyLevels maps score gaps to certainty levels (ordered from highest to lowest threshold) +var certaintyLevels = []certaintyLevel{ + {0.5, "high"}, + {0.2, "medium"}, + {0.0, "low"}, +} + +// getCertaintyLevel returns the certainty level for a given score gap +func getCertaintyLevel(gap float64) string { + for _, cl := range certaintyLevels { + if gap >= cl.threshold { + return cl.level + } + } + return "low" // fallback +} + +// hostScore represents a host-score pair for sorting operations +type hostScore struct { + host string + score float64 +} + +// mapToSortedHostScores converts a score map to sorted hostScore slice (highest to lowest) +func mapToSortedHostScores(scores map[string]float64) []hostScore { + sorted := make([]hostScore, 0, len(scores)) + for host, score := range scores { + sorted = append(sorted, hostScore{host: host, score: score}) + } + sort.Slice(sorted, func(i, j int) bool { + return sorted[i].score > sorted[j].score + }) + return sorted +} + +// findHostPosition returns the 1-based position of a host in sorted hosts slice +func findHostPosition(hosts []hostScore, targetHost string) int { + for i, hs := range hosts { + if hs.host == targetHost { + return i + 1 // 1-based position + } + } + return -1 +} + // SchedulingDecisionReconciler reconciles a SchedulingDecision object type SchedulingDecisionReconciler struct { // Client for the kubernetes API. 
@@ -50,44 +115,40 @@ func (r *SchedulingDecisionReconciler) Reconcile(ctx context.Context, req ctrl.R return ctrl.Result{}, nil } - // Validate that there is at least one host in the input - if len(res.Spec.Input) == 0 { - res.Status.State = v1alpha1.SchedulingDecisionStateError - res.Status.Error = "No hosts provided in input" - } else { - // Validate that all hosts in pipeline outputs exist in input - for _, output := range res.Spec.Pipeline.Outputs { - for hostName := range output.Activations { - if _, exists := res.Spec.Input[hostName]; !exists { - res.Status.State = v1alpha1.SchedulingDecisionStateError - res.Status.Error = "Host '" + hostName + "' in pipeline output not found in input" - if err := r.Status().Update(ctx, &res); err != nil { - return ctrl.Result{}, err - } - return ctrl.Result{}, nil - } - } + // Validate input has at least one host + if err := r.validateInput(res.Spec.Input); err != nil { + if err := r.setErrorState(ctx, &res, err); err != nil { + return ctrl.Result{}, err + } + return ctrl.Result{}, nil + } + + // Validate that all hosts in pipeline outputs exist in input + if err := r.validatePipelineHosts(res.Spec.Input, res.Spec.Pipeline.Outputs); err != nil { + if err := r.setErrorState(ctx, &res, err); err != nil { + return ctrl.Result{}, err } + return ctrl.Result{}, nil + } - // Calculate final scores with full pipeline - finalScores, deletedHosts := r.calculateScores(res.Spec.Input, res.Spec.Pipeline.Outputs) + // Calculate final scores with full pipeline + finalScores, deletedHosts := r.calculateScores(res.Spec.Input, res.Spec.Pipeline.Outputs) - // Calculate step-by-step impact for the winner - stepImpacts := r.calculateStepImpacts(res.Spec.Input, res.Spec.Pipeline.Outputs, finalScores) + // Calculate step-by-step impact for the winner + stepImpacts := r.calculateStepImpacts(res.Spec.Input, res.Spec.Pipeline.Outputs, finalScores) - // Find minimal critical path - criticalSteps, criticalStepCount := 
r.findCriticalSteps(res.Spec.Input, res.Spec.Pipeline.Outputs, finalScores) + // Find minimal critical path + criticalSteps, criticalStepCount := r.findCriticalSteps(res.Spec.Input, res.Spec.Pipeline.Outputs, finalScores) - res.Status.State = v1alpha1.SchedulingDecisionStateResolved - res.Status.Error = "" + res.Status.State = v1alpha1.SchedulingDecisionStateResolved + res.Status.Error = "" - // Sort finalScores by score (highest to lowest) and generate enhanced description - orderedScores, description := r.generateOrderedScoresAndDescription(finalScores, res.Spec.Input, criticalSteps, criticalStepCount, len(res.Spec.Pipeline.Outputs), stepImpacts) + // Sort finalScores by score (highest to lowest) and generate enhanced description + orderedScores, description := r.generateOrderedScoresAndDescription(finalScores, res.Spec.Input, criticalSteps, criticalStepCount, len(res.Spec.Pipeline.Outputs), stepImpacts) - res.Status.FinalScores = orderedScores - res.Status.DeletedHosts = deletedHosts - res.Status.Description = description - } + res.Status.FinalScores = orderedScores + res.Status.DeletedHosts = deletedHosts + res.Status.Description = description if err := r.Status().Update(ctx, &res); err != nil { return ctrl.Result{}, err @@ -96,9 +157,53 @@ func (r *SchedulingDecisionReconciler) Reconcile(ctx context.Context, req ctrl.R return ctrl.Result{}, nil // No need to requeue. 
} +// validateInput checks if the input has at least one host +func (r *SchedulingDecisionReconciler) validateInput(input map[string]float64) error { + if len(input) == 0 { + return fmt.Errorf("No hosts provided in input") + } + return nil +} + +// validatePipelineHosts checks if all hosts in pipeline outputs exist in input +func (r *SchedulingDecisionReconciler) validatePipelineHosts(input map[string]float64, outputs []v1alpha1.SchedulingDecisionPipelineOutputSpec) error { + for _, output := range outputs { + for hostName := range output.Activations { + if _, exists := input[hostName]; !exists { + return fmt.Errorf("Host '%s' in pipeline output not found in input", hostName) + } + } + } + return nil +} + +// setErrorState sets the error state and updates the resource status +func (r *SchedulingDecisionReconciler) setErrorState(ctx context.Context, res *v1alpha1.SchedulingDecision, err error) error { + res.Status.State = v1alpha1.SchedulingDecisionStateError + res.Status.Error = err.Error() + return r.Status().Update(ctx, res) +} + +// findWinner returns the host with the highest score and the score value +func findWinner(scores map[string]float64) (string, float64) { + if len(scores) == 0 { + return "", MinScoreValue + } + + winner := "" + maxScore := float64(MinScoreValue) + for host, score := range scores { + if score > maxScore { + maxScore = score + winner = host + } + } + return winner, maxScore +} + // calculateScores processes pipeline outputs and returns final scores and deleted hosts func (r *SchedulingDecisionReconciler) calculateScores(input map[string]float64, outputs []v1alpha1.SchedulingDecisionPipelineOutputSpec) (map[string]float64, map[string][]string) { - finalScores := make(map[string]float64) + finalScores := make(map[string]float64, len(input)) deletedHosts := make(map[string][]string) // Start with input values as initial scores @@ -139,15 +244,7 @@ func (r *SchedulingDecisionReconciler) findCriticalSteps(input map[string]float6 } // Get 
baseline winner - baselineWinner := "" - maxScore := float64(-999999) - for host, score := range baselineFinalScores { - if score > maxScore { - maxScore = score - baselineWinner = host - } - } - + baselineWinner, _ := findWinner(baselineFinalScores) if baselineWinner == "" { return []string{}, 0 } @@ -156,26 +253,16 @@ func (r *SchedulingDecisionReconciler) findCriticalSteps(input map[string]float6 // Try removing each step one by one for i, stepToRemove := range outputs { - // Create pipeline without this step + // Create pipeline without this step using slice operations reducedOutputs := make([]v1alpha1.SchedulingDecisionPipelineOutputSpec, 0, len(outputs)-1) - for j, output := range outputs { - if j != i { - reducedOutputs = append(reducedOutputs, output) - } - } + reducedOutputs = append(reducedOutputs, outputs[:i]...) + reducedOutputs = append(reducedOutputs, outputs[i+1:]...) // Calculate scores without this step reducedFinalScores, _ := r.calculateScores(input, reducedOutputs) // Find winner without this step - reducedWinner := "" - reducedMaxScore := float64(-999999) - for host, score := range reducedFinalScores { - if score > reducedMaxScore { - reducedMaxScore = score - reducedWinner = host - } - } + reducedWinner, _ := findWinner(reducedFinalScores) // If removing this step changes the winner, it's critical if reducedWinner != baselineWinner { @@ -203,15 +290,7 @@ func (r *SchedulingDecisionReconciler) calculateStepImpacts(input map[string]flo } // Find the final winner - finalWinner := "" - maxScore := float64(-999999) - for host, score := range finalScores { - if score > maxScore { - maxScore = score - finalWinner = host - } - } - + finalWinner, _ := findWinner(finalScores) if finalWinner == "" { return []StepImpact{} } @@ -294,24 +373,11 @@ func (r *SchedulingDecisionReconciler) calculateStepImpacts(input map[string]flo func (r *SchedulingDecisionReconciler) generateOrderedScoresAndDescription(finalScores map[string]float64, inputScores 
map[string]float64, criticalSteps []string, criticalStepCount int, totalSteps int, stepImpacts []StepImpact) (map[string]float64, string) { totalInputHosts := len(inputScores) if len(finalScores) == 0 { - return finalScores, fmt.Sprintf("No hosts remaining after filtering, %d hosts evaluated", totalInputHosts) + return finalScores, fmt.Sprintf(noHostsRemainingFmt, totalInputHosts) } - // Create a slice of host-score pairs for sorting - type hostScore struct { - host string - score float64 - } - - var sortedHosts []hostScore - for host, score := range finalScores { - sortedHosts = append(sortedHosts, hostScore{host: host, score: score}) - } - - // Sort by score (highest to lowest) - sort.Slice(sortedHosts, func(i, j int) bool { - return sortedHosts[i].score > sortedHosts[j].score - }) + // Sort final scores by value (highest to lowest) + sortedHosts := mapToSortedHostScores(finalScores) // Create ordered map (Go maps maintain insertion order as of Go 1.8+) orderedScores := make(map[string]float64) @@ -320,13 +386,7 @@ func (r *SchedulingDecisionReconciler) generateOrderedScoresAndDescription(final } // Sort input scores to determine input-based ranking - var sortedInputHosts []hostScore - for host, score := range inputScores { - sortedInputHosts = append(sortedInputHosts, hostScore{host: host, score: score}) - } - sort.Slice(sortedInputHosts, func(i, j int) bool { - return sortedInputHosts[i].score > sortedInputHosts[j].score - }) + sortedInputHosts := mapToSortedHostScores(inputScores) // Find positions and generate comparison finalWinner := sortedHosts[0].host @@ -334,41 +394,24 @@ func (r *SchedulingDecisionReconciler) generateOrderedScoresAndDescription(final finalWinnerInputScore := inputScores[finalWinner] // Find final winner's position in input ranking - finalWinnerInputPosition := -1 - for i, hs := range sortedInputHosts { - if hs.host == finalWinner { - finalWinnerInputPosition = i + 1 // 1-based position - break - } - } + finalWinnerInputPosition := 
findHostPosition(sortedInputHosts, finalWinner) // Generate main description var description string if len(sortedHosts) == 1 { - description = fmt.Sprintf("Selected: %s (score: %.2f), certainty: perfect, %d hosts evaluated.", - sortedHosts[0].host, sortedHosts[0].score, totalInputHosts) + description = fmt.Sprintf(selectedPerfectFmt, sortedHosts[0].host, sortedHosts[0].score, totalInputHosts) } else { // Calculate certainty based on gap between 1st and 2nd place gap := sortedHosts[0].score - sortedHosts[1].score - var certainty string - if gap >= 0.5 { - certainty = "high" - } else if gap >= 0.2 { - certainty = "medium" - } else { - certainty = "low" - } - - description = fmt.Sprintf("Selected: %s (score: %.2f), certainty: %s (gap: %.2f), %d hosts evaluated.", - sortedHosts[0].host, sortedHosts[0].score, certainty, gap, totalInputHosts) + certainty := getCertaintyLevel(gap) + description = fmt.Sprintf(selectedCertaintyFmt, sortedHosts[0].host, sortedHosts[0].score, certainty, gap, totalInputHosts) } // Add input vs. 
final comparison var comparison string if inputWinner == finalWinner { // Input choice confirmed - comparison = fmt.Sprintf(" Input choice confirmed: %s (%.2f→%.2f, remained #1).", - finalWinner, finalWinnerInputScore, sortedHosts[0].score) + comparison = fmt.Sprintf(inputConfirmedFmt, finalWinner, finalWinnerInputScore, sortedHosts[0].score) } else { // Input winner different from final winner inputWinnerScore := sortedInputHosts[0].score @@ -376,19 +419,11 @@ func (r *SchedulingDecisionReconciler) generateOrderedScoresAndDescription(final // Check if input winner was filtered out _, inputWinnerSurvived := finalScores[inputWinner] if !inputWinnerSurvived { - comparison = fmt.Sprintf(" Input favored %s (score: %.2f, now filtered), final winner was #%d in input (%.2f→%.2f).", - inputWinner, inputWinnerScore, finalWinnerInputPosition, finalWinnerInputScore, sortedHosts[0].score) + comparison = fmt.Sprintf(inputFilteredFmt, inputWinner, inputWinnerScore, finalWinnerInputPosition, finalWinnerInputScore, sortedHosts[0].score) } else { // Find input winner's position in final ranking - inputWinnerFinalPosition := -1 - for i, hs := range sortedHosts { - if hs.host == inputWinner { - inputWinnerFinalPosition = i + 1 // 1-based position - break - } - } - comparison = fmt.Sprintf(" Input favored %s (score: %.2f, now #%d with %.2f), final winner was #%d in input (%.2f→%.2f).", - inputWinner, inputWinnerScore, inputWinnerFinalPosition, finalScores[inputWinner], + inputWinnerFinalPosition := findHostPosition(sortedHosts, inputWinner) + comparison = fmt.Sprintf(inputDemotedFmt, inputWinner, inputWinnerScore, inputWinnerFinalPosition, finalScores[inputWinner], finalWinnerInputPosition, finalWinnerInputScore, sortedHosts[0].score) } } @@ -429,6 +464,20 @@ func (r *SchedulingDecisionReconciler) generateOrderedScoresAndDescription(final return orderedScores, description } +// formatImpactValue formats a single step impact value +func formatImpactValue(impact StepImpact) string { + 
if impact.PromotedToFirst { + return fmt.Sprintf("%+.2f→#1", impact.ScoreDelta) + } + if impact.ScoreDelta != 0 { + return fmt.Sprintf("%+.2f", impact.ScoreDelta) + } + if impact.CompetitorsRemoved > 0 { + return fmt.Sprintf("+0.00 (removed %d)", impact.CompetitorsRemoved) + } + return "+0.00" +} + // formatStepImpactsMultiLine formats step impacts in a simple delta-ordered format // without confusing terminology, ordered by absolute impact magnitude func (r *SchedulingDecisionReconciler) formatStepImpactsMultiLine(stepImpacts []StepImpact) string { @@ -436,68 +485,24 @@ func (r *SchedulingDecisionReconciler) formatStepImpactsMultiLine(stepImpacts [] return "" } - // Create a copy of impacts for sorting - sortedImpacts := make([]StepImpact, len(stepImpacts)) - copy(sortedImpacts, stepImpacts) - // Sort by absolute delta impact (highest first), with promotions taking priority for ties - sort.Slice(sortedImpacts, func(i, j int) bool { - absI := sortedImpacts[i].ScoreDelta - if absI < 0 { - absI = -absI - } - absJ := sortedImpacts[j].ScoreDelta - if absJ < 0 { - absJ = -absJ - } - - // First priority: higher absolute delta + sort.Slice(stepImpacts, func(i, j int) bool { + absI, absJ := math.Abs(stepImpacts[i].ScoreDelta), math.Abs(stepImpacts[j].ScoreDelta) if absI != absJ { return absI > absJ } - - // Tie-breaking: promotions come first - if sortedImpacts[i].PromotedToFirst != sortedImpacts[j].PromotedToFirst { - return sortedImpacts[i].PromotedToFirst + if stepImpacts[i].PromotedToFirst != stepImpacts[j].PromotedToFirst { + return stepImpacts[i].PromotedToFirst } - - // Final tie-breaking: maintain original pipeline order (use step name for consistency) - return sortedImpacts[i].Step < sortedImpacts[j].Step + return stepImpacts[i].Step < stepImpacts[j].Step }) - var lines []string - - for _, impact := range sortedImpacts { - var stepDesc string - - if impact.PromotedToFirst { - // Step promoted winner to first place - if impact.ScoreDelta != 0 { - stepDesc = 
fmt.Sprintf("%s %+.2f→#1", impact.Step, impact.ScoreDelta) - } else { - // Zero delta but promoted (must have removed competitors) - stepDesc = fmt.Sprintf("%s +0.00→#1", impact.Step) - } - } else if impact.ScoreDelta != 0 { - // Step changed winner's score but didn't promote to #1 - stepDesc = fmt.Sprintf("%s %+.2f", impact.Step, impact.ScoreDelta) - } else if impact.CompetitorsRemoved > 0 { - // Step removed competitors but didn't change winner's score or promote - stepDesc = fmt.Sprintf("%s +0.00 (removed %d)", impact.Step, impact.CompetitorsRemoved) - } else { - // Step had no measurable impact - stepDesc = fmt.Sprintf("%s +0.00", impact.Step) - } - - lines = append(lines, fmt.Sprintf("• %s", stepDesc)) - } - - if len(lines) == 0 { - return "" + var b strings.Builder + b.WriteString(" Step impacts:") + for _, impact := range stepImpacts { + fmt.Fprintf(&b, "\n• %s %s", impact.Step, formatImpactValue(impact)) } - - // Join with newlines and add initial label - return fmt.Sprintf(" Step impacts:\n%s.", strings.Join(lines, "\n")) + return b.String() + "." } // SetupWithManager sets up the controller with the Manager. 
From 84a233c425aec191f354d86806df05a6d1f19fbb Mon Sep 17 00:00:00 2001 From: mblos Date: Mon, 29 Sep 2025 15:58:08 +0200 Subject: [PATCH 35/58] Multiple scheduling decisions in CRD --- .../api/v1alpha1/schedulingdecision_types.go | 60 ++- .../api/v1alpha1/zz_generated.deepcopy.go | 57 ++- .../decisions.cortex_schedulingdecisions.yaml | 167 ++++--- .../decisions.cortex_schedulingdecisions.yaml | 167 ++++--- .../decisions.cortex_schedulingdecisions.yaml | 167 ++++--- decisions/internal/controller/controller.go | 65 ++- .../internal/controller/controller_test.go | 435 +++++++++++------- 7 files changed, 706 insertions(+), 412 deletions(-) diff --git a/decisions/api/v1alpha1/schedulingdecision_types.go b/decisions/api/v1alpha1/schedulingdecision_types.go index a1ff506c..cc9a3129 100644 --- a/decisions/api/v1alpha1/schedulingdecision_types.go +++ b/decisions/api/v1alpha1/schedulingdecision_types.go @@ -7,9 +7,18 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) +type SchedulingEventType string + +const ( + SchedulingEventTypeLiveMigration SchedulingEventType = "live-migration" + SchedulingEventTypeColdMigration SchedulingEventType = "cold-migration" + SchedulingEventTypeEvacuation SchedulingEventType = "evacuation" + SchedulingEventTypeResize SchedulingEventType = "resize" + SchedulingEventTypeInitialPlacement SchedulingEventType = "initial-placement" +) + type SchedulingDecisionPipelineOutputSpec struct { - Step string `json:"step"` - // Weights calculated by this step subjected to the activation function. + Step string `json:"step"` Activations map[string]float64 `json:"activations,omitempty"` } @@ -26,15 +35,21 @@ type Flavor struct { } // SchedulingDecisionSpec defines the desired state of SchedulingDecision. 
-type SchedulingDecisionSpec struct { - Input map[string]float64 `json:"input,omitempty"` - AvailabilityZone string `json:"availbilityZone,omitempty"` - VMware bool `json:"vmware"` - Live bool `json:"live"` - Resize bool `json:"resize"` - Flavor Flavor `json:"flavor"` - - Pipeline SchedulingDecisionPipelineSpec `json:"pipeline"` +type SchedulingDecisionSpec struct { // List of scheduling decisions to be processed. + Decisions []SchedulingDecisionRequest `json:"decisions"` +} + +type SchedulingDecisionRequest struct { + ID string `json:"id"` + RequestedAt metav1.Time `json:"requestedAt"` + EventType SchedulingEventType `json:"eventType"` + Input map[string]float64 `json:"input,omitempty"` + Pipeline SchedulingDecisionPipelineSpec `json:"pipeline"` + + AvailabilityZone string `json:"availabilityZone,omitempty"` + VMware bool `json:"vmware"` + // TODO more generic flavor to support other than compute + Flavor Flavor `json:"flavor"` } type SchedulingDecisionState string @@ -44,11 +59,9 @@ const ( SchedulingDecisionStateError SchedulingDecisionState = "error" ) -// SchedulingDecisionStatus defines the observed state of SchedulingDecision. -type SchedulingDecisionStatus struct { - State SchedulingDecisionState `json:"state,omitempty"` - // Only given if state is "error". - Error string `json:"error,omitempty"` +// SchedulingDecisionResult represents the result of processing a single decision request. +type SchedulingDecisionResult struct { + ID string `json:"id"` Description string `json:"description,omitempty"` // Final scores for each host after processing all pipeline steps. FinalScores map[string]float64 `json:"finalScores,omitempty"` @@ -56,13 +69,26 @@ type SchedulingDecisionStatus struct { DeletedHosts map[string][]string `json:"deletedHosts,omitempty"` } +// SchedulingDecisionStatus defines the observed state of SchedulingDecision. 
+type SchedulingDecisionStatus struct { + State SchedulingDecisionState `json:"state,omitempty"` + Error string `json:"error,omitempty"` + + DecisionCount int `json:"decisionCount,omitempty"` + GlobalDescription string `json:"globalDescription,omitempty"` + + Results []SchedulingDecisionResult `json:"results,omitempty"` +} + // +kubebuilder:object:root=true // +kubebuilder:subresource:status // +kubebuilder:resource:scope=Cluster,shortName=sdec;sdecs // +kubebuilder:printcolumn:name="State",type="string",JSONPath=".status.state" // +kubebuilder:printcolumn:name="Error",type="string",JSONPath=".status.error" // +kubebuilder:printcolumn:name="Created",type="date",JSONPath=".metadata.creationTimestamp" -// +kubebuilder:printcolumn:name="Description",type="string",JSONPath=".status.description" +// +kubebuilder:printcolumn:name="Decisions",type="integer",JSONPath=".status.decisionCount" +// +kubebuilder:printcolumn:name="Latest Event",type="string",JSONPath=".spec.decisions[-1].eventType" +// +kubebuilder:printcolumn:name="Description",type="string",JSONPath=".status.globalDescription" // SchedulingDecision is the Schema for the schedulingdecisions API type SchedulingDecision struct { diff --git a/decisions/api/v1alpha1/zz_generated.deepcopy.go b/decisions/api/v1alpha1/zz_generated.deepcopy.go index c7f73fd4..e1f81402 100644 --- a/decisions/api/v1alpha1/zz_generated.deepcopy.go +++ b/decisions/api/v1alpha1/zz_generated.deepcopy.go @@ -130,8 +130,9 @@ func (in *SchedulingDecisionPipelineSpec) DeepCopy() *SchedulingDecisionPipeline } // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
-func (in *SchedulingDecisionSpec) DeepCopyInto(out *SchedulingDecisionSpec) { +func (in *SchedulingDecisionRequest) DeepCopyInto(out *SchedulingDecisionRequest) { *out = *in + in.RequestedAt.DeepCopyInto(&out.RequestedAt) if in.Input != nil { in, out := &in.Input, &out.Input *out = make(map[string]float64, len(*in)) @@ -139,22 +140,22 @@ func (in *SchedulingDecisionSpec) DeepCopyInto(out *SchedulingDecisionSpec) { (*out)[key] = val } } - out.Flavor = in.Flavor in.Pipeline.DeepCopyInto(&out.Pipeline) + out.Flavor = in.Flavor } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingDecisionSpec. -func (in *SchedulingDecisionSpec) DeepCopy() *SchedulingDecisionSpec { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingDecisionRequest. +func (in *SchedulingDecisionRequest) DeepCopy() *SchedulingDecisionRequest { if in == nil { return nil } - out := new(SchedulingDecisionSpec) + out := new(SchedulingDecisionRequest) in.DeepCopyInto(out) return out } // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *SchedulingDecisionStatus) DeepCopyInto(out *SchedulingDecisionStatus) { +func (in *SchedulingDecisionResult) DeepCopyInto(out *SchedulingDecisionResult) { *out = *in if in.FinalScores != nil { in, out := &in.FinalScores, &out.FinalScores @@ -181,6 +182,50 @@ func (in *SchedulingDecisionStatus) DeepCopyInto(out *SchedulingDecisionStatus) } } +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingDecisionResult. +func (in *SchedulingDecisionResult) DeepCopy() *SchedulingDecisionResult { + if in == nil { + return nil + } + out := new(SchedulingDecisionResult) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *SchedulingDecisionSpec) DeepCopyInto(out *SchedulingDecisionSpec) { + *out = *in + if in.Decisions != nil { + in, out := &in.Decisions, &out.Decisions + *out = make([]SchedulingDecisionRequest, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingDecisionSpec. +func (in *SchedulingDecisionSpec) DeepCopy() *SchedulingDecisionSpec { + if in == nil { + return nil + } + out := new(SchedulingDecisionSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *SchedulingDecisionStatus) DeepCopyInto(out *SchedulingDecisionStatus) { + *out = *in + if in.Results != nil { + in, out := &in.Results, &out.Results + *out = make([]SchedulingDecisionResult, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingDecisionStatus. 
func (in *SchedulingDecisionStatus) DeepCopy() *SchedulingDecisionStatus { if in == nil { diff --git a/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml b/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml index 960a3b70..7b4dc19e 100644 --- a/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml +++ b/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml @@ -27,7 +27,13 @@ spec: - jsonPath: .metadata.creationTimestamp name: Created type: date - - jsonPath: .status.description + - jsonPath: .status.decisionCount + name: Decisions + type: integer + - jsonPath: .spec.decisions[-1].eventType + name: Latest Event + type: string + - jsonPath: .status.globalDescription name: Description type: string name: v1alpha1 @@ -56,85 +62,108 @@ spec: spec: description: spec defines the desired state of SchedulingDecision properties: - availbilityZone: - type: string - flavor: - properties: - disk: - type: integer - name: - type: string - ram: - type: integer - vcpus: - type: integer - required: - - disk - - name - - ram - - vcpus - type: object - input: - additionalProperties: - type: number - type: object - live: - type: boolean - pipeline: - properties: - name: - type: string - outputs: - items: + decisions: + items: + properties: + availabilityZone: + type: string + eventType: + type: string + flavor: + properties: + disk: + type: integer + name: + type: string + ram: + type: integer + vcpus: + type: integer + required: + - disk + - name + - ram + - vcpus + type: object + id: + type: string + input: + additionalProperties: + type: number + type: object + pipeline: properties: - activations: - additionalProperties: - type: number - description: Weights calculated by this step subjected to - the activation function. 
- type: object - step: + name: type: string + outputs: + items: + properties: + activations: + additionalProperties: + type: number + type: object + step: + type: string + required: + - step + type: object + type: array required: - - step + - name type: object - type: array - required: - - name - type: object - resize: - type: boolean - vmware: - type: boolean + requestedAt: + format: date-time + type: string + vmware: + type: boolean + required: + - eventType + - flavor + - id + - pipeline + - requestedAt + - vmware + type: object + type: array required: - - flavor - - live - - pipeline - - resize - - vmware + - decisions type: object status: description: status defines the observed state of SchedulingDecision properties: - deletedHosts: - additionalProperties: - items: - type: string - type: array - description: Hosts that were deleted during pipeline processing and - all steps that attempted to delete them. - type: object - description: - type: string + decisionCount: + type: integer error: - description: Only given if state is "error". type: string - finalScores: - additionalProperties: - type: number - description: Final scores for each host after processing all pipeline - steps. - type: object + globalDescription: + type: string + results: + items: + description: SchedulingDecisionResult represents the result of processing + a single decision request. + properties: + deletedHosts: + additionalProperties: + items: + type: string + type: array + description: Hosts that were deleted during pipeline processing + and all steps that attempted to delete them. + type: object + description: + type: string + finalScores: + additionalProperties: + type: number + description: Final scores for each host after processing all + pipeline steps. 
+ type: object + id: + type: string + required: + - id + type: object + type: array state: type: string type: object diff --git a/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml b/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml index 960a3b70..7b4dc19e 100644 --- a/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml +++ b/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml @@ -27,7 +27,13 @@ spec: - jsonPath: .metadata.creationTimestamp name: Created type: date - - jsonPath: .status.description + - jsonPath: .status.decisionCount + name: Decisions + type: integer + - jsonPath: .spec.decisions[-1].eventType + name: Latest Event + type: string + - jsonPath: .status.globalDescription name: Description type: string name: v1alpha1 @@ -56,85 +62,108 @@ spec: spec: description: spec defines the desired state of SchedulingDecision properties: - availbilityZone: - type: string - flavor: - properties: - disk: - type: integer - name: - type: string - ram: - type: integer - vcpus: - type: integer - required: - - disk - - name - - ram - - vcpus - type: object - input: - additionalProperties: - type: number - type: object - live: - type: boolean - pipeline: - properties: - name: - type: string - outputs: - items: + decisions: + items: + properties: + availabilityZone: + type: string + eventType: + type: string + flavor: + properties: + disk: + type: integer + name: + type: string + ram: + type: integer + vcpus: + type: integer + required: + - disk + - name + - ram + - vcpus + type: object + id: + type: string + input: + additionalProperties: + type: number + type: object + pipeline: properties: - activations: - additionalProperties: - type: number - description: Weights calculated by this step subjected to - the activation function. 
- type: object - step: + name: type: string + outputs: + items: + properties: + activations: + additionalProperties: + type: number + type: object + step: + type: string + required: + - step + type: object + type: array required: - - step + - name type: object - type: array - required: - - name - type: object - resize: - type: boolean - vmware: - type: boolean + requestedAt: + format: date-time + type: string + vmware: + type: boolean + required: + - eventType + - flavor + - id + - pipeline + - requestedAt + - vmware + type: object + type: array required: - - flavor - - live - - pipeline - - resize - - vmware + - decisions type: object status: description: status defines the observed state of SchedulingDecision properties: - deletedHosts: - additionalProperties: - items: - type: string - type: array - description: Hosts that were deleted during pipeline processing and - all steps that attempted to delete them. - type: object - description: - type: string + decisionCount: + type: integer error: - description: Only given if state is "error". type: string - finalScores: - additionalProperties: - type: number - description: Final scores for each host after processing all pipeline - steps. - type: object + globalDescription: + type: string + results: + items: + description: SchedulingDecisionResult represents the result of processing + a single decision request. + properties: + deletedHosts: + additionalProperties: + items: + type: string + type: array + description: Hosts that were deleted during pipeline processing + and all steps that attempted to delete them. + type: object + description: + type: string + finalScores: + additionalProperties: + type: number + description: Final scores for each host after processing all + pipeline steps. 
+ type: object + id: + type: string + required: + - id + type: object + type: array state: type: string type: object diff --git a/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml b/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml index 24eb0d07..0709885e 100644 --- a/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml +++ b/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml @@ -33,7 +33,13 @@ spec: - jsonPath: .metadata.creationTimestamp name: Created type: date - - jsonPath: .status.description + - jsonPath: .status.decisionCount + name: Decisions + type: integer + - jsonPath: .spec.decisions[-1].eventType + name: Latest Event + type: string + - jsonPath: .status.globalDescription name: Description type: string name: v1alpha1 @@ -62,85 +68,108 @@ spec: spec: description: spec defines the desired state of SchedulingDecision properties: - availbilityZone: - type: string - flavor: - properties: - disk: - type: integer - name: - type: string - ram: - type: integer - vcpus: - type: integer - required: - - disk - - name - - ram - - vcpus - type: object - input: - additionalProperties: - type: number - type: object - live: - type: boolean - pipeline: - properties: - name: - type: string - outputs: - items: + decisions: + items: + properties: + availabilityZone: + type: string + eventType: + type: string + flavor: + properties: + disk: + type: integer + name: + type: string + ram: + type: integer + vcpus: + type: integer + required: + - disk + - name + - ram + - vcpus + type: object + id: + type: string + input: + additionalProperties: + type: number + type: object + pipeline: properties: - activations: - additionalProperties: - type: number - description: Weights calculated by this step subjected to - the activation function. 
- type: object - step: + name: type: string + outputs: + items: + properties: + activations: + additionalProperties: + type: number + type: object + step: + type: string + required: + - step + type: object + type: array required: - - step + - name type: object - type: array - required: - - name - type: object - resize: - type: boolean - vmware: - type: boolean + requestedAt: + format: date-time + type: string + vmware: + type: boolean + required: + - eventType + - flavor + - id + - pipeline + - requestedAt + - vmware + type: object + type: array required: - - flavor - - live - - pipeline - - resize - - vmware + - decisions type: object status: description: status defines the observed state of SchedulingDecision properties: - deletedHosts: - additionalProperties: - items: - type: string - type: array - description: Hosts that were deleted during pipeline processing and - all steps that attempted to delete them. - type: object - description: - type: string + decisionCount: + type: integer error: - description: Only given if state is "error". type: string - finalScores: - additionalProperties: - type: number - description: Final scores for each host after processing all pipeline - steps. - type: object + globalDescription: + type: string + results: + items: + description: SchedulingDecisionResult represents the result of processing + a single decision request. + properties: + deletedHosts: + additionalProperties: + items: + type: string + type: array + description: Hosts that were deleted during pipeline processing + and all steps that attempted to delete them. + type: object + description: + type: string + finalScores: + additionalProperties: + type: number + description: Final scores for each host after processing all + pipeline steps. 
+ type: object + id: + type: string + required: + - id + type: object + type: array state: type: string type: object diff --git a/decisions/internal/controller/controller.go b/decisions/internal/controller/controller.go index f1aac3cd..2e9b335a 100644 --- a/decisions/internal/controller/controller.go +++ b/decisions/internal/controller/controller.go @@ -115,40 +115,61 @@ func (r *SchedulingDecisionReconciler) Reconcile(ctx context.Context, req ctrl.R return ctrl.Result{}, nil } - // Validate input has at least one host - if err := r.validateInput(res.Spec.Input); err != nil { - if err := r.setErrorState(ctx, &res, err); err != nil { + // Validate we have at least one decision + if len(res.Spec.Decisions) == 0 { + if err := r.setErrorState(ctx, &res, fmt.Errorf("No decisions provided in spec")); err != nil { return ctrl.Result{}, err } return ctrl.Result{}, nil } - // Validate that all hosts in pipeline outputs exist in input - if err := r.validatePipelineHosts(res.Spec.Input, res.Spec.Pipeline.Outputs); err != nil { - if err := r.setErrorState(ctx, &res, err); err != nil { - return ctrl.Result{}, err + // Process each decision individually + results := make([]v1alpha1.SchedulingDecisionResult, 0, len(res.Spec.Decisions)) + + for _, decision := range res.Spec.Decisions { + // Validate input has at least one host for this decision + if err := r.validateInput(decision.Input); err != nil { + if err := r.setErrorState(ctx, &res, fmt.Errorf("Decision %s: %v", decision.ID, err)); err != nil { + return ctrl.Result{}, err + } + return ctrl.Result{}, nil + } + + // Validate that all hosts in pipeline outputs exist in input for this decision + if err := r.validatePipelineHosts(decision.Input, decision.Pipeline.Outputs); err != nil { + if err := r.setErrorState(ctx, &res, fmt.Errorf("Decision %s: %v", decision.ID, err)); err != nil { + return ctrl.Result{}, err + } + return ctrl.Result{}, nil } - return ctrl.Result{}, nil - } - // Calculate final scores with full pipeline - 
finalScores, deletedHosts := r.calculateScores(res.Spec.Input, res.Spec.Pipeline.Outputs) + // Calculate final scores with full pipeline for this decision + finalScores, deletedHosts := r.calculateScores(decision.Input, decision.Pipeline.Outputs) - // Calculate step-by-step impact for the winner - stepImpacts := r.calculateStepImpacts(res.Spec.Input, res.Spec.Pipeline.Outputs, finalScores) + // Calculate step-by-step impact for the winner for this decision + stepImpacts := r.calculateStepImpacts(decision.Input, decision.Pipeline.Outputs, finalScores) - // Find minimal critical path - criticalSteps, criticalStepCount := r.findCriticalSteps(res.Spec.Input, res.Spec.Pipeline.Outputs, finalScores) + // Find minimal critical path for this decision + criticalSteps, criticalStepCount := r.findCriticalSteps(decision.Input, decision.Pipeline.Outputs, finalScores) - res.Status.State = v1alpha1.SchedulingDecisionStateResolved - res.Status.Error = "" + // Sort finalScores by score (highest to lowest) and generate enhanced description for this decision + orderedScores, description := r.generateOrderedScoresAndDescription(finalScores, decision.Input, criticalSteps, criticalStepCount, len(decision.Pipeline.Outputs), stepImpacts) - // Sort finalScores by score (highest to lowest) and generate enhanced description - orderedScores, description := r.generateOrderedScoresAndDescription(finalScores, res.Spec.Input, criticalSteps, criticalStepCount, len(res.Spec.Pipeline.Outputs), stepImpacts) + // Create result for this decision + result := v1alpha1.SchedulingDecisionResult{ + ID: decision.ID, + Description: description, + FinalScores: orderedScores, + DeletedHosts: deletedHosts, + } + results = append(results, result) + } - res.Status.FinalScores = orderedScores - res.Status.DeletedHosts = deletedHosts - res.Status.Description = description + // Update status with all results + res.Status.State = v1alpha1.SchedulingDecisionStateResolved + res.Status.Error = "" + 
res.Status.DecisionCount = len(res.Spec.Decisions) + res.Status.Results = results if err := r.Status().Update(ctx, &res); err != nil { return ctrl.Result{}, err diff --git a/decisions/internal/controller/controller_test.go b/decisions/internal/controller/controller_test.go index 7f2c579b..b7e2fe82 100644 --- a/decisions/internal/controller/controller_test.go +++ b/decisions/internal/controller/controller_test.go @@ -19,24 +19,29 @@ func TestReconcile(t *testing.T) { Name: "test-decision", }, Spec: v1alpha1.SchedulingDecisionSpec{ - Input: map[string]float64{ - "host1": 1.0, - "host2": 2.0, - }, - Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ - Name: "test-pipeline", - Outputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ - { - Step: "weigher", - Activations: map[string]float64{ - "host1": 0.5, - "host2": 0.5, - }, + Decisions: []v1alpha1.SchedulingDecisionRequest{ + { + ID: "decision-1", + Input: map[string]float64{ + "host1": 1.0, + "host2": 2.0, }, - { - Step: "filter", - Activations: map[string]float64{ - "host1": 0.0, + Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ + Name: "test-pipeline", + Outputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ + { + Step: "weigher", + Activations: map[string]float64{ + "host1": 0.5, + "host2": 0.5, + }, + }, + { + Step: "filter", + Activations: map[string]float64{ + "host1": 0.0, + }, + }, }, }, }, @@ -83,9 +88,25 @@ func TestReconcile(t *testing.T) { if updatedResource.Status.Error != "" { t.Errorf("Expected empty error, got '%s'", updatedResource.Status.Error) } + + // Verify decision count + if updatedResource.Status.DecisionCount != 1 { + t.Errorf("Expected decision count 1, got %d", updatedResource.Status.DecisionCount) + } + + // Verify we have one result + if len(updatedResource.Status.Results) != 1 { + t.Errorf("Expected 1 result, got %d", len(updatedResource.Status.Results)) + } + + result := updatedResource.Status.Results[0] + if result.ID != "decision-1" { + t.Errorf("Expected result ID 
'decision-1', got '%s'", result.ID) + } + expectedDescription := "Selected: host1 (score: 1.50), certainty: perfect, 2 hosts evaluated. Input favored host2 (score: 2.00, now filtered), final winner was #2 in input (1.00→1.50). Decision driven by 1/2 pipeline step: filter. Step impacts:\n• weigher +0.50\n• filter +0.00→#1." - if updatedResource.Status.Description != expectedDescription { - t.Errorf("Expected description '%s', got '%s'", expectedDescription, updatedResource.Status.Description) + if result.Description != expectedDescription { + t.Errorf("Expected description '%s', got '%s'", expectedDescription, result.Description) } // Verify final scores calculation @@ -93,11 +114,11 @@ func TestReconcile(t *testing.T) { expectedFinalScores := map[string]float64{ "host1": 1.5, } - if len(updatedResource.Status.FinalScores) != len(expectedFinalScores) { - t.Errorf("Expected %d final scores, got %d", len(expectedFinalScores), len(updatedResource.Status.FinalScores)) + if len(result.FinalScores) != len(expectedFinalScores) { + t.Errorf("Expected %d final scores, got %d", len(expectedFinalScores), len(result.FinalScores)) } for host, expectedScore := range expectedFinalScores { - if actualScore, exists := updatedResource.Status.FinalScores[host]; !exists { + if actualScore, exists := result.FinalScores[host]; !exists { t.Errorf("Expected final score for host '%s', but it was not found", host) } else if actualScore != expectedScore { t.Errorf("Expected final score for host '%s' to be %f, got %f", host, expectedScore, actualScore) @@ -108,11 +129,11 @@ func TestReconcile(t *testing.T) { expectedDeletedHosts := map[string][]string{ "host2": {"filter"}, // host2 was deleted by the filter step } - if len(updatedResource.Status.DeletedHosts) != len(expectedDeletedHosts) { - t.Errorf("Expected %d deleted hosts, got %d", len(expectedDeletedHosts), len(updatedResource.Status.DeletedHosts)) + if len(result.DeletedHosts) != len(expectedDeletedHosts) { + t.Errorf("Expected %d 
deleted hosts, got %d", len(expectedDeletedHosts), len(result.DeletedHosts)) } for host, expectedSteps := range expectedDeletedHosts { - if actualSteps, exists := updatedResource.Status.DeletedHosts[host]; !exists { + if actualSteps, exists := result.DeletedHosts[host]; !exists { t.Errorf("Expected deleted host '%s', but it was not found", host) } else if len(actualSteps) != len(expectedSteps) { t.Errorf("Expected host '%s' to be deleted by %d steps, got %d", host, len(expectedSteps), len(actualSteps)) @@ -126,7 +147,7 @@ func TestReconcile(t *testing.T) { } t.Logf("Reconcile completed successfully: state=%s, finalScores=%v, deletedHosts=%v", - updatedResource.Status.State, updatedResource.Status.FinalScores, updatedResource.Status.DeletedHosts) + updatedResource.Status.State, result.FinalScores, result.DeletedHosts) } func TestReconcileEmptyInput(t *testing.T) { @@ -135,15 +156,20 @@ func TestReconcileEmptyInput(t *testing.T) { Name: "test-decision-empty-input", }, Spec: v1alpha1.SchedulingDecisionSpec{ - Input: map[string]float64{}, // Empty input - no hosts - Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ - Name: "test-pipeline", - Outputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ - { - Step: "weigher", - Activations: map[string]float64{ - "host1": 0.5, - "host2": 0.5, + Decisions: []v1alpha1.SchedulingDecisionRequest{ + { + ID: "decision-1", + Input: map[string]float64{}, // Empty input - no hosts + Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ + Name: "test-pipeline", + Outputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ + { + Step: "weigher", + Activations: map[string]float64{ + "host1": 0.5, + "host2": 0.5, + }, + }, }, }, }, @@ -187,13 +213,10 @@ func TestReconcileEmptyInput(t *testing.T) { if updatedResource.Status.State != v1alpha1.SchedulingDecisionStateError { t.Errorf("Expected state '%s', got '%s'", v1alpha1.SchedulingDecisionStateError, updatedResource.Status.State) } - expectedError := "No hosts provided in input" + 
expectedError := "Decision decision-1: No hosts provided in input" if updatedResource.Status.Error != expectedError { t.Errorf("Expected error '%s', got '%s'", expectedError, updatedResource.Status.Error) } - if updatedResource.Status.Description != "" { - t.Errorf("Expected empty description, got '%s'", updatedResource.Status.Description) - } t.Logf("Reconcile completed with error: state=%s, error=%s", updatedResource.Status.State, updatedResource.Status.Error) } @@ -204,18 +227,23 @@ func TestReconcileHostMismatch(t *testing.T) { Name: "test-decision-host-mismatch", }, Spec: v1alpha1.SchedulingDecisionSpec{ - Input: map[string]float64{ - "host1": 1.0, - "host2": 2.0, - }, // host3 is missing but referenced in pipeline output - Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ - Name: "test-pipeline", - Outputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ - { - Step: "weigher", - Activations: map[string]float64{ - "host1": 0.5, - "host3": 0.3, // host3 doesn't exist in input + Decisions: []v1alpha1.SchedulingDecisionRequest{ + { + ID: "decision-1", + Input: map[string]float64{ + "host1": 1.0, + "host2": 2.0, + }, // host3 is missing but referenced in pipeline output + Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ + Name: "test-pipeline", + Outputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ + { + Step: "weigher", + Activations: map[string]float64{ + "host1": 0.5, + "host3": 0.3, // host3 doesn't exist in input + }, + }, }, }, }, @@ -259,13 +287,10 @@ func TestReconcileHostMismatch(t *testing.T) { if updatedResource.Status.State != v1alpha1.SchedulingDecisionStateError { t.Errorf("Expected state '%s', got '%s'", v1alpha1.SchedulingDecisionStateError, updatedResource.Status.State) } - expectedError := "Host 'host3' in pipeline output not found in input" + expectedError := "Decision decision-1: Host 'host3' in pipeline output not found in input" if updatedResource.Status.Error != expectedError { t.Errorf("Expected error '%s', got '%s'", 
expectedError, updatedResource.Status.Error) } - if updatedResource.Status.Description != "" { - t.Errorf("Expected empty description, got '%s'", updatedResource.Status.Description) - } t.Logf("Reconcile completed with host mismatch error: state=%s, error=%s", updatedResource.Status.State, updatedResource.Status.Error) } @@ -276,35 +301,40 @@ func TestReconcileComplexScoring(t *testing.T) { Name: "test-decision-complex", }, Spec: v1alpha1.SchedulingDecisionSpec{ - Input: map[string]float64{ - "host1": 1.0, - "host2": 2.0, - "host3": 3.0, - "host4": 4.0, - }, - Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ - Name: "complex-pipeline", - Outputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ - { - Step: "weigher1", - Activations: map[string]float64{ - "host1": 0.5, - "host2": 1.0, - "host3": -0.5, - "host4": 2.0, - }, - }, - { - Step: "filter1", - Activations: map[string]float64{ - "host1": 0.2, - "host3": 0.1, // host2 and host4 removed by this step - }, + Decisions: []v1alpha1.SchedulingDecisionRequest{ + { + ID: "decision-1", + Input: map[string]float64{ + "host1": 1.0, + "host2": 2.0, + "host3": 3.0, + "host4": 4.0, }, - { - Step: "weigher2", - Activations: map[string]float64{ - "host1": -0.3, // host3 removed by this step + Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ + Name: "complex-pipeline", + Outputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ + { + Step: "weigher1", + Activations: map[string]float64{ + "host1": 0.5, + "host2": 1.0, + "host3": -0.5, + "host4": 2.0, + }, + }, + { + Step: "filter1", + Activations: map[string]float64{ + "host1": 0.2, + "host3": 0.1, // host2 and host4 removed by this step + }, + }, + { + Step: "weigher2", + Activations: map[string]float64{ + "host1": -0.3, // host3 removed by this step + }, + }, }, }, }, @@ -349,17 +379,27 @@ func TestReconcileComplexScoring(t *testing.T) { t.Errorf("Expected state '%s', got '%s'", v1alpha1.SchedulingDecisionStateResolved, updatedResource.Status.State) } + // Verify we have 
one result + if len(updatedResource.Status.Results) != 1 { + t.Errorf("Expected 1 result, got %d", len(updatedResource.Status.Results)) + } + + result := updatedResource.Status.Results[0] + if result.ID != "decision-1" { + t.Errorf("Expected result ID 'decision-1', got '%s'", result.ID) + } + // Verify final scores calculation // Expected: host1: 1.0 + 0.5 + 0.2 + (-0.3) = 1.4 // host2: removed by filter1, host3: removed by weigher2, host4: removed by filter1 expectedFinalScores := map[string]float64{ "host1": 1.4, } - if len(updatedResource.Status.FinalScores) != len(expectedFinalScores) { - t.Errorf("Expected %d final scores, got %d", len(expectedFinalScores), len(updatedResource.Status.FinalScores)) + if len(result.FinalScores) != len(expectedFinalScores) { + t.Errorf("Expected %d final scores, got %d", len(expectedFinalScores), len(result.FinalScores)) } for host, expectedScore := range expectedFinalScores { - if actualScore, exists := updatedResource.Status.FinalScores[host]; !exists { + if actualScore, exists := result.FinalScores[host]; !exists { t.Errorf("Expected final score for host '%s', but it was not found", host) } else if actualScore != expectedScore { t.Errorf("Expected final score for host '%s' to be %f, got %f", host, expectedScore, actualScore) @@ -372,11 +412,11 @@ func TestReconcileComplexScoring(t *testing.T) { "host4": {"filter1"}, // host4 deleted by filter1 "host3": {"weigher2"}, // host3 deleted by weigher2 } - if len(updatedResource.Status.DeletedHosts) != len(expectedDeletedHosts) { - t.Errorf("Expected %d deleted hosts, got %d", len(expectedDeletedHosts), len(updatedResource.Status.DeletedHosts)) + if len(result.DeletedHosts) != len(expectedDeletedHosts) { + t.Errorf("Expected %d deleted hosts, got %d", len(expectedDeletedHosts), len(result.DeletedHosts)) } for host, expectedSteps := range expectedDeletedHosts { - if actualSteps, exists := updatedResource.Status.DeletedHosts[host]; !exists { + if actualSteps, exists := 
result.DeletedHosts[host]; !exists { t.Errorf("Expected deleted host '%s', but it was not found", host) } else if len(actualSteps) != len(expectedSteps) { t.Errorf("Expected host '%s' to be deleted by %d steps, got %d", host, len(expectedSteps), len(actualSteps)) @@ -390,7 +430,7 @@ func TestReconcileComplexScoring(t *testing.T) { } t.Logf("Complex scoring completed: finalScores=%v, deletedHosts=%v", - updatedResource.Status.FinalScores, updatedResource.Status.DeletedHosts) + result.FinalScores, result.DeletedHosts) } func TestReconcileMultipleDeletionSteps(t *testing.T) { @@ -399,34 +439,39 @@ func TestReconcileMultipleDeletionSteps(t *testing.T) { Name: "test-decision-multiple-deletions", }, Spec: v1alpha1.SchedulingDecisionSpec{ - Input: map[string]float64{ - "host1": 1.0, - "host2": 2.0, - "host3": 3.0, - }, - Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ - Name: "multiple-deletion-pipeline", - Outputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ - { - Step: "weigher1", - Activations: map[string]float64{ - "host1": 0.5, - "host2": 1.0, - "host3": -0.5, - }, - }, - { - Step: "filter1", - Activations: map[string]float64{ - "host1": 0.2, - // host2 and host3 removed by this step - }, + Decisions: []v1alpha1.SchedulingDecisionRequest{ + { + ID: "decision-1", + Input: map[string]float64{ + "host1": 1.0, + "host2": 2.0, + "host3": 3.0, }, - { - Step: "filter2", - Activations: map[string]float64{ - // host1 removed by this step - // host2 and host3 would be removed again, but they're already gone + Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ + Name: "multiple-deletion-pipeline", + Outputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ + { + Step: "weigher1", + Activations: map[string]float64{ + "host1": 0.5, + "host2": 1.0, + "host3": -0.5, + }, + }, + { + Step: "filter1", + Activations: map[string]float64{ + "host1": 0.2, + // host2 and host3 removed by this step + }, + }, + { + Step: "filter2", + Activations: map[string]float64{ + // host1 
removed by this step + // host2 and host3 would be removed again, but they're already gone + }, + }, }, }, }, @@ -471,11 +516,21 @@ func TestReconcileMultipleDeletionSteps(t *testing.T) { t.Errorf("Expected state '%s', got '%s'", v1alpha1.SchedulingDecisionStateResolved, updatedResource.Status.State) } + // Verify we have one result + if len(updatedResource.Status.Results) != 1 { + t.Errorf("Expected 1 result, got %d", len(updatedResource.Status.Results)) + } + + result := updatedResource.Status.Results[0] + if result.ID != "decision-1" { + t.Errorf("Expected result ID 'decision-1', got '%s'", result.ID) + } + // Verify final scores calculation // Expected: All hosts should be removed, no final scores expectedFinalScores := map[string]float64{} - if len(updatedResource.Status.FinalScores) != len(expectedFinalScores) { - t.Errorf("Expected %d final scores, got %d", len(expectedFinalScores), len(updatedResource.Status.FinalScores)) + if len(result.FinalScores) != len(expectedFinalScores) { + t.Errorf("Expected %d final scores, got %d", len(expectedFinalScores), len(result.FinalScores)) } // Verify deleted hosts tracking @@ -485,11 +540,11 @@ func TestReconcileMultipleDeletionSteps(t *testing.T) { "host3": {"filter1"}, // host3 deleted by filter1 "host1": {"filter2"}, // host1 deleted by filter2 } - if len(updatedResource.Status.DeletedHosts) != len(expectedDeletedHosts) { - t.Errorf("Expected %d deleted hosts, got %d", len(expectedDeletedHosts), len(updatedResource.Status.DeletedHosts)) + if len(result.DeletedHosts) != len(expectedDeletedHosts) { + t.Errorf("Expected %d deleted hosts, got %d", len(expectedDeletedHosts), len(result.DeletedHosts)) } for host, expectedSteps := range expectedDeletedHosts { - if actualSteps, exists := updatedResource.Status.DeletedHosts[host]; !exists { + if actualSteps, exists := result.DeletedHosts[host]; !exists { t.Errorf("Expected deleted host '%s', but it was not found", host) } else if len(actualSteps) != len(expectedSteps) { 
t.Errorf("Expected host '%s' to be deleted by %d steps, got %d", host, len(expectedSteps), len(actualSteps)) @@ -503,7 +558,7 @@ func TestReconcileMultipleDeletionSteps(t *testing.T) { } t.Logf("Multiple deletion test completed: finalScores=%v, deletedHosts=%v", - updatedResource.Status.FinalScores, updatedResource.Status.DeletedHosts) + result.FinalScores, result.DeletedHosts) } func TestReconcileCertaintyLevels(t *testing.T) { @@ -562,13 +617,18 @@ func TestReconcileCertaintyLevels(t *testing.T) { Name: "test-certainty-" + tt.name, }, Spec: v1alpha1.SchedulingDecisionSpec{ - Input: tt.input, - Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ - Name: "certainty-test-pipeline", - Outputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ - { - Step: "weigher", - Activations: tt.activations, + Decisions: []v1alpha1.SchedulingDecisionRequest{ + { + ID: "decision-1", + Input: tt.input, + Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ + Name: "certainty-test-pipeline", + Outputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ + { + Step: "weigher", + Activations: tt.activations, + }, + }, }, }, }, @@ -607,8 +667,18 @@ func TestReconcileCertaintyLevels(t *testing.T) { t.Fatalf("Failed to get updated resource: %v", err) } + // Verify we have one result + if len(updatedResource.Status.Results) != 1 { + t.Errorf("Expected 1 result, got %d", len(updatedResource.Status.Results)) + } + + result := updatedResource.Status.Results[0] + if result.ID != "decision-1" { + t.Errorf("Expected result ID 'decision-1', got '%s'", result.ID) + } + // Verify the description contains the expected winner and certainty - description := updatedResource.Status.Description + description := result.Description if !contains(description, "Selected: "+tt.expectedWinner) { t.Errorf("Expected description to contain 'Selected: %s', got '%s'", tt.expectedWinner, description) } @@ -627,17 +697,22 @@ func TestReconcileNoHostsRemaining(t *testing.T) { Name: "test-no-hosts-remaining", }, Spec: 
v1alpha1.SchedulingDecisionSpec{ - Input: map[string]float64{ - "host1": 1.0, - "host2": 2.0, - }, - Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ - Name: "filter-all-pipeline", - Outputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ - { - Step: "filter-all", - Activations: map[string]float64{ - // No hosts in activations - all will be filtered out + Decisions: []v1alpha1.SchedulingDecisionRequest{ + { + ID: "decision-1", + Input: map[string]float64{ + "host1": 1.0, + "host2": 2.0, + }, + Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ + Name: "filter-all-pipeline", + Outputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ + { + Step: "filter-all", + Activations: map[string]float64{ + // No hosts in activations - all will be filtered out + }, + }, }, }, }, @@ -682,16 +757,26 @@ func TestReconcileNoHostsRemaining(t *testing.T) { t.Errorf("Expected state '%s', got '%s'", v1alpha1.SchedulingDecisionStateResolved, updatedResource.Status.State) } - if len(updatedResource.Status.FinalScores) != 0 { - t.Errorf("Expected 0 final scores, got %d", len(updatedResource.Status.FinalScores)) + // Verify we have one result + if len(updatedResource.Status.Results) != 1 { + t.Errorf("Expected 1 result, got %d", len(updatedResource.Status.Results)) + } + + result := updatedResource.Status.Results[0] + if result.ID != "decision-1" { + t.Errorf("Expected result ID 'decision-1', got '%s'", result.ID) + } + + if len(result.FinalScores) != 0 { + t.Errorf("Expected 0 final scores, got %d", len(result.FinalScores)) } expectedDescription := "No hosts remaining after filtering, 2 hosts evaluated" - if updatedResource.Status.Description != expectedDescription { - t.Errorf("Expected description '%s', got '%s'", expectedDescription, updatedResource.Status.Description) + if result.Description != expectedDescription { + t.Errorf("Expected description '%s', got '%s'", expectedDescription, result.Description) } - t.Logf("No hosts remaining test completed: %s", 
updatedResource.Status.Description) + t.Logf("No hosts remaining test completed: %s", result.Description) } func TestReconcileInputVsFinalComparison(t *testing.T) { @@ -757,13 +842,18 @@ func TestReconcileInputVsFinalComparison(t *testing.T) { Name: "test-input-vs-final-" + tt.name, }, Spec: v1alpha1.SchedulingDecisionSpec{ - Input: tt.input, - Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ - Name: "input-vs-final-pipeline", - Outputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ - { - Step: "weigher", - Activations: tt.activations[0], + Decisions: []v1alpha1.SchedulingDecisionRequest{ + { + ID: "decision-1", + Input: tt.input, + Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ + Name: "input-vs-final-pipeline", + Outputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ + { + Step: "weigher", + Activations: tt.activations[0], + }, + }, }, }, }, @@ -802,8 +892,18 @@ func TestReconcileInputVsFinalComparison(t *testing.T) { t.Fatalf("Failed to get updated resource: %v", err) } + // Verify we have one result + if len(updatedResource.Status.Results) != 1 { + t.Errorf("Expected 1 result, got %d", len(updatedResource.Status.Results)) + } + + result := updatedResource.Status.Results[0] + if result.ID != "decision-1" { + t.Errorf("Expected result ID 'decision-1', got '%s'", result.ID) + } + // Verify the description contains expected elements - description := updatedResource.Status.Description + description := result.Description for _, expectedContent := range tt.expectedDescContains { if !contains(description, expectedContent) { t.Errorf("Expected description to contain '%s', got '%s'", expectedContent, description) @@ -911,10 +1011,15 @@ func TestReconcileCriticalStepElimination(t *testing.T) { Name: "test-critical-steps-" + tt.name, }, Spec: v1alpha1.SchedulingDecisionSpec{ - Input: tt.input, - Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ - Name: "critical-step-test-pipeline", - Outputs: tt.pipeline, + Decisions: []v1alpha1.SchedulingDecisionRequest{ + { 
+ ID: "decision-1", + Input: tt.input, + Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ + Name: "critical-step-test-pipeline", + Outputs: tt.pipeline, + }, + }, }, }, } @@ -951,8 +1056,18 @@ func TestReconcileCriticalStepElimination(t *testing.T) { t.Fatalf("Failed to get updated resource: %v", err) } + // Verify we have one result + if len(updatedResource.Status.Results) != 1 { + t.Errorf("Expected 1 result, got %d", len(updatedResource.Status.Results)) + } + + result := updatedResource.Status.Results[0] + if result.ID != "decision-1" { + t.Errorf("Expected result ID 'decision-1', got '%s'", result.ID) + } + // Verify the description contains the expected critical step message - description := updatedResource.Status.Description + description := result.Description if !contains(description, tt.expectedCriticalMessage) { t.Errorf("Expected description to contain '%s', got '%s'", tt.expectedCriticalMessage, description) } From 451620cdafee8de7d06027138de80c9f9a79353e Mon Sep 17 00:00:00 2001 From: mblos Date: Tue, 30 Sep 2025 08:57:14 +0200 Subject: [PATCH 36/58] Cortex appends scheduling decision --- .../api/v1alpha1/schedulingdecision_types.go | 6 +- internal/scheduler/nova/pipeline.go | 103 ++++++++++++------ 2 files changed, 73 insertions(+), 36 deletions(-) diff --git a/decisions/api/v1alpha1/schedulingdecision_types.go b/decisions/api/v1alpha1/schedulingdecision_types.go index cc9a3129..6853f54c 100644 --- a/decisions/api/v1alpha1/schedulingdecision_types.go +++ b/decisions/api/v1alpha1/schedulingdecision_types.go @@ -10,9 +10,9 @@ import ( type SchedulingEventType string const ( - SchedulingEventTypeLiveMigration SchedulingEventType = "live-migration" - SchedulingEventTypeColdMigration SchedulingEventType = "cold-migration" - SchedulingEventTypeEvacuation SchedulingEventType = "evacuation" + SchedulingEventTypeLiveMigration SchedulingEventType = "live-migration" + // SchedulingEventTypeColdMigration SchedulingEventType = "cold-migration" + // 
SchedulingEventTypeEvacuation SchedulingEventType = "evacuation" SchedulingEventTypeResize SchedulingEventType = "resize" SchedulingEventTypeInitialPlacement SchedulingEventType = "initial-placement" ) diff --git a/internal/scheduler/nova/pipeline.go b/internal/scheduler/nova/pipeline.go index 76f482cc..e0011059 100644 --- a/internal/scheduler/nova/pipeline.go +++ b/internal/scheduler/nova/pipeline.go @@ -19,6 +19,7 @@ import ( "github.com/cobaltcore-dev/cortex/internal/scheduler/nova/plugins/shared" "github.com/cobaltcore-dev/cortex/internal/scheduler/nova/plugins/vmware" "github.com/cobaltcore-dev/cortex/internal/sync/openstack/nova" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -93,17 +94,17 @@ func (c *novaPipelineConsumer) Consume( if c.Client == nil { return } - var existing v1alpha1.SchedulingDecision - if err := c.Client.Get( - context.Background(), - client.ObjectKey{Name: request.Spec.Data.InstanceUUID}, - &existing, - ); err == nil { - // Decision already exists, do not create a new one. - // TODO: Add new decisions for the same vm id if this is a migration. 
- slog.Info("scheduler: decision already exists, not creating a new one", "resourceID", request.Spec.Data.InstanceUUID) - return + + // Determine the event type based on request flags + var eventType v1alpha1.SchedulingEventType + if request.Live { + eventType = v1alpha1.SchedulingEventTypeLiveMigration + } else if request.Resize { + eventType = v1alpha1.SchedulingEventTypeResize + } else { + eventType = v1alpha1.SchedulingEventTypeInitialPlacement } + outputs := []v1alpha1.SchedulingDecisionPipelineOutputSpec{} for _, stepKey := range applicationOrder { weights, ok := stepWeights[stepKey] @@ -122,32 +123,68 @@ func (c *novaPipelineConsumer) Consume( } flavor := request.Spec.Data.Flavor - decision := &v1alpha1.SchedulingDecision{ - ObjectMeta: ctrl.ObjectMeta{Name: request.Spec.Data.InstanceUUID}, - Spec: v1alpha1.SchedulingDecisionSpec{ - Input: inWeights, - AvailabilityZone: request.Spec.Data.AvailabilityZone, - Flavor: v1alpha1.Flavor{ - Name: flavor.Data.Name, - VCPUs: int(flavor.Data.VCPUs), // assume this is safe - RAM: int(flavor.Data.MemoryMB), - Disk: int(flavor.Data.RootGB), - }, - VMware: request.VMware, - Live: request.Live, - Resize: request.Resize, - Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ - Name: request.GetPipeline(), - Outputs: outputs, - }, + decisionRequest := v1alpha1.SchedulingDecisionRequest{ + ID: request.Spec.Data.InstanceUUID, + RequestedAt: metav1.Now(), + EventType: eventType, + Input: inWeights, + Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ + Name: request.GetPipeline(), + Outputs: outputs, + }, + AvailabilityZone: request.Spec.Data.AvailabilityZone, + VMware: request.VMware, + Flavor: v1alpha1.Flavor{ + Name: flavor.Data.Name, + VCPUs: int(flavor.Data.VCPUs), // assume this is safe + RAM: int(flavor.Data.MemoryMB), + Disk: int(flavor.Data.RootGB), }, - // Status will be filled in by the controller. 
} - if err := c.Client.Create(context.Background(), decision); err != nil { - slog.Error("scheduler: failed to create decision", "error", err) - return + + objectKey := client.ObjectKey{Name: request.Spec.Data.InstanceUUID} + + // Try to update existing decision with retry logic for concurrent updates + const maxRetries = 3 + for attempt := 0; attempt < maxRetries; attempt++ { + var existing v1alpha1.SchedulingDecision + if err := c.Client.Get(context.Background(), objectKey, &existing); err == nil { + // Decision already exists, append the new decision to the existing ones + existing.Spec.Decisions = append(existing.Spec.Decisions, decisionRequest) + + if err := c.Client.Update(context.Background(), &existing); err != nil { + // Check if it's a conflict error (concurrent update) + if attempt < maxRetries-1 { + slog.Warn("scheduler: conflict updating decision, retrying", "attempt", attempt+1, "resourceID", request.Spec.Data.InstanceUUID) + continue + } + slog.Error("scheduler: failed to update existing decision after retries", "error", err, "resourceID", request.Spec.Data.InstanceUUID) + return + } + slog.Info("scheduler: appended decision to existing resource", "resourceID", request.Spec.Data.InstanceUUID, "eventType", eventType) + return + } else { + // Decision doesn't exist, create a new one + decision := &v1alpha1.SchedulingDecision{ + ObjectMeta: ctrl.ObjectMeta{Name: request.Spec.Data.InstanceUUID}, + Spec: v1alpha1.SchedulingDecisionSpec{ + Decisions: []v1alpha1.SchedulingDecisionRequest{decisionRequest}, + }, + // Status will be filled in by the controller. 
+ } + if err := c.Client.Create(context.Background(), decision); err != nil { + // Check if it's a conflict error (resource was created concurrently) + if attempt < maxRetries-1 { + slog.Warn("scheduler: conflict creating decision, retrying", "attempt", attempt+1, "resourceID", request.Spec.Data.InstanceUUID) + continue + } + slog.Error("scheduler: failed to create decision after retries", "error", err, "resourceID", request.Spec.Data.InstanceUUID) + return + } + slog.Info("scheduler: created new decision", "resourceID", request.Spec.Data.InstanceUUID, "eventType", eventType) + return + } } - slog.Info("scheduler: created decision", "resourceID", request.Spec.Data.InstanceUUID) } // Create a new Nova scheduler pipeline. From 156b158d6e31cd96d3addadf4d420f6bc4d85b4a Mon Sep 17 00:00:00 2001 From: mblos Date: Mon, 6 Oct 2025 14:12:31 +0200 Subject: [PATCH 37/58] feat: ttl removal of old crds; adding global description if >1 decision --- decisions/cmd/main.go | 9 + decisions/internal/controller/conf.go | 8 +- decisions/internal/controller/controller.go | 149 ++- .../internal/controller/controller_test.go | 1012 ++++++----------- decisions/internal/controller/test_helpers.go | 322 ++++++ .../internal/controller/ttl_controller.go | 103 ++ .../controller/ttl_controller_test.go | 150 +++ 7 files changed, 1081 insertions(+), 672 deletions(-) create mode 100644 decisions/internal/controller/test_helpers.go create mode 100644 decisions/internal/controller/ttl_controller.go create mode 100644 decisions/internal/controller/ttl_controller_test.go diff --git a/decisions/cmd/main.go b/decisions/cmd/main.go index 102f01e4..6298e6d4 100644 --- a/decisions/cmd/main.go +++ b/decisions/cmd/main.go @@ -198,6 +198,15 @@ func main() { setupLog.Error(err, "unable to create controller", "controller", "SchedulingDecision") os.Exit(1) } + + if err := (&controller.SchedulingDecisionTTLController{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Conf: 
conf.NewConfig[controller.Config](), + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "SchedulingDecisionTTL") + os.Exit(1) + } // +kubebuilder:scaffold:builder if metricsCertWatcher != nil { diff --git a/decisions/internal/controller/conf.go b/decisions/internal/controller/conf.go index 4cf64100..3543d9ba 100644 --- a/decisions/internal/controller/conf.go +++ b/decisions/internal/controller/conf.go @@ -3,5 +3,11 @@ package controller +import "time" + // Configuration for the decisions operator. -type Config struct{} +type Config struct { + // TTL for scheduling decisions after the last decision's RequestedAt timestamp + // If not set, defaults to 14 days (336 hours) + TTLHoursAfterDecision time.Duration `json:"ttlHoursAfterDecision,omitempty"` +} diff --git a/decisions/internal/controller/controller.go b/decisions/internal/controller/controller.go index 2e9b335a..8ea1c847 100644 --- a/decisions/internal/controller/controller.go +++ b/decisions/internal/controller/controller.go @@ -9,11 +9,13 @@ import ( "math" "sort" "strings" + "time" "k8s.io/apimachinery/pkg/runtime" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/event" logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/predicate" @@ -57,6 +59,29 @@ func getCertaintyLevel(gap float64) string { return "low" // fallback } +// noDeleteEventsPredicate is a custom predicate that filters out delete events +// to prevent race conditions with the TTL controller. Generic events are typically +// used for periodic reconciliation or external triggers, so we allow them. 
+type noDeleteEventsPredicate struct{} + +func (noDeleteEventsPredicate) Create(e event.CreateEvent) bool { + return true +} + +func (noDeleteEventsPredicate) Update(e event.UpdateEvent) bool { + return true +} + +func (noDeleteEventsPredicate) Delete(e event.DeleteEvent) bool { + // Ignore delete events to prevent race conditions with TTL controller + return false +} + +func (noDeleteEventsPredicate) Generic(e event.GenericEvent) bool { + // Allow generic events (periodic reconciliation, external triggers) + return true +} + // hostScore represents a host-score pair for sorting operations type hostScore struct { host string @@ -165,10 +190,14 @@ func (r *SchedulingDecisionReconciler) Reconcile(ctx context.Context, req ctrl.R results = append(results, result) } + // Generate global description for multiple decisions + globalDescription := r.generateGlobalDescription(results, res.Spec.Decisions) + // Update status with all results res.Status.State = v1alpha1.SchedulingDecisionStateResolved res.Status.Error = "" res.Status.DecisionCount = len(res.Spec.Decisions) + res.Status.GlobalDescription = globalDescription res.Status.Results = results if err := r.Status().Update(ctx, &res); err != nil { @@ -526,6 +555,121 @@ func (r *SchedulingDecisionReconciler) formatStepImpactsMultiLine(stepImpacts [] return b.String() + "." 
} +// hostSegment represents a segment in the host chain with duration and decision count +type hostSegment struct { + host string + duration time.Duration + decisions int +} + +// formatDuration formats a duration in a simple d/h/m format +func formatDuration(d time.Duration) string { + if d >= 24*time.Hour { + return fmt.Sprintf("%dd", int(d.Hours()/24)) + } + if d >= time.Hour { + return fmt.Sprintf("%dh", int(d.Hours())) + } + return fmt.Sprintf("%dm", int(d.Minutes())) +} + +// generateGlobalDescription creates a global description for multiple decisions +// showing the host chain with durations and detecting simple loops +func (r *SchedulingDecisionReconciler) generateGlobalDescription(results []v1alpha1.SchedulingDecisionResult, decisions []v1alpha1.SchedulingDecisionRequest) string { + if len(results) <= 1 { + return "" // No global description needed for single or no decisions + } + + // Extract host chain from winners + hostChain := make([]string, 0, len(results)) + for _, result := range results { + winner, _ := findWinner(result.FinalScores) + hostChain = append(hostChain, winner) + } + + // Group consecutive decisions on the same host with their timestamps + segments := make([]hostSegment, 0) + if len(hostChain) > 0 { + currentHost := hostChain[0] + currentCount := 1 + + for i := 1; i < len(hostChain); i++ { + if hostChain[i] == currentHost { + currentCount++ + } else { + segments = append(segments, hostSegment{ + host: currentHost, + decisions: currentCount, + }) + currentHost = hostChain[i] + currentCount = 1 + } + } + // Add the last segment + segments = append(segments, hostSegment{ + host: currentHost, + decisions: currentCount, + }) + } + + // Calculate actual durations using timestamps + now := time.Now() + totalSegments := len(segments) + decisionIndex := 0 + + for i := range segments { + segmentStartTime := decisions[decisionIndex].RequestedAt.Time + + // Find the end time for this segment + var segmentEndTime time.Time + if i == 
totalSegments-1 { + // Last segment: use current time + segmentEndTime = now + } else { + // Find the start of the next segment + decisionIndex += segments[i].decisions + segmentEndTime = decisions[decisionIndex].RequestedAt.Time + } + + segments[i].duration = segmentEndTime.Sub(segmentStartTime) + } + + // Build chain string with durations + chainParts := make([]string, 0, len(segments)) + for _, segment := range segments { + part := segment.host + " (" + formatDuration(segment.duration) + if segment.decisions > 1 { + part += fmt.Sprintf("; %d decisions", segment.decisions) + } + part += ")" + chainParts = append(chainParts, part) + } + + // Loop detection: check if any host appears again after other hosts in between + hasLoop := false + for i := 0; i < len(hostChain); i++ { + for j := i + 2; j < len(hostChain); j++ { // Skip adjacent hosts (i+1) + if hostChain[i] == hostChain[j] { + hasLoop = true + break + } + } + if hasLoop { + break + } + } + + // Build description + chainStr := strings.Join(chainParts, " -> ") + description := fmt.Sprintf("chain: %s", chainStr) + + if hasLoop { + description += "; loop detected" + } + + return description +} + // SetupWithManager sets up the controller with the Manager. func (r *SchedulingDecisionReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). @@ -534,6 +678,9 @@ func (r *SchedulingDecisionReconciler) SetupWithManager(mgr ctrl.Manager) error WithOptions(controller.Options{ MaxConcurrentReconciles: 1, // Default }). - WithEventFilter(predicate.GenerationChangedPredicate{}). + WithEventFilter(predicate.And( + predicate.GenerationChangedPredicate{}, + noDeleteEventsPredicate{}, + )). 
Complete(r) } diff --git a/decisions/internal/controller/controller_test.go b/decisions/internal/controller/controller_test.go index b7e2fe82..f069dbdb 100644 --- a/decisions/internal/controller/controller_test.go +++ b/decisions/internal/controller/controller_test.go @@ -5,99 +5,48 @@ package controller import ( "testing" + "time" "github.com/cobaltcore-dev/cortex/decisions/api/v1alpha1" - "k8s.io/apimachinery/pkg/runtime" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/client/fake" ) func TestReconcile(t *testing.T) { - resource := &v1alpha1.SchedulingDecision{ - ObjectMeta: ctrl.ObjectMeta{ - Name: "test-decision", - }, - Spec: v1alpha1.SchedulingDecisionSpec{ - Decisions: []v1alpha1.SchedulingDecisionRequest{ - { - ID: "decision-1", - Input: map[string]float64{ - "host1": 1.0, - "host2": 2.0, - }, - Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ - Name: "test-pipeline", - Outputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ - { - Step: "weigher", - Activations: map[string]float64{ - "host1": 0.5, - "host2": 0.5, - }, - }, - { - Step: "filter", - Activations: map[string]float64{ - "host1": 0.0, - }, - }, - }, - }, - }, - }, - }, - } - - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } + // Create test decision with pipeline outputs + decision := NewTestDecision("decision-1"). + WithInput(map[string]float64{ + "host1": 1.0, + "host2": 2.0, + }). + WithPipelineOutputs( + NewTestPipelineOutput("weigher", map[string]float64{ + "host1": 0.5, + "host2": 0.5, + }), + NewTestPipelineOutput("filter", map[string]float64{ + "host1": 0.0, + }), + ). + Build() - fakeClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(resource). - WithStatusSubresource(&v1alpha1.SchedulingDecision{}). + resource := NewTestSchedulingDecision("test-decision"). + WithDecisions(decision). 
Build() - req := ctrl.Request{ - NamespacedName: client.ObjectKey{ - Name: "test-decision", - }, - } + fakeClient, _ := SetupTestEnvironment(t, resource) + req := CreateTestRequest("test-decision") - reconciler := &SchedulingDecisionReconciler{ - Conf: Config{}, - Client: fakeClient, - } + reconciler := CreateSchedulingReconciler(fakeClient) _, err := reconciler.Reconcile(t.Context(), req) if err != nil { t.Fatalf("Reconcile returned an error: %v", err) } - // Fetch the updated resource to check status - var updatedResource v1alpha1.SchedulingDecision - if err := fakeClient.Get(t.Context(), client.ObjectKey{Name: "test-decision"}, &updatedResource); err != nil { - t.Fatalf("Failed to get updated resource: %v", err) - } - - // Verify success state - if updatedResource.Status.State != v1alpha1.SchedulingDecisionStateResolved { - t.Errorf("Expected state '%s', got '%s'", v1alpha1.SchedulingDecisionStateResolved, updatedResource.Status.State) - } - if updatedResource.Status.Error != "" { - t.Errorf("Expected empty error, got '%s'", updatedResource.Status.Error) - } - - // Verify decision count - if updatedResource.Status.DecisionCount != 1 { - t.Errorf("Expected decision count 1, got %d", updatedResource.Status.DecisionCount) - } - - // Verify we have one result - if len(updatedResource.Status.Results) != 1 { - t.Errorf("Expected 1 result, got %d", len(updatedResource.Status.Results)) - } + // Fetch and verify the updated resource + updatedResource := AssertResourceExists(t, fakeClient, "test-decision") + AssertResourceState(t, updatedResource, v1alpha1.SchedulingDecisionStateResolved) + AssertNoError(t, updatedResource) + AssertDecisionCount(t, updatedResource, 1) + AssertResultCount(t, updatedResource, 1) result := updatedResource.Status.Results[0] if result.ID != "decision-1" { @@ -114,275 +63,130 @@ func TestReconcile(t *testing.T) { expectedFinalScores := map[string]float64{ "host1": 1.5, } - if len(result.FinalScores) != len(expectedFinalScores) { - 
t.Errorf("Expected %d final scores, got %d", len(expectedFinalScores), len(result.FinalScores)) - } - for host, expectedScore := range expectedFinalScores { - if actualScore, exists := result.FinalScores[host]; !exists { - t.Errorf("Expected final score for host '%s', but it was not found", host) - } else if actualScore != expectedScore { - t.Errorf("Expected final score for host '%s' to be %f, got %f", host, expectedScore, actualScore) - } - } + AssertFinalScores(t, result, expectedFinalScores) // Verify deleted hosts tracking expectedDeletedHosts := map[string][]string{ "host2": {"filter"}, // host2 was deleted by the filter step } - if len(result.DeletedHosts) != len(expectedDeletedHosts) { - t.Errorf("Expected %d deleted hosts, got %d", len(expectedDeletedHosts), len(result.DeletedHosts)) - } - for host, expectedSteps := range expectedDeletedHosts { - if actualSteps, exists := result.DeletedHosts[host]; !exists { - t.Errorf("Expected deleted host '%s', but it was not found", host) - } else if len(actualSteps) != len(expectedSteps) { - t.Errorf("Expected host '%s' to be deleted by %d steps, got %d", host, len(expectedSteps), len(actualSteps)) - } else { - for i, expectedStep := range expectedSteps { - if actualSteps[i] != expectedStep { - t.Errorf("Expected host '%s' step %d to be '%s', got '%s'", host, i, expectedStep, actualSteps[i]) - } - } - } - } + AssertDeletedHosts(t, result, expectedDeletedHosts) t.Logf("Reconcile completed successfully: state=%s, finalScores=%v, deletedHosts=%v", updatedResource.Status.State, result.FinalScores, result.DeletedHosts) } func TestReconcileEmptyInput(t *testing.T) { - resource := &v1alpha1.SchedulingDecision{ - ObjectMeta: ctrl.ObjectMeta{ - Name: "test-decision-empty-input", - }, - Spec: v1alpha1.SchedulingDecisionSpec{ - Decisions: []v1alpha1.SchedulingDecisionRequest{ - { - ID: "decision-1", - Input: map[string]float64{}, // Empty input - no hosts - Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ - Name: 
"test-pipeline", - Outputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ - { - Step: "weigher", - Activations: map[string]float64{ - "host1": 0.5, - "host2": 0.5, - }, - }, - }, - }, - }, - }, - }, - } - - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } + // Create test decision with empty input + decision := NewTestDecision("decision-1"). + WithInput(map[string]float64{}). // Empty input - no hosts + WithPipelineOutputs( + NewTestPipelineOutput("weigher", map[string]float64{ + "host1": 0.5, + "host2": 0.5, + }), + ). + Build() - fakeClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(resource). - WithStatusSubresource(&v1alpha1.SchedulingDecision{}). + resource := NewTestSchedulingDecision("test-decision-empty-input"). + WithDecisions(decision). Build() - req := ctrl.Request{ - NamespacedName: client.ObjectKey{ - Name: "test-decision-empty-input", - }, - } + fakeClient, _ := SetupTestEnvironment(t, resource) + req := CreateTestRequest("test-decision-empty-input") - reconciler := &SchedulingDecisionReconciler{ - Conf: Config{}, - Client: fakeClient, - } + reconciler := CreateSchedulingReconciler(fakeClient) _, err := reconciler.Reconcile(t.Context(), req) if err != nil { t.Fatalf("Reconcile returned an error: %v", err) } - // Fetch the updated resource to check status - var updatedResource v1alpha1.SchedulingDecision - if err := fakeClient.Get(t.Context(), client.ObjectKey{Name: "test-decision-empty-input"}, &updatedResource); err != nil { - t.Fatalf("Failed to get updated resource: %v", err) - } - - // Verify error state - if updatedResource.Status.State != v1alpha1.SchedulingDecisionStateError { - t.Errorf("Expected state '%s', got '%s'", v1alpha1.SchedulingDecisionStateError, updatedResource.Status.State) - } - expectedError := "Decision decision-1: No hosts provided in input" - if updatedResource.Status.Error != expectedError { - t.Errorf("Expected error 
'%s', got '%s'", expectedError, updatedResource.Status.Error) - } + // Fetch and verify the updated resource + updatedResource := AssertResourceExists(t, fakeClient, "test-decision-empty-input") + AssertResourceState(t, updatedResource, v1alpha1.SchedulingDecisionStateError) + AssertResourceError(t, updatedResource, "Decision decision-1: No hosts provided in input") t.Logf("Reconcile completed with error: state=%s, error=%s", updatedResource.Status.State, updatedResource.Status.Error) } func TestReconcileHostMismatch(t *testing.T) { - resource := &v1alpha1.SchedulingDecision{ - ObjectMeta: ctrl.ObjectMeta{ - Name: "test-decision-host-mismatch", - }, - Spec: v1alpha1.SchedulingDecisionSpec{ - Decisions: []v1alpha1.SchedulingDecisionRequest{ - { - ID: "decision-1", - Input: map[string]float64{ - "host1": 1.0, - "host2": 2.0, - }, // host3 is missing but referenced in pipeline output - Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ - Name: "test-pipeline", - Outputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ - { - Step: "weigher", - Activations: map[string]float64{ - "host1": 0.5, - "host3": 0.3, // host3 doesn't exist in input - }, - }, - }, - }, - }, - }, - }, - } - - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } + // Create test decision with host mismatch (host3 in pipeline but not in input) + decision := NewTestDecision("decision-1"). + WithInput(map[string]float64{ + "host1": 1.0, + "host2": 2.0, + }). + WithPipelineOutputs( + NewTestPipelineOutput("weigher", map[string]float64{ + "host1": 0.5, + "host3": 0.3, // host3 doesn't exist in input + }), + ). + Build() - fakeClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(resource). - WithStatusSubresource(&v1alpha1.SchedulingDecision{}). + resource := NewTestSchedulingDecision("test-decision-host-mismatch"). + WithDecisions(decision). 
Build() - req := ctrl.Request{ - NamespacedName: client.ObjectKey{ - Name: "test-decision-host-mismatch", - }, - } + fakeClient, _ := SetupTestEnvironment(t, resource) + req := CreateTestRequest("test-decision-host-mismatch") - reconciler := &SchedulingDecisionReconciler{ - Conf: Config{}, - Client: fakeClient, - } + reconciler := CreateSchedulingReconciler(fakeClient) _, err := reconciler.Reconcile(t.Context(), req) if err != nil { t.Fatalf("Reconcile returned an error: %v", err) } - // Fetch the updated resource to check status - var updatedResource v1alpha1.SchedulingDecision - if err := fakeClient.Get(t.Context(), client.ObjectKey{Name: "test-decision-host-mismatch"}, &updatedResource); err != nil { - t.Fatalf("Failed to get updated resource: %v", err) - } - - // Verify error state for host mismatch - if updatedResource.Status.State != v1alpha1.SchedulingDecisionStateError { - t.Errorf("Expected state '%s', got '%s'", v1alpha1.SchedulingDecisionStateError, updatedResource.Status.State) - } - expectedError := "Decision decision-1: Host 'host3' in pipeline output not found in input" - if updatedResource.Status.Error != expectedError { - t.Errorf("Expected error '%s', got '%s'", expectedError, updatedResource.Status.Error) - } + // Fetch and verify the updated resource + updatedResource := AssertResourceExists(t, fakeClient, "test-decision-host-mismatch") + AssertResourceState(t, updatedResource, v1alpha1.SchedulingDecisionStateError) + AssertResourceError(t, updatedResource, "Decision decision-1: Host 'host3' in pipeline output not found in input") t.Logf("Reconcile completed with host mismatch error: state=%s, error=%s", updatedResource.Status.State, updatedResource.Status.Error) } func TestReconcileComplexScoring(t *testing.T) { - resource := &v1alpha1.SchedulingDecision{ - ObjectMeta: ctrl.ObjectMeta{ - Name: "test-decision-complex", - }, - Spec: v1alpha1.SchedulingDecisionSpec{ - Decisions: []v1alpha1.SchedulingDecisionRequest{ - { - ID: "decision-1", - 
Input: map[string]float64{ - "host1": 1.0, - "host2": 2.0, - "host3": 3.0, - "host4": 4.0, - }, - Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ - Name: "complex-pipeline", - Outputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ - { - Step: "weigher1", - Activations: map[string]float64{ - "host1": 0.5, - "host2": 1.0, - "host3": -0.5, - "host4": 2.0, - }, - }, - { - Step: "filter1", - Activations: map[string]float64{ - "host1": 0.2, - "host3": 0.1, // host2 and host4 removed by this step - }, - }, - { - Step: "weigher2", - Activations: map[string]float64{ - "host1": -0.3, // host3 removed by this step - }, - }, - }, - }, - }, - }, - }, - } - - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } + // Create test decision with complex multi-step pipeline + decision := NewTestDecision("decision-1"). + WithInput(map[string]float64{ + "host1": 1.0, + "host2": 2.0, + "host3": 3.0, + "host4": 4.0, + }). + WithPipelineOutputs( + NewTestPipelineOutput("weigher1", map[string]float64{ + "host1": 0.5, + "host2": 1.0, + "host3": -0.5, + "host4": 2.0, + }), + NewTestPipelineOutput("filter1", map[string]float64{ + "host1": 0.2, + "host3": 0.1, // host2 and host4 removed by this step + }), + NewTestPipelineOutput("weigher2", map[string]float64{ + "host1": -0.3, // host3 removed by this step + }), + ). + Build() - fakeClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(resource). - WithStatusSubresource(&v1alpha1.SchedulingDecision{}). + resource := NewTestSchedulingDecision("test-decision-complex"). + WithDecisions(decision). 
Build() - req := ctrl.Request{ - NamespacedName: client.ObjectKey{ - Name: "test-decision-complex", - }, - } + fakeClient, _ := SetupTestEnvironment(t, resource) + req := CreateTestRequest("test-decision-complex") - reconciler := &SchedulingDecisionReconciler{ - Conf: Config{}, - Client: fakeClient, - } + reconciler := CreateSchedulingReconciler(fakeClient) _, err := reconciler.Reconcile(t.Context(), req) if err != nil { t.Fatalf("Reconcile returned an error: %v", err) } - // Fetch the updated resource to check status - var updatedResource v1alpha1.SchedulingDecision - if err := fakeClient.Get(t.Context(), client.ObjectKey{Name: "test-decision-complex"}, &updatedResource); err != nil { - t.Fatalf("Failed to get updated resource: %v", err) - } - - // Verify success state - if updatedResource.Status.State != v1alpha1.SchedulingDecisionStateResolved { - t.Errorf("Expected state '%s', got '%s'", v1alpha1.SchedulingDecisionStateResolved, updatedResource.Status.State) - } - - // Verify we have one result - if len(updatedResource.Status.Results) != 1 { - t.Errorf("Expected 1 result, got %d", len(updatedResource.Status.Results)) - } + // Fetch and verify the updated resource + updatedResource := AssertResourceExists(t, fakeClient, "test-decision-complex") + AssertResourceState(t, updatedResource, v1alpha1.SchedulingDecisionStateResolved) + AssertResultCount(t, updatedResource, 1) result := updatedResource.Status.Results[0] if result.ID != "decision-1" { @@ -395,16 +199,7 @@ func TestReconcileComplexScoring(t *testing.T) { expectedFinalScores := map[string]float64{ "host1": 1.4, } - if len(result.FinalScores) != len(expectedFinalScores) { - t.Errorf("Expected %d final scores, got %d", len(expectedFinalScores), len(result.FinalScores)) - } - for host, expectedScore := range expectedFinalScores { - if actualScore, exists := result.FinalScores[host]; !exists { - t.Errorf("Expected final score for host '%s', but it was not found", host) - } else if actualScore != expectedScore 
{ - t.Errorf("Expected final score for host '%s' to be %f, got %f", host, expectedScore, actualScore) - } - } + AssertFinalScores(t, result, expectedFinalScores) // Verify deleted hosts tracking expectedDeletedHosts := map[string][]string{ @@ -412,126 +207,63 @@ func TestReconcileComplexScoring(t *testing.T) { "host4": {"filter1"}, // host4 deleted by filter1 "host3": {"weigher2"}, // host3 deleted by weigher2 } - if len(result.DeletedHosts) != len(expectedDeletedHosts) { - t.Errorf("Expected %d deleted hosts, got %d", len(expectedDeletedHosts), len(result.DeletedHosts)) - } - for host, expectedSteps := range expectedDeletedHosts { - if actualSteps, exists := result.DeletedHosts[host]; !exists { - t.Errorf("Expected deleted host '%s', but it was not found", host) - } else if len(actualSteps) != len(expectedSteps) { - t.Errorf("Expected host '%s' to be deleted by %d steps, got %d", host, len(expectedSteps), len(actualSteps)) - } else { - for i, expectedStep := range expectedSteps { - if actualSteps[i] != expectedStep { - t.Errorf("Expected host '%s' step %d to be '%s', got '%s'", host, i, expectedStep, actualSteps[i]) - } - } - } - } + AssertDeletedHosts(t, result, expectedDeletedHosts) t.Logf("Complex scoring completed: finalScores=%v, deletedHosts=%v", result.FinalScores, result.DeletedHosts) } func TestReconcileMultipleDeletionSteps(t *testing.T) { - resource := &v1alpha1.SchedulingDecision{ - ObjectMeta: ctrl.ObjectMeta{ - Name: "test-decision-multiple-deletions", - }, - Spec: v1alpha1.SchedulingDecisionSpec{ - Decisions: []v1alpha1.SchedulingDecisionRequest{ - { - ID: "decision-1", - Input: map[string]float64{ - "host1": 1.0, - "host2": 2.0, - "host3": 3.0, - }, - Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ - Name: "multiple-deletion-pipeline", - Outputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ - { - Step: "weigher1", - Activations: map[string]float64{ - "host1": 0.5, - "host2": 1.0, - "host3": -0.5, - }, - }, - { - Step: "filter1", - 
Activations: map[string]float64{ - "host1": 0.2, - // host2 and host3 removed by this step - }, - }, - { - Step: "filter2", - Activations: map[string]float64{ - // host1 removed by this step - // host2 and host3 would be removed again, but they're already gone - }, - }, - }, - }, - }, - }, - }, - } - - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } + // Create test decision with multiple filter steps that remove all hosts + decision := NewTestDecision("decision-1"). + WithInput(map[string]float64{ + "host1": 1.0, + "host2": 2.0, + "host3": 3.0, + }). + WithPipelineOutputs( + NewTestPipelineOutput("weigher1", map[string]float64{ + "host1": 0.5, + "host2": 1.0, + "host3": -0.5, + }), + NewTestPipelineOutput("filter1", map[string]float64{ + "host1": 0.2, + // host2 and host3 removed by this step + }), + NewTestPipelineOutput("filter2", map[string]float64{ + // host1 removed by this step + // host2 and host3 would be removed again, but they're already gone + }), + ). + Build() - fakeClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(resource). - WithStatusSubresource(&v1alpha1.SchedulingDecision{}). + resource := NewTestSchedulingDecision("test-decision-multiple-deletions"). + WithDecisions(decision). 
Build() - req := ctrl.Request{ - NamespacedName: client.ObjectKey{ - Name: "test-decision-multiple-deletions", - }, - } + fakeClient, _ := SetupTestEnvironment(t, resource) + req := CreateTestRequest("test-decision-multiple-deletions") - reconciler := &SchedulingDecisionReconciler{ - Conf: Config{}, - Client: fakeClient, - } + reconciler := CreateSchedulingReconciler(fakeClient) _, err := reconciler.Reconcile(t.Context(), req) if err != nil { t.Fatalf("Reconcile returned an error: %v", err) } - // Fetch the updated resource to check status - var updatedResource v1alpha1.SchedulingDecision - if err := fakeClient.Get(t.Context(), client.ObjectKey{Name: "test-decision-multiple-deletions"}, &updatedResource); err != nil { - t.Fatalf("Failed to get updated resource: %v", err) - } - - // Verify success state - if updatedResource.Status.State != v1alpha1.SchedulingDecisionStateResolved { - t.Errorf("Expected state '%s', got '%s'", v1alpha1.SchedulingDecisionStateResolved, updatedResource.Status.State) - } - - // Verify we have one result - if len(updatedResource.Status.Results) != 1 { - t.Errorf("Expected 1 result, got %d", len(updatedResource.Status.Results)) - } + // Fetch and verify the updated resource + updatedResource := AssertResourceExists(t, fakeClient, "test-decision-multiple-deletions") + AssertResourceState(t, updatedResource, v1alpha1.SchedulingDecisionStateResolved) + AssertResultCount(t, updatedResource, 1) result := updatedResource.Status.Results[0] if result.ID != "decision-1" { t.Errorf("Expected result ID 'decision-1', got '%s'", result.ID) } - // Verify final scores calculation - // Expected: All hosts should be removed, no final scores + // Verify final scores calculation - all hosts should be removed, no final scores expectedFinalScores := map[string]float64{} - if len(result.FinalScores) != len(expectedFinalScores) { - t.Errorf("Expected %d final scores, got %d", len(expectedFinalScores), len(result.FinalScores)) - } + AssertFinalScores(t, result, 
expectedFinalScores) // Verify deleted hosts tracking // host2 and host3 deleted by filter1, host1 deleted by filter2 @@ -540,22 +272,7 @@ func TestReconcileMultipleDeletionSteps(t *testing.T) { "host3": {"filter1"}, // host3 deleted by filter1 "host1": {"filter2"}, // host1 deleted by filter2 } - if len(result.DeletedHosts) != len(expectedDeletedHosts) { - t.Errorf("Expected %d deleted hosts, got %d", len(expectedDeletedHosts), len(result.DeletedHosts)) - } - for host, expectedSteps := range expectedDeletedHosts { - if actualSteps, exists := result.DeletedHosts[host]; !exists { - t.Errorf("Expected deleted host '%s', but it was not found", host) - } else if len(actualSteps) != len(expectedSteps) { - t.Errorf("Expected host '%s' to be deleted by %d steps, got %d", host, len(expectedSteps), len(actualSteps)) - } else { - for i, expectedStep := range expectedSteps { - if actualSteps[i] != expectedStep { - t.Errorf("Expected host '%s' step %d to be '%s', got '%s'", host, i, expectedStep, actualSteps[i]) - } - } - } - } + AssertDeletedHosts(t, result, expectedDeletedHosts) t.Logf("Multiple deletion test completed: finalScores=%v, deletedHosts=%v", result.FinalScores, result.DeletedHosts) @@ -612,65 +329,30 @@ func TestReconcileCertaintyLevels(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - resource := &v1alpha1.SchedulingDecision{ - ObjectMeta: ctrl.ObjectMeta{ - Name: "test-certainty-" + tt.name, - }, - Spec: v1alpha1.SchedulingDecisionSpec{ - Decisions: []v1alpha1.SchedulingDecisionRequest{ - { - ID: "decision-1", - Input: tt.input, - Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ - Name: "certainty-test-pipeline", - Outputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ - { - Step: "weigher", - Activations: tt.activations, - }, - }, - }, - }, - }, - }, - } - - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } + // Create test decision with 
specific activations to test certainty levels + decision := NewTestDecision("decision-1"). + WithInput(tt.input). + WithPipelineOutputs( + NewTestPipelineOutput("weigher", tt.activations), + ). + Build() - fakeClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(resource). - WithStatusSubresource(&v1alpha1.SchedulingDecision{}). + resource := NewTestSchedulingDecision("test-certainty-" + tt.name). + WithDecisions(decision). Build() - req := ctrl.Request{ - NamespacedName: client.ObjectKey{ - Name: "test-certainty-" + tt.name, - }, - } + fakeClient, _ := SetupTestEnvironment(t, resource) + req := CreateTestRequest("test-certainty-" + tt.name) - reconciler := &SchedulingDecisionReconciler{ - Conf: Config{}, - Client: fakeClient, - } + reconciler := CreateSchedulingReconciler(fakeClient) _, err := reconciler.Reconcile(t.Context(), req) if err != nil { t.Fatalf("Reconcile returned an error: %v", err) } - // Fetch the updated resource to check status - var updatedResource v1alpha1.SchedulingDecision - if err := fakeClient.Get(t.Context(), client.ObjectKey{Name: "test-certainty-" + tt.name}, &updatedResource); err != nil { - t.Fatalf("Failed to get updated resource: %v", err) - } - - // Verify we have one result - if len(updatedResource.Status.Results) != 1 { - t.Errorf("Expected 1 result, got %d", len(updatedResource.Status.Results)) - } + // Fetch and verify the updated resource + updatedResource := AssertResourceExists(t, fakeClient, "test-certainty-"+tt.name) + AssertResultCount(t, updatedResource, 1) result := updatedResource.Status.Results[0] if result.ID != "decision-1" { @@ -678,98 +360,56 @@ func TestReconcileCertaintyLevels(t *testing.T) { } // Verify the description contains the expected winner and certainty - description := result.Description - if !contains(description, "Selected: "+tt.expectedWinner) { - t.Errorf("Expected description to contain 'Selected: %s', got '%s'", tt.expectedWinner, description) - } - if !contains(description, 
"certainty: "+tt.expectedCertainty) { - t.Errorf("Expected description to contain 'certainty: %s', got '%s'", tt.expectedCertainty, description) - } + AssertDescriptionContains(t, result.Description, + "Selected: "+tt.expectedWinner, + "certainty: "+tt.expectedCertainty, + ) - t.Logf("Certainty test %s completed: %s", tt.name, description) + t.Logf("Certainty test %s completed: %s", tt.name, result.Description) }) } } func TestReconcileNoHostsRemaining(t *testing.T) { - resource := &v1alpha1.SchedulingDecision{ - ObjectMeta: ctrl.ObjectMeta{ - Name: "test-no-hosts-remaining", - }, - Spec: v1alpha1.SchedulingDecisionSpec{ - Decisions: []v1alpha1.SchedulingDecisionRequest{ - { - ID: "decision-1", - Input: map[string]float64{ - "host1": 1.0, - "host2": 2.0, - }, - Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ - Name: "filter-all-pipeline", - Outputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ - { - Step: "filter-all", - Activations: map[string]float64{ - // No hosts in activations - all will be filtered out - }, - }, - }, - }, - }, - }, - }, - } - - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } + // Create test decision where all hosts are filtered out + decision := NewTestDecision("decision-1"). + WithInput(map[string]float64{ + "host1": 1.0, + "host2": 2.0, + }). + WithPipelineOutputs( + NewTestPipelineOutput("filter-all", map[string]float64{ + // No hosts in activations - all will be filtered out + }), + ). + Build() - fakeClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(resource). - WithStatusSubresource(&v1alpha1.SchedulingDecision{}). + resource := NewTestSchedulingDecision("test-no-hosts-remaining"). + WithDecisions(decision). 
Build() - req := ctrl.Request{ - NamespacedName: client.ObjectKey{ - Name: "test-no-hosts-remaining", - }, - } + fakeClient, _ := SetupTestEnvironment(t, resource) + req := CreateTestRequest("test-no-hosts-remaining") - reconciler := &SchedulingDecisionReconciler{ - Conf: Config{}, - Client: fakeClient, - } + reconciler := CreateSchedulingReconciler(fakeClient) _, err := reconciler.Reconcile(t.Context(), req) if err != nil { t.Fatalf("Reconcile returned an error: %v", err) } - // Fetch the updated resource to check status - var updatedResource v1alpha1.SchedulingDecision - if err := fakeClient.Get(t.Context(), client.ObjectKey{Name: "test-no-hosts-remaining"}, &updatedResource); err != nil { - t.Fatalf("Failed to get updated resource: %v", err) - } - - // Verify success state but no final scores - if updatedResource.Status.State != v1alpha1.SchedulingDecisionStateResolved { - t.Errorf("Expected state '%s', got '%s'", v1alpha1.SchedulingDecisionStateResolved, updatedResource.Status.State) - } - - // Verify we have one result - if len(updatedResource.Status.Results) != 1 { - t.Errorf("Expected 1 result, got %d", len(updatedResource.Status.Results)) - } + // Fetch and verify the updated resource + updatedResource := AssertResourceExists(t, fakeClient, "test-no-hosts-remaining") + AssertResourceState(t, updatedResource, v1alpha1.SchedulingDecisionStateResolved) + AssertResultCount(t, updatedResource, 1) result := updatedResource.Status.Results[0] if result.ID != "decision-1" { t.Errorf("Expected result ID 'decision-1', got '%s'", result.ID) } - if len(result.FinalScores) != 0 { - t.Errorf("Expected 0 final scores, got %d", len(result.FinalScores)) - } + // Verify no final scores since all hosts were filtered out + expectedFinalScores := map[string]float64{} + AssertFinalScores(t, result, expectedFinalScores) expectedDescription := "No hosts remaining after filtering, 2 hosts evaluated" if result.Description != expectedDescription { @@ -783,7 +423,7 @@ func 
TestReconcileInputVsFinalComparison(t *testing.T) { tests := []struct { name string input map[string]float64 - activations []map[string]float64 + activations map[string]float64 expectedDescContains []string }{ { @@ -793,8 +433,8 @@ func TestReconcileInputVsFinalComparison(t *testing.T) { "host2": 2.0, "host3": 1.0, }, - activations: []map[string]float64{ - {"host1": 0.5, "host2": 0.3, "host3": 0.1}, // host1 stays winner + activations: map[string]float64{ + "host1": 0.5, "host2": 0.3, "host3": 0.1, // host1 stays winner }, expectedDescContains: []string{ "Selected: host1", @@ -808,8 +448,8 @@ func TestReconcileInputVsFinalComparison(t *testing.T) { "host2": 3.0, // highest in input "host3": 2.0, }, - activations: []map[string]float64{ - {"host1": 0.5, "host3": 0.3}, // host2 filtered out, host3 becomes winner + activations: map[string]float64{ + "host1": 0.5, "host3": 0.3, // host2 filtered out, host3 becomes winner }, expectedDescContains: []string{ "Selected: host3", @@ -824,8 +464,8 @@ func TestReconcileInputVsFinalComparison(t *testing.T) { "host2": 3.0, // highest in input "host3": 2.0, }, - activations: []map[string]float64{ - {"host1": 2.5, "host2": -0.5, "host3": 0.8}, // host1 becomes winner, host2 demoted to #3 + activations: map[string]float64{ + "host1": 2.5, "host2": -0.5, "host3": 0.8, // host1 becomes winner, host2 demoted to #3 }, expectedDescContains: []string{ "Selected: host1", @@ -837,65 +477,30 @@ func TestReconcileInputVsFinalComparison(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - resource := &v1alpha1.SchedulingDecision{ - ObjectMeta: ctrl.ObjectMeta{ - Name: "test-input-vs-final-" + tt.name, - }, - Spec: v1alpha1.SchedulingDecisionSpec{ - Decisions: []v1alpha1.SchedulingDecisionRequest{ - { - ID: "decision-1", - Input: tt.input, - Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ - Name: "input-vs-final-pipeline", - Outputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ - { - Step: "weigher", - 
Activations: tt.activations[0], - }, - }, - }, - }, - }, - }, - } - - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } + // Create test decision to compare input vs final rankings + decision := NewTestDecision("decision-1"). + WithInput(tt.input). + WithPipelineOutputs( + NewTestPipelineOutput("weigher", tt.activations), + ). + Build() - fakeClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(resource). - WithStatusSubresource(&v1alpha1.SchedulingDecision{}). + resource := NewTestSchedulingDecision("test-input-vs-final-" + tt.name). + WithDecisions(decision). Build() - req := ctrl.Request{ - NamespacedName: client.ObjectKey{ - Name: "test-input-vs-final-" + tt.name, - }, - } + fakeClient, _ := SetupTestEnvironment(t, resource) + req := CreateTestRequest("test-input-vs-final-" + tt.name) - reconciler := &SchedulingDecisionReconciler{ - Conf: Config{}, - Client: fakeClient, - } + reconciler := CreateSchedulingReconciler(fakeClient) _, err := reconciler.Reconcile(t.Context(), req) if err != nil { t.Fatalf("Reconcile returned an error: %v", err) } - // Fetch the updated resource to check status - var updatedResource v1alpha1.SchedulingDecision - if err := fakeClient.Get(t.Context(), client.ObjectKey{Name: "test-input-vs-final-" + tt.name}, &updatedResource); err != nil { - t.Fatalf("Failed to get updated resource: %v", err) - } - - // Verify we have one result - if len(updatedResource.Status.Results) != 1 { - t.Errorf("Expected 1 result, got %d", len(updatedResource.Status.Results)) - } + // Fetch and verify the updated resource + updatedResource := AssertResourceExists(t, fakeClient, "test-input-vs-final-"+tt.name) + AssertResultCount(t, updatedResource, 1) result := updatedResource.Status.Results[0] if result.ID != "decision-1" { @@ -903,14 +508,9 @@ func TestReconcileInputVsFinalComparison(t *testing.T) { } // Verify the description contains expected elements - 
description := result.Description - for _, expectedContent := range tt.expectedDescContains { - if !contains(description, expectedContent) { - t.Errorf("Expected description to contain '%s', got '%s'", expectedContent, description) - } - } + AssertDescriptionContains(t, result.Description, tt.expectedDescContains...) - t.Logf("Input vs Final test %s completed: %s", tt.name, description) + t.Logf("Input vs Final test %s completed: %s", tt.name, result.Description) }) } } @@ -919,7 +519,7 @@ func TestReconcileCriticalStepElimination(t *testing.T) { tests := []struct { name string input map[string]float64 - pipeline []v1alpha1.SchedulingDecisionPipelineOutputSpec + pipelineOutputs []v1alpha1.SchedulingDecisionPipelineOutputSpec expectedCriticalMessage string }{ { @@ -929,7 +529,7 @@ func TestReconcileCriticalStepElimination(t *testing.T) { "host2": 1.0, "host3": 1.5, }, - pipeline: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ + pipelineOutputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ { Step: "non-critical-weigher", Activations: map[string]float64{ @@ -955,7 +555,7 @@ func TestReconcileCriticalStepElimination(t *testing.T) { "host2": 3.0, // Strong initial winner "host3": 2.0, }, - pipeline: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ + pipelineOutputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ { Step: "critical-weigher1", Activations: map[string]float64{ @@ -982,7 +582,7 @@ func TestReconcileCriticalStepElimination(t *testing.T) { "host2": 1.0, "host3": 2.0, }, - pipeline: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ + pipelineOutputs: []v1alpha1.SchedulingDecisionPipelineOutputSpec{ { Step: "non-critical-weigher1", Activations: map[string]float64{ @@ -1006,60 +606,28 @@ func TestReconcileCriticalStepElimination(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - resource := &v1alpha1.SchedulingDecision{ - ObjectMeta: ctrl.ObjectMeta{ - Name: "test-critical-steps-" + tt.name, - }, - Spec: 
v1alpha1.SchedulingDecisionSpec{ - Decisions: []v1alpha1.SchedulingDecisionRequest{ - { - ID: "decision-1", - Input: tt.input, - Pipeline: v1alpha1.SchedulingDecisionPipelineSpec{ - Name: "critical-step-test-pipeline", - Outputs: tt.pipeline, - }, - }, - }, - }, - } - - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } + // Create test decision with multiple pipeline steps to test critical step analysis + decision := NewTestDecision("decision-1"). + WithInput(tt.input). + WithPipelineOutputs(tt.pipelineOutputs...). + Build() - fakeClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(resource). - WithStatusSubresource(&v1alpha1.SchedulingDecision{}). + resource := NewTestSchedulingDecision("test-critical-steps-" + tt.name). + WithDecisions(decision). Build() - req := ctrl.Request{ - NamespacedName: client.ObjectKey{ - Name: "test-critical-steps-" + tt.name, - }, - } + fakeClient, _ := SetupTestEnvironment(t, resource) + req := CreateTestRequest("test-critical-steps-" + tt.name) - reconciler := &SchedulingDecisionReconciler{ - Conf: Config{}, - Client: fakeClient, - } + reconciler := CreateSchedulingReconciler(fakeClient) _, err := reconciler.Reconcile(t.Context(), req) if err != nil { t.Fatalf("Reconcile returned an error: %v", err) } - // Fetch the updated resource to check status - var updatedResource v1alpha1.SchedulingDecision - if err := fakeClient.Get(t.Context(), client.ObjectKey{Name: "test-critical-steps-" + tt.name}, &updatedResource); err != nil { - t.Fatalf("Failed to get updated resource: %v", err) - } - - // Verify we have one result - if len(updatedResource.Status.Results) != 1 { - t.Errorf("Expected 1 result, got %d", len(updatedResource.Status.Results)) - } + // Fetch and verify the updated resource + updatedResource := AssertResourceExists(t, fakeClient, "test-critical-steps-"+tt.name) + AssertResultCount(t, updatedResource, 1) result := 
updatedResource.Status.Results[0] if result.ID != "decision-1" { @@ -1067,22 +635,126 @@ func TestReconcileCriticalStepElimination(t *testing.T) { } // Verify the description contains the expected critical step message - description := result.Description - if !contains(description, tt.expectedCriticalMessage) { - t.Errorf("Expected description to contain '%s', got '%s'", tt.expectedCriticalMessage, description) - } + AssertDescriptionContains(t, result.Description, tt.expectedCriticalMessage) - t.Logf("Critical step test %s completed: %s", tt.name, description) + t.Logf("Critical step test %s completed: %s", tt.name, result.Description) }) } } -// Helper function to check if a string contains a substring -func contains(s, substr string) bool { - for i := 0; i <= len(s)-len(substr); i++ { - if s[i:i+len(substr)] == substr { - return true - } +func TestReconcileGlobalDescription(t *testing.T) { + tests := []struct { + name string + decisions []v1alpha1.SchedulingDecisionRequest + expectedGlobalDescription string + }{ + { + name: "single-decision-no-global", + decisions: []v1alpha1.SchedulingDecisionRequest{ + NewTestDecision("decision-1"). + WithInput(map[string]float64{"host1": 1.0, "host2": 2.0}). + WithPipelineOutputs(NewTestPipelineOutput("weigher", map[string]float64{"host1": 1.0, "host2": 0.0})). + Build(), + }, + expectedGlobalDescription: "", // No global description for single decision + }, + { + name: "simple-chain-no-loop", + decisions: []v1alpha1.SchedulingDecisionRequest{ + NewTestDecision("decision-1"). + WithRequestedAt(time.Now().Add(-5 * time.Hour)). + WithInput(map[string]float64{"host1": 1.0, "host2": 2.0}). + WithPipelineOutputs(NewTestPipelineOutput("weigher", map[string]float64{"host1": 2.0, "host2": 0.0})). + Build(), + NewTestDecision("decision-2"). + WithRequestedAt(time.Now().Add(-3 * time.Hour)). + WithInput(map[string]float64{"host2": 1.0, "host3": 2.0}). 
+ WithPipelineOutputs(NewTestPipelineOutput("weigher", map[string]float64{"host2": 1.5, "host3": 0.0})). + Build(), + NewTestDecision("decision-3"). + WithRequestedAt(time.Now().Add(-1 * time.Hour)). + WithInput(map[string]float64{"host2": 1.0, "host3": 2.0}). + WithPipelineOutputs(NewTestPipelineOutput("weigher", map[string]float64{"host2": 1.5, "host3": 0.0})). + Build(), + NewTestDecision("decision-4"). + WithRequestedAt(time.Now()). + WithInput(map[string]float64{"host3": 1.0, "host4": 2.0}). + WithPipelineOutputs(NewTestPipelineOutput("weigher", map[string]float64{"host3": 0.0, "host4": 1.0})). + Build(), + }, + expectedGlobalDescription: "chain: host1 (2h) -> host2 (3h; 2 decisions) -> host4 (0m)", + }, + { + name: "chain-with-loop", + decisions: []v1alpha1.SchedulingDecisionRequest{ + NewTestDecision("decision-1"). + WithRequestedAt(time.Now().Add(-5 * time.Hour)). + WithInput(map[string]float64{"host1": 1.0, "host2": 2.0}). + WithPipelineOutputs(NewTestPipelineOutput("weigher", map[string]float64{"host1": 2.0, "host2": 0.0})). + Build(), + NewTestDecision("decision-2"). + WithRequestedAt(time.Now().Add(-2 * time.Hour)). + WithInput(map[string]float64{"host1": 1.0, "host2": 2.0}). + WithPipelineOutputs(NewTestPipelineOutput("weigher", map[string]float64{"host1": 0.0, "host2": 1.0})). + Build(), + NewTestDecision("decision-3"). + WithRequestedAt(time.Now().Add(-1 * time.Hour)). + WithInput(map[string]float64{"host1": 1.0, "host2": 2.0}). + WithPipelineOutputs(NewTestPipelineOutput("weigher", map[string]float64{"host1": 2.0, "host2": 0.0})). + Build(), + NewTestDecision("decision-4"). + WithRequestedAt(time.Now()). + WithInput(map[string]float64{"host3": 1.0}). + WithPipelineOutputs(NewTestPipelineOutput("weigher", map[string]float64{"host3": 0.0})). 
+ Build(), + }, + expectedGlobalDescription: "chain: host1 (3h) -> host2 (1h) -> host1 (1h) -> host3 (0m); loop detected", + }, + { + name: "same-host-all-decisions-no-loop", + decisions: []v1alpha1.SchedulingDecisionRequest{ + NewTestDecision("decision-1"). + WithRequestedAt(time.Now().Add(-2 * time.Hour)). + WithInput(map[string]float64{"host1": 2.0, "host2": 1.0}). + WithPipelineOutputs(NewTestPipelineOutput("weigher", map[string]float64{"host1": 1.0, "host2": 0.0})). + Build(), + NewTestDecision("decision-2"). + WithRequestedAt(time.Now()). + WithInput(map[string]float64{"host1": 2.0, "host3": 1.0}). + WithPipelineOutputs(NewTestPipelineOutput("weigher", map[string]float64{"host1": 1.0, "host3": 0.0})). + Build(), + }, + expectedGlobalDescription: "chain: host1 (2h; 2 decisions)", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + resource := NewTestSchedulingDecision("test-global-" + tt.name). + WithDecisions(tt.decisions...). + Build() + + fakeClient, _ := SetupTestEnvironment(t, resource) + req := CreateTestRequest("test-global-" + tt.name) + + reconciler := CreateSchedulingReconciler(fakeClient) + _, err := reconciler.Reconcile(t.Context(), req) + if err != nil { + t.Fatalf("Reconcile returned an error: %v", err) + } + + // Fetch and verify the updated resource + updatedResource := AssertResourceExists(t, fakeClient, "test-global-"+tt.name) + AssertResourceState(t, updatedResource, v1alpha1.SchedulingDecisionStateResolved) + AssertDecisionCount(t, updatedResource, len(tt.decisions)) + + // Verify global description + if updatedResource.Status.GlobalDescription != tt.expectedGlobalDescription { + t.Errorf("Expected global description '%s', got '%s'", + tt.expectedGlobalDescription, updatedResource.Status.GlobalDescription) + } + + t.Logf("Global description test %s completed: '%s'", tt.name, updatedResource.Status.GlobalDescription) + }) } - return false } diff --git a/decisions/internal/controller/test_helpers.go 
b/decisions/internal/controller/test_helpers.go new file mode 100644 index 00000000..86eb92f2 --- /dev/null +++ b/decisions/internal/controller/test_helpers.go @@ -0,0 +1,322 @@ +// Copyright 2025 SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package controller + +import ( + "strings" + "testing" + "time" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + decisionsv1alpha1 "github.com/cobaltcore-dev/cortex/decisions/api/v1alpha1" +) + +// Test constants to reduce magic numbers +const ( + DefaultTestTTL = 2 * time.Hour + DefaultTestAge = 1 * time.Hour + OldTestAge = 3 * time.Hour + DefaultTTLDays = 14 * 24 * time.Hour + TestTolerance = 1 * time.Minute + DefaultTestVCPUs = 1 + DefaultTestRAM = 2048 + DefaultTestDisk = 10 +) + +// TestDecisionBuilder helps build SchedulingDecisionRequest objects for tests +type TestDecisionBuilder struct { + decision decisionsv1alpha1.SchedulingDecisionRequest +} + +func NewTestDecision(id string) *TestDecisionBuilder { + return &TestDecisionBuilder{ + decision: decisionsv1alpha1.SchedulingDecisionRequest{ + ID: id, + RequestedAt: metav1.NewTime(time.Now()), + EventType: decisionsv1alpha1.SchedulingEventTypeInitialPlacement, + Input: map[string]float64{ + "host1": 1.0, + }, + Pipeline: decisionsv1alpha1.SchedulingDecisionPipelineSpec{ + Name: "test-pipeline", + }, + Flavor: decisionsv1alpha1.Flavor{ + Name: "test-flavor", + VCPUs: DefaultTestVCPUs, + RAM: DefaultTestRAM, + Disk: DefaultTestDisk, + }, + }, + } +} + +// WithRequestedAt sets the RequestedAt timestamp +func (b *TestDecisionBuilder) WithRequestedAt(t time.Time) *TestDecisionBuilder { + b.decision.RequestedAt = metav1.NewTime(t) + return b +} + +// WithInput sets the input hosts and scores +func (b *TestDecisionBuilder) WithInput(input map[string]float64) *TestDecisionBuilder { + b.decision.Input 
= input + return b +} + +// WithPipelineOutputs sets the pipeline outputs +func (b *TestDecisionBuilder) WithPipelineOutputs(outputs ...decisionsv1alpha1.SchedulingDecisionPipelineOutputSpec) *TestDecisionBuilder { + b.decision.Pipeline.Outputs = outputs + return b +} + +// WithEventType sets the event type +func (b *TestDecisionBuilder) WithEventType(eventType decisionsv1alpha1.SchedulingEventType) *TestDecisionBuilder { + b.decision.EventType = eventType + return b +} + +// Build returns the built SchedulingDecisionRequest +func (b *TestDecisionBuilder) Build() decisionsv1alpha1.SchedulingDecisionRequest { + return b.decision +} + +// TestSchedulingDecisionBuilder helps build SchedulingDecision objects for tests +type TestSchedulingDecisionBuilder struct { + resource decisionsv1alpha1.SchedulingDecision +} + +// NewTestSchedulingDecision creates a new test SchedulingDecision builder +func NewTestSchedulingDecision(name string) *TestSchedulingDecisionBuilder { + return &TestSchedulingDecisionBuilder{ + resource: decisionsv1alpha1.SchedulingDecision{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + }, + Spec: decisionsv1alpha1.SchedulingDecisionSpec{ + Decisions: []decisionsv1alpha1.SchedulingDecisionRequest{}, + }, + }, + } +} + +// WithDecisions adds decisions to the SchedulingDecision +func (b *TestSchedulingDecisionBuilder) WithDecisions(decisions ...decisionsv1alpha1.SchedulingDecisionRequest) *TestSchedulingDecisionBuilder { + b.resource.Spec.Decisions = decisions + return b +} + +// WithCreationTimestamp sets the creation timestamp +func (b *TestSchedulingDecisionBuilder) WithCreationTimestamp(t time.Time) *TestSchedulingDecisionBuilder { + b.resource.ObjectMeta.CreationTimestamp = metav1.NewTime(t) + return b +} + +// WithNamespace sets the namespace +func (b *TestSchedulingDecisionBuilder) WithNamespace(namespace string) *TestSchedulingDecisionBuilder { + b.resource.ObjectMeta.Namespace = namespace + return b +} + +// Build returns the built 
SchedulingDecision +func (b *TestSchedulingDecisionBuilder) Build() *decisionsv1alpha1.SchedulingDecision { + return &b.resource +} + +// NewTestPipelineOutput creates a pipeline output spec for testing +func NewTestPipelineOutput(step string, activations map[string]float64) decisionsv1alpha1.SchedulingDecisionPipelineOutputSpec { + return decisionsv1alpha1.SchedulingDecisionPipelineOutputSpec{ + Step: step, + Activations: activations, + } +} + +// SetupTestEnvironment creates a fake client and scheme for testing +func SetupTestEnvironment(t *testing.T, resources ...client.Object) (client.Client, *runtime.Scheme) { + t.Helper() + + scheme := runtime.NewScheme() + if err := decisionsv1alpha1.AddToScheme(scheme); err != nil { + t.Fatalf("Failed to add scheme: %v", err) + } + + clientBuilder := fake.NewClientBuilder().WithScheme(scheme) + if len(resources) > 0 { + clientBuilder = clientBuilder.WithObjects(resources...) + } + + // Add status subresource for SchedulingDecision + fakeClient := clientBuilder.WithStatusSubresource(&decisionsv1alpha1.SchedulingDecision{}).Build() + + return fakeClient, scheme +} + +// CreateTestRequest creates a controller request for testing +func CreateTestRequest(name string, namespace ...string) ctrl.Request { + req := ctrl.Request{ + NamespacedName: client.ObjectKey{ + Name: name, + }, + } + if len(namespace) > 0 { + req.NamespacedName.Namespace = namespace[0] + } + return req +} + +// AssertResourceExists checks that a resource exists and returns it +func AssertResourceExists(t *testing.T, c client.Client, name string, namespace ...string) *decisionsv1alpha1.SchedulingDecision { + t.Helper() + + key := client.ObjectKey{Name: name} + if len(namespace) > 0 { + key.Namespace = namespace[0] + } + + var resource decisionsv1alpha1.SchedulingDecision + if err := c.Get(t.Context(), key, &resource); err != nil { + t.Fatalf("Resource %s should exist: %v", name, err) + } + return &resource +} + +// AssertResourceDeleted checks that a resource 
has been deleted +func AssertResourceDeleted(t *testing.T, c client.Client, name string, namespace ...string) { + t.Helper() + + key := client.ObjectKey{Name: name} + if len(namespace) > 0 { + key.Namespace = namespace[0] + } + + var resource decisionsv1alpha1.SchedulingDecision + err := c.Get(t.Context(), key, &resource) + if err == nil { + t.Errorf("Resource %s should have been deleted", name) + } +} + +// AssertResourceState checks the state of a SchedulingDecision +func AssertResourceState(t *testing.T, resource *decisionsv1alpha1.SchedulingDecision, expectedState decisionsv1alpha1.SchedulingDecisionState) { + t.Helper() + + if resource.Status.State != expectedState { + t.Errorf("Expected state '%s', got '%s'", expectedState, resource.Status.State) + } +} + +// AssertResourceError checks the error message of a SchedulingDecision +func AssertResourceError(t *testing.T, resource *decisionsv1alpha1.SchedulingDecision, expectedError string) { + t.Helper() + + if resource.Status.Error != expectedError { + t.Errorf("Expected error '%s', got '%s'", expectedError, resource.Status.Error) + } +} + +// AssertNoError checks that there's no error in the resource status +func AssertNoError(t *testing.T, resource *decisionsv1alpha1.SchedulingDecision) { + t.Helper() + + if resource.Status.Error != "" { + t.Errorf("Expected no error, got '%s'", resource.Status.Error) + } +} + +// AssertResultCount checks the number of results in a SchedulingDecision +func AssertResultCount(t *testing.T, resource *decisionsv1alpha1.SchedulingDecision, expectedCount int) { + t.Helper() + + if len(resource.Status.Results) != expectedCount { + t.Errorf("Expected %d results, got %d", expectedCount, len(resource.Status.Results)) + } +} + +// AssertDecisionCount checks the decision count in a SchedulingDecision +func AssertDecisionCount(t *testing.T, resource *decisionsv1alpha1.SchedulingDecision, expectedCount int) { + t.Helper() + + if resource.Status.DecisionCount != expectedCount { + 
t.Errorf("Expected decision count %d, got %d", expectedCount, resource.Status.DecisionCount) + } +} + +// AssertFinalScores checks the final scores in a result +func AssertFinalScores(t *testing.T, result decisionsv1alpha1.SchedulingDecisionResult, expectedScores map[string]float64) { + t.Helper() + + if len(result.FinalScores) != len(expectedScores) { + t.Errorf("Expected %d final scores, got %d", len(expectedScores), len(result.FinalScores)) + } + + for host, expectedScore := range expectedScores { + if actualScore, exists := result.FinalScores[host]; !exists { + t.Errorf("Expected final score for host '%s', but it was not found", host) + } else if actualScore != expectedScore { + t.Errorf("Expected final score for host '%s' to be %f, got %f", host, expectedScore, actualScore) + } + } +} + +// AssertDeletedHosts checks the deleted hosts in a result +func AssertDeletedHosts(t *testing.T, result decisionsv1alpha1.SchedulingDecisionResult, expectedDeletedHosts map[string][]string) { + t.Helper() + + if len(result.DeletedHosts) != len(expectedDeletedHosts) { + t.Errorf("Expected %d deleted hosts, got %d", len(expectedDeletedHosts), len(result.DeletedHosts)) + } + + for host, expectedSteps := range expectedDeletedHosts { + if actualSteps, exists := result.DeletedHosts[host]; !exists { + t.Errorf("Expected deleted host '%s', but it was not found", host) + } else if len(actualSteps) != len(expectedSteps) { + t.Errorf("Expected host '%s' to be deleted by %d steps, got %d", host, len(expectedSteps), len(actualSteps)) + } else { + for i, expectedStep := range expectedSteps { + if actualSteps[i] != expectedStep { + t.Errorf("Expected host '%s' step %d to be '%s', got '%s'", host, i, expectedStep, actualSteps[i]) + } + } + } + } +} + +// AssertDescriptionContains checks that a description contains expected text +func AssertDescriptionContains(t *testing.T, description string, expectedContents ...string) { + t.Helper() + + for _, expectedContent := range expectedContents { + 
if !strings.Contains(description, expectedContent) { + t.Errorf("Expected description to contain '%s', got '%s'", expectedContent, description) + } + } +} + +// CreateTTLReconciler creates a TTL reconciler with the given TTL duration +// If ttl is 0, the reconciler will use its internal default (14 days) +func CreateTTLReconciler(fakeClient client.Client, scheme *runtime.Scheme, ttl time.Duration) *SchedulingDecisionTTLController { + return &SchedulingDecisionTTLController{ + Client: fakeClient, + Scheme: scheme, + Conf: Config{ + TTLHoursAfterDecision: ttl, + }, + } +} + +// CreateSchedulingReconciler creates a scheduling decision reconciler +// If conf is empty, uses default empty config +func CreateSchedulingReconciler(fakeClient client.Client, conf ...Config) *SchedulingDecisionReconciler { + var config Config + if len(conf) > 0 { + config = conf[0] + } + return &SchedulingDecisionReconciler{ + Conf: config, + Client: fakeClient, + } +} diff --git a/decisions/internal/controller/ttl_controller.go b/decisions/internal/controller/ttl_controller.go new file mode 100644 index 00000000..6dbe1ec9 --- /dev/null +++ b/decisions/internal/controller/ttl_controller.go @@ -0,0 +1,103 @@ +// Copyright 2025 SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package controller + +import ( + "context" + "time" + + "k8s.io/apimachinery/pkg/runtime" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + logf "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/predicate" + + decisionsv1alpha1 "github.com/cobaltcore-dev/cortex/decisions/api/v1alpha1" +) + +// SchedulingDecisionTTLController handles automatic cleanup of resolved SchedulingDecision resources +// after a configurable TTL period. +type SchedulingDecisionTTLController struct { + // Client for the kubernetes API. + client.Client + // Kubernetes scheme to use for the decisions. 
+ Scheme *runtime.Scheme + // Configuration for the TTL controller. + Conf Config +} + +// +kubebuilder:rbac:groups=decisions.cortex,resources=schedulingdecisions,verbs=get;list;watch;delete +// +kubebuilder:rbac:groups=decisions.cortex,resources=schedulingdecisions/status,verbs=get + +func (r *SchedulingDecisionTTLController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + log := logf.FromContext(ctx).WithName("ttl-controller") + + // Fetch the decision object + var decision decisionsv1alpha1.SchedulingDecision + if err := r.Get(ctx, req.NamespacedName, &decision); err != nil { + // Resource was deleted or doesn't exist - nothing to clean up + return ctrl.Result{}, client.IgnoreNotFound(err) + } + + // Calculate age based on last decision's RequestedAt timestamp + var referenceTime time.Time + if len(decision.Spec.Decisions) > 0 { + // Use the last decision's RequestedAt timestamp + lastDecision := decision.Spec.Decisions[len(decision.Spec.Decisions)-1] + referenceTime = lastDecision.RequestedAt.Time + } else { + // Fallback to creation timestamp if no decisions exist + referenceTime = decision.CreationTimestamp.Time + } + + age := time.Since(referenceTime) + ttl := r.getTTL() + + if age >= ttl { + // TTL has expired - delete the resource + log.Info("Deleting expired SchedulingDecision", + "name", decision.Name, + "age", age.String(), + "ttl", ttl.String()) + + if err := r.Delete(ctx, &decision); err != nil { + if client.IgnoreNotFound(err) != nil { + log.Error(err, "Failed to delete expired SchedulingDecision", "name", decision.Name) + return ctrl.Result{}, err + } + log.V(1).Info("SchedulingDecision was already deleted", "name", decision.Name) + } + + return ctrl.Result{}, nil + } + + remainingTime := ttl - age + log.V(1).Info("Scheduling SchedulingDecision for future deletion", + "name", decision.Name, + "remainingTime", remainingTime.String()) + + return ctrl.Result{RequeueAfter: remainingTime}, nil +} + +func (r 
*SchedulingDecisionTTLController) getTTL() time.Duration { + if r.Conf.TTLHoursAfterDecision > 0 { + return r.Conf.TTLHoursAfterDecision + } + return 14 * 24 * time.Hour +} + +func (r *SchedulingDecisionTTLController) SetupWithManager(mgr ctrl.Manager) error { + return ctrl.NewControllerManagedBy(mgr). + For(&decisionsv1alpha1.SchedulingDecision{}). + Named("schedulingdecision-ttl"). + WithOptions(controller.Options{ + MaxConcurrentReconciles: 10, + }). + WithEventFilter( + // Watch for spec changes (when decisions are added/modified) + predicate.GenerationChangedPredicate{}, + ). + Complete(r) +} diff --git a/decisions/internal/controller/ttl_controller_test.go b/decisions/internal/controller/ttl_controller_test.go new file mode 100644 index 00000000..592d9485 --- /dev/null +++ b/decisions/internal/controller/ttl_controller_test.go @@ -0,0 +1,150 @@ +// Copyright 2025 SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package controller + +import ( + "context" + "testing" + "time" +) + +func TestTTLController(t *testing.T) { + tests := []struct { + name string + resourceAge time.Duration + ttl time.Duration + expectDeleted bool + expectRequeue bool + }{ + { + name: "young resource preserved", + resourceAge: DefaultTestAge, + ttl: DefaultTestTTL, + expectDeleted: false, + expectRequeue: true, + }, + { + name: "old resource deleted", + resourceAge: OldTestAge, + ttl: DefaultTestTTL, + expectDeleted: true, + expectRequeue: false, + }, + { + name: "resource at TTL boundary deleted", + resourceAge: DefaultTestTTL, + ttl: DefaultTestTTL, + expectDeleted: true, + expectRequeue: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create test resource with specified age + decision := NewTestDecision("decision-1"). + WithRequestedAt(time.Now().Add(-tt.resourceAge)). + Build() + + resource := NewTestSchedulingDecision("test-decision"). + WithDecisions(decision). 
+ Build() + + fakeClient, scheme := SetupTestEnvironment(t, resource) + reconciler := CreateTTLReconciler(fakeClient, scheme, tt.ttl) + req := CreateTestRequest("test-decision") + + result, err := reconciler.Reconcile(context.Background(), req) + if err != nil { + t.Fatalf("Reconcile failed: %v", err) + } + + // Check deletion expectation + if tt.expectDeleted { + AssertResourceDeleted(t, fakeClient, "test-decision") + } else { + AssertResourceExists(t, fakeClient, "test-decision") + } + + // Check requeue expectation + if tt.expectRequeue && result.RequeueAfter == 0 { + t.Error("Expected requeue but got none") + } + if !tt.expectRequeue && result.RequeueAfter != 0 { + t.Error("Expected no requeue but got one") + } + }) + } +} + +func TestTTLControllerFallbackToCreationTimestamp(t *testing.T) { + // Resource with no decisions should use creation timestamp + resource := NewTestSchedulingDecision("empty-decision"). + WithCreationTimestamp(time.Now().Add(-OldTestAge)). + Build() + + fakeClient, scheme := SetupTestEnvironment(t, resource) + reconciler := CreateTTLReconciler(fakeClient, scheme, DefaultTestTTL) + req := CreateTestRequest("empty-decision") + + result, err := reconciler.Reconcile(context.Background(), req) + if err != nil { + t.Fatalf("Reconcile failed: %v", err) + } + + // Should be deleted and not requeued + AssertResourceDeleted(t, fakeClient, "empty-decision") + if result.RequeueAfter != 0 { + t.Error("Expected no requeue after deletion") + } +} + +func TestTTLControllerDefaultTTL(t *testing.T) { + decision := NewTestDecision("decision-1"). + WithRequestedAt(time.Now().Add(-DefaultTestAge)). + Build() + + resource := NewTestSchedulingDecision("default-ttl-decision"). + WithDecisions(decision). 
+ Build() + + fakeClient, scheme := SetupTestEnvironment(t, resource) + + // Create reconciler without TTL config (should use default) + reconciler := CreateTTLReconciler(fakeClient, scheme, 0) // Zero duration means use default + + req := CreateTestRequest("default-ttl-decision") + result, err := reconciler.Reconcile(context.Background(), req) + if err != nil { + t.Fatalf("Reconcile failed: %v", err) + } + + // 1-hour-old resource with 14-day default TTL should be preserved + AssertResourceExists(t, fakeClient, "default-ttl-decision") + if result.RequeueAfter == 0 { + t.Error("Expected requeue for resource with default TTL") + } + + // Verify requeue time is reasonable (approximately 13 days, 23 hours) + expectedRequeue := DefaultTTLDays - DefaultTestAge + if result.RequeueAfter < expectedRequeue-TestTolerance || result.RequeueAfter > expectedRequeue+TestTolerance { + t.Errorf("Requeue time %v not within expected range %v ± %v", + result.RequeueAfter, expectedRequeue, TestTolerance) + } +} + +func TestTTLControllerNonExistentResource(t *testing.T) { + fakeClient, scheme := SetupTestEnvironment(t) + reconciler := CreateTTLReconciler(fakeClient, scheme, DefaultTestTTL) + req := CreateTestRequest("non-existent") + + result, err := reconciler.Reconcile(context.Background(), req) + if err != nil { + t.Fatalf("Should handle non-existent resources gracefully: %v", err) + } + + if result.RequeueAfter != 0 { + t.Error("Expected no requeue for non-existent resource") + } +} From d196b3dbadf12d412edbad440e43b29db1d11739 Mon Sep 17 00:00:00 2001 From: mblos Date: Mon, 6 Oct 2025 15:41:51 +0200 Subject: [PATCH 38/58] fixing resolve; analysis --- decisions/internal/controller/controller.go | 18 +++++------------- internal/scheduler/nova/pipeline.go | 21 ++++++++++++++------- 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/decisions/internal/controller/controller.go b/decisions/internal/controller/controller.go index 8ea1c847..5e864ea2 100644 --- 
a/decisions/internal/controller/controller.go +++ b/decisions/internal/controller/controller.go @@ -135,11 +135,6 @@ func (r *SchedulingDecisionReconciler) Reconcile(ctx context.Context, req ctrl.R return ctrl.Result{}, err } - // If the decision is already resolved or in error state, do nothing. - if res.Status.State == v1alpha1.SchedulingDecisionStateResolved || res.Status.State == v1alpha1.SchedulingDecisionStateError { - return ctrl.Result{}, nil - } - // Validate we have at least one decision if len(res.Spec.Decisions) == 0 { if err := r.setErrorState(ctx, &res, fmt.Errorf("No decisions provided in spec")); err != nil { @@ -647,16 +642,13 @@ func (r *SchedulingDecisionReconciler) generateGlobalDescription(results []v1alp // Loop detection: check if any host appears again after other hosts in between hasLoop := false - for i := 0; i < len(hostChain); i++ { - for j := i + 2; j < len(hostChain); j++ { // Skip adjacent hosts (i+1) - if hostChain[i] == hostChain[j] { - hasLoop = true - break - } - } - if hasLoop { + seenHosts := make(map[string]bool) + for segment := range segments { + if seenHosts[segments[segment].host] { + hasLoop = true break } + seenHosts[segments[segment].host] = true } // Build description diff --git a/internal/scheduler/nova/pipeline.go b/internal/scheduler/nova/pipeline.go index e0011059..be9ded96 100644 --- a/internal/scheduler/nova/pipeline.go +++ b/internal/scheduler/nova/pipeline.go @@ -97,11 +97,12 @@ func (c *novaPipelineConsumer) Consume( // Determine the event type based on request flags var eventType v1alpha1.SchedulingEventType - if request.Live { + switch { + case request.Live: eventType = v1alpha1.SchedulingEventTypeLiveMigration - } else if request.Resize { + case request.Resize: eventType = v1alpha1.SchedulingEventTypeResize - } else { + default: eventType = v1alpha1.SchedulingEventTypeInitialPlacement } @@ -123,6 +124,12 @@ func (c *novaPipelineConsumer) Consume( } flavor := request.Spec.Data.Flavor + + // Safe conversion 
with bounds checking to prevent integer overflow + vcpus := int(math.Min(float64(flavor.Data.VCPUs), math.MaxInt)) + ram := int(math.Min(float64(flavor.Data.MemoryMB), math.MaxInt)) + disk := int(math.Min(float64(flavor.Data.RootGB), math.MaxInt)) + decisionRequest := v1alpha1.SchedulingDecisionRequest{ ID: request.Spec.Data.InstanceUUID, RequestedAt: metav1.Now(), @@ -136,9 +143,9 @@ func (c *novaPipelineConsumer) Consume( VMware: request.VMware, Flavor: v1alpha1.Flavor{ Name: flavor.Data.Name, - VCPUs: int(flavor.Data.VCPUs), // assume this is safe - RAM: int(flavor.Data.MemoryMB), - Disk: int(flavor.Data.RootGB), + VCPUs: vcpus, + RAM: ram, + Disk: disk, }, } @@ -146,7 +153,7 @@ func (c *novaPipelineConsumer) Consume( // Try to update existing decision with retry logic for concurrent updates const maxRetries = 3 - for attempt := 0; attempt < maxRetries; attempt++ { + for attempt := range maxRetries { var existing v1alpha1.SchedulingDecision if err := c.Client.Get(context.Background(), objectKey, &existing); err == nil { // Decision already exists, append the new decision to the existing ones From 8db606276eb3dfb7afd5e5ca06c5018909697dc8 Mon Sep 17 00:00:00 2001 From: mblos Date: Mon, 6 Oct 2025 15:55:35 +0200 Subject: [PATCH 39/58] . 
--- decisions/internal/controller/controller.go | 67 +++++++------------ .../internal/controller/controller_test.go | 2 +- 2 files changed, 25 insertions(+), 44 deletions(-) diff --git a/decisions/internal/controller/controller.go b/decisions/internal/controller/controller.go index 5e864ea2..ba7a5235 100644 --- a/decisions/internal/controller/controller.go +++ b/decisions/internal/controller/controller.go @@ -582,51 +582,36 @@ func (r *SchedulingDecisionReconciler) generateGlobalDescription(results []v1alp hostChain = append(hostChain, winner) } - // Group consecutive decisions on the same host with their timestamps + // Build segments with durations in one pass segments := make([]hostSegment, 0) if len(hostChain) > 0 { currentHost := hostChain[0] - currentCount := 1 + segmentStart := 0 + + for i := 1; i <= len(hostChain); i++ { + // Check if we've reached the end or found a different host + if i == len(hostChain) || hostChain[i] != currentHost { + // Calculate duration for this segment + startTime := decisions[segmentStart].RequestedAt.Time + var endTime time.Time + if i == len(hostChain) { + endTime = time.Now() // Last segment + } else { + endTime = decisions[i].RequestedAt.Time + } - for i := 1; i < len(hostChain); i++ { - if hostChain[i] == currentHost { - currentCount++ - } else { segments = append(segments, hostSegment{ host: currentHost, - decisions: currentCount, + duration: endTime.Sub(startTime), + decisions: i - segmentStart, }) - currentHost = hostChain[i] - currentCount = 1 - } - } - // Add the last segment - segments = append(segments, hostSegment{ - host: currentHost, - decisions: currentCount, - }) - } - - // Calculate actual durations using timestamps - now := time.Now() - totalSegments := len(segments) - decisionIndex := 0 - for i := range segments { - segmentStartTime := decisions[decisionIndex].RequestedAt.Time - - // Find the end time for this segment - var segmentEndTime time.Time - if i == totalSegments-1 { - // Last segment: use current 
time - segmentEndTime = now - } else { - // Find the start of the next segment - decisionIndex += segments[i].decisions - segmentEndTime = decisions[decisionIndex].RequestedAt.Time + if i < len(hostChain) { + currentHost = hostChain[i] + segmentStart = i + } + } } - - segments[i].duration = segmentEndTime.Sub(segmentStartTime) } // Build chain string with durations @@ -640,7 +625,6 @@ func (r *SchedulingDecisionReconciler) generateGlobalDescription(results []v1alp chainParts = append(chainParts, part) } - // Loop detection: check if any host appears again after other hosts in between hasLoop := false seenHosts := make(map[string]bool) for segment := range segments { @@ -651,15 +635,12 @@ func (r *SchedulingDecisionReconciler) generateGlobalDescription(results []v1alp seenHosts[segments[segment].host] = true } - // Build description chainStr := strings.Join(chainParts, " -> ") - description := fmt.Sprintf("chain: %s", chainStr) - if hasLoop { - description += "; loop detected" + return fmt.Sprintf("chain (loop detected): %s", chainStr) + } else { + return fmt.Sprintf("chain: %s", chainStr) } - - return description } // SetupWithManager sets up the controller with the Manager. diff --git a/decisions/internal/controller/controller_test.go b/decisions/internal/controller/controller_test.go index f069dbdb..c3fc348e 100644 --- a/decisions/internal/controller/controller_test.go +++ b/decisions/internal/controller/controller_test.go @@ -708,7 +708,7 @@ func TestReconcileGlobalDescription(t *testing.T) { WithPipelineOutputs(NewTestPipelineOutput("weigher", map[string]float64{"host3": 0.0})). 
Build(), }, - expectedGlobalDescription: "chain: host1 (3h) -> host2 (1h) -> host1 (1h) -> host3 (0m); loop detected", + expectedGlobalDescription: "chain (loop detected): host1 (3h) -> host2 (1h) -> host1 (1h) -> host3 (0m)", }, { name: "same-host-all-decisions-no-loop", From e53bcab7876956331171fd1265cea61aacd73093 Mon Sep 17 00:00:00 2001 From: mblos Date: Mon, 6 Oct 2025 16:25:08 +0200 Subject: [PATCH 40/58] refactor --- internal/scheduler/nova/pipeline.go | 61 ++++++++++++----------------- 1 file changed, 24 insertions(+), 37 deletions(-) diff --git a/internal/scheduler/nova/pipeline.go b/internal/scheduler/nova/pipeline.go index be9ded96..18a1ee53 100644 --- a/internal/scheduler/nova/pipeline.go +++ b/internal/scheduler/nova/pipeline.go @@ -91,6 +91,7 @@ func (c *novaPipelineConsumer) Consume( inWeights map[string]float64, stepWeights map[string]map[string]float64, ) { + if c.Client == nil { return } @@ -151,47 +152,33 @@ func (c *novaPipelineConsumer) Consume( objectKey := client.ObjectKey{Name: request.Spec.Data.InstanceUUID} - // Try to update existing decision with retry logic for concurrent updates - const maxRetries = 3 - for attempt := range maxRetries { - var existing v1alpha1.SchedulingDecision - if err := c.Client.Get(context.Background(), objectKey, &existing); err == nil { - // Decision already exists, append the new decision to the existing ones - existing.Spec.Decisions = append(existing.Spec.Decisions, decisionRequest) + // Try to update existing decision first + var existing v1alpha1.SchedulingDecision + if err := c.Client.Get(context.Background(), objectKey, &existing); err == nil { + // Decision already exists, append the new decision to the existing ones + existing.Spec.Decisions = append(existing.Spec.Decisions, decisionRequest) - if err := c.Client.Update(context.Background(), &existing); err != nil { - // Check if it's a conflict error (concurrent update) - if attempt < maxRetries-1 { - slog.Warn("scheduler: conflict updating decision, 
retrying", "attempt", attempt+1, "resourceID", request.Spec.Data.InstanceUUID) - continue - } - slog.Error("scheduler: failed to update existing decision after retries", "error", err, "resourceID", request.Spec.Data.InstanceUUID) - return - } - slog.Info("scheduler: appended decision to existing resource", "resourceID", request.Spec.Data.InstanceUUID, "eventType", eventType) - return - } else { - // Decision doesn't exist, create a new one - decision := &v1alpha1.SchedulingDecision{ - ObjectMeta: ctrl.ObjectMeta{Name: request.Spec.Data.InstanceUUID}, - Spec: v1alpha1.SchedulingDecisionSpec{ - Decisions: []v1alpha1.SchedulingDecisionRequest{decisionRequest}, - }, - // Status will be filled in by the controller. - } - if err := c.Client.Create(context.Background(), decision); err != nil { - // Check if it's a conflict error (resource was created concurrently) - if attempt < maxRetries-1 { - slog.Warn("scheduler: conflict creating decision, retrying", "attempt", attempt+1, "resourceID", request.Spec.Data.InstanceUUID) - continue - } - slog.Error("scheduler: failed to create decision after retries", "error", err, "resourceID", request.Spec.Data.InstanceUUID) - return - } - slog.Info("scheduler: created new decision", "resourceID", request.Spec.Data.InstanceUUID, "eventType", eventType) + if err := c.Client.Update(context.Background(), &existing); err != nil { + slog.Error("scheduler: failed to update existing decision", "error", err, "resourceID", request.Spec.Data.InstanceUUID) return } + slog.Info("scheduler: appended decision to existing resource", "resourceID", request.Spec.Data.InstanceUUID, "eventType", eventType) + return + } + + // Decision doesn't exist, create a new one + decision := &v1alpha1.SchedulingDecision{ + ObjectMeta: ctrl.ObjectMeta{Name: request.Spec.Data.InstanceUUID}, + Spec: v1alpha1.SchedulingDecisionSpec{ + Decisions: []v1alpha1.SchedulingDecisionRequest{decisionRequest}, + }, + // Status will be filled in by the controller. 
+ } + if err := c.Client.Create(context.Background(), decision); err != nil { + slog.Error("scheduler: failed to create decision", "error", err, "resourceID", request.Spec.Data.InstanceUUID) + return } + slog.Info("scheduler: created new decision", "resourceID", request.Spec.Data.InstanceUUID, "eventType", eventType) } // Create a new Nova scheduler pipeline. From 3c509883648f49c363b71ef92303d11147cdee95 Mon Sep 17 00:00:00 2001 From: mblos Date: Mon, 6 Oct 2025 16:51:14 +0200 Subject: [PATCH 41/58] default changed --- decisions/internal/controller/conf.go | 5 ++++- decisions/internal/controller/test_helpers.go | 3 +-- decisions/internal/controller/ttl_controller.go | 2 +- decisions/internal/controller/ttl_controller_test.go | 6 +++--- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/decisions/internal/controller/conf.go b/decisions/internal/controller/conf.go index 3543d9ba..34524434 100644 --- a/decisions/internal/controller/conf.go +++ b/decisions/internal/controller/conf.go @@ -5,9 +5,12 @@ package controller import "time" +const ( + DefaultTTLHoursAfterDecision = 24 * time.Hour +) + // Configuration for the decisions operator. 
type Config struct { // TTL for scheduling decisions after the last decision's RequestedAt timestamp - // If not set, defaults to 14 days (336 hours) TTLHoursAfterDecision time.Duration `json:"ttlHoursAfterDecision,omitempty"` } diff --git a/decisions/internal/controller/test_helpers.go b/decisions/internal/controller/test_helpers.go index 86eb92f2..3c0f6be0 100644 --- a/decisions/internal/controller/test_helpers.go +++ b/decisions/internal/controller/test_helpers.go @@ -22,7 +22,6 @@ const ( DefaultTestTTL = 2 * time.Hour DefaultTestAge = 1 * time.Hour OldTestAge = 3 * time.Hour - DefaultTTLDays = 14 * 24 * time.Hour TestTolerance = 1 * time.Minute DefaultTestVCPUs = 1 DefaultTestRAM = 2048 @@ -297,7 +296,7 @@ func AssertDescriptionContains(t *testing.T, description string, expectedContent } // CreateTTLReconciler creates a TTL reconciler with the given TTL duration -// If ttl is 0, the reconciler will use its internal default (14 days) +// If ttl is 0, the reconciler will use its internal default func CreateTTLReconciler(fakeClient client.Client, scheme *runtime.Scheme, ttl time.Duration) *SchedulingDecisionTTLController { return &SchedulingDecisionTTLController{ Client: fakeClient, diff --git a/decisions/internal/controller/ttl_controller.go b/decisions/internal/controller/ttl_controller.go index 6dbe1ec9..f1260422 100644 --- a/decisions/internal/controller/ttl_controller.go +++ b/decisions/internal/controller/ttl_controller.go @@ -85,7 +85,7 @@ func (r *SchedulingDecisionTTLController) getTTL() time.Duration { if r.Conf.TTLHoursAfterDecision > 0 { return r.Conf.TTLHoursAfterDecision } - return 14 * 24 * time.Hour + return DefaultTTLHoursAfterDecision } func (r *SchedulingDecisionTTLController) SetupWithManager(mgr ctrl.Manager) error { diff --git a/decisions/internal/controller/ttl_controller_test.go b/decisions/internal/controller/ttl_controller_test.go index 592d9485..62b3ef3f 100644 --- a/decisions/internal/controller/ttl_controller_test.go +++ 
b/decisions/internal/controller/ttl_controller_test.go @@ -120,14 +120,14 @@ func TestTTLControllerDefaultTTL(t *testing.T) { t.Fatalf("Reconcile failed: %v", err) } - // 1-hour-old resource with 14-day default TTL should be preserved + // 1-hour-old resource with default TTL should be preserved AssertResourceExists(t, fakeClient, "default-ttl-decision") if result.RequeueAfter == 0 { t.Error("Expected requeue for resource with default TTL") } - // Verify requeue time is reasonable (approximately 13 days, 23 hours) - expectedRequeue := DefaultTTLDays - DefaultTestAge + // Verify requeue time is reasonable + expectedRequeue := DefaultTTLHoursAfterDecision - DefaultTestAge if result.RequeueAfter < expectedRequeue-TestTolerance || result.RequeueAfter > expectedRequeue+TestTolerance { t.Errorf("Requeue time %v not within expected range %v ± %v", result.RequeueAfter, expectedRequeue, TestTolerance) From 010c03ce29ab87addcac60ecd4588f20dd357de6 Mon Sep 17 00:00:00 2001 From: mblos Date: Mon, 6 Oct 2025 17:29:21 +0200 Subject: [PATCH 42/58] more generic flavor --- .../api/v1alpha1/schedulingdecision_types.go | 12 +++++----- .../api/v1alpha1/zz_generated.deepcopy.go | 10 ++++++++- .../decisions.cortex_schedulingdecisions.yaml | 21 +++++++----------- .../decisions.cortex_schedulingdecisions.yaml | 21 +++++++----------- .../decisions.cortex_schedulingdecisions.yaml | 21 +++++++----------- decisions/internal/controller/test_helpers.go | 11 ++++++---- internal/scheduler/nova/pipeline.go | 22 ++++++++++++++----- 7 files changed, 62 insertions(+), 56 deletions(-) diff --git a/decisions/api/v1alpha1/schedulingdecision_types.go b/decisions/api/v1alpha1/schedulingdecision_types.go index 6853f54c..3eb62ef3 100644 --- a/decisions/api/v1alpha1/schedulingdecision_types.go +++ b/decisions/api/v1alpha1/schedulingdecision_types.go @@ -4,6 +4,7 @@ package v1alpha1 import ( + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -28,10 +29,8 @@ type 
SchedulingDecisionPipelineSpec struct { } type Flavor struct { - Name string `json:"name"` - VCPUs int `json:"vcpus"` - RAM int `json:"ram"` - Disk int `json:"disk"` + Name string `json:"name"` + Resources map[string]resource.Quantity `json:"requests,omitempty"` } // SchedulingDecisionSpec defines the desired state of SchedulingDecision. @@ -47,9 +46,8 @@ type SchedulingDecisionRequest struct { Pipeline SchedulingDecisionPipelineSpec `json:"pipeline"` AvailabilityZone string `json:"availabilityZone,omitempty"` - VMware bool `json:"vmware"` - // TODO more generic flavor to support other than compute - Flavor Flavor `json:"flavor"` + + Flavor Flavor `json:"flavor,omitempty"` } type SchedulingDecisionState string diff --git a/decisions/api/v1alpha1/zz_generated.deepcopy.go b/decisions/api/v1alpha1/zz_generated.deepcopy.go index e1f81402..2b061852 100644 --- a/decisions/api/v1alpha1/zz_generated.deepcopy.go +++ b/decisions/api/v1alpha1/zz_generated.deepcopy.go @@ -8,12 +8,20 @@ package v1alpha1 import ( + "k8s.io/apimachinery/pkg/api/resource" runtime "k8s.io/apimachinery/pkg/runtime" ) // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *Flavor) DeepCopyInto(out *Flavor) { *out = *in + if in.Resources != nil { + in, out := &in.Resources, &out.Resources + *out = make(map[string]resource.Quantity, len(*in)) + for key, val := range *in { + (*out)[key] = val.DeepCopy() + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Flavor. @@ -141,7 +149,7 @@ func (in *SchedulingDecisionRequest) DeepCopyInto(out *SchedulingDecisionRequest } } in.Pipeline.DeepCopyInto(&out.Pipeline) - out.Flavor = in.Flavor + in.Flavor.DeepCopyInto(&out.Flavor) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingDecisionRequest. 
diff --git a/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml b/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml index 7b4dc19e..64061bcb 100644 --- a/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml +++ b/decisions/config/crd/bases/decisions.cortex_schedulingdecisions.yaml @@ -71,19 +71,18 @@ spec: type: string flavor: properties: - disk: - type: integer name: type: string - ram: - type: integer - vcpus: - type: integer + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object required: - - disk - name - - ram - - vcpus type: object id: type: string @@ -114,15 +113,11 @@ spec: requestedAt: format: date-time type: string - vmware: - type: boolean required: - eventType - - flavor - id - pipeline - requestedAt - - vmware type: object type: array required: diff --git a/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml b/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml index 7b4dc19e..64061bcb 100644 --- a/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml +++ b/decisions/config/crd/decisions.cortex_schedulingdecisions.yaml @@ -71,19 +71,18 @@ spec: type: string flavor: properties: - disk: - type: integer name: type: string - ram: - type: integer - vcpus: - type: integer + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object required: - - disk - name - - ram - - vcpus type: object id: type: string @@ -114,15 +113,11 @@ spec: requestedAt: format: date-time type: string - vmware: - type: boolean required: - eventType - - flavor - id - pipeline - requestedAt - - vmware type: object 
type: array required: diff --git a/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml b/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml index 0709885e..2cea3946 100644 --- a/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml +++ b/decisions/dist/chart/templates/crd/decisions.cortex_schedulingdecisions.yaml @@ -77,19 +77,18 @@ spec: type: string flavor: properties: - disk: - type: integer name: type: string - ram: - type: integer - vcpus: - type: integer + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object required: - - disk - name - - ram - - vcpus type: object id: type: string @@ -120,15 +119,11 @@ spec: requestedAt: format: date-time type: string - vmware: - type: boolean required: - eventType - - flavor - id - pipeline - requestedAt - - vmware type: object type: array required: diff --git a/decisions/internal/controller/test_helpers.go b/decisions/internal/controller/test_helpers.go index 3c0f6be0..07d3951f 100644 --- a/decisions/internal/controller/test_helpers.go +++ b/decisions/internal/controller/test_helpers.go @@ -8,6 +8,7 @@ import ( "testing" "time" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" ctrl "sigs.k8s.io/controller-runtime" @@ -46,10 +47,12 @@ func NewTestDecision(id string) *TestDecisionBuilder { Name: "test-pipeline", }, Flavor: decisionsv1alpha1.Flavor{ - Name: "test-flavor", - VCPUs: DefaultTestVCPUs, - RAM: DefaultTestRAM, - Disk: DefaultTestDisk, + Name: "test-flavor", + Resources: map[string]resource.Quantity{ + "cpu": *resource.NewQuantity(int64(DefaultTestVCPUs), resource.DecimalSI), + "memory": *resource.NewQuantity(int64(DefaultTestRAM), resource.DecimalSI), + "storage": 
*resource.NewQuantity(int64(DefaultTestDisk), resource.DecimalSI), + }, }, }, } diff --git a/internal/scheduler/nova/pipeline.go b/internal/scheduler/nova/pipeline.go index 18a1ee53..2e0798fb 100644 --- a/internal/scheduler/nova/pipeline.go +++ b/internal/scheduler/nova/pipeline.go @@ -19,6 +19,7 @@ import ( "github.com/cobaltcore-dev/cortex/internal/scheduler/nova/plugins/shared" "github.com/cobaltcore-dev/cortex/internal/scheduler/nova/plugins/vmware" "github.com/cobaltcore-dev/cortex/internal/sync/openstack/nova" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -131,6 +132,20 @@ func (c *novaPipelineConsumer) Consume( ram := int(math.Min(float64(flavor.Data.MemoryMB), math.MaxInt)) disk := int(math.Min(float64(flavor.Data.RootGB), math.MaxInt)) + resources := map[string]resource.Quantity{ + "cpu": *resource.NewQuantity(int64(vcpus), resource.DecimalSI), + "memory": *resource.NewQuantity(int64(ram), resource.DecimalSI), + "storage": *resource.NewQuantity(int64(disk), resource.DecimalSI), + } + + if request.VMware { + resources["hypervisor.vmware"] = *resource.NewQuantity(1, resource.DecimalSI) + resources["hypervisor.kvm"] = *resource.NewQuantity(0, resource.DecimalSI) + } else { + resources["hypervisor.vmware"] = *resource.NewQuantity(0, resource.DecimalSI) + resources["hypervisor.kvm"] = *resource.NewQuantity(1, resource.DecimalSI) + } + decisionRequest := v1alpha1.SchedulingDecisionRequest{ ID: request.Spec.Data.InstanceUUID, RequestedAt: metav1.Now(), @@ -141,12 +156,9 @@ func (c *novaPipelineConsumer) Consume( Outputs: outputs, }, AvailabilityZone: request.Spec.Data.AvailabilityZone, - VMware: request.VMware, Flavor: v1alpha1.Flavor{ - Name: flavor.Data.Name, - VCPUs: vcpus, - RAM: ram, - Disk: disk, + Name: flavor.Data.Name, + Resources: resources, }, } From e217303134320c03cdfc8f58368b398765262e86 Mon Sep 17 00:00:00 2001 From: mblos 
Date: Mon, 6 Oct 2025 17:39:31 +0200 Subject: [PATCH 43/58] refactor --- decisions/internal/controller/controller.go | 30 +++------------------ 1 file changed, 4 insertions(+), 26 deletions(-) diff --git a/decisions/internal/controller/controller.go b/decisions/internal/controller/controller.go index ba7a5235..b10e87a3 100644 --- a/decisions/internal/controller/controller.go +++ b/decisions/internal/controller/controller.go @@ -24,10 +24,8 @@ import ( ) const ( - // MinScoreValue represents the minimum possible score value MinScoreValue = -999999 - // String format templates for descriptions selectedPerfectFmt = "Selected: %s (score: %.2f), certainty: perfect, %d hosts evaluated." selectedCertaintyFmt = "Selected: %s (score: %.2f), certainty: %s (gap: %.2f), %d hosts evaluated." noHostsRemainingFmt = "No hosts remaining after filtering, %d hosts evaluated" @@ -36,32 +34,26 @@ const ( inputDemotedFmt = " Input favored %s (score: %.2f, now #%d with %.2f), final winner was #%d in input (%.2f→%.2f)." ) -// certaintyLevel represents a threshold and its corresponding certainty level type certaintyLevel struct { threshold float64 level string } -// certaintyLevels maps score gaps to certainty levels (ordered from highest to lowest threshold) var certaintyLevels = []certaintyLevel{ {0.5, "high"}, {0.2, "medium"}, {0.0, "low"}, } -// getCertaintyLevel returns the certainty level for a given score gap func getCertaintyLevel(gap float64) string { for _, cl := range certaintyLevels { if gap >= cl.threshold { return cl.level } } - return "low" // fallback + return "low" } -// noDeleteEventsPredicate is a custom predicate that filters out delete events -// to prevent race conditions with the TTL controller. Generic events are typically -// used for periodic reconciliation or external triggers, so we allow them. 
type noDeleteEventsPredicate struct{} func (noDeleteEventsPredicate) Create(e event.CreateEvent) bool { @@ -78,17 +70,15 @@ func (noDeleteEventsPredicate) Delete(e event.DeleteEvent) bool { } func (noDeleteEventsPredicate) Generic(e event.GenericEvent) bool { - // Allow generic events (periodic reconciliation, external triggers) return true } -// hostScore represents a host-score pair for sorting operations type hostScore struct { host string score float64 } -// mapToSortedHostScores converts a score map to sorted hostScore slice (highest to lowest) +// mapToSortedHostScores sorts hosts by score descending func mapToSortedHostScores(scores map[string]float64) []hostScore { sorted := make([]hostScore, 0, len(scores)) for host, score := range scores { @@ -100,11 +90,10 @@ func mapToSortedHostScores(scores map[string]float64) []hostScore { return sorted } -// findHostPosition returns the 1-based position of a host in sorted hosts slice func findHostPosition(hosts []hostScore, targetHost string) int { for i, hs := range hosts { if hs.host == targetHost { - return i + 1 // 1-based position + return i + 1 } } return -1 @@ -124,8 +113,6 @@ type SchedulingDecisionReconciler struct { // +kubebuilder:rbac:groups=decisions.cortex,resources=schedulingdecisions/status,verbs=get;update;patch // +kubebuilder:rbac:groups=decisions.cortex,resources=schedulingdecisions/finalizers,verbs=update -// Reconcile is part of the main kubernetes reconciliation loop which aims to -// move the current state of the cluster closer to the desired state. func (r *SchedulingDecisionReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { _ = logf.FromContext(ctx) // Fetch the decision object. 
@@ -163,19 +150,14 @@ func (r *SchedulingDecisionReconciler) Reconcile(ctx context.Context, req ctrl.R return ctrl.Result{}, nil } - // Calculate final scores with full pipeline for this decision finalScores, deletedHosts := r.calculateScores(decision.Input, decision.Pipeline.Outputs) - // Calculate step-by-step impact for the winner for this decision stepImpacts := r.calculateStepImpacts(decision.Input, decision.Pipeline.Outputs, finalScores) - // Find minimal critical path for this decision criticalSteps, criticalStepCount := r.findCriticalSteps(decision.Input, decision.Pipeline.Outputs, finalScores) - // Sort finalScores by score (highest to lowest) and generate enhanced description for this decision orderedScores, description := r.generateOrderedScoresAndDescription(finalScores, decision.Input, criticalSteps, criticalStepCount, len(decision.Pipeline.Outputs), stepImpacts) - // Create result for this decision result := v1alpha1.SchedulingDecisionResult{ ID: decision.ID, Description: description, @@ -185,10 +167,8 @@ func (r *SchedulingDecisionReconciler) Reconcile(ctx context.Context, req ctrl.R results = append(results, result) } - // Generate global description for multiple decisions globalDescription := r.generateGlobalDescription(results, res.Spec.Decisions) - // Update status with all results res.Status.State = v1alpha1.SchedulingDecisionStateResolved res.Status.Error = "" res.Status.DecisionCount = len(res.Spec.Decisions) @@ -202,7 +182,6 @@ func (r *SchedulingDecisionReconciler) Reconcile(ctx context.Context, req ctrl.R return ctrl.Result{}, nil // No need to requeue. 
} -// validateInput checks if the input has at least one host func (r *SchedulingDecisionReconciler) validateInput(input map[string]float64) error { if len(input) == 0 { return fmt.Errorf("No hosts provided in input") @@ -281,8 +260,7 @@ func (r *SchedulingDecisionReconciler) calculateScores(input map[string]float64, return finalScores, deletedHosts } -// findCriticalSteps identifies which pipeline steps are essential for the final decision -// using backward elimination approach +// findCriticalSteps determines which steps change the winning host using backward elimination func (r *SchedulingDecisionReconciler) findCriticalSteps(input map[string]float64, outputs []v1alpha1.SchedulingDecisionPipelineOutputSpec, baselineFinalScores map[string]float64) ([]string, int) { if len(outputs) == 0 { return []string{}, 0 From dc28f3f7d1bc03b1ca32143d564b4e93884e6c8b Mon Sep 17 00:00:00 2001 From: mblos Date: Mon, 6 Oct 2025 17:56:44 +0200 Subject: [PATCH 44/58] logs --- decisions/internal/controller/controller.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/decisions/internal/controller/controller.go b/decisions/internal/controller/controller.go index b10e87a3..30aadde5 100644 --- a/decisions/internal/controller/controller.go +++ b/decisions/internal/controller/controller.go @@ -179,6 +179,9 @@ func (r *SchedulingDecisionReconciler) Reconcile(ctx context.Context, req ctrl.R return ctrl.Result{}, err } + log := logf.FromContext(ctx) + log.Info("Updated SchedulingDecision", "name", res.Name, "decisions", len(res.Spec.Decisions)) + return ctrl.Result{}, nil // No need to requeue. 
} @@ -205,6 +208,10 @@ func (r *SchedulingDecisionReconciler) validatePipelineHosts(input map[string]fl func (r *SchedulingDecisionReconciler) setErrorState(ctx context.Context, res *v1alpha1.SchedulingDecision, err error) error { res.Status.State = v1alpha1.SchedulingDecisionStateError res.Status.Error = err.Error() + + log := logf.FromContext(ctx) + log.Error(err, "Updated SchedulingDecision with error", "name", res.Name) + return r.Status().Update(ctx, res) } From f38f9d5d6db652d8d09a3fb03e8c0e81bcfbe71e Mon Sep 17 00:00:00 2001 From: mblos Date: Tue, 7 Oct 2025 08:48:52 +0200 Subject: [PATCH 45/58] github workflow --- .github/workflows/push-charts.yaml | 18 +++++++++++ .github/workflows/push-images.yaml | 41 +++++++++++++++++++++++++ .github/workflows/test.yaml | 34 ++++++++++++++++++++ .github/workflows/update-appversion.yml | 21 +++++++++++++ 4 files changed, 114 insertions(+) diff --git a/.github/workflows/push-charts.yaml b/.github/workflows/push-charts.yaml index 7c3a14b1..70cbd76c 100644 --- a/.github/workflows/push-charts.yaml +++ b/.github/workflows/push-charts.yaml @@ -80,3 +80,21 @@ jobs: CHART_PACKAGE=$(ls $CHART_DIR/*.tgz) helm push $CHART_PACKAGE oci://${{ env.REGISTRY }}/${{ github.repository }}/charts/ done + - name: Get all changed decisions Chart.yaml files + id: changed-chart-yaml-files-decisions + uses: tj-actions/changed-files@v47 + with: + files: | + decisions/dist/chart/Chart.yaml + - name: Push decisions charts to registry + if: steps.changed-chart-yaml-files-decisions.outputs.all_changed_files != '' + shell: bash + env: + ALL_CHANGED_FILES: ${{ steps.changed-chart-yaml-files-decisions.outputs.all_changed_files }} + run: | + for CHART_FILE in ${ALL_CHANGED_FILES}; do + CHART_DIR=$(dirname $CHART_FILE) + helm package $CHART_DIR --dependency-update --destination $CHART_DIR + CHART_PACKAGE=$(ls $CHART_DIR/*.tgz) + helm push $CHART_PACKAGE oci://${{ env.REGISTRY }}/${{ github.repository }}/charts/ + done diff --git 
a/.github/workflows/push-images.yaml b/.github/workflows/push-images.yaml index 43abfbe7..cd9437ba 100644 --- a/.github/workflows/push-images.yaml +++ b/.github/workflows/push-images.yaml @@ -139,3 +139,44 @@ jobs: subject-name: ${{ env.REGISTRY }}/${{ github.repository }}-reservations-operator subject-digest: ${{ steps.push_cortex_reservations.outputs.digest }} push-to-registry: true + # Only build and push the decisions operator image if there are changes + # in the decisions directory. + - name: Get all changed decisions/ files + id: changed_decisions_files + uses: tj-actions/changed-files@v47 + with: + files: | + decisions/** + - name: Docker Meta (Cortex Decisions) + if: steps.changed_decisions_files.outputs.all_changed_files != '' + id: meta_cortex_decisions + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ github.repository }}-decisions-operator + tags: | + type=semver,pattern={{version}} + type=semver,pattern={{major}}.{{minor}} + type=sha + latest + - name: Build and Push Cortex Decisions Operator + if: steps.changed_decisions_files.outputs.all_changed_files != '' + id: push_cortex_decisions + uses: docker/build-push-action@v6 + with: + context: . 
+ file: Dockerfile.kubebuilder + platforms: linux/amd64,linux/arm64 + push: true + tags: ${{ steps.meta_cortex_decisions.outputs.tags }} + labels: ${{ steps.meta_cortex_decisions.outputs.labels }} + build-args: | + GO_MOD_PATH=decisions + GIT_TAG=${{ github.ref_name }} + GIT_COMMIT=${{ github.sha }} + - name: Generate Artifact Attestation for Cortex Decisions + if: steps.changed_decisions_files.outputs.all_changed_files != '' + uses: actions/attest-build-provenance@v3 + with: + subject-name: ${{ env.REGISTRY }}/${{ github.repository }}-decisions-operator + subject-digest: ${{ steps.push_cortex_decisions.outputs.digest }} + push-to-registry: true diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 9888dff8..79833951 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -25,6 +25,8 @@ jobs: go test -v ./... echo "Testing reservations module..." cd reservations && go test -v ./... + echo "Testing decisions module..." + cd ../decisions && go test -v ./... test-with-docker: # We don't need to run this longer test if the previous one already failed. @@ -61,6 +63,14 @@ jobs: -coverprofile=reservations_profile.cov ./internal/... go tool cover -func reservations_profile.cov > reservations_func_coverage.txt cd .. + + echo "Running tests for decisions module..." + cd decisions + go test -v \ + -coverpkg=./internal/... \ + -coverprofile=decisions_profile.cov ./internal/... + go tool cover -func decisions_profile.cov > decisions_func_coverage.txt + cd .. 
- name: Upload coverage files uses: actions/upload-artifact@v4 with: @@ -68,6 +78,7 @@ jobs: path: | pr_func_coverage.txt reservations/reservations_func_coverage.txt + decisions/decisions_func_coverage.txt # Steps below are only executed if the workflow is triggered by a pull request - name: Delete old coverage comments (PR only) if: ${{ github.event_name == 'pull_request' }} @@ -123,6 +134,19 @@ jobs: reservationsCoverageReport = 'No coverage data available'; } + // Read decisions module coverage report + let decisionsCoverageReport = ''; + let decisionsCoveragePercentage = 'unknown'; + try { + decisionsCoverageReport = fs.readFileSync('decisions/decisions_func_coverage.txt', 'utf8'); + const decisionsLines = decisionsCoverageReport.trim().split('\n'); + const decisionsLastLine = decisionsLines[decisionsLines.length - 1]; + const decisionsCoverageMatch = decisionsLastLine.match(/total:\s+\(statements\)\s+(\d+\.\d+)%/); + decisionsCoveragePercentage = decisionsCoverageMatch ? decisionsCoverageMatch[1] : 'unknown'; + } catch (error) { + decisionsCoverageReport = 'No coverage data available'; + } + let commentBody = '\n'; commentBody += '## Test Coverage Report\n\n'; @@ -144,6 +168,16 @@ jobs: commentBody += '```text\n'; commentBody += reservationsCoverageReport; commentBody += '```\n'; + commentBody += '\n\n'; + + // Decisions module coverage + commentBody += '
\n'; + commentBody += 'Coverage in decisions module (decisions/internal/): '; + commentBody += decisionsCoveragePercentage; + commentBody += '%\n\n'; + commentBody += '```text\n'; + commentBody += decisionsCoverageReport; + commentBody += '```\n'; commentBody += '
\n'; // Post the comment diff --git a/.github/workflows/update-appversion.yml b/.github/workflows/update-appversion.yml index f11e8980..dd4c3c51 100644 --- a/.github/workflows/update-appversion.yml +++ b/.github/workflows/update-appversion.yml @@ -30,6 +30,13 @@ jobs: files: | postgres/** + - name: Get all changed decisions/ files + id: changed_decisions_files + uses: tj-actions/changed-files@v47 + with: + files: | + decisions/** + # Always bumped - name: Update appVersion in cortex-core Chart.yaml run: | @@ -69,3 +76,17 @@ jobs: git add reservations/dist/chart/Chart.yaml git commit -m "Bump cortex-reservations chart appVersions to ${{ steps.vars.outputs.sha }} [skip ci]" || echo "No changes to commit" git push origin HEAD:main + + # Only bumped if there are changes in the decisions directory. + - name: Update appVersion in cortex-decisions Chart.yaml + if: steps.changed_decisions_files.outputs.all_changed_files != '' + run: | + sed -i 's/^\([ ]*appVersion:[ ]*\).*/\1"${{ steps.vars.outputs.sha }}"/' decisions/dist/chart/Chart.yaml + - name: Commit and push changes for cortex-decisions + if: steps.changed_decisions_files.outputs.all_changed_files != '' + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git add decisions/dist/chart/Chart.yaml + git commit -m "Bump cortex-decisions chart appVersions to ${{ steps.vars.outputs.sha }} [skip ci]" || echo "No changes to commit" + git push origin HEAD:main From 5ceebd3aabe036694ad9b358086603ba298ef939 Mon Sep 17 00:00:00 2001 From: mblos Date: Tue, 7 Oct 2025 09:35:51 +0200 Subject: [PATCH 46/58] tests --- .../internal/controller/controller_test.go | 330 ++++++++++++++++++ 1 file changed, 330 insertions(+) diff --git a/decisions/internal/controller/controller_test.go b/decisions/internal/controller/controller_test.go index c3fc348e..9b20cad4 100644 --- a/decisions/internal/controller/controller_test.go +++ 
b/decisions/internal/controller/controller_test.go @@ -4,6 +4,7 @@ package controller import ( + "fmt" "testing" "time" @@ -758,3 +759,332 @@ func TestReconcileGlobalDescription(t *testing.T) { }) } } + +// TestReconcileEmptyDecisionsList tests the case where no decisions are provided +func TestReconcileEmptyDecisionsList(t *testing.T) { + resource := NewTestSchedulingDecision("test-empty-decisions"). + WithDecisions(). // No decisions provided + Build() + + fakeClient, _ := SetupTestEnvironment(t, resource) + req := CreateTestRequest("test-empty-decisions") + + reconciler := CreateSchedulingReconciler(fakeClient) + _, err := reconciler.Reconcile(t.Context(), req) + if err != nil { + t.Fatalf("Reconcile returned an error: %v", err) + } + + // Fetch and verify the updated resource + updatedResource := AssertResourceExists(t, fakeClient, "test-empty-decisions") + AssertResourceState(t, updatedResource, v1alpha1.SchedulingDecisionStateError) + AssertResourceError(t, updatedResource, "No decisions provided in spec") + + t.Logf("Empty decisions test completed: state=%s, error=%s", updatedResource.Status.State, updatedResource.Status.Error) +} + +// TestReconcileResourceNotFound tests the case where the resource is deleted during reconciliation +func TestReconcileResourceNotFound(t *testing.T) { + fakeClient, _ := SetupTestEnvironment(t) // No resource created + req := CreateTestRequest("non-existent-resource") + + reconciler := CreateSchedulingReconciler(fakeClient) + _, err := reconciler.Reconcile(t.Context(), req) + + // Should return an error when resource is not found + if err == nil { + t.Fatalf("Expected error when resource not found, got nil") + } + + t.Logf("Resource not found test completed: error=%v", err) +} + +// TestUtilityFunctions tests the standalone utility functions +func TestUtilityFunctions(t *testing.T) { + t.Run("findWinner", func(t *testing.T) { + tests := []struct { + name string + scores map[string]float64 + expectedWinner string + expectedScore 
float64 + }{ + { + name: "empty-map", + scores: map[string]float64{}, + expectedWinner: "", + expectedScore: MinScoreValue, + }, + { + name: "single-host", + scores: map[string]float64{"host1": 5.0}, + expectedWinner: "host1", + expectedScore: 5.0, + }, + { + name: "clear-winner", + scores: map[string]float64{"host1": 3.0, "host2": 1.0, "host3": 2.0}, + expectedWinner: "host1", + expectedScore: 3.0, + }, + { + name: "tied-scores", + scores: map[string]float64{"host1": 2.0, "host2": 2.0}, + expectedWinner: "", // Don't check specific winner for tied scores (map iteration order is not deterministic) + expectedScore: 2.0, + }, + { + name: "negative-scores", + scores: map[string]float64{"host1": -1.0, "host2": -2.0}, + expectedWinner: "host1", + expectedScore: -1.0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + winner, score := findWinner(tt.scores) + if tt.expectedWinner != "" && winner != tt.expectedWinner { + t.Errorf("Expected winner '%s', got '%s'", tt.expectedWinner, winner) + } + if score != tt.expectedScore { + t.Errorf("Expected score %f, got %f", tt.expectedScore, score) + } + // For tied scores, just verify we got one of the tied hosts + if tt.name == "tied-scores" { + if winner != "host1" && winner != "host2" { + t.Errorf("Expected winner to be either 'host1' or 'host2', got '%s'", winner) + } + } + }) + } + }) + + t.Run("mapToSortedHostScores", func(t *testing.T) { + scores := map[string]float64{ + "host1": 1.0, + "host2": 3.0, + "host3": 2.0, + } + sorted := mapToSortedHostScores(scores) + + if len(sorted) != 3 { + t.Errorf("Expected 3 sorted hosts, got %d", len(sorted)) + } + + // Should be sorted by score descending + if sorted[0].host != "host2" || sorted[0].score != 3.0 { + t.Errorf("Expected first host to be host2 with score 3.0, got %s with %f", sorted[0].host, sorted[0].score) + } + if sorted[1].host != "host3" || sorted[1].score != 2.0 { + t.Errorf("Expected second host to be host3 with score 2.0, got %s with %f", 
sorted[1].host, sorted[1].score) + } + if sorted[2].host != "host1" || sorted[2].score != 1.0 { + t.Errorf("Expected third host to be host1 with score 1.0, got %s with %f", sorted[2].host, sorted[2].score) + } + }) + + t.Run("findHostPosition", func(t *testing.T) { + hosts := []hostScore{ + {host: "host2", score: 3.0}, + {host: "host3", score: 2.0}, + {host: "host1", score: 1.0}, + } + + tests := []struct { + targetHost string + expectedPosition int + }{ + {"host2", 1}, // First position + {"host3", 2}, // Second position + {"host1", 3}, // Third position + {"host4", -1}, // Not found + } + + for _, tt := range tests { + position := findHostPosition(hosts, tt.targetHost) + if position != tt.expectedPosition { + t.Errorf("Expected position %d for host %s, got %d", tt.expectedPosition, tt.targetHost, position) + } + } + }) + + t.Run("getCertaintyLevel", func(t *testing.T) { + tests := []struct { + gap float64 + expectedCertainty string + }{ + {1.0, "high"}, // >= 0.5 + {0.5, "high"}, // exactly 0.5 + {0.3, "medium"}, // >= 0.2, < 0.5 + {0.2, "medium"}, // exactly 0.2 + {0.1, "low"}, // >= 0.0, < 0.2 + {0.0, "low"}, // exactly 0.0 + {-0.1, "low"}, // < 0.0 + } + + for _, tt := range tests { + certainty := getCertaintyLevel(tt.gap) + if certainty != tt.expectedCertainty { + t.Errorf("Expected certainty '%s' for gap %f, got '%s'", tt.expectedCertainty, tt.gap, certainty) + } + } + }) +} + +// TestStepImpactAnalysis tests the step impact calculation logic +func TestStepImpactAnalysis(t *testing.T) { + reconciler := &SchedulingDecisionReconciler{} + + t.Run("promotion-scenarios", func(t *testing.T) { + input := map[string]float64{ + "host1": 1.0, // Will become winner + "host2": 3.0, // Initial winner + "host3": 2.0, + } + + outputs := []v1alpha1.SchedulingDecisionPipelineOutputSpec{ + { + Step: "promotion-step", + Activations: map[string]float64{ + "host1": 2.5, // host1: 3.5 (becomes winner) + "host2": -0.5, // host2: 2.5 (demoted) + "host3": 0.0, // host3: 2.0 + }, + 
}, + } + + finalScores := map[string]float64{ + "host1": 3.5, + "host2": 2.5, + "host3": 2.0, + } + + impacts := reconciler.calculateStepImpacts(input, outputs, finalScores) + + if len(impacts) != 1 { + t.Fatalf("Expected 1 step impact, got %d", len(impacts)) + } + + impact := impacts[0] + if impact.Step != "promotion-step" { + t.Errorf("Expected step 'promotion-step', got '%s'", impact.Step) + } + if !impact.PromotedToFirst { + t.Errorf("Expected PromotedToFirst to be true") + } + if impact.ScoreDelta != 2.5 { + t.Errorf("Expected ScoreDelta 2.5, got %f", impact.ScoreDelta) + } + if impact.CompetitorsRemoved != 0 { + t.Errorf("Expected CompetitorsRemoved 0, got %d", impact.CompetitorsRemoved) + } + }) + + t.Run("competitor-removal", func(t *testing.T) { + input := map[string]float64{ + "host1": 1.0, // Will become winner after competitors removed + "host2": 3.0, // Initial winner, will be removed + "host3": 2.0, // Will be removed + } + + outputs := []v1alpha1.SchedulingDecisionPipelineOutputSpec{ + { + Step: "filter-step", + Activations: map[string]float64{ + "host1": 0.0, // Only host1 survives + }, + }, + } + + finalScores := map[string]float64{ + "host1": 1.0, + } + + impacts := reconciler.calculateStepImpacts(input, outputs, finalScores) + + if len(impacts) != 1 { + t.Fatalf("Expected 1 step impact, got %d", len(impacts)) + } + + impact := impacts[0] + if impact.CompetitorsRemoved != 2 { + t.Errorf("Expected CompetitorsRemoved 2, got %d", impact.CompetitorsRemoved) + } + if !impact.PromotedToFirst { + t.Errorf("Expected PromotedToFirst to be true (host1 was not #1 before, became #1 after competitors removed)") + } + if impact.ScoreDelta != 0.0 { + t.Errorf("Expected ScoreDelta 0.0, got %f", impact.ScoreDelta) + } + }) + + t.Run("empty-inputs", func(t *testing.T) { + // Test with empty final scores + impacts := reconciler.calculateStepImpacts(map[string]float64{}, []v1alpha1.SchedulingDecisionPipelineOutputSpec{}, map[string]float64{}) + if len(impacts) != 0 { 
+ t.Errorf("Expected 0 impacts for empty inputs, got %d", len(impacts)) + } + + // Test with no outputs + impacts = reconciler.calculateStepImpacts(map[string]float64{"host1": 1.0}, []v1alpha1.SchedulingDecisionPipelineOutputSpec{}, map[string]float64{"host1": 1.0}) + if len(impacts) != 0 { + t.Errorf("Expected 0 impacts for no outputs, got %d", len(impacts)) + } + }) +} + +// TestLargeDatasetPerformance tests the controller with larger datasets +func TestLargeDatasetPerformance(t *testing.T) { + // Create a decision with many hosts + input := make(map[string]float64) + activations := make(map[string]float64) + + for i := 0; i < 100; i++ { + hostName := fmt.Sprintf("host%d", i) + input[hostName] = float64(i) + activations[hostName] = float64(i % 10) // Vary activations + } + + decision := NewTestDecision("large-decision"). + WithInput(input). + WithPipelineOutputs( + NewTestPipelineOutput("weigher1", activations), + NewTestPipelineOutput("weigher2", activations), + NewTestPipelineOutput("weigher3", activations), + ). + Build() + + resource := NewTestSchedulingDecision("test-large-dataset"). + WithDecisions(decision). 
+ Build() + + fakeClient, _ := SetupTestEnvironment(t, resource) + req := CreateTestRequest("test-large-dataset") + + reconciler := CreateSchedulingReconciler(fakeClient) + + start := time.Now() + _, err := reconciler.Reconcile(t.Context(), req) + duration := time.Since(start) + + if err != nil { + t.Fatalf("Reconcile returned an error: %v", err) + } + + // Verify the result + updatedResource := AssertResourceExists(t, fakeClient, "test-large-dataset") + AssertResourceState(t, updatedResource, v1alpha1.SchedulingDecisionStateResolved) + AssertResultCount(t, updatedResource, 1) + + result := updatedResource.Status.Results[0] + if len(result.FinalScores) != 100 { + t.Errorf("Expected 100 final scores, got %d", len(result.FinalScores)) + } + + t.Logf("Large dataset test completed in %v with %d hosts", duration, len(result.FinalScores)) + + // Performance check - should complete within reasonable time + if duration > 5*time.Second { + t.Errorf("Large dataset processing took too long: %v", duration) + } +} From 126b72ec92cdb03642fb92725a411dbda5ea1958 Mon Sep 17 00:00:00 2001 From: mblos Date: Tue, 7 Oct 2025 10:13:23 +0200 Subject: [PATCH 47/58] config naming --- decisions/dist/chart/values.yaml | 4 ++++ decisions/internal/controller/conf.go | 4 ++-- decisions/internal/controller/test_helpers.go | 2 +- decisions/internal/controller/ttl_controller.go | 6 +++--- decisions/internal/controller/ttl_controller_test.go | 2 +- 5 files changed, 11 insertions(+), 7 deletions(-) diff --git a/decisions/dist/chart/values.yaml b/decisions/dist/chart/values.yaml index df67c7d0..7176d3e7 100644 --- a/decisions/dist/chart/values.yaml +++ b/decisions/dist/chart/values.yaml @@ -116,6 +116,10 @@ decisions: endpoints: # The URL of the Nova external scheduler service. novaExternalScheduler: "http://cortex-nova-scheduler:8080/scheduler/nova/external" + # TTL for scheduling decisions after the last decision's RequestedAt timestamp + # Accepts Go duration strings like "24h", "48h", "72h", etc. 
+ # If not specified, defaults to 24 hours + ttlAfterDecision: "24h" # Config provided here will override the config provided above. secrets: # Override the endpoints and credentials to your OpenStack. diff --git a/decisions/internal/controller/conf.go b/decisions/internal/controller/conf.go index 34524434..6c610eea 100644 --- a/decisions/internal/controller/conf.go +++ b/decisions/internal/controller/conf.go @@ -6,11 +6,11 @@ package controller import "time" const ( - DefaultTTLHoursAfterDecision = 24 * time.Hour + DefaultTTLAfterDecision = 24 * time.Hour ) // Configuration for the decisions operator. type Config struct { // TTL for scheduling decisions after the last decision's RequestedAt timestamp - TTLHoursAfterDecision time.Duration `json:"ttlHoursAfterDecision,omitempty"` + TTLAfterDecision time.Duration `json:"ttlAfterDecision,omitempty"` } diff --git a/decisions/internal/controller/test_helpers.go b/decisions/internal/controller/test_helpers.go index 07d3951f..5ebae177 100644 --- a/decisions/internal/controller/test_helpers.go +++ b/decisions/internal/controller/test_helpers.go @@ -305,7 +305,7 @@ func CreateTTLReconciler(fakeClient client.Client, scheme *runtime.Scheme, ttl t Client: fakeClient, Scheme: scheme, Conf: Config{ - TTLHoursAfterDecision: ttl, + TTLAfterDecision: ttl, }, } } diff --git a/decisions/internal/controller/ttl_controller.go b/decisions/internal/controller/ttl_controller.go index f1260422..8e88aaa1 100644 --- a/decisions/internal/controller/ttl_controller.go +++ b/decisions/internal/controller/ttl_controller.go @@ -82,10 +82,10 @@ func (r *SchedulingDecisionTTLController) Reconcile(ctx context.Context, req ctr } func (r *SchedulingDecisionTTLController) getTTL() time.Duration { - if r.Conf.TTLHoursAfterDecision > 0 { - return r.Conf.TTLHoursAfterDecision + if r.Conf.TTLAfterDecision > 0 { + return r.Conf.TTLAfterDecision } - return DefaultTTLHoursAfterDecision + return DefaultTTLAfterDecision } func (r 
*SchedulingDecisionTTLController) SetupWithManager(mgr ctrl.Manager) error { diff --git a/decisions/internal/controller/ttl_controller_test.go b/decisions/internal/controller/ttl_controller_test.go index 62b3ef3f..fc35c1b8 100644 --- a/decisions/internal/controller/ttl_controller_test.go +++ b/decisions/internal/controller/ttl_controller_test.go @@ -127,7 +127,7 @@ func TestTTLControllerDefaultTTL(t *testing.T) { } // Verify requeue time is reasonable - expectedRequeue := DefaultTTLHoursAfterDecision - DefaultTestAge + expectedRequeue := DefaultTTLAfterDecision - DefaultTestAge if result.RequeueAfter < expectedRequeue-TestTolerance || result.RequeueAfter > expectedRequeue+TestTolerance { t.Errorf("Requeue time %v not within expected range %v ± %v", result.RequeueAfter, expectedRequeue, TestTolerance) From f7465f5debaf82a38ae0a470ea0a8ed0cf1cf0b9 Mon Sep 17 00:00:00 2001 From: mblos Date: Tue, 7 Oct 2025 11:11:36 +0200 Subject: [PATCH 48/58] fix config --- decisions/dist/chart/values.yaml | 8 ++++---- decisions/internal/controller/conf.go | 8 +++----- decisions/internal/controller/test_helpers.go | 5 +++-- decisions/internal/controller/ttl_controller.go | 16 +++++++++++++--- .../internal/controller/ttl_controller_test.go | 2 +- 5 files changed, 24 insertions(+), 15 deletions(-) diff --git a/decisions/dist/chart/values.yaml b/decisions/dist/chart/values.yaml index 7176d3e7..d8ab60db 100644 --- a/decisions/dist/chart/values.yaml +++ b/decisions/dist/chart/values.yaml @@ -116,10 +116,10 @@ decisions: endpoints: # The URL of the Nova external scheduler service. novaExternalScheduler: "http://cortex-nova-scheduler:8080/scheduler/nova/external" - # TTL for scheduling decisions after the last decision's RequestedAt timestamp - # Accepts Go duration strings like "24h", "48h", "72h", etc. 
- # If not specified, defaults to 24 hours - ttlAfterDecision: "24h" + # TTL for scheduling decisions after the last decision's RequestedAt timestamp (in seconds) + # Examples: 86400 (24 hours), 172800 (48 hours), 259200 (72 hours) + # If not specified, defaults to 86400 seconds (24 hours) + ttlAfterDecisionSeconds: 86400 # Config provided here will override the config provided above. secrets: # Override the endpoints and credentials to your OpenStack. diff --git a/decisions/internal/controller/conf.go b/decisions/internal/controller/conf.go index 6c610eea..5a329b25 100644 --- a/decisions/internal/controller/conf.go +++ b/decisions/internal/controller/conf.go @@ -3,14 +3,12 @@ package controller -import "time" - const ( - DefaultTTLAfterDecision = 24 * time.Hour + DefaultTTLAfterDecisionSeconds = 24 * 60 * 60 // 24 hours in seconds ) // Configuration for the decisions operator. type Config struct { - // TTL for scheduling decisions after the last decision's RequestedAt timestamp - TTLAfterDecision time.Duration `json:"ttlAfterDecision,omitempty"` + // TTL for scheduling decisions after the last decision's RequestedAt timestamp (in seconds) + TTLAfterDecisionSeconds int `json:"ttlAfterDecisionSeconds,omitempty"` } diff --git a/decisions/internal/controller/test_helpers.go b/decisions/internal/controller/test_helpers.go index 5ebae177..c61ef73a 100644 --- a/decisions/internal/controller/test_helpers.go +++ b/decisions/internal/controller/test_helpers.go @@ -299,13 +299,14 @@ func AssertDescriptionContains(t *testing.T, description string, expectedContent } // CreateTTLReconciler creates a TTL reconciler with the given TTL duration -// If ttl is 0, the reconciler will use its internal default +// If ttlSeconds is 0, the reconciler will use its internal default func CreateTTLReconciler(fakeClient client.Client, scheme *runtime.Scheme, ttl time.Duration) *SchedulingDecisionTTLController { + ttlSeconds := int(ttl.Seconds()) return &SchedulingDecisionTTLController{ 
Client: fakeClient, Scheme: scheme, Conf: Config{ - TTLAfterDecision: ttl, + TTLAfterDecisionSeconds: ttlSeconds, }, } } diff --git a/decisions/internal/controller/ttl_controller.go b/decisions/internal/controller/ttl_controller.go index 8e88aaa1..f1faf4eb 100644 --- a/decisions/internal/controller/ttl_controller.go +++ b/decisions/internal/controller/ttl_controller.go @@ -82,13 +82,23 @@ func (r *SchedulingDecisionTTLController) Reconcile(ctx context.Context, req ctr } func (r *SchedulingDecisionTTLController) getTTL() time.Duration { - if r.Conf.TTLAfterDecision > 0 { - return r.Conf.TTLAfterDecision + if r.Conf.TTLAfterDecisionSeconds > 0 { + return time.Duration(r.Conf.TTLAfterDecisionSeconds) * time.Second } - return DefaultTTLAfterDecision + return time.Duration(DefaultTTLAfterDecisionSeconds) * time.Second } func (r *SchedulingDecisionTTLController) SetupWithManager(mgr ctrl.Manager) error { + log := mgr.GetLogger().WithName("ttl-controller") + + // Log the TTL configuration on startup + ttl := r.getTTL() + seconds := r.Conf.TTLAfterDecisionSeconds + if seconds == 0 { + seconds = DefaultTTLAfterDecisionSeconds + } + log.Info("TTL Controller configured", "ttlAfterDecisionSeconds", seconds, "ttlAfterDecision", ttl.String()) + return ctrl.NewControllerManagedBy(mgr). For(&decisionsv1alpha1.SchedulingDecision{}). Named("schedulingdecision-ttl"). 
diff --git a/decisions/internal/controller/ttl_controller_test.go b/decisions/internal/controller/ttl_controller_test.go index fc35c1b8..8fa19f91 100644 --- a/decisions/internal/controller/ttl_controller_test.go +++ b/decisions/internal/controller/ttl_controller_test.go @@ -127,7 +127,7 @@ func TestTTLControllerDefaultTTL(t *testing.T) { } // Verify requeue time is reasonable - expectedRequeue := DefaultTTLAfterDecision - DefaultTestAge + expectedRequeue := time.Duration(DefaultTTLAfterDecisionSeconds)*time.Second - DefaultTestAge if result.RequeueAfter < expectedRequeue-TestTolerance || result.RequeueAfter > expectedRequeue+TestTolerance { t.Errorf("Requeue time %v not within expected range %v ± %v", result.RequeueAfter, expectedRequeue, TestTolerance) From bf7030238c077a9b23fa4e1177488161a7c509ea Mon Sep 17 00:00:00 2001 From: mblos Date: Tue, 7 Oct 2025 11:28:55 +0200 Subject: [PATCH 49/58] reconcile issue --- decisions/internal/controller/controller.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/decisions/internal/controller/controller.go b/decisions/internal/controller/controller.go index 30aadde5..5554e8e5 100644 --- a/decisions/internal/controller/controller.go +++ b/decisions/internal/controller/controller.go @@ -118,8 +118,10 @@ func (r *SchedulingDecisionReconciler) Reconcile(ctx context.Context, req ctrl.R // Fetch the decision object. var res v1alpha1.SchedulingDecision if err := r.Get(ctx, req.NamespacedName, &res); err != nil { - // Can happen when the resource was just deleted. 
- return ctrl.Result{}, err + // Resource was deleted or doesn't exist - nothing to process + // This can happen when the TTL controller deletes a resource while + // a reconcile request is still queued for the main controller + return ctrl.Result{}, client.IgnoreNotFound(err) } // Validate we have at least one decision From cc3be3a294417914e2fdd4e7759377bcf6c78977 Mon Sep 17 00:00:00 2001 From: mblos Date: Tue, 7 Oct 2025 11:34:49 +0200 Subject: [PATCH 50/58] reconcile issue --- decisions/internal/controller/controller.go | 14 +++++++++++++- decisions/internal/controller/controller_test.go | 9 +++++---- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/decisions/internal/controller/controller.go b/decisions/internal/controller/controller.go index 5554e8e5..b299507c 100644 --- a/decisions/internal/controller/controller.go +++ b/decisions/internal/controller/controller.go @@ -11,6 +11,7 @@ import ( "strings" "time" + apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -178,7 +179,18 @@ func (r *SchedulingDecisionReconciler) Reconcile(ctx context.Context, req ctrl.R res.Status.Results = results if err := r.Status().Update(ctx, &res); err != nil { - return ctrl.Result{}, err + // Handle the case where resource was deleted during processing + if client.IgnoreNotFound(err) != nil { + // If it's a conflict error, just log and ignore - resource was modified concurrently + if apierrors.IsConflict(err) { + log := logf.FromContext(ctx) + log.Info("Resource was modified during processing, ignoring conflict", "name", res.Name, "error", err.Error()) + return ctrl.Result{}, nil + } + return ctrl.Result{}, err + } + // Resource was deleted (e.g., by TTL controller), nothing to update + return ctrl.Result{}, nil } log := logf.FromContext(ctx) diff --git a/decisions/internal/controller/controller_test.go b/decisions/internal/controller/controller_test.go 
index 9b20cad4..17f8d788 100644 --- a/decisions/internal/controller/controller_test.go +++ b/decisions/internal/controller/controller_test.go @@ -791,12 +791,13 @@ func TestReconcileResourceNotFound(t *testing.T) { reconciler := CreateSchedulingReconciler(fakeClient) _, err := reconciler.Reconcile(t.Context(), req) - // Should return an error when resource is not found - if err == nil { - t.Fatalf("Expected error when resource not found, got nil") + // Should gracefully handle when resource is not found (no error) + // This can happen when TTL controller deletes a resource while main controller has queued reconcile request + if err != nil { + t.Fatalf("Expected no error when resource not found (should be handled gracefully), got: %v", err) } - t.Logf("Resource not found test completed: error=%v", err) + t.Logf("Resource not found test completed: gracefully handled with no error") } // TestUtilityFunctions tests the standalone utility functions From 2fd6e8c8d42a8e2e5cbcc44b3dddbd0a2a364700 Mon Sep 17 00:00:00 2001 From: mblos Date: Tue, 7 Oct 2025 11:44:49 +0200 Subject: [PATCH 51/58] adding ttl on startup --- .../internal/controller/ttl_controller.go | 68 +++++++++++++++++-- .../controller/ttl_controller_test.go | 62 +++++++++++++++++ 2 files changed, 125 insertions(+), 5 deletions(-) diff --git a/decisions/internal/controller/ttl_controller.go b/decisions/internal/controller/ttl_controller.go index f1faf4eb..1dc3b942 100644 --- a/decisions/internal/controller/ttl_controller.go +++ b/decisions/internal/controller/ttl_controller.go @@ -7,6 +7,7 @@ import ( "context" "time" + "github.com/go-logr/logr" "k8s.io/apimachinery/pkg/runtime" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -17,6 +18,20 @@ import ( decisionsv1alpha1 "github.com/cobaltcore-dev/cortex/decisions/api/v1alpha1" ) +// TTLStartupReconciler handles startup reconciliation for existing resources +type TTLStartupReconciler struct { + ttlController 
*SchedulingDecisionTTLController +} + +// Start implements the Runnable interface and runs startup reconciliation +func (s *TTLStartupReconciler) Start(ctx context.Context) error { + log := logf.FromContext(ctx).WithName("ttl-startup-reconciler") + log.Info("Starting TTL startup reconciliation for existing resources") + + s.ttlController.reconcileAllResourcesOnStartup(ctx) + return nil +} + // SchedulingDecisionTTLController handles automatic cleanup of resolved SchedulingDecision resources // after a configurable TTL period. type SchedulingDecisionTTLController struct { @@ -41,6 +56,18 @@ func (r *SchedulingDecisionTTLController) Reconcile(ctx context.Context, req ctr return ctrl.Result{}, client.IgnoreNotFound(err) } + return r.processResourceForTTL(ctx, &decision, log) +} + +func (r *SchedulingDecisionTTLController) getTTL() time.Duration { + if r.Conf.TTLAfterDecisionSeconds > 0 { + return time.Duration(r.Conf.TTLAfterDecisionSeconds) * time.Second + } + return time.Duration(DefaultTTLAfterDecisionSeconds) * time.Second +} + +// processResourceForTTL handles the common TTL logic for a single resource +func (r *SchedulingDecisionTTLController) processResourceForTTL(ctx context.Context, decision *decisionsv1alpha1.SchedulingDecision, log logr.Logger) (ctrl.Result, error) { // Calculate age based on last decision's RequestedAt timestamp var referenceTime time.Time if len(decision.Spec.Decisions) > 0 { @@ -62,7 +89,7 @@ func (r *SchedulingDecisionTTLController) Reconcile(ctx context.Context, req ctr "age", age.String(), "ttl", ttl.String()) - if err := r.Delete(ctx, &decision); err != nil { + if err := r.Delete(ctx, decision); err != nil { if client.IgnoreNotFound(err) != nil { log.Error(err, "Failed to delete expired SchedulingDecision", "name", decision.Name) return ctrl.Result{}, err @@ -81,11 +108,37 @@ func (r *SchedulingDecisionTTLController) Reconcile(ctx context.Context, req ctr return ctrl.Result{RequeueAfter: remainingTime}, nil } -func (r 
*SchedulingDecisionTTLController) getTTL() time.Duration { - if r.Conf.TTLAfterDecisionSeconds > 0 { - return time.Duration(r.Conf.TTLAfterDecisionSeconds) * time.Second +// reconcileAllResourcesOnStartup processes all existing SchedulingDecision resources +// to check for expired ones that should be cleaned up after controller restart +func (r *SchedulingDecisionTTLController) reconcileAllResourcesOnStartup(ctx context.Context) { + log := logf.FromContext(ctx).WithName("ttl-startup-reconciler") + + var resources decisionsv1alpha1.SchedulingDecisionList + if err := r.List(ctx, &resources); err != nil { + log.Error(err, "Failed to list SchedulingDecision resources during startup reconciliation") + return } - return time.Duration(DefaultTTLAfterDecisionSeconds) * time.Second + + log.Info("Processing existing resources for TTL cleanup", "resourceCount", len(resources.Items)) + + processedCount := 0 + expiredCount := 0 + + for _, resource := range resources.Items { + // Use the shared TTL processing logic + result, err := r.processResourceForTTL(ctx, &resource, log) + if err != nil { + log.Error(err, "Failed to process resource during startup reconciliation", "name", resource.Name) + } else if result.RequeueAfter == 0 { + // Resource was deleted (no requeue means it was expired and deleted) + expiredCount++ + } + processedCount++ + } + + log.Info("Startup TTL reconciliation completed", + "processedResources", processedCount, + "expiredResources", expiredCount) } func (r *SchedulingDecisionTTLController) SetupWithManager(mgr ctrl.Manager) error { @@ -99,6 +152,11 @@ func (r *SchedulingDecisionTTLController) SetupWithManager(mgr ctrl.Manager) err } log.Info("TTL Controller configured", "ttlAfterDecisionSeconds", seconds, "ttlAfterDecision", ttl.String()) + // Add the startup reconciler as a runnable + if err := mgr.Add(&TTLStartupReconciler{ttlController: r}); err != nil { + return err + } + return ctrl.NewControllerManagedBy(mgr). 
For(&decisionsv1alpha1.SchedulingDecision{}). Named("schedulingdecision-ttl"). diff --git a/decisions/internal/controller/ttl_controller_test.go b/decisions/internal/controller/ttl_controller_test.go index 8fa19f91..f4945625 100644 --- a/decisions/internal/controller/ttl_controller_test.go +++ b/decisions/internal/controller/ttl_controller_test.go @@ -148,3 +148,65 @@ func TestTTLControllerNonExistentResource(t *testing.T) { t.Error("Expected no requeue for non-existent resource") } } + +func TestTTLStartupReconciliation(t *testing.T) { + // Create resources with different ages + expiredDecision := NewTestDecision("expired-decision"). + WithRequestedAt(time.Now().Add(-OldTestAge)). + Build() + + youngDecision := NewTestDecision("young-decision"). + WithRequestedAt(time.Now().Add(-DefaultTestAge)). + Build() + + expiredResource := NewTestSchedulingDecision("expired-resource"). + WithDecisions(expiredDecision). + Build() + + youngResource := NewTestSchedulingDecision("young-resource"). + WithDecisions(youngDecision). 
+ Build() + + fakeClient, scheme := SetupTestEnvironment(t, expiredResource, youngResource) + reconciler := CreateTTLReconciler(fakeClient, scheme, DefaultTestTTL) + + // Run startup reconciliation + reconciler.reconcileAllResourcesOnStartup(context.Background()) + + // Verify expired resource was deleted + AssertResourceDeleted(t, fakeClient, "expired-resource") + + // Verify young resource still exists + AssertResourceExists(t, fakeClient, "young-resource") +} + +func TestTTLStartupReconcilerRunnable(t *testing.T) { + fakeClient, scheme := SetupTestEnvironment(t) + reconciler := CreateTTLReconciler(fakeClient, scheme, DefaultTestTTL) + + // Create the startup reconciler + startupReconciler := &TTLStartupReconciler{ttlController: reconciler} + + // Test the Start method + err := startupReconciler.Start(context.Background()) + if err != nil { + t.Fatalf("TTLStartupReconciler.Start() should not return error: %v", err) + } + + // The method should complete without error (no resources to process) + t.Log("TTLStartupReconciler.Start() completed successfully") +} + +func TestTTLStartupReconciliationErrorHandling(t *testing.T) { + // This test verifies that startup reconciliation handles errors gracefully + // We can't easily simulate List() failures with the fake client, but we can + // test that the method doesn't panic and handles empty results properly + + fakeClient, scheme := SetupTestEnvironment(t) // No resources + reconciler := CreateTTLReconciler(fakeClient, scheme, DefaultTestTTL) + + // This should complete without error even with no resources + reconciler.reconcileAllResourcesOnStartup(context.Background()) + + t.Log("Startup reconciliation handled empty resource list gracefully") +} From 6c7826d8e50d367b6e6c00fc9917937569091964 Mon Sep 17 00:00:00 2001 From: mblos Date: Tue, 7 Oct 2025 13:23:26 +0200 Subject: [PATCH 52/58] fix id --- internal/scheduler/nova/pipeline.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/internal/scheduler/nova/pipeline.go b/internal/scheduler/nova/pipeline.go index 2e0798fb..d2a9f11b 100644 --- a/internal/scheduler/nova/pipeline.go +++ b/internal/scheduler/nova/pipeline.go @@ -147,7 +147,7 @@ func (c *novaPipelineConsumer) Consume( } decisionRequest := v1alpha1.SchedulingDecisionRequest{ - ID: request.Spec.Data.InstanceUUID, + ID: *request.Context.GlobalRequestID, RequestedAt: metav1.Now(), EventType: eventType, Input: inWeights, From 1cce015eda5458d9bab9c4ff4a33a03a3e9255b7 Mon Sep 17 00:00:00 2001 From: mblos Date: Tue, 7 Oct 2025 13:56:48 +0200 Subject: [PATCH 53/58] always print global summary --- decisions/dist/chart/values.yaml | 2 -- decisions/internal/controller/controller.go | 9 +++++---- decisions/internal/controller/controller_test.go | 8 ++++---- decisions/internal/controller/ttl_controller.go | 8 +------- 4 files changed, 10 insertions(+), 17 deletions(-) diff --git a/decisions/dist/chart/values.yaml b/decisions/dist/chart/values.yaml index d8ab60db..8ff57f39 100644 --- a/decisions/dist/chart/values.yaml +++ b/decisions/dist/chart/values.yaml @@ -117,8 +117,6 @@ decisions: # The URL of the Nova external scheduler service. novaExternalScheduler: "http://cortex-nova-scheduler:8080/scheduler/nova/external" # TTL for scheduling decisions after the last decision's RequestedAt timestamp (in seconds) - # Examples: 86400 (24 hours), 172800 (48 hours), 259200 (72 hours) - # If not specified, defaults to 86400 seconds (24 hours) ttlAfterDecisionSeconds: 86400 # Config provided here will override the config provided above. 
secrets: diff --git a/decisions/internal/controller/controller.go b/decisions/internal/controller/controller.go index b299507c..7456bcce 100644 --- a/decisions/internal/controller/controller.go +++ b/decisions/internal/controller/controller.go @@ -567,11 +567,11 @@ func formatDuration(d time.Duration) string { return fmt.Sprintf("%dm", int(d.Minutes())) } -// generateGlobalDescription creates a global description for multiple decisions +// generateGlobalDescription creates a global description for decisions // showing the host chain with durations and detecting simple loops func (r *SchedulingDecisionReconciler) generateGlobalDescription(results []v1alpha1.SchedulingDecisionResult, decisions []v1alpha1.SchedulingDecisionRequest) string { - if len(results) <= 1 { - return "" // No global description needed for single or no decisions + if len(results) == 0 { + return "" // No decisions to describe } // Extract host chain from winners @@ -594,7 +594,8 @@ func (r *SchedulingDecisionReconciler) generateGlobalDescription(results []v1alp startTime := decisions[segmentStart].RequestedAt.Time var endTime time.Time if i == len(hostChain) { - endTime = time.Now() // Last segment + // For the last segment, use the same time as start time (0 duration) + endTime = startTime } else { endTime = decisions[i].RequestedAt.Time } diff --git a/decisions/internal/controller/controller_test.go b/decisions/internal/controller/controller_test.go index 17f8d788..5d65a7b9 100644 --- a/decisions/internal/controller/controller_test.go +++ b/decisions/internal/controller/controller_test.go @@ -650,14 +650,14 @@ func TestReconcileGlobalDescription(t *testing.T) { expectedGlobalDescription string }{ { - name: "single-decision-no-global", + name: "single-decision-with-global", decisions: []v1alpha1.SchedulingDecisionRequest{ NewTestDecision("decision-1"). WithInput(map[string]float64{"host1": 1.0, "host2": 2.0}). 
- WithPipelineOutputs(NewTestPipelineOutput("weigher", map[string]float64{"host1": 1.0, "host2": 0.0})). + WithPipelineOutputs(NewTestPipelineOutput("weigher", map[string]float64{"host1": 1.5, "host2": 0.0})). Build(), }, - expectedGlobalDescription: "", // No global description for single decision + expectedGlobalDescription: "chain: host1 (0m)", // Single decision shows chain with 0m duration - host1 wins with 2.5 vs host2 with 2.0 }, { name: "simple-chain-no-loop", @@ -725,7 +725,7 @@ func TestReconcileGlobalDescription(t *testing.T) { WithPipelineOutputs(NewTestPipelineOutput("weigher", map[string]float64{"host1": 1.0, "host3": 0.0})). Build(), }, - expectedGlobalDescription: "chain: host1 (2h; 2 decisions)", + expectedGlobalDescription: "chain: host1 (0m; 2 decisions)", // Last segment always shows 0m duration }, } diff --git a/decisions/internal/controller/ttl_controller.go b/decisions/internal/controller/ttl_controller.go index 1dc3b942..db5affa2 100644 --- a/decisions/internal/controller/ttl_controller.go +++ b/decisions/internal/controller/ttl_controller.go @@ -144,13 +144,7 @@ func (r *SchedulingDecisionTTLController) reconcileAllResourcesOnStartup(ctx con func (r *SchedulingDecisionTTLController) SetupWithManager(mgr ctrl.Manager) error { log := mgr.GetLogger().WithName("ttl-controller") - // Log the TTL configuration on startup - ttl := r.getTTL() - seconds := r.Conf.TTLAfterDecisionSeconds - if seconds == 0 { - seconds = DefaultTTLAfterDecisionSeconds - } - log.Info("TTL Controller configured", "ttlAfterDecisionSeconds", seconds, "ttlAfterDecision", ttl.String()) + log.Info("TTL Controller configured", "ttlAfterDecisionSeconds", r.getTTL().String()) // Add the startup reconciler as a runnable if err := mgr.Add(&TTLStartupReconciler{ttlController: r}); err != nil { From 77c916bdf05ccbab6fa43b0b2f5838d394b782a6 Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Tue, 7 Oct 2025 14:12:44 +0200 Subject: [PATCH 54/58] Update MQTT scheduling decision fetch 
URL and add logging for debugging --- visualizer/nova.html | 39 ++++++++++++++++------------ visualizer/scheduling-decisions.html | 4 +-- 2 files changed, 25 insertions(+), 18 deletions(-) diff --git a/visualizer/nova.html b/visualizer/nova.html index 2b47b0c5..211ad6a8 100644 --- a/visualizer/nova.html +++ b/visualizer/nova.html @@ -82,15 +82,16 @@ if (vmId != null) { loadSchedulingDecision(vmId) .then(data => state.schedulingDecision = data) + .then(() => redraw()) .catch(error => alert('Error loading scheduling decision: ' + error)); } async function loadSchedulingDecision(vmId) { - const response = await fetch(`http://localhost:8020/scheduler/nova/scheduling-decisions?vm_id=${vmId}`); + const response = await fetch(`http://localhost:8014/scheduler/nova/scheduling-decisions?vm_id=${vmId}`); if (!response.ok) { throw new Error(`HTTP error! status: ${response.status}`); } - return await response.json();; + return await response.json(); } function getUtilizationByHost() { @@ -108,36 +109,40 @@ const steps = {} const order = [] - for (const step of schedulingDecision.spec.pipeline.outputs) { + const decision = schedulingDecision.spec.decisions[0] + const status = schedulingDecision.status.results[0] + + for (const step of decision.pipeline.outputs ?? 
[]) { steps[step.step] = step.activations order.push(step.step) } + return { request: { spec: { "nova_object.data": { - weights: schedulingDecision.spec.input, - availability_zone: schedulingDecision.spec.availbilityZone, + weights: decision.input, + availability_zone: decision.availbilityZone, flavor: { "nova_object.data": { - name: schedulingDecision.spec.flavor.name, - memory_mb: schedulingDecision.spec.flavor.ram, - vcpus: schedulingDecision.spec.flavor.vcpus, - disk: schedulingDecision.spec.flavor.disk, + name: decision.flavor.name, + memory_mb: decision.flavor.ram, + vcpus: decision.flavor.vcpus, + disk: decision.flavor.disk, } }, } }, - vmware: schedulingDecision.spec.vmware, - live: schedulingDecision.spec.live, - resize: schedulingDecision.spec.resize, - weights: schedulingDecision.spec.input, + vmware: decision.vmware, + live: decision.live, + resize: decision.resize, + weights: decision.input, }, - steps: schedulingDecision.status.steps, - in: schedulingDecision.spec.input, - out: schedulingDecision.status.finalScores, + steps: status.steps, + in: decision.input, + out: status.finalScores, steps, order } @@ -146,6 +151,8 @@ async function redraw() { if (Object.keys(state.pipeline).length === 0 && state.schedulingDecision === null) return + console.log('Redrawing with state:', state) + let data = state.pipeline if (state.schedulingDecision !== null) { data = convertSchedulingDecisionToPipeline(state.schedulingDecision) diff --git a/visualizer/scheduling-decisions.html b/visualizer/scheduling-decisions.html index c6d402af..8befc2b7 100644 --- a/visualizer/scheduling-decisions.html +++ b/visualizer/scheduling-decisions.html @@ -188,10 +188,10 @@

Scheduling Decisions

+ body { + margin: 0; + padding: 0; + height: 100vh; + overflow: hidden; + } -
- -
-
Waiting for mqtt data to arrive...
-
+ .app-container { + display: grid; + grid-template-columns: var(--sidebar-width, 300px) 1fr; + grid-template-rows: 60px 1fr; + grid-template-areas: + "header header" + "sidebar main"; + height: 100vh; + transition: grid-template-columns 0.3s ease; + } -
- - - -
+ .app-container.sidebar-collapsed { + --sidebar-width: 0px; + grid-template-columns: 0px 1fr; + } - + + \ No newline at end of file diff --git a/visualizer/scheduling-decisions.html b/visualizer/scheduling-decisions.html deleted file mode 100644 index 8befc2b7..00000000 --- a/visualizer/scheduling-decisions.html +++ /dev/null @@ -1,243 +0,0 @@ - - - - - - - - Cortex Scheduling Decisions - - - - - - - - - - - - -
- -
-
Waiting for Scheduling Decisions
-
- - - - - - \ No newline at end of file From ec6d1e08ae1e623b97ae5fdf7a895f0103c5dbfc Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Wed, 8 Oct 2025 11:03:20 +0200 Subject: [PATCH 56/58] Fix CSS style syntax in weight display for proper rendering --- visualizer/nova.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/visualizer/nova.html b/visualizer/nova.html index 3c26321f..0fbef361 100644 --- a/visualizer/nova.html +++ b/visualizer/nova.html @@ -398,7 +398,7 @@

Recent Decisions

innerTable += `
` From a3a0fdf5cc6155ae955f9730c9e56ba196d19588 Mon Sep 17 00:00:00 2001 From: mblos Date: Wed, 8 Oct 2025 11:09:42 +0200 Subject: [PATCH 57/58] fix request id --- internal/scheduler/nova/pipeline.go | 38 ++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/internal/scheduler/nova/pipeline.go b/internal/scheduler/nova/pipeline.go index d2a9f11b..ce3b1b2c 100644 --- a/internal/scheduler/nova/pipeline.go +++ b/internal/scheduler/nova/pipeline.go @@ -125,17 +125,33 @@ func (c *novaPipelineConsumer) Consume( }) } - flavor := request.Spec.Data.Flavor + // Initialize default values for resource calculation + var vcpus, ram, disk int + var flavorName string + var resources map[string]resource.Quantity - // Safe conversion with bounds checking to prevent integer overflow - vcpus := int(math.Min(float64(flavor.Data.VCPUs), math.MaxInt)) - ram := int(math.Min(float64(flavor.Data.MemoryMB), math.MaxInt)) - disk := int(math.Min(float64(flavor.Data.RootGB), math.MaxInt)) + if request.Spec.Data.Flavor.Data.Name == "" { + slog.Warn("scheduler: Flavor data is missing, using zero values for resources", "instanceUUID", request.Spec.Data.InstanceUUID) + // Use zero values for resources + resources = map[string]resource.Quantity{ + "cpu": *resource.NewQuantity(0, resource.DecimalSI), + "memory": *resource.NewQuantity(0, resource.DecimalSI), + "storage": *resource.NewQuantity(0, resource.DecimalSI), + } + flavorName = "unknown" + } else { + flavor := request.Spec.Data.Flavor + flavorName = flavor.Data.Name - resources := map[string]resource.Quantity{ - "cpu": *resource.NewQuantity(int64(vcpus), resource.DecimalSI), - "memory": *resource.NewQuantity(int64(ram), resource.DecimalSI), - "storage": *resource.NewQuantity(int64(disk), resource.DecimalSI), + vcpus = int(math.Min(float64(flavor.Data.VCPUs), math.MaxInt)) + ram = int(math.Min(float64(flavor.Data.MemoryMB), math.MaxInt)) + disk = int(math.Min(float64(flavor.Data.RootGB), math.MaxInt)) + + 
resources = map[string]resource.Quantity{ + "cpu": *resource.NewQuantity(int64(vcpus), resource.DecimalSI), + "memory": *resource.NewQuantity(int64(ram), resource.DecimalSI), + "storage": *resource.NewQuantity(int64(disk), resource.DecimalSI), + } } if request.VMware { @@ -147,7 +163,7 @@ func (c *novaPipelineConsumer) Consume( } decisionRequest := v1alpha1.SchedulingDecisionRequest{ - ID: *request.Context.GlobalRequestID, + ID: request.Context.RequestID, RequestedAt: metav1.Now(), EventType: eventType, Input: inWeights, @@ -157,7 +173,7 @@ func (c *novaPipelineConsumer) Consume( }, AvailabilityZone: request.Spec.Data.AvailabilityZone, Flavor: v1alpha1.Flavor{ - Name: flavor.Data.Name, + Name: flavorName, Resources: resources, }, } From a7e1bf952b1a3009a46beda41797c17671a4eda7 Mon Sep 17 00:00:00 2001 From: mblos Date: Wed, 8 Oct 2025 11:23:08 +0200 Subject: [PATCH 58/58] test added --- internal/scheduler/nova/pipeline_test.go | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/internal/scheduler/nova/pipeline_test.go b/internal/scheduler/nova/pipeline_test.go index cbd6e5a0..50aa152d 100644 --- a/internal/scheduler/nova/pipeline_test.go +++ b/internal/scheduler/nova/pipeline_test.go @@ -330,3 +330,27 @@ func TestPremodifier_ModifyRequest_PreservesOtherFields(t *testing.T) { t.Error("original host weight should have been replaced") } } + +// Test that the consumer handles missing flavor data correctly +func TestConsumerMissingFlavorData(t *testing.T) { + consumer := &novaPipelineConsumer{Client: nil} + + request := api.ExternalSchedulerRequest{ + Context: api.NovaRequestContext{ + RequestID: "test-request-id", + }, + Spec: api.NovaObject[api.NovaSpec]{ + Data: api.NovaSpec{ + InstanceUUID: "test-uuid", + Flavor: api.NovaObject[api.NovaFlavor]{ + Data: api.NovaFlavor{ + Name: "", // Empty flavor name triggers missing data handling + }, + }, + }, + }, + } + + // Should handle missing flavor data without panic and use fallback values + 
consumer.Consume(request, []string{}, map[string]float64{}, map[string]map[string]float64{}) +}
${host}
@@ -199,7 +241,7 @@ } } for (const host of hostnames) { - const hasWeight = state.pipeline.out[host] !== undefined + const hasWeight = data.out[host] !== undefined const utilization = utilizations[host] const cpusUsed = utilization.vcpusUsed @@ -246,21 +288,55 @@ } table += '
${weight.toFixed(2)}