From 62c2497b52ebb8d40f7e53d0a4f92a2240a5afc8 Mon Sep 17 00:00:00 2001 From: Abhishek S A Date: Thu, 21 May 2026 16:08:42 +0530 Subject: [PATCH 1/2] Update upgrade_oim.yml Signed-off-by: Abhishek S A --- upgrade/playbooks/upgrade_oim.yml | 65 +++++++++++++++++++++---------- 1 file changed, 44 insertions(+), 21 deletions(-) diff --git a/upgrade/playbooks/upgrade_oim.yml b/upgrade/playbooks/upgrade_oim.yml index 9e39726d7d..fa39d7a63a 100644 --- a/upgrade/playbooks/upgrade_oim.yml +++ b/upgrade/playbooks/upgrade_oim.yml @@ -15,28 +15,25 @@ # ============================================================================ # upgrade_oim.yml — Internal playbook (imported by upgrade.yml --tags oim) # ============================================================================ -# Upgrades OIM components: OpenCHAMI containers + BuildStream (if enabled). +# Upgrades OIM components: OpenCHAMI containers. # Prerequisites: prepare_upgrade.yml must have been run first. # Reads upgrade_manifest.yml and skips if oim already completed. # # Flow: # 1. Pre-flight: read manifest, check idempotency -# 2. User approval prompt before proceeding +# 2. User approval prompt before proceeding (shows full upgrade plan) # 3. OpenCHAMI container upgrade (pg_dump, deployment-recipes, image pull, # ordered restart, DB migration, validation) -# 4. BuildStream container upgrade (conditional on enable_build_stream) -# 5. Mark OIM as completed in manifest +# 4. 
Mark OIM as completed in manifest # ============================================================================ -- name: Upgrade OIM (OpenCHAMI + conditional BuildStream) +- name: Upgrade OIM (OpenCHAMI) hosts: localhost connection: local gather_facts: true vars: manifest_path: /opt/omnia/.data/upgrade_manifest.yml component_name: oim - input_project_dir: "/opt/omnia/input/project_default" - build_stream_config_path: "/opt/omnia/input/project_default/build_stream_config.yml" tasks: # ── Pre-flight: manifest read + idempotency ───────────────────────── - name: Read upgrade_manifest.yml @@ -55,25 +52,51 @@ # ── User approval before proceeding ───────────────────────────────── # Bypass with: -e skip_approval=true (for CI/CD automation) - - name: Display OIM upgrade summary and request approval + - name: Show Omnia upgrade plan and wait for operator confirmation ansible.builtin.pause: prompt: |2 - ══════════════════════════════════════════════════════════════ - OIM UPGRADE — APPROVAL REQUIRED + OMNIA UPGRADE — OPERATOR APPROVAL REQUIRED ══════════════════════════════════════════════════════════════ - Source: {{ manifest.source_version | default('2.1.0.0') }} - Target: {{ manifest.target_version | default('2.2.0.0') }} - - This will upgrade the following OIM components: - 1. OpenCHAMI containers (pg_dump backup → deployment-recipes - update → image pull → ordered restart → DB migration) - 2. BuildStream container (if enabled in build_stream_config.yml) - - WARNING: This operation modifies running OIM services. - Ensure you have reviewed prepare_upgrade.yml output first. - Press ENTER to proceed or Ctrl+C to abort. + ── Version Transition ─────────────────────────────────────── + Current Omnia version : {{ manifest.source_version | default('2.1.0.0') }} + Target Omnia version : {{ manifest.target_version | default('2.2.0.0') }} + + ── Omnia Upgrade Execution Plan (in order) ────────────────── + 1. 
oim → Upgrade OpenCHAMI control-plane containers + on the Omnia Infrastructure Manager + 2. build_stream → Enable or upgrade BuildStream services + (skipped if not enabled in the build_stream_config.yml) + 3. local_repo → Synchronize Omnia 2.2 packages into the + local Pulp repository for cluster nodes + 4. build_image → Rebuild compute OS images against + the new Omnia 2.2 package set + 5. provision → Refresh Cloud-Init data and BSS boot + configurations to point at 2.2 images + 6. k8s → Roll the Kubernetes cluster to the new + Omnia-supported version (control plane + nodes) + 7. telemetry → Upgrade the Omnia telemetry stack + for kafka, victoria metrics, and other components + 8. slurm → Upgrade the Slurm controller, slurm nodes + + ── Impact & Risk ──────────────────────────────────────────── + • Running Omnia services will be modified and briefly restarted. + • Do NOT reboot the OIM during the upgrade + • Compute workloads should be drained or completed beforehand. + + ── Pre-flight Checklist ───────────────────────────────────── + [ ] prepare_upgrade.yml completed without errors + [ ] Kubernetes cluster healthy (no failing pods, all PVCs + bound, LoadBalancer services have external IPs) + [ ] Slurm cluster healthy (no failing jobs, all nodes ready) + [ ] Stable internet connectivity verified on the OIM + [ ] Maintenance window is active and stakeholders notified + [ ] Sufficient disk space available for new images & packages + + ────────────────────────────────────────────────────────────── + Press ENTER to PROCEED with the Omnia upgrade + Press Ctrl+C, A to ABORT and exit safely ══════════════════════════════════════════════════════════════ when: not (skip_approval | default(false) | bool) From 3f197406e39d72d7f58f68af74b0970dde22ab82 Mon Sep 17 00:00:00 2001 From: Abhishek S A Date: Fri, 22 May 2026 11:30:34 +0530 Subject: [PATCH 2/2] Update build_image_common.yml Signed-off-by: Abhishek S A --- .../roles/image_creation/tasks/build_image_common.yml | 4 ++-- 
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/build_image_x86_64/roles/image_creation/tasks/build_image_common.yml b/build_image_x86_64/roles/image_creation/tasks/build_image_common.yml
index 7e7bf5663c..2787ae52af 100644
--- a/build_image_x86_64/roles/image_creation/tasks/build_image_common.yml
+++ b/build_image_x86_64/roles/image_creation/tasks/build_image_common.yml
@@ -34,8 +34,8 @@
 
 - name: Normalize build stream inputs for base image
   ansible.builtin.set_fact:
-    enable_build_stream: "{{ enable_build_stream | default(false) | bool }}"
-    build_stream_job_id: "{{ build_stream_job_id | default('') }}"
+    enable_build_stream: "{{ hostvars['localhost']['enable_build_stream'] | default(enable_build_stream | default(false)) | bool }}"  # localhost value first, then this host's own, then false
+    build_stream_job_id: "{{ hostvars['localhost']['build_stream_job_id'] | default(build_stream_job_id | default('')) }}"  # same lookup order; empty string when unset everywhere
     image_key: "{{ image_key | default('') }}"
     base_image_suffix: ""
     compute_image_suffix: ""