From 6b05dfe105915e1db6e1352aeda406f588bb6677 Mon Sep 17 00:00:00 2001
From: Anil Prajapati
Date: Mon, 5 May 2025 13:42:36 +0530
Subject: [PATCH] Revert ocp 417 and cpd 51 changes

---
 docs/playbooks/ocp.md                              |   6 +-
 .../playbooks/ocp_convert_to_disconnected.yml      |   2 +-
 .../playbooks/ocp_fyre_provision.yml               |   2 +-
 .../playbooks/ocp_roks_provision.yml               |   2 +-
 .../cp4d_service/tasks/wait/wait-ccs.yml           | 151 +++--------------
 ibm/mas_devops/roles/mirror_ocp/README.md          |   8 +-
 .../roles/ocp_cluster_monitoring/README.md         |   4 +-
 .../roles/ocp_provision/defaults/main.yml          |   2 +-
 8 files changed, 38 insertions(+), 139 deletions(-)

diff --git a/docs/playbooks/ocp.md b/docs/playbooks/ocp.md
index 4c2da1ea0d..0efb9732e8 100644
--- a/docs/playbooks/ocp.md
+++ b/docs/playbooks/ocp.md
@@ -16,7 +16,7 @@ export AWS_SECRET_ACCESS_KEY=xxx
 export ROSA_TOKEN=xxx
 export CLUSTER_NAME=masonrosa
-export OCP_VERSION=4.17
+export OCP_VERSION=4.16
 export ROSA_COMPUTE_NODES=5
 export ROSA_CLUSTER_ADMIN_PASSWORD=xxx
 ansible-playbook ibm.mas_devops.ocp_rosa_provision
@@ -31,7 +31,7 @@ This also supports upgrading the storage volume used for the cluster's internal
 
 ```bash
 export CLUSTER_NAME=masinst1
-export OCP_VERSION=4.17_openshift
+export OCP_VERSION=4.16_openshift
 export IBMCLOUD_APIKEY=xxx
 export REBOOT_WORKER_NODES=true
 export CPD_ENTITLEMENT_KEY=xxx
@@ -44,7 +44,7 @@ This playbook will provision a QuickBurn OCP cluster in IBM DevIT Fyre service,
 
 ```bash
 export CLUSTER_NAME=masinst1
-export OCP_VERSION=4.17
+export OCP_VERSION=4.16
 export FYRE_USERNAME=xxx
 export FYRE_APIKEY=xxx
 export FYRE_PRODUCT_ID=xxx
diff --git a/ibm/mas_devops/playbooks/ocp_convert_to_disconnected.yml b/ibm/mas_devops/playbooks/ocp_convert_to_disconnected.yml
index 47176294c1..8f39fd92c3 100644
--- a/ibm/mas_devops/playbooks/ocp_convert_to_disconnected.yml
+++ b/ibm/mas_devops/playbooks/ocp_convert_to_disconnected.yml
@@ -5,7 +5,7 @@
   vars:
     ocp_operatorhub_disable_redhat_sources: true
-    ocp_release: "{{ lookup('env', 'OCP_RELEASE') | default('4.17', true) }}"
+    ocp_release: "{{ lookup('env', 'OCP_RELEASE') | default('4.16', true) }}"
 
     setup_redhat_release: true
     setup_redhat_catalogs: true
diff --git a/ibm/mas_devops/playbooks/ocp_fyre_provision.yml b/ibm/mas_devops/playbooks/ocp_fyre_provision.yml
index b8b4b577b9..9a351e6b7e 100644
--- a/ibm/mas_devops/playbooks/ocp_fyre_provision.yml
+++ b/ibm/mas_devops/playbooks/ocp_fyre_provision.yml
@@ -2,7 +2,7 @@
 - hosts: localhost
   vars:
     cluster_type: fyre
-    ocp_version: "{{ lookup('env', 'OCP_VERSION') | default('4.17', True) }}"
+    ocp_version: "{{ lookup('env', 'OCP_VERSION') | default('4.16', True) }}"
 
     # We update the cipher support on all installs, even though it's only technically
     # requires for FIPS clusters
diff --git a/ibm/mas_devops/playbooks/ocp_roks_provision.yml b/ibm/mas_devops/playbooks/ocp_roks_provision.yml
index ddd4c69627..c6d0480f84 100644
--- a/ibm/mas_devops/playbooks/ocp_roks_provision.yml
+++ b/ibm/mas_devops/playbooks/ocp_roks_provision.yml
@@ -3,7 +3,7 @@
   any_errors_fatal: true
   vars:
     cluster_type: roks
-    ocp_version: "{{ lookup('env', 'OCP_VERSION') | default('4.17_openshift', True) }}"
+    ocp_version: "{{ lookup('env', 'OCP_VERSION') | default('4.16_openshift', True) }}"
     prometheus_storage_class: ibmc-block-gold
     prometheus_alertmgr_storage_class: ibmc-file-gold-gid
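Every hunk above touches the same Ansible idiom: an environment lookup with an inline fallback. Because lookup('env', ...) returns an empty string for unset variables, the second argument to default() is what makes the fallback apply when OCP_VERSION is unset *or* empty. A minimal sketch of the behaviour, using a hypothetical playbook that is not part of this patch:

```yaml
---
# Hypothetical playbook, for illustration only
- hosts: localhost
  vars:
    # default('4.16', true): the `true` makes empty strings fall back too,
    # since lookup('env', ...) yields '' when the variable is unset
    ocp_version: "{{ lookup('env', 'OCP_VERSION') | default('4.16', true) }}"
  tasks:
    - name: Show the resolved version
      ansible.builtin.debug:
        msg: "Provisioning OCP {{ ocp_version }}"
```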
diff --git a/ibm/mas_devops/roles/cp4d_service/tasks/wait/wait-ccs.yml b/ibm/mas_devops/roles/cp4d_service/tasks/wait/wait-ccs.yml
index 6ceeea55b9..d0b0430b01 100644
--- a/ibm/mas_devops/roles/cp4d_service/tasks/wait/wait-ccs.yml
+++ b/ibm/mas_devops/roles/cp4d_service/tasks/wait/wait-ccs.yml
@@ -23,15 +23,12 @@
     - "CCS CR already patched? .............. {{ is_ccs_already_patched }}"
     - "CCS Block Storage Class .............. {{ ccscr_output.resources[0].spec.blockStorageClass | default('', true) }}"
-
 
 # 2. Apply the patch per recommendation from CP4D team
 # https://github.ibm.com/NGP-TWC/ml-planning/issues/32683
 # https://medium.com/@dany.drouin/scaling-watson-knowledge-catalog-on-cloud-pak-for-data-11623f41f7df
 # -----------------------------------------------------------------------------
 # only run following block if is_ccs_already_patched == False
-- name: "wait-ccs : Apply CCS Scaling Patch"
-  when: not is_ccs_already_patched
-  block:
+- block:
     - name: "wait-ccs : Patch ccs-cr to increase resource limits"
       kubernetes.core.k8s:
        api_version: ccs.cpd.ibm.com/v1beta1
@@ -62,7 +59,8 @@
         field_manager: ansible
         force_conflicts: true
 
-    # Delete ccs-operator pod to force the reconcile from the beginning after ccs-cr is patched.
+    # 3. Delete ccs-operator pod to force the reconcile from the beginning after ccs-cr is patched.
+    # -----------------------------------------------------------------------------
     - name: "wait-ccs : Scale down ccs-operator"
       kubernetes.core.k8s:
         api_version: apps/v1
@@ -91,7 +89,8 @@
         field_manager: ansible
         force_conflicts: true
 
-    # Wait for ccs operator ...
+    # 4. Wait for ccs operator ...
+    # -----------------------------------------------------------------------------
     - name: "wait-ccs : Wait for ccs-operator to be ready again (60s delay)"
       kubernetes.core.k8s_info:
         api_version: apps/v1
@@ -103,141 +102,41 @@
       retries: 20 # Approximately 20 minutes before we give up
       delay: 60 # 1 minute
+  when: not is_ccs_already_patched
 
 - include_tasks: "tasks/wait/wait-elasticsearch.yml"
   when:
     - cpd_48_or_higher # elastic search operator was just introduced with cpd 4.8
     - not skip_ibm_entitlement_injection # eventually we hope to be able to skip patching the elastic search cr with image pull secret, but not for now
-
-# 3. Wait for CouchDB Stateful Set to be ready
+# 5. Wait for CouchDB Stateful Set to be ready
 # -----------------------------------------------------------------------------
 # There have been issues with CouchDB not starting due to Persistent Storage,
 # This task restarts any failing pods
 - include_tasks: "tasks/wait/wait-couchdb.yml"
-  when: cpd_48
-
+  when:
+    - cpd_48
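The ccs-cr patch and the two scale tasks in the hunks above all pass field_manager: ansible with force_conflicts: true, i.e. Kubernetes server-side apply: Ansible registers itself as a field manager and forcibly takes ownership of any fields the CP4D operator also manages, so the two writers do not clobber each other. A hedged, generic sketch of the same module options (the spec field and variable below are placeholders, not the role's actual patch):

```yaml
# Illustrative server-side apply with kubernetes.core.k8s;
# the patched spec field is a placeholder, not the actual CCS patch
- name: Patch a custom resource with server-side apply
  kubernetes.core.k8s:
    api_version: ccs.cpd.ibm.com/v1beta1
    kind: CCS
    name: ccs-cr
    namespace: "{{ cpd_instance_namespace }}"
    apply: true
    server_side_apply:
      field_manager: ansible
      force_conflicts: true  # take ownership of fields managed by the operator
    definition:
      spec:
        blockStorageClass: "{{ cpd_block_storage_class }}"  # placeholder variable
```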
 
-# 4. Wait for CCS CR to be ready
+# 6. Wait for CCS CR to be ready
 # -----------------------------------------------------------------------------
 # Note: We can't fail early when we see Failed status, as the operator will
 # report failed multiple times during initial reconcile.
-# We give it an hour to have made it past upgrading elasticsearch if still
-# failing check to see if elasticsearch needs rescuing by deleting and
-# recreating.
-# regardless of if the elasticsearch rescue is performed we wait another 4 hours
-# to let the process complete (in the always section)
-- block:
-    - name: "wait-ccs : Wait for ccsStatus 'Completed' (5m interval)"
-      kubernetes.core.k8s_info:
-        api_version: "ccs.cpd.ibm.com/v1beta1"
-        kind: CCS
-        name: "ccs-cr"
-        namespace: "{{ cpd_instance_namespace }}"
-      register: ccs_cr_lookup
-      until:
-        - ccs_cr_lookup.resources is defined
-        - ccs_cr_lookup.resources | length == 1
-        - ccs_cr_lookup.resources[0].status is defined
-        - ccs_cr_lookup.resources[0].status.ccsStatus is defined
-        - ccs_cr_lookup.resources[0].status.ccsStatus == "Completed"
-      retries: 12 # 1 hour
-      delay: 300 # Every 5 minutes
-
-  rescue:
-    - name: "get the elasticsearch statefulset"
-      kubernetes.core.k8s_info:
-        api_version: apps/v1
-        namespace: "{{ cpd_instance_namespace }}"
-        kind: StatefulSet
-        label_selectors: ["ibm-es-master=True"]
-      register: statefulset_output
-
-    # We are aiming to catch this exception caused by a change in how ElasticSearch is managed by Cloud Pak for Data
-    # whereby in CPD 5.1 it starts to explicitly control the version of ElasticSearch, and sets it to an older version
-    # that the version it naturally would have been set to already, resulting in an unsupported downgrade.
-    #
-    # [2025-05-02T10:00:35,693][ERROR][o.o.b.OpenSearchUncaughtExceptionHandler] [elasticsea-0ac3-ib-6fb9-es-server-esnodes-0] uncaught exception in thread [main]
-    # org.opensearch.bootstrap.StartupException: java.lang.IllegalArgumentException: Could not load codec 'Lucene912'. Did you forget to add lucene-backward-codecs.jar?
-    #   at org.opensearch.bootstrap.OpenSearch.init(OpenSearch.java:185) ~[opensearch-2.17.0.jar:2.17.0]
-    #   at org.opensearch.bootstrap.OpenSearch.execute(OpenSearch.java:172) ~[opensearch-2.17.0.jar:2.17.0]
-    #   at org.opensearch.cli.EnvironmentAwareCommand.execute(EnvironmentAwareCommand.java:104) ~[opensearch-2.17.0.jar:2.17.0]
-    #   at org.opensearch.cli.Command.mainWithoutErrorHandling(Command.java:138) ~[opensearch-cli-2.17.0.jar:2.17.0]
-    #   at org.opensearch.cli.Command.main(Command.java:101) ~[opensearch-cli-2.17.0.jar:2.17.0]
-    #   at org.opensearch.bootstrap.OpenSearch.main(OpenSearch.java:138) ~[opensearch-2.17.0.jar:2.17.0]
-    #   at org.opensearch.bootstrap.OpenSearch.main(OpenSearch.java:104) ~[opensearch-2.17.0.jar:2.17.0]
-    # Caused by: java.lang.IllegalArgumentException: Could not load codec 'Lucene912'. Did you forget to add lucene-backward-codecs.jar?
-    #
-    # When this happens, the only solution we are aware of is to delete the ElasticSearch instance and allow it to be recreated fresh,
-    # the alternative is to not upgrade Cloud Pak for Data until after it catches up with the version of ElasticSearch already in use.
-    - name: "check if elasticsearch rescue is appropriate"
-      set_fact:
-        recover_elasticsearch: true
-      when:
-        - statefulset_output.resources is defined
-        - statefulset_output.resources | length == 1
-        - statefulset_output.resources[0].status is defined
-        - statefulset_output.resources[0].status.availableReplicas is defined
-        - statefulset_output.resources[0].status.availableReplicas == 0
-
-    - name: "Pause for 5 minutes before removing elasticsearchcluster and associated persistent volumes"
-      pause:
-        minutes: 5
-
-    - name: "reset rescue decision"
-      set_fact:
-        recover_elasticsearch: false
-
-    - name: "confirm elasticsearch rescue is still appropriate"
-      set_fact:
-        recover_elasticsearch: true
-      when:
-        - statefulset_output.resources is defined
-        - statefulset_output.resources | length == 1
-        - statefulset_output.resources[0].status is defined
-        - statefulset_output.resources[0].status.availableReplicas is defined
-        - statefulset_output.resources[0].status.availableReplicas == 0
-
-    - name: "delete the elasticsearch cluster"
-      k8s:
-        api_version: elasticsearch.opencontent.ibm.com/v1
-        kind: ElasticsearchCluster
-        state: absent
-        namespace: "{{ cpd_instance_namespace }}"
-        name: "{{ statefulset_output.resources[0].metadata.ownerReferences[0].name }}"
-      when:
-        - recover_elasticsearch == true
-
-    - name: "Delete the elasticsearch PVCs"
-      k8s:
-        api_version: v1
-        kind: PersistentVolumeClaim
-        namespace: "{{ cpd_instance_namespace }}"
-        label_selectors: ["app.kubernetes.io/component={{ statefulset_output.resources[0].metadata.name}}"]
-        state: absent
-      when:
-        - recover_elasticsearch == true
-
-  always:
-    - name: "wait-ccs : Wait for ccsStatus 'Completed' (5m interval)"
-      kubernetes.core.k8s_info:
-        api_version: "ccs.cpd.ibm.com/v1beta1"
-        kind: CCS
-        name: "ccs-cr"
-        namespace: "{{ cpd_instance_namespace }}"
-      register: ccs_cr_lookup
-      until:
-        - ccs_cr_lookup.resources is defined
-        - ccs_cr_lookup.resources | length == 1
-        - ccs_cr_lookup.resources[0].status is defined
-        - ccs_cr_lookup.resources[0].status.ccsStatus is defined
-        - ccs_cr_lookup.resources[0].status.ccsStatus == "Completed"
-      retries: 50 # Just over 4 hours
-      delay: 300 # Every 5 minutes
-
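Both the deleted block/rescue/always structure above and the replacement task below rely on the same polling idiom: k8s_info re-runs until every until condition holds, and exhausting retries makes the task fail, which is exactly what fired the rescue path in the removed code. A generic hedged sketch of the pattern (the timings are illustrative):

```yaml
# Generic polling sketch; retries/delay values are illustrative
- name: Wait for a CR status field to reach a terminal value
  kubernetes.core.k8s_info:
    api_version: ccs.cpd.ibm.com/v1beta1
    kind: CCS
    name: ccs-cr
    namespace: "{{ cpd_instance_namespace }}"
  register: ccs_cr_lookup
  until:
    - ccs_cr_lookup.resources | length == 1
    - ccs_cr_lookup.resources[0].status.ccsStatus | default('') == "Completed"
  retries: 12  # 12 polls x 300s = 1 hour, then the task fails (triggering any rescue)
  delay: 300
```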
- - name: "check if elasticsearch rescue is appropriate" - set_fact: - recover_elasticsearch: true - when: - - statefulset_output.resources is defined - - statefulset_output.resources | length == 1 - - statefulset_output.resources[0].status is defined - - statefulset_output.resources[0].status.availableReplicas is defined - - statefulset_output.resources[0].status.availableReplicas == 0 - - - name: "Pause for 5 minutes before removing elasticsearchcluster and associated persistent volumes" - pause: - minutes: 5 - - - name: "reset rescue decision" - set_fact: - recover_elasticsearch: false - - - name: "confirm elasticsearch rescue is still appropriate" - set_fact: - recover_elasticsearch: true - when: - - statefulset_output.resources is defined - - statefulset_output.resources | length == 1 - - statefulset_output.resources[0].status is defined - - statefulset_output.resources[0].status.availableReplicas is defined - - statefulset_output.resources[0].status.availableReplicas == 0 - - - name: "delete the elasticsearch cluster" - k8s: - api_version: elasticsearch.opencontent.ibm.com/v1 - kind: ElasticsearchCluster - state: absent - namespace: "{{ cpd_instance_namespace }}" - name: "{{ statefulset_output.resources[0].metadata.ownerReferences[0].name }}" - when: - - recover_elasticsearch == true - - - name: "Delete the elasticsearch PVCs" - k8s: - api_version: v1 - kind: PersistentVolumeClaim - namespace: "{{ cpd_instance_namespace }}" - label_selectors: ["app.kubernetes.io/component={{ statefulset_output.resources[0].metadata.name}}"] - state: absent - when: - - recover_elasticsearch == true - - always: - - name: "wait-ccs : Wait for ccsStatus 'Completed' (5m interval)" - kubernetes.core.k8s_info: - api_version: "ccs.cpd.ibm.com/v1beta1" - kind: CCS - name: "ccs-cr" - namespace: "{{ cpd_instance_namespace }}" - register: ccs_cr_lookup - until: - - ccs_cr_lookup.resources is defined - - ccs_cr_lookup.resources | length == 1 - - ccs_cr_lookup.resources[0].status is defined - - ccs_cr_lookup.resources[0].status.ccsStatus is defined - - ccs_cr_lookup.resources[0].status.ccsStatus == "Completed" - retries: 50 # Just over 4 hours - delay: 300 # Every 5 minutes - +- name: "wait-ccs : Wait for ccsStatus 'Completed' (5m interval)" + kubernetes.core.k8s_info: + api_version: "ccs.cpd.ibm.com/v1beta1" + kind: CCS + name: "ccs-cr" + namespace: "{{ cpd_instance_namespace }}" + register: ccs_cr_lookup + until: + - ccs_cr_lookup.resources is defined + - ccs_cr_lookup.resources | length == 1 + - ccs_cr_lookup.resources[0].status is defined + - ccs_cr_lookup.resources[0].status.ccsStatus is defined + - ccs_cr_lookup.resources[0].status.ccsStatus == "Completed" # or ccs_cr_lookup.resources[0].status.wmlStatus == "Failed" + retries: 50 # Just over 4 hours + delay: 300 # Every 5 minutes -# 5. 
 
-# 5. Check that the final status is "Completed"
-# -----------------------------------------------------------------------------
 - name: "wait-ccs : Check that the CCS ccsStatus is 'Completed'"
   assert:
     that: ccs_cr_lookup.resources[0].status.ccsStatus == "Completed"
diff --git a/ibm/mas_devops/roles/mirror_ocp/README.md b/ibm/mas_devops/roles/mirror_ocp/README.md
index b5bb129dbc..bb52a92321 100644
--- a/ibm/mas_devops/roles/mirror_ocp/README.md
+++ b/ibm/mas_devops/roles/mirror_ocp/README.md
@@ -81,21 +81,21 @@ Path to your Red Hat pull secret, available from: [https://console.redhat.com/op
 Role Variables - OpenShift Version
 -------------------------------------------------------------------------------
 ### ocp_release
-The Red Hat release you are mirroring content for, e.g. `4.17`.
+The Red Hat release you are mirroring content for, e.g. `4.16`.
 
 - **Required**
 - Environment Variable: `OCP_RELEASE`
 - Default: None
 
 ### ocp_min_version
-The minimum version of the Red Hat release to mirror platform content for, e.g. `4.17.9`.
+The minimum version of the Red Hat release to mirror platform content for, e.g. `4.16.11`.
 
 - **Optional**
 - Environment Variable: `OCP_MIN_VERSION`
 - Default: None
 
 ### ocp_max_version
-The maximimum version of the Red Hat release to mirror platform content for, e.g. `4.17.9`.
+The maximimum version of the Red Hat release to mirror platform content for, e.g. `4.16.20`.
 
 - **Optional**
 - Environment Variable: `OCP_MAX_VERSION`
@@ -157,7 +157,7 @@ Example Playbook
     mirror_redhat_platform: false
     mirror_redhat_operators: true
 
-    ocp_release: 4.17
+    ocp_release: 4.16
     redhat_pullsecret: ~/pull-secret.json
 
   roles:
diff --git a/ibm/mas_devops/roles/ocp_cluster_monitoring/README.md b/ibm/mas_devops/roles/ocp_cluster_monitoring/README.md
index d6ee177753..472c1b9b1e 100644
--- a/ibm/mas_devops/roles/ocp_cluster_monitoring/README.md
+++ b/ibm/mas_devops/roles/ocp_cluster_monitoring/README.md
@@ -2,8 +2,8 @@ ocp_cluster_monitoring
 ===============================================================================
 Configures the OpenShift Container Platform Cluster Monitoring enabling two settings:
 
-- [OpenShift user defined project monitoring](hhttps://docs.redhat.com/en/documentation/openshift_container_platform/4.17/html/monitoring/configuring-user-workload-monitoring#preparing-to-configure-the-monitoring-stack-uwm) is enabled (`openshift-monitoring` namespace)
-- [OpenShift monitoring stack](https://docs.redhat.com/en/documentation/openshift_container_platform/4.17/html/monitoring/configuring-user-workload-monitoring#configuring-persistent-storage_storing-and-recording-data-uwm) is configured to use persistent storage (`openshift-monitoring` namespace)
+- [OpenShift user defined project monitoring](https://access.redhat.com/documentation/en-us/openshift_container_platform/4.16/html/monitoring/enabling-monitoring-for-user-defined-projects) is enabled (`openshift-monitoring` namespace)
+- [OpenShift monitoring stack](https://access.redhat.com/documentation/en-us/openshift_container_platform/4.16/html/monitoring/index) is configured to use persistent storage (`openshift-monitoring` namespace)
 
 
 Role Variables
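The three mirror_ocp variables reverted above work together: ocp_release selects the release to mirror, while ocp_min_version and ocp_max_version bound the range of platform builds that are pulled. A hypothetical invocation combining them, using only variables documented in this README (the values are the README's examples, not a recommendation):

```yaml
---
# Hypothetical playbook combining the reverted version variables
- hosts: localhost
  vars:
    mirror_redhat_platform: true
    mirror_redhat_operators: false
    ocp_release: "4.16"
    ocp_min_version: "4.16.11"   # oldest platform build to mirror
    ocp_max_version: "4.16.20"   # newest platform build to mirror
    redhat_pullsecret: ~/pull-secret.json
  roles:
    - ibm.mas_devops.mirror_ocp
```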
diff --git a/ibm/mas_devops/roles/ocp_provision/defaults/main.yml b/ibm/mas_devops/roles/ocp_provision/defaults/main.yml
index e035cfecdd..af9053f465 100644
--- a/ibm/mas_devops/roles/ocp_provision/defaults/main.yml
+++ b/ibm/mas_devops/roles/ocp_provision/defaults/main.yml
@@ -7,7 +7,7 @@ cluster_platform: "{{lookup('env', 'CLUSTER_PLATFORM') | default('x',true)}}"
 ocp_version: "{{ lookup('env', 'OCP_VERSION') }}"
 ocp_fips_enabled: "{{ lookup('env', 'OCP_FIPS_ENABLED') | default('false', true) | bool }}"
-default_ocp_version: 4.17
+default_ocp_version: 4.16
 
 supported_cluster_types:
   - fyre
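Note that in this defaults file ocp_version is read from the environment with no inline fallback; default_ocp_version (reverted to 4.16 here) is the role-level backstop, presumably applied elsewhere in the role's tasks. A hedged sketch of how such a fallback is commonly wired, not the role's actual implementation:

```yaml
# Illustrative fallback wiring, not the role's actual task
- name: Fall back to the collection default when OCP_VERSION is unset or empty
  ansible.builtin.set_fact:
    ocp_version: "{{ ocp_version | default(default_ocp_version, true) }}"
```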