Revert "Remove some cert approval related hacks."

This reverts commit a991cca. Some of the removed code is still needed. In particular, we still need to set the IPs on master Machines, as we have no other mechanism to do that. Without it, the master Machines and Nodes will not get linked, which the UI depends on. Related to issue openshift-metal3#260
dhellmann · Aug 7, 2019 · bb69bef · bb69bef
1 parent 377fd2f
commit bb69bef
Show file tree

Hide file tree

Showing 5 changed files with 77 additions and 19 deletions.
diff --git a/06_create_cluster.sh b/06_create_cluster.sh
@@ -72,7 +72,35 @@ if [ $(sudo podman ps | grep -w -e "ironic-api$" -e "ironic-conductor$" -e "iron
     exit 1
 fi
 
+# Run the fix_certs.sh script periodically as a workaround for
+# https://github.com/openshift-metalkube/dev-scripts/issues/260
+sudo systemd-run --on-active=30s --on-unit-active=1m --unit=fix_certs.service "$(dirname "$0")/fix_certs.sh"
+
 # Call openshift-installer to deploy the bootstrap node and masters
 create_cluster ocp
 
 echo "Cluster up, you can interact with it via oc --config ${KUBECONFIG} <command>"
+
+# The deployment is complete, but we must manually add the IPs for the masters,
+# as we don't have a way to do that automatically yet. This is required for
+# CSRs to get auto approved for masters.
+# https://github.com/openshift-metal3/dev-scripts/issues/260
+# https://github.com/metal3-io/baremetal-operator/issues/242
+./add-machine-ips.sh
+
+# Bounce the machine approver to get it to notice the changes.
+oc scale deployment -n openshift-cluster-machine-approver --replicas=0 machine-approver
+# Wait on the observed pod count (.status.replicas), not the requested one
+# (.spec.replicas); default a missing field to 0 and pause between polls.
+while [ "$(oc get deployment -n openshift-cluster-machine-approver machine-approver -o json | jq '.status.replicas // 0')" != "0" ]
+do
+  echo "Scaling down machine-approver..."
+  sleep 2
+done
+echo "Scaling up machine-approver..."
+oc scale deployment -n openshift-cluster-machine-approver --replicas=1 machine-approver
+
+# Wait a tiny bit, then list the csrs
+sleep 5
+oc get csr
+# END Hack
diff --git a/11_register_hosts.sh b/11_register_hosts.sh
@@ -78,12 +78,6 @@ list_workers | make_bm_workers | tee $SCRIPTDIR/ocp/worker_crs.yaml
 
 oc --config ocp/auth/kubeconfig apply -f $SCRIPTDIR/ocp/master_crs.yaml --namespace=openshift-machine-api
 
-# Run the fix_certs.sh script periodically as a workaround for
-# https://github.com/openshift-metalkube/dev-scripts/issues/260 This is only
-# required to approve certs for workers, as the master certs are approved
-# automatically during the bootstrap phase.
-sudo systemd-run --on-active=30s --on-unit-active=1m --unit=fix_certs.service $(dirname $0)/fix_certs.sh
-
 # Check if file exists
 [ -s "$SCRIPTDIR/ocp/worker_crs.yaml" ] || exit 0
 

diff --git a/add-machine-ips.sh b/add-machine-ips.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+#
+# Runs link-machine-and-node.sh for every Node in the cluster, passing the
+# matching Machine name and the node as "<uid>:<name>".
+set -x
+set -e
+
+source logging.sh
+source utils.sh
+source common.sh
+source ocp_install_env.sh
+
+for node in $(oc --config ocp/auth/kubeconfig get nodes -o template --template='{{range .items}}{{.metadata.uid}}:{{.metadata.name}}{{"\n"}}{{end}}'); do
+    # Each entry is "<uid>:<name>"; drop everything up to the first colon.
+    node_name=${node#*:}
+    machine_name="${CLUSTER_NAME}-${node_name}"
+    if [[ "$machine_name" == *"worker"* ]]; then
+        # For workers the Machine name is looked up from the Machine list.
+        # Match the node name as a whole, literal word so that e.g. worker-1
+        # does not also select worker-10.
+        machine_name=$(oc --config ocp/auth/kubeconfig get machines -n openshift-machine-api | grep -w -F -- "$node_name" | cut -f1 -d' ')
+    fi
+    "$SCRIPTDIR/link-machine-and-node.sh" "$machine_name" "$node"
+done
diff --git a/link-machine-and-node.sh b/link-machine-and-node.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+#
+# Usage: link-machine-and-node.sh MACHINE NODE
+#
+# Patches the status of Machine MACHINE (namespace openshift-machine-api) with
+# the addresses of a Node and a nodeRef pointing at it. NODE is given as
+# "<uid>:<name>".
+
+source utils.sh
+
+set -x
+set -e
+
+machine="$1"
+node="$2"
+
+if [ -z "$machine" ] || [ -z "$node" ]; then
+    echo "Usage: $0 MACHINE NODE"
+    exit 1
+fi
+
+uid=$(echo "$node" | cut -f1 -d':')
+node_name=$(echo "$node" | cut -f2 -d':')
+
+# BEGIN Hack #260
+# Hack workaround for openshift-metalkube/dev-scripts#260 until it's done automatically
+# Also see https://github.com/metalkube/cluster-api-provider-baremetal/issues/49
+oc --config ocp/auth/kubeconfig proxy &
+proxy_pid=$!
+# Stop the proxy on every exit path; with `set -e` a failure below would
+# otherwise leave it running in the background.
+# NOTE(review): this replaces any EXIT trap installed by utils.sh - confirm none exists.
+trap 'kill "$proxy_pid" 2>/dev/null || true' EXIT
+
+PROXY_API_PATH="http://localhost:8001/apis/machine.openshift.io/v1beta1/namespaces/openshift-machine-api/machines"
+wait_for_json oc_proxy "${PROXY_API_PATH}" 10 -H "Accept: application/json" -H "Content-Type: application/json"
+
+addresses=$(oc --config ocp/auth/kubeconfig get node "${node_name}" -o json | jq -c '.status.addresses')
+
+# The Machine's status is patched through the local proxy as a JSON merge patch.
+curl -X PATCH \
+     "${PROXY_API_PATH}/${machine}/status" \
+     -H "Content-type: application/merge-patch+json" \
+     -d '{"status":{"addresses":'"${addresses}"',"nodeRef":{"kind":"Node","name":"'"${node_name}"'","uid":"'"${uid}"'"}}}'
diff --git a/run_ci.sh b/run_ci.sh
@@ -140,19 +140,6 @@ done
 set -o pipefail
 timeout -s 9 85m make |& ts "%b %d %H:%M:%S | " |& sed -e 's/.*auths.*/*** PULL_SECRET ***/g'
 
-# Deployment is complete, but now wait to ensure the worker node comes up.
-export KUBECONFIG=ocp/auth/kubeconfig
-
-wait_for_worker() {
-    worker=$1
-    echo "Waiting for worker $worker to appear ..."
-    while [ "$(oc get nodes | grep $worker)" = "" ]; do sleep 5; done
-    TIMEOUT_MINUTES=15
-    echo "$worker registered, waiting $TIMEOUT_MINUTES minutes for Ready condition ..."
-    oc wait node/$worker --for=condition=Ready --timeout=$[${TIMEOUT_MINUTES} * 60]s
-}
-wait_for_worker worker-0
-
 # Populate cache for files it doesn't have, or that have changed
 for FILE in $FILESTOCACHE ; do
     cached=$FILECACHEDIR/$(basename $FILE)