diff --git a/cloudman/clusterman/api.py b/cloudman/clusterman/api.py
index aa6f2328..232e03b8 100644
--- a/cloudman/clusterman/api.py
+++ b/cloudman/clusterman/api.py
@@ -10,6 +10,7 @@
 from . import exceptions
 from . import models
 from . import resources
+from . import tasks
 
 
 class CMServiceContext(object):
@@ -184,9 +185,9 @@
             print(f"Deleting node: {node.name}")
             self.check_permissions('clusternodes.delete_clusternode', node)
             template = self.cluster.get_cluster_template()
-            template.remove_node(node)
-            # call the saved django delete method which we remapped
-            node.original_delete()
+            task = template.remove_node(node)
+            tasks.delete_node.delay(task.asdict().get('celery_id'),
+                                    node.cluster_id, node.id)
         else:
             print(f"Unexpected, asked to delete a null node!")
 
diff --git a/cloudman/clusterman/tasks.py b/cloudman/clusterman/tasks.py
new file mode 100644
index 00000000..deced67b
--- /dev/null
+++ b/cloudman/clusterman/tasks.py
@@ -0,0 +1,58 @@
+"""Tasks to be executed asynchronously (via Celery)."""
+from celery.app import shared_task
+from celery.result import AsyncResult
+from celery.result import allow_join_result
+
+from django.contrib.auth.models import User
+
+from clusterman import api
+from clusterman.clients.kube_client import KubeClient
+
+
+def node_not_present(node):
+    """Return True if the node's private IP is no longer known to k8s."""
+    kube_client = KubeClient()
+    launch_task = node.deployment.tasks.filter(action='LAUNCH').first()
+    node_ip = launch_task.result.get('cloudLaunch', {}).get('private_ip')
+    print(f"Checking for presence of node ip: {node_ip}")
+    k8s_node = kube_client.nodes.find(node_ip)
+    return not k8s_node
+
+
+def wait_till_deployment_deleted(deployment_delete_task_id):
+    """Block until the cloudlaunch deployment delete task has finished."""
+    with allow_join_result():
+        deployment_delete_task = AsyncResult(deployment_delete_task_id)
+        print("Waiting for node deployment to be deleted...")
+        # propagate=False so a failed delete is logged below instead of
+        # re-raising here (wait() defaults to propagate=True)
+        deployment_delete_task.wait(propagate=False)
+        if deployment_delete_task.successful():
+            print("Deployment deleted successfully.")
+            return
+        else:
+            task_meta = deployment_delete_task.backend.get_task_meta(
+                deployment_delete_task.id)
+            print(f"Deployment delete failed: {task_meta.get('status')} with traceback: "
+                  f"{task_meta.get('traceback')}")
+
+
+@shared_task(bind=True, expires=120)
+def delete_node(self, deployment_delete_task_id, cluster_id, node_id):
+    """
+    Triggers a delete task through cloudlaunch.
+    If successful, removes reference to node
+    """
+    admin = User.objects.filter(is_superuser=True).first()
+    cmapi = api.CloudManAPI(api.CMServiceContext(user=admin))
+    cluster = cmapi.clusters.get(cluster_id)
+    node = cluster.nodes.get(node_id)
+    wait_till_deployment_deleted(deployment_delete_task_id)
+    if node_not_present(node):
+        # if desired state has been reached, clusterman no longer
+        # needs to maintain a reference to the node
+        # call the saved django delete method which we remapped
+        print("Node does not exist, removing clusterman reference.")
+        node.original_delete()
+    else:
+        print("Deleted node still exists, not removing clusterman "
+              "node reference.")
diff --git a/cloudman/clusterman/tests/test_cluster_api.py b/cloudman/clusterman/tests/test_cluster_api.py
index 780bcb37..1dabf47f 100644
--- a/cloudman/clusterman/tests/test_cluster_api.py
+++ b/cloudman/clusterman/tests/test_cluster_api.py
@@ -221,6 +221,11 @@ def create_mock_provider(self, name, config):
         patcher4.start()
         self.addCleanup(patcher4.stop)
 
+        patcher5 = patch('clusterman.tasks.wait_till_deployment_deleted',
+                         side_effect=self._wait_till_deployment_deleted)
+        patcher5.start()
+        self.addCleanup(patcher5.stop)
+
         super().setUp()
 
     def _add_dummy_node(self, app_config, provider_config, playbook_vars=None):
@@ -258,6 +263,9 @@
             kube_mocker.mock_kubectl._kubectl_add_node(node)
         return {}
 
+    def _wait_till_deployment_deleted(*args, **kwargs):
+        return
+
 
 class CMClusterNodeServiceTests(CMClusterNodeTestBase):
 