Merge d02f5a1 into f49318f
nuwang committed Mar 30, 2020
2 parents f49318f + d02f5a1 commit f09edf5
Showing 3 changed files with 24 additions and 16 deletions.
6 changes: 4 additions & 2 deletions cloudman/clusterman/clients/kube_client.py
@@ -104,8 +104,10 @@ def wait_till_jobs_complete(self, node, timeout=3600):
             wait=tenacity.wait_fixed(5))
         retryer(self._get_job_pods_in_node, name, "Running")
 
-    def drain(self, node, force=True, timeout=120):
+    def drain(self, node, force=True, timeout=120, ignore_daemonsets=True):
         name = node.get('metadata', {}).get('name')
         return helpers.run_command(
             ["kubectl", "drain", name, f"--timeout={timeout}s",
-             f"--force={'true' if force else 'false'}"])
+             f"--force={'true' if force else 'false'}",
+             f"--ignore-daemonsets={'true' if ignore_daemonsets else 'false'}"]
+        )
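
The patched drain() now also passes --ignore-daemonsets to kubectl. For reference, a minimal standalone sketch of the command it assembles, using subprocess as a stand-in for helpers.run_command (an assumption; the real helper is not shown in this diff):

import subprocess

def drain_node(name, force=True, timeout=120, ignore_daemonsets=True):
    # Same argument list the patched drain() builds.
    cmd = ["kubectl", "drain", name, f"--timeout={timeout}s",
           f"--force={'true' if force else 'false'}",
           f"--ignore-daemonsets={'true' if ignore_daemonsets else 'false'}"]
    # Stand-in for helpers.run_command, assumed to shell out to kubectl.
    return subprocess.run(cmd, capture_output=True, text=True)

# drain_node("my-node") would run:
# kubectl drain my-node --timeout=120s --force=true --ignore-daemonsets=true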
32 changes: 18 additions & 14 deletions cloudman/clusterman/plugins/rancher_kubernetes_app.py
@@ -74,20 +74,24 @@ def delete(self, provider, deployment):
         rancher_client = self._create_rancher_client(rancher_cfg)
         node_ip = deployment.get(
             'launch_result', {}).get('cloudLaunch', {}).get('publicIP')
-        rancher_node_id = rancher_client.find_node(ip=node_ip)
-        if rancher_node_id:
-            kube_client = KubeClient()
-            k8s_node = kube_client.nodes.find(node_ip)[0]
-            # stop new jobs being scheduled on this node
-            kube_client.nodes.cordon(k8s_node)
-            # let existing jobs finish
-            kube_client.nodes.wait_till_jobs_complete(k8s_node)
-            # drain remaining pods
-            kube_client.nodes.drain(k8s_node, timeout=120)
-            # remove node from rancher
-            rancher_client.delete_node(rancher_node_id)
-        # delete the VM
-        return super().delete(provider, deployment)
+        try:
+            rancher_node_id = rancher_client.find_node(ip=node_ip)
+            if rancher_node_id:
+                try:
+                    kube_client = KubeClient()
+                    k8s_node = kube_client.nodes.find(node_ip)[0]
+                    # stop new jobs being scheduled on this node
+                    kube_client.nodes.cordon(k8s_node)
+                    # let existing jobs finish
+                    kube_client.nodes.wait_till_jobs_complete(k8s_node)
+                    # drain remaining pods
+                    kube_client.nodes.drain(k8s_node, timeout=120)
+                finally:
+                    # remove node from rancher
+                    rancher_client.delete_node(rancher_node_id)
+        finally:
+            # delete the VM
+            return super().delete(provider, deployment)
 
     def _get_configurer(self, app_config):
         # CloudMan2 can only be configured with ansible
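
For illustration, a minimal standalone sketch (stub steps only, not the real Rancher or Kubernetes clients) of the ordering the nested try/finally gives: the Rancher node is deleted even if a Kubernetes step raises, and the VM deletion in the outer finally always runs last.

def delete_node_sketch(drain_fails=False):
    # Stub version of the nested try/finally in delete() above.
    steps = []
    try:
        try:
            steps.append("cordon")
            steps.append("wait_till_jobs_complete")
            if drain_fails:
                raise RuntimeError("drain timed out")
            steps.append("drain")
        finally:
            # Runs whether or not the Kubernetes steps above raised.
            steps.append("rancher delete_node")
    finally:
        # Always runs; returning here also swallows any pending exception,
        # as the `return super().delete(...)` in the outer finally does.
        steps.append("delete VM (super().delete)")
        return steps

print(delete_node_sketch(drain_fails=True))
# ['cordon', 'wait_till_jobs_complete', 'rancher delete_node',
#  'delete VM (super().delete)']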
2 changes: 2 additions & 0 deletions cloudman/clusterman/tests/mock_kubectl.py
@@ -176,6 +176,8 @@ def str2bool(v):
             '--timeout', type=str, help='time to wait before giving up. e.g. 10s')
         parser_drain.add_argument(
             '--force', type=str2bool, default=False, help='continue even with unmanaged pods')
+        parser_drain.add_argument(
+            '--ignore-daemonsets', type=str2bool, default=True, help='ignore daemonsets')
         parser_drain.set_defaults(func=self._kubectl_drain)
 
         return parser
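
A small self-contained sketch of how the mock parses the new flag; the str2bool below is a hypothetical reimplementation, since the diff only shows its name:

import argparse

def str2bool(v):
    # Hypothetical reimplementation; the real mock's str2bool is not shown here.
    return str(v).lower() in ('yes', 'true', 't', '1')

parser = argparse.ArgumentParser(prog='kubectl')
subparsers = parser.add_subparsers()
parser_drain = subparsers.add_parser('drain')
parser_drain.add_argument('name')
parser_drain.add_argument('--timeout', type=str)
parser_drain.add_argument('--force', type=str2bool, default=False)
parser_drain.add_argument('--ignore-daemonsets', type=str2bool, default=True)

args = parser.parse_args(
    ['drain', 'my-node', '--timeout=120s', '--force=true',
     '--ignore-daemonsets=true'])
print(args.force, args.ignore_daemonsets)  # True True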
