Skip to content
Permalink
Browse files

feat(scheduler): add the ability to set KUBERNETES_POD_TERMINATION_GRACE_PERIOD_SECONDS per application (#1026)

Close #807
  • Loading branch information...
helgi committed Aug 30, 2016
1 parent ffa9040 commit 689df78084d669be214ac94d9573e61936215555
@@ -405,6 +405,9 @@ def _scale_pods(self, scale_types):
# see if the app config has deploy timeout preference, otherwise use global
deploy_timeout = release.config.values.get('DEIS_DEPLOY_TIMEOUT', settings.DEIS_DEPLOY_TIMEOUT) # noqa

# get application level pod termination grace period
pod_termination_grace_period_seconds = release.config.values.get('KUBERNETES_POD_TERMINATION_GRACE_PERIOD_SECONDS', settings.KUBERNETES_POD_TERMINATION_GRACE_PERIOD_SECONDS) # noqa

tasks = []
for scale_type, replicas in scale_types.items():
# only web / cmd are routable
@@ -433,6 +436,7 @@ def _scale_pods(self, scale_types):
'routable': routable,
'deploy_batches': batches,
'deploy_timeout': deploy_timeout,
'pod_termination_grace_period_seconds': pod_termination_grace_period_seconds,
}

# gather all proc types to be deployed
@@ -486,6 +490,9 @@ def deploy(self, release, force_deploy=False):

deployment_history = release.config.values.get('KUBERNETES_DEPLOYMENTS_REVISION_HISTORY_LIMIT', settings.KUBERNETES_DEPLOYMENTS_REVISION_HISTORY_LIMIT) # noqa

# get application level pod termination grace period
pod_termination_grace_period_seconds = release.config.values.get('KUBERNETES_POD_TERMINATION_GRACE_PERIOD_SECONDS', settings.KUBERNETES_POD_TERMINATION_GRACE_PERIOD_SECONDS) # noqa

# deploy application to k8s. Also handles initial scaling
deploys = {}
image = release.image
@@ -512,7 +519,6 @@ def deploy(self, release, force_deploy=False):
'tags': tags,
'envs': envs,
'registry': release.config.registry,
# only used if there is no previous RC
'replicas': replicas,
'version': version,
'app_type': scale_type,
@@ -522,7 +528,8 @@ def deploy(self, release, force_deploy=False):
'deploy_batches': batches,
'deploy_timeout': deploy_timeout,
'deployment_history_limit': deployment_history,
'release_summary': release.summary
'release_summary': release.summary,
'pod_termination_grace_period_seconds': pod_termination_grace_period_seconds,
}

# Sort deploys so routable comes first
@@ -729,6 +736,9 @@ def pod_name(size=5, chars=string.ascii_lowercase + string.digits):
# see if the app config has deploy timeout preference, otherwise use global
deploy_timeout = release.config.values.get('DEIS_DEPLOY_TIMEOUT', settings.DEIS_DEPLOY_TIMEOUT) # noqa

# get application level pod termination grace period
pod_termination_grace_period_seconds = release.config.values.get('KUBERNETES_POD_TERMINATION_GRACE_PERIOD_SECONDS', settings.KUBERNETES_POD_TERMINATION_GRACE_PERIOD_SECONDS) # noqa

name = self._get_job_id(scale_type) + '-' + pod_name()
self.log("{} on {} runs '{}'".format(user.username, name, command))

@@ -743,7 +753,8 @@ def pod_name(size=5, chars=string.ascii_lowercase + string.digits):
'registry': release.config.registry,
'version': version,
'build_type': release.build.type,
'deploy_timeout': deploy_timeout
'deploy_timeout': deploy_timeout,
'pod_termination_grace_period_seconds': pod_termination_grace_period_seconds,
}

try:
@@ -110,8 +110,8 @@ def manifest(self, namespace, name, image, **kwargs):
# apply tags as needed to restrict pod to particular node(s)
spec['nodeSelector'] = kwargs.get('tags', {})

# How long until a pod is forcefully terminated
spec['terminationGracePeriodSeconds'] = settings.KUBERNETES_POD_TERMINATION_GRACE_PERIOD_SECONDS # noqa
# How long until a pod is forcefully terminated. 30 is kubernetes default
spec['terminationGracePeriodSeconds'] = kwargs.get('pod_termination_grace_period_seconds', 30) # noqa

# set the image pull policy that is associated with the application container
kwargs['image_pull_policy'] = settings.DOCKER_BUILDER_IMAGE_PULL_POLICY
@@ -384,14 +384,20 @@ def _set_image_secret(self, data, namespace, **kwargs):
data['imagePullSecrets'] = [{'name': secret_name}]

def delete(self, namespace, name):
# get timeout info from pod
pod = self.pod.get(namespace, name).json()
# 30 seconds is the kubernetes default
timeout = pod['spec'].get('terminationGracePeriodSeconds', 30)

# delete pod
url = self.api("/namespaces/{}/pods/{}", namespace, name)
resp = self.session.delete(url)
if self.unhealthy(resp.status_code):
raise KubeHTTPException(resp, 'delete Pod "{}" in Namespace "{}"', name, namespace)

# Verify the pod has been deleted
# Only wait for as long as the grace period lasts - k8s will eventually GC the pod
for _ in range(settings.KUBERNETES_POD_TERMINATION_GRACE_PERIOD_SECONDS):
for _ in range(timeout):
try:
pod = self.pod.get(namespace, name).json()
# hide pod if it is past the graceful termination period
@@ -630,7 +636,15 @@ def wait_until_terminated(self, namespace, labels, current, desired):
# https://github.com/kubernetes/kubernetes/blob/release-1.2/docs/devel/api-conventions.md#metadata
# http://kubernetes.io/docs/user-guide/pods/#termination-of-pods

timeout = settings.KUBERNETES_POD_TERMINATION_GRACE_PERIOD_SECONDS
# fetch timeout from the first pod
pods = self.get(namespace, labels=labels).json()
if not pods['items']:
return

spec = pods['items'][0]['spec']
# default to 30 since that's kubernetes default
timeout = spec.get('terminationGracePeriodSeconds', 30)

delta = current - desired
self.log(namespace, "waiting for {} pods to be terminated ({}s timeout)".format(delta, timeout)) # noqa
for waited in range(timeout):
@@ -21,6 +21,7 @@ def create(self, namespace=None, name=generate_random_name(), **kwargs):
'app_type': kwargs.get('app_type', 'web'),
'version': kwargs.get('version', 'v99'),
'replicas': kwargs.get('replicas', 4),
'pod_termination_grace_period_seconds': 2,
}

deployment = self.scheduler.deployment.create(namespace, name, 'quay.io/fake/image',
@@ -38,6 +39,7 @@ def update(self, namespace=None, name=generate_random_name(), **kwargs):
'app_type': kwargs.get('app_type', 'web'),
'version': kwargs.get('version', 'v99'),
'replicas': kwargs.get('replicas', 4),
'pod_termination_grace_period_seconds': 2,
}

deployment = self.scheduler.deployment.update(namespace, name, 'quay.io/fake/image',
@@ -56,6 +58,7 @@ def scale(self, namespace=None, name=generate_random_name(), **kwargs):
'app_type': kwargs.get('app_type', 'web'),
'version': kwargs.get('version', 'v99'),
'replicas': kwargs.get('replicas', 4),
'pod_termination_grace_period_seconds': 2,
}

self.scheduler.scale(namespace, name, 'quay.io/fake/image', 'sh', 'start', **kwargs)
@@ -23,6 +23,7 @@ def create(self, namespace=None, name=generate_random_name(), **kwargs):
'app_type': kwargs.get('app_type', 'web'),
'version': kwargs.get('version', 'v99'),
'replicas': kwargs.get('replicas', 1),
'pod_termination_grace_period_seconds': 2,
}

# create a Deployment to test HPA with
@@ -68,6 +69,7 @@ def update_deployment(self, namespace=None, name=generate_random_name(), **kwarg
'app_type': kwargs.get('app_type', 'web'),
'version': kwargs.get('version', 'v99'),
'replicas': kwargs.get('replicas', 4),
'pod_termination_grace_period_seconds': 2,
}

deployment = self.scheduler.deployment.update(namespace, name, 'quay.io/fake/image',
@@ -21,6 +21,7 @@ def create(self, namespace=None, name=generate_random_name(), **kwargs):
'app_type': kwargs.get('app_type', 'web'),
'version': kwargs.get('version', 'v99'),
'replicas': kwargs.get('replicas', 4),
'pod_termination_grace_period_seconds': 2,
}

rc = self.scheduler.rc.create(namespace, name, 'quay.io/fake/image',
@@ -39,7 +40,8 @@ def scale_rc(self, namespace=None, name=generate_random_name(), **kwargs):
'app_type': kwargs.get('app_type', 'web'),
'version': kwargs.get('version', 'v99'),
'replicas': kwargs.get('replicas', 4),
'deploy_timeout': 120
'deploy_timeout': 120,
'pod_termination_grace_period_seconds': 2,
}

self.scheduler.scale_rc(namespace, name, 'quay.io/fake/image', 'sh', 'start', **kwargs)

0 comments on commit 689df78

Please sign in to comment.
You can’t perform that action at this time.