From a45643c95992e67c96621047763b4a7aab0e368e Mon Sep 17 00:00:00 2001
From: helgi
Date: Tue, 30 Aug 2016 12:35:38 -0700
Subject: [PATCH] fix(app): rollback all process types to previous version
 when one (or more) process type fails a deploy

Previously only the failed process type would roll itself back, resulting
in a scenario where worker may be on v6 and web on v5

This moves away from using the built-in rollback functionality in
Deployments and rather deploys the previous release again to get the same
effect. Rollback in Deployments was taking it to the last known good
deployment, in this case that'd be the latest for some types... knowing
the revision of the last deploy would be hard as well.

The way it works when deploying the old release is an identical replicaset
(and thus identical template pod hash is generated) so things will very
much look like a native rollback. If the Controller has done any changes
to how it constructs the pod manifest then this could generate a totally
new ReplicaSet but that is also fine as it will be booting the previous
release from DB

Fixes #1013
---
 rootfs/api/models/app.py     | 18 ++++++++++++++++--
 rootfs/scheduler/__init__.py |  7 -------
 2 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/rootfs/api/models/app.py b/rootfs/api/models/app.py
index ea05e761d..f9e6c1458 100644
--- a/rootfs/api/models/app.py
+++ b/rootfs/api/models/app.py
@@ -470,7 +470,7 @@ def _scale_pods(self, scale_types):
             self.log(err, logging.ERROR)
             raise ServiceUnavailable(err) from e
 
-    def deploy(self, release, force_deploy=False):
+    def deploy(self, release, force_deploy=False, rollback_on_failure=True):  # noqa
         """
         Deploy a new release to this application
 
@@ -569,8 +569,22 @@ def deploy(self, release, force_deploy=False):
                 )
                 for scale_type, kwargs in deploys.items()
             ]
-            async_run(tasks)
+            try:
+                async_run(tasks)
+            except KubeException as e:
+                if rollback_on_failure:
+                    err = 'There was a problem deploying {}. Rolling back process types to release {}.'.format(version, "v{}".format(release.previous().version))  # noqa
+                    # This goes in the log before the rollback starts
+                    self.log(err, logging.ERROR)
+                    # revert all process types to old release
+                    self.deploy(release.previous(), force_deploy=True, rollback_on_failure=False)
+                    # let it bubble up
+                    raise DeisException('{}\n{}'.format(err, str(e))) from e
+
+                # otherwise just re-raise
+                raise
         except Exception as e:
+            # This gets shown to the end user
             err = '(app::deploy): {}'.format(e)
             self.log(err, logging.ERROR)
             raise ServiceUnavailable(err) from e
diff --git a/rootfs/scheduler/__init__.py b/rootfs/scheduler/__init__.py
index 562671749..cadab0040 100644
--- a/rootfs/scheduler/__init__.py
+++ b/rootfs/scheduler/__init__.py
@@ -181,15 +181,8 @@ def deploy(self, namespace, name, image, entrypoint, command, **kwargs):  # noqa
                 namespace, name, image, entrypoint, command, **kwargs
             )
         except KubeException as e:
-            # rollback to the previous Deployment
-            kwargs['rollback'] = True
-            self.deployment.update(
-                namespace, name, image, entrypoint, command, **kwargs
-            )
-
             raise KubeException(
                 'There was a problem while deploying {} of {}-{}. '
-                'Going back to the previous release. '
                 "Additional information:\n{}".format(version, namespace, app_type, str(e))
             ) from e
 