From 56b32d41331737a5c92cc5089554b73e0e059270 Mon Sep 17 00:00:00 2001 From: helgi Date: Tue, 16 Aug 2016 17:11:54 -0700 Subject: [PATCH] ref(scheduler): split up the scheduler code into individual resources and to be modular This adds a resources package which contains all Kubernetes resources as their own individual classes. Each resource is self-registering via the ResourceRegistry metaclass; this allows the base Resource class to know what resources are available and expose that up into KubeHTTPClient. Each resource can specify its own API prefix and version, which allows us to support resources moving between API endpoints (see HPA, a new resource). To make it easy for resources to interact with one another, each resource and the KubeHTTPClient can access other resources via `self.hpa.create()` or `self.pod.get()`; the short form (if a resource supports that) works, and singular and plural also work via some intelligent mapping. `version()` was added to let Resources find out what Kubernetes version they are dealing with. 
Closes #875 --- rootfs/api/models/__init__.py | 6 +- rootfs/api/models/app.py | 44 +- rootfs/api/models/certificate.py | 10 +- rootfs/api/models/config.py | 2 +- rootfs/api/models/release.py | 22 +- rootfs/api/tests/test_app.py | 8 +- rootfs/api/tests/test_app_settings.py | 10 +- rootfs/api/tests/test_domain.py | 8 +- rootfs/api/tests/test_pods.py | 4 +- rootfs/scheduler/__init__.py | 1744 ++--------------- rootfs/scheduler/exceptions.py | 16 + rootfs/scheduler/mock.py | 140 +- rootfs/scheduler/resources/__init__.py | 10 + rootfs/scheduler/resources/__resource.py | 34 + rootfs/scheduler/resources/deployment.py | 344 ++++ .../resources/horizontalpodautoscaler.py | 154 ++ rootfs/scheduler/resources/namespace.py | 62 + rootfs/scheduler/resources/node.py | 27 + rootfs/scheduler/resources/pod.py | 709 +++++++ rootfs/scheduler/resources/replicaset.py | 29 + .../resources/replicationcontroller.py | 135 ++ rootfs/scheduler/resources/secret.py | 115 ++ rootfs/scheduler/resources/service.py | 88 + rootfs/scheduler/tests/__init__.py | 5 +- rootfs/scheduler/tests/test_deployments.py | 43 +- .../tests/test_horizontalpodautoscaler.py | 203 ++ rootfs/scheduler/tests/test_namespaces.py | 10 +- rootfs/scheduler/tests/test_nodes.py | 19 +- .../tests/test_replicationcontrollers.py | 20 +- rootfs/scheduler/tests/test_scheduler.py | 35 +- rootfs/scheduler/tests/test_secrets.py | 26 +- rootfs/scheduler/tests/test_services.py | 24 +- 32 files changed, 2320 insertions(+), 1786 deletions(-) create mode 100644 rootfs/scheduler/exceptions.py create mode 100644 rootfs/scheduler/resources/__init__.py create mode 100644 rootfs/scheduler/resources/__resource.py create mode 100644 rootfs/scheduler/resources/deployment.py create mode 100644 rootfs/scheduler/resources/horizontalpodautoscaler.py create mode 100644 rootfs/scheduler/resources/namespace.py create mode 100644 rootfs/scheduler/resources/node.py create mode 100644 rootfs/scheduler/resources/pod.py create mode 100644 
rootfs/scheduler/resources/replicaset.py create mode 100644 rootfs/scheduler/resources/replicationcontroller.py create mode 100644 rootfs/scheduler/resources/secret.py create mode 100644 rootfs/scheduler/resources/service.py create mode 100644 rootfs/scheduler/tests/test_horizontalpodautoscaler.py diff --git a/rootfs/api/models/__init__.py b/rootfs/api/models/__init__.py index 5e1e380d5..448a9e168 100644 --- a/rootfs/api/models/__init__.py +++ b/rootfs/api/models/__init__.py @@ -47,12 +47,12 @@ class Meta: @property def _scheduler(self): mod = importlib.import_module(settings.SCHEDULER_MODULE) - return mod.SchedulerClient() + return mod.SchedulerClient(settings.SCHEDULER_URL) def _fetch_service_config(self, app): try: # Get the service from k8s to attach the domain correctly - svc = self._scheduler.get_service(app, app).json() + svc = self._scheduler.svc.get(app, app).json() except KubeException as e: raise ServiceUnavailable('Could not fetch Kubernetes Service {}'.format(app)) from e @@ -93,7 +93,7 @@ def _save_service_config(self, app, component, data): # Update the k8s service for the application with new service information try: - self._scheduler.update_service(app, app, svc) + self._scheduler.svc.update(app, app, svc) except KubeException as e: raise ServiceUnavailable('Could not update Kubernetes Service {}'.format(app)) from e diff --git a/rootfs/api/models/app.py b/rootfs/api/models/app.py index 58b1d5043..1b060d541 100644 --- a/rootfs/api/models/app.py +++ b/rootfs/api/models/app.py @@ -104,7 +104,7 @@ def save(self, *args, **kwargs): self.release_set.latest() except Release.DoesNotExist: try: - if self._scheduler.get_namespace(self.id).status_code == 200: + if self._scheduler.ns.get(self.id).status_code == 200: # Namespace already exists err = "{} already exists as a namespace in this kuberenetes setup".format(self.id) # noqa self.log(err, logging.INFO) @@ -203,18 +203,18 @@ def create(self, *args, **kwargs): # noqa self.log('creating Namespace {} and 
services'.format(namespace), level=logging.DEBUG) # Create essential resources try: - self._scheduler.get_namespace(namespace) + self._scheduler.ns.get(namespace) except KubeException: - self._scheduler.create_namespace(namespace) + self._scheduler.ns.create(namespace) try: - self._scheduler.get_service(namespace, service) + self._scheduler.svc.get(namespace, service) except KubeException: - self._scheduler.create_service(namespace, service) + self._scheduler.svc.create(namespace, service) except KubeException as e: # Blow it all away only if something horrible happens try: - self._scheduler.delete_namespace(namespace) + self._scheduler.ns.delete(namespace) except KubeException as e: # Just feed into the item below raise ServiceUnavailable('Could not delete the Namespace in Kubernetes') from e @@ -234,12 +234,12 @@ def delete(self, *args, **kwargs): """Delete this application including all containers""" self.log("deleting environment") try: - self._scheduler.delete_namespace(self.id) + self._scheduler.ns.delete(self.id) # wait 30 seconds for termination for _ in range(30): try: - self._scheduler.get_namespace(self.id) + self._scheduler.ns.get(self.id) except KubeException: break except KubeException as e: @@ -264,7 +264,7 @@ def restart(self, **kwargs): # noqa desired = 0 labels = self._scheduler_filter(**kwargs) # fetch RS (which represent Deployments) - controllers = self._scheduler.get_replicasets(kwargs['id'], labels=labels) + controllers = self._scheduler.rs.get(kwargs['id'], labels=labels) for controller in controllers.json()['items']: desired += controller['spec']['replicas'] @@ -275,7 +275,7 @@ def restart(self, **kwargs): # noqa try: tasks = [ functools.partial( - self._scheduler.delete_pod, + self._scheduler.pod.delete, self.id, pod['name'] ) for pod in self.list_pods(**kwargs) @@ -577,7 +577,7 @@ def _check_deployment_in_progress(self, deploys, force_deploy=False): for scale_type, kwargs in deploys.items(): # Is there an existing deployment in progress? 
name = self._get_job_id(scale_type) - in_progress, deploy_okay = self._scheduler.deployment_in_progress( + in_progress, deploy_okay = self._scheduler.deployment.in_progress( self.id, name, kwargs.get("deploy_timeout"), kwargs.get("deploy_batches"), kwargs.get("replicas"), kwargs.get("tags") ) @@ -768,9 +768,9 @@ def list_pods(self, *args, **kwargs): # in case a singular pod is requested if 'name' in kwargs: - pods = [self._scheduler.get_pod(self.id, kwargs['name']).json()] + pods = [self._scheduler.pod.get(self.id, kwargs['name']).json()] else: - pods = self._scheduler.get_pods(self.id, labels=labels).json()['items'] + pods = self._scheduler.pod.get(self.id, labels=labels).json()['items'] data = [] for p in pods: @@ -779,14 +779,14 @@ def list_pods(self, *args, **kwargs): if labels['type'] == 'run': continue - state = str(self._scheduler.pod_state(p)) + state = str(self._scheduler.pod.state(p)) # follows kubelete convention - these are hidden unless show-all is set if state in ['down', 'crashed']: continue # hide pod if it is passed the graceful termination period - if self._scheduler.pod_deleted(p): + if self._scheduler.pod.deleted(p): continue item = Pod() @@ -862,9 +862,9 @@ def maintenance_mode(self, mode): try: service['metadata']['annotations']['router.deis.io/maintenance'] = str(mode).lower() - self._scheduler.update_service(self.id, self.id, data=service) + self._scheduler.svc.update(self.id, self.id, data=service) except KubeException as e: - self._scheduler.update_service(self.id, self.id, data=old_service) + self._scheduler.svc.update(self.id, self.id, data=old_service) raise ServiceUnavailable(str(e)) from e def routable(self, routable): @@ -876,9 +876,9 @@ def routable(self, routable): try: service['metadata']['labels']['router.deis.io/routable'] = str(routable).lower() - self._scheduler.update_service(self.id, self.id, data=service) + self._scheduler.svc.update(self.id, self.id, data=service) except KubeException as e: - 
self._scheduler.update_service(self.id, self.id, data=old_service) + self._scheduler.svc.update(self.id, self.id, data=old_service) raise ServiceUnavailable(str(e)) from e def _update_application_service(self, namespace, app_type, port, routable=False, annotations={}): # noqa @@ -907,10 +907,10 @@ def _update_application_service(self, namespace, app_type, port, routable=False, # port 80 is the only one we care about right now service['spec']['ports'][pos]['targetPort'] = int(port) - self._scheduler.update_service(namespace, namespace, data=service) + self._scheduler.svc.update(namespace, namespace, data=service) except Exception as e: # Fix service to old port and app type - self._scheduler.update_service(namespace, namespace, data=old_service) + self._scheduler.svc.update(namespace, namespace, data=old_service) raise KubeException(str(e)) from e def whitelist(self, whitelist): @@ -922,6 +922,6 @@ def whitelist(self, whitelist): try: addresses = ",".join(address for address in whitelist) service['metadata']['annotations']['router.deis.io/whitelist'] = addresses - self._scheduler.update_service(self.id, self.id, data=service) + self._scheduler.svc.update(self.id, self.id, data=service) except KubeException as e: raise ServiceUnavailable(str(e)) from e diff --git a/rootfs/api/models/certificate.py b/rootfs/api/models/certificate.py index e65e7701d..efac963e0 100644 --- a/rootfs/api/models/certificate.py +++ b/rootfs/api/models/certificate.py @@ -183,14 +183,14 @@ def attach_in_kubernetes(self, domain): 'tls.key': self.key } - secret = self._scheduler.get_secret(namespace, name).json()['data'] + secret = self._scheduler.secret.get(namespace, name).json()['data'] except KubeException: - self._scheduler.create_secret(namespace, name, data) + self._scheduler.secret.create(namespace, name, data) else: # update cert secret to the TLS Ingress format if required if secret != data: try: - self._scheduler.update_secret(namespace, name, data) + 
self._scheduler.secret.update(namespace, name, data) except KubeException as e: msg = 'There was a problem updating the certificate secret ' \ '{} for {}'.format(name, namespace) @@ -225,8 +225,8 @@ def detach(self, *args, **kwargs): if len(self.domains) == 0: try: # We raise an exception when a secret doesn't exist - self._scheduler.get_secret(namespace, name) - self._scheduler.delete_secret(namespace, name) + self._scheduler.secret.get(namespace, name) + self._scheduler.secret.delete(namespace, name) except KubeException as e: raise ServiceUnavailable("Could not delete certificate secret {} for application {}".format(name, namespace)) from e # noqa diff --git a/rootfs/api/models/config.py b/rootfs/api/models/config.py index 7a071ad3e..a64622680 100644 --- a/rootfs/api/models/config.py +++ b/rootfs/api/models/config.py @@ -94,7 +94,7 @@ def set_tags(self, previous_config): return # Get all nodes with label selectors - nodes = self._scheduler.get_nodes(labels=self.tags).json() + nodes = self._scheduler.node.get(labels=self.tags).json() if nodes['items']: return diff --git a/rootfs/api/models/release.py b/rootfs/api/models/release.py index ca5d4b365..26edb95ad 100644 --- a/rootfs/api/models/release.py +++ b/rootfs/api/models/release.py @@ -259,7 +259,7 @@ def cleanup_old(self): # noqa # Cleanup controllers labels = {'heritage': 'deis'} controller_removal = [] - controllers = self._scheduler.get_rcs(self.app.id, labels=labels).json() + controllers = self._scheduler.rc.get(self.app.id, labels=labels).json() for controller in controllers['items']: current_version = controller['metadata']['labels']['version'] # skip the latest release @@ -286,20 +286,20 @@ def cleanup_old(self): # noqa 'app': self.app.id, 'type': 'env', } - secrets = self._scheduler.get_secrets(self.app.id, labels=labels).json() + secrets = self._scheduler.secret.get(self.app.id, labels=labels).json() for secret in secrets['items']: current_version = secret['metadata']['labels']['version'] # skip the 
latest release if current_version == latest_version: continue - self._scheduler.delete_secret(self.app.id, secret['metadata']['name']) + self._scheduler.secret.delete(self.app.id, secret['metadata']['name']) # Remove stray pods labels = {'heritage': 'deis'} - pods = self._scheduler.get_pods(self.app.id, labels=labels).json() + pods = self._scheduler.pod.get(self.app.id, labels=labels).json() for pod in pods['items']: - if self._scheduler.pod_deleted(pod): + if self._scheduler.pod.deleted(pod): continue current_version = pod['metadata']['labels']['version'] @@ -308,7 +308,7 @@ def cleanup_old(self): # noqa continue try: - self._scheduler.delete_pod(self.app.id, pod['metadata']['name']) + self._scheduler.pod.delete(self.app.id, pod['metadata']['name']) except KubeHTTPException as e: # Sometimes k8s will manage to remove the pod from under us if e.response.status_code == 404: @@ -329,7 +329,7 @@ def _cleanup_deployment_secrets_and_configs(self, namespace): # Find all ReplicaSets versions = [] labels = {'heritage': 'deis', 'app': namespace} - replicasets = self._scheduler.get_replicasets(namespace, labels=labels).json() + replicasets = self._scheduler.rs.get(namespace, labels=labels).json() for replicaset in replicasets['items']: if ( 'version' not in replicaset['metadata']['labels'] or @@ -348,9 +348,9 @@ def _cleanup_deployment_secrets_and_configs(self, namespace): 'version__notin': versions } self.app.log('Cleaning up orphaned env var secrets for application {}'.format(namespace), level=logging.DEBUG) # noqa - secrets = self._scheduler.get_secrets(namespace, labels=labels).json() + secrets = self._scheduler.secret.get(namespace, labels=labels).json() for secret in secrets['items']: - self._scheduler.delete_secret(namespace, secret['metadata']['name']) + self._scheduler.secret.delete(namespace, secret['metadata']['name']) def _delete_release_in_scheduler(self, namespace, version): """ @@ -368,14 +368,14 @@ def _delete_release_in_scheduler(self, namespace, version): # 
see if the app config has deploy timeout preference, otherwise use global deploy_timeout = self.config.values.get('DEIS_DEPLOY_TIMEOUT', settings.DEIS_DEPLOY_TIMEOUT) # noqa - controllers = self._scheduler.get_rcs(namespace, labels=labels).json() + controllers = self._scheduler.rc.get(namespace, labels=labels).json() for controller in controllers['items']: self._scheduler.cleanup_release(namespace, controller, deploy_timeout) # remove secret that contains env vars for the release try: secret_name = "{}-{}-env".format(namespace, version) - self._scheduler.delete_secret(namespace, secret_name) + self._scheduler.secret.delete(namespace, secret_name) except KubeHTTPException: pass diff --git a/rootfs/api/tests/test_app.py b/rootfs/api/tests/test_app.py index 3e49d2207..fb0c1f3e3 100644 --- a/rootfs/api/tests/test_app.py +++ b/rootfs/api/tests/test_app.py @@ -373,22 +373,22 @@ def test_app_exists_in_kubernetes(self, mock_requests): def test_app_create_failure_kubernetes_create(self, mock_requests): """ - Create an app but have scheduler.create_service fail with an exception + Create an app but have scheduler.svc.create fail with an exception """ - with mock.patch('scheduler.KubeHTTPClient.create_service') as mock_kube: + with mock.patch('scheduler.resources.service.Service.create') as mock_kube: mock_kube.side_effect = KubeException('Boom!') response = self.client.post('/v2/apps') self.assertEqual(response.status_code, 503, response.data) def test_app_delete_failure_kubernetes_destroy(self, mock_requests): """ - Create an app and then delete but have scheduler.delete_namespace + Create an app and then delete but have scheduler.ns.delete fail with an exception """ # create app_id = self.create_app() - with mock.patch('scheduler.KubeHTTPClient.delete_namespace') as mock_kube: + with mock.patch('scheduler.resources.namespace.Namespace.delete') as mock_kube: # delete mock_kube.side_effect = KubeException('Boom!') response = self.client.delete('/v2/apps/{}'.format(app_id)) 
diff --git a/rootfs/api/tests/test_app_settings.py b/rootfs/api/tests/test_app_settings.py index d048108a6..ce759e3a4 100644 --- a/rootfs/api/tests/test_app_settings.py +++ b/rootfs/api/tests/test_app_settings.py @@ -72,10 +72,8 @@ def test_settings_routable(self, mock_requests): Create an application with the routable flag turned on or off """ # create app, expecting routable to be true - body = {'id': 'myid'} - response = self.client.post('/v2/apps', body) - self.assertEqual(response.status_code, 201, response.data) - app = App.objects.get(id='myid') + app_id = self.create_app() + app = App.objects.get(id=app_id) self.assertTrue(app.appsettings_set.latest().routable) # Set routable to false response = self.client.post( @@ -163,8 +161,8 @@ def test_kubernetes_service_failure(self, mock_requests): """ app_id = self.create_app() - # scheduler.update_service exception - with mock.patch('scheduler.KubeHTTPClient.update_service') as mock_kube: + # scheduler.svc.update exception + with mock.patch('scheduler.resources.service.Service.update') as mock_kube: mock_kube.side_effect = KubeException('Boom!') addresses = ["2.3.4.5"] url = '/v2/apps/{}/whitelist'.format(app_id) diff --git a/rootfs/api/tests/test_domain.py b/rootfs/api/tests/test_domain.py index ffd3baff0..47d831eb7 100644 --- a/rootfs/api/tests/test_domain.py +++ b/rootfs/api/tests/test_domain.py @@ -325,16 +325,16 @@ def test_kubernetes_service_failure(self): """ app_id = self.create_app() - # scheduler.get_service exception - with mock.patch('scheduler.KubeHTTPClient.get_service') as mock_kube: + # scheduler.svc.get exception + with mock.patch('scheduler.resources.service.Service.get') as mock_kube: mock_kube.side_effect = KubeException('Boom!') domain = 'foo.com' url = '/v2/apps/{}/domains'.format(app_id) response = self.client.post(url, {'domain': domain}) self.assertEqual(response.status_code, 503, response.data) - # scheduler.update_service exception - with 
mock.patch('scheduler.KubeHTTPClient.update_service') as mock_kube: + # scheduler.svc.update exception + with mock.patch('scheduler.resources.service.Service.update') as mock_kube: domain = 'foo.com' url = '/v2/apps/{}/domains'.format(app_id) response = self.client.post(url, {'domain': domain}) diff --git a/rootfs/api/tests/test_pods.py b/rootfs/api/tests/test_pods.py index 59e8c8ad7..1853ed591 100644 --- a/rootfs/api/tests/test_pods.py +++ b/rootfs/api/tests/test_pods.py @@ -792,8 +792,8 @@ def test_list_pods_failure(self, mock_requests): app_id = self.create_app() - with mock.patch('scheduler.KubeHTTPClient.get_pod') as kube_pod: - with mock.patch('scheduler.KubeHTTPClient.get_pods') as kube_pods: + with mock.patch('scheduler.resources.pod.Pod.get') as kube_pod: + with mock.patch('scheduler.resources.pod.Pod.get') as kube_pods: kube_pod.side_effect = KubeException('boom!') kube_pods.side_effect = KubeException('boom!') url = "/v2/apps/{app_id}/pods".format(**locals()) diff --git a/rootfs/scheduler/__init__.py b/rootfs/scheduler/__init__.py index 950e23153..562671749 100644 --- a/rootfs/scheduler/__init__.py +++ b/rootfs/scheduler/__init__.py @@ -1,52 +1,18 @@ from collections import OrderedDict -from datetime import datetime, timedelta -import json +from datetime import datetime import logging -import operator -import os -import time -from urllib.parse import urljoin -import base64 - -from django.conf import settings -from docker.auth import auth as docker_auth -from .states import PodState import requests from requests_toolbelt import user_agent -from .utils import dict_merge +import time from api import __version__ as deis_version +from scheduler.exceptions import KubeException, KubeHTTPException # noqa +from scheduler.states import PodState logger = logging.getLogger(__name__) - - -class KubeException(Exception): - def __init__(self, *args, **kwargs): - Exception.__init__(self, *args, **kwargs) - - -class KubeHTTPException(KubeException): - def __init__(self, 
response, errmsg, *args, **kwargs): - self.response = response - - msg = errmsg.format(*args) - msg = "failed to {}: {} {}".format( - msg, - response.status_code, - response.reason - ) - KubeException.__init__(self, msg, *args, **kwargs) - - -def unhealthy(status_code): - if not 200 <= status_code <= 299: - return True - - return False - - session = None +resource_mapping = OrderedDict() def get_session(): @@ -65,15 +31,104 @@ def get_session(): class KubeHTTPClient(object): - # used as the basis to check if a pod is ready - deploy_timeout = 120 - apiversion = "v1" + # ISO-8601 which is used by kubernetes + DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%SZ' - def __init__(self): - self.url = settings.SCHEDULER_URL + def __init__(self, url): + global resource_mapping + self.url = url self.session = get_session() - def log(self, namespace, message, level=logging.INFO): + # map the various k8s Resources to an internal property + from scheduler.resources import Resource # lazy load + for res in Resource: + name = str(res.__name__).lower() # singular + component = name + 's' # make plural + # check if component has already been processed + if component in resource_mapping: + continue + + # get past recursion problems in case of self reference + resource_mapping[component] = '' + resource_mapping[component] = res() + # map singular Resource name to the plural one + resource_mapping[name] = component + if res.short_name is not None: + # map short name to long name so a resource can be named rs + # but have the main object live at replicasets + resource_mapping[str(res.short_name).lower()] = component + + def __getattr__(self, name): + global resource_mapping + if name in resource_mapping: + # resolve to final name if needed + component = resource_mapping[name] + if type(component) is not str: + # already a component object + return component + + return resource_mapping[component] + + return object.__getattribute__(self, name) + + def version(self): + """Get Kubernetes version as a 
float""" + response = self.session.get(self.url + '/version') + if self.unhealthy(response.status_code): + raise KubeHTTPException(response, 'fetching Kubernetes version') + + data = response.json() + return float('{}.{}'.format(data['major'], data['minor'])) + + @staticmethod + def parse_date(date): + return datetime.strptime(date, KubeHTTPClient.DATETIME_FORMAT) + + @staticmethod + def unhealthy(status_code): + return not 200 <= status_code <= 299 + + @staticmethod + def query_params(labels=None, fields=None, resource_version=None, pretty=False): + query = {} + + # labels and fields are encoded slightly differently than python-requests can do + if labels: + selectors = [] + for key, value in labels.items(): + # http://kubernetes.io/docs/user-guide/labels/#set-based-requirement + if '__notin' in key: + key = key.replace('__notin', '') + selectors.append('{} notin({})'.format(key, ','.join(value))) + # list is automagically a in() + elif '__in' in key or isinstance(value, list): + key = key.replace('__in', '') + selectors.append('{} in({})'.format(key, ','.join(value))) + elif value is None: + # allowing a check if a label exists (or not) without caring about value + selectors.append(key) + # http://kubernetes.io/docs/user-guide/labels/#equality-based-requirement + elif isinstance(value, str): + selectors.append('{}={}'.format(key, value)) + + query['labelSelector'] = ','.join(selectors) + + if fields: + fields = ['{}={}'.format(key, value) for key, value in fields.items()] + query['fieldSelector'] = ','.join(fields) + + # Which resource version to start from. Otherwise starts from the beginning + if resource_version: + query['resourceVersion'] = resource_version + + # If output should pretty print, only True / False allowed + if pretty: + query['pretty'] = pretty + + return query + + @staticmethod + def log(namespace, message, level='INFO'): """Logs a message in the context of this application. This prefixes log messages with a namespace "tag". 
@@ -81,11 +136,11 @@ def log(self, namespace, message, level=logging.INFO): sort like releasing or scaling, will be considered as "belonging" to the application instead of the controller and will be handled accordingly. """ - logger.log(level, "[{}]: {}".format(namespace, message)) + lvl = getattr(logging, level.upper()) if hasattr(logging, level.upper()) else logging.INFO + logger.log(lvl, "[{}]: {}".format(namespace, message)) def deploy(self, namespace, name, image, entrypoint, command, **kwargs): # noqa """Deploy Deployment depending on what's requested""" - self.deploy_timeout = kwargs.get('deploy_timeout') app_type = kwargs.get('app_type') version = kwargs.get('version') @@ -93,7 +148,7 @@ def deploy(self, namespace, name, image, entrypoint, command, **kwargs): # noqa try: # construct old school RC name rc_name = '{}-{}-{}'.format(namespace, version, app_type) - self.get_rc(namespace, rc_name) + self.rc.get(namespace, rc_name) self.log(namespace, 'RC {} already exists. Stopping deploy'.format(rc_name)) return except KubeHTTPException: @@ -110,25 +165,25 @@ def deploy(self, namespace, name, image, entrypoint, command, **kwargs): # noqa 'heritage': 'deis', } # this depends on the deployment object having the latest information - deployment = self.get_deployment(namespace, name).json() + deployment = self.deployment.get(namespace, name).json() if deployment['spec']['template']['metadata']['labels'] == labels: self.log(namespace, 'Deployment {} with release {} already exists. 
Stopping deploy'.format(name, version)) # noqa return except KubeException: # create the initial deployment object (and the first revision) - self.create_deployment( + self.deployment.create( namespace, name, image, entrypoint, command, **kwargs ) else: try: # kick off a new revision of the deployment - self.update_deployment( + self.deployment.update( namespace, name, image, entrypoint, command, **kwargs ) except KubeException as e: # rollback to the previous Deployment kwargs['rollback'] = True - self.update_deployment( + self.deployment.update( namespace, name, image, entrypoint, command, **kwargs ) @@ -145,154 +200,42 @@ def cleanup_release(self, namespace, controller, timeout): # Deployment takes care of this in the API, RC does not # Have the RC scale down pods and delete itself self._scale_rc(namespace, controller['metadata']['name'], 0, timeout) - self.delete_rc(namespace, controller['metadata']['name']) + self.rc.delete(namespace, controller['metadata']['name']) # Remove stray pods that the scale down will have missed (this can occassionally happen) - pods = self.get_pods(namespace, labels=controller['metadata']['labels']).json() + pods = self.pod.get(namespace, labels=controller['metadata']['labels']).json() for pod in pods['items']: - if self.pod_deleted(pod): + if self.pod.deleted(pod): continue try: - self.delete_pod(namespace, pod['metadata']['name']) + self.pod.delete(namespace, pod['metadata']['name']) except KubeHTTPException as e: # Sometimes k8s will manage to remove the pod from under us if e.response.status_code == 404: continue - def _get_deploy_steps(self, batches, tags): - # if there is no batch information available default to available nodes for app - if not batches: - # figure out how many nodes the application can go on - steps = len(self.get_nodes(labels=tags).json()['items']) - else: - steps = int(batches) - - return steps - - def _get_deploy_batches(self, steps, desired): - # figure out what kind of batches the deploy is done in - 1 
in, 1 out or higher - if desired < steps: - # do it all in one go - batches = [desired] - else: - # figure out the stepped deploy count and then see if there is a leftover - batches = [steps for n in set(range(1, (desired + 1))) if n % steps == 0] - if desired - sum(batches) > 0: - batches.append(desired - sum(batches)) - - return batches - def scale(self, namespace, name, image, entrypoint, command, **kwargs): """Scale Deployment""" - self.deploy_timeout = kwargs.get('deploy_timeout') - try: - self.get_deployment(namespace, name) + self.deployment.get(namespace, name) except KubeHTTPException as e: if e.response.status_code == 404: # create missing deployment - deleted if it fails try: - self.create_deployment(namespace, name, image, entrypoint, command, **kwargs) + self.deployment.create(namespace, name, image, entrypoint, command, **kwargs) except KubeException: # see if the deployment got created try: - self.get_deployment(namespace, name) + self.deployment.get(namespace, name) except KubeHTTPException as e: if e.response.status_code != 404: - self.delete_deployment(namespace, name) + self.deployment.delete(namespace, name) raise # let the scale failure bubble up - self._scale_deployment(namespace, name, image, entrypoint, command, **kwargs) - - def _build_pod_manifest(self, namespace, name, image, **kwargs): - app_type = kwargs.get('app_type') - build_type = kwargs.get('build_type') - - # labels that represent the pod(s) - labels = { - 'app': namespace, - 'version': kwargs.get('version'), - 'type': app_type, - 'heritage': 'deis', - } - - # create base pod structure - manifest = { - 'kind': 'Pod', - 'apiVersion': 'v1', - 'metadata': { - 'name': name, - 'labels': labels - }, - 'spec': {} - } - - # pod manifest spec - spec = manifest['spec'] - - # what should the pod do if it exits - spec['restartPolicy'] = kwargs.get('restartPolicy', 'Always') - - # apply tags as needed to restrict pod to particular node(s) - spec['nodeSelector'] = kwargs.get('tags', {}) - - # 
How long until a pod is forcefully terminated - spec['terminationGracePeriodSeconds'] = settings.KUBERNETES_POD_TERMINATION_GRACE_PERIOD_SECONDS # noqa - - # set the image pull policy that is associated with the application container - kwargs['image_pull_policy'] = settings.DOCKER_BUILDER_IMAGE_PULL_POLICY - - # Check if it is a slug builder image. - if build_type == "buildpack": - # only buildpack apps need access to object storage - try: - self.get_secret(namespace, 'objectstorage-keyfile') - except KubeException: - secret = self.get_secret('deis', 'objectstorage-keyfile').json() - self.create_secret(namespace, 'objectstorage-keyfile', secret['data']) - - # add the required volume to the top level pod spec - spec['volumes'] = [{ - 'name': 'objectstorage-keyfile', - 'secret': { - 'secretName': 'objectstorage-keyfile' - } - }] - - # added to kwargs to send to the container function - kwargs['volumeMounts'] = [{ - 'name': 'objectstorage-keyfile', - 'mountPath': '/var/run/secrets/deis/objectstore/creds', - 'readOnly': True - }] - - # overwrite image so slugrunner image is used in the container - image = settings.SLUGRUNNER_IMAGE - # slugrunner pull policy - kwargs['image_pull_policy'] = settings.SLUG_BUILDER_IMAGE_PULL_POLICY - - # create the base container - container = {} - - # process to call - if kwargs.get('command', []): - container['command'] = kwargs.get('command') - if kwargs.get('args', []): - container['args'] = kwargs.get('args') - - # set information to the application container - kwargs['image'] = image - container_name = namespace + '-' + app_type - self._set_container(namespace, container_name, container, **kwargs) - # add image to the mix - self._set_image_secret(spec, namespace, **kwargs) - - spec['containers'] = [container] - - return manifest + self.deployment.scale(namespace, name, image, entrypoint, command, **kwargs) def run(self, namespace, name, image, entrypoint, command, **kwargs): """Run a one-off command.""" @@ -309,17 +252,17 @@ def 
run(self, namespace, name, image, entrypoint, command, **kwargs): # create application config and build the pod manifest self.set_application_config(namespace, kwargs.get('envs', {}), kwargs.get('version')) - manifest = self._build_pod_manifest(namespace, name, image, **kwargs) + manifest = self.pod.manifest(namespace, name, image, **kwargs) - url = self._api("/namespaces/{}/pods", namespace) + url = self.pods.api("/namespaces/{}/pods", namespace) response = self.session.post(url, json=manifest) - if unhealthy(response.status_code): + if self.unhealthy(response.status_code): raise KubeHTTPException(response, 'create Pod in Namespace "{}"', namespace) # wait for run pod to start - use the same function as scale labels = manifest['metadata']['labels'] containers = manifest['spec']['containers'] - self._wait_until_pods_are_ready( + self.pods.wait_until_ready( namespace, containers, labels, @@ -336,9 +279,9 @@ def run(self, namespace, name, image, entrypoint, command, **kwargs): waited = 0 timeout = 1200 # 20 minutes while (state == 'up' and waited < timeout): - response = self.get_pod(namespace, name) + response = self.pod.get(namespace, name) pod = response.json() - state = str(self.pod_state(pod)) + state = str(self.pod.state(pod)) # default data exit_code = 0 @@ -356,19 +299,19 @@ def run(self, namespace, name, image, entrypoint, command, **kwargs): raise KubeException('Timed out (20 mins) while running') # check if it is possible to get logs - state = self.pod_state(self.get_pod(namespace, name).json()) + state = self.pod.state(self.pod.get(namespace, name).json()) # States below up do not have logs if not isinstance(state, PodState) or state < PodState.up: return exit_code, 'Could not get logs. Pod is in state {}'.format(str(state)) # grab log information - log = self._pod_log(namespace, name) + log = self.pod.logs(namespace, name) log.encoding = 'utf-8' # defaults to "ISO-8859-1" otherwise... 
return exit_code, log.text finally: # cleanup - self.delete_pod(namespace, name) + self.pod.delete(namespace, name) def set_application_config(self, namespace, envs, version): # env vars are stored in secrets and mapped to env in k8s @@ -387,374 +330,34 @@ def set_application_config(self, namespace, envs, version): secrets_env = OrderedDict(sorted(secrets_env.items(), key=lambda t: t[0])) secret_name = "{}-{}-env".format(namespace, version) - self.get_secret(namespace, secret_name) + self.secret.get(namespace, secret_name) except KubeHTTPException: - self.create_secret(namespace, secret_name, secrets_env, labels=labels) + self.secret.create(namespace, secret_name, secrets_env, labels=labels) else: - self.update_secret(namespace, secret_name, secrets_env, labels=labels) + self.secret.update(namespace, secret_name, secrets_env, labels=labels) - def _set_container(self, namespace, container_name, data, **kwargs): - """Set app container information (env, healthcheck, etc) on a Pod""" - app_type = kwargs.get('app_type') - mem = kwargs.get('memory', {}).get(app_type) - cpu = kwargs.get('cpu', {}).get(app_type) - env = kwargs.get('envs', {}) - - # container name - data['name'] = container_name - # set the image to use - data['image'] = kwargs.get('image') - # set the image pull policy for the above image - data['imagePullPolicy'] = kwargs.get('image_pull_policy') - # add in any volumes that need to be mounted into the container - data['volumeMounts'] = kwargs.get('volumeMounts', []) - - # create env list if missing - if 'env' not in data: - data['env'] = [] - - if env: - # map application configuration (env secret) to env vars - secret_name = "{}-{}-env".format(namespace, kwargs.get('version')) - for key in env.keys(): - item = { - "name": key, - "valueFrom": { - "secretKeyRef": { - "name": secret_name, - # k8s doesn't allow _ so translate to -, see above - "key": key.lower().replace('_', '-') - } - } - } - - # add value to env hash. 
Overwrite hardcoded values if need be - match = next((k for k, e in enumerate(data["env"]) if e['name'] == key), None) - if match is not None: - data["env"][match] = item - else: - data["env"].append(item) - - # Inject debugging if workflow is in debug mode - if os.environ.get("DEIS_DEBUG", False): - data["env"].append({ - "name": "DEIS_DEBUG", - "value": "1" - }) - - # list sorted by dict key name - data['env'].sort(key=operator.itemgetter('name')) - - if mem or cpu: - data["resources"] = {"limits": {}} - - if mem: - if mem[-2:-1].isalpha() and mem[-1].isalpha(): - mem = mem[:-1] - - # memory needs to be upper cased (only first char) - mem = mem.upper() + "i" - data["resources"]["limits"]["memory"] = mem - - if cpu: - # CPU needs to be defined as lower case - data["resources"]["limits"]["cpu"] = cpu.lower() - - # add in healthchecks - self._set_health_checks(data, env, **kwargs) - - def _set_health_checks(self, container, env, **kwargs): - healthchecks = kwargs.get('healthcheck', None) - if healthchecks: - # check if a port is present. 
if not, auto-populate it - # TODO: rip this out when we stop supporting deis config:set HEALTHCHECK_URL - if ( - healthchecks.get('livenessProbe') is not None and - healthchecks['livenessProbe'].get('httpGet') is not None and - healthchecks['livenessProbe']['httpGet'].get('port') is None - ): - healthchecks['livenessProbe']['httpGet']['port'] = env['PORT'] - container.update(healthchecks) - elif kwargs.get('routable', False): - self._default_readiness_probe(container, kwargs.get('build_type'), env.get('PORT', None)) # noqa - - def _get_private_registry_config(self, registry, image): - secret_name = settings.REGISTRY_SECRET_PREFIX - if registry: - # try to get the hostname information - hostname = registry.get('hostname', None) - if not hostname: - hostname, _ = docker_auth.split_repo_name(image) - if hostname == docker_auth.INDEX_NAME: - hostname = "https://index.docker.io/v1/" - username = registry.get('username') - password = registry.get('password') - elif settings.REGISTRY_LOCATION == 'off-cluster': - secret = self.get_secret('deis', 'registry-secret').json() - username = secret['data']['username'] - password = secret['data']['password'] - hostname = secret['data']['hostname'] - if hostname == '': - hostname = "https://index.docker.io/v1/" - secret_name = secret_name+"-"+settings.REGISTRY_LOCATION - elif settings.REGISTRY_LOCATION in ['ecr', 'gcr']: - return None, secret_name+"-"+settings.REGISTRY_LOCATION, False + def _get_deploy_steps(self, batches, tags): + # if there is no batch information available default to available nodes for app + if not batches: + # figure out how many nodes the application can go on + steps = len(self.node.get(labels=tags).json()['items']) else: - return None, None, None - - # create / update private registry secret - auth = bytes('{}:{}'.format(username, password), 'UTF-8') - # value has to be a base64 encoded JSON - docker_config = json.dumps({ - "auths": { - hostname: { - "auth": base64.b64encode(auth).decode(encoding='UTF-8') - 
} - } - }) - return docker_config, secret_name, True - - def _set_image_secret(self, data, namespace, **kwargs): - """ - Take registry information and set as an imagePullSecret for an RC / Deployment - http://kubernetes.io/docs/user-guide/images/#specifying-imagepullsecrets-on-a-pod - """ - docker_config, secret_name, secret_create = self._get_private_registry_config(kwargs.get('registry', {}), kwargs.get('image')) # noqa - if secret_create is None: - return - elif secret_create: - secret_data = {'.dockerconfigjson': docker_config} - try: - self.get_secret(namespace, secret_name) - except KubeHTTPException: - self.create_secret( - namespace, - secret_name, - secret_data, - secret_type='kubernetes.io/dockerconfigjson' - ) - else: - self.update_secret( - namespace, - secret_name, - secret_data, - secret_type='kubernetes.io/dockerconfigjson' - ) - - # apply image pull secret to a Pod spec - data['imagePullSecrets'] = [{'name': secret_name}] - - def pod_state(self, pod): - """ - Resolve Pod state to an internally understandable format and returns a - PodState object that can be used for comparison or name can get gotten - via .name + steps = int(batches) - However if no match is found then a text representation is returned - """ - # See "Pod Phase" at http://kubernetes.io/docs/user-guide/pod-states/ - if pod is None: - return PodState.destroyed - - states = { - 'Pending': PodState.initializing, - 'ContainerCreating': PodState.creating, - 'Starting': PodState.starting, - 'Running': PodState.up, - 'Terminating': PodState.terminating, - 'Succeeded': PodState.down, - 'Failed': PodState.crashed, - 'Unknown': PodState.error, - } + return steps - # being in a Pending/ContainerCreating state can mean different things - # introspecting app container first - if pod['status']['phase'] in ['Pending', 'ContainerCreating']: - pod_state, _ = self._pod_pending_status(pod) - # being in a running state can mean a pod is starting, actually running or terminating - elif 
pod['status']['phase'] == 'Running': - # is the readiness probe passing? - pod_state = self._pod_readiness_status(pod) - if pod_state in ['Starting', 'Terminating']: - return states[pod_state] - elif pod_state == 'Running' and self._pod_liveness_status(pod): - # is the pod ready to serve requests? - return states[pod_state] + def _get_deploy_batches(self, steps, desired): + # figure out what kind of batches the deploy is done in - 1 in, 1 out or higher + if desired < steps: + # do it all in one go + batches = [desired] else: - # if no match was found for deis mapping then passthrough the real state - pod_state = pod['status']['phase'] - - return states.get(pod_state, pod_state) - - def _api(self, tmpl, *args): - """Return a fully-qualified Kubernetes API URL from a string template with args.""" - # FIXME better way of determining API version based on requested component - # extensions use apis and not api - # TODO this needs to be aware that deployments / rs could be top level in future releases - # https://github.com/deis/controller/issues/875 - prefix = 'api' - apiversion = 'v1' - components = tmpl.strip('/').split('/') - if len(components) > 2: - component = components[2] - if component in ['deployments', 'replicasets']: - prefix = 'apis' - apiversion = 'extensions/v1beta1' - - url = "/{}/{}".format(prefix, apiversion) + tmpl.format(*args) - return urljoin(self.url, url) - - def _selectors(self, **kwargs): - query = {} - - # labels and fields are encoded slightly differently than python-requests can do - labels = kwargs.get('labels', {}) - if labels: - selectors = [] - for key, value in labels.items(): - # http://kubernetes.io/docs/user-guide/labels/#set-based-requirement - if '__notin' in key: - key = key.replace('__notin', '') - selectors.append('{} notin({})'.format(key, ','.join(value))) - # list is automagically a in() - elif '__in' in key or isinstance(value, list): - key = key.replace('__in', '') - selectors.append('{} in({})'.format(key, 
','.join(value))) - elif value is None: - # allowing a check if a label exists (or not) without caring about value - selectors.append(key) - # http://kubernetes.io/docs/user-guide/labels/#equality-based-requirement - elif isinstance(value, str): - selectors.append('{}={}'.format(key, value)) - - query['labelSelector'] = ','.join(selectors) - - fields = kwargs.get('fields', {}) - if fields: - fields = ['{}={}'.format(key, value) for key, value in fields.items()] - query['fieldSelector'] = ','.join(fields) - - # Which resource version to start from. Otherwise starts from the beginning - resource_version = kwargs.get('resourceVersion', None) - if resource_version: - query['resourceVersion'] = resource_version - - # If output should pretty print, only True / False allowed - pretty = bool(kwargs.get('pretty', False)) - if pretty: - query['pretty'] = pretty - - return query - - # NAMESPACE # - - def get_namespace_events(self, namespace, **kwargs): - url = self._api("/namespaces/{}/events", namespace) - response = self.session.get(url, params=self._selectors(**kwargs)) - if unhealthy(response.status_code): - raise KubeHTTPException(response, "get Events in Namespace {}", namespace) - - return response - - def get_namespace(self, namespace): - url = self._api("/namespaces/{}/", namespace) - response = self.session.get(url) - if unhealthy(response.status_code): - raise KubeHTTPException(response, 'get Namespace "{}"', namespace) - - return response - - def get_namespaces(self, **kwargs): - url = self._api("/namespaces") - response = self.session.get(url, params=self._selectors(**kwargs)) - if unhealthy(response.status_code): - raise KubeHTTPException(response, 'get Namespaces') - - return response - - def create_namespace(self, namespace): - url = self._api("/namespaces") - data = { - "kind": "Namespace", - "apiVersion": "v1", - "metadata": { - "name": namespace, - "labels": { - 'heritage': 'deis' - } - } - } - - response = self.session.post(url, json=data) - if not 
response.status_code == 201: - raise KubeHTTPException(response, "create Namespace {}".format(namespace)) - - return response - - def delete_namespace(self, namespace): - url = self._api("/namespaces/{}", namespace) - response = self.session.delete(url) - if unhealthy(response.status_code): - raise KubeHTTPException(response, 'delete Namespace "{}"', namespace) - - return response - - # REPLICATION CONTROLLER # - - def get_rc(self, namespace, name): - url = self._api("/namespaces/{}/replicationcontrollers/{}", namespace, name) - response = self.session.get(url) - if unhealthy(response.status_code): - raise KubeHTTPException( - response, - 'get ReplicationController "{}" in Namespace "{}"', name, namespace - ) - - return response - - def get_rcs(self, namespace, **kwargs): - url = self._api("/namespaces/{}/replicationcontrollers", namespace) - response = self.session.get(url, params=self._selectors(**kwargs)) - if unhealthy(response.status_code): - raise KubeHTTPException( - response, - 'get ReplicationControllers in Namespace "{}"', namespace - ) - - return response - - def _wait_until_pods_terminate(self, namespace, labels, current, desired): - """Wait until all the desired pods are terminated""" - # http://kubernetes.io/docs/api-reference/v1/definitions/#_v1_podspec - # https://github.com/kubernetes/kubernetes/blob/release-1.2/docs/devel/api-conventions.md#metadata - # http://kubernetes.io/docs/user-guide/pods/#termination-of-pods - - timeout = settings.KUBERNETES_POD_TERMINATION_GRACE_PERIOD_SECONDS - delta = current - desired - self.log(namespace, "waiting for {} pods to be terminated ({}s timeout)".format(delta, timeout)) # noqa - for waited in range(timeout): - pods = self.get_pods(namespace, labels=labels).json() - count = len(pods['items']) - - # see if any pods are past their terminationGracePeriodsSeconds (as in stuck) - # seems to be a problem in k8s around that: - # https://github.com/kubernetes/kubernetes/search?q=terminating&type=Issues - # these will 
be eventually GC'ed by k8s, ignoring them for now - for pod in pods['items']: - # remove pod if it is passed the graceful termination period - if self.pod_deleted(pod): - count -= 1 - - # stop when all pods are terminated as expected - if count == desired: - break - - if waited > 0 and (waited % 10) == 0: - self.log(namespace, "waited {}s and {} pods out of {} are fully terminated".format(waited, (delta - count), delta)) # noqa - - time.sleep(1) + # figure out the stepped deploy count and then see if there is a leftover + batches = [steps for n in set(range(1, (desired + 1))) if n % steps == 0] + if desired - sum(batches) > 0: + batches.append(desired - sum(batches)) - self.log(namespace, "{} pods are terminated".format(delta)) + return batches def _deploy_probe_timeout(self, timeout, namespace, labels, containers): """ @@ -764,7 +367,7 @@ def _deploy_probe_timeout(self, timeout, namespace, labels, containers): """ container_name = '{}-{}'.format(labels['app'], labels['type']) - container = self._find_container(container_name, containers) + container = self.pod.find_container(container_name, containers) # get health info from container added_timeout = [] @@ -787,1060 +390,5 @@ def _deploy_probe_timeout(self, timeout, namespace, labels, containers): return timeout - def _wait_until_pods_are_ready(self, namespace, containers, labels, desired, timeout): # noqa - # If desired is 0 then there is no ready state to check on - if desired == 0: - return - - timeout = self._deploy_probe_timeout(timeout, namespace, labels, containers) - self.log(namespace, "waiting for {} pods in {} namespace to be in services ({}s timeout)".format(desired, namespace, timeout)) # noqa - - # Ensure the minimum desired number of pods are available - waited = 0 - while waited < timeout: - # figure out if there are any pending pod issues - additional_timeout = self._handle_pending_pods(namespace, labels) - if additional_timeout: - timeout += additional_timeout - # add 10 minutes to timeout to 
allow a pull image operation to finish - self.log(namespace, 'Kubernetes has been pulling the image for {}s'.format(seconds)) # noqa - self.log(namespace, 'Increasing timeout by {}s to allow a pull image operation to finish for pods'.format(additional_timeout)) # noqa - - count = 0 # ready pods - pods = self.get_pods(namespace, labels=labels).json() - for pod in pods['items']: - # now that state is running time to see if probes are passing - if self._pod_ready(pod): - count += 1 - continue - - # Find out if any pod goes beyond the Running (up) state - # Allow that to happen to account for very fast `deis run` as - # an example. Code using this function will account for it - state = self.pod_state(pod) - if isinstance(state, PodState) and state > PodState.up: - count += 1 - continue - - if count == desired: - break - - if waited > 0 and (waited % 10) == 0: - self.log(namespace, "waited {}s and {} pods are in service".format(waited, count)) - - # increase wait time without dealing with jitters from above code - waited += 1 - time.sleep(1) - - # timed out - if waited > timeout: - self.log(namespace, 'timed out ({}s) waiting for pods to come up in namespace {}'.format(timeout, namespace)) # noqa - - self.log(namespace, "{} out of {} pods are in service".format(count, desired)) # noqa - - def _scale_rc(self, namespace, name, desired, timeout): - rc = self.get_rc(namespace, name).json() - - current = int(rc['spec']['replicas']) - if desired == current: - self.log(namespace, "Not scaling RC {} to {} replicas. 
Already at desired replicas".format(name, desired)) # noqa - return - elif desired != rc['spec']['replicas']: # RC needs new replica count - # Set the new desired replica count - rc['spec']['replicas'] = desired - - self.log(namespace, "scaling RC {} from {} to {} replicas".format(name, current, desired)) # noqa - - self.update_rc(namespace, name, rc) - self._wait_until_rc_is_updated(namespace, name) - - # Double check enough pods are in the required state to service the application - labels = rc['metadata']['labels'] - containers = rc['spec']['template']['spec']['containers'] - self._wait_until_pods_are_ready(namespace, containers, labels, desired, timeout) - - # if it was a scale down operation, wait until terminating pods are done - if int(desired) < int(current): - self._wait_until_pods_terminate(namespace, labels, current, desired) - - def _find_container(self, container_name, containers): - """ - Locate a container by name in a list of containers - """ - for container in containers: - if container['name'] == container_name: - return container - - return None - - def create_rc(self, namespace, name, image, entrypoint, command, **kwargs): - manifest = { - 'kind': 'ReplicationController', - 'apiVersion': 'v1', - 'metadata': { - 'name': name, - 'labels': { - 'app': namespace, - 'version': kwargs.get('version'), - 'type': kwargs.get('app_type'), - 'heritage': 'deis', - } - }, - 'spec': { - 'replicas': kwargs.get('replicas', 0) - } - } - - # tell pod how to execute the process - kwargs['command'] = entrypoint - kwargs['args'] = command - - # pod manifest spec - manifest['spec']['template'] = self._build_pod_manifest(namespace, name, image, **kwargs) - - url = self._api("/namespaces/{}/replicationcontrollers", namespace) - resp = self.session.post(url, json=manifest) - if unhealthy(resp.status_code): - raise KubeHTTPException( - resp, - 'create ReplicationController "{}" in Namespace "{}"', name, namespace - ) - self.log(namespace, 'manifest used: 
{}'.format(json.dumps(manifest, indent=4)), logging.DEBUG) # noqa - - self._wait_until_rc_is_updated(namespace, name) - - return resp - - def _wait_until_rc_is_updated(self, namespace, name): - """ - Looks at status/observedGeneration and metadata/generation and - waits for observedGeneration >= generation to happen, indicates RC is ready - - More information is also available at: - https://github.com/kubernetes/kubernetes/blob/master/docs/devel/api-conventions.md#metadata - """ - self.log(namespace, "waiting for ReplicationController {} to get a newer generation (30s timeout)".format(name), logging.DEBUG) # noqa - for _ in range(30): - try: - rc = self.get_rc(namespace, name).json() - if ( - "observedGeneration" in rc["status"] and - rc["status"]["observedGeneration"] >= rc["metadata"]["generation"] - ): - self.log(namespace, "ReplicationController {} got a newer generation (30s timeout)".format(name), logging.DEBUG) # noqa - break - - time.sleep(1) - except KubeHTTPException as e: - if e.response.status_code == 404: - time.sleep(1) - - def update_rc(self, namespace, name, data): - url = self._api("/namespaces/{}/replicationcontrollers/{}", namespace, name) - response = self.session.put(url, json=data) - if unhealthy(response.status_code): - raise KubeHTTPException(response, 'scale ReplicationController "{}"', name) - - return response - - def delete_rc(self, namespace, name): - url = self._api("/namespaces/{}/replicationcontrollers/{}", namespace, name) - response = self.session.delete(url) - if unhealthy(response.status_code): - raise KubeHTTPException( - response, - 'delete ReplicationController "{}" in Namespace "{}"', name, namespace - ) - - return response - - def _default_readiness_probe(self, container, build_type, port=None): - # Update only the application container with the health check - if build_type == "buildpack": - container.update(self._default_buildpack_readiness_probe()) - elif port: - 
container.update(self._default_dockerapp_readiness_probe(port)) - - """ - Applies exec readiness probe to the slugrunner container. - http://kubernetes.io/docs/user-guide/pod-states/#container-probes - - /runner/init is the entry point of the slugrunner. - https://github.com/deis/slugrunner/blob/01eac53f1c5f1d1dfa7570bbd6b9e45c00441fea/rootfs/Dockerfile#L20 - Once it downloads the slug it starts running using `exec` which means the pid 1 - will point to the slug/application command instead of entry point once the application has - started. - https://github.com/deis/slugrunner/blob/01eac53f1c5f1d1dfa7570bbd6b9e45c00441fea/rootfs/runner/init#L90 - - This should be added only for the build pack apps when a custom liveness probe is not set to - make sure that the pod is ready only when the slug is downloaded and started running. - """ - def _default_buildpack_readiness_probe(self, delay=30, timeout=5, period_seconds=5, - success_threshold=1, failure_threshold=1): - readinessprobe = { - 'readinessProbe': { - # an exec probe - 'exec': { - "command": [ - "bash", - "-c", - "[[ '$(ps -p 1 -o args)' != *'bash /runner/init'* ]]" - ] - }, - # length of time to wait for a pod to initialize - # after pod startup, before applying health checking - 'initialDelaySeconds': delay, - 'timeoutSeconds': timeout, - 'periodSeconds': period_seconds, - 'successThreshold': success_threshold, - 'failureThreshold': failure_threshold, - }, - } - return readinessprobe - - def _default_dockerapp_readiness_probe(self, port, delay=5, timeout=5, period_seconds=5, - success_threshold=1, failure_threshold=1): - """ - Applies tcp socket readiness probe to the docker app container only if some port is exposed - by the docker image. 
- """ - readinessprobe = { - 'readinessProbe': { - # an exec probe - 'tcpSocket': { - "port": int(port) - }, - # length of time to wait for a pod to initialize - # after pod startup, before applying health checking - 'initialDelaySeconds': delay, - 'timeoutSeconds': timeout, - 'periodSeconds': period_seconds, - 'successThreshold': success_threshold, - 'failureThreshold': failure_threshold, - }, - } - return readinessprobe - - # SECRETS # - # http://kubernetes.io/docs/api-reference/v1/definitions/#_v1_secret - def get_secret(self, namespace, name): - url = self._api("/namespaces/{}/secrets/{}", namespace, name) - response = self.session.get(url) - if unhealthy(response.status_code): - raise KubeHTTPException( - response, - 'get Secret "{}" in Namespace "{}"', name, namespace - ) - - # decode the base64 data - secrets = response.json() - for key, value in secrets['data'].items(): - if value is None: - secrets['data'][key] = "" - continue - value = base64.b64decode(value) - value = value if isinstance(value, bytes) else bytes(str(value), 'UTF-8') - secrets['data'][key] = value.decode(encoding='UTF-8') - - # tell python-requests it actually hasn't consumed the data - response._content = bytes(json.dumps(secrets), 'UTF-8') - - return response - - def get_secrets(self, namespace, **kwargs): - url = self._api('/namespaces/{}/secrets', namespace) - response = self.session.get(url, params=self._selectors(**kwargs)) - if unhealthy(response.status_code): - raise KubeHTTPException(response, 'get Secrets in Namespace "{}"', namespace) - - return response - - def _build_secret_manifest(self, namespace, name, data, secret_type='Opaque', labels={}): - secret_types = ['Opaque', 'kubernetes.io/dockerconfigjson'] - if secret_type not in secret_types: - raise KubeException('{} is not a supported secret type. 
Use one of the following: '.format(secret_type, ', '.join(secret_types))) # noqa - - manifest = { - 'kind': 'Secret', - 'apiVersion': 'v1', - 'metadata': { - 'name': name, - 'namespace': namespace, - 'labels': { - 'app': namespace, - 'heritage': 'deis' - } - }, - 'type': secret_type, - 'data': {} - } - - # add in any additional label info - manifest['metadata']['labels'].update(labels) - - for key, value in data.items(): - if value is None: - manifest['data'].update({key: ''}) - continue - - value = value if isinstance(value, bytes) else bytes(str(value), 'UTF-8') - item = base64.b64encode(value).decode(encoding='UTF-8') - manifest['data'].update({key: item}) - - return manifest - - def create_secret(self, namespace, name, data, secret_type='Opaque', labels={}): - manifest = self._build_secret_manifest(namespace, name, data, secret_type, labels) - url = self._api("/namespaces/{}/secrets", namespace) - response = self.session.post(url, json=manifest) - if unhealthy(response.status_code): - raise KubeHTTPException( - response, - 'failed to create Secret "{}" in Namespace "{}"', name, namespace - ) - - return response - - def update_secret(self, namespace, name, data, secret_type='Opaque', labels={}): - manifest = self._build_secret_manifest(namespace, name, data, secret_type, labels) - url = self._api("/namespaces/{}/secrets/{}", namespace, name) - response = self.session.put(url, json=manifest) - if unhealthy(response.status_code): - raise KubeHTTPException( - response, - 'failed to update Secret "{}" in Namespace "{}"', - name, namespace - ) - - return response - - def delete_secret(self, namespace, name): - url = self._api("/namespaces/{}/secrets/{}", namespace, name) - response = self.session.delete(url) - if unhealthy(response.status_code): - raise KubeHTTPException( - response, - 'delete Secret "{}" in Namespace "{}"', name, namespace - ) - - return response - - # SERVICES # - - def get_service(self, namespace, name): - url = 
self._api("/namespaces/{}/services/{}", namespace, name) - response = self.session.get(url) - if unhealthy(response.status_code): - raise KubeHTTPException( - response, - 'get Service "{}" in Namespace "{}"', name, namespace - ) - - return response - - def get_services(self, namespace, **kwargs): - url = self._api('/namespaces/{}/services', namespace) - response = self.session.get(url, params=self._selectors(**kwargs)) - if unhealthy(response.status_code): - raise KubeHTTPException(response, 'get Services in Namespace "{}"', namespace) - - return response - - def create_service(self, namespace, name, **kwargs): - # Ports and app type will be overwritten as required - manifest = { - 'kind': 'Service', - 'apiVersion': 'v1', - 'metadata': { - 'name': name, - 'labels': { - 'app': namespace, - 'heritage': 'deis' - }, - 'annotations': {} - }, - 'spec': { - 'ports': [{ - 'name': 'http', - 'port': 80, - 'targetPort': 5000, - 'protocol': 'TCP' - }], - 'selector': { - 'app': namespace, - 'heritage': 'deis' - } - } - } - - data = dict_merge(manifest, kwargs.get('data', {})) - url = self._api("/namespaces/{}/services", namespace) - response = self.session.post(url, json=data) - if unhealthy(response.status_code): - raise KubeHTTPException( - response, - 'create Service "{}" in Namespace "{}"', namespace, namespace - ) - - return response - - def update_service(self, namespace, name, data): - url = self._api("/namespaces/{}/services/{}", namespace, name) - response = self.session.put(url, json=data) - if unhealthy(response.status_code): - raise KubeHTTPException( - response, - 'update Service "{}" in Namespace "{}"', namespace, name - ) - - return response - - def delete_service(self, namespace, name): - url = self._api("/namespaces/{}/services/{}", namespace, name) - response = self.session.delete(url) - if unhealthy(response.status_code): - raise KubeHTTPException( - response, - 'delete Service "{}" in Namespace "{}"', name, namespace - ) - - return response - - # PODS # - - 
def get_pod(self, namespace, name): - url = self._api("/namespaces/{}/pods/{}", namespace, name) - response = self.session.get(url) - if unhealthy(response.status_code): - raise KubeHTTPException(response, 'get Pod "{}" in Namespace "{}"', name, namespace) - - return response - - def get_pods(self, namespace, **kwargs): - url = self._api('/namespaces/{}/pods', namespace) - response = self.session.get(url, params=self._selectors(**kwargs)) - if unhealthy(response.status_code): - raise KubeHTTPException(response, 'get Pods in Namespace "{}"', namespace) - - return response - - def delete_pod(self, namespace, name): - url = self._api("/namespaces/{}/pods/{}", namespace, name) - resp = self.session.delete(url) - if unhealthy(resp.status_code): - raise KubeHTTPException(resp, 'delete Pod "{}" in Namespace "{}"', name, namespace) - - # Verify the pod has been deleted - # Only wait as long as the grace period is - k8s will eventually GC - for _ in range(settings.KUBERNETES_POD_TERMINATION_GRACE_PERIOD_SECONDS): - try: - pod = self.get_pod(namespace, name).json() - # hide pod if it is passed the graceful termination period - if self.pod_deleted(pod): - return - except KubeHTTPException as e: - if e.response.status_code == 404: - break - - time.sleep(1) - - def _pod_log(self, namespace, name): - url = self._api("/namespaces/{}/pods/{}/log", namespace, name) - response = self.session.get(url) - if unhealthy(response.status_code): - raise KubeHTTPException( - response, - 'get logs for Pod "{}" in Namespace "{}"', name, namespace - ) - - return response - - def _pod_pending_status(self, pod): - """Introspect the pod containers when pod is in Pending state""" - if 'containerStatuses' not in pod['status']: - return 'Pending', '' - - name = '{}-{}'.format(pod['metadata']['labels']['app'], pod['metadata']['labels']['type']) - # find the right container in case there are many on the pod - container = self._find_container(name, pod['status']['containerStatuses']) - if container is 
None: - # Return Pending if nothing else can be found - return 'Pending', '' - - if 'waiting' in container['state']: - reason = container['state']['waiting']['reason'] - message = '' - # message is not always available - if 'message' in container['state']['waiting']: - message = container['state']['waiting']['message'] - - if reason == 'ContainerCreating': - # get the last event - events = self._pod_events(pod) - if not events: - # could not find any events - return reason, message - - event = events.pop() - return event['reason'], event['message'] - - return reason, message - - # Return Pending if nothing else can be found - return 'Pending', '' - - def _pod_events(self, pod): - """Process events for a given Pod to find if Pulling is happening, among other events""" - # fetch all events for this pod - fields = { - 'involvedObject.name': pod['metadata']['name'], - 'involvedObject.namespace': pod['metadata']['namespace'], - 'involvedObject.uid': pod['metadata']['uid'] - } - events = self.get_namespace_events(pod['metadata']['namespace'], fields=fields).json() - # make sure that events are sorted - events['items'].sort(key=lambda x: x['lastTimestamp']) - return events['items'] - - def _pod_readiness_status(self, pod): - """Check if the pod container have passed the readiness probes""" - name = '{}-{}'.format(pod['metadata']['labels']['app'], pod['metadata']['labels']['type']) - # find the right container in case there are many on the pod - container = self._find_container(name, pod['status']['containerStatuses']) - if container is None: - # Seems like the most sensible default - return 'Unknown' - - if not container['ready']: - if 'running' in container['state'].keys(): - return 'Starting' - - if ( - 'terminated' in container['state'].keys() or - 'deletionTimestamp' in pod['metadata'] - ): - return 'Terminating' - else: - # See if k8s is in Terminating state - if 'deletionTimestamp' in pod['metadata']: - return 'Terminating' - - return 'Running' - - # Seems like the 
most sensible default - return 'Unknown' - - def _pod_liveness_status(self, pod): - """Check if the pods liveness probe status has passed all checks""" - for condition in pod['status']['conditions']: - # type = Ready is the only binary type right now - if condition['type'] == 'Ready' and condition['status'] != 'True': - return False - - return True - - def _pod_ready(self, pod): - """Combines various checks to see if the pod is considered up or not by checking probes""" - return ( - pod['status']['phase'] == 'Running' and - # is the readiness probe passing? - self._pod_readiness_status(pod) == 'Running' and - # is the pod ready to serve requests? - self._pod_liveness_status(pod) - ) - - def pod_deleted(self, pod): - """Checks if a pod is deleted and past its graceful termination period""" - # https://github.com/kubernetes/kubernetes/blob/release-1.2/docs/devel/api-conventions.md#metadata - # http://kubernetes.io/docs/user-guide/pods/#termination-of-pods - if 'deletionTimestamp' in pod['metadata']: - deletion = datetime.strptime( - pod['metadata']['deletionTimestamp'], - settings.DEIS_DATETIME_FORMAT - ) - - # past the graceful deletion period - if deletion < datetime.utcnow(): - return True - - return False - - def _handle_pod_errors(self, pod, reason, message): - """ - Handle potential pod errors based on the Pending - reason passed into the function - - Images, FailedScheduling and others are needed - """ - # image error reported on the container level - container_errors = [ - 'Pending', # often an indication of deeper inspection is needed - 'ErrImagePull', - 'ImagePullBackOff', - 'RegistryUnavailable', - 'ErrImageInspect', - ] - # Image event reason mapping - event_errors = { - "Failed": "FailedToPullImage", - "InspectFailed": "FailedToInspectImage", - "ErrImageNeverPull": "ErrImageNeverPullPolicy", - # Not including this one for now as the message is not useful - # "BackOff": "BackOffPullImage", - # FailedScheduling relates limits - "FailedScheduling": 
"FailedScheduling", - } - - # Nicer error than from the event - # Often this gets to ImageBullBackOff before we can introspect tho - if reason == 'ErrImagePull': - raise KubeException(message) - - # collect all error messages of worth - messages = [] - if reason in container_errors: - for event in self._pod_events(pod): - if event['reason'] in event_errors.keys(): - # only show a given error once - event_errors.pop(event['reason']) - # strip out whitespaces on either side - message = "\n".join([x.strip() for x in event['message'].split("\n")]) - messages.append(message) - - if messages: - raise KubeException("\n".join(messages)) - - def _handle_pod_long_image_pulling(self, reason, pod): - """ - If pulling an image is taking long (1 minute) then return how many seconds - the pod ready state timeout should be extended by - - Return value is an int that represents seconds - """ - # only apply once - if getattr(self, '_handle_pod_long_image_pulling_applied', False): - return 0 - - if reason is not 'Pulling': - return 0 - - # last event should be Pulling in this case - event = self._pod_events(pod).pop() - # see if pull operation has been happening for over 1 minute - start = datetime.strptime( - event['firstTimestamp'], - settings.DEIS_DATETIME_FORMAT - ) - - seconds = 60 # time threshold before padding timeout - if (start + timedelta(seconds=seconds)) < datetime.utcnow(): - # make it so function doesn't do processing again - setattr(self, '_handle_pod_long_image_pulling_applied', True) - return 600 - - return 0 - - def _handle_pending_pods(self, namespace, labels): - """ - Detects if any pod is in the starting phases and handles - any potential issues around that, and increases timeouts - or throws errors as needed - """ - timeout = 0 - pods = self.get_pods(namespace, labels=labels).json() - for pod in pods['items']: - # only care about pods that are not starting or in the starting phases - if pod['status']['phase'] not in ['Pending', 'ContainerCreating']: - continue 
- - # Get more information on why a pod is pending - reason, message = self._pod_pending_status(pod) - # If pulling an image is taking long then increase the timeout - timeout += self._handle_pod_long_image_pulling(pod, reason) - - # handle errors and bubble up if need be - self._handle_pod_errors(pod, reason, message) - - return timeout - - # NODES # - - def get_nodes(self, **kwargs): - url = self._api('/nodes') - response = self.session.get(url, params=self._selectors(**kwargs)) - if unhealthy(response.status_code): - raise KubeHTTPException(response, 'get Nodes') - - return response - - def get_node(self, name, **kwargs): - url = self._api('/nodes/{}'.format(name)) - response = self.session.get(url) - if unhealthy(response.status_code): - raise KubeHTTPException(response, 'get Node {} in Nodes', name) - - return response - - # DEPLOYMENTS # - - def get_deployment(self, namespace, name): - url = self._api("/namespaces/{}/deployments/{}", namespace, name) - response = self.session.get(url) - if unhealthy(response.status_code): - raise KubeHTTPException( - response, - 'get Deployment "{}" in Namespace "{}"', name, namespace - ) - - return response - - def get_deployments(self, namespace, **kwargs): - url = self._api("/namespaces/{}/deployments", namespace) - response = self.session.get(url, params=self._selectors(**kwargs)) - if unhealthy(response.status_code): - raise KubeHTTPException(response, 'get Deployments in Namespace "{}"', namespace) - - return response - - def _wait_until_deployment_is_updated(self, namespace, name): - """ - Looks at status/observedGeneration and metadata/generation and - waits for observedGeneration >= generation to happen - - http://kubernetes.io/docs/user-guide/deployments/#the-status-of-a-deployment - More information is also available at: - https://github.com/kubernetes/kubernetes/blob/master/docs/devel/api-conventions.md#metadata - """ - self.log(namespace, "waiting for Deployment {} to get a newer generation (30s 
timeout)".format(name), logging.DEBUG) # noqa - for _ in range(30): - try: - deploy = self.get_deployment(namespace, name).json() - if ( - 'observedGeneration' in deploy['status'] and - deploy['status']['observedGeneration'] >= deploy['metadata']['generation'] - ): - self.log(namespace, "A newer generation was found for Deployment {}".format(name), logging.DEBUG) # noqa - break - - time.sleep(1) - except KubeHTTPException as e: - if e.response.status_code == 404: - time.sleep(1) - - def are_deployment_replicas_ready(self, namespace, name): - """ - Verify the status of a Deployment and if it is fully deployed - """ - deployment = self.get_deployment(namespace, name).json() - desired = deployment['spec']['replicas'] - status = deployment['status'] - - # right now updateReplicas is where it is at - # availableReplicas mean nothing until minReadySeconds is used - pods = status['updatedReplicas'] if 'updatedReplicas' in status else 0 - - # spec/replicas of 0 is a special case as other fields get removed from status - if desired == 0 and ('replicas' not in status or status['replicas'] == 0): - return True, pods - - if ( - 'unavailableReplicas' in status or - ('replicas' not in status or status['replicas'] is not desired) or - ('updatedReplicas' not in status or status['updatedReplicas'] is not desired) or - ('availableReplicas' not in status or status['availableReplicas'] is not desired) - ): - return False, pods - - return True, pods - - def delete_deployment(self, namespace, name): - url = self._api("/namespaces/{}/deployments/{}", namespace, name) - response = self.session.delete(url) - if unhealthy(response.status_code): - raise KubeHTTPException( - response, - 'delete Deployment "{}" in Namespace "{}"', name, namespace - ) - - return response - - def update_deployment(self, namespace, name, image, entrypoint, command, **kwargs): - manifest = self._build_deployment_manifest(namespace, - name, - image, - entrypoint, - command, - **kwargs) - - url = 
self._api("/namespaces/{}/deployments/{}", namespace, name) - response = self.session.put(url, json=manifest) - if unhealthy(response.status_code): - self.log(namespace, 'template used: {}'.format(json.dumps(manifest, indent=4)), logging.DEBUG) # noqa - raise KubeHTTPException(response, 'update Deployment "{}"', name) - - self._wait_until_deployment_is_updated(namespace, name) - self._wait_until_deployment_is_ready(namespace, name, **kwargs) - - return response - - def create_deployment(self, namespace, name, image, entrypoint, command, **kwargs): - manifest = self._build_deployment_manifest(namespace, - name, - image, - entrypoint, - command, - **kwargs) - - url = self._api("/namespaces/{}/deployments", namespace) - response = self.session.post(url, json=manifest) - if unhealthy(response.status_code): - raise KubeHTTPException( - response, - 'create Deployment "{}" in Namespace "{}"', name, namespace - ) - self.log(namespace, 'template used: {}'.format(json.dumps(manifest, indent=4)), logging.DEBUG) # noqa - - self._wait_until_deployment_is_updated(namespace, name) - self._wait_until_deployment_is_ready(namespace, name, **kwargs) - - return response - - def deployment_in_progress(self, namespace, name, deploy_timeout, batches, replicas, tags): - """ - Determine if a Deployment has a deploy in progress - - First is a very basic check to see if replicas are ready. - - If they are not ready then it is time to see if there are problems with any of the pods - such as image pull issues or similar. - - And then if that is still all okay then it is time to see if the deploy has - been in progress for longer than the allocated deploy time. Reason to do this - check is if a client has had a dropped connection. 
- - Returns 2 booleans, first one is for if the Deployment is in progress or not, second - one is or if a rollback action is advised while leaving the rollback up to the caller - """ - self.log(namespace, 'Checking if Deployment {} is in progress'.format(name), level=logging.DEBUG) # noqa - try: - ready, _ = self.are_deployment_replicas_ready(namespace, name) - if ready: - # nothing more to do - False since it is not in progress - self.log(namespace, 'All replicas for Deployment {} are ready'.format(name), level=logging.DEBUG) # noqa - return False, False - except KubeHTTPException as e: - # Deployment doesn't exist - if e.response.status_code == 404: - self.log(namespace, 'Deployment {} does not exist yet'.format(name), level=logging.DEBUG) # noqa - return False, False - - # get deployment information - deployment = self.get_deployment(namespace, name).json() - # get pod template labels since they include the release version - labels = deployment['spec']['template']['metadata']['labels'] - containers = deployment['spec']['template']['spec']['containers'] - - # calculate base deploy timeout - deploy_timeout = self._deploy_probe_timeout(deploy_timeout, namespace, labels, containers) - - # a rough calculation that figures out an overall timeout - steps = self._get_deploy_steps(batches, tags) - batches = self._get_deploy_batches(steps, replicas) - timeout = len(batches) * deploy_timeout - - # is there a slow image pull or image issues - try: - timeout += self._handle_pending_pods(namespace, labels) - except KubeException as e: - self.log(namespace, 'Deployment {} had stalled due an error and will be rolled back. 
{}'.format(name, str(e)), level=logging.DEBUG) # noqa - return False, True - - # fetch the latest RS for Deployment and use the start time to compare to deploy timeout - replicasets = self.get_replicasets(namespace, labels=labels).json()['items'] - # the labels should ensure that only 1 replicaset due to the version label - if len(replicasets) != 1: - # if more than one then sort by start time to newest is first - replicasets.sort(key=lambda x: x['metadata']['creationTimestamp'], reverse=True) - - # work with the latest copy - replica = replicasets.pop() - - # throw an exception if over TTL so error is bubbled up - start = datetime.strptime( - replica['metadata']['creationTimestamp'], - settings.DEIS_DATETIME_FORMAT - ) - - if (start + timedelta(seconds=timeout)) < datetime.utcnow(): - self.log(namespace, 'Deploy operation for Deployment {} in has expired. Rolling back to last good known release'.format(name), level=logging.DEBUG) # noqa - return False, True - - return True, False - - def _wait_until_deployment_is_ready(self, namespace, name, **kwargs): - replicas = int(kwargs.get('replicas', 0)) - # If desired is 0 then there is no ready state to check on - if replicas == 0: - return - - current = int(kwargs.get('previous_replicas', 0)) - batches = kwargs.get('deploy_batches', None) - deploy_timeout = kwargs.get('deploy_timeout', 120) - tags = kwargs.get('tags', {}) - steps = self._get_deploy_steps(batches, tags) - batches = self._get_deploy_batches(steps, replicas) - - deployment = self.get_deployment(namespace, name).json() - labels = deployment['spec']['template']['metadata']['labels'] - containers = deployment['spec']['template']['spec']['containers'] - - # if it was a scale down operation, wait until terminating pods are done - # Deployments say they are ready even when pods are being terminated - if replicas < current: - self._wait_until_pods_terminate(namespace, labels, current, replicas) - return - - # calculate base deploy timeout - deploy_timeout = 
self._deploy_probe_timeout(deploy_timeout, namespace, labels, containers) - - # a rough calculation that figures out an overall timeout - timeout = len(batches) * deploy_timeout - self.log(namespace, 'This deployments overall timeout is {}s - batch timout is {}s and there are {} batches to deploy with a total of {} pods'.format(timeout, deploy_timeout, len(batches), replicas)) # noqa - - waited = 0 - while waited < timeout: - ready, availablePods = self.are_deployment_replicas_ready(namespace, name) - if ready: - break - - # check every 10 seconds for pod failures. - # Depend on Deployment checks for ready pods - if waited > 0 and (waited % 10) == 0: - additional_timeout = self._handle_pending_pods(namespace, labels) - if additional_timeout: - timeout += additional_timeout - # add 10 minutes to timeout to allow a pull image operation to finish - self.log(namespace, 'Kubernetes has been pulling the image for {}s'.format(seconds)) # noqa - self.log(namespace, 'Increasing timeout by {}s to allow a pull image operation to finish for pods'.format(additional_timeout)) # noqa - - self.log(namespace, "waited {}s and {} pods are in service".format(waited, availablePods)) # noqa - - waited += 1 - time.sleep(1) - - def _build_deployment_manifest(self, namespace, name, image, entrypoint, command, **kwargs): - replicas = kwargs.get('replicas', 0) - batches = kwargs.get('deploy_batches', None) - tags = kwargs.get('tags', {}) - - labels = { - 'app': namespace, - 'type': kwargs.get('app_type'), - 'heritage': 'deis', - } - - manifest = { - 'kind': 'Deployment', - 'apiVersion': 'extensions/v1beta1', - 'metadata': { - 'name': name, - 'labels': labels, - 'annotations': { - 'kubernetes.io/change-cause': kwargs.get('release_summary', '') - } - }, - 'spec': { - 'replicas': replicas, - 'selector': { - 'matchLabels': labels - } - } - } - - # Add in Rollback (if asked for) - rollback = kwargs.get('rollback', False) - if rollback: - # 
http://kubernetes.io/docs/user-guide/deployments/#rollback-to - if rollback is True: - # rollback to the latest known working revision - revision = 0 - elif isinstance(rollback, int) or isinstance(rollback, str): - # rollback to a particular revision - revision = rollback - - # This gets cleared from the template after a rollback is done - manifest['spec']['rollbackTo'] = {'revision': str(revision)} - - # Add deployment strategy - - # see if application or global deploy batches are defined - maxSurge = self._get_deploy_steps(batches, tags) - # if replicas are higher than maxSurge then the old deployment is never scaled down - # maxSurge can't be 0 when maxUnavailable is 0 and the other way around - if replicas > 0 and replicas < maxSurge: - maxSurge = replicas - - # http://kubernetes.io/docs/user-guide/deployments/#strategy - manifest['spec']['strategy'] = { - 'rollingUpdate': { - 'maxSurge': maxSurge, - # This is never updated - 'maxUnavailable': 0 - }, - # RollingUpdate or Recreate - 'type': 'RollingUpdate', - } - - # Add in how many deployment revisions to keep - if kwargs.get('deployment_revision_history', None) is not None: - manifest['spec']['revisionHistoryLimit'] = int(kwargs.get('deployment_revision_history')) # noqa - - # tell pod how to execute the process - kwargs['command'] = entrypoint - kwargs['args'] = command - - # pod manifest spec - manifest['spec']['template'] = self._build_pod_manifest(namespace, name, image, **kwargs) - - return manifest - - def _scale_deployment(self, namespace, name, image, entrypoint, command, **kwargs): - """ - A convenience wrapper around Deployment update that does a little bit of introspection - to determine if scale level is already where it needs to be - """ - deployment = self.get_deployment(namespace, name).json() - desired = int(kwargs.get('replicas')) - current = int(deployment['spec']['replicas']) - if desired == current: - self.log(namespace, "Not scaling Deployment {} to {} replicas. 
Already at desired replicas".format(name, desired)) # noqa - return - elif desired != current: - # set the previous replicas count so the wait logic can deal with terminating pods - kwargs['previous_replicas'] = current - self.log(namespace, "scaling Deployment {} from {} to {} replicas".format(name, current, desired)) # noqa - self.update_deployment(namespace, name, image, entrypoint, command, **kwargs) - - def get_replicaset(self, namespace, name): - url = self._api("/namespaces/{}/replicasets/{}", namespace, name) - response = self.session.get(url) - if unhealthy(response.status_code): - raise KubeHTTPException( - response, - 'get ReplicaSet "{}" in Namespace "{}"', name, namespace - ) - - return response - - def get_replicasets(self, namespace, **kwargs): - url = self._api("/namespaces/{}/replicasets", namespace) - response = self.session.get(url, params=self._selectors(**kwargs)) - if unhealthy(response.status_code): - raise KubeHTTPException( - response, - 'get ReplicaSets in Namespace "{}"', namespace - ) - - return response SchedulerClient = KubeHTTPClient diff --git a/rootfs/scheduler/exceptions.py b/rootfs/scheduler/exceptions.py new file mode 100644 index 000000000..399819d24 --- /dev/null +++ b/rootfs/scheduler/exceptions.py @@ -0,0 +1,16 @@ +class KubeException(Exception): + def __init__(self, *args, **kwargs): + Exception.__init__(self, *args, **kwargs) + + +class KubeHTTPException(KubeException): + def __init__(self, response, errmsg, *args, **kwargs): + self.response = response + + msg = errmsg.format(*args) + msg = "failed to {}: {} {}".format( + msg, + response.status_code, + response.reason + ) + KubeException.__init__(self, msg, *args, **kwargs) diff --git a/rootfs/scheduler/mock.py b/rootfs/scheduler/mock.py index dc2757997..a635de457 100644 --- a/rootfs/scheduler/mock.py +++ b/rootfs/scheduler/mock.py @@ -79,6 +79,7 @@ def _acquire(self): resources = [ 'namespaces', 'nodes', 'pods', 'replicationcontrollers', 'secrets', 'services', 'events', 
'deployments', 'replicasets', + 'horizontalpodautoscalers' ] @@ -111,6 +112,55 @@ def get_type(key, pos=-1): return 'unknown' +@CacheLock() +def process_hpa(): + """ + Process HPA. Add / remove replicas in target resources + as required. + + This function can obviously not react to CPU / stats + """ + for row in cache.get('horizontalpodautoscalers', []): + hpa = cache.get(row) + + # check if the resource referenced actually exists + kind = hpa['spec']['scaleRef']['kind'].lower() + 's' # make plural + name = hpa['spec']['scaleRef']['name'].lower() + deployment = None + for deploy in cache.get(kind, []): + item = cache.get(deploy) + if item['metadata']['name'] == name: + deployment = item + break + + if deployment is None: + return # nothing found + + # verify if the deployment is ready to messed with + # scaling up / down via HPA in the middle of a deploy can + # cause havoc in the scheduler code + desired = deployment['spec']['replicas'] + status = deployment['status'] + + if ( + 'unavailableReplicas' in status or + ('replicas' not in status or status['replicas'] is not desired) or + ('updatedReplicas' not in status or status['updatedReplicas'] is not desired) or + ('availableReplicas' not in status or status['availableReplicas'] is not desired) + ): + return + + min_replicas = hpa['spec']['minReplicas'] + max_replicas = hpa['spec']['maxReplicas'] + + if deployment['spec']['replicas'] < min_replicas: + deployment['spec']['replicas'] = min_replicas + elif deployment['spec']['replicas'] > max_replicas: + deployment['spec']['replicas'] = max_replicas + + manage_replicasets(deployment, deploy) + + @CacheLock() def pod_state_transitions(pod_url=None): """ @@ -198,7 +248,7 @@ def add_cleanup_pod(url): pod = cache.get(url) grace = settings.KUBERNETES_POD_TERMINATION_GRACE_PERIOD_SECONDS pd = datetime.utcnow() + timedelta(seconds=grace) - timestamp = str(pd.strftime(settings.DEIS_DATETIME_FORMAT)) + timestamp = str(pd.strftime(MockSchedulerClient.DATETIME_FORMAT)) 
pod['metadata']['deletionTimestamp'] = timestamp cache.set(url, pod) @@ -253,7 +303,7 @@ def create_pods(url, labels, base, new_pods): for _ in range(new_pods): data = base.copy() # creation time - timestamp = str(datetime.utcnow().strftime(settings.DEIS_DATETIME_FORMAT)) + timestamp = str(datetime.utcnow().strftime(MockSchedulerClient.DATETIME_FORMAT)) data['metadata']['creationTimestamp'] = timestamp data['metadata']['uid'] = str(uuid.uuid4()) @@ -261,7 +311,7 @@ def create_pods(url, labels, base, new_pods): if 'generateName' in data['metadata']: data['metadata']['name'] = data['metadata']['generateName'] + pod_name() - timestamp = str(datetime.utcnow().strftime(settings.DEIS_DATETIME_FORMAT)) + timestamp = str(datetime.utcnow().strftime(MockSchedulerClient.DATETIME_FORMAT)) data['status'] = { 'startTime': timestamp, 'conditions': [ @@ -348,6 +398,9 @@ def manage_replicasets(deployment, url): The input data is going to be a Deployment object """ + # hash deployment.spec.template with adler32 to get pod hash + pod_hash = str(adler32(bytes(json.dumps(deployment['spec']['template'], sort_keys=True), 'UTF-8'))) # noqa + # reset Deployments status deployment['status']['replicas'] = deployment['spec']['replicas'] deployment['status']['unavailableReplicas'] = deployment['spec']['replicas'] @@ -357,18 +410,27 @@ def manage_replicasets(deployment, url): del deployment['status']['availableReplicas'] cache.set(url, deployment, None) - # hash deployment.spec.template with adler32 to get pod hash - pod_hash = str(adler32(bytes(json.dumps(deployment['spec']['template'], sort_keys=True), 'UTF-8'))) # noqa - - # fix up url + # get RS url rs_url = url.replace('_deployments_', '_replicasets_') + rs_url += '_' + pod_hash + namespaced_url = rs_url[0:(rs_url.find("_replicasets") + 12)] + + # get latest RS for deployment to see if a new RS is needed + old_rs = cache.get(rs_url, None) + if old_rs is not None: + # found an RS, template has not changed. Only update Deployment. 
+ old_rs['spec']['replicas'] = deployment['spec']['replicas'] + cache.set(url, deployment, None) # save Deployment + cache.set(rs_url, old_rs, None) # save RS + upsert_pods(old_rs, rs_url) + update_deployment_status(namespaced_url, url, deployment, old_rs) + return # create new RS rs = copy.deepcopy(deployment) rs['kind'] = 'ReplicaSet' - # fix up the name + # fix up the name by adding pod hash to it rs['metadata']['name'] = rs['metadata']['name'] + '-' + pod_hash - rs_url += '_' + pod_hash # add the pod-template-hash label rs['metadata']['labels'] = rs['spec']['template']['metadata']['labels'].copy() @@ -381,7 +443,6 @@ def manage_replicasets(deployment, url): # save new ReplicaSet to cache add_cache_item(rs_url, 'replicasets', rs) - namespaced_url = rs_url[0:(rs_url.find("_replicasets") + 12)] data = cache.get(namespaced_url, []) # spin up/down pods for RS @@ -408,6 +469,10 @@ def manage_replicasets(deployment, url): upsert_pods(old_rs, item) + update_deployment_status(namespaced_url, url, deployment, rs) + + +def update_deployment_status(namespaced_url, url, deployment, rs): # Fill out deployment.status for success as pods transition to running state pod_url = namespaced_url.replace('_replicasets', '_pods').replace('apis_extensions_v1beta1', 'api_v1') # noqa while True: @@ -561,6 +626,7 @@ def post(request, context): # check if the namespace being posted to exists if resource_type != 'namespaces': namespace, _ = url.split('_{}_'.format(resource_type)) + namespace = namespace.replace('apis_autoscaling_v1', 'api_v1') namespace = namespace.replace('apis_extensions_v1beta1', 'api_v1') if cache.get(namespace) is None: context.status_code = 404 @@ -573,7 +639,7 @@ def post(request, context): return {} # fill in generic data - timestamp = str(datetime.utcnow().strftime(settings.DEIS_DATETIME_FORMAT)) + timestamp = str(datetime.utcnow().strftime(MockSchedulerClient.DATETIME_FORMAT)) data['metadata']['creationTimestamp'] = timestamp data['metadata']['resourceVersion'] = 
1 data['metadata']['uid'] = str(uuid.uuid4()) @@ -616,6 +682,7 @@ def put(request, context): # check if the namespace being posted to exists if resource_type != 'namespaces': namespace, _ = url.split('_{}_'.format(resource_type)) + namespace = namespace.replace('apis_autoscaling_v1', 'api_v1') namespace = namespace.replace('apis_extensions_v1beta1', 'api_v1') if cache.get(namespace) is None: context.status_code = 404 @@ -646,13 +713,17 @@ def put(request, context): data['metadata']['resourceVersion'] += 1 data['metadata']['generation'] += 1 data['status']['observedGeneration'] += 1 + + # Update the individual resource + cache.set(url, data, None) + if resource_type in ['replicationcontrollers', 'replicasets']: upsert_pods(data, url) elif resource_type == 'deployments': manage_replicasets(data, url) - - # Update the individual resource - cache.set(url, data, None) + else: + # Update the individual resource + cache.set(url, data, None) context.status_code = 200 context.reason = 'OK' @@ -746,14 +817,21 @@ def mock_kubernetes(request, context): pod_state_transitions() # What to do about context + response = None if request.method == 'POST': - return post(request, context) + response = post(request, context) elif request.method == 'GET': - return get(request, context) + response = get(request, context) elif request.method == 'PUT': - return put(request, context) + response = put(request, context) elif request.method == 'DELETE': - return delete(request, context) + response = delete(request, context) + + # autoscaling + process_hpa() + + if response is not None: + return response # Log if any operation slips through that hasn't been accounted for logger.critical('COULD NOT FIND WHAT I AM') @@ -776,41 +854,44 @@ def session(): class MockSchedulerClient(KubeHTTPClient): - def __init__(self): - super().__init__() + def __init__(self, url): + super().__init__(url) + + # set version data + cache.set('version', {'major': '1', 'minor': '3'}, None) # Pre-seed data that is 
assumed to otherwise be there try: - self.get_namespace('deis') + self.ns.get('deis') except KubeHTTPException: - self.create_namespace('deis') + self.ns.create('deis') try: - self.get_secret('deis', 'objectstorage-keyfile') + self.secret.get('deis', 'objectstorage-keyfile') except KubeHTTPException: secrets = { 'access-key-id': 'i am a key', 'access-secret-key': 'i am a secret' } - self.create_secret('deis', 'objectstorage-keyfile', secrets) + self.secret.create('deis', 'objectstorage-keyfile', secrets) try: - self.get_secret('deis', 'registry-secret') + self.secret.get('deis', 'registry-secret') except KubeHTTPException: secrets = { 'username': 'test', 'password': 'test', 'hostname': '' } - self.create_secret('deis', 'registry-secret', secrets) + self.secret.create('deis', 'registry-secret', secrets) try: - self.get_namespace('duplicate') + self.ns.get('duplicate') except KubeHTTPException: - self.create_namespace('duplicate') + self.ns.create('duplicate') try: - self.get_node('172.17.8.100') + self.node.get('172.17.8.100') except KubeHTTPException: data = { "kind": "Node", @@ -892,6 +973,7 @@ def __init__(self): except Exception as e: logger.critical(e) + scheduler.session = session() SchedulerClient = MockSchedulerClient diff --git a/rootfs/scheduler/resources/__init__.py b/rootfs/scheduler/resources/__init__.py new file mode 100644 index 000000000..a80381661 --- /dev/null +++ b/rootfs/scheduler/resources/__init__.py @@ -0,0 +1,10 @@ +from scheduler.resources.__resource import Resource # noqa + +# Load in all resources +import pkgutil +import importlib +import os +pkgpath = os.path.dirname(__file__) +for _, name, _ in pkgutil.iter_modules([pkgpath]): + if not name.startswith('__'): + importlib.import_module('.{}'.format(name), 'scheduler.resources') diff --git a/rootfs/scheduler/resources/__resource.py b/rootfs/scheduler/resources/__resource.py new file mode 100644 index 000000000..d54f67955 --- /dev/null +++ b/rootfs/scheduler/resources/__resource.py @@ -0,0 
+1,34 @@ +from urllib.parse import urljoin +from django.conf import settings +from .. import KubeHTTPClient + + +class ResourceRegistry(type): + """ + A registry of all Resources subclassed + """ + def __init__(cls, name, bases, nmspc): + super().__init__(name, bases, nmspc) + if not hasattr(cls, 'registry'): + cls.registry = set() + + cls.registry.add(cls) + cls.registry -= set(bases) # Remove base classes + + # Meta methods, called on class objects: + def __iter__(cls): + return iter(cls.registry) + + +class Resource(KubeHTTPClient, metaclass=ResourceRegistry): + api_version = 'v1' + api_prefix = 'api' + short_name = None + + def __init__(self): + super().__init__(settings.SCHEDULER_URL) + + def api(self, tmpl, *args): + """Return a fully-qualified Kubernetes API URL from a string template with args.""" + url = "/{}/{}".format(self.api_prefix, self.api_version) + tmpl.format(*args) + return urljoin(self.url, url) diff --git a/rootfs/scheduler/resources/deployment.py b/rootfs/scheduler/resources/deployment.py new file mode 100644 index 000000000..b3e84b7a3 --- /dev/null +++ b/rootfs/scheduler/resources/deployment.py @@ -0,0 +1,344 @@ +from datetime import datetime, timedelta +import json +import time +from scheduler.resources import Resource +from scheduler.exceptions import KubeException, KubeHTTPException + + +class Deployment(Resource): + api_prefix = 'apis' + api_version = 'extensions/v1beta1' + + def get(self, namespace, name=None, **kwargs): + """ + Fetch a single Deployment or a list + """ + url = '/namespaces/{}/deployments' + args = [namespace] + if name is not None: + args.append(name) + url += '/{}' + message = 'get Deployment "{}" in Namespace "{}"' + else: + message = 'get Deployments in Namespace "{}"' + + url = self.api(url, *args) + response = self.session.get(url, params=self.query_params(**kwargs)) + if self.unhealthy(response.status_code): + args.reverse() # error msg is in reverse order + raise KubeHTTPException(response, message, *args) + + 
return response + + def manifest(self, namespace, name, image, entrypoint, command, **kwargs): + replicas = kwargs.get('replicas', 0) + batches = kwargs.get('deploy_batches', None) + tags = kwargs.get('tags', {}) + + labels = { + 'app': namespace, + 'type': kwargs.get('app_type'), + 'heritage': 'deis', + } + + manifest = { + 'kind': 'Deployment', + 'apiVersion': 'extensions/v1beta1', + 'metadata': { + 'name': name, + 'labels': labels, + 'annotations': { + 'kubernetes.io/change-cause': kwargs.get('release_summary', '') + } + }, + 'spec': { + 'replicas': replicas, + 'selector': { + 'matchLabels': labels + } + } + } + + # Add in Rollback (if asked for) + rollback = kwargs.get('rollback', False) + if rollback: + # http://kubernetes.io/docs/user-guide/deployments/#rollback-to + if rollback is True: + # rollback to the latest known working revision + revision = 0 + elif isinstance(rollback, int) or isinstance(rollback, str): + # rollback to a particular revision + revision = rollback + + # This gets cleared from the template after a rollback is done + manifest['spec']['rollbackTo'] = {'revision': str(revision)} + + # Add deployment strategy + + # see if application or global deploy batches are defined + maxSurge = self._get_deploy_steps(batches, tags) + # if replicas are higher than maxSurge then the old deployment is never scaled down + # maxSurge can't be 0 when maxUnavailable is 0 and the other way around + if replicas > 0 and replicas < maxSurge: + maxSurge = replicas + + # http://kubernetes.io/docs/user-guide/deployments/#strategy + manifest['spec']['strategy'] = { + 'rollingUpdate': { + 'maxSurge': maxSurge, + # This is never updated + 'maxUnavailable': 0 + }, + # RollingUpdate or Recreate + 'type': 'RollingUpdate', + } + + # Add in how many deployment revisions to keep + if kwargs.get('deployment_revision_history', None) is not None: + manifest['spec']['revisionHistoryLimit'] = int(kwargs.get('deployment_revision_history')) # noqa + + # tell pod how to execute the 
process + kwargs['command'] = entrypoint + kwargs['args'] = command + + # pod manifest spec + manifest['spec']['template'] = self.pod.manifest(namespace, name, image, **kwargs) + + return manifest + + def create(self, namespace, name, image, entrypoint, command, **kwargs): + manifest = self.manifest(namespace, name, image, + entrypoint, command, **kwargs) + + url = self.api("/namespaces/{}/deployments", namespace) + response = self.session.post(url, json=manifest) + if self.unhealthy(response.status_code): + raise KubeHTTPException( + response, + 'create Deployment "{}" in Namespace "{}"', name, namespace + ) + self.log(namespace, 'template used: {}'.format(json.dumps(manifest, indent=4)), 'DEBUG') # noqa + + self.wait_until_updated(namespace, name) + self.wait_until_ready(namespace, name, **kwargs) + + return response + + def update(self, namespace, name, image, entrypoint, command, **kwargs): + manifest = self.manifest(namespace, name, image, + entrypoint, command, **kwargs) + + url = self.api("/namespaces/{}/deployments/{}", namespace, name) + response = self.session.put(url, json=manifest) + if self.unhealthy(response.status_code): + self.log(namespace, 'template used: {}'.format(json.dumps(manifest, indent=4)), 'DEBUG') # noqa + raise KubeHTTPException(response, 'update Deployment "{}"', name) + + self.wait_until_updated(namespace, name) + self.wait_until_ready(namespace, name, **kwargs) + + return response + + def delete(self, namespace, name): + url = self.api("/namespaces/{}/deployments/{}", namespace, name) + response = self.session.delete(url) + if self.unhealthy(response.status_code): + raise KubeHTTPException( + response, + 'delete Deployment "{}" in Namespace "{}"', name, namespace + ) + + return response + + def scale(self, namespace, name, image, entrypoint, command, **kwargs): + """ + A convenience wrapper around Deployment update that does a little bit of introspection + to determine if scale level is already where it needs to be + """ + deployment 
= self.deployment.get(namespace, name).json() + desired = int(kwargs.get('replicas')) + current = int(deployment['spec']['replicas']) + if desired == current: + self.log(namespace, "Not scaling Deployment {} to {} replicas. Already at desired replicas".format(name, desired)) # noqa + return + elif desired != current: + # set the previous replicas count so the wait logic can deal with terminating pods + kwargs['previous_replicas'] = current + self.log(namespace, "scaling Deployment {} from {} to {} replicas".format(name, current, desired)) # noqa + self.update(namespace, name, image, entrypoint, command, **kwargs) + + def in_progress(self, namespace, name, deploy_timeout, batches, replicas, tags): + """ + Determine if a Deployment has a deploy in progress + + First is a very basic check to see if replicas are ready. + + If they are not ready then it is time to see if there are problems with any of the pods + such as image pull issues or similar. + + And then if that is still all okay then it is time to see if the deploy has + been in progress for longer than the allocated deploy time. Reason to do this + check is if a client has had a dropped connection. 
+ + Returns 2 booleans, first one is for if the Deployment is in progress or not, second + one is or if a rollback action is advised while leaving the rollback up to the caller + """ + self.log(namespace, 'Checking if Deployment {} is in progress'.format(name), level='DEBUG') # noqa + try: + ready, _ = self.are_replicas_ready(namespace, name) + if ready: + # nothing more to do - False since it is not in progress + self.log(namespace, 'All replicas for Deployment {} are ready'.format(name), level='DEBUG') # noqa + return False, False + except KubeHTTPException as e: + # Deployment doesn't exist + if e.response.status_code == 404: + self.log(namespace, 'Deployment {} does not exist yet'.format(name), level='DEBUG') # noqa + return False, False + + # get deployment information + deployment = self.deployment.get(namespace, name).json() + # get pod template labels since they include the release version + labels = deployment['spec']['template']['metadata']['labels'] + containers = deployment['spec']['template']['spec']['containers'] + + # calculate base deploy timeout + deploy_timeout = self._deploy_probe_timeout(deploy_timeout, namespace, labels, containers) + + # a rough calculation that figures out an overall timeout + steps = self._get_deploy_steps(batches, tags) + batches = self._get_deploy_batches(steps, replicas) + timeout = len(batches) * deploy_timeout + + # is there a slow image pull or image issues + try: + timeout += self.pod._handle_pending_pods(namespace, labels) + except KubeException as e: + self.log(namespace, 'Deployment {} had stalled due an error and will be rolled back. 
{}'.format(name, str(e)), level='DEBUG') # noqa + return False, True + + # fetch the latest RS for Deployment and use the start time to compare to deploy timeout + replicasets = self.rs.get(namespace, labels=labels).json()['items'] + # the labels should ensure that only 1 replicaset due to the version label + if len(replicasets) != 1: + # if more than one then sort by start time to newest is first + replicasets.sort(key=lambda x: x['metadata']['creationTimestamp'], reverse=True) + + # work with the latest copy + replica = replicasets.pop() + + # throw an exception if over TTL so error is bubbled up + start = self.parse_date(replica['metadata']['creationTimestamp']) + if (start + timedelta(seconds=timeout)) < datetime.utcnow(): + self.log(namespace, 'Deploy operation for Deployment {} in has expired. Rolling back to last good known release'.format(name), level='DEBUG') # noqa + return False, True + + return True, False + + def are_replicas_ready(self, namespace, name): + """ + Verify the status of a Deployment and if it is fully deployed + """ + deployment = self.get(namespace, name).json() + desired = deployment['spec']['replicas'] + status = deployment['status'] + + # right now updateReplicas is where it is at + # availableReplicas mean nothing until minReadySeconds is used + pods = status['updatedReplicas'] if 'updatedReplicas' in status else 0 + + # spec/replicas of 0 is a special case as other fields get removed from status + if desired == 0 and ('replicas' not in status or status['replicas'] == 0): + return True, pods + + if ( + 'unavailableReplicas' in status or + ('replicas' not in status or status['replicas'] != desired) or + ('updatedReplicas' not in status or status['updatedReplicas'] != desired) or + ('availableReplicas' not in status or status['availableReplicas'] != desired) + ): + return False, pods + + return True, pods + + def wait_until_updated(self, namespace, name): + """ + Looks at status/observedGeneration and metadata/generation
and + waits for observedGeneration >= generation to happen + + http://kubernetes.io/docs/user-guide/deployments/#the-status-of-a-deployment + More information is also available at: + https://github.com/kubernetes/kubernetes/blob/master/docs/devel/api-conventions.md#metadata + """ + self.log(namespace, "waiting for Deployment {} to get a newer generation (30s timeout)".format(name), 'DEBUG') # noqa + for _ in range(30): + try: + deploy = self.deployment.get(namespace, name).json() + if ( + 'observedGeneration' in deploy['status'] and + deploy['status']['observedGeneration'] >= deploy['metadata']['generation'] + ): + self.log(namespace, "A newer generation was found for Deployment {}".format(name), 'DEBUG') # noqa + break + + time.sleep(1) + except KubeHTTPException as e: + if e.response.status_code == 404: + time.sleep(1) + + def wait_until_ready(self, namespace, name, **kwargs): + """ + Wait until the Deployment object has all the replicas ready + and other factors that play in + + Deals with the wait time, timesout and more + """ + replicas = int(kwargs.get('replicas', 0)) + # If desired is 0 then there is no ready state to check on + if replicas == 0: + return + + current = int(kwargs.get('previous_replicas', 0)) + batches = kwargs.get('deploy_batches', None) + deploy_timeout = kwargs.get('deploy_timeout', 120) + tags = kwargs.get('tags', {}) + steps = self._get_deploy_steps(batches, tags) + batches = self._get_deploy_batches(steps, replicas) + + deployment = self.get(namespace, name).json() + labels = deployment['spec']['template']['metadata']['labels'] + containers = deployment['spec']['template']['spec']['containers'] + + # if it was a scale down operation, wait until terminating pods are done + # Deployments say they are ready even when pods are being terminated + if replicas < current: + self.pods.wait_until_terminated(namespace, labels, current, replicas) + return + + # calculate base deploy timeout + deploy_timeout = 
self._deploy_probe_timeout(deploy_timeout, namespace, labels, containers) + + # a rough calculation that figures out an overall timeout + timeout = len(batches) * deploy_timeout + self.log(namespace, 'This deployments overall timeout is {}s - batch timout is {}s and there are {} batches to deploy with a total of {} pods'.format(timeout, deploy_timeout, len(batches), replicas)) # noqa + + waited = 0 + while waited < timeout: + ready, availablePods = self.are_replicas_ready(namespace, name) + if ready: + break + + # check every 10 seconds for pod failures. + # Depend on Deployment checks for ready pods + if waited > 0 and (waited % 10) == 0: + additional_timeout = self.pod._handle_pending_pods(namespace, labels) + if additional_timeout: + timeout += additional_timeout + # add 10 minutes to timeout to allow a pull image operation to finish + self.log(namespace, 'Kubernetes has been pulling the image for {}s'.format(waited)) # noqa + self.log(namespace, 'Increasing timeout by {}s to allow a pull image operation to finish for pods'.format(additional_timeout)) # noqa + + self.log(namespace, "waited {}s and {} pods are in service".format(waited, availablePods)) # noqa + + waited += 1 + time.sleep(1) diff --git a/rootfs/scheduler/resources/horizontalpodautoscaler.py b/rootfs/scheduler/resources/horizontalpodautoscaler.py new file mode 100644 index 000000000..748a2f6f5 --- /dev/null +++ b/rootfs/scheduler/resources/horizontalpodautoscaler.py @@ -0,0 +1,154 @@ +import json +from scheduler.resources import Resource +from scheduler.exceptions import KubeException, KubeHTTPException + + +class HorizontalPodAutoscaler(Resource): + api_prefix = 'apis' + short_name = 'hpa' + + @property + def api_version(self): + # API location changes between versions + # http://kubernetes.io/docs/user-guide/horizontal-pod-autoscaling/#api-object + if self.version() >= 1.3: + return 'autoscaling/v1' + + # 1.2 and older + return 'extensions/v1beta1' + + def get(self, namespace, name=None, 
**kwargs): + """ + Fetch a single HorizontalPodAutoscaler or a list + """ + url = '/namespaces/{}/horizontalpodautoscalers' + args = [namespace] + if name is not None: + args.append(name) + url += '/{}' + message = 'get HorizontalPodAutoscaler "{}" in Namespace "{}"' + else: + message = 'get HorizontalPodAutoscalers in Namespace "{}"' + + url = self.api(url, *args) + response = self.session.get(url, params=self.query_params(**kwargs)) + if self.unhealthy(response.status_code): + args.reverse() # error msg is in reverse order + raise KubeHTTPException(response, message, *args) + + return response + + def manifest(self, namespace, name, target, **kwargs): + min_replicas = kwargs.get('min') + max_replicas = kwargs.get('max') + cpu_percent = kwargs.get('cpu_percent') + + if min_replicas < 1: + raise KubeException('min replicas needs to be 1 or higher') + + if max_replicas < min_replicas: + raise KubeException('max replicas can not be smaller than min replicas') + + labels = { + 'app': namespace, + 'type': kwargs.get('app_type'), + 'heritage': 'deis', + } + + manifest = { + 'kind': 'HorizontalPodAutoscaler', + 'apiVersion': self.api_version, + 'metadata': { + 'name': name, + 'namespace': namespace, + 'labels': labels, + }, + 'spec': { + 'minReplicas': min_replicas, + 'maxReplicas': max_replicas, + 'scaleRef': { + # only works with Deployments, RS and RC + 'kind': target['kind'], + 'name': target['metadata']['name'], + # the resource of the above which does the scale action + 'subresource': 'scale', + }, + 'cpuUtilization': { + 'targetPercentage': cpu_percent + } + } + } + + return manifest + + def create(self, namespace, name, target, **kwargs): + manifest = self.manifest(namespace, name, target, **kwargs) + + url = self.api("/namespaces/{}/horizontalpodautoscalers", namespace) + response = self.session.post(url, json=manifest) + if self.unhealthy(response.status_code): + raise KubeHTTPException( + response, + 'create HorizontalPodAutoscaler "{}" in Namespace "{}"', 
name, namespace + ) + self.log(namespace, 'template used: {}'.format(json.dumps(manifest, indent=4)), 'DEBUG') # noqa + + # optionally wait for HPA if requested + if kwargs.get('wait', False): + self.wait(namespace, name) + + return response + + def update(self, namespace, name, target, **kwargs): + manifest = self.manifest(namespace, name, target, **kwargs) + + url = self.api("/namespaces/{}/horizontalpodautoscalers/{}", namespace, name) + response = self.session.put(url, json=manifest) + if self.unhealthy(response.status_code): + self.log(namespace, 'template used: {}'.format(json.dumps(manifest, indent=4)), 'DEBUG') # noqa + raise KubeHTTPException(response, 'update HorizontalPodAutoscaler "{}"', name) + + # optionally wait for HPA if requested + if kwargs.get('wait', False): + self.wait(namespace, name) + + return response + + def delete(self, namespace, name): + url = self.api("/namespaces/{}/horizontalpodautoscalers/{}", namespace, name) + response = self.session.delete(url) + if self.unhealthy(response.status_code): + raise KubeHTTPException( + response, + 'delete HorizontalPodAutoscaler "{}" in Namespace "{}"', name, namespace + ) + + return response + + def wait(self, namespace, name): + # fetch HPA details + hpa = self.hpa.get(namespace, name).json() + + # FIXME all of the below can be replaced with hpa['status'][desiredReplicas'] + # when https://github.com/kubernetes/kubernetes/issues/29739 is fixed + # until then we have to query things ourselves + + # only wait 30 seconds / attempts - this is not optimal + # ideally it would use the resources wait commands but they vary + for _ in range(30): + # fetch resource attached to it + resource_kind = hpa['spec']['scaleRef']['kind'].lower() + resource_name = hpa['spec']['scaleRef']['name'] + + resource = getattr(self, resource_kind) + resource = getattr(resource, 'get')(namespace, resource_name).json() + + # compare resource current replica count to HPA + # (Deployment vs RC vs RS is all different) + if 
resource_kind in ['replicationcontroller', 'replicaset']: + replicas = resource['status']['replicas'] + elif resource_kind == 'deployment': + replicas = resource['status']['availableReplicas'] + + if replicas <= hpa['spec']['maxReplicas'] and replicas >= hpa['spec']['minReplicas']: + break diff --git a/rootfs/scheduler/resources/namespace.py b/rootfs/scheduler/resources/namespace.py new file mode 100644 index 000000000..3ba1a9e70 --- /dev/null +++ b/rootfs/scheduler/resources/namespace.py @@ -0,0 +1,62 @@ +from scheduler.exceptions import KubeHTTPException +from scheduler.resources import Resource + + +class Namespace(Resource): + short_name = 'ns' + + def get(self, name=None, **kwargs): + """ + Fetch a single Namespace or a list + """ + url = '/namespaces' + args = [] + if name is not None: + args.append(name) + url += '/{}' + message = 'get Namespace "{}"' + else: + message = 'get Namespaces' + + url = self.api(url, *args) + response = self.session.get(url, params=self.query_params(**kwargs)) + if self.unhealthy(response.status_code): + args.reverse() # error msg is in reverse order + raise KubeHTTPException(response, message, *args) + + return response + + def create(self, namespace): + url = self.api("/namespaces") + data = { + "kind": "Namespace", + "apiVersion": "v1", + "metadata": { + "name": namespace, + "labels": { + 'heritage': 'deis' + } + } + } + + response = self.session.post(url, json=data) + if not response.status_code == 201: + raise KubeHTTPException(response, "create Namespace {}".format(namespace)) + + return response + + def delete(self, namespace): + url = self.api("/namespaces/{}", namespace) + response = self.session.delete(url) + if self.unhealthy(response.status_code): + raise KubeHTTPException(response, 'delete Namespace "{}"', namespace) + + return response + + def events(self, namespace, **kwargs): + url = self.api("/namespaces/{}/events", namespace) + response = self.session.get(url, params=self.query_params(**kwargs)) + if 
self.unhealthy(response.status_code): + raise KubeHTTPException(response, "get Events in Namespace {}", namespace) + + return response diff --git a/rootfs/scheduler/resources/node.py b/rootfs/scheduler/resources/node.py new file mode 100644 index 000000000..a1ae38b41 --- /dev/null +++ b/rootfs/scheduler/resources/node.py @@ -0,0 +1,27 @@ +from scheduler.resources import Resource +from scheduler.exceptions import KubeHTTPException + + +class Node(Resource): + short_name = 'no' + + def get(self, name=None, **kwargs): + """ + Fetch a single Node or a list + """ + url = '/nodes' + args = [] + if name is not None: + args.append(name) + url += '/{}' + message = 'get Node "{}" in Nodes' + else: + message = 'get Nodes' + + url = self.api(url, *args) + response = self.session.get(url, params=self.query_params(**kwargs)) + if self.unhealthy(response.status_code): + args.reverse() # error msg is in reverse order + raise KubeHTTPException(response, message, *args) + + return response diff --git a/rootfs/scheduler/resources/pod.py b/rootfs/scheduler/resources/pod.py new file mode 100644 index 000000000..d66e86ee1 --- /dev/null +++ b/rootfs/scheduler/resources/pod.py @@ -0,0 +1,709 @@ +import base64 +from datetime import datetime, timedelta +from docker.auth import auth as docker_auth +import json +import operator +import os +import time + +from django.conf import settings +from scheduler.exceptions import KubeException, KubeHTTPException +from scheduler.resources import Resource +from scheduler.states import PodState + + +class Pod(Resource): + short_name = 'po' + + def get(self, namespace, name=None, **kwargs): + """ + Fetch a single Pod or a list + """ + url = '/namespaces/{}/pods' + args = [namespace] + if name is not None: + args.append(name) + url += '/{}' + message = 'get Pod "{}" in Namespace "{}"' + else: + message = 'get Pods in Namespace "{}"' + + url = self.api(url, *args) + response = self.session.get(url, params=self.query_params(**kwargs)) + if 
self.unhealthy(response.status_code): + args.reverse() # error msg is in reverse order + raise KubeHTTPException(response, message, *args) + + return response + + def state(self, pod): + """ + Resolve Pod state to an internally understandable format and returns a + PodState object that can be used for comparison or name can get gotten + via .name + + However if no match is found then a text representation is returned + """ + # See "Pod Phase" at http://kubernetes.io/docs/user-guide/pod-states/ + if pod is None: + return PodState.destroyed + + states = { + 'Pending': PodState.initializing, + 'ContainerCreating': PodState.creating, + 'Starting': PodState.starting, + 'Running': PodState.up, + 'Terminating': PodState.terminating, + 'Succeeded': PodState.down, + 'Failed': PodState.crashed, + 'Unknown': PodState.error, + } + + # being in a Pending/ContainerCreating state can mean different things + # introspecting app container first + if pod['status']['phase'] in ['Pending', 'ContainerCreating']: + pod_state, _ = self.pod.pending_status(pod) + # being in a running state can mean a pod is starting, actually running or terminating + elif pod['status']['phase'] == 'Running': + # is the readiness probe passing? + pod_state = self.readiness_status(pod) + if pod_state in ['Starting', 'Terminating']: + return states[pod_state] + elif pod_state == 'Running' and self.liveness_status(pod): + # is the pod ready to serve requests? 
+ return states[pod_state] + else: + # if no match was found for deis mapping then passthrough the real state + pod_state = pod['status']['phase'] + + return states.get(pod_state, pod_state) + + def manifest(self, namespace, name, image, **kwargs): + app_type = kwargs.get('app_type') + build_type = kwargs.get('build_type') + + # labels that represent the pod(s) + labels = { + 'app': namespace, + 'version': kwargs.get('version'), + 'type': app_type, + 'heritage': 'deis', + } + + # create base pod structure + manifest = { + 'kind': 'Pod', + 'apiVersion': 'v1', + 'metadata': { + 'name': name, + 'labels': labels + }, + 'spec': {} + } + + # pod manifest spec + spec = manifest['spec'] + + # what should the pod do if it exits + spec['restartPolicy'] = kwargs.get('restartPolicy', 'Always') + + # apply tags as needed to restrict pod to particular node(s) + spec['nodeSelector'] = kwargs.get('tags', {}) + + # How long until a pod is forcefully terminated + spec['terminationGracePeriodSeconds'] = settings.KUBERNETES_POD_TERMINATION_GRACE_PERIOD_SECONDS # noqa + + # set the image pull policy that is associated with the application container + kwargs['image_pull_policy'] = settings.DOCKER_BUILDER_IMAGE_PULL_POLICY + + # Check if it is a slug builder image. 
+ if build_type == "buildpack": + # only buildpack apps need access to object storage + try: + self.secret.get(namespace, 'objectstorage-keyfile') + except KubeException: + secret = self.secret.get('deis', 'objectstorage-keyfile').json() + self.secret.create(namespace, 'objectstorage-keyfile', secret['data']) + + # add the required volume to the top level pod spec + spec['volumes'] = [{ + 'name': 'objectstorage-keyfile', + 'secret': { + 'secretName': 'objectstorage-keyfile' + } + }] + + # added to kwargs to send to the container function + kwargs['volumeMounts'] = [{ + 'name': 'objectstorage-keyfile', + 'mountPath': '/var/run/secrets/deis/objectstore/creds', + 'readOnly': True + }] + + # overwrite image so slugrunner image is used in the container + image = settings.SLUGRUNNER_IMAGE + # slugrunner pull policy + kwargs['image_pull_policy'] = settings.SLUG_BUILDER_IMAGE_PULL_POLICY + + # create the base container + container = {} + + # process to call + if kwargs.get('command', []): + container['command'] = kwargs.get('command') + if kwargs.get('args', []): + container['args'] = kwargs.get('args') + + # set information to the application container + kwargs['image'] = image + container_name = namespace + '-' + app_type + self._set_container(namespace, container_name, container, **kwargs) + # add image to the mix + self._set_image_secret(spec, namespace, **kwargs) + + spec['containers'] = [container] + + return manifest + + def _set_container(self, namespace, container_name, data, **kwargs): + """Set app container information (env, healthcheck, etc) on a Pod""" + app_type = kwargs.get('app_type') + mem = kwargs.get('memory', {}).get(app_type) + cpu = kwargs.get('cpu', {}).get(app_type) + env = kwargs.get('envs', {}) + + # container name + data['name'] = container_name + # set the image to use + data['image'] = kwargs.get('image') + # set the image pull policy for the above image + data['imagePullPolicy'] = kwargs.get('image_pull_policy') + # add in any volumes that 
need to be mounted into the container + data['volumeMounts'] = kwargs.get('volumeMounts', []) + + # create env list if missing + if 'env' not in data: + data['env'] = [] + + if env: + # map application configuration (env secret) to env vars + secret_name = "{}-{}-env".format(namespace, kwargs.get('version')) + for key in env.keys(): + item = { + "name": key, + "valueFrom": { + "secretKeyRef": { + "name": secret_name, + # k8s doesn't allow _ so translate to -, see above + "key": key.lower().replace('_', '-') + } + } + } + + # add value to env hash. Overwrite hardcoded values if need be + match = next((k for k, e in enumerate(data["env"]) if e['name'] == key), None) + if match is not None: + data["env"][match] = item + else: + data["env"].append(item) + + # Inject debugging if workflow is in debug mode + if os.environ.get("DEIS_DEBUG", False): + data["env"].append({ + "name": "DEIS_DEBUG", + "value": "1" + }) + + # list sorted by dict key name + data['env'].sort(key=operator.itemgetter('name')) + + if mem or cpu: + data["resources"] = {"limits": {}} + + if mem: + if mem[-2:-1].isalpha() and mem[-1].isalpha(): + mem = mem[:-1] + + # memory needs to be upper cased (only first char) + mem = mem.upper() + "i" + data["resources"]["limits"]["memory"] = mem + + if cpu: + # CPU needs to be defined as lower case + data["resources"]["limits"]["cpu"] = cpu.lower() + + # add in healthchecks + self._set_health_checks(data, env, **kwargs) + + def _set_health_checks(self, container, env, **kwargs): + healthchecks = kwargs.get('healthcheck', None) + if healthchecks: + # check if a port is present. 
if not, auto-populate it + # TODO: rip this out when we stop supporting deis config:set HEALTHCHECK_URL + if ( + healthchecks.get('livenessProbe') is not None and + healthchecks['livenessProbe'].get('httpGet') is not None and + healthchecks['livenessProbe']['httpGet'].get('port') is None + ): + healthchecks['livenessProbe']['httpGet']['port'] = env['PORT'] + container.update(healthchecks) + elif kwargs.get('routable', False): + self._default_readiness_probe(container, kwargs.get('build_type'), env.get('PORT', None)) # noqa + + def _default_readiness_probe(self, container, build_type, port=None): + # Update only the application container with the health check + if build_type == "buildpack": + container.update(self._default_buildpack_readiness_probe()) + elif port: + container.update(self._default_dockerapp_readiness_probe(port)) + + """ + Applies exec readiness probe to the slugrunner container. + http://kubernetes.io/docs/user-guide/pod-states/#container-probes + + /runner/init is the entry point of the slugrunner. + https://github.com/deis/slugrunner/blob/01eac53f1c5f1d1dfa7570bbd6b9e45c00441fea/rootfs/Dockerfile#L20 + Once it downloads the slug it starts running using `exec` which means the pid 1 + will point to the slug/application command instead of entry point once the application has + started. + https://github.com/deis/slugrunner/blob/01eac53f1c5f1d1dfa7570bbd6b9e45c00441fea/rootfs/runner/init#L90 + + This should be added only for the build pack apps when a custom liveness probe is not set to + make sure that the pod is ready only when the slug is downloaded and started running. 
+ """ + def _default_buildpack_readiness_probe(self, delay=30, timeout=5, period_seconds=5, + success_threshold=1, failure_threshold=1): + readinessprobe = { + 'readinessProbe': { + # an exec probe + 'exec': { + "command": [ + "bash", + "-c", + "[[ '$(ps -p 1 -o args)' != *'bash /runner/init'* ]]" + ] + }, + # length of time to wait for a pod to initialize + # after pod startup, before applying health checking + 'initialDelaySeconds': delay, + 'timeoutSeconds': timeout, + 'periodSeconds': period_seconds, + 'successThreshold': success_threshold, + 'failureThreshold': failure_threshold, + }, + } + return readinessprobe + + def _default_dockerapp_readiness_probe(self, port, delay=5, timeout=5, period_seconds=5, + success_threshold=1, failure_threshold=1): + """ + Applies tcp socket readiness probe to the docker app container only if some port is exposed + by the docker image. + """ + readinessprobe = { + 'readinessProbe': { + # an exec probe + 'tcpSocket': { + "port": int(port) + }, + # length of time to wait for a pod to initialize + # after pod startup, before applying health checking + 'initialDelaySeconds': delay, + 'timeoutSeconds': timeout, + 'periodSeconds': period_seconds, + 'successThreshold': success_threshold, + 'failureThreshold': failure_threshold, + }, + } + return readinessprobe + + def _get_private_registry_config(self, registry, image): + secret_name = settings.REGISTRY_SECRET_PREFIX + if registry: + # try to get the hostname information + hostname = registry.get('hostname', None) + if not hostname: + hostname, _ = docker_auth.split_repo_name(image) + if hostname == docker_auth.INDEX_NAME: + hostname = "https://index.docker.io/v1/" + username = registry.get('username') + password = registry.get('password') + elif settings.REGISTRY_LOCATION == 'off-cluster': + secret = self.secret.get('deis', 'registry-secret').json() + username = secret['data']['username'] + password = secret['data']['password'] + hostname = secret['data']['hostname'] + if hostname == 
def _set_image_secret(self, data, namespace, **kwargs):
    """
    Take registry information and set it as an imagePullSecret on the pod spec.

    http://kubernetes.io/docs/user-guide/images/#specifying-imagepullsecrets-on-a-pod
    """
    registry = kwargs.get('registry', {})
    image = kwargs.get('image')
    docker_config, secret_name, secret_create = self._get_private_registry_config(registry, image)  # noqa
    if secret_create is None:
        # no private registry in play
        return

    if secret_create:
        secret_data = {'.dockerconfigjson': docker_config}
        try:
            self.secret.get(namespace, secret_name)
        except KubeHTTPException:
            # secret does not exist yet - create it
            self.secret.create(
                namespace,
                secret_name,
                secret_data,
                secret_type='kubernetes.io/dockerconfigjson'
            )
        else:
            self.secret.update(
                namespace,
                secret_name,
                secret_data,
                secret_type='kubernetes.io/dockerconfigjson'
            )

    # apply image pull secret to a Pod spec
    data['imagePullSecrets'] = [{'name': secret_name}]


def delete(self, namespace, name):
    """Delete a Pod and wait (up to the grace period) for it to go away."""
    url = self.api("/namespaces/{}/pods/{}", namespace, name)
    response = self.session.delete(url)
    if self.unhealthy(response.status_code):
        raise KubeHTTPException(response, 'delete Pod "{}" in Namespace "{}"', name, namespace)

    # Verify the pod has been deleted
    # Only wait as long as the grace period is - k8s will eventually GC
    for _ in range(settings.KUBERNETES_POD_TERMINATION_GRACE_PERIOD_SECONDS):
        try:
            pod = self.pod.get(namespace, name).json()
            # done once the pod is past its graceful termination period
            if self.deleted(pod):
                return
        except KubeHTTPException as e:
            # a 404 means the pod is fully gone
            if e.response.status_code == 404:
                break

        time.sleep(1)
def logs(self, namespace, name):
    """Return the raw log response for a Pod."""
    url = self.api("/namespaces/{}/pods/{}/log", namespace, name)
    response = self.session.get(url)
    if self.unhealthy(response.status_code):
        raise KubeHTTPException(
            response,
            'get logs for Pod "{}" in Namespace "{}"', name, namespace
        )

    return response


def ready(self, pod):
    """Combines various checks to see if the pod is considered up or not by checking probes"""
    if pod['status']['phase'] != 'Running':
        return False

    # readiness probe must be passing and the pod ready to serve requests
    return self.readiness_status(pod) == 'Running' and self.liveness_status(pod)


def readiness_status(self, pod):
    """Check if the pod container have passed the readiness probes"""
    labels = pod['metadata']['labels']
    container_name = '{}-{}'.format(labels['app'], labels['type'])
    # find the right container in case there are many on the pod
    container = self.pod.find_container(container_name, pod['status']['containerStatuses'])
    if container is None:
        # Seems like the most sensible default
        return 'Unknown'

    terminating = 'deletionTimestamp' in pod['metadata']
    if container['ready']:
        # k8s may still be tearing the pod down even though it reports ready
        return 'Terminating' if terminating else 'Running'

    if 'running' in container['state']:
        return 'Starting'

    if 'terminated' in container['state'] or terminating:
        return 'Terminating'

    # Seems like the most sensible default
    return 'Unknown'


def liveness_status(self, pod):
    """Check if the pods liveness probe status has passed all checks"""
    # type = Ready is the only binary condition type right now
    return all(
        condition['status'] == 'True'
        for condition in pod['status']['conditions']
        if condition['type'] == 'Ready'
    )


def deleted(self, pod):
    """
    Checks if a pod is deleted and past its graceful termination period.

    https://github.com/kubernetes/kubernetes/blob/release-1.2/docs/devel/api-conventions.md#metadata
    http://kubernetes.io/docs/user-guide/pods/#termination-of-pods
    """
    timestamp = pod['metadata'].get('deletionTimestamp')
    if timestamp is None:
        return False

    # past the graceful deletion period?
    return self.parse_date(timestamp) < datetime.utcnow()
def pending_status(self, pod):
    """Introspect the pod containers when pod is in Pending state"""
    default = ('Pending', '')
    if 'containerStatuses' not in pod['status']:
        return default

    labels = pod['metadata']['labels']
    container_name = '{}-{}'.format(labels['app'], labels['type'])
    # find the right container in case there are many on the pod
    container = self.pod.find_container(container_name, pod['status']['containerStatuses'])
    if container is None:
        # Return Pending if nothing else can be found
        return default

    if 'waiting' not in container['state']:
        # Return Pending if nothing else can be found
        return default

    waiting = container['state']['waiting']
    reason = waiting['reason']
    # message is not always available
    message = waiting.get('message', '')

    if reason == 'ContainerCreating':
        # surface the most recent event instead, when one exists
        events = self.events(pod)
        if events:
            last_event = events.pop()
            return last_event['reason'], last_event['message']

    return reason, message


def events(self, pod):
    """Process events for a given Pod to find if Pulling is happening, among other events"""
    meta = pod['metadata']
    # fetch all events for this pod
    fields = {
        'involvedObject.name': meta['name'],
        'involvedObject.namespace': meta['namespace'],
        'involvedObject.uid': meta['uid']
    }
    response = self.ns.events(meta['namespace'], fields=fields).json()
    # make sure that events are sorted
    return sorted(response['items'], key=lambda event: event['lastTimestamp'])
x['lastTimestamp']) + return events['items'] + + def _handle_pod_errors(self, pod, reason, message): + """ + Handle potential pod errors based on the Pending + reason passed into the function + + Images, FailedScheduling and others are needed + """ + # image error reported on the container level + container_errors = [ + 'Pending', # often an indication of deeper inspection is needed + 'ErrImagePull', + 'ImagePullBackOff', + 'RegistryUnavailable', + 'ErrImageInspect', + ] + # Image event reason mapping + event_errors = { + "Failed": "FailedToPullImage", + "InspectFailed": "FailedToInspectImage", + "ErrImageNeverPull": "ErrImageNeverPullPolicy", + # Not including this one for now as the message is not useful + # "BackOff": "BackOffPullImage", + # FailedScheduling relates limits + "FailedScheduling": "FailedScheduling", + } + + # Nicer error than from the event + # Often this gets to ImageBullBackOff before we can introspect tho + if reason == 'ErrImagePull': + raise KubeException(message) + + # collect all error messages of worth + messages = [] + if reason in container_errors: + for event in self.events(pod): + if event['reason'] in event_errors.keys(): + # only show a given error once + event_errors.pop(event['reason']) + # strip out whitespaces on either side + message = "\n".join([x.strip() for x in event['message'].split("\n")]) + messages.append(message) + + if messages: + raise KubeException("\n".join(messages)) + + def _handle_long_image_pulling(self, reason, pod): + """ + If pulling an image is taking long (1 minute) then return how many seconds + the pod ready state timeout should be extended by + + Return value is an int that represents seconds + """ + # only apply once + if getattr(self, '_handle_long_image_pulling_applied', False): + return 0 + + if reason is not 'Pulling': + return 0 + + # last event should be Pulling in this case + event = self.events(pod).pop() + # see if pull operation has been happening for over 1 minute + seconds = 60 # time 
def _handle_pending_pods(self, namespace, labels):
    """
    Detects if any pod is in the starting phases and handles
    any potential issues around that, and increases timeouts
    or throws errors as needed
    """
    additional_timeout = 0
    pods = self.get(namespace, labels=labels).json()
    for pod in pods['items']:
        # only care about pods that are in the starting phases
        if pod['status']['phase'] not in ['Pending', 'ContainerCreating']:
            continue

        # Get more information on why a pod is pending
        reason, message = self.pending_status(pod)
        # If pulling an image is taking long then increase the timeout.
        # BUGFIX: arguments were passed as (pod, reason) but the signature
        # is _handle_long_image_pulling(self, reason, pod)
        additional_timeout += self._handle_long_image_pulling(reason, pod)

        # handle errors and bubble up if need be
        self._handle_pod_errors(pod, reason, message)

    return additional_timeout


def find_container(self, container_name, containers):
    """
    Locate a container by name in a list of containers; None when absent.
    """
    for container in containers:
        if container['name'] == container_name:
            return container

    return None


def wait_until_terminated(self, namespace, labels, current, desired):
    """Wait until all the desired pods are terminated"""
    # http://kubernetes.io/docs/api-reference/v1/definitions/#_v1_podspec
    # https://github.com/kubernetes/kubernetes/blob/release-1.2/docs/devel/api-conventions.md#metadata
    # http://kubernetes.io/docs/user-guide/pods/#termination-of-pods
    timeout = settings.KUBERNETES_POD_TERMINATION_GRACE_PERIOD_SECONDS
    delta = current - desired
    self.log(namespace, "waiting for {} pods to be terminated ({}s timeout)".format(delta, timeout))  # noqa
    for waited in range(timeout):
        pods = self.get(namespace, labels=labels).json()
        count = len(pods['items'])

        # see if any pods are past their terminationGracePeriodsSeconds (as in stuck)
        # seems to be a problem in k8s around that:
        # https://github.com/kubernetes/kubernetes/search?q=terminating&type=Issues
        # these will be eventually GC'ed by k8s, ignoring them for now
        for pod in pods['items']:
            if self.deleted(pod):
                count -= 1

        # stop when all pods are terminated as expected
        if count == desired:
            break

        if waited > 0 and (waited % 10) == 0:
            self.log(namespace, "waited {}s and {} pods out of {} are fully terminated".format(waited, (delta - count), delta))  # noqa

        time.sleep(1)

    self.log(namespace, "{} pods are terminated".format(delta))
def wait_until_ready(self, namespace, containers, labels, desired, timeout):  # noqa
    """
    Wait until `desired` pods matching `labels` are in service, extending
    the timeout when a long-running image pull is detected.
    """
    # If desired is 0 then there is no ready state to check on
    if desired == 0:
        return

    timeout = self._deploy_probe_timeout(timeout, namespace, labels, containers)
    self.log(namespace, "waiting for {} pods in {} namespace to be in services ({}s timeout)".format(desired, namespace, timeout))  # noqa

    # BUGFIX: initialise count before the loop so the final log statement
    # cannot hit a NameError when the loop body never runs
    count = 0  # ready pods
    waited = 0

    # Ensure the minimum desired number of pods are available
    while waited < timeout:
        # figure out if there are any pending pod issues
        additional_timeout = self._handle_pending_pods(namespace, labels)
        if additional_timeout:
            # add time to timeout to allow a pull image operation to finish
            timeout += additional_timeout
            # BUGFIX: the previous log line referenced an undefined local
            # `seconds`, raising NameError whenever this branch was hit
            self.log(namespace, 'Kubernetes has been pulling the image for over 60s')  # noqa
            self.log(namespace, 'Increasing timeout by {}s to allow a pull image operation to finish for pods'.format(additional_timeout))  # noqa

        count = 0  # ready pods
        pods = self.get(namespace, labels=labels).json()
        for pod in pods['items']:
            # now that state is running time to see if probes are passing
            if self.ready(pod):
                count += 1
                continue

            # Find out if any pod goes beyond the Running (up) state
            # Allow that to happen to account for very fast `deis run` as
            # an example. Code using this function will account for it
            state = self.state(pod)
            if isinstance(state, PodState) and state > PodState.up:
                count += 1
                continue

        if count == desired:
            break

        if waited > 0 and (waited % 10) == 0:
            self.log(namespace, "waited {}s and {} pods are in service".format(waited, count))

        # increase wait time without dealing with jitters from above code
        waited += 1
        time.sleep(1)

    # timed out
    if waited > timeout:
        self.log(namespace, 'timed out ({}s) waiting for pods to come up in namespace {}'.format(timeout, namespace))  # noqa

    self.log(namespace, "{} out of {} pods are in service".format(count, desired))  # noqa
class ReplicaSet(Resource):
    """Kubernetes ReplicaSet resource, served from extensions/v1beta1."""
    api_prefix = 'apis'
    api_version = 'extensions/v1beta1'
    short_name = 'rs'

    def get(self, namespace, name=None, **kwargs):
        """
        Fetch a single ReplicaSet (by name) or the list in a namespace.
        """
        if name is not None:
            args = [namespace, name]
            url = self.api('/namespaces/{}/replicasets/{}', *args)
            message = 'get ReplicaSet "{}" in Namespace "{}"'
        else:
            args = [namespace]
            url = self.api('/namespaces/{}/replicasets', *args)
            message = 'get ReplicaSets in Namespace "{}"'

        response = self.session.get(url, params=self.query_params(**kwargs))
        if self.unhealthy(response.status_code):
            # the error message interpolates name before namespace
            args.reverse()
            raise KubeHTTPException(response, message, *args)

        return response
class ReplicationController(Resource):
    """Kubernetes ReplicationController resource (core v1)."""
    short_name = 'rc'

    def get(self, namespace, name=None, **kwargs):
        """
        Fetch a single ReplicationController (by name) or the list in a namespace.
        """
        if name is not None:
            args = [namespace, name]
            url = self.api('/namespaces/{}/replicationcontrollers/{}', *args)
            message = 'get ReplicationController "{}" in Namespace "{}"'
        else:
            args = [namespace]
            url = self.api('/namespaces/{}/replicationcontrollers', *args)
            message = 'get ReplicationControllers in Namespace "{}"'

        response = self.session.get(url, params=self.query_params(**kwargs))
        if self.unhealthy(response.status_code):
            args.reverse()  # error msg is in reverse order
            raise KubeHTTPException(response, message, *args)

        return response

    def create(self, namespace, name, image, entrypoint, command, **kwargs):
        """Create an RC whose pod template runs `image` with the given command."""
        manifest = {
            'kind': 'ReplicationController',
            'apiVersion': 'v1',
            'metadata': {
                'name': name,
                'labels': {
                    'app': namespace,
                    'version': kwargs.get('version'),
                    'type': kwargs.get('app_type'),
                    'heritage': 'deis',
                }
            },
            'spec': {
                'replicas': kwargs.get('replicas', 0)
            }
        }

        # tell pod how to execute the process
        kwargs['command'] = entrypoint
        kwargs['args'] = command

        # pod manifest spec
        manifest['spec']['template'] = self.pod.manifest(namespace, name, image, **kwargs)

        url = self.api("/namespaces/{}/replicationcontrollers", namespace)
        response = self.session.post(url, json=manifest)
        if self.unhealthy(response.status_code):
            raise KubeHTTPException(
                response,
                'create ReplicationController "{}" in Namespace "{}"', name, namespace
            )
        self.log(namespace, 'manifest used: {}'.format(json.dumps(manifest, indent=4)), 'DEBUG')  # noqa

        self.wait_until_updated(namespace, name)

        return response

    def update(self, namespace, name, data):
        """PUT a full RC object back to the API server."""
        url = self.api("/namespaces/{}/replicationcontrollers/{}", namespace, name)
        response = self.session.put(url, json=data)
        if self.unhealthy(response.status_code):
            raise KubeHTTPException(response, 'scale ReplicationController "{}"', name)

        return response
json=data) + if self.unhealthy(response.status_code): + raise KubeHTTPException(response, 'scale ReplicationController "{}"', name) + + return response + + def delete(self, namespace, name): + url = self.api("/namespaces/{}/replicationcontrollers/{}", namespace, name) + response = self.session.delete(url) + if self.unhealthy(response.status_code): + raise KubeHTTPException( + response, + 'delete ReplicationController "{}" in Namespace "{}"', name, namespace + ) + + return response + + def scale(self, namespace, name, desired, timeout): + rc = self.get(namespace, name).json() + + current = int(rc['spec']['replicas']) + if desired == current: + self.log(namespace, "Not scaling RC {} to {} replicas. Already at desired replicas".format(name, desired)) # noqa + return + elif desired != rc['spec']['replicas']: # RC needs new replica count + # Set the new desired replica count + rc['spec']['replicas'] = desired + + self.log(namespace, "scaling RC {} from {} to {} replicas".format(name, current, desired)) # noqa + + self.update(namespace, name, rc) + self.wait_until_updated(namespace, name) + + # Double check enough pods are in the required state to service the application + labels = rc['metadata']['labels'] + containers = rc['spec']['template']['spec']['containers'] + self.pods.wait_until_ready(namespace, containers, labels, desired, timeout) + + # if it was a scale down operation, wait until terminating pods are done + if int(desired) < int(current): + self.pods.wait_until_terminated(namespace, labels, current, desired) + + def wait_until_updated(self, namespace, name): + """ + Looks at status/observedGeneration and metadata/generation and + waits for observedGeneration >= generation to happen, indicates RC is ready + + More information is also available at: + https://github.com/kubernetes/kubernetes/blob/master/docs/devel/api-conventions.md#metadata + """ + self.log(namespace, "waiting for ReplicationController {} to get a newer generation (30s timeout)".format(name), 
def get(self, namespace, name=None, **kwargs):
    """
    Fetch a single Secret (with its data base64-decoded in place) or a list.
    """
    url = '/namespaces/{}/secrets'
    args = [namespace]
    if name is not None:
        args.append(name)
        url += '/{}'
        message = 'get Secret "{}" in Namespace "{}"'
    else:
        message = 'get Secrets in Namespace "{}"'

    url = self.api(url, *args)
    response = self.session.get(url, params=self.query_params(**kwargs))
    if self.unhealthy(response.status_code):
        args.reverse()  # error msg is in reverse order
        raise KubeHTTPException(response, message, *args)

    # return right away if it is a list
    if name is None:
        return response

    # decode the base64 data so callers see plain text values
    secrets = response.json()
    for key, value in secrets['data'].items():
        if value is None:
            secrets['data'][key] = ''
            continue

        value = base64.b64decode(value)
        value = value if isinstance(value, bytes) else bytes(str(value), 'UTF-8')
        secrets['data'][key] = value.decode(encoding='UTF-8')

    # tell python-requests it actually hasn't consumed the data
    response._content = bytes(json.dumps(secrets), 'UTF-8')

    return response


def manifest(self, namespace, name, data, secret_type='Opaque', labels=None):
    """
    Build a Secret manifest with every value in `data` base64-encoded.

    Raises KubeException for unsupported secret types.
    """
    secret_types = ['Opaque', 'kubernetes.io/dockerconfigjson']
    if secret_type not in secret_types:
        # BUGFIX: the message was missing its second {} placeholder, so the
        # list of supported types was silently dropped from the error
        raise KubeException('{} is not a supported secret type. Use one of the following: {}'.format(secret_type, ', '.join(secret_types)))  # noqa

    # BUGFIX: avoid a shared mutable default argument for labels
    labels = {} if labels is None else labels

    manifest = {
        'kind': 'Secret',
        'apiVersion': 'v1',
        'metadata': {
            'name': name,
            'namespace': namespace,
            'labels': {
                'app': namespace,
                'heritage': 'deis'
            }
        },
        'type': secret_type,
        'data': {}
    }

    # add in any additional label info
    manifest['metadata']['labels'].update(labels)

    for key, value in data.items():
        if value is None:
            manifest['data'][key] = ''
            continue

        value = value if isinstance(value, bytes) else bytes(str(value), 'UTF-8')
        manifest['data'][key] = base64.b64encode(value).decode(encoding='UTF-8')

    return manifest
secret_types = ['Opaque', 'kubernetes.io/dockerconfigjson'] + if secret_type not in secret_types: + raise KubeException('{} is not a supported secret type. Use one of the following: '.format(secret_type, ', '.join(secret_types))) # noqa + + manifest = { + 'kind': 'Secret', + 'apiVersion': 'v1', + 'metadata': { + 'name': name, + 'namespace': namespace, + 'labels': { + 'app': namespace, + 'heritage': 'deis' + } + }, + 'type': secret_type, + 'data': {} + } + + # add in any additional label info + manifest['metadata']['labels'].update(labels) + + for key, value in data.items(): + if value is None: + manifest['data'].update({key: ''}) + continue + + value = value if isinstance(value, bytes) else bytes(str(value), 'UTF-8') + item = base64.b64encode(value).decode(encoding='UTF-8') + manifest['data'].update({key: item}) + + return manifest + + def create(self, namespace, name, data, secret_type='Opaque', labels={}): + manifest = self.manifest(namespace, name, data, secret_type, labels) + url = self.api("/namespaces/{}/secrets", namespace) + response = self.session.post(url, json=manifest) + if self.unhealthy(response.status_code): + raise KubeHTTPException( + response, + 'failed to create Secret "{}" in Namespace "{}"', name, namespace + ) + + return response + + def update(self, namespace, name, data, secret_type='Opaque', labels={}): + manifest = self.manifest(namespace, name, data, secret_type, labels) + url = self.api("/namespaces/{}/secrets/{}", namespace, name) + response = self.session.put(url, json=manifest) + if self.unhealthy(response.status_code): + raise KubeHTTPException( + response, + 'failed to update Secret "{}" in Namespace "{}"', + name, namespace + ) + + return response + + def delete(self, namespace, name): + url = self.api("/namespaces/{}/secrets/{}", namespace, name) + response = self.session.delete(url) + if self.unhealthy(response.status_code): + raise KubeHTTPException( + response, + 'delete Secret "{}" in Namespace "{}"', name, namespace + ) + + 
class Service(Resource):
    """Kubernetes Service resource (core v1)."""
    short_name = 'svc'

    def get(self, namespace, name=None, **kwargs):
        """
        Fetch a single Service or a list
        """
        url = '/namespaces/{}/services'
        args = [namespace]
        if name is not None:
            args.append(name)
            url += '/{}'
            message = 'get Service "{}" in Namespace "{}"'
        else:
            message = 'get Services in Namespace "{}"'

        url = self.api(url, *args)
        response = self.session.get(url, params=self.query_params(**kwargs))
        if self.unhealthy(response.status_code):
            args.reverse()  # error msg is in reverse order
            raise KubeHTTPException(response, message, *args)

        return response

    def create(self, namespace, name, **kwargs):
        """Create a Service; kwargs['data'] is deep-merged over the defaults."""
        # Ports and app type will be overwritten as required
        manifest = {
            'kind': 'Service',
            'apiVersion': 'v1',
            'metadata': {
                'name': name,
                'labels': {
                    'app': namespace,
                    'heritage': 'deis'
                },
                'annotations': {}
            },
            'spec': {
                'ports': [{
                    'name': 'http',
                    'port': 80,
                    'targetPort': 5000,
                    'protocol': 'TCP'
                }],
                'selector': {
                    'app': namespace,
                    'heritage': 'deis'
                }
            }
        }

        data = dict_merge(manifest, kwargs.get('data', {}))
        url = self.api("/namespaces/{}/services", namespace)
        response = self.session.post(url, json=data)
        if self.unhealthy(response.status_code):
            # BUGFIX: the message interpolated `namespace` twice instead of
            # the Service name
            raise KubeHTTPException(
                response,
                'create Service "{}" in Namespace "{}"', name, namespace
            )

        return response

    def update(self, namespace, name, data):
        """Replace a Service object via PUT."""
        url = self.api("/namespaces/{}/services/{}", namespace, name)
        response = self.session.put(url, json=data)
        if self.unhealthy(response.status_code):
            # BUGFIX: name and namespace were interpolated in reverse order
            raise KubeHTTPException(
                response,
                'update Service "{}" in Namespace "{}"', name, namespace
            )

        return response
KubeHTTPException( + response, + 'update Service "{}" in Namespace "{}"', namespace, name + ) + + return response + + def delete(self, namespace, name): + url = self.api("/namespaces/{}/services/{}", namespace, name) + response = self.session.delete(url) + if self.unhealthy(response.status_code): + raise KubeHTTPException( + response, + 'delete Service "{}" in Namespace "{}"', name, namespace + ) + + return response diff --git a/rootfs/scheduler/tests/__init__.py b/rootfs/scheduler/tests/__init__.py index 67295e315..e140ad50a 100644 --- a/rootfs/scheduler/tests/__init__.py +++ b/rootfs/scheduler/tests/__init__.py @@ -1,5 +1,6 @@ from django.core.cache import cache from django.test import TestCase as DjangoTestCase +from django.conf import settings from scheduler import mock from scheduler.utils import generate_random_name @@ -7,7 +8,7 @@ class TestCase(DjangoTestCase): def setUp(self): - self.scheduler = mock.MockSchedulerClient() + self.scheduler = mock.MockSchedulerClient(settings.SCHEDULER_URL) # have a namespace available at all times self.namespace = self.create_namespace() @@ -17,7 +18,7 @@ def tearDown(self): def create_namespace(self): namespace = generate_random_name() - response = self.scheduler.create_namespace(namespace) + response = self.scheduler.ns.create(namespace) self.assertEqual(response.status_code, 201, response.json()) # assert minimal amount data data = response.json() diff --git a/rootfs/scheduler/tests/test_deployments.py b/rootfs/scheduler/tests/test_deployments.py index c566d98f2..a82d194b4 100644 --- a/rootfs/scheduler/tests/test_deployments.py +++ b/rootfs/scheduler/tests/test_deployments.py @@ -23,10 +23,9 @@ def create(self, namespace=None, name=generate_random_name(), **kwargs): 'replicas': kwargs.get('replicas', 4), } - deployment = self.scheduler.create_deployment(namespace, name, 'quay.io/fake/image', + deployment = self.scheduler.deployment.create(namespace, name, 'quay.io/fake/image', 'sh', 'start', **kwargs) - data = 
deployment.json() - self.assertEqual(deployment.status_code, 201, data) + self.assertEqual(deployment.status_code, 201, deployment.json()) return name def update(self, namespace=None, name=generate_random_name(), **kwargs): @@ -41,7 +40,7 @@ def update(self, namespace=None, name=generate_random_name(), **kwargs): 'replicas': kwargs.get('replicas', 4), } - deployment = self.scheduler.update_deployment(namespace, name, 'quay.io/fake/image', + deployment = self.scheduler.deployment.update(namespace, name, 'quay.io/fake/image', 'sh', 'start', **kwargs) data = deployment.json() self.assertEqual(deployment.status_code, 200, data) @@ -83,13 +82,13 @@ def test_update_deployment_failure(self): def test_update(self): # test success name = self.create() - deployment = self.scheduler.get_deployment(self.namespace, name).json() + deployment = self.scheduler.deployment.get(self.namespace, name).json() self.assertEqual(deployment['spec']['replicas'], 4, deployment) # emulate scale without calling scale self.update(self.namespace, name, replicas=2) - deployment = self.scheduler.get_deployment(self.namespace, name).json() + deployment = self.scheduler.deployment.get(self.namespace, name).json() self.assertEqual(deployment['spec']['replicas'], 2, deployment) def test_delete_failure(self): @@ -98,19 +97,19 @@ def test_delete_failure(self): KubeHTTPException, msg='failed to delete Deployment foo in Namespace {}: 404 Not Found'.format(self.namespace) # noqa ): - self.scheduler.delete_deployment(self.namespace, 'foo') + self.scheduler.deployment.delete(self.namespace, 'foo') def test_delete(self): # test success name = self.create() - response = self.scheduler.delete_deployment(self.namespace, name) + response = self.scheduler.deployment.delete(self.namespace, name) data = response.json() self.assertEqual(response.status_code, 200, data) def test_get_deployments(self): # test success name = self.create() - response = self.scheduler.get_deployments(self.namespace) + response = 
self.scheduler.deployment.get(self.namespace) data = response.json() self.assertEqual(response.status_code, 200, data) self.assertIn('items', data) @@ -124,12 +123,12 @@ def test_get_deployment_failure(self): KubeHTTPException, msg='failed to get Deployment doesnotexist in Namespace {}: 404 Not Found'.format(self.namespace) # noqa ): - self.scheduler.get_deployment(self.namespace, 'doesnotexist') + self.scheduler.deployment.get(self.namespace, 'doesnotexist') def test_get_deployment(self): # test success name = self.create() - response = self.scheduler.get_deployment(self.namespace, name) + response = self.scheduler.deployment.get(self.namespace, name) data = response.json() self.assertEqual(response.status_code, 200, data) self.assertEqual(data['apiVersion'], 'extensions/v1beta1') @@ -145,22 +144,22 @@ def test_get_deployment(self): def test_scale(self): name = self.scale() - data = self.scheduler.get_deployment(self.namespace, name).json() + data = self.scheduler.deployment.get(self.namespace, name).json() self.assertEqual(data['kind'], 'Deployment') self.assertEqual(data['metadata']['name'], name) labels = {'app': self.namespace, 'version': 'v99', 'type': 'web'} - pods = self.scheduler.get_pods(self.namespace, labels=labels).json() + pods = self.scheduler.pod.get(self.namespace, labels=labels).json() self.assertEqual(len(pods['items']), 4) # scale to 8 name = self.scale(replicas=8) - pods = self.scheduler.get_pods(self.namespace, labels=labels).json() + pods = self.scheduler.pod.get(self.namespace, labels=labels).json() self.assertEqual(len(pods['items']), 8) # scale to 3 name = self.scale(replicas=3) - pods = self.scheduler.get_pods(self.namespace, labels=labels).json() + pods = self.scheduler.pod.get(self.namespace, labels=labels).json() self.assertEqual(len(pods['items']), 3) def test_get_deployment_replicasets(self): @@ -169,10 +168,9 @@ def test_get_deployment_replicasets(self): """ # test success deployment = self.create() - data = 
self.scheduler.get_deployment(self.namespace, deployment).json() + data = self.scheduler.deployment.get(self.namespace, deployment).json() - response = self.scheduler.get_replicasets(self.namespace, - labels=data['metadata']['labels']) + response = self.scheduler.rs.get(self.namespace, labels=data['metadata']['labels']) data = response.json() self.assertEqual(response.status_code, 200, data) self.assertIn('items', data) @@ -189,7 +187,7 @@ def test_get__deployment_replicaset_failure(self): KubeHTTPException, msg='failed to get ReplicaSet doesnotexist in Namespace {}: 404 Not Found'.format(self.namespace) # noqa ): - self.scheduler.get_replicaset(self.namespace, 'doesnotexist') + self.scheduler.rs.get(self.namespace, 'doesnotexist') def test_get_deployment_replicaset(self): """ @@ -197,15 +195,14 @@ def test_get_deployment_replicaset(self): """ # test success deployment = self.create() - data = self.scheduler.get_deployment(self.namespace, deployment).json() + data = self.scheduler.deployment.get(self.namespace, deployment).json() # get all replicasets and fish out the first one to match on - response = self.scheduler.get_replicasets(self.namespace, - labels=data['metadata']['labels']) + response = self.scheduler.rs.get(self.namespace, labels=data['metadata']['labels']) data = response.json() replica_name = data['items'][0]['metadata']['name'] - response = self.scheduler.get_replicaset(self.namespace, replica_name) + response = self.scheduler.rs.get(self.namespace, replica_name) data = response.json() self.assertEqual(response.status_code, 200, data) diff --git a/rootfs/scheduler/tests/test_horizontalpodautoscaler.py b/rootfs/scheduler/tests/test_horizontalpodautoscaler.py new file mode 100644 index 000000000..c9e17ad82 --- /dev/null +++ b/rootfs/scheduler/tests/test_horizontalpodautoscaler.py @@ -0,0 +1,203 @@ +""" +Unit tests for the Deis scheduler module. 
+ +Run the tests with './manage.py test scheduler' +""" +from scheduler import KubeHTTPException +from scheduler.tests import TestCase +from scheduler.utils import generate_random_name + + +class HorizontalPodAutoscalersTest(TestCase): + """Tests scheduler horizontalpodautoscaler calls""" + + def create(self, namespace=None, name=generate_random_name(), **kwargs): + """ + Helper function to create and verify a horizontalpodautoscaler on the namespace + + Creates a Deployment so that HPA can work off an object + """ + namespace = self.namespace if namespace is None else namespace + # these are all required even if it is kwargs... + kwargs = { + 'app_type': kwargs.get('app_type', 'web'), + 'version': kwargs.get('version', 'v99'), + 'replicas': kwargs.get('replicas', 1), + } + + # create a Deployment to test HPA with + deployment = self.scheduler.deployment.create(namespace, name, 'quay.io/fake/image', + 'sh', 'start', **kwargs) + self.assertEqual(deployment.status_code, 201, deployment.json()) + + # create HPA referencing the Deployment above + kwargs = { + 'min': 2, + 'max': 4, + 'cpu_percent': 45, + 'wait': True + } + horizontalpodautoscaler = self.scheduler.hpa.create(namespace, name, deployment.json(), **kwargs) # noqa + self.assertEqual(horizontalpodautoscaler.status_code, 201, horizontalpodautoscaler.json()) # noqa + return name + + def update(self, namespace=None, name=generate_random_name(), **kwargs): + """ + Helper function to update and verify a horizontalpodautoscaler on the namespace + """ + namespace = self.namespace if namespace is None else namespace + deployment = self.scheduler.deployment.get(namespace, name) + + kwargs = { + 'min': kwargs.get('replicas'), + 'max': 4, + 'cpu_percent': 45, + 'wait': True + } + horizontalpodautoscaler = self.scheduler.hpa.update(namespace, name, deployment.json(), **kwargs) # noqa + self.assertEqual(horizontalpodautoscaler.status_code, 200, horizontalpodautoscaler.json()) # noqa + return name + + def 
update_deployment(self, namespace=None, name=generate_random_name(), **kwargs): + """ + Helper function to update and verify a deployment on the namespace + """ + namespace = self.namespace if namespace is None else namespace + # these are all required even if it is kwargs... + kwargs = { + 'app_type': kwargs.get('app_type', 'web'), + 'version': kwargs.get('version', 'v99'), + 'replicas': kwargs.get('replicas', 4), + } + + deployment = self.scheduler.deployment.update(namespace, name, 'quay.io/fake/image', + 'sh', 'start', **kwargs) + data = deployment.json() + self.assertEqual(deployment.status_code, 200, data) + return name + + def test_create_failure(self): + with self.assertRaises( + KubeHTTPException, + msg='failed to create HorizontalPodAutoscaler doesnotexist in Namespace {}: 404 Not Found'.format(self.namespace) # noqa + ): + self.create('doesnotexist', 'doesnotexist') + + def test_create(self): + name = self.create() + + # check the deployment object + deployment = self.scheduler.deployment.get(self.namespace, name).json() + self.assertEqual(deployment['spec']['replicas'], 2, deployment) + + # make sure HPA kicked things from 1 (set by Deployments) to 2 (HPA min) + labels = {'app': self.namespace, 'type': 'web', 'version': 'v99'} + pods = self.scheduler.pod.get(self.namespace, labels=labels).json() + self.assertEqual(len(pods['items']), 2) + + def test_update_horizontalpodautoscaler_failure(self): + # test failure + with self.assertRaises( + KubeHTTPException, + msg='failed to update HorizontalPodAutoscaler foo in Namespace {}: 404 Not Found'.format(self.namespace) # noqa + ): + self.update(self.namespace, 'foo') + + def test_update(self): + # test success + name = self.create() + + # check the deployment object + deployment = self.scheduler.deployment.get(self.namespace, name).json() + self.assertEqual(deployment['spec']['replicas'], 2, deployment) + + # make sure HPA kicked things from 1 (set by Deployments) to 2 (HPA min) + deployment = 
self.scheduler.deployment.get(self.namespace, name).json() + self.assertEqual(deployment['status']['availableReplicas'], 2) + + # update HPA to 3 replicas minimum + self.update(self.namespace, name, replicas=3) + + # check the deployment object + deployment = self.scheduler.deployment.get(self.namespace, name).json() + self.assertEqual(deployment['spec']['replicas'], 3, deployment) + + # make sure HPA kicked things from 1 (set by Deployments) to 3 (HPA min) + deployment = self.scheduler.deployment.get(self.namespace, name).json() + self.assertEqual(deployment['status']['availableReplicas'], 3) + + # scale deployment to 1 (should go back to 3) + self.update_deployment(self.namespace, name, replicas=1) + + # check the deployment object + deployment = self.scheduler.deployment.get(self.namespace, name).json() + self.assertEqual(deployment['spec']['replicas'], 3, deployment) + + # make sure HPA kicked things from 1 (set by Deployments) to 3 (HPA min) + deployment = self.scheduler.deployment.get(self.namespace, name).json() + self.assertEqual(deployment['status']['availableReplicas'], 3) + + # scale deployment to 6 (should go back to 4) + self.update_deployment(self.namespace, name, replicas=6) + + # check the deployment object + deployment = self.scheduler.deployment.get(self.namespace, name).json() + self.assertEqual(deployment['spec']['replicas'], 4, deployment) + + # make sure HPA kicked things from 6 (set by Deployments) to 4 (HPA max) + deployment = self.scheduler.deployment.get(self.namespace, name).json() + self.assertEqual(deployment['status']['availableReplicas'], 4) + + def test_delete_failure(self): + # test failure + with self.assertRaises( + KubeHTTPException, + msg='failed to delete HorizontalPodAutoscaler foo in Namespace {}: 404 Not Found'.format(self.namespace) # noqa + ): + self.scheduler.hpa.delete(self.namespace, 'foo') + + def test_delete(self): + # test success + name = self.create() + response = self.scheduler.hpa.delete(self.namespace, name) + 
data = response.json() + self.assertEqual(response.status_code, 200, data) + + def test_get_horizontalpodautoscalers(self): + # test success + name = self.create() + response = self.scheduler.hpa.get(self.namespace) + data = response.json() + self.assertEqual(response.status_code, 200, data) + self.assertIn('items', data) + self.assertEqual(1, len(data['items']), data['items']) + # simple verify of data + self.assertEqual(data['items'][0]['metadata']['name'], name, data) + + def test_get_horizontalpodautoscaler_failure(self): + # test failure + with self.assertRaises( + KubeHTTPException, + msg='failed to get HorizontalPodAutoscaler doesnotexist in Namespace {}: 404 Not Found'.format(self.namespace) # noqa + ): + self.scheduler.hpa.get(self.namespace, 'doesnotexist') + + def test_get_horizontalpodautoscaler(self): + # test success + name = self.create() + response = self.scheduler.hpa.get(self.namespace, name) + data = response.json() + self.assertEqual(response.status_code, 200, data) + if self.scheduler.version() < 1.3: + self.assertEqual(data['apiVersion'], 'extensions/v1beta1') + else: + self.assertEqual(data['apiVersion'], 'autoscaling/v1') + self.assertEqual(data['kind'], 'HorizontalPodAutoscaler') + self.assertEqual(data['metadata']['name'], name) + self.assertDictContainsSubset( + { + 'app': self.namespace, + 'heritage': 'deis' + }, + data['metadata']['labels'] + ) diff --git a/rootfs/scheduler/tests/test_namespaces.py b/rootfs/scheduler/tests/test_namespaces.py index cc68ff878..d4b3b90aa 100644 --- a/rootfs/scheduler/tests/test_namespaces.py +++ b/rootfs/scheduler/tests/test_namespaces.py @@ -15,7 +15,7 @@ def test_create_namespace(self): self.create_namespace() def test_get_namespaces(self): - response = self.scheduler.get_namespaces() + response = self.scheduler.ns.get() data = response.json() self.assertEqual(response.status_code, 200, data) self.assertIn('items', data) @@ -29,9 +29,9 @@ def test_get_namespace(self): KubeHTTPException, msg='failed to 
get Namespace doesnotexist: 404 Not Found' ): - self.scheduler.get_node('doesnotexist') + self.scheduler.node.get('doesnotexist') - response = self.scheduler.get_namespace(self.namespace) + response = self.scheduler.ns.get(self.namespace) data = response.json() self.assertEqual(response.status_code, 200, data) self.assertEqual(data['apiVersion'], 'v1') @@ -52,8 +52,8 @@ def test_delete_failure(self): KubeHTTPException, msg='failed to delete Namespace doesnotexist: 404 Not Found' ): - self.scheduler.delete_namespace('doesnotexist') + self.scheduler.ns.delete('doesnotexist') def test_delete_namespace(self): - response = self.scheduler.delete_namespace(self.namespace) + response = self.scheduler.ns.delete(self.namespace) self.assertEqual(response.status_code, 200, response.json()) diff --git a/rootfs/scheduler/tests/test_nodes.py b/rootfs/scheduler/tests/test_nodes.py index 3995af2f2..34c302b8b 100644 --- a/rootfs/scheduler/tests/test_nodes.py +++ b/rootfs/scheduler/tests/test_nodes.py @@ -3,24 +3,15 @@ Run the tests with "./manage.py test scheduler" """ -from django.core.cache import cache -from django.test import TestCase - -from scheduler import mock, KubeHTTPException +from scheduler.tests import TestCase +from scheduler import KubeHTTPException class NodesTest(TestCase): """Tests scheduler node calls""" - def setUp(self): - self.scheduler = mock.MockSchedulerClient() - - def tearDown(self): - # make sure every test has a clean slate for k8s mocking - cache.clear() - def test_get_nodes(self): - response = self.scheduler.get_nodes() + response = self.scheduler.node.get() data = response.json() self.assertEqual(response.status_code, 200, data) self.assertIn('items', data) @@ -34,10 +25,10 @@ def test_get_node(self): KubeHTTPException, msg='failed to get Node doesnotexist in Nodes: 404 Not Found' ): - self.scheduler.get_node('doesnotexist') + self.scheduler.node.get('doesnotexist') name = '172.17.8.100' - response = self.scheduler.get_node(name) + response = 
self.scheduler.node.get(name) data = response.json() self.assertEqual(response.status_code, 200, data) self.assertEqual(data['apiVersion'], 'v1') diff --git a/rootfs/scheduler/tests/test_replicationcontrollers.py b/rootfs/scheduler/tests/test_replicationcontrollers.py index ee5a70ef9..106a5d12c 100644 --- a/rootfs/scheduler/tests/test_replicationcontrollers.py +++ b/rootfs/scheduler/tests/test_replicationcontrollers.py @@ -23,7 +23,7 @@ def create(self, namespace=None, name=generate_random_name(), **kwargs): 'replicas': kwargs.get('replicas', 4), } - rc = self.scheduler.create_rc(namespace, name, 'quay.io/fake/image', + rc = self.scheduler.rc.create(namespace, name, 'quay.io/fake/image', 'sh', 'start', **kwargs) data = rc.json() self.assertEqual(rc.status_code, 201, data) @@ -61,19 +61,19 @@ def test_update_rc_failure(self): KubeHTTPException, msg='failed to update ReplicationController foo in Namespace {}: 404 Not Found'.format(self.namespace) # noqa ): - self.scheduler.update_rc(self.namespace, 'foo', {}) + self.scheduler.rc.update(self.namespace, 'foo', {}) def test_update(self): # test success name = self.create() - rc = self.scheduler.get_rc(self.namespace, name).json() + rc = self.scheduler.rc.get(self.namespace, name).json() self.assertEqual(rc['spec']['replicas'], 4, rc) rc['spec']['replicas'] = 2 - response = self.scheduler.update_rc(self.namespace, name, rc) + response = self.scheduler.rc.update(self.namespace, name, rc) self.assertEqual(response.status_code, 200, response.json()) - rc = self.scheduler.get_rc(self.namespace, name).json() + rc = self.scheduler.rc.get(self.namespace, name).json() self.assertEqual(rc['spec']['replicas'], 2, rc) def test_delete_failure(self): @@ -82,19 +82,19 @@ def test_delete_failure(self): KubeHTTPException, msg='failed to delete ReplicationController foo in Namespace {}: 404 Not Found'.format(self.namespace) # noqa ): - self.scheduler.delete_rc(self.namespace, 'foo') + self.scheduler.rc.delete(self.namespace, 'foo') def 
test_delete(self): # test success name = self.create() - response = self.scheduler.delete_rc(self.namespace, name) + response = self.scheduler.rc.delete(self.namespace, name) data = response.json() self.assertEqual(response.status_code, 200, data) def test_get_rcs(self): # test success name = self.create() - response = self.scheduler.get_rcs(self.namespace) + response = self.scheduler.rc.get(self.namespace) data = response.json() self.assertEqual(response.status_code, 200, data) self.assertIn('items', data) @@ -108,12 +108,12 @@ def test_get_rc_failure(self): KubeHTTPException, msg='failed to get ReplicationController doesnotexist in Namespace {}: 404 Not Found'.format(self.namespace) # noqa ): - self.scheduler.get_rc(self.namespace, 'doesnotexist') + self.scheduler.rc.get(self.namespace, 'doesnotexist') def test_get_rc(self): # test success name = self.create() - response = self.scheduler.get_rc(self.namespace, name) + response = self.scheduler.rc.get(self.namespace, name) data = response.json() self.assertEqual(response.status_code, 200, data) self.assertEqual(data['apiVersion'], 'v1') diff --git a/rootfs/scheduler/tests/test_scheduler.py b/rootfs/scheduler/tests/test_scheduler.py index 7317ec7f6..6fbaa37db 100644 --- a/rootfs/scheduler/tests/test_scheduler.py +++ b/rootfs/scheduler/tests/test_scheduler.py @@ -3,11 +3,9 @@ Run the tests with "./manage.py test scheduler" """ -from django.core.cache import cache -from django.test import TestCase +from scheduler.tests import TestCase from django.test.utils import override_settings -from scheduler import mock import base64 import json @@ -15,13 +13,6 @@ class SchedulerTest(TestCase): """Tests scheduler calls""" - def setUp(self): - self.scheduler = mock.MockSchedulerClient() - - def tearDown(self): - # make sure every test has a clean slate for k8s mocking - cache.clear() - def test_set_container_applies_healthcheck_with_routable(self): """ Test that when _set_container is called with the 'routable' kwarg set to 
True, @@ -54,19 +45,19 @@ def test_set_container_applies_healthcheck_with_routable(self): 'failureThreshold': 1, } - self.scheduler._set_container( + self.scheduler.pod._set_container( 'foo', 'bar', data, routable=True, healthcheck=healthcheck ) self.assertDictContainsSubset(healthcheck, data) data = {} - self.scheduler._set_container( + self.scheduler.pod._set_container( 'foo', 'bar', data, routable=True, build_type="buildpack", healthcheck={} ) self.assertEqual(data.get('livenessProbe'), None) self.assertEqual(data.get('readinessProbe'), readinessHealthCheck) data = {} - self.scheduler._set_container( + self.scheduler.pod._set_container( 'foo', 'bar', data, routable=False, healthcheck={} ) self.assertEqual(data.get('livenessProbe'), None) @@ -74,7 +65,7 @@ def test_set_container_applies_healthcheck_with_routable(self): # clear the dict to call again with routable as false data = {} - self.scheduler._set_container( + self.scheduler.pod._set_container( 'foo', 'bar', data, routable=False, healthcheck=healthcheck ) @@ -83,7 +74,7 @@ def test_set_container_applies_healthcheck_with_routable(self): # now call without setting 'routable', should default to False data = {} - self.scheduler._set_container( + self.scheduler.pod._set_container( 'foo', 'bar', data, healthcheck=healthcheck ) self.assertDictContainsSubset(healthcheck, data) @@ -97,7 +88,7 @@ def test_set_container_applies_healthcheck_with_routable(self): } } } - self.scheduler._set_health_checks( + self.scheduler.pod._set_health_checks( data, {'PORT': 80}, healthcheck=livenessProbe ) self.assertDictContainsSubset(healthcheck, data) @@ -108,7 +99,7 @@ def test_set_container_limits(self): Test that when _set_container has limits that is sets them properly """ data = {} - self.scheduler._set_container( + self.scheduler.pod._set_container( 'foo', 'bar', data, app_type='fake', cpu={'fake': '500M'}, memory={'fake': '1024m'} ) @@ -123,7 +114,7 @@ def test_get_private_registry_config(self): encAuth = 
base64.b64encode(auth).decode(encoding='UTF-8') image = 'test/test' - docker_config, secret_name, secret_create = self.scheduler._get_private_registry_config(registry, image) # noqa + docker_config, secret_name, secret_create = self.scheduler.pod._get_private_registry_config(registry, image) # noqa dockerConfig = json.loads(docker_config) expected = {"https://index.docker.io/v1/": { "auth": encAuth @@ -134,7 +125,7 @@ def test_get_private_registry_config(self): image = "quay.io/test/test" - docker_config, secret_name, secret_create = self.scheduler._get_private_registry_config(registry, image) # noqa + docker_config, secret_name, secret_create = self.scheduler.pod._get_private_registry_config(registry, image) # noqa dockerConfig = json.loads(docker_config) expected = {"quay.io": { "auth": encAuth @@ -147,7 +138,7 @@ def test_get_private_registry_config(self): def test_get_private_registry_config_ecr(self): registry = {} image = "test.com/test/test" - docker_config, secret_name, secret_create = self.scheduler._get_private_registry_config(registry, image) # noqa + docker_config, secret_name, secret_create = self.scheduler.pod._get_private_registry_config(registry, image) # noqa self.assertEqual(docker_config, None) self.assertEqual(secret_name, "private-registry-ecr") self.assertEqual(secret_create, False) @@ -158,7 +149,7 @@ def test_get_private_registry_config_off_cluster(self): auth = bytes('{}:{}'.format("test", "test"), 'UTF-8') encAuth = base64.b64encode(auth).decode(encoding='UTF-8') image = "test.com/test/test" - docker_config, secret_name, secret_create = self.scheduler._get_private_registry_config(registry, image) # noqa + docker_config, secret_name, secret_create = self.scheduler.pod._get_private_registry_config(registry, image) # noqa dockerConfig = json.loads(docker_config) expected = {"https://index.docker.io/v1/": { "auth": encAuth @@ -171,7 +162,7 @@ def test_get_private_registry_config_off_cluster(self): def 
test_get_private_registry_config_bad_registry_location(self): registry = {} image = "test.com/test/test" - docker_config, secret_name, secret_create = self.scheduler._get_private_registry_config(registry, image) # noqa + docker_config, secret_name, secret_create = self.scheduler.pod._get_private_registry_config(registry, image) # noqa self.assertEqual(docker_config, None) self.assertEqual(secret_name, None) self.assertEqual(secret_create, None) diff --git a/rootfs/scheduler/tests/test_secrets.py b/rootfs/scheduler/tests/test_secrets.py index a12e6afb6..359da04af 100644 --- a/rootfs/scheduler/tests/test_secrets.py +++ b/rootfs/scheduler/tests/test_secrets.py @@ -21,7 +21,7 @@ def create(self): 'this': 'that', 'empty': None, } - secret = self.scheduler.create_secret(self.namespace, name, data) + secret = self.scheduler.secret.create(self.namespace, name, data) data = secret.json() self.assertEqual(secret.status_code, 201, data) self.assertEqual(data['metadata']['name'], name) @@ -34,17 +34,17 @@ def test_create_failure(self): KubeHTTPException, msg='failed to create Secret doesnotexist in Namespace {}: 404 Not Found'.format(self.namespace) # noqa ): - self.scheduler.create_secret('doesnotexist', 'doesnotexist', {}) + self.scheduler.secret.create('doesnotexist', 'doesnotexist', {}) with self.assertRaises( KubeException, msg='invlaid is not a supported secret type. 
Use one of the following: Opaque, kubernetes.io/dockerconfigjson' # noqa ): - self.scheduler.create_secret(self.namespace, 'foo', {}, secret_type='invalid') + self.scheduler.secret.create(self.namespace, 'foo', {}, secret_type='invalid') def test_create(self): name = self.create() - secret = self.scheduler.get_secret(self.namespace, name).json() + secret = self.scheduler.secret.get(self.namespace, name).json() self.assertEqual(secret['data']['foo'], 'bar', secret) self.assertEqual(secret['data']['this'], 'that', secret) self.assertEqual(secret['type'], 'Opaque') @@ -55,21 +55,21 @@ def test_update_secret_failure(self): KubeHTTPException, msg='failed to update Secret foo in Namespace {}: 404 Not Found'.format(self.namespace) # noqa ): - self.scheduler.update_secret(self.namespace, 'foo', {}) + self.scheduler.secret.update(self.namespace, 'foo', {}) def test_update(self): # test success name = self.create() - secret = self.scheduler.get_secret(self.namespace, name).json() + secret = self.scheduler.secret.get(self.namespace, name).json() self.assertEqual(secret['data']['foo'], 'bar', secret) self.assertEqual(secret['data']['this'], 'that', secret) self.assertEqual(secret['type'], 'Opaque') secret['data']['foo'] = 5001 - response = self.scheduler.update_secret(self.namespace, name, secret['data']) + response = self.scheduler.secret.update(self.namespace, name, secret['data']) self.assertEqual(response.status_code, 200, response.json()) - secret = self.scheduler.get_secret(self.namespace, name).json() + secret = self.scheduler.secret.get(self.namespace, name).json() self.assertEqual(secret['data']['foo'], '5001', secret) def test_delete_failure(self): @@ -78,19 +78,19 @@ def test_delete_failure(self): KubeHTTPException, msg='failed to delete Secret foo in Namespace {}: 404 Not Found'.format(self.namespace) # noqa ): - self.scheduler.delete_secret(self.namespace, 'foo') + self.scheduler.secret.delete(self.namespace, 'foo') def test_delete(self): # test success name = 
self.create() - response = self.scheduler.delete_secret(self.namespace, name) + response = self.scheduler.secret.delete(self.namespace, name) data = response.json() self.assertEqual(response.status_code, 200, data) def test_get_secrets(self): # test success name = self.create() - response = self.scheduler.get_secrets(self.namespace) + response = self.scheduler.secret.get(self.namespace) data = response.json() self.assertEqual(response.status_code, 200, data) self.assertIn('items', data) @@ -104,12 +104,12 @@ def test_get_secret_failure(self): KubeHTTPException, msg='failed to get Secret doesnotexist in Namespace {}: 404 Not Found'.format(self.namespace) # noqa ): - self.scheduler.get_secret(self.namespace, 'doesnotexist') + self.scheduler.secret.get(self.namespace, 'doesnotexist') def test_get_secret(self): # test success name = self.create() - response = self.scheduler.get_secret(self.namespace, name) + response = self.scheduler.secret.get(self.namespace, name) data = response.json() self.assertEqual(response.status_code, 200, data) self.assertEqual(data['apiVersion'], 'v1') diff --git a/rootfs/scheduler/tests/test_services.py b/rootfs/scheduler/tests/test_services.py index 1bb884c2f..5ac4666b1 100644 --- a/rootfs/scheduler/tests/test_services.py +++ b/rootfs/scheduler/tests/test_services.py @@ -16,7 +16,7 @@ def create(self, data={}): Helper function to create and verify a service on the namespace """ name = generate_random_name() - service = self.scheduler.create_service(self.namespace, name, data=data) + service = self.scheduler.svc.create(self.namespace, name, data=data) data = service.json() self.assertEqual(service.status_code, 201, data) self.assertEqual(data['metadata']['name'], name) @@ -28,7 +28,7 @@ def test_create_failure(self): KubeHTTPException, msg='failed to create Service doesnotexist in Namespace {}: 404 Not Found'.format(self.namespace) # noqa ): - self.scheduler.create_service('doesnotexist', 'doesnotexist') + 
self.scheduler.svc.create('doesnotexist', 'doesnotexist') def test_create(self): # helper method takes care of the verification @@ -46,7 +46,7 @@ def test_create(self): } }) - service = self.scheduler.get_service(self.namespace, name).json() + service = self.scheduler.svc.get(self.namespace, name).json() self.assertEqual(service['spec']['ports'][0]['targetPort'], 5000, service) self.assertEqual(service['spec']['ports'][1]['targetPort'], 5001, service) @@ -56,19 +56,19 @@ def test_update_failure(self): KubeHTTPException, msg='failed to update Service foo in Namespace {}: 404 Not Found'.format(self.namespace) # noqa ): - self.scheduler.update_service(self.namespace, 'foo', {}) + self.scheduler.svc.update(self.namespace, 'foo', {}) def test_update(self): # test success name = self.create() - service = self.scheduler.get_service(self.namespace, name).json() + service = self.scheduler.svc.get(self.namespace, name).json() self.assertEqual(service['spec']['ports'][0]['targetPort'], 5000, service) service['spec']['ports'][0]['targetPort'] = 5001 - response = self.scheduler.update_service(self.namespace, name, service) + response = self.scheduler.svc.update(self.namespace, name, service) self.assertEqual(response.status_code, 200, response.json()) - service = self.scheduler.get_service(self.namespace, name).json() + service = self.scheduler.svc.get(self.namespace, name).json() self.assertEqual(service['spec']['ports'][0]['targetPort'], 5001, service) def test_delete_failure(self): @@ -77,19 +77,19 @@ def test_delete_failure(self): KubeHTTPException, msg='failed to delete Service foo in Namespace {}: 404 Not Found'.format(self.namespace) # noqa ): - self.scheduler.delete_service(self.namespace, 'foo') + self.scheduler.svc.delete(self.namespace, 'foo') def test_delete(self): # test success name = self.create() - response = self.scheduler.delete_service(self.namespace, name) + response = self.scheduler.svc.delete(self.namespace, name) data = response.json() 
self.assertEqual(response.status_code, 200, data) def test_get_services(self): # test success name = self.create() - response = self.scheduler.get_services(self.namespace) + response = self.scheduler.svc.get(self.namespace) data = response.json() self.assertEqual(response.status_code, 200, data) self.assertIn('items', data) @@ -103,12 +103,12 @@ def test_get_service_failure(self): KubeHTTPException, msg='failed to get Service doesnotexist in Namespace {}: 404 Not Found'.format(self.namespace) # noqa ): - self.scheduler.get_service(self.namespace, 'doesnotexist') + self.scheduler.svc.get(self.namespace, 'doesnotexist') def test_get_service(self): # test success name = self.create() - response = self.scheduler.get_service(self.namespace, name) + response = self.scheduler.svc.get(self.namespace, name) data = response.json() self.assertEqual(response.status_code, 200, data) self.assertEqual(data['apiVersion'], 'v1')