From 3a66c8b48ba00a1ed84dd7b368fbf3d84fdbdef2 Mon Sep 17 00:00:00 2001 From: AlaeddineAbdessalem Date: Fri, 30 Dec 2022 12:05:18 +0100 Subject: [PATCH] fix: respect timeout_ready when generating startup probe (#5560) --- jina/orchestrate/deployments/config/k8s.py | 2 + .../config/k8slib/kubernetes_deployment.py | 17 ++++++ .../k8s/template/deployment-executor.yml | 3 +- .../k8s/template/deployment-gateway.yml | 3 +- .../k8s/template/deployment-uses-after.yml | 6 ++- .../template/deployment-uses-before-after.yml | 9 ++-- .../k8s/template/deployment-uses-before.yml | 6 ++- .../k8s/template/statefulset-executor.yml | 3 +- tests/k8s/conftest.py | 5 +- tests/k8s/slow-load-executor/.dockerignore | 8 +++ tests/k8s/slow-load-executor/Dockerfile | 8 +++ tests/k8s/slow-load-executor/config.yml | 3 ++ .../slow-load-executor/slow_load_executor.py | 9 ++++ tests/k8s/test_k8s.py | 52 +++++++++++++++++++ .../flow-construct/test_flow_to_k8s_yaml.py | 9 +++- 15 files changed, 130 insertions(+), 13 deletions(-) create mode 100644 tests/k8s/slow-load-executor/.dockerignore create mode 100644 tests/k8s/slow-load-executor/Dockerfile create mode 100644 tests/k8s/slow-load-executor/config.yml create mode 100644 tests/k8s/slow-load-executor/slow_load_executor.py diff --git a/jina/orchestrate/deployments/config/k8s.py b/jina/orchestrate/deployments/config/k8s.py index ef3a319b6af78..97e3a2be6c7f1 100644 --- a/jina/orchestrate/deployments/config/k8s.py +++ b/jina/orchestrate/deployments/config/k8s.py @@ -103,6 +103,7 @@ def get_gateway_yamls( monitoring=self.common_args.monitoring, port_monitoring=self.common_args.port_monitoring, protocol=self.common_args.protocol, + timeout_ready=self.common_args.timeout_ready, ) def _get_image_name(self, uses: Optional[str]): @@ -213,6 +214,7 @@ def get_runtime_yamls( monitoring=cargs.monitoring, port_monitoring=cargs.port_monitoring, volumes=getattr(cargs, 'volumes', None), + timeout_ready=cargs.timeout_ready, ) def __init__( diff --git a/jina/orchestrate/deployments/config/k8slib/kubernetes_deployment.py b/jina/orchestrate/deployments/config/k8slib/kubernetes_deployment.py index 1bd1fde446aa3..a6c1dffcddb54 100644 --- a/jina/orchestrate/deployments/config/k8slib/kubernetes_deployment.py +++ b/jina/orchestrate/deployments/config/k8slib/kubernetes_deployment.py @@ -1,11 +1,15 @@ import json +import math import os +import warnings from argparse import Namespace from typing import Dict, List, Optional, Tuple, Union from jina.orchestrate.deployments.config.k8slib import kubernetes_tools from jina.serve.networking import GrpcConnectionPool +PERIOD_SECONDS = 5 + def get_template_yamls( name: str, @@ -32,6 +36,7 @@ def get_template_yamls( port_monitoring: Optional[int] = None, protocol: Optional[Union[str, List[str]]] = None, volumes: Optional[List[str]] = None, + timeout_ready: int = 600000, ) -> List[Dict]: """Get the yaml description of a service on Kubernetes @@ -59,6 +64,9 @@ def get_template_yamls( :param port_monitoring: port which will be exposed, for the prometheus server, by the deployed containers :param protocol: In case of being a Gateway, the protocol or protocols list used to expose its server :param volumes: If volumes are passed to Executors, Jina will create a StatefulSet instead of Deployment and include the first volume in the volume mounts + :param timeout_ready: The timeout in milliseconds of a Pod waits for the runtime to be ready. This parameter will be + reflected in Kubernetes in the startup configuration where the failureThreshold will be calculated depending on + timeout_ready. Value -1 is not supported for kubernetes :return: Return a dictionary with all the yaml configuration needed for a deployment """ # we can always assume the ports are the same for all executors since they run on different k8s pods @@ -79,6 +87,13 @@ def get_template_yamls( else: protocols = protocol + if timeout_ready == -1: + warnings.warn( + 'timeout_ready=-1 is not supported, setting timeout_ready to 10 minutes' + ) + timeout_ready = 600000 + failure_threshold = max(math.ceil((timeout_ready / 1000) / PERIOD_SECONDS), 3) + template_params = { 'name': name, 'namespace': namespace, @@ -102,6 +117,8 @@ def get_template_yamls( 'env_from_secret': env_from_secret, 'protocol': str(protocols[0]).lower() if protocols[0] is not None else '', 'volume_path': volumes[0] if volumes is not None else None, + 'period_seconds': PERIOD_SECONDS, + 'failure_threshold': failure_threshold, } if gpus and gpus != 'all': diff --git a/jina/resources/k8s/template/deployment-executor.yml b/jina/resources/k8s/template/deployment-executor.yml index b6f2642387f0c..1bc9d8e9951fe 100644 --- a/jina/resources/k8s/template/deployment-executor.yml +++ b/jina/resources/k8s/template/deployment-executor.yml @@ -58,7 +58,8 @@ spec: - executor - 127.0.0.1:{port} initialDelaySeconds: 5 - periodSeconds: 5 + periodSeconds: {period_seconds} + failureThreshold: {failure_threshold} timeoutSeconds: 10 livenessProbe: exec: diff --git a/jina/resources/k8s/template/deployment-gateway.yml b/jina/resources/k8s/template/deployment-gateway.yml index 4c860e62f7a12..ab52dcf4bfe4a 100644 --- a/jina/resources/k8s/template/deployment-gateway.yml +++ b/jina/resources/k8s/template/deployment-gateway.yml @@ -57,7 +57,8 @@ spec: - gateway - {protocol}://127.0.0.1:{port} initialDelaySeconds: 5 - periodSeconds: 5 + periodSeconds: {period_seconds} + failureThreshold: {failure_threshold} timeoutSeconds: 10 livenessProbe: exec: diff --git a/jina/resources/k8s/template/deployment-uses-after.yml b/jina/resources/k8s/template/deployment-uses-after.yml index 8fff1b977e182..b06ece5fead06 100644 --- a/jina/resources/k8s/template/deployment-uses-after.yml +++ b/jina/resources/k8s/template/deployment-uses-after.yml @@ -58,7 +58,8 @@ spec: - executor - 127.0.0.1:{port} initialDelaySeconds: 5 - periodSeconds: 5 + periodSeconds: {period_seconds} + failureThreshold: {failure_threshold} timeoutSeconds: 10 livenessProbe: exec: @@ -107,7 +108,8 @@ spec: - executor - 127.0.0.1:{port_uses_after} initialDelaySeconds: 5 - periodSeconds: 5 + periodSeconds: {period_seconds} + failureThreshold: {failure_threshold} timeoutSeconds: 10 livenessProbe: exec: diff --git a/jina/resources/k8s/template/deployment-uses-before-after.yml b/jina/resources/k8s/template/deployment-uses-before-after.yml index 74a7d8e041dab..24c11cb05e5d2 100644 --- a/jina/resources/k8s/template/deployment-uses-before-after.yml +++ b/jina/resources/k8s/template/deployment-uses-before-after.yml @@ -58,7 +58,8 @@ spec: - executor - 127.0.0.1:{port_uses} initialDelaySeconds: 5 - periodSeconds: 5 + periodSeconds: {period_seconds} + failureThreshold: {failure_threshold} timeoutSeconds: 10 livenessProbe: exec: @@ -107,7 +108,8 @@ spec: - executor - 127.0.0.1:{port_uses_before} initialDelaySeconds: 5 - periodSeconds: 5 + periodSeconds: {period_seconds} + failureThreshold: {failure_threshold} timeoutSeconds: 10 livenessProbe: exec: @@ -156,7 +158,8 @@ spec: - executor - 127.0.0.1:{port_uses_after} initialDelaySeconds: 5 - periodSeconds: 5 + periodSeconds: {period_seconds} + failureThreshold: {failure_threshold} timeoutSeconds: 10 livenessProbe: exec: diff --git a/jina/resources/k8s/template/deployment-uses-before.yml b/jina/resources/k8s/template/deployment-uses-before.yml index c4631802395d9..a9016435c08bd 100644 --- a/jina/resources/k8s/template/deployment-uses-before.yml +++ b/jina/resources/k8s/template/deployment-uses-before.yml @@ -58,7 +58,8 @@ spec: - executor - 127.0.0.1:{port} initialDelaySeconds: 5 - periodSeconds: 5 + periodSeconds: {period_seconds} + failureThreshold: {failure_threshold} timeoutSeconds: 10 livenessProbe: exec: @@ -107,7 +108,8 @@ spec: - executor - 127.0.0.1:{port_uses_before} initialDelaySeconds: 5 - periodSeconds: 5 + periodSeconds: {period_seconds} + failureThreshold: {failure_threshold} timeoutSeconds: 10 livenessProbe: exec: diff --git a/jina/resources/k8s/template/statefulset-executor.yml b/jina/resources/k8s/template/statefulset-executor.yml index 66ed11a9723c8..e669825413b74 100644 --- a/jina/resources/k8s/template/statefulset-executor.yml +++ b/jina/resources/k8s/template/statefulset-executor.yml @@ -58,7 +58,8 @@ spec: - executor - 127.0.0.1:{port} initialDelaySeconds: 5 - periodSeconds: 5 + periodSeconds: {period_seconds} + failureThreshold: {failure_threshold} timeoutSeconds: 10 livenessProbe: exec: diff --git a/tests/k8s/conftest.py b/tests/k8s/conftest.py index 3820c0fbdd402..96e099eecb844 100644 --- a/tests/k8s/conftest.py +++ b/tests/k8s/conftest.py @@ -4,11 +4,11 @@ import tempfile import time from pathlib import Path -from typing import List, Dict -from pytest import FixtureRequest +from typing import Dict, List import docker import pytest +from pytest import FixtureRequest from pytest_kind import KindCluster from jina.logging.logger import JinaLogger @@ -161,6 +161,7 @@ def image_name_tag_map() -> Dict[str, str]: 'custom-gateway': '0.1.1', 'test-stateful-executor': '0.13.1', 'multiprotocol-gateway': '0.1.1', + 'slow-load-executor': '0.1.1', } diff --git a/tests/k8s/slow-load-executor/.dockerignore b/tests/k8s/slow-load-executor/.dockerignore new file mode 100644 index 0000000000000..7772978e39f8b --- /dev/null +++ b/tests/k8s/slow-load-executor/.dockerignore @@ -0,0 +1,8 @@ +.git +.venv +.github +.pytest_cache +tests +__pycache__ +scripts +env \ No newline at end of file diff --git a/tests/k8s/slow-load-executor/Dockerfile b/tests/k8s/slow-load-executor/Dockerfile new file mode 100644 index 0000000000000..c564ffb4ca2c3 --- /dev/null +++ b/tests/k8s/slow-load-executor/Dockerfile @@ -0,0 +1,8 @@ +# TODO use fixed jina version for deterministic execution +FROM jinaai/jina:test-pip + +# setup the workspace +COPY . /workspace +WORKDIR /workspace + +ENTRYPOINT ["jina", "executor", "--uses", "config.yml"] diff --git a/tests/k8s/slow-load-executor/config.yml b/tests/k8s/slow-load-executor/config.yml new file mode 100644 index 0000000000000..d9cf2c70e317e --- /dev/null +++ b/tests/k8s/slow-load-executor/config.yml @@ -0,0 +1,3 @@ +jtype: SlowLoadExecutor +py_modules: + - slow_load_executor.py diff --git a/tests/k8s/slow-load-executor/slow_load_executor.py b/tests/k8s/slow-load-executor/slow_load_executor.py new file mode 100644 index 0000000000000..cbcca1ecb2651 --- /dev/null +++ b/tests/k8s/slow-load-executor/slow_load_executor.py @@ -0,0 +1,9 @@ +import time + +from jina import Executor + +time.sleep(60) + + +class SlowLoadExecutor(Executor): + pass diff --git a/tests/k8s/test_k8s.py b/tests/k8s/test_k8s.py index c5d9621d9545c..910dd0bdf7453 100644 --- a/tests/k8s/test_k8s.py +++ b/tests/k8s/test_k8s.py @@ -1637,3 +1637,55 @@ async def test_really_slow_executor_liveness_probe_works(docker_images, tmpdir, ).stdout.strip("\n") print(out) raise exc + + +@pytest.mark.asyncio +@pytest.mark.timeout(3600) +@pytest.mark.parametrize( + 'docker_images', + [['slow-load-executor']], + indirect=True, +) +async def test_flow_slow_load_executor(logger, docker_images, tmpdir, k8s_cluster): + from kubernetes import client + + api_client = client.ApiClient() + core_client = client.CoreV1Api(api_client=api_client) + app_client = client.AppsV1Api(api_client=api_client) + try: + port = 8080 + flow = Flow().add( + name='slow_load_executor', + uses=f'docker://{docker_images[0]}', + timeout_ready=65000, + ) + + dump_path = os.path.join(str(tmpdir), 'k8s-slow-load-executor') + namespace = 'slow-load-executor' + flow.to_kubernetes_yaml(dump_path, k8s_namespace=namespace) + + await create_all_flow_deployments_and_wait_ready( + dump_path, + namespace=namespace, + api_client=api_client, + app_client=app_client, + core_client=core_client, + deployment_replicas_expected={'gateway': 1, 'slow-load-executor': 1}, + logger=logger, + ) + + executor_pod_name = ( + core_client.list_namespaced_pod( + namespace=namespace, label_selector='app=slow-load-executor' + ) + .items[0] + .metadata.name + ) + with shell_portforward( + k8s_cluster._cluster.kubectl_path, executor_pod_name, port, port, namespace + ): + assert AsyncNewLoopRuntime.is_ready(f'localhost:{port}') + + except Exception as exc: + logger.error(f' Exception raised {exc}') + raise exc diff --git a/tests/unit/orchestrate/flow/flow-construct/test_flow_to_k8s_yaml.py b/tests/unit/orchestrate/flow/flow-construct/test_flow_to_k8s_yaml.py index 122fc81267b20..c0c3143c45ac1 100644 --- a/tests/unit/orchestrate/flow/flow-construct/test_flow_to_k8s_yaml.py +++ b/tests/unit/orchestrate/flow/flow-construct/test_flow_to_k8s_yaml.py @@ -16,7 +16,7 @@ def test_flow_to_k8s_yaml(tmpdir, protocol, flow_port): flow_kwargs['port'] = flow_port flow = ( Flow(**flow_kwargs) - .add(name='executor0', uses_with={'param': 0}) + .add(name='executor0', uses_with={'param': 0}, timeout_ready=60000) .add( name='executor1', shards=2, @@ -149,6 +149,13 @@ def test_flow_to_k8s_yaml(tmpdir, protocol, flow_port): assert executor0_objects[2]['metadata']['namespace'] == namespace assert executor0_objects[2]['metadata']['name'] == 'executor0' assert executor0_objects[2]['spec']['replicas'] == 1 + + executor0_startup_probe = executor0_objects[2]['spec']['template']['spec'][ + 'containers' + ][0]['startupProbe'] + assert executor0_startup_probe['failureThreshold'] == 12 + assert executor0_startup_probe['periodSeconds'] == 5 + executor0_args = executor0_objects[2]['spec']['template']['spec']['containers'][0][ 'args' ]