Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: respect timeout_ready when generating startup probe #5560

Merged
merged 9 commits into from Dec 30, 2022
2 changes: 2 additions & 0 deletions jina/orchestrate/deployments/config/k8s.py
Expand Up @@ -103,6 +103,7 @@ def get_gateway_yamls(
monitoring=self.common_args.monitoring,
port_monitoring=self.common_args.port_monitoring,
protocol=self.common_args.protocol,
timeout_ready=self.common_args.timeout_ready,
)

def _get_image_name(self, uses: Optional[str]):
Expand Down Expand Up @@ -213,6 +214,7 @@ def get_runtime_yamls(
monitoring=cargs.monitoring,
port_monitoring=cargs.port_monitoring,
volumes=getattr(cargs, 'volumes', None),
timeout_ready=cargs.timeout_ready,
)

def __init__(
Expand Down
@@ -1,11 +1,15 @@
import json
import math
import os
import warnings
from argparse import Namespace
from typing import Dict, List, Optional, Tuple, Union

from jina.orchestrate.deployments.config.k8slib import kubernetes_tools
from jina.serve.networking import GrpcConnectionPool

PERIOD_SECONDS = 5


def get_template_yamls(
name: str,
Expand All @@ -32,6 +36,7 @@ def get_template_yamls(
port_monitoring: Optional[int] = None,
protocol: Optional[Union[str, List[str]]] = None,
volumes: Optional[List[str]] = None,
timeout_ready: int = 600000,
) -> List[Dict]:
"""Get the yaml description of a service on Kubernetes

Expand Down Expand Up @@ -59,6 +64,9 @@ def get_template_yamls(
:param port_monitoring: port which will be exposed, for the prometheus server, by the deployed containers
:param protocol: In case of being a Gateway, the protocol or protocols list used to expose its server
:param volumes: If volumes are passed to Executors, Jina will create a StatefulSet instead of Deployment and include the first volume in the volume mounts
:param timeout_ready: The timeout in milliseconds of a Pod waits for the runtime to be ready. This parameter will be
reflected in Kubernetes in the startup configuration where the failureThreshold will be calculated depending on
timeout_ready. Value -1 is not supported for kubernetes
:return: Return a dictionary with all the yaml configuration needed for a deployment
"""
# we can always assume the ports are the same for all executors since they run on different k8s pods
Expand All @@ -79,6 +87,13 @@ def get_template_yamls(
else:
protocols = protocol

if timeout_ready == -1:
warnings.warn(
'timeout_ready=-1 is not supported, setting timeout_ready to 10 minutes'
)
timeout_ready = 600000
failure_threshold = max(math.ceil((timeout_ready / 1000) / PERIOD_SECONDS), 3)

template_params = {
'name': name,
'namespace': namespace,
Expand All @@ -102,6 +117,8 @@ def get_template_yamls(
'env_from_secret': env_from_secret,
'protocol': str(protocols[0]).lower() if protocols[0] is not None else '',
'volume_path': volumes[0] if volumes is not None else None,
'period_seconds': PERIOD_SECONDS,
'failure_threshold': failure_threshold,
}

if gpus and gpus != 'all':
Expand Down
3 changes: 2 additions & 1 deletion jina/resources/k8s/template/deployment-executor.yml
Expand Up @@ -58,7 +58,8 @@ spec:
- executor
- 127.0.0.1:{port}
initialDelaySeconds: 5
periodSeconds: 5
periodSeconds: {period_seconds}
failureThreshold: {failure_threshold}
timeoutSeconds: 10
livenessProbe:
exec:
Expand Down
3 changes: 2 additions & 1 deletion jina/resources/k8s/template/deployment-gateway.yml
Expand Up @@ -57,7 +57,8 @@ spec:
- gateway
- {protocol}://127.0.0.1:{port}
initialDelaySeconds: 5
periodSeconds: 5
periodSeconds: {period_seconds}
failureThreshold: {failure_threshold}
timeoutSeconds: 10
livenessProbe:
exec:
Expand Down
6 changes: 4 additions & 2 deletions jina/resources/k8s/template/deployment-uses-after.yml
Expand Up @@ -58,7 +58,8 @@ spec:
- executor
- 127.0.0.1:{port}
initialDelaySeconds: 5
periodSeconds: 5
periodSeconds: {period_seconds}
failureThreshold: {failure_threshold}
timeoutSeconds: 10
livenessProbe:
exec:
Expand Down Expand Up @@ -107,7 +108,8 @@ spec:
- executor
- 127.0.0.1:{port_uses_after}
initialDelaySeconds: 5
periodSeconds: 5
periodSeconds: {period_seconds}
failureThreshold: {failure_threshold}
timeoutSeconds: 10
livenessProbe:
exec:
Expand Down
9 changes: 6 additions & 3 deletions jina/resources/k8s/template/deployment-uses-before-after.yml
Expand Up @@ -58,7 +58,8 @@ spec:
- executor
- 127.0.0.1:{port_uses}
initialDelaySeconds: 5
periodSeconds: 5
periodSeconds: {period_seconds}
failureThreshold: {failure_threshold}
timeoutSeconds: 10
livenessProbe:
exec:
Expand Down Expand Up @@ -107,7 +108,8 @@ spec:
- executor
- 127.0.0.1:{port_uses_before}
initialDelaySeconds: 5
periodSeconds: 5
periodSeconds: {period_seconds}
failureThreshold: {failure_threshold}
timeoutSeconds: 10
livenessProbe:
exec:
Expand Down Expand Up @@ -156,7 +158,8 @@ spec:
- executor
- 127.0.0.1:{port_uses_after}
initialDelaySeconds: 5
periodSeconds: 5
periodSeconds: {period_seconds}
failureThreshold: {failure_threshold}
timeoutSeconds: 10
livenessProbe:
exec:
Expand Down
6 changes: 4 additions & 2 deletions jina/resources/k8s/template/deployment-uses-before.yml
Expand Up @@ -58,7 +58,8 @@ spec:
- executor
- 127.0.0.1:{port}
initialDelaySeconds: 5
periodSeconds: 5
periodSeconds: {period_seconds}
failureThreshold: {failure_threshold}
timeoutSeconds: 10
livenessProbe:
exec:
Expand Down Expand Up @@ -107,7 +108,8 @@ spec:
- executor
- 127.0.0.1:{port_uses_before}
initialDelaySeconds: 5
periodSeconds: 5
periodSeconds: {period_seconds}
failureThreshold: {failure_threshold}
timeoutSeconds: 10
livenessProbe:
exec:
Expand Down
3 changes: 2 additions & 1 deletion jina/resources/k8s/template/statefulset-executor.yml
Expand Up @@ -58,7 +58,8 @@ spec:
- executor
- 127.0.0.1:{port}
initialDelaySeconds: 5
periodSeconds: 5
periodSeconds: {period_seconds}
failureThreshold: {failure_threshold}
timeoutSeconds: 10
livenessProbe:
exec:
Expand Down
5 changes: 3 additions & 2 deletions tests/k8s/conftest.py
Expand Up @@ -4,11 +4,11 @@
import tempfile
import time
from pathlib import Path
from typing import List, Dict
from pytest import FixtureRequest
from typing import Dict, List

import docker
import pytest
from pytest import FixtureRequest
from pytest_kind import KindCluster

from jina.logging.logger import JinaLogger
Expand Down Expand Up @@ -161,6 +161,7 @@ def image_name_tag_map() -> Dict[str, str]:
'custom-gateway': '0.1.1',
'test-stateful-executor': '0.13.1',
'multiprotocol-gateway': '0.1.1',
'slow-load-executor': '0.1.1',
}


Expand Down
8 changes: 8 additions & 0 deletions tests/k8s/slow-load-executor/.dockerignore
@@ -0,0 +1,8 @@
.git
.venv
.github
.pytest_cache
tests
__pycache__
scripts
env
8 changes: 8 additions & 0 deletions tests/k8s/slow-load-executor/Dockerfile
@@ -0,0 +1,8 @@
# TODO use fixed jina version for deterministic execution
FROM jinaai/jina:test-pip

# setup the workspace
COPY . /workspace
WORKDIR /workspace

ENTRYPOINT ["jina", "executor", "--uses", "config.yml"]
3 changes: 3 additions & 0 deletions tests/k8s/slow-load-executor/config.yml
@@ -0,0 +1,3 @@
jtype: SlowLoadExecutor
py_modules:
- slow_load_executor.py
9 changes: 9 additions & 0 deletions tests/k8s/slow-load-executor/slow_load_executor.py
@@ -0,0 +1,9 @@
import time

from jina import Executor

time.sleep(60)


class SlowLoadExecutor(Executor):
pass
52 changes: 52 additions & 0 deletions tests/k8s/test_k8s.py
Expand Up @@ -1637,3 +1637,55 @@ async def test_really_slow_executor_liveness_probe_works(docker_images, tmpdir,
).stdout.strip("\n")
print(out)
raise exc


@pytest.mark.asyncio
@pytest.mark.timeout(3600)
@pytest.mark.parametrize(
'docker_images',
[['slow-load-executor']],
indirect=True,
)
async def test_flow_slow_load_executor(logger, docker_images, tmpdir, k8s_cluster):
from kubernetes import client

api_client = client.ApiClient()
core_client = client.CoreV1Api(api_client=api_client)
app_client = client.AppsV1Api(api_client=api_client)
try:
port = 8080
flow = Flow().add(
name='slow_load_executor',
uses=f'docker://{docker_images[0]}',
timeout_ready=65000,
)

dump_path = os.path.join(str(tmpdir), 'k8s-slow-load-executor')
namespace = 'slow-load-executor'
flow.to_kubernetes_yaml(dump_path, k8s_namespace=namespace)

await create_all_flow_deployments_and_wait_ready(
dump_path,
namespace=namespace,
api_client=api_client,
app_client=app_client,
core_client=core_client,
deployment_replicas_expected={'gateway': 1, 'slow-load-executor': 1},
logger=logger,
)

executor_pod_name = (
core_client.list_namespaced_pod(
namespace=namespace, label_selector='app=slow-load-executor'
)
.items[0]
.metadata.name
)
with shell_portforward(
k8s_cluster._cluster.kubectl_path, executor_pod_name, port, port, namespace
):
assert AsyncNewLoopRuntime.is_ready(f'localhost:{port}')

except Exception as exc:
logger.error(f' Exception raised {exc}')
raise exc
Expand Up @@ -16,7 +16,7 @@ def test_flow_to_k8s_yaml(tmpdir, protocol, flow_port):
flow_kwargs['port'] = flow_port
flow = (
Flow(**flow_kwargs)
.add(name='executor0', uses_with={'param': 0})
.add(name='executor0', uses_with={'param': 0}, timeout_ready=60000)
.add(
name='executor1',
shards=2,
Expand Down Expand Up @@ -149,6 +149,13 @@ def test_flow_to_k8s_yaml(tmpdir, protocol, flow_port):
assert executor0_objects[2]['metadata']['namespace'] == namespace
assert executor0_objects[2]['metadata']['name'] == 'executor0'
assert executor0_objects[2]['spec']['replicas'] == 1

executor0_startup_probe = executor0_objects[2]['spec']['template']['spec'][
'containers'
][0]['startupProbe']
assert executor0_startup_probe['failureThreshold'] == 12
assert executor0_startup_probe['periodSeconds'] == 5

executor0_args = executor0_objects[2]['spec']['template']['spec']['containers'][0][
'args'
]
Expand Down