From 840c221feaeaa0ea19c10d410dd39a7991c71372 Mon Sep 17 00:00:00 2001
From: tarrantro <290535018@qq.com>
Date: Wed, 17 Aug 2022 15:54:24 +0800
Subject: [PATCH] docs: add gateway timeout and update shared gpu (#5069)

---
 docs/fundamentals/jcloud/advanced.md  | 15 +++++++++++++++
 docs/fundamentals/jcloud/resources.md | 19 ++++++++++++++++++-
 2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/docs/fundamentals/jcloud/advanced.md b/docs/fundamentals/jcloud/advanced.md
index b4d1dd2f363a5..81f219f917d3f 100644
--- a/docs/fundamentals/jcloud/advanced.md
+++ b/docs/fundamentals/jcloud/advanced.md
@@ -103,6 +103,21 @@ executors:
     uses: jinahub+docker://Executor1
 ```
 
+### Timeout
+
+By default, the JCloud gateway closes connections that have been idle for more than `600` seconds. If you need a longer idle timeout, change the `timeout` parameter (in seconds) under `gateway`:
+
+```yaml
+jtype: Flow
+jcloud:
+  gateway:
+    ingress: kong
+    timeout: 600
+executors:
+  - name: executor1
+    uses: jinahub+docker://Executor1
+```
+
 (retention-days)=
 ## Retention days
 
diff --git a/docs/fundamentals/jcloud/resources.md b/docs/fundamentals/jcloud/resources.md
index b40358debefd7..e70f29c632309 100644
--- a/docs/fundamentals/jcloud/resources.md
+++ b/docs/fundamentals/jcloud/resources.md
@@ -95,19 +95,36 @@ When using GPU resources, it may take few extra mins until all Executors ready t
 
 ##### Shared
 
-An executor using a `shared` GPU shares this GPU with up to 10 other Executors.
+An Executor using a `shared` GPU shares this GPU with up to 4 other Executors.
 This enables a time-slicing, which allows workloads that land on oversubscribed GPUs to interleave with one another.
 
 ```yaml
 jtype: Flow
 executors:
   - name: executor1
     uses: jinahub+docker://Executor1
     jcloud:
       resources:
         gpu: shared
 ```
 
+```{note}
+When using shared GPU resources, the GPU memory is shared across pods (24G in total). If your application is memory-intensive, we suggest using a dedicated GPU instead (see the sketch below).
+```
+
 ```{caution}
 There are no special provisions in place to isolate replicas that run on the same underlying GPU. Each workload has access to the GPU memory and runs in the same fault-domain as of all the others. Therefore, if one workload crashes, they all do.
 ```
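+
+If your application needs the full GPU memory, you can request a dedicated GPU instead. The following is a minimal sketch (it assumes that a plain device count in the `gpu` field requests dedicated GPUs):
+
+```yaml
+jtype: Flow
+executors:
+  - name: executor1
+    uses: jinahub+docker://Executor1
+    jcloud:
+      resources:
+        # Assumption: an integer value requests this many dedicated GPUs.
+        gpu: 1
+```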