From 9520fd4c8ccc6d5ea3fff1fe18527693075c8b31 Mon Sep 17 00:00:00 2001
From: ArthurFlag
Date: Fri, 9 May 2025 11:45:26 +0200
Subject: [PATCH 1/4] update packages

---
 package-lock.json | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/package-lock.json b/package-lock.json
index e0c4dcdd4ee9..890c2262f502 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1171,9 +1171,9 @@
       }
     },
     "node_modules/nanoid": {
-      "version": "3.3.7",
-      "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.7.tgz",
-      "integrity": "sha512-eSRppjcPIatRIMC1U6UngP8XFcz8MQWGQdt1MTBQ7NaAmvXDfvNxbvWV3x2y6CdEUciCSsDHDQZbhYaB8QEo2g==",
+      "version": "3.3.11",
+      "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.11.tgz",
+      "integrity": "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w==",
       "funding": [
         {
           "type": "github",

From 206fefdc21c4e39f59ba8531b5d322c3503ef92a Mon Sep 17 00:00:00 2001
From: ArthurFlag
Date: Mon, 12 May 2025 16:25:23 +0200
Subject: [PATCH 2/4] update: clarify base urls

---
 content/manuals/ai/model-runner.md | 111 ++++++++++++++++-------------
 1 file changed, 60 insertions(+), 51 deletions(-)

diff --git a/content/manuals/ai/model-runner.md b/content/manuals/ai/model-runner.md
index 84898f989dd5..bd98b3e8f1b3 100644
--- a/content/manuals/ai/model-runner.md
+++ b/content/manuals/ai/model-runner.md
@@ -7,9 +7,9 @@ params:
     text: Beta
 group: AI
 weight: 20
-description: Learn how to use Docker Model Runner to manage and run AI models. 
+description: Learn how to use Docker Model Runner to manage and run AI models.
 keywords: Docker, ai, model runner, docker desktop, llm
-aliases: 
+aliases:
 - /desktop/features/model-runner/
 - /ai/model-runner/
 ---
@@ -34,8 +34,8 @@ Models are pulled from Docker Hub the first time they're used and stored locally
 
 1. Navigate to the **Features in development** tab in settings.
 2. Under the **Experimental features** tab, select **Access experimental features**.
-3. Select **Apply and restart**. 
-4. Quit and reopen Docker Desktop to ensure the changes take effect. 
+3. Select **Apply and restart**.
+4. Quit and reopen Docker Desktop to ensure the changes take effect.
 5. Open the **Settings** view in Docker Desktop.
 6. Navigate to **Features in development**.
 7. From the **Beta** tab, check the **Enable Docker Model Runner** setting.
@@ -46,7 +46,7 @@ You can now use the `docker model` command in the CLI and view and interact with
 
 ### Model runner status
 
-Check whether the Docker Model Runner is active:
+Check whether the Docker Model Runner is active and display the current inference engine:
 
 ```console
 $ docker model status
@@ -55,7 +55,7 @@ $ docker model status
 ### View all commands
 
-Displays help information and a list of available subcommands. 
+Displays help information and a list of available subcommands.
 
 ```console
 $ docker model help
 ```
@@ -74,7 +74,7 @@ Commands:
   version     Show the current version
 ```
 
-### Pull a model 
+### Pull a model
 
 Pulls a model from Docker Hub to your local environment.
 
@@ -82,7 +82,7 @@ Pulls a model from Docker Hub to your local environment.
 ```console
 $ docker model pull <model>
 ```
 
-Example: 
+Example:
 
 ```console
 $ docker model pull ai/smollm2
 ```
@@ -114,7 +114,13 @@ You will see something similar to:
 
 ### Run a model
 
-Run a model and interact with it using a submitted prompt or in chat mode.
+Run a model and interact with it using a submitted prompt or in chat mode. When you run a model, Docker
+calls an Inference Server API endpoint hosted by the Model Runner through Docker Desktop. The model
+stays in memory until another model is requested, or until a pre-defined inactivity timeout is reached (currently 5 minutes).
+
+You do not have to use `docker model run` before interacting with a specific model from a
+host process or from within a container. Model Runner transparently loads the requested model on-demand, assuming it has been
+pulled beforehand and is locally available.
 
 #### One-time prompt
 
@@ -150,7 +156,7 @@ Chat session ended.
 
 ### Push a model to Docker Hub
 
-Use the following command to push your model to Docker Hub:
+To push your model to Docker Hub:
 
 ```console
 $ docker model push <namespace>/<model>
@@ -158,10 +164,10 @@ $ docker model push <namespace>/<model>
 
 ### Tag a model
 
-You can specify a particular version or variant of the model:
+To specify a particular version or variant of the model:
 
 ```console
-$ docker model tag 
+$ docker model tag
 ```
 
 If no tag is provided, Docker defaults to `latest`.
@@ -171,7 +177,7 @@ If no tag is provided, Docker defaults to `latest`.
 Fetch logs from Docker Model Runner to monitor activity or debug issues.
 
 ```console
-$ docker model logs 
+$ docker model logs
 ```
 
 The following flags are accepted:
@@ -211,7 +217,7 @@ If you want to try an existing GenAI application, follow these instructions.
 
 4. Open your app in the browser at the addresses specified in the repository [README](https://github.com/docker/hello-genai).
 
-You'll see the GenAI app's interface where you can start typing your prompts. 
+You'll see the GenAI app's interface where you can start typing your prompts.
 
 You can now interact with your own GenAI app, powered by a local model. Try a few prompts and notice how fast the responses are — all running on your machine with Docker.
 
@@ -219,45 +225,47 @@ You can now interact with your own GenAI app, powered by a local fe
 
 ### What models are available?
 
-All the available models are hosted in the [public Docker Hub namespace of `ai`](https://hub.docker.com/u/ai). 
+All the available models are hosted in the [public Docker Hub namespace of `ai`](https://hub.docker.com/u/ai).
 
 ### What API endpoints are available?
 
-Once the feature is enabled, the following new APIs are available:
+Once the feature is enabled, new API endpoints are available under the following base URLs:
 
-```text
-#### Inside containers ####
+- **For containers**: `http://model-runner.docker.internal/`
+- **For host processes**: `http://localhost:12434/engines/`, assuming you have enabled TCP host access on default port 12434.
 
-http://model-runner.docker.internal/
+**Docker Model management endpoints:**
 
-    # Docker Model management
-    POST /models/create
-    GET /models
-    GET /models/{namespace}/{name}
-    DELETE /models/{namespace}/{name}
+```text
+POST /models/create
+GET /models
+GET /models/{namespace}/{name}
+DELETE /models/{namespace}/{name}
+```
 
-    # OpenAI endpoints
-    GET /engines/llama.cpp/v1/models
-    GET /engines/llama.cpp/v1/models/{namespace}/{name}
-    POST /engines/llama.cpp/v1/chat/completions
-    POST /engines/llama.cpp/v1/completions
-    POST /engines/llama.cpp/v1/embeddings
-    Note: You can also omit llama.cpp.
-    E.g., POST /engines/v1/chat/completions.
+**OpenAI endpoints:**
 
-#### Inside or outside containers (host) ####
+```text
+GET /engines/llama.cpp/v1/models
+GET /engines/llama.cpp/v1/models/{namespace}/{name}
+POST /engines/llama.cpp/v1/chat/completions
+POST /engines/llama.cpp/v1/completions
+POST /engines/llama.cpp/v1/embeddings
+Note:
+```
 
-Same endpoints on /var/run/docker.sock
+To call these endpoints via a Unix socket (`/var/run/docker.sock`), prefix their path with
+`/exp/vDD4.40`.
+
+> [!NOTE]
+> You can omit `llama.cpp` from the path. For example: `POST /engines/v1/chat/completions`.
 
-    # While still in Beta
-    Prefixed with /exp/vDD4.40
-```
 
 ### How do I interact through the OpenAI API?
 
 #### From within a container
 
-Examples of calling an OpenAI endpoint (`chat/completions`) from within another container using `curl`:
+To call the `chat/completions` OpenAI endpoint from within another container using `curl`:
 
 ```bash
 #!/bin/sh
 
 curl http://model-runner.docker.internal/engines/llama.cpp/v1/chat/completions \
     -H "Content-Type: application/json" \
     -d '{
         "model": "ai/smollm2",
         "messages": [
             {
                 "role": "system",
                 "content": "You are a helpful assistant."
             },
             {
                 "role": "user",
                 "content": "Please write 500 words about the fall of Rome."
             }
         ]
     }'
 
 ```
 
@@ -280,15 +288,18 @@
-#### From the host using a Unix socket
+#### From the host using TCP
 
-Examples of calling an OpenAI endpoint (`chat/completions`) through the Docker socket from the host using `curl`:
+To call the `chat/completions` OpenAI endpoint from the host via TCP:
+
+1. Enable the host-side TCP support from the Docker Desktop GUI, or via the [Docker Desktop CLI](/manuals/desktop/features/desktop-cli.md).
+   For example: `docker desktop enable model-runner --tcp <port>`.
+2. Interact with it as documented in the previous section using `localhost` and the correct port.
 
 ```bash
 #!/bin/sh
 
-curl --unix-socket $HOME/.docker/run/docker.sock \
-    localhost/exp/vDD4.40/engines/llama.cpp/v1/chat/completions \
+curl http://localhost:12434/engines/llama.cpp/v1/chat/completions \
     -H "Content-Type: application/json" \
     -d '{
         "model": "ai/smollm2",
         "messages": [
             {
                 "role": "system",
                 "content": "You are a helpful assistant."
             },
             {
                 "role": "user",
                 "content": "Please write 500 words about the fall of Rome."
             }
         ]
     }'
 
 ```
 
@@ -303,19 +314,17 @@
-#### From the host using TCP
+#### From the host using a Unix socket
 
-In case you want to interact with the API from the host, but use TCP instead of a Docker socket, you can enable the host-side TCP support from the Docker Desktop GUI, or via the [Docker Desktop CLI](/manuals/desktop/features/desktop-cli.md). For example, using `docker desktop enable model-runner --tcp <port>`.
-
-Afterwards, interact with it as previously documented using `localhost` and the chosen, or the default port.
+To call the `chat/completions` OpenAI endpoint through the Docker socket from the host using `curl`:
 
 ```bash
 #!/bin/sh
 
-    curl http://localhost:12434/engines/llama.cpp/v1/chat/completions \
+curl --unix-socket $HOME/.docker/run/docker.sock \
+    localhost/exp/vDD4.40/engines/llama.cpp/v1/chat/completions \
     -H "Content-Type: application/json" \
     -d '{
         "model": "ai/smollm2",
         "messages": [
             {
                 "role": "system",
                 "content": "You are a helpful assistant."
             },
             {
                 "role": "user",
                 "content": "Please write 500 words about the fall of Rome."
             }
         ]
     }'
-
 ```
 
@@ -354,7 +363,7 @@ Once linked, re-run the command.
 
 ### No safeguard for running oversized models
 
-Currently, Docker Model Runner doesn't include safeguards to prevent you from launching models that exceed their system’s available resources. Attempting to run a model that is too large for the host machine may result in severe slowdowns or render the system temporarily unusable. This issue is particularly common when running LLMs models without sufficient GPU memory or system RAM.
+Currently, Docker Model Runner doesn't include safeguards to prevent you from launching models that exceed your system's available resources. Attempting to run a model that is too large for the host machine may result in severe slowdowns or render the system temporarily unusable. This issue is particularly common when running LLMs without sufficient GPU memory or system RAM.
 
 ### No consistent digest support in Model CLI
 
@@ -362,7 +371,7 @@ The Docker Model CLI currently lacks consistent support for specifying models by image digest. As a temporary workaround, you should refer to models by name instead of digest.
 
 ## Share feedback
 
-Thanks for trying out Docker Model Runner. Give feedback or report any bugs you may find through the **Give feedback** link next to the **Enable Docker Model Runner** setting. 
+Thanks for trying out Docker Model Runner. Give feedback or report any bugs you may find through the **Give feedback** link next to the **Enable Docker Model Runner** setting.
 
 ## Disable the feature
 
@@ -371,4 +380,4 @@ To disable Docker Model Runner:
 1. Open the **Settings** view in Docker Desktop.
 2. Navigate to the **Beta** tab in **Features in development**.
 3. Clear the **Enable Docker Model Runner** checkbox.
-4. Select **Apply & restart**.
\ No newline at end of file
+4. Select **Apply & restart**.

From 2c8be89ebcdec5c4fe52d0696369d0f7c7f9a72e Mon Sep 17 00:00:00 2001
From: ArthurFlag
Date: Tue, 13 May 2025 10:09:47 +0200
Subject: [PATCH 3/4] fix

---
 content/manuals/ai/model-runner.md | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/content/manuals/ai/model-runner.md b/content/manuals/ai/model-runner.md
index bd98b3e8f1b3..14c93ada1c08 100644
--- a/content/manuals/ai/model-runner.md
+++ b/content/manuals/ai/model-runner.md
@@ -231,8 +231,8 @@ All the available models are hosted in the [public Docker Hub namespace of `ai`]
 
 Once the feature is enabled, new API endpoints are available under the following base URLs:
 
-- **For containers**: `http://model-runner.docker.internal/`
-- **For host processes**: `http://localhost:12434/engines/`, assuming you have enabled TCP host access on default port 12434.
+- **From containers**: `http://model-runner.docker.internal/`
+- **From host processes**: `http://localhost:12434/`, assuming you have enabled TCP host access on default port 12434.
 
 **Docker Model management endpoints:**
 
@@ -251,7 +251,6 @@ GET /engines/llama.cpp/v1/models/{namespace}/{name}
 POST /engines/llama.cpp/v1/chat/completions
 POST /engines/llama.cpp/v1/completions
 POST /engines/llama.cpp/v1/embeddings
-Note:
 ```
 
 To call these endpoints via a Unix socket (`/var/run/docker.sock`), prefix their path with

From 83edda5705f11ecc62c43ea69f2d3ac418f7731e Mon Sep 17 00:00:00 2001
From: Arthur
Date: Thu, 15 May 2025 11:11:15 +0200
Subject: [PATCH 4/4] Apply suggestions from code review

Co-authored-by: Allie Sadler <102604716+aevesdocker@users.noreply.github.com>
---
 content/manuals/ai/model-runner.md | 8 ++++----
 package-lock.json                  | 6 +++---
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/content/manuals/ai/model-runner.md b/content/manuals/ai/model-runner.md
index 14c93ada1c08..65fe85622858 100644
--- a/content/manuals/ai/model-runner.md
+++ b/content/manuals/ai/model-runner.md
@@ -231,10 +231,10 @@ All the available models are hosted in the [public Docker Hub namespace of `ai`]
 
 Once the feature is enabled, new API endpoints are available under the following base URLs:
 
-- **From containers**: `http://model-runner.docker.internal/`
-- **From host processes**: `http://localhost:12434/`, assuming you have enabled TCP host access on default port 12434.
+- From containers: `http://model-runner.docker.internal/` +- From host processes: `http://localhost:12434/`, assuming you have enabled TCP host access on default port 12434. -**Docker Model management endpoints:** +Docker Model management endpoints: ```text POST /models/create @@ -243,7 +243,7 @@ GET /models/{namespace}/{name} DELETE /models/{namespace}/{name} ``` -**OpenAI endpoints:** +OpenAI endpoints: ```text GET /engines/llama.cpp/v1/models diff --git a/package-lock.json b/package-lock.json index 890c2262f502..e0c4dcdd4ee9 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1171,9 +1171,9 @@ } }, "node_modules/nanoid": { - "version": "3.3.11", - "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.11.tgz", - "integrity": "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w==", + "version": "3.3.7", + "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.7.tgz", + "integrity": "sha512-eSRppjcPIatRIMC1U6UngP8XFcz8MQWGQdt1MTBQ7NaAmvXDfvNxbvWV3x2y6CdEUciCSsDHDQZbhYaB8QEo2g==", "funding": [ { "type": "github",
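
Taken together, the patches above document the full local workflow. As a quick end-to-end sanity check of that flow, here is a minimal sketch, assuming Docker Desktop with Docker Model Runner enabled and TCP host access on the default port 12434 (both described above), plus network access to Docker Hub:

```bash
#!/bin/sh
# Minimal end-to-end sketch of the flow documented in the patches above.
# Assumptions: Docker Desktop with Docker Model Runner enabled, TCP host
# access enabled on the default port 12434, and the ai/smollm2 example
# model available on Docker Hub.
set -e

# Confirm that the Model Runner is active.
docker model status

# Pull the example model used throughout the docs; later requests load it
# on demand, so no explicit `docker model run` is required first.
docker model pull ai/smollm2

# Call the OpenAI-compatible chat/completions endpoint from the host via TCP.
curl http://localhost:12434/engines/llama.cpp/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{
        "model": "ai/smollm2",
        "messages": [
            {
                "role": "user",
                "content": "Say hello in one short sentence."
            }
        ]
    }'
```

From inside a container, the same request can be sent to `http://model-runner.docker.internal/engines/llama.cpp/v1/chat/completions`, per the base URLs documented above.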