From b27f99ccb8e5068bec1d7ad1a2c1d976a9457318 Mon Sep 17 00:00:00 2001 From: Andrey Cheptsov Date: Wed, 6 May 2026 17:01:03 +0200 Subject: [PATCH 1/7] Inline example source code into docs pages Drop the mkdocs hook that materialized examples//README.md into docs/examples//index.md stubs at build time. Move the 20 navigated example READMEs directly into docs/examples/.md (flat layout, no per-example subdirectory) and delete the parallel .dstack.yml configs since their content is already inline in the markdown. The two GCP NCCL test yamls that were only referenced via dead "Source code" admonitions are now inlined into their respective tabs. Within the moved pages, convert absolute https://dstack.ai/(docs|examples) links to relative .md paths so mkdocs strict mode validates them. Non-navigated examples (misc/, llms/, server-deployment/, plugins/, single-node-training/{qlora,optimum-tpu}, the AMD subdirs, etc.) are left untouched for a later pass. --- contributing/DOCS.md | 30 +--- ...d-kubernetes-2024-recap-and-whats-ahead.md | 2 +- docs/blog/posts/changelog-07-25.md | 2 +- docs/blog/posts/gpu-health-checks.md | 2 +- docs/blog/posts/mpi.md | 2 +- docs/blog/posts/nebius-in-dstack-sky.md | 4 +- docs/docs/concepts/fleets.md | 8 +- docs/docs/concepts/gateways.md | 2 +- docs/docs/concepts/services.md | 4 +- .../examples/accelerators/amd.md | 18 +-- docs/examples/accelerators/amd/index.md | 0 .../examples/accelerators/tenstorrent.md | 28 ++-- .../accelerators/tenstorrent/index.md | 0 .../examples/accelerators/tpu.md | 14 +- docs/examples/accelerators/tpu/index.md | 0 .../examples/clusters/aws.md | 19 ++- docs/examples/clusters/aws/index.md | 0 .../examples/clusters/crusoe.md | 14 +- docs/examples/clusters/crusoe/index.md | 0 .../examples/clusters/gcp.md | 136 +++++++++++++++--- docs/examples/clusters/gcp/index.md | 0 .../examples/clusters/lambda.md | 16 +-- docs/examples/clusters/lambda/index.md | 0 .../examples/clusters/nccl-rccl-tests.md | 16 +-- .../clusters/nccl-rccl-tests/index.md | 0 .../examples/clusters/nebius.md | 12 +- docs/examples/clusters/nebius/index.md | 0 .../examples/distributed-training/axolotl.md | 18 +-- .../distributed-training/axolotl/index.md | 0 .../distributed-training/ray-ragen.md | 13 +- .../distributed-training/ray-ragen/index.md | 0 .../examples/distributed-training/trl.md | 16 +-- .../distributed-training/trl/index.md | 0 .../examples/inference/nim.md | 12 +- docs/examples/inference/nim/index.md | 0 .../examples/inference/sglang.md | 16 +-- docs/examples/inference/sglang/index.md | 0 .../examples/inference/trtllm.md | 10 +- docs/examples/inference/trtllm/index.md | 0 .../examples/inference/vllm.md | 16 +-- docs/examples/inference/vllm/index.md | 0 .../examples/models/deepseek-v4.md | 6 +- docs/examples/models/deepseek-v4/index.md | 0 .../examples/models/qwen36.md | 10 +- docs/examples/models/qwen36/index.md | 0 .../examples/single-node-training/axolotl.md | 16 +-- .../single-node-training/axolotl/index.md | 0 .../examples/single-node-training/trl.md | 14 +- .../single-node-training/trl/index.md | 0 examples/accelerators/tenstorrent/.dstack.yml | 9 -- .../tt-inference-server.dstack.yml | 24 ---- .../tenstorrent/tt-smi.dstack.yml | 10 -- examples/clusters/aws/fleet.dstack.yml | 8 -- examples/clusters/gcp/a3-fleet.dstack.yml | 7 - examples/clusters/gcp/a3high-fleet.dstack.yml | 7 - .../clusters/gcp/a3high-nccl-tests.dstack.yml | 37 ----- .../clusters/gcp/a3mega-nccl-tests.dstack.yml | 50 ------- examples/clusters/gcp/a4-fleet.dstack.yml | 13 -- 
.../nccl-rccl-tests/nccl-tests.dstack.yml | 29 ---- .../nccl-rccl-tests/rccl-tests.dstack.yml | 44 ------ .../distributed-training/axolotl/.dstack.yml | 49 ------- .../axolotl/fleet.dstack.yml | 9 -- .../ray-ragen/.dstack.yml | 39 ----- .../ray-ragen/fleet.dstack.yml | 9 -- .../trl/deepspeed.dstack.yml | 52 ------- .../distributed-training/trl/fleet.dstack.yml | 9 -- .../distributed-training/trl/fsdp.dstack.yml | 52 ------- .../single-node-training/axolotl/.dstack.yml | 28 ---- .../single-node-training/trl/train.dstack.yml | 54 ------- mkdocs.yml | 101 ++++++------- scripts/docs/gen_examples.py | 31 ---- scripts/docs/gen_llms_files.py | 14 +- scripts/docs/hooks.py | 68 ++------- 73 files changed, 339 insertions(+), 890 deletions(-) rename examples/accelerators/amd/README.md => docs/examples/accelerators/amd.md (91%) delete mode 100644 docs/examples/accelerators/amd/index.md rename examples/accelerators/tenstorrent/README.md => docs/examples/accelerators/tenstorrent.md (81%) delete mode 100644 docs/examples/accelerators/tenstorrent/index.md rename examples/accelerators/tpu/README.md => docs/examples/accelerators/tpu.md (94%) delete mode 100644 docs/examples/accelerators/tpu/index.md rename examples/clusters/aws/README.md => docs/examples/clusters/aws.md (84%) delete mode 100644 docs/examples/clusters/aws/index.md rename examples/clusters/crusoe/README.md => docs/examples/clusters/crusoe.md (88%) delete mode 100644 docs/examples/clusters/crusoe/index.md rename examples/clusters/gcp/README.md => docs/examples/clusters/gcp.md (76%) delete mode 100644 docs/examples/clusters/gcp/index.md rename examples/clusters/lambda/README.md => docs/examples/clusters/lambda.md (84%) delete mode 100644 docs/examples/clusters/lambda/index.md rename examples/clusters/nccl-rccl-tests/README.md => docs/examples/clusters/nccl-rccl-tests.md (82%) delete mode 100644 docs/examples/clusters/nccl-rccl-tests/index.md rename examples/clusters/nebius/README.md => docs/examples/clusters/nebius.md (90%) delete mode 100644 docs/examples/clusters/nebius/index.md rename examples/distributed-training/axolotl/README.md => docs/examples/distributed-training/axolotl.md (75%) delete mode 100644 docs/examples/distributed-training/axolotl/index.md rename examples/distributed-training/ray-ragen/README.md => docs/examples/distributed-training/ray-ragen.md (86%) delete mode 100644 docs/examples/distributed-training/ray-ragen/index.md rename examples/distributed-training/trl/README.md => docs/examples/distributed-training/trl.md (83%) delete mode 100644 docs/examples/distributed-training/trl/index.md rename examples/inference/nim/README.md => docs/examples/inference/nim.md (80%) delete mode 100644 docs/examples/inference/nim/index.md rename examples/inference/sglang/README.md => docs/examples/inference/sglang.md (89%) delete mode 100644 docs/examples/inference/sglang/index.md rename examples/inference/trtllm/README.md => docs/examples/inference/trtllm.md (83%) delete mode 100644 docs/examples/inference/trtllm/index.md rename examples/inference/vllm/README.md => docs/examples/inference/vllm.md (77%) delete mode 100644 docs/examples/inference/vllm/index.md rename examples/models/deepseek-v4/README.md => docs/examples/models/deepseek-v4.md (93%) delete mode 100644 docs/examples/models/deepseek-v4/index.md rename examples/models/qwen36/README.md => docs/examples/models/qwen36.md (91%) delete mode 100644 docs/examples/models/qwen36/index.md rename examples/single-node-training/axolotl/README.md => docs/examples/single-node-training/axolotl.md 
(78%) delete mode 100644 docs/examples/single-node-training/axolotl/index.md rename examples/single-node-training/trl/README.md => docs/examples/single-node-training/trl.md (83%) delete mode 100644 docs/examples/single-node-training/trl/index.md delete mode 100644 examples/accelerators/tenstorrent/.dstack.yml delete mode 100644 examples/accelerators/tenstorrent/tt-inference-server.dstack.yml delete mode 100644 examples/accelerators/tenstorrent/tt-smi.dstack.yml delete mode 100644 examples/clusters/aws/fleet.dstack.yml delete mode 100644 examples/clusters/gcp/a3-fleet.dstack.yml delete mode 100644 examples/clusters/gcp/a3high-fleet.dstack.yml delete mode 100644 examples/clusters/gcp/a3high-nccl-tests.dstack.yml delete mode 100644 examples/clusters/gcp/a3mega-nccl-tests.dstack.yml delete mode 100644 examples/clusters/gcp/a4-fleet.dstack.yml delete mode 100644 examples/clusters/nccl-rccl-tests/nccl-tests.dstack.yml delete mode 100644 examples/clusters/nccl-rccl-tests/rccl-tests.dstack.yml delete mode 100644 examples/distributed-training/axolotl/.dstack.yml delete mode 100644 examples/distributed-training/axolotl/fleet.dstack.yml delete mode 100644 examples/distributed-training/ray-ragen/.dstack.yml delete mode 100644 examples/distributed-training/ray-ragen/fleet.dstack.yml delete mode 100644 examples/distributed-training/trl/deepspeed.dstack.yml delete mode 100644 examples/distributed-training/trl/fleet.dstack.yml delete mode 100644 examples/distributed-training/trl/fsdp.dstack.yml delete mode 100644 examples/single-node-training/axolotl/.dstack.yml delete mode 100644 examples/single-node-training/trl/train.dstack.yml delete mode 100644 scripts/docs/gen_examples.py diff --git a/contributing/DOCS.md b/contributing/DOCS.md index 4fcc04d6d1..ce545803a4 100644 --- a/contributing/DOCS.md +++ b/contributing/DOCS.md @@ -39,7 +39,7 @@ uv run pre-commit install To preview the documentation, run the follow command: ```shell -uv run mkdocs serve -w examples -s +uv run mkdocs serve -s ``` If you want to build static files, you can use the following command: @@ -57,7 +57,6 @@ The documentation uses a custom build system with MkDocs hooks to generate vario Use these in `.envrc` to disable expensive docs regeneration, especially during `mkdocs serve` auto-reload. Set any of them to disable the corresponding artifact. ```shell -export DSTACK_DOCS_DISABLE_EXAMPLES=1 export DSTACK_DOCS_DISABLE_LLM_TXT=1 export DSTACK_DOCS_DISABLE_CLI_REFERENCE=1 export DSTACK_DOCS_DISABLE_YAML_SCHEMAS=1 @@ -69,19 +68,11 @@ export DSTACK_DOCS_DISABLE_REST_PLUGIN_SPEC_REFERENCE=1 The build process is customized via hooks in `scripts/docs/hooks.py`: -#### 1. Example materialization - -Example pages like `examples/single-node-training/trl/index.md` are stubs that reference `README.md` files in the repository root: -- **Stub location**: `docs/examples/single-node-training/trl/index.md` -- **Content source**: `examples/single-node-training/trl/README.md` - -During the build, the hook reads the README content and uses it for rendering the HTML page. - -#### 2. Schema reference expansion +#### 1. Schema reference expansion Files in `docs/reference/**/*.md` can use `#SCHEMA#` placeholders that are expanded with generated schema documentation during the build. -#### 3. llms.txt generation +#### 2. 
llms.txt generation Two files are generated for LLM consumption: @@ -108,9 +99,9 @@ description: Short description of what this page covers --- ``` -For examples, add frontmatter to the `README.md` files in the repository root (e.g., `examples/single-node-training/trl/README.md`). +For examples, add frontmatter to the page files (e.g., `docs/examples/single-node-training/trl.md`). -#### 4. Skills discovery +#### 3. Skills discovery The build creates `.well-known/skills/` directory structure for skills discovery: - Reads `skills/dstack/SKILL.md` @@ -129,18 +120,11 @@ docs/ │ ├── concepts/ # Concept pages │ ├── guides/ # How-to guides │ └── reference/ # API reference (schema expansion) -├── examples/ # Example stub files (index.md) +├── examples/ # Example pages (inline source code) │ └── single-node-training/ -│ └── trl/ -│ └── index.md # Stub referencing root README +│ └── trl.md # Page content with frontmatter └── overrides/ # Theme customization -examples/ # Example content (repository root) -└── single-node-training/ - └── trl/ - ├── README.md # Actual content with frontmatter - └── train.dstack.yml - scripts/docs/ ├── hooks.py # MkDocs build hooks ├── gen_llms_files.py # llms.txt generation diff --git a/docs/blog/posts/beyond-kubernetes-2024-recap-and-whats-ahead.md b/docs/blog/posts/beyond-kubernetes-2024-recap-and-whats-ahead.md index 9d32f336b0..8980c984f1 100644 --- a/docs/blog/posts/beyond-kubernetes-2024-recap-and-whats-ahead.md +++ b/docs/blog/posts/beyond-kubernetes-2024-recap-and-whats-ahead.md @@ -104,7 +104,7 @@ efficient manner. ### NVIDIA -NVIDIA remains the top accelerator supported by `dstack`. Recently, we introduced a [NIM example](../../examples/inference/nim/index.md) +NVIDIA remains the top accelerator supported by `dstack`. Recently, we introduced a [NIM example](../../examples/inference/nim.md) for model deployment, and we continue to enhance support for the rest of NVIDIA's ecosystem. ### AMD diff --git a/docs/blog/posts/changelog-07-25.md b/docs/blog/posts/changelog-07-25.md index a065ef37c7..50c8ff032a 100644 --- a/docs/blog/posts/changelog-07-25.md +++ b/docs/blog/posts/changelog-07-25.md @@ -144,7 +144,7 @@ resources: #### AWS EFA -EFA is a network interface for EC2 that enables low-latency, high-bandwidth communication between nodes—crucial for scaling distributed deep learning. With `dstack`, EFA is automatically enabled when using supported instance types in fleets. Check out our [example](../../examples/clusters/aws/index.md) +EFA is a network interface for EC2 that enables low-latency, high-bandwidth communication between nodes—crucial for scaling distributed deep learning. With `dstack`, EFA is automatically enabled when using supported instance types in fleets. Check out our [example](../../examples/clusters/aws.md) #### Default Docker images diff --git a/docs/blog/posts/gpu-health-checks.md b/docs/blog/posts/gpu-health-checks.md index 9b074023c4..1fe89e1d1d 100644 --- a/docs/blog/posts/gpu-health-checks.md +++ b/docs/blog/posts/gpu-health-checks.md @@ -51,7 +51,7 @@ A healthy instance is ready for workloads. A warning means you should monitor it This release focuses on passive checks using DCGM background health checks. These run continuously and do not interrupt workloads. -For active checks today, you can run [NCCL/RCCL tests](../../examples/clusters/nccl-rccl-tests/index.md) as a [distributed task](../../docs/concepts/tasks.md#distributed-tasks) to verify GPU-to-GPU communication and bandwidth across a fleet. 
Active tests like these can reveal network or interconnect issues that passive monitoring might miss. More built-in support for active diagnostics is planned. +For active checks today, you can run [NCCL/RCCL tests](../../examples/clusters/nccl-rccl-tests.md) as a [distributed task](../../docs/concepts/tasks.md#distributed-tasks) to verify GPU-to-GPU communication and bandwidth across a fleet. Active tests like these can reveal network or interconnect issues that passive monitoring might miss. More built-in support for active diagnostics is planned. ## Supported backends diff --git a/docs/blog/posts/mpi.md b/docs/blog/posts/mpi.md index 713059f2f7..37cd0dc7bf 100644 --- a/docs/blog/posts/mpi.md +++ b/docs/blog/posts/mpi.md @@ -100,5 +100,5 @@ as well as use MPI for other tasks. !!! info "What's next?" 1. Learn more about [dev environments](../../docs/concepts/dev-environments.md), [tasks](../../docs/concepts/tasks.md), [services](../../docs/concepts/services.md), and [fleets](../../docs/concepts/fleets.md) - 2. Check the [NCCL/RCCL tests](../../examples/clusters/nccl-rccl-tests/index.md) example + 2. Check the [NCCL/RCCL tests](../../examples/clusters/nccl-rccl-tests.md) example 3. Join [Discord](https://discord.gg/u8SmfwPpMd) diff --git a/docs/blog/posts/nebius-in-dstack-sky.md b/docs/blog/posts/nebius-in-dstack-sky.md index dd1617d290..823576f377 100644 --- a/docs/blog/posts/nebius-in-dstack-sky.md +++ b/docs/blog/posts/nebius-in-dstack-sky.md @@ -104,7 +104,7 @@ $ dstack apply -f my-cluster.dstack.yml Once the fleet is ready, you can run [distributed tasks](../../docs/concepts/tasks.md#distributed-tasks). `dstack` automatically configures drivers, networking, and fast GPU-to-GPU interconnect. -To learn more, see the [clusters](../../examples/clusters/nebius/index.md) guide. +To learn more, see the [clusters](../../examples/clusters/nebius.md) guide. With Nebius joining `dstack` Sky, users can now run on-demand and spot GPUs and clusters directly through the marketplace—gaining access to the same production grade infrastrucure Nebius customers use for frontier-scale training, without needing a separate Nebius account. @@ -124,4 +124,4 @@ Our goal is to give teams maximum flexibility while removing the complexity of m 4. Explore [dev environments](../../docs/concepts/dev-environments.md), [tasks](../../docs/concepts/tasks.md), [services](../../docs/concepts/services.md), and [fleets](../../docs/concepts/fleets.md) - 5. Read the [clusters](../../examples/clusters/nebius/index.md) guide + 5. Read the [clusters](../../examples/clusters/nebius.md) guide diff --git a/docs/docs/concepts/fleets.md b/docs/docs/concepts/fleets.md index b927e94d4f..685392bd80 100644 --- a/docs/docs/concepts/fleets.md +++ b/docs/docs/concepts/fleets.md @@ -164,22 +164,22 @@ This property ensures that instances are interconnected. This is required for ru === "AWS" On AWS, `dstack` requires `public_ips` to be set to `false` in the backend configuration. - Refer to the [AWS](../../examples/clusters/aws/index.md) example for more details. + Refer to the [AWS](../../examples/clusters/aws.md) example for more details. === "GCP" On GCP, you may need to configure `extra_vpcs` and `roce_vpcs` in the `gcp` backend configuration. - Refer to the [GCP](../../examples/clusters/gcp/index.md) examples for more details. + Refer to the [GCP](../../examples/clusters/gcp.md) examples for more details. 
=== "Nebius" On [Nebius](https://docs.nebius.com/compute/clusters/gpu), `dstack` automatically configures InfiniBand networking if it is supported by the selected instance type. === "Crusoe" On [Crusoe](https://docs.crusoecloud.com/networking/infiniband/managing-infiniband-networks), `dstack` automatically configures InfiniBand networking if it is supported by the selected instance type. - Refer to the [Crusoe](../../examples/clusters/crusoe/index.md#vms) example for more details. + Refer to the [Crusoe](../../examples/clusters/crusoe.md#vms) example for more details. === "Kubernetes" If the Kubernetes cluster has interconnect configured, `dstack` can use it without additional setup. - See the [Lambda](../../examples/clusters/lambda/index.md#kubernetes) or [Crusoe](../../examples/clusters/crusoe/index.md#kubernetes) examples. + See the [Lambda](../../examples/clusters/lambda.md#kubernetes) or [Crusoe](../../examples/clusters/crusoe.md#kubernetes) examples. > See the [Clusters](../../examples.md#clusters) examples. diff --git a/docs/docs/concepts/gateways.md b/docs/docs/concepts/gateways.md index 29209124d5..53374aa53d 100644 --- a/docs/docs/concepts/gateways.md +++ b/docs/docs/concepts/gateways.md @@ -97,7 +97,7 @@ router: -If you configure the `sglang` router, [services](../concepts/services.md) can run either [standard SGLang workers](../../examples/inference/sglang/index.md) or [Prefill-Decode workers](../../examples/inference/sglang/index.md#pd-disaggregation) (aka PD disaggregation). +If you configure the `sglang` router, [services](../concepts/services.md) can run either [standard SGLang workers](../../examples/inference/sglang.md) or [Prefill-Decode workers](../../examples/inference/sglang.md#pd-disaggregation) (aka PD disaggregation). !!! note "PD disaggregation" To run services with PD disaggregation see [SGLang PD disaggregation](https://dstack.ai/examples/inference/sglang/#pd-disaggregation). diff --git a/docs/docs/concepts/services.md b/docs/docs/concepts/services.md index 9969e565ab..1923aa0655 100644 --- a/docs/docs/concepts/services.md +++ b/docs/docs/concepts/services.md @@ -1288,5 +1288,5 @@ The rolling deployment stops when all replicas are updated or when a new deploym 1. Read about [dev environments](dev-environments.md) and [tasks](tasks.md) 2. Learn how to manage [fleets](fleets.md) 3. See how to set up [gateways](gateways.md) - 4. Check the [vLLM](../../examples/inference/vllm/index.md) and - [NIM](../../examples/inference/nim/index.md) examples + 4. Check the [vLLM](../../examples/inference/vllm.md) and + [NIM](../../examples/inference/nim.md) examples diff --git a/examples/accelerators/amd/README.md b/docs/examples/accelerators/amd.md similarity index 91% rename from examples/accelerators/amd/README.md rename to docs/examples/accelerators/amd.md index b35b29c1c9..5c0c306ce8 100644 --- a/examples/accelerators/amd/README.md +++ b/docs/examples/accelerators/amd.md @@ -6,12 +6,12 @@ description: Deploying and fine-tuning models on AMD MI300X GPUs using SGLang, v # AMD `dstack` supports running dev environments, tasks, and services on AMD GPUs. -You can do that by setting up an [SSH fleet](https://dstack.ai/docs/concepts/fleets#ssh-fleets) +You can do that by setting up an [SSH fleet](../../docs/concepts/fleets.md#ssh-fleets) with on-prem AMD GPUs or configuring a backend that offers AMD GPUs such as the `runpod` backend. 
## Deployment -Here are examples of a [service](https://dstack.ai/docs/services) that deploy +Here are examples of a [service](../../docs/concepts/services.md) that deploy `Qwen/Qwen3.6-27B` on AMD MI300X GPUs using [SGLang](https://github.com/sgl-project/sglang) and [vLLM](https://docs.vllm.ai/en/latest/). @@ -101,7 +101,7 @@ To request multiple GPUs, specify the quantity after the GPU name, separated by ## Fine-tuning > If you're planning multi-node AMD training, validate cluster networking first -with the [NCCL/RCCL tests](https://dstack.ai/examples/clusters/nccl-rccl-tests/) +with the [NCCL/RCCL tests](../clusters/nccl-rccl-tests.md) example. === "TRL" @@ -230,14 +230,14 @@ $ dstack apply -f ## What's next? -1. Browse the dedicated [SGLang](https://dstack.ai/examples/inference/sglang/) - and [vLLM](https://dstack.ai/examples/inference/vllm/) examples, plus +1. Browse the dedicated [SGLang](../inference/sglang.md) + and [vLLM](../inference/vllm.md) examples, plus [Axolotl](https://github.com/ROCm/rocm-blogs/tree/release/blogs/artificial-intelligence/axolotl), [TRL](https://rocm.docs.amd.com/en/latest/how-to/llm-fine-tuning-optimization/fine-tuning-and-inference.html), and [ROCm Bitsandbytes](https://github.com/ROCm/bitsandbytes) 2. For multi-node training, run - [NCCL/RCCL tests](https://dstack.ai/examples/clusters/nccl-rccl-tests/) + [NCCL/RCCL tests](../clusters/nccl-rccl-tests.md) to validate AMD cluster networking. -3. Check [dev environments](https://dstack.ai/docs/dev-environments), - [tasks](https://dstack.ai/docs/tasks), and - [services](https://dstack.ai/docs/services). +3. Check [dev environments](../../docs/concepts/dev-environments.md), + [tasks](../../docs/concepts/tasks.md), and + [services](../../docs/concepts/services.md). diff --git a/docs/examples/accelerators/amd/index.md b/docs/examples/accelerators/amd/index.md deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/examples/accelerators/tenstorrent/README.md b/docs/examples/accelerators/tenstorrent.md similarity index 81% rename from examples/accelerators/tenstorrent/README.md rename to docs/examples/accelerators/tenstorrent.md index 4edc463f67..65005fd3a4 100644 --- a/examples/accelerators/tenstorrent/README.md +++ b/docs/examples/accelerators/tenstorrent.md @@ -10,11 +10,11 @@ description: Running dev environments, tasks, and services on Tenstorrent Wormho ??? info "SSH fleets" -
+
```yaml type: fleet - name: wormwhole-fleet + name: tt-fleet ssh_config: user: root @@ -34,15 +34,15 @@ description: Running dev environments, tasks, and services on Tenstorrent Wormho
```bash - $ dstack apply -f examples/acceleators/tenstorrent/fleet.dstack.yml + $ dstack apply -f tt-fleet.dstack.yml - FLEET RESOURCES PRICE STATUS CREATED - wormwhole-fleet cpu=12 mem=32GB disk=243GB n150:12GB $0 idle 18 sec ago + FLEET RESOURCES PRICE STATUS CREATED + tt-fleet cpu=12 mem=32GB disk=243GB n150:12GB $0 idle 18 sec ago ```
- For more details on fleet configuration, refer to [SSH fleets](https://dstack.ai/docs/concepts/fleets#ssh-fleets). + For more details on fleet configuration, refer to [SSH fleets](../../docs/concepts/fleets.md#ssh-fleets). ## Services @@ -50,7 +50,7 @@ Here's an example of a service that deploys [`Llama-3.2-1B-Instruct`](https://huggingface.co/meta-llama/Llama-3.2-1B) using [Tenstorrent Inference Service](https://github.com/tenstorrent/tt-inference-server). -
+
```yaml type: service @@ -86,7 +86,7 @@ Go ahead and run configuration using `dstack apply`:
```bash - $ dstack apply -f examples/acceleators/tenstorrent/tt-inference-server.dstack.yml + $ dstack apply -f service.dstack.yml ```
@@ -123,16 +123,16 @@ Additionally, the model is available via `dstack`'s control plane UI:
 
 ![](https://dstack.ai/static-assets/static-assets/images/dstack-tenstorrent-model-ui.png){ width=800 }
 
-When a [gateway](https://dstack.ai/docs/concepts/gateways/) is configured, the service endpoint
+When a [gateway](../../docs/concepts/gateways.md) is configured, the service endpoint
 is available at `https://./`.
 
-> Services support many options, including authentication, auto-scaling policies, etc. To learn more, refer to [Services](https://dstack.ai/docs/concepts/services).
+> Services support many options, including authentication, auto-scaling policies, etc. To learn more, refer to [Services](../../docs/concepts/services.md).
 
 ## Tasks
 
 Below is a task that simply runs `tt-smi -s`. Tasks can be used for training, fine-tuning, batch inference, or anything else.
 
-
+
```yaml type: task @@ -159,13 +159,13 @@ resources:
-> Tasks support many options, including multi-node configuration, max duration, etc. To learn more, refer to [Tasks](https://dstack.ai/docs/concepts/tasks).
+> Tasks support many options, including multi-node configuration, max duration, etc. To learn more, refer to [Tasks](../../docs/concepts/tasks.md).
 
 ## Dev environments
 
 Below is an example of a dev environment configuration. It can be used to provision a dev environment that can be accessed via your desktop IDE.
 
-
+
 ```yaml
 type: dev-environment
 
@@ -191,7 +191,7 @@ If you run it via `dstack apply`, it will output the URL to access it via your d
 
 ![](https://dstack.ai/static-assets/static-assets/images/dstack-tenstorrent-cursor.png){ width=800 }
 
-> Dev nevironments support many options, including inactivity and max duration, IDE configuration, etc. To learn more, refer to [Dev environments](https://dstack.ai/docs/concepts/tasks).
+> Dev environments support many options, including inactivity and max duration, IDE configuration, etc. To learn more, refer to [Dev environments](../../docs/concepts/dev-environments.md).
 
 ??? info "Feedback"
     Found a bug, or want to request a feature? File it in the [issue tracker](https://github.com/dstackai/dstack/issues),
diff --git a/examples/accelerators/tpu/README.md b/docs/examples/accelerators/tpu.md
similarity index 94%
rename from examples/accelerators/tpu/README.md
rename to docs/examples/accelerators/tpu.md
index 53f31b93bd..92640a4835 100644
--- a/examples/accelerators/tpu/README.md
+++ b/docs/examples/accelerators/tpu.md
@@ -7,7 +7,7 @@ description: Deploying and fine-tuning models on Google Cloud TPUs using Optimum
 If you've configured the `gcp` backend in `dstack`, you can run dev environments, tasks, and services on [TPUs](https://cloud.google.com/tpu/docs/intro-to-tpu). Choose a TPU instance by specifying the TPU version and the number of cores (e.g. `v5litepod-8`) in the `gpu` property under `resources`,
-or request TPUs by specifying `tpu` as `vendor` ([see examples](https://dstack.ai/docs/guides/protips/#gpu)).
+or request TPUs by specifying `tpu` as `vendor` ([see examples](../../docs/guides/protips.md#gpu)).
 
 Below are a few examples on using TPUs for deployment and fine-tuning.
 
@@ -18,18 +18,18 @@ Below are a few examples on using TPUs for deployment and fine-tuning.
 
 !!! info "TPU storage"
     By default, each TPU VM contains a 100GB boot disk and its size cannot be changed.
-    If you need more storage, attach additional disks using [Volumes](https://dstack.ai/docs/concepts/volumes/).
+    If you need more storage, attach additional disks using [Volumes](../../docs/concepts/volumes.md).
 
 ## Deployment
 
 Many serving frameworks including vLLM and TGI have TPU support.
-Here's an example of a [service](https://dstack.ai/docs/services) that deploys Llama 3.1 8B using
+Here's an example of a [service](../../docs/concepts/services.md) that deploys Llama 3.1 8B using
 [Optimum TPU](https://github.com/huggingface/optimum-tpu)
 and [vLLM](https://github.com/vllm-project/vllm).
 
 === "Optimum TPU"
 
-
+
```yaml type: service @@ -61,7 +61,7 @@ and [vLLM](https://github.com/vllm-project/vllm). the official Docker image can be used. === "vLLM" -
+
```yaml type: service @@ -189,5 +189,5 @@ Note, `v5litepod` is optimized for fine-tuning transformer-based models. Each co 1. Browse [Optimum TPU](https://github.com/huggingface/optimum-tpu), [Optimum TPU TGI](https://github.com/huggingface/optimum-tpu/tree/main/text-generation-inference) and [vLLM](https://docs.vllm.ai/en/latest/getting_started/tpu-installation.html). -2. Check [dev environments](https://dstack.ai/docs/dev-environments), [tasks](https://dstack.ai/docs/tasks), - [services](https://dstack.ai/docs/services), and [fleets](https://dstack.ai/docs/concepts/fleets). +2. Check [dev environments](../../docs/concepts/dev-environments.md), [tasks](../../docs/concepts/tasks.md), + [services](../../docs/concepts/services.md), and [fleets](../../docs/concepts/fleets.md). diff --git a/docs/examples/accelerators/tpu/index.md b/docs/examples/accelerators/tpu/index.md deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/examples/clusters/aws/README.md b/docs/examples/clusters/aws.md similarity index 84% rename from examples/clusters/aws/README.md rename to docs/examples/clusters/aws.md index b6319e214e..688af91e0e 100644 --- a/examples/clusters/aws/README.md +++ b/docs/examples/clusters/aws.md @@ -42,7 +42,7 @@ projects: Once your backend is ready, define a fleet configuration. -
+
```yaml type: fleet @@ -62,7 +62,7 @@ Provision the fleet with `dstack apply`:
```shell -$ dstack apply -f examples/clusters/aws/efa-fleet.dstack.yml +$ dstack apply -f efa-fleet.dstack.yml Provisioning... ---> 100% @@ -96,7 +96,7 @@ Provisioning... To confirm that EFA is working, run NCCL tests: -
+
```yaml type: task @@ -135,7 +135,7 @@ Run it with `dstack apply`:
```shell -$ dstack apply -f examples/clusters/nccl-tests/.dstack.yml +$ dstack apply -f nccl-tests.dstack.yml Provisioning... ---> 100% @@ -150,7 +150,7 @@ Provisioning... Here’s an example using `torchrun` for a simple multi-node PyTorch job: -
+
```yaml type: task @@ -186,7 +186,7 @@ Provision and launch it via `dstack apply`.
```shell -$ dstack apply -f examples/distributed-training/torchrun/.dstack.yml +$ dstack apply -f train-distrib.dstack.yml Provisioning... ---> 100% @@ -197,7 +197,6 @@ Provisioning... Instead of setting `python`, you can specify your own Docker image using `image`. Make sure that the image is properly configured for EFA. !!! info "What's next" - 1. Learn more about [distributed tasks](https://dstack.ai/docs/concepts/tasks#distributed-tasks) - 2. Check [dev environments](https://dstack.ai/docs/concepts/dev-environments), - [services](https://dstack.ai/docs/concepts/services), and [fleets](https://dstack.ai/docs/concepts/fleets) - 3. Read the [Clusters](https://dstack.ai/docs/guides/clusters) guide + 1. Learn more about [distributed tasks](../../docs/concepts/tasks.md#distributed-tasks) and [cluster placement](../../docs/concepts/fleets.md#cluster-placement) + 2. Check [dev environments](../../docs/concepts/dev-environments.md), + [services](../../docs/concepts/services.md), and [fleets](../../docs/concepts/fleets.md) diff --git a/docs/examples/clusters/aws/index.md b/docs/examples/clusters/aws/index.md deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/examples/clusters/crusoe/README.md b/docs/examples/clusters/crusoe.md similarity index 88% rename from examples/clusters/crusoe/README.md rename to docs/examples/clusters/crusoe.md index ed416ae3e7..2a9c108ec6 100644 --- a/examples/clusters/crusoe/README.md +++ b/docs/examples/clusters/crusoe.md @@ -67,7 +67,7 @@ $ dstack apply -f crusoe-fleet.dstack.yml This will automatically create an IB partition and provision instances with InfiniBand networking. -Once the fleet is created, you can run [dev environments](https://dstack.ai/docs/concepts/dev-environments), [tasks](https://dstack.ai/docs/concepts/tasks), and [services](https://dstack.ai/docs/concepts/services). +Once the fleet is created, you can run [dev environments](../../docs/concepts/dev-environments.md), [tasks](../../docs/concepts/tasks.md), and [services](../../docs/concepts/services.md). > If you want instances to be provisioned on demand, you can set `nodes` to `0..2`. In this case, `dstack` will create instances only when you run workloads. @@ -84,7 +84,7 @@ Once the fleet is created, you can run [dev environments](https://dstack.ai/docs ### Configure the backend -Follow the standard instructions for setting up a [`kubernetes`](https://dstack.ai/docs/concepts/backends/#kubernetes) backend: +Follow the standard instructions for setting up a [`kubernetes`](../../docs/concepts/backends.md#kubernetes) backend:
@@ -133,15 +133,15 @@ $ dstack apply -f crusoe-fleet.dstack.yml
-Once the fleet is created, you can run [dev environments](https://dstack.ai/docs/concepts/dev-environments), [tasks](https://dstack.ai/docs/concepts/tasks), and [services](https://dstack.ai/docs/concepts/services). +Once the fleet is created, you can run [dev environments](../../docs/concepts/dev-environments.md), [tasks](../../docs/concepts/tasks.md), and [services](../../docs/concepts/services.md). ## NCCL tests -Use a [distributed task](https://dstack.ai/docs/concepts/tasks#distributed-tasks) that runs NCCL tests to validate cluster network bandwidth. +Use a [distributed task](../../docs/concepts/tasks.md#distributed-tasks) that runs NCCL tests to validate cluster network bandwidth. === "VMs" - With the Crusoe backend, HPC-X and NCCL topology files are pre-installed on the host VM image. Mount them into the container via [instance volumes](https://dstack.ai/docs/concepts/volumes#instance-volumes). + With the Crusoe backend, HPC-X and NCCL topology files are pre-installed on the host VM image. Mount them into the container via [instance volumes](../../docs/concepts/volumes.md#instance-volumes).
@@ -275,6 +275,6 @@ $ dstack apply -f crusoe-nccl-tests.dstack.yml ## What's next -1. Learn about [dev environments](https://dstack.ai/docs/concepts/dev-environments), [tasks](https://dstack.ai/docs/concepts/tasks), [services](https://dstack.ai/docs/concepts/services) -2. Check out [backends](https://dstack.ai/docs/concepts/backends#crusoe-cloud) and [fleets](https://dstack.ai/docs/concepts/fleets#cloud-fleets) +1. Learn about [dev environments](../../docs/concepts/dev-environments.md), [tasks](../../docs/concepts/tasks.md), [services](../../docs/concepts/services.md) +2. Check out [backends](../../docs/concepts/backends.md#crusoe-cloud) and [fleets](../../docs/concepts/fleets.md#cloud-fleets) 3. Check the docs on [Crusoe's networking](https://docs.crusoecloud.com/networking/infiniband/) and ["Crusoe Managed" Kubernetes](https://docs.crusoecloud.com/orchestration/cmk/index.html) diff --git a/docs/examples/clusters/crusoe/index.md b/docs/examples/clusters/crusoe/index.md deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/examples/clusters/gcp/README.md b/docs/examples/clusters/gcp.md similarity index 76% rename from examples/clusters/gcp/README.md rename to docs/examples/clusters/gcp.md index a4610235b2..b0f0393200 100644 --- a/examples/clusters/gcp/README.md +++ b/docs/examples/clusters/gcp.md @@ -191,7 +191,7 @@ Once you've configured the `gcp` backend, create the fleet configuration: === "A4" -
+
```yaml type: fleet @@ -220,7 +220,7 @@ Once you've configured the `gcp` backend, create the fleet configuration:
```shell - $ dstack apply -f examples/clusters/gcp/a4-fleet.dstack.yml + $ dstack apply -f a4-fleet.dstack.yml Provisioning... ---> 100% @@ -257,7 +257,7 @@ Once you've configured the `gcp` backend, create the fleet configuration:
```shell - $ dstack apply -f examples/clusters/gcp/a3mega-fleet.dstack.yml + $ dstack apply -f a3mega-fleet.dstack.yml FLEET INSTANCE BACKEND GPU PRICE STATUS CREATED a3mega-fleet 1 gcp (europe-west4) H100:80GB:8 $22.1525 (spot) idle 9 mins ago @@ -273,7 +273,7 @@ Once you've configured the `gcp` backend, create the fleet configuration: === "A3 High/Edge" -
+
```yaml type: fleet @@ -296,7 +296,7 @@ Once you've configured the `gcp` backend, create the fleet configuration:
 ```shell
-    $ dstack apply -f examples/clusters/gcp/a3high-fleet.dstack.yml
+    $ dstack apply -f a3high-fleet.dstack.yml
 
     FLEET         INSTANCE  BACKEND             GPU          PRICE            STATUS  CREATED
     a3high-fleet  1         gcp (europe-west4)  H100:80GB:8  $20.5688 (spot)  idle    9 mins ago
@@ -324,7 +324,7 @@ Use a distributed task that runs NCCL tests to validate cluster network bandwidt
 
```shell - $ dstack apply -f examples/clusters/nccl-tests/.dstack.yml + $ dstack apply -f nccl-tests.dstack.yml Provisioning... ---> 100% @@ -351,15 +351,70 @@ Use a distributed task that runs NCCL tests to validate cluster network bandwidt
=== "A3 Mega" - !!! info "Source code" - The source code of the task can be found at [examples/clusters/gcp/a3mega-nccl-tests.dstack.yml](https://github.com/dstackai/dstack/blob/master/examples/clusters/gcp/a3mega-nccl-tests.dstack.yml). + +
+ + ```yaml + type: task + name: nccl-tests + nodes: 2 + image: nvcr.io/nvidia/pytorch:24.04-py3 + entrypoint: "bash -c" # Need to use bash instead of default dash for nccl-env-profile.sh + commands: + - | + # Setup TCPXO NCCL env variables + NCCL_LIB_DIR="/var/lib/tcpxo/lib64" + source ${NCCL_LIB_DIR}/nccl-env-profile-ll128.sh + export NCCL_FASTRAK_CTRL_DEV=enp0s12 + export NCCL_FASTRAK_IFNAME=enp6s0,enp7s0,enp13s0,enp14s0,enp134s0,enp135s0,enp141s0,enp142s0 + export NCCL_SOCKET_IFNAME=enp0s12 + export NCCL_FASTRAK_LLCM_DEVICE_DIRECTORY="/dev/aperture_devices" + export LD_LIBRARY_PATH="${NCCL_LIB_DIR}:${LD_LIBRARY_PATH}" + # Build NCCL Tests + git clone https://github.com/NVIDIA/nccl-tests.git + cd nccl-tests + MPI=1 CC=mpicc CXX=mpicxx make -j + cd build + # We use FIFO for inter-node communication + FIFO=/tmp/dstack_job + if [ ${DSTACK_NODE_RANK} -eq 0 ]; then + sleep 10 + echo "${DSTACK_NODES_IPS}" > hostfile + MPIRUN='mpirun --allow-run-as-root --hostfile hostfile' + # Wait for other nodes + while true; do + if ${MPIRUN} -n ${DSTACK_NODES_NUM} -N 1 true >/dev/null 2>&1; then + break + fi + echo 'Waiting for nodes...' + sleep 5 + done + # Run NCCL Tests + ${MPIRUN} \ + -n ${DSTACK_GPUS_NUM} -N ${DSTACK_GPUS_PER_NODE} \ + --mca btl tcp,self --mca btl_tcp_if_exclude lo,docker0 \ + $(env | awk -F= '{print "-x", $1}' | xargs) \ + ./all_gather_perf -b 8M -e 8G -f 2 -g 1 -w 5 --iters 200 -c 0; + # Notify nodes the job is done + ${MPIRUN} -n ${DSTACK_NODES_NUM} -N 1 sh -c "echo done > ${FIFO}" + else + mkfifo ${FIFO} + # Wait for a message from the first node + cat ${FIFO} + fi + spot_policy: auto + resources: + shm_size: 16GB + ``` + +
Pass the configuration to `dstack apply`:
```shell - $ dstack apply -f examples/clusters/gcp/a3mega-nccl-tests.dstack.yml + $ dstack apply -f nccl-tests.dstack.yml nccl-tests provisioning completed (running) nThread 1 nGpus 1 minBytes 8388608 maxBytes 8589934592 step: 2(factor) warmup iters: 5 iters: 200 agg iters: 1 validation: 0 graph: 0 @@ -385,15 +440,57 @@ Use a distributed task that runs NCCL tests to validate cluster network bandwidt
=== "A3 High/Edge" - !!! info "Source code" - The source code of the task can be found at [examples/clusters/nccl-tests/.dstack.yml](https://github.com/dstackai/dstack/blob/master/examples/clusters/nccl-tests/.dstack.yml). - + +
+ + ```yaml + type: task + name: nccl-tests + nodes: 2 + image: us-docker.pkg.dev/gce-ai-infra/gpudirect-tcpx/nccl-plugin-gpudirecttcpx + commands: + - | + export NCCL_DEBUG=INFO + export LD_LIBRARY_PATH=/usr/local/tcpx/lib64:$LD_LIBRARY_PATH + # We use FIFO for inter-node communication + FIFO=/tmp/dstack_job + if [ ${DSTACK_NODE_RANK} -eq 0 ]; then + mkdir -p /scripts/hostfiles2 + : > /scripts/hostfiles2/hostfile8 + for ip in ${DSTACK_NODES_IPS}; do + echo "${ip} slots=${DSTACK_GPUS_PER_NODE}" >> /scripts/hostfiles2/hostfile8 + done + MPIRUN='mpirun --allow-run-as-root --hostfile /scripts/hostfiles2/hostfile8' + # Wait for other nodes + while true; do + if ${MPIRUN} -n ${DSTACK_NODES_NUM} -N 1 true >/dev/null 2>&1; then + break + fi + echo 'Waiting for nodes...' + sleep 5 + done + # Run NCCL Tests + NCCL_GPUDIRECTTCPX_FORCE_ACK=0 /scripts/run-allgather.sh 8 eth1,eth2,eth3,eth4 8M 8GB 2 + # Notify nodes the job is done + ${MPIRUN} -n ${DSTACK_NODES_NUM} -N 1 sh -c "echo done > ${FIFO}" + else + mkfifo ${FIFO} + # Wait for a message from the first node + cat ${FIFO} + fi + spot_policy: auto + resources: + shm_size: 16GB + ``` + +
+ Pass the configuration to `dstack apply`:
```shell - $ dstack apply -f examples/clusters/gcp/a3high-nccl-tests.dstack.yml + $ dstack apply -f nccl-tests.dstack.yml nccl-tests provisioning completed (running) nThread 1 nGpus 1 minBytes 8388608 maxBytes 8589934592 step: 2(factor) warmup iters: 5 iters: 200 agg iters: 1 validation: 0 graph: 0 @@ -418,16 +515,13 @@ Use a distributed task that runs NCCL tests to validate cluster network bandwidt
- !!! info "Source code" - The source code of the task can be found at [examples/clusters/gcp/a3high-nccl-tests.dstack.yml](https://github.com/dstackai/dstack/blob/master/examples/clusters/gcp/a3high-nccl-tests.dstack.yml). - ### Distributed training === "A4" - You can use the standard [distributed task](https://dstack.ai/docs/concepts/tasks#distributed-tasks) example to run distributed training on A4 instances. + You can use the standard [distributed task](../../docs/concepts/tasks.md#distributed-tasks) example to run distributed training on A4 instances. === "A3 Mega" - You can use the standard [distributed task](https://dstack.ai/docs/concepts/tasks#distributed-tasks) example to run distributed training on A3 Mega instances. To enable GPUDirect-TCPX, make sure the required [NCCL environment variables](https://cloud.google.com/kubernetes-engine/docs/how-to/gpu-bandwidth-gpudirect-tcpx-autopilot#environment-variables-nccl) are properly set, for example by adding the following commands at the beginning: + You can use the standard [distributed task](../../docs/concepts/tasks.md#distributed-tasks) example to run distributed training on A3 Mega instances. To enable GPUDirect-TCPX, make sure the required [NCCL environment variables](https://cloud.google.com/kubernetes-engine/docs/how-to/gpu-bandwidth-gpudirect-tcpx-autopilot#environment-variables-nccl) are properly set, for example by adding the following commands at the beginning: ```shell # ... @@ -446,7 +540,7 @@ Use a distributed task that runs NCCL tests to validate cluster network bandwidt ``` === "A3 High/Edge" - You can use the standard [distributed task](https://dstack.ai/docs/concepts/tasks#distributed-tasks) example to run distributed training on A3 High/Edge instances. To enable GPUDirect-TCPX0, make sure the required [NCCL environment variables](https://cloud.google.com/kubernetes-engine/docs/how-to/gpu-bandwidth-gpudirect-tcpx-autopilot#environment-variables-nccl) are properly set, for example by adding the following commands at the beginning: + You can use the standard [distributed task](../../docs/concepts/tasks.md#distributed-tasks) example to run distributed training on A3 High/Edge instances. To enable GPUDirect-TCPX0, make sure the required [NCCL environment variables](https://cloud.google.com/kubernetes-engine/docs/how-to/gpu-bandwidth-gpudirect-tcpx-autopilot#environment-variables-nccl) are properly set, for example by adding the following commands at the beginning: ```shell # ... @@ -483,6 +577,6 @@ In addition to distributed training, you can of course run regular tasks, dev en ## What's new -1. Learn about [dev environments](https://dstack.ai/docs/concepts/dev-environments), [tasks](https://dstack.ai/docs/concepts/tasks), [services](https://dstack.ai/docs/concepts/services) -2. Read the [Clusters](https://dstack.ai/docs/guides/clusters) guide +1. Learn about [dev environments](../../docs/concepts/dev-environments.md), [tasks](../../docs/concepts/tasks.md), [services](../../docs/concepts/services.md) +2. Read about [cluster placement](../../docs/concepts/fleets.md#cluster-placement) 3. 
Check GCP's docs on using [A4](https://docs.cloud.google.com/compute/docs/gpus/create-gpu-vm-a3u-a4), and [A3 Mega/High/Edge](https://docs.cloud.google.com/compute/docs/gpus/gpudirect) instances diff --git a/docs/examples/clusters/gcp/index.md b/docs/examples/clusters/gcp/index.md deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/examples/clusters/lambda/README.md b/docs/examples/clusters/lambda.md similarity index 84% rename from examples/clusters/lambda/README.md rename to docs/examples/clusters/lambda.md index 07fb0ce926..e66e74573a 100644 --- a/examples/clusters/lambda/README.md +++ b/docs/examples/clusters/lambda.md @@ -19,7 +19,7 @@ description: Setting up Lambda clusters using Kubernetes or 1-Click Clusters wit ### Configure the backend -Follow the standard instructions for setting up a [Kubernetes](https://dstack.ai/docs/concepts/backends/#kubernetes) backend: +Follow the standard instructions for setting up a [Kubernetes](../../docs/concepts/backends.md#kubernetes) backend:
@@ -68,11 +68,11 @@ $ dstack apply -f lambda-fleet.dstack.yml
-Once the fleet is created, you can run [dev environments](https://dstack.ai/docs/concepts/dev-environments), [tasks](https://dstack.ai/docs/concepts/tasks), and [services](https://dstack.ai/docs/concepts/services).
+Once the fleet is created, you can run [dev environments](../../docs/concepts/dev-environments.md), [tasks](../../docs/concepts/tasks.md), and [services](../../docs/concepts/services.md).
 
 ## 1-Click Clusters
 
-Another way to work with Lambda clusters is through [1CC](https://lambda.ai/1-click-clusters). While `dstack` supports automated cluster provisioning via [VM-based backends](https://dstack.ai/docs/concepts/backends#vm-based), there is currently no programmatic way to provision Lambda 1CCs. As a result, to use a 1CC cluster with `dstack`, you must use [SSH fleets](https://dstack.ai/docs/concepts/fleets).
+Another way to work with Lambda clusters is through [1CC](https://lambda.ai/1-click-clusters). While `dstack` supports automated cluster provisioning via [VM-based backends](../../docs/concepts/backends.md#vm-based), there is currently no programmatic way to provision Lambda 1CCs. As a result, to use a 1CC cluster with `dstack`, you must use [SSH fleets](../../docs/concepts/fleets.md).
 
 ### Prerequisites
 
@@ -80,7 +80,7 @@ Another way to work with Lambda clusters is through [1CC](https://lambda.ai/1-cl
 
 ### Create a fleet
 
-Follow the standard instructions for setting up an [SSH fleet](https://dstack.ai/docs/concepts/fleets/#ssh-fleets):
+Follow the standard instructions for setting up an [SSH fleet](../../docs/concepts/fleets.md#ssh-fleets):
 
@@ -116,11 +116,11 @@ $ dstack apply -f lambda-fleet.dstack.yml
-Once the fleet is created, you can run [dev environments](https://dstack.ai/docs/concepts/dev-environments), [tasks](https://dstack.ai/docs/concepts/tasks), and [services](https://dstack.ai/docs/concepts/services). +Once the fleet is created, you can run [dev environments](../../docs/concepts/dev-environments.md), [tasks](../../docs/concepts/tasks.md), and [services](../../docs/concepts/services.md). ## Run tasks -To run tasks on a cluster, you must use [distributed tasks](https://dstack.ai/docs/concepts/tasks#distributed-task). +To run tasks on a cluster, you must use [distributed tasks](../../docs/concepts/tasks.md#distributed-task). ### Run NCCL tests @@ -213,6 +213,6 @@ Provisioning... ## What's next -1. Learn about [dev environments](https://dstack.ai/docs/concepts/dev-environments), [tasks](https://dstack.ai/docs/concepts/tasks), [services](https://dstack.ai/docs/concepts/services) -2. Read the [Kuberentes](https://dstack.ai/docs/guides/kubernetes), and [Clusters](https://dstack.ai/docs/guides/clusters) guides +1. Learn about [dev environments](../../docs/concepts/dev-environments.md), [tasks](../../docs/concepts/tasks.md), [services](../../docs/concepts/services.md) +2. Read about the [Kubernetes backend](../../docs/concepts/backends.md#kubernetes) and [cluster placement](../../docs/concepts/fleets.md#cluster-placement) 3. Check Lambda's docs on [Kubernetes](https://docs.lambda.ai/public-cloud/1-click-clusters/managed-kubernetes/#accessing-mk8s) and [1CC](https://docs.lambda.ai/public-cloud/1-click-clusters/) diff --git a/docs/examples/clusters/lambda/index.md b/docs/examples/clusters/lambda/index.md deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/examples/clusters/nccl-rccl-tests/README.md b/docs/examples/clusters/nccl-rccl-tests.md similarity index 82% rename from examples/clusters/nccl-rccl-tests/README.md rename to docs/examples/clusters/nccl-rccl-tests.md index a9cadd82e8..4c565d8c68 100644 --- a/examples/clusters/nccl-rccl-tests/README.md +++ b/docs/examples/clusters/nccl-rccl-tests.md @@ -5,10 +5,10 @@ description: Running NCCL and RCCL tests to validate cluster network bandwidth # NCCL/RCCL tests -This example shows how to run [NCCL](https://github.com/NVIDIA/nccl-tests) or [RCCL](https://github.com/ROCm/rccl-tests) tests on a cluster using [distributed tasks](https://dstack.ai/docs/concepts/tasks#distributed-tasks). +This example shows how to run [NCCL](https://github.com/NVIDIA/nccl-tests) or [RCCL](https://github.com/ROCm/rccl-tests) tests on a cluster using [distributed tasks](../../docs/concepts/tasks.md#distributed-tasks). !!! info "Prerequisites" - Before running a distributed task, make sure to create a fleet with `placement` set to `cluster` (can be a [managed fleet](https://dstack.ai/docs/concepts/fleets#cluster-placement) or an [SSH fleet](https://dstack.ai/docs/concepts/fleets#ssh-placement)). + Before running a distributed task, make sure to create a fleet with `placement` set to `cluster` (can be a [managed fleet](../../docs/concepts/fleets.md#cluster-placement) or an [SSH fleet](../../docs/concepts/fleets.md#ssh-placement)). ## Running as a task @@ -16,7 +16,7 @@ Here's an example of a task that runs AllReduce test on 2 nodes, each with 4 GPU === "NCCL tests" -
+
```yaml type: task @@ -59,7 +59,7 @@ Here's an example of a task that runs AllReduce test on 2 nodes, each with 4 GPU === "RCCL tests" -
+
```yaml type: task @@ -120,12 +120,12 @@ Here's an example of a task that runs AllReduce test on 2 nodes, each with 4 GPU ### Apply a configuration -To run a configuration, use the [`dstack apply`](https://dstack.ai/docs/reference/cli/dstack/apply/) command. +To run a configuration, use the [`dstack apply`](../../docs/reference/cli/dstack/apply.md) command.
```shell -$ dstack apply -f examples/clusters/nccl-rccl-tests/nccl-tests.dstack.yml +$ dstack apply -f nccl-tests.dstack.yml # BACKEND REGION INSTANCE RESOURCES SPOT PRICE 1 aws us-east-1 g4dn.12xlarge 48xCPU, 192GB, 4xT4 (16GB), 100.0GB (disk) no $3.912 @@ -139,5 +139,5 @@ Submit the run nccl-tests? [y/n]: y ## What's next? -1. Check [dev environments](https://dstack.ai/docs/concepts/dev-environments), [tasks](https://dstack.ai/docs/concepts/tasks), - [services](https://dstack.ai/docsconcepts/services), and [fleets](https://dstack.ai/docs/concepts/fleets). +1. Check [dev environments](../../docs/concepts/dev-environments.md), [tasks](../../docs/concepts/tasks.md), + [services](../../docs/concepts/services.md), and [fleets](../../docs/concepts/fleets.md). diff --git a/docs/examples/clusters/nccl-rccl-tests/index.md b/docs/examples/clusters/nccl-rccl-tests/index.md deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/examples/clusters/nebius/README.md b/docs/examples/clusters/nebius.md similarity index 90% rename from examples/clusters/nebius/README.md rename to docs/examples/clusters/nebius.md index 9f8bd349a0..6986a10ab5 100644 --- a/examples/clusters/nebius/README.md +++ b/docs/examples/clusters/nebius.md @@ -75,7 +75,7 @@ $ dstack apply -f nebius-fleet.dstack.yml This will automatically create a Nebius cluster and provision instances. -Once the fleet is created, you can run [dev environments](https://dstack.ai/docs/concepts/dev-environments), [tasks](https://dstack.ai/docs/concepts/tasks), and [services](https://dstack.ai/docs/concepts/services). +Once the fleet is created, you can run [dev environments](../../docs/concepts/dev-environments.md), [tasks](../../docs/concepts/tasks.md), and [services](../../docs/concepts/services.md). > If you want instances to be provisioned on demand, you can set `nodes` to `0..2`. In this case, `dstack` will create instances only when you run workloads. @@ -107,7 +107,7 @@ $ nebius mk8s cluster get-credentials --id <cluster id> --external ### Configure a backend -Follow the standard instructions for setting up a [`kubernetes`](https://dstack.ai/docs/concepts/backends/#kubernetes) backend: +Follow the standard instructions for setting up a [`kubernetes`](../../docs/concepts/backends.md#kubernetes) backend:
@@ -154,11 +154,11 @@ $ dstack apply -f nebius-fleet.dstack.yml
-Once the fleet is created, you can run [dev environments](https://dstack.ai/docs/concepts/dev-environments), [tasks](https://dstack.ai/docs/concepts/tasks), and [services](https://dstack.ai/docs/concepts/services). +Once the fleet is created, you can run [dev environments](../../docs/concepts/dev-environments.md), [tasks](../../docs/concepts/tasks.md), and [services](../../docs/concepts/services.md). ## NCCL tests -Use a [distributed task](https://dstack.ai/docs/concepts/tasks#distributed-tasks) to run NCCL tests and validate the cluster’s network bandwidth. +Use a [distributed task](../../docs/concepts/tasks.md#distributed-tasks) to run NCCL tests and validate the cluster’s network bandwidth.
@@ -252,6 +252,6 @@ nccl-tests provisioning completed (running) ## What's next -1. Learn about [dev environments](https://dstack.ai/docs/concepts/dev-environments), [tasks](https://dstack.ai/docs/concepts/tasks), [services](https://dstack.ai/docs/concepts/services) -2. Check out [backends](https://dstack.ai/docs/concepts/backends) and [fleets](https://dstack.ai/docs/concepts/fleets) +1. Learn about [dev environments](../../docs/concepts/dev-environments.md), [tasks](../../docs/concepts/tasks.md), [services](../../docs/concepts/services.md) +2. Check out [backends](../../docs/concepts/backends.md) and [fleets](../../docs/concepts/fleets.md) 3. Read Nebius' docs on [networking for VMs](https://docs.nebius.com/compute/clusters/gpu) and the [managed Kubernetes service](https://docs.nebius.com/kubernetes). diff --git a/docs/examples/clusters/nebius/index.md b/docs/examples/clusters/nebius/index.md deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/examples/distributed-training/axolotl/README.md b/docs/examples/distributed-training/axolotl.md similarity index 75% rename from examples/distributed-training/axolotl/README.md rename to docs/examples/distributed-training/axolotl.md index cd7be95e4c..c2e04d3fc6 100644 --- a/examples/distributed-training/axolotl/README.md +++ b/docs/examples/distributed-training/axolotl.md @@ -5,16 +5,16 @@ description: Distributed fine-tuning with Axolotl and FSDP across multiple nodes # Axolotl -This example walks you through how to run distributed fine-tune using [Axolotl](https://github.com/axolotl-ai-cloud/axolotl) and [distributed tasks](https://dstack.ai/docs/concepts/tasks#distributed-tasks). +This example walks you through how to run distributed fine-tune using [Axolotl](https://github.com/axolotl-ai-cloud/axolotl) and [distributed tasks](../../docs/concepts/tasks.md#distributed-tasks). !!! info "Prerequisites" - Before running a distributed task, make sure to create a fleet with `placement` set to `cluster` (can be a [managed fleet](https://dstack.ai/docs/concepts/fleets#cluster-placement) or an [SSH fleet](https://dstack.ai/docs/concepts/fleets#ssh-placement)). + Before running a distributed task, make sure to create a fleet with `placement` set to `cluster` (can be a [managed fleet](../../docs/concepts/fleets.md#cluster-placement) or an [SSH fleet](../../docs/concepts/fleets.md#ssh-placement)). ## Define a configuration Once the fleet is created, define a distributed task configuration. Here's an example of distributed `QLORA` task using `FSDP`. -
+
```yaml type: task @@ -72,7 +72,7 @@ volumes: ### Apply the configuration -To run a configuration, use the [`dstack apply`](https://dstack.ai/docs/reference/cli/dstack/apply.md) command. +To run a configuration, use the [`dstack apply`](../../docs/reference/cli/dstack/apply.md) command.
@@ -81,13 +81,13 @@ $ HF_TOKEN=... $ WANDB_API_KEY=... $ WANDB_PROJECT=... $ HUB_MODEL_ID=... -$ dstack apply -f examples/distributed-training/trl/fsdp.dstack.yml +$ dstack apply -f train-distrib.dstack.yml # BACKEND RESOURCES INSTANCE TYPE PRICE 1 ssh (remote) cpu=208 mem=1772GB H100:80GB:8 instance $0 idle 2 ssh (remote) cpu=208 mem=1772GB H100:80GB:8 instance $0 idle -Submit the run trl-train-fsdp-distrib? [y/n]: y +Submit the run axolotl-multi-node-qlora-llama3-70b? [y/n]: y Provisioning... ---> 100% @@ -95,6 +95,6 @@ Provisioning...
!!! info "What's next?" - 1. Read the [clusters](https://dstack.ai/docs/guides/clusters) guide - 2. Check [dev environments](https://dstack.ai/docs/dev-environments), [tasks](https://dstack.ai/docs/concepts/tasks), - [services](https://dstack.ai/docs/concepts/services), and [fleets](https://dstack.ai/docs/concepts/fleets) + 1. Check [dev environments](../../docs/concepts/dev-environments.md), [tasks](../../docs/concepts/tasks.md), + [services](../../docs/concepts/services.md), and [fleets](../../docs/concepts/fleets.md) + 2. Read about [cluster placement](../../docs/concepts/fleets.md#cluster-placement) diff --git a/docs/examples/distributed-training/axolotl/index.md b/docs/examples/distributed-training/axolotl/index.md deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/examples/distributed-training/ray-ragen/README.md b/docs/examples/distributed-training/ray-ragen.md similarity index 86% rename from examples/distributed-training/ray-ragen/README.md rename to docs/examples/distributed-training/ray-ragen.md index f7bd80d5c2..e3194b2b3a 100644 --- a/examples/distributed-training/ray-ragen/README.md +++ b/docs/examples/distributed-training/ray-ragen.md @@ -11,7 +11,7 @@ to fine-tune an agent on multiple nodes. Under the hood `RAGEN` uses [verl](https://github.com/volcengine/verl) for Reinforcement Learning and [Ray](https://docs.ray.io/en/latest/) for distributed training. !!! info "Prerequisites" - Before running a distributed task, make sure to create a fleet with `placement` set to `cluster` (can be a [managed fleet](https://dstack.ai/docs/concepts/fleets#cluster-placement) or an [SSH fleet](https://dstack.ai/docs/concepts/fleets#ssh-placement)). + Before running a distributed task, make sure to create a fleet with `placement` set to `cluster` (can be a [managed fleet](../../docs/concepts/fleets.md#cluster-placement) or an [SSH fleet](../../docs/concepts/fleets.md#ssh-placement)). ## Run a Ray cluster @@ -19,11 +19,11 @@ If you want to use Ray with `dstack`, you have to first run a Ray cluster. The task below runs a Ray cluster on an existing fleet: -
+
```yaml type: task -name: ray-ragen-cluster +name: ray-cluster nodes: 2 @@ -76,7 +76,7 @@ Now, if you run this task via `dstack apply`, it will automatically forward the
```shell -$ dstack apply -f examples/distributed-training/ray-ragen/.dstack.yml +$ dstack apply -f ray-cluster.dstack.yml ```
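Once the cluster task is provisioned, jobs are submitted against the forwarded Ray dashboard port (8265 in the configuration above). A minimal sketch, assuming the run is attached locally and an illustrative `train.py` entrypoint:

```shell
# Forward the Ray dashboard / job API port of the running task to localhost
$ dstack attach ray-cluster

# Submit a job to the forwarded Ray head node (the entrypoint name is illustrative)
$ ray job submit --address http://localhost:8265 -- python train.py
```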
@@ -130,6 +130,5 @@ $ ray job submit \ Using Ray via `dstack` is a powerful way to get access to the rich Ray ecosystem while benefiting from `dstack`'s provisioning capabilities. !!! info "What's next" - 1. Check the [Clusters](https://dstack.ai/docs/guides/clusters) guide - 2. Read about [distributed tasks](https://dstack.ai/docs/concepts/tasks#distributed-tasks) and [fleets](https://dstack.ai/docs/concepts/fleets) - 3. Browse Ray's [docs](https://docs.ray.io/en/latest/train/examples.html) for other examples. + 1. Read about [distributed tasks](../../docs/concepts/tasks.md#distributed-tasks), [fleets](../../docs/concepts/fleets.md), and [cluster placement](../../docs/concepts/fleets.md#cluster-placement) + 2. Browse Ray's [docs](https://docs.ray.io/en/latest/train/examples.html) for other examples. diff --git a/docs/examples/distributed-training/ray-ragen/index.md b/docs/examples/distributed-training/ray-ragen/index.md deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/examples/distributed-training/trl/README.md b/docs/examples/distributed-training/trl.md similarity index 83% rename from examples/distributed-training/trl/README.md rename to docs/examples/distributed-training/trl.md index 47d3f6f888..3a25c04b48 100644 --- a/examples/distributed-training/trl/README.md +++ b/docs/examples/distributed-training/trl.md @@ -8,7 +8,7 @@ description: Distributed fine-tuning with TRL, Accelerate, and DeepSpeed This example walks you through how to run distributed fine-tuning using [TRL](https://github.com/huggingface/trl), [Accelerate](https://github.com/huggingface/accelerate), and [DeepSpeed](https://github.com/deepspeedai/DeepSpeed). !!! info "Prerequisites" - Before running a distributed task, make sure to create a fleet with `placement` set to `cluster` (can be a [managed fleet](https://dstack.ai/docs/concepts/fleets#cluster-placement) or an [SSH fleet](https://dstack.ai/docs/concepts/fleets#ssh-placement)). + Before running a distributed task, make sure to create a fleet with `placement` set to `cluster` (can be a [managed fleet](../../docs/concepts/fleets.md#cluster-placement) or an [SSH fleet](../../docs/concepts/fleets.md#ssh-placement)). ## Define a configuration @@ -16,7 +16,7 @@ Once the fleet is created, define a distributed task configuration. Here's an ex === "FSDP" -
+
```yaml type: task name: trl-train-fsdp-distrib @@ -73,7 +73,7 @@ Once the fleet is created, define a distributed task configuration. Here's an ex === "DeepSpeed ZeRO-3" -
+
```yaml type: task name: trl-train-deepspeed-distrib @@ -133,7 +133,7 @@ Once the fleet is created, define a distributed task configuration. Here's an ex ### Apply the configuration -To run a configuration, use the [`dstack apply`](https://dstack.ai/docs/reference/cli/dstack/apply.md) command. +To run a configuration, use the [`dstack apply`](../../docs/reference/cli/dstack/apply.md) command.
@@ -141,7 +141,7 @@ To run a configuration, use the [`dstack apply`](https://dstack.ai/docs/referenc $ HF_TOKEN=... $ WANDB_API_KEY=... $ HUB_MODEL_ID=... -$ dstack apply -f examples/distributed-training/trl/fsdp.dstack.yml +$ dstack apply -f train-distrib.dstack.yml # BACKEND RESOURCES INSTANCE TYPE PRICE 1 ssh (remote) cpu=208 mem=1772GB H100:80GB:8 instance $0 idle @@ -155,6 +155,6 @@ Provisioning...
!!! info "What's next?" - 1. Read the [clusters](https://dstack.ai/docs/guides/clusters) guide - 2. Check [dev environments](https://dstack.ai/docs/concepts/dev-environments), [tasks](https://dstack.ai/docs/concepts/tasks), - [services](https://dstack.ai/docs/concepts/services), and [fleets](https://dstack.ai/docs/concepts/fleets) + 1. Check [dev environments](../../docs/concepts/dev-environments.md), [tasks](../../docs/concepts/tasks.md), + [services](../../docs/concepts/services.md), and [fleets](../../docs/concepts/fleets.md) + 2. Read about [cluster placement](../../docs/concepts/fleets.md#cluster-placement) diff --git a/docs/examples/distributed-training/trl/index.md b/docs/examples/distributed-training/trl/index.md deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/examples/inference/nim/README.md b/docs/examples/inference/nim.md similarity index 80% rename from examples/inference/nim/README.md rename to docs/examples/inference/nim.md index 680c51f498..263baa2737 100644 --- a/examples/inference/nim/README.md +++ b/docs/examples/inference/nim.md @@ -8,7 +8,7 @@ description: Deploying Nemotron-3-Super-120B-A12B using NVIDIA NIM This example shows how to deploy Nemotron-3-Super-120B-A12B using [NVIDIA NIM](https://docs.nvidia.com/nim/large-language-models/latest/getting-started.html) and `dstack`. ??? info "Prerequisites" - Once `dstack` is [installed](https://dstack.ai/docs/installation), clone the repo with examples. + Once `dstack` is [installed](../../docs/installation.md), clone the repo with examples.
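The clone commands themselves fall outside this hunk; a typical sequence, with the working directory assumed, looks like:

```shell
# Clone the dstack repository to get the example configurations
$ git clone https://github.com/dstackai/dstack
$ cd dstack
```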
@@ -23,7 +23,7 @@ This example shows how to deploy Nemotron-3-Super-120B-A12B using [NVIDIA NIM](h Here's an example of a service that deploys Nemotron-3-Super-120B-A12B using NIM. -
+
```yaml type: service @@ -54,13 +54,13 @@ resources: ### Running a configuration -Save the configuration above as `nemotron120.dstack.yml`, then use the -[`dstack apply`](https://dstack.ai/docs/reference/cli/dstack/apply.md) command. +Save the configuration above as `service.dstack.yml`, then use the +[`dstack apply`](../../docs/reference/cli/dstack/apply.md) command.
```shell $ NGC_API_KEY=... -$ dstack apply -f nemotron120.dstack.yml +$ dstack apply -f service.dstack.yml ```
@@ -91,9 +91,9 @@ $ curl http://127.0.0.1:3000/proxy/services/main/nemotron120/v1/chat/completions
-When a [gateway](https://dstack.ai/docs/concepts/gateways/) is configured, the service endpoint will be available at `https://nemotron120./`. +When a [gateway](../../docs/concepts/gateways.md) is configured, the service endpoint will be available at `https://nemotron120./`. ## What's next? -1. Check [services](https://dstack.ai/docs/services) +1. Check [services](../../docs/concepts/services.md) 2. Browse the [Nemotron-3-Super-120B-A12B model page](https://build.nvidia.com/nvidia/nemotron-3-super-120b-a12b) diff --git a/docs/examples/inference/nim/index.md b/docs/examples/inference/nim/index.md deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/examples/inference/sglang/README.md b/docs/examples/inference/sglang.md similarity index 89% rename from examples/inference/sglang/README.md rename to docs/examples/inference/sglang.md index 3f2694c655..feda39a46d 100644 --- a/examples/inference/sglang/README.md +++ b/docs/examples/inference/sglang.md @@ -9,7 +9,7 @@ This example shows how to deploy `Qwen/Qwen3.6-27B` using [SGLang](https://github.com/sgl-project/sglang) and `dstack`. > For a `DeepSeek-V4-Pro` deployment on `B200:8`, see the -[DeepSeek V4](../../models/deepseek-v4/index.md) model page. +[DeepSeek V4](../models/deepseek-v4.md) model page. ## Apply a configuration @@ -18,7 +18,7 @@ Here's an example of a service that deploys === "NVIDIA" -
+
```yaml type: service @@ -53,7 +53,7 @@ Here's an example of a service that deploys === "AMD" -
+
```yaml type: service @@ -94,13 +94,13 @@ guidance: a pinned ROCm image, tensor parallelism across all four GPUs, and the standard `qwen3` reasoning parser without extra ROCm-specific tuning flags. The first startup on MI300X can take longer while SGLang compiles ROCm kernels. -Save one of the configurations above as `qwen36.dstack.yml`, then use the -[`dstack apply`](https://dstack.ai/docs/reference/cli/dstack/apply.md) command. +Save one of the configurations above as `service.dstack.yml`, then use the +[`dstack apply`](../../docs/reference/cli/dstack/apply.md) command.
```shell -$ dstack apply -f qwen36.dstack.yml +$ dstack apply -f service.dstack.yml ```
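Once the service is running, it can be queried through the server's OpenAI-compatible proxy; a sketch, assuming a local server at `127.0.0.1:3000`, the `main` project, and the model name from the configuration above (an `Authorization: Bearer <dstack token>` header may also be required):

```shell
$ curl http://127.0.0.1:3000/proxy/services/main/qwen36/v1/chat/completions \
    -H 'Content-Type: application/json' \
    -d '{
          "model": "Qwen/Qwen3.6-27B",
          "messages": [{"role": "user", "content": "What is dstack?"}]
        }'
```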
@@ -132,7 +132,7 @@ Qwen3.6 uses thinking mode by default. To disable thinking, pass `"chat_template_kwargs": {"enable_thinking": false}` in the request body. To enable tool calling, add `--tool-call-parser qwen3_coder` to the serve command. -> If a [gateway](https://dstack.ai/docs/concepts/gateways/) is configured (e.g. to enable auto-scaling, HTTPS, rate limits, etc.), the service endpoint will be available at `https://qwen36./`. +> If a [gateway](../../docs/concepts/gateways.md) is configured (e.g. to enable auto-scaling, HTTPS, rate limits, etc.), the service endpoint will be available at `https://qwen36./`. ## Configuration options @@ -221,5 +221,5 @@ Currently, auto-scaling only supports `rps` as the metric. TTFT and ITL metrics ## What's next? -1. Read about [services](https://dstack.ai/docs/concepts/services) and [gateways](https://dstack.ai/docs/concepts/gateways) +1. Read about [services](../../docs/concepts/services.md) and [gateways](../../docs/concepts/gateways.md) 2. Browse the [Qwen 3.6 SGLang cookbook](https://docs.sglang.io/cookbook/autoregressive/Qwen/Qwen3.6) and the [SGLang server arguments reference](https://docs.sglang.ai/advanced_features/server_arguments.html) diff --git a/docs/examples/inference/sglang/index.md b/docs/examples/inference/sglang/index.md deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/examples/inference/trtllm/README.md b/docs/examples/inference/trtllm.md similarity index 83% rename from examples/inference/trtllm/README.md rename to docs/examples/inference/trtllm.md index ae3666d225..8f95cefc63 100644 --- a/examples/inference/trtllm/README.md +++ b/docs/examples/inference/trtllm.md @@ -13,7 +13,7 @@ This example shows how to deploy `nvidia/Qwen3-235B-A22B-FP8` using Here's an example of a service that deploys `nvidia/Qwen3-235B-A22B-FP8` using TensorRT-LLM. -
+
```yaml type: service @@ -53,12 +53,12 @@ resources: ```
-Apply it with [`dstack apply`](https://dstack.ai/docs/reference/cli/dstack/apply.md): +Apply it with [`dstack apply`](../../docs/reference/cli/dstack/apply.md):
```shell -$ dstack apply -f qwen235.dstack.yml +$ dstack apply -f service.dstack.yml ```
@@ -90,10 +90,10 @@ $ curl http://127.0.0.1:3000/proxy/services/main/qwen235/v1/chat/completions \
-When a [gateway](https://dstack.ai/docs/concepts/gateways/) is configured, the service endpoint will be available at `https://qwen235./`. +When a [gateway](../../docs/concepts/gateways.md) is configured, the service endpoint will be available at `https://qwen235./`. ## What's next? -1. Read about [services](https://dstack.ai/docs/concepts/services) and [gateways](https://dstack.ai/docs/concepts/gateways) +1. Read about [services](../../docs/concepts/services.md) and [gateways](../../docs/concepts/gateways.md) 2. Browse the [TensorRT-LLM deployment guides](https://nvidia.github.io/TensorRT-LLM/deployment-guide/index.html) and the [Qwen3 deployment guide](https://nvidia.github.io/TensorRT-LLM/deployment-guide/deployment-guide-for-qwen3-on-trtllm.html) 3. See the [`trtllm-serve` reference](https://nvidia.github.io/TensorRT-LLM/commands/trtllm-serve/trtllm-serve.html) diff --git a/docs/examples/inference/trtllm/index.md b/docs/examples/inference/trtllm/index.md deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/examples/inference/vllm/README.md b/docs/examples/inference/vllm.md similarity index 77% rename from examples/inference/vllm/README.md rename to docs/examples/inference/vllm.md index 75d6add9be..4ac880defc 100644 --- a/examples/inference/vllm/README.md +++ b/docs/examples/inference/vllm.md @@ -15,7 +15,7 @@ Here's an example of a service that deploys === "NVIDIA" -
+
```yaml type: service @@ -49,7 +49,7 @@ Here's an example of a service that deploys === "AMD" -
+
```yaml type: service @@ -88,13 +88,13 @@ Qwen3.6-27B is a multimodal model. For text-only workloads, add `--language-model-only` to free more memory for the KV cache. To enable tool calling, add `--enable-auto-tool-choice --tool-call-parser qwen3_coder`. -Save one of the configurations above as `qwen36.dstack.yml`, then use the -[`dstack apply`](https://dstack.ai/docs/reference/cli/dstack/apply.md) command. +Save one of the configurations above as `service.dstack.yml`, then use the +[`dstack apply`](../../docs/reference/cli/dstack/apply.md) command.
```shell -$ dstack apply -f qwen36.dstack.yml +$ dstack apply -f service.dstack.yml ```
@@ -122,9 +122,9 @@ curl http://127.0.0.1:3000/proxy/services/main/qwen36/v1/chat/completions \
-> If a [gateway](https://dstack.ai/docs/concepts/gateways/) is configured (e.g. to enable auto-scaling, HTTPS, rate limits, etc.), the service endpoint will be available at `https://qwen36./`. +> If a [gateway](../../docs/concepts/gateways.md) is configured (e.g. to enable auto-scaling, HTTPS, rate limits, etc.), the service endpoint will be available at `https://qwen36./`. ## What's next? -1. Read about [services](https://dstack.ai/docs/concepts/services) and [gateways](https://dstack.ai/docs/concepts/gateways) -2. Browse the [Qwen 3.5 & 3.6 vLLM recipe](https://docs.vllm.ai/projects/recipes/en/latest/Qwen/Qwen3.5.html) and the [SGLang](https://dstack.ai/examples/inference/sglang/) example +1. Read about [services](../../docs/concepts/services.md) and [gateways](../../docs/concepts/gateways.md) +2. Browse the [Qwen 3.5 & 3.6 vLLM recipe](https://docs.vllm.ai/projects/recipes/en/latest/Qwen/Qwen3.5.html) and the [SGLang](../inference/sglang.md) example diff --git a/docs/examples/inference/vllm/index.md b/docs/examples/inference/vllm/index.md deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/examples/models/deepseek-v4/README.md b/docs/examples/models/deepseek-v4.md similarity index 93% rename from examples/models/deepseek-v4/README.md rename to docs/examples/models/deepseek-v4.md index b36a343018..7efd9977e8 100644 --- a/examples/models/deepseek-v4/README.md +++ b/docs/examples/models/deepseek-v4.md @@ -6,7 +6,7 @@ description: Deploying DeepSeek-V4-Pro using SGLang on NVIDIA B200:8 # DeepSeek V4 This example shows how to deploy `deepseek-ai/DeepSeek-V4-Pro` as a -[service](https://dstack.ai/docs/services) using +[service](../../docs/concepts/services.md) using [SGLang](https://github.com/sgl-project/sglang) and `dstack`. ## Apply a configuration @@ -64,7 +64,7 @@ This configuration uses the single-node Blackwell `DeepSeek-V4-Pro` recipe shape for `8 x NVIDIA B200`. Export your Hugging Face token and apply the configuration with -[`dstack apply`](https://dstack.ai/docs/reference/cli/dstack/apply.md). +[`dstack apply`](../../docs/reference/cli/dstack/apply.md).
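That step follows the same pattern as the other example pages; a sketch, with the configuration file name assumed:

```shell
$ HF_TOKEN=...
$ dstack apply -f deepseek-v4.dstack.yml
```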
@@ -151,4 +151,4 @@ This returns both: 1. Read the [DeepSeek-V4-Pro model card](https://huggingface.co/deepseek-ai/DeepSeek-V4-Pro) 2. Read the [DeepSeek-V4 SGLang cookbook](https://docs.sglang.io/cookbook/autoregressive/DeepSeek/DeepSeek-V4) -3. Browse the dedicated [SGLang](https://dstack.ai/examples/inference/sglang/) and [vLLM](https://dstack.ai/examples/inference/vllm/) examples +3. Browse the dedicated [SGLang](../inference/sglang.md) and [vLLM](../inference/vllm.md) examples diff --git a/docs/examples/models/deepseek-v4/index.md b/docs/examples/models/deepseek-v4/index.md deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/examples/models/qwen36/README.md b/docs/examples/models/qwen36.md similarity index 91% rename from examples/models/qwen36/README.md rename to docs/examples/models/qwen36.md index bc92271b27..3723e36fa0 100644 --- a/examples/models/qwen36/README.md +++ b/docs/examples/models/qwen36.md @@ -6,7 +6,7 @@ description: Deploying Qwen3.6-27B using SGLang on NVIDIA and AMD GPUs # Qwen 3.6 This example shows how to deploy `Qwen/Qwen3.6-27B` as a -[service](https://dstack.ai/docs/services) using +[service](../../docs/concepts/services.md) using [SGLang](https://github.com/sgl-project/sglang) and `dstack`. ## Apply a configuration @@ -92,7 +92,7 @@ The NVIDIA and AMD configurations above use pinned SGLang images and the same straightforward 4-GPU layout used across the Qwen 3.6 docs and examples. Apply the configuration with -[`dstack apply`](https://dstack.ai/docs/reference/cli/dstack/apply.md). +[`dstack apply`](../../docs/reference/cli/dstack/apply.md).
@@ -162,7 +162,7 @@ curl http://127.0.0.1:3000/proxy/services/main/qwen36/v1/chat/completions \ 1. Read the [Qwen/Qwen3.6-27B model card](https://huggingface.co/Qwen/Qwen3.6-27B) 2. Read the [Qwen 3.6 SGLang cookbook](https://docs.sglang.io/cookbook/autoregressive/Qwen/Qwen3.6) 3. Read the [Qwen 3.5 & 3.6 vLLM recipe](https://docs.vllm.ai/projects/recipes/en/latest/Qwen/Qwen3.5.html) -4. Browse the dedicated [SGLang](https://dstack.ai/examples/inference/sglang/) - and [vLLM](https://dstack.ai/examples/inference/vllm/) examples -5. Check the [AMD](https://dstack.ai/examples/accelerators/amd/) example for +4. Browse the dedicated [SGLang](../inference/sglang.md) + and [vLLM](../inference/vllm.md) examples +5. Check the [AMD](../accelerators/amd.md) example for more AMD deployment and training configurations diff --git a/docs/examples/models/qwen36/index.md b/docs/examples/models/qwen36/index.md deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/examples/single-node-training/axolotl/README.md b/docs/examples/single-node-training/axolotl.md similarity index 78% rename from examples/single-node-training/axolotl/README.md rename to docs/examples/single-node-training/axolotl.md index 7781139e0b..3ab19d0502 100644 --- a/examples/single-node-training/axolotl/README.md +++ b/docs/examples/single-node-training/axolotl.md @@ -8,7 +8,7 @@ description: Fine-tuning models with Axolotl using FSDP and QLoRA This example shows how to use [Axolotl](https://github.com/OpenAccess-AI-Collective/axolotl) with `dstack` to fine-tune 4-bit Quantized `Llama-4-Scout-17B-16E` using SFT with FSDP and QLoRA. ??? info "Prerequisites" - Once `dstack` is [installed](https://dstack.ai/docs/installation), clone the repo with examples. + Once `dstack` is [installed](../../docs/installation.md), clone the repo with examples.
@@ -25,7 +25,7 @@ Axolotl reads the model, QLoRA, and dataset arguments, as well as trainer config Below is a task configuration that does fine-tuning. -
+
```yaml type: task @@ -63,7 +63,7 @@ resources: The task uses Axolotl's Docker image, where Axolotl is already pre-installed. !!! info "AMD" - The example above uses NVIDIA accelerators. To use it with AMD, check out [AMD](https://dstack.ai/examples/accelerators/amd#axolotl). + The example above uses NVIDIA accelerators. To use it with AMD, check out [AMD](../accelerators/amd.md#axolotl). ## Run the configuration @@ -77,7 +77,7 @@ $ HF_TOKEN=... $ WANDB_API_KEY=... $ WANDB_PROJECT=... $ HUB_MODEL_ID=... -$ dstack apply -f examples/single-node-training/axolotl/.dstack.yml +$ dstack apply -f train.dstack.yml # BACKEND RESOURCES INSTANCE TYPE PRICE 1 vastai (cz-czechia) cpu=64 mem=128GB H100:80GB:2 18794506 $3.8907 @@ -94,7 +94,7 @@ Provisioning... ## What's next? -1. Browse the [Axolotl distributed training](https://dstack.ai/docs/examples/distributed-training/axolotl) example -2. Check [dev environments](https://dstack.ai/docs/dev-environments), [tasks](https://dstack.ai/docs/tasks), - [services](https://dstack.ai/docs/services), [fleets](https://dstack.ai/docs/concepts/fleets) -3. See the [AMD](https://dstack.ai/examples/accelerators/amd#axolotl) example +1. Browse the [Axolotl distributed training](../distributed-training/axolotl.md) example +2. Check [dev environments](../../docs/concepts/dev-environments.md), [tasks](../../docs/concepts/tasks.md), + [services](../../docs/concepts/services.md), [fleets](../../docs/concepts/fleets.md) +3. See the [AMD](../accelerators/amd.md#axolotl) example diff --git a/docs/examples/single-node-training/axolotl/index.md b/docs/examples/single-node-training/axolotl/index.md deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/examples/single-node-training/trl/README.md b/docs/examples/single-node-training/trl.md similarity index 83% rename from examples/single-node-training/trl/README.md rename to docs/examples/single-node-training/trl.md index 82dca87a98..7295055259 100644 --- a/examples/single-node-training/trl/README.md +++ b/docs/examples/single-node-training/trl.md @@ -11,7 +11,7 @@ This example walks you through how to use [TRL](https://github.com/huggingface/t Below is a task configuration that does fine-tuning. -
+
```yaml type: task @@ -74,7 +74,7 @@ resources: Change the `resources` property to specify more GPUs. !!! info "AMD" - The example above uses NVIDIA accelerators. To use it with AMD, check out [AMD](https://dstack.ai/examples/accelerators/amd#trl). + The example above uses NVIDIA accelerators. To use it with AMD, check out [AMD](../accelerators/amd.md#trl). ??? info "DeepSpeed" For more memory-efficient use of multiple GPUs, consider using DeepSpeed and ZeRO Stage 3. @@ -93,7 +93,7 @@ cloud resources and run the configuration. $ HF_TOKEN=... $ WANDB_API_KEY=... $ HUB_MODEL_ID=... -$ dstack apply -f examples/single-node-training/trl/train.dstack.yml +$ dstack apply -f train.dstack.yml # BACKEND RESOURCES INSTANCE TYPE PRICE 1 vastai (cz-czechia) cpu=64 mem=128GB H100:80GB:2 18794506 $3.8907 @@ -110,7 +110,7 @@ Provisioning... ## What's next? -1. Browse the [TRL distributed training](https://dstack.ai/docs/examples/distributed-training/trl) example -2. Check [dev environments](https://dstack.ai/docs/dev-environments), [tasks](https://dstack.ai/docs/tasks), - [services](https://dstack.ai/docs/services), and [fleets](https://dstack.ai/docs/fleets) -3. See the [AMD](https://dstack.ai/examples/accelerators/amd#trl) example +1. Browse the [TRL distributed training](../distributed-training/trl.md) example +2. Check [dev environments](../../docs/concepts/dev-environments.md), [tasks](../../docs/concepts/tasks.md), + [services](../../docs/concepts/services.md), and [fleets](../../docs/concepts/fleets.md) +3. See the [AMD](../accelerators/amd.md#trl) example diff --git a/docs/examples/single-node-training/trl/index.md b/docs/examples/single-node-training/trl/index.md deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/examples/accelerators/tenstorrent/.dstack.yml b/examples/accelerators/tenstorrent/.dstack.yml deleted file mode 100644 index 6e3319a001..0000000000 --- a/examples/accelerators/tenstorrent/.dstack.yml +++ /dev/null @@ -1,9 +0,0 @@ -type: dev-environment -name: cursor - -image: dstackai/tt-smi:latest - -ide: cursor - -resources: - gpu: n150:1 diff --git a/examples/accelerators/tenstorrent/tt-inference-server.dstack.yml b/examples/accelerators/tenstorrent/tt-inference-server.dstack.yml deleted file mode 100644 index 6f1815ead1..0000000000 --- a/examples/accelerators/tenstorrent/tt-inference-server.dstack.yml +++ /dev/null @@ -1,24 +0,0 @@ -type: service -name: tt-inference-server - -env: - - HF_TOKEN - - HF_MODEL_REPO_ID=meta-llama/Llama-3.2-1B-Instruct -image: ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-20.04-amd64:0.0.4-v0.56.0-rc47-e2e0002ac7dc -commands: - - | - . 
${PYTHON_ENV_DIR}/bin/activate - pip install "huggingface_hub[cli]" - export LLAMA_DIR="/data/models--$(echo "$HF_MODEL_REPO_ID" | sed 's/\//--/g')/" - huggingface-cli download $HF_MODEL_REPO_ID --local-dir $LLAMA_DIR - python /home/container_app_user/app/src/run_vllm_api_server.py -port: 7000 - -model: meta-llama/Llama-3.2-1B-Instruct - -# Cache downloaded model -volumes: - - /mnt/data/tt-inference-server/data:/data - -resources: - gpu: n150:1 diff --git a/examples/accelerators/tenstorrent/tt-smi.dstack.yml b/examples/accelerators/tenstorrent/tt-smi.dstack.yml deleted file mode 100644 index b9478cb166..0000000000 --- a/examples/accelerators/tenstorrent/tt-smi.dstack.yml +++ /dev/null @@ -1,10 +0,0 @@ -type: task -name: tt-smi - -image: dstackai/tt-smi:latest - -commands: - - tt-smi -s - -resources: - gpu: n150:1 diff --git a/examples/clusters/aws/fleet.dstack.yml b/examples/clusters/aws/fleet.dstack.yml deleted file mode 100644 index 9914c3df1f..0000000000 --- a/examples/clusters/aws/fleet.dstack.yml +++ /dev/null @@ -1,8 +0,0 @@ -type: fleet -name: my-efa-fleet - -nodes: 2 -placement: cluster - -resources: - gpu: H100:8 diff --git a/examples/clusters/gcp/a3-fleet.dstack.yml b/examples/clusters/gcp/a3-fleet.dstack.yml deleted file mode 100644 index 483877068d..0000000000 --- a/examples/clusters/gcp/a3-fleet.dstack.yml +++ /dev/null @@ -1,7 +0,0 @@ -type: fleet -name: a3mega-cluster -nodes: 2 -placement: cluster -instance_types: - - a3-megagpu-8g -spot_policy: auto diff --git a/examples/clusters/gcp/a3high-fleet.dstack.yml b/examples/clusters/gcp/a3high-fleet.dstack.yml deleted file mode 100644 index e9f0a9dbc2..0000000000 --- a/examples/clusters/gcp/a3high-fleet.dstack.yml +++ /dev/null @@ -1,7 +0,0 @@ -type: fleet -name: a3high-cluster -nodes: 2 -placement: cluster -instance_types: - - a3-highgpu-8g -spot_policy: auto diff --git a/examples/clusters/gcp/a3high-nccl-tests.dstack.yml b/examples/clusters/gcp/a3high-nccl-tests.dstack.yml deleted file mode 100644 index 6cacbdf54b..0000000000 --- a/examples/clusters/gcp/a3high-nccl-tests.dstack.yml +++ /dev/null @@ -1,37 +0,0 @@ -type: task -name: nccl-tests -nodes: 2 -image: us-docker.pkg.dev/gce-ai-infra/gpudirect-tcpx/nccl-plugin-gpudirecttcpx -commands: - - | - export NCCL_DEBUG=INFO - export LD_LIBRARY_PATH=/usr/local/tcpx/lib64:$LD_LIBRARY_PATH - # We use FIFO for inter-node communication - FIFO=/tmp/dstack_job - if [ ${DSTACK_NODE_RANK} -eq 0 ]; then - mkdir -p /scripts/hostfiles2 - : > /scripts/hostfiles2/hostfile8 - for ip in ${DSTACK_NODES_IPS}; do - echo "${ip} slots=${DSTACK_GPUS_PER_NODE}" >> /scripts/hostfiles2/hostfile8 - done - MPIRUN='mpirun --allow-run-as-root --hostfile /scripts/hostfiles2/hostfile8' - # Wait for other nodes - while true; do - if ${MPIRUN} -n ${DSTACK_NODES_NUM} -N 1 true >/dev/null 2>&1; then - break - fi - echo 'Waiting for nodes...' 
- sleep 5 - done - # Run NCCL Tests - NCCL_GPUDIRECTTCPX_FORCE_ACK=0 /scripts/run-allgather.sh 8 eth1,eth2,eth3,eth4 8M 8GB 2 - # Notify nodes the job is done - ${MPIRUN} -n ${DSTACK_NODES_NUM} -N 1 sh -c "echo done > ${FIFO}" - else - mkfifo ${FIFO} - # Wait for a message from the first node - cat ${FIFO} - fi -spot_policy: auto -resources: - shm_size: 16GB diff --git a/examples/clusters/gcp/a3mega-nccl-tests.dstack.yml b/examples/clusters/gcp/a3mega-nccl-tests.dstack.yml deleted file mode 100644 index 8c7e49d3f9..0000000000 --- a/examples/clusters/gcp/a3mega-nccl-tests.dstack.yml +++ /dev/null @@ -1,50 +0,0 @@ -type: task -name: nccl-tests -nodes: 2 -image: nvcr.io/nvidia/pytorch:24.04-py3 -entrypoint: "bash -c" # Need to use bash instead of default dash for nccl-env-profile.sh -commands: - - | - # Setup TCPXO NCCL env variables - NCCL_LIB_DIR="/var/lib/tcpxo/lib64" - source ${NCCL_LIB_DIR}/nccl-env-profile-ll128.sh - export NCCL_FASTRAK_CTRL_DEV=enp0s12 - export NCCL_FASTRAK_IFNAME=enp6s0,enp7s0,enp13s0,enp14s0,enp134s0,enp135s0,enp141s0,enp142s0 - export NCCL_SOCKET_IFNAME=enp0s12 - export NCCL_FASTRAK_LLCM_DEVICE_DIRECTORY="/dev/aperture_devices" - export LD_LIBRARY_PATH="${NCCL_LIB_DIR}:${LD_LIBRARY_PATH}" - # Build NCCL Tests - git clone https://github.com/NVIDIA/nccl-tests.git - cd nccl-tests - MPI=1 CC=mpicc CXX=mpicxx make -j - cd build - # We use FIFO for inter-node communication - FIFO=/tmp/dstack_job - if [ ${DSTACK_NODE_RANK} -eq 0 ]; then - sleep 10 - echo "${DSTACK_NODES_IPS}" > hostfile - MPIRUN='mpirun --allow-run-as-root --hostfile hostfile' - # Wait for other nodes - while true; do - if ${MPIRUN} -n ${DSTACK_NODES_NUM} -N 1 true >/dev/null 2>&1; then - break - fi - echo 'Waiting for nodes...' - sleep 5 - done - # Run NCCL Tests - ${MPIRUN} \ - -n ${DSTACK_GPUS_NUM} -N ${DSTACK_GPUS_PER_NODE} \ - --mca btl tcp,self --mca btl_tcp_if_exclude lo,docker0 \ - $(env | awk -F= '{print "-x", $1}' | xargs) \ - ./all_gather_perf -b 8M -e 8G -f 2 -g 1 -w 5 --iters 200 -c 0; - # Notify nodes the job is done - ${MPIRUN} -n ${DSTACK_NODES_NUM} -N 1 sh -c "echo done > ${FIFO}" - else - mkfifo ${FIFO} - # Wait for a message from the first node - cat ${FIFO} - fi -spot_policy: auto -resources: - shm_size: 16GB diff --git a/examples/clusters/gcp/a4-fleet.dstack.yml b/examples/clusters/gcp/a4-fleet.dstack.yml deleted file mode 100644 index ac97e22def..0000000000 --- a/examples/clusters/gcp/a4-fleet.dstack.yml +++ /dev/null @@ -1,13 +0,0 @@ -type: fleet -name: a4-cluster - -nodes: 2 -placement: cluster - -# Specify the zone where you have configured the RoCE VPC -availability_zones: [us-west2-c] -backends: [gcp] -spot_policy: auto - -resources: - gpu: B200:8 diff --git a/examples/clusters/nccl-rccl-tests/nccl-tests.dstack.yml b/examples/clusters/nccl-rccl-tests/nccl-tests.dstack.yml deleted file mode 100644 index 4232e60a9e..0000000000 --- a/examples/clusters/nccl-rccl-tests/nccl-tests.dstack.yml +++ /dev/null @@ -1,29 +0,0 @@ -type: task -name: nccl-tests - -nodes: 2 -startup_order: workers-first -stop_criteria: master-done - -env: - - NCCL_DEBUG=INFO -commands: - - | - if [ $DSTACK_NODE_RANK -eq 0 ]; then - mpirun \ - --allow-run-as-root \ - --hostfile $DSTACK_MPI_HOSTFILE \ - -n $DSTACK_GPUS_NUM \ - -N $DSTACK_GPUS_PER_NODE \ - --bind-to none \ - /opt/nccl-tests/build/all_reduce_perf -b 8 -e 8G -f 2 -g 1 - else - sleep infinity - fi - -# Uncomment if the `kubernetes` backend requires it for `/dev/infiniband` access -#privileged: true - -resources: - gpu: nvidia:1..8 - shm_size: 16GB diff 
--git a/examples/clusters/nccl-rccl-tests/rccl-tests.dstack.yml b/examples/clusters/nccl-rccl-tests/rccl-tests.dstack.yml deleted file mode 100644 index 5beb1cd3ee..0000000000 --- a/examples/clusters/nccl-rccl-tests/rccl-tests.dstack.yml +++ /dev/null @@ -1,44 +0,0 @@ -type: task -name: rccl-tests - -nodes: 2 -startup_order: workers-first -stop_criteria: master-done - -# Mount the system libraries folder from the host -volumes: - - /usr/local/lib:/mnt/lib - -image: rocm/dev-ubuntu-22.04:6.4-complete -env: - - NCCL_DEBUG=INFO - - OPEN_MPI_HOME=/usr/lib/x86_64-linux-gnu/openmpi -commands: - # Setup MPI and build RCCL tests - - apt-get install -y git libopenmpi-dev openmpi-bin - - git clone https://github.com/ROCm/rccl-tests.git - - cd rccl-tests - - make MPI=1 MPI_HOME=$OPEN_MPI_HOME - - # Preload the RoCE driver library from the host (for Broadcom driver compatibility) - - export LD_PRELOAD=/mnt/lib/libbnxt_re-rdmav34.so - - # Run RCCL tests via MPI - - | - if [ $DSTACK_NODE_RANK -eq 0 ]; then - mpirun --allow-run-as-root \ - --hostfile $DSTACK_MPI_HOSTFILE \ - -n $DSTACK_GPUS_NUM \ - -N $DSTACK_GPUS_PER_NODE \ - --mca btl_tcp_if_include ens41np0 \ - -x LD_PRELOAD \ - -x NCCL_IB_HCA=mlx5_0/1,bnxt_re0,bnxt_re1,bnxt_re2,bnxt_re3,bnxt_re4,bnxt_re5,bnxt_re6,bnxt_re7 \ - -x NCCL_IB_GID_INDEX=3 \ - -x NCCL_IB_DISABLE=0 \ - ./build/all_reduce_perf -b 8M -e 8G -f 2 -g 1 -w 5 --iters 20 -c 0; - else - sleep infinity - fi - -resources: - gpu: MI300X:8 diff --git a/examples/distributed-training/axolotl/.dstack.yml b/examples/distributed-training/axolotl/.dstack.yml deleted file mode 100644 index 6192c689d0..0000000000 --- a/examples/distributed-training/axolotl/.dstack.yml +++ /dev/null @@ -1,49 +0,0 @@ -type: task -name: axolotl-multi-node-qlora-llama3-70b - -# Size of the cluster -nodes: 2 - -# The axolotlai/axolotl:main-latest image does not include InfiniBand or RDMA libraries, so we need to use the NGC container. -image: nvcr.io/nvidia/pytorch:25.01-py3 -# Required environment variables -env: - - HF_TOKEN - - WANDB_API_KEY - - WANDB_PROJECT - - HUB_MODEL_ID - - NCCL_DEBUG=INFO - - CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 - - ACCELERATE_LOG_LEVEL=info -# Commands of the task -commands: - # Replacing the default Torch and FlashAttention in the NCG container with Axolotl-compatible versions. - # The preinstalled versions are incompatible with Axolotl. - - pip uninstall -y torch flash-attn - - pip install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 --index-url https://download.pytorch.org/whl/test/cu124 - - pip install --no-build-isolation axolotl[flash-attn,deepspeed] - - wget https://raw.githubusercontent.com/huggingface/trl/main/examples/accelerate_configs/fsdp1.yaml - - wget https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/examples/llama-3/qlora-fsdp-70b.yaml - # Axolotl includes hf-xet version 1.1.0, which fails during downloads. Replacing it with the latest version (1.1.2). 
- - pip uninstall -y hf-xet - - pip install hf-xet --no-cache-dir - - | - accelerate launch \ - --config_file=fsdp1.yaml \ - -m axolotl.cli.train qlora-fsdp-70b.yaml \ - --hub-model-id $HUB_MODEL_ID \ - --output-dir /checkpoints/qlora-llama3-70b \ - --wandb-project $DSTACK_RUN_NAME \ - --wandb-name $WANDB_NAME \ - --main_process_ip=$DSTACK_MASTER_NODE_IP \ - --main_process_port=8008 \ - --machine_rank=$DSTACK_NODE_RANK \ - --num_processes=$DSTACK_GPUS_NUM \ - --num_machines=$DSTACK_NODES_NUM - -resources: - gpu: 80GB:8 - shm_size: 128GB - -volumes: - - /checkpoints:/checkpoints diff --git a/examples/distributed-training/axolotl/fleet.dstack.yml b/examples/distributed-training/axolotl/fleet.dstack.yml deleted file mode 100644 index a522642091..0000000000 --- a/examples/distributed-training/axolotl/fleet.dstack.yml +++ /dev/null @@ -1,9 +0,0 @@ -type: fleet -name: axolotl-fleet - -nodes: 2 -placement: cluster - -resources: - gpu: 80GB:8 - shm_size: 128GB diff --git a/examples/distributed-training/ray-ragen/.dstack.yml b/examples/distributed-training/ray-ragen/.dstack.yml deleted file mode 100644 index 8dabde9e04..0000000000 --- a/examples/distributed-training/ray-ragen/.dstack.yml +++ /dev/null @@ -1,39 +0,0 @@ -type: task -name: ray-ragen-cluster - -nodes: 2 - -env: -- WANDB_API_KEY -image: whatcanyousee/verl:ngc-cu124-vllm0.8.5-sglang0.4.6-mcore0.12.0-te2.2 -commands: - - wget -O miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh - - bash miniconda.sh -b -p /workflow/miniconda - - eval "$(/workflow/miniconda/bin/conda shell.bash hook)" - - git clone https://github.com/RAGEN-AI/RAGEN.git - - cd RAGEN - - bash scripts/setup_ragen.sh - - conda activate ragen - - cd verl - - pip install --no-deps -e . - - pip install hf_transfer hf_xet - - pip uninstall -y ray - - pip install -U "ray[default]" - - | - if [ $DSTACK_NODE_RANK = 0 ]; then - ray start --head --port=6379; - else - ray start --address=$DSTACK_MASTER_NODE_IP:6379 - fi - -# Expose Ray dashboard port -ports: - - 8265 - -resources: - gpu: 80GB:8 - shm_size: 128GB - -# Save checkpoints on the instance -volumes: - - /checkpoints:/checkpoints diff --git a/examples/distributed-training/ray-ragen/fleet.dstack.yml b/examples/distributed-training/ray-ragen/fleet.dstack.yml deleted file mode 100644 index 04cd389254..0000000000 --- a/examples/distributed-training/ray-ragen/fleet.dstack.yml +++ /dev/null @@ -1,9 +0,0 @@ -type: fleet -name: ray-ragen-cluster-fleet - -nodes: 2 -placement: cluster - -resources: - gpu: 80GB:8 - shm_size: 128GB diff --git a/examples/distributed-training/trl/deepspeed.dstack.yml b/examples/distributed-training/trl/deepspeed.dstack.yml deleted file mode 100644 index 972351f6ac..0000000000 --- a/examples/distributed-training/trl/deepspeed.dstack.yml +++ /dev/null @@ -1,52 +0,0 @@ -type: task -name: trl-train-deepspeed-distrib - -# Size of the cluster -nodes: 2 - -image: nvcr.io/nvidia/pytorch:25.01-py3 - -# Required environment variables -env: - - HF_TOKEN - - WANDB_API_KEY - - HUB_MODEL_ID - - MODEL_ID=meta-llama/Llama-3.1-8B - - ACCELERATE_LOG_LEVEL=info -# Commands of the task -commands: - - pip install transformers bitsandbytes peft wandb deepspeed - - git clone https://github.com/huggingface/trl - - cd trl - - pip install . 
- - | - accelerate launch \ - --config_file=examples/accelerate_configs/deepspeed_zero3.yaml \ - --main_process_ip=$DSTACK_MASTER_NODE_IP \ - --main_process_port=8008 \ - --machine_rank=$DSTACK_NODE_RANK \ - --num_processes=$DSTACK_GPUS_NUM \ - --num_machines=$DSTACK_NODES_NUM \ - trl/scripts/sft.py \ - --model_name $MODEL_ID \ - --dataset_name OpenAssistant/oasst_top1_2023-08-25 \ - --dataset_text_field="text" \ - --per_device_train_batch_size 1 \ - --per_device_eval_batch_size 1 \ - --gradient_accumulation_steps 4 \ - --learning_rate 2e-4 \ - --report_to wandb \ - --bf16 \ - --max_seq_length 1024 \ - --attn_implementation flash_attention_2 \ - --logging_steps=10 \ - --output_dir /checkpoints/llama31-ft \ - --hub_model_id $HUB_MODEL_ID \ - --torch_dtype bfloat16 - -resources: - gpu: 80GB:8 - shm_size: 128GB - -volumes: - - /checkpoints:/checkpoints diff --git a/examples/distributed-training/trl/fleet.dstack.yml b/examples/distributed-training/trl/fleet.dstack.yml deleted file mode 100644 index 1275794e8c..0000000000 --- a/examples/distributed-training/trl/fleet.dstack.yml +++ /dev/null @@ -1,9 +0,0 @@ -type: fleet -name: trl-train-fleet - -nodes: 2 -placement: cluster - -resources: - gpu: 80GB:8 - shm_size: 128GB diff --git a/examples/distributed-training/trl/fsdp.dstack.yml b/examples/distributed-training/trl/fsdp.dstack.yml deleted file mode 100644 index b00104033b..0000000000 --- a/examples/distributed-training/trl/fsdp.dstack.yml +++ /dev/null @@ -1,52 +0,0 @@ -type: task -name: trl-train-fsdp-distrib - -# Size of the cluster -nodes: 2 - -image: nvcr.io/nvidia/pytorch:25.01-py3 - -# Required environment variables -env: - - HF_TOKEN - - WANDB_API_KEY - - HUB_MODEL_ID - - MODEL_ID=meta-llama/Llama-3.1-8B - - ACCELERATE_LOG_LEVEL=info -# Commands of the task -commands: - - pip install transformers bitsandbytes peft wandb - - git clone https://github.com/huggingface/trl - - cd trl - - pip install . 
- - | - accelerate launch \ - --config_file=examples/accelerate_configs/fsdp1.yaml \ - --main_process_ip=$DSTACK_MASTER_NODE_IP \ - --main_process_port=8008 \ - --machine_rank=$DSTACK_NODE_RANK \ - --num_processes=$DSTACK_GPUS_NUM \ - --num_machines=$DSTACK_NODES_NUM \ - trl/scripts/sft.py \ - --model_name $MODEL_ID \ - --dataset_name OpenAssistant/oasst_top1_2023-08-25 \ - --dataset_text_field="text" \ - --per_device_train_batch_size 1 \ - --per_device_eval_batch_size 1 \ - --gradient_accumulation_steps 4 \ - --learning_rate 2e-4 \ - --report_to wandb \ - --bf16 \ - --max_seq_length 1024 \ - --attn_implementation flash_attention_2 \ - --logging_steps=10 \ - --output_dir /checkpoints/llama31-ft \ - --hub_model_id $HUB_MODEL_ID \ - --torch_dtype bfloat16 - -resources: - gpu: 80GB:8 - shm_size: 128GB - -volumes: - - /checkpoints:/checkpoints diff --git a/examples/single-node-training/axolotl/.dstack.yml b/examples/single-node-training/axolotl/.dstack.yml deleted file mode 100644 index dd28618904..0000000000 --- a/examples/single-node-training/axolotl/.dstack.yml +++ /dev/null @@ -1,28 +0,0 @@ -type: task -# The name is optional, if not specified, generated randomly -name: axolotl-nvidia-llama-scout-train - -# Using the official Axolotl's Docker image -image: axolotlai/axolotl:main-latest - -# Required environment variables -env: - - HF_TOKEN - - WANDB_API_KEY - - WANDB_PROJECT - - HUB_MODEL_ID -# Commands of the task -commands: - - wget https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/examples/llama-4/scout-qlora-flexattn-fsdp2.yaml - - | - axolotl train scout-qlora-flexattn-fsdp2.yaml \ - --wandb-project $WANDB_PROJECT \ - --wandb-name $DSTACK_RUN_NAME \ - --hub-model-id $HUB_MODEL_ID - -resources: - # Four GPU (required by FSDP) - gpu: H100:4 - # Shared memory size for inter-process communication - shm_size: 64GB - disk: 500GB.. diff --git a/examples/single-node-training/trl/train.dstack.yml b/examples/single-node-training/trl/train.dstack.yml deleted file mode 100644 index 9b24ae6131..0000000000 --- a/examples/single-node-training/trl/train.dstack.yml +++ /dev/null @@ -1,54 +0,0 @@ -type: task -# The name is optional, if not specified, generated randomly -name: trl-train - -python: 3.12 - -# Required environment variables -env: - - HF_TOKEN - - WANDB_API_KEY - - HUB_MODEL_ID - - ACCELERATE_LOG_LEVEL=info -# Commands of the task -commands: - # Pin torch==2.6.0 to avoid building Flash Attention from source. - # Prebuilt Flash Attention wheels are not available for the latest torch==2.7.0. - - uv pip install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 - - uv pip install transformers bitsandbytes peft wandb - - uv pip install flash_attn --no-build-isolation - - git clone https://github.com/huggingface/trl - - cd trl - - uv pip install . 
- - | - accelerate launch \ - --config_file=examples/accelerate_configs/multi_gpu.yaml \ - --num_processes $DSTACK_GPUS_PER_NODE \ - trl/scripts/sft.py \ - --model_name meta-llama/Meta-Llama-3.1-8B \ - --dataset_name OpenAssistant/oasst_top1_2023-08-25 \ - --dataset_text_field="text" \ - --per_device_train_batch_size 1 \ - --per_device_eval_batch_size 1 \ - --gradient_accumulation_steps 4 \ - --learning_rate 2e-4 \ - --report_to wandb \ - --bf16 \ - --max_seq_length 1024 \ - --lora_r 16 \ - --lora_alpha 32 \ - --lora_target_modules q_proj k_proj v_proj o_proj \ - --load_in_4bit \ - --use_peft \ - --attn_implementation "flash_attention_2" \ - --logging_steps=10 \ - --output_dir models/llama31 \ - --hub_model_id peterschmidt85/FineLlama-3.1-8B -resources: - gpu: - # 24GB or more VRAM - memory: 24GB.. - # One or more GPU - count: 1.. - # Shared memory (for multi-gpu) - shm_size: 24GB diff --git a/mkdocs.yml b/mkdocs.yml index 1b75f0ebe5..6c82dd15a5 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -98,41 +98,41 @@ plugins: "docs/tasks.md": "docs/concepts/tasks.md" "docs/services.md": "docs/concepts/services.md" "docs/fleets.md": "docs/concepts/fleets.md" - "docs/examples/llms/llama31.md": "examples/inference/vllm/index.md" - "docs/examples/llms/llama32.md": "examples/inference/vllm/index.md" - "docs/examples/llms/qwen36.md": "examples/models/qwen36/index.md" - "examples/llms/llama31/index.md": "examples/inference/vllm/index.md" - "examples/llms/llama32/index.md": "examples/inference/vllm/index.md" - "examples/llms/qwen36/index.md": "examples/models/qwen36/index.md" - "docs/examples/accelerators/amd/index.md": "examples/accelerators/amd/index.md" - "docs/examples/deployment/nim/index.md": "examples/inference/nim/index.md" - "docs/examples/deployment/vllm/index.md": "examples/inference/vllm/index.md" + "docs/examples/llms/llama31.md": "examples/inference/vllm.md" + "docs/examples/llms/llama32.md": "examples/inference/vllm.md" + "docs/examples/llms/qwen36.md": "examples/models/qwen36.md" + "examples/llms/llama31/index.md": "examples/inference/vllm.md" + "examples/llms/llama32/index.md": "examples/inference/vllm.md" + "examples/llms/qwen36/index.md": "examples/models/qwen36.md" + "docs/examples/accelerators/amd/index.md": "examples/accelerators/amd.md" + "docs/examples/deployment/nim/index.md": "examples/inference/nim.md" + "docs/examples/deployment/vllm/index.md": "examples/inference/vllm.md" "backends.md": "docs/concepts/backends.md" "blog/monitoring-gpu-usage.md": "blog/posts/dstack-metrics.md" "blog/inactive-dev-environments-auto-shutdown.md": "blog/posts/inactivity-duration.md" "blog/data-centers-and-private-clouds.md": "blog/posts/gpu-blocks-and-proxy-jump.md" - "blog/distributed-training-with-aws-efa.md": "examples/clusters/aws/index.md" + "blog/distributed-training-with-aws-efa.md": "examples/clusters/aws.md" "blog/dstack-stats.md": "blog/posts/dstack-metrics.md" "docs/guides/metrics.md": "docs/concepts/metrics.md" "docs/guides/monitoring.md": "docs/concepts/metrics.md" "blog/nvidia-and-amd-on-vultr.md.md": "blog/posts/nvidia-and-amd-on-vultr.md" - "examples/misc/nccl-tests/index.md": "examples/clusters/nccl-rccl-tests/index.md" - "examples/misc/a3high-clusters/index.md": "examples/clusters/gcp/index.md" - "examples/misc/a3mega-clusters/index.md": "examples/clusters/gcp/index.md" - "examples/distributed-training/nccl-tests/index.md": "examples/clusters/nccl-rccl-tests/index.md" - "examples/distributed-training/rccl-tests/index.md": "examples/clusters/nccl-rccl-tests/index.md" - 
"examples/deployment/nim/index.md": "examples/inference/nim/index.md" - "examples/deployment/vllm/index.md": "examples/inference/vllm/index.md" - "examples/deployment/sglang/index.md": "examples/inference/sglang/index.md" - "examples/deployment/trtllm/index.md": "examples/inference/trtllm/index.md" - "examples/fine-tuning/trl/index.md": "examples/single-node-training/trl/index.md" - "examples/fine-tuning/axolotl/index.md": "examples/single-node-training/axolotl/index.md" - "blog/efa.md": "examples/clusters/aws/index.md" + "examples/misc/nccl-tests/index.md": "examples/clusters/nccl-rccl-tests.md" + "examples/misc/a3high-clusters/index.md": "examples/clusters/gcp.md" + "examples/misc/a3mega-clusters/index.md": "examples/clusters/gcp.md" + "examples/distributed-training/nccl-tests/index.md": "examples/clusters/nccl-rccl-tests.md" + "examples/distributed-training/rccl-tests/index.md": "examples/clusters/nccl-rccl-tests.md" + "examples/deployment/nim/index.md": "examples/inference/nim.md" + "examples/deployment/vllm/index.md": "examples/inference/vllm.md" + "examples/deployment/sglang/index.md": "examples/inference/sglang.md" + "examples/deployment/trtllm/index.md": "examples/inference/trtllm.md" + "examples/fine-tuning/trl/index.md": "examples/single-node-training/trl.md" + "examples/fine-tuning/axolotl/index.md": "examples/single-node-training/axolotl.md" + "blog/efa.md": "examples/clusters/aws.md" "docs/concepts/repos.md": "docs/concepts/dev-environments.md#repos" - "examples/clusters/a3high/index.md": "examples/clusters/gcp/index.md" - "examples/clusters/a3mega/index.md": "examples/clusters/gcp/index.md" - "examples/clusters/a4/index.md": "examples/clusters/gcp/index.md" - "examples/clusters/efa/index.md": "examples/clusters/aws/index.md" + "examples/clusters/a3high/index.md": "examples/clusters/gcp.md" + "examples/clusters/a3mega/index.md": "examples/clusters/gcp.md" + "examples/clusters/a4/index.md": "examples/clusters/gcp.md" + "examples/clusters/efa/index.md": "examples/clusters/aws.md" "docs/guides/migration.md": "docs/guides/upgrade.md" "docs/reference/api/rest/index.md": "docs/reference/api/http/index.md" - typeset @@ -242,10 +242,11 @@ nav: - Guides: - Server deployment: docs/guides/server-deployment.md - Troubleshooting: docs/guides/troubleshooting.md - - Protips: docs/guides/protips.md - - Upgrade: docs/guides/upgrade.md - - Migration: - - Slurm: docs/guides/migration/slurm.md + - More: + - Protips: docs/guides/protips.md + - Upgrade: docs/guides/upgrade.md + - Migration: + - Slurm: docs/guides/migration/slurm.md - Reference: - .dstack.yml: - dev-environment: docs/reference/dstack.yml/dev-environment.md @@ -288,31 +289,31 @@ nav: - Examples: - examples.md - Single-node training: - - TRL: examples/single-node-training/trl/index.md - - Axolotl: examples/single-node-training/axolotl/index.md + - TRL: examples/single-node-training/trl.md + - Axolotl: examples/single-node-training/axolotl.md - Distributed training: - - TRL: examples/distributed-training/trl/index.md - - Axolotl: examples/distributed-training/axolotl/index.md - - Ray+RAGEN: examples/distributed-training/ray-ragen/index.md + - TRL: examples/distributed-training/trl.md + - Axolotl: examples/distributed-training/axolotl.md + - Ray+RAGEN: examples/distributed-training/ray-ragen.md - Clusters: - - AWS: examples/clusters/aws/index.md - - GCP: examples/clusters/gcp/index.md - - Lambda: examples/clusters/lambda/index.md - - Crusoe: examples/clusters/crusoe/index.md - - Nebius: examples/clusters/nebius/index.md - - NCCL/RCCL 
tests: examples/clusters/nccl-rccl-tests/index.md + - AWS: examples/clusters/aws.md + - GCP: examples/clusters/gcp.md + - Lambda: examples/clusters/lambda.md + - Crusoe: examples/clusters/crusoe.md + - Nebius: examples/clusters/nebius.md + - NCCL/RCCL tests: examples/clusters/nccl-rccl-tests.md - Inference: - - SGLang: examples/inference/sglang/index.md - - vLLM: examples/inference/vllm/index.md - - NIM: examples/inference/nim/index.md - - TensorRT-LLM: examples/inference/trtllm/index.md + - SGLang: examples/inference/sglang.md + - vLLM: examples/inference/vllm.md + - NIM: examples/inference/nim.md + - TensorRT-LLM: examples/inference/trtllm.md - Models: - - DeepSeek V4: examples/models/deepseek-v4/index.md - - Qwen 3.6: examples/models/qwen36/index.md + - DeepSeek V4: examples/models/deepseek-v4.md + - Qwen 3.6: examples/models/qwen36.md - Accelerators: - - AMD: examples/accelerators/amd/index.md - - TPU: examples/accelerators/tpu/index.md - - Tenstorrent: examples/accelerators/tenstorrent/index.md + - AMD: examples/accelerators/amd.md + - TPU: examples/accelerators/tpu.md + - Tenstorrent: examples/accelerators/tenstorrent.md - Blog: - blog/index.md - Case studies: blog/case-studies.md diff --git a/scripts/docs/gen_examples.py b/scripts/docs/gen_examples.py deleted file mode 100644 index 364ac7dfea..0000000000 --- a/scripts/docs/gen_examples.py +++ /dev/null @@ -1,31 +0,0 @@ -""" -Copies examples/**/README.md files as docs/examples/**/index.md -""" - -import logging -import os -from fnmatch import fnmatch -from pathlib import Path - -import mkdocs_gen_files -from mkdocs.structure.files import File - -FILE_PATTERN = "examples/**/index.md" -logger = logging.getLogger("mkdocs.plugins.dstack.examples") - -disable_env = "DSTACK_DOCS_DISABLE_EXAMPLES" -if os.environ.get(disable_env): - logger.warning(f"Examples generation is disabled: {disable_env} is set") - exit() - -logger.info("Generating examples documentation...") - -file: File -for file in mkdocs_gen_files.files: - if not fnmatch(file.src_uri, FILE_PATTERN): - continue - p = (Path(file.src_dir).parent / file.src_uri).parent / "README.md" - with open(p, "r") as f: - text = f.read() - with mkdocs_gen_files.open(file.src_uri, "w") as f: - f.write(text) diff --git a/scripts/docs/gen_llms_files.py b/scripts/docs/gen_llms_files.py index a7eee100ce..3e1b9ec6d0 100644 --- a/scripts/docs/gen_llms_files.py +++ b/scripts/docs/gen_llms_files.py @@ -31,18 +31,11 @@ def read_frontmatter(file_path: Path) -> Dict[str, Any]: return {} -def get_page_info(page_path: str, docs_dir: Path, repo_root: Path) -> Optional[Dict[str, str]]: +def get_page_info(page_path: str, docs_dir: Path) -> Optional[Dict[str, str]]: """Get title and description for a page from its frontmatter.""" # page_path is relative to docs_dir full_path = docs_dir / page_path - # For examples/**/index.md, read from README.md at repo root (same logic as hooks.py) - if page_path.startswith("examples/") and page_path.endswith("index.md"): - example_dir = Path(page_path).parent - readme_path = repo_root / example_dir / "README.md" - if readme_path.exists(): - full_path = readme_path - if not full_path.exists(): return None @@ -67,7 +60,6 @@ def parse_mkdocs_nav(mkdocs_config: Dict[str, Any], repo_root: str) -> List[Dict # Get docs_dir from config docs_dir = Path(repo_root) / mkdocs_config.get("docs_dir", "docs") - repo_root_path = Path(repo_root) def extract_pages(content_list): """Recursively extract all pages from a section's content, including nested subsections.""" @@ -75,7 +67,7 @@ def 
extract_pages(content_list): for item in content_list: if isinstance(item, str): # Plain string path like "examples.md" - page_info = get_page_info(item, docs_dir, repo_root_path) + page_info = get_page_info(item, docs_dir) if page_info: items.append( { @@ -89,7 +81,7 @@ def extract_pages(content_list): for title, path in item.items(): if isinstance(path, str): # Page with title - page_info = get_page_info(path, docs_dir, repo_root_path) + page_info = get_page_info(path, docs_dir) if page_info: items.append( { diff --git a/scripts/docs/hooks.py b/scripts/docs/hooks.py index ce5b3740bf..7e202c2587 100644 --- a/scripts/docs/hooks.py +++ b/scripts/docs/hooks.py @@ -14,7 +14,6 @@ WELL_KNOWN_SKILLS_DIR = ".well-known/skills" SKILL_PATH = ("skills", "dstack", "SKILL.md") -DISABLE_EXAMPLES_ENV = "DSTACK_DOCS_DISABLE_EXAMPLES" DISABLE_LLM_TXT_ENV = "DSTACK_DOCS_DISABLE_LLM_TXT" DISABLE_YAML_SCHEMAS_ENV = "DSTACK_DOCS_DISABLE_YAML_SCHEMAS" SCHEMA_REFERENCE_PREFIX = "docs/reference/" @@ -64,28 +63,9 @@ def _get_schema_expanded_content(rel_path, config, src_path=None): return _expand_schema_references(text) -def _get_materialized_content(rel_path, config): - """Return README content for examples/**/index.md stubs, else None.""" - if os.environ.get(DISABLE_EXAMPLES_ENV): - return None - - if rel_path.startswith("examples/") and rel_path.endswith("index.md"): - repo_root = os.path.dirname(config["config_file_path"]) - example_dir = os.path.dirname(rel_path) - readme_path = os.path.join(repo_root, example_dir, "README.md") - - if os.path.isfile(readme_path): - with open(readme_path, "r", encoding="utf-8") as f: - return f.read() - return None - - def on_page_read_source(page, config): - """Use README content for example stubs and expanded schema for reference docs when rendering HTML.""" + """Use expanded schema content for reference docs when rendering HTML.""" rel_path = page.file.src_uri - content = _get_materialized_content(rel_path, config) - if content is not None: - return content content = _get_schema_expanded_content(rel_path, config) if content is not None: return content @@ -93,8 +73,6 @@ def on_page_read_source(page, config): def on_config(config): - if os.environ.get(DISABLE_EXAMPLES_ENV): - log.warning("Examples documentation is disabled") if os.environ.get(DISABLE_YAML_SCHEMAS_ENV): log.warning("YAML schema reference generation is disabled") if os.environ.get(DISABLE_LLM_TXT_ENV): @@ -102,24 +80,6 @@ def on_config(config): return config -def on_page_context(context, page, config, nav): - """Override edit_url only for example stubs so Edit points to the README; other pages use theme default from edit_uri.""" - repo_url = (config.get("repo_url") or "").rstrip("/") - edit_uri = (config.get("edit_uri") or "edit/master/docs/").strip("/") - if not repo_url: - return context - # edit_uri is e.g. 
"edit/master/docs" -> branch is second segment - edit_parts = edit_uri.split("/") - branch = edit_parts[1] if len(edit_parts) >= 2 else "master" - - rel_path = page.file.src_uri - if rel_path.startswith("examples/") and rel_path.endswith("index.md"): - example_dir = os.path.dirname(rel_path) - page.edit_url = f"{repo_url}/edit/{branch}/{example_dir}/README.md" - - return context - - def on_post_build(config): """Copy .md files to site (raw) and write .well-known/skills index.""" site_dir = config["site_dir"] @@ -143,27 +103,17 @@ def on_post_build(config): src_path = os.path.join(root, file) rel_path = os.path.relpath(src_path, docs_dir).replace(os.sep, "/") - content = _get_materialized_content(rel_path, config) - - if content: - clean_name = os.path.dirname(rel_path) + ".md" - dest_path = os.path.join(site_dir, clean_name) - os.makedirs(os.path.dirname(dest_path), exist_ok=True) + content = _get_schema_expanded_content(rel_path, config, src_path=src_path) + dest_path = os.path.join(site_dir, rel_path) + os.makedirs(os.path.dirname(dest_path), exist_ok=True) + if content is not None: + # Write expanded schema content + log.info(f"Expanding schema references in {rel_path}") with open(dest_path, "w", encoding="utf-8") as f: f.write(content) else: - # Check if this is a schema reference file that needs expansion - content = _get_schema_expanded_content(rel_path, config, src_path=src_path) - dest_path = os.path.join(site_dir, rel_path) - os.makedirs(os.path.dirname(dest_path), exist_ok=True) - if content is not None: - # Write expanded schema content - log.info(f"Expanding schema references in {rel_path}") - with open(dest_path, "w", encoding="utf-8") as f: - f.write(content) - else: - # Just copy the file as-is - shutil.copy2(src_path, dest_path) + # Just copy the file as-is + shutil.copy2(src_path, dest_path) _write_well_known_skills(config, site_dir) _generate_llms_files(config, site_dir) From 8531602fb1edcc5b8e2e3a1dd0b3a5aedd3d2b69 Mon Sep 17 00:00:00 2001 From: Andrey Cheptsov Date: Wed, 6 May 2026 22:33:29 +0200 Subject: [PATCH 2/7] Move examples under /docs/, merge single and distributed training - Move docs/examples/ to docs/docs/examples/ so URLs become /docs/examples/... instead of /examples/.... The old /examples/// URLs continue to work via redirects, including the recently-published /docs/examples/ {single-node-training,distributed-training}/ paths. - Merge "Single-node training" and "Distributed training" example sections into a single "Training" section. TRL and Axolotl pages now contain both variants under top-level "Single-node training" and "Distributed training" H2 sections; Ray+RAGEN moves over unchanged. - Convert remaining absolute https://dstack.ai/(docs|examples)/... links to relative .md links throughout the moved example pages and the concept docs that point into them. Drop dead /docs/guides/{clusters,kubernetes} links (target pages were removed earlier) and replace with anchor links to the Kubernetes backend / cluster placement sections where appropriate. - Inline two GCP NCCL test yamls (a3mega-nccl-tests, a3high-nccl-tests) that were previously referenced via dead "Source code" admonitions. 
--- docs/assets/stylesheets/extra.css | 8 +- ...d-kubernetes-2024-recap-and-whats-ahead.md | 2 +- docs/blog/posts/changelog-07-25.md | 2 +- docs/blog/posts/ea-gtc25.md | 2 +- docs/blog/posts/gpu-health-checks.md | 4 +- docs/blog/posts/intel-gaudi.md | 2 +- docs/blog/posts/kubernetes-beta.md | 4 +- docs/blog/posts/mpi.md | 2 +- docs/blog/posts/nebius-in-dstack-sky.md | 4 +- docs/blog/posts/pd-disaggregation.md | 2 +- docs/blog/posts/toffee.md | 2 +- docs/docs/concepts/backends.md | 2 +- docs/docs/concepts/fleets.md | 15 +- docs/docs/concepts/gateways.md | 4 +- docs/docs/concepts/services.md | 4 +- docs/docs/concepts/tasks.md | 8 +- docs/{ => docs}/examples.md | 73 ++--- docs/{ => docs}/examples/accelerators/amd.md | 10 +- .../examples/accelerators/intel/index.md | 0 .../examples/accelerators/tenstorrent.md | 10 +- docs/{ => docs}/examples/accelerators/tpu.md | 10 +- docs/{ => docs}/examples/clusters/aws.md | 6 +- docs/{ => docs}/examples/clusters/crusoe.md | 14 +- docs/{ => docs}/examples/clusters/gcp.md | 10 +- docs/{ => docs}/examples/clusters/lambda.md | 16 +- .../examples/clusters/nccl-rccl-tests.md | 10 +- docs/{ => docs}/examples/clusters/nebius.md | 12 +- docs/{ => docs}/examples/inference/nim.md | 8 +- docs/{ => docs}/examples/inference/sglang.md | 6 +- docs/{ => docs}/examples/inference/trtllm.md | 6 +- docs/{ => docs}/examples/inference/vllm.md | 6 +- .../examples/llms/deepseek/index.md | 0 docs/{ => docs}/examples/llms/llama/index.md | 0 .../examples/misc/docker-compose/index.md | 0 .../{ => docs}/examples/models/deepseek-v4.md | 4 +- docs/{ => docs}/examples/models/qwen36.md | 4 +- .../{ => docs}/examples/models/wan22/index.md | 0 docs/docs/examples/training/axolotl.md | 185 ++++++++++++ .../examples/training}/ray-ragen.md | 4 +- docs/docs/examples/training/trl.md | 272 ++++++++++++++++++ docs/docs/guides/migration/slurm.md | 2 +- docs/docs/quickstart.md | 2 +- docs/examples/distributed-training/axolotl.md | 100 ------- docs/examples/distributed-training/trl.md | 160 ----------- docs/examples/single-node-training/axolotl.md | 100 ------- docs/examples/single-node-training/trl.md | 116 -------- docs/overrides/main.html | 9 +- mkdocs.yml | 132 +++++---- skills/dstack/SKILL.md | 6 +- 49 files changed, 670 insertions(+), 690 deletions(-) rename docs/{ => docs}/examples.md (67%) rename docs/{ => docs}/examples/accelerators/amd.md (95%) rename docs/{ => docs}/examples/accelerators/intel/index.md (100%) rename docs/{ => docs}/examples/accelerators/tenstorrent.md (94%) rename docs/{ => docs}/examples/accelerators/tpu.md (95%) rename docs/{ => docs}/examples/clusters/aws.md (92%) rename docs/{ => docs}/examples/clusters/crusoe.md (90%) rename docs/{ => docs}/examples/clusters/gcp.md (94%) rename docs/{ => docs}/examples/clusters/lambda.md (89%) rename docs/{ => docs}/examples/clusters/nccl-rccl-tests.md (89%) rename docs/{ => docs}/examples/clusters/nebius.md (92%) rename docs/{ => docs}/examples/inference/nim.md (84%) rename docs/{ => docs}/examples/inference/sglang.md (93%) rename docs/{ => docs}/examples/inference/trtllm.md (87%) rename docs/{ => docs}/examples/inference/vllm.md (88%) rename docs/{ => docs}/examples/llms/deepseek/index.md (100%) rename docs/{ => docs}/examples/llms/llama/index.md (100%) rename docs/{ => docs}/examples/misc/docker-compose/index.md (100%) rename docs/{ => docs}/examples/models/deepseek-v4.md (97%) rename docs/{ => docs}/examples/models/qwen36.md (97%) rename docs/{ => docs}/examples/models/wan22/index.md (100%) create mode 100644 
docs/docs/examples/training/axolotl.md rename docs/{examples/distributed-training => docs/examples/training}/ray-ragen.md (93%) create mode 100644 docs/docs/examples/training/trl.md delete mode 100644 docs/examples/distributed-training/axolotl.md delete mode 100644 docs/examples/distributed-training/trl.md delete mode 100644 docs/examples/single-node-training/axolotl.md delete mode 100644 docs/examples/single-node-training/trl.md diff --git a/docs/assets/stylesheets/extra.css b/docs/assets/stylesheets/extra.css index cb2d68e55d..e344baf9d0 100644 --- a/docs/assets/stylesheets/extra.css +++ b/docs/assets/stylesheets/extra.css @@ -1283,19 +1283,19 @@ html .md-footer-meta.md-typeset a:is(:focus,:hover) { - .md-tabs__item:nth-child(7) { + .md-tabs__item:nth-child(6) { margin-left: auto; padding-right: 0.5rem; } - .md-tabs__item:nth-child(n+7) .md-tabs__link { + .md-tabs__item:nth-child(n+6) .md-tabs__link { visibility: hidden; width: 35px; display: inline-block; margin-top: 12px; } - .md-tabs__item:nth-child(n+7) .md-tabs__link:before { + .md-tabs__item:nth-child(n+6) .md-tabs__link:before { width: 38px; height: 38px; margin-top: 4px; @@ -1318,7 +1318,7 @@ html .md-footer-meta.md-typeset a:is(:focus,:hover) { margin-right: -7px; } */ - .md-tabs__item:nth-child(7) .md-tabs__link:before { + .md-tabs__item:nth-child(6) .md-tabs__link:before { position: relative; content: ''; width: 34px; diff --git a/docs/blog/posts/beyond-kubernetes-2024-recap-and-whats-ahead.md b/docs/blog/posts/beyond-kubernetes-2024-recap-and-whats-ahead.md index 8980c984f1..fb43d7f3ea 100644 --- a/docs/blog/posts/beyond-kubernetes-2024-recap-and-whats-ahead.md +++ b/docs/blog/posts/beyond-kubernetes-2024-recap-and-whats-ahead.md @@ -104,7 +104,7 @@ efficient manner. ### NVIDIA -NVIDIA remains the top accelerator supported by `dstack`. Recently, we introduced a [NIM example](../../examples/inference/nim.md) +NVIDIA remains the top accelerator supported by `dstack`. Recently, we introduced a [NIM example](../../docs/examples/inference/nim.md) for model deployment, and we continue to enhance support for the rest of NVIDIA's ecosystem. ### AMD diff --git a/docs/blog/posts/changelog-07-25.md b/docs/blog/posts/changelog-07-25.md index 50c8ff032a..e231ac6a37 100644 --- a/docs/blog/posts/changelog-07-25.md +++ b/docs/blog/posts/changelog-07-25.md @@ -144,7 +144,7 @@ resources: #### AWS EFA -EFA is a network interface for EC2 that enables low-latency, high-bandwidth communication between nodes—crucial for scaling distributed deep learning. With `dstack`, EFA is automatically enabled when using supported instance types in fleets. Check out our [example](../../examples/clusters/aws.md) +EFA is a network interface for EC2 that enables low-latency, high-bandwidth communication between nodes—crucial for scaling distributed deep learning. With `dstack`, EFA is automatically enabled when using supported instance types in fleets. Check out our [example](../../docs/examples/clusters/aws.md) #### Default Docker images diff --git a/docs/blog/posts/ea-gtc25.md b/docs/blog/posts/ea-gtc25.md index 4a287a21ea..499c5402cb 100644 --- a/docs/blog/posts/ea-gtc25.md +++ b/docs/blog/posts/ea-gtc25.md @@ -85,4 +85,4 @@ By adopting tools that are cloud-agnostic and developer-friendly, EA has reduced !!! info "What's next?" 1. Check [dev environments](../../docs/concepts/dev-environments.md), [tasks](../../docs/concepts/tasks.md), [services](../../docs/concepts/services.md), and [fleets](../../docs/concepts/fleets.md) 2. 
Follow [Quickstart](../../docs/quickstart.md) - 3. Browse [Examples](../../examples.md) + 3. Browse [Examples](../../docs/examples.md) diff --git a/docs/blog/posts/gpu-health-checks.md b/docs/blog/posts/gpu-health-checks.md index 1fe89e1d1d..84746ed90f 100644 --- a/docs/blog/posts/gpu-health-checks.md +++ b/docs/blog/posts/gpu-health-checks.md @@ -51,7 +51,7 @@ A healthy instance is ready for workloads. A warning means you should monitor it This release focuses on passive checks using DCGM background health checks. These run continuously and do not interrupt workloads. -For active checks today, you can run [NCCL/RCCL tests](../../examples/clusters/nccl-rccl-tests.md) as a [distributed task](../../docs/concepts/tasks.md#distributed-tasks) to verify GPU-to-GPU communication and bandwidth across a fleet. Active tests like these can reveal network or interconnect issues that passive monitoring might miss. More built-in support for active diagnostics is planned. +For active checks today, you can run [NCCL/RCCL tests](../../docs/examples/clusters/nccl-rccl-tests.md) as a [distributed task](../../docs/concepts/tasks.md#distributed-tasks) to verify GPU-to-GPU communication and bandwidth across a fleet. Active tests like these can reveal network or interconnect issues that passive monitoring might miss. More built-in support for active diagnostics is planned. ## Supported backends @@ -68,6 +68,6 @@ If you have experience with GPU reliability or ideas for automated recovery, joi !!! info "What's next?" 1. Check [Quickstart](../../docs/quickstart.md) - 2. Explore the [clusters](../../examples.md#clusters) examples + 2. Explore the [fleets](../../docs/concepts/fleets.md#cluster-placement) guide 3. Learn more about [metrics](../../docs/concepts/metrics.md) 4. Join [Discord](https://discord.gg/u8SmfwPpMd) diff --git a/docs/blog/posts/intel-gaudi.md b/docs/blog/posts/intel-gaudi.md index 4ac0e67708..37b8c383b4 100644 --- a/docs/blog/posts/intel-gaudi.md +++ b/docs/blog/posts/intel-gaudi.md @@ -158,7 +158,7 @@ $ dstack apply -f examples/single-node-training/trl/intel/.dstack.yml -R `dstack` will automatically create containers according to the run configuration and execute them across the fleet. -> Explore our [examples](../../examples/accelerators/intel/index.md) to learn how to train and deploy large models on +> Explore our [examples](../../docs/examples/accelerators/intel/index.md) to learn how to train and deploy large models on > Intel Gaudi AI Accelerator. !!! info "Intel Tiber AI Cloud" diff --git a/docs/blog/posts/kubernetes-beta.md b/docs/blog/posts/kubernetes-beta.md index a00a429af3..64fb6117c5 100644 --- a/docs/blog/posts/kubernetes-beta.md +++ b/docs/blog/posts/kubernetes-beta.md @@ -284,7 +284,7 @@ Submit the run nccl-tests? [y/n]: y
-For more examples, explore the [distirbuted training](../../examples.md#distributed-training) section in the docs. +For more examples, explore the [training](../../docs/examples.md#training) section in the docs. ## FAQ @@ -311,5 +311,5 @@ Support for AMD GPUs is coming soon — our team is actively working on it right 2. Explore [dev environments](../../docs/concepts/dev-environments.md), [tasks](../../docs/concepts/tasks.md), [services](../../docs/concepts/services.md), and [fleets](../../docs/concepts/fleets.md) - 3. Browse the [clusters](../../examples.md#clusters) examples + 3. Browse the [fleets](../../docs/concepts/fleets.md#cluster-placement) guide 4. Join [Discord](https://discord.gg/u8SmfwPpMd) diff --git a/docs/blog/posts/mpi.md b/docs/blog/posts/mpi.md index 37cd0dc7bf..02152aad3b 100644 --- a/docs/blog/posts/mpi.md +++ b/docs/blog/posts/mpi.md @@ -100,5 +100,5 @@ as well as use MPI for other tasks. !!! info "What's next?" 1. Learn more about [dev environments](../../docs/concepts/dev-environments.md), [tasks](../../docs/concepts/tasks.md), [services](../../docs/concepts/services.md), and [fleets](../../docs/concepts/fleets.md) - 2. Check the [NCCL/RCCL tests](../../examples/clusters/nccl-rccl-tests.md) example + 2. Check the [NCCL/RCCL tests](../../docs/examples/clusters/nccl-rccl-tests.md) example 3. Join [Discord](https://discord.gg/u8SmfwPpMd) diff --git a/docs/blog/posts/nebius-in-dstack-sky.md b/docs/blog/posts/nebius-in-dstack-sky.md index 823576f377..1f911f98d3 100644 --- a/docs/blog/posts/nebius-in-dstack-sky.md +++ b/docs/blog/posts/nebius-in-dstack-sky.md @@ -104,7 +104,7 @@ $ dstack apply -f my-cluster.dstack.yml Once the fleet is ready, you can run [distributed tasks](../../docs/concepts/tasks.md#distributed-tasks). `dstack` automatically configures drivers, networking, and fast GPU-to-GPU interconnect. -To learn more, see the [clusters](../../examples/clusters/nebius.md) guide. +To learn more, see the [clusters](../../docs/examples/clusters/nebius.md) guide. With Nebius joining `dstack` Sky, users can now run on-demand and spot GPUs and clusters directly through the marketplace—gaining access to the same production grade infrastrucure Nebius customers use for frontier-scale training, without needing a separate Nebius account. @@ -124,4 +124,4 @@ Our goal is to give teams maximum flexibility while removing the complexity of m 4. Explore [dev environments](../../docs/concepts/dev-environments.md), [tasks](../../docs/concepts/tasks.md), [services](../../docs/concepts/services.md), and [fleets](../../docs/concepts/fleets.md) - 5. Read the [clusters](../../examples/clusters/nebius.md) guide + 5. Read the [clusters](../../docs/examples/clusters/nebius.md) guide diff --git a/docs/blog/posts/pd-disaggregation.md b/docs/blog/posts/pd-disaggregation.md index e9f0bc0a7c..dd3f27c9e8 100644 --- a/docs/blog/posts/pd-disaggregation.md +++ b/docs/blog/posts/pd-disaggregation.md @@ -27,7 +27,7 @@ For inference, `dstack` provides a [services](../../docs/concepts/services.md) a > If you’re new to Prefill–Decode disaggregation, see the official [SGLang docs](https://docs.sglang.io/advanced_features/pd_disaggregation.html). !!! note "Deprecation notice" - Configuring the SGLang router in a gateway is deprecated and will be disallowed in a future release. To run router and workers as separate replica groups, see [SGLang PD disaggregation (router as replica group)](https://dstack.ai/examples/inference/sglang/#pd-disaggregation). 
+ Configuring the SGLang router in a gateway is deprecated and will be disallowed in a future release. To run router and workers as separate replica groups, see [SGLang PD disaggregation (router as replica group)](../../docs/examples/inference/sglang.md#pd-disaggregation). ## Services diff --git a/docs/blog/posts/toffee.md b/docs/blog/posts/toffee.md index 190ecf8c27..512218c1bb 100644 --- a/docs/blog/posts/toffee.md +++ b/docs/blog/posts/toffee.md @@ -85,4 +85,4 @@ As Toffee’s user base and model footprint grew, investing further in home-grow !!! info "What's next?" 1. Check [dev environments](../../docs/concepts/dev-environments.md), [tasks](../../docs/concepts/tasks.md), [services](../../docs/concepts/services.md), and [fleets](../../docs/concepts/fleets.md) 2. Follow [Quickstart](../../docs/quickstart.md) - 3. Browse [Examples](../../examples.md) + 3. Browse [Examples](../../docs/examples.md) diff --git a/docs/docs/concepts/backends.md b/docs/docs/concepts/backends.md index 5bded6ba03..2f6186c6be 100644 --- a/docs/docs/concepts/backends.md +++ b/docs/docs/concepts/backends.md @@ -1188,7 +1188,7 @@ projects: This applies to offers shown in `dstack apply` (run plans), during provisioning, and in `dstack offer`. Unlike other backends, offers for the `kubernetes` backend always reflect the lower limit of the range. -> To learn more, see the [Lambda](../../examples/clusters/lambda/#kubernetes) and [Crusoe](../../examples/clusters/crusoe/#kubernetes) examples. +> To learn more, see the [Lambda](../examples/clusters/lambda/#kubernetes) and [Crusoe](../examples/clusters/crusoe/#kubernetes) examples. ### Runpod diff --git a/docs/docs/concepts/fleets.md b/docs/docs/concepts/fleets.md index 685392bd80..22057c0709 100644 --- a/docs/docs/concepts/fleets.md +++ b/docs/docs/concepts/fleets.md @@ -164,24 +164,22 @@ This property ensures that instances are interconnected. This is required for ru === "AWS" On AWS, `dstack` requires `public_ips` to be set to `false` in the backend configuration. - Refer to the [AWS](../../examples/clusters/aws.md) example for more details. + Refer to the [AWS](../examples/clusters/aws.md) example for more details. === "GCP" On GCP, you may need to configure `extra_vpcs` and `roce_vpcs` in the `gcp` backend configuration. - Refer to the [GCP](../../examples/clusters/gcp.md) examples for more details. + Refer to the [GCP](../examples/clusters/gcp.md) examples for more details. === "Nebius" On [Nebius](https://docs.nebius.com/compute/clusters/gpu), `dstack` automatically configures InfiniBand networking if it is supported by the selected instance type. === "Crusoe" On [Crusoe](https://docs.crusoecloud.com/networking/infiniband/managing-infiniband-networks), `dstack` automatically configures InfiniBand networking if it is supported by the selected instance type. - Refer to the [Crusoe](../../examples/clusters/crusoe.md#vms) example for more details. + Refer to the [Crusoe](../examples/clusters/crusoe.md#vms) example for more details. === "Kubernetes" If the Kubernetes cluster has interconnect configured, `dstack` can use it without additional setup. - See the [Lambda](../../examples/clusters/lambda.md#kubernetes) or [Crusoe](../../examples/clusters/crusoe.md#kubernetes) examples. - - > See the [Clusters](../../examples.md#clusters) examples. + See the [Lambda](../examples/clusters/lambda.md#kubernetes) or [Crusoe](../examples/clusters/crusoe.md#kubernetes) examples. @@ -211,6 +209,9 @@ This property ensures that instances are interconnected. This is required for ru +!!! 
info "Examples" + See the cluster examples for [AWS](../examples/clusters/aws.md), [GCP](../examples/clusters/gcp.md), [Lambda](../examples/clusters/lambda.md), [Crusoe](../examples/clusters/crusoe.md), [Nebius](../examples/clusters/nebius.md), and [NCCL/RCCL tests](../examples/clusters/nccl-rccl-tests.md). + ### Nodes The `nodes` property is supported only by backend fleets and specifies how many nodes `dstack` must or can provision. @@ -537,4 +538,4 @@ Use `--group-by gpu,backend` to aggregate offers. 2. Read about [Backends](backends.md) guide 3. Learn how to [export fleets](exports.md) to other projects 4. Explore the [`.dstack.yml` reference](../reference/dstack.yml/fleet.md) - 5. See the [Clusters](../../examples.md#clusters) example + 5. See the cluster examples for [AWS](../examples/clusters/aws.md), [GCP](../examples/clusters/gcp.md), [Lambda](../examples/clusters/lambda.md), [Crusoe](../examples/clusters/crusoe.md), [Nebius](../examples/clusters/nebius.md), and [NCCL/RCCL tests](../examples/clusters/nccl-rccl-tests.md) diff --git a/docs/docs/concepts/gateways.md b/docs/docs/concepts/gateways.md index 53374aa53d..5e072a6966 100644 --- a/docs/docs/concepts/gateways.md +++ b/docs/docs/concepts/gateways.md @@ -97,10 +97,10 @@ router:
-If you configure the `sglang` router, [services](../concepts/services.md) can run either [standard SGLang workers](../../examples/inference/sglang.md) or [Prefill-Decode workers](../../examples/inference/sglang.md#pd-disaggregation) (aka PD disaggregation). +If you configure the `sglang` router, [services](../concepts/services.md) can run either [standard SGLang workers](../examples/inference/sglang.md) or [Prefill-Decode workers](../examples/inference/sglang.md#pd-disaggregation) (aka PD disaggregation). !!! note "PD disaggregation" - To run services with PD disaggregation see [SGLang PD disaggregation](https://dstack.ai/examples/inference/sglang/#pd-disaggregation). + To run services with PD disaggregation see [SGLang PD disaggregation](../examples/inference/sglang.md#pd-disaggregation). !!! note "Deprecation" Configuring the SGLang router in a gateway is deprecated and will be disallowed in a future release. diff --git a/docs/docs/concepts/services.md b/docs/docs/concepts/services.md index 1923aa0655..a0e0de7936 100644 --- a/docs/docs/concepts/services.md +++ b/docs/docs/concepts/services.md @@ -1288,5 +1288,5 @@ The rolling deployment stops when all replicas are updated or when a new deploym 1. Read about [dev environments](dev-environments.md) and [tasks](tasks.md) 2. Learn how to manage [fleets](fleets.md) 3. See how to set up [gateways](gateways.md) - 4. Check the [vLLM](../../examples/inference/vllm.md) and - [NIM](../../examples/inference/nim.md) examples + 4. Check the [vLLM](../examples/inference/vllm.md) and + [NIM](../examples/inference/nim.md) examples diff --git a/docs/docs/concepts/tasks.md b/docs/docs/concepts/tasks.md index dd4a83c62c..43eb8e80cb 100644 --- a/docs/docs/concepts/tasks.md +++ b/docs/docs/concepts/tasks.md @@ -150,8 +150,10 @@ Jobs on each node communicate using their private IP addresses. Use `DSTACK_MAST `dstack` is easy to use with `accelerate`, `torchrun`, Ray, Spark, and any other distributed frameworks. -> For detailed examples, see the [distributed training](../../examples.md#distributed-training) - and [clusters](../../examples.md#clusters) examples. +!!! info "Examples" + See the training examples for [TRL](../examples/training/trl.md#distributed-training), [Axolotl](../examples/training/axolotl.md#distributed-training), and [Ray+RAGEN](../examples/training/ray-ragen.md). + + See the cluster examples for [AWS](../examples/clusters/aws.md), [GCP](../examples/clusters/gcp.md), [Lambda](../examples/clusters/lambda.md), [Crusoe](../examples/clusters/crusoe.md), [Nebius](../examples/clusters/nebius.md), and [NCCL/RCCL tests](../examples/clusters/nccl-rccl-tests.md). ??? info "Network interface" Distributed frameworks usually detect the correct network interface automatically, @@ -877,4 +879,4 @@ via the [`spot_policy`](../reference/dstack.yml/task.md#spot_policy) property. I !!! info "What's next?" 1. Read about [dev environments](dev-environments.md) and [services](services.md) 2. Learn how to manage [fleets](fleets.md) - 3. Check the [Axolotl](/examples/single-node-training/axolotl) example + 3. Check the [Axolotl](../examples/training/axolotl.md) example diff --git a/docs/examples.md b/docs/docs/examples.md similarity index 67% rename from docs/examples.md rename to docs/docs/examples.md index 5770425b9d..59203cdf8f 100644 --- a/docs/examples.md +++ b/docs/docs/examples.md @@ -14,66 +14,39 @@ hide: } --> -## Single-node training +## Training - -## Distributed training - @@ -82,7 +55,7 @@ hide: ## Clusters
-

GCP @@ -92,7 +65,7 @@ hide: Set up GCP A4 and A3 clusters with optimized networking

-

AWS @@ -102,7 +75,7 @@ hide: Set up AWS EFA clusters with optimized networking

-

Lambda @@ -112,7 +85,7 @@ hide: Set up Lambda clusters with optimized networking

-

Crusoe @@ -122,7 +95,7 @@ hide: Set up Crusoe clusters with optimized networking

-

Nebius @@ -132,7 +105,7 @@ hide: Set up Nebius clusters with optimized networking

-

NCCL/RCCL tests @@ -147,7 +120,7 @@ hide: ## Inference
-

SGLang @@ -156,7 +129,7 @@ hide: Deploy Qwen3.6-27B with SGLang

-

vLLM @@ -165,7 +138,7 @@ hide: Deploy Qwen3.6-27B with vLLM

-

NIM @@ -174,7 +147,7 @@ hide: Deploy a DeepSeek distilled model with NIM

-

TensorRT-LLM @@ -188,7 +161,7 @@ hide: ## Models
-

DeepSeek V4 @@ -199,7 +172,7 @@ hide:

-

Qwen 3.6 @@ -214,7 +187,7 @@ hide: ## Accelerators
-

AMD @@ -225,7 +198,7 @@ hide:

-

TPU @@ -236,7 +209,7 @@ hide:

-

Tenstorrent diff --git a/docs/examples/accelerators/amd.md b/docs/docs/examples/accelerators/amd.md similarity index 95% rename from docs/examples/accelerators/amd.md rename to docs/docs/examples/accelerators/amd.md index 5c0c306ce8..26f255f280 100644 --- a/docs/examples/accelerators/amd.md +++ b/docs/docs/examples/accelerators/amd.md @@ -6,12 +6,12 @@ description: Deploying and fine-tuning models on AMD MI300X GPUs using SGLang, v # AMD `dstack` supports running dev environments, tasks, and services on AMD GPUs. -You can do that by setting up an [SSH fleet](../../docs/concepts/fleets.md#ssh-fleets) +You can do that by setting up an [SSH fleet](../../concepts/fleets.md#ssh-fleets) with on-prem AMD GPUs or configuring a backend that offers AMD GPUs such as the `runpod` backend. ## Deployment -Here are examples of a [service](../../docs/concepts/services.md) that deploy +Here are examples of a [service](../../concepts/services.md) that deploy `Qwen/Qwen3.6-27B` on AMD MI300X GPUs using [SGLang](https://github.com/sgl-project/sglang) and [vLLM](https://docs.vllm.ai/en/latest/). @@ -238,6 +238,6 @@ $ dstack apply -f 2. For multi-node training, run [NCCL/RCCL tests](../clusters/nccl-rccl-tests.md) to validate AMD cluster networking. -3. Check [dev environments](../../docs/concepts/dev-environments.md), - [tasks](../../docs/concepts/tasks.md), and - [services](../../docs/concepts/services.md). +3. Check [dev environments](../../concepts/dev-environments.md), + [tasks](../../concepts/tasks.md), and + [services](../../concepts/services.md). diff --git a/docs/examples/accelerators/intel/index.md b/docs/docs/examples/accelerators/intel/index.md similarity index 100% rename from docs/examples/accelerators/intel/index.md rename to docs/docs/examples/accelerators/intel/index.md diff --git a/docs/examples/accelerators/tenstorrent.md b/docs/docs/examples/accelerators/tenstorrent.md similarity index 94% rename from docs/examples/accelerators/tenstorrent.md rename to docs/docs/examples/accelerators/tenstorrent.md index 65005fd3a4..8344ced187 100644 --- a/docs/examples/accelerators/tenstorrent.md +++ b/docs/docs/examples/accelerators/tenstorrent.md @@ -42,7 +42,7 @@ description: Running dev environments, tasks, and services on Tenstorrent Wormho

- For more details on fleet configuration, refer to [SSH fleets](../../docs/concepts/fleets.md#ssh-fleets). + For more details on fleet configuration, refer to [SSH fleets](../../concepts/fleets.md#ssh-fleets). ## Services @@ -123,10 +123,10 @@ Additionally, the model is available via `dstack`'s control plane UI: ![](https://dstack.ai/static-assets/static-assets/images/dstack-tenstorrent-model-ui.png){ width=800 } -When a [gateway](../../docs/concepts/gateways.md) is configured, the service endpoint +When a [gateway](../../concepts/gateways.md) is configured, the service endpoint is available at `https://./`. -> Services support many options, including authentication, auto-scaling policies, etc. To learn more, refer to [Services](../../docs/concepts/services.md). +> Services support many options, including authentication, auto-scaling policies, etc. To learn more, refer to [Services](../../concepts/services.md). ## Tasks @@ -159,7 +159,7 @@ resources:

-> Tasks support many options, including multi-node configuration, max duration, etc. To learn more, refer to [Tasks](../../docs/concepts/tasks.md). +> Tasks support many options, including multi-node configuration, max duration, etc. To learn more, refer to [Tasks](../../concepts/tasks.md). ## Dev environments @@ -191,7 +191,7 @@ If you run it via `dstack apply`, it will output the URL to access it via your d ![](https://dstack.ai/static-assets/static-assets/images/dstack-tenstorrent-cursor.png){ width=800 } -> Dev nevironments support many options, including inactivity and max duration, IDE configuration, etc. To learn more, refer to [Dev environments](../../docs/concepts/tasks.md). +> Dev nevironments support many options, including inactivity and max duration, IDE configuration, etc. To learn more, refer to [Dev environments](../../concepts/tasks.md). ??? info "Feedback" Found a bug, or want to request a feature? File it in the [issue tracker](https://github.com/dstackai/dstack/issues), diff --git a/docs/examples/accelerators/tpu.md b/docs/docs/examples/accelerators/tpu.md similarity index 95% rename from docs/examples/accelerators/tpu.md rename to docs/docs/examples/accelerators/tpu.md index 92640a4835..8c4d1584bb 100644 --- a/docs/examples/accelerators/tpu.md +++ b/docs/docs/examples/accelerators/tpu.md @@ -7,7 +7,7 @@ description: Deploying and fine-tuning models on Google Cloud TPUs using Optimum If you've configured the `gcp` backend in `dstack`, you can run dev environments, tasks, and services on [TPUs](https://cloud.google.com/tpu/docs/intro-to-tpu). Choose a TPU instance by specifying the TPU version and the number of cores (e.g. `v5litepod-8`) in the `gpu` property under `resources`, -or request TPUs by specifying `tpu` as `vendor` ([see examples](../../docs/guides/protips.md#gpu)). +or request TPUs by specifying `tpu` as `vendor` ([see examples](../../guides/protips.md#gpu)). Below are a few examples on using TPUs for deployment and fine-tuning. @@ -18,12 +18,12 @@ Below are a few examples on using TPUs for deployment and fine-tuning. !!! info "TPU storage" By default, each TPU VM contains a 100GB boot disk and its size cannot be changed. - If you need more storage, attach additional disks using [Volumes](../../docs/concepts/volumes.md). + If you need more storage, attach additional disks using [Volumes](../../concepts/volumes.md). ## Deployment Many serving frameworks including vLLM and TGI have TPU support. -Here's an example of a [service](../../docs/concepts/services.md) that deploys Llama 3.1 8B using +Here's an example of a [service](../../concepts/services.md) that deploys Llama 3.1 8B using [Optimum TPU](https://github.com/huggingface/optimum-tpu) and [vLLM](https://github.com/vllm-project/vllm). @@ -189,5 +189,5 @@ Note, `v5litepod` is optimized for fine-tuning transformer-based models. Each co 1. Browse [Optimum TPU](https://github.com/huggingface/optimum-tpu), [Optimum TPU TGI](https://github.com/huggingface/optimum-tpu/tree/main/text-generation-inference) and [vLLM](https://docs.vllm.ai/en/latest/getting_started/tpu-installation.html). -2. Check [dev environments](../../docs/concepts/dev-environments.md), [tasks](../../docs/concepts/tasks.md), - [services](../../docs/concepts/services.md), and [fleets](../../docs/concepts/fleets.md). +2. Check [dev environments](../../concepts/dev-environments.md), [tasks](../../concepts/tasks.md), + [services](../../concepts/services.md), and [fleets](../../concepts/fleets.md). 
diff --git a/docs/examples/clusters/aws.md b/docs/docs/examples/clusters/aws.md similarity index 92% rename from docs/examples/clusters/aws.md rename to docs/docs/examples/clusters/aws.md index 688af91e0e..54e1cd667d 100644 --- a/docs/examples/clusters/aws.md +++ b/docs/docs/examples/clusters/aws.md @@ -197,6 +197,6 @@ Provisioning... Instead of setting `python`, you can specify your own Docker image using `image`. Make sure that the image is properly configured for EFA. !!! info "What's next" - 1. Learn more about [distributed tasks](../../docs/concepts/tasks.md#distributed-tasks) and [cluster placement](../../docs/concepts/fleets.md#cluster-placement) - 2. Check [dev environments](../../docs/concepts/dev-environments.md), - [services](../../docs/concepts/services.md), and [fleets](../../docs/concepts/fleets.md) + 1. Learn more about [distributed tasks](../../concepts/tasks.md#distributed-tasks) and [cluster placement](../../concepts/fleets.md#cluster-placement) + 2. Check [dev environments](../../concepts/dev-environments.md), + [services](../../concepts/services.md), and [fleets](../../concepts/fleets.md) diff --git a/docs/examples/clusters/crusoe.md b/docs/docs/examples/clusters/crusoe.md similarity index 90% rename from docs/examples/clusters/crusoe.md rename to docs/docs/examples/clusters/crusoe.md index 2a9c108ec6..28901a8e3c 100644 --- a/docs/examples/clusters/crusoe.md +++ b/docs/docs/examples/clusters/crusoe.md @@ -67,7 +67,7 @@ $ dstack apply -f crusoe-fleet.dstack.yml This will automatically create an IB partition and provision instances with InfiniBand networking. -Once the fleet is created, you can run [dev environments](../../docs/concepts/dev-environments.md), [tasks](../../docs/concepts/tasks.md), and [services](../../docs/concepts/services.md). +Once the fleet is created, you can run [dev environments](../../concepts/dev-environments.md), [tasks](../../concepts/tasks.md), and [services](../../concepts/services.md). > If you want instances to be provisioned on demand, you can set `nodes` to `0..2`. In this case, `dstack` will create instances only when you run workloads. @@ -84,7 +84,7 @@ Once the fleet is created, you can run [dev environments](../../docs/concepts/de ### Configure the backend -Follow the standard instructions for setting up a [`kubernetes`](../../docs/concepts/backends.md#kubernetes) backend: +Follow the standard instructions for setting up a [`kubernetes`](../../concepts/backends.md#kubernetes) backend:
@@ -133,15 +133,15 @@ $ dstack apply -f crusoe-fleet.dstack.yml
-Once the fleet is created, you can run [dev environments](../../docs/concepts/dev-environments.md), [tasks](../../docs/concepts/tasks.md), and [services](../../docs/concepts/services.md). +Once the fleet is created, you can run [dev environments](../../concepts/dev-environments.md), [tasks](../../concepts/tasks.md), and [services](../../concepts/services.md). ## NCCL tests -Use a [distributed task](../../docs/concepts/tasks.md#distributed-tasks) that runs NCCL tests to validate cluster network bandwidth. +Use a [distributed task](../../concepts/tasks.md#distributed-tasks) that runs NCCL tests to validate cluster network bandwidth. === "VMs" - With the Crusoe backend, HPC-X and NCCL topology files are pre-installed on the host VM image. Mount them into the container via [instance volumes](../../docs/concepts/volumes.md#instance-volumes). + With the Crusoe backend, HPC-X and NCCL topology files are pre-installed on the host VM image. Mount them into the container via [instance volumes](../../concepts/volumes.md#instance-volumes).
@@ -275,6 +275,6 @@ $ dstack apply -f crusoe-nccl-tests.dstack.yml ## What's next -1. Learn about [dev environments](../../docs/concepts/dev-environments.md), [tasks](../../docs/concepts/tasks.md), [services](../../docs/concepts/services.md) -2. Check out [backends](../../docs/concepts/backends.md#crusoe-cloud) and [fleets](../../docs/concepts/fleets.md#cloud-fleets) +1. Learn about [dev environments](../../concepts/dev-environments.md), [tasks](../../concepts/tasks.md), [services](../../concepts/services.md) +2. Check out [backends](../../concepts/backends.md#crusoe-cloud) and [fleets](../../concepts/fleets.md#cloud-fleets) 3. Check the docs on [Crusoe's networking](https://docs.crusoecloud.com/networking/infiniband/) and ["Crusoe Managed" Kubernetes](https://docs.crusoecloud.com/orchestration/cmk/index.html) diff --git a/docs/examples/clusters/gcp.md b/docs/docs/examples/clusters/gcp.md similarity index 94% rename from docs/examples/clusters/gcp.md rename to docs/docs/examples/clusters/gcp.md index b0f0393200..eb9ddef0c2 100644 --- a/docs/examples/clusters/gcp.md +++ b/docs/docs/examples/clusters/gcp.md @@ -518,10 +518,10 @@ Use a distributed task that runs NCCL tests to validate cluster network bandwidt ### Distributed training === "A4" - You can use the standard [distributed task](../../docs/concepts/tasks.md#distributed-tasks) example to run distributed training on A4 instances. + You can use the standard [distributed task](../../concepts/tasks.md#distributed-tasks) example to run distributed training on A4 instances. === "A3 Mega" - You can use the standard [distributed task](../../docs/concepts/tasks.md#distributed-tasks) example to run distributed training on A3 Mega instances. To enable GPUDirect-TCPX, make sure the required [NCCL environment variables](https://cloud.google.com/kubernetes-engine/docs/how-to/gpu-bandwidth-gpudirect-tcpx-autopilot#environment-variables-nccl) are properly set, for example by adding the following commands at the beginning: + You can use the standard [distributed task](../../concepts/tasks.md#distributed-tasks) example to run distributed training on A3 Mega instances. To enable GPUDirect-TCPX, make sure the required [NCCL environment variables](https://cloud.google.com/kubernetes-engine/docs/how-to/gpu-bandwidth-gpudirect-tcpx-autopilot#environment-variables-nccl) are properly set, for example by adding the following commands at the beginning: ```shell # ... @@ -540,7 +540,7 @@ Use a distributed task that runs NCCL tests to validate cluster network bandwidt ``` === "A3 High/Edge" - You can use the standard [distributed task](../../docs/concepts/tasks.md#distributed-tasks) example to run distributed training on A3 High/Edge instances. To enable GPUDirect-TCPX0, make sure the required [NCCL environment variables](https://cloud.google.com/kubernetes-engine/docs/how-to/gpu-bandwidth-gpudirect-tcpx-autopilot#environment-variables-nccl) are properly set, for example by adding the following commands at the beginning: + You can use the standard [distributed task](../../concepts/tasks.md#distributed-tasks) example to run distributed training on A3 High/Edge instances. To enable GPUDirect-TCPX0, make sure the required [NCCL environment variables](https://cloud.google.com/kubernetes-engine/docs/how-to/gpu-bandwidth-gpudirect-tcpx-autopilot#environment-variables-nccl) are properly set, for example by adding the following commands at the beginning: ```shell # ... 
@@ -577,6 +577,6 @@ In addition to distributed training, you can of course run regular tasks, dev en ## What's new -1. Learn about [dev environments](../../docs/concepts/dev-environments.md), [tasks](../../docs/concepts/tasks.md), [services](../../docs/concepts/services.md) -2. Read about [cluster placement](../../docs/concepts/fleets.md#cluster-placement) +1. Learn about [dev environments](../../concepts/dev-environments.md), [tasks](../../concepts/tasks.md), [services](../../concepts/services.md) +2. Read about [cluster placement](../../concepts/fleets.md#cluster-placement) 3. Check GCP's docs on using [A4](https://docs.cloud.google.com/compute/docs/gpus/create-gpu-vm-a3u-a4), and [A3 Mega/High/Edge](https://docs.cloud.google.com/compute/docs/gpus/gpudirect) instances diff --git a/docs/examples/clusters/lambda.md b/docs/docs/examples/clusters/lambda.md similarity index 89% rename from docs/examples/clusters/lambda.md rename to docs/docs/examples/clusters/lambda.md index e66e74573a..1ebe35ce76 100644 --- a/docs/examples/clusters/lambda.md +++ b/docs/docs/examples/clusters/lambda.md @@ -19,7 +19,7 @@ description: Setting up Lambda clusters using Kubernetes or 1-Click Clusters wit ### Configure the backend -Follow the standard instructions for setting up a [Kubernetes](../../docs/concepts/backends.md#kubernetes) backend: +Follow the standard instructions for setting up a [Kubernetes](../../concepts/backends.md#kubernetes) backend:
@@ -68,11 +68,11 @@ $ dstack apply -f lambda-fleet.dstack.yml
-Once the fleet is created, you can run [dev environments](../../docs/concepts/dev-environments.md), [tasks](../../docs/concepts/tasks.md), and [services](../../docs/concepts/services.md). +Once the fleet is created, you can run [dev environments](../../concepts/dev-environments.md), [tasks](../../concepts/tasks.md), and [services](../../concepts/services.md). ## 1-Click Clusters -Another way to work with Lambda clusters is through [1CC](https://lambda.ai/1-click-clusters). While `dstack` supports automated cluster provisioning via [VM-based backends](../../docs/concepts/backends.md#vm-based), there is currently no programmatic way to provision Lambda 1CCs. As a result, to use a 1CC cluster with `dstack`, you must use [SSH fleets](../../docs/concepts/fleets.md). +Another way to work with Lambda clusters is through [1CC](https://lambda.ai/1-click-clusters). While `dstack` supports automated cluster provisioning via [VM-based backends](../../concepts/backends.md#vm-based), there is currently no programmatic way to provision Lambda 1CCs. As a result, to use a 1CC cluster with `dstack`, you must use [SSH fleets](../../concepts/fleets.md). ### Prerequsisites @@ -80,7 +80,7 @@ Another way to work with Lambda clusters is through [1CC](https://lambda.ai/1-cl ### Create a fleet -Follow the standard instructions for setting up an [SSH fleet](../../docs/concepts/fleets.md#ssh-fleets): +Follow the standard instructions for setting up an [SSH fleet](../../concepts/fleets.md#ssh-fleets):
@@ -116,11 +116,11 @@ $ dstack apply -f lambda-fleet.dstack.yml
-Once the fleet is created, you can run [dev environments](../../docs/concepts/dev-environments.md), [tasks](../../docs/concepts/tasks.md), and [services](../../docs/concepts/services.md). +Once the fleet is created, you can run [dev environments](../../concepts/dev-environments.md), [tasks](../../concepts/tasks.md), and [services](../../concepts/services.md). ## Run tasks -To run tasks on a cluster, you must use [distributed tasks](../../docs/concepts/tasks.md#distributed-task). +To run tasks on a cluster, you must use [distributed tasks](../../concepts/tasks.md#distributed-task). ### Run NCCL tests @@ -213,6 +213,6 @@ Provisioning... ## What's next -1. Learn about [dev environments](../../docs/concepts/dev-environments.md), [tasks](../../docs/concepts/tasks.md), [services](../../docs/concepts/services.md) -2. Read about the [Kubernetes backend](../../docs/concepts/backends.md#kubernetes) and [cluster placement](../../docs/concepts/fleets.md#cluster-placement) +1. Learn about [dev environments](../../concepts/dev-environments.md), [tasks](../../concepts/tasks.md), [services](../../concepts/services.md) +2. Read about the [Kubernetes backend](../../concepts/backends.md#kubernetes) and [cluster placement](../../concepts/fleets.md#cluster-placement) 3. Check Lambda's docs on [Kubernetes](https://docs.lambda.ai/public-cloud/1-click-clusters/managed-kubernetes/#accessing-mk8s) and [1CC](https://docs.lambda.ai/public-cloud/1-click-clusters/) diff --git a/docs/examples/clusters/nccl-rccl-tests.md b/docs/docs/examples/clusters/nccl-rccl-tests.md similarity index 89% rename from docs/examples/clusters/nccl-rccl-tests.md rename to docs/docs/examples/clusters/nccl-rccl-tests.md index 4c565d8c68..196f08d495 100644 --- a/docs/examples/clusters/nccl-rccl-tests.md +++ b/docs/docs/examples/clusters/nccl-rccl-tests.md @@ -5,10 +5,10 @@ description: Running NCCL and RCCL tests to validate cluster network bandwidth # NCCL/RCCL tests -This example shows how to run [NCCL](https://github.com/NVIDIA/nccl-tests) or [RCCL](https://github.com/ROCm/rccl-tests) tests on a cluster using [distributed tasks](../../docs/concepts/tasks.md#distributed-tasks). +This example shows how to run [NCCL](https://github.com/NVIDIA/nccl-tests) or [RCCL](https://github.com/ROCm/rccl-tests) tests on a cluster using [distributed tasks](../../concepts/tasks.md#distributed-tasks). !!! info "Prerequisites" - Before running a distributed task, make sure to create a fleet with `placement` set to `cluster` (can be a [managed fleet](../../docs/concepts/fleets.md#cluster-placement) or an [SSH fleet](../../docs/concepts/fleets.md#ssh-placement)). + Before running a distributed task, make sure to create a fleet with `placement` set to `cluster` (can be a [managed fleet](../../concepts/fleets.md#cluster-placement) or an [SSH fleet](../../concepts/fleets.md#ssh-placement)). ## Running as a task @@ -120,7 +120,7 @@ Here's an example of a task that runs AllReduce test on 2 nodes, each with 4 GPU ### Apply a configuration -To run a configuration, use the [`dstack apply`](../../docs/reference/cli/dstack/apply.md) command. +To run a configuration, use the [`dstack apply`](../../reference/cli/dstack/apply.md) command.
@@ -139,5 +139,5 @@ Submit the run nccl-tests? [y/n]: y ## What's next? -1. Check [dev environments](../../docs/concepts/dev-environments.md), [tasks](../../docs/concepts/tasks.md), - [services](../../docs/concepts/services.md), and [fleets](../../docs/concepts/fleets.md). +1. Check [dev environments](../../concepts/dev-environments.md), [tasks](../../concepts/tasks.md), + [services](../../concepts/services.md), and [fleets](../../concepts/fleets.md). diff --git a/docs/examples/clusters/nebius.md b/docs/docs/examples/clusters/nebius.md similarity index 92% rename from docs/examples/clusters/nebius.md rename to docs/docs/examples/clusters/nebius.md index 6986a10ab5..20b1a47555 100644 --- a/docs/examples/clusters/nebius.md +++ b/docs/docs/examples/clusters/nebius.md @@ -75,7 +75,7 @@ $ dstack apply -f nebius-fleet.dstack.yml This will automatically create a Nebius cluster and provision instances. -Once the fleet is created, you can run [dev environments](../../docs/concepts/dev-environments.md), [tasks](../../docs/concepts/tasks.md), and [services](../../docs/concepts/services.md). +Once the fleet is created, you can run [dev environments](../../concepts/dev-environments.md), [tasks](../../concepts/tasks.md), and [services](../../concepts/services.md). > If you want instances to be provisioned on demand, you can set `nodes` to `0..2`. In this case, `dstack` will create instances only when you run workloads. @@ -107,7 +107,7 @@ $ nebius mk8s cluster get-credentials --id <cluster id> --external ### Configure a backend -Follow the standard instructions for setting up a [`kubernetes`](../../docs/concepts/backends.md#kubernetes) backend: +Follow the standard instructions for setting up a [`kubernetes`](../../concepts/backends.md#kubernetes) backend:
@@ -154,11 +154,11 @@ $ dstack apply -f nebius-fleet.dstack.yml
-Once the fleet is created, you can run [dev environments](../../docs/concepts/dev-environments.md), [tasks](../../docs/concepts/tasks.md), and [services](../../docs/concepts/services.md). +Once the fleet is created, you can run [dev environments](../../concepts/dev-environments.md), [tasks](../../concepts/tasks.md), and [services](../../concepts/services.md). ## NCCL tests -Use a [distributed task](../../docs/concepts/tasks.md#distributed-tasks) to run NCCL tests and validate the cluster’s network bandwidth. +Use a [distributed task](../../concepts/tasks.md#distributed-tasks) to run NCCL tests and validate the cluster’s network bandwidth.
@@ -252,6 +252,6 @@ nccl-tests provisioning completed (running) ## What's next -1. Learn about [dev environments](../../docs/concepts/dev-environments.md), [tasks](../../docs/concepts/tasks.md), [services](../../docs/concepts/services.md) -2. Check out [backends](../../docs/concepts/backends.md) and [fleets](../../docs/concepts/fleets.md) +1. Learn about [dev environments](../../concepts/dev-environments.md), [tasks](../../concepts/tasks.md), [services](../../concepts/services.md) +2. Check out [backends](../../concepts/backends.md) and [fleets](../../concepts/fleets.md) 3. Read Nebius' docs on [networking for VMs](https://docs.nebius.com/compute/clusters/gpu) and the [managed Kubernetes service](https://docs.nebius.com/kubernetes). diff --git a/docs/examples/inference/nim.md b/docs/docs/examples/inference/nim.md similarity index 84% rename from docs/examples/inference/nim.md rename to docs/docs/examples/inference/nim.md index 263baa2737..f7d1c03edf 100644 --- a/docs/examples/inference/nim.md +++ b/docs/docs/examples/inference/nim.md @@ -8,7 +8,7 @@ description: Deploying Nemotron-3-Super-120B-A12B using NVIDIA NIM This example shows how to deploy Nemotron-3-Super-120B-A12B using [NVIDIA NIM](https://docs.nvidia.com/nim/large-language-models/latest/getting-started.html) and `dstack`. ??? info "Prerequisites" - Once `dstack` is [installed](../../docs/installation.md), clone the repo with examples. + Once `dstack` is [installed](../../installation.md), clone the repo with examples.
@@ -54,7 +54,7 @@ resources: ### Running a configuration Save the configuration above as `nemotron120.dstack.yml`, then use the -[`dstack apply`](../../docs/reference/cli/dstack/apply.md) command. +[`dstack apply`](../../reference/cli/dstack/apply.md) command.
@@ -91,9 +91,9 @@ $ curl http://127.0.0.1:3000/proxy/services/main/nemotron120/v1/chat/completions
-When a [gateway](../../docs/concepts/gateways.md) is configured, the service endpoint will be available at `https://nemotron120./`. +When a [gateway](../../concepts/gateways.md) is configured, the service endpoint will be available at `https://nemotron120./`. ## What's next? -1. Check [services](../../docs/concepts/services.md) +1. Check [services](../../concepts/services.md) 2. Browse the [Nemotron-3-Super-120B-A12B model page](https://build.nvidia.com/nvidia/nemotron-3-super-120b-a12b) diff --git a/docs/examples/inference/sglang.md b/docs/docs/examples/inference/sglang.md similarity index 93% rename from docs/examples/inference/sglang.md rename to docs/docs/examples/inference/sglang.md index feda39a46d..775dcedd48 100644 --- a/docs/examples/inference/sglang.md +++ b/docs/docs/examples/inference/sglang.md @@ -95,7 +95,7 @@ standard `qwen3` reasoning parser without extra ROCm-specific tuning flags. The first startup on MI300X can take longer while SGLang compiles ROCm kernels. Save one of the configurations above as `service.dstack.yml`, then use the -[`dstack apply`](../../docs/reference/cli/dstack/apply.md) command. +[`dstack apply`](../../reference/cli/dstack/apply.md) command.
@@ -132,7 +132,7 @@ Qwen3.6 uses thinking mode by default. To disable thinking, pass `"chat_template_kwargs": {"enable_thinking": false}` in the request body. To enable tool calling, add `--tool-call-parser qwen3_coder` to the serve command. -> If a [gateway](../../docs/concepts/gateways.md) is configured (e.g. to enable auto-scaling, HTTPS, rate limits, etc.), the service endpoint will be available at `https://qwen36./`. +> If a [gateway](../../concepts/gateways.md) is configured (e.g. to enable auto-scaling, HTTPS, rate limits, etc.), the service endpoint will be available at `https://qwen36./`. ## Configuration options @@ -221,5 +221,5 @@ Currently, auto-scaling only supports `rps` as the metric. TTFT and ITL metrics ## What's next? -1. Read about [services](../../docs/concepts/services.md) and [gateways](../../docs/concepts/gateways.md) +1. Read about [services](../../concepts/services.md) and [gateways](../../concepts/gateways.md) 2. Browse the [Qwen 3.6 SGLang cookbook](https://docs.sglang.io/cookbook/autoregressive/Qwen/Qwen3.6) and the [SGLang server arguments reference](https://docs.sglang.ai/advanced_features/server_arguments.html) diff --git a/docs/examples/inference/trtllm.md b/docs/docs/examples/inference/trtllm.md similarity index 87% rename from docs/examples/inference/trtllm.md rename to docs/docs/examples/inference/trtllm.md index 8f95cefc63..c058820b0a 100644 --- a/docs/examples/inference/trtllm.md +++ b/docs/docs/examples/inference/trtllm.md @@ -53,7 +53,7 @@ resources: ```
-Apply it with [`dstack apply`](../../docs/reference/cli/dstack/apply.md): +Apply it with [`dstack apply`](../../reference/cli/dstack/apply.md):
@@ -90,10 +90,10 @@ $ curl http://127.0.0.1:3000/proxy/services/main/qwen235/v1/chat/completions \
-When a [gateway](../../docs/concepts/gateways.md) is configured, the service endpoint will be available at `https://qwen235./`. +When a [gateway](../../concepts/gateways.md) is configured, the service endpoint will be available at `https://qwen235./`. ## What's next? -1. Read about [services](../../docs/concepts/services.md) and [gateways](../../docs/concepts/gateways.md) +1. Read about [services](../../concepts/services.md) and [gateways](../../concepts/gateways.md) 2. Browse the [TensorRT-LLM deployment guides](https://nvidia.github.io/TensorRT-LLM/deployment-guide/index.html) and the [Qwen3 deployment guide](https://nvidia.github.io/TensorRT-LLM/deployment-guide/deployment-guide-for-qwen3-on-trtllm.html) 3. See the [`trtllm-serve` reference](https://nvidia.github.io/TensorRT-LLM/commands/trtllm-serve/trtllm-serve.html) diff --git a/docs/examples/inference/vllm.md b/docs/docs/examples/inference/vllm.md similarity index 88% rename from docs/examples/inference/vllm.md rename to docs/docs/examples/inference/vllm.md index 4ac880defc..b5b83c4664 100644 --- a/docs/examples/inference/vllm.md +++ b/docs/docs/examples/inference/vllm.md @@ -89,7 +89,7 @@ Qwen3.6-27B is a multimodal model. For text-only workloads, add calling, add `--enable-auto-tool-choice --tool-call-parser qwen3_coder`. Save one of the configurations above as `service.dstack.yml`, then use the -[`dstack apply`](../../docs/reference/cli/dstack/apply.md) command. +[`dstack apply`](../../reference/cli/dstack/apply.md) command.
@@ -122,9 +122,9 @@ curl http://127.0.0.1:3000/proxy/services/main/qwen36/v1/chat/completions \
-> If a [gateway](../../docs/concepts/gateways.md) is configured (e.g. to enable auto-scaling, HTTPS, rate limits, etc.), the service endpoint will be available at `https://qwen36./`. +> If a [gateway](../../concepts/gateways.md) is configured (e.g. to enable auto-scaling, HTTPS, rate limits, etc.), the service endpoint will be available at `https://qwen36./`. ## What's next? -1. Read about [services](../../docs/concepts/services.md) and [gateways](../../docs/concepts/gateways.md) +1. Read about [services](../../concepts/services.md) and [gateways](../../concepts/gateways.md) 2. Browse the [Qwen 3.5 & 3.6 vLLM recipe](https://docs.vllm.ai/projects/recipes/en/latest/Qwen/Qwen3.5.html) and the [SGLang](../inference/sglang.md) example diff --git a/docs/examples/llms/deepseek/index.md b/docs/docs/examples/llms/deepseek/index.md similarity index 100% rename from docs/examples/llms/deepseek/index.md rename to docs/docs/examples/llms/deepseek/index.md diff --git a/docs/examples/llms/llama/index.md b/docs/docs/examples/llms/llama/index.md similarity index 100% rename from docs/examples/llms/llama/index.md rename to docs/docs/examples/llms/llama/index.md diff --git a/docs/examples/misc/docker-compose/index.md b/docs/docs/examples/misc/docker-compose/index.md similarity index 100% rename from docs/examples/misc/docker-compose/index.md rename to docs/docs/examples/misc/docker-compose/index.md diff --git a/docs/examples/models/deepseek-v4.md b/docs/docs/examples/models/deepseek-v4.md similarity index 97% rename from docs/examples/models/deepseek-v4.md rename to docs/docs/examples/models/deepseek-v4.md index 7efd9977e8..833e5163d7 100644 --- a/docs/examples/models/deepseek-v4.md +++ b/docs/docs/examples/models/deepseek-v4.md @@ -6,7 +6,7 @@ description: Deploying DeepSeek-V4-Pro using SGLang on NVIDIA B200:8 # DeepSeek V4 This example shows how to deploy `deepseek-ai/DeepSeek-V4-Pro` as a -[service](../../docs/concepts/services.md) using +[service](../../concepts/services.md) using [SGLang](https://github.com/sgl-project/sglang) and `dstack`. ## Apply a configuration @@ -64,7 +64,7 @@ This configuration uses the single-node Blackwell `DeepSeek-V4-Pro` recipe shape for `8 x NVIDIA B200`. Export your Hugging Face token and apply the configuration with -[`dstack apply`](../../docs/reference/cli/dstack/apply.md). +[`dstack apply`](../../reference/cli/dstack/apply.md).
diff --git a/docs/examples/models/qwen36.md b/docs/docs/examples/models/qwen36.md similarity index 97% rename from docs/examples/models/qwen36.md rename to docs/docs/examples/models/qwen36.md index 3723e36fa0..35ea72fd11 100644 --- a/docs/examples/models/qwen36.md +++ b/docs/docs/examples/models/qwen36.md @@ -6,7 +6,7 @@ description: Deploying Qwen3.6-27B using SGLang on NVIDIA and AMD GPUs # Qwen 3.6 This example shows how to deploy `Qwen/Qwen3.6-27B` as a -[service](../../docs/concepts/services.md) using +[service](../../concepts/services.md) using [SGLang](https://github.com/sgl-project/sglang) and `dstack`. ## Apply a configuration @@ -92,7 +92,7 @@ The NVIDIA and AMD configurations above use pinned SGLang images and the same straightforward 4-GPU layout used across the Qwen 3.6 docs and examples. Apply the configuration with -[`dstack apply`](../../docs/reference/cli/dstack/apply.md). +[`dstack apply`](../../reference/cli/dstack/apply.md).
diff --git a/docs/examples/models/wan22/index.md b/docs/docs/examples/models/wan22/index.md similarity index 100% rename from docs/examples/models/wan22/index.md rename to docs/docs/examples/models/wan22/index.md diff --git a/docs/docs/examples/training/axolotl.md b/docs/docs/examples/training/axolotl.md new file mode 100644 index 0000000000..5266a86745 --- /dev/null +++ b/docs/docs/examples/training/axolotl.md @@ -0,0 +1,185 @@ +--- +title: Axolotl +description: Fine-tuning Llama models with Axolotl — single-node SFT with FSDP and QLoRA, or distributed across multiple nodes +--- + +# Axolotl + +This example shows how to use [Axolotl](https://github.com/OpenAccess-AI-Collective/axolotl) with `dstack` to fine-tune Llama models — on a single node with SFT, FSDP, and QLoRA, or distributed across multiple nodes. + +## Single-node training + +This section walks through fine-tuning 4-bit quantized `Llama-4-Scout-17B-16E` using SFT with FSDP and QLoRA. + +### Define a configuration + +Axolotl reads the model, QLoRA, and dataset arguments, as well as trainer configuration from a [`scout-qlora-flexattn-fsdp2.yaml`](https://github.com/axolotl-ai-cloud/axolotl/blob/main/examples/llama-4/scout-qlora-flexattn-fsdp2.yaml) file. The configuration uses 4-bit axolotl quantized version of `meta-llama/Llama-4-Scout-17B-16E`, requiring only ~43GB VRAM/GPU with 4K context length. + +Below is a task configuration that does fine-tuning. + +
+ +```yaml +type: task +# The name is optional, if not specified, generated randomly +name: axolotl-nvidia-llama-scout-train + +# Using the official Axolotl's Docker image +image: axolotlai/axolotl:main-latest + +# Required environment variables +env: + - HF_TOKEN + - WANDB_API_KEY + - WANDB_PROJECT + - HUB_MODEL_ID +# Commands of the task +commands: + - wget https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/examples/llama-4/scout-qlora-flexattn-fsdp2.yaml + - | + axolotl train scout-qlora-flexattn-fsdp2.yaml \ + --wandb-project $WANDB_PROJECT \ + --wandb-name $DSTACK_RUN_NAME \ + --hub-model-id $HUB_MODEL_ID + +resources: + # Four GPU (required by FSDP) + gpu: H100:4 + # Shared memory size for inter-process communication + shm_size: 64GB + disk: 500GB.. +``` + +
+
+The task uses Axolotl's Docker image, which comes with Axolotl pre-installed.
+
+!!! info "AMD"
+    The example above uses NVIDIA accelerators. To use it with AMD, check out [AMD](../accelerators/amd.md#axolotl).
+
+### Run the configuration
+
+Once the configuration is ready, run `dstack apply -f <configuration file>`, and `dstack` will automatically provision the
+cloud resources and run the configuration.
+
+
+ +```shell +$ HF_TOKEN=... +$ WANDB_API_KEY=... +$ WANDB_PROJECT=... +$ HUB_MODEL_ID=... +$ dstack apply -f train.dstack.yml + + # BACKEND RESOURCES INSTANCE TYPE PRICE + 1 vastai (cz-czechia) cpu=64 mem=128GB H100:80GB:2 18794506 $3.8907 + 2 vastai (us-texas) cpu=52 mem=64GB H100:80GB:2 20442365 $3.6926 + 3 vastai (fr-france) cpu=64 mem=96GB H100:80GB:2 20379984 $3.7389 + +Submit the run axolotl-nvidia-llama-scout-train? [y/n]: + +Provisioning... +---> 100% +``` + +
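+Once the run is submitted, you can also monitor it from another terminal. A minimal sketch with the `dstack` CLI (the run name is the one from the configuration above; `dstack ps` and `dstack logs` are assumed to be available in your CLI version):
+
+```shell
+$ dstack ps
+$ dstack logs axolotl-nvidia-llama-scout-train
+```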
+ +## Distributed training + +!!! info "Prerequisites" + Before running a distributed task, make sure to create a fleet with `placement` set to `cluster` (can be a [managed fleet](../../concepts/fleets.md#cluster-placement) or an [SSH fleet](../../concepts/fleets.md#ssh-placement)). + +This section walks through running distributed fine-tuning of `Llama-3.1-70B` with QLoRA and FSDP across multiple nodes. + +### Define a configuration + +Once the fleet is created, define a distributed task configuration. Here's an example of a distributed `QLoRA` task using `FSDP`. + +
+
+```yaml
+type: task
+name: axolotl-multi-node-qlora-llama3-70b
+
+nodes: 2
+
+image: nvcr.io/nvidia/pytorch:25.01-py3
+
+env:
+  - HF_TOKEN
+  - WANDB_API_KEY
+  - WANDB_PROJECT
+  - HUB_MODEL_ID
+  - CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
+  - NCCL_DEBUG=INFO
+  - ACCELERATE_LOG_LEVEL=info
+
+commands:
+  # Replacing the default Torch and FlashAttention in the NGC container with Axolotl-compatible versions.
+  # The preinstalled versions are incompatible with Axolotl.
+  - pip uninstall -y torch flash-attn
+  - pip install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 --index-url https://download.pytorch.org/whl/test/cu124
+  - pip install --no-build-isolation axolotl[flash-attn,deepspeed]
+  - wget https://raw.githubusercontent.com/huggingface/trl/main/examples/accelerate_configs/fsdp1.yaml
+  - wget https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/examples/llama-3/qlora-fsdp-70b.yaml
+  # Axolotl includes hf-xet version 1.1.0, which fails during downloads. Replacing it with the latest version (1.1.2).
+  - pip uninstall -y hf-xet
+  - pip install hf-xet --no-cache-dir
+  - |
+    accelerate launch \
+    --config_file=fsdp1.yaml \
+    -m axolotl.cli.train qlora-fsdp-70b.yaml \
+    --hub-model-id $HUB_MODEL_ID \
+    --output-dir /checkpoints/qlora-llama3-70b \
+    --wandb-project $WANDB_PROJECT \
+    --wandb-name $DSTACK_RUN_NAME \
+    --main_process_ip=$DSTACK_MASTER_NODE_IP \
+    --main_process_port=8008 \
+    --machine_rank=$DSTACK_NODE_RANK \
+    --num_processes=$DSTACK_GPUS_NUM \
+    --num_machines=$DSTACK_NODES_NUM
+
+resources:
+  gpu: 80GB:8
+  shm_size: 128GB
+
+volumes:
+  - /checkpoints:/checkpoints
+```
+
+
+ +!!! info "Docker image" + We are using `nvcr.io/nvidia/pytorch:25.01-py3` from NGC because it includes the necessary libraries and packages for RDMA and InfiniBand support. + +### Run the configuration + +To run a configuration, use the [`dstack apply`](../../reference/cli/dstack/apply.md) command. + +
+ +```shell +$ HF_TOKEN=... +$ WANDB_API_KEY=... +$ WANDB_PROJECT=... +$ HUB_MODEL_ID=... +$ dstack apply -f train-distrib.dstack.yml + + # BACKEND RESOURCES INSTANCE TYPE PRICE + 1 ssh (remote) cpu=208 mem=1772GB H100:80GB:8 instance $0 idle + 2 ssh (remote) cpu=208 mem=1772GB H100:80GB:8 instance $0 idle + +Submit the run axolotl-multi-node-qlora-llama3-70b? [y/n]: y + +Provisioning... +---> 100% +``` + +
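+The prerequisites above assume a fleet with `placement` set to `cluster` already exists. If you still need to create one, below is a minimal sketch of such a fleet; the name, node count, and GPU size are placeholders, so adjust them to your hardware and backend before applying it ahead of the task:
+
+```shell
+$ cat > fleet.dstack.yml <<'EOF'
+type: fleet
+name: my-h100-cluster  # placeholder name
+nodes: 2
+placement: cluster
+resources:
+  gpu: 80GB:8
+EOF
+$ dstack apply -f fleet.dstack.yml
+```
+
+See the fleets documentation linked in the prerequisites for SSH fleets and additional options.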
+ +## What's next? + +1. Check [dev environments](../../concepts/dev-environments.md), [tasks](../../concepts/tasks.md), + [services](../../concepts/services.md), and [fleets](../../concepts/fleets.md) +2. Read about [cluster placement](../../concepts/fleets.md#cluster-placement) +3. See the [AMD](../accelerators/amd.md#axolotl) example diff --git a/docs/examples/distributed-training/ray-ragen.md b/docs/docs/examples/training/ray-ragen.md similarity index 93% rename from docs/examples/distributed-training/ray-ragen.md rename to docs/docs/examples/training/ray-ragen.md index e3194b2b3a..73e8749e83 100644 --- a/docs/examples/distributed-training/ray-ragen.md +++ b/docs/docs/examples/training/ray-ragen.md @@ -11,7 +11,7 @@ to fine-tune an agent on multiple nodes. Under the hood `RAGEN` uses [verl](https://github.com/volcengine/verl) for Reinforcement Learning and [Ray](https://docs.ray.io/en/latest/) for distributed training. !!! info "Prerequisites" - Before running a distributed task, make sure to create a fleet with `placement` set to `cluster` (can be a [managed fleet](../../docs/concepts/fleets.md#cluster-placement) or an [SSH fleet](../../docs/concepts/fleets.md#ssh-placement)). + Before running a distributed task, make sure to create a fleet with `placement` set to `cluster` (can be a [managed fleet](../../concepts/fleets.md#cluster-placement) or an [SSH fleet](../../concepts/fleets.md#ssh-placement)). ## Run a Ray cluster @@ -130,5 +130,5 @@ $ ray job submit \ Using Ray via `dstack` is a powerful way to get access to the rich Ray ecosystem while benefiting from `dstack`'s provisioning capabilities. !!! info "What's next" - 1. Read about [distributed tasks](../../docs/concepts/tasks.md#distributed-tasks), [fleets](../../docs/concepts/fleets.md), and [cluster placement](../../docs/concepts/fleets.md#cluster-placement) + 1. Read about [distributed tasks](../../concepts/tasks.md#distributed-tasks), [fleets](../../concepts/fleets.md), and [cluster placement](../../concepts/fleets.md#cluster-placement) 2. Browse Ray's [docs](https://docs.ray.io/en/latest/train/examples.html) for other examples. diff --git a/docs/docs/examples/training/trl.md b/docs/docs/examples/training/trl.md new file mode 100644 index 0000000000..ffeb3766f8 --- /dev/null +++ b/docs/docs/examples/training/trl.md @@ -0,0 +1,272 @@ +--- +title: TRL +description: Fine-tuning Llama with TRL — single-node SFT with QLoRA, or distributed across multiple nodes with FSDP and DeepSpeed +--- + +# TRL + +This example walks you through how to use [TRL](https://github.com/huggingface/trl) with `dstack` to fine-tune `Llama-3.1-8B` — on a single node with SFT and QLoRA, or distributed across multiple nodes with [Accelerate](https://github.com/huggingface/accelerate) and [DeepSpeed](https://github.com/deepspeedai/DeepSpeed). + +## Single-node training + +### Define a configuration + +Below is a task configuration that does fine-tuning. + +
+
+```yaml
+type: task
+name: trl-train
+
+python: 3.12
+# Ensure nvcc is installed (req. for Flash Attention)
+nvcc: true
+
+env:
+  - HF_TOKEN
+  - WANDB_API_KEY
+  - HUB_MODEL_ID
+commands:
+  # Pin torch==2.6.0 to avoid building Flash Attention from source.
+  # Prebuilt Flash Attention wheels are not available for the latest torch==2.7.0.
+  - uv pip install torch==2.6.0
+  - uv pip install transformers bitsandbytes peft wandb
+  - uv pip install flash_attn --no-build-isolation
+  - git clone https://github.com/huggingface/trl
+  - cd trl
+  - uv pip install .
+  - |
+    accelerate launch \
+    --config_file=examples/accelerate_configs/multi_gpu.yaml \
+    --num_processes $DSTACK_GPUS_PER_NODE \
+    trl/scripts/sft.py \
+    --model_name meta-llama/Meta-Llama-3.1-8B \
+    --dataset_name OpenAssistant/oasst_top1_2023-08-25 \
+    --dataset_text_field="text" \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --gradient_accumulation_steps 4 \
+    --learning_rate 2e-4 \
+    --report_to wandb \
+    --bf16 \
+    --max_seq_length 1024 \
+    --lora_r 16 \
+    --lora_alpha 32 \
+    --lora_target_modules q_proj k_proj v_proj o_proj \
+    --load_in_4bit \
+    --use_peft \
+    --attn_implementation "flash_attention_2" \
+    --logging_steps=10 \
+    --output_dir models/llama31 \
+    --hub_model_id $HUB_MODEL_ID
+
+resources:
+  gpu:
+    # 24GB or more VRAM
+    memory: 24GB..
+    # One or more GPUs
+    count: 1..
+  # Shared memory (for multi-gpu)
+  shm_size: 24GB
+```
+
+
+
+Change the `resources` property to specify more GPUs.
+
+!!! info "AMD"
+    The example above uses NVIDIA accelerators. To use it with AMD, check out [AMD](../accelerators/amd.md#trl).
+
+??? info "DeepSpeed"
+    For more memory-efficient use of multiple GPUs, consider using DeepSpeed and ZeRO Stage 3.
+
+    To do this, use the `examples/accelerate_configs/deepspeed_zero3.yaml` configuration file instead of
+    `examples/accelerate_configs/multi_gpu.yaml`.
+
+### Run the configuration
+
+Once the configuration is ready, run `dstack apply -f <configuration file>`, and `dstack` will automatically provision the
+cloud resources and run the configuration.
+
+
+ +```shell +$ HF_TOKEN=... +$ WANDB_API_KEY=... +$ HUB_MODEL_ID=... +$ dstack apply -f train.dstack.yml + + # BACKEND RESOURCES INSTANCE TYPE PRICE + 1 vastai (cz-czechia) cpu=64 mem=128GB H100:80GB:2 18794506 $3.8907 + 2 vastai (us-texas) cpu=52 mem=64GB H100:80GB:2 20442365 $3.6926 + 3 vastai (fr-france) cpu=64 mem=96GB H100:80GB:2 20379984 $3.7389 + +Submit the run trl-train? [y/n]: + +Provisioning... +---> 100% +``` + +
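+When the run finishes, the fine-tuned model is pushed to the Hugging Face Hub repository set in `HUB_MODEL_ID`. An optional local check, sketched below, assumes the push completed and that the `huggingface_hub` CLI is installed and authenticated with a token that can read the repository:
+
+```shell
+$ export HUB_MODEL_ID=...
+$ huggingface-cli download "$HUB_MODEL_ID" --local-dir ./llama31-ft
+```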
+ +## Distributed training + +!!! info "Prerequisites" + Before running a distributed task, make sure to create a fleet with `placement` set to `cluster` (can be a [managed fleet](../../concepts/fleets.md#cluster-placement) or an [SSH fleet](../../concepts/fleets.md#ssh-placement)). + +### Define a configuration + +Once the fleet is created, define a distributed task configuration. Here's an example using either FSDP or DeepSpeed ZeRO-3. + +=== "FSDP" + +
+ + ```yaml + type: task + name: trl-train-fsdp-distrib + + nodes: 2 + + image: nvcr.io/nvidia/pytorch:25.01-py3 + + env: + - HF_TOKEN + - ACCELERATE_LOG_LEVEL=info + - WANDB_API_KEY + - MODEL_ID=meta-llama/Llama-3.1-8B + - HUB_MODEL_ID + + commands: + - pip install transformers bitsandbytes peft wandb + - git clone https://github.com/huggingface/trl + - cd trl + - pip install . + - | + accelerate launch \ + --config_file=examples/accelerate_configs/fsdp1.yaml \ + --main_process_ip=$DSTACK_MASTER_NODE_IP \ + --main_process_port=8008 \ + --machine_rank=$DSTACK_NODE_RANK \ + --num_processes=$DSTACK_GPUS_NUM \ + --num_machines=$DSTACK_NODES_NUM \ + trl/scripts/sft.py \ + --model_name $MODEL_ID \ + --dataset_name OpenAssistant/oasst_top1_2023-08-25 \ + --dataset_text_field="text" \ + --per_device_train_batch_size 1 \ + --per_device_eval_batch_size 1 \ + --gradient_accumulation_steps 4 \ + --learning_rate 2e-4 \ + --report_to wandb \ + --bf16 \ + --max_seq_length 1024 \ + --attn_implementation flash_attention_2 \ + --logging_steps=10 \ + --output_dir /checkpoints/llama31-ft \ + --hub_model_id $HUB_MODEL_ID \ + --torch_dtype bfloat16 + + resources: + gpu: 80GB:8 + shm_size: 128GB + + volumes: + - /checkpoints:/checkpoints + ``` + +
+ +=== "DeepSpeed ZeRO-3" + +
+ + ```yaml + type: task + name: trl-train-deepspeed-distrib + + nodes: 2 + + image: nvcr.io/nvidia/pytorch:25.01-py3 + + env: + - HF_TOKEN + - WANDB_API_KEY + - HUB_MODEL_ID + - MODEL_ID=meta-llama/Llama-3.1-8B + - ACCELERATE_LOG_LEVEL=info + + commands: + - pip install transformers bitsandbytes peft wandb deepspeed + - git clone https://github.com/huggingface/trl + - cd trl + - pip install . + - | + accelerate launch \ + --config_file=examples/accelerate_configs/deepspeed_zero3.yaml \ + --main_process_ip=$DSTACK_MASTER_NODE_IP \ + --main_process_port=8008 \ + --machine_rank=$DSTACK_NODE_RANK \ + --num_processes=$DSTACK_GPUS_NUM \ + --num_machines=$DSTACK_NODES_NUM \ + trl/scripts/sft.py \ + --model_name $MODEL_ID \ + --dataset_name OpenAssistant/oasst_top1_2023-08-25 \ + --dataset_text_field="text" \ + --per_device_train_batch_size 1 \ + --per_device_eval_batch_size 1 \ + --gradient_accumulation_steps 4 \ + --learning_rate 2e-4 \ + --report_to wandb \ + --bf16 \ + --max_seq_length 1024 \ + --attn_implementation flash_attention_2 \ + --logging_steps=10 \ + --output_dir /checkpoints/llama31-ft \ + --hub_model_id $HUB_MODEL_ID \ + --torch_dtype bfloat16 + + resources: + gpu: 80GB:8 + shm_size: 128GB + + volumes: + - /checkpoints:/checkpoints + ``` + +
+ +!!! info "Docker image" + We are using `nvcr.io/nvidia/pytorch:25.01-py3` from NGC because it includes the necessary libraries and packages for RDMA and InfiniBand support. + +### Run the configuration + +To run a configuration, use the [`dstack apply`](../../reference/cli/dstack/apply.md) command. + +
+ +```shell +$ HF_TOKEN=... +$ WANDB_API_KEY=... +$ HUB_MODEL_ID=... +$ dstack apply -f train-distrib.dstack.yml + + # BACKEND RESOURCES INSTANCE TYPE PRICE + 1 ssh (remote) cpu=208 mem=1772GB H100:80GB:8 instance $0 idle + 2 ssh (remote) cpu=208 mem=1772GB H100:80GB:8 instance $0 idle + +Submit the run trl-train-fsdp-distrib? [y/n]: y + +Provisioning... +---> 100% +``` + +
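+Intermediate checkpoints land in the `/checkpoints` instance volume mapped in the configuration, and the final model is pushed to the repository set in `HUB_MODEL_ID`. If you need to interrupt the run, a sketch using the `dstack` CLI (the run name below matches the FSDP variant; use whichever name `dstack` printed for your submission):
+
+```shell
+$ dstack stop trl-train-fsdp-distrib
+```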
+ +## What's next? + +1. Check [dev environments](../../concepts/dev-environments.md), [tasks](../../concepts/tasks.md), + [services](../../concepts/services.md), and [fleets](../../concepts/fleets.md) +2. Read about [cluster placement](../../concepts/fleets.md#cluster-placement) +3. See the [AMD](../accelerators/amd.md#trl) example diff --git a/docs/docs/guides/migration/slurm.md b/docs/docs/guides/migration/slurm.md index 97b4546b58..2791075e8d 100644 --- a/docs/docs/guides/migration/slurm.md +++ b/docs/docs/guides/migration/slurm.md @@ -1847,4 +1847,4 @@ fi 1. Check out [Quickstart](../../quickstart.md) 2. Read about [dev environments](../../concepts/dev-environments.md), [tasks](../../concepts/tasks.md), and [services](../../concepts/services.md) -3. Browse the [examples](../../../examples.md) +3. Browse the [examples](../../examples.md) diff --git a/docs/docs/quickstart.md b/docs/docs/quickstart.md index 80a98f79bf..da37d46ded 100644 --- a/docs/docs/quickstart.md +++ b/docs/docs/quickstart.md @@ -277,5 +277,5 @@ Something not working? See the [troubleshooting](guides/troubleshooting.md) guid !!! info "What's next?" 1. Read about [backends](concepts/backends.md), [dev environments](concepts/dev-environments.md), [tasks](concepts/tasks.md), [services](concepts/services.md), and [fleets](concepts/services.md) - 2. Browse [examples](../examples.md) + 2. Browse [examples](examples.md) 3. Join [Discord](https://discord.gg/u8SmfwPpMd) diff --git a/docs/examples/distributed-training/axolotl.md b/docs/examples/distributed-training/axolotl.md deleted file mode 100644 index c2e04d3fc6..0000000000 --- a/docs/examples/distributed-training/axolotl.md +++ /dev/null @@ -1,100 +0,0 @@ ---- -title: Axolotl -description: Distributed fine-tuning with Axolotl and FSDP across multiple nodes ---- - -# Axolotl - -This example walks you through how to run distributed fine-tune using [Axolotl](https://github.com/axolotl-ai-cloud/axolotl) and [distributed tasks](../../docs/concepts/tasks.md#distributed-tasks). - -!!! info "Prerequisites" - Before running a distributed task, make sure to create a fleet with `placement` set to `cluster` (can be a [managed fleet](../../docs/concepts/fleets.md#cluster-placement) or an [SSH fleet](../../docs/concepts/fleets.md#ssh-placement)). - -## Define a configuration - -Once the fleet is created, define a distributed task configuration. Here's an example of distributed `QLORA` task using `FSDP`. - -
- -```yaml -type: task -name: axolotl-multi-node-qlora-llama3-70b - -nodes: 2 - -image: nvcr.io/nvidia/pytorch:25.01-py3 - -env: - - HF_TOKEN - - WANDB_API_KEY - - WANDB_PROJECT - - HUB_MODEL_ID - - CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 - - NCCL_DEBUG=INFO - - ACCELERATE_LOG_LEVEL=info - -commands: - # Replacing the default Torch and FlashAttention in the NCG container with Axolotl-compatible versions. - # The preinstalled versions are incompatible with Axolotl. - - pip uninstall -y torch flash-attn - - pip install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 --index-url https://download.pytorch.org/whl/test/cu124 - - pip install --no-build-isolation axolotl[flash-attn,deepspeed] - - wget https://raw.githubusercontent.com/huggingface/trl/main/examples/accelerate_configs/fsdp1.yaml - - wget https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/examples/llama-3/qlora-fsdp-70b.yaml - # Axolotl includes hf-xet version 1.1.0, which fails during downloads. Replacing it with the latest version (1.1.2). - - pip uninstall -y hf-xet - - pip install hf-xet --no-cache-dir - - | - accelerate launch \ - --config_file=fsdp1.yaml \ - -m axolotl.cli.train qlora-fsdp-70b.yaml \ - --hub-model-id $HUB_MODEL_ID \ - --output-dir /checkpoints/qlora-llama3-70b \ - --wandb-project $WANDB_PROJECT \ - --wandb-name $DSTACK_RUN_NAME \ - --main_process_ip=$DSTACK_MASTER_NODE_IP \ - --main_process_port=8008 \ - --machine_rank=$DSTACK_NODE_RANK \ - --num_processes=$DSTACK_GPUS_NUM \ - --num_machines=$DSTACK_NODES_NUM - -resources: - gpu: 80GB:8 - shm_size: 128GB - -volumes: - - /checkpoints:/checkpoints -``` -
- -!!! info "Docker image" - We are using `nvcr.io/nvidia/pytorch:25.01-py3` from NGC because it includes the necessary libraries and packages for RDMA and InfiniBand support. - -### Apply the configuration - -To run a configuration, use the [`dstack apply`](../../docs/reference/cli/dstack/apply.md) command. - -
- -```shell -$ HF_TOKEN=... -$ WANDB_API_KEY=... -$ WANDB_PROJECT=... -$ HUB_MODEL_ID=... -$ dstack apply -f train-distrib.dstack.yml - - # BACKEND RESOURCES INSTANCE TYPE PRICE - 1 ssh (remote) cpu=208 mem=1772GB H100:80GB:8 instance $0 idle - 2 ssh (remote) cpu=208 mem=1772GB H100:80GB:8 instance $0 idle - -Submit the run axolotl-multi-node-qlora-llama3-70b? [y/n]: y - -Provisioning... ----> 100% -``` -
- -!!! info "What's next?" - 1. Check [dev environments](../../docs/concepts/dev-environments.md), [tasks](../../docs/concepts/tasks.md), - [services](../../docs/concepts/services.md), and [fleets](../../docs/concepts/fleets.md) - 2. Read about [cluster placement](../../docs/concepts/fleets.md#cluster-placement) diff --git a/docs/examples/distributed-training/trl.md b/docs/examples/distributed-training/trl.md deleted file mode 100644 index 3a25c04b48..0000000000 --- a/docs/examples/distributed-training/trl.md +++ /dev/null @@ -1,160 +0,0 @@ ---- -title: TRL -description: Distributed fine-tuning with TRL, Accelerate, and DeepSpeed ---- - -# TRL - -This example walks you through how to run distributed fine-tune using [TRL](https://github.com/huggingface/trl), [Accelerate](https://github.com/huggingface/accelerate) and [Deepspeed](https://github.com/deepspeedai/DeepSpeed). - -!!! info "Prerequisites" - Before running a distributed task, make sure to create a fleet with `placement` set to `cluster` (can be a [managed fleet](../../docs/concepts/fleets.md#cluster-placement) or an [SSH fleet](../../docs/concepts/fleets.md#ssh-placement)). - -## Define a configuration - -Once the fleet is created, define a distributed task configuration. Here's an example of such a task. - -=== "FSDP" - -
- ```yaml - type: task - name: trl-train-fsdp-distrib - - nodes: 2 - - image: nvcr.io/nvidia/pytorch:25.01-py3 - - env: - - HF_TOKEN - - ACCELERATE_LOG_LEVEL=info - - WANDB_API_KEY - - MODEL_ID=meta-llama/Llama-3.1-8B - - HUB_MODEL_ID - - commands: - - pip install transformers bitsandbytes peft wandb - - git clone https://github.com/huggingface/trl - - cd trl - - pip install . - - | - accelerate launch \ - --config_file=examples/accelerate_configs/fsdp1.yaml \ - --main_process_ip=$DSTACK_MASTER_NODE_IP \ - --main_process_port=8008 \ - --machine_rank=$DSTACK_NODE_RANK \ - --num_processes=$DSTACK_GPUS_NUM \ - --num_machines=$DSTACK_NODES_NUM \ - trl/scripts/sft.py \ - --model_name $MODEL_ID \ - --dataset_name OpenAssistant/oasst_top1_2023-08-25 \ - --dataset_text_field="text" \ - --per_device_train_batch_size 1 \ - --per_device_eval_batch_size 1 \ - --gradient_accumulation_steps 4 \ - --learning_rate 2e-4 \ - --report_to wandb \ - --bf16 \ - --max_seq_length 1024 \ - --attn_implementation flash_attention_2 \ - --logging_steps=10 \ - --output_dir /checkpoints/llama31-ft \ - --hub_model_id $HUB_MODEL_ID \ - --torch_dtype bfloat16 - - resources: - gpu: 80GB:8 - shm_size: 128GB - - volumes: - - /checkpoints:/checkpoints - ``` -
- -=== "Deepseed ZeRO-3" - -
- ```yaml - type: task - name: trl-train-deepspeed-distrib - - nodes: 2 - - image: nvcr.io/nvidia/pytorch:25.01-py3 - - env: - - HF_TOKEN - - WANDB_API_KEY - - HUB_MODEL_ID - - MODEL_ID=meta-llama/Llama-3.1-8B - - ACCELERATE_LOG_LEVEL=info - - commands: - - pip install transformers bitsandbytes peft wandb deepspeed - - git clone https://github.com/huggingface/trl - - cd trl - - pip install . - - | - accelerate launch \ - --config_file=examples/accelerate_configs/deepspeed_zero3.yaml \ - --main_process_ip=$DSTACK_MASTER_NODE_IP \ - --main_process_port=8008 \ - --machine_rank=$DSTACK_NODE_RANK \ - --num_processes=$DSTACK_GPUS_NUM \ - --num_machines=$DSTACK_NODES_NUM \ - trl/scripts/sft.py \ - --model_name $MODEL_ID \ - --dataset_name OpenAssistant/oasst_top1_2023-08-25 \ - --dataset_text_field="text" \ - --per_device_train_batch_size 1 \ - --per_device_eval_batch_size 1 \ - --gradient_accumulation_steps 4 \ - --learning_rate 2e-4 \ - --report_to wandb \ - --bf16 \ - --max_seq_length 1024 \ - --attn_implementation flash_attention_2 \ - --logging_steps=10 \ - --output_dir /checkpoints/llama31-ft \ - --hub_model_id $HUB_MODEL_ID \ - --torch_dtype bfloat16 - - resources: - gpu: 80GB:8 - shm_size: 128GB - - volumes: - - /checkpoints:/checkpoints - ``` -
- -!!! info "Docker image" - We are using `nvcr.io/nvidia/pytorch:25.01-py3` from NGC because it includes the necessary libraries and packages for RDMA and InfiniBand support. - -### Apply the configuration - -To run a configuration, use the [`dstack apply`](../../docs/reference/cli/dstack/apply.md) command. - -
- -```shell -$ HF_TOKEN=... -$ WANDB_API_KEY=... -$ HUB_MODEL_ID=... -$ dstack apply -f train-distrib.dstack.yml - - # BACKEND RESOURCES INSTANCE TYPE PRICE - 1 ssh (remote) cpu=208 mem=1772GB H100:80GB:8 instance $0 idle - 2 ssh (remote) cpu=208 mem=1772GB H100:80GB:8 instance $0 idle - -Submit the run trl-train-fsdp-distrib? [y/n]: y - -Provisioning... ----> 100% -``` -
- -!!! info "What's next?" - 1. Check [dev environments](../../docs/concepts/dev-environments.md), [tasks](../../docs/concepts/tasks.md), - [services](../../docs/concepts/services.md), and [fleets](../../docs/concepts/fleets.md) - 2. Read about [cluster placement](../../docs/concepts/fleets.md#cluster-placement) diff --git a/docs/examples/single-node-training/axolotl.md b/docs/examples/single-node-training/axolotl.md deleted file mode 100644 index 3ab19d0502..0000000000 --- a/docs/examples/single-node-training/axolotl.md +++ /dev/null @@ -1,100 +0,0 @@ ---- -title: Axolotl -description: Fine-tuning models with Axolotl using FSDP and QLoRA ---- - -# Axolotl - -This example shows how to use [Axolotl](https://github.com/OpenAccess-AI-Collective/axolotl) with `dstack` to fine-tune 4-bit Quantized `Llama-4-Scout-17B-16E` using SFT with FSDP and QLoRA. - -??? info "Prerequisites" - Once `dstack` is [installed](../../docs/installation.md), clone the repo with examples. - -
- - ```shell - $ git clone https://github.com/dstackai/dstack - $ cd dstack - ``` - -
- -## Define a configuration - -Axolotl reads the model, QLoRA, and dataset arguments, as well as trainer configuration from a [`scout-qlora-flexattn-fsdp2.yaml`](https://github.com/axolotl-ai-cloud/axolotl/blob/main/examples/llama-4/scout-qlora-flexattn-fsdp2.yaml) file. The configuration uses 4-bit axolotl quantized version of `meta-llama/Llama-4-Scout-17B-16E`, requiring only ~43GB VRAM/GPU with 4K context length. - -Below is a task configuration that does fine-tuning. - -
- -```yaml -type: task -# The name is optional, if not specified, generated randomly -name: axolotl-nvidia-llama-scout-train - -# Using the official Axolotl's Docker image -image: axolotlai/axolotl:main-latest - -# Required environment variables -env: - - HF_TOKEN - - WANDB_API_KEY - - WANDB_PROJECT - - HUB_MODEL_ID -# Commands of the task -commands: - - wget https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/examples/llama-4/scout-qlora-flexattn-fsdp2.yaml - - | - axolotl train scout-qlora-flexattn-fsdp2.yaml \ - --wandb-project $WANDB_PROJECT \ - --wandb-name $DSTACK_RUN_NAME \ - --hub-model-id $HUB_MODEL_ID - -resources: - # Four GPU (required by FSDP) - gpu: H100:4 - # Shared memory size for inter-process communication - shm_size: 64GB - disk: 500GB.. -``` - -
- -The task uses Axolotl's Docker image, where Axolotl is already pre-installed. - -!!! info "AMD" - The example above uses NVIDIA accelerators. To use it with AMD, check out [AMD](../accelerators/amd.md#axolotl). - -## Run the configuration - -Once the configuration is ready, run `dstack apply -f `, and `dstack` will automatically provision the -cloud resources and run the configuration. - -
- -```shell -$ HF_TOKEN=... -$ WANDB_API_KEY=... -$ WANDB_PROJECT=... -$ HUB_MODEL_ID=... -$ dstack apply -f train.dstack.yml - - # BACKEND RESOURCES INSTANCE TYPE PRICE - 1 vastai (cz-czechia) cpu=64 mem=128GB H100:80GB:2 18794506 $3.8907 - 2 vastai (us-texas) cpu=52 mem=64GB H100:80GB:2 20442365 $3.6926 - 3 vastai (fr-france) cpu=64 mem=96GB H100:80GB:2 20379984 $3.7389 - -Submit the run axolotl-nvidia-llama-scout-train? [y/n]: - -Provisioning... ----> 100% -``` - -
- -## What's next? - -1. Browse the [Axolotl distributed training](../distributed-training/axolotl.md) example -2. Check [dev environments](../../docs/concepts/dev-environments.md), [tasks](../../docs/concepts/tasks.md), - [services](../../docs/concepts/services.md), [fleets](../../docs/concepts/fleets.md) -3. See the [AMD](../accelerators/amd.md#axolotl) example diff --git a/docs/examples/single-node-training/trl.md b/docs/examples/single-node-training/trl.md deleted file mode 100644 index 7295055259..0000000000 --- a/docs/examples/single-node-training/trl.md +++ /dev/null @@ -1,116 +0,0 @@ ---- -title: TRL -description: Fine-tuning Llama with TRL using SFT and QLoRA ---- - -# TRL - -This example walks you through how to use [TRL](https://github.com/huggingface/trl) to fine-tune `Llama-3.1-8B` with `dstack` using SFT with QLoRA. - -## Define a configuration - -Below is a task configuration that does fine-tuning. - -
- -```yaml -type: task -name: trl-train - -python: 3.12 -# Ensure nvcc is installed (req. for Flash Attention) -nvcc: true - -env: - - HF_TOKEN - - WANDB_API_KEY - - HUB_MODEL_ID -commands: - # Pin torch==2.6.0 to avoid building Flash Attention from source. - # Prebuilt Flash Attention wheels are not available for the latest torch==2.7.0. - - uv pip install torch==2.6.0 - - uv pip install transformers bitsandbytes peft wandb - - uv pip install flash_attn --no-build-isolation - - git clone https://github.com/huggingface/trl - - cd trl - - uv pip install . - - | - accelerate launch \ - --config_file=examples/accelerate_configs/multi_gpu.yaml \ - --num_processes $DSTACK_GPUS_PER_NODE \ - trl/scripts/sft.py \ - --model_name meta-llama/Meta-Llama-3.1-8B \ - --dataset_name OpenAssistant/oasst_top1_2023-08-25 \ - --dataset_text_field="text" \ - --per_device_train_batch_size 1 \ - --per_device_eval_batch_size 1 \ - --gradient_accumulation_steps 4 \ - --learning_rate 2e-4 \ - --report_to wandb \ - --bf16 \ - --max_seq_length 1024 \ - --lora_r 16 \ - --lora_alpha 32 \ - --lora_target_modules q_proj k_proj v_proj o_proj \ - --load_in_4bit \ - --use_peft \ - --attn_implementation "flash_attention_2" \ - --logging_steps=10 \ - --output_dir models/llama31 \ - --hub_model_id peterschmidt85/FineLlama-3.1-8B - -resources: - gpu: - # 24GB or more VRAM - memory: 24GB.. - # One or more GPU - count: 1.. - # Shared memory (for multi-gpu) - shm_size: 24GB -``` - -
- -Change the `resources` property to specify more GPUs. - -!!! info "AMD" - The example above uses NVIDIA accelerators. To use it with AMD, check out [AMD](../accelerators/amd.md#trl). - -??? info "DeepSpeed" - For more memory-efficient use of multiple GPUs, consider using DeepSpeed and ZeRO Stage 3. - - To do this, use the `examples/accelerate_configs/deepspeed_zero3.yaml` configuration file instead of - `examples/accelerate_configs/multi_gpu.yaml`. - -## Run the configuration - -Once the configuration is ready, run `dstack apply -f `, and `dstack` will automatically provision the -cloud resources and run the configuration. - -
- -```shell -$ HF_TOKEN=... -$ WANDB_API_KEY=... -$ HUB_MODEL_ID=... -$ dstack apply -f train.dstack.yml - - # BACKEND RESOURCES INSTANCE TYPE PRICE - 1 vastai (cz-czechia) cpu=64 mem=128GB H100:80GB:2 18794506 $3.8907 - 2 vastai (us-texas) cpu=52 mem=64GB H100:80GB:2 20442365 $3.6926 - 3 vastai (fr-france) cpu=64 mem=96GB H100:80GB:2 20379984 $3.7389 - -Submit the run trl-train? [y/n]: - -Provisioning... ----> 100% -``` - -
- -## What's next? - -1. Browse the [TRL distributed training](../distributed-training/trl.md) example -2. Check [dev environments](../../docs/concepts/dev-environments.md), [tasks](../../docs/concepts/tasks.md), - [services](../../docs/concepts/services.md), and [fleets](../../docs/concepts/fleets.md) -3. See the [AMD](../accelerators/amd.md#trl) example diff --git a/docs/overrides/main.html b/docs/overrides/main.html index 3ae52c2be3..805495b0b1 100644 --- a/docs/overrides/main.html +++ b/docs/overrides/main.html @@ -223,11 +223,10 @@