diff --git a/docs/docs/capabilities/models/index.mdx b/docs/docs/capabilities/models/index.mdx index f4a10b1ae..2460905de 100644 --- a/docs/docs/capabilities/models/index.mdx +++ b/docs/docs/capabilities/models/index.mdx @@ -30,7 +30,9 @@ For details on each format, see the [Model Formats](/docs/capabilities/models/mo ::: ## Built-in Models -Cortex.cpp offers a range of built-in models that include popular open-source options. These models, hosted on HuggingFace as [Cortex Model Repositories](/docs/hub/cortex-hub), are pre-compiled for different engines, enabling each model to have multiple branches in various formats. +Cortex offers a range of [Built-in models](/models) that include popular open-source options. + +These models are hosted on [Cortex's HuggingFace](https://huggingface.co/cortexso) and are pre-compiled for different engines, enabling each model to have multiple branches in various formats. ### Built-in Model Variants Built-in models are made available across the following variants: @@ -39,10 +41,7 @@ Built-in models are made available across the following variants: - **By Size**: `7b`, `13b`, and more. - **By quantizations**: `q4`, `q8`, and more. -:::info -You can see our full list of Built-in Models [here](/models). -::: - ## Next steps -- Cortex requires a `model.yaml` file to run a model. Find out more [here](/docs/capabilities/models/model-yaml). -- Cortex supports multiple model hubs hosting built-in models. See details [here](/docs/model-sources). \ No newline at end of file +- See Cortex's list of [Built-in Models](/models). +- Cortex supports multiple model hubs hosting built-in models. See details [here](/docs/capabilities/models/sources). +- Cortex requires a `model.yaml` file to run a model. Find out more [here](/docs/capabilities/models/model-yaml). 
\ No newline at end of file diff --git a/docs/docs/capabilities/models/model-yaml.mdx b/docs/docs/capabilities/models/model-yaml.mdx index b47b86a64..e761d7da2 100644 --- a/docs/docs/capabilities/models/model-yaml.mdx +++ b/docs/docs/capabilities/models/model-yaml.mdx @@ -179,7 +179,7 @@ Model load parameters include the options that control how Cortex.cpp runs the m | `prompt_template` | Template for formatting the prompt, including system messages and instructions. | Yes | | `engine` | The engine that run model, default to `llama-cpp` for local model with gguf format. | Yes | -All parameters from the `model.yml` file are used for running the model via the [CLI chat command](/docs/cli/chat) or [CLI run command](/docs/cli/run). These parameters also act as defaults when using the [model start API](/api-reference#tag/models/post/v1/models/start) through cortex.cpp. +All parameters from the `model.yml` file are used for running the model via the [CLI run command](/docs/cli/run). These parameters also act as defaults when using the [model start API](/api-reference#tag/models/post/v1/models/start) through cortex.cpp. ## Runtime parameters @@ -217,8 +217,8 @@ The API is accessible at the `/v1/chat/completions` URL and accepts all paramete With the `llama-cpp` engine, cortex.cpp accept all parameters from [`model.yml` inference section](#Inference Parameters) and accept all parameters from the chat completion API. 
-:::info + diff --git a/docs/docs/hub/cortex-hub.mdx b/docs/docs/capabilities/models/sources/cortex-hub.mdx similarity index 100% rename from docs/docs/hub/cortex-hub.mdx rename to docs/docs/capabilities/models/sources/cortex-hub.mdx diff --git a/docs/docs/capabilities/models/sources/hugging-face.mdx b/docs/docs/capabilities/models/sources/hugging-face.mdx new file mode 100644 index 000000000..5dc74a14e --- /dev/null +++ b/docs/docs/capabilities/models/sources/hugging-face.mdx @@ -0,0 +1,66 @@ +--- +title: Hugging Face +description: Cortex supports all `GGUF` and `ONNX` models available in Huggingface repositories, providing access to a wide range of models. +--- + +import Tabs from "@theme/Tabs"; +import TabItem from "@theme/TabItem"; + +Cortex.cpp supports all `GGUF` from the [Hugging Face Hub](https://huggingface.co). + +You can pull HuggingFace models via: +- repository handle: eg `author/model_id` +- direct url: eg `https://huggingface.co/QuantFactory/OpenMath2-Llama3.1-8B-GGUF/blob/main/OpenMath2-Llama3.1-8B.Q4_0.gguf` + + +## GGUF +To view all available `GGUF` models on HuggingFace, select the `GGUF` tag in the Libraries section. 
+ +![HF GGUF](/img/docs/gguf.png) + + + ```sh + ## Pull the Codestral-22B-v0.1-GGUF model from the bartowski organization + cortex pull bartowski/Codestral-22B-v0.1-GGUF + + # Pull the gemma-7b model from the google organization + cortex pull https://huggingface.co/QuantFactory/OpenMath2-Llama3.1-8B-GGUF/blob/main/OpenMath2-Llama3.1-8B.Q4_0.gguf + ``` + + + ```sh + ## Pull the Codestral-22B-v0.1-GGUF model from the bartowski organization + cortex.exe pull bartowski/Codestral-22B-v0.1-GGUF + + # Pull the gemma-7b model from the google organization + cortex.exe pull google/gemma-7b + ``` + + + + diff --git a/docs/docs/hub/index.mdx b/docs/docs/capabilities/models/sources/index.mdx similarity index 97% rename from docs/docs/hub/index.mdx rename to docs/docs/capabilities/models/sources/index.mdx index f5c903ed0..186f192dc 100644 --- a/docs/docs/hub/index.mdx +++ b/docs/docs/capabilities/models/sources/index.mdx @@ -1,14 +1,8 @@ --- -slug: /model-sources title: Model Sources +description: Model --- -import DocCardList from "@theme/DocCardList"; - -:::warning -🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. -::: - # Pulling Models in Cortex Cortex provides a streamlined way to pull (download) machine learning models from Hugging Face and other third-party sources, as well as import models from local storage. This functionality allows users to easily access a variety of pre-trained models to enhance their applications. @@ -348,6 +342,4 @@ Response: } ``` -With Cortex, pulling and managing models is simplified, allowing you to focus more on building your applications! - - +With Cortex, pulling and managing models is simplified, allowing you to focus more on building your applications! 
\ No newline at end of file diff --git a/docs/docs/hub/nvidia-ngc.mdx b/docs/docs/capabilities/models/sources/nvidia-ngc.mdx similarity index 100% rename from docs/docs/hub/nvidia-ngc.mdx rename to docs/docs/capabilities/models/sources/nvidia-ngc.mdx diff --git a/docs/docs/chat-completions.mdx b/docs/docs/chat-completions.mdx index 9b1dce01d..c4f40f0d1 100644 --- a/docs/docs/chat-completions.mdx +++ b/docs/docs/chat-completions.mdx @@ -146,5 +146,5 @@ Cortex also acts as an aggregator for remote inference requests from a single en :::note Learn more about Chat Completions capabilities: - [Chat Completions API Reference](/api-reference#tag/inference/post/chat/completions) -- [Chat Completions CLI command](/docs/cli/chat) +- [`cortex run` CLI command](/docs/cli/run) ::: diff --git a/docs/docs/cli/chat.mdx b/docs/docs/cli/chat.mdx deleted file mode 100644 index 0b7ee7083..000000000 --- a/docs/docs/cli/chat.mdx +++ /dev/null @@ -1,71 +0,0 @@ ---- -title: Cortex Chat -description: Cortex chat command. -slug: "chat" ---- - -import Tabs from "@theme/Tabs"; -import TabItem from "@theme/TabItem"; - -:::warning -🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. -::: - -# `cortex chat` -:::info -This CLI command calls the following API endpoint: -- [Download Model](/api-reference#tag/models/post/v1/models/pull) (The command only calls this endpoint if the specified model is not downloaded yet.) -- Install Engine (The command only calls this endpoint if the specified engine is not downloaded yet.) -- [Start Model](/api-reference#tag/models/post/v1/models/start) -- [Chat Completions](/api-reference#tag/inference/post/v1/chat/completions) (The command makes a call to this endpoint if the `-c` option is used.) -::: - -This command starts a chat session with a specified model, allowing you to interact directly with it through an interactive chat interface. 
- -## Usage -:::info -You can use the `--verbose` flag to display more detailed output of the internal processes. To apply this flag, use the following format: `cortex --verbose [subcommand]`. -::: - - - ```sh - # Stable - cortex chat [options] -m  - - # Beta - cortex-beta chat [options] -m  - - # Nightly - cortex-nightly chat [options] -m  - ``` - - - ```sh - # Stable - cortex.exe chat [options] -m  - - # Beta - cortex-beta.exe chat [options] -m  - - # Nightly - cortex-nightly.exe chat [options] -m  - ``` - - - -:::info -This command uses a `model_id` from the model that you have downloaded or available in your file system. -::: - -## Options - -| Option | Description | Required | Default value | Example | | ----------------------------- | ----------------------------------------------------------------------------------------------- | -------- | ------------- | ----------------------------- | | `model_id` | Model ID to chat with. | Yes | - | `mistral` | | `-m`, `--message ` | Message to send to the model | Yes | - | `-m "Hello, model!"` | | `-h`, `--help` | Display help information for the command. | No | - | `-h` | - - - - diff --git a/docs/docs/cli/cortex.mdx b/docs/docs/cli/cortex.mdx index ef887dd10..cb8cf29fd 100644 --- a/docs/docs/cli/cortex.mdx +++ b/docs/docs/cli/cortex.mdx @@ -7,12 +7,8 @@ slug: /cli import Tabs from "@theme/Tabs"; import TabItem from "@theme/TabItem"; -:::warning -🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. -::: - -# Cortex -This command list all the available commands within the Cortex.cpp commands. +# `cortex` +This command lists all the available Cortex commands. 
## Usage :::info @@ -21,48 +17,23 @@ You can use the `--verbose` flag to display more detailed output of the internal ```sh - # Stable cortex - - # Beta - cortex-beta - - # Nightly - cortex-nightly ``` ```sh - # Stable cortex.exe - - # Beta - cortex-beta.exe - - # Nightly - cortex-nightly.exe ``` - -## Command Chaining -Cortex CLI's command chaining support allows multiple commands to be executed in sequence with a simplified syntax. - -For example: - -- [cortex run](/docs/cli/run) -- [cortex chat](/docs/cli/chat) - ## Sub Commands +- [cortex start](/docs/cli/start): Start the Cortex API server (starts automatically with other commands) +- [cortex run](/docs/cli/run): Shortcut for `cortex models start`. Pull a remote model or start a local model, and start chatting. +- [cortex pull](/docs/cli/pull): Download a model. - [cortex models](/docs/cli/models): Manage and configure models. -- [cortex chat](/docs/cli/chat): Send a chat request to a model. - [cortex ps](/docs/cli/ps): Display active models and their operational status. -- [cortex embeddings](/docs/cli/embeddings): Create an embedding vector representing the input text. -- [cortex engines](/docs/cli/engines): Manage Cortex.cpp engines. -- [cortex pull|download](/docs/cli/pull): Download a model. -- [cortex run](/docs/cli/run): Shortcut to pull, start and chat with a model. -- [cortex update](/docs/cli/update): Update the Cortex.cpp version. -- [cortex start](/docs/cli/start): Start the Cortex.cpp API server. -- [cortex stop](/docs/cli/stop): Stop the Cortex.cpp API server. +- [cortex engines](/docs/cli/engines): Manage Cortex engines. +- [cortex update](/docs/cli/update): Update the Cortex version. +- [cortex stop](/docs/cli/stop): Stop the Cortex API server. 
diff --git a/docs/docs/cli/ps.mdx b/docs/docs/cli/ps.mdx index 2641a388f..a70a9501c 100644 --- a/docs/docs/cli/ps.mdx +++ b/docs/docs/cli/ps.mdx @@ -7,59 +7,42 @@ slug: "ps" import Tabs from "@theme/Tabs"; import TabItem from "@theme/TabItem"; -:::warning -🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. -::: - # `cortex ps` -This command shows the running model and its status. - - +This command shows the running model and its status (Engine, RAM, VRAM, and Uptime). ## Usage -:::info -You can use the `--verbose` flag to display more detailed output of the internal processes. To apply this flag, use the following format: `cortex --verbose [subcommand]`. -::: ```sh - # Stable cortex ps [options] - - # Beta - cortex-beta ps [options] - - # Nightly - cortex-nightly ps [options] ``` ```sh - # Stable cortex.exe ps [options] - - # Beta - cortex-beta.exe ps [options] - - # Nightly - cortex-nightly.exe ps [options] ``` - For example, it returns the following table: ```bash -+----------------+-----------+----------+-----------+-----------+ -| Model | Engine | RAM | VRAM | Up time | -+----------------+-----------+----------+-----------+-----------+ -| tinyllama:gguf | llama-cpp | 35.16 MB | 601.02 MB | 5 seconds | -+----------------+-----------+----------+-----------+-----------+ +> cortex ps ++------------------------+-----------+-----------+-----------+-------------------------------+ +| Model | Engine | RAM | VRAM | Uptime | ++------------------------+-----------+-----------+-----------+-------------------------------+ +| llama3.2:3b-gguf-q4-km | llama-cpp | 308.23 MB | 1.87 GB | 7 seconds | ++------------------------+-----------+-----------+-----------+-------------------------------+ +| tinyllama:1b-gguf | llama-cpp | 35.16 MB | 636.18 MB | 1 hour, 5 minutes, 45 seconds | 
++------------------------+-----------+-----------+-----------+-------------------------------+ ``` ## Options | Option | Description | Required | Default value | Example | |-------------------|-------------------------------------------------------|----------|---------------|-------------| -| `-h`, `--help` | Display help information for the command. | No | - | `-h` | \ No newline at end of file +| `-h`, `--help` | Display help information for the command. | No | - | `-h` | + +:::info +You can use the `--verbose` flag to display more detailed output of the internal processes. To apply this flag, use the following format: `cortex --verbose [subcommand]`. +::: \ No newline at end of file diff --git a/docs/docs/cli/pull.mdx b/docs/docs/cli/pull.mdx index df1f3917d..028962896 100644 --- a/docs/docs/cli/pull.mdx +++ b/docs/docs/cli/pull.mdx @@ -7,18 +7,23 @@ slug: "pull" import Tabs from "@theme/Tabs"; import TabItem from "@theme/TabItem"; -:::warning -🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. -::: - # `cortex pull` :::info This CLI command calls the following API endpoint: -- [Download Model](/api-reference#tag/models/post/v1/models/{modelId}/pull) +- [Download Model](/api-reference#tag/pulling-models/post/v1/models/pull) ::: -This command downloads models from supported [model repositories](/docs/model-sources). +This command displays downloaded models, or displays models available for downloading. + +There are 3 ways to download models: +- From Cortex's [Built-in models](/models): `cortex pull ` +- HuggingFace repository handle: `cortex pull ` +- HuggingFace direct URL: `cortex pull ` + +Each of these will display the model quantizations available, to be selected for download. + +For more information, please refer to [Pulling Models](/docs/capabilities/models/sources). 
-The downloaded model will be stored in the Cortex folder in your home data directory. +The downloaded model file will be stored in the [Cortex Data Folder](/docs/architecture/data-folder). ## Usage @@ -28,34 +33,39 @@ You can use the `--verbose` flag to display more detailed output of the internal ```sh - # Stable cortex pull [options] - - # Beta - cortex-beta pull [options] - - # Nightly - cortex-nightly pull [options] ``` ```sh - # Stable cortex.exe pull [options] - - # Beta - cortex-beta.exe pull [options] - - # Nightly - cortex-nightly.exe pull [options] ``` +For example, this returns the following: +```bash +> cortex pull llama3.2 +Downloaded models: + llama3.2:3b-gguf-q4-km + +Available to download: + 1. llama3.2:3b-gguf-q2-k + 2. llama3.2:3b-gguf-q3-kl + 3. llama3.2:3b-gguf-q3-km + 4. llama3.2:3b-gguf-q3-ks + 5. llama3.2:3b-gguf-q4-ks + 6. llama3.2:3b-gguf-q5-km + 7. llama3.2:3b-gguf-q5-ks + 8. llama3.2:3b-gguf-q6-k + 9. llama3.2:3b-gguf-q8-0 + +Select a model (1-9): +``` ## Options | Option | Description | Required | Default value | Example | | -------------- | ------------------------------------------------- | -------- | ------------- | ----------- | | `model_id` | The identifier of the model you want to download. | Yes | - | `mistral` | -| `-h`, `--help` | Display help information for the command. | No | - | `-h` | +| `-h`, `--help` | Display help information for the command. | No | - | `-h` | \ No newline at end of file diff --git a/docs/docs/cli/run.mdx b/docs/docs/cli/run.mdx index 786b5cb0c..b0b9143ad 100644 --- a/docs/docs/cli/run.mdx +++ b/docs/docs/cli/run.mdx @@ -7,75 +7,35 @@ slug: "run" import Tabs from "@theme/Tabs"; import TabItem from "@theme/TabItem"; -:::warning -🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. 
-::: - # `cortex run` -:::info -This CLI command calls the following API endpoint: -- [Download Model](/api-reference#tag/models/post/v1/models/pull) (The command only calls this endpoint if the specified model is not downloaded yet.) -- Install Engine (The command only calls this endpoint if the specified engine is not downloaded yet.) -- [Start Model](/api-reference#tag/models/post/v1/models/start) -::: -This command facilitates the initiation of starting a specified machine-learning model. +This CLI command is a shortcut to run models easily. It executes this sequence of commands: +1. [`cortex pull`](/docs/cli/models/): This command pulls the specified model if the model is not yet downloaded, or finds a local model. +2. [`cortex engines install`](/docs/cli/engines/): This command installs the specified engines if not yet downloaded. +3. [`cortex models start`](/docs/cli/models/): This command starts the specified model, making it active and ready for interactions. ## Usage :::info You can use the `--verbose` flag to display more detailed output of the internal processes. To apply this flag, use the following format: `cortex --verbose [subcommand]`. ::: + ```sh - # Stable - cortex [options] :[engine] - - # Beta - cortex-beta [options] :[engine] - - # Nightly - cortex-nightly [options] :[engine] + cortex [options] ``` ```sh - # Stable - cortex.exe [options] :[engine] - - # Beta - cortex-beta.exe [options] :[engine] - - # Nightly - cortex-nightly.exe [options] :[engine] + cortex.exe [options] ``` -### `model_id` -You can use the [Built-in models](/docs/hub/cortex-hub) or Supported [HuggingFace models](/docs/hub/hugging-face). - -:::info -This command downloads and installs the model if not already available in your file system, then starts it for interaction. 
-::: - - ## Options | Option | Description | Required | Default value | Example | |-----------------------------|-----------------------------------------------------------------------------|----------|----------------------------------------------|------------------------| -| `model_id` | The identifier of the model you want to chat with. | Yes | `Prompt to select from the available models` | `mistral` | -| `-h`, `--help` | Display help information for the command. | No | - | `-h` | - - - - -## Command Chain - -`cortex run` command is a convenience wrapper that automatically executes a sequence of commands to simplify user interactions: - -1. [`cortex pull`](/docs/cli/models/): This command pulls the specified model if the model is not yet downloaded. -2. [`cortex engines install`](/docs/cli/engines/): This command installs the specified engines if not yet downloaded. -3. [`cortex models start`](/docs/cli/models/): This command starts the specified model, making it active and ready for interactions. +| `model_id` | The identifier of the model you want to chat with. | Yes | - | `mistral` | +| `-h`, `--help` | Display help information for the command. | No | - | `-h` | + diff --git a/docs/docs/cli/start.mdx b/docs/docs/cli/start.mdx index c180908eb..37a521ab1 100644 --- a/docs/docs/cli/start.mdx +++ b/docs/docs/cli/start.mdx @@ -6,16 +6,10 @@ description: Cortex CLI. import Tabs from "@theme/Tabs"; import TabItem from "@theme/TabItem"; -:::warning -🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. -::: +# `cortex start` -# Start -:::info -This is the initial command you need to run to start using Cortex.cpp. -::: - -This command start the Cortex.cpp's API server processes. +This command starts the Cortex API server processes. +If the server is not yet running, the server will automatically be started when running other Cortex commands. 
## Usage :::info @@ -24,26 +18,12 @@ You can use the `--verbose` flag to display more detailed output of the internal ```sh - # Stable cortex start [options] - - # Beta - cortex-beta start [options] - - # Nightly - cortex-nightly start [options] ``` ```sh - # Stable cortex.exe start [options] - - # Beta - cortex-beta.exe start [options] - - # Nightly - cortex-nightly.exe start [options] ``` @@ -54,7 +34,6 @@ You can use the `--verbose` flag to display more detailed output of the internal | Option | Description | Required | Default value | Example | | ---------------------------- | ----------------------------------------- | -------- | ------------- | ----------------------------- | | `-h`, `--help` | Display help information for the command. | No | - | `-h` | -| `-p`, `--port ` | Port to serve the application. | No | - | `-p 39281` | +| `-p`, `--port ` | Port to serve the application. | No | `39281` | `-p 39281` | - diff --git a/docs/docs/cli/stop.mdx b/docs/docs/cli/stop.mdx index 48c4eec31..0b8625f9e 100644 --- a/docs/docs/cli/stop.mdx +++ b/docs/docs/cli/stop.mdx @@ -7,10 +7,6 @@ slug: "stop" import Tabs from "@theme/Tabs"; import TabItem from "@theme/TabItem"; -:::warning -🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. -::: - # `cortex stop` :::info This CLI command calls the following API endpoint: @@ -18,8 +14,6 @@ This CLI command calls the following API endpoint: ::: This command stops the API server. - - ## Usage :::info You can use the `--verbose` flag to display more detailed output of the internal processes. To apply this flag, use the following format: `cortex --verbose [subcommand]`. 
@@ -27,26 +21,12 @@ You can use the `--verbose` flag to display more detailed output of the internal ```sh - # Stable cortex stop [options] - - # Beta - cortex-beta stop [options] - - # Nightly - cortex-nightly stop [options] ``` ```sh - # Stable cortex.exe stop [options] - - # Beta - cortex-beta.exe stop [options] - - # Nightly - cortex-nightly.exe stop [options] ``` @@ -56,4 +36,4 @@ You can use the `--verbose` flag to display more detailed output of the internal | Option | Description | Required | Default value | Example | |-------------------|-------------------------------------------------------|----------|---------------|-------------| -| `-h`, `--help` | Display help information for the command. | No | - | `-h` | +| `-h`, `--help` | Display help information for the command. | No | - | `-h` | \ No newline at end of file diff --git a/docs/docs/cli/update.mdx b/docs/docs/cli/update.mdx index f54d554cc..0f06f8476 100644 --- a/docs/docs/cli/update.mdx +++ b/docs/docs/cli/update.mdx @@ -7,16 +7,11 @@ slug: "update" import Tabs from "@theme/Tabs"; import TabItem from "@theme/TabItem"; -:::warning -🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. -::: - # `cortex update` This command updates Cortex.cpp to the provided version or the latest version. - ## Usage :::info You can use the `--verbose` flag to display more detailed output of the internal processes. To apply this flag, use the following format: `cortex --verbose [subcommand]`. 
@@ -24,26 +19,12 @@ You can use the `--verbose` flag to display more detailed output of the internal ```sh - # Stable cortex update [options] - - # Beta - cortex-beta update [options] - - # Nightly - cortex-nightly update [options] ``` ```sh - # Stable cortex.exe update [options] - - # Beta - cortex-beta.exe update [options] - - # Nightly - cortex-nightly.exe update [options] ``` @@ -57,7 +38,7 @@ By default, if no version is specified, Cortex.cpp will be updated to the latest | Option | Description | Required | Default Value | Example | |----------------------------|-------------------------------------------|----------|---------------|------------------------| | `-h`, `--help` | Display help information for the command. | No | - | `-h` | -| `-v` | Specify the version of the Cortex. | No | - | `-v 0.5.0`| +| `-v` | Specify the version of the Cortex. | No | - | `-v1.0.1`| diff --git a/docs/docs/engines/llamacpp.mdx b/docs/docs/engines/llamacpp.mdx index c550e2e92..2ace67944 100644 --- a/docs/docs/engines/llamacpp.mdx +++ b/docs/docs/engines/llamacpp.mdx @@ -99,8 +99,8 @@ ngl: 41 # Undefined = loaded from model | `n_probs` | Number of top token probabilities to return in the output. | No | | `min_keep` | Minimum number of tokens to keep during top-k sampling. | No | -:::info + \ No newline at end of file diff --git a/docs/docs/engines/onnx.mdx b/docs/docs/engines/onnx.mdx index 7110007d7..370aa1e53 100644 --- a/docs/docs/engines/onnx.mdx +++ b/docs/docs/engines/onnx.mdx @@ -54,8 +54,8 @@ stream: true # true | false | `prompt_template` | Template for formatting the prompt, including system messages and instructions. 
| Yes | -:::info + \ No newline at end of file diff --git a/docs/docs/engines/tensorrt-llm.mdx b/docs/docs/engines/tensorrt-llm.mdx index 1a06b0a86..94a3d3875 100644 --- a/docs/docs/engines/tensorrt-llm.mdx +++ b/docs/docs/engines/tensorrt-llm.mdx @@ -64,9 +64,9 @@ stream: true # true | false | `text_model` | Indicates if the text model is being used (true or false). | Yes | | `prompt_template` | Template for formatting the prompt, including system messages and instructions. | Yes | -:::info + \ No newline at end of file diff --git a/docs/docs/hub/hugging-face.mdx b/docs/docs/hub/hugging-face.mdx deleted file mode 100644 index a4409992a..000000000 --- a/docs/docs/hub/hugging-face.mdx +++ /dev/null @@ -1,128 +0,0 @@ ---- -title: Hugging Face -description: Cortex supports all `GGUF` and `ONNX` models available in Huggingface repositories, providing access to a wide range of models. ---- - -import Tabs from "@theme/Tabs"; -import TabItem from "@theme/TabItem"; - -:::warning -🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. -::: - - -Cortex.cpp supports all `GGUF` and `ONNX` models from the [Hugging Face Hub](https://huggingface.co), along with its built-in models. For `TensorRT-LLM` models, only built-in models in the [Cortex Model Repos](/docs/hub/cortex-hub) are supported. - -:::info -To pull a supported model from HuggingFace, use the format `ORG_ID/MODEL_ID`. -::: -## GGUF -![HF GGUF](/img/docs/gguf.png) -To view all available `GGUF` models on HuggingFace, select the `GGUF` tag in the Libraries section. 
- - - ```sh - # Stable - ## Pull the Codestral-22B-v0.1-GGUF model from the bartowski organization - cortex pull bartowski/Codestral-22B-v0.1-GGUF - - # Pull the gemma-7b model from the google organization - cortex pull google/gemma-7b - - # Beta - ## Pull the Codestral-22B-v0.1-GGUF model from the bartowski organization - cortex-beta pull bartowski/Codestral-22B-v0.1-GGUF - - # Pull the gemma-7b model from the google organization - cortex-beta pull google/gemma-7b - - # Nightly - ## Pull the Codestral-22B-v0.1-GGUF model from the bartowski organization - cortex-nightly pull bartowski/Codestral-22B-v0.1-GGUF - - # Pull the gemma-7b model from the google organization - cortex-nightly pull google/gemma-7b - ``` - - - ```sh - # Stable - ## Pull the Codestral-22B-v0.1-GGUF model from the bartowski organization - cortex.exe pull bartowski/Codestral-22B-v0.1-GGUF - - # Pull the gemma-7b model from the google organization - cortex.exe pull google/gemma-7b - - # Beta - ## Pull the Codestral-22B-v0.1-GGUF model from the bartowski organization - cortex-beta.exe pull bartowski/Codestral-22B-v0.1-GGUF - - # Pull the gemma-7b model from the google organization - cortex-beta.exe pull google/gemma-7b - - # Nightly - ## Pull the Codestral-22B-v0.1-GGUF model from the bartowski organization - cortex-nightly.exe pull bartowski/Codestral-22B-v0.1-GGUF - - # Pull the gemma-7b model from the google organization - cortex-nightly.exe pull google/gemma-7b - ``` - - - -## ONNX -![HF ONNX](/img/docs/onnx.png) -To view all available `ONNX` models on HuggingFace, select the `ONNX` tag in the Libraries section. 
- - - ```sh - # Stable - ## Pull the XLM-Roberta-Large-Vit-B-16Plus model from the immich-app organization - cortex pull immich-app/XLM-Roberta-Large-Vit-B-16Plus - - # Pull the mt0-base model from the bigscience organization - cortex pull bigscience/mt0-base - - # Beta - ## Pull the XLM-Roberta-Large-Vit-B-16Plus model from the immich-app organization - cortex-beta pull immich-app/XLM-Roberta-Large-Vit-B-16Plus - - # Pull the mt0-base model from the bigscience organization - cortex-beta pull bigscience/mt0-base - - # Nightly - ## Pull the XLM-Roberta-Large-Vit-B-16Plus model from the immich-app organization - cortex-nightly pull immich-app/XLM-Roberta-Large-Vit-B-16Plus - - # Pull the mt0-base model from the bigscience organization - cortex-nightly pull bigscience/mt0-base - ``` - - - ```sh - # Stable - ## Pull the XLM-Roberta-Large-Vit-B-16Plus model from the immich-app organization - cortex.exe pull immich-app/XLM-Roberta-Large-Vit-B-16Plus - - # Pull the mt0-base model from the bigscience organization - cortex.exe pull bigscience/mt0-base - - # Beta - ## Pull the XLM-Roberta-Large-Vit-B-16Plus model from the immich-app organization - cortex-beta.exe pull immich-app/XLM-Roberta-Large-Vit-B-16Plus - - # Pull the mt0-base model from the bigscience organization - cortex-beta.exe pull bigscience/mt0-base - - # Nightly - ## Pull the XLM-Roberta-Large-Vit-B-16Plus model from the immich-app organization - cortex-nightly.exe pull immich-app/XLM-Roberta-Large-Vit-B-16Plus - - # Pull the mt0-base model from the bigscience organization - cortex-nightly.exe pull bigscience/mt0-base - ``` - - - -## TensorRT-LLM -We are still working to support all available `TensorRT-LLM` models on HuggingFace. For now, Cortex.cpp only supports built-in `TensorRT-LLM` models, which can be downloaded from the [Cortex Model Repos](/docs/hub/cortex-hub). 
diff --git a/docs/sidebars.ts b/docs/sidebars.ts index bf520499c..bdad562c2 100644 --- a/docs/sidebars.ts +++ b/docs/sidebars.ts @@ -88,16 +88,12 @@ const sidebars: SidebarsConfig = { { type: "category", label: "Pulling Models", - link: { type: "doc", id: "hub/index" }, + link: { type: "doc", id: "capabilities/models/sources/index" }, collapsed: true, items: [ - { type: "doc", id: "hub/hugging-face", label: "Adding a HF Organization" }, - // { type: "doc", id: "hub/cortex-hub", label: "Cortex Model Repos" }, - // { - // type: "doc", - // id: "hub/nvidia-ngc", - // label: "Nvidia Catalog (Coming Soon)", - // }, + // { type: "doc", id: "capabilities/models/sources/hugging-face", label: "Hugging Face" }, + // { type: "doc", id: "capabilities/models/sources/cortex-hub", label: "Cortex Model Repos" }, + // { type: "doc", id: "capabilities/models/sources/nvidia-ngc", label: "Nvidia Catalog (Coming Soon)"}, ], }, { @@ -169,16 +165,15 @@ const sidebars: SidebarsConfig = { }, { type: "doc", id: "cli/cortex", label: "cortex" }, { type: "doc", id: "cli/start", label: "cortex start" }, - { type: "doc", id: "cli/chat", label: "cortex chat" }, + { type: "doc", id: "cli/run", label: "cortex run" }, // { type: "doc", id: "cli/embeddings", label: "cortex embeddings" }, // { type: "doc", id: "cli/presets", label: "cortex presets" }, { type: "doc", id: "cli/pull", label: "cortex pull" }, - { type: "doc", id: "cli/run", label: "cortex run" }, { type: "doc", id: "cli/models/index", label: "cortex models" }, { type: "doc", id: "cli/engines/index", label: "cortex engines" }, - { type: "doc", id: "cli/stop", label: "cortex stop" }, { type: "doc", id: "cli/ps", label: "cortex ps" }, { type: "doc", id: "cli/update", label: "cortex update" }, + { type: "doc", id: "cli/stop", label: "cortex stop" }, ] };