diff --git a/docs/docs/architecture/cortex-db.md b/docs/docs/architecture/cortex-db.md new file mode 100644 index 000000000..09de74ab4 --- /dev/null +++ b/docs/docs/architecture/cortex-db.md @@ -0,0 +1,3 @@ +--- +title: cortex.db +--- \ No newline at end of file diff --git a/docs/docs/basic-usage/cortexrc.mdx b/docs/docs/architecture/cortexrc.mdx similarity index 100% rename from docs/docs/basic-usage/cortexrc.mdx rename to docs/docs/architecture/cortexrc.mdx diff --git a/docs/docs/data-folder.mdx b/docs/docs/architecture/data-folder.mdx similarity index 98% rename from docs/docs/data-folder.mdx rename to docs/docs/architecture/data-folder.mdx index 7acfbd361..cda2a4402 100644 --- a/docs/docs/data-folder.mdx +++ b/docs/docs/architecture/data-folder.mdx @@ -132,7 +132,7 @@ The main directory that stores all Cortex-related files, located in the user's h #### `models/` Contains the AI models used by Cortex for processing and generating responses. :::info -For more information regarding the `model.list` and `model.yaml`, please see [here](/docs/model-yaml). +For more information regarding the `model.list` and `model.yaml`, please see [here](/docs/capabilities/models/model-yaml). ::: #### `logs/` Stores log files that are essential for troubleshooting and monitoring the performance of the Cortex.cpp API server and CLI. 
diff --git a/docs/docs/assistants/index.md b/docs/docs/assistants/index.md new file mode 100644 index 000000000..d38b33e52 --- /dev/null +++ b/docs/docs/assistants/index.md @@ -0,0 +1,3 @@ +--- +title: Assistants +--- \ No newline at end of file diff --git a/docs/docs/assistants/tools/index.md b/docs/docs/assistants/tools/index.md new file mode 100644 index 000000000..9f8badb32 --- /dev/null +++ b/docs/docs/assistants/tools/index.md @@ -0,0 +1,3 @@ +--- +title: Tools +--- \ No newline at end of file diff --git a/docs/docs/basic-usage/server.mdx b/docs/docs/basic-usage/api-server.mdx similarity index 90% rename from docs/docs/basic-usage/server.mdx rename to docs/docs/basic-usage/api-server.mdx index 69203b2e6..1003fff1f 100644 --- a/docs/docs/basic-usage/server.mdx +++ b/docs/docs/basic-usage/api-server.mdx @@ -1,16 +1,11 @@ --- -title: API +title: API Server description: Cortex Server Overview. -slug: "server" --- import Tabs from "@theme/Tabs"; import TabItem from "@theme/TabItem"; -:::warning -🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. -::: - Cortex has an [API server](https://cortex.so/api-reference) that runs at `localhost:39281`. diff --git a/docs/docs/basic-usage/command-line.md b/docs/docs/basic-usage/command-line.md deleted file mode 100644 index f48a0b94c..000000000 --- a/docs/docs/basic-usage/command-line.md +++ /dev/null @@ -1,48 +0,0 @@ ---- -title: Command Line Interface -description: Cortex CLI Overview. -slug: "command-line" ---- - -:::warning -🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. -::: - -Cortex has a [Docker](https://docs.docker.com/engine/reference/commandline/cli/) and [Ollama](https://ollama.com/)-inspired [CLI syntax](/docs/cli) for running model operations. 
- -## How It Works -Cortex’s CLI invokes the Cortex Engine’s API, which runs in the background on port `39281`. - - -## Basic Usage -### [Start Cortex Server](/docs/cli) -```bash -# By default the server will be started on port `39281` -cortex -``` -### [Run Model](/docs/cli/run) -Cortex supports these [Built-in Models](/models) -```bash -# Pull and start a model -cortex run -``` -### [Chat with Model](/docs/cli/chat) -```bash -# chat with a model -cortex chat -``` -### [Show the Model State](/docs/cli/ps) -```bash -# Show a model and cortex system status -cortex ps -``` -### [Stop Model](/docs/cli/stop) -```bash -# Stop a model -cortex stop -``` -### [Pull Model](/docs/cli/pull) -```bash -# Pull a model -cortex pull -``` diff --git a/docs/docs/basic-usage/integration/js-library.md b/docs/docs/basic-usage/cortex-js.md similarity index 79% rename from docs/docs/basic-usage/integration/js-library.md rename to docs/docs/basic-usage/cortex-js.md index e2d83fcdd..4e5a4a774 100644 --- a/docs/docs/basic-usage/integration/js-library.md +++ b/docs/docs/basic-usage/cortex-js.md @@ -1,9 +1,18 @@ --- title: cortex.js -description: How to integrate cortex.js with a Typescript application. -slug: "ts-library" +description: How to use the Cortex.js Library --- +[Cortex.js](https://github.com/janhq/cortex.js) is a Typescript client library that can be used to interact with the Cortex API. + +This is still a work in progress, and we will let the community know once a stable version is available. + +:::warning +🚧 Cortex.js is currently under development, and this page is a stub for future development. 
+::: + + + diff --git a/docs/docs/basic-usage/integration/py-library.md b/docs/docs/basic-usage/cortex-py.md similarity index 91% rename from docs/docs/basic-usage/integration/py-library.md rename to docs/docs/basic-usage/cortex-py.md index 3e126d068..4ff1504d8 100644 --- a/docs/docs/basic-usage/integration/py-library.md +++ b/docs/docs/basic-usage/cortex-py.md @@ -1,9 +1,15 @@ --- title: cortex.py description: How to integrate cortex.py with a Python application. -slug: "py-library" --- + +:::warning +🚧 Cortex.py is currently under development, and this page is a stub for future development. +::: + + + diff --git a/docs/docs/basic-usage/overview.mdx b/docs/docs/basic-usage/index.mdx similarity index 98% rename from docs/docs/basic-usage/overview.mdx rename to docs/docs/basic-usage/index.mdx index 107746845..93baed257 100644 --- a/docs/docs/basic-usage/overview.mdx +++ b/docs/docs/basic-usage/index.mdx @@ -1,6 +1,6 @@ --- title: Overview -description: Overview. +description: Cortex Overview slug: "basic-usage" --- diff --git a/docs/docs/built-in-models.mdx b/docs/docs/built-in-models.mdx deleted file mode 100644 index 836c2d874..000000000 --- a/docs/docs/built-in-models.mdx +++ /dev/null @@ -1,54 +0,0 @@ ---- -title: Built-in Models -description: Cortex Curated Models ---- - -import Tabs from "@theme/Tabs"; -import TabItem from "@theme/TabItem"; - - -:::warning -🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. -::: - -Cortex.cpp maintains a collection of built-in models that cover the most popular open-source models. - -## Cortex Model Repos -Built-in models are [Cortex Model Repositories](/docs/hub/cortex-hub) hosted on HuggingFace and pre-compiled for different engines, allowing one model to have multiple branches in various formats. 
- -## Built-in Model Variants -Built-in models are made available across the following variants: - -- **By format**: `gguf`, `onnx`, and `tensorrt-llm` -- **By Size**: `7b`, `13b`, and more. -- **By quantizations**: `q4`, `q8`, and more. -:::info -You can see our full list of Built-in Models [here](/models). -::: -### Run Model - -Built-in models can be run via Docker-like syntax: - -```bash -# Run a model -cortex run model-id -# Run a model variant -cortex run model-id:branch -``` -For example: - -```bash -# Run Mistral Built-in Model -cortex pull mistral -# Run Mistral in GGUF format -cortex pull mistral:gguf -# Run Mistral in TensorRT-LLM format -cortex engines tensorrt-llm init -cortex pull mistral:7b-tensorrt-llm -# Run Mistral in ONNX format -cortex engines onnx init -cortex pull mistral:onnx -# Run Mistral with a different size -cortex pull mistral:7b-gguf - -``` \ No newline at end of file diff --git a/docs/docs/capabilities/audio-generation.md b/docs/docs/capabilities/audio-generation.md new file mode 100644 index 000000000..355f25d6d --- /dev/null +++ b/docs/docs/capabilities/audio-generation.md @@ -0,0 +1,3 @@ +--- +unlisted: true +--- \ No newline at end of file diff --git a/docs/docs/capabilities/embeddings.md b/docs/docs/capabilities/embeddings.md new file mode 100644 index 000000000..2c2fb4d54 --- /dev/null +++ b/docs/docs/capabilities/embeddings.md @@ -0,0 +1,7 @@ +--- +title: Embeddings +--- + +:::info +🚧 Cortex is currently under development, and this page is a stub for future development. 
+::: \ No newline at end of file diff --git a/docs/docs/capabilities/hardware/index.md b/docs/docs/capabilities/hardware/index.md new file mode 100644 index 000000000..acf190ecc --- /dev/null +++ b/docs/docs/capabilities/hardware/index.md @@ -0,0 +1,39 @@ +--- +title: Hardware Awareness +draft: True +--- + +# Hardware Awareness + +Cortex is designed to be hardware aware, meaning it can detect your hardware configuration and automatically set parameters to optimize compatibility and performance, and avoid hardware-related errors. + +## Hardware Optimization + +Cortex's Hardware awareness allows it to do the following: + +- Context Length Optimization: Cortex maximizes the context length allowed by your hardware, ensuring that you can work with larger datasets and more complex models without performance degradation. +- Engine Optimization: we detect your CPU and GPU, and maintain a list of optimized engines for each hardware configuration, e.g. taking advantage of AVX-2 and AVX-512 instructions on CPUs. + +## Hardware Awareness + +- Preventing hardware-related errors +- Error Handling for Insufficient VRAM: When loading a second model, Cortex provides useful error messages if there is insufficient VRAM memory. This proactive approach helps prevent out-of-memory errors and guides users on how to resolve the issue. + +### Model Compatibility + +- Model Compatibility Detection: Cortex automatically detects your hardware configuration to determine the compatibility of different models. This ensures that the models you use are optimized for your specific hardware setup. +- This is for the Hub, and for existing Models + +## Hardware Management + +### Activating Specific GPUs + +Cortex gives you the ability to activate specific GPUs for inference, giving you fine-grained control over hardware resources. This is especially useful for multi-GPU systems.
+- Activate GPUs: Cortex can activate and utilize GPUs to accelerate processing, ensuring that computationally intensive tasks are handled efficiently. +You also have the option to deactivate all GPUs, to run inference on only CPU and RAM. + +### Hardware Monitoring + +- Monitoring System Usage +- Monitor VRAM Usage: Cortex keeps track of VRAM usage to prevent out-of-memory (OOM) errors. It ensures that VRAM is used efficiently and provides warnings when resources are running low. +- Monitor System Resource Usage: Cortex continuously monitors the usage of system resources, including CPU, RAM, and GPUs. This helps in maintaining optimal performance and identifying potential bottlenecks. diff --git a/docs/docs/capabilities/image-generation.md b/docs/docs/capabilities/image-generation.md new file mode 100644 index 000000000..355f25d6d --- /dev/null +++ b/docs/docs/capabilities/image-generation.md @@ -0,0 +1,3 @@ +--- +unlisted: true +--- \ No newline at end of file diff --git a/docs/docs/model-overview.mdx b/docs/docs/capabilities/models/index.mdx similarity index 90% rename from docs/docs/model-overview.mdx rename to docs/docs/capabilities/models/index.mdx index 0eecc9ee4..b6f4b9036 100644 --- a/docs/docs/model-overview.mdx +++ b/docs/docs/capabilities/models/index.mdx @@ -20,7 +20,7 @@ Cortex.cpp supports three model formats: - TensorRT-LLM :::info -For details on each format, see the [Model Formats](/docs/model-yaml#model-formats) page. +For details on each format, see the [Model Formats](/docs/capabilities/models/model-yaml#model-formats) page. ::: ## Built-in Models @@ -38,5 +38,5 @@ You can see our full list of Built-in Models [here](/models). ::: ## Next steps -- Cortex requires a `model.yaml` file to run a model. Find out more [here](/docs/model-yaml). +- Cortex requires a `model.yaml` file to run a model. Find out more [here](/docs/capabilities/models/model-yaml). - Cortex supports multiple model hubs hosting built-in models. 
See details [here](/docs/model-sources). \ No newline at end of file diff --git a/docs/docs/model-yaml.mdx b/docs/docs/capabilities/models/model-yaml.mdx similarity index 91% rename from docs/docs/model-yaml.mdx rename to docs/docs/capabilities/models/model-yaml.mdx index 53a25a770..983f0f528 100644 --- a/docs/docs/model-yaml.mdx +++ b/docs/docs/capabilities/models/model-yaml.mdx @@ -6,24 +6,14 @@ description: The model.yaml import Tabs from "@theme/Tabs"; import TabItem from "@theme/TabItem"; - :::warning 🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. ::: Cortex.cpp uses a `model.yaml` file to specify the configuration for running a model. Models can be downloaded from the Cortex Model Hub or Hugging Face repositories. Once downloaded, the model data is parsed and stored in the `models` folder. -## `model.list` -The `model.list` file acts as a registry for all model files used by Cortex.cpp. It keeps track of every downloaded and imported model by listing their details in a structured format. 
Each time a model is downloaded or imported, Cortex.cpp will automatically append an entry to `model.list` with the following format: -``` -# Downloaded model - - -# Imported model - local imported +## Structure of `model.yaml` -``` -## `model.yaml` High Level Structure Here is an example of `model.yaml` format: ```yaml # BEGIN GENERAL METADATA @@ -71,7 +61,7 @@ ngl: 33 # Undefined = loaded from model The `model.yaml` is composed of three high-level sections: -### Cortex Meta +### Model Metadata ```yaml model: gemma-2-9b-it-Q8_0 name: Llama 3.1 diff --git a/docs/docs/model-presets.mdx b/docs/docs/capabilities/models/presets.mdx similarity index 98% rename from docs/docs/model-presets.mdx rename to docs/docs/capabilities/models/presets.mdx index d4196e146..799cf6cbc 100644 --- a/docs/docs/model-presets.mdx +++ b/docs/docs/capabilities/models/presets.mdx @@ -7,6 +7,7 @@ description: Model Presets 🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. 
::: + \ No newline at end of file diff --git a/docs/docs/capabilities/moderation.md b/docs/docs/capabilities/moderation.md new file mode 100644 index 000000000..355f25d6d --- /dev/null +++ b/docs/docs/capabilities/moderation.md @@ -0,0 +1,3 @@ +--- +unlisted: true +--- \ No newline at end of file diff --git a/docs/docs/capabilities/reasoning.md b/docs/docs/capabilities/reasoning.md new file mode 100644 index 000000000..355f25d6d --- /dev/null +++ b/docs/docs/capabilities/reasoning.md @@ -0,0 +1,3 @@ +--- +unlisted: true +--- \ No newline at end of file diff --git a/docs/docs/capabilities/speech-to-text.md b/docs/docs/capabilities/speech-to-text.md new file mode 100644 index 000000000..355f25d6d --- /dev/null +++ b/docs/docs/capabilities/speech-to-text.md @@ -0,0 +1,3 @@ +--- +unlisted: true +--- \ No newline at end of file diff --git a/docs/docs/capabilities/text-generation.md b/docs/docs/capabilities/text-generation.md new file mode 100644 index 000000000..680625667 --- /dev/null +++ b/docs/docs/capabilities/text-generation.md @@ -0,0 +1,7 @@ +--- +title: Text Generation +--- + +:::info +🚧 Cortex is currently under development, and this page is a stub for future development. 
+::: \ No newline at end of file diff --git a/docs/docs/capabilities/text-to-speech.md b/docs/docs/capabilities/text-to-speech.md new file mode 100644 index 000000000..355f25d6d --- /dev/null +++ b/docs/docs/capabilities/text-to-speech.md @@ -0,0 +1,3 @@ +--- +unlisted: true +--- \ No newline at end of file diff --git a/docs/docs/capabilities/vision.md b/docs/docs/capabilities/vision.md new file mode 100644 index 000000000..355f25d6d --- /dev/null +++ b/docs/docs/capabilities/vision.md @@ -0,0 +1,3 @@ +--- +unlisted: true +--- \ No newline at end of file diff --git a/docs/docs/chat-completions.mdx b/docs/docs/chat-completions.mdx index c39f25877..9b1dce01d 100644 --- a/docs/docs/chat-completions.mdx +++ b/docs/docs/chat-completions.mdx @@ -1,7 +1,6 @@ --- title: Chat Completions -description: Chat Completions Feature. -slug: "text-generation" +description: Chat Completions Feature --- import Tabs from "@theme/Tabs"; diff --git a/docs/docs/integrate-remote-engine.mdx b/docs/docs/engines/engine-extension.mdx similarity index 94% rename from docs/docs/integrate-remote-engine.mdx rename to docs/docs/engines/engine-extension.mdx index b32fcc635..8a62cd813 100644 --- a/docs/docs/integrate-remote-engine.mdx +++ b/docs/docs/engines/engine-extension.mdx @@ -1,8 +1,13 @@ --- -title: Integrate Remote Engine -description: How to integrate remote engine into Cortex. +title: Building Engine Extensions +description: Cortex supports Engine Extensions to integrate both local inference engines and Remote APIs. --- +:::info +🚧 Cortex is currently under development, and this page is a stub for future development.
+::: + + diff --git a/docs/docs/engines/llamacpp.mdx b/docs/docs/engines/llamacpp.mdx index f65c15473..c550e2e92 100644 --- a/docs/docs/engines/llamacpp.mdx +++ b/docs/docs/engines/llamacpp.mdx @@ -13,7 +13,7 @@ Cortex uses `llama.cpp` as the default engine by default the `GGUF` format is su Cortex automatically generates any `GGUF` model from the HuggingFace repo that does not have the `model.yaml` file. ::: -## [`model.yaml`](/docs/model-yaml) Sample +## [`model.yaml`](/docs/capabilities/models/model-yaml) Sample ```yaml ## BEGIN GENERAL GGUF METADATA id: Mistral-Nemo-Instruct-2407 # Model ID unique between models (author / quantization) diff --git a/docs/docs/engines/onnx.mdx b/docs/docs/engines/onnx.mdx index d4e999406..7110007d7 100644 --- a/docs/docs/engines/onnx.mdx +++ b/docs/docs/engines/onnx.mdx @@ -1,6 +1,7 @@ --- title: ONNX -description: ONNX Model Format. +description: ONNX Model Format +unlisted: true --- :::warning @@ -17,7 +18,7 @@ cortex engines onnx init ## Run an ONNX model cortex run openhermes-2.5:7b-onnx ``` -## [`model.yaml`](/docs/model-yaml) Sample +## [`model.yaml`](/docs/capabilities/models/model-yaml) Sample ```yaml name: openhermes-2.5 model: openhermes diff --git a/docs/docs/engines/tensorrt-llm.mdx b/docs/docs/engines/tensorrt-llm.mdx index 0cfe7d483..1a06b0a86 100644 --- a/docs/docs/engines/tensorrt-llm.mdx +++ b/docs/docs/engines/tensorrt-llm.mdx @@ -1,6 +1,7 @@ --- title: TensorRT-LLM -description: TensorRT-LLM Model Format. 
+description: TensorRT-LLM Model Format +unlisted: true --- :::warning @@ -17,7 +18,7 @@ cortex engines tensorrt-llm init ## Run a TensorRT-LLM model cortex run openhermes-2.5:7b-tensorrt-llm ``` -## [`model.yaml`](/docs/model-yaml) Sample +## [`model.yaml`](/docs/capabilities/models/model-yaml) Sample ```yaml name: Openhermes-2.5 7b Linux Ada model: openhermes-2.5:7B-tensorrt-llm diff --git a/docs/docs/guides/function-calling.md b/docs/docs/guides/function-calling.md new file mode 100644 index 000000000..40a708675 --- /dev/null +++ b/docs/docs/guides/function-calling.md @@ -0,0 +1,3 @@ +--- +title: Function Calling +--- \ No newline at end of file diff --git a/docs/docs/guides/structured-outputs.md b/docs/docs/guides/structured-outputs.md new file mode 100644 index 000000000..b14739ab2 --- /dev/null +++ b/docs/docs/guides/structured-outputs.md @@ -0,0 +1,3 @@ +--- +title: Structured Outputs +--- \ No newline at end of file diff --git a/docs/docs/installation/gpu-acceleration.mdx b/docs/docs/installation/gpu-acceleration.mdx deleted file mode 100644 index ff57a714f..000000000 --- a/docs/docs/installation/gpu-acceleration.mdx +++ /dev/null @@ -1,8 +0,0 @@ ---- -title: GPU Acceleration -description: GPU Acceleration. ---- - -:::warning -🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. -::: \ No newline at end of file diff --git a/docs/docs/quickstart.mdx b/docs/docs/quickstart.mdx index 687707c66..2ebf53c7b 100644 --- a/docs/docs/quickstart.mdx +++ b/docs/docs/quickstart.mdx @@ -171,6 +171,6 @@ This command starts the Cortex.cpp API server at `localhost:39281`. ## What's Next? Now that Cortex.cpp is set up, here are the next steps to explore: -1. Adjust the folder path and configuration using the [`.cortexrc`](/docs/basic-usage/cortexrc) file. -2. Explore the Cortex.cpp [data folder](/docs/data-folder) to understand how it stores data. -3. 
Learn about the structure of the [`model.yaml`](/docs/model-yaml) file in Cortex.cpp. +1. Adjust the folder path and configuration using the [`.cortexrc`](/docs/architecture/cortexrc) file. +2. Explore the Cortex.cpp [data folder](/docs/architecture/data-folder) to understand how it stores data. +3. Learn about the structure of the [`model.yaml`](/docs/capabilities/models/model-yaml) file in Cortex.cpp. diff --git a/docs/docusaurus.config.ts b/docs/docusaurus.config.ts index 32b32ab99..659e155d7 100644 --- a/docs/docusaurus.config.ts +++ b/docs/docusaurus.config.ts @@ -410,7 +410,6 @@ const config: Config = { items: [ { to: "/models", label: "Models", position: "left" }, { to: "/changelog", label: "Changelog", position: "left" }, - { to: "/contact", label: "Enterprise", position: "left" }, { type: "doc", position: "right", diff --git a/docs/sidebars.ts b/docs/sidebars.ts index d5d7fd020..36a3e7420 100644 --- a/docs/sidebars.ts +++ b/docs/sidebars.ts @@ -43,269 +43,120 @@ const sidebars: SidebarsConfig = { { type: "doc", id: "installation/mac", label: "Mac" }, { type: "doc", id: "installation/linux", label: "Linux" }, { type: "doc", id: "installation/docker", label: "Docker" }, - { - type: "doc", - id: "installation/gpu-acceleration", - label: "GPU Acceleration", - }, ], }, - { - type: "html", - value: "BASIC USAGE", - - className: "sidebar-divider", - }, - { type: "doc", id: "basic-usage/overview", label: "Overview" }, - { type: "doc", id: "basic-usage/cortexrc", label: ".cortexrc" }, - { type: "doc", id: "model-yaml", label: "model.yaml" }, - { type: "doc", id: "data-folder", label: "Data Folder" }, { type: "category", - label: "Libraries", - link: { - type: "generated-index", - }, + label: "Basic Usage", + link: { type: "doc", id: "basic-usage/index" }, collapsed: true, items: [ + { type: "doc", id: "basic-usage/api-server", label: "API Server" }, { type: "doc", - id: "basic-usage/integration/js-library", + id: "basic-usage/cortex-js", label: "cortex.js", }, { 
type: "doc", - id: "basic-usage/integration/py-library", + id: "basic-usage/cortex-py", label: "cortex.py", }, ], }, { type: "category", - label: "Model Sources", + label: "Architecture", + link: { + type: "generated-index", + }, + collapsed: true, + items: [ + { type: "doc", id: "architecture/data-folder", label: "Cortex Data Folder" }, + { type: "doc", id: "architecture/cortex-db", label: "cortex.db" }, + { type: "doc", id: "architecture/cortexrc", label: ".cortexrc" }, + ] + }, + { + type: "html", + value: "CAPABILITIES", + className: "sidebar-divider", + }, + { + type: "category", + label: "Pulling Models", link: { type: "doc", id: "hub/index" }, collapsed: true, items: [ - { type: "doc", id: "hub/cortex-hub", label: "Cortex Model Repos" }, - { type: "doc", id: "hub/hugging-face", label: "HuggingFace Repos" }, - { - type: "doc", - id: "hub/nvidia-ngc", - label: "Nvidia Catalog (Coming Soon)", - }, + { type: "doc", id: "hub/hugging-face", label: "Adding a HF Organization" }, + // { type: "doc", id: "hub/cortex-hub", label: "Cortex Model Repos" }, + // { + // type: "doc", + // id: "hub/nvidia-ngc", + // label: "Nvidia Catalog (Coming Soon)", + // }, ], }, { type: "category", - label: "Engines", + label: "Running Models", + link: { type: "doc", id: "capabilities/models/index"}, + collapsed: true, + items: [ + { type: "doc", id: "capabilities/models/model-yaml", label: "model.yaml" }, + { type: "doc", id: "capabilities/models/presets", label: "Model Presets" }, + ], + }, + { + type: "category", + label: "Engine Management", link: { type: "doc", id: "engines/index" }, collapsed: true, items: [ - { type: "doc", id: "engines/llamacpp", label: "Llama.cpp" }, + { type: "doc", id: "engines/llamacpp", label: "llama.cpp" }, // { type: "doc", id: "engines/tensorrt-llm", label: "TensorRT-LLM" }, // { type: "doc", id: "engines/onnx", label: "ONNX" }, + { type: "doc", id: "engines/engine-extension", label: "Building Engine Extensions" }, + ], }, - // { - // type: "category", - 
// label: "Basic Usage", - // link: { - // type: "generated-index", - // }, - // collapsed: true, - // items: [ - // { type: "doc", id: "basic-usage/command-line", label: "CLI" }, - // { type: "doc", id: "basic-usage/server", label: "API" }, - // { - // type: "category", - // label: "Integration", - // link: { - // type: "generated-index", - // }, - // collapsed: true, - // items: [ - // { - // type: "doc", - // id: "basic-usage/integration/js-library", - // label: "cortex.js", - // }, - // { - // type: "doc", - // id: "basic-usage/integration/py-library", - // label: "cortex.py", - // }, - // ], - // }, - // ], - // }, - // { type: "doc", id: "telemetry", label: "Telemetry" }, - // MODELs - // { - // type: "html", - // value: "MODELS", - // className: "sidebar-divider", - // }, - // { type: "doc", id: "model-overview", label: "Overview" }, - // { type: "doc", id: "model-yaml", label: "model.yaml" }, - // { type: "doc", id: "built-in-models", label: "Built-in Models" }, - // { - // type: "category", - // label: "Using Models", - // link: { type: "doc", id: "using-models" }, - // collapsed: true, - // items: [ - // { type: "doc", id: "model-yaml", label: "model.yaml" }, - // // { type: "doc", id: "model-presets", label: "Model Presets" }, - // { type: "doc", id: "built-in-models", label: "Built-in Models" }, - // ], - // }, - // BASIC USAGE - // { - // type: "html", - // value: "BASIC USAGE", - // className: "sidebar-divider", - // }, - // { type: "doc", id: "command-line", label: "CLI" }, - // { type: "doc", id: "ts-library", label: "Typescript Library" }, - // { type: "doc", id: "py-library", label: "Python Library" }, - // { type: "doc", id: "server", label: "Server Endpoint" }, - // CAPABILITIES - // { - // type: "html", - // value: "ENDPOINTS", - // className: "sidebar-divider", - // }, - // { type: "doc", id: "chat-completions", label: "Chat Completions" }, - // { type: "doc", id: "embeddings", label: "Embeddings" }, - // CLI - { - type: "html", - value: 
"CLI", - className: "sidebar-divider", - }, - { type: "doc", id: "cli/cortex", label: "cortex" }, - { type: "doc", id: "cli/start", label: "cortex start" }, - { type: "doc", id: "cli/chat", label: "cortex chat" }, - // { type: "doc", id: "cli/embeddings", label: "cortex embeddings" }, - // { type: "doc", id: "cli/presets", label: "cortex presets" }, - { type: "doc", id: "cli/pull", label: "cortex pull" }, - { type: "doc", id: "cli/run", label: "cortex run" }, - { type: "doc", id: "cli/models/index", label: "cortex models" }, - { type: "doc", id: "cli/engines/index", label: "cortex engines" }, - { type: "doc", id: "cli/stop", label: "cortex stop" }, - { type: "doc", id: "cli/ps", label: "cortex ps" }, - { type: "doc", id: "cli/update", label: "cortex update" }, - // { type: "doc", id: "cli/telemetry", label: "cortex telemetry" }, - // { type: "doc", id: "cli/benchmark", label: "cortex benchmark" }, - // ARCHITECTURE - // { - // type: "html", - // value: "ARCHITECTURE", - // className: "sidebar-divider", - // }, - // { type: "doc", id: "architecture", label: "Cortex" }, - // { - // type: "category", - // label: "Engines", - // link: { - // type: "generated-index", - // }, - // collapsed: true, - // items: [ - // { type: "doc", id: "cortex-llamacpp", label: "llama.cpp" }, - // { type: "doc", id: "cortex-tensorrt-llm", label: "TensorRT-LLM" }, - // { type: "doc", id: "cortex-onnx", label: "ONNX" }, - // { - // type: "doc", - // id: "integrate-remote-engine", - // label: "Integrate Remote Engine", - // }, - // ], - // }, - // { - // type: "category", - // label: "Infrastructure", - // link: { - // type: "generated-index", - // }, - // collapsed: true, - // items: [ - // { type: "doc", id: "telemetry-architecture", label: "Telemetry Infra" }, - // { - // type: "doc", - // id: "benchmarking-architecture", - // label: "Benchmarking Infra", - // }, - // ], - // }, - // { - // type: "html", - // value: "TROUBLESHOOTING", - // className: "sidebar-divider", - // }, - // { 
type: "doc", id: "troubleshooting", label: "Troubleshooting" }, - ], - platform: [ - { - type: "html", - value: - '', - }, { - type: "html", - value: - '', + type: "category", + label: "Hardware Awareness", + link: { type: "doc", id: "capabilities/hardware/index" }, + collapsed: true, + items: [ + ], }, + { type: "doc", id: "capabilities/text-generation", label: "Text Generation" }, + // { type: "doc", id: "capabilities/image-generation", label: "Image Generation" }, + // { type: "doc", id: "capabilities/vision", label: "Vision" }, + // { type: "doc", id: "capabilities/audio-generation", label: "Audio Generation" }, + // { type: "doc", id: "capabilities/text-to-speech", label: "Text to Speech" }, + // { type: "doc", id: "capabilities/speech-to-text", label: "Speech to text" }, + { type: "doc", id: "capabilities/embeddings", label: "Embeddings" }, + // { type: "doc", id: "capabilities/moderation", label: "Moderation" }, + // { type: "doc", id: "capabilities/reasoning", label: "Reasoning" }, { type: "html", - value: "GET STARTED", + value: "GUIDES", className: "sidebar-divider", }, - "cortex-platform/about", + { type: "doc", id: "guides/function-calling", label: "Function Calling"}, + { type: "doc", id: "guides/structured-outputs", label: "Structured Outputs"}, { type: "html", - value: "ENDPOINTS", + value: "ASSISTANTS", className: "sidebar-divider", }, - { type: "doc", id: "cortex-platform/benchmarking", label: "Benchmarking" }, - { - type: "html", - value: "ARCHITECTURE", - className: "sidebar-divider", - }, - { type: "doc", id: "architecture", label: "Cortex" }, - { - type: "category", - label: "Engines", - link: { - type: "generated-index", - }, - collapsed: true, - items: [ - { type: "doc", id: "cortex-llamacpp", label: "llama.cpp" }, - { type: "doc", id: "cortex-tensorrt-llm", label: "TensorRT-LLM" }, - { type: "doc", id: "cortex-onnx", label: "ONNX" }, - { - type: "doc", - id: "integrate-remote-engine", - label: "Integrate Remote Engine", - }, - ], - }, + { 
type: "doc", id: "assistants/index", label: "Assistants"}, { type: "category", - label: "Infrastructure", - link: { - type: "generated-index", - }, + label: "Tools", + link: { type: "doc", id: "assistants/tools/index" }, collapsed: true, items: [ - { type: "doc", id: "telemetry-architecture", label: "Telemetry Infra" }, - { - type: "doc", - id: "benchmarking-architecture", - label: "Benchmarking Infra", - }, + // { type: "doc", id: "assistants/tools/file-search", label: "File Search" }, ], }, { @@ -313,19 +164,19 @@ const sidebars: SidebarsConfig = { value: "CLI", className: "sidebar-divider", }, - // { type: "doc", id: "cli/cortex", label: "cortex" }, - // { type: "doc", id: "cli/chat", label: "cortex chat" }, + { type: "doc", id: "cli/cortex", label: "cortex" }, + { type: "doc", id: "cli/start", label: "cortex start" }, + { type: "doc", id: "cli/chat", label: "cortex chat" }, // { type: "doc", id: "cli/embeddings", label: "cortex embeddings" }, - { type: "doc", id: "cli/presets", label: "cortex presets" }, - // { type: "doc", id: "cli/pull", label: "cortex pull" }, - // { type: "doc", id: "cli/run", label: "cortex run" }, - // { type: "doc", id: "cli/models/index", label: "cortex models" }, - // { type: "doc", id: "cli/engines/index", label: "cortex engines" }, - // { type: "doc", id: "cli/stop", label: "cortex stop" }, - // { type: "doc", id: "cli/ps", label: "cortex ps" }, - // { type: "doc", id: "cli/telemetry", label: "cortex telemetry" }, - { type: "doc", id: "cli/benchmark", label: "cortex benchmark" }, - ], + // { type: "doc", id: "cli/presets", label: "cortex presets" }, + { type: "doc", id: "cli/pull", label: "cortex pull" }, + { type: "doc", id: "cli/run", label: "cortex run" }, + { type: "doc", id: "cli/models/index", label: "cortex models" }, + { type: "doc", id: "cli/engines/index", label: "cortex engines" }, + { type: "doc", id: "cli/stop", label: "cortex stop" }, + { type: "doc", id: "cli/ps", label: "cortex ps" }, + { type: "doc", id: 
"cli/update", label: "cortex update" }, + ] }; export default sidebars; diff --git a/docs/static/openapi/cortex.json b/docs/static/openapi/cortex.json index a01e0790d..f6120a4ad 100644 --- a/docs/static/openapi/cortex.json +++ b/docs/static/openapi/cortex.json @@ -227,9 +227,7 @@ } } }, - "tags": [ - "Inference" - ] + "tags": ["Chat"] } }, "/v1/models/pull": { @@ -1357,7 +1355,7 @@ }, "tags": [ { - "name": "Inference", + "name": "Chat", "description": "This endpoint initiates interaction with a Large Language Models (LLM)." }, { @@ -1393,7 +1391,7 @@ { "name": "CORTEX", "tags": [ - "Inference", + "Chat", "Engines", "Events", "Models",