diff --git a/docs/docs/architecture/cortex-db.md b/docs/docs/architecture/cortex-db.md new file mode 100644 index 000000000..09de74ab4 --- /dev/null +++ b/docs/docs/architecture/cortex-db.md @@ -0,0 +1,3 @@ +--- +title: cortex.db +--- \ No newline at end of file diff --git a/docs/docs/basic-usage/cortexrc.mdx b/docs/docs/architecture/cortexrc.mdx similarity index 100% rename from docs/docs/basic-usage/cortexrc.mdx rename to docs/docs/architecture/cortexrc.mdx diff --git a/docs/docs/data-folder.mdx b/docs/docs/architecture/data-folder.mdx similarity index 98% rename from docs/docs/data-folder.mdx rename to docs/docs/architecture/data-folder.mdx index 7acfbd361..cda2a4402 100644 --- a/docs/docs/data-folder.mdx +++ b/docs/docs/architecture/data-folder.mdx @@ -132,7 +132,7 @@ The main directory that stores all Cortex-related files, located in the user's h #### `models/` Contains the AI models used by Cortex for processing and generating responses. :::info -For more information regarding the `model.list` and `model.yaml`, please see [here](/docs/model-yaml). +For more information regarding the `model.list` and `model.yaml`, please see [here](/docs/capabilities/models/model-yaml). ::: #### `logs/` Stores log files that are essential for troubleshooting and monitoring the performance of the Cortex.cpp API server and CLI. 
diff --git a/docs/docs/assistants/index.md b/docs/docs/assistants/index.md new file mode 100644 index 000000000..d38b33e52 --- /dev/null +++ b/docs/docs/assistants/index.md @@ -0,0 +1,3 @@ +--- +title: Assistants +--- \ No newline at end of file diff --git a/docs/docs/assistants/tools/index.md b/docs/docs/assistants/tools/index.md new file mode 100644 index 000000000..9f8badb32 --- /dev/null +++ b/docs/docs/assistants/tools/index.md @@ -0,0 +1,3 @@ +--- +title: Tools +--- \ No newline at end of file diff --git a/docs/docs/basic-usage/server.mdx b/docs/docs/basic-usage/api-server.mdx similarity index 90% rename from docs/docs/basic-usage/server.mdx rename to docs/docs/basic-usage/api-server.mdx index 69203b2e6..1003fff1f 100644 --- a/docs/docs/basic-usage/server.mdx +++ b/docs/docs/basic-usage/api-server.mdx @@ -1,16 +1,11 @@ --- -title: API +title: API Server description: Cortex Server Overview. -slug: "server" --- import Tabs from "@theme/Tabs"; import TabItem from "@theme/TabItem"; -:::warning -🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. -::: - Cortex has an [API server](https://cortex.so/api-reference) that runs at `localhost:39281`. diff --git a/docs/docs/basic-usage/command-line.md b/docs/docs/basic-usage/command-line.md deleted file mode 100644 index f48a0b94c..000000000 --- a/docs/docs/basic-usage/command-line.md +++ /dev/null @@ -1,48 +0,0 @@ ---- -title: Command Line Interface -description: Cortex CLI Overview. -slug: "command-line" ---- - -:::warning -🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. -::: - -Cortex has a [Docker](https://docs.docker.com/engine/reference/commandline/cli/) and [Ollama](https://ollama.com/)-inspired [CLI syntax](/docs/cli) for running model operations. 
- -## How It Works -Cortex’s CLI invokes the Cortex Engine’s API, which runs in the background on port `39281`. - - -## Basic Usage -### [Start Cortex Server](/docs/cli) -```bash -# By default the server will be started on port `39281` -cortex -``` -### [Run Model](/docs/cli/run) -Cortex supports these [Built-in Models](/models) -```bash -# Pull and start a model -cortex run -``` -### [Chat with Model](/docs/cli/chat) -```bash -# chat with a model -cortex chat -``` -### [Show the Model State](/docs/cli/ps) -```bash -# Show a model and cortex system status -cortex ps -``` -### [Stop Model](/docs/cli/stop) -```bash -# Stop a model -cortex stop -``` -### [Pull Model](/docs/cli/pull) -```bash -# Pull a model -cortex pull -``` diff --git a/docs/docs/basic-usage/integration/js-library.md b/docs/docs/basic-usage/cortex-js.md similarity index 79% rename from docs/docs/basic-usage/integration/js-library.md rename to docs/docs/basic-usage/cortex-js.md index e2d83fcdd..4e5a4a774 100644 --- a/docs/docs/basic-usage/integration/js-library.md +++ b/docs/docs/basic-usage/cortex-js.md @@ -1,9 +1,18 @@ --- title: cortex.js -description: How to integrate cortex.js with a Typescript application. -slug: "ts-library" +description: How to use the Cortex.js Library --- +[Cortex.js](https://github.com/janhq/cortex.js) is a Typescript client library that can be used to interact with the Cortex API. + +This is still a work in progress, and we will let the community know once a stable version is available. + +:::warning +🚧 Cortex.js is currently under development, and this page is a stub for future development. 
+::: + + + diff --git a/docs/docs/basic-usage/integration/py-library.md b/docs/docs/basic-usage/cortex-py.md similarity index 91% rename from docs/docs/basic-usage/integration/py-library.md rename to docs/docs/basic-usage/cortex-py.md index 3e126d068..4ff1504d8 100644 --- a/docs/docs/basic-usage/integration/py-library.md +++ b/docs/docs/basic-usage/cortex-py.md @@ -1,9 +1,15 @@ --- title: cortex.py description: How to integrate cortex.py with a Python application. -slug: "py-library" --- + +:::warning +🚧 Cortex.py is currently under development, and this page is a stub for future development. +::: + + + diff --git a/docs/docs/basic-usage/overview.mdx b/docs/docs/basic-usage/index.mdx similarity index 98% rename from docs/docs/basic-usage/overview.mdx rename to docs/docs/basic-usage/index.mdx index 107746845..93baed257 100644 --- a/docs/docs/basic-usage/overview.mdx +++ b/docs/docs/basic-usage/index.mdx @@ -1,6 +1,6 @@ --- title: Overview -description: Overview. +description: Cortex Overview slug: "basic-usage" --- diff --git a/docs/docs/built-in-models.mdx b/docs/docs/built-in-models.mdx deleted file mode 100644 index 836c2d874..000000000 --- a/docs/docs/built-in-models.mdx +++ /dev/null @@ -1,54 +0,0 @@ ---- -title: Built-in Models -description: Cortex Curated Models ---- - -import Tabs from "@theme/Tabs"; -import TabItem from "@theme/TabItem"; - - -:::warning -🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. -::: - -Cortex.cpp maintains a collection of built-in models that cover the most popular open-source models. - -## Cortex Model Repos -Built-in models are [Cortex Model Repositories](/docs/hub/cortex-hub) hosted on HuggingFace and pre-compiled for different engines, allowing one model to have multiple branches in various formats. 
- -## Built-in Model Variants -Built-in models are made available across the following variants: - -- **By format**: `gguf`, `onnx`, and `tensorrt-llm` -- **By Size**: `7b`, `13b`, and more. -- **By quantizations**: `q4`, `q8`, and more. -:::info -You can see our full list of Built-in Models [here](/models). -::: -### Run Model - -Built-in models can be run via Docker-like syntax: - -```bash -# Run a model -cortex run model-id -# Run a model variant -cortex run model-id:branch -``` -For example: - -```bash -# Run Mistral Built-in Model -cortex pull mistral -# Run Mistral in GGUF format -cortex pull mistral:gguf -# Run Mistral in TensorRT-LLM format -cortex engines tensorrt-llm init -cortex pull mistral:7b-tensorrt-llm -# Run Mistral in ONNX format -cortex engines onnx init -cortex pull mistral:onnx -# Run Mistral with a different size -cortex pull mistral:7b-gguf - -``` \ No newline at end of file diff --git a/docs/docs/capabilities/audio-generation.md b/docs/docs/capabilities/audio-generation.md new file mode 100644 index 000000000..355f25d6d --- /dev/null +++ b/docs/docs/capabilities/audio-generation.md @@ -0,0 +1,3 @@ +--- +unlisted: true +--- \ No newline at end of file diff --git a/docs/docs/capabilities/embeddings.md b/docs/docs/capabilities/embeddings.md new file mode 100644 index 000000000..2c2fb4d54 --- /dev/null +++ b/docs/docs/capabilities/embeddings.md @@ -0,0 +1,7 @@ +--- +title: Embeddings +--- + +:::info +🚧 Cortex is currently under development, and this page is a stub for future development. 
+::: \ No newline at end of file diff --git a/docs/docs/capabilities/hardware/index.md b/docs/docs/capabilities/hardware/index.md new file mode 100644 index 000000000..acf190ecc --- /dev/null +++ b/docs/docs/capabilities/hardware/index.md @@ -0,0 +1,39 @@ +--- +title: Hardware Awareness +draft: True +--- + +# Hardware Awareness + +Cortex is designed to be hardware aware, meaning it can detect your hardware configuration and automatically set parameters to optimize compatibility and performance, and avoid hardware-related errors. + +## Hardware Optimization + +Cortex's Hardware awareness allows it to do the following: + +- Context Length Optimization: Cortex maximizes the context length allowed by your hardware, ensuring that you can work with larger datasets and more complex models without performance degradation. +- Engine Optimization: we detect your CPU and GPU, and maintain a list of optimized engines for each hardware configuration, e.g. taking advantage of AVX-2 and AVX-512 instructions on CPUs. + +## Hardware Awareness + +- Preventing hardware-related errors +- Error Handling for Insufficient VRAM: When loading a second model, Cortex provides useful error messages if there is insufficient VRAM memory. This proactive approach helps prevent out-of-memory errors and guides users on how to resolve the issue. + +### Model Compatibility + +- Model Compatibility Detection: Cortex automatically detects your hardware configuration to determine the compatibility of different models. This ensures that the models you use are optimized for your specific hardware setup. +- This is for the Hub, and for existing Models + +## Hardware Management + +### Activating Specific GPUs + +Cortex gives you the ability to activate specific GPUs for inference, giving you fine-grained control over hardware resources. This is especially useful for multi-GPU systems.
+- Activate GPUs: Cortex can activate and utilize GPUs to accelerate processing, ensuring that computationally intensive tasks are handled efficiently. +You also have the option to deactivate all GPUs, to run inference on only CPU and RAM. + +### Hardware Monitoring + +- Monitoring System Usage +- Monitor VRAM Usage: Cortex keeps track of VRAM usage to prevent out-of-memory (OOM) errors. It ensures that VRAM is used efficiently and provides warnings when resources are running low. +- Monitor System Resource Usage: Cortex continuously monitors the usage of system resources, including CPU, RAM, and GPUs. This helps in maintaining optimal performance and identifying potential bottlenecks. diff --git a/docs/docs/capabilities/image-generation.md b/docs/docs/capabilities/image-generation.md new file mode 100644 index 000000000..355f25d6d --- /dev/null +++ b/docs/docs/capabilities/image-generation.md @@ -0,0 +1,3 @@ +--- +unlisted: true +--- \ No newline at end of file diff --git a/docs/docs/model-overview.mdx b/docs/docs/capabilities/models/index.mdx similarity index 90% rename from docs/docs/model-overview.mdx rename to docs/docs/capabilities/models/index.mdx index 0eecc9ee4..b6f4b9036 100644 --- a/docs/docs/model-overview.mdx +++ b/docs/docs/capabilities/models/index.mdx @@ -20,7 +20,7 @@ Cortex.cpp supports three model formats: - TensorRT-LLM :::info -For details on each format, see the [Model Formats](/docs/model-yaml#model-formats) page. +For details on each format, see the [Model Formats](/docs/capabilities/models/model-yaml#model-formats) page. ::: ## Built-in Models @@ -38,5 +38,5 @@ You can see our full list of Built-in Models [here](/models). ::: ## Next steps -- Cortex requires a `model.yaml` file to run a model. Find out more [here](/docs/model-yaml). +- Cortex requires a `model.yaml` file to run a model. Find out more [here](/docs/capabilities/models/model-yaml). - Cortex supports multiple model hubs hosting built-in models. 
See details [here](/docs/model-sources). \ No newline at end of file diff --git a/docs/docs/model-yaml.mdx b/docs/docs/capabilities/models/model-yaml.mdx similarity index 91% rename from docs/docs/model-yaml.mdx rename to docs/docs/capabilities/models/model-yaml.mdx index 53a25a770..983f0f528 100644 --- a/docs/docs/model-yaml.mdx +++ b/docs/docs/capabilities/models/model-yaml.mdx @@ -6,24 +6,14 @@ description: The model.yaml import Tabs from "@theme/Tabs"; import TabItem from "@theme/TabItem"; - :::warning 🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. ::: Cortex.cpp uses a `model.yaml` file to specify the configuration for running a model. Models can be downloaded from the Cortex Model Hub or Hugging Face repositories. Once downloaded, the model data is parsed and stored in the `models` folder. -## `model.list` -The `model.list` file acts as a registry for all model files used by Cortex.cpp. It keeps track of every downloaded and imported model by listing their details in a structured format. 
Each time a model is downloaded or imported, Cortex.cpp will automatically append an entry to `model.list` with the following format: -``` -# Downloaded model - - -# Imported model - local imported +## Structure of `model.yaml` -``` -## `model.yaml` High Level Structure Here is an example of `model.yaml` format: ```yaml # BEGIN GENERAL METADATA @@ -71,7 +61,7 @@ ngl: 33 # Undefined = loaded from model The `model.yaml` is composed of three high-level sections: -### Cortex Meta +### Model Metadata ```yaml model: gemma-2-9b-it-Q8_0 name: Llama 3.1 diff --git a/docs/docs/model-presets.mdx b/docs/docs/capabilities/models/presets.mdx similarity index 98% rename from docs/docs/model-presets.mdx rename to docs/docs/capabilities/models/presets.mdx index d4196e146..799cf6cbc 100644 --- a/docs/docs/model-presets.mdx +++ b/docs/docs/capabilities/models/presets.mdx @@ -7,6 +7,7 @@ description: Model Presets 🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. 
::: + \ No newline at end of file diff --git a/docs/docs/capabilities/moderation.md b/docs/docs/capabilities/moderation.md new file mode 100644 index 000000000..355f25d6d --- /dev/null +++ b/docs/docs/capabilities/moderation.md @@ -0,0 +1,3 @@ +--- +unlisted: true +--- \ No newline at end of file diff --git a/docs/docs/capabilities/reasoning.md b/docs/docs/capabilities/reasoning.md new file mode 100644 index 000000000..355f25d6d --- /dev/null +++ b/docs/docs/capabilities/reasoning.md @@ -0,0 +1,3 @@ +--- +unlisted: true +--- \ No newline at end of file diff --git a/docs/docs/capabilities/speech-to-text.md b/docs/docs/capabilities/speech-to-text.md new file mode 100644 index 000000000..355f25d6d --- /dev/null +++ b/docs/docs/capabilities/speech-to-text.md @@ -0,0 +1,3 @@ +--- +unlisted: true +--- \ No newline at end of file diff --git a/docs/docs/capabilities/text-generation.md b/docs/docs/capabilities/text-generation.md new file mode 100644 index 000000000..680625667 --- /dev/null +++ b/docs/docs/capabilities/text-generation.md @@ -0,0 +1,7 @@ +--- +title: Text Generation +--- + +:::info +🚧 Cortex is currently under development, and this page is a stub for future development. 
+::: \ No newline at end of file diff --git a/docs/docs/capabilities/text-to-speech.md b/docs/docs/capabilities/text-to-speech.md new file mode 100644 index 000000000..355f25d6d --- /dev/null +++ b/docs/docs/capabilities/text-to-speech.md @@ -0,0 +1,3 @@ +--- +unlisted: true +--- \ No newline at end of file diff --git a/docs/docs/capabilities/vision.md b/docs/docs/capabilities/vision.md new file mode 100644 index 000000000..355f25d6d --- /dev/null +++ b/docs/docs/capabilities/vision.md @@ -0,0 +1,3 @@ +--- +unlisted: true +--- \ No newline at end of file diff --git a/docs/docs/chat-completions.mdx b/docs/docs/chat-completions.mdx index c39f25877..9b1dce01d 100644 --- a/docs/docs/chat-completions.mdx +++ b/docs/docs/chat-completions.mdx @@ -1,7 +1,6 @@ --- title: Chat Completions -description: Chat Completions Feature. -slug: "text-generation" +description: Chat Completions Feature --- import Tabs from "@theme/Tabs"; diff --git a/docs/docs/integrate-remote-engine.mdx b/docs/docs/engines/engine-extension.mdx similarity index 94% rename from docs/docs/integrate-remote-engine.mdx rename to docs/docs/engines/engine-extension.mdx index b32fcc635..8a62cd813 100644 --- a/docs/docs/integrate-remote-engine.mdx +++ b/docs/docs/engines/engine-extension.mdx @@ -1,8 +1,13 @@ --- -title: Integrate Remote Engine -description: How to integrate remote engine into Cortex. +title: Building Engine Extensions +description: Cortex supports Engine Extensions to integrate both local inference engines and Remote APIs. --- +:::info +🚧 Cortex is currently under development, and this page is a stub for future development.
+::: + + diff --git a/docs/docs/engines/llamacpp.mdx b/docs/docs/engines/llamacpp.mdx index f65c15473..c550e2e92 100644 --- a/docs/docs/engines/llamacpp.mdx +++ b/docs/docs/engines/llamacpp.mdx @@ -13,7 +13,7 @@ Cortex uses `llama.cpp` as the default engine by default the `GGUF` format is su Cortex automatically generates any `GGUF` model from the HuggingFace repo that does not have the `model.yaml` file. ::: -## [`model.yaml`](/docs/model-yaml) Sample +## [`model.yaml`](/docs/capabilities/models/model-yaml) Sample ```yaml ## BEGIN GENERAL GGUF METADATA id: Mistral-Nemo-Instruct-2407 # Model ID unique between models (author / quantization) diff --git a/docs/docs/engines/onnx.mdx b/docs/docs/engines/onnx.mdx index d4e999406..7110007d7 100644 --- a/docs/docs/engines/onnx.mdx +++ b/docs/docs/engines/onnx.mdx @@ -1,6 +1,7 @@ --- title: ONNX -description: ONNX Model Format. +description: ONNX Model Format +unlisted: true --- :::warning @@ -17,7 +18,7 @@ cortex engines onnx init ## Run an ONNX model cortex run openhermes-2.5:7b-onnx ``` -## [`model.yaml`](/docs/model-yaml) Sample +## [`model.yaml`](/docs/capabilities/models/model-yaml) Sample ```yaml name: openhermes-2.5 model: openhermes diff --git a/docs/docs/engines/tensorrt-llm.mdx b/docs/docs/engines/tensorrt-llm.mdx index 0cfe7d483..1a06b0a86 100644 --- a/docs/docs/engines/tensorrt-llm.mdx +++ b/docs/docs/engines/tensorrt-llm.mdx @@ -1,6 +1,7 @@ --- title: TensorRT-LLM -description: TensorRT-LLM Model Format. 
+description: TensorRT-LLM Model Format +unlisted: true --- :::warning @@ -17,7 +18,7 @@ cortex engines tensorrt-llm init ## Run a TensorRT-LLM model cortex run openhermes-2.5:7b-tensorrt-llm ``` -## [`model.yaml`](/docs/model-yaml) Sample +## [`model.yaml`](/docs/capabilities/models/model-yaml) Sample ```yaml name: Openhermes-2.5 7b Linux Ada model: openhermes-2.5:7B-tensorrt-llm diff --git a/docs/docs/guides/function-calling.md b/docs/docs/guides/function-calling.md new file mode 100644 index 000000000..40a708675 --- /dev/null +++ b/docs/docs/guides/function-calling.md @@ -0,0 +1,3 @@ +--- +title: Function Calling +--- \ No newline at end of file diff --git a/docs/docs/guides/structured-outputs.md b/docs/docs/guides/structured-outputs.md new file mode 100644 index 000000000..b14739ab2 --- /dev/null +++ b/docs/docs/guides/structured-outputs.md @@ -0,0 +1,3 @@ +--- +title: Structured Outputs +--- \ No newline at end of file diff --git a/docs/docs/installation/gpu-acceleration.mdx b/docs/docs/installation/gpu-acceleration.mdx deleted file mode 100644 index ff57a714f..000000000 --- a/docs/docs/installation/gpu-acceleration.mdx +++ /dev/null @@ -1,8 +0,0 @@ ---- -title: GPU Acceleration -description: GPU Acceleration. ---- - -:::warning -🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. -::: \ No newline at end of file diff --git a/docs/docs/quickstart.mdx b/docs/docs/quickstart.mdx index 687707c66..2ebf53c7b 100644 --- a/docs/docs/quickstart.mdx +++ b/docs/docs/quickstart.mdx @@ -171,6 +171,6 @@ This command starts the Cortex.cpp API server at `localhost:39281`. ## What's Next? Now that Cortex.cpp is set up, here are the next steps to explore: -1. Adjust the folder path and configuration using the [`.cortexrc`](/docs/basic-usage/cortexrc) file. -2. Explore the Cortex.cpp [data folder](/docs/data-folder) to understand how it stores data. -3. 
Learn about the structure of the [`model.yaml`](/docs/model-yaml) file in Cortex.cpp. +1. Adjust the folder path and configuration using the [`.cortexrc`](/docs/architecture/cortexrc) file. +2. Explore the Cortex.cpp [data folder](/docs/architecture/data-folder) to understand how it stores data. +3. Learn about the structure of the [`model.yaml`](/docs/capabilities/models/model-yaml) file in Cortex.cpp. diff --git a/docs/docusaurus.config.ts b/docs/docusaurus.config.ts index 32b32ab99..659e155d7 100644 --- a/docs/docusaurus.config.ts +++ b/docs/docusaurus.config.ts @@ -410,7 +410,6 @@ const config: Config = { items: [ { to: "/models", label: "Models", position: "left" }, { to: "/changelog", label: "Changelog", position: "left" }, - { to: "/contact", label: "Enterprise", position: "left" }, { type: "doc", position: "right", diff --git a/docs/sidebars.ts b/docs/sidebars.ts index d5d7fd020..36a3e7420 100644 --- a/docs/sidebars.ts +++ b/docs/sidebars.ts @@ -43,269 +43,120 @@ const sidebars: SidebarsConfig = { { type: "doc", id: "installation/mac", label: "Mac" }, { type: "doc", id: "installation/linux", label: "Linux" }, { type: "doc", id: "installation/docker", label: "Docker" }, - { - type: "doc", - id: "installation/gpu-acceleration", - label: "GPU Acceleration", - }, ], }, - { - type: "html", - value: "BASIC USAGE", - - className: "sidebar-divider", - }, - { type: "doc", id: "basic-usage/overview", label: "Overview" }, - { type: "doc", id: "basic-usage/cortexrc", label: ".cortexrc" }, - { type: "doc", id: "model-yaml", label: "model.yaml" }, - { type: "doc", id: "data-folder", label: "Data Folder" }, { type: "category", - label: "Libraries", - link: { - type: "generated-index", - }, + label: "Basic Usage", + link: { type: "doc", id: "basic-usage/index" }, collapsed: true, items: [ + { type: "doc", id: "basic-usage/api-server", label: "API Server" }, { type: "doc", - id: "basic-usage/integration/js-library", + id: "basic-usage/cortex-js", label: "cortex.js", }, { 
type: "doc", - id: "basic-usage/integration/py-library", + id: "basic-usage/cortex-py", label: "cortex.py", }, ], }, { type: "category", - label: "Model Sources", + label: "Architecture", + link: { + type: "generated-index", + }, + collapsed: true, + items: [ + { type: "doc", id: "architecture/data-folder", label: "Cortex Data Folder" }, + { type: "doc", id: "architecture/cortex-db", label: "cortex.db" }, + { type: "doc", id: "architecture/cortexrc", label: ".cortexrc" }, + ] + }, + { + type: "html", + value: "CAPABILITIES", + className: "sidebar-divider", + }, + { + type: "category", + label: "Pulling Models", link: { type: "doc", id: "hub/index" }, collapsed: true, items: [ - { type: "doc", id: "hub/cortex-hub", label: "Cortex Model Repos" }, - { type: "doc", id: "hub/hugging-face", label: "HuggingFace Repos" }, - { - type: "doc", - id: "hub/nvidia-ngc", - label: "Nvidia Catalog (Coming Soon)", - }, + { type: "doc", id: "hub/hugging-face", label: "Adding a HF Organization" }, + // { type: "doc", id: "hub/cortex-hub", label: "Cortex Model Repos" }, + // { + // type: "doc", + // id: "hub/nvidia-ngc", + // label: "Nvidia Catalog (Coming Soon)", + // }, ], }, { type: "category", - label: "Engines", + label: "Running Models", + link: { type: "doc", id: "capabilities/models/index"}, + collapsed: true, + items: [ + { type: "doc", id: "capabilities/models/model-yaml", label: "model.yaml" }, + { type: "doc", id: "capabilities/models/presets", label: "Model Presets" }, + ], + }, + { + type: "category", + label: "Engine Management", link: { type: "doc", id: "engines/index" }, collapsed: true, items: [ - { type: "doc", id: "engines/llamacpp", label: "Llama.cpp" }, + { type: "doc", id: "engines/llamacpp", label: "llama.cpp" }, // { type: "doc", id: "engines/tensorrt-llm", label: "TensorRT-LLM" }, // { type: "doc", id: "engines/onnx", label: "ONNX" }, + { type: "doc", id: "engines/engine-extension", label: "Building Engine Extensions" }, + ], }, - // { - // type: "category", - 
// label: "Basic Usage", - // link: { - // type: "generated-index", - // }, - // collapsed: true, - // items: [ - // { type: "doc", id: "basic-usage/command-line", label: "CLI" }, - // { type: "doc", id: "basic-usage/server", label: "API" }, - // { - // type: "category", - // label: "Integration", - // link: { - // type: "generated-index", - // }, - // collapsed: true, - // items: [ - // { - // type: "doc", - // id: "basic-usage/integration/js-library", - // label: "cortex.js", - // }, - // { - // type: "doc", - // id: "basic-usage/integration/py-library", - // label: "cortex.py", - // }, - // ], - // }, - // ], - // }, - // { type: "doc", id: "telemetry", label: "Telemetry" }, - // MODELs - // { - // type: "html", - // value: "MODELS", - // className: "sidebar-divider", - // }, - // { type: "doc", id: "model-overview", label: "Overview" }, - // { type: "doc", id: "model-yaml", label: "model.yaml" }, - // { type: "doc", id: "built-in-models", label: "Built-in Models" }, - // { - // type: "category", - // label: "Using Models", - // link: { type: "doc", id: "using-models" }, - // collapsed: true, - // items: [ - // { type: "doc", id: "model-yaml", label: "model.yaml" }, - // // { type: "doc", id: "model-presets", label: "Model Presets" }, - // { type: "doc", id: "built-in-models", label: "Built-in Models" }, - // ], - // }, - // BASIC USAGE - // { - // type: "html", - // value: "BASIC USAGE", - // className: "sidebar-divider", - // }, - // { type: "doc", id: "command-line", label: "CLI" }, - // { type: "doc", id: "ts-library", label: "Typescript Library" }, - // { type: "doc", id: "py-library", label: "Python Library" }, - // { type: "doc", id: "server", label: "Server Endpoint" }, - // CAPABILITIES - // { - // type: "html", - // value: "ENDPOINTS", - // className: "sidebar-divider", - // }, - // { type: "doc", id: "chat-completions", label: "Chat Completions" }, - // { type: "doc", id: "embeddings", label: "Embeddings" }, - // CLI - { - type: "html", - value: 
"CLI", - className: "sidebar-divider", - }, - { type: "doc", id: "cli/cortex", label: "cortex" }, - { type: "doc", id: "cli/start", label: "cortex start" }, - { type: "doc", id: "cli/chat", label: "cortex chat" }, - // { type: "doc", id: "cli/embeddings", label: "cortex embeddings" }, - // { type: "doc", id: "cli/presets", label: "cortex presets" }, - { type: "doc", id: "cli/pull", label: "cortex pull" }, - { type: "doc", id: "cli/run", label: "cortex run" }, - { type: "doc", id: "cli/models/index", label: "cortex models" }, - { type: "doc", id: "cli/engines/index", label: "cortex engines" }, - { type: "doc", id: "cli/stop", label: "cortex stop" }, - { type: "doc", id: "cli/ps", label: "cortex ps" }, - { type: "doc", id: "cli/update", label: "cortex update" }, - // { type: "doc", id: "cli/telemetry", label: "cortex telemetry" }, - // { type: "doc", id: "cli/benchmark", label: "cortex benchmark" }, - // ARCHITECTURE - // { - // type: "html", - // value: "ARCHITECTURE", - // className: "sidebar-divider", - // }, - // { type: "doc", id: "architecture", label: "Cortex" }, - // { - // type: "category", - // label: "Engines", - // link: { - // type: "generated-index", - // }, - // collapsed: true, - // items: [ - // { type: "doc", id: "cortex-llamacpp", label: "llama.cpp" }, - // { type: "doc", id: "cortex-tensorrt-llm", label: "TensorRT-LLM" }, - // { type: "doc", id: "cortex-onnx", label: "ONNX" }, - // { - // type: "doc", - // id: "integrate-remote-engine", - // label: "Integrate Remote Engine", - // }, - // ], - // }, - // { - // type: "category", - // label: "Infrastructure", - // link: { - // type: "generated-index", - // }, - // collapsed: true, - // items: [ - // { type: "doc", id: "telemetry-architecture", label: "Telemetry Infra" }, - // { - // type: "doc", - // id: "benchmarking-architecture", - // label: "Benchmarking Infra", - // }, - // ], - // }, - // { - // type: "html", - // value: "TROUBLESHOOTING", - // className: "sidebar-divider", - // }, - // { 
type: "doc", id: "troubleshooting", label: "Troubleshooting" }, - ], - platform: [ - { - type: "html", - value: - '', - }, { - type: "html", - value: - '', + type: "category", + label: "Hardware Awareness", + link: { type: "doc", id: "capabilities/hardware/index" }, + collapsed: true, + items: [ + ], }, + { type: "doc", id: "capabilities/text-generation", label: "Text Generation" }, + // { type: "doc", id: "capabilities/image-generation", label: "Image Generation" }, + // { type: "doc", id: "capabilities/vision", label: "Vision" }, + // { type: "doc", id: "capabilities/audio-generation", label: "Audio Generation" }, + // { type: "doc", id: "capabilities/text-to-speech", label: "Text to Speech" }, + // { type: "doc", id: "capabilities/speech-to-text", label: "Speech to text" }, + { type: "doc", id: "capabilities/embeddings", label: "Embeddings" }, + // { type: "doc", id: "capabilities/moderation", label: "Moderation" }, + // { type: "doc", id: "capabilities/reasoning", label: "Reasoning" }, { type: "html", - value: "GET STARTED", + value: "GUIDES", className: "sidebar-divider", }, - "cortex-platform/about", + { type: "doc", id: "guides/function-calling", label: "Function Calling"}, + { type: "doc", id: "guides/structured-outputs", label: "Structured Outputs"}, { type: "html", - value: "ENDPOINTS", + value: "ASSISTANTS", className: "sidebar-divider", }, - { type: "doc", id: "cortex-platform/benchmarking", label: "Benchmarking" }, - { - type: "html", - value: "ARCHITECTURE", - className: "sidebar-divider", - }, - { type: "doc", id: "architecture", label: "Cortex" }, - { - type: "category", - label: "Engines", - link: { - type: "generated-index", - }, - collapsed: true, - items: [ - { type: "doc", id: "cortex-llamacpp", label: "llama.cpp" }, - { type: "doc", id: "cortex-tensorrt-llm", label: "TensorRT-LLM" }, - { type: "doc", id: "cortex-onnx", label: "ONNX" }, - { - type: "doc", - id: "integrate-remote-engine", - label: "Integrate Remote Engine", - }, - ], - }, + { 
type: "doc", id: "assistants/index", label: "Assistants"}, { type: "category", - label: "Infrastructure", - link: { - type: "generated-index", - }, + label: "Tools", + link: { type: "doc", id: "assistants/tools/index" }, collapsed: true, items: [ - { type: "doc", id: "telemetry-architecture", label: "Telemetry Infra" }, - { - type: "doc", - id: "benchmarking-architecture", - label: "Benchmarking Infra", - }, + // { type: "doc", id: "assistants/tools/file-search", label: "File Search" }, ], }, { @@ -313,19 +164,19 @@ const sidebars: SidebarsConfig = { value: "CLI", className: "sidebar-divider", }, - // { type: "doc", id: "cli/cortex", label: "cortex" }, - // { type: "doc", id: "cli/chat", label: "cortex chat" }, + { type: "doc", id: "cli/cortex", label: "cortex" }, + { type: "doc", id: "cli/start", label: "cortex start" }, + { type: "doc", id: "cli/chat", label: "cortex chat" }, // { type: "doc", id: "cli/embeddings", label: "cortex embeddings" }, - { type: "doc", id: "cli/presets", label: "cortex presets" }, - // { type: "doc", id: "cli/pull", label: "cortex pull" }, - // { type: "doc", id: "cli/run", label: "cortex run" }, - // { type: "doc", id: "cli/models/index", label: "cortex models" }, - // { type: "doc", id: "cli/engines/index", label: "cortex engines" }, - // { type: "doc", id: "cli/stop", label: "cortex stop" }, - // { type: "doc", id: "cli/ps", label: "cortex ps" }, - // { type: "doc", id: "cli/telemetry", label: "cortex telemetry" }, - { type: "doc", id: "cli/benchmark", label: "cortex benchmark" }, - ], + // { type: "doc", id: "cli/presets", label: "cortex presets" }, + { type: "doc", id: "cli/pull", label: "cortex pull" }, + { type: "doc", id: "cli/run", label: "cortex run" }, + { type: "doc", id: "cli/models/index", label: "cortex models" }, + { type: "doc", id: "cli/engines/index", label: "cortex engines" }, + { type: "doc", id: "cli/stop", label: "cortex stop" }, + { type: "doc", id: "cli/ps", label: "cortex ps" }, + { type: "doc", id: 
"cli/update", label: "cortex update" }, + ] }; export default sidebars; diff --git a/docs/static/openapi/cortex.json b/docs/static/openapi/cortex.json index a01e0790d..f6120a4ad 100644 --- a/docs/static/openapi/cortex.json +++ b/docs/static/openapi/cortex.json @@ -227,9 +227,7 @@ } } }, - "tags": [ - "Inference" - ] + "tags": ["Chat"] } }, "/v1/models/pull": { @@ -1357,7 +1355,7 @@ }, "tags": [ { - "name": "Inference", + "name": "Chat", "description": "This endpoint initiates interaction with a Large Language Models (LLM)." }, { @@ -1393,7 +1391,7 @@ { "name": "CORTEX", "tags": [ - "Inference", + "Chat", "Engines", "Events", "Models",