diff --git a/docs/docs/capabilities/models/index.mdx b/docs/docs/capabilities/models/index.mdx index f4a10b1ae..2460905de 100644 --- a/docs/docs/capabilities/models/index.mdx +++ b/docs/docs/capabilities/models/index.mdx @@ -30,7 +30,9 @@ For details on each format, see the [Model Formats](/docs/capabilities/models/mo ::: ## Built-in Models -Cortex.cpp offers a range of built-in models that include popular open-source options. These models, hosted on HuggingFace as [Cortex Model Repositories](/docs/hub/cortex-hub), are pre-compiled for different engines, enabling each model to have multiple branches in various formats. +Cortex offers a range of [Built-in models](/models) that include popular open-source options. + +These models are hosted on [Cortex's HuggingFace](https://huggingface.co/cortexso) and are pre-compiled for different engines, enabling each model to have multiple branches in various formats. ### Built-in Model Variants Built-in models are made available across the following variants: @@ -39,10 +41,7 @@ Built-in models are made available across the following variants: - **By Size**: `7b`, `13b`, and more. - **By quantizations**: `q4`, `q8`, and more. -:::info -You can see our full list of Built-in Models [here](/models). -::: - ## Next steps -- Cortex requires a `model.yaml` file to run a model. Find out more [here](/docs/capabilities/models/model-yaml). -- Cortex supports multiple model hubs hosting built-in models. See details [here](/docs/model-sources). \ No newline at end of file +- See Cortex's list of [Built-in Models](/models). +- Cortex supports multiple model hubs hosting built-in models. See details [here](/docs/capabilities/models/sources). +- Cortex requires a `model.yaml` file to run a model. Find out more [here](/docs/capabilities/models/model-yaml). 
\ No newline at end of file diff --git a/docs/docs/capabilities/models/model-yaml.mdx b/docs/docs/capabilities/models/model-yaml.mdx index b47b86a64..e761d7da2 100644 --- a/docs/docs/capabilities/models/model-yaml.mdx +++ b/docs/docs/capabilities/models/model-yaml.mdx @@ -179,7 +179,7 @@ Model load parameters include the options that control how Cortex.cpp runs the m | `prompt_template` | Template for formatting the prompt, including system messages and instructions. | Yes | | `engine` | The engine that run model, default to `llama-cpp` for local model with gguf format. | Yes | -All parameters from the `model.yml` file are used for running the model via the [CLI chat command](/docs/cli/chat) or [CLI run command](/docs/cli/run). These parameters also act as defaults when using the [model start API](/api-reference#tag/models/post/v1/models/start) through cortex.cpp. +All parameters from the `model.yml` file are used for running the model via the [CLI run command](/docs/cli/run). These parameters also act as defaults when using the [model start API](/api-reference#tag/models/post/v1/models/start) through cortex.cpp. ## Runtime parameters @@ -217,8 +217,8 @@ The API is accessible at the `/v1/chat/completions` URL and accepts all paramete With the `llama-cpp` engine, cortex.cpp accept all parameters from [`model.yml` inference section](#Inference Parameters) and accept all parameters from the chat completion API. 
-:::info + diff --git a/docs/docs/hub/cortex-hub.mdx b/docs/docs/capabilities/models/sources/cortex-hub.mdx similarity index 100% rename from docs/docs/hub/cortex-hub.mdx rename to docs/docs/capabilities/models/sources/cortex-hub.mdx diff --git a/docs/docs/capabilities/models/sources/hugging-face.mdx b/docs/docs/capabilities/models/sources/hugging-face.mdx new file mode 100644 index 000000000..5dc74a14e --- /dev/null +++ b/docs/docs/capabilities/models/sources/hugging-face.mdx @@ -0,0 +1,66 @@ +--- +title: Hugging Face +description: Cortex supports all `GGUF` and `ONNX` models available in Huggingface repositories, providing access to a wide range of models. +--- + +import Tabs from "@theme/Tabs"; +import TabItem from "@theme/TabItem"; + +Cortex.cpp supports all `GGUF` from the [Hugging Face Hub](https://huggingface.co). + +You can pull HuggingFace models via: +- repository handle: eg `author/model_id` +- direct url: eg `https://huggingface.co/QuantFactory/OpenMath2-Llama3.1-8B-GGUF/blob/main/OpenMath2-Llama3.1-8B.Q4_0.gguf` + + +## GGUF +To view all available `GGUF` models on HuggingFace, select the `GGUF` tag in the Libraries section. 
+ +![HF GGUF](/img/docs/gguf.png) + + + ```sh + ## Pull the Codestral-22B-v0.1-GGUF model from the bartowski organization + cortex pull bartowski/Codestral-22B-v0.1-GGUF + + # Pull the gemma-7b model from the google organization + cortex pull https://huggingface.co/QuantFactory/OpenMath2-Llama3.1-8B-GGUF/blob/main/OpenMath2-Llama3.1-8B.Q4_0.gguf + ``` + + + ```sh + ## Pull the Codestral-22B-v0.1-GGUF model from the bartowski organization + cortex.exe pull bartowski/Codestral-22B-v0.1-GGUF + + # Pull the gemma-7b model from the google organization + cortex.exe pull google/gemma-7b + ``` + + + + diff --git a/docs/docs/hub/index.mdx b/docs/docs/capabilities/models/sources/index.mdx similarity index 97% rename from docs/docs/hub/index.mdx rename to docs/docs/capabilities/models/sources/index.mdx index f5c903ed0..186f192dc 100644 --- a/docs/docs/hub/index.mdx +++ b/docs/docs/capabilities/models/sources/index.mdx @@ -1,14 +1,8 @@ --- -slug: /model-sources title: Model Sources +description: Model --- -import DocCardList from "@theme/DocCardList"; - -:::warning -🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. -::: - # Pulling Models in Cortex Cortex provides a streamlined way to pull (download) machine learning models from Hugging Face and other third-party sources, as well as import models from local storage. This functionality allows users to easily access a variety of pre-trained models to enhance their applications. @@ -348,6 +342,4 @@ Response: } ``` -With Cortex, pulling and managing models is simplified, allowing you to focus more on building your applications! - - +With Cortex, pulling and managing models is simplified, allowing you to focus more on building your applications! 
\ No newline at end of file diff --git a/docs/docs/hub/nvidia-ngc.mdx b/docs/docs/capabilities/models/sources/nvidia-ngc.mdx similarity index 100% rename from docs/docs/hub/nvidia-ngc.mdx rename to docs/docs/capabilities/models/sources/nvidia-ngc.mdx diff --git a/docs/docs/chat-completions.mdx b/docs/docs/chat-completions.mdx index 9b1dce01d..c4f40f0d1 100644 --- a/docs/docs/chat-completions.mdx +++ b/docs/docs/chat-completions.mdx @@ -146,5 +146,5 @@ Cortex also acts as an aggregator for remote inference requests from a single en :::note Learn more about Chat Completions capabilities: - [Chat Completions API Reference](/api-reference#tag/inference/post/chat/completions) -- [Chat Completions CLI command](/docs/cli/chat) +- [`cortex run` CLI command](/docs/cli/run) ::: diff --git a/docs/docs/cli/chat.mdx b/docs/docs/cli/chat.mdx deleted file mode 100644 index 0b7ee7083..000000000 --- a/docs/docs/cli/chat.mdx +++ /dev/null @@ -1,71 +0,0 @@ ---- -title: Cortex Chat -description: Cortex chat command. -slug: "chat" ---- - -import Tabs from "@theme/Tabs"; -import TabItem from "@theme/TabItem"; - -:::warning -🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. -::: - -# `cortex chat` -:::info -This CLI command calls the following API endpoint: -- [Download Model](/api-reference#tag/models/post/v1/models/pull) (The command only calls this endpoint if the specified model is not downloaded yet.) -- Install Engine (The command only calls this endpoint if the specified engine is not downloaded yet.) -- [Start Model](/api-reference#tag/models/post/v1/models/start) -- [Chat Completions](/api-reference#tag/inference/post/v1/chat/completions) (The command makes a call to this endpoint if the `-c` option is used.) -::: - -This command starts a chat session with a specified model, allowing you to interact directly with it through an interactive chat interface. 
- -## Usage -:::info -You can use the `--verbose` flag to display more detailed output of the internal processes. To apply this flag, use the following format: `cortex --verbose [subcommand]`. -::: - - - ```sh - # Stable - cortex chat [options] -m  - - # Beta - cortex-beta chat [options] -m  - - # Nightly - cortex-nightly chat [options] -m  - ``` - - - ```sh - # Stable - cortex.exe chat [options] -m  - - # Beta - cortex-beta.exe chat [options] -m  - - # Nightly - cortex-nightly.exe chat [options] -m  - ``` - - - -:::info -This command uses a `model_id` from the model that you have downloaded or available in your file system. -::: - -## Options - -| Option | Description | Required | Default value | Example | | ----------------------------- | ----------------------------------------------------------------------------------------------- | -------- | ------------- | ----------------------------- | | `model_id` | Model ID to chat with. | Yes | - | `mistral` | | `-m`, `--message ` | Message to send to the model | Yes | - | `-m "Hello, model!"` | | `-h`, `--help` | Display help information for the command. | No | - | `-h` | - - - - diff --git a/docs/docs/cli/cortex.mdx b/docs/docs/cli/cortex.mdx index ef887dd10..cb8cf29fd 100644 --- a/docs/docs/cli/cortex.mdx +++ b/docs/docs/cli/cortex.mdx @@ -7,12 +7,8 @@ slug: /cli import Tabs from "@theme/Tabs"; import TabItem from "@theme/TabItem"; -:::warning -🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. -::: - -# Cortex -This command list all the available commands within the Cortex.cpp commands. +# `cortex` +This command lists all the available Cortex commands. 
## Usage :::info @@ -21,48 +17,23 @@ You can use the `--verbose` flag to display more detailed output of the internal ```sh - # Stable cortex - - # Beta - cortex-beta - - # Nightly - cortex-nightly ``` ```sh - # Stable cortex.exe - - # Beta - cortex-beta.exe - - # Nightly - cortex-nightly.exe ``` - -## Command Chaining -Cortex CLI's command chaining support allows multiple commands to be executed in sequence with a simplified syntax. - -For example: - -- [cortex run](/docs/cli/run) -- [cortex chat](/docs/cli/chat) - ## Sub Commands +- [cortex start](/docs/cli/start): Start the Cortex API server (starts automatically with other commands) +- [cortex run](/docs/cli/run): Shortcut for `cortex models start`. Pull a remote model or start a local model, and start chatting. +- [cortex pull](/docs/cli/pull): Download a model. - [cortex models](/docs/cli/models): Manage and configure models. -- [cortex chat](/docs/cli/chat): Send a chat request to a model. - [cortex ps](/docs/cli/ps): Display active models and their operational status. -- [cortex embeddings](/docs/cli/embeddings): Create an embedding vector representing the input text. -- [cortex engines](/docs/cli/engines): Manage Cortex.cpp engines. -- [cortex pull|download](/docs/cli/pull): Download a model. -- [cortex run](/docs/cli/run): Shortcut to pull, start and chat with a model. -- [cortex update](/docs/cli/update): Update the Cortex.cpp version. -- [cortex start](/docs/cli/start): Start the Cortex.cpp API server. -- [cortex stop](/docs/cli/stop): Stop the Cortex.cpp API server. +- [cortex engines](/docs/cli/engines): Manage Cortex engines. +- [cortex update](/docs/cli/update): Update the Cortex version. +- [cortex stop](/docs/cli/stop): Stop the Cortex API server. 
diff --git a/docs/docs/cli/ps.mdx b/docs/docs/cli/ps.mdx index 2641a388f..a70a9501c 100644 --- a/docs/docs/cli/ps.mdx +++ b/docs/docs/cli/ps.mdx @@ -7,59 +7,42 @@ slug: "ps" import Tabs from "@theme/Tabs"; import TabItem from "@theme/TabItem"; -:::warning -🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. -::: - # `cortex ps` -This command shows the running model and its status. - - +This command shows the running model and its status (Engine, RAM, VRAM, and Uptime). ## Usage -:::info -You can use the `--verbose` flag to display more detailed output of the internal processes. To apply this flag, use the following format: `cortex --verbose [subcommand]`. -::: ```sh - # Stable cortex ps [options] - - # Beta - cortex-beta ps [options] - - # Nightly - cortex-nightly ps [options] ``` ```sh - # Stable cortex.exe ps [options] - - # Beta - cortex-beta.exe ps [options] - - # Nightly - cortex-nightly.exe ps [options] ``` - For example, it returns the following table: ```bash -+----------------+-----------+----------+-----------+-----------+ -| Model | Engine | RAM | VRAM | Up time | -+----------------+-----------+----------+-----------+-----------+ -| tinyllama:gguf | llama-cpp | 35.16 MB | 601.02 MB | 5 seconds | -+----------------+-----------+----------+-----------+-----------+ +> cortex ps ++------------------------+-----------+-----------+-----------+-------------------------------+ +| Model | Engine | RAM | VRAM | Uptime | ++------------------------+-----------+-----------+-----------+-------------------------------+ +| llama3.2:3b-gguf-q4-km | llama-cpp | 308.23 MB | 1.87 GB | 7 seconds | ++------------------------+-----------+-----------+-----------+-------------------------------+ +| tinyllama:1b-gguf | llama-cpp | 35.16 MB | 636.18 MB | 1 hour, 5 minutes, 45 seconds | 
++------------------------+-----------+-----------+-----------+-------------------------------+ ``` ## Options | Option | Description | Required | Default value | Example | |-------------------|-------------------------------------------------------|----------|---------------|-------------| -| `-h`, `--help` | Display help information for the command. | No | - | `-h` | \ No newline at end of file +| `-h`, `--help` | Display help information for the command. | No | - | `-h` | + +:::info +You can use the `--verbose` flag to display more detailed output of the internal processes. To apply this flag, use the following format: `cortex --verbose [subcommand]`. +::: \ No newline at end of file diff --git a/docs/docs/cli/pull.mdx b/docs/docs/cli/pull.mdx index df1f3917d..028962896 100644 --- a/docs/docs/cli/pull.mdx +++ b/docs/docs/cli/pull.mdx @@ -7,18 +7,23 @@ slug: "pull" import Tabs from "@theme/Tabs"; import TabItem from "@theme/TabItem"; -:::warning -🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. -::: - # `cortex pull` :::info This CLI command calls the following API endpoint: -- [Download Model](/api-reference#tag/models/post/v1/models/{modelId}/pull) +- [Download Model](/api-reference#tag/pulling-models/post/v1/models/pull) ::: -This command downloads models from supported [model repositories](/docs/model-sources). +This command displays downloaded models, or displays models available for downloading. + +There are 3 ways to download models: +- From Cortex's [Built-in models](/models): `cortex pull ` +- HuggingFace repository handle: `cortex pull ` +- HuggingFace direct URL: `cortex pull ` + +Each of these will display the model quantizations available, to be selected for download. + +For more information, please refer to [Pulling Models](/docs/capabilities/models/sources). 
-The downloaded model will be stored in the Cortex folder in your home data directory. +The downloaded model file will be stored in the [Cortex Data Folder](/docs/architecture/data-folder). ## Usage @@ -28,34 +33,39 @@ You can use the `--verbose` flag to display more detailed output of the internal ```sh - # Stable cortex pull [options] - - # Beta - cortex-beta pull [options] - - # Nightly - cortex-nightly pull [options] ``` ```sh - # Stable cortex.exe pull [options] - - # Beta - cortex-beta.exe pull [options] - - # Nightly - cortex-nightly.exe pull [options] ``` +For example, this returns the following: +```bash +> cortex pull llama3.2 +Downloaded models: + llama3.2:3b-gguf-q4-km + +Available to download: + 1. llama3.2:3b-gguf-q2-k + 2. llama3.2:3b-gguf-q3-kl + 3. llama3.2:3b-gguf-q3-km + 4. llama3.2:3b-gguf-q3-ks + 5. llama3.2:3b-gguf-q4-ks + 6. llama3.2:3b-gguf-q5-km + 7. llama3.2:3b-gguf-q5-ks + 8. llama3.2:3b-gguf-q6-k + 9. llama3.2:3b-gguf-q8-0 + +Select a model (1-9): +``` ## Options | Option | Description | Required | Default value | Example | | -------------- | ------------------------------------------------- | -------- | ------------- | ----------- | | `model_id` | The identifier of the model you want to download. | Yes | - | `mistral` | -| `-h`, `--help` | Display help information for the command. | No | - | `-h` | +| `-h`, `--help` | Display help information for the command. | No | - | `-h` | \ No newline at end of file diff --git a/docs/docs/cli/run.mdx b/docs/docs/cli/run.mdx index 786b5cb0c..b0b9143ad 100644 --- a/docs/docs/cli/run.mdx +++ b/docs/docs/cli/run.mdx @@ -7,75 +7,35 @@ slug: "run" import Tabs from "@theme/Tabs"; import TabItem from "@theme/TabItem"; -:::warning -🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. 
-::: - # `cortex run` -:::info -This CLI command calls the following API endpoint: -- [Download Model](/api-reference#tag/models/post/v1/models/pull) (The command only calls this endpoint if the specified model is not downloaded yet.) -- Install Engine (The command only calls this endpoint if the specified engine is not downloaded yet.) -- [Start Model](/api-reference#tag/models/post/v1/models/start) -::: -This command facilitates the initiation of starting a specified machine-learning model. +This CLI command is a shortcut to run models easily. It executes this sequence of commands: +1. [`cortex pull`](/docs/cli/models/): This command pulls the specified model if the model is not yet downloaded, or finds a local model. +2. [`cortex engines install`](/docs/cli/engines/): This command installs the specified engines if not yet downloaded. +3. [`cortex models start`](/docs/cli/models/): This command starts the specified model, making it active and ready for interactions. ## Usage :::info You can use the `--verbose` flag to display more detailed output of the internal processes. To apply this flag, use the following format: `cortex --verbose [subcommand]`. ::: + ```sh - # Stable - cortex [options] :[engine] - - # Beta - cortex-beta [options] :[engine] - - # Nightly - cortex-nightly [options] :[engine] + cortex [options] ``` ```sh - # Stable - cortex.exe [options] :[engine] - - # Beta - cortex-beta.exe [options] :[engine] - - # Nightly - cortex-nightly.exe [options] :[engine] + cortex.exe [options] ``` -### `model_id` -You can use the [Built-in models](/docs/hub/cortex-hub) or Supported [HuggingFace models](/docs/hub/hugging-face). - -:::info -This command downloads and installs the model if not already available in your file system, then starts it for interaction. 
-::: - - ## Options | Option | Description | Required | Default value | Example | |-----------------------------|-----------------------------------------------------------------------------|----------|----------------------------------------------|------------------------| -| `model_id` | The identifier of the model you want to chat with. | Yes | `Prompt to select from the available models` | `mistral` | -| `-h`, `--help` | Display help information for the command. | No | - | `-h` | - - - - -## Command Chain - -`cortex run` command is a convenience wrapper that automatically executes a sequence of commands to simplify user interactions: - -1. [`cortex pull`](/docs/cli/models/): This command pulls the specified model if the model is not yet downloaded. -2. [`cortex engines install`](/docs/cli/engines/): This command installs the specified engines if not yet downloaded. -3. [`cortex models start`](/docs/cli/models/): This command starts the specified model, making it active and ready for interactions. +| `model_id` | The identifier of the model you want to chat with. | Yes | - | `mistral` | +| `-h`, `--help` | Display help information for the command. | No | - | `-h` | + diff --git a/docs/docs/cli/start.mdx b/docs/docs/cli/start.mdx index c180908eb..37a521ab1 100644 --- a/docs/docs/cli/start.mdx +++ b/docs/docs/cli/start.mdx @@ -6,16 +6,10 @@ description: Cortex CLI. import Tabs from "@theme/Tabs"; import TabItem from "@theme/TabItem"; -:::warning -🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. -::: +# `cortex start` -# Start -:::info -This is the initial command you need to run to start using Cortex.cpp. -::: - -This command start the Cortex.cpp's API server processes. +This command starts the Cortex API server processes. +If the server is not yet running, the server will automatically be started when running other Cortex commands. 
## Usage :::info @@ -24,26 +18,12 @@ You can use the `--verbose` flag to display more detailed output of the internal ```sh - # Stable cortex start [options] - - # Beta - cortex-beta start [options] - - # Nightly - cortex-nightly start [options] ``` ```sh - # Stable cortex.exe start [options] - - # Beta - cortex-beta.exe start [options] - - # Nightly - cortex-nightly.exe start [options] ``` @@ -54,7 +34,6 @@ You can use the `--verbose` flag to display more detailed output of the internal | Option | Description | Required | Default value | Example | | ---------------------------- | ----------------------------------------- | -------- | ------------- | ----------------------------- | | `-h`, `--help` | Display help information for the command. | No | - | `-h` | -| `-p`, `--port ` | Port to serve the application. | No | - | `-p 39281` | +| `-p`, `--port ` | Port to serve the application. | No | `39281` | `-p 39281` | - diff --git a/docs/docs/cli/stop.mdx b/docs/docs/cli/stop.mdx index 48c4eec31..0b8625f9e 100644 --- a/docs/docs/cli/stop.mdx +++ b/docs/docs/cli/stop.mdx @@ -7,10 +7,6 @@ slug: "stop" import Tabs from "@theme/Tabs"; import TabItem from "@theme/TabItem"; -:::warning -🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. -::: - # `cortex stop` :::info This CLI command calls the following API endpoint: @@ -18,8 +14,6 @@ This CLI command calls the following API endpoint: ::: This command stops the API server. - - ## Usage :::info You can use the `--verbose` flag to display more detailed output of the internal processes. To apply this flag, use the following format: `cortex --verbose [subcommand]`. 
@@ -27,26 +21,12 @@ You can use the `--verbose` flag to display more detailed output of the internal ```sh - # Stable cortex stop [options] - - # Beta - cortex-beta stop [options] - - # Nightly - cortex-nightly stop [options] ``` ```sh - # Stable cortex.exe stop [options] - - # Beta - cortex-beta.exe stop [options] - - # Nightly - cortex-nightly.exe stop [options] ``` @@ -56,4 +36,4 @@ You can use the `--verbose` flag to display more detailed output of the internal | Option | Description | Required | Default value | Example | |-------------------|-------------------------------------------------------|----------|---------------|-------------| -| `-h`, `--help` | Display help information for the command. | No | - | `-h` | +| `-h`, `--help` | Display help information for the command. | No | - | `-h` | \ No newline at end of file diff --git a/docs/docs/cli/update.mdx b/docs/docs/cli/update.mdx index f54d554cc..0f06f8476 100644 --- a/docs/docs/cli/update.mdx +++ b/docs/docs/cli/update.mdx @@ -7,16 +7,11 @@ slug: "update" import Tabs from "@theme/Tabs"; import TabItem from "@theme/TabItem"; -:::warning -🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. -::: - # `cortex update` This command updates Cortex.cpp to the provided version or the latest version. - ## Usage :::info You can use the `--verbose` flag to display more detailed output of the internal processes. To apply this flag, use the following format: `cortex --verbose [subcommand]`. 
@@ -24,26 +19,12 @@ You can use the `--verbose` flag to display more detailed output of the internal ```sh - # Stable cortex update [options] - - # Beta - cortex-beta update [options] - - # Nightly - cortex-nightly update [options] ``` ```sh - # Stable cortex.exe update [options] - - # Beta - cortex-beta.exe update [options] - - # Nightly - cortex-nightly.exe update [options] ``` @@ -57,7 +38,7 @@ By default, if no version is specified, Cortex.cpp will be updated to the latest | Option | Description | Required | Default Value | Example | |----------------------------|-------------------------------------------|----------|---------------|------------------------| | `-h`, `--help` | Display help information for the command. | No | - | `-h` | -| `-v` | Specify the version of the Cortex. | No | - | `-v 0.5.0`| +| `-v` | Specify the version of the Cortex. | No | - | `-v1.0.1`| diff --git a/docs/docs/engines/llamacpp.mdx b/docs/docs/engines/llamacpp.mdx index c550e2e92..2ace67944 100644 --- a/docs/docs/engines/llamacpp.mdx +++ b/docs/docs/engines/llamacpp.mdx @@ -99,8 +99,8 @@ ngl: 41 # Undefined = loaded from model | `n_probs` | Number of top token probabilities to return in the output. | No | | `min_keep` | Minimum number of tokens to keep during top-k sampling. | No | -:::info + \ No newline at end of file diff --git a/docs/docs/engines/onnx.mdx b/docs/docs/engines/onnx.mdx index 7110007d7..370aa1e53 100644 --- a/docs/docs/engines/onnx.mdx +++ b/docs/docs/engines/onnx.mdx @@ -54,8 +54,8 @@ stream: true # true | false | `prompt_template` | Template for formatting the prompt, including system messages and instructions. 
| Yes | -:::info + \ No newline at end of file diff --git a/docs/docs/engines/tensorrt-llm.mdx b/docs/docs/engines/tensorrt-llm.mdx index 1a06b0a86..94a3d3875 100644 --- a/docs/docs/engines/tensorrt-llm.mdx +++ b/docs/docs/engines/tensorrt-llm.mdx @@ -64,9 +64,9 @@ stream: true # true | false | `text_model` | Indicates if the text model is being used (true or false). | Yes | | `prompt_template` | Template for formatting the prompt, including system messages and instructions. | Yes | -:::info + \ No newline at end of file diff --git a/docs/docs/hub/hugging-face.mdx b/docs/docs/hub/hugging-face.mdx deleted file mode 100644 index a4409992a..000000000 --- a/docs/docs/hub/hugging-face.mdx +++ /dev/null @@ -1,128 +0,0 @@ ---- -title: Hugging Face -description: Cortex supports all `GGUF` and `ONNX` models available in Huggingface repositories, providing access to a wide range of models. ---- - -import Tabs from "@theme/Tabs"; -import TabItem from "@theme/TabItem"; - -:::warning -🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. -::: - - -Cortex.cpp supports all `GGUF` and `ONNX` models from the [Hugging Face Hub](https://huggingface.co), along with its built-in models. For `TensorRT-LLM` models, only built-in models in the [Cortex Model Repos](/docs/hub/cortex-hub) are supported. - -:::info -To pull a supported model from HuggingFace, use the format `ORG_ID/MODEL_ID`. -::: -## GGUF -![HF GGUF](/img/docs/gguf.png) -To view all available `GGUF` models on HuggingFace, select the `GGUF` tag in the Libraries section. 
- - - ```sh - # Stable - ## Pull the Codestral-22B-v0.1-GGUF model from the bartowski organization - cortex pull bartowski/Codestral-22B-v0.1-GGUF - - # Pull the gemma-7b model from the google organization - cortex pull google/gemma-7b - - # Beta - ## Pull the Codestral-22B-v0.1-GGUF model from the bartowski organization - cortex-beta pull bartowski/Codestral-22B-v0.1-GGUF - - # Pull the gemma-7b model from the google organization - cortex-beta pull google/gemma-7b - - # Nightly - ## Pull the Codestral-22B-v0.1-GGUF model from the bartowski organization - cortex-nightly pull bartowski/Codestral-22B-v0.1-GGUF - - # Pull the gemma-7b model from the google organization - cortex-nightly pull google/gemma-7b - ``` - - - ```sh - # Stable - ## Pull the Codestral-22B-v0.1-GGUF model from the bartowski organization - cortex.exe pull bartowski/Codestral-22B-v0.1-GGUF - - # Pull the gemma-7b model from the google organization - cortex.exe pull google/gemma-7b - - # Beta - ## Pull the Codestral-22B-v0.1-GGUF model from the bartowski organization - cortex-beta.exe pull bartowski/Codestral-22B-v0.1-GGUF - - # Pull the gemma-7b model from the google organization - cortex-beta.exe pull google/gemma-7b - - # Nightly - ## Pull the Codestral-22B-v0.1-GGUF model from the bartowski organization - cortex-nightly.exe pull bartowski/Codestral-22B-v0.1-GGUF - - # Pull the gemma-7b model from the google organization - cortex-nightly.exe pull google/gemma-7b - ``` - - - -## ONNX -![HF ONNX](/img/docs/onnx.png) -To view all available `ONNX` models on HuggingFace, select the `ONNX` tag in the Libraries section. 
- - - ```sh - # Stable - ## Pull the XLM-Roberta-Large-Vit-B-16Plus model from the immich-app organization - cortex pull immich-app/XLM-Roberta-Large-Vit-B-16Plus - - # Pull the mt0-base model from the bigscience organization - cortex pull bigscience/mt0-base - - # Beta - ## Pull the XLM-Roberta-Large-Vit-B-16Plus model from the immich-app organization - cortex-beta pull immich-app/XLM-Roberta-Large-Vit-B-16Plus - - # Pull the mt0-base model from the bigscience organization - cortex-beta pull bigscience/mt0-base - - # Nightly - ## Pull the XLM-Roberta-Large-Vit-B-16Plus model from the immich-app organization - cortex-nightly pull immich-app/XLM-Roberta-Large-Vit-B-16Plus - - # Pull the mt0-base model from the bigscience organization - cortex-nightly pull bigscience/mt0-base - ``` - - - ```sh - # Stable - ## Pull the XLM-Roberta-Large-Vit-B-16Plus model from the immich-app organization - cortex.exe pull immich-app/XLM-Roberta-Large-Vit-B-16Plus - - # Pull the mt0-base model from the bigscience organization - cortex.exe pull bigscience/mt0-base - - # Beta - ## Pull the XLM-Roberta-Large-Vit-B-16Plus model from the immich-app organization - cortex-beta.exe pull immich-app/XLM-Roberta-Large-Vit-B-16Plus - - # Pull the mt0-base model from the bigscience organization - cortex-beta.exe pull bigscience/mt0-base - - # Nightly - ## Pull the XLM-Roberta-Large-Vit-B-16Plus model from the immich-app organization - cortex-nightly.exe pull immich-app/XLM-Roberta-Large-Vit-B-16Plus - - # Pull the mt0-base model from the bigscience organization - cortex-nightly.exe pull bigscience/mt0-base - ``` - - - -## TensorRT-LLM -We are still working to support all available `TensorRT-LLM` models on HuggingFace. For now, Cortex.cpp only supports built-in `TensorRT-LLM` models, which can be downloaded from the [Cortex Model Repos](/docs/hub/cortex-hub). 
diff --git a/docs/sidebars.ts b/docs/sidebars.ts index bf520499c..bdad562c2 100644 --- a/docs/sidebars.ts +++ b/docs/sidebars.ts @@ -88,16 +88,12 @@ const sidebars: SidebarsConfig = { { type: "category", label: "Pulling Models", - link: { type: "doc", id: "hub/index" }, + link: { type: "doc", id: "capabilities/models/sources/index" }, collapsed: true, items: [ - { type: "doc", id: "hub/hugging-face", label: "Adding a HF Organization" }, - // { type: "doc", id: "hub/cortex-hub", label: "Cortex Model Repos" }, - // { - // type: "doc", - // id: "hub/nvidia-ngc", - // label: "Nvidia Catalog (Coming Soon)", - // }, + // { type: "doc", id: "capabilities/models/sources/hugging-face", label: "Hugging Face" }, + // { type: "doc", id: "capabilities/models/sources/cortex-hub", label: "Cortex Model Repos" }, + // { type: "doc", id: "capabilities/models/sources/nvidia-ngc", label: "Nvidia Catalog (Coming Soon)"}, ], }, { @@ -169,16 +165,15 @@ const sidebars: SidebarsConfig = { }, { type: "doc", id: "cli/cortex", label: "cortex" }, { type: "doc", id: "cli/start", label: "cortex start" }, - { type: "doc", id: "cli/chat", label: "cortex chat" }, + { type: "doc", id: "cli/run", label: "cortex run" }, // { type: "doc", id: "cli/embeddings", label: "cortex embeddings" }, // { type: "doc", id: "cli/presets", label: "cortex presets" }, { type: "doc", id: "cli/pull", label: "cortex pull" }, - { type: "doc", id: "cli/run", label: "cortex run" }, { type: "doc", id: "cli/models/index", label: "cortex models" }, { type: "doc", id: "cli/engines/index", label: "cortex engines" }, - { type: "doc", id: "cli/stop", label: "cortex stop" }, { type: "doc", id: "cli/ps", label: "cortex ps" }, { type: "doc", id: "cli/update", label: "cortex update" }, + { type: "doc", id: "cli/stop", label: "cortex stop" }, ] };