From 06aefc512da3c1ff4beb7a85401bd1f9030d0b65 Mon Sep 17 00:00:00 2001 From: irfanpena Date: Wed, 26 Jun 2024 06:56:36 +0700 Subject: [PATCH 1/4] Add the tensorrt-llm yaml example --- docs/installation/linux.mdx | 2 +- docs/installation/mac.mdx | 2 +- docs/installation/windows.mdx | 4 +-- docs/model-operations.mdx | 63 +++++++++++++++++++++++++++++++++++ 4 files changed, 67 insertions(+), 4 deletions(-) diff --git a/docs/installation/linux.mdx b/docs/installation/linux.mdx index 5ea4ca9..dcd65bb 100644 --- a/docs/installation/linux.mdx +++ b/docs/installation/linux.mdx @@ -110,7 +110,7 @@ Install NPM on your machine before proceeding with this step. ```sh # Install globally on your system -npm i -g @janhq/cortex +npm i -g @janhq/cortexso ``` :::info Cortex automatically detects your CPU and GPU, downloading the appropriate CPU instruction sets and required dependencies to optimize GPU performance. diff --git a/docs/installation/mac.mdx b/docs/installation/mac.mdx index 4cadbde..e20840b 100644 --- a/docs/installation/mac.mdx +++ b/docs/installation/mac.mdx @@ -66,7 +66,7 @@ Install NPM on your machine before proceeding with this step. ```sh # Install globally on your system -npm i -g @janhq/cortex +npm i -g @janhq/cortexso ``` :::info Cortex automatically detects your CPU and GPU, downloading the appropriate CPU instruction sets and required dependencies to optimize GPU performance. diff --git a/docs/installation/windows.mdx b/docs/installation/windows.mdx index 3ab1d1b..2e853b0 100644 --- a/docs/installation/windows.mdx +++ b/docs/installation/windows.mdx @@ -88,7 +88,7 @@ Install NPM on your machine before proceeding with this step. ```sh # Install globally on your system -npm i -g @janhq/cortex +npm i -g @janhq/cortexso ``` :::info Cortex automatically detects your CPU and GPU, downloading the appropriate CPU instruction sets and required dependencies to optimize GPU performance. @@ -122,7 +122,7 @@ Install NPM on your machine before proceeding with this step. 
```sh # Install globally on your system -npm i -g @janhq/cortex +npm i -g @janhq/cortexso ``` :::info Cortex automatically detects your CPU and GPU, downloading the appropriate CPU instruction sets and required dependencies to optimize GPU performance. diff --git a/docs/model-operations.mdx b/docs/model-operations.mdx index 6265ecc..60afa5a 100644 --- a/docs/model-operations.mdx +++ b/docs/model-operations.mdx @@ -194,6 +194,69 @@ If a `model.yaml` is not available, Cortex autogenerates it from model metadata. + **Example `model.yaml` for `llama3 8B-tensorrt-llm-windows-ada`** + ```yaml + name: Llama 3 + model: llama3:8B + version: 1 + + # files: + + # Results Preferences + top_p: 0.95 + temperature: 0.7 + frequency_penalty: 0 + presence_penalty: 0 + max_tokens: 8192 # Infer from base config.json -> max_position_embeddings + stream: true # true | false + + # Engine / Model Settings + engine: cortex.tensorrtllm + os: windows # from CI env var + gpu_arch: ada # from CI env var + quantization_method: awq # from CI env var + precision: int4 # from CI env var + tp: 1 # from CI env var + trtllm_version: 0.9.0 # From CI env var + ctx_len: 8192 # Infer from base config.json -> max_position_embeddings + text_model: false # Fixed value - https://github.com/janhq/jan/blob/dev/extensions/tensorrt-llm-extension/resources/models.json#L41C7-L41C26 + prompt_template: "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n" + # Prompt template: Can only be retrieved from instruct model + # - https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct/blob/main/tokenizer_config.json#L2053 + # - Requires jinja format parser + ``` + **Example `model.yaml` for `llama3 8B-tensorrt-llm-linux-ada`** + + ```yaml + name: Llama 3 + model: llama3:8B + version: 1 + + # files: + + # Results Preferences + top_p: 0.95 + temperature: 0.7 + 
frequency_penalty: 0 + presence_penalty: 0 + max_tokens: 8192 # Infer from base config.json -> max_position_embeddings + stream: true # true | false + + # Engine / Model Settings + engine: cortex.tensorrtllm + os: linux # from CI env var + gpu_arch: ada # from CI env var + quantization_method: awq # from CI env var + precision: int4 # from CI env var + tp: 1 # from CI env var + trtllm_version: 0.9.0 # From CI env var + ctx_len: 8192 # Infer from base config.json -> max_position_embeddings + text_model: false # Fixed value - https://github.com/janhq/jan/blob/dev/extensions/tensorrt-llm-extension/resources/models.json#L41C7-L41C26 + prompt_template: "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n" + # Prompt template: Can only be retrieved from instruct model + # - https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct/blob/main/tokenizer_config.json#L2053 + # - Requires jinja format parser + ``` From 8a1fd80b0809e256a2650278784b45ea14cbd719 Mon Sep 17 00:00:00 2001 From: irfanpena Date: Wed, 26 Jun 2024 07:00:11 +0700 Subject: [PATCH 2/4] Update the format --- docs/quickstart.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/quickstart.md b/docs/quickstart.md index abf4657..d9a89e6 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -58,8 +58,9 @@ cortex pull bartowski/Hermes-2-Theta-Llama-3-70B-GGUF # Download a preconfigured model from https://huggingface.co/cortexhub cortex pull llama3 ``` - +:::info Read more about [model operations](./model-operations). 
+::: ## Get Help @@ -69,9 +70,11 @@ cortex cortex CMD -h ``` +:::info Cortex is still in early development, so if you have any questions, please reach out to us: - [GitHub](https://github.com/janhq/cortex) - [Discord](https://discord.gg/YFKKeuVu) +::: ## Next Steps From 71e0cca6a079d2c45b15ec36cacd8d5e520b5042 Mon Sep 17 00:00:00 2001 From: irfanpena Date: Wed, 26 Jun 2024 07:17:43 +0700 Subject: [PATCH 3/4] Update the hardware --- docs/cortex-onnx.mdx | 4 ++++ docs/cortex-tensorrt-llm.mdx | 4 ++++ docs/hardware.md | 19 +++++++++++++++++++ docs/telemetry.mdx | 3 +++ 4 files changed, 30 insertions(+) diff --git a/docs/cortex-onnx.mdx b/docs/cortex-onnx.mdx index f76840c..05f51a1 100644 --- a/docs/cortex-onnx.mdx +++ b/docs/cortex-onnx.mdx @@ -4,6 +4,10 @@ description: Onnx Architecture slug: "cortex-onnx" --- +:::warning +🚧 Cortex is under construction. +::: + ## Introduction Cortex.onnx is a C++ inference library for Windows that relies on [onnxruntime-genai](https://github.com/microsoft/onnxruntime-genai), utilizing DirectML for hardware acceleration. [DirectML](https://github.com/microsoft/DirectML) is a high-performance DirectX 12 library for machine learning, providing GPU acceleration across various hardware and drivers, including AMD, Intel, NVIDIA, and Qualcomm GPUs. It integrates and sometimes upstreams [onnxruntime-genai](https://github.com/microsoft/onnxruntime-genai) for inference tasks. diff --git a/docs/cortex-tensorrt-llm.mdx b/docs/cortex-tensorrt-llm.mdx index b030766..36f3453 100644 --- a/docs/cortex-tensorrt-llm.mdx +++ b/docs/cortex-tensorrt-llm.mdx @@ -4,6 +4,10 @@ description: NVIDIA TensorRT-LLM Architecture slug: "cortex-tensorrt-llm" --- +:::warning +🚧 Cortex is under construction. +::: + ## Introduction [Cortex.tensorrt-llm](https://github.com/janhq/cortex.tensorrt-llm) is a C++ inference library for NVIDIA GPUs. It submodules NVIDIA’s [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM) for GPU accelerated inference. 
diff --git a/docs/hardware.md b/docs/hardware.md index af9ced5..dcb8ff9 100644 --- a/docs/hardware.md +++ b/docs/hardware.md @@ -4,6 +4,10 @@ description: Get started quickly with Jan, a ChatGPT-alternative that runs on yo slug: "/hardware" --- +:::warning +🚧 Cortex is under construction. +::: + To run LLMs on device, Cortex has the following hardware requirements: :::info These are the general hardware requirements for running Cortex on your system. P @@ -16,6 +20,21 @@ These are the general hardware requirements for running Cortex on your system. P - Windows 10 or higher. - Ubuntu 12.04 and later. +## CPU +- Mac: + - Supports Intel and Apple Silicon CPUs. +- Windows & Linux: + - Haswell processors (Q2 2013) and newer. + - Tiger Lake (Q3 2020) and newer for Celeron and Pentium processors. +:::info +Cortex supports multiple CPU instruction sets: AVX, AVX2, and AVX512. +::: +## GPU +- At least 6GB VRAM is recommended when using NVIDIA, AMD, or Intel Arc GPUs. + +:::info +Cortex supports NVIDIA GPU acceleration. (Support for other GPUs will come soon!) +::: ## RAM (CPU Mode) - 8GB for running up to 3B models. diff --git a/docs/telemetry.mdx b/docs/telemetry.mdx index 39059ed..3e0090a 100644 --- a/docs/telemetry.mdx +++ b/docs/telemetry.mdx @@ -8,6 +8,9 @@ slug: "telemetry" import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; +:::warning +🚧 Cortex is under construction. +::: ## Introduction We collect anonymous usage data to enhance our product development. From 221771fedc89fb2464460644d67d7d013230d8a3 Mon Sep 17 00:00:00 2001 From: irfanpena Date: Wed, 26 Jun 2024 10:11:23 +0700 Subject: [PATCH 4/4] cortex -> cortexso --- docs/installation/linux.mdx | 4 ++-- docs/installation/mac.mdx | 4 ++-- docs/installation/windows.mdx | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/installation/linux.mdx b/docs/installation/linux.mdx index dcd65bb..073f913 100644 --- a/docs/installation/linux.mdx +++ b/docs/installation/linux.mdx @@ -110,7 +110,7 @@ Install NPM on your machine before proceeding with this step. 
```sh # Install globally on your system -npm i -g @janhq/cortexso +npm i -g cortexso ``` :::info Cortex automatically detects your CPU and GPU, downloading the appropriate CPU instruction sets and required dependencies to optimize GPU performance. @@ -160,5 +160,5 @@ cortex -h Run the following command to uninstall Cortex globally on your machine: ```sh # Uninstall globally on your system -npm uninstall -g @janhq/cortex +npm uninstall -g cortexso ``` \ No newline at end of file diff --git a/docs/installation/mac.mdx b/docs/installation/mac.mdx index e20840b..9ceafa6 100644 --- a/docs/installation/mac.mdx +++ b/docs/installation/mac.mdx @@ -66,7 +66,7 @@ Install NPM on your machine before proceeding with this step. ```sh # Install globally on your system -npm i -g @janhq/cortexso +npm i -g cortexso ``` :::info Cortex automatically detects your CPU and GPU, downloading the appropriate CPU instruction sets and required dependencies to optimize GPU performance. @@ -115,5 +115,5 @@ cortex -h Run the following command to uninstall Cortex globally on your machine: ```sh # Uninstall globally using NPM -npm uninstall -g @janhq/cortex +npm uninstall -g cortexso ``` \ No newline at end of file diff --git a/docs/installation/windows.mdx b/docs/installation/windows.mdx index 2e853b0..b0b3aca 100644 --- a/docs/installation/windows.mdx +++ b/docs/installation/windows.mdx @@ -88,7 +88,7 @@ Install NPM on your machine before proceeding with this step. ```sh # Install globally on your system -npm i -g @janhq/cortexso +npm i -g cortexso ``` :::info Cortex automatically detects your CPU and GPU, downloading the appropriate CPU instruction sets and required dependencies to optimize GPU performance. @@ -122,7 +122,7 @@ Install NPM on your machine before proceeding with this step. 
```sh # Install globally on your system -npm i -g @janhq/cortexso +npm i -g cortexso ``` :::info Cortex automatically detects your CPU and GPU, downloading the appropriate CPU instruction sets and required dependencies to optimize GPU performance. @@ -171,5 +171,5 @@ cortex -h Run the following command to uninstall Cortex globally on your machine: ```sh # Uninstall globally on your system -npm uninstall -g @janhq/cortex +npm uninstall -g cortexso ``` \ No newline at end of file