From 06aefc512da3c1ff4beb7a85401bd1f9030d0b65 Mon Sep 17 00:00:00 2001 From: irfanpena Date: Wed, 26 Jun 2024 06:56:36 +0700 Subject: [PATCH 1/4] Add the tensorrt-llm yaml example --- docs/installation/linux.mdx | 2 +- docs/installation/mac.mdx | 2 +- docs/installation/windows.mdx | 4 +-- docs/model-operations.mdx | 63 +++++++++++++++++++++++++++++++++++ 4 files changed, 67 insertions(+), 4 deletions(-) diff --git a/docs/installation/linux.mdx b/docs/installation/linux.mdx index 5ea4ca9..dcd65bb 100644 --- a/docs/installation/linux.mdx +++ b/docs/installation/linux.mdx @@ -110,7 +110,7 @@ Install NPM on your machine before proceeding with this step. ```sh # Install globally on your system -npm i -g @janhq/cortex +npm i -g @janhq/cortexso ``` :::info Cortex automatically detects your CPU and GPU, downloading the appropriate CPU instruction sets and required dependencies to optimize GPU performance. diff --git a/docs/installation/mac.mdx b/docs/installation/mac.mdx index 4cadbde..e20840b 100644 --- a/docs/installation/mac.mdx +++ b/docs/installation/mac.mdx @@ -66,7 +66,7 @@ Install NPM on your machine before proceeding with this step. ```sh # Install globally on your system -npm i -g @janhq/cortex +npm i -g @janhq/cortexso ``` :::info Cortex automatically detects your CPU and GPU, downloading the appropriate CPU instruction sets and required dependencies to optimize GPU performance. diff --git a/docs/installation/windows.mdx b/docs/installation/windows.mdx index 3ab1d1b..2e853b0 100644 --- a/docs/installation/windows.mdx +++ b/docs/installation/windows.mdx @@ -88,7 +88,7 @@ Install NPM on your machine before proceeding with this step. ```sh # Install globally on your system -npm i -g @janhq/cortex +npm i -g @janhq/cortexso ``` :::info Cortex automatically detects your CPU and GPU, downloading the appropriate CPU instruction sets and required dependencies to optimize GPU performance. @@ -122,7 +122,7 @@ Install NPM on your machine before proceeding with this step. 
```sh # Install globally on your system -npm i -g @janhq/cortex +npm i -g @janhq/cortexso ``` :::info Cortex automatically detects your CPU and GPU, downloading the appropriate CPU instruction sets and required dependencies to optimize GPU performance. diff --git a/docs/model-operations.mdx b/docs/model-operations.mdx index 6265ecc..60afa5a 100644 --- a/docs/model-operations.mdx +++ b/docs/model-operations.mdx @@ -194,6 +194,69 @@ If a `model.yaml` is not available, Cortex autogenerates it from model metadata. + **Example `model.yaml` for `llama3 8B-tensorrt-llm-windows-ada`** + ```yaml + name: Llama 3 + model: llama3:8B + version: 1 + + # files: + + # Results Preferences + top_p: 0.95 + temperature: 0.7 + frequency_penalty: 0 + presence_penalty: 0 + max_tokens: 8192 # Infer from base config.json -> max_position_embeddings + stream: true # true | false + + # Engine / Model Settings + engine: cortex.tensorrtllm + os: windows # from CI env var + gpu_arch: ada # from CI env var + quantization_method: awq # from CI env var + precision: int4 # from CI env var + tp: 1 # from CI env var + trtllm_version: 0.9.0 # From CI env var + ctx_len: 8192 # Infer from base config.json -> max_position_embeddings + text_model: false # Fixed value - https://github.com/janhq/jan/blob/dev/extensions/tensorrt-llm-extension/resources/models.json#L41C7-L41C26 + prompt_template: "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n" + # Prompt template: Can only be retrieved from instruct model + # - https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct/blob/main/tokenizer_config.json#L2053 + # - Requires jinja format parser + ``` + **Example `model.yaml` for `llama3 8B-tensorrt-llm-linux-ada`** + + ```yaml + name: Llama 3 + model: llama3:8B + version: 1 + + # files: + + # Results Preferences + top_p: 0.95 + temperature: 0.7 + 
frequency_penalty: 0 + presence_penalty: 0 + max_tokens: 8192 # Infer from base config.json -> max_position_embeddings + stream: true # true | false + + # Engine / Model Settings + engine: cortex.tensorrtllm + os: linux # from CI env var + gpu_arch: ada # from CI env var + quantization_method: awq # from CI env var + precision: int4 # from CI env var + tp: 1 # from CI env var + trtllm_version: 0.9.0 # From CI env var + ctx_len: 8192 # Infer from base config.json -> max_position_embeddings + text_model: false # Fixed value - https://github.com/janhq/jan/blob/dev/extensions/tensorrt-llm-extension/resources/models.json#L41C7-L41C26 + prompt_template: "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n" + # Prompt template: Can only be retrieved from instruct model + # - https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct/blob/main/tokenizer_config.json#L2053 + # - Requires jinja format parser + ``` From 8a1fd80b0809e256a2650278784b45ea14cbd719 Mon Sep 17 00:00:00 2001 From: irfanpena Date: Wed, 26 Jun 2024 07:00:11 +0700 Subject: [PATCH 2/4] Update the format --- docs/quickstart.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/quickstart.md b/docs/quickstart.md index abf4657..d9a89e6 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -58,8 +58,9 @@ cortex pull bartowski/Hermes-2-Theta-Llama-3-70B-GGUF # Download a preconfigured model from https://huggingface.co/cortexhub cortex pull llama3 ``` - +:::info Read more about [model operations](./model-operations). 
+::: ## Get Help @@ -69,9 +70,11 @@ cortex cortex CMD -h ``` +:::info Cortex is still in early development, so if you have any questions, please reach out to us: - [GitHub](https://github.com/janhq/cortex) - [Discord](https://discord.gg/YFKKeuVu) +::: ## Next Steps From 71e0cca6a079d2c45b15ec36cacd8d5e520b5042 Mon Sep 17 00:00:00 2001 From: irfanpena Date: Wed, 26 Jun 2024 07:17:43 +0700 Subject: [PATCH 3/4] Update the hardware --- docs/cortex-onnx.mdx | 4 ++++ docs/cortex-tensorrt-llm.mdx | 4 ++++ docs/hardware.md | 19 +++++++++++++++++++ docs/telemetry.mdx | 3 +++ 4 files changed, 30 insertions(+) diff --git a/docs/cortex-onnx.mdx b/docs/cortex-onnx.mdx index f76840c..05f51a1 100644 --- a/docs/cortex-onnx.mdx +++ b/docs/cortex-onnx.mdx @@ -4,6 +4,10 @@ description: Onnx Architecture slug: "cortex-onnx" --- +:::warning +🚧 Cortex is under construction. +::: + ## Introduction Cortex.onnx is a C++ inference library for Windows that relies on [onnxruntime-genai](https://github.com/microsoft/onnxruntime-genai), utilizing DirectML for hardware acceleration. [DirectML](https://github.com/microsoft/DirectML) is a high-performance DirectX 12 library for machine learning, providing GPU acceleration across various hardware and drivers, including AMD, Intel, NVIDIA, and Qualcomm GPUs. It integrates and sometimes upstreams [onnxruntime-genai](https://github.com/microsoft/onnxruntime-genai) for inference tasks. diff --git a/docs/cortex-tensorrt-llm.mdx b/docs/cortex-tensorrt-llm.mdx index b030766..36f3453 100644 --- a/docs/cortex-tensorrt-llm.mdx +++ b/docs/cortex-tensorrt-llm.mdx @@ -4,6 +4,10 @@ description: NVIDIA TensorRT-LLM Architecture slug: "cortex-tensorrt-llm" --- +:::warning +🚧 Cortex is under construction. +::: + ## Introduction [Cortex.tensorrt-llm](https://github.com/janhq/cortex.tensorrt-llm) is a C++ inference library for NVIDIA GPUs. It submodules NVIDIA’s [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM) for GPU accelerated inference. 
diff --git a/docs/hardware.md b/docs/hardware.md index af9ced5..dcb8ff9 100644 --- a/docs/hardware.md +++ b/docs/hardware.md @@ -4,6 +4,10 @@ description: Get started quickly with Jan, a ChatGPT-alternative that runs on yo slug: "/hardware" --- +:::warning +🚧 Cortex is under construction. +::: + To run LLMs on device, Cortex has the following hardware requirements: :::info These are the general hardware requirements for running Cortex on your system. P @@ -16,6 +20,21 @@ These are the general hardware requirements for running Cortex on your system. P - Windows 10 or higher. - Ubuntu 12.04 and later. +## CPU +- Mac: + - Supports Intel and Apple Silicon CPUs. +- Windows & Linux: + - Haswell processors (Q2 2013) and newer. + - Tiger Lake (Q3 2020) and newer for Celeron and Pentium processors. +:::info +Cortex supports multiple CPU instruction sets: AVX, AVX2, and AVX512. +::: +## GPU +- At least 6GB VRAM is recommended when using NVIDIA, AMD, or Intel Arc GPUs. + +:::info +Cortex supports NVIDIA GPU acceleration. (Support for other GPUs will come soon!) +::: ## RAM (CPU Mode) - 8GB for running up to 3B models. diff --git a/docs/telemetry.mdx b/docs/telemetry.mdx index 39059ed..3e0090a 100644 --- a/docs/telemetry.mdx +++ b/docs/telemetry.mdx @@ -8,6 +8,9 @@ slug: "telemetry" import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; +:::warning +🚧 Cortex is under construction. +::: ## Introduction We collect anonymous usage data to enhance our product development. From 221771fedc89fb2464460644d67d7d013230d8a3 Mon Sep 17 00:00:00 2001 From: irfanpena Date: Wed, 26 Jun 2024 10:11:23 +0700 Subject: [PATCH 4/4] cortex -> cortexso --- docs/installation/linux.mdx | 4 ++-- docs/installation/mac.mdx | 4 ++-- docs/installation/windows.mdx | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/installation/linux.mdx b/docs/installation/linux.mdx index dcd65bb..073f913 100644 --- a/docs/installation/linux.mdx +++ b/docs/installation/linux.mdx @@ -110,7 +110,7 @@ Install NPM on your machine before proceeding with this step. 
```sh # Install globally on your system -npm i -g @janhq/cortexso +npm i -g cortexso ``` :::info Cortex automatically detects your CPU and GPU, downloading the appropriate CPU instruction sets and required dependencies to optimize GPU performance. @@ -160,5 +160,5 @@ cortex -h Run the following command to uninstall Cortex globally on your machine: ```sh # Uninstall globally on your system -npm uninstall -g @janhq/cortex +npm uninstall -g cortexso ``` \ No newline at end of file diff --git a/docs/installation/mac.mdx b/docs/installation/mac.mdx index e20840b..9ceafa6 100644 --- a/docs/installation/mac.mdx +++ b/docs/installation/mac.mdx @@ -66,7 +66,7 @@ Install NPM on your machine before proceeding with this step. ```sh # Install globally on your system -npm i -g @janhq/cortexso +npm i -g cortexso ``` :::info Cortex automatically detects your CPU and GPU, downloading the appropriate CPU instruction sets and required dependencies to optimize GPU performance. @@ -115,5 +115,5 @@ cortex -h Run the following command to uninstall Cortex globally on your machine: ```sh # Uninstall globally using NPM -npm uninstall -g @janhq/cortex +npm uninstall -g cortexso ``` \ No newline at end of file diff --git a/docs/installation/windows.mdx b/docs/installation/windows.mdx index 2e853b0..b0b3aca 100644 --- a/docs/installation/windows.mdx +++ b/docs/installation/windows.mdx @@ -88,7 +88,7 @@ Install NPM on your machine before proceeding with this step. ```sh # Install globally on your system -npm i -g @janhq/cortexso +npm i -g cortexso ``` :::info Cortex automatically detects your CPU and GPU, downloading the appropriate CPU instruction sets and required dependencies to optimize GPU performance. @@ -122,7 +122,7 @@ Install NPM on your machine before proceeding with this step. 
```sh # Install globally on your system -npm i -g @janhq/cortexso +npm i -g cortexso ``` :::info Cortex automatically detects your CPU and GPU, downloading the appropriate CPU instruction sets and required dependencies to optimize GPU performance. @@ -171,5 +171,5 @@ cortex -h Run the following command to uninstall Cortex globally on your machine: ```sh # Uninstall globally on your system -npm uninstall -g @janhq/cortex +npm uninstall -g cortexso ``` \ No newline at end of file