diff --git a/docs/docs/api.md b/docs/docs/api-reference.md
similarity index 100%
rename from docs/docs/api.md
rename to docs/docs/api-reference.md
diff --git a/docs/docs/examples/chatbox.md b/docs/docs/examples/chatbox.md
new file mode 100644
index 000000000..963cfe006
--- /dev/null
+++ b/docs/docs/examples/chatbox.md
@@ -0,0 +1,11 @@
+---
+title: Nitro with Chatbox
+---
+
+:::info COMING SOON
+:::
+
+ 
\ No newline at end of file
diff --git a/docs/docs/features/chat.md b/docs/docs/features/chat.md
index b880ccbc3..c0e8c75f3 100644
--- a/docs/docs/features/chat.md
+++ b/docs/docs/features/chat.md
@@ -11,7 +11,7 @@ To send a single query to your chosen LLM, follow these steps:
 
```bash title="Nitro" -curl http://localhost:3928/inferences/llamacpp/chat_completion \ +curl http://localhost:3928/v1/chat/completions \ -H "Content-Type: application/json" \ -d '{ "model": "", @@ -53,7 +53,7 @@ For ongoing conversations or multiple queries, the dialog request feature is ide
```bash title="Nitro" -curl http://localhost:3928/inferences/llamacpp/chat_completion \ +curl http://localhost:3928/v1/chat/completions \ -H "Content-Type: application/json" \ -d '{ "messages": [ diff --git a/docs/docs/features/embed.md b/docs/docs/features/embed.md index a27978e00..8462e87ee 100644 --- a/docs/docs/features/embed.md +++ b/docs/docs/features/embed.md @@ -17,7 +17,7 @@ Here’s an example showing how to get the embedding result from the model:
```bash title="Nitro" {1} -curl http://localhost:3928/inferences/llamacpp/embedding \ +curl http://localhost:3928/v1/embeddings \ -H 'Content-Type: application/json' \ -d '{ "input": "Hello", diff --git a/docs/docs/features/prompt.md b/docs/docs/features/prompt.md index 53ea4be7b..bf2a07d3c 100644 --- a/docs/docs/features/prompt.md +++ b/docs/docs/features/prompt.md @@ -41,7 +41,7 @@ curl http://localhost:3928/inferences/llamacpp/loadmodel \ ### Testing the Assistant ```bash title="Pirate Assistant" -curl http://localhost:3928/inferences/llamacpp/chat_completion \ +curl http://localhost:3928/v1/chat/completions \ -H "Content-Type: application/json" \ -d '{ "messages": [ diff --git a/docs/docs/new/about.md b/docs/docs/new/about.md index 54a27ce85..634bf9abb 100644 --- a/docs/docs/new/about.md +++ b/docs/docs/new/about.md @@ -24,7 +24,7 @@ For instance, compare the Nitro inference call:
```bash title="Nitro chat completion" -curl http://localhost:3928/inferences/llamacpp/chat_completion \ +curl http://localhost:3928/v1/chat/completions \ -H "Content-Type: application/json" \ -d '{ "model": "gpt-3.5-turbo", diff --git a/docs/docs/new/install.md b/docs/docs/new/install.md index 41262a595..4fd4ecff1 100644 --- a/docs/docs/new/install.md +++ b/docs/docs/new/install.md @@ -141,7 +141,7 @@ Simple testcase with nitro, after starting the server, you can run the following "embedding": false }' # Send a prompt request to nitro - curl -s --location 'http://localhost:3928/inferences/llamacpp/chat_completion' \ + curl -s --location 'http://localhost:3928/v1/chat/completions' \ --header 'Content-Type: application/json' \ --data '{ "messages": [ @@ -172,7 +172,7 @@ Simple testcase with nitro, after starting the server, you can run the following # Send a prompt request to nitro set "curl_data2={\"messages\":[{\"content\":\"Hello there\",\"role\":\"assistant\"},{\"content\":\"Write a long and sad story for me\",\"role\":\"user\"}],\"stream\":true,\"model\":\"gpt-3.5-turbo\",\"max_tokens\":100,\"stop\":[\"hello\"],\"frequency_penalty\":0,\"presence_penalty\":0,\"temperature\":0.7}" - curl.exe -s -w "%%{http_code}" --location "http://localhost:3928/inferences/llamacpp/chat_completion" ^ + curl.exe -s -w "%%{http_code}" --location "http://localhost:3928/v1/chat/completions" ^ --header "Content-Type: application/json" ^ --data "%curl_data2%" ``` diff --git a/docs/docs/new/quickstart.md b/docs/docs/new/quickstart.md index 542e8e36e..b1bee8cac 100644 --- a/docs/docs/new/quickstart.md +++ b/docs/docs/new/quickstart.md @@ -26,7 +26,7 @@ Next, we need to download a model. For this example, we'll use the [Llama2 7B ch - Create a `/model` and navigate into it: ```bash mkdir model && cd model -wget https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_M.gguf?download=true +wget -O llama-2-7b-model.gguf https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_M.gguf?download=true ``` ## Step 3: Run Nitro server @@ -43,14 +43,28 @@ To check if the Nitro server is running: curl http://localhost:3928/healthz ``` -## Step 4: Making an Inference +## Step 4: Load model + +To load the model to Nitro server, you need to run: + +```bash title="Load model" +curl http://localhost:3928/inferences/llamacpp/loadmodel \ + -H 'Content-Type: application/json' \ + -d '{ + "llama_model_path": "/model/llama-2-7b-model.gguf", + "ctx_len": 512, + "ngl": 100, + }' +``` + +## Step 5: Making an Inference Finally, let's make an actual inference call using Nitro. 
 - In your terminal, execute:
 
 ```bash title="Nitro Inference"
-curl http://localhost:3928/inferences/llamacpp/chat_completion \
+curl http://localhost:3928/v1/chat/completions \
   -H "Content-Type: application/json" \
   -d '{
     "messages": [
diff --git a/docs/docusaurus.config.js b/docs/docusaurus.config.js
index 6f9f2d25a..f59378fec 100644
--- a/docs/docusaurus.config.js
+++ b/docs/docusaurus.config.js
@@ -103,7 +103,7 @@ const config = {
         {
           spec: "openapi/NitroAPI.yaml", // can be local file, url, or parsed json object
           // spec: "openapi/OpenAIAPI.yaml",
-          route: "/api/",
+          route: "/api-reference/",
         },
       ],
       theme: {
diff --git a/docs/openapi/NitroAPI.yaml b/docs/openapi/NitroAPI.yaml
index f86863df1..bc952bfc5 100644
--- a/docs/openapi/NitroAPI.yaml
+++ b/docs/openapi/NitroAPI.yaml
@@ -134,7 +134,7 @@ paths:
             schema:
               $ref: "#/components/schemas/StatusResponse"
 
-  /inferences/llamacpp/embedding:
+  /v1/embeddings:
     post:
       operationId: createEmbedding
       tags:
@@ -162,7 +162,7 @@ paths:
             schema:
               $ref: "#/components/schemas/CreateEmbeddingResponse"
 
-  /inferences/llamacpp/chat_completion:
+  /v1/chat/completions:
     post:
       operationId: createChatCompletion
       tags:
diff --git a/docs/sidebars.js b/docs/sidebars.js
index 07de6696d..af8dbd21e 100644
--- a/docs/sidebars.js
+++ b/docs/sidebars.js
@@ -49,7 +49,7 @@ const sidebars = {
       label: "Guides",
       collapsible: false,
       collapsed: false,
-      items: ["examples/llm"],
+      items: ["examples/chatbox"],
     },
     // {
     //   type: "category",
@@ -61,7 +61,7 @@ const sidebars = {
   ],
 
   apiSidebar: [
-    "api"
+    "api-reference"
   ],
 
   // communitySidebar: [
diff --git a/docs/src/styles/base.scss b/docs/src/styles/base.scss
index fb6388922..64273262f 100644
--- a/docs/src/styles/base.scss
+++ b/docs/src/styles/base.scss
@@ -1,7 +1,4 @@
 @layer base {
-  html {
-    @apply scroll-smooth;
-  }
   html[data-theme="light"] {
     --ifm-background-color: white;
     --ifm-color-primary: #2563eb; /* New Primary Blue */
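
A quick end-to-end check of the renamed routes — a minimal sketch, not part of the diff above. It assumes a Nitro server running on localhost:3928 with the quickstart's model downloaded to `/model/llama-2-7b-model.gguf` (the file name set by the `wget -O` flag in the quickstart); payloads mirror the docs examples in this patch:

```bash title="Smoke test for the renamed endpoints (illustrative)"
# Load the model first; this route is unchanged by the patch.
curl http://localhost:3928/inferences/llamacpp/loadmodel \
  -H 'Content-Type: application/json' \
  -d '{
    "llama_model_path": "/model/llama-2-7b-model.gguf",
    "ctx_len": 512,
    "ngl": 100
  }'

# /inferences/llamacpp/chat_completion is now the OpenAI-compatible route.
curl http://localhost:3928/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "messages": [{"role": "user", "content": "Hello"}]
  }'

# /inferences/llamacpp/embedding is now /v1/embeddings.
curl http://localhost:3928/v1/embeddings \
  -H 'Content-Type: application/json' \
  -d '{"input": "Hello"}'
```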