7 changes: 7 additions & 0 deletions .devcontainer/Dockerfile
@@ -6,6 +6,13 @@ ENV ZSH_CUSTOM=/home/node/.oh-my-zsh/custom \
RUN apt-get update && \
    # Install Task
    curl -s https://taskfile.dev/install.sh | sh -s -- -b /usr/local/bin ${TASK_VERSION} && \
    # Install GitHub CLI
    curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg | dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg && \
    chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg && \
    echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | tee /etc/apt/sources.list.d/github-cli.list > /dev/null && \
    apt-get update && \
    apt-get install -y gh && \
    # Cleanup
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

281 changes: 183 additions & 98 deletions README.md
@@ -1,16 +1,18 @@
# Inference Gateway TypeScript SDK

An SDK written in TypeScript for the [Inference Gateway](https://github.com/edenreich/inference-gateway).

- [Inference Gateway TypeScript SDK](#inference-gateway-typescript-sdk)
  - [Installation](#installation)
  - [Usage](#usage)
    - [Creating a Client](#creating-a-client)
    - [Listing Models](#listing-models)
    - [Creating Chat Completions](#creating-chat-completions)
    - [Streaming Chat Completions](#streaming-chat-completions)
    - [Tool Calls](#tool-calls)
    - [Proxying Requests](#proxying-requests)
    - [Health Check](#health-check)
    - [Creating a Client with Custom Options](#creating-a-client-with-custom-options)
  - [Contributing](#contributing)
  - [License](#license)

@@ -22,152 +24,215 @@ Run `npm i @inference-gateway/sdk`.

### Creating a Client

```typescript
import { InferenceGatewayClient } from '@inference-gateway/sdk';

// Create a client with default options
const client = new InferenceGatewayClient({
  baseURL: 'http://localhost:8080/v1',
  apiKey: 'your-api-key', // Optional
});
```

### Listing Models

To list all available models:

```typescript
import { InferenceGatewayClient, Provider } from '@inference-gateway/sdk';

const client = new InferenceGatewayClient({
  baseURL: 'http://localhost:8080/v1',
});

try {
  // List all models
  const models = await client.listModels();
  console.log('All models:', models);

  // List models from a specific provider
  const openaiModels = await client.listModels(Provider.OpenAI);
  console.log('OpenAI models:', openaiModels);
} catch (error) {
  console.error('Error:', error);
}
```

### Creating Chat Completions

To generate content using a model:

```typescript
import {
  InferenceGatewayClient,
  MessageRole,
  Provider,
} from '@inference-gateway/sdk';

const client = new InferenceGatewayClient({
  baseURL: 'http://localhost:8080/v1',
});

try {
  const response = await client.createChatCompletion(
    {
      model: 'gpt-4o',
      messages: [
        {
          role: MessageRole.System,
          content: 'You are a helpful assistant',
        },
        {
          role: MessageRole.User,
          content: 'Tell me a joke',
        },
      ],
    },
    Provider.OpenAI
  ); // Provider is optional

  console.log('Response:', response.choices[0].message.content);
} catch (error) {
  console.error('Error:', error);
}
```
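
For a multi-turn conversation you send the accumulated history back with each call. The sketch below reuses the `client` and the `createChatCompletion` call shown above; `MessageRole.Assistant` is an assumption alongside the documented `System` and `User` members.

```typescript
// First turn.
const first = await client.createChatCompletion({
  model: 'gpt-4o',
  messages: [{ role: MessageRole.User, content: 'Tell me a joke' }],
});

// Second turn: replay the history plus the new user message.
const followUp = await client.createChatCompletion({
  model: 'gpt-4o',
  messages: [
    { role: MessageRole.User, content: 'Tell me a joke' },
    {
      role: MessageRole.Assistant, // assumption: Assistant exists alongside System and User
      content: first.choices[0].message.content,
    },
    { role: MessageRole.User, content: 'Now explain the joke' },
  ],
});

console.log(followUp.choices[0].message.content);
```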

### Streaming Chat Completions

To stream content from a model:

```typescript
import {
  InferenceGatewayClient,
  MessageRole,
  Provider,
} from '@inference-gateway/sdk';

const client = new InferenceGatewayClient({
  baseURL: 'http://localhost:8080/v1',
});

try {
  await client.streamChatCompletion(
    {
      model: 'llama-3.3-70b-versatile',
      messages: [
        {
          role: MessageRole.User,
          content: 'Tell me a story',
        },
      ],
    },
    {
      onOpen: () => console.log('Stream opened'),
      onContent: (content) => process.stdout.write(content),
      onChunk: (chunk) => console.log('Received chunk:', chunk.id),
      onFinish: () => console.log('\nStream completed'),
      onError: (error) => console.error('Stream error:', error),
    },
    Provider.Groq // Provider is optional
  );
} catch (error) {
  console.error('Error:', error);
}
```
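
If you want the full text once the stream ends (for logging or caching, say), the documented callbacks are enough to collect it. This sketch uses only the `onContent` and `onFinish` hooks shown above.

```typescript
import { InferenceGatewayClient, MessageRole } from '@inference-gateway/sdk';

const client = new InferenceGatewayClient({
  baseURL: 'http://localhost:8080/v1',
});

// Accumulate streamed tokens into one string using only the documented callbacks.
let fullText = '';
await client.streamChatCompletion(
  {
    model: 'llama-3.3-70b-versatile',
    messages: [{ role: MessageRole.User, content: 'Tell me a story' }],
  },
  {
    onContent: (content) => {
      fullText += content;
    },
    onFinish: () => console.log(`\nCollected ${fullText.length} characters`),
  }
);
```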

### Tool Calls

To use tool calls with models that support them:

```typescript
import {
  InferenceGatewayClient,
  MessageRole,
  Provider,
} from '@inference-gateway/sdk';

const client = new InferenceGatewayClient({
  baseURL: 'http://localhost:8080/v1',
});

try {
  await client.streamChatCompletion(
    {
      model: 'gpt-4o',
      messages: [
        {
          role: MessageRole.User,
          content: "What's the weather in San Francisco?",
        },
      ],
      tools: [
        {
          type: 'function',
          function: {
            name: 'get_weather',
            parameters: {
              type: 'object',
              properties: {
                location: {
                  type: 'string',
                  description: 'The city and state, e.g. San Francisco, CA',
                },
              },
              required: ['location'],
            },
          },
        },
      ],
    },
    {
      onTool: (toolCall) => {
        console.log('Tool call:', toolCall.function.name);
        console.log('Arguments:', toolCall.function.arguments);
      },
      onContent: (content) => process.stdout.write(content),
      onFinish: () => console.log('\nStream completed'),
    },
    Provider.OpenAI
  );
} catch (error) {
  console.error('Error:', error);
}
```
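
The example above only logs the tool call. To complete the loop you would normally run the function yourself and send its result back in a follow-up request. The sketch below assumes an OpenAI-style tool message shape; `MessageRole.Tool` and `tool_call_id` are assumptions, not confirmed SDK API, and `getWeather` is a hypothetical local helper.

```typescript
// Hypothetical local implementation of the get_weather tool (not part of the SDK).
function getWeather(args: { location: string }): string {
  return JSON.stringify({ location: args.location, forecast: 'sunny, 21C' });
}

await client.streamChatCompletion(
  {
    model: 'gpt-4o',
    messages: [
      {
        role: MessageRole.User,
        content: "What's the weather in San Francisco?",
      },
      // In a full round trip, the assistant message carrying the tool call
      // would sit between these two turns.
      {
        role: MessageRole.Tool, // assumption: the SDK exposes a Tool role
        tool_call_id: 'call_123', // id captured earlier from the onTool callback
        content: getWeather({ location: 'San Francisco, CA' }),
      },
    ],
  },
  {
    onContent: (content) => process.stdout.write(content),
  },
  Provider.OpenAI
);
```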

### Proxying Requests

To proxy requests directly to a provider:

```typescript
import { InferenceGatewayClient, Provider } from '@inference-gateway/sdk';

const client = new InferenceGatewayClient({
  baseURL: 'http://localhost:8080/v1',
});

try {
  const response = await client.proxy(Provider.OpenAI, 'embeddings', {
    method: 'POST',
    body: JSON.stringify({
      model: 'text-embedding-ada-002',
      input: 'Hello world',
    }),
  });

  console.log('Embeddings:', response);
} catch (error) {
  console.error('Error:', error);
}
```
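
The proxied response arrives as whatever JSON the provider returns, so a thin cast can keep call sites tidy. The `EmbeddingsResponse` shape below mirrors OpenAI's documented embeddings payload and is an assumption about the provider, not an SDK type.

```typescript
// Assumed shape of OpenAI's embeddings payload; a provider contract, not an SDK type.
interface EmbeddingsResponse {
  data: { embedding: number[]; index: number }[];
  model: string;
}

const embeddings = (await client.proxy(Provider.OpenAI, 'embeddings', {
  method: 'POST',
  body: JSON.stringify({
    model: 'text-embedding-ada-002',
    input: 'Hello world',
  }),
})) as EmbeddingsResponse;

console.log('First vector length:', embeddings.data[0].embedding.length);
```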

### Health Check

To check if the Inference Gateway is running:

```typescript
import { InferenceGatewayClient } from '@inference-gateway/sdk';

const client = new InferenceGatewayClient({
  baseURL: 'http://localhost:8080/v1',
});

try {
  const isHealthy = await client.healthCheck();
  console.log('API is healthy:', isHealthy);
} catch (error) {
  console.error('Error:', error);
}
```
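
In CI or container start-up scripts it can be handy to wait for the gateway rather than check once. This helper only reuses the `healthCheck` call above; the retry count and delay are arbitrary.

```typescript
import { InferenceGatewayClient } from '@inference-gateway/sdk';

// Poll healthCheck until the gateway responds or we give up.
async function waitForGateway(
  client: InferenceGatewayClient,
  attempts = 10,
  delayMs = 1000
): Promise<boolean> {
  for (let i = 0; i < attempts; i++) {
    try {
      if (await client.healthCheck()) return true;
    } catch {
      // Gateway not reachable yet; fall through to the retry delay.
    }
    await new Promise((resolve) => setTimeout(resolve, delayMs));
  }
  return false;
}
```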

### Creating a Client with Custom Options

You can create a new client with custom options using the `withOptions` method:

```typescript
import { InferenceGatewayClient } from '@inference-gateway/sdk';

const client = new InferenceGatewayClient({
  baseURL: 'http://localhost:8080/v1',
});

// Create a new client with custom headers
const clientWithHeaders = client.withOptions({
  defaultHeaders: {
    'X-Custom-Header': 'value',
  },
  timeout: 60000, // 60 seconds
});
```
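
Since `withOptions` creates a new client rather than mutating the original (as the comment above suggests), you can derive per-tenant or per-request clients from one shared base; the header names here are illustrative only.

```typescript
// Derive two clients from the same base; the base client itself is left unchanged.
const tenantA = client.withOptions({
  defaultHeaders: { 'X-Tenant-Id': 'tenant-a' }, // illustrative header name
});
const tenantB = client.withOptions({
  defaultHeaders: { 'X-Tenant-Id': 'tenant-b' },
});
```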

## Contributing

Please refer to the [CONTRIBUTING.md](CONTRIBUTING.md) file for information about how to get involved. We welcome issues, questions, and pull requests.