19 changes: 10 additions & 9 deletions README.md
@@ -31,6 +31,7 @@ await uploadFile({

await inference.chatCompletion({
model: "meta-llama/Llama-3.1-8B-Instruct",
provider: "sambanova", // or together, fal-ai, replicate, cohere …
messages: [
{
role: "user",
@@ -39,11 +40,11 @@ await inference.chatCompletion({
],
max_tokens: 512,
temperature: 0.5,
provider: "sambanova", // or together, fal-ai, replicate, cohere …
});

await inference.textToImage({
model: "black-forest-labs/FLUX.1-dev",
provider: "replicate",
inputs: "a picture of a green bird",
});

@@ -54,7 +55,7 @@ await inference.textToImage({

This is a collection of JS libraries to interact with the Hugging Face API, with TS types included.

- [@huggingface/inference](packages/inference/README.md): Use HF Inference API (serverless), Inference Endpoints (dedicated) and third-party Inference Providers to make calls to 100,000+ Machine Learning models
- [@huggingface/inference](packages/inference/README.md): Use HF Inference API (serverless), Inference Endpoints (dedicated) and all supported Inference Providers to make calls to 100,000+ Machine Learning models
- [@huggingface/hub](packages/hub/README.md): Interact with huggingface.co to create or delete repos and commit / download files
- [@huggingface/agents](packages/agents/README.md): Interact with HF models through a natural language interface
- [@huggingface/gguf](packages/gguf/README.md): A GGUF parser that works on remotely hosted files.
@@ -84,7 +85,7 @@ npm install @huggingface/agents
Then import the libraries in your code:

```ts
import { HfInference } from "@huggingface/inference";
import { InferenceClient } from "@huggingface/inference";
import { HfAgent } from "@huggingface/agents";
import { createRepo, commit, deleteRepo, listFiles } from "@huggingface/hub";
import type { RepoId } from "@huggingface/hub";
@@ -96,7 +97,7 @@ You can run our packages with vanilla JS, without any bundler, by using a CDN or

```html
<script type="module">
import { HfInference } from 'https://cdn.jsdelivr.net/npm/@huggingface/inference@3.5.1/+esm';
import { InferenceClient } from 'https://cdn.jsdelivr.net/npm/@huggingface/inference@3.5.1/+esm';
import { createRepo, commit, deleteRepo, listFiles } from "https://cdn.jsdelivr.net/npm/@huggingface/hub@1.0.2/+esm";
</script>
```
@@ -105,12 +106,12 @@ You can run our packages with vanilla JS, without any bundler, by using a CDN or

```ts
// esm.sh
import { HfInference } from "https://esm.sh/@huggingface/inference"
import { InferenceClient } from "https://esm.sh/@huggingface/inference"
import { HfAgent } from "https://esm.sh/@huggingface/agents";

import { createRepo, commit, deleteRepo, listFiles } from "https://esm.sh/@huggingface/hub"
// or npm:
import { HfInference } from "npm:@huggingface/inference"
import { InferenceClient } from "npm:@huggingface/inference"
import { HfAgent } from "npm:@huggingface/agents";

import { createRepo, commit, deleteRepo, listFiles } from "npm:@huggingface/hub"
@@ -123,11 +124,11 @@ Get your HF access token in your [account settings](https://huggingface.co/setti
### @huggingface/inference examples

```ts
import { HfInference } from "@huggingface/inference";
import { InferenceClient } from "@huggingface/inference";

const HF_TOKEN = "hf_...";

const inference = new HfInference(HF_TOKEN);
const inference = new InferenceClient(HF_TOKEN);

// Chat completion API
const out = await inference.chatCompletion({
@@ -179,7 +180,7 @@ await inference.imageToText({

// Using your own dedicated inference endpoint: https://hf.co/docs/inference-endpoints/
const gpt2 = inference.endpoint('https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/gpt2');
const { generated_text } = await gpt2.textGeneration({inputs: 'The answer to the universe is'});
const { generated_text } = await gpt2.textGeneration({ inputs: 'The answer to the universe is' });

// Chat Completion
const llamaEndpoint = inference.endpoint(
4 changes: 2 additions & 2 deletions e2e/deno/index.ts
@@ -1,4 +1,4 @@
import { HfInference } from "npm:@huggingface/inference@*";
import { InferenceClient } from "npm:@huggingface/inference@*";
import { whoAmI, listFiles } from "npm:@huggingface/hub@*";

const info = await whoAmI({ credentials: { accessToken: "hf_hub.js" }, hubUrl: "https://hub-ci.huggingface.co" });
@@ -10,7 +10,7 @@ for await (const file of listFiles({ repo: "gpt2" })) {

const token = Deno.env.get("HF_TOKEN");
if (token) {
const hf = new HfInference(token);
const hf = new InferenceClient(token);

const tokenInfo = await whoAmI({ credentials: { accessToken: token } });
console.log(tokenInfo);
4 changes: 2 additions & 2 deletions e2e/svelte/src/routes/+page.svelte
@@ -1,8 +1,8 @@
<script>
import { whoAmI, listFiles } from "@huggingface/hub";
import { HfInference } from "@huggingface/inference";
import { InferenceClient } from "@huggingface/inference";

const hf = new HfInference();
const hf = new InferenceClient();

const test = async () => {
const info = await whoAmI({ credentials: { accessToken: "hf_hub.js" }, hubUrl: "https://hub-ci.huggingface.co" });
4 changes: 2 additions & 2 deletions e2e/ts/src/index.ts
@@ -1,9 +1,9 @@
import { HfInference } from "@huggingface/inference";
import { InferenceClient } from "@huggingface/inference";
import { whoAmI } from "@huggingface/hub";

const hfToken = process.env.token;

const hf = new HfInference(hfToken);
const hf = new InferenceClient(hfToken);

(async () => {
const info = await whoAmI({ credentials: { accessToken: "hf_hub.js" }, hubUrl: "https://hub-ci.huggingface.co" });
2 changes: 1 addition & 1 deletion packages/agents/package.json
@@ -56,6 +56,6 @@
"@types/node": "^18.13.0"
},
"dependencies": {
"@huggingface/inference": "^2.6.1"
"@huggingface/inference": "workspace:^"
}
}
15 changes: 2 additions & 13 deletions packages/agents/pnpm-lock.yaml

Some generated files are not rendered by default.

4 changes: 2 additions & 2 deletions packages/agents/src/lib/evalBuilder.ts
@@ -1,4 +1,4 @@
import { HfInference } from "@huggingface/inference";
import { InferenceClient } from "@huggingface/inference";
import type { Data, Tool } from "../types";

// this function passes the tools & files to the context before calling eval
@@ -17,7 +17,7 @@ export async function evalBuilder(

// add tools to context
for (const tool of tools) {
const toolCall = (input: Promise<Data>) => tool.call?.(input, new HfInference(accessToken ?? ""));
const toolCall = (input: Promise<Data>) => tool.call?.(input, new InferenceClient(accessToken ?? ""));
// @ts-expect-error adding to the scope
globalThis[tool.name] = toolCall;
}
6 changes: 3 additions & 3 deletions packages/agents/src/llms/LLMHF.ts
@@ -1,8 +1,8 @@
import type { LLM } from "../types";
import { HfInference } from "@huggingface/inference";
import { InferenceClient } from "@huggingface/inference";

export function LLMFromHub(accessToken?: string, model?: string): LLM {
const inference = new HfInference(accessToken);
const inference = new InferenceClient(accessToken);

return async (prompt: string): Promise<string> => {
const formattedPrompt = "<|user|>" + prompt + "<|end|><|assistant|>";
@@ -20,7 +20,7 @@ export function LLMFromHub(accessToken?: string, model?: string): LLM {
}

export function LLMFromEndpoint(accessToken: string, endpoint: string): LLM {
const inference = new HfInference(accessToken).endpoint(endpoint);
const inference = new InferenceClient(accessToken).endpoint(endpoint);
return async (prompt: string): Promise<string> => {
const formattedPrompt = "<|user|>" + prompt + "<|end|><|assistant|>";

11 changes: 7 additions & 4 deletions packages/agents/src/tools/imageToText.ts
@@ -15,9 +15,12 @@ export const imageToTextTool: Tool = {
if (typeof data === "string") throw "Input must be a blob.";

return (
await inference.imageToText({
data,
})
).generated_text;
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
(
await inference.imageToText({
data,
})
).generated_text!
);
},
};
4 changes: 2 additions & 2 deletions packages/agents/src/types.d.ts
@@ -1,12 +1,12 @@
import type { HfInference } from "@huggingface/inference";
import type { InferenceClient } from "@huggingface/inference";

export type Data = string | Blob | ArrayBuffer;

export interface Tool {
name: string;
description: string;
examples: Array<Example>;
call?: (input: Promise<Data>, inference: HfInference) => Promise<Data>;
call?: (input: Promise<Data>, inference: InferenceClient) => Promise<Data>;
}

export interface Example {
4 changes: 2 additions & 2 deletions packages/agents/test/HfAgent.spec.ts
@@ -1,7 +1,7 @@
import { describe, expect, it } from "vitest";
import { HfAgent, defaultTools, LLMFromHub, LLMFromEndpoint } from "../src";
import type { Data } from "../src/types";
import type { HfInference } from "@huggingface/inference";
import type { InferenceClient } from "@huggingface/inference";

const env = import.meta.env;
if (!env.HF_TOKEN) {
@@ -33,7 +33,7 @@ describe("HfAgent", () => {
},
],
// eslint-disable-next-line @typescript-eslint/no-unused-vars
call: async (input: Promise<Data>, inference: HfInference): Promise<Data> => {
call: async (input: Promise<Data>, inference: InferenceClient): Promise<Data> => {
const data = await input;
if (typeof data !== "string") {
throw new Error("Input must be a string");
3 changes: 2 additions & 1 deletion packages/gguf/src/cli.ts
@@ -1,6 +1,7 @@
#!/usr/bin/env node

import { GGMLQuantizationType, gguf, ggufAllShards, GGUFParseOutput } from ".";
import type { GGUFParseOutput } from ".";
import { GGMLQuantizationType, ggufAllShards } from ".";
import { GGML_QUANT_SIZES } from "./quant-descriptions";

interface PrintColumnHeader {
22 changes: 11 additions & 11 deletions packages/inference/README.md
@@ -1,7 +1,7 @@
# 🤗 Hugging Face Inference

A Typescript powered wrapper for the HF Inference API (serverless), Inference Endpoints (dedicated), and third-party Inference Providers.
It works with [Inference API (serverless)](https://huggingface.co/docs/api-inference/index) and [Inference Endpoints (dedicated)](https://huggingface.co/docs/inference-endpoints/index), and even with supported third-party Inference Providers.
A Typescript powered wrapper for the HF Inference API (serverless), Inference Endpoints (dedicated), and all supported Inference Providers.
It works with [Inference API (serverless)](https://huggingface.co/docs/api-inference/index) and [Inference Endpoints (dedicated)](https://huggingface.co/docs/inference-endpoints/index), and even with all supported third-party Inference Providers.

Check out the [full documentation](https://huggingface.co/docs/huggingface.js/inference/README).

@@ -25,24 +25,24 @@ yarn add @huggingface/inference

```ts
// esm.sh
import { HfInference } from "https://esm.sh/@huggingface/inference"
import { InferenceClient } from "https://esm.sh/@huggingface/inference"
// or npm:
import { HfInference } from "npm:@huggingface/inference"
import { InferenceClient } from "npm:@huggingface/inference"
```

### Initialize

```typescript
import { HfInference } from '@huggingface/inference'
import { InferenceClient } from '@huggingface/inference'

const hf = new HfInference('your access token')
const hf = new InferenceClient('your access token')
```

❗**Important note:** Using an access token is optional to get started, however you will be rate limited eventually. Join [Hugging Face](https://huggingface.co/join) and then visit [access tokens](https://huggingface.co/settings/tokens) to generate your access token for **free**.

Your access token should be kept private. If you need to protect it in front-end applications, we suggest setting up a proxy server that stores the access token.
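
As a minimal sketch of that setup (assuming an Express server; the route, port, and model below are illustrative and not part of this package), the token stays server-side while the browser only talks to your own endpoint:

```ts
import express from "express";
import { InferenceClient } from "@huggingface/inference";

const app = express();
app.use(express.json());

// The access token lives only on the server and never reaches the browser.
const client = new InferenceClient(process.env.HF_TOKEN);

// Illustrative route: forward only the fields you want to expose to clients.
app.post("/api/chat", async (req, res) => {
  try {
    const out = await client.chatCompletion({
      model: "meta-llama/Llama-3.1-8B-Instruct",
      messages: req.body.messages,
      max_tokens: 512,
    });
    res.json(out);
  } catch (err) {
    res.status(500).json({ error: String(err) });
  }
});

app.listen(3000);
```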

### Third-party inference providers
### All supported inference providers

You can send inference requests to third-party providers with the inference client.

@@ -63,7 +63,7 @@ To send requests to a third-party provider, you have to pass the `provider` para
```ts
const accessToken = "hf_..."; // Either a HF access token, or an API key from the third-party provider (Replicate in this example)

const client = new HfInference(accessToken);
const client = new InferenceClient(accessToken);
await client.textToImage({
provider: "replicate",
model:"black-forest-labs/Flux.1-dev",
@@ -93,7 +93,7 @@ This is not an issue for LLMs as everyone converged on the OpenAI API anyways, b

### Tree-shaking

You can import the functions you need directly from the module instead of using the `HfInference` class.
You can import the functions you need directly from the module instead of using the `InferenceClient` class.

```ts
import { textGeneration } from "@huggingface/inference";
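
// A sketch of the tree-shaken call (model and inputs are illustrative):
// each standalone function takes the access token as part of its arguments.
await textGeneration({
  accessToken: "hf_...",
  model: "mistralai/Mistral-7B-v0.1",
  inputs: "The answer to the universe is",
});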
@@ -165,7 +180,7 @@ for await (const chunk of hf.chatCompletionStream({
It's also possible to call Mistral or OpenAI endpoints directly:

```typescript
const openai = new HfInference(OPENAI_TOKEN).endpoint("https://api.openai.com");
const openai = new InferenceClient(OPENAI_TOKEN).endpoint("https://api.openai.com");

let out = "";
for await (const chunk of openai.chatCompletionStream({
@@ -602,7 +602,7 @@ You can use any Chat Completion API-compatible provider with the `chatCompletion
```typescript
// Chat Completion Example
const MISTRAL_KEY = process.env.MISTRAL_KEY;
const hf = new HfInference(MISTRAL_KEY);
const hf = new InferenceClient(MISTRAL_KEY);
const ep = hf.endpoint("https://api.mistral.ai");
const stream = ep.chatCompletionStream({
model: "mistral-tiny",
4 changes: 2 additions & 2 deletions packages/inference/package.json
@@ -3,8 +3,8 @@
"version": "3.5.1",
"packageManager": "pnpm@8.10.5",
"license": "MIT",
"author": "Tim Mikeladze <tim.mikeladze@gmail.com>",
"description": "Typescript wrapper for the Hugging Face Inference Endpoints & Inference API",
"author": "Hugging Face and Tim Mikeladze <tim.mikeladze@gmail.com>",
"description": "Typescript client for the Hugging Face Inference Providers and Inference Endpoints",
"repository": {
"type": "git",
"url": "https://github.com/huggingface/huggingface.js.git"
8 changes: 4 additions & 4 deletions packages/inference/scripts/generate-dts.ts
@@ -61,12 +61,12 @@ for (const dir of dirs) {

appendFileSync(
"./dist/index.d.ts",
`export class HfInference {
`export class InferenceClient {
\tconstructor(accessToken?: string, defaultOptions?: Options);
\t/**
\t * Returns copy of HfInference tied to a specified endpoint.
\t * Returns copy of InferenceClient tied to a specified endpoint.
\t */
\tendpoint(endpointUrl: string): HfInferenceEndpoint;
\tendpoint(endpointUrl: string): InferenceClientEndpoint;
` +
fns
.map(
@@ -84,7 +84,7 @@ appendFileSync(

appendFileSync(
"./dist/index.d.ts",
`export class HfInferenceEndpoint {\n\tconstructor(endpointUrl: string, accessToken?: string, defaultOptions?: Options);\n` +
`export class InferenceClientEndpoint {\n\tconstructor(endpointUrl: string, accessToken?: string, defaultOptions?: Options);\n` +
fns
.map(
(fn) =>