From ad1ee5f8891310c105ea46fc57e81f24fdaa516b Mon Sep 17 00:00:00 2001 From: Franck Abgrall Date: Thu, 27 Feb 2025 16:53:06 +0100 Subject: [PATCH 1/7] Add text-to-video snippets --- packages/tasks/src/snippets/inputs.ts | 3 +++ packages/tasks/src/snippets/js.ts | 24 +++++++++++++++++++ packages/tasks/src/snippets/python.ts | 19 +++++++++++++++ .../tasks/src/tasks/text-to-video/data.ts | 2 +- 4 files changed, 47 insertions(+), 1 deletion(-) diff --git a/packages/tasks/src/snippets/inputs.ts b/packages/tasks/src/snippets/inputs.ts index 49e270dff8..4d08209c44 100644 --- a/packages/tasks/src/snippets/inputs.ts +++ b/packages/tasks/src/snippets/inputs.ts @@ -96,6 +96,8 @@ const inputsAudioClassification = () => `"sample1.flac"`; const inputsTextToImage = () => `"Astronaut riding a horse"`; +const inputsTextToVideo = () => `"A young man walking on the street"`; + const inputsTextToSpeech = () => `"The answer to the universe is 42"`; const inputsTextToAudio = () => `"liquid drum and bass, atmospheric synths, airy sounds"`; @@ -130,6 +132,7 @@ const modelInputSnippets: { "text-generation": inputsTextGeneration, "image-text-to-text": inputsTextGeneration, "text-to-image": inputsTextToImage, + "text-to-video": inputsTextToVideo, "text-to-speech": inputsTextToSpeech, "text-to-audio": inputsTextToAudio, "text2text-generation": inputsText2TextGeneration, diff --git a/packages/tasks/src/snippets/js.ts b/packages/tasks/src/snippets/js.ts index f918a4a763..53613681d0 100644 --- a/packages/tasks/src/snippets/js.ts +++ b/packages/tasks/src/snippets/js.ts @@ -275,6 +275,29 @@ query({"inputs": ${getModelInputSnippet(model)}}).then((response) => { ]; }; +export const snippetTextToVideo = ( + model: ModelDataMinimal, + accessToken: string, + provider: SnippetInferenceProvider +): InferenceSnippet[] => { + return [...(["fal-ai", "replicate"].includes(provider) ? [{ + client: "huggingface.js", + content: `\ +import { HfInference } from "@huggingface/inference"; + +const client = new HfInference("${accessToken || `{API_TOKEN}`}"); + +const video = await client.textToVideo({ + model: "${model.id}", + inputs: ${getModelInputSnippet(model)}, + parameters: { num_inference_steps: 5 }, + provider: "${provider}", +}); +// Use the generated video (it's a Blob) +`, + }] : [])]; +}; + export const snippetTextToAudio = ( model: ModelDataMinimal, accessToken: string, @@ -420,6 +443,7 @@ export const jsSnippets: Partial< "sentence-similarity": snippetBasic, "automatic-speech-recognition": snippetAutomaticSpeechRecognition, "text-to-image": snippetTextToImage, + "text-to-video": snippetTextToVideo, "text-to-speech": snippetTextToAudio, "text-to-audio": snippetTextToAudio, "audio-to-audio": snippetFile, diff --git a/packages/tasks/src/snippets/python.ts b/packages/tasks/src/snippets/python.ts index 832d072637..9f96d36280 100644 --- a/packages/tasks/src/snippets/python.ts +++ b/packages/tasks/src/snippets/python.ts @@ -308,6 +308,24 @@ image = Image.open(io.BytesIO(image_bytes))`, ]; }; +export const snippetTextToVideo = ( + model: ModelDataMinimal, + accessToken: string, + provider: SnippetInferenceProvider, +): InferenceSnippet[] => { + return [ + ...(["fal-ai", "replicate"].includes(provider) ? [{ + client: "huggingface_hub", + content: `\ +${snippetImportInferenceClient(accessToken, provider)} + +video = client.text_to_video( + ${getModelInputSnippet(model)}, + model="${model.id}" +)`, + }] : [])]; +}; + export const snippetTabular = (model: ModelDataMinimal): InferenceSnippet[] => { return [ { @@ -412,6 +430,7 @@ export const pythonSnippets: Partial< "sentence-similarity": snippetBasic, "automatic-speech-recognition": snippetFile, "text-to-image": snippetTextToImage, + "text-to-video": snippetTextToVideo, "text-to-speech": snippetTextToAudio, "text-to-audio": snippetTextToAudio, "audio-to-audio": snippetFile, diff --git a/packages/tasks/src/tasks/text-to-video/data.ts b/packages/tasks/src/tasks/text-to-video/data.ts index 967ffa72c0..9439a693f7 100644 --- a/packages/tasks/src/tasks/text-to-video/data.ts +++ b/packages/tasks/src/tasks/text-to-video/data.ts @@ -95,7 +95,7 @@ const taskData: TaskDataCustom = { ], summary: "Text-to-video models can be used in any application that requires generating consistent sequence of images from text. ", - widgetModels: [], + widgetModels: [], youtubeId: undefined, }; From c96d2f9dffc00ce66981dd13e8b602b513887e5d Mon Sep 17 00:00:00 2001 From: Franck Abgrall Date: Thu, 27 Feb 2025 16:54:32 +0100 Subject: [PATCH 2/7] add widget model for text to video on task apge --- packages/tasks/src/tasks/text-to-video/data.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/tasks/src/tasks/text-to-video/data.ts b/packages/tasks/src/tasks/text-to-video/data.ts index 9439a693f7..88218ab721 100644 --- a/packages/tasks/src/tasks/text-to-video/data.ts +++ b/packages/tasks/src/tasks/text-to-video/data.ts @@ -95,7 +95,7 @@ const taskData: TaskDataCustom = { ], summary: "Text-to-video models can be used in any application that requires generating consistent sequence of images from text. ", - widgetModels: [], + widgetModels: ["tencent/HunyuanVideo"], youtubeId: undefined, }; From 3938700eb923679152555c8b2e44963f39636887 Mon Sep 17 00:00:00 2001 From: Franck Abgrall Date: Thu, 27 Feb 2025 16:55:08 +0100 Subject: [PATCH 3/7] use tab --- packages/tasks/src/tasks/text-to-video/data.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/tasks/src/tasks/text-to-video/data.ts b/packages/tasks/src/tasks/text-to-video/data.ts index 88218ab721..bce3a6b899 100644 --- a/packages/tasks/src/tasks/text-to-video/data.ts +++ b/packages/tasks/src/tasks/text-to-video/data.ts @@ -95,7 +95,7 @@ const taskData: TaskDataCustom = { ], summary: "Text-to-video models can be used in any application that requires generating consistent sequence of images from text. ", - widgetModels: ["tencent/HunyuanVideo"], + widgetModels: ["tencent/HunyuanVideo"], youtubeId: undefined, }; From 0a7286ad30feb3366508d72907a22c6dd843f4ac Mon Sep 17 00:00:00 2001 From: Franck Abgrall Date: Thu, 27 Feb 2025 17:01:41 +0100 Subject: [PATCH 4/7] some refactor --- packages/tasks/src/snippets/js.ts | 16 ++++++++++------ packages/tasks/src/snippets/python.ts | 25 ++++++++++++++----------- 2 files changed, 24 insertions(+), 17 deletions(-) diff --git a/packages/tasks/src/snippets/js.ts b/packages/tasks/src/snippets/js.ts index 53613681d0..f4ee8c4da5 100644 --- a/packages/tasks/src/snippets/js.ts +++ b/packages/tasks/src/snippets/js.ts @@ -43,7 +43,7 @@ const output = await client.${HFJS_METHODS[model.pipeline_tag]}({ console.log(output); `, }, - ] + ] : []), { client: "fetch", @@ -270,7 +270,7 @@ query({"inputs": ${getModelInputSnippet(model)}}).then((response) => { // Use image });`, }, - ] + ] : []), ]; }; @@ -280,9 +280,11 @@ export const snippetTextToVideo = ( accessToken: string, provider: SnippetInferenceProvider ): InferenceSnippet[] => { - return [...(["fal-ai", "replicate"].includes(provider) ? [{ - client: "huggingface.js", - content: `\ + return ["fal-ai", "replicate"].includes(provider) + ? [ + { + client: "huggingface.js", + content: `\ import { HfInference } from "@huggingface/inference"; const client = new HfInference("${accessToken || `{API_TOKEN}`}"); @@ -295,7 +297,9 @@ const video = await client.textToVideo({ }); // Use the generated video (it's a Blob) `, - }] : [])]; + }, + ] + : []; }; export const snippetTextToAudio = ( diff --git a/packages/tasks/src/snippets/python.ts b/packages/tasks/src/snippets/python.ts index 9f96d36280..72308d7dcf 100644 --- a/packages/tasks/src/snippets/python.ts +++ b/packages/tasks/src/snippets/python.ts @@ -217,7 +217,7 @@ result = client.${HFH_INFERENCE_CLIENT_METHODS[model.pipeline_tag]}( print(result) `, }, - ] + ] : []), { client: "requests", @@ -283,7 +283,7 @@ result = fal_client.subscribe( print(result) `, }, - ] + ] : []), ...(provider === "hf-inference" ? [ @@ -303,7 +303,7 @@ import io from PIL import Image image = Image.open(io.BytesIO(image_bytes))`, }, - ] + ] : []), ]; }; @@ -311,19 +311,22 @@ image = Image.open(io.BytesIO(image_bytes))`, export const snippetTextToVideo = ( model: ModelDataMinimal, accessToken: string, - provider: SnippetInferenceProvider, + provider: SnippetInferenceProvider ): InferenceSnippet[] => { - return [ - ...(["fal-ai", "replicate"].includes(provider) ? [{ - client: "huggingface_hub", - content: `\ + return ["fal-ai", "replicate"].includes(provider) + ? [ + { + client: "huggingface_hub", + content: `\ ${snippetImportInferenceClient(accessToken, provider)} video = client.text_to_video( - ${getModelInputSnippet(model)}, - model="${model.id}" +${getModelInputSnippet(model)}, +model="${model.id}" )`, - }] : [])]; + }, + ] + : []; }; export const snippetTabular = (model: ModelDataMinimal): InferenceSnippet[] => { From 15657a4ce63896b477d7c12bc14bc68c4090a7d0 Mon Sep 17 00:00:00 2001 From: Franck Abgrall Date: Thu, 27 Feb 2025 17:06:44 +0100 Subject: [PATCH 5/7] fix format --- packages/tasks/src/snippets/js.ts | 6 +++--- packages/tasks/src/snippets/python.ts | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/packages/tasks/src/snippets/js.ts b/packages/tasks/src/snippets/js.ts index f4ee8c4da5..714d35c232 100644 --- a/packages/tasks/src/snippets/js.ts +++ b/packages/tasks/src/snippets/js.ts @@ -43,7 +43,7 @@ const output = await client.${HFJS_METHODS[model.pipeline_tag]}({ console.log(output); `, }, - ] + ] : []), { client: "fetch", @@ -270,7 +270,7 @@ query({"inputs": ${getModelInputSnippet(model)}}).then((response) => { // Use image });`, }, - ] + ] : []), ]; }; @@ -298,7 +298,7 @@ const video = await client.textToVideo({ // Use the generated video (it's a Blob) `, }, - ] + ] : []; }; diff --git a/packages/tasks/src/snippets/python.ts b/packages/tasks/src/snippets/python.ts index 72308d7dcf..294550aa39 100644 --- a/packages/tasks/src/snippets/python.ts +++ b/packages/tasks/src/snippets/python.ts @@ -217,7 +217,7 @@ result = client.${HFH_INFERENCE_CLIENT_METHODS[model.pipeline_tag]}( print(result) `, }, - ] + ] : []), { client: "requests", @@ -283,7 +283,7 @@ result = fal_client.subscribe( print(result) `, }, - ] + ] : []), ...(provider === "hf-inference" ? [ @@ -303,7 +303,7 @@ import io from PIL import Image image = Image.open(io.BytesIO(image_bytes))`, }, - ] + ] : []), ]; }; @@ -325,7 +325,7 @@ ${getModelInputSnippet(model)}, model="${model.id}" )`, }, - ] + ] : []; }; From 317512bae35b6119dfb9789f848c5635d043eccc Mon Sep 17 00:00:00 2001 From: Franck Abgrall Date: Fri, 28 Feb 2025 09:40:23 +0100 Subject: [PATCH 6/7] change param order --- packages/tasks/src/snippets/js.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/tasks/src/snippets/js.ts b/packages/tasks/src/snippets/js.ts index 714d35c232..bcb96dce8c 100644 --- a/packages/tasks/src/snippets/js.ts +++ b/packages/tasks/src/snippets/js.ts @@ -291,9 +291,9 @@ const client = new HfInference("${accessToken || `{API_TOKEN}`}"); const video = await client.textToVideo({ model: "${model.id}", + provider: "${provider}", inputs: ${getModelInputSnippet(model)}, parameters: { num_inference_steps: 5 }, - provider: "${provider}", }); // Use the generated video (it's a Blob) `, From d8b624e2063c96c1d4e3aa303e8cade726f248b4 Mon Sep 17 00:00:00 2001 From: Franck Abgrall Date: Fri, 28 Feb 2025 11:54:14 +0100 Subject: [PATCH 7/7] add generated snippets --- .../tasks-gen/scripts/generate-snippets-fixtures.ts | 11 +++++++++++ .../text-to-video/0.huggingface.js.fal-ai.js | 11 +++++++++++ .../text-to-video/0.huggingface.js.replicate.js | 11 +++++++++++ .../text-to-video/0.huggingface_hub.fal-ai.py | 11 +++++++++++ .../text-to-video/0.huggingface_hub.replicate.py | 11 +++++++++++ packages/tasks/src/snippets/python.ts | 4 ++-- 6 files changed, 57 insertions(+), 2 deletions(-) create mode 100644 packages/tasks-gen/snippets-fixtures/text-to-video/0.huggingface.js.fal-ai.js create mode 100644 packages/tasks-gen/snippets-fixtures/text-to-video/0.huggingface.js.replicate.js create mode 100644 packages/tasks-gen/snippets-fixtures/text-to-video/0.huggingface_hub.fal-ai.py create mode 100644 packages/tasks-gen/snippets-fixtures/text-to-video/0.huggingface_hub.replicate.py diff --git a/packages/tasks-gen/scripts/generate-snippets-fixtures.ts b/packages/tasks-gen/scripts/generate-snippets-fixtures.ts index 1af87f766e..56d2658205 100644 --- a/packages/tasks-gen/scripts/generate-snippets-fixtures.ts +++ b/packages/tasks-gen/scripts/generate-snippets-fixtures.ts @@ -90,6 +90,17 @@ const TEST_CASES: { providers: ["hf-inference", "fal-ai"], languages: ["sh", "js", "py"], }, + { + testName: "text-to-video", + model: { + id: "tencent/HunyuanVideo", + pipeline_tag: "text-to-video", + tags: [], + inference: "", + }, + providers: ["replicate", "fal-ai"], + languages: ["js", "py"], + }, { testName: "text-classification", model: { diff --git a/packages/tasks-gen/snippets-fixtures/text-to-video/0.huggingface.js.fal-ai.js b/packages/tasks-gen/snippets-fixtures/text-to-video/0.huggingface.js.fal-ai.js new file mode 100644 index 0000000000..1176dc2716 --- /dev/null +++ b/packages/tasks-gen/snippets-fixtures/text-to-video/0.huggingface.js.fal-ai.js @@ -0,0 +1,11 @@ +import { HfInference } from "@huggingface/inference"; + +const client = new HfInference("api_token"); + +const video = await client.textToVideo({ + model: "tencent/HunyuanVideo", + provider: "fal-ai", + inputs: "A young man walking on the street", + parameters: { num_inference_steps: 5 }, +}); +// Use the generated video (it's a Blob) diff --git a/packages/tasks-gen/snippets-fixtures/text-to-video/0.huggingface.js.replicate.js b/packages/tasks-gen/snippets-fixtures/text-to-video/0.huggingface.js.replicate.js new file mode 100644 index 0000000000..fa0c617b3b --- /dev/null +++ b/packages/tasks-gen/snippets-fixtures/text-to-video/0.huggingface.js.replicate.js @@ -0,0 +1,11 @@ +import { HfInference } from "@huggingface/inference"; + +const client = new HfInference("api_token"); + +const video = await client.textToVideo({ + model: "tencent/HunyuanVideo", + provider: "replicate", + inputs: "A young man walking on the street", + parameters: { num_inference_steps: 5 }, +}); +// Use the generated video (it's a Blob) diff --git a/packages/tasks-gen/snippets-fixtures/text-to-video/0.huggingface_hub.fal-ai.py b/packages/tasks-gen/snippets-fixtures/text-to-video/0.huggingface_hub.fal-ai.py new file mode 100644 index 0000000000..54cc650531 --- /dev/null +++ b/packages/tasks-gen/snippets-fixtures/text-to-video/0.huggingface_hub.fal-ai.py @@ -0,0 +1,11 @@ +from huggingface_hub import InferenceClient + +client = InferenceClient( + provider="fal-ai", + api_key="api_token" +) + +video = client.text_to_video( + "A young man walking on the street", + model="tencent/HunyuanVideo" +) \ No newline at end of file diff --git a/packages/tasks-gen/snippets-fixtures/text-to-video/0.huggingface_hub.replicate.py b/packages/tasks-gen/snippets-fixtures/text-to-video/0.huggingface_hub.replicate.py new file mode 100644 index 0000000000..eecb00a94e --- /dev/null +++ b/packages/tasks-gen/snippets-fixtures/text-to-video/0.huggingface_hub.replicate.py @@ -0,0 +1,11 @@ +from huggingface_hub import InferenceClient + +client = InferenceClient( + provider="replicate", + api_key="api_token" +) + +video = client.text_to_video( + "A young man walking on the street", + model="tencent/HunyuanVideo" +) \ No newline at end of file diff --git a/packages/tasks/src/snippets/python.ts b/packages/tasks/src/snippets/python.ts index 294550aa39..3ce2d16e17 100644 --- a/packages/tasks/src/snippets/python.ts +++ b/packages/tasks/src/snippets/python.ts @@ -321,8 +321,8 @@ export const snippetTextToVideo = ( ${snippetImportInferenceClient(accessToken, provider)} video = client.text_to_video( -${getModelInputSnippet(model)}, -model="${model.id}" + ${getModelInputSnippet(model)}, + model="${model.id}" )`, }, ]