diff --git a/src/content/workers-ai-models/deepseek-r1-distill-qwen-32b.json b/src/content/workers-ai-models/deepseek-r1-distill-qwen-32b.json
new file mode 100644
index 00000000000000..2d31b8c4465916
--- /dev/null
+++ b/src/content/workers-ai-models/deepseek-r1-distill-qwen-32b.json
@@ -0,0 +1,395 @@
+{
+  "id": "ad01ab83-baf8-4e7b-8fed-a0a219d4eb45",
+  "source": 1,
+  "name": "@cf/deepseek-ai/deepseek-r1-distill-qwen-32b",
+  "description": "DeepSeek R1 Distill Qwen 32B",
+  "task": {
+    "id": "c329a1f9-323d-4e91-b2aa-582dd4188d34",
+    "name": "Text Generation",
+    "description": "Family of generative text models, such as large language models (LLMs), that can be adapted for a variety of natural language tasks."
+  },
+  "tags": [],
+  "properties": [],
+  "schema": {
+    "input": {
+      "type": "object",
+      "oneOf": [
+        {
+          "title": "Prompt",
+          "properties": {
+            "prompt": {
+              "type": "string",
+              "minLength": 1,
+              "maxLength": 131072,
+              "description": "The input text prompt for the model to generate a response."
+            },
+            "raw": {
+              "type": "boolean",
+              "default": false,
+              "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting."
+            },
+            "stream": {
+              "type": "boolean",
+              "default": false,
+              "description": "If true, the response will be streamed back incrementally using SSE (Server-Sent Events)."
+            },
+            "max_tokens": {
+              "type": "integer",
+              "default": 256,
+              "description": "The maximum number of tokens to generate in the response."
+            },
+            "temperature": {
+              "type": "number",
+              "default": 0.6,
+              "minimum": 0,
+              "maximum": 5,
+              "description": "Controls the randomness of the output; higher values produce more random results."
+            },
+            "top_p": {
+              "type": "number",
+              "minimum": 0,
+              "maximum": 2,
+              "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses."
+            },
+            "top_k": {
+              "type": "integer",
+              "minimum": 1,
+              "maximum": 50,
+              "description": "Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises."
+            },
+            "seed": {
+              "type": "integer",
+              "minimum": 1,
+              "maximum": 9999999999,
+              "description": "Random seed for reproducibility of the generation."
+            },
+            "repetition_penalty": {
+              "type": "number",
+              "minimum": 0,
+              "maximum": 2,
+              "description": "Penalty for repeated tokens; higher values discourage repetition."
+            },
+            "frequency_penalty": {
+              "type": "number",
+              "minimum": 0,
+              "maximum": 2,
+              "description": "Decreases the likelihood of the model repeating the same lines verbatim."
+            },
+            "presence_penalty": {
+              "type": "number",
+              "minimum": 0,
+              "maximum": 2,
+              "description": "Increases the likelihood of the model introducing new topics."
+            },
+            "lora": {
+              "type": "string",
+              "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model."
+            }
+          },
+          "required": [
+            "prompt"
+          ]
+        },
+        {
+          "title": "Messages",
+          "properties": {
+            "messages": {
+              "type": "array",
+              "description": "An array of message objects representing the conversation history.",
+              "items": {
+                "type": "object",
+                "properties": {
+                  "role": {
+                    "type": "string",
+                    "description": "The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool')."
+                  },
+                  "content": {
+                    "type": "string",
+                    "maxLength": 131072,
+                    "description": "The content of the message as a string."
+                  }
+                },
+                "required": [
+                  "role",
+                  "content"
+                ]
+              }
+            },
+            "functions": {
+              "type": "array",
+              "items": {
+                "type": "object",
+                "properties": {
+                  "name": {
+                    "type": "string"
+                  },
+                  "code": {
+                    "type": "string"
+                  }
+                },
+                "required": [
+                  "name",
+                  "code"
+                ]
+              }
+            },
+            "tools": {
+              "type": "array",
+              "description": "A list of tools available for the assistant to use.",
+              "items": {
+                "type": "object",
+                "oneOf": [
+                  {
+                    "properties": {
+                      "name": {
+                        "type": "string",
+                        "description": "The name of the tool. The more descriptive, the better."
+                      },
+                      "description": {
+                        "type": "string",
+                        "description": "A brief description of what the tool does."
+                      },
+                      "parameters": {
+                        "type": "object",
+                        "description": "Schema defining the parameters accepted by the tool.",
+                        "properties": {
+                          "type": {
+                            "type": "string",
+                            "description": "The type of the parameters object (usually 'object')."
+                          },
+                          "required": {
+                            "type": "array",
+                            "description": "List of required parameter names.",
+                            "items": {
+                              "type": "string"
+                            }
+                          },
+                          "properties": {
+                            "type": "object",
+                            "description": "Definitions of each parameter.",
+                            "additionalProperties": {
+                              "type": "object",
+                              "properties": {
+                                "type": {
+                                  "type": "string",
+                                  "description": "The data type of the parameter."
+                                },
+                                "description": {
+                                  "type": "string",
+                                  "description": "A description of the expected parameter."
+                                }
+                              },
+                              "required": [
+                                "type",
+                                "description"
+                              ]
+                            }
+                          }
+                        },
+                        "required": [
+                          "type",
+                          "properties"
+                        ]
+                      }
+                    },
+                    "required": [
+                      "name",
+                      "description",
+                      "parameters"
+                    ]
+                  },
+                  {
+                    "properties": {
+                      "type": {
+                        "type": "string",
+                        "description": "Specifies the type of tool (e.g., 'function')."
+                      },
+                      "function": {
+                        "type": "object",
+                        "description": "Details of the function tool.",
+                        "properties": {
+                          "name": {
+                            "type": "string",
+                            "description": "The name of the function."
+                          },
+                          "description": {
+                            "type": "string",
+                            "description": "A brief description of what the function does."
+                          },
+                          "parameters": {
+                            "type": "object",
+                            "description": "Schema defining the parameters accepted by the function.",
+                            "properties": {
+                              "type": {
+                                "type": "string",
+                                "description": "The type of the parameters object (usually 'object')."
+                              },
+                              "required": {
+                                "type": "array",
+                                "description": "List of required parameter names.",
+                                "items": {
+                                  "type": "string"
+                                }
+                              },
+                              "properties": {
+                                "type": "object",
+                                "description": "Definitions of each parameter.",
+                                "additionalProperties": {
+                                  "type": "object",
+                                  "properties": {
+                                    "type": {
+                                      "type": "string",
+                                      "description": "The data type of the parameter."
+                                    },
+                                    "description": {
+                                      "type": "string",
+                                      "description": "A description of the expected parameter."
+                                    }
+                                  },
+                                  "required": [
+                                    "type",
+                                    "description"
+                                  ]
+                                }
+                              }
+                            },
+                            "required": [
+                              "type",
+                              "properties"
+                            ]
+                          }
+                        },
+                        "required": [
+                          "name",
+                          "description",
+                          "parameters"
+                        ]
+                      }
+                    },
+                    "required": [
+                      "type",
+                      "function"
+                    ]
+                  }
+                ]
+              }
+            },
+            "stream": {
+              "type": "boolean",
+              "default": false,
+              "description": "If true, the response will be streamed back incrementally."
+            },
+            "max_tokens": {
+              "type": "integer",
+              "default": 256,
+              "description": "The maximum number of tokens to generate in the response."
+            },
+            "temperature": {
+              "type": "number",
+              "default": 0.6,
+              "minimum": 0,
+              "maximum": 5,
+              "description": "Controls the randomness of the output; higher values produce more random results."
+            },
+            "top_p": {
+              "type": "number",
+              "minimum": 0,
+              "maximum": 2,
+              "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses."
+            },
+            "top_k": {
+              "type": "integer",
+              "minimum": 1,
+              "maximum": 50,
+              "description": "Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises."
+            },
+            "seed": {
+              "type": "integer",
+              "minimum": 1,
+              "maximum": 9999999999,
+              "description": "Random seed for reproducibility of the generation."
+            },
+            "repetition_penalty": {
+              "type": "number",
+              "minimum": 0,
+              "maximum": 2,
+              "description": "Penalty for repeated tokens; higher values discourage repetition."
+            },
+            "frequency_penalty": {
+              "type": "number",
+              "minimum": 0,
+              "maximum": 2,
+              "description": "Decreases the likelihood of the model repeating the same lines verbatim."
+            },
+            "presence_penalty": {
+              "type": "number",
+              "minimum": 0,
+              "maximum": 2,
+              "description": "Increases the likelihood of the model introducing new topics."
+            }
+          },
+          "required": [
+            "messages"
+          ]
+        }
+      ]
+    },
+    "output": {
+      "oneOf": [
+        {
+          "type": "object",
+          "contentType": "application/json",
+          "properties": {
+            "response": {
+              "type": "string",
+              "description": "The generated text response from the model"
+            },
+            "usage": {
+              "type": "object",
+              "description": "Usage statistics for the inference request",
+              "properties": {
+                "prompt_tokens": {
+                  "type": "number",
+                  "description": "Total number of tokens in input",
+                  "default": 0
+                },
+                "completion_tokens": {
+                  "type": "number",
+                  "description": "Total number of tokens in output",
+                  "default": 0
+                },
+                "total_tokens": {
+                  "type": "number",
+                  "description": "Total number of input and output tokens",
+                  "default": 0
+                }
+              }
+            },
+            "tool_calls": {
+              "type": "array",
+              "description": "An array of tool call requests made during response generation",
+              "items": {
+                "type": "object",
+                "properties": {
+                  "arguments": {
+                    "type": "object",
+                    "description": "The arguments to be passed to the tool call request"
+                  },
+                  "name": {
+                    "type": "string",
+                    "description": "The name of the tool to be called"
+                  }
+                }
+              }
+            }
+          }
+        },
+        {
+          "type": "string",
+          "contentType": "text/event-stream",
+          "format": "binary"
+        }
+      ]
+    }
+  }
+}
\ No newline at end of file
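For reference, here is a minimal sketch of calling this model from a Cloudflare Worker through the Workers AI binding. Only the model identifier and the request/response fields come from the schema above; the binding name `AI`, the handler shape, and the sample messages are illustrative assumptions.

```ts
// Minimal sketch: invoking @cf/deepseek-ai/deepseek-r1-distill-qwen-32b
// from a Worker. Assumes a Workers AI binding named "AI" is configured
// in wrangler.toml (binding name and handler shape are illustrative).

export interface Env {
  AI: Ai; // `Ai` is provided by @cloudflare/workers-types
}

export default {
  async fetch(request: Request, env: Env): Promise<Response> {
    // "Messages" input variant from the schema: conversation history
    // plus optional sampling controls.
    const result = await env.AI.run(
      "@cf/deepseek-ai/deepseek-r1-distill-qwen-32b",
      {
        messages: [
          { role: "system", content: "You are a concise assistant." },
          { role: "user", content: "Explain model distillation in two sentences." },
        ],
        max_tokens: 512, // schema default is 256
        temperature: 0.6, // schema default; valid range is 0-5
      },
    );

    // The non-streaming output branch is JSON carrying `response`,
    // `usage`, and (when tools are invoked) `tool_calls`.
    return Response.json(result);
  },
} satisfies ExportedHandler<Env>;
```

Passing `stream: true` instead resolves to a `ReadableStream` of server-sent events, matching the `text/event-stream` branch of the output schema; it can be returned directly as `new Response(stream, { headers: { "content-type": "text/event-stream" } })`.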