10 changes: 10 additions & 0 deletions dapr/clients/grpc/client.py
@@ -25,8 +25,10 @@

import grpc # type: ignore
from google.protobuf.any_pb2 import Any as GrpcAny
from google.protobuf.duration_pb2 import Duration as GrpcDuration
from google.protobuf.empty_pb2 import Empty as GrpcEmpty
from google.protobuf.message import Message as GrpcMessage
from google.protobuf.struct_pb2 import Struct as GrpcStruct
from grpc import ( # type: ignore
RpcError,
StreamStreamClientInterceptor,
@@ -1787,6 +1789,8 @@ def converse_alpha2(
temperature: Optional[float] = None,
tools: Optional[List[conversation.ConversationTools]] = None,
tool_choice: Optional[str] = None,
response_format: Optional[GrpcStruct] = None,
prompt_cache_retention: Optional[GrpcDuration] = None,
) -> conversation.ConversationResponseAlpha2:
"""Invoke an LLM using the conversation API (Alpha2) with tool calling support.

@@ -1800,6 +1804,8 @@
temperature: Optional temperature setting for the LLM to optimize for creativity or predictability
tools: Optional list of tools available for the LLM to call
tool_choice: Optional control over which tools can be called ('none', 'auto', 'required', or specific tool name)
response_format: Optional response format as a google.protobuf.struct_pb2.Struct (e.g. a json_schema definition for structured output)
prompt_cache_retention: Optional prompt cache retention period as a google.protobuf.duration_pb2.Duration

Returns:
ConversationResponseAlpha2 containing the conversation results with choices and tool calls
@@ -1856,6 +1862,10 @@ def converse_alpha2(
request.temperature = temperature
if tool_choice is not None:
request.tool_choice = tool_choice
if response_format is not None and hasattr(request, 'response_format'):
request.response_format.CopyFrom(response_format)
if prompt_cache_retention is not None and hasattr(request, 'prompt_cache_retention'):
request.prompt_cache_retention.CopyFrom(prompt_cache_retention)

try:
response, call = self.retry_policy.run_rpc(self._stub.ConverseAlpha2.with_call, request)
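For reference, a minimal call-site sketch for the two new parameters (not part of this diff). It assumes a conversation component named 'openai' and the existing ConversationInputAlpha2 / create_user_message helpers from dapr.clients.grpc.conversation; the json_schema payload shown is an assumption, since the exact shape depends on the underlying LLM component.

from google.protobuf.duration_pb2 import Duration
from google.protobuf.struct_pb2 import Struct

from dapr.clients import DaprClient
from dapr.clients.grpc import conversation

# Structured-output schema passed through as a protobuf Struct; the exact
# layout expected here depends on the LLM component (assumed layout).
response_format = Struct()
response_format.update(
    {
        'type': 'json_schema',
        'json_schema': {
            'name': 'weather',
            'schema': {
                'type': 'object',
                'properties': {'temperature_c': {'type': 'number'}},
                'required': ['temperature_c'],
            },
        },
    }
)

# Ask the provider to keep the prompt cache around for five minutes.
prompt_cache_retention = Duration(seconds=300)

with DaprClient() as client:
    response = client.converse_alpha2(
        name='openai',  # hypothetical conversation component name
        inputs=[
            conversation.ConversationInputAlpha2(
                messages=[conversation.create_user_message('What is the weather in Berlin?')],
            )
        ],
        response_format=response_format,
        prompt_cache_retention=prompt_cache_retention,
    )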
70 changes: 69 additions & 1 deletion dapr/clients/grpc/conversation.py
@@ -338,11 +338,46 @@ class ConversationResultAlpha2Choices:
message: ConversationResultAlpha2Message


@dataclass
class ConversationResultAlpha2CompletionUsageCompletionTokensDetails:
"""Breakdown of tokens used in the completion."""

accepted_prediction_tokens: int = 0
audio_tokens: int = 0
reasoning_tokens: int = 0
rejected_prediction_tokens: int = 0


@dataclass
class ConversationResultAlpha2CompletionUsagePromptTokensDetails:
"""Breakdown of tokens used in the prompt."""

audio_tokens: int = 0
cached_tokens: int = 0


@dataclass
class ConversationResultAlpha2CompletionUsage:
"""Token usage for one Alpha2 conversation result."""

completion_tokens: int = 0
prompt_tokens: int = 0
total_tokens: int = 0
completion_tokens_details: Optional[
ConversationResultAlpha2CompletionUsageCompletionTokensDetails
] = None
prompt_tokens_details: Optional[ConversationResultAlpha2CompletionUsagePromptTokensDetails] = (
None
)


@dataclass
class ConversationResultAlpha2:
"""One of the outputs in Alpha2 response from conversation input."""

choices: List[ConversationResultAlpha2Choices] = field(default_factory=list)
model: Optional[str] = None
usage: Optional[ConversationResultAlpha2CompletionUsage] = None


@dataclass
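As a quick illustration of how the new usage dataclasses nest (a sketch, e.g. for unit tests of code that consumes usage; the values and model string are made up):

usage = ConversationResultAlpha2CompletionUsage(
    completion_tokens=42,
    prompt_tokens=100,
    total_tokens=142,
    prompt_tokens_details=ConversationResultAlpha2CompletionUsagePromptTokensDetails(
        cached_tokens=64,  # tokens served from the provider's prompt cache
    ),
)
result = ConversationResultAlpha2(choices=[], model='gpt-4o-mini', usage=usage)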
@@ -657,5 +692,38 @@ def _get_outputs_from_grpc_response(
)
)

outputs.append(ConversationResultAlpha2(choices=choices))
model: Optional[str] = None
usage: Optional[ConversationResultAlpha2CompletionUsage] = None
if hasattr(output, 'model') and getattr(output, 'model', None):
model = output.model
if hasattr(output, 'usage') and output.usage:
u = output.usage
completion_details: Optional[
ConversationResultAlpha2CompletionUsageCompletionTokensDetails
] = None
prompt_details: Optional[ConversationResultAlpha2CompletionUsagePromptTokensDetails] = (
None
)
if hasattr(u, 'completion_tokens_details') and u.completion_tokens_details:
cd = u.completion_tokens_details
completion_details = ConversationResultAlpha2CompletionUsageCompletionTokensDetails(
accepted_prediction_tokens=getattr(cd, 'accepted_prediction_tokens', 0) or 0,
audio_tokens=getattr(cd, 'audio_tokens', 0) or 0,
reasoning_tokens=getattr(cd, 'reasoning_tokens', 0) or 0,
rejected_prediction_tokens=getattr(cd, 'rejected_prediction_tokens', 0) or 0,
)
if hasattr(u, 'prompt_tokens_details') and u.prompt_tokens_details:
pd = u.prompt_tokens_details
prompt_details = ConversationResultAlpha2CompletionUsagePromptTokensDetails(
audio_tokens=getattr(pd, 'audio_tokens', 0) or 0,
cached_tokens=getattr(pd, 'cached_tokens', 0) or 0,
)
usage = ConversationResultAlpha2CompletionUsage(
completion_tokens=getattr(u, 'completion_tokens', 0) or 0,
prompt_tokens=getattr(u, 'prompt_tokens', 0) or 0,
total_tokens=getattr(u, 'total_tokens', 0) or 0,
completion_tokens_details=completion_details,
prompt_tokens_details=prompt_details,
)
outputs.append(ConversationResultAlpha2(choices=choices, model=model, usage=usage))
return outputs
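And a sketch of reading the newly mapped fields on the consumer side, assuming the Alpha2 response exposes the mapped results as outputs and that response comes from a converse_alpha2 call like the one above; every field may be unset when the component does not report it.

for output in response.outputs:
    if output.model:
        print(f'model: {output.model}')
    if output.usage:
        usage = output.usage
        print(
            f'tokens: prompt={usage.prompt_tokens} '
            f'completion={usage.completion_tokens} total={usage.total_tokens}'
        )
        if usage.prompt_tokens_details:
            print(f'cached prompt tokens: {usage.prompt_tokens_details.cached_tokens}')
        if usage.completion_tokens_details:
            print(f'reasoning tokens: {usage.completion_tokens_details.reasoning_tokens}')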