diff --git a/guardrails/llm_providers.py b/guardrails/llm_providers.py index b11905415..b6418e80b 100644 --- a/guardrails/llm_providers.py +++ b/guardrails/llm_providers.py @@ -28,6 +28,7 @@ ) from guardrails.utils.pydantic_utils import convert_pydantic_model_to_openai_fn from guardrails.utils.safe_get import safe_get +from guardrails.utils.telemetry_utils import trace_llm_call, trace_operation class PromptCallableException(Exception): @@ -149,6 +150,13 @@ def _invoke_llm( engine = kwargs.pop("model") client = OpenAIClient(api_key=api_key) + trace_llm_call( + input_messages=[ + {"role": "system", "content": instructions}, + {"role": "user", "content": text}, + ], + model_name=engine, + ) return client.create_completion( engine=engine, prompt=nonchat_prompt(prompt=text, instructions=instructions), @@ -258,8 +266,29 @@ def _invoke_llm( "Install with `poetry add manifest-ml`" ) client = cast(manifest.Manifest, client) - manifest_response = client.run( - nonchat_prompt(prompt=text, instructions=instructions), *args, **kwargs + prompt = nonchat_prompt(prompt=text, instructions=instructions) + trace_operation( + input_mime_type="application/json", + input_value={ + **kwargs, + "prompt": prompt, + "args": args, + }, + ) + + trace_llm_call( + input_messages=chat_prompt(text, instructions), + invocation_parameters={ + **kwargs, + "prompt": prompt, + }, + ) + manifest_response = client.run(prompt, *args, **kwargs) + trace_operation( + output_mime_type="application/json", output_value=manifest_response + ) + trace_llm_call( + output_messages=[{"role": "assistant", "content": manifest_response}] ) return LLMResponse( output=manifest_response, @@ -282,6 +311,7 @@ def _invoke_llm( ``` """ # noqa + trace_input_messages = chat_prompt(prompt, kwargs.get("instructions")) if "instructions" in kwargs: prompt = kwargs.pop("instructions") + "\n\n" + prompt @@ -297,14 +327,44 @@ def is_base_cohere_chat(func): # TODO: When cohere totally gets rid of `generate`, # remove this cond and the final return if is_base_cohere_chat(client_callable): + trace_operation( + input_mime_type="application/json", + input_value={**kwargs, "message": prompt, "args": args, "model": model}, + ) + + trace_llm_call( + input_messages=trace_input_messages, + invocation_parameters={**kwargs, "message": prompt, "model": model}, + ) cohere_response = client_callable( message=prompt, model=model, *args, **kwargs ) + trace_operation( + output_mime_type="application/json", output_value=cohere_response + ) + trace_llm_call( + output_messages=[{"role": "assistant", "content": cohere_response.text}] + ) return LLMResponse( output=cohere_response.text, ) + trace_operation( + input_mime_type="application/json", + input_value={**kwargs, "prompt": prompt, "args": args, "model": model}, + ) + + trace_llm_call( + input_messages=trace_input_messages, + invocation_parameters={**kwargs, "prompt": prompt, "model": model}, + ) cohere_response = client_callable(prompt=prompt, model=model, *args, **kwargs) + trace_operation( + output_mime_type="application/json", output_value=cohere_response + ) + trace_llm_call( + output_messages=[{"role": "assistant", "content": cohere_response[0].text}] + ) return LLMResponse( output=cohere_response[0].text, ) @@ -342,11 +402,33 @@ def _invoke_llm( "Install with `pip install anthropic`" ) + trace_input_messages = chat_prompt(prompt, kwargs.get("instructions")) if "instructions" in kwargs: prompt = kwargs.pop("instructions") + "\n\n" + prompt anthropic_prompt = f"{anthropic.HUMAN_PROMPT} {prompt} {anthropic.AI_PROMPT}" + 
trace_operation( + input_mime_type="application/json", + input_value={ + **kwargs, + "model": model, + "prompt": anthropic_prompt, + "max_tokens_to_sample": max_tokens_to_sample, + "args": args, + }, + ) + + trace_llm_call( + input_messages=trace_input_messages, + invocation_parameters={ + **kwargs, + "model": model, + "prompt": anthropic_prompt, + "max_tokens_to_sample": max_tokens_to_sample, + }, + ) + anthropic_response = client_callable( model=model, prompt=anthropic_prompt, @@ -354,6 +436,14 @@ def _invoke_llm( *args, **kwargs, ) + trace_operation( + output_mime_type="application/json", output_value=anthropic_response + ) + trace_llm_call( + output_messages=[ + {"role": "assistant", "content": anthropic_response.completion} + ] + ) return LLMResponse(output=anthropic_response.completion) @@ -395,6 +485,31 @@ def _invoke_llm( ) kwargs["messages"] = messages + trace_operation( + input_mime_type="application/json", + input_value={ + **kwargs, + "model": model, + "args": args, + }, + ) + + function_calling_tools = [ + tool.get("function") + for tool in kwargs.get("tools", []) + if isinstance(tool, Dict) and tool.get("type") == "function" + ] + trace_llm_call( + input_messages=kwargs.get("messages"), + invocation_parameters={ + **kwargs, + "model": model, + }, + function_call=kwargs.get( + "function_call", safe_get(function_calling_tools, 0) + ), + ) + response = completion( model=model, *args, @@ -410,6 +525,7 @@ def _invoke_llm( stream_output=llm_response, ) + trace_operation(output_mime_type="application/json", output_value=response) if response.choices[0].message.content is not None: # type: ignore output = response.choices[0].message.content # type: ignore else: @@ -425,10 +541,22 @@ def _invoke_llm( " call arguments returned from OpenAI" ) from ae_tools + completion_tokens = response.usage.completion_tokens # type: ignore + prompt_tokens = response.usage.prompt_tokens # type: ignore + total_tokens = None + if completion_tokens or prompt_tokens: + total_tokens = (completion_tokens or 0) + (prompt_tokens or 0) + + trace_llm_call( + output_messages=[choice.message for choice in response.choices], # type: ignore + token_count_completion=completion_tokens, # type: ignore + token_count_prompt=prompt_tokens, # type: ignore + token_count_total=total_tokens, # type: ignore + ) return LLMResponse( output=output, # type: ignore - prompt_token_count=response.usage.prompt_tokens, # type: ignore - response_token_count=response.usage.completion_tokens, # type: ignore + prompt_token_count=prompt_tokens, # type: ignore + response_token_count=completion_tokens, # type: ignore ) @@ -493,11 +621,29 @@ def _invoke_llm( model_inputs["do_sample"] = do_sample model_inputs["temperature"] = temperature + trace_operation( + input_mime_type="application/json", + input_value={ + **model_inputs, + **kwargs, + }, + ) + + trace_llm_call( + input_messages=chat_prompt(prompt, kwargs.get("instructions")), + invocation_parameters={ + **model_inputs, + **kwargs, + }, + ) + output = model_generate( **model_inputs, **kwargs, ) + trace_operation(output_mime_type="application/json", output_value=output) + # NOTE: This is currently restricted to single outputs # Should we choose to support multiple return sequences, # We would need to either validate all of them @@ -507,6 +653,10 @@ def _invoke_llm( output[0], skip_special_tokens=skip_special_tokens ) + trace_llm_call( + output_messages=[{"role": "assistant", "content": decoded_output}] + ) + return LLMResponse(output=decoded_output) @@ -533,6 +683,25 @@ def 
_invoke_llm(self, prompt: str, pipeline: Any, *args, **kwargs) -> LLMRespons if temperature == 0: temperature = None + trace_operation( + input_mime_type="application/json", + input_value={ + **kwargs, + "prompt": prompt, + "temperature": temperature, + "args": args, + }, + ) + + trace_llm_call( + input_messages=chat_prompt(prompt, kwargs.get("instructions")), + invocation_parameters={ + **kwargs, + "prompt": prompt, + "temperature": temperature, + }, + ) + output = pipeline( prompt, temperature=temperature, @@ -540,6 +709,8 @@ def _invoke_llm(self, prompt: str, pipeline: Any, *args, **kwargs) -> LLMRespons **kwargs, ) + trace_operation(output_mime_type="application/json", output_value=output) + # NOTE: This is currently restricted to single outputs # Should we choose to support multiple return sequences, # We would need to either validate all of them @@ -547,6 +718,8 @@ def _invoke_llm(self, prompt: str, pipeline: Any, *args, **kwargs) -> LLMRespons # or accept a selection function content = safe_get(output[0], content_key) + trace_llm_call(output_messages=[{"role": "assistant", "content": content}]) + return LLMResponse(output=content) @@ -567,6 +740,24 @@ def _invoke_llm(self, *args, **kwargs) -> LLMResponse: ) ``` """ + + trace_operation( + input_mime_type="application/json", + input_value={ + **kwargs, + "args": args, + }, + ) + + trace_llm_call( + input_messages=chat_prompt( + kwargs.get("prompt", ""), kwargs.get("instructions") + ), + invocation_parameters={ + **kwargs, + }, + ) + # Get the response from the callable # The LLM response should either be a # string or an generator object of strings @@ -582,6 +773,8 @@ def _invoke_llm(self, *args, **kwargs) -> LLMResponse: stream_output=llm_response, ) + trace_operation(output_mime_type="application/json", output_value=llm_response) + trace_llm_call(output_messages=[{"role": "assistant", "content": llm_response}]) # Else, the callable returns a string llm_response = cast(str, llm_response) return LLMResponse( @@ -860,10 +1053,32 @@ async def invoke_llm( ) kwargs["messages"] = messages + trace_operation( + input_mime_type="application/json", + input_value={ + **kwargs, + "args": args, + }, + ) + + function_calling_tools = [ + tool.get("function") + for tool in kwargs.get("tools", []) + if isinstance(tool, Dict) and tool.get("type") == "function" + ] + trace_llm_call( + input_messages=kwargs.get("messages"), + invocation_parameters={**kwargs}, + function_call=kwargs.get( + "function_call", safe_get(function_calling_tools, 0) + ), + ) + response = await acompletion( *args, **kwargs, ) + if kwargs.get("stream", False): # If stream is defined and set to True, # the callable returns a generator object @@ -873,6 +1088,7 @@ async def invoke_llm( async_stream_output=response.completion_stream, # pyright: ignore[reportGeneralTypeIssues] ) + trace_operation(output_mime_type="application/json", output_value=response) if response.choices[0].message.content is not None: # type: ignore output = response.choices[0].message.content # type: ignore else: @@ -888,10 +1104,21 @@ async def invoke_llm( " call arguments returned from OpenAI" ) from ae_tools + completion_tokens = response.usage.completion_tokens # type: ignore + prompt_tokens = response.usage.prompt_tokens # type: ignore + total_tokens = None + if completion_tokens or prompt_tokens: + total_tokens = (completion_tokens or 0) + (prompt_tokens or 0) + trace_llm_call( + output_messages=[choice.message for choice in response.choices], # type: ignore + token_count_completion=completion_tokens, # 
type: ignore + token_count_prompt=prompt_tokens, # type: ignore + token_count_total=total_tokens, # type: ignore + ) return LLMResponse( output=output, # type: ignore - prompt_token_count=response.usage.prompt_tokens, # type: ignore - response_token_count=response.usage.completion_tokens, # type: ignore + prompt_token_count=prompt_tokens, # type: ignore + response_token_count=completion_tokens, # type: ignore ) @@ -922,9 +1149,29 @@ async def invoke_llm( "The `manifest` package is not installed. " "Install with `poetry add manifest-ml`" ) + + prompts = [nonchat_prompt(prompt=text, instructions=instructions)] + + trace_operation( + input_mime_type="application/json", + input_value={ + **kwargs, + "prompts": prompts, + "args": args, + }, + ) + + trace_llm_call( + input_messages=chat_prompt(text, instructions), + invocation_parameters={ + **kwargs, + "prompts": prompts, + }, + ) + client = cast(manifest.Manifest, client) manifest_response = await client.arun_batch( - prompts=[nonchat_prompt(prompt=text, instructions=instructions)], + prompts=prompts, *args, **kwargs, ) @@ -932,6 +1179,12 @@ async def invoke_llm( raise NotImplementedError( "Manifest async streaming is not yet supported by manifest." ) + trace_operation( + output_mime_type="application/json", output_value=manifest_response + ) + trace_llm_call( + output_messages=[{"role": "assistant", "content": manifest_response[0]}] + ) return LLMResponse( output=manifest_response[0], ) @@ -954,6 +1207,24 @@ async def invoke_llm(self, *args, **kwargs) -> LLMResponse: ) ``` """ + + trace_operation( + input_mime_type="application/json", + input_value={ + **kwargs, + "args": args, + }, + ) + + trace_llm_call( + input_messages=chat_prompt( + kwargs.get("prompt", ""), kwargs.get("instructions") + ), + invocation_parameters={ + **kwargs, + }, + ) + output = await self.llm_api(*args, **kwargs) if kwargs.get("stream", False): # If stream is defined and set to True, @@ -962,6 +1233,10 @@ async def invoke_llm(self, *args, **kwargs) -> LLMResponse: output="", async_stream_output=output.completion_stream, ) + + trace_operation(output_mime_type="application/json", output_value=output) + trace_llm_call(output_messages=[{"role": "assistant", "content": output}]) + return LLMResponse( output=output, ) diff --git a/guardrails/run/stream_runner.py b/guardrails/run/stream_runner.py index 2b95f99c2..d53f3dc2e 100644 --- a/guardrails/run/stream_runner.py +++ b/guardrails/run/stream_runner.py @@ -18,6 +18,7 @@ ) from guardrails.actions.reask import ReAsk, SkeletonReAsk from guardrails.constants import pass_status +from guardrails.utils.telemetry_utils import trace class StreamRunner(Runner): @@ -72,6 +73,7 @@ def __call__( call_log=call_log, ) + @trace(name="step") def step( self, index: int, diff --git a/guardrails/utils/openai_utils/v1.py b/guardrails/utils/openai_utils/v1.py index df0bf6d10..af5b38095 100644 --- a/guardrails/utils/openai_utils/v1.py +++ b/guardrails/utils/openai_utils/v1.py @@ -9,6 +9,8 @@ num_tokens_from_messages, num_tokens_from_string, ) +from guardrails.utils.safe_get import safe_get +from guardrails.utils.telemetry_utils import trace_llm_call, trace_operation def get_static_openai_create_func(): @@ -56,10 +58,30 @@ def create_embedding( def create_completion( self, engine: str, prompt: str, *args, **kwargs ) -> LLMResponse: + trace_operation( + input_mime_type="application/json", + input_value={ + **kwargs, + "model": engine, + "prompt": prompt, + "args": args, + }, + ) + + trace_llm_call( + invocation_parameters={ + **kwargs, + "model": 
engine, + "prompt": prompt, + } + ) + response = self.client.completions.create( model=engine, prompt=prompt, *args, **kwargs ) + trace_operation(output_mime_type="application/json", output_value=response) + return self.construct_nonchat_response( stream=kwargs.get("stream", False), openai_response=response, @@ -90,6 +112,14 @@ def construct_nonchat_response( raise ValueError("No choices returned from OpenAI") if openai_response.usage is None: raise ValueError("No token counts returned from OpenAI") + trace_llm_call( + output_messages=[ + {"role": "assistant", "content": openai_response.choices[0].text} + ], + token_count_completion=openai_response.usage.completion_tokens, + token_count_prompt=openai_response.usage.prompt_tokens, + token_count_total=openai_response.usage.total_tokens, + ) return LLMResponse( output=openai_response.choices[0].text, # type: ignore prompt_token_count=openai_response.usage.prompt_tokens, # type: ignore @@ -99,10 +129,34 @@ def construct_nonchat_response( def create_chat_completion( self, model: str, messages: List[Any], *args, **kwargs ) -> LLMResponse: + trace_operation( + input_mime_type="application/json", + input_value={ + **kwargs, + "model": model, + "messages": messages, + "args": args, + }, + ) + function_calling_tools = [ + tool.get("function") + for tool in kwargs.get("tools", []) + if isinstance(tool, Dict) and tool.get("type") == "function" + ] + trace_llm_call( + input_messages=messages, + model_name=model, + invocation_parameters={**kwargs, "model": model, "messages": messages}, + function_call=kwargs.get( + "function_call", safe_get(function_calling_tools, 0) + ), + ) response = self.client.chat.completions.create( model=model, messages=messages, *args, **kwargs ) + trace_operation(output_mime_type="application/json", output_value=response) + return self.construct_chat_response( stream=kwargs.get("stream", False), openai_response=response, @@ -150,7 +204,12 @@ def construct_chat_response( "No message content or function" " call arguments returned from OpenAI" ) from ae_tools - + trace_llm_call( + output_messages=[choice.message for choice in openai_response.choices], # type: ignore + token_count_completion=openai_response.usage.completion_tokens, + token_count_prompt=openai_response.usage.prompt_tokens, + token_count_total=openai_response.usage.total_tokens, + ) return LLMResponse( output=output, prompt_token_count=openai_response.usage.prompt_tokens, # type: ignore diff --git a/guardrails/utils/telemetry_utils.py b/guardrails/utils/telemetry_utils.py index 257980347..4f75bb0ba 100644 --- a/guardrails/utils/telemetry_utils.py +++ b/guardrails/utils/telemetry_utils.py @@ -1,25 +1,28 @@ +import json import sys from functools import wraps from operator import attrgetter -from typing import Any, Callable, List, Optional, Union +from typing import Any, Callable, Dict, List, Optional, Union from opentelemetry import context from opentelemetry.context import Context -from opentelemetry.trace import StatusCode, Tracer +from opentelemetry.trace import StatusCode, Tracer, Span + +from guardrails_api_client.models import Reask from guardrails.call_tracing import TraceHandler from guardrails.stores.context import get_tracer as get_context_tracer from guardrails.stores.context import get_tracer_context from guardrails.utils.casting_utils import to_string from guardrails.classes.validation.validator_logs import ValidatorLogs -from guardrails.actions.reask import ReAsk -from guardrails.actions import Filter, Refrain from guardrails.logger import logger +from 
guardrails.actions.filter import Filter +from guardrails.actions.refrain import Refrain def get_result_type(before_value: Any, after_value: Any, outcome: str): try: - if isinstance(after_value, (Filter, Refrain, ReAsk)): + if isinstance(after_value, (Filter, Refrain, Reask)): name = after_value.__class__.__name__.lower() elif after_value != before_value: name = "fix" @@ -46,7 +49,7 @@ def get_current_context() -> Union[Context, None]: return otel_current_context or tracer_context -def get_span(span=None): +def get_span(span: Optional[Span] = None) -> Optional[Span]: if span is not None and hasattr(span, "add_event"): return span try: @@ -184,6 +187,37 @@ def without_a_trace(*args, **kwargs): return trace_validator_wrapper +def serialize(val: Any) -> Optional[str]: + try: + if val is None: + return None + if hasattr(val, "to_dict"): + return json.dumps(val.to_dict()) + elif hasattr(val, "__dict__"): + return json.dumps(val.__dict__) + elif isinstance(val, dict) or isinstance(val, list): + return json.dumps(val) + return str(val) + except Exception: + return None + + +def to_dict(val: Any) -> Dict: + try: + if val is None: + return {} + elif isinstance(val, dict): + return val + elif hasattr(val, "to_dict"): + return val.to_dict() + elif hasattr(val, "__dict__"): + return val.__dict__ + else: + return dict(val) + except Exception: + return {} + + def trace(name: str, tracer: Optional[Tracer] = None): def trace_wrapper(fn): @wraps(fn) @@ -194,8 +228,32 @@ def to_trace_or_not_to_trace(*args, **kwargs): trace_context = get_current_context() with _tracer.start_as_current_span(name, trace_context) as trace_span: # type: ignore (Fails in Python 3.9 for invalid reason) try: + ser_args = [serialize(arg) for arg in args] + ser_kwargs = {k: serialize(v) for k, v in kwargs.items()} + inputs = { + "args": [sarg for sarg in ser_args if sarg is not None], + "kwargs": { + k: v for k, v in ser_kwargs.items() if v is not None + }, + } + trace_span.set_attribute("input.mime_type", "application/json") + trace_span.set_attribute("input.value", json.dumps(inputs)) # TODO: Capture args and kwargs as attributes? response = fn(*args, **kwargs) + + ser_output = serialize(response) + if ser_output: + trace_span.set_attribute( + "output.mime_type", "application/json" + ) + trace_span.set_attribute( + "output.value", + ( + json.dumps(ser_output) + if isinstance(ser_output, dict) + else ser_output + ), + ) return response except Exception as e: trace_span.set_status( @@ -220,8 +278,33 @@ async def to_trace_or_not_to_trace(*args, **kwargs): trace_context = get_current_context() with _tracer.start_as_current_span(name, trace_context) as trace_span: # type: ignore (Fails in Python 3.9 for invalid reason) try: + ser_args = [serialize(arg) for arg in args] + ser_kwargs = {k: serialize(v) for k, v in kwargs.items()} + inputs = { + "args": [sarg for sarg in ser_args if sarg is not None], + "kwargs": { + k: v for k, v in ser_kwargs.items() if v is not None + }, + } + trace_span.set_attribute("input.mime_type", "application/json") + trace_span.set_attribute("input.value", json.dumps(inputs)) # TODO: Capture args and kwargs as attributes? 
response = await fn(*args, **kwargs) + + ser_output = serialize(response) + if ser_output: + trace_span.set_attribute( + "output.mime_type", "application/json" + ) + trace_span.set_attribute( + "output.value", + ( + json.dumps(ser_output) + if isinstance(ser_output, dict) + else ser_output + ), + ) + return response except Exception as e: trace_span.set_status( @@ -332,3 +415,139 @@ def default_otlp_tracer(resource_name: str = "guardsrails"): trace.set_tracer_provider(traceProvider) return trace.get_tracer("guardrails-ai") + + +def trace_operation( + *, + input_mime_type: Optional[str] = None, + input_value: Optional[Any] = None, + output_mime_type: Optional[str] = None, + output_value: Optional[Any] = None, +): + """Traces an operation (any function call) using OpenInference semantic + conventions.""" + current_span = get_span() + + if current_span is None: + return + + ser_input_mime_type = serialize(input_mime_type) + if ser_input_mime_type: + current_span.set_attribute("input.mime_type", ser_input_mime_type) + + ser_input_value = serialize(input_value) + if ser_input_value: + current_span.set_attribute("input.value", ser_input_value) + + ser_output_mime_type = serialize(output_mime_type) + if ser_output_mime_type: + current_span.set_attribute("output.mime_type", ser_output_mime_type) + + ser_output_value = serialize(output_value) + if ser_output_value: + current_span.set_attribute("output.value", ser_output_value) + + +def trace_llm_call( + *, + function_call: Optional[ + Dict[str, Any] + ] = None, # JSON String "{function_name: 'add', args: [1, 2]}" Object recording details of a function call in models or APIs # noqa + input_messages: Optional[ + List[Dict[str, Any]] + ] = None, # List of objects† [{"message.role": "user", "message.content": "hello"}] List of messages sent to the LLM in a chat API request # noqa + invocation_parameters: Optional[ + Dict[str, Any] + ] = None, # JSON string "{model_name: 'gpt-3', temperature: 0.7}" Parameters used during the invocation of an LLM or API # noqa + model_name: Optional[ + str + ] = None, # String "gpt-3.5-turbo" The name of the language model being utilized # noqa + output_messages: Optional[ + List[Dict[str, Any]] + ] = None, # List of objects [{"message.role": "user", "message.content": "hello"}] List of messages received from the LLM in a chat API request # noqa + prompt_template_template: Optional[ + str + ] = None, # String "Weather forecast for {city} on {date}" Template used to generate prompts as Python f-strings # noqa + prompt_template_variables: Optional[ + Dict[str, Any] + ] = None, # JSON String { context: "", subject: "math" } JSON of key value pairs applied to the prompt template # noqa + prompt_template_version: Optional[ + str + ] = None, # String "v1.0" The version of the prompt template # noqa + token_count_completion: Optional[ + int + ] = None, # Integer 15 The number of tokens in the completion # noqa + token_count_prompt: Optional[ + int + ] = None, # Integer 5 The number of tokens in the prompt # noqa + token_count_total: Optional[ + int + ] = None, # Integer 20 Total number of tokens, including prompt and completion # noqa +): + """Traces an LLM call using OpenInference semantic conventions.""" + current_span = get_span() + + if current_span is None: + return + + ser_function_call = serialize(function_call) + if ser_function_call: + current_span.set_attribute("llm.function_call", ser_function_call) + + if input_messages and isinstance(input_messages, list): + for i, message in enumerate(input_messages): + msg_obj = 
to_dict(message) + for key, value in msg_obj.items(): + standardized_key = f"message.{key}" if "message" not in key else key + current_span.set_attribute( + f"llm.input_messages.{i}.{standardized_key}", + serialize(value), # type: ignore + ) + + ser_invocation_parameters = serialize(invocation_parameters) + if ser_invocation_parameters: + current_span.set_attribute( + "llm.invocation_parameters", ser_invocation_parameters + ) + + ser_model_name = serialize(model_name) + if ser_model_name: + current_span.set_attribute("llm.model_name", ser_model_name) + + if output_messages and isinstance(output_messages, list): + for i, message in enumerate(output_messages): + # Most responses are either dictionaries or Pydantic models + msg_obj = to_dict(message) + for key, value in msg_obj.items(): + standardized_key = f"message.{key}" if "message" not in key else key + current_span.set_attribute( + f"llm.output_messages.{i}.{standardized_key}", + serialize(value), # type: ignore + ) + + ser_prompt_template_template = serialize(prompt_template_template) + if ser_prompt_template_template: + current_span.set_attribute( + "llm.prompt_template.template", ser_prompt_template_template + ) + + ser_prompt_template_variables = serialize(prompt_template_variables) + if ser_prompt_template_variables: + current_span.set_attribute( + "llm.prompt_template.variables", ser_prompt_template_variables + ) + + ser_prompt_template_version = serialize(prompt_template_version) + if ser_prompt_template_version: + current_span.set_attribute( + "llm.prompt_template.version", ser_prompt_template_version + ) + + if token_count_completion: + current_span.set_attribute("llm.token_count.completion", token_count_completion) + + if token_count_prompt: + current_span.set_attribute("llm.token_count.prompt", token_count_prompt) + + if token_count_total: + current_span.set_attribute("llm.token_count.total", token_count_total) diff --git a/tests/conftest.py b/tests/conftest.py index 2fcf23c65..1480c1054 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,3 +1,28 @@ import os +from unittest.mock import patch, MagicMock + +import pytest + os.environ["OPENAI_API_KEY"] = "mocked" + + +@pytest.fixture(scope="session", autouse=True) +def mock_tracer(): + with patch("guardrails.utils.telemetry_utils.get_tracer") as mock_get_tracer: + mock_get_tracer.return_value = None + yield mock_get_tracer + + +@pytest.fixture(scope="session", autouse=True) +def mock_span(): + with patch("guardrails.utils.telemetry_utils.get_span") as mock_get_span: + mock_get_span.return_value = None + yield mock_get_span + + +@pytest.fixture(scope="session", autouse=True) +def mock_hub_telemetry(): + with patch("guardrails.utils.hub_telemetry_utils.HubTelemetry") as MockHubTelemetry: + MockHubTelemetry.return_value = MagicMock + yield MockHubTelemetry
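
# --- Illustrative usage sketch (not part of the patch) -------------------------
# A minimal example, under stated assumptions, of how the new telemetry helpers
# introduced in guardrails/utils/telemetry_utils.py are exercised around an LLM
# call, mirroring the pattern the patch adds to the providers above. The names
# `call_llm` and `my_llm_api` are hypothetical stand-ins; if no tracer/span is
# active in the Guardrails context, trace_operation and trace_llm_call no-op.
from guardrails.utils.telemetry_utils import trace, trace_llm_call, trace_operation


def my_llm_api(prompt: str, **kwargs) -> str:
    # Hypothetical stand-in for a real provider call.
    return "Hello from the model"


@trace(name="call_llm")
def call_llm(prompt: str, **kwargs) -> str:
    # Record the raw operation inputs on the current span
    # (OpenInference input.* attributes).
    trace_operation(
        input_mime_type="application/json",
        input_value={**kwargs, "prompt": prompt},
    )
    # Record the LLM-specific view of the same inputs
    # (llm.input_messages.*, llm.invocation_parameters).
    trace_llm_call(
        input_messages=[{"role": "user", "content": prompt}],
        invocation_parameters={**kwargs, "prompt": prompt},
    )

    response = my_llm_api(prompt, **kwargs)

    # Record the outputs in both forms as well.
    trace_operation(output_mime_type="application/json", output_value=response)
    trace_llm_call(output_messages=[{"role": "assistant", "content": response}])
    return response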