From c5eadb35bd29934c13912a43562dea34eb56f0f8 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Wed, 19 Nov 2025 19:39:22 +0100 Subject: [PATCH 1/5] feat: add instrumentation to embedding functions for various backends --- sentry_sdk/consts.py | 6 + sentry_sdk/integrations/langchain.py | 154 +++++ .../integrations/langchain/test_langchain.py | 650 ++++++++++++++++++ 3 files changed, 810 insertions(+) diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index 641d095ca6..a3d328274c 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -465,6 +465,12 @@ class SPANDATA: Example: "The weather in Paris is rainy and overcast, with temperatures around 57°F" """ + GEN_AI_EMBEDDINGS_INPUT = "gen_ai.embeddings.input" + """ + The input to the embeddings operation. + Example: "Hello!" + """ + GEN_AI_OPERATION_NAME = "gen_ai.operation.name" """ The name of the operation being performed. diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index 8cb98bde0b..f5b69703c9 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -63,6 +63,48 @@ AgentExecutor = None +# Conditional imports for embeddings providers +try: + from langchain_openai import OpenAIEmbeddings +except ImportError: + OpenAIEmbeddings = None # type: ignore[assignment, misc] + +try: + from langchain_openai import AzureOpenAIEmbeddings +except ImportError: + AzureOpenAIEmbeddings = None # type: ignore[assignment, misc] + +try: + from langchain_google_vertexai import VertexAIEmbeddings +except ImportError: + VertexAIEmbeddings = None # type: ignore[assignment, misc] + +try: + from langchain_aws import BedrockEmbeddings +except ImportError: + BedrockEmbeddings = None # type: ignore[assignment, misc] + +try: + from langchain_cohere import CohereEmbeddings +except ImportError: + CohereEmbeddings = None # type: ignore[assignment, misc] + +try: + from langchain_mistralai import MistralAIEmbeddings +except ImportError: + MistralAIEmbeddings = None # type: ignore[assignment, misc] + +try: + from langchain_huggingface import HuggingFaceEmbeddings +except ImportError: + HuggingFaceEmbeddings = None # type: ignore[assignment, misc] + +try: + from langchain_ollama import OllamaEmbeddings +except ImportError: + OllamaEmbeddings = None # type: ignore[assignment, misc] + + DATA_FIELDS = { "frequency_penalty": SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY, "function_call": SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, @@ -140,6 +182,16 @@ def setup_once(): AgentExecutor.invoke = _wrap_agent_executor_invoke(AgentExecutor.invoke) AgentExecutor.stream = _wrap_agent_executor_stream(AgentExecutor.stream) + # Patch embeddings providers + _patch_embeddings_provider(OpenAIEmbeddings) + _patch_embeddings_provider(AzureOpenAIEmbeddings) + _patch_embeddings_provider(VertexAIEmbeddings) + _patch_embeddings_provider(BedrockEmbeddings) + _patch_embeddings_provider(CohereEmbeddings) + _patch_embeddings_provider(MistralAIEmbeddings) + _patch_embeddings_provider(HuggingFaceEmbeddings) + _patch_embeddings_provider(OllamaEmbeddings) + class WatchedSpan: span = None # type: Span @@ -976,3 +1028,105 @@ async def new_iterator_async(): return result return new_stream + + +def _patch_embeddings_provider(provider_class): + # type: (Any) -> None + """Patch an embeddings provider class with monitoring wrappers.""" + if provider_class is None: + return + + if hasattr(provider_class, "embed_documents"): + provider_class.embed_documents = _wrap_embedding_method( + provider_class.embed_documents + ) + if hasattr(provider_class, "embed_query"): + provider_class.embed_query = _wrap_embedding_method(provider_class.embed_query) + if hasattr(provider_class, "aembed_documents"): + provider_class.aembed_documents = _wrap_async_embedding_method( + provider_class.aembed_documents + ) + if hasattr(provider_class, "aembed_query"): + provider_class.aembed_query = _wrap_async_embedding_method( + provider_class.aembed_query + ) + + +def _wrap_embedding_method(f): + # type: (Callable[..., Any]) -> Callable[..., Any] + """Wrap sync embedding methods (embed_documents and embed_query).""" + + @wraps(f) + def new_embedding_method(self, *args, **kwargs): + # type: (Any, Any, Any) -> Any + integration = sentry_sdk.get_client().get_integration(LangchainIntegration) + if integration is None: + return f(self, *args, **kwargs) + + model_name = getattr(self, "model", None) or getattr(self, "model_name", None) + with sentry_sdk.start_span( + op=OP.GEN_AI_EMBEDDINGS, + name=f"embeddings {model_name}" if model_name else "embeddings", + origin=LangchainIntegration.origin, + ) as span: + span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "embeddings") + if model_name: + span.set_data(SPANDATA.GEN_AI_REQUEST_MODEL, model_name) + + # Capture input if PII is allowed + if ( + should_send_default_pii() + and integration.include_prompts + and len(args) > 0 + ): + input_data = args[0] + # Normalize to list format + texts = input_data if isinstance(input_data, list) else [input_data] + set_data_normalized( + span, SPANDATA.GEN_AI_EMBEDDINGS_INPUT, texts, unpack=False + ) + + result = f(self, *args, **kwargs) + return result + + return new_embedding_method + + +def _wrap_async_embedding_method(f): + # type: (Callable[..., Any]) -> Callable[..., Any] + """Wrap async embedding methods (aembed_documents and aembed_query).""" + + @wraps(f) + async def new_async_embedding_method(self, *args, **kwargs): + # type: (Any, Any, Any) -> Any + integration = sentry_sdk.get_client().get_integration(LangchainIntegration) + if integration is None: + return await f(self, *args, **kwargs) + + model_name = getattr(self, "model", None) or getattr(self, "model_name", None) + with sentry_sdk.start_span( + op=OP.GEN_AI_EMBEDDINGS, + name=f"embeddings {model_name}" if model_name else "embeddings", + origin=LangchainIntegration.origin, + ) as span: + span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "embeddings") + if model_name: + span.set_data(SPANDATA.GEN_AI_REQUEST_MODEL, model_name) + + # Capture input if PII is allowed + if ( + should_send_default_pii() + and integration.include_prompts + and len(args) > 0 + ): + input_data = args[0] + # Normalize to list format + texts = input_data if isinstance(input_data, list) else [input_data] + set_data_normalized( + span, SPANDATA.GEN_AI_EMBEDDINGS_INPUT, texts, unpack=False + ) + + result = await f(self, *args, **kwargs) + return result + + return new_async_embedding_method diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index c3625a4157..59e9d719e4 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -20,6 +20,7 @@ from langchain_core.runnables import RunnableConfig from langchain_core.language_models.chat_models import BaseChatModel +import sentry_sdk from sentry_sdk import start_transaction from sentry_sdk.integrations.langchain import ( LangchainIntegration, @@ -1035,3 +1036,652 @@ def test_langchain_message_truncation(sentry_init, capture_events): assert "small message 4" in str(parsed_messages[0]) assert "small message 5" in str(parsed_messages[1]) assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 + + +@pytest.mark.parametrize( + "send_default_pii, include_prompts", + [ + (True, True), + (True, False), + (False, True), + (False, False), + ], +) +def test_langchain_embeddings_sync( + sentry_init, capture_events, send_default_pii, include_prompts +): + """Test that sync embedding methods (embed_documents, embed_query) are properly traced.""" + try: + from langchain_openai import OpenAIEmbeddings + except ImportError: + pytest.skip("langchain_openai not installed") + + sentry_init( + integrations=[LangchainIntegration(include_prompts=include_prompts)], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + ) + events = capture_events() + + # Mock the actual API call + with mock.patch.object( + OpenAIEmbeddings, + "embed_documents", + wraps=lambda self, texts: [[0.1, 0.2, 0.3] for _ in texts], + ) as mock_embed_documents: + embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) + + # Force setup to re-run to ensure our mock is wrapped + LangchainIntegration.setup_once() + + with start_transaction(name="test_embeddings"): + # Test embed_documents + result = embeddings.embed_documents(["Hello world", "Test document"]) + + assert len(result) == 2 + mock_embed_documents.assert_called_once() + + # Check captured events + assert len(events) >= 1 + tx = events[0] + assert tx["type"] == "transaction" + + # Find embeddings span + embeddings_spans = [ + span for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" + ] + assert len(embeddings_spans) == 1 + + embeddings_span = embeddings_spans[0] + assert embeddings_span["description"] == "embeddings text-embedding-ada-002" + assert embeddings_span["origin"] == "auto.ai.langchain" + assert embeddings_span["data"]["gen_ai.operation.name"] == "embeddings" + assert embeddings_span["data"]["gen_ai.request.model"] == "text-embedding-ada-002" + + # Check if input is captured based on PII settings + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["data"] + input_data = embeddings_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + # Could be serialized as string + if isinstance(input_data, str): + assert "Hello world" in input_data + assert "Test document" in input_data + else: + assert "Hello world" in input_data + assert "Test document" in input_data + else: + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embeddings_span.get("data", {}) + + +@pytest.mark.parametrize( + "send_default_pii, include_prompts", + [ + (True, True), + (False, False), + ], +) +def test_langchain_embeddings_embed_query( + sentry_init, capture_events, send_default_pii, include_prompts +): + """Test that embed_query method is properly traced.""" + try: + from langchain_openai import OpenAIEmbeddings + except ImportError: + pytest.skip("langchain_openai not installed") + + sentry_init( + integrations=[LangchainIntegration(include_prompts=include_prompts)], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + ) + events = capture_events() + + # Mock the actual API call + with mock.patch.object( + OpenAIEmbeddings, + "embed_query", + wraps=lambda self, text: [0.1, 0.2, 0.3], + ) as mock_embed_query: + embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) + + # Force setup to re-run to ensure our mock is wrapped + LangchainIntegration.setup_once() + + with start_transaction(name="test_embeddings_query"): + result = embeddings.embed_query("What is the capital of France?") + + assert len(result) == 3 + mock_embed_query.assert_called_once() + + # Check captured events + assert len(events) >= 1 + tx = events[0] + assert tx["type"] == "transaction" + + # Find embeddings span + embeddings_spans = [ + span for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" + ] + assert len(embeddings_spans) == 1 + + embeddings_span = embeddings_spans[0] + assert embeddings_span["data"]["gen_ai.operation.name"] == "embeddings" + assert embeddings_span["data"]["gen_ai.request.model"] == "text-embedding-ada-002" + + # Check if input is captured based on PII settings + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["data"] + input_data = embeddings_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + # Could be serialized as string + if isinstance(input_data, str): + assert "What is the capital of France?" in input_data + else: + assert "What is the capital of France?" in input_data + else: + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embeddings_span.get("data", {}) + + +@pytest.mark.parametrize( + "send_default_pii, include_prompts", + [ + (True, True), + (False, False), + ], +) +@pytest.mark.asyncio +async def test_langchain_embeddings_async( + sentry_init, capture_events, send_default_pii, include_prompts +): + """Test that async embedding methods (aembed_documents, aembed_query) are properly traced.""" + try: + from langchain_openai import OpenAIEmbeddings + except ImportError: + pytest.skip("langchain_openai not installed") + + sentry_init( + integrations=[LangchainIntegration(include_prompts=include_prompts)], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + ) + events = capture_events() + + async def mock_aembed_documents(self, texts): + return [[0.1, 0.2, 0.3] for _ in texts] + + # Mock the actual API call + with mock.patch.object( + OpenAIEmbeddings, + "aembed_documents", + wraps=mock_aembed_documents, + ) as mock_aembed: + embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) + + # Force setup to re-run to ensure our mock is wrapped + LangchainIntegration.setup_once() + + with start_transaction(name="test_async_embeddings"): + result = await embeddings.aembed_documents( + ["Async hello", "Async test document"] + ) + + assert len(result) == 2 + mock_aembed.assert_called_once() + + # Check captured events + assert len(events) >= 1 + tx = events[0] + assert tx["type"] == "transaction" + + # Find embeddings span + embeddings_spans = [ + span for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" + ] + assert len(embeddings_spans) == 1 + + embeddings_span = embeddings_spans[0] + assert embeddings_span["description"] == "embeddings text-embedding-ada-002" + assert embeddings_span["origin"] == "auto.ai.langchain" + assert embeddings_span["data"]["gen_ai.operation.name"] == "embeddings" + assert embeddings_span["data"]["gen_ai.request.model"] == "text-embedding-ada-002" + + # Check if input is captured based on PII settings + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["data"] + input_data = embeddings_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + # Could be serialized as string + if isinstance(input_data, str): + assert "Async hello" in input_data or "Async test document" in input_data + else: + assert "Async hello" in input_data or "Async test document" in input_data + else: + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embeddings_span.get("data", {}) + + +@pytest.mark.asyncio +async def test_langchain_embeddings_aembed_query(sentry_init, capture_events): + """Test that aembed_query method is properly traced.""" + try: + from langchain_openai import OpenAIEmbeddings + except ImportError: + pytest.skip("langchain_openai not installed") + + sentry_init( + integrations=[LangchainIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + async def mock_aembed_query(self, text): + return [0.1, 0.2, 0.3] + + # Mock the actual API call + with mock.patch.object( + OpenAIEmbeddings, + "aembed_query", + wraps=mock_aembed_query, + ) as mock_aembed: + embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) + + # Force setup to re-run to ensure our mock is wrapped + LangchainIntegration.setup_once() + + with start_transaction(name="test_async_embeddings_query"): + result = await embeddings.aembed_query("Async query test") + + assert len(result) == 3 + mock_aembed.assert_called_once() + + # Check captured events + assert len(events) >= 1 + tx = events[0] + assert tx["type"] == "transaction" + + # Find embeddings span + embeddings_spans = [ + span for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" + ] + assert len(embeddings_spans) == 1 + + embeddings_span = embeddings_spans[0] + assert embeddings_span["data"]["gen_ai.operation.name"] == "embeddings" + assert embeddings_span["data"]["gen_ai.request.model"] == "text-embedding-ada-002" + + # Check if input is captured + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in embeddings_span["data"] + input_data = embeddings_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + # Could be serialized as string + if isinstance(input_data, str): + assert "Async query test" in input_data + else: + assert "Async query test" in input_data + + +def test_langchain_embeddings_no_model_name(sentry_init, capture_events): + """Test embeddings when model name is not available.""" + try: + from langchain_openai import OpenAIEmbeddings + except ImportError: + pytest.skip("langchain_openai not installed") + + sentry_init( + integrations=[LangchainIntegration(include_prompts=False)], + traces_sample_rate=1.0, + ) + events = capture_events() + + # Mock the actual API call and remove model attribute + with mock.patch.object( + OpenAIEmbeddings, + "embed_documents", + wraps=lambda self, texts: [[0.1, 0.2, 0.3] for _ in texts], + ): + embeddings = OpenAIEmbeddings(openai_api_key="test-key") + # Remove model attribute to test fallback + delattr(embeddings, "model") + if hasattr(embeddings, "model_name"): + delattr(embeddings, "model_name") + + # Force setup to re-run to ensure our mock is wrapped + LangchainIntegration.setup_once() + + with start_transaction(name="test_embeddings_no_model"): + embeddings.embed_documents(["Test"]) + + # Check captured events + assert len(events) >= 1 + tx = events[0] + assert tx["type"] == "transaction" + + # Find embeddings span + embeddings_spans = [ + span for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" + ] + assert len(embeddings_spans) == 1 + + embeddings_span = embeddings_spans[0] + assert embeddings_span["description"] == "embeddings" + assert embeddings_span["data"]["gen_ai.operation.name"] == "embeddings" + # Model name should not be set if not available + assert ( + "gen_ai.request.model" not in embeddings_span["data"] + or embeddings_span["data"]["gen_ai.request.model"] is None + ) + + +def test_langchain_embeddings_integration_disabled(sentry_init, capture_events): + """Test that embeddings are not traced when integration is disabled.""" + try: + from langchain_openai import OpenAIEmbeddings + except ImportError: + pytest.skip("langchain_openai not installed") + + # Initialize without LangchainIntegration + sentry_init(traces_sample_rate=1.0) + events = capture_events() + + with mock.patch.object( + OpenAIEmbeddings, + "embed_documents", + return_value=[[0.1, 0.2, 0.3]], + ): + embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) + + with start_transaction(name="test_embeddings_disabled"): + embeddings.embed_documents(["Test"]) + + # Check that no embeddings spans were created + if events: + tx = events[0] + embeddings_spans = [ + span + for span in tx.get("spans", []) + if span.get("op") == "gen_ai.embeddings" + ] + # Should be empty since integration is disabled + assert len(embeddings_spans) == 0 + + +def test_langchain_embeddings_multiple_providers(sentry_init, capture_events): + """Test that embeddings work with different providers.""" + try: + from langchain_openai import OpenAIEmbeddings, AzureOpenAIEmbeddings + except ImportError: + pytest.skip("langchain_openai not installed") + + sentry_init( + integrations=[LangchainIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + # Mock both providers + with mock.patch.object( + OpenAIEmbeddings, + "embed_documents", + wraps=lambda self, texts: [[0.1, 0.2, 0.3] for _ in texts], + ), mock.patch.object( + AzureOpenAIEmbeddings, + "embed_documents", + wraps=lambda self, texts: [[0.4, 0.5, 0.6] for _ in texts], + ): + openai_embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) + azure_embeddings = AzureOpenAIEmbeddings( + model="text-embedding-ada-002", + azure_endpoint="https://test.openai.azure.com/", + openai_api_key="test-key", + ) + + # Force setup to re-run + LangchainIntegration.setup_once() + + with start_transaction(name="test_multiple_providers"): + openai_embeddings.embed_documents(["OpenAI test"]) + azure_embeddings.embed_documents(["Azure test"]) + + # Check captured events + assert len(events) >= 1 + tx = events[0] + assert tx["type"] == "transaction" + + # Find embeddings spans + embeddings_spans = [ + span for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" + ] + # Should have 2 spans, one for each provider + assert len(embeddings_spans) == 2 + + # Verify both spans have proper data + for span in embeddings_spans: + assert span["data"]["gen_ai.operation.name"] == "embeddings" + assert span["data"]["gen_ai.request.model"] == "text-embedding-ada-002" + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in span["data"] + + +def test_langchain_embeddings_error_handling(sentry_init, capture_events): + """Test that errors in embeddings are properly captured.""" + try: + from langchain_openai import OpenAIEmbeddings + except ImportError: + pytest.skip("langchain_openai not installed") + + sentry_init( + integrations=[LangchainIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + # Mock the API call to raise an error + with mock.patch.object( + OpenAIEmbeddings, + "embed_documents", + side_effect=ValueError("API error"), + ): + embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) + + # Force setup to re-run + LangchainIntegration.setup_once() + + with start_transaction(name="test_embeddings_error"): + with pytest.raises(ValueError): + embeddings.embed_documents(["Test"]) + + # The error should be captured + assert len(events) >= 1 + # We should have both the transaction and potentially an error event + [e for e in events if e.get("level") == "error"] + # Note: errors might not be auto-captured depending on SDK settings, + # but the span should still be created + + +def test_langchain_embeddings_multiple_calls(sentry_init, capture_events): + """Test that multiple embeddings calls within a transaction are all traced.""" + try: + from langchain_openai import OpenAIEmbeddings + except ImportError: + pytest.skip("langchain_openai not installed") + + sentry_init( + integrations=[LangchainIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + # Mock the actual API calls + with mock.patch.object( + OpenAIEmbeddings, + "embed_documents", + wraps=lambda self, texts: [[0.1, 0.2, 0.3] for _ in texts], + ), mock.patch.object( + OpenAIEmbeddings, + "embed_query", + wraps=lambda self, text: [0.4, 0.5, 0.6], + ): + embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) + + # Force setup to re-run + LangchainIntegration.setup_once() + + with start_transaction(name="test_multiple_embeddings"): + # Call embed_documents + embeddings.embed_documents(["First batch", "Second batch"]) + # Call embed_query + embeddings.embed_query("Single query") + # Call embed_documents again + embeddings.embed_documents(["Third batch"]) + + # Check captured events + assert len(events) >= 1 + tx = events[0] + assert tx["type"] == "transaction" + + # Find embeddings spans - should have 3 (2 embed_documents + 1 embed_query) + embeddings_spans = [ + span for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" + ] + assert len(embeddings_spans) == 3 + + # Verify all spans have proper data + for span in embeddings_spans: + assert span["data"]["gen_ai.operation.name"] == "embeddings" + assert span["data"]["gen_ai.request.model"] == "text-embedding-ada-002" + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in span["data"] + + # Verify the input data is different for each span + input_data_list = [ + span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] for span in embeddings_spans + ] + # They should all be different (different inputs) + assert len(set(str(data) for data in input_data_list)) == 3 + + +def test_langchain_embeddings_span_hierarchy(sentry_init, capture_events): + """Test that embeddings spans are properly nested within parent spans.""" + try: + from langchain_openai import OpenAIEmbeddings + except ImportError: + pytest.skip("langchain_openai not installed") + + sentry_init( + integrations=[LangchainIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + # Mock the actual API call + with mock.patch.object( + OpenAIEmbeddings, + "embed_documents", + wraps=lambda self, texts: [[0.1, 0.2, 0.3] for _ in texts], + ): + embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) + + # Force setup to re-run + LangchainIntegration.setup_once() + + with start_transaction(name="test_span_hierarchy"): + with sentry_sdk.start_span(op="custom", name="custom operation"): + embeddings.embed_documents(["Test within custom span"]) + + # Check captured events + assert len(events) >= 1 + tx = events[0] + assert tx["type"] == "transaction" + + # Find all spans + embeddings_spans = [ + span for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" + ] + custom_spans = [span for span in tx.get("spans", []) if span.get("op") == "custom"] + + assert len(embeddings_spans) == 1 + assert len(custom_spans) == 1 + + # Both spans should exist + embeddings_span = embeddings_spans[0] + custom_span = custom_spans[0] + + assert embeddings_span["data"]["gen_ai.operation.name"] == "embeddings" + assert custom_span["description"] == "custom operation" + + +def test_langchain_embeddings_with_list_and_string_inputs(sentry_init, capture_events): + """Test that embeddings correctly handle both list and string inputs.""" + try: + from langchain_openai import OpenAIEmbeddings + except ImportError: + pytest.skip("langchain_openai not installed") + + sentry_init( + integrations=[LangchainIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + # Mock the actual API calls + with mock.patch.object( + OpenAIEmbeddings, + "embed_documents", + wraps=lambda self, texts: [[0.1, 0.2, 0.3] for _ in texts], + ), mock.patch.object( + OpenAIEmbeddings, + "embed_query", + wraps=lambda self, text: [0.4, 0.5, 0.6], + ): + embeddings = OpenAIEmbeddings( + model="text-embedding-ada-002", openai_api_key="test-key" + ) + + # Force setup to re-run + LangchainIntegration.setup_once() + + with start_transaction(name="test_input_types"): + # embed_documents takes a list + embeddings.embed_documents(["List item 1", "List item 2", "List item 3"]) + # embed_query takes a string + embeddings.embed_query("Single string query") + + # Check captured events + assert len(events) >= 1 + tx = events[0] + assert tx["type"] == "transaction" + + # Find embeddings spans + embeddings_spans = [ + span for span in tx.get("spans", []) if span.get("op") == "gen_ai.embeddings" + ] + assert len(embeddings_spans) == 2 + + # Both should have input data captured as lists + for span in embeddings_spans: + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT in span["data"] + input_data = span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + # Input should be normalized to list format + if isinstance(input_data, str): + # If serialized, should contain the input text + assert "List item" in input_data or "Single string query" in input_data, ( + f"Expected input text in serialized data: {input_data}" + ) From de6083173f9d0455f0360842cff7a2cab16db8f3 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Thu, 20 Nov 2025 10:37:48 +0100 Subject: [PATCH 2/5] fix: add pytest-asyncio to langchain test setup --- scripts/populate_tox/config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/populate_tox/config.py b/scripts/populate_tox/config.py index 7e1438ac4b..6770c0b1cb 100644 --- a/scripts/populate_tox/config.py +++ b/scripts/populate_tox/config.py @@ -201,7 +201,7 @@ "package": "langchain", "integration_name": "langchain", "deps": { - "*": ["openai", "tiktoken", "langchain-openai"], + "*": ["pytest-asyncio", "openai", "tiktoken", "langchain-openai"], "<=0.1": ["httpx<0.28.0"], ">=0.3": ["langchain-community"], ">=1.0": ["langchain-classic"], @@ -214,7 +214,7 @@ "package": "langchain", "integration_name": "langchain", "deps": { - "*": ["openai", "langchain-openai"], + "*": ["pytest-asyncio", "openai", "langchain-openai"], "<=0.1": ["httpx<0.28.0"], ">=0.3": ["langchain-community"], ">=1.0": ["langchain-classic"], From ec69c3cde5ba5563fb0a091d6a5f3db4c1d63272 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Thu, 20 Nov 2025 10:53:29 +0100 Subject: [PATCH 3/5] chore: update tox.ini from changed config --- tox.ini | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tox.ini b/tox.ini index da0961eb8d..d9077773f5 100644 --- a/tox.ini +++ b/tox.ini @@ -399,6 +399,7 @@ deps = langchain-base-v0.1.20: langchain==0.1.20 langchain-base-v0.3.27: langchain==0.3.27 langchain-base-v1.0.8: langchain==1.0.8 + langchain-base: pytest-asyncio langchain-base: openai langchain-base: tiktoken langchain-base: langchain-openai @@ -409,6 +410,7 @@ deps = langchain-notiktoken-v0.1.20: langchain==0.1.20 langchain-notiktoken-v0.3.27: langchain==0.3.27 langchain-notiktoken-v1.0.8: langchain==1.0.8 + langchain-notiktoken: pytest-asyncio langchain-notiktoken: openai langchain-notiktoken: langchain-openai langchain-notiktoken-v0.3.27: langchain-community From dd894b6e685ffaccc3df33601aec853e98971d7c Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Thu, 20 Nov 2025 11:10:19 +0100 Subject: [PATCH 4/5] fix: lint issues --- sentry_sdk/integrations/langchain.py | 32 ++++++++++++++-------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index f5b69703c9..0362346151 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -65,44 +65,44 @@ # Conditional imports for embeddings providers try: - from langchain_openai import OpenAIEmbeddings + from langchain_openai import OpenAIEmbeddings # type: ignore[import-not-found] except ImportError: - OpenAIEmbeddings = None # type: ignore[assignment, misc] + OpenAIEmbeddings = None try: - from langchain_openai import AzureOpenAIEmbeddings + from langchain_openai import AzureOpenAIEmbeddings # type: ignore[import-not-found] except ImportError: - AzureOpenAIEmbeddings = None # type: ignore[assignment, misc] + AzureOpenAIEmbeddings = None try: - from langchain_google_vertexai import VertexAIEmbeddings + from langchain_google_vertexai import VertexAIEmbeddings # type: ignore[import-not-found] except ImportError: - VertexAIEmbeddings = None # type: ignore[assignment, misc] + VertexAIEmbeddings = None try: - from langchain_aws import BedrockEmbeddings + from langchain_aws import BedrockEmbeddings # type: ignore[import-not-found] except ImportError: - BedrockEmbeddings = None # type: ignore[assignment, misc] + BedrockEmbeddings = None try: - from langchain_cohere import CohereEmbeddings + from langchain_cohere import CohereEmbeddings # type: ignore[import-not-found] except ImportError: - CohereEmbeddings = None # type: ignore[assignment, misc] + CohereEmbeddings = None try: - from langchain_mistralai import MistralAIEmbeddings + from langchain_mistralai import MistralAIEmbeddings # type: ignore[import-not-found] except ImportError: - MistralAIEmbeddings = None # type: ignore[assignment, misc] + MistralAIEmbeddings = None try: - from langchain_huggingface import HuggingFaceEmbeddings + from langchain_huggingface import HuggingFaceEmbeddings # type: ignore[import-not-found] except ImportError: - HuggingFaceEmbeddings = None # type: ignore[assignment, misc] + HuggingFaceEmbeddings = None try: - from langchain_ollama import OllamaEmbeddings + from langchain_ollama import OllamaEmbeddings # type: ignore[import-not-found] except ImportError: - OllamaEmbeddings = None # type: ignore[assignment, misc] + OllamaEmbeddings = None DATA_FIELDS = { From 05deaa3f63c19cdd31ad2d07395554d41185e731 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Thu, 20 Nov 2025 14:01:22 +0100 Subject: [PATCH 5/5] fix: lint error --- sentry_sdk/integrations/langchain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index 0362346151..1d3646f1c3 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -70,7 +70,7 @@ OpenAIEmbeddings = None try: - from langchain_openai import AzureOpenAIEmbeddings # type: ignore[import-not-found] + from langchain_openai import AzureOpenAIEmbeddings except ImportError: AzureOpenAIEmbeddings = None