Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions scripts/populate_tox/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@
"package": "langchain",
"integration_name": "langchain",
"deps": {
"*": ["openai", "tiktoken", "langchain-openai"],
"*": ["pytest-asyncio", "openai", "tiktoken", "langchain-openai"],
"<=0.1": ["httpx<0.28.0"],
">=0.3": ["langchain-community"],
">=1.0": ["langchain-classic"],
Expand All @@ -214,7 +214,7 @@
"package": "langchain",
"integration_name": "langchain",
"deps": {
"*": ["openai", "langchain-openai"],
"*": ["pytest-asyncio", "openai", "langchain-openai"],
"<=0.1": ["httpx<0.28.0"],
">=0.3": ["langchain-community"],
">=1.0": ["langchain-classic"],
Expand Down
6 changes: 6 additions & 0 deletions sentry_sdk/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,12 @@ class SPANDATA:
Example: "The weather in Paris is rainy and overcast, with temperatures around 57°F"
"""

GEN_AI_EMBEDDINGS_INPUT = "gen_ai.embeddings.input"
"""
The input to the embeddings operation.
Example: "Hello!"
"""

GEN_AI_OPERATION_NAME = "gen_ai.operation.name"
"""
The name of the operation being performed.
Expand Down
154 changes: 154 additions & 0 deletions sentry_sdk/integrations/langchain.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,48 @@
AgentExecutor = None


# Conditional imports for embeddings providers
try:
from langchain_openai import OpenAIEmbeddings # type: ignore[import-not-found]
except ImportError:
OpenAIEmbeddings = None

try:
from langchain_openai import AzureOpenAIEmbeddings
except ImportError:
AzureOpenAIEmbeddings = None

try:
from langchain_google_vertexai import VertexAIEmbeddings # type: ignore[import-not-found]
except ImportError:
VertexAIEmbeddings = None

try:
from langchain_aws import BedrockEmbeddings # type: ignore[import-not-found]
except ImportError:
BedrockEmbeddings = None

try:
from langchain_cohere import CohereEmbeddings # type: ignore[import-not-found]
except ImportError:
CohereEmbeddings = None

try:
from langchain_mistralai import MistralAIEmbeddings # type: ignore[import-not-found]
except ImportError:
MistralAIEmbeddings = None

try:
from langchain_huggingface import HuggingFaceEmbeddings # type: ignore[import-not-found]
except ImportError:
HuggingFaceEmbeddings = None

try:
from langchain_ollama import OllamaEmbeddings # type: ignore[import-not-found]
except ImportError:
OllamaEmbeddings = None


DATA_FIELDS = {
"frequency_penalty": SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY,
"function_call": SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS,
Expand Down Expand Up @@ -140,6 +182,16 @@ def setup_once():
AgentExecutor.invoke = _wrap_agent_executor_invoke(AgentExecutor.invoke)
AgentExecutor.stream = _wrap_agent_executor_stream(AgentExecutor.stream)

# Patch embeddings providers
_patch_embeddings_provider(OpenAIEmbeddings)
_patch_embeddings_provider(AzureOpenAIEmbeddings)
_patch_embeddings_provider(VertexAIEmbeddings)
_patch_embeddings_provider(BedrockEmbeddings)
_patch_embeddings_provider(CohereEmbeddings)
_patch_embeddings_provider(MistralAIEmbeddings)
_patch_embeddings_provider(HuggingFaceEmbeddings)
_patch_embeddings_provider(OllamaEmbeddings)


class WatchedSpan:
span = None # type: Span
Expand Down Expand Up @@ -976,3 +1028,105 @@ async def new_iterator_async():
return result

return new_stream


def _patch_embeddings_provider(provider_class):
    # type: (Any) -> None
    """Patch an embeddings provider class with monitoring wrappers.

    Wraps the sync (``embed_documents``/``embed_query``) and async
    (``aembed_documents``/``aembed_query``) embedding entry points, when
    present, so each call is traced in a Sentry span.

    ``provider_class`` may be ``None`` (the corresponding langchain
    provider package is not installed), in which case this is a no-op.
    """
    if provider_class is None:
        return

    # Guard against double-wrapping: setup_once may be invoked more than
    # once (the tests do this explicitly), and without this flag each call
    # would stack another wrapper around the already-wrapped methods.
    if getattr(provider_class, "_sentry_is_patched", False):
        return

    if hasattr(provider_class, "embed_documents"):
        provider_class.embed_documents = _wrap_embedding_method(
            provider_class.embed_documents
        )
    if hasattr(provider_class, "embed_query"):
        provider_class.embed_query = _wrap_embedding_method(provider_class.embed_query)
    if hasattr(provider_class, "aembed_documents"):
        provider_class.aembed_documents = _wrap_async_embedding_method(
            provider_class.aembed_documents
        )
    if hasattr(provider_class, "aembed_query"):
        provider_class.aembed_query = _wrap_async_embedding_method(
            provider_class.aembed_query
        )

    # Mark the class so a subsequent setup_once call skips it entirely.
    provider_class._sentry_is_patched = True
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Missing guard against double-wrapping embedding methods

The _patch_embeddings_provider function lacks a guard to prevent double-wrapping when setup_once is called multiple times. Each call wraps the methods again, creating nested wrappers that can cause incorrect behavior. Other integrations like Celery, Huey, and Beam use a _sentry_is_patched flag to prevent this issue. The tests explicitly call setup_once multiple times, which triggers this bug.

Fix in Cursor Fix in Web



def _wrap_embedding_method(f):
    # type: (Callable[..., Any]) -> Callable[..., Any]
    """Return a traced version of a sync embedding method.

    Applied to ``embed_documents`` and ``embed_query``; the wrapper opens a
    ``gen_ai.embeddings`` span around the call and, when PII capture is
    enabled, records the embedding input on the span.
    """

    @wraps(f)
    def sentry_patched_embed(self, *args, **kwargs):
        # type: (Any, Any, Any) -> Any
        integration = sentry_sdk.get_client().get_integration(LangchainIntegration)
        if integration is None:
            # Integration disabled: call through untouched.
            return f(self, *args, **kwargs)

        # Providers expose the model under either `model` or `model_name`.
        model = getattr(self, "model", None) or getattr(self, "model_name", None)
        span_name = "embeddings {}".format(model) if model else "embeddings"

        with sentry_sdk.start_span(
            op=OP.GEN_AI_EMBEDDINGS,
            name=span_name,
            origin=LangchainIntegration.origin,
        ) as span:
            span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "embeddings")
            if model:
                span.set_data(SPANDATA.GEN_AI_REQUEST_MODEL, model)

            # Only record inputs when PII is allowed and prompts are opted in.
            if should_send_default_pii() and integration.include_prompts and args:
                raw_input = args[0]
                # embed_query passes a single string; normalize to a list.
                texts = raw_input if isinstance(raw_input, list) else [raw_input]
                set_data_normalized(
                    span, SPANDATA.GEN_AI_EMBEDDINGS_INPUT, texts, unpack=False
                )

            return f(self, *args, **kwargs)

    return sentry_patched_embed


def _wrap_async_embedding_method(f):
    # type: (Callable[..., Any]) -> Callable[..., Any]
    """Return a traced version of an async embedding method.

    Applied to ``aembed_documents`` and ``aembed_query``; the wrapper opens
    a ``gen_ai.embeddings`` span around the awaited call and, when PII
    capture is enabled, records the embedding input on the span.
    """

    @wraps(f)
    async def sentry_patched_aembed(self, *args, **kwargs):
        # type: (Any, Any, Any) -> Any
        integration = sentry_sdk.get_client().get_integration(LangchainIntegration)
        if integration is None:
            # Integration disabled: await the original untouched.
            return await f(self, *args, **kwargs)

        # Providers expose the model under either `model` or `model_name`.
        model = getattr(self, "model", None) or getattr(self, "model_name", None)
        span_name = "embeddings {}".format(model) if model else "embeddings"

        with sentry_sdk.start_span(
            op=OP.GEN_AI_EMBEDDINGS,
            name=span_name,
            origin=LangchainIntegration.origin,
        ) as span:
            span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "embeddings")
            if model:
                span.set_data(SPANDATA.GEN_AI_REQUEST_MODEL, model)

            # Only record inputs when PII is allowed and prompts are opted in.
            if should_send_default_pii() and integration.include_prompts and args:
                raw_input = args[0]
                # aembed_query passes a single string; normalize to a list.
                texts = raw_input if isinstance(raw_input, list) else [raw_input]
                set_data_normalized(
                    span, SPANDATA.GEN_AI_EMBEDDINGS_INPUT, texts, unpack=False
                )

            return await f(self, *args, **kwargs)

    return sentry_patched_aembed
Loading