diff --git a/.circleci/config.yml b/.circleci/config.yml
index 3a3fbfd53b56..35707dbffd2a 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -41,7 +41,6 @@ jobs:
             pip install langchain
             pip install lunary==0.2.5
             pip install "langfuse==2.27.1"
-            pip install "logfire==0.29.0"
            pip install numpydoc
            pip install traceloop-sdk==0.0.69
            pip install openai
@@ -87,6 +86,7 @@ jobs:
            exit 1
          fi
          cd ..
+

      # Run pytest and generate JUnit XML report
      - run:
@@ -94,7 +94,7 @@ jobs:
          command: |
            pwd
            ls
-            python -m pytest -vv litellm/tests/ -x --junitxml=test-results/junit.xml --durations=5
+            python -m pytest -vv litellm/tests/ -x --junitxml=test-results/junit.xml --durations=5
          no_output_timeout: 120m

      # Store test results
@@ -170,7 +170,6 @@ jobs:
            pip install "aioboto3==12.3.0"
            pip install langchain
            pip install "langfuse>=2.0.0"
-            pip install "logfire==0.29.0"
            pip install numpydoc
            pip install prisma
            pip install fastapi
@@ -223,7 +222,7 @@ jobs:
          name: Start outputting logs
          command: docker logs -f my-app
          background: true
-      - run:
+      - run:
          name: Wait for app to be ready
          command: dockerize -wait http://localhost:4000 -timeout 5m
      - run:
@@ -231,7 +230,7 @@ jobs:
          command: |
            pwd
            ls
-            python -m pytest -vv tests/ -x --junitxml=test-results/junit.xml --durations=5
+            python -m pytest -vv tests/ -x --junitxml=test-results/junit.xml --durations=5
          no_output_timeout: 120m

      # Store test results
@@ -253,7 +252,7 @@ jobs:
          name: Copy model_prices_and_context_window File to model_prices_and_context_window_backup
          command: |
            cp model_prices_and_context_window.json litellm/model_prices_and_context_window_backup.json
-      
+
      - run:
          name: Check if litellm dir was updated or if pyproject.toml was modified
          command: |
@@ -338,4 +337,4 @@ workflows:
      filters:
        branches:
          only:
-            - main
+            - main
\ No newline at end of file
diff --git a/docs/my-website/docs/observability/logfire_integration.md b/docs/my-website/docs/observability/logfire_integration.md
deleted file mode 100644
index c1f425f4251a..000000000000
--- a/docs/my-website/docs/observability/logfire_integration.md
+++ /dev/null
@@ -1,60 +0,0 @@
-import Image from '@theme/IdealImage';
-
-# Logfire - Logging LLM Input/Output
-
-Logfire is open Source Observability & Analytics for LLM Apps
-Detailed production traces and a granular view on quality, cost and latency
-
-<Image img={require('../../img/logfire.png')} />
-
-:::info
-We want to learn how we can make the callbacks better! Meet the LiteLLM [founders](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version) or
-join our [discord](https://discord.gg/wuPM9dRgDw)
-:::
-
-## Pre-Requisites
-
-Ensure you have run `pip install logfire` for this integration
-
-```shell
-pip install logfire litellm
-```
-
-## Quick Start
-
-Get your Logfire token from [Logfire](https://logfire.pydantic.dev/)
-
-```python
-litellm.success_callback = ["logfire"]
-litellm.failure_callback = ["logfire"] # logs errors to logfire
-```
-
-```python
-# pip install logfire
-import litellm
-import os
-
-# from https://logfire.pydantic.dev/
-os.environ["LOGFIRE_TOKEN"] = ""
-
-# LLM API Keys
-os.environ['OPENAI_API_KEY']=""
-
-# set logfire as a callback, litellm will send the data to logfire
-litellm.success_callback = ["logfire"]
-
-# openai call
-response = litellm.completion(
-    model="gpt-3.5-turbo",
-    messages=[
-        {"role": "user", "content": "Hi 👋 - i'm openai"}
-    ]
-)
-```
-
-## Support & Talk to Founders
-
-- [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
-- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
-- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
-- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
diff --git a/docs/my-website/img/logfire.png b/docs/my-website/img/logfire.png
deleted file mode 100644
index 2a6be87e2339..000000000000
Binary files a/docs/my-website/img/logfire.png and /dev/null differ
diff --git a/litellm/integrations/logfire_logger.py b/litellm/integrations/logfire_logger.py
deleted file mode 100644
index e27d848fb4d0..000000000000
--- a/litellm/integrations/logfire_logger.py
+++ /dev/null
@@ -1,178 +0,0 @@
-#### What this does ####
-# On success + failure, log events to Logfire
-
-import dotenv, os
-
-dotenv.load_dotenv()  # Loading env variables using dotenv
-import traceback
-import uuid
-from litellm._logging import print_verbose, verbose_logger
-
-from enum import Enum
-from typing import Any, Dict, NamedTuple
-from typing_extensions import LiteralString
-
-
-class SpanConfig(NamedTuple):
-    message_template: LiteralString
-    span_data: Dict[str, Any]
-
-
-class LogfireLevel(str, Enum):
-    INFO = "info"
-    ERROR = "error"
-
-
-class LogfireLogger:
-    # Class variables or attributes
-    def __init__(self):
-        try:
-            verbose_logger.debug(f"in init logfire logger")
-            import logfire
-
-            # only setting up logfire if we are sending to logfire
-            # in testing, we don't want to send to logfire
-            if logfire.DEFAULT_LOGFIRE_INSTANCE.config.send_to_logfire:
-                logfire.configure(token=os.getenv("LOGFIRE_TOKEN"))
-        except Exception as e:
-            print_verbose(f"Got exception on init logfire client {str(e)}")
-            raise e
-
-    def _get_span_config(self, payload) -> SpanConfig:
-        if (
-            payload["call_type"] == "completion"
-            or payload["call_type"] == "acompletion"
-        ):
-            return SpanConfig(
-                message_template="Chat Completion with {request_data[model]!r}",
-                span_data={"request_data": payload},
-            )
-        elif (
-            payload["call_type"] == "embedding" or payload["call_type"] == "aembedding"
-        ):
-            return SpanConfig(
-                message_template="Embedding Creation with {request_data[model]!r}",
-                span_data={"request_data": payload},
-            )
-        elif (
-            payload["call_type"] == "image_generation"
-            or payload["call_type"] == "aimage_generation"
-        ):
-            return SpanConfig(
-                message_template="Image Generation with {request_data[model]!r}",
-                span_data={"request_data": payload},
-            )
-        else:
-            return SpanConfig(
-                message_template="Litellm Call with {request_data[model]!r}",
-                span_data={"request_data": payload},
-            )
-
-    async def _async_log_event(
-        self,
-        kwargs,
-        response_obj,
-        start_time,
-        end_time,
-        print_verbose,
-        level: LogfireLevel,
-    ):
-        self.log_event(
-            kwargs=kwargs,
-            response_obj=response_obj,
-            start_time=start_time,
-            end_time=end_time,
-            print_verbose=print_verbose,
-            level=level,
-        )
-
-    def log_event(
-        self,
-        kwargs,
-        start_time,
-        end_time,
-        print_verbose,
-        level: LogfireLevel,
-        response_obj,
-    ):
-        try:
-            import logfire
-
-            verbose_logger.debug(
-                f"logfire Logging - Enters logging function for model {kwargs}"
-            )
-
-            if not response_obj:
-                response_obj = {}
-            litellm_params = kwargs.get("litellm_params", {})
-            metadata = (
-                litellm_params.get("metadata", {}) or {}
-            )  # if litellm_params['metadata'] == None
-            messages = kwargs.get("messages")
-            optional_params = kwargs.get("optional_params", {})
-            call_type = kwargs.get("call_type", "completion")
-            cache_hit = kwargs.get("cache_hit", False)
-            usage = response_obj.get("usage", {})
-            id = response_obj.get("id", str(uuid.uuid4()))
-            try:
-                response_time = (end_time - start_time).total_seconds()
-            except:
-                response_time = None
-
-            # Clean Metadata before logging - never log raw metadata
-            # the raw metadata can contain circular references which leads to infinite recursion
-            # we clean out all extra litellm metadata params before logging
-            clean_metadata = {}
-            if isinstance(metadata, dict):
-                for key, value in metadata.items():
-                    # clean litellm metadata before logging
-                    if key in [
-                        "endpoint",
-                        "caching_groups",
-                        "previous_models",
-                    ]:
-                        continue
-                    else:
-                        clean_metadata[key] = value
-
-            # Build the initial payload
-            payload = {
-                "id": id,
-                "call_type": call_type,
-                "cache_hit": cache_hit,
-                "startTime": start_time,
-                "endTime": end_time,
-                "responseTime (seconds)": response_time,
-                "model": kwargs.get("model", ""),
-                "user": kwargs.get("user", ""),
-                "modelParameters": optional_params,
-                "spend": kwargs.get("response_cost", 0),
-                "messages": messages,
-                "response": response_obj,
-                "usage": usage,
-                "metadata": clean_metadata,
-            }
-            logfire_openai = logfire.with_settings(custom_scope_suffix="openai")
-            message_template, span_data = self._get_span_config(payload)
-            if level == LogfireLevel.INFO:
-                logfire_openai.info(
-                    message_template,
-                    **span_data,
-                )
-            elif level == LogfireLevel.ERROR:
-                logfire_openai.error(
-                    message_template,
-                    **span_data,
-                    _exc_info=True,
-                )
-            print_verbose(f"\ndd Logger - Logging payload = {payload}")
-
-            print_verbose(
-                f"Logfire Layer Logging - final response object: {response_obj}"
-            )
-        except Exception as e:
-            traceback.print_exc()
-            verbose_logger.debug(
-                f"Logfire Layer Error - {str(e)}\n{traceback.format_exc()}"
-            )
-            pass
diff --git a/litellm/tests/test_logfire.py b/litellm/tests/test_logfire.py
deleted file mode 100644
index da1cb7bde864..000000000000
--- a/litellm/tests/test_logfire.py
+++ /dev/null
@@ -1,117 +0,0 @@
-import sys
-import os
-import json
-import time
-
-import logfire
-import litellm
-import pytest
-from logfire.testing import TestExporter, SimpleSpanProcessor
-
-sys.path.insert(0, os.path.abspath("../.."))
-
-# Testing scenarios for logfire logging:
-# 1. Test logfire logging for completion
-# 2. Test logfire logging for acompletion
-# 3. Test logfire logging for completion while streaming is enabled
-# 4. Test logfire logging for completion while streaming is enabled
-
-
-@pytest.mark.parametrize("stream", [False, True])
-def test_completion_logfire_logging(stream):
-    litellm.success_callback = ["logfire"]
-    litellm.set_verbose = True
-
-    exporter = TestExporter()
-    logfire.configure(
-        send_to_logfire=False,
-        console=False,
-        processors=[SimpleSpanProcessor(exporter)],
-        collect_system_metrics=False,
-    )
-    messages = [{"role": "user", "content": "what llm are u"}]
-    temperature = 0.3
-    max_tokens = 10
-    response = litellm.completion(
-        model="gpt-3.5-turbo",
-        messages=messages,
-        max_tokens=max_tokens,
-        temperature=temperature,
-        stream=stream,
-    )
-    print(response)
-
-    if stream:
-        for chunk in response:
-            print(chunk)
-
-    time.sleep(5)
-    exported_spans = exporter.exported_spans_as_dict()
-
-    assert len(exported_spans) == 1
-    assert (
-        exported_spans[0]["attributes"]["logfire.msg"]
-        == "Chat Completion with 'gpt-3.5-turbo'"
-    )
-
-    request_data = json.loads(exported_spans[0]["attributes"]["request_data"])
-
-    assert request_data["model"] == "gpt-3.5-turbo"
-    assert request_data["messages"] == messages
-
-    assert "completion_tokens" in request_data["usage"]
-    assert "prompt_tokens" in request_data["usage"]
-    assert "total_tokens" in request_data["usage"]
-    assert request_data["response"]["choices"][0]["message"]["content"]
-    assert request_data["modelParameters"]["max_tokens"] == max_tokens
-    assert request_data["modelParameters"]["temperature"] == temperature
-
-
-@pytest.mark.asyncio
-@pytest.mark.parametrize("stream", [False, True])
-async def test_acompletion_logfire_logging(stream):
-    litellm.success_callback = ["logfire"]
-    litellm.set_verbose = True
-
-    exporter = TestExporter()
-    logfire.configure(
-        send_to_logfire=False,
-        console=False,
-        processors=[SimpleSpanProcessor(exporter)],
-        collect_system_metrics=False,
-    )
-    messages = [{"role": "user", "content": "what llm are u"}]
-    temperature = 0.3
-    max_tokens = 10
-    response = await litellm.acompletion(
-        model="gpt-3.5-turbo",
-        messages=messages,
-        max_tokens=max_tokens,
-        temperature=temperature,
-    )
-    print(response)
-    if stream:
-        for chunk in response:
-            print(chunk)
-
-    time.sleep(5)
-    exported_spans = exporter.exported_spans_as_dict()
-    print("exported_spans", exported_spans)
-
-    assert len(exported_spans) == 1
-    assert (
-        exported_spans[0]["attributes"]["logfire.msg"]
-        == "Chat Completion with 'gpt-3.5-turbo'"
-    )
-
-    request_data = json.loads(exported_spans[0]["attributes"]["request_data"])
-
-    assert request_data["model"] == "gpt-3.5-turbo"
-    assert request_data["messages"] == messages
-
-    assert "completion_tokens" in request_data["usage"]
-    assert "prompt_tokens" in request_data["usage"]
-    assert "total_tokens" in request_data["usage"]
-    assert request_data["response"]["choices"][0]["message"]["content"]
-    assert request_data["modelParameters"]["max_tokens"] == max_tokens
-    assert request_data["modelParameters"]["temperature"] == temperature
diff --git a/litellm/utils.py b/litellm/utils.py
index c421ab64b54d..00492a7d995a 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -6,6 +6,7 @@
 # +-----------------------------------------------+
 #
 #  Thank you users! We ❤️ you! - Krrish & Ishaan
+
 import sys, re, binascii, struct
 import litellm
 import dotenv, json, traceback, threading, base64, ast
@@ -65,7 +66,6 @@
 from .integrations.lunary import LunaryLogger
 from .integrations.prompt_layer import PromptLayerLogger
 from .integrations.langsmith import LangsmithLogger
-from .integrations.logfire_logger import LogfireLogger, LogfireLevel
 from .integrations.weights_biases import WeightsBiasesLogger
 from .integrations.custom_logger import CustomLogger
 from .integrations.langfuse import LangFuseLogger
@@ -137,7 +137,6 @@
 athinaLogger = None
 promptLayerLogger = None
 langsmithLogger = None
-logfireLogger = None
 weightsBiasesLogger = None
 customLogger = None
 langFuseLogger = None
@@ -1091,7 +1090,7 @@ class CallTypes(Enum):
 
 # Logging function -> log the exact model details + what's being sent | Non-BlockingP
 class Logging:
-    global supabaseClient, liteDebuggerClient, promptLayerLogger, weightsBiasesLogger, langsmithLogger, logfireLogger, capture_exception, add_breadcrumb, lunaryLogger
+    global supabaseClient, liteDebuggerClient, promptLayerLogger, weightsBiasesLogger, langsmithLogger, capture_exception, add_breadcrumb, lunaryLogger
 
     custom_pricing: bool = False
 
@@ -1633,7 +1632,7 @@ def success_handler(
                     # this only logs streaming once, complete_streaming_response exists i.e when stream ends
                     if self.stream:
                         if "complete_streaming_response" not in kwargs:
-                            continue
+                            return
                         else:
                             print_verbose("reaches supabase for streaming logging!")
                             result = kwargs["complete_streaming_response"]
@@ -1667,7 +1666,7 @@ def success_handler(
                     print_verbose("reaches langsmith for logging!")
                     if self.stream:
                         if "complete_streaming_response" not in kwargs:
-                            continue
+                            break
                         else:
                             print_verbose(
                                 "reaches langsmith for streaming logging!"
                             )
@@ -1680,33 +1679,6 @@ def success_handler(
                         end_time=end_time,
                         print_verbose=print_verbose,
                     )
-                if callback == "logfire":
-                    global logfireLogger
-                    verbose_logger.debug("reaches logfire for success logging!")
-                    kwargs = {}
-                    for k, v in self.model_call_details.items():
-                        if (
-                            k != "original_response"
-                        ):  # copy.deepcopy raises errors as this could be a coroutine
-                            kwargs[k] = v
-
-                    # this only logs streaming once, complete_streaming_response exists i.e when stream ends
-                    if self.stream:
-                        if "complete_streaming_response" not in kwargs:
-                            continue
-                        else:
-                            print_verbose("reaches logfire for streaming logging!")
-                            result = kwargs["complete_streaming_response"]
-
-                    logfireLogger.log_event(
-                        kwargs=self.model_call_details,
-                        response_obj=result,
-                        start_time=start_time,
-                        end_time=end_time,
-                        print_verbose=print_verbose,
-                        level=LogfireLevel.INFO.value,
-                    )
-
                 if callback == "lunary":
                     print_verbose("reaches lunary for logging!")
                     model = self.model
@@ -1723,7 +1695,7 @@ def success_handler(
                     # this only logs streaming once, complete_streaming_response exists i.e when stream ends
                     if self.stream:
                         if "complete_streaming_response" not in kwargs:
-                            continue
+                            break
                         else:
                             result = kwargs["complete_streaming_response"]
 
@@ -1868,7 +1840,7 @@ def success_handler(
                             f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}"
                         )
                         if complete_streaming_response is None:
-                            continue
+                            break
                         else:
                             print_verbose("reaches langfuse for streaming logging!")
                             result = kwargs["complete_streaming_response"]
@@ -1897,7 +1869,7 @@ def success_handler(
                             f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}"
                        )
                        if complete_streaming_response is None:
-                            continue
+                            break
                        else:
                            print_verbose(
                                "reaches clickhouse for streaming logging!"
                            )
@@ -1926,7 +1898,7 @@ def success_handler(
                            f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}"
                        )
                        if complete_streaming_response is None:
-                            continue
+                            break
                        else:
                            print_verbose(
                                "reaches greenscale for streaming logging!"
                            )
@@ -2397,9 +2369,7 @@ def _failure_handler_helper_fn(
     def failure_handler(
         self, exception, traceback_exception, start_time=None, end_time=None
     ):
-        print_verbose(
-            f"Logging Details LiteLLM-Failure Call: {litellm.failure_callback}"
-        )
+        print_verbose(f"Logging Details LiteLLM-Failure Call")
         try:
             start_time, end_time = self._failure_handler_helper_fn(
                 exception=exception,
@@ -2454,7 +2424,7 @@ def failure_handler(
                        call_type=self.call_type,
                        stream=self.stream,
                    )
-                if callback == "lunary":
+                elif callback == "lunary":
                    print_verbose("reaches lunary for logging error!")
                    model = self.model
@@ -2479,7 +2449,7 @@ def failure_handler(
                        end_time=end_time,
                        print_verbose=print_verbose,
                    )
-                if callback == "sentry":
+                elif callback == "sentry":
                    print_verbose("sending exception to sentry")
                    if capture_exception:
                        capture_exception(exception)
@@ -2487,7 +2457,7 @@ def failure_handler(
                        print_verbose(
                            f"capture exception not initialized: {capture_exception}"
                        )
-                if callable(callback):  # custom logger functions
+                elif callable(callback):  # custom logger functions
                    customLogger.log_event(
                        kwargs=self.model_call_details,
                        response_obj=result,
@@ -2496,7 +2466,7 @@ def failure_handler(
                        print_verbose=print_verbose,
                        callback_func=callback,
                    )
-                if (
+                elif (
                    isinstance(callback, CustomLogger)
                    and self.model_call_details.get("litellm_params", {}).get(
                        "acompletion", False
@@ -2513,7 +2483,7 @@ def failure_handler(
                        response_obj=result,
                        kwargs=self.model_call_details,
                    )
-                if callback == "langfuse":
+                elif callback == "langfuse":
                    global langFuseLogger
                    verbose_logger.debug("reaches langfuse for logging failure")
                    kwargs = {}
@@ -2549,7 +2519,7 @@ def failure_handler(
                        level="ERROR",
                        kwargs=self.model_call_details,
                    )
-                if callback == "prometheus":
+                elif callback == "prometheus":
                    global prometheusLogger
                    verbose_logger.debug("reaches prometheus for success logging!")
                    kwargs = {}
@@ -2567,26 +2537,6 @@ def failure_handler(
                        user_id=kwargs.get("user", None),
                        print_verbose=print_verbose,
                    )
-
-                if callback == "logfire":
-                    global logfireLogger
-                    verbose_logger.debug("reaches logfire for failure logging!")
-                    kwargs = {}
-                    for k, v in self.model_call_details.items():
-                        if (
-                            k != "original_response"
-                        ):  # copy.deepcopy raises errors as this could be a coroutine
-                            kwargs[k] = v
-                    kwargs["exception"] = exception
-
-                    logfireLogger.log_event(
-                        kwargs=kwargs,
-                        response_obj=result,
-                        start_time=start_time,
-                        end_time=end_time,
-                        level=LogfireLevel.ERROR.value,
-                        print_verbose=print_verbose,
-                    )
         except Exception as e:
             print_verbose(
                 f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging with integrations {str(e)}"
@@ -3336,7 +3286,6 @@ def wrapper(*args, **kwargs):
                 return original_function(*args, **kwargs)
             traceback_exception = traceback.format_exc()
             end_time = datetime.datetime.now()
-
             # LOG FAILURE - handle streaming failure logging in the _next_ object, remove `handle_failure` once it's deprecated
             if logging_obj:
                 logging_obj.failure_handler(
@@ -7345,7 +7294,7 @@ def validate_environment(model: Optional[str] = None) -> dict:
 
 
 def set_callbacks(callback_list, function_id=None):
-    global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, traceloopLogger, athinaLogger, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, lunaryLogger, promptLayerLogger, langFuseLogger, customLogger, weightsBiasesLogger, langsmithLogger, logfireLogger, dynamoLogger, s3Logger, dataDogLogger, prometheusLogger, greenscaleLogger, openMeterLogger
+    global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, traceloopLogger, athinaLogger, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, lunaryLogger, promptLayerLogger, langFuseLogger, customLogger, weightsBiasesLogger, langsmithLogger, dynamoLogger, s3Logger, dataDogLogger, prometheusLogger, greenscaleLogger, openMeterLogger
     try:
         for callback in callback_list:
@@ -7427,8 +7376,6 @@ def set_callbacks(callback_list, function_id=None):
                 weightsBiasesLogger = WeightsBiasesLogger()
             elif callback == "langsmith":
                 langsmithLogger = LangsmithLogger()
-            elif callback == "logfire":
-                logfireLogger = LogfireLogger()
             elif callback == "aispend":
                 aispendLogger = AISpendLogger()
             elif callback == "berrispend":