diff --git a/poetry.lock b/poetry.lock
index efe5954..c3c7763 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.1 and should not be changed by hand.
 
 [[package]]
 name = "aiohappyeyeballs"
@@ -1568,18 +1568,18 @@ files = [
 
 [[package]]
 name = "langchain"
-version = "0.2.14"
+version = "0.2.16"
 description = "Building applications with LLMs through composability"
 optional = false
 python-versions = "<4.0,>=3.8.1"
 files = [
-    {file = "langchain-0.2.14-py3-none-any.whl", hash = "sha256:eed76194ee7d9c081037a3df7868d4de90e0410b51fc1ca933a8379e464bf40c"},
-    {file = "langchain-0.2.14.tar.gz", hash = "sha256:dc2aa5a58882054fb5d043c39ab8332ebd055f88f17839da68e1c7fd0a4fefe2"},
+    {file = "langchain-0.2.16-py3-none-any.whl", hash = "sha256:8f59ee8b45f268df4b924ea3b9c63e49286efa756d16b3f6a9de5c6e502c36e1"},
+    {file = "langchain-0.2.16.tar.gz", hash = "sha256:ffb426a76a703b73ac69abad77cd16eaf03dda76b42cff55572f592d74944166"},
 ]
 
 [package.dependencies]
 aiohttp = ">=3.8.3,<4.0.0"
-langchain-core = ">=0.2.32,<0.3.0"
+langchain-core = ">=0.2.38,<0.3.0"
 langchain-text-splitters = ">=0.2.0,<0.3.0"
 langsmith = ">=0.1.17,<0.2.0"
 numpy = [
@@ -1610,21 +1610,21 @@ langchain-core = ">=0.2.26,<0.3.0"
 
 [[package]]
 name = "langchain-community"
-version = "0.2.12"
+version = "0.2.17"
 description = "Community contributed LangChain integrations."
 optional = false
 python-versions = "<4.0,>=3.8.1"
 files = [
-    {file = "langchain_community-0.2.12-py3-none-any.whl", hash = "sha256:50e74473dd2309bdef561760afbbf0c5ea17ed91fc4dfa0d52279dd16d6d34e0"},
-    {file = "langchain_community-0.2.12.tar.gz", hash = "sha256:d671cfc6a4f3b65f49a2e59ab420d0164f109d0a56fc4b4996518205c63b8c7e"},
+    {file = "langchain_community-0.2.17-py3-none-any.whl", hash = "sha256:d07c31b641e425fb8c3e7148ad6a62e1b54a9adac6e1173021a7dd3148266063"},
+    {file = "langchain_community-0.2.17.tar.gz", hash = "sha256:b0745c1fcf1bd532ed4388f90b47139d6a6c6ba48a87aa68aa32d4d6bb97259d"},
 ]
 
 [package.dependencies]
 aiohttp = ">=3.8.3,<4.0.0"
 dataclasses-json = ">=0.5.7,<0.7"
-langchain = ">=0.2.13,<0.3.0"
-langchain-core = ">=0.2.30,<0.3.0"
-langsmith = ">=0.1.0,<0.2.0"
+langchain = ">=0.2.16,<0.3.0"
+langchain-core = ">=0.2.39,<0.3.0"
+langsmith = ">=0.1.112,<0.2.0"
 numpy = [
     {version = ">=1,<2", markers = "python_version < \"3.12\""},
     {version = ">=1.26.0,<2.0.0", markers = "python_version >= \"3.12\""},
@@ -1636,18 +1636,18 @@ tenacity = ">=8.1.0,<8.4.0 || >8.4.0,<9.0.0"
 
 [[package]]
 name = "langchain-core"
-version = "0.2.33"
+version = "0.2.41"
 description = "Building applications with LLMs through composability"
 optional = false
 python-versions = "<4.0,>=3.8.1"
 files = [
-    {file = "langchain_core-0.2.33-py3-none-any.whl", hash = "sha256:c8de411336c13fa440b7a52895bfd1c064f04d315344855962988483902cc532"},
-    {file = "langchain_core-0.2.33.tar.gz", hash = "sha256:dd2659e0a560fc987b210107bf989aa14a6f4b67dd214c13a2c9669036cda975"},
+    {file = "langchain_core-0.2.41-py3-none-any.whl", hash = "sha256:3278fda5ba9a05defae8bb19f1226032add6aab21917db7b3bc74e750e263e84"},
+    {file = "langchain_core-0.2.41.tar.gz", hash = "sha256:bc12032c5a298d85be754ccb129bc13ea21ccb1d6e22f8d7ba18b8da64315bb5"},
 ]
 
 [package.dependencies]
 jsonpatch = ">=1.33,<2.0"
-langsmith = ">=0.1.75,<0.2.0"
+langsmith = ">=0.1.112,<0.2.0"
 packaging = ">=23.2,<25"
 pydantic = [
     {version = ">=1,<3", markers = "python_full_version < \"3.12.4\""},
@@ -1689,22 +1689,24 @@ langchain-core = ">=0.2.10,<0.3.0"
 
 [[package]]
 name = "langsmith"
-version = "0.1.99"
+version = "0.1.132"
 description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform."
 optional = false
 python-versions = "<4.0,>=3.8.1"
 files = [
-    {file = "langsmith-0.1.99-py3-none-any.whl", hash = "sha256:ef8d1d74a2674c514aa429b0171a9fbb661207dc3835142cca0e8f1bf97b26b0"},
-    {file = "langsmith-0.1.99.tar.gz", hash = "sha256:b5c6a1f158abda61600a4a445081ee848b4a28b758d91f2793dc02aeffafcaf1"},
+    {file = "langsmith-0.1.132-py3-none-any.whl", hash = "sha256:2320894203675c1c292b818cbecf68b69e47a9f7814d4e950237d1faaafd5dee"},
+    {file = "langsmith-0.1.132.tar.gz", hash = "sha256:007b8fac469138abdba89db931900a26c5d316640e27ff4660d28c92a766aae1"},
 ]
 
 [package.dependencies]
+httpx = ">=0.23.0,<1"
 orjson = ">=3.9.14,<4.0.0"
 pydantic = [
     {version = ">=1,<3", markers = "python_full_version < \"3.12.4\""},
     {version = ">=2.7.4,<3.0.0", markers = "python_full_version >= \"3.12.4\""},
 ]
 requests = ">=2,<3"
+requests-toolbelt = ">=1.0.0,<2.0.0"
 
 [[package]]
 name = "markdown"
@@ -3213,6 +3215,20 @@ urllib3 = ">=1.21.1,<3"
 socks = ["PySocks (>=1.5.6,!=1.5.7)"]
 use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
 
+[[package]]
+name = "requests-toolbelt"
+version = "1.0.0"
+description = "A utility belt for advanced users of python-requests"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
+files = [
+    {file = "requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6"},
+    {file = "requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06"},
+]
+
+[package.dependencies]
+requests = ">=2.0.1,<3.0.0"
+
 [[package]]
 name = "safetensors"
 version = "0.4.4"
diff --git a/promptolution/llms/api_llm.py b/promptolution/llms/api_llm.py
index a3dcdc7..d169156 100644
--- a/promptolution/llms/api_llm.py
+++ b/promptolution/llms/api_llm.py
@@ -8,12 +8,10 @@
 import openai
 import requests
 from langchain_anthropic import ChatAnthropic
-from langchain_community.chat_models.deepinfra import ChatDeepInfraException
+from langchain_community.chat_models.deepinfra import ChatDeepInfra, ChatDeepInfraException
 from langchain_core.messages import HumanMessage
 from langchain_openai import ChatOpenAI
 
-from promptolution.llms.deepinfra import ChatDeepInfra
-
 logger = Logger(__name__)
 logger.setLevel(INFO)
diff --git a/promptolution/llms/deepinfra.py b/promptolution/llms/deepinfra.py
deleted file mode 100644
index d91603c..0000000
--- a/promptolution/llms/deepinfra.py
+++ /dev/null
@@ -1,311 +0,0 @@
-"""DeepInfra API module for language models."""
-
-from __future__ import annotations
-
-from typing import Any, AsyncIterator, Callable, Dict, Iterator, List, Mapping, Optional, Sequence, Tuple, Type, Union
-
-from langchain_community.chat_models.deepinfra import (
-    ChatDeepInfraException,
-    _convert_dict_to_message,
-    _convert_message_to_dict,
-    _create_retry_decorator,
-    _handle_sse_line,
-    _parse_stream,
-    _parse_stream_async,
-)
-from langchain_community.utilities.requests import Requests
-from langchain_core.callbacks.manager import AsyncCallbackManagerForLLMRun, CallbackManagerForLLMRun
-from langchain_core.language_models import LanguageModelInput
-from langchain_core.language_models.chat_models import BaseChatModel, agenerate_from_stream, generate_from_stream
-from langchain_core.messages import BaseMessage
-from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
-from langchain_core.pydantic_v1 import BaseModel, Field, root_validator
-from langchain_core.runnables import Runnable
-from langchain_core.tools import BaseTool
-from langchain_core.utils import get_from_dict_or_env
-from langchain_core.utils.function_calling import convert_to_openai_tool
-
-
-class ChatDeepInfra(BaseChatModel):
-    """A chat model that uses the DeepInfra API."""
-
-    # client: Any  #: :meta private:
-    model_name: str = Field(alias="model")
-    """The model name to use for the chat model."""
-    deepinfra_api_token: Optional[str] = None
-    request_timeout: Optional[float] = Field(default=None, alias="timeout")
-    temperature: Optional[float] = 1
-    model_kwargs: Dict[str, Any] = Field(default_factory=dict)
-    """Run inference with this temperature. Must be in the closed
-    interval [0.0, 1.0]."""
-    top_p: Optional[float] = None
-    """Decode using nucleus sampling: consider the smallest set of tokens whose
-    probability sum is at least top_p. Must be in the closed interval [0.0, 1.0]."""
-    top_k: Optional[int] = None
-    """Decode using top-k sampling: consider the set of top_k most probable tokens.
-    Must be positive."""
-    n: int = 1
-    """Number of chat completions to generate for each prompt. Note that the API may
-    not return the full n completions if duplicates are generated."""
-    max_tokens: int = 256
-    streaming: bool = False
-    max_retries: int = 1
-
-    def __init__(self, model_name: str, **kwargs: Any):
-        """Initialize the DeepInfra chat model."""
-        super().__init__(model=model_name, **kwargs)
-
-    @property
-    def _default_params(self) -> Dict[str, Any]:
-        """Get the default parameters for calling OpenAI API."""
-        return {
-            "model": self.model_name,
-            "max_tokens": self.max_tokens,
-            "stream": self.streaming,
-            "n": self.n,
-            "temperature": self.temperature,
-            "request_timeout": self.request_timeout,
-            **self.model_kwargs,
-        }
-
-    @property
-    def _client_params(self) -> Dict[str, Any]:
-        """Get the parameters used for the openai client."""
-        return {**self._default_params}
-
-    def completion_with_retry(self, run_manager: Optional[CallbackManagerForLLMRun] = None, **kwargs: Any) -> Any:
-        """Use tenacity to retry the completion call."""
-        retry_decorator = _create_retry_decorator(self, run_manager=run_manager)
-
-        @retry_decorator
-        def _completion_with_retry(**kwargs: Any) -> Any:
-            try:
-                request_timeout = kwargs.pop("request_timeout")
-                request = Requests(headers=self._headers())
-                response = request.post(url=self._url(), data=self._body(kwargs), timeout=request_timeout)
-                self._handle_status(response.status_code, response.text)
-                return response
-            except Exception as e:
-                # import pdb; pdb.set_trace()
-                print("EX", e)  # noqa: T201
-                raise
-
-        return _completion_with_retry(**kwargs)
-
-    async def acompletion_with_retry(
-        self,
-        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
-        **kwargs: Any,
-    ) -> Any:
-        """Use tenacity to retry the async completion call."""
-        retry_decorator = _create_retry_decorator(self, run_manager=run_manager)
-
-        @retry_decorator
-        async def _completion_with_retry(**kwargs: Any) -> Any:
-            try:
-                request_timeout = kwargs.pop("request_timeout")
-                request = Requests(headers=self._headers())
-                async with request.apost(url=self._url(), data=self._body(kwargs), timeout=request_timeout) as response:
-                    self._handle_status(response.status, response.text)
-                    return await response.json()
-            except Exception as e:
-                print("EX", e)  # noqa: T201
-                raise
-
-        return await _completion_with_retry(**kwargs)
-
-    @root_validator(pre=True)
-    def init_defaults(cls, values: Dict) -> Dict:
-        """Validate api key, python package exists, temperature, top_p, and top_k."""
-        # For compatibility with LiteLLM
-        api_key = get_from_dict_or_env(
-            values,
-            "deepinfra_api_key",
-            "DEEPINFRA_API_KEY",
-            default="",
-        )
-        values["deepinfra_api_token"] = get_from_dict_or_env(
-            values,
-            "deepinfra_api_token",
-            "DEEPINFRA_API_TOKEN",
-            default=api_key,
-        )
-        # set model id
-        # values["model_name"] = get_from_dict_or_env(
-        #     values,
-        #     "model_name",
-        #     "DEEPINFRA_MODEL_NAME",
-        #     default="",
-        # )
-        return values
-
-    @root_validator(pre=False, skip_on_failure=True)
-    def validate_environment(cls, values: Dict) -> Dict:
-        """Validate the environment variables."""
-        if values["temperature"] is not None and not 0 <= values["temperature"] <= 1:
-            raise ValueError("temperature must be in the range [0.0, 1.0]")
-
-        if values["top_p"] is not None and not 0 <= values["top_p"] <= 1:
-            raise ValueError("top_p must be in the range [0.0, 1.0]")
-
-        if values["top_k"] is not None and values["top_k"] <= 0:
-            raise ValueError("top_k must be positive")
-
-        return values
-
-    def _generate(
-        self,
-        messages: List[BaseMessage],
-        stop: Optional[List[str]] = None,
-        run_manager: Optional[CallbackManagerForLLMRun] = None,
-        stream: Optional[bool] = None,
-        **kwargs: Any,
-    ) -> ChatResult:
-        should_stream = stream if stream is not None else self.streaming
-        if should_stream:
-            stream_iter = self._stream(messages, stop=stop, run_manager=run_manager, **kwargs)
-            return generate_from_stream(stream_iter)
-
-        message_dicts, params = self._create_message_dicts(messages, stop)
-        params = {**params, **kwargs}
-        response = self.completion_with_retry(messages=message_dicts, run_manager=run_manager, **params)
-        return self._create_chat_result(response.json())
-
-    def _create_chat_result(self, response: Mapping[str, Any]) -> ChatResult:
-        generations = []
-        for res in response["choices"]:
-            message = _convert_dict_to_message(res["message"])
-            gen = ChatGeneration(
-                message=message,
-                generation_info=dict(finish_reason=res.get("finish_reason")),
-            )
-            generations.append(gen)
-        token_usage = response.get("usage", {})
-        llm_output = {"token_usage": token_usage, "model": self.model_name}
-        res = ChatResult(generations=generations, llm_output=llm_output)
-        return res
-
-    def _create_message_dicts(
-        self, messages: List[BaseMessage], stop: Optional[List[str]]
-    ) -> Tuple[List[Dict[str, Any]], Dict[str, Any]]:
-        params = self._client_params
-        if stop is not None:
-            if "stop" in params:
-                raise ValueError("`stop` found in both the input and default params.")
-            params["stop"] = stop
-        message_dicts = [_convert_message_to_dict(m) for m in messages]
-        return message_dicts, params
-
-    def _stream(
-        self,
-        messages: List[BaseMessage],
-        stop: Optional[List[str]] = None,
-        run_manager: Optional[CallbackManagerForLLMRun] = None,
-        **kwargs: Any,
-    ) -> Iterator[ChatGenerationChunk]:
-        message_dicts, params = self._create_message_dicts(messages, stop)
-        params = {**params, **kwargs, "stream": True}
-
-        response = self.completion_with_retry(messages=message_dicts, run_manager=run_manager, **params)
-        for line in _parse_stream(response.iter_lines()):
-            chunk = _handle_sse_line(line)
-            if chunk:
-                cg_chunk = ChatGenerationChunk(message=chunk, generation_info=None)
-                if run_manager:
-                    run_manager.on_llm_new_token(str(chunk.content), chunk=cg_chunk)
-                yield cg_chunk
-
-    async def _astream(
-        self,
-        messages: List[BaseMessage],
-        stop: Optional[List[str]] = None,
-        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
-        **kwargs: Any,
-    ) -> AsyncIterator[ChatGenerationChunk]:
-        message_dicts, params = self._create_message_dicts(messages, stop)
-        params = {"messages": message_dicts, "stream": True, **params, **kwargs}
-
-        request_timeout = params.pop("request_timeout")
-        request = Requests(headers=self._headers())
-        async with request.apost(url=self._url(), data=self._body(params), timeout=request_timeout) as response:
-            async for line in _parse_stream_async(response.content):
-                chunk = _handle_sse_line(line)
-                if chunk:
-                    cg_chunk = ChatGenerationChunk(message=chunk, generation_info=None)
-                    if run_manager:
-                        await run_manager.on_llm_new_token(str(chunk.content), chunk=cg_chunk)
-                    yield cg_chunk
-
-    async def _agenerate(
-        self,
-        messages: List[BaseMessage],
-        stop: Optional[List[str]] = None,
-        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
-        stream: Optional[bool] = None,
-        **kwargs: Any,
-    ) -> ChatResult:
-        should_stream = stream if stream is not None else self.streaming
-        if should_stream:
-            stream_iter = self._astream(messages, stop=stop, run_manager=run_manager, **kwargs)
-            return await agenerate_from_stream(stream_iter)
-
-        message_dicts, params = self._create_message_dicts(messages, stop)
-        params = {"messages": message_dicts, **params, **kwargs}
-
-        res = await self.acompletion_with_retry(run_manager=run_manager, **params)
-        return self._create_chat_result(res)
-
-    @property
-    def _identifying_params(self) -> Dict[str, Any]:
-        """Get the identifying parameters."""
-        return {
-            "model": self.model_name,
-            "temperature": self.temperature,
-            "top_p": self.top_p,
-            "top_k": self.top_k,
-            "n": self.n,
-        }
-
-    @property
-    def _llm_type(self) -> str:
-        return "deepinfra-chat"
-
-    def _handle_status(self, code: int, text: Any) -> None:
-        if code >= 500:
-            raise ChatDeepInfraException(f"DeepInfra Server: Error {code}")
-        elif code >= 400:
-            raise ValueError(f"DeepInfra received an invalid payload: {text}")
-        elif code != 200:
-            raise Exception(f"DeepInfra returned an unexpected response with status " f"{code}: {text}")
-
-    def _url(self) -> str:
-        return "https://stage.api.deepinfra.com/v1/openai/chat/completions"
-
-    def _headers(self) -> Dict:
-        return {
-            "Authorization": f"bearer {self.deepinfra_api_token}",
-            "Content-Type": "application/json",
-        }
-
-    def _body(self, kwargs: Any) -> Dict:
-        return kwargs
-
-    def bind_tools(
-        self,
-        tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]],
-        **kwargs: Any,
-    ) -> Runnable[LanguageModelInput, BaseMessage]:
-        """Bind tool-like objects to this chat model.
-
-        Assumes model is compatible with OpenAI tool-calling API.
-
-        Args:
-            tools: A list of tool definitions to bind to this chat model.
-                Can be a dictionary, pydantic model, callable, or BaseTool. Pydantic
-                models, callables, and BaseTools will be automatically converted to
-                their schema dictionary representation.
-            **kwargs: Any additional parameters to pass to the
-                :class:`~langchain.runnable.Runnable` constructor.
-        """
-        formatted_tools = [convert_to_openai_tool(tool) for tool in tools]
-        return super().bind(tools=formatted_tools, **kwargs)
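
Note: with the vendored promptolution/llms/deepinfra.py removed, api_llm.py now imports ChatDeepInfra directly from langchain_community (pinned to langchain-community >=0.2.17 by the lock update above). A minimal usage sketch under assumptions not taken from this diff: the model id below is a placeholder, and the DEEPINFRA_API_TOKEN environment variable is assumed to be set so the class can authenticate.

    from langchain_community.chat_models.deepinfra import ChatDeepInfra
    from langchain_core.messages import HumanMessage

    # "meta-llama/Meta-Llama-3-8B-Instruct" is a placeholder model id, not one used by this repo;
    # DEEPINFRA_API_TOKEN is read from the environment by the upstream class.
    llm = ChatDeepInfra(model="meta-llama/Meta-Llama-3-8B-Instruct", temperature=0)
    response = llm.invoke([HumanMessage(content="Hello")])
    print(response.content)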