diff --git a/.gitignore b/.gitignore index 9c50c6785..381999baf 100644 --- a/.gitignore +++ b/.gitignore @@ -168,4 +168,6 @@ cython_debug/ experiments/ **/fil-result/ src/ragas/_version.py -.vscode \ No newline at end of file +.vscode +.envrc +uv.lock diff --git a/pyproject.toml b/pyproject.toml index 55fde7722..1b08a27eb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,6 @@ [project] name = "ragas" +requires-python = ">=3.9" dependencies = [ "numpy", "datasets", @@ -40,6 +41,32 @@ docs = [ "mkdocs-git-committers-plugin-2", "mkdocs-git-revision-date-localized-plugin", ] +dev = [ + "rich", + "ruff", + "isort", + "black[jupyter]", + "pyright", + "llama_index", + "notebook", + "sphinx-autobuild", + "sentence-transformers", + "transformers", + "fastembed", + "graphene", + "rouge_score", + "nltk", + "rapidfuzz", + "pandas", + "datacompy", +] +test = [ + "pytest", + "pytest-xdist[psutil]", + "pytest-asyncio", + "llama_index", + "nbmake", +] [tool.setuptools] package-dir = {"" = "src"} diff --git a/src/ragas/dataset_schema.py b/src/ragas/dataset_schema.py index 200f3792c..32bb8cffc 100644 --- a/src/ragas/dataset_schema.py +++ b/src/ragas/dataset_schema.py @@ -13,8 +13,10 @@ from ragas.callbacks import ChainRunEncoder, parse_run_traces from ragas.cost import CostCallbackHandler +from ragas.exceptions import UploadException from ragas.messages import AIMessage, HumanMessage, ToolCall, ToolMessage -from ragas.utils import RAGAS_API_URL, safe_nanmean +from ragas.sdk import RAGAS_API_URL, RAGAS_APP_URL, upload_packet +from ragas.utils import safe_nanmean if t.TYPE_CHECKING: from pathlib import Path @@ -499,8 +501,6 @@ def total_cost( def upload(self, base_url: str = RAGAS_API_URL, verbose: bool = True) -> str: from datetime import datetime, timezone - import requests - timestamp = datetime.now(timezone.utc).isoformat() root_trace = [ trace for trace in self.ragas_traces.values() if trace.parent_run_id is None @@ -513,25 +513,33 @@ def upload(self, base_url: str = 
RAGAS_API_URL, verbose: bool = True) -> str: }, cls=ChainRunEncoder, ) - - response = requests.post( - f"{base_url}/alignment/evaluation", - data=packet, - headers={"Content-Type": "application/json"}, + response = upload_packet( + path="/alignment/evaluation", + data_json_string=packet, + base_url=base_url, ) - if response.status_code != 200: - raise Exception(f"Failed to upload results: {response.text}") - + # check status codes evaluation_endpoint = ( - f"https://app.ragas.io/alignment/evaluation/{root_trace.run_id}" + f"{RAGAS_APP_URL}/alignment/evaluation/{root_trace.run_id}" ) + if response.status_code == 409: + # this evaluation already exists + if verbose: + print(f"Evaluation run already exists. View at {evaluation_endpoint}") + return evaluation_endpoint + elif response.status_code != 200: + # any other error + raise UploadException( + status_code=response.status_code, + message=f"Failed to upload results: {response.text}", + ) + if verbose: print(f"Evaluation results uploaded! 
View at {evaluation_endpoint}") return evaluation_endpoint - class PromptAnnotation(BaseModel): prompt_input: t.Dict[str, t.Any] prompt_output: t.Dict[str, t.Any] @@ -554,7 +562,6 @@ def __getitem__(self, key): class MetricAnnotation(BaseModel): - root: t.Dict[str, t.List[SampleAnnotation]] def __getitem__(self, key): @@ -562,7 +569,6 @@ def __getitem__(self, key): @classmethod def from_json(cls, path, metric_name: t.Optional[str]) -> "MetricAnnotation": - dataset = json.load(open(path)) if metric_name is not None and metric_name not in dataset: raise ValueError(f"Split {metric_name} not found in the dataset.") @@ -604,7 +610,6 @@ def select(self, indices: t.List[int]) -> "SingleMetricAnnotation": @classmethod def from_json(cls, path) -> "SingleMetricAnnotation": - dataset = json.load(open(path)) return cls( @@ -613,7 +618,6 @@ def from_json(cls, path) -> "SingleMetricAnnotation": ) def filter(self, function: t.Optional[t.Callable] = None): - if function is None: function = lambda x: True # noqa: E731 diff --git a/src/ragas/exceptions.py b/src/ragas/exceptions.py index 09782c7a1..a419af953 100644 --- a/src/ragas/exceptions.py +++ b/src/ragas/exceptions.py @@ -39,3 +39,13 @@ class LLMDidNotFinishException(RagasException): def __init__(self): msg = "The LLM generation was not completed. Please increase try increasing the max_tokens and try again." super().__init__(msg) + + +class UploadException(RagasException): + """ + Exception raised when the app fails to upload the results. 
+ """ + + def __init__(self, status_code: int, message: str): + self.status_code = status_code + super().__init__(message) diff --git a/src/ragas/executor.py b/src/ragas/executor.py index ccfa4d889..cd672b5c0 100644 --- a/src/ragas/executor.py +++ b/src/ragas/executor.py @@ -5,13 +5,13 @@ import typing as t from dataclasses import dataclass, field +import nest_asyncio import numpy as np from tqdm.auto import tqdm from ragas.run_config import RunConfig from ragas.utils import batched -import nest_asyncio nest_asyncio.apply() logger = logging.getLogger(__name__) diff --git a/src/ragas/sdk.py b/src/ragas/sdk.py new file mode 100644 index 000000000..7c257367a --- /dev/null +++ b/src/ragas/sdk.py @@ -0,0 +1,44 @@ +""" +SDK module for interacting with the Ragas API service. +""" + +import os +from functools import lru_cache + +import requests + +from ragas._version import __version__ +from ragas.exceptions import UploadException + +# endpoint for uploading results +RAGAS_API_URL = "https://api.ragas.io" +RAGAS_APP_URL = "https://app.ragas.io" +RAGAS_API_SOURCE = "ragas_py" + + +@lru_cache(maxsize=1) +def get_app_token() -> str: + app_token = os.environ.get("RAGAS_APP_TOKEN") + if app_token is None: + raise ValueError("RAGAS_APP_TOKEN is not set") + return app_token + + +def upload_packet(path: str, data_json_string: str, base_url: str = RAGAS_API_URL): + app_token = get_app_token() + response = requests.post( + f"{base_url}/api/v1{path}", + data=data_json_string, + headers={ + "Content-Type": "application/json", + "x-app-token": app_token, + "x-source": RAGAS_API_SOURCE, + "x-app-version": __version__, + }, + ) + if response.status_code == 403: + raise UploadException( + status_code=response.status_code, + message="AUTHENTICATION_ERROR: The app token is invalid. 
Please check your RAGAS_APP_TOKEN environment variable.", + ) + return response diff --git a/src/ragas/testset/synthesizers/testset_schema.py b/src/ragas/testset/synthesizers/testset_schema.py index fc39f090d..cca8a29e0 100644 --- a/src/ragas/testset/synthesizers/testset_schema.py +++ b/src/ragas/testset/synthesizers/testset_schema.py @@ -15,7 +15,8 @@ RagasDataset, SingleTurnSample, ) -from ragas.utils import RAGAS_API_URL +from ragas.exceptions import UploadException +from ragas.sdk import RAGAS_API_URL, RAGAS_APP_URL, upload_packet class TestsetSample(BaseSample): @@ -136,16 +137,24 @@ def total_cost( ) def upload(self, base_url: str = RAGAS_API_URL, verbose: bool = True) -> str: - import requests - packet = TestsetPacket(samples_original=self.samples, run_id=self.run_id) - response = requests.post( - f"{base_url}/alignment/testset", json=packet.model_dump() + response = upload_packet( + path="/alignment/testset", + data_json_string=packet.model_dump_json(), + base_url=base_url, ) - if response.status_code != 200: - raise Exception(f"Failed to upload results: {response.text}") - - testset_endpoint = f"https://app.ragas.io/alignment/testset/{packet.run_id}" + testset_endpoint = f"{RAGAS_APP_URL}/alignment/testset/{self.run_id}" + if response.status_code == 409: + # this testset already exists + if verbose: + print(f"Testset already exists. View at {testset_endpoint}") + return testset_endpoint + elif response.status_code != 200: + # any other error + raise UploadException( + status_code=response.status_code, + message=f"Failed to upload results: {response.text}", + ) if verbose: print(f"Testset uploaded! 
View at {testset_endpoint}") return testset_endpoint diff --git a/src/ragas/utils.py b/src/ragas/utils.py index 62413cbc2..c6577fadf 100644 --- a/src/ragas/utils.py +++ b/src/ragas/utils.py @@ -20,8 +20,6 @@ RAGAS_SUPPORTED_LANGUAGE_CODES = { v.__name__.lower(): k for k, v in LANGUAGE_CODES.items() } -# endpoint for uploading results -RAGAS_API_URL = "https://api.ragas.io" @lru_cache(maxsize=1)