diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 923426d27..e319b3bb7 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -23,8 +23,8 @@ jobs:
       ragas: ${{ steps.filter.outputs.ragas }}
       docs: ${{ steps.filter.outputs.docs }}
     steps:
-      - uses: actions/checkout@v3
-      - uses: dorny/paths-filter@v2
+      - uses: actions/checkout@v4
+      - uses: dorny/paths-filter@v3
        id: filter
        with:
          base: "main"
@@ -59,7 +59,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
        with:
          fetch-depth: 0 # fetch all tags and branches
@@ -108,7 +108,7 @@ jobs:
     if: ${{ (github.event_name == 'pull_request' && needs.diff.outputs.ragas == 'true') || github.event_name == 'push' }}
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup python
        uses: actions/setup-python@v4
diff --git a/src/ragas/_analytics.py b/src/ragas/_analytics.py
index e170690f3..5346e77e5 100644
--- a/src/ragas/_analytics.py
+++ b/src/ragas/_analytics.py
@@ -87,12 +87,14 @@ class EvaluationEvent(BaseEvent):
     metrics: t.List[str]
     evaluation_mode: str
     num_rows: int
+    language: str


 class TesetGenerationEvent(BaseEvent):
     evolution_names: t.List[str]
     evolution_percentages: t.List[float]
     num_rows: int
+    language: str


 @silent
diff --git a/src/ragas/evaluation.py b/src/ragas/evaluation.py
index 11e200d97..fe238e4f7 100644
--- a/src/ragas/evaluation.py
+++ b/src/ragas/evaluation.py
@@ -18,6 +18,7 @@ from ragas.metrics.base import Metric, MetricWithEmbeddings, MetricWithLLM
 from ragas.metrics.critique import AspectCritique
 from ragas.run_config import RunConfig
+from ragas.utils import get_feature_language

 # from ragas.metrics.critique import AspectCritique
 from ragas.validation import (
@@ -249,12 +250,15 @@ def evaluate(

     # log the evaluation event
     metrics_names = [m.name for m in metrics]
+    metric_lang = [get_feature_language(m) for m in metrics]
+    metric_lang = np.unique([m for m in metric_lang if m is not None])
     track(
         EvaluationEvent(
             event_type="evaluation",
             metrics=metrics_names,
             evaluation_mode="",
             num_rows=dataset.shape[0],
+            language=metric_lang[0] if len(metric_lang) > 0 else "",
         )
     )
     return result
diff --git a/src/ragas/testset/generator.py b/src/ragas/testset/generator.py
index 922f68ce2..b7169908a 100644
--- a/src/ragas/testset/generator.py
+++ b/src/ragas/testset/generator.py
@@ -28,7 +28,7 @@
 )
 from ragas.testset.extractor import KeyphraseExtractor
 from ragas.testset.filters import EvolutionFilter, NodeFilter, QuestionFilter
-from ragas.utils import check_if_sum_is_close, is_nan
+from ragas.utils import check_if_sum_is_close, get_feature_language, is_nan

 if t.TYPE_CHECKING:
     from langchain_core.documents import Document as LCDocument
@@ -251,12 +251,15 @@ def generate(
         # due to failed evolutions. MaxRetriesExceeded is a common reason
         test_data_rows = [r for r in test_data_rows if not is_nan(r)]
         test_dataset = TestDataset(test_data=test_data_rows)
+        evol_lang = [get_feature_language(e) for e in distributions]
+        evol_lang = [e for e in evol_lang if e is not None]
         track(
             TesetGenerationEvent(
                 event_type="testset_generation",
                 evolution_names=[e.__class__.__name__.lower() for e in distributions],
                 evolution_percentages=[distributions[e] for e in distributions],
                 num_rows=len(test_dataset.test_data),
+                language=evol_lang[0] if len(evol_lang) > 0 else "",
             )
         )
diff --git a/src/ragas/utils.py b/src/ragas/utils.py
index e2358e8cb..be8cea269 100644
--- a/src/ragas/utils.py
+++ b/src/ragas/utils.py
@@ -7,6 +7,10 @@

 import numpy as np

+if t.TYPE_CHECKING:
+    from ragas.metrics.base import Metric
+    from ragas.testset.evolutions import Evolution
+
 DEBUG_ENV_VAR = "RAGAS_DEBUG"
@@ -57,3 +61,14 @@ def is_nan(x):
         return np.isnan(x)
     except TypeError:
         return False
+
+
+def get_feature_language(feature: t.Union[Metric, Evolution]) -> t.Optional[str]:
+    from ragas.llms.prompt import Prompt
+
+    languags = [
+        value.language
+        for _, value in vars(feature).items()
+        if isinstance(value, Prompt)
+    ]
+    return languags[0] if len(languags) > 0 else None
diff --git a/tests/unit/test_analytics.py b/tests/unit/test_analytics.py
index cec727cb5..7dbeec696 100644
--- a/tests/unit/test_analytics.py
+++ b/tests/unit/test_analytics.py
@@ -21,13 +21,18 @@ def test_evaluation_event():
     from ragas._analytics import EvaluationEvent

     evaluation_event = EvaluationEvent(
-        event_type="evaluation", metrics=["harmfulness"], num_rows=1, evaluation_mode=""
+        event_type="evaluation",
+        metrics=["harmfulness"],
+        num_rows=1,
+        evaluation_mode="",
+        language="english",
     )

     payload = dict(evaluation_event)
     assert isinstance(payload.get("user_id"), str)
     assert isinstance(payload.get("evaluation_mode"), str)
     assert isinstance(payload.get("metrics"), list)
+    assert isinstance(payload.get("language"), str)


 def setup_user_id_filepath(tmp_path, monkeypatch):
@@ -101,6 +106,7 @@ def test_testset_generation_tracking(monkeypatch):
         evolution_names=[e.__class__.__name__.lower() for e in distributions],
         evolution_percentages=[distributions[e] for e in distributions],
         num_rows=10,
+        language="english",
     )

     assert dict(testset_event_payload)["evolution_names"] == [
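For context, the new `language` field is populated by the `get_feature_language` helper added in `src/ragas/utils.py`: it scans a metric's (or evolution's) attributes for `Prompt` objects and returns the `language` of the first one found, or `None` when the feature holds no prompts. A minimal usage sketch, assuming a ragas build that includes this patch and the stock `faithfulness` metric whose prompts default to English:

```python
from ragas.metrics import faithfulness
from ragas.utils import get_feature_language

# Inspects vars(faithfulness) for Prompt instances and reports the
# .language of the first match; None if the metric carries no prompts.
print(get_feature_language(faithfulness))  # expected: "english"
```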