From 55064c2a9423e855757ab9e6532274b9d50e19e0 Mon Sep 17 00:00:00 2001 From: Shahules786 Date: Thu, 8 Feb 2024 16:47:29 -0800 Subject: [PATCH 1/7] ensure dict type --- src/ragas/testset/evolutions.py | 7 +++++-- src/ragas/testset/extractor.py | 1 + src/ragas/testset/filters.py | 3 +++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/ragas/testset/evolutions.py b/src/ragas/testset/evolutions.py index 09802a188..90c275e63 100644 --- a/src/ragas/testset/evolutions.py +++ b/src/ragas/testset/evolutions.py @@ -180,9 +180,12 @@ async def generate_datarow( relevent_contexts_result = await json_loader.safe_load( results.generations[0][0].text.strip(), llm=self.generator_llm ) - relevant_context_indices = relevent_contexts_result.get( - "relevant_context", None + relevant_context_indices = ( + relevent_contexts_result.get("relevant_context", None) + if isinstance(relevent_contexts_result, dict) + else None ) + if relevant_context_indices is None: relevant_context = CurrentNodes( root_node=current_nodes.root_node, nodes=current_nodes.nodes diff --git a/src/ragas/testset/extractor.py b/src/ragas/testset/extractor.py index 77c586c2e..09154d8cf 100644 --- a/src/ragas/testset/extractor.py +++ b/src/ragas/testset/extractor.py @@ -50,6 +50,7 @@ async def extract(self, node: Node, is_async: bool = True) -> t.List[str]: keyphrases = await json_loader.safe_load( results.generations[0][0].text.strip(), llm=self.llm, is_async=is_async ) + keyphrases = keyphrases if isinstance(keyphrases, dict) else {} logger.debug("keyphrases: %s", keyphrases) return keyphrases.get("keyphrases", []) diff --git a/src/ragas/testset/filters.py b/src/ragas/testset/filters.py index 0eb06b77b..9d017b7e5 100644 --- a/src/ragas/testset/filters.py +++ b/src/ragas/testset/filters.py @@ -54,6 +54,7 @@ async def filter(self, node: Node) -> t.Dict: results = await self.llm.generate(prompt=prompt) output = results.generations[0][0].text.strip() score = await json_loader.safe_load(output, llm=self.llm) + score = score if isinstance(score, dict) else {} logger.debug("node filter: %s", score) score.update({"score": score.get("score", 0) >= self.threshold}) return score @@ -85,6 +86,7 @@ async def filter(self, question: str) -> bool: results = await self.llm.generate(prompt=prompt) results = results.generations[0][0].text.strip() json_results = await json_loader.safe_load(results, llm=self.llm) + json_results = json_results if isinstance(json_results, dict) else {} logger.debug("filtered question: %s", json_results) return json_results.get("verdict") == "1" @@ -117,6 +119,7 @@ async def filter(self, simple_question: str, compressed_question: str) -> bool: results = await self.llm.generate(prompt=prompt) results = results.generations[0][0].text.strip() json_results = await json_loader.safe_load(results, llm=self.llm) + json_results = json_results if isinstance(json_results, dict) else {} logger.debug("evolution filter: %s", json_results) return json_results.get("verdict") == "1" From 7760bbf222f4024c573f2a46e23539161a5f9581 Mon Sep 17 00:00:00 2001 From: Shahules786 Date: Mon, 12 Feb 2024 12:23:12 -0800 Subject: [PATCH 2/7] add language --- src/ragas/_analytics.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ragas/_analytics.py b/src/ragas/_analytics.py index e170690f3..5346e77e5 100644 --- a/src/ragas/_analytics.py +++ b/src/ragas/_analytics.py @@ -87,12 +87,14 @@ class EvaluationEvent(BaseEvent): metrics: t.List[str] evaluation_mode: str num_rows: int + language: str class TesetGenerationEvent(BaseEvent): evolution_names: t.List[str] evolution_percentages: t.List[float] num_rows: int + language: str @silent From c91b13d337b4766b55e94d55a82e9ec12eef47b8 Mon Sep 17 00:00:00 2001 From: Shahules786 Date: Mon, 12 Feb 2024 12:23:27 -0800 Subject: [PATCH 3/7] post language to event --- src/ragas/evaluation.py | 4 ++++ src/ragas/testset/generator.py | 5 ++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/ragas/evaluation.py b/src/ragas/evaluation.py index 11e200d97..fe238e4f7 100644 --- a/src/ragas/evaluation.py +++ b/src/ragas/evaluation.py @@ -18,6 +18,7 @@ from ragas.metrics.base import Metric, MetricWithEmbeddings, MetricWithLLM from ragas.metrics.critique import AspectCritique from ragas.run_config import RunConfig +from ragas.utils import get_feature_language # from ragas.metrics.critique import AspectCritique from ragas.validation import ( @@ -249,12 +250,15 @@ def evaluate( # log the evaluation event metrics_names = [m.name for m in metrics] + metric_lang = [get_feature_language(m) for m in metrics] + metric_lang = np.unique([m for m in metric_lang if m is not None]) track( EvaluationEvent( event_type="evaluation", metrics=metrics_names, evaluation_mode="", num_rows=dataset.shape[0], + language=metric_lang[0] if len(metric_lang) > 0 else "", ) ) return result diff --git a/src/ragas/testset/generator.py b/src/ragas/testset/generator.py index 57c8cbfcf..48f5b1b9d 100644 --- a/src/ragas/testset/generator.py +++ b/src/ragas/testset/generator.py @@ -28,7 +28,7 @@ ) from ragas.testset.extractor import KeyphraseExtractor from ragas.testset.filters import EvolutionFilter, NodeFilter, QuestionFilter -from ragas.utils import check_if_sum_is_close, is_nan +from ragas.utils import check_if_sum_is_close, get_feature_language, is_nan if t.TYPE_CHECKING: from langchain_core.documents import Document as LCDocument @@ -251,12 +251,15 @@ def generate( # due to failed evolutions. MaxRetriesExceeded is a common reason test_data_rows = [r for r in test_data_rows if not is_nan(r)] test_dataset = TestDataset(test_data=test_data_rows) + evol_lang = [get_feature_language(e) for e in distributions] + evol_lang = [e for e in evol_lang if e is not None] track( TesetGenerationEvent( event_type="testset_generation", evolution_names=[e.__class__.__name__.lower() for e in distributions], evolution_percentages=[distributions[e] for e in distributions], num_rows=len(test_dataset.test_data), + language=evol_lang[0] if len(evol_lang) > 0 else "", ) ) From b75b2d6c270ed23931047a6df36b180b7bc13756 Mon Sep 17 00:00:00 2001 From: Shahules786 Date: Mon, 12 Feb 2024 12:23:36 -0800 Subject: [PATCH 4/7] get language from feature --- src/ragas/utils.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/ragas/utils.py b/src/ragas/utils.py index e2358e8cb..f81981065 100644 --- a/src/ragas/utils.py +++ b/src/ragas/utils.py @@ -7,6 +7,9 @@ import numpy as np +if t.TYPE_CHECKING: + pass + DEBUG_ENV_VAR = "RAGAS_DEBUG" @@ -57,3 +60,14 @@ def is_nan(x): return np.isnan(x) except TypeError: return False + + +def get_feature_language(feature): + from ragas.llms.prompt import Prompt + + languags = [ + value.language + for name, value in vars(feature).items() + if isinstance(value, Prompt) + ] + return languags[0] if len(languags) > 0 else None From c717c370909d0c2e6f0a4a476e8cd7250d7666be Mon Sep 17 00:00:00 2001 From: Shahules786 Date: Mon, 12 Feb 2024 12:23:45 -0800 Subject: [PATCH 5/7] add language param --- tests/unit/test_analytics.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_analytics.py b/tests/unit/test_analytics.py index cec727cb5..7dbeec696 100644 --- a/tests/unit/test_analytics.py +++ b/tests/unit/test_analytics.py @@ -21,13 +21,18 @@ def test_evaluation_event(): from ragas._analytics import EvaluationEvent evaluation_event = EvaluationEvent( - event_type="evaluation", metrics=["harmfulness"], num_rows=1, evaluation_mode="" + event_type="evaluation", + metrics=["harmfulness"], + num_rows=1, + evaluation_mode="", + language="english", ) payload = dict(evaluation_event) assert isinstance(payload.get("user_id"), str) assert isinstance(payload.get("evaluation_mode"), str) assert isinstance(payload.get("metrics"), list) + assert isinstance(payload.get("language"), str) def setup_user_id_filepath(tmp_path, monkeypatch): @@ -101,6 +106,7 @@ def test_testset_generation_tracking(monkeypatch): evolution_names=[e.__class__.__name__.lower() for e in distributions], evolution_percentages=[distributions[e] for e in distributions], num_rows=10, + language="english", ) assert dict(testset_event_payload)["evolution_names"] == [ From e553f48c1097994d61f1b0e12882295dba5adb92 Mon Sep 17 00:00:00 2001 From: jjmachan Date: Thu, 15 Feb 2024 00:01:26 -0800 Subject: [PATCH 6/7] type annotate --- src/ragas/utils.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/ragas/utils.py b/src/ragas/utils.py index f81981065..be8cea269 100644 --- a/src/ragas/utils.py +++ b/src/ragas/utils.py @@ -8,7 +8,8 @@ import numpy as np if t.TYPE_CHECKING: - pass + from ragas.metrics.base import Metric + from ragas.testset.evolutions import Evolution DEBUG_ENV_VAR = "RAGAS_DEBUG" @@ -62,12 +63,12 @@ def is_nan(x): return False -def get_feature_language(feature): +def get_feature_language(feature: t.Union[Metric, Evolution]) -> t.Optional[str]: from ragas.llms.prompt import Prompt languags = [ value.language - for name, value in vars(feature).items() + for _, value in vars(feature).items() if isinstance(value, Prompt) ] return languags[0] if len(languags) > 0 else None From 9fc55b8db81d58e04a934cef0f38bc5351829ba9 Mon Sep 17 00:00:00 2001 From: jjmachan Date: Thu, 15 Feb 2024 00:13:04 -0800 Subject: [PATCH 7/7] update version of actions --- .github/workflows/ci.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 923426d27..e319b3bb7 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -23,8 +23,8 @@ jobs: ragas: ${{ steps.filter.outputs.ragas }} docs: ${{ steps.filter.outputs.docs }} steps: - - uses: actions/checkout@v3 - - uses: dorny/paths-filter@v2 + - uses: actions/checkout@v4 + - uses: dorny/paths-filter@v3 id: filter with: base: "main" @@ -59,7 +59,7 @@ jobs: runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 # fetch all tags and branches @@ -108,7 +108,7 @@ jobs: if: ${{ (github.event_name == 'pull_request' && needs.diff.outputs.ragas == 'true') || github.event_name == 'push' }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup python uses: actions/setup-python@v4