From 3ff9b6239e119fff8cfbff5217093d6afdb8fb2f Mon Sep 17 00:00:00 2001
From: Shahules786
Date: Thu, 7 Nov 2024 13:09:58 +0530
Subject: [PATCH 1/2] change llm reference

---
 .../available_metrics/general_purpose.md | 31 ++++++++++---------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/docs/concepts/metrics/available_metrics/general_purpose.md b/docs/concepts/metrics/available_metrics/general_purpose.md
index b7ea35ddf..78c25d073 100644
--- a/docs/concepts/metrics/available_metrics/general_purpose.md
+++ b/docs/concepts/metrics/available_metrics/general_purpose.md
@@ -22,8 +22,9 @@ sample = SingleTurnSample(
 scorer = AspectCritic(
     name="maliciousness",
     definition="Is the submission intended to harm, deceive, or exploit users?",
+    llm=evaluator_llm
+
 )
-scorer.llm = openai_model
 await scorer.single_turn_ascore(sample)
 ```
 
@@ -45,8 +46,10 @@ sample = SingleTurnSample(
 scorer = AspectCritic(
     name="correctness",
     definition="Is the response factually similar to the reference?",
+    llm=evaluator_llm
+
 )
-scorer.llm = openai_model
+
 await scorer.single_turn_ascore(sample)
 ```
 
@@ -83,8 +86,10 @@ sample = SingleTurnSample(
     response="The Eiffel Tower is located in Paris.",
 )
 
-scorer = SimpleCriteriaScoreWithoutReference(name="course_grained_score", definition="Score 0 to 5 for correctness")
-scorer.llm = openai_model
+scorer = SimpleCriteriaScoreWithoutReference(name="course_grained_score",
+    definition="Score 0 to 5 for correctness",
+    llm=evaluator_llm
+)
 await scorer.single_turn_ascore(sample)
 ```
 
@@ -101,8 +106,10 @@ sample = SingleTurnSample(
     reference="The Eiffel Tower is located in Egypt"
 )
 
-scorer = SimpleCriteriaScoreWithReference(name="course_grained_score", definition="Score 0 to 5 by similarity")
-scorer.llm = openai_model
+scorer = SimpleCriteriaScoreWithReference(name="course_grained_score",
+    definition="Score 0 to 5 by similarity",
+    llm=evaluator_llm)
+
 await scorer.single_turn_ascore(sample)
 ```
 
@@ -130,8 +137,7 @@ rubrics = {
     "score4_description": "The response is mostly accurate and aligns well with the ground truth, with only minor issues or missing details.",
     "score5_description": "The response is fully accurate, aligns completely with the ground truth, and is clear and detailed.",
 }
-scorer = RubricsScoreWithReference(rubrics=)
-scorer.llm = openai_model
+scorer = RubricsScoreWithReference(rubrics=rubrics, llm=evaluator_llm)
 await scorer.single_turn_ascore(sample)
 ```
 
@@ -148,8 +154,7 @@ sample = SingleTurnSample(
     response="The Eiffel Tower is located in Paris.",
 )
 
-scorer = RubricsScoreWithoutReference()
-scorer.llm = openai_model
+scorer = RubricsScoreWithoutReference(rubrics=rubrics, llm=evaluator_llm)
 await scorer.single_turn_ascore(sample)
 ```
 
@@ -181,8 +186,7 @@ SingleTurnSample(
     }
 )
 
-scorer = InstanceRubricsWithReference()
-scorer.llm = openai_model
+scorer = InstanceRubricsWithReference(llm=evaluator_llm)
 await scorer.single_turn_ascore(sample)
 ```
 
@@ -208,7 +212,6 @@ SingleTurnSample(
     }
 )
 
-scorer = InstanceRubricsScoreWithoutReference()
-scorer.llm = openai_model
+scorer = InstanceRubricsScoreWithoutReference(llm=evaluator_llm)
 await scorer.single_turn_ascore(sample)
 ```

From f2fac8d9464d7055c5e723f522f49620a50f96bb Mon Sep 17 00:00:00 2001
From: Shahules786
Date: Thu, 7 Nov 2024 13:32:02 +0530
Subject: [PATCH 2/2] fix broken link

---
 docs/concepts/metrics/available_metrics/index.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/concepts/metrics/available_metrics/index.md b/docs/concepts/metrics/available_metrics/index.md
index c9c537e75..a0b2c596f 100644
--- a/docs/concepts/metrics/available_metrics/index.md
+++ b/docs/concepts/metrics/available_metrics/index.md
@@ -16,7 +16,7 @@ Each metric are essentially paradigms that are designed to evaluate a particular
 
 ## Agents or Tool use cases
 
-- [Topic adherence](topic_adherence.md)
+- [Topic adherence](agents.md#topic_adherence)
 - [Tool call Accuracy](agents.md#tool-call-accuracy)
 - [Agent Goal Accuracy](agents.md#agent-goal-accuracy)
 
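For context on the first patch: the edited docs snippets pass an `evaluator_llm` object that the surrounding pages construct elsewhere. Below is a minimal sketch of the constructor-based pattern the patch adopts, assuming ragas 0.2-style imports and a LangChain-wrapped OpenAI chat model; the wrapper, model name, and sample values are illustrative and not part of the patch:

```python
# Illustrative sketch only: shows the `llm=evaluator_llm` constructor pattern
# that this patch switches the docs to, replacing `scorer.llm = openai_model`.
from langchain_openai import ChatOpenAI
from ragas.dataset_schema import SingleTurnSample
from ragas.llms import LangchainLLMWrapper
from ragas.metrics import AspectCritic

# Wrap a LangChain chat model as the evaluator LLM (model name is an assumption).
evaluator_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o-mini"))

sample = SingleTurnSample(
    user_input="Where is the Eiffel Tower located?",
    response="The Eiffel Tower is located in Paris.",
)

# The evaluator LLM is passed at construction time rather than assigned afterwards.
scorer = AspectCritic(
    name="maliciousness",
    definition="Is the submission intended to harm, deceive, or exploit users?",
    llm=evaluator_llm,
)

# Run inside an async context (e.g. a notebook cell), as in the docs' examples.
score = await scorer.single_turn_ascore(sample)
```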