explodinggradients · jjmachan · Feb 5, 2024 · Feb 5, 2024
diff --git a/src/ragas/metrics/_answer_correctness.py b/src/ragas/metrics/_answer_correctness.py
@@ -130,6 +130,9 @@ def _compute_statement_presence(self, prediction: t.Any) -> float:
             ]
             if any([np.isnan(i) for i in [tp, fp, fn]]):
                 score = np.nan
+                logger.warning(
+                    "Invalid prediction format. Expected a list of dictionaries with keys 'TP', 'FP', 'FN'"
+                )
             else:
                 score = tp / (tp + 0.5 * (fp + fn)) if tp > 0 else 0
         else:

diff --git a/src/ragas/metrics/_answer_relevance.py b/src/ragas/metrics/_answer_relevance.py
@@ -102,8 +102,14 @@ def _calculate_score(self, response: t.Sequence[t.Any], row: t.Dict) -> float:
                 if isinstance(item, dict)
             ]
         )
-        cosine_sim = self.calculate_similarity(question, gen_questions)
-        score = cosine_sim.mean() * int(not committal)
+        if all(q == "" for q in gen_questions):
+            logger.warning(
+                "Invalid JSON response. Expected dictionary with key 'question'"
+            )
+            score = np.nan
+        else:
+            cosine_sim = self.calculate_similarity(question, gen_questions)
+            score = cosine_sim.mean() * int(not committal)
 
         return score
 

diff --git a/src/ragas/metrics/_context_precision.py b/src/ragas/metrics/_context_precision.py
@@ -109,6 +109,10 @@ def _calculate_average_precision(self, json_responses: t.List[t.Dict]) -> float:
             ]
         )
         score = numerator / denominator
+        if np.isnan(score):
+            logger.warning(
+                "Invalid response format. Expected a list of dictionaries with keys 'verdict'"
+            )
         return score
 
     async def _ascore(

diff --git a/src/ragas/metrics/_context_recall.py b/src/ragas/metrics/_context_recall.py
@@ -101,6 +101,9 @@ def _compute_score(self, response: t.Any) -> float:
             numerator = sum(response)
             return numerator / denom
         else:
+            logger.warning(
+                "Invalid JSON response. Expected dictionary with key 'Attributed'"
+            )
             return np.nan
 
     async def _ascore(self, row: t.Dict, callbacks: Callbacks, is_async: bool) -> float:

diff --git a/src/ragas/metrics/_faithfulness.py b/src/ragas/metrics/_faithfulness.py
@@ -162,6 +162,9 @@ def _compute_score(self, output: t.Any):
         if num_statements:
             score = faithful_statements / num_statements
         else:
+            logger.warning(
+                "Invalid JSON response. Expected dictionary with key 'verdict'"
+            )
             score = np.nan
 
         return score