Merged
22 changes: 16 additions & 6 deletions src/ragas/metrics/_answer_correctness.py
@@ -56,7 +56,7 @@ class AnswerCorrectness(MetricWithLLM):

"""
Measures answer correctness compared to ground truth as a combination of
semantic similarity and factuality
factuality and semantic similarity.

Attributes
----------
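As a quick illustration of what the reworded docstring describes, a hedged usage sketch follows; the import path and the weight values are assumptions, not taken from this diff (the first weight drives the factuality component, the second the semantic-similarity component):

```python
# Hedged usage sketch; import path and weight values are illustrative assumptions.
from ragas.metrics import AnswerCorrectness

# First weight -> factuality (statement-level F1), second -> semantic similarity.
answer_correctness = AnswerCorrectness(weights=[0.75, 0.25])

# Typically passed on to ragas.evaluate(dataset, metrics=[answer_correctness]).
```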
@@ -78,7 +78,14 @@ class AnswerCorrectness(MetricWithLLM):
     answer_similarity: AnswerSimilarity | None = None
 
     def __post_init__(self: t.Self):
-        if self.answer_similarity is None:
+        if len(self.weights) != 2:
+            raise ValueError("Expects a list of two weights. First for factuality, second for semantic similarity")
+        if all([w == 0 for w in self.weights]):
+            raise ValueError("At least one weight must be non-zero")
+        if not all([w >= 0 for w in self.weights]):
+            raise ValueError("Weights must be non-negative")
+
+        if self.answer_similarity is None and self.weights[1] != 0:
             self.answer_similarity = AnswerSimilarity(
                 llm=self.llm, batch_size=self.batch_size
             )
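A standalone sketch of the new validation, mirroring the checks added above so their effect on common configurations is easy to see (the helper name is hypothetical; the error messages are copied from the diff):

```python
# Hypothetical helper mirroring the new __post_init__ checks; not part of ragas itself.
def validate_weights(weights: list[float]) -> None:
    if len(weights) != 2:
        raise ValueError(
            "Expects a list of two weights. First for factuality, second for semantic similarity"
        )
    if all(w == 0 for w in weights):
        raise ValueError("At least one weight must be non-zero")
    if not all(w >= 0 for w in weights):
        raise ValueError("Weights must be non-negative")


validate_weights([1.0, 0.0])    # ok: factuality only; AnswerSimilarity is never constructed
# validate_weights([0.5])       # ValueError: exactly two weights are expected
# validate_weights([0.0, 0.0])  # ValueError: at least one weight must be non-zero
# validate_weights([0.3, -0.1]) # ValueError: weights must be non-negative
```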
@@ -113,7 +120,7 @@ def _score_batch(
"FP": "statements present in the answer but not found in the ground truth",
"FN": "relevant statements found in the ground truth but omitted in the answer", # noqa: E501
}

f1_score = []
for prediction in outputs:
prediction = json_loader.safe_load(prediction[0].text, self.llm)
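The TP/FP/FN statement counts defined above feed the F1-style factuality score computed in the next hunk. A small worked example with made-up counts:

```python
import numpy as np

# Hypothetical counts from the LLM-judged statement comparison.
tp, fp, fn = 3, 1, 2  # 3 statements match the ground truth, 1 extra, 2 missing

if (tp + fp + fn) > 0:
    score = tp / (tp + 0.5 * (fp + fn))  # 3 / (3 + 1.5) ≈ 0.667
else:
    score = np.nan  # no statements could be extracted at all

print(round(score, 3))  # 0.667
```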
@@ -131,10 +138,13 @@ def _score_batch(
                 score = tp / (tp + 0.5 * (fp + fn))
             else:
                 score = np.nan
 
             f1_score.append(score)
 
-        similarity_scores = self.answer_similarity._score_batch(dataset, callbacks=batch_group)  # type: ignore
+        if self.weights[1] == 0:
+            similarity_scores = np.zeros(len(f1_score))
+        else:
+            similarity_scores = self.answer_similarity._score_batch(dataset, callbacks=batch_group)  # type: ignore
         scores_stacked = np.vstack([f1_score, similarity_scores])
         scores = np.average(
             scores_stacked,
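Taken together, the change above skips the semantic-similarity pass entirely when its weight is zero, and the final score remains a per-sample weighted average of the two components. A hedged sketch of the combination step with illustrative numbers (the `axis` and `weights` arguments of the truncated `np.average` call are assumed):

```python
import numpy as np

weights = [0.75, 0.25]   # factuality, semantic similarity (illustrative values)
f1_score = [0.667, 1.0]  # per-sample factuality scores from the loop above

if weights[1] == 0:
    # Similarity weight is zero: skip the costly AnswerSimilarity pass entirely.
    similarity_scores = np.zeros(len(f1_score))
else:
    similarity_scores = [0.9, 0.8]  # would come from AnswerSimilarity._score_batch

scores_stacked = np.vstack([f1_score, similarity_scores])
scores = np.average(scores_stacked, axis=0, weights=weights)
print(scores)  # approximately [0.725, 0.95]
```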