From 6ae265686aa4edc6e26f5262904ed6a2234b989d Mon Sep 17 00:00:00 2001 From: Truong Nguyen Date: Wed, 31 Jan 2024 13:47:15 +0700 Subject: [PATCH] add normalization factor to cosine similarity score calculation --- src/ragas/metrics/_answer_similarity.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/ragas/metrics/_answer_similarity.py b/src/ragas/metrics/_answer_similarity.py index da952fc6b..1113d44a2 100644 --- a/src/ragas/metrics/_answer_similarity.py +++ b/src/ragas/metrics/_answer_similarity.py @@ -71,7 +71,12 @@ async def _ascore( else: embeddings_1 = np.array(await self.embeddings.embed_texts(ground_truth)) embeddings_2 = np.array(await self.embeddings.embed_texts(answers)) - similarity = embeddings_1 @ embeddings_2.T + # Normalization factors of the above embeddings + norms_1 = np.linalg.norm(embeddings_1, axis=1, keepdims=True) + norms_2 = np.linalg.norm(embeddings_2, axis=1, keepdims=True) + embeddings_1_normalized = embeddings_1 / norms_1 + embeddings_2_normalized = embeddings_2 / norms_2 + similarity = embeddings_1_normalized @ embeddings_2_normalized.T if similarity.size == 1: scores = similarity.flatten() else: