From e0658eb4bc3275bbc290de644ba66d351e0b9ec7 Mon Sep 17 00:00:00 2001
From: Jithin James
Date: Sat, 20 May 2023 20:07:40 +0530
Subject: [PATCH 1/6] added init_model to baseline

---
 ragas/metrics/base.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/ragas/metrics/base.py b/ragas/metrics/base.py
index b2acdc9f5..9afd921cb 100644
--- a/ragas/metrics/base.py
+++ b/ragas/metrics/base.py
@@ -13,17 +13,33 @@ class Metric(ABC):
     @property
     @abstractmethod
     def name(self: t.Self) -> str:
+        """
+        The name of the metric.
+        """
         ...
 
     @property
     @abstractmethod
     def is_batchable(self: t.Self) -> bool:
+        """
+        Whether this metric can be scored in batches.
+        """
+        ...
+
+    @abstractmethod
+    def init_model(self: t.Self) -> None:
+        """
+        Lazily initialize the model.
+        """
         ...
 
     @abstractmethod
     def score(
         self: t.Self, ground_truth: list[str], generated_text: list[str]
     ) -> list[float]:
+        """
+        Run the metric on ground_truth and generated_text and return the scores.
+        """
         ...

From b14b194525758fcaaa3a3f79525afbdcb266e152 Mon Sep 17 00:00:00 2001
From: Jithin James
Date: Sat, 20 May 2023 20:33:44 +0530
Subject: [PATCH 2/6] added init_model to everything

---
 ragas/metrics/base.py         |  4 ++++
 ragas/metrics/factual.py      |  4 ++--
 ragas/metrics/similarity.py   |  2 +-
 ragas/metrics/simple.py       |  8 +++++++-
 tests/benchmarks/benchmark.py | 14 ++++++++------
 5 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/ragas/metrics/base.py b/ragas/metrics/base.py
index 9afd921cb..21cde9b2e 100644
--- a/ragas/metrics/base.py
+++ b/ragas/metrics/base.py
@@ -53,6 +53,10 @@ def eval(self, ground_truth: list[list[str]], generated_text: list[str]) -> Resu
         ds = Dataset.from_dict(
             {"ground_truth": ground_truth, "generated_text": generated_text}
         )
+
+        # initialize all the models in the metrics
+        [m.init_model() for m in self.metrics]
+
         ds = ds.map(
             self._get_score,
             batched=self.batched,
diff --git a/ragas/metrics/factual.py b/ragas/metrics/factual.py
index d9ab22909..7dc452fd8 100644
--- a/ragas/metrics/factual.py
+++ b/ragas/metrics/factual.py
@@ -52,7 +52,7 @@ class EntailmentScore(Metric):
     batch_size: int = 4
     device: t.Literal["cpu", "cuda"] | Device = "cpu"
 
-    def __post_init__(self):
+    def init_model(self):
         self.device = device_check(self.device)
         self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
         self.model = AutoModelForSequenceClassification.from_pretrained(self.model_name)
@@ -212,7 +212,7 @@ class Qsquare(Metric):
     include_nouns: bool = True
     save_results: bool = False
 
-    def __post_init__(self):
+    def init_model(self):
         self.qa = QAGQ.from_pretrained(self.qa_model_name)
         self.qg = QAGQ.from_pretrained(self.qg_model_name)
         self.nli = EntailmentScore()
diff --git a/ragas/metrics/similarity.py b/ragas/metrics/similarity.py
index 60b3d1d2e..a79468b69 100644
--- a/ragas/metrics/similarity.py
+++ b/ragas/metrics/similarity.py
@@ -18,7 +18,7 @@ class BERTScore(Metric):
     model_path: str = "all-MiniLM-L6-v2"
     batch_size: int = 1000
 
-    def __post_init__(self):
+    def init_model(self):
         self.model = SentenceTransformer(self.model_path)
 
     @property
diff --git a/ragas/metrics/simple.py b/ragas/metrics/simple.py
index 2ac8ee8b9..643d5d777 100644
--- a/ragas/metrics/simple.py
+++ b/ragas/metrics/simple.py
@@ -26,6 +26,9 @@ def name(self):
     def is_batchable(self):
         return True
 
+    def init_model(self):
+        ...
+
     def score(self, ground_truth: t.List[str], generated_text: t.List[str]):
         ground_truth_ = [[word_tokenize(text)] for text in ground_truth]
         generated_text_ = [word_tokenize(text) for text in generated_text]
@@ -45,7 +48,7 @@ class ROUGE(Metric):
     type: t.Literal[ROUGE_TYPES]
     use_stemmer: bool = False
 
-    def __post_init__(self):
+    def init_model(self):
         self.scorer = rouge_scorer.RougeScorer(
             [self.type], use_stemmer=self.use_stemmer
         )
@@ -80,6 +83,9 @@ def name(self) -> str:
     def is_batchable(self):
         return True
 
+    def init_model(self):
+        ...
+
     def score(self, ground_truth: t.List[str], generated_text: t.List[str]):
         if self.measure == "distance":
             score = [distance(s1, s2) for s1, s2 in zip(ground_truth, generated_text)]
diff --git a/tests/benchmarks/benchmark.py b/tests/benchmarks/benchmark.py
index 5bb413fc0..f98d8c564 100644
--- a/tests/benchmarks/benchmark.py
+++ b/tests/benchmarks/benchmark.py
@@ -7,8 +7,10 @@
 
 from ragas.metrics import (
     Evaluation,
+    bert_score,
     edit_distance,
     edit_ratio,
+    entailment_score,
     q_square,
     rouge1,
     rouge2,
@@ -16,17 +18,17 @@
 )
 
 DEVICE = "cuda" if is_available() else "cpu"
-BATCHES = [0, 1]
+BATCHES = [0, 1, 30, 60]
 
 METRICS = {
     "Rouge1": rouge1,
-    "Rouge2": rouge2,
-    "RougeL": rougeL,
+    # "Rouge2": rouge2,
+    # "RougeL": rougeL,
     "EditRatio": edit_ratio,
-    "EditDistance": edit_distance,
-    # "SBERTScore": bert_score,
+    # "EditDistance": edit_distance,
+    "SBERTScore": bert_score,
     # "EntailmentScore": entailment_score,
-    "Qsquare": q_square,
+    # "Qsquare": q_square,
 }
 DS = load_dataset("explodinggradients/eli5-test", split="test_eli5")
 assert isinstance(DS, arrow_dataset.Dataset), "Not an arrow_dataset"

From b9be515ee5eacd75e2c504962b89f672a3066109 Mon Sep 17 00:00:00 2001
From: Jithin James
Date: Sat, 20 May 2023 20:48:26 +0530
Subject: [PATCH 3/6] fix lint issues

---
 tests/benchmarks/benchmark.py | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/tests/benchmarks/benchmark.py b/tests/benchmarks/benchmark.py
index f98d8c564..e87c404d1 100644
--- a/tests/benchmarks/benchmark.py
+++ b/tests/benchmarks/benchmark.py
@@ -5,17 +5,7 @@
 from tqdm import tqdm
 from utils import print_table, timeit
 
-from ragas.metrics import (
-    Evaluation,
-    bert_score,
-    edit_distance,
-    edit_ratio,
-    entailment_score,
-    q_square,
-    rouge1,
-    rouge2,
-    rougeL,
-)
+from ragas.metrics import Evaluation, bert_score, edit_ratio, rouge1
 
 DEVICE = "cuda" if is_available() else "cpu"
 BATCHES = [0, 1, 30, 60]

From acf72289588621f361395cc866d5db937815002c Mon Sep 17 00:00:00 2001
From: Jithin James
Date: Mon, 22 May 2023 19:17:21 +0530
Subject: [PATCH 4/6] added init model to qsquare

---
 ragas/metrics/factual.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ragas/metrics/factual.py b/ragas/metrics/factual.py
index 7dc452fd8..fd5891569 100644
--- a/ragas/metrics/factual.py
+++ b/ragas/metrics/factual.py
@@ -216,6 +216,7 @@ def init_model(self):
         self.qa = QAGQ.from_pretrained(self.qa_model_name)
         self.qg = QAGQ.from_pretrained(self.qg_model_name)
         self.nli = EntailmentScore()
+        self.nli.init_model()
         try:
             self.nlp = spacy.load(SPACY_MODEL)
         except OSError:

From 6fd5902cfcaa177d752166334c263f5472344199 Mon Sep 17 00:00:00 2001
From: Jithin James
Date: Mon, 22 May 2023 19:33:47 +0530
Subject: [PATCH 5/6] ignore type issue

---
 ragas/metrics/factual.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ragas/metrics/factual.py b/ragas/metrics/factual.py
index fd5891569..a936d4ff8 100644
--- a/ragas/metrics/factual.py
+++ b/ragas/metrics/factual.py
@@ -327,7 +327,7 @@ def score(self, ground_truth: list[str], generated_text: list[str], **kwargs):
             )
             gnd_qans[i] = [
                 {"question": qstn, "answer": ans}
-                for qstn, ans in zip(questions, candidates)
+                for qstn, ans in zip(questions, candidates)  # type: ignore
             ]
 
         for i, gen_text in enumerate(generated_text):
@@ -335,7 +335,7 @@ def score(self, ground_truth: list[str], generated_text: list[str], **kwargs):
             gen_answers = self.generate_answers(questions, gen_text)
             _ = [
                 item.update({"predicted_answer": ans})
-                for item, ans in zip(gnd_qans[i], gen_answers)
+                for item, ans in zip(gnd_qans[i], gen_answers)  # type: ignore
             ]
 
             # del self.qa

From d833553b22bf899da32e0f3f894b751e0053dca6 Mon Sep 17 00:00:00 2001
From: Jithin James
Date: Mon, 22 May 2023 19:35:16 +0530
Subject: [PATCH 6/6] fix linting

---
 Makefile                      | 2 +-
 tests/benchmarks/benchmark.py | 7 ++++++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index fc5ab0fa8..52ce482a3 100644
--- a/Makefile
+++ b/Makefile
@@ -18,7 +18,7 @@ lint: ## Running lint checker: ruff
 	@ruff check ragas examples tests
 type: ## Running type checker: pyright
 	@echo "(pyright) Typechecking codebase..."
-	@pyright -p ragas
+	@pyright ragas
 clean: ## Clean all generated files
 	@echo "Cleaning all generated files..."
 	@cd $(GIT_ROOT)/docs && make clean
diff --git a/tests/benchmarks/benchmark.py b/tests/benchmarks/benchmark.py
index e87c404d1..c868ceff8 100644
--- a/tests/benchmarks/benchmark.py
+++ b/tests/benchmarks/benchmark.py
@@ -5,7 +5,12 @@
 from tqdm import tqdm
 from utils import print_table, timeit
 
-from ragas.metrics import Evaluation, bert_score, edit_ratio, rouge1
+from ragas.metrics import (
+    Evaluation,
+    bert_score,
+    edit_ratio,
+    rouge1,
+)
 
 DEVICE = "cuda" if is_available() else "cpu"
 BATCHES = [0, 1, 30, 60]
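
A note on the resulting contract: after this series, metrics no longer load tokenizers, HF models, or spaCy pipelines in __post_init__; they do it in init_model(), which Evaluation.eval() invokes on every metric just before the dataset is mapped. The sketch below illustrates what a downstream metric has to provide. It is a hypothetical example, not code from these patches: the CosineSimilarity class, its model_path default, and the comments are assumptions for illustration.

    # Illustrative sketch only: a hypothetical metric written against the
    # Metric ABC from ragas/metrics/base.py as it stands after this series.
    from __future__ import annotations

    from dataclasses import dataclass

    import numpy as np
    from sentence_transformers import SentenceTransformer

    from ragas.metrics.base import Metric


    @dataclass
    class CosineSimilarity(Metric):  # hypothetical; not part of ragas
        model_path: str = "all-MiniLM-L6-v2"  # assumed default

        @property
        def name(self) -> str:
            return "cosine_similarity"

        @property
        def is_batchable(self) -> bool:
            return True

        def init_model(self) -> None:
            # Heavy setup happens here instead of __post_init__, so that
            # constructing the metric stays cheap; Evaluation.eval() calls
            # init_model() on every metric before scoring starts.
            self.model = SentenceTransformer(self.model_path)

        def score(
            self, ground_truth: list[str], generated_text: list[str]
        ) -> list[float]:
            # Row-wise cosine similarity between paired embeddings.
            gt = self.model.encode(ground_truth, convert_to_numpy=True)
            gen = self.model.encode(generated_text, convert_to_numpy=True)
            num = (gt * gen).sum(axis=1)
            denom = np.linalg.norm(gt, axis=1) * np.linalg.norm(gen, axis=1)
            return (num / denom).tolist()

Deferring the SentenceTransformer load to init_model() keeps import and construction cheap, which means the benchmark above can import every metric up front while only paying for the models of the metrics it actually runs.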