From bb6ff9627935f7ca845c7adb13d26f734b4682c6 Mon Sep 17 00:00:00 2001 From: Shahules786 Date: Sat, 13 May 2023 04:09:29 +0530 Subject: [PATCH 01/18] add max_length --- belar/metrics/factual.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/belar/metrics/factual.py b/belar/metrics/factual.py index adc0509f7..e636a2793 100644 --- a/belar/metrics/factual.py +++ b/belar/metrics/factual.py @@ -1,4 +1,5 @@ from __future__ import annotations +from tkinter.ttk import _Padding import typing as t from dataclasses import dataclass @@ -20,6 +21,7 @@ class EntailmentScore(Metric): batch_size: int = 4 device: t.Literal["cpu", "cuda"] = "cpu" + def __post_init__(self): self.device = device_check(self.device) self.tokenizer = AutoTokenizer.from_pretrained(self.model_name) From 8c017f4abfadaf2ca6585f55b4af7896bc387ba4 Mon Sep 17 00:00:00 2001 From: Shahules786 Date: Sat, 13 May 2023 04:19:26 +0530 Subject: [PATCH 02/18] add max_length --- belar/metrics/factual.py | 1 - 1 file changed, 1 deletion(-) diff --git a/belar/metrics/factual.py b/belar/metrics/factual.py index e636a2793..3b8655214 100644 --- a/belar/metrics/factual.py +++ b/belar/metrics/factual.py @@ -1,5 +1,4 @@ from __future__ import annotations -from tkinter.ttk import _Padding import typing as t from dataclasses import dataclass From fe8a01781e2cd370efe75ac8c92adb19c0d468cd Mon Sep 17 00:00:00 2001 From: Jithin James Date: Sat, 13 May 2023 12:30:37 +0530 Subject: [PATCH 03/18] black fixes --- belar/metrics/factual.py | 1 - 1 file changed, 1 deletion(-) diff --git a/belar/metrics/factual.py b/belar/metrics/factual.py index 3b8655214..adc0509f7 100644 --- a/belar/metrics/factual.py +++ b/belar/metrics/factual.py @@ -20,7 +20,6 @@ class EntailmentScore(Metric): batch_size: int = 4 device: t.Literal["cpu", "cuda"] = "cpu" - def __post_init__(self): self.device = device_check(self.device) self.tokenizer = AutoTokenizer.from_pretrained(self.model_name) From 0db62659cda9741c060c7ab990abb9ac77d36b17 Mon Sep 17 00:00:00 2001 From: Jithin James Date: Sat, 13 May 2023 16:18:39 +0530 Subject: [PATCH 04/18] added benchmarks --- pyproject.toml | 7 +++ tests/integration/benchmark.py | 66 ++++++++++++++++++++++++++++ tests/integration/run_all_metrics.py | 26 ----------- tests/integration/utils.py | 27 ++++++++++++ 4 files changed, 100 insertions(+), 26 deletions(-) create mode 100644 tests/integration/benchmark.py delete mode 100644 tests/integration/run_all_metrics.py create mode 100644 tests/integration/utils.py diff --git a/pyproject.toml b/pyproject.toml index e1af83e8d..ad537b4bd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,6 +11,13 @@ dependencies = [ ] dynamic = ["version", "readme"] +[project.optional-dependencies] +test = [ + # "pytest-cov", + "pydantic", + "pytest", +] + [tool.setuptools.dynamic] readme = {file = ["README.md"], content-type = "text/plain"} diff --git a/tests/integration/benchmark.py b/tests/integration/benchmark.py new file mode 100644 index 000000000..fd2bd9e36 --- /dev/null +++ b/tests/integration/benchmark.py @@ -0,0 +1,66 @@ +import typing as t +from dataclasses import dataclass + +from datasets import Dataset, load_dataset +from tqdm import tqdm +from utils import timeit + +from belar.metrics import ( + EditDistance, + EditRatio, + EntailmentScore, + Evaluation, + Rouge1, + Rouge2, + RougeL, + SBERTScore, +) + +DEVICE = ("cuda",) +BATCHES = [0, 1, 10, 20, 30, 60] +# init metrics +sbert_score = SBERTScore(similarity_metric="cosine") +entail = EntailmentScore(max_length=512) +METRICS = { + "Rouge1": Rouge1, + 
"Rouge2": Rouge2, + "RougeL": RougeL, + "EditRatio": EditRatio, + "EditDistance": EditDistance, +} + + +@dataclass +class BenchmarkConfig: + device: t.Literal["cpu", "cuda"] + batch_sizes: list[int] + metrics: list[str] + + +def setup() -> t.Iterator[tuple[str, Evaluation, Dataset]]: + metrics = [m for m in METRICS.values()] + for b in BATCHES: + setup_name = f"batch-{b}" + ds = load_dataset("explodinggradients/eli5-test", split="test_eli5") + assert isinstance(ds, Dataset), f"{type(ds)} found in the place of Dataset!" + batched = False if b == 0 else True + e = Evaluation( + metrics=metrics, + batched=batched, + batch_size=b, + ) + yield setup_name, e, ds + + +@timeit +def evaluate(e: Evaluation, ds: Dataset): + e.eval(ds["ground_truth"], ds["generated_text"]) + + +if __name__ == "__main__": + results = {} + for setup_name, e, ds in tqdm(setup()): + mean, var = evaluate(e, ds) + results[setup_name] = (mean, var) + + print(results) diff --git a/tests/integration/run_all_metrics.py b/tests/integration/run_all_metrics.py deleted file mode 100644 index 099572a18..000000000 --- a/tests/integration/run_all_metrics.py +++ /dev/null @@ -1,26 +0,0 @@ -from datasets import concatenate_datasets, load_dataset - -from belar.metrics import ( - EditDistance, - EditRatio, - EntailmentScore, - Evaluation, - Rouge1, - Rouge2, - RougeL, - SBERTScore, -) - -ds = load_dataset("explodinggradients/eli5-test", split="test_eli5") -print(ds.shape) -sbert_score = SBERTScore(similarity_metric="cosine") -entail = EntailmentScore(max_length=512) - -e = Evaluation( - metrics=[Rouge1, Rouge2, RougeL, sbert_score, EditDistance, EditRatio, entail], - batched=False, - batch_size=30, -) -r = e.eval(ds["ground_truth"], ds["generated_text"]) -print(r) -print(r.describe()) diff --git a/tests/integration/utils.py b/tests/integration/utils.py new file mode 100644 index 000000000..c7acde3bb --- /dev/null +++ b/tests/integration/utils.py @@ -0,0 +1,27 @@ +from __future__ import annotations + +import time + +import numpy as np + + +def timeit(func, iteration=3): + def function_timer(*args, **kwargs) -> tuple(np.floating, np.floating): + """ + Time the execution of a function and returns the time taken + """ + # warmup + func(*args, **kwargs) + + runtimes = [] + for _ in range(iteration): + start = time.time() + # we dont care about the return value + func(*args, **kwargs) + end = time.time() + runtime = end - start + runtimes.append(runtime) + + return np.mean(runtimes), np.var(runtimes) + + return function_timer From ab7b0747d06bf664c73530122a044afcc5868f01 Mon Sep 17 00:00:00 2001 From: Jithin James Date: Sat, 13 May 2023 16:39:13 +0530 Subject: [PATCH 05/18] pretty print benchmarks --- pyproject.toml | 2 +- tests/integration/benchmark.py | 26 +++++++++++--------------- tests/integration/utils.py | 12 ++++++++++++ 3 files changed, 24 insertions(+), 16 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index ad537b4bd..50e48f46f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,8 +14,8 @@ dynamic = ["version", "readme"] [project.optional-dependencies] test = [ # "pytest-cov", - "pydantic", "pytest", + "rich", ] [tool.setuptools.dynamic] diff --git a/tests/integration/benchmark.py b/tests/integration/benchmark.py index fd2bd9e36..643e66d84 100644 --- a/tests/integration/benchmark.py +++ b/tests/integration/benchmark.py @@ -2,8 +2,9 @@ from dataclasses import dataclass from datasets import Dataset, load_dataset +from torch.cuda import is_available from tqdm import tqdm -from utils import timeit +from utils import 
print_table, timeit from belar.metrics import ( EditDistance, @@ -16,40 +17,35 @@ SBERTScore, ) -DEVICE = ("cuda",) +DEVICE = "cuda" if is_available() else "cpu" BATCHES = [0, 1, 10, 20, 30, 60] # init metrics sbert_score = SBERTScore(similarity_metric="cosine") -entail = EntailmentScore(max_length=512) +entail = EntailmentScore(max_length=512, device=DEVICE) METRICS = { "Rouge1": Rouge1, "Rouge2": Rouge2, "RougeL": RougeL, "EditRatio": EditRatio, "EditDistance": EditDistance, + "SBERTScore": sbert_score, + "EntailmentScore": entail, } - - -@dataclass -class BenchmarkConfig: - device: t.Literal["cpu", "cuda"] - batch_sizes: list[int] - metrics: list[str] +DS = load_dataset("explodinggradients/eli5-test", split="test_eli5") def setup() -> t.Iterator[tuple[str, Evaluation, Dataset]]: metrics = [m for m in METRICS.values()] for b in BATCHES: setup_name = f"batch-{b}" - ds = load_dataset("explodinggradients/eli5-test", split="test_eli5") - assert isinstance(ds, Dataset), f"{type(ds)} found in the place of Dataset!" + assert isinstance(DS, Dataset), f"{type(DS)} found in the place of Dataset!" batched = False if b == 0 else True e = Evaluation( metrics=metrics, batched=batched, batch_size=b, ) - yield setup_name, e, ds + yield setup_name, e, DS @timeit @@ -59,8 +55,8 @@ def evaluate(e: Evaluation, ds: Dataset): if __name__ == "__main__": results = {} - for setup_name, e, ds in tqdm(setup()): + for setup_name, e, ds in tqdm(setup(), total=len(BATCHES)): mean, var = evaluate(e, ds) results[setup_name] = (mean, var) - print(results) + print_table(results) diff --git a/tests/integration/utils.py b/tests/integration/utils.py index c7acde3bb..9ea962a65 100644 --- a/tests/integration/utils.py +++ b/tests/integration/utils.py @@ -3,6 +3,8 @@ import time import numpy as np +from rich.console import Console +from rich.table import Table def timeit(func, iteration=3): @@ -25,3 +27,13 @@ def function_timer(*args, **kwargs) -> tuple(np.floating, np.floating): return np.mean(runtimes), np.var(runtimes) return function_timer + + +def print_table(result): + table = Table("Batch Name", "(mean, var)", title="Benchmark Results") + + for batch_name, (mean, var) in result.items(): + table.add_row(batch_name, f"{mean:.4f}, {var:.4f}") + + console = Console() + console.print(table) From b50ce520084d46c97620577e5aa4c0087e61c849 Mon Sep 17 00:00:00 2001 From: Jithin James Date: Sat, 13 May 2023 17:27:57 +0530 Subject: [PATCH 06/18] added makefile for all CI/CD --- Makefile | 30 +++++++++++++++++++ pyproject.toml | 7 +++++ .../{integration => benchmarks}/benchmark.py | 6 ++-- tests/{integration => benchmarks}/utils.py | 0 4 files changed, 40 insertions(+), 3 deletions(-) create mode 100644 Makefile rename tests/{integration => benchmarks}/benchmark.py (93%) rename tests/{integration => benchmarks}/utils.py (100%) diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..1bc7b4ef1 --- /dev/null +++ b/Makefile @@ -0,0 +1,30 @@ +GIT_ROOT ?= $(shell git rev-parse --show-toplevel) + +help: ## Show all Makefile targets + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}' + +.PHONY: format lint type style clean run-benchmarks +format: ## Running code formatter: black and isort + @echo "(black) Formatting codebase..." + @black --config pyproject.toml belar tests docs examples + @echo "(black) Formatting stubs..." + @find src -name "*.pyi" ! 
-name "*_pb2*" -exec black --pyi --config pyproject.toml {} \; + @echo "(isort) Reordering imports..." + @isort . + @echo "(ruff) Running fix only..." + @ruff check belar examples tests --fix-only +lint: ## Running lint checker: ruff + @echo "(ruff) Linting development project..." + @ruff check belar examples tests +type: ## Running type checker: pyright + @echo "(pyright) Typechecking codebase..." + @pyright -p belar -w +style: format lint +clean: ## Clean all generated files + @echo "Cleaning all generated files..." + @cd $(GIT_ROOT)/docs && make clean + @cd $(GIT_ROOT) || exit 1 + @find . -type f -name '*.py[co]' -delete -o -type d -name __pycache__ -delete +run-benchmarks: ## Run benchmarks + @echo "Running benchmarks..." + @cd $(GIT_ROOT)/tests/benchmarks && python benchmark.py diff --git a/pyproject.toml b/pyproject.toml index 50e48f46f..9d87e4158 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,13 @@ test = [ "rich", ] +dev = [ + "ruff", + "isort", + "black", + "pyright", +] + [tool.setuptools.dynamic] readme = {file = ["README.md"], content-type = "text/plain"} diff --git a/tests/integration/benchmark.py b/tests/benchmarks/benchmark.py similarity index 93% rename from tests/integration/benchmark.py rename to tests/benchmarks/benchmark.py index 643e66d84..525a86847 100644 --- a/tests/integration/benchmark.py +++ b/tests/benchmarks/benchmark.py @@ -18,7 +18,7 @@ ) DEVICE = "cuda" if is_available() else "cpu" -BATCHES = [0, 1, 10, 20, 30, 60] +BATCHES = [0, 1] # init metrics sbert_score = SBERTScore(similarity_metric="cosine") entail = EntailmentScore(max_length=512, device=DEVICE) @@ -28,8 +28,8 @@ "RougeL": RougeL, "EditRatio": EditRatio, "EditDistance": EditDistance, - "SBERTScore": sbert_score, - "EntailmentScore": entail, + # "SBERTScore": sbert_score, + # "EntailmentScore": entail, } DS = load_dataset("explodinggradients/eli5-test", split="test_eli5") diff --git a/tests/integration/utils.py b/tests/benchmarks/utils.py similarity index 100% rename from tests/integration/utils.py rename to tests/benchmarks/utils.py From 71ff9e9fc38ebc95d50a2d55b116cd8e12e180f1 Mon Sep 17 00:00:00 2001 From: Jithin James Date: Sat, 13 May 2023 18:47:46 +0530 Subject: [PATCH 07/18] fix Makefiles --- Makefile | 6 +- examples/quickstart.ipynb | 163 ++++++++++++-------------------------- pyproject.toml | 2 +- 3 files changed, 54 insertions(+), 117 deletions(-) diff --git a/Makefile b/Makefile index 1bc7b4ef1..c613129fe 100644 --- a/Makefile +++ b/Makefile @@ -6,9 +6,9 @@ help: ## Show all Makefile targets .PHONY: format lint type style clean run-benchmarks format: ## Running code formatter: black and isort @echo "(black) Formatting codebase..." - @black --config pyproject.toml belar tests docs examples + @black --config pyproject.toml belar tests examples @echo "(black) Formatting stubs..." - @find src -name "*.pyi" ! -name "*_pb2*" -exec black --pyi --config pyproject.toml {} \; + @find belar -name "*.pyi" ! -name "*_pb2*" -exec black --pyi --config pyproject.toml {} \; @echo "(isort) Reordering imports..." @isort . @echo "(ruff) Running fix only..." @@ -18,7 +18,7 @@ lint: ## Running lint checker: ruff @ruff check belar examples tests type: ## Running type checker: pyright @echo "(pyright) Typechecking codebase..." - @pyright -p belar -w + @pyright -p belar style: format lint clean: ## Clean all generated files @echo "Cleaning all generated files..." 
diff --git a/examples/quickstart.ipynb b/examples/quickstart.ipynb index 43166fffe..f726fcf23 100644 --- a/examples/quickstart.ipynb +++ b/examples/quickstart.ipynb @@ -1,109 +1,37 @@ { "cells": [ { - "cell_type": "code", - "execution_count": 1, - "id": "54b66a67", - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9710719d", - "metadata": {}, - "outputs": [], - "source": [ - "from datasets import load_dataset, concatenate_datasets\n", - "\n", - "def format_for_belar(row):\n", - " row[\"context\"] = row[\"selftext\"]\n", - " row[\"prompt\"] = row[\"title\"]\n", - " row['ground_truth'] = row[\"answers\"][\"text\"]\n", - " return row\n", - " \n", - "d = load_dataset(\"eli5\")\n", - "ds = d['test_eli5'].map(format_for_belar, batched=False)\n", - "ds = ds.select_columns([\"context\", \"prompt\", \"ground_truth\"])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "23c3f231", + "cell_type": "markdown", + "id": "aeb5819b", "metadata": {}, - "outputs": [], "source": [ - "ds = ds.shuffle(seed=42).select(range(500))\n", - "ds.shape" + "# Quickstart" ] }, { "cell_type": "code", - "execution_count": null, - "id": "81205b31", - "metadata": {}, - "outputs": [], - "source": [ - "ds.column_names" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2c5671fe", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "207d0e48", - "metadata": {}, - "outputs": [], - "source": [ - "import concurrent.futures as f\n", - "from langchain.llms import OpenAI\n", - "\n", - "llm = OpenAI()\n", - "prompt = \"\"\"\n", - "{context}\n", - "with the above context explain like I'm five: {prompt}\n", - "\"\"\"\n", - "\n", - "def get_answers(row):\n", - " qs, cs = row[\"prompt\"], row[\"context\"]\n", - " \n", - " generated_answers = []\n", - " with f.ThreadPoolExecutor(max_workers=10) as executor:\n", - " results = executor.map(llm, \n", - " [prompt.format(context=cs[i], prompt=qs[i]) for i in range(len(qs))])\n", - " for result in results:\n", - " generated_answers.append(result)\n", - " \n", - " row[\"generated_answers\"] = generated_answers\n", - " return row\n", - " \n", - "ds = ds.map(get_answers, batched=True, batch_size=10)" - ] - }, - { - "cell_type": "markdown", - "id": "5d93c658", + "execution_count": 30, + "id": "22c7dd25", "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. 
To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], "source": [ - "## Evalutate" + "%load_ext autoreload\n", + "%autoreload 2" ] }, { "cell_type": "code", "execution_count": 2, - "id": "076f2dbf", + "id": "0b5d4d41", "metadata": {}, "outputs": [ { @@ -136,18 +64,27 @@ }, { "cell_type": "code", - "execution_count": 11, - "id": "7c0cda03", + "execution_count": 24, + "id": "0b5abd7d", "metadata": {}, "outputs": [], "source": [ - "from belar.metrics import Rouge1, Evaluation, Rouge2, RougeL, SBERTScore, EntailmentScore, EditRatio, EditDistance" + "from belar.metrics import (\n", + " Rouge1,\n", + " Evaluation,\n", + " Rouge2,\n", + " RougeL,\n", + " SBERTScore,\n", + " EntailmentScore,\n", + " EditRatio,\n", + " EditDistance,\n", + ")" ] }, { "cell_type": "code", - "execution_count": 18, - "id": "887b613c", + "execution_count": 28, + "id": "a77c805d", "metadata": {}, "outputs": [ { @@ -163,17 +100,19 @@ ], "source": [ "sbert_score = SBERTScore(similarity_metric=\"cosine\")\n", - "entail = EntailmentScore()\n", + "entail = EntailmentScore(max_length=512)\n", "\n", "e = Evaluation(\n", - " metrics=[Rouge1, Rouge2, RougeL, sbert_score, EditDistance, EditRatio],\n", - " batched=False, batch_size=30)" + " metrics=[Rouge1, Rouge2, RougeL, sbert_score, EditDistance, EditRatio, entail],\n", + " batched=False,\n", + " batch_size=30,\n", + ")" ] }, { "cell_type": "code", - "execution_count": 19, - "id": "32e338ad", + "execution_count": 29, + "id": "e879f51b", "metadata": {}, "outputs": [ { @@ -198,7 +137,7 @@ { "cell_type": "code", "execution_count": 20, - "id": "b90661cb", + "id": "f64c1915", "metadata": {}, "outputs": [ { @@ -219,7 +158,7 @@ { "cell_type": "code", "execution_count": 21, - "id": "8c926330", + "id": "7c812dfe", "metadata": {}, "outputs": [ { @@ -234,13 +173,13 @@ } ], "source": [ - "r['rouge1_score']" + "r[\"rouge1_score\"]" ] }, { "cell_type": "code", "execution_count": 22, - "id": "d65834d4", + "id": "4c8c51b1", "metadata": {}, "outputs": [ { @@ -302,7 +241,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b59d1d8a", + "id": "ebf0a29d", "metadata": {}, "outputs": [], "source": [ @@ -312,7 +251,7 @@ { "cell_type": "code", "execution_count": null, - "id": "29814470", + "id": "4882982d", "metadata": {}, "outputs": [], "source": [ @@ -322,7 +261,7 @@ { "cell_type": "code", "execution_count": 5, - "id": "0e1da651", + "id": "08ef4d51", "metadata": {}, "outputs": [ { @@ -343,7 +282,7 @@ { "cell_type": "code", "execution_count": 6, - "id": "52bb6cee", + "id": "f8a58fa8", "metadata": {}, "outputs": [ { @@ -857,15 +796,13 @@ } ], "source": [ - "ds_eval['SBERT_cosine_score']" + "ds_eval[\"SBERT_cosine_score\"]" ] }, { - "cell_type": "code", - "execution_count": null, - "id": "50eba21d", + "cell_type": "markdown", + "id": "3893e1c7", "metadata": {}, - "outputs": [], "source": [] } ], diff --git a/pyproject.toml b/pyproject.toml index 9d87e4158..a8a74c469 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ test = [ dev = [ "ruff", "isort", - "black", + "black[jupyter]", "pyright", ] From 5678fd8294642ad6093a54f47b16f4579b692b71 Mon Sep 17 00:00:00 2001 From: Jithin James Date: Sat, 13 May 2023 19:06:35 +0530 Subject: [PATCH 08/18] new CI workflow --- .github/workflows/ci.yaml | 42 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 .github/workflows/ci.yaml diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 000000000..e6f085d33 --- /dev/null +++ b/.github/workflows/ci.yaml @@ 
-0,0 +1,42 @@ +name: CI + +on: + pull_request: + +env: + LINES: 120 + COLUMNS: 120 + +# https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#defaultsrun +defaults: + run: + shell: bash --noprofile --norc -exo pipefail {0} + +jobs: + diff: + runs-on: ubuntu-latest + outputs: + related: ${{ steps.filter.outputs.related }} + bentoml: ${{ steps.filter.outputs.bentoml }} + docs: ${{ steps.filter.outputs.docs }} + protos: ${{ steps.filter.outputs.protos }} + steps: + - uses: actions/checkout@v3 + - uses: dorny/paths-filter@v2 + id: filter + with: + base: "main" + filters: | + related: &related + - .github/workflows/ci.yml + - codecov.yml + - pyproject.toml + belar: + - "belar/**" + - "tests/**" + - "examples/**" + docs: + - *related + - requirements/docs-requirements.txt + - "docs/**" + From 2b270cce7d4762b9404fd4cff13544b99f9ea875 Mon Sep 17 00:00:00 2001 From: Jithin James Date: Sat, 13 May 2023 19:29:11 +0530 Subject: [PATCH 09/18] unit-test workflow --- .github/workflows/ci.yaml | 54 +++++++++++++++++++++++++++-- requirements/tests-requirements.txt | 7 ++++ tests/unit/test_simple.py | 4 +++ 3 files changed, 63 insertions(+), 2 deletions(-) create mode 100644 requirements/tests-requirements.txt create mode 100644 tests/unit/test_simple.py diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index e6f085d33..dc69103ba 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -17,9 +17,8 @@ jobs: runs-on: ubuntu-latest outputs: related: ${{ steps.filter.outputs.related }} - bentoml: ${{ steps.filter.outputs.bentoml }} + belar: ${{ steps.filter.outputs.belar }} docs: ${{ steps.filter.outputs.docs }} - protos: ${{ steps.filter.outputs.protos }} steps: - uses: actions/checkout@v3 - uses: dorny/paths-filter@v2 @@ -31,6 +30,7 @@ jobs: - .github/workflows/ci.yml - codecov.yml - pyproject.toml + - requirements/tests-requirements.txt belar: - "belar/**" - "tests/**" @@ -40,3 +40,53 @@ jobs: - requirements/docs-requirements.txt - "docs/**" + unit_tests: + needs: + - diff + + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + python-version: ["3.7", "3.8", "3.9", "3.10"] + + if: ${{ (github.event_name == 'pull_request' && needs.diff.outputs.belar == 'true') || github.event_name == 'push' }} + name: python${{ matrix.python-version }}_unit_tests (${{ matrix.os }}) + runs-on: ${{ matrix.os }} + + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 # fetch all tags and branches + + - name: Setup python + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + architecture: x64 + + - name: Get pip cache dir + id: cache-dir + run: | + echo ::set-output name=dir::$(pip cache dir) + + - name: Cache pip dependencies + uses: actions/cache@v3 + id: cache-pip + with: + path: ${{ steps.cache-dir.outputs.dir }} + key: ${{ runner.os }}-tests-${{ hashFiles('requirements/tests-requirements.txt') }} + + - name: Install dependencies + run: | + pip install -r requirements/tests-requirements.txt + + - name: Run unit tests + run: | + # OPTS=(--cov-config pyproject.toml --cov=src/bentoml --cov-append) + if [ "${{ matrix.os }}" != 'windows-latest' ]; then + # we will use pytest-xdist to improve tests run-time. 
+ OPTS=(--dist loadfile -n auto) + fi + # Now run the unit tests + pytest tests/unit "${OPTS[@]}" diff --git a/requirements/tests-requirements.txt b/requirements/tests-requirements.txt new file mode 100644 index 000000000..cdb7e56b4 --- /dev/null +++ b/requirements/tests-requirements.txt @@ -0,0 +1,7 @@ +pytest +rich +ruff +isort +black[jupyter] +pyright +pytest-xdist[psutil] diff --git a/tests/unit/test_simple.py b/tests/unit/test_simple.py new file mode 100644 index 000000000..a247ee597 --- /dev/null +++ b/tests/unit/test_simple.py @@ -0,0 +1,4 @@ +def test_import(): + import belar + + assert belar is not None From 91d7c8aa9f53185582fa87fde0159b8a01928346 Mon Sep 17 00:00:00 2001 From: Jithin James Date: Sat, 13 May 2023 19:45:57 +0530 Subject: [PATCH 10/18] add github token --- .github/workflows/ci.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index dc69103ba..ffa0ae299 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -25,6 +25,7 @@ jobs: id: filter with: base: "main" + token: ${{ github.token }} filters: | related: &related - .github/workflows/ci.yml From 3b601b524ed95700b692a830c7baef7788bb7845 Mon Sep 17 00:00:00 2001 From: Jithin James Date: Sat, 13 May 2023 20:54:35 +0530 Subject: [PATCH 11/18] permissions --- .github/workflows/ci.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index ffa0ae299..9bd83443f 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -3,6 +3,9 @@ name: CI on: pull_request: +permissions: + contents: read + env: LINES: 120 COLUMNS: 120 From 5e790a1fd5153162317e658db6e05efe1df34da1 Mon Sep 17 00:00:00 2001 From: Jithin James Date: Sat, 13 May 2023 20:56:35 +0530 Subject: [PATCH 12/18] install library --- .github/workflows/ci.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 9bd83443f..dd20cc300 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -83,6 +83,7 @@ jobs: - name: Install dependencies run: | + pip install "." pip install -r requirements/tests-requirements.txt - name: Run unit tests From 39d72c62cafa5ba2505de67e25ca9eb2cf96b89a Mon Sep 17 00:00:00 2001 From: Jithin James Date: Sat, 13 May 2023 21:07:30 +0530 Subject: [PATCH 13/18] add code stype checks --- .github/workflows/ci.yaml | 56 ++++++++++++++++++- .../{tests-requirements.txt => dev.txt} | 2 - requirements/test.txt | 2 + 3 files changed, 55 insertions(+), 5 deletions(-) rename requirements/{tests-requirements.txt => dev.txt} (58%) create mode 100644 requirements/test.txt diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index dd20cc300..34cfebcd1 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -34,7 +34,7 @@ jobs: - .github/workflows/ci.yml - codecov.yml - pyproject.toml - - requirements/tests-requirements.txt + - requirements/test.txt belar: - "belar/**" - "tests/**" @@ -79,12 +79,12 @@ jobs: id: cache-pip with: path: ${{ steps.cache-dir.outputs.dir }} - key: ${{ runner.os }}-tests-${{ hashFiles('requirements/tests-requirements.txt') }} + key: ${{ runner.os }}-tests-${{ hashFiles('requirements/test.txt') }} - name: Install dependencies run: | pip install "." 
- pip install -r requirements/tests-requirements.txt + pip install -r requirements/test.txt - name: Run unit tests run: | @@ -95,3 +95,53 @@ jobs: fi # Now run the unit tests pytest tests/unit "${OPTS[@]}" + + codestyle_check: + runs-on: ubuntu-latest + needs: + - diff + + if: ${{ (github.event_name == 'pull_request' && needs.diff.outputs.belar == 'true') || github.event_name == 'push' }} + + steps: + - uses: actions/checkout@v3 + + - name: Setup python + uses: actions/setup-python@v4 + with: + python-version: "3.10.6" + architecture: x64 + + - name: Get pip cache dir + id: cache-dir + run: | + echo ::set-output name=dir::$(pip cache dir) + + - name: Fetch origin + run: git fetch origin "$GITHUB_BASE_REF" + + - name: Setup node + uses: actions/setup-node@v3 + with: + node-version: "17" + + - name: Cache pip dependencies + uses: actions/cache@v3 + id: cache-pip + with: + path: ${{ steps.cache-dir.outputs.dir }} + key: codestyle-${{ hashFiles('requirements/dev.txt') }} + + - name: Install dependencies + run: | + pip install . + pip install -r requirements/dev.txt + + - name: Format check + run: | + make format + - name: Lint check + run: make lint + - name: Type check + if: ${{ github.event_name == 'pull_request' }} + run: git diff --name-only --diff-filter=AM "origin/$GITHUB_BASE_REF" -z -- '*.py{,i}' | xargs -0 --no-run-if-empty pyright diff --git a/requirements/tests-requirements.txt b/requirements/dev.txt similarity index 58% rename from requirements/tests-requirements.txt rename to requirements/dev.txt index cdb7e56b4..ea06f113a 100644 --- a/requirements/tests-requirements.txt +++ b/requirements/dev.txt @@ -1,7 +1,5 @@ -pytest rich ruff isort black[jupyter] pyright -pytest-xdist[psutil] diff --git a/requirements/test.txt b/requirements/test.txt new file mode 100644 index 000000000..3951f472c --- /dev/null +++ b/requirements/test.txt @@ -0,0 +1,2 @@ +pytest +pytest-xdist[psutil] From ade603c5362c13e7abfda28dfe0c301abfe64ebe Mon Sep 17 00:00:00 2001 From: Jithin James Date: Sat, 13 May 2023 21:21:28 +0530 Subject: [PATCH 14/18] fix linting and formating --- belar/metrics/__init__.py | 16 +++++++++++++++- belar/metrics/base.py | 1 - belar/metrics/simple.py | 6 ++---- belar/utils.py | 3 ++- tests/benchmarks/benchmark.py | 13 ++----------- 5 files changed, 21 insertions(+), 18 deletions(-) diff --git a/belar/metrics/__init__.py b/belar/metrics/__init__.py index 9131315af..70fe4a3e5 100644 --- a/belar/metrics/__init__.py +++ b/belar/metrics/__init__.py @@ -1,4 +1,18 @@ from belar.metrics.base import Evaluation, Metric from belar.metrics.factual import EntailmentScore from belar.metrics.similarity import SBERTScore -from belar.metrics.simple import * +from belar.metrics.simple import (BLUE, EditDistance, EditRatio, Rouge1, + Rouge2, RougeL) + +__all__ = [ + "Evaluation", + "Metric", + "EntailmentScore", + "SBERTScore", + "BLUE", + "EditDistance", + "EditRatio", + "RougeL", + "Rouge1", + "Rouge2", +] diff --git a/belar/metrics/base.py b/belar/metrics/base.py index 5d0108767..95445a67d 100644 --- a/belar/metrics/base.py +++ b/belar/metrics/base.py @@ -2,7 +2,6 @@ import typing as t from abc import ABC, abstractmethod -from collections import namedtuple from dataclasses import dataclass import numpy as np diff --git a/belar/metrics/simple.py b/belar/metrics/simple.py index 882445a43..116225776 100644 --- a/belar/metrics/simple.py +++ b/belar/metrics/simple.py @@ -14,7 +14,7 @@ @dataclass -class BLEU(Metric): +class BLEUScore(Metric): weights: list[float] = field(default_factory=lambda: [0.25, 
0.25, 0.25, 0.25]) smoothing_function = None @@ -94,8 +94,6 @@ def score(self, ground_truth: t.List[str], generated_text: t.List[str]): Rouge1 = ROUGE("rouge1") Rouge2 = ROUGE("rouge2") RougeL = ROUGE("rougeL") -BLUE = BLEU() +BLUE = BLEUScore() EditDistance = EditScore("distance") EditRatio = EditScore("ratio") - -__all__ = ["Rouge1", "Rouge2", "RougeL", "BLEU", "EditDistance", "EditRatio"] diff --git a/belar/utils.py b/belar/utils.py index 9564cfa66..ea69b3d5e 100644 --- a/belar/utils.py +++ b/belar/utils.py @@ -1,7 +1,8 @@ -import torch import typing as t from warnings import warn +import torch + DEVICES = ["cpu", "cuda"] diff --git a/tests/benchmarks/benchmark.py b/tests/benchmarks/benchmark.py index 525a86847..50d63beb5 100644 --- a/tests/benchmarks/benchmark.py +++ b/tests/benchmarks/benchmark.py @@ -1,21 +1,12 @@ import typing as t -from dataclasses import dataclass from datasets import Dataset, load_dataset from torch.cuda import is_available from tqdm import tqdm from utils import print_table, timeit -from belar.metrics import ( - EditDistance, - EditRatio, - EntailmentScore, - Evaluation, - Rouge1, - Rouge2, - RougeL, - SBERTScore, -) +from belar.metrics import (EditDistance, EditRatio, EntailmentScore, + Evaluation, Rouge1, Rouge2, RougeL, SBERTScore) DEVICE = "cuda" if is_available() else "cpu" BATCHES = [0, 1] From 6d94b12610757258426ffd3aa804e74513b46784 Mon Sep 17 00:00:00 2001 From: Jithin James Date: Sat, 13 May 2023 22:18:30 +0530 Subject: [PATCH 15/18] fix type annotation errors --- belar/metrics/base.py | 14 ++++++++++---- belar/metrics/factual.py | 5 ++++- belar/metrics/similarity.py | 6 ++++++ belar/utils.py | 21 +++++++++++---------- tests/benchmarks/utils.py | 12 ++++++++++-- 5 files changed, 41 insertions(+), 17 deletions(-) diff --git a/belar/metrics/base.py b/belar/metrics/base.py index 95445a67d..53a585be8 100644 --- a/belar/metrics/base.py +++ b/belar/metrics/base.py @@ -12,16 +12,18 @@ class Metric(ABC): @property @abstractmethod - def name(self) -> str: + def name(self: t.Self) -> str: ... @property @abstractmethod - def is_batchable(self) -> bool: + def is_batchable(self: t.Self) -> bool: ... @abstractmethod - def score(self, ground_truth: list[str], generated_text: list[str]) -> list[float]: + def score( + self: t.Self, ground_truth: list[str], generated_text: list[str] + ) -> list[float]: ... 
@@ -67,7 +69,11 @@ def _get_score(self, row: dict[str, list[t.Any]] | dict[str, t.Any]): else: # not batched split_indices = len(row["ground_truth"]) ground_truths = row["ground_truth"] - generated_texts = [row["generated_text"]] * split_indices + generated_text = row["generated_text"] + assert isinstance( + generated_text, str + ), f"generated_text should be str but got {type(generated_text)}" + generated_texts = [generated_text] * split_indices scores = metric.score(ground_truths, generated_texts) score = np.max(scores) diff --git a/belar/metrics/factual.py b/belar/metrics/factual.py index adc0509f7..8999f6d9f 100644 --- a/belar/metrics/factual.py +++ b/belar/metrics/factual.py @@ -8,6 +8,9 @@ from belar.metrics import Metric from belar.utils import device_check +if t.TYPE_CHECKING: + from torch import device as Device + @dataclass class EntailmentScore(Metric): @@ -18,7 +21,7 @@ class EntailmentScore(Metric): model_name: str = "typeform/distilbert-base-uncased-mnli" max_length: int = 512 batch_size: int = 4 - device: t.Literal["cpu", "cuda"] = "cpu" + device: t.Literal["cpu", "cuda"] | Device = "cpu" def __post_init__(self): self.device = device_check(self.device) diff --git a/belar/metrics/similarity.py b/belar/metrics/similarity.py index 4dd55985a..85f55df11 100644 --- a/belar/metrics/similarity.py +++ b/belar/metrics/similarity.py @@ -9,6 +9,9 @@ from belar.metrics.base import Metric +if t.TYPE_CHECKING: + from torch import Tensor + SBERT_METRIC = t.Literal["cosine", "euclidean"] @@ -42,6 +45,9 @@ def score( gentext_emb = self.model.encode( generated_text, batch_size=self.batch_size, convert_to_numpy=True ) + assert isinstance(gentext_emb, Tensor) and isinstance( + gndtruth_emb, Tensor + ), f"Both gndtruth_emb[{type(gentext_emb)}], gentext_emb[{type(gentext_emb)}] should be Tensor." 
if self.similarity_metric == "cosine": score = np.dot(gndtruth_emb, gentext_emb.T) / ( diff --git a/belar/utils.py b/belar/utils.py index ea69b3d5e..913e827b1 100644 --- a/belar/utils.py +++ b/belar/utils.py @@ -3,18 +3,19 @@ import torch +if t.TYPE_CHECKING: + from torch import device as Device + DEVICES = ["cpu", "cuda"] -def device_check(device: t.Literal[DEVICES]): - if device == "cuda": - if torch.cuda.is_available(): - device = torch.device("cuda") - else: - warn("cuda not available, using cpu") - elif device == "cpu": - device = torch.device("cpu") - else: +def device_check(device: t.Literal["cpu", "cuda"] | Device) -> torch.device: + if isinstance(device, Device): + return device + if device not in DEVICES: raise ValueError(f"Invalid device {device}") + if device == "cuda" and not torch.cuda.is_available(): + warn("cuda not available, using cpu") + device = "cpu" - return device + return torch.device(device) diff --git a/tests/benchmarks/utils.py b/tests/benchmarks/utils.py index 9ea962a65..2dd81d6cf 100644 --- a/tests/benchmarks/utils.py +++ b/tests/benchmarks/utils.py @@ -1,14 +1,22 @@ from __future__ import annotations import time +import typing as t import numpy as np from rich.console import Console from rich.table import Table +P = t.ParamSpec("P") +R = t.TypeVar("R") +OrigFunc = t.Callable[P, R] +DecoratedFunc = t.Callable[P, tuple[np.floating, np.floating]] -def timeit(func, iteration=3): - def function_timer(*args, **kwargs) -> tuple(np.floating, np.floating): + +def timeit(func: OrigFunc, iteration: int = 3) -> DecoratedFunc: + def function_timer( + *args: P.args, **kwargs: P.kwargs + ) -> tuple[np.floating, np.floating]: """ Time the execution of a function and returns the time taken """ From 97e1b417f8bb098e7a5c4c51b2cfa0360e9596ac Mon Sep 17 00:00:00 2001 From: Jithin James Date: Sat, 13 May 2023 22:30:14 +0530 Subject: [PATCH 16/18] fix check for types --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 34cfebcd1..eca5ee941 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -144,4 +144,4 @@ jobs: run: make lint - name: Type check if: ${{ github.event_name == 'pull_request' }} - run: git diff --name-only --diff-filter=AM "origin/$GITHUB_BASE_REF" -z -- '*.py{,i}' | xargs -0 --no-run-if-empty pyright + run: git diff --name-only --diff-filter=AM "origin/$GITHUB_BASE_REF" -z -- '**/*.py' '**/*.pyi' | xargs -0 --no-run-if-empty pyright From 99c9054332dee51cb023540af18bd7092199ee8a Mon Sep 17 00:00:00 2001 From: Jithin James Date: Sat, 13 May 2023 22:33:27 +0530 Subject: [PATCH 17/18] fix lint --- Makefile | 2 +- belar/metrics/similarity.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index c613129fe..27a1eb38a 100644 --- a/Makefile +++ b/Makefile @@ -19,12 +19,12 @@ lint: ## Running lint checker: ruff type: ## Running type checker: pyright @echo "(pyright) Typechecking codebase..." @pyright -p belar -style: format lint clean: ## Clean all generated files @echo "Cleaning all generated files..." @cd $(GIT_ROOT)/docs && make clean @cd $(GIT_ROOT) || exit 1 @find . -type f -name '*.py[co]' -delete -o -type d -name __pycache__ -delete +run-ci: format lint type ## Running all CI checks run-benchmarks: ## Run benchmarks @echo "Running benchmarks..." 
@cd $(GIT_ROOT)/tests/benchmarks && python benchmark.py diff --git a/belar/metrics/similarity.py b/belar/metrics/similarity.py index 85f55df11..8c38137c9 100644 --- a/belar/metrics/similarity.py +++ b/belar/metrics/similarity.py @@ -45,9 +45,10 @@ def score( gentext_emb = self.model.encode( generated_text, batch_size=self.batch_size, convert_to_numpy=True ) - assert isinstance(gentext_emb, Tensor) and isinstance( - gndtruth_emb, Tensor - ), f"Both gndtruth_emb[{type(gentext_emb)}], gentext_emb[{type(gentext_emb)}] should be Tensor." + assert isinstance(gentext_emb, Tensor) and isinstance(gndtruth_emb, Tensor), ( + f"Both gndtruth_emb[{type(gentext_emb)}], gentext_emb[{type(gentext_emb)}]" + " should be Tensor." + ) if self.similarity_metric == "cosine": score = np.dot(gndtruth_emb, gentext_emb.T) / ( From 295f422922c73da6d1888d77be47f08999117711 Mon Sep 17 00:00:00 2001 From: Jithin James Date: Sat, 13 May 2023 22:43:04 +0530 Subject: [PATCH 18/18] fix old state storage --- .github/workflows/ci.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index eca5ee941..e5c930a5c 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -72,7 +72,7 @@ jobs: - name: Get pip cache dir id: cache-dir run: | - echo ::set-output name=dir::$(pip cache dir) + echo "dir=$(pip cache dir)" >> $GITHUB_OUTPUT - name: Cache pip dependencies uses: actions/cache@v3 @@ -115,7 +115,7 @@ jobs: - name: Get pip cache dir id: cache-dir run: | - echo ::set-output name=dir::$(pip cache dir) + echo "dir=$(pip cache dir)" >> $GITHUB_OUTPUT - name: Fetch origin run: git fetch origin "$GITHUB_BASE_REF"
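
For readers following this series, here is a minimal sketch of how the benchmark timing helpers introduced above compose once PATCH 15 lands. The `timeit` decorator and `print_table` names follow tests/benchmarks/utils.py and tests/benchmarks/benchmark.py; the package-style import path and the `busy_wait` workload are illustrative stand-ins, not part of the patches:

    from __future__ import annotations

    import time

    # Illustrative import path; benchmark.py itself uses a bare
    # `from utils import print_table, timeit` because it is run
    # from inside tests/benchmarks.
    from tests.benchmarks.utils import print_table, timeit


    @timeit  # one warmup call, then three timed iterations (the default)
    def busy_wait() -> None:
        # Hypothetical stand-in for the real workload, i.e.
        # Evaluation.eval(ground_truth, generated_text) in benchmark.py.
        time.sleep(0.01)


    if __name__ == "__main__":
        # The decorated function returns (mean, variance) of the runtimes,
        # which is exactly the shape print_table expects per row.
        mean, var = busy_wait()
        print_table({"busy-wait": (mean, var)})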