From bb6ff9627935f7ca845c7adb13d26f734b4682c6 Mon Sep 17 00:00:00 2001 From: Shahules786 Date: Sat, 13 May 2023 04:09:29 +0530 Subject: [PATCH 01/18] add max_length --- belar/metrics/factual.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/belar/metrics/factual.py b/belar/metrics/factual.py index adc0509f7..e636a2793 100644 --- a/belar/metrics/factual.py +++ b/belar/metrics/factual.py @@ -1,4 +1,5 @@ from __future__ import annotations +from tkinter.ttk import _Padding import typing as t from dataclasses import dataclass @@ -20,6 +21,7 @@ class EntailmentScore(Metric): batch_size: int = 4 device: t.Literal["cpu", "cuda"] = "cpu" + def __post_init__(self): self.device = device_check(self.device) self.tokenizer = AutoTokenizer.from_pretrained(self.model_name) From 8c017f4abfadaf2ca6585f55b4af7896bc387ba4 Mon Sep 17 00:00:00 2001 From: Shahules786 Date: Sat, 13 May 2023 04:19:26 +0530 Subject: [PATCH 02/18] add max_length --- belar/metrics/factual.py | 1 - 1 file changed, 1 deletion(-) diff --git a/belar/metrics/factual.py b/belar/metrics/factual.py index e636a2793..3b8655214 100644 --- a/belar/metrics/factual.py +++ b/belar/metrics/factual.py @@ -1,5 +1,4 @@ from __future__ import annotations -from tkinter.ttk import _Padding import typing as t from dataclasses import dataclass From fe8a01781e2cd370efe75ac8c92adb19c0d468cd Mon Sep 17 00:00:00 2001 From: Jithin James Date: Sat, 13 May 2023 12:30:37 +0530 Subject: [PATCH 03/18] black fixes --- belar/metrics/factual.py | 1 - 1 file changed, 1 deletion(-) diff --git a/belar/metrics/factual.py b/belar/metrics/factual.py index 3b8655214..adc0509f7 100644 --- a/belar/metrics/factual.py +++ b/belar/metrics/factual.py @@ -20,7 +20,6 @@ class EntailmentScore(Metric): batch_size: int = 4 device: t.Literal["cpu", "cuda"] = "cpu" - def __post_init__(self): self.device = device_check(self.device) self.tokenizer = AutoTokenizer.from_pretrained(self.model_name) From 0db62659cda9741c060c7ab990abb9ac77d36b17 Mon Sep 17 00:00:00 2001 From: Jithin James Date: Sat, 13 May 2023 16:18:39 +0530 Subject: [PATCH 04/18] added benchmarks --- pyproject.toml | 7 +++ tests/integration/benchmark.py | 66 ++++++++++++++++++++++++++++ tests/integration/run_all_metrics.py | 26 ----------- tests/integration/utils.py | 27 ++++++++++++ 4 files changed, 100 insertions(+), 26 deletions(-) create mode 100644 tests/integration/benchmark.py delete mode 100644 tests/integration/run_all_metrics.py create mode 100644 tests/integration/utils.py diff --git a/pyproject.toml b/pyproject.toml index e1af83e8d..ad537b4bd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,6 +11,13 @@ dependencies = [ ] dynamic = ["version", "readme"] +[project.optional-dependencies] +test = [ + # "pytest-cov", + "pydantic", + "pytest", +] + [tool.setuptools.dynamic] readme = {file = ["README.md"], content-type = "text/plain"} diff --git a/tests/integration/benchmark.py b/tests/integration/benchmark.py new file mode 100644 index 000000000..fd2bd9e36 --- /dev/null +++ b/tests/integration/benchmark.py @@ -0,0 +1,66 @@ +import typing as t +from dataclasses import dataclass + +from datasets import Dataset, load_dataset +from tqdm import tqdm +from utils import timeit + +from belar.metrics import ( + EditDistance, + EditRatio, + EntailmentScore, + Evaluation, + Rouge1, + Rouge2, + RougeL, + SBERTScore, +) + +DEVICE = ("cuda",) +BATCHES = [0, 1, 10, 20, 30, 60] +# init metrics +sbert_score = SBERTScore(similarity_metric="cosine") +entail = EntailmentScore(max_length=512) +METRICS = { + "Rouge1": Rouge1, + 
"Rouge2": Rouge2, + "RougeL": RougeL, + "EditRatio": EditRatio, + "EditDistance": EditDistance, +} + + +@dataclass +class BenchmarkConfig: + device: t.Literal["cpu", "cuda"] + batch_sizes: list[int] + metrics: list[str] + + +def setup() -> t.Iterator[tuple[str, Evaluation, Dataset]]: + metrics = [m for m in METRICS.values()] + for b in BATCHES: + setup_name = f"batch-{b}" + ds = load_dataset("explodinggradients/eli5-test", split="test_eli5") + assert isinstance(ds, Dataset), f"{type(ds)} found in the place of Dataset!" + batched = False if b == 0 else True + e = Evaluation( + metrics=metrics, + batched=batched, + batch_size=b, + ) + yield setup_name, e, ds + + +@timeit +def evaluate(e: Evaluation, ds: Dataset): + e.eval(ds["ground_truth"], ds["generated_text"]) + + +if __name__ == "__main__": + results = {} + for setup_name, e, ds in tqdm(setup()): + mean, var = evaluate(e, ds) + results[setup_name] = (mean, var) + + print(results) diff --git a/tests/integration/run_all_metrics.py b/tests/integration/run_all_metrics.py deleted file mode 100644 index 099572a18..000000000 --- a/tests/integration/run_all_metrics.py +++ /dev/null @@ -1,26 +0,0 @@ -from datasets import concatenate_datasets, load_dataset - -from belar.metrics import ( - EditDistance, - EditRatio, - EntailmentScore, - Evaluation, - Rouge1, - Rouge2, - RougeL, - SBERTScore, -) - -ds = load_dataset("explodinggradients/eli5-test", split="test_eli5") -print(ds.shape) -sbert_score = SBERTScore(similarity_metric="cosine") -entail = EntailmentScore(max_length=512) - -e = Evaluation( - metrics=[Rouge1, Rouge2, RougeL, sbert_score, EditDistance, EditRatio, entail], - batched=False, - batch_size=30, -) -r = e.eval(ds["ground_truth"], ds["generated_text"]) -print(r) -print(r.describe()) diff --git a/tests/integration/utils.py b/tests/integration/utils.py new file mode 100644 index 000000000..c7acde3bb --- /dev/null +++ b/tests/integration/utils.py @@ -0,0 +1,27 @@ +from __future__ import annotations + +import time + +import numpy as np + + +def timeit(func, iteration=3): + def function_timer(*args, **kwargs) -> tuple(np.floating, np.floating): + """ + Time the execution of a function and returns the time taken + """ + # warmup + func(*args, **kwargs) + + runtimes = [] + for _ in range(iteration): + start = time.time() + # we dont care about the return value + func(*args, **kwargs) + end = time.time() + runtime = end - start + runtimes.append(runtime) + + return np.mean(runtimes), np.var(runtimes) + + return function_timer From ab7b0747d06bf664c73530122a044afcc5868f01 Mon Sep 17 00:00:00 2001 From: Jithin James Date: Sat, 13 May 2023 16:39:13 +0530 Subject: [PATCH 05/18] pretty print benchmarks --- pyproject.toml | 2 +- tests/integration/benchmark.py | 26 +++++++++++--------------- tests/integration/utils.py | 12 ++++++++++++ 3 files changed, 24 insertions(+), 16 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index ad537b4bd..50e48f46f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,8 +14,8 @@ dynamic = ["version", "readme"] [project.optional-dependencies] test = [ # "pytest-cov", - "pydantic", "pytest", + "rich", ] [tool.setuptools.dynamic] diff --git a/tests/integration/benchmark.py b/tests/integration/benchmark.py index fd2bd9e36..643e66d84 100644 --- a/tests/integration/benchmark.py +++ b/tests/integration/benchmark.py @@ -2,8 +2,9 @@ from dataclasses import dataclass from datasets import Dataset, load_dataset +from torch.cuda import is_available from tqdm import tqdm -from utils import timeit +from utils import 
print_table, timeit from belar.metrics import ( EditDistance, @@ -16,40 +17,35 @@ SBERTScore, ) -DEVICE = ("cuda",) +DEVICE = "cuda" if is_available() else "cpu" BATCHES = [0, 1, 10, 20, 30, 60] # init metrics sbert_score = SBERTScore(similarity_metric="cosine") -entail = EntailmentScore(max_length=512) +entail = EntailmentScore(max_length=512, device=DEVICE) METRICS = { "Rouge1": Rouge1, "Rouge2": Rouge2, "RougeL": RougeL, "EditRatio": EditRatio, "EditDistance": EditDistance, + "SBERTScore": sbert_score, + "EntailmentScore": entail, } - - -@dataclass -class BenchmarkConfig: - device: t.Literal["cpu", "cuda"] - batch_sizes: list[int] - metrics: list[str] +DS = load_dataset("explodinggradients/eli5-test", split="test_eli5") def setup() -> t.Iterator[tuple[str, Evaluation, Dataset]]: metrics = [m for m in METRICS.values()] for b in BATCHES: setup_name = f"batch-{b}" - ds = load_dataset("explodinggradients/eli5-test", split="test_eli5") - assert isinstance(ds, Dataset), f"{type(ds)} found in the place of Dataset!" + assert isinstance(DS, Dataset), f"{type(DS)} found in the place of Dataset!" batched = False if b == 0 else True e = Evaluation( metrics=metrics, batched=batched, batch_size=b, ) - yield setup_name, e, ds + yield setup_name, e, DS @timeit @@ -59,8 +55,8 @@ def evaluate(e: Evaluation, ds: Dataset): if __name__ == "__main__": results = {} - for setup_name, e, ds in tqdm(setup()): + for setup_name, e, ds in tqdm(setup(), total=len(BATCHES)): mean, var = evaluate(e, ds) results[setup_name] = (mean, var) - print(results) + print_table(results) diff --git a/tests/integration/utils.py b/tests/integration/utils.py index c7acde3bb..9ea962a65 100644 --- a/tests/integration/utils.py +++ b/tests/integration/utils.py @@ -3,6 +3,8 @@ import time import numpy as np +from rich.console import Console +from rich.table import Table def timeit(func, iteration=3): @@ -25,3 +27,13 @@ def function_timer(*args, **kwargs) -> tuple(np.floating, np.floating): return np.mean(runtimes), np.var(runtimes) return function_timer + + +def print_table(result): + table = Table("Batch Name", "(mean, var)", title="Benchmark Results") + + for batch_name, (mean, var) in result.items(): + table.add_row(batch_name, f"{mean:.4f}, {var:.4f}") + + console = Console() + console.print(table) From b50ce520084d46c97620577e5aa4c0087e61c849 Mon Sep 17 00:00:00 2001 From: Jithin James Date: Sat, 13 May 2023 17:27:57 +0530 Subject: [PATCH 06/18] added makefile for all CI/CD --- Makefile | 30 +++++++++++++++++++ pyproject.toml | 7 +++++ .../{integration => benchmarks}/benchmark.py | 6 ++-- tests/{integration => benchmarks}/utils.py | 0 4 files changed, 40 insertions(+), 3 deletions(-) create mode 100644 Makefile rename tests/{integration => benchmarks}/benchmark.py (93%) rename tests/{integration => benchmarks}/utils.py (100%) diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..1bc7b4ef1 --- /dev/null +++ b/Makefile @@ -0,0 +1,30 @@ +GIT_ROOT ?= $(shell git rev-parse --show-toplevel) + +help: ## Show all Makefile targets + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}' + +.PHONY: format lint type style clean run-benchmarks +format: ## Running code formatter: black and isort + @echo "(black) Formatting codebase..." + @black --config pyproject.toml belar tests docs examples + @echo "(black) Formatting stubs..." + @find src -name "*.pyi" ! 
-name "*_pb2*" -exec black --pyi --config pyproject.toml {} \; + @echo "(isort) Reordering imports..." + @isort . + @echo "(ruff) Running fix only..." + @ruff check belar examples tests --fix-only +lint: ## Running lint checker: ruff + @echo "(ruff) Linting development project..." + @ruff check belar examples tests +type: ## Running type checker: pyright + @echo "(pyright) Typechecking codebase..." + @pyright -p belar -w +style: format lint +clean: ## Clean all generated files + @echo "Cleaning all generated files..." + @cd $(GIT_ROOT)/docs && make clean + @cd $(GIT_ROOT) || exit 1 + @find . -type f -name '*.py[co]' -delete -o -type d -name __pycache__ -delete +run-benchmarks: ## Run benchmarks + @echo "Running benchmarks..." + @cd $(GIT_ROOT)/tests/benchmarks && python benchmark.py diff --git a/pyproject.toml b/pyproject.toml index 50e48f46f..9d87e4158 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,13 @@ test = [ "rich", ] +dev = [ + "ruff", + "isort", + "black", + "pyright", +] + [tool.setuptools.dynamic] readme = {file = ["README.md"], content-type = "text/plain"} diff --git a/tests/integration/benchmark.py b/tests/benchmarks/benchmark.py similarity index 93% rename from tests/integration/benchmark.py rename to tests/benchmarks/benchmark.py index 643e66d84..525a86847 100644 --- a/tests/integration/benchmark.py +++ b/tests/benchmarks/benchmark.py @@ -18,7 +18,7 @@ ) DEVICE = "cuda" if is_available() else "cpu" -BATCHES = [0, 1, 10, 20, 30, 60] +BATCHES = [0, 1] # init metrics sbert_score = SBERTScore(similarity_metric="cosine") entail = EntailmentScore(max_length=512, device=DEVICE) @@ -28,8 +28,8 @@ "RougeL": RougeL, "EditRatio": EditRatio, "EditDistance": EditDistance, - "SBERTScore": sbert_score, - "EntailmentScore": entail, + # "SBERTScore": sbert_score, + # "EntailmentScore": entail, } DS = load_dataset("explodinggradients/eli5-test", split="test_eli5") diff --git a/tests/integration/utils.py b/tests/benchmarks/utils.py similarity index 100% rename from tests/integration/utils.py rename to tests/benchmarks/utils.py From 71ff9e9fc38ebc95d50a2d55b116cd8e12e180f1 Mon Sep 17 00:00:00 2001 From: Jithin James Date: Sat, 13 May 2023 18:47:46 +0530 Subject: [PATCH 07/18] fix Makefiles --- Makefile | 6 +- examples/quickstart.ipynb | 163 ++++++++++++-------------------------- pyproject.toml | 2 +- 3 files changed, 54 insertions(+), 117 deletions(-) diff --git a/Makefile b/Makefile index 1bc7b4ef1..c613129fe 100644 --- a/Makefile +++ b/Makefile @@ -6,9 +6,9 @@ help: ## Show all Makefile targets .PHONY: format lint type style clean run-benchmarks format: ## Running code formatter: black and isort @echo "(black) Formatting codebase..." - @black --config pyproject.toml belar tests docs examples + @black --config pyproject.toml belar tests examples @echo "(black) Formatting stubs..." - @find src -name "*.pyi" ! -name "*_pb2*" -exec black --pyi --config pyproject.toml {} \; + @find belar -name "*.pyi" ! -name "*_pb2*" -exec black --pyi --config pyproject.toml {} \; @echo "(isort) Reordering imports..." @isort . @echo "(ruff) Running fix only..." @@ -18,7 +18,7 @@ lint: ## Running lint checker: ruff @ruff check belar examples tests type: ## Running type checker: pyright @echo "(pyright) Typechecking codebase..." - @pyright -p belar -w + @pyright -p belar style: format lint clean: ## Clean all generated files @echo "Cleaning all generated files..." 
diff --git a/examples/quickstart.ipynb b/examples/quickstart.ipynb index 43166fffe..f726fcf23 100644 --- a/examples/quickstart.ipynb +++ b/examples/quickstart.ipynb @@ -1,109 +1,37 @@ { "cells": [ { - "cell_type": "code", - "execution_count": 1, - "id": "54b66a67", - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9710719d", - "metadata": {}, - "outputs": [], - "source": [ - "from datasets import load_dataset, concatenate_datasets\n", - "\n", - "def format_for_belar(row):\n", - " row[\"context\"] = row[\"selftext\"]\n", - " row[\"prompt\"] = row[\"title\"]\n", - " row['ground_truth'] = row[\"answers\"][\"text\"]\n", - " return row\n", - " \n", - "d = load_dataset(\"eli5\")\n", - "ds = d['test_eli5'].map(format_for_belar, batched=False)\n", - "ds = ds.select_columns([\"context\", \"prompt\", \"ground_truth\"])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "23c3f231", + "cell_type": "markdown", + "id": "aeb5819b", "metadata": {}, - "outputs": [], "source": [ - "ds = ds.shuffle(seed=42).select(range(500))\n", - "ds.shape" + "# Quickstart" ] }, { "cell_type": "code", - "execution_count": null, - "id": "81205b31", - "metadata": {}, - "outputs": [], - "source": [ - "ds.column_names" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2c5671fe", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "207d0e48", - "metadata": {}, - "outputs": [], - "source": [ - "import concurrent.futures as f\n", - "from langchain.llms import OpenAI\n", - "\n", - "llm = OpenAI()\n", - "prompt = \"\"\"\n", - "{context}\n", - "with the above context explain like I'm five: {prompt}\n", - "\"\"\"\n", - "\n", - "def get_answers(row):\n", - " qs, cs = row[\"prompt\"], row[\"context\"]\n", - " \n", - " generated_answers = []\n", - " with f.ThreadPoolExecutor(max_workers=10) as executor:\n", - " results = executor.map(llm, \n", - " [prompt.format(context=cs[i], prompt=qs[i]) for i in range(len(qs))])\n", - " for result in results:\n", - " generated_answers.append(result)\n", - " \n", - " row[\"generated_answers\"] = generated_answers\n", - " return row\n", - " \n", - "ds = ds.map(get_answers, batched=True, batch_size=10)" - ] - }, - { - "cell_type": "markdown", - "id": "5d93c658", + "execution_count": 30, + "id": "22c7dd25", "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. 
To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], "source": [ - "## Evalutate" + "%load_ext autoreload\n", + "%autoreload 2" ] }, { "cell_type": "code", "execution_count": 2, - "id": "076f2dbf", + "id": "0b5d4d41", "metadata": {}, "outputs": [ { @@ -136,18 +64,27 @@ }, { "cell_type": "code", - "execution_count": 11, - "id": "7c0cda03", + "execution_count": 24, + "id": "0b5abd7d", "metadata": {}, "outputs": [], "source": [ - "from belar.metrics import Rouge1, Evaluation, Rouge2, RougeL, SBERTScore, EntailmentScore, EditRatio, EditDistance" + "from belar.metrics import (\n", + " Rouge1,\n", + " Evaluation,\n", + " Rouge2,\n", + " RougeL,\n", + " SBERTScore,\n", + " EntailmentScore,\n", + " EditRatio,\n", + " EditDistance,\n", + ")" ] }, { "cell_type": "code", - "execution_count": 18, - "id": "887b613c", + "execution_count": 28, + "id": "a77c805d", "metadata": {}, "outputs": [ { @@ -163,17 +100,19 @@ ], "source": [ "sbert_score = SBERTScore(similarity_metric=\"cosine\")\n", - "entail = EntailmentScore()\n", + "entail = EntailmentScore(max_length=512)\n", "\n", "e = Evaluation(\n", - " metrics=[Rouge1, Rouge2, RougeL, sbert_score, EditDistance, EditRatio],\n", - " batched=False, batch_size=30)" + " metrics=[Rouge1, Rouge2, RougeL, sbert_score, EditDistance, EditRatio, entail],\n", + " batched=False,\n", + " batch_size=30,\n", + ")" ] }, { "cell_type": "code", - "execution_count": 19, - "id": "32e338ad", + "execution_count": 29, + "id": "e879f51b", "metadata": {}, "outputs": [ { @@ -198,7 +137,7 @@ { "cell_type": "code", "execution_count": 20, - "id": "b90661cb", + "id": "f64c1915", "metadata": {}, "outputs": [ { @@ -219,7 +158,7 @@ { "cell_type": "code", "execution_count": 21, - "id": "8c926330", + "id": "7c812dfe", "metadata": {}, "outputs": [ { @@ -234,13 +173,13 @@ } ], "source": [ - "r['rouge1_score']" + "r[\"rouge1_score\"]" ] }, { "cell_type": "code", "execution_count": 22, - "id": "d65834d4", + "id": "4c8c51b1", "metadata": {}, "outputs": [ { @@ -302,7 +241,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b59d1d8a", + "id": "ebf0a29d", "metadata": {}, "outputs": [], "source": [ @@ -312,7 +251,7 @@ { "cell_type": "code", "execution_count": null, - "id": "29814470", + "id": "4882982d", "metadata": {}, "outputs": [], "source": [ @@ -322,7 +261,7 @@ { "cell_type": "code", "execution_count": 5, - "id": "0e1da651", + "id": "08ef4d51", "metadata": {}, "outputs": [ { @@ -343,7 +282,7 @@ { "cell_type": "code", "execution_count": 6, - "id": "52bb6cee", + "id": "f8a58fa8", "metadata": {}, "outputs": [ { @@ -857,15 +796,13 @@ } ], "source": [ - "ds_eval['SBERT_cosine_score']" + "ds_eval[\"SBERT_cosine_score\"]" ] }, { - "cell_type": "code", - "execution_count": null, - "id": "50eba21d", + "cell_type": "markdown", + "id": "3893e1c7", "metadata": {}, - "outputs": [], "source": [] } ], diff --git a/pyproject.toml b/pyproject.toml index 9d87e4158..a8a74c469 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ test = [ dev = [ "ruff", "isort", - "black", + "black[jupyter]", "pyright", ] From 5678fd8294642ad6093a54f47b16f4579b692b71 Mon Sep 17 00:00:00 2001 From: Jithin James Date: Sat, 13 May 2023 19:06:35 +0530 Subject: [PATCH 08/18] new CI workflow --- .github/workflows/ci.yaml | 42 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 .github/workflows/ci.yaml diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 000000000..e6f085d33 --- /dev/null +++ b/.github/workflows/ci.yaml @@ 
-0,0 +1,42 @@ +name: CI + +on: + pull_request: + +env: + LINES: 120 + COLUMNS: 120 + +# https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#defaultsrun +defaults: + run: + shell: bash --noprofile --norc -exo pipefail {0} + +jobs: + diff: + runs-on: ubuntu-latest + outputs: + related: ${{ steps.filter.outputs.related }} + bentoml: ${{ steps.filter.outputs.bentoml }} + docs: ${{ steps.filter.outputs.docs }} + protos: ${{ steps.filter.outputs.protos }} + steps: + - uses: actions/checkout@v3 + - uses: dorny/paths-filter@v2 + id: filter + with: + base: "main" + filters: | + related: &related + - .github/workflows/ci.yml + - codecov.yml + - pyproject.toml + belar: + - "belar/**" + - "tests/**" + - "examples/**" + docs: + - *related + - requirements/docs-requirements.txt + - "docs/**" + From 2b270cce7d4762b9404fd4cff13544b99f9ea875 Mon Sep 17 00:00:00 2001 From: Jithin James Date: Sat, 13 May 2023 19:29:11 +0530 Subject: [PATCH 09/18] unit-test workflow --- .github/workflows/ci.yaml | 54 +++++++++++++++++++++++++++-- requirements/tests-requirements.txt | 7 ++++ tests/unit/test_simple.py | 4 +++ 3 files changed, 63 insertions(+), 2 deletions(-) create mode 100644 requirements/tests-requirements.txt create mode 100644 tests/unit/test_simple.py diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index e6f085d33..dc69103ba 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -17,9 +17,8 @@ jobs: runs-on: ubuntu-latest outputs: related: ${{ steps.filter.outputs.related }} - bentoml: ${{ steps.filter.outputs.bentoml }} + belar: ${{ steps.filter.outputs.belar }} docs: ${{ steps.filter.outputs.docs }} - protos: ${{ steps.filter.outputs.protos }} steps: - uses: actions/checkout@v3 - uses: dorny/paths-filter@v2 @@ -31,6 +30,7 @@ jobs: - .github/workflows/ci.yml - codecov.yml - pyproject.toml + - requirements/tests-requirements.txt belar: - "belar/**" - "tests/**" @@ -40,3 +40,53 @@ jobs: - requirements/docs-requirements.txt - "docs/**" + unit_tests: + needs: + - diff + + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + python-version: ["3.7", "3.8", "3.9", "3.10"] + + if: ${{ (github.event_name == 'pull_request' && needs.diff.outputs.belar == 'true') || github.event_name == 'push' }} + name: python${{ matrix.python-version }}_unit_tests (${{ matrix.os }}) + runs-on: ${{ matrix.os }} + + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 # fetch all tags and branches + + - name: Setup python + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + architecture: x64 + + - name: Get pip cache dir + id: cache-dir + run: | + echo ::set-output name=dir::$(pip cache dir) + + - name: Cache pip dependencies + uses: actions/cache@v3 + id: cache-pip + with: + path: ${{ steps.cache-dir.outputs.dir }} + key: ${{ runner.os }}-tests-${{ hashFiles('requirements/tests-requirements.txt') }} + + - name: Install dependencies + run: | + pip install -r requirements/tests-requirements.txt + + - name: Run unit tests + run: | + # OPTS=(--cov-config pyproject.toml --cov=src/bentoml --cov-append) + if [ "${{ matrix.os }}" != 'windows-latest' ]; then + # we will use pytest-xdist to improve tests run-time. 
+ OPTS=(--dist loadfile -n auto) + fi + # Now run the unit tests + pytest tests/unit "${OPTS[@]}" diff --git a/requirements/tests-requirements.txt b/requirements/tests-requirements.txt new file mode 100644 index 000000000..cdb7e56b4 --- /dev/null +++ b/requirements/tests-requirements.txt @@ -0,0 +1,7 @@ +pytest +rich +ruff +isort +black[jupyter] +pyright +pytest-xdist[psutil] diff --git a/tests/unit/test_simple.py b/tests/unit/test_simple.py new file mode 100644 index 000000000..a247ee597 --- /dev/null +++ b/tests/unit/test_simple.py @@ -0,0 +1,4 @@ +def test_import(): + import belar + + assert belar is not None From 91d7c8aa9f53185582fa87fde0159b8a01928346 Mon Sep 17 00:00:00 2001 From: Jithin James Date: Sat, 13 May 2023 19:45:57 +0530 Subject: [PATCH 10/18] add github token --- .github/workflows/ci.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index dc69103ba..ffa0ae299 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -25,6 +25,7 @@ jobs: id: filter with: base: "main" + token: ${{ github.token }} filters: | related: &related - .github/workflows/ci.yml From 3b601b524ed95700b692a830c7baef7788bb7845 Mon Sep 17 00:00:00 2001 From: Jithin James Date: Sat, 13 May 2023 20:54:35 +0530 Subject: [PATCH 11/18] permissions --- .github/workflows/ci.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index ffa0ae299..9bd83443f 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -3,6 +3,9 @@ name: CI on: pull_request: +permissions: + contents: read + env: LINES: 120 COLUMNS: 120 From 5e790a1fd5153162317e658db6e05efe1df34da1 Mon Sep 17 00:00:00 2001 From: Jithin James Date: Sat, 13 May 2023 20:56:35 +0530 Subject: [PATCH 12/18] install library --- .github/workflows/ci.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 9bd83443f..dd20cc300 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -83,6 +83,7 @@ jobs: - name: Install dependencies run: | + pip install "." pip install -r requirements/tests-requirements.txt - name: Run unit tests From 39d72c62cafa5ba2505de67e25ca9eb2cf96b89a Mon Sep 17 00:00:00 2001 From: Jithin James Date: Sat, 13 May 2023 21:07:30 +0530 Subject: [PATCH 13/18] add code stype checks --- .github/workflows/ci.yaml | 56 ++++++++++++++++++- .../{tests-requirements.txt => dev.txt} | 2 - requirements/test.txt | 2 + 3 files changed, 55 insertions(+), 5 deletions(-) rename requirements/{tests-requirements.txt => dev.txt} (58%) create mode 100644 requirements/test.txt diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index dd20cc300..34cfebcd1 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -34,7 +34,7 @@ jobs: - .github/workflows/ci.yml - codecov.yml - pyproject.toml - - requirements/tests-requirements.txt + - requirements/test.txt belar: - "belar/**" - "tests/**" @@ -79,12 +79,12 @@ jobs: id: cache-pip with: path: ${{ steps.cache-dir.outputs.dir }} - key: ${{ runner.os }}-tests-${{ hashFiles('requirements/tests-requirements.txt') }} + key: ${{ runner.os }}-tests-${{ hashFiles('requirements/test.txt') }} - name: Install dependencies run: | pip install "." 
- pip install -r requirements/tests-requirements.txt + pip install -r requirements/test.txt - name: Run unit tests run: | @@ -95,3 +95,53 @@ jobs: fi # Now run the unit tests pytest tests/unit "${OPTS[@]}" + + codestyle_check: + runs-on: ubuntu-latest + needs: + - diff + + if: ${{ (github.event_name == 'pull_request' && needs.diff.outputs.belar == 'true') || github.event_name == 'push' }} + + steps: + - uses: actions/checkout@v3 + + - name: Setup python + uses: actions/setup-python@v4 + with: + python-version: "3.10.6" + architecture: x64 + + - name: Get pip cache dir + id: cache-dir + run: | + echo ::set-output name=dir::$(pip cache dir) + + - name: Fetch origin + run: git fetch origin "$GITHUB_BASE_REF" + + - name: Setup node + uses: actions/setup-node@v3 + with: + node-version: "17" + + - name: Cache pip dependencies + uses: actions/cache@v3 + id: cache-pip + with: + path: ${{ steps.cache-dir.outputs.dir }} + key: codestyle-${{ hashFiles('requirements/dev.txt') }} + + - name: Install dependencies + run: | + pip install . + pip install -r requirements/dev.txt + + - name: Format check + run: | + make format + - name: Lint check + run: make lint + - name: Type check + if: ${{ github.event_name == 'pull_request' }} + run: git diff --name-only --diff-filter=AM "origin/$GITHUB_BASE_REF" -z -- '*.py{,i}' | xargs -0 --no-run-if-empty pyright diff --git a/requirements/tests-requirements.txt b/requirements/dev.txt similarity index 58% rename from requirements/tests-requirements.txt rename to requirements/dev.txt index cdb7e56b4..ea06f113a 100644 --- a/requirements/tests-requirements.txt +++ b/requirements/dev.txt @@ -1,7 +1,5 @@ -pytest rich ruff isort black[jupyter] pyright -pytest-xdist[psutil] diff --git a/requirements/test.txt b/requirements/test.txt new file mode 100644 index 000000000..3951f472c --- /dev/null +++ b/requirements/test.txt @@ -0,0 +1,2 @@ +pytest +pytest-xdist[psutil] From ade603c5362c13e7abfda28dfe0c301abfe64ebe Mon Sep 17 00:00:00 2001 From: Jithin James Date: Sat, 13 May 2023 21:21:28 +0530 Subject: [PATCH 14/18] fix linting and formating --- belar/metrics/__init__.py | 16 +++++++++++++++- belar/metrics/base.py | 1 - belar/metrics/simple.py | 6 ++---- belar/utils.py | 3 ++- tests/benchmarks/benchmark.py | 13 ++----------- 5 files changed, 21 insertions(+), 18 deletions(-) diff --git a/belar/metrics/__init__.py b/belar/metrics/__init__.py index 9131315af..70fe4a3e5 100644 --- a/belar/metrics/__init__.py +++ b/belar/metrics/__init__.py @@ -1,4 +1,18 @@ from belar.metrics.base import Evaluation, Metric from belar.metrics.factual import EntailmentScore from belar.metrics.similarity import SBERTScore -from belar.metrics.simple import * +from belar.metrics.simple import (BLUE, EditDistance, EditRatio, Rouge1, + Rouge2, RougeL) + +__all__ = [ + "Evaluation", + "Metric", + "EntailmentScore", + "SBERTScore", + "BLUE", + "EditDistance", + "EditRatio", + "RougeL", + "Rouge1", + "Rouge2", +] diff --git a/belar/metrics/base.py b/belar/metrics/base.py index 5d0108767..95445a67d 100644 --- a/belar/metrics/base.py +++ b/belar/metrics/base.py @@ -2,7 +2,6 @@ import typing as t from abc import ABC, abstractmethod -from collections import namedtuple from dataclasses import dataclass import numpy as np diff --git a/belar/metrics/simple.py b/belar/metrics/simple.py index 882445a43..116225776 100644 --- a/belar/metrics/simple.py +++ b/belar/metrics/simple.py @@ -14,7 +14,7 @@ @dataclass -class BLEU(Metric): +class BLEUScore(Metric): weights: list[float] = field(default_factory=lambda: [0.25, 
0.25, 0.25, 0.25]) smoothing_function = None @@ -94,8 +94,6 @@ def score(self, ground_truth: t.List[str], generated_text: t.List[str]): Rouge1 = ROUGE("rouge1") Rouge2 = ROUGE("rouge2") RougeL = ROUGE("rougeL") -BLUE = BLEU() +BLUE = BLEUScore() EditDistance = EditScore("distance") EditRatio = EditScore("ratio") - -__all__ = ["Rouge1", "Rouge2", "RougeL", "BLEU", "EditDistance", "EditRatio"] diff --git a/belar/utils.py b/belar/utils.py index 9564cfa66..ea69b3d5e 100644 --- a/belar/utils.py +++ b/belar/utils.py @@ -1,7 +1,8 @@ -import torch import typing as t from warnings import warn +import torch + DEVICES = ["cpu", "cuda"] diff --git a/tests/benchmarks/benchmark.py b/tests/benchmarks/benchmark.py index 525a86847..50d63beb5 100644 --- a/tests/benchmarks/benchmark.py +++ b/tests/benchmarks/benchmark.py @@ -1,21 +1,12 @@ import typing as t -from dataclasses import dataclass from datasets import Dataset, load_dataset from torch.cuda import is_available from tqdm import tqdm from utils import print_table, timeit -from belar.metrics import ( - EditDistance, - EditRatio, - EntailmentScore, - Evaluation, - Rouge1, - Rouge2, - RougeL, - SBERTScore, -) +from belar.metrics import (EditDistance, EditRatio, EntailmentScore, + Evaluation, Rouge1, Rouge2, RougeL, SBERTScore) DEVICE = "cuda" if is_available() else "cpu" BATCHES = [0, 1] From 6d94b12610757258426ffd3aa804e74513b46784 Mon Sep 17 00:00:00 2001 From: Jithin James Date: Sat, 13 May 2023 22:18:30 +0530 Subject: [PATCH 15/18] fix type annotation errors --- belar/metrics/base.py | 14 ++++++++++---- belar/metrics/factual.py | 5 ++++- belar/metrics/similarity.py | 6 ++++++ belar/utils.py | 21 +++++++++++---------- tests/benchmarks/utils.py | 12 ++++++++++-- 5 files changed, 41 insertions(+), 17 deletions(-) diff --git a/belar/metrics/base.py b/belar/metrics/base.py index 95445a67d..53a585be8 100644 --- a/belar/metrics/base.py +++ b/belar/metrics/base.py @@ -12,16 +12,18 @@ class Metric(ABC): @property @abstractmethod - def name(self) -> str: + def name(self: t.Self) -> str: ... @property @abstractmethod - def is_batchable(self) -> bool: + def is_batchable(self: t.Self) -> bool: ... @abstractmethod - def score(self, ground_truth: list[str], generated_text: list[str]) -> list[float]: + def score( + self: t.Self, ground_truth: list[str], generated_text: list[str] + ) -> list[float]: ... 
@@ -67,7 +69,11 @@ def _get_score(self, row: dict[str, list[t.Any]] | dict[str, t.Any]): else: # not batched split_indices = len(row["ground_truth"]) ground_truths = row["ground_truth"] - generated_texts = [row["generated_text"]] * split_indices + generated_text = row["generated_text"] + assert isinstance( + generated_text, str + ), f"generated_text should be str but got {type(generated_text)}" + generated_texts = [generated_text] * split_indices scores = metric.score(ground_truths, generated_texts) score = np.max(scores) diff --git a/belar/metrics/factual.py b/belar/metrics/factual.py index adc0509f7..8999f6d9f 100644 --- a/belar/metrics/factual.py +++ b/belar/metrics/factual.py @@ -8,6 +8,9 @@ from belar.metrics import Metric from belar.utils import device_check +if t.TYPE_CHECKING: + from torch import device as Device + @dataclass class EntailmentScore(Metric): @@ -18,7 +21,7 @@ class EntailmentScore(Metric): model_name: str = "typeform/distilbert-base-uncased-mnli" max_length: int = 512 batch_size: int = 4 - device: t.Literal["cpu", "cuda"] = "cpu" + device: t.Literal["cpu", "cuda"] | Device = "cpu" def __post_init__(self): self.device = device_check(self.device) diff --git a/belar/metrics/similarity.py b/belar/metrics/similarity.py index 4dd55985a..85f55df11 100644 --- a/belar/metrics/similarity.py +++ b/belar/metrics/similarity.py @@ -9,6 +9,9 @@ from belar.metrics.base import Metric +if t.TYPE_CHECKING: + from torch import Tensor + SBERT_METRIC = t.Literal["cosine", "euclidean"] @@ -42,6 +45,9 @@ def score( gentext_emb = self.model.encode( generated_text, batch_size=self.batch_size, convert_to_numpy=True ) + assert isinstance(gentext_emb, Tensor) and isinstance( + gndtruth_emb, Tensor + ), f"Both gndtruth_emb[{type(gentext_emb)}], gentext_emb[{type(gentext_emb)}] should be Tensor." 
if self.similarity_metric == "cosine": score = np.dot(gndtruth_emb, gentext_emb.T) / ( diff --git a/belar/utils.py b/belar/utils.py index ea69b3d5e..913e827b1 100644 --- a/belar/utils.py +++ b/belar/utils.py @@ -3,18 +3,19 @@ import torch +if t.TYPE_CHECKING: + from torch import device as Device + DEVICES = ["cpu", "cuda"] -def device_check(device: t.Literal[DEVICES]): - if device == "cuda": - if torch.cuda.is_available(): - device = torch.device("cuda") - else: - warn("cuda not available, using cpu") - elif device == "cpu": - device = torch.device("cpu") - else: +def device_check(device: t.Literal["cpu", "cuda"] | Device) -> torch.device: + if isinstance(device, Device): + return device + if device not in DEVICES: raise ValueError(f"Invalid device {device}") + if device == "cuda" and not torch.cuda.is_available(): + warn("cuda not available, using cpu") + device = "cpu" - return device + return torch.device(device) diff --git a/tests/benchmarks/utils.py b/tests/benchmarks/utils.py index 9ea962a65..2dd81d6cf 100644 --- a/tests/benchmarks/utils.py +++ b/tests/benchmarks/utils.py @@ -1,14 +1,22 @@ from __future__ import annotations import time +import typing as t import numpy as np from rich.console import Console from rich.table import Table +P = t.ParamSpec("P") +R = t.TypeVar("R") +OrigFunc = t.Callable[P, R] +DecoratedFunc = t.Callable[P, tuple[np.floating, np.floating]] -def timeit(func, iteration=3): - def function_timer(*args, **kwargs) -> tuple(np.floating, np.floating): + +def timeit(func: OrigFunc, iteration: int = 3) -> DecoratedFunc: + def function_timer( + *args: P.args, **kwargs: P.kwargs + ) -> tuple[np.floating, np.floating]: """ Time the execution of a function and returns the time taken """ From 97e1b417f8bb098e7a5c4c51b2cfa0360e9596ac Mon Sep 17 00:00:00 2001 From: Jithin James Date: Sat, 13 May 2023 22:30:14 +0530 Subject: [PATCH 16/18] fix check for types --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 34cfebcd1..eca5ee941 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -144,4 +144,4 @@ jobs: run: make lint - name: Type check if: ${{ github.event_name == 'pull_request' }} - run: git diff --name-only --diff-filter=AM "origin/$GITHUB_BASE_REF" -z -- '*.py{,i}' | xargs -0 --no-run-if-empty pyright + run: git diff --name-only --diff-filter=AM "origin/$GITHUB_BASE_REF" -z -- '**/*.py' '**/*.pyi' | xargs -0 --no-run-if-empty pyright From 99c9054332dee51cb023540af18bd7092199ee8a Mon Sep 17 00:00:00 2001 From: Jithin James Date: Sat, 13 May 2023 22:33:27 +0530 Subject: [PATCH 17/18] fix lint --- Makefile | 2 +- belar/metrics/similarity.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index c613129fe..27a1eb38a 100644 --- a/Makefile +++ b/Makefile @@ -19,12 +19,12 @@ lint: ## Running lint checker: ruff type: ## Running type checker: pyright @echo "(pyright) Typechecking codebase..." @pyright -p belar -style: format lint clean: ## Clean all generated files @echo "Cleaning all generated files..." @cd $(GIT_ROOT)/docs && make clean @cd $(GIT_ROOT) || exit 1 @find . -type f -name '*.py[co]' -delete -o -type d -name __pycache__ -delete +run-ci: format lint type ## Running all CI checks run-benchmarks: ## Run benchmarks @echo "Running benchmarks..." 
@cd $(GIT_ROOT)/tests/benchmarks && python benchmark.py diff --git a/belar/metrics/similarity.py b/belar/metrics/similarity.py index 85f55df11..8c38137c9 100644 --- a/belar/metrics/similarity.py +++ b/belar/metrics/similarity.py @@ -45,9 +45,10 @@ def score( gentext_emb = self.model.encode( generated_text, batch_size=self.batch_size, convert_to_numpy=True ) - assert isinstance(gentext_emb, Tensor) and isinstance( - gndtruth_emb, Tensor - ), f"Both gndtruth_emb[{type(gentext_emb)}], gentext_emb[{type(gentext_emb)}] should be Tensor." + assert isinstance(gentext_emb, Tensor) and isinstance(gndtruth_emb, Tensor), ( + f"Both gndtruth_emb[{type(gentext_emb)}], gentext_emb[{type(gentext_emb)}]" + " should be Tensor." + ) if self.similarity_metric == "cosine": score = np.dot(gndtruth_emb, gentext_emb.T) / ( From 295f422922c73da6d1888d77be47f08999117711 Mon Sep 17 00:00:00 2001 From: Jithin James Date: Sat, 13 May 2023 22:43:04 +0530 Subject: [PATCH 18/18] fix old state storage --- .github/workflows/ci.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index eca5ee941..e5c930a5c 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -72,7 +72,7 @@ jobs: - name: Get pip cache dir id: cache-dir run: | - echo ::set-output name=dir::$(pip cache dir) + echo "dir=$(pip cache dir)" >> $GITHUB_OUTPUT - name: Cache pip dependencies uses: actions/cache@v3 @@ -115,7 +115,7 @@ jobs: - name: Get pip cache dir id: cache-dir run: | - echo ::set-output name=dir::$(pip cache dir) + echo "dir=$(pip cache dir)" >> $GITHUB_OUTPUT - name: Fetch origin run: git fetch origin "$GITHUB_BASE_REF"
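
For readers following this series, here is a minimal sketch of how the benchmark timing helpers introduced above compose once PATCH 15 lands. The `timeit` decorator and `print_table` names follow tests/benchmarks/utils.py and tests/benchmarks/benchmark.py; the package-style import path and the `busy_wait` workload are illustrative stand-ins, not part of the patches:

    from __future__ import annotations

    import time

    # Illustrative import path; benchmark.py itself uses a bare
    # `from utils import print_table, timeit` because it is run
    # from inside tests/benchmarks.
    from tests.benchmarks.utils import print_table, timeit


    @timeit  # one warmup call, then three timed iterations (the default)
    def busy_wait() -> None:
        # Hypothetical stand-in for the real workload, i.e.
        # Evaluation.eval(ground_truth, generated_text) in benchmark.py.
        time.sleep(0.01)


    if __name__ == "__main__":
        # The decorated function returns (mean, variance) of the runtimes,
        # which is exactly the shape print_table expects per row.
        mean, var = busy_wait()
        print_table({"busy-wait": (mean, var)})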