147 changes: 147 additions & 0 deletions .github/workflows/ci.yaml
@@ -0,0 +1,147 @@
name: CI

on:
pull_request:

permissions:
contents: read

env:
LINES: 120
COLUMNS: 120

# https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#defaultsrun
defaults:
run:
shell: bash --noprofile --norc -exo pipefail {0}

jobs:
diff:
runs-on: ubuntu-latest
outputs:
related: ${{ steps.filter.outputs.related }}
belar: ${{ steps.filter.outputs.belar }}
docs: ${{ steps.filter.outputs.docs }}
steps:
- uses: actions/checkout@v3
- uses: dorny/paths-filter@v2
id: filter
with:
base: "main"
token: ${{ github.token }}
filters: |
related: &related
- .github/workflows/ci.yaml
- codecov.yml
- pyproject.toml
- requirements/test.txt
belar:
- "belar/**"
- "tests/**"
- "examples/**"
docs:
- *related
- requirements/docs-requirements.txt
- "docs/**"

unit_tests:
needs:
- diff

strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
python-version: ["3.7", "3.8", "3.9", "3.10"]

if: ${{ (github.event_name == 'pull_request' && needs.diff.outputs.belar == 'true') || github.event_name == 'push' }}
name: python${{ matrix.python-version }}_unit_tests (${{ matrix.os }})
runs-on: ${{ matrix.os }}

steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0 # fetch all tags and branches

- name: Setup python
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
architecture: x64

- name: Get pip cache dir
id: cache-dir
run: |
echo "dir=$(pip cache dir)" >> $GITHUB_OUTPUT

- name: Cache pip dependencies
uses: actions/cache@v3
id: cache-pip
with:
path: ${{ steps.cache-dir.outputs.dir }}
key: ${{ runner.os }}-tests-${{ hashFiles('requirements/test.txt') }}

- name: Install dependencies
run: |
pip install "."
pip install -r requirements/test.txt

- name: Run unit tests
run: |
# OPTS=(--cov-config pyproject.toml --cov=belar --cov-append)
OPTS=()
if [ "${{ matrix.os }}" != 'windows-latest' ]; then
# Use pytest-xdist to speed up the test run on POSIX runners.
OPTS=(--dist loadfile -n auto)
fi
# Now run the unit tests
pytest tests/unit "${OPTS[@]}"

codestyle_check:
runs-on: ubuntu-latest
needs:
- diff

if: ${{ (github.event_name == 'pull_request' && needs.diff.outputs.belar == 'true') || github.event_name == 'push' }}

steps:
- uses: actions/checkout@v3

- name: Setup python
uses: actions/setup-python@v4
with:
python-version: "3.10.6"
architecture: x64

- name: Get pip cache dir
id: cache-dir
run: |
echo "dir=$(pip cache dir)" >> $GITHUB_OUTPUT

- name: Fetch origin
run: git fetch origin "$GITHUB_BASE_REF"

- name: Setup node
uses: actions/setup-node@v3
with:
node-version: "17"

- name: Cache pip dependencies
uses: actions/cache@v3
id: cache-pip
with:
path: ${{ steps.cache-dir.outputs.dir }}
key: codestyle-${{ hashFiles('requirements/dev.txt') }}

- name: Install dependencies
run: |
pip install .
pip install -r requirements/dev.txt

- name: Format check
run: |
make format
- name: Lint check
run: make lint
- name: Type check
if: ${{ github.event_name == 'pull_request' }}
run: git diff --name-only --diff-filter=AM "origin/$GITHUB_BASE_REF" -z -- '**/*.py' '**/*.pyi' | xargs -0 --no-run-if-empty pyright
30 changes: 30 additions & 0 deletions Makefile
@@ -0,0 +1,30 @@
GIT_ROOT ?= $(shell git rev-parse --show-toplevel)

help: ## Show all Makefile targets
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}'

.PHONY: help format lint type clean run-ci run-benchmarks
format: ## Run code formatters: black and isort
@echo "(black) Formatting codebase..."
@black --config pyproject.toml belar tests examples
@echo "(black) Formatting stubs..."
@find belar -name "*.pyi" ! -name "*_pb2*" -exec black --pyi --config pyproject.toml {} \;
@echo "(isort) Reordering imports..."
@isort .
@echo "(ruff) Running fix only..."
@ruff check belar examples tests --fix-only
lint: ## Run lint checker: ruff
@echo "(ruff) Linting codebase..."
@ruff check belar examples tests
type: ## Run type checker: pyright
@echo "(pyright) Typechecking codebase..."
@pyright -p belar
clean: ## Clean all generated files
@echo "Cleaning all generated files..."
@cd $(GIT_ROOT)/docs && make clean
@cd $(GIT_ROOT) || exit 1
@find . -type f -name '*.py[co]' -delete -o -type d -name __pycache__ -delete
run-ci: format lint type ## Run all CI checks
run-benchmarks: ## Run benchmarks
@echo "Running benchmarks..."
@cd $(GIT_ROOT)/tests/benchmarks && python benchmark.py
16 changes: 15 additions & 1 deletion belar/metrics/__init__.py
@@ -1,4 +1,18 @@
from belar.metrics.base import Evaluation, Metric
from belar.metrics.factual import EntailmentScore
from belar.metrics.similarity import SBERTScore
from belar.metrics.simple import *
from belar.metrics.simple import (BLUE, EditDistance, EditRatio, Rouge1,
Rouge2, RougeL)

__all__ = [
"Evaluation",
"Metric",
"EntailmentScore",
"SBERTScore",
"BLUE",
"EditDistance",
"EditRatio",
"RougeL",
"Rouge1",
"Rouge2",
]
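With the wildcard import replaced by explicit exports, the public surface of belar.metrics is now fixed. A minimal usage sketch (the example strings are made up; each exported metric follows the score(ground_truth, generated_text) -> list[float] contract from Metric):

    from belar.metrics import EditRatio, Rouge1

    ground_truth = ["The cat sat on the mat."]
    generated_text = ["A cat was sitting on the mat."]

    # Rouge1 and EditRatio are ready-made instances, so no construction is needed.
    print(Rouge1.score(ground_truth, generated_text))
    print(EditRatio.score(ground_truth, generated_text))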
15 changes: 10 additions & 5 deletions belar/metrics/base.py
@@ -2,7 +2,6 @@

import typing as t
from abc import ABC, abstractmethod
from collections import namedtuple
from dataclasses import dataclass

import numpy as np
@@ -13,16 +12,18 @@
class Metric(ABC):
@property
@abstractmethod
def name(self) -> str:
def name(self: t.Self) -> str:
...

@property
@abstractmethod
def is_batchable(self) -> bool:
def is_batchable(self: t.Self) -> bool:
...

@abstractmethod
def score(self, ground_truth: list[str], generated_text: list[str]) -> list[float]:
def score(
self: t.Self, ground_truth: list[str], generated_text: list[str]
) -> list[float]:
...


@@ -68,7 +69,11 @@ def _get_score(self, row: dict[str, list[t.Any]] | dict[str, t.Any]):
else: # not batched
split_indices = len(row["ground_truth"])
ground_truths = row["ground_truth"]
generated_texts = [row["generated_text"]] * split_indices
generated_text = row["generated_text"]
assert isinstance(
generated_text, str
), f"generated_text should be str but got {type(generated_text)}"
generated_texts = [generated_text] * split_indices
scores = metric.score(ground_truths, generated_texts)
score = np.max(scores)

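The non-batched branch above pairs a single generated text against every ground truth and keeps the best score. A standalone sketch of that replicate-and-max logic, with a hypothetical stand-in metric (word overlap) in place of a real Metric:

    import numpy as np

    def best_score(ground_truths: list[str], generated_text: str) -> float:
        # Replicate the one generated text so the metric sees aligned pairs.
        generated_texts = [generated_text] * len(ground_truths)
        # Stand-in metric: word overlap between each pair.
        scores = [
            len(set(gt.split()) & set(gen.split()))
            for gt, gen in zip(ground_truths, generated_texts)
        ]
        # Keep the most favorable ground truth, as _get_score does with np.max.
        return float(np.max(scores))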
5 changes: 4 additions & 1 deletion belar/metrics/factual.py
@@ -8,6 +8,9 @@
from belar.metrics import Metric
from belar.utils import device_check

if t.TYPE_CHECKING:
from torch import device as Device


@dataclass
class EntailmentScore(Metric):
@@ -18,7 +21,7 @@ class EntailmentScore(Metric):
model_name: str = "typeform/distilbert-base-uncased-mnli"
max_length: int = 512
batch_size: int = 4
device: t.Literal["cpu", "cuda"] = "cpu"
device: t.Literal["cpu", "cuda"] | Device = "cpu"

def __post_init__(self):
self.device = device_check(self.device)
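With device widened to also accept a torch.device, both spellings reach device_check through __post_init__. A usage sketch (the model weights download on first use):

    import torch
    from belar.metrics.factual import EntailmentScore

    scorer = EntailmentScore()                                 # defaults to "cpu"
    scorer_gpu = EntailmentScore(device=torch.device("cuda"))  # a torch.device is passed through as-is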
7 changes: 7 additions & 0 deletions belar/metrics/similarity.py
@@ -9,6 +9,9 @@

from belar.metrics.base import Metric

if t.TYPE_CHECKING:
from torch import Tensor

SBERT_METRIC = t.Literal["cosine", "euclidean"]


@@ -42,6 +45,10 @@ def score(
gentext_emb = self.model.encode(
generated_text, batch_size=self.batch_size, convert_to_numpy=True
)
assert isinstance(gentext_emb, np.ndarray) and isinstance(gndtruth_emb, np.ndarray), (
f"Both gndtruth_emb[{type(gndtruth_emb)}] and gentext_emb[{type(gentext_emb)}]"
" should be np.ndarray since convert_to_numpy=True."
)

if self.similarity_metric == "cosine":
score = np.dot(gndtruth_emb, gentext_emb.T) / (
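The expression truncated above is presumably the usual cosine similarity, normalizing the dot product by the embedding norms. For reference, a self-contained NumPy version (assuming 2-D arrays of shape (n, dim), as encode(..., convert_to_numpy=True) returns for list inputs):

    import numpy as np

    def cosine_similarity(a: np.ndarray, b: np.ndarray) -> np.ndarray:
        # Normalize each row, then take pairwise dot products; result is (n_a, n_b).
        a_norm = a / np.linalg.norm(a, axis=1, keepdims=True)
        b_norm = b / np.linalg.norm(b, axis=1, keepdims=True)
        return a_norm @ b_norm.T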
6 changes: 2 additions & 4 deletions belar/metrics/simple.py
@@ -14,7 +14,7 @@


@dataclass
class BLEU(Metric):
class BLEUScore(Metric):
weights: list[float] = field(default_factory=lambda: [0.25, 0.25, 0.25, 0.25])
smoothing_function = None

@@ -94,8 +94,6 @@ def score(self, ground_truth: t.List[str], generated_text: t.List[str]):
Rouge1 = ROUGE("rouge1")
Rouge2 = ROUGE("rouge2")
RougeL = ROUGE("rougeL")
BLUE = BLEU()
BLUE = BLEUScore()
EditDistance = EditScore("distance")
EditRatio = EditScore("ratio")

__all__ = ["Rouge1", "Rouge2", "RougeL", "BLUE", "EditDistance", "EditRatio"]
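The default weights give uniform 4-gram BLEU. Assuming BLEUScore wraps NLTK's sentence_bleu (an assumption; the weights and smoothing_function fields match that signature), the underlying computation looks roughly like:

    from nltk.translate.bleu_score import sentence_bleu

    references = [["the", "quick", "brown", "fox"]]  # tokenized ground truths
    hypothesis = ["the", "quick", "red", "fox"]      # tokenized generated text
    print(sentence_bleu(references, hypothesis, weights=(0.25, 0.25, 0.25, 0.25)))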
24 changes: 13 additions & 11 deletions belar/utils.py
@@ -1,19 +1,21 @@
import torch
from __future__ import annotations

import typing as t
from warnings import warn

import torch

if t.TYPE_CHECKING:
from torch import device as Device

DEVICES = ["cpu", "cuda"]


def device_check(device: t.Literal[DEVICES]):
if device == "cuda":
if torch.cuda.is_available():
device = torch.device("cuda")
else:
warn("cuda not available, using cpu")
elif device == "cpu":
device = torch.device("cpu")
else:
def device_check(device: t.Literal["cpu", "cuda"] | Device) -> torch.device:
if isinstance(device, Device):
return device
if device not in DEVICES:
raise ValueError(f"Invalid device {device}")
if device == "cuda" and not torch.cuda.is_available():
warn("cuda not available, using cpu")
device = "cpu"

return device
return torch.device(device)
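The rewrite makes device_check total over its accepted inputs and guarantees a torch.device return. A quick sketch of the three paths:

    import torch
    from belar.utils import device_check

    device_check(torch.device("cpu"))  # a torch.device is returned unchanged
    device_check("cpu")                # -> torch.device("cpu")
    device_check("cuda")               # -> torch.device("cuda"), or cpu plus a warning if CUDA is unavailable
    # device_check("tpu")              # raises ValueError("Invalid device tpu")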