1 change: 1 addition & 0 deletions .gitignore
@@ -164,3 +164,4 @@ ragas/_version.py
 experiments/**/data
 experiments/**/storage
 **/fil-result/
+src/ragas/_version.py
10 changes: 5 additions & 5 deletions Makefile
@@ -8,17 +8,17 @@ format: ## Running code formatter: black and isort
 	@echo "(isort) Ordering imports..."
 	@isort .
 	@echo "(black) Formatting codebase..."
-	@black --config pyproject.toml ragas tests examples
+	@black --config pyproject.toml src tests examples experiments
 	@echo "(black) Formatting stubs..."
-	@find ragas -name "*.pyi" ! -name "*_pb2*" -exec black --pyi --config pyproject.toml {} \;
+	@find src -name "*.pyi" ! -name "*_pb2*" -exec black --pyi --config pyproject.toml {} \;
 	@echo "(ruff) Running fix only..."
-	@ruff check ragas examples tests --fix-only
+	@ruff check src examples tests --fix-only
 lint: ## Running lint checker: ruff
 	@echo "(ruff) Linting development project..."
-	@ruff check ragas examples tests
+	@ruff check src examples tests
 type: ## Running type checker: pyright
 	@echo "(pyright) Typechecking codebase..."
-	@pyright ragas
+	@pyright src
 clean: ## Clean all generated files
 	@echo "Cleaning all generated files..."
 	@cd $(GIT_ROOT)/docs && make clean
12 changes: 7 additions & 5 deletions pyproject.toml
@@ -1,23 +1,25 @@
 [project]
 name = "ragas"
 dependencies = [
-    "Levenshtein",
-    "rouge-score",
     "numpy",
     "transformers",
     "sentence-transformers",
-    "nltk",
     "datasets",
-    "spacy<4.0.0,>=3.0.0",
     "protobuf<=3.20.0",
+    "backoff",
+    "openai",
 ]
 dynamic = ["version", "readme"]
 
+[tool.setuptools]
+package-dir = {"" = "src"}
+
 [tool.setuptools.dynamic]
 readme = {file = ["README.md"], content-type = "text/plain"}
 
 [build-system]
 requires = ["setuptools>=45", "setuptools_scm[toml]>=6.2"]
 build-backend = "setuptools.build_meta"
 
 [tool.setuptools_scm]
-write_to = "ragas/_version.py"
+write_to = "src/ragas/_version.py"
File renamed without changes.
13 changes: 8 additions & 5 deletions ragas/evaluation.py → src/ragas/evaluation.py
@@ -1,12 +1,10 @@
 from __future__ import annotations
 
 import typing as t
 from dataclasses import dataclass
-from enum import Enum
 
 import numpy as np
 from datasets import Dataset, concatenate_datasets
-from tqdm import tqdm
 
 from ragas.metrics.base import Metric
 
@@ -44,19 +42,24 @@ def evaluate(
     [m.init_model() for m in metrics]
 
     scores = []
-    for metric in tqdm(metrics):
+    for metric in metrics:
         scores.append(metric.score(dataset).select_columns(metric.name))
 
-    return Result(concatenate_datasets(scores))
+    return Result(concatenate_datasets(scores, axis=1))
 
 
 @dataclass
 class Result(dict):
     scores: Dataset
 
     def __post_init__(self):
+        values = []
         for cn in self.scores.column_names:
-            self[cn] = np.mean(self.scores[cn])
+            value = np.mean(self.scores[cn])
+            self[cn] = value
+            values.append(value)
+
+        self["ragas_score"] = len(values) / np.sum(1.0 / np.array(values))
 
     def describe(self):
         description = {}
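Two behavioural changes land here: `concatenate_datasets(scores, axis=1)` joins the single-column score datasets side by side instead of stacking rows, and `Result` now aggregates the per-metric means into a `ragas_score` via a harmonic mean, which punishes one weak metric harder than an arithmetic mean would. A small self-contained sketch (the numbers are invented for illustration, not taken from the repo):

```python
import numpy as np
from datasets import Dataset, concatenate_datasets

# Single-column datasets, as returned by metric.score(ds).select_columns(name):
a = Dataset.from_dict({"factuality": [0.9, 0.7]})
b = Dataset.from_dict({"answer_relevancy": [0.8, 0.6]})

# axis=1 places the columns next to each other; the default axis=0 would
# try to append rows instead.
scores = concatenate_datasets([a, b], axis=1)
print(scores.column_names)  # ['factuality', 'answer_relevancy']

# Harmonic mean of the column means (0.8 and 0.7):
values = np.array([np.mean(scores[cn]) for cn in scores.column_names])
print(len(values) / np.sum(1.0 / values))  # ~0.747, vs 0.75 arithmetic mean
```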
File renamed without changes.
File renamed without changes.
@@ -121,7 +121,7 @@ def predict(
     ) -> npt.NDArray[np.float64]:
         predictions = []
         dataloader = DataLoader(
-            sentences, batch_size=batch_size, collate_fn=self.collate_fn
+            sentences, batch_size=batch_size, collate_fn=self.collate_fn  # type: ignore
         )
 
         if show_progress:
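The new `# type: ignore` presumably silences pyright here: `DataLoader`'s first parameter is annotated as a torch `Dataset`, while the code passes a plain list of sentences, which works at runtime because a list already supports `__getitem__`/`__len__`. A tiny illustration (not code from the PR):

```python
from torch.utils.data import DataLoader

sentences = ["a cat sat", "on the mat", "quietly"]
loader = DataLoader(sentences, batch_size=2)  # type: ignore
for batch in loader:
    print(batch)  # ['a cat sat', 'on the mat'], then ['quietly']
```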
9 changes: 6 additions & 3 deletions ragas/metrics/base.py → src/ragas/metrics/base.py
@@ -47,8 +47,11 @@ def get_batches(self, dataset_size: int):
             range(i, i + self.batch_size)
             for i in range(0, self.batch_size * num_batches, self.batch_size)
         ]
-        batches.append(
-            range(self.batch_size * num_batches, self.batch_size * num_batches + tail)
-        )
+        if tail != 0:
+            batches.append(
+                range(
+                    self.batch_size * num_batches, self.batch_size * num_batches + tail
+                )
+            )
 
         return batches
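The guard matters when the dataset size is an exact multiple of the batch size: the old code appended an empty `range(n, n)` batch in that case. A standalone sketch of the fixed logic, assuming `num_batches` and `tail` come from `divmod(dataset_size, batch_size)` as in the surrounding method:

```python
def get_batches(dataset_size: int, batch_size: int) -> list[range]:
    num_batches, tail = divmod(dataset_size, batch_size)
    batches = [
        range(i, i + batch_size)
        for i in range(0, batch_size * num_batches, batch_size)
    ]
    if tail != 0:  # skip the empty trailing batch on exact multiples
        batches.append(
            range(batch_size * num_batches, batch_size * num_batches + tail)
        )
    return batches

print(get_batches(10, 5))  # [range(0, 5), range(5, 10)] - no range(10, 10)
print(get_batches(11, 5))  # [range(0, 5), range(5, 10), range(10, 11)]
```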
8 changes: 4 additions & 4 deletions ragas/metrics/factual.py → src/ragas/metrics/factual.py
@@ -28,7 +28,7 @@
 statements:\nShahul and Jithin were from different countries.
 question:{}
 answer: {}
-statements:\n"""
+statements:\n"""  # noqa: E501
 
 NLI_STATEMENTS = """
 Prompt: Natural language inference
@@ -53,7 +53,7 @@
 statements:\n{}
 Now, read the following statements and determine whether they are supported by the information present in the context. Provide a brief explanation for each statement. Also provide a Final Answer (Yes/No) at the end.
 Answer:
-"""
+"""  # noqa: E501
 
 
 @dataclass
@@ -87,7 +87,7 @@ def _score_batch(self: t.Self, ds: Dataset) -> Dataset:
 
         response = openai_completion(prompts)
         list_statements: list[list[str]] = []
-        for output in response["choices"]:
+        for output in response["choices"]:  # type: ignore
             statements = output["text"].split("\n")
             list_statements.append(statements)
 
@@ -101,7 +101,7 @@ def _score_batch(self: t.Self, ds: Dataset) -> Dataset:
             prompts.append(prompt)
 
         response = openai_completion(prompts)
-        outputs = response["choices"]
+        outputs = response["choices"]  # type: ignore
 
         scores = []
         for i, output in enumerate(outputs):
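Besides the `# noqa: E501` markers on the long prompt strings, the `# type: ignore` comments cover indexing into the OpenAI completion response, which pyright cannot type as a plain dict. A mocked sketch of the parsing step above (the response content is invented):

```python
# Stand-in for what openai_completion(prompts) returns; the real object
# comes from the OpenAI SDK and is indexed the same way.
response = {
    "choices": [
        {"text": "Shahul is from India.\nJithin is from India."},
    ]
}

list_statements: list[list[str]] = []
for output in response["choices"]:
    statements = output["text"].split("\n")
    list_statements.append(statements)

print(list_statements)  # [['Shahul is from India.', 'Jithin is from India.']]
```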
2 changes: 2 additions & 0 deletions ragas/metrics/llms.py → src/ragas/metrics/llms.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import logging
 import os
File renamed without changes.
58 changes: 0 additions & 58 deletions tests/benchmarks/benchmark.py

This file was deleted.

25 changes: 12 additions & 13 deletions tests/benchmarks/benchmark_eval.py
@@ -1,22 +1,21 @@
-from datasets import arrow_dataset, load_dataset
+import os
+
+from datasets import Dataset, load_dataset
 from torch.cuda import is_available
 
-from ragas.metrics import Evaluation, bert_score, edit_ratio, rougeL
-from ragas.metrics.factual import EntailmentScore
+from ragas import evaluate
+from ragas.metrics import answer_relevancy, context_relavancy, factuality
 
 DEVICE = "cuda" if is_available() else "cpu"
-entailment_score = EntailmentScore(device=DEVICE, batch_size=2)
-# q_square = Qsquare(device=DEVICE, batch_size=2)
 
-DS = load_dataset("explodinggradients/ragas-webgpt", split="train")
-assert isinstance(DS, arrow_dataset.Dataset), "Not an arrow_dataset"
-DS = DS.select(range(500))
+PATH_TO_DATSET_GIT_REPO = "../../../datasets/fiqa/"
+assert os.path.isdir(PATH_TO_DATSET_GIT_REPO), "Dataset not found"
+ds = Dataset.from_json(os.path.join(PATH_TO_DATSET_GIT_REPO, "gen_ds.json"))
+assert isinstance(ds, Dataset)
 
 if __name__ == "__main__":
-    e = Evaluation(
-        metrics=[rougeL, edit_ratio, bert_score, entailment_score],
-        batched=True,
-        batch_size=64,
+    result = evaluate(
+        ds,
+        metrics=[answer_relevancy, context_relavancy, factuality],
     )
-    result = e.eval(DS["ground_truth"], DS["generated_text"])
     print(result)
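For anyone without the fiqa dataset checkout, a hypothetical stand-in for `gen_ds.json` — the column names below are assumptions about what the three metrics read, not something this PR specifies:

```python
from datasets import Dataset

ds = Dataset.from_dict(
    {
        "question": ["What does ragas score?"],
        "contexts": [["ragas scores RAG pipelines on several axes."]],
        "answer": ["It scores RAG pipelines."],
    }
)
# evaluate(ds, metrics=[...]) would then run against this in place of the
# benchmark dataset (an OpenAI API key is required for these metrics).
```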