Merged
38 commits
4b06818
changed name RagasLLM -> BaseRagasLLM
jjmachan Dec 14, 2023
f30cf57
added langchain_core llm
jjmachan Dec 16, 2023
1fcea66
changed faithfulness to single metric
jjmachan Dec 16, 2023
84d9cb2
fixing llm for executor
jjmachan Dec 17, 2023
8a41af7
added callbacks
jjmachan Dec 17, 2023
0c61475
ported a couple of metrics
jjmachan Dec 18, 2023
8cf84fc
executor tested
jjmachan Dec 19, 2023
21c1456
executor object created
jjmachan Dec 19, 2023
a779aea
error handling for both
jjmachan Dec 19, 2023
44558d9
fixed up BaseRagasLLM and BaseRagasEmbeddings
jjmachan Dec 19, 2023
b4e080d
as_complete functionality
jjmachan Dec 19, 2023
db71c5d
fix BaseRagasLLM
jjmachan Dec 21, 2023
7161674
basic callbacks configured
jjmachan Dec 21, 2023
f8b98f5
fixed results
jjmachan Jan 1, 2024
8d654c4
merged with main
jjmachan Jan 1, 2024
8163d45
remove DS_store
jjmachan Jan 1, 2024
9395c29
fix prompts
jjmachan Jan 1, 2024
9dcc4fc
moved tests
jjmachan Jan 1, 2024
30422c0
Merge branch 'fix/prompt' into feat/executor
jjmachan Jan 2, 2024
14aa522
fixed some metrics
jjmachan Jan 2, 2024
777db4d
answer_correctness ported
jjmachan Jan 2, 2024
baebebc
formating
jjmachan Jan 2, 2024
4318d1b
critique ported
jjmachan Jan 2, 2024
306ebe1
contex_recall ported
jjmachan Jan 3, 2024
6ff94a2
context_relevancy ported
jjmachan Jan 3, 2024
3691653
added benchmark
jjmachan Jan 3, 2024
8ccce15
Merge branch 'main' into feat/executor
jjmachan Jan 3, 2024
1ecde7b
fix tests
jjmachan Jan 3, 2024
f480eaf
fix ci
jjmachan Jan 3, 2024
9a053fc
merged with main
jjmachan Jan 3, 2024
0c2f5ca
fmt
jjmachan Jan 3, 2024
c5c7411
Merge branch 'main' into feat/executor
jjmachan Jan 3, 2024
cb521fd
fix ci
jjmachan Jan 3, 2024
60fc29f
fix tests
jjmachan Jan 4, 2024
fa4b24a
async=False is defualt
jjmachan Jan 4, 2024
e3e5104
Merge branch 'main' into feat/executor
jjmachan Jan 4, 2024
eb06f2f
fix prompt
jjmachan Jan 4, 2024
8f1ce4a
fixed context_relevancy bug
jjmachan Jan 4, 2024
6 changes: 1 addition & 5 deletions .gitignore
@@ -161,11 +161,7 @@ cython_debug/

# Ragas specific
ragas/_version.py
experiments/**/data
experiments/**/storage
experiments/
**/fil-result/
experiments/baselines/fiqa/datasets
src/ragas/_version.py
.python-version
experiments/retriever-benchmarks/datasets
experiments/tmp
1 change: 0 additions & 1 deletion docs/conf.py
@@ -1,4 +1,3 @@
import os
from dataclasses import asdict

from sphinxawesome_theme import ThemeOptions
2 changes: 1 addition & 1 deletion docs/howtos/customisations/embeddings.ipynb
@@ -169,7 +169,7 @@
"\n",
"result = evaluate(\n",
" fiqa_eval[\"baseline\"].select(range(5)), # showing only 5 for demonstration\n",
" metrics=[answer_similarity]\n",
" metrics=[answer_similarity],\n",
")\n",
"\n",
"result"
17 changes: 9 additions & 8 deletions docs/howtos/customisations/gcp-vertexai.ipynb
@@ -98,7 +98,7 @@
"source": [
"from ragas.metrics import (\n",
" context_precision,\n",
" answer_relevancy, # AnswerRelevancy\n",
" answer_relevancy, # AnswerRelevancy\n",
" faithfulness,\n",
" context_recall,\n",
")\n",
@@ -110,7 +110,7 @@
" answer_relevancy,\n",
" context_recall,\n",
" context_precision,\n",
" harmfulness\n",
" harmfulness,\n",
"]"
]
},
@@ -137,7 +137,6 @@
"from langchain.embeddings import VertexAIEmbeddings\n",
"\n",
"\n",
"\n",
"config = {\n",
" \"project_id\": \"tmp-project-404003\",\n",
"}\n",
@@ -170,7 +169,7 @@
"for m in metrics:\n",
" # change LLM for metric\n",
" m.__setattr__(\"llm\", ragas_vertexai_llm)\n",
" \n",
"\n",
" # check if this metric needs embeddings\n",
" if hasattr(m, \"embeddings\"):\n",
" # if so change with VertexAI Embeddings\n",
@@ -276,13 +275,15 @@
],
"source": [
"from ragas import evaluate\n",
"import nest_asyncio # CHECK NOTES\n",
"import nest_asyncio # CHECK NOTES\n",
"\n",
"# NOTES: Only used when running on a jupyter notebook, otherwise comment or remove this function. \n",
"nest_asyncio.apply() \n",
"# NOTES: Only used when running on a jupyter notebook, otherwise comment or remove this function.\n",
"nest_asyncio.apply()\n",
"\n",
"result = evaluate(\n",
" fiqa_eval[\"baseline\"].select(range(1)), # using 1 as example due to quota constrains\n",
" fiqa_eval[\"baseline\"].select(\n",
" range(1)\n",
" ), # using 1 as example due to quota constrains\n",
" metrics=metrics,\n",
")\n",
"\n",
2 changes: 1 addition & 1 deletion docs/howtos/integrations/zeno.ipynb
@@ -186,7 +186,7 @@
" ]\n",
"].copy()\n",
"\n",
"output_df['output'] = df.apply(\n",
"output_df[\"output\"] = df.apply(\n",
" lambda x: {\"answer\": x[\"answer\"], \"ground_truths\": list(x[\"ground_truths\"])}, axis=1\n",
")\n",
"output_df[\"id\"] = output_df.index\n",
3 changes: 3 additions & 0 deletions pyproject.toml
@@ -23,6 +23,9 @@ package-dir = {"" = "src"}
[tool.setuptools.dynamic]
readme = {file = ["README.md"], content-type = "text/plain"}

[tool.ruff.lint]
ignore = ["E501"]

[build-system]
requires = ["setuptools>=45", "setuptools_scm[toml]>=6.2"]
build-backend = "setuptools.build_meta"
58 changes: 58 additions & 0 deletions src/ragas/callbacks.py
@@ -0,0 +1,58 @@
import typing as t

from langchain_core.callbacks import (
AsyncCallbackManager,
AsyncCallbackManagerForChainGroup,
AsyncCallbackManagerForChainRun,
CallbackManager,
CallbackManagerForChainGroup,
CallbackManagerForChainRun,
Callbacks,
)


def new_group(
name: str, inputs: t.Dict, callbacks: Callbacks, is_async=False
) -> t.Tuple[CallbackManagerForChainRun, CallbackManagerForChainGroup]:
# start evaluation chain
if isinstance(callbacks, list):
cm = CallbackManager.configure(inheritable_callbacks=callbacks)
else:
cm = t.cast(CallbackManager, callbacks)
rm = cm.on_chain_start({"name": name}, inputs)
child_cm = rm.get_child()
group_cm = CallbackManagerForChainGroup(
child_cm.handlers,
child_cm.inheritable_handlers,
child_cm.parent_run_id,
parent_run_manager=rm,
tags=child_cm.tags,
inheritable_tags=child_cm.inheritable_tags,
metadata=child_cm.metadata,
inheritable_metadata=child_cm.inheritable_metadata,
)

return rm, group_cm


async def new_async_group(
name: str, inputs: t.Dict, callbacks: Callbacks
) -> t.Tuple[AsyncCallbackManagerForChainRun, AsyncCallbackManagerForChainGroup]:
# start evaluation chain
if isinstance(callbacks, list):
cm = AsyncCallbackManager.configure(inheritable_callbacks=callbacks)
else:
cm = t.cast(AsyncCallbackManager, callbacks)
rm = await cm.on_chain_start({"name": name}, inputs)
child_cm = rm.get_child()
group_cm = AsyncCallbackManagerForChainGroup(
child_cm.handlers,
child_cm.inheritable_handlers,
child_cm.parent_run_id,
parent_run_manager=rm,
tags=child_cm.tags,
inheritable_tags=child_cm.inheritable_tags,
metadata=child_cm.metadata,
inheritable_metadata=child_cm.inheritable_metadata,
)
return rm, group_cm
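
Reviewer note: a minimal usage sketch of the new `new_group` helper (not part of the diff; the handler, run name, and output values are illustrative). It shows the intended pattern: open a parent chain run, hand the returned group manager to nested work, and close the run explicitly.

import typing as t

from langchain_core.callbacks import StdOutCallbackHandler

from ragas.callbacks import new_group

# Open a parent chain run; any langchain callback handler can be passed in.
handlers: t.List = [StdOutCallbackHandler()]
rm, group_cm = new_group(
    name="ragas evaluation", inputs={"question": "..."}, callbacks=handlers
)
try:
    # Nested metric calls would receive `group_cm` as their `callbacks`
    # argument so their traces stay grouped under this parent run.
    rm.on_chain_end({"faithfulness": 1.0})
except Exception as err:
    rm.on_chain_error(err)
    raise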
4 changes: 2 additions & 2 deletions src/ragas/embeddings/__init__.py
@@ -1,15 +1,15 @@
from ragas.embeddings.base import (
AzureOpenAIEmbeddings,
BaseRagasEmbeddings,
FastEmbedEmbeddings,
HuggingfaceEmbeddings,
OpenAIEmbeddings,
RagasEmbeddings,
)

__all__ = [
"HuggingfaceEmbeddings",
"OpenAIEmbeddings",
"AzureOpenAIEmbeddings",
"RagasEmbeddings",
"BaseRagasEmbeddings",
"FastEmbedEmbeddings",
]
18 changes: 7 additions & 11 deletions src/ragas/embeddings/base.py
@@ -18,15 +18,11 @@
DEFAULT_MODEL_NAME = "BAAI/bge-small-en-v1.5"


class RagasEmbeddings(Embeddings):
def validate_api_key(self):
"""
Validates that the api key is set for the Embeddings
"""
pass
class BaseRagasEmbeddings(Embeddings):
...


class OpenAIEmbeddings(BaseOpenAIEmbeddings, RagasEmbeddings):
class OpenAIEmbeddings(BaseOpenAIEmbeddings, BaseRagasEmbeddings):
api_key: str = NO_KEY

def __init__(self, api_key: str = NO_KEY):
@@ -48,7 +44,7 @@ def validate_api_key(self):
raise OpenAIKeyNotFound


class FastEmbedEmbeddings(BaseFastEmbedEmbeddings, RagasEmbeddings):
class FastEmbedEmbeddings(BaseFastEmbedEmbeddings, BaseRagasEmbeddings):
"""
Find the list of supported models at:
https://qdrant.github.io/fastembed/examples/Supported_Models/
@@ -66,7 +62,7 @@ def validate_api_key(self):
pass


class AzureOpenAIEmbeddings(BaseAzureOpenAIEmbeddings, RagasEmbeddings):
class AzureOpenAIEmbeddings(BaseAzureOpenAIEmbeddings, BaseRagasEmbeddings):
azure_endpoint: t.Optional[str] = None
deployment: t.Optional[str] = None
api_version: t.Optional[str] = None
@@ -104,7 +100,7 @@ def validate_api_key(self):


@dataclass
class HuggingfaceEmbeddings(RagasEmbeddings):
class HuggingfaceEmbeddings(BaseRagasEmbeddings):
model_name: str = DEFAULT_MODEL_NAME
"""Model name to use."""
cache_folder: t.Optional[str] = None
@@ -178,6 +174,6 @@ def predict(self, texts: List[List[str]]) -> List[List[float]]:
return predictions.tolist()


def embedding_factory() -> RagasEmbeddings:
def embedding_factory() -> BaseRagasEmbeddings:
openai_embeddings = OpenAIEmbeddings()
return openai_embeddings
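
Reviewer note: a hypothetical sketch of what the rename enables (not part of the diff). Since `BaseRagasEmbeddings` is now a bare subclass of langchain's `Embeddings` with no `validate_api_key` hook, any class implementing the two standard embedding methods can plug in. The hashing "model" below is a toy stand-in for tests, not a real embedder.

import hashlib
import typing as t

from ragas.embeddings.base import BaseRagasEmbeddings


class ToyEmbeddings(BaseRagasEmbeddings):
    """Deterministic fake embeddings, handy for unit tests."""

    def embed_query(self, text: str) -> t.List[float]:
        # Derive a fixed 8-dimensional vector from the text's SHA-256 digest.
        digest = hashlib.sha256(text.encode("utf-8")).digest()
        return [b / 255.0 for b in digest[:8]]

    def embed_documents(self, texts: t.List[str]) -> t.List[t.List[float]]:
        return [self.embed_query(doc) for doc in texts]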
111 changes: 92 additions & 19 deletions src/ragas/evaluation.py
@@ -5,21 +5,36 @@

import numpy as np
from datasets import Dataset, concatenate_datasets
from langchain_core.language_models import BaseLanguageModel

from ragas._analytics import EvaluationEvent, track
from ragas.metrics.base import Metric
from ragas.metrics.critique import AspectCritique
from ragas.callbacks import new_group
from ragas.embeddings.base import BaseRagasEmbeddings
from ragas.executor import Executor
from ragas.llms.base import BaseRagasLLM, LangchainLLMWrapper
from ragas.metrics.base import Metric, MetricWithLLM

# from ragas.metrics.critique import AspectCritique
from ragas.validation import (
remap_column_names,
validate_column_dtypes,
validate_evaluation_modes,
)

if t.TYPE_CHECKING:
from langchain_core.callbacks import Callbacks


def evaluate(
dataset: Dataset,
metrics: list[Metric] | None = None,
column_map: dict[str, str] = {},
llm: t.Optional[BaseRagasLLM] = None,
embeddings: t.Optional[BaseRagasEmbeddings] = None,
callbacks: Callbacks = [],
is_async: bool = False,
max_workers: t.Optional[int] = None,
raise_exceptions: bool = True,
column_map: t.Dict[str, str] = {},
) -> Result:
"""
Run the evaluation on the dataset with different metrics
@@ -81,24 +96,87 @@ def evaluate(
)

metrics = [answer_relevancy, context_precision, faithfulness, context_recall]
# set the llm and embeddings
if llm is None:
from ragas.llms import llm_factory

llm = llm_factory()
elif isinstance(llm, BaseLanguageModel):
llm = LangchainLLMWrapper(llm)
if embeddings is None:
from ragas.embeddings.base import embedding_factory

embeddings = embedding_factory()

# remap column names from the dataset
dataset = remap_column_names(dataset, column_map)
# validation
validate_evaluation_modes(dataset, metrics)
validate_column_dtypes(dataset)

# run the evaluation on dataset with different metrics
binary_metrics = []
for metric in metrics:
# if isinstance(metric, AspectCritique):
# binary_metrics.append(metric.name)
if isinstance(metric, MetricWithLLM):
if metric.llm is None:
metric.llm = llm

# initialize all the models in the metrics
[m.init_model() for m in metrics]

executor = Executor(
is_async=is_async, max_workers=max_workers, raise_exceptions=raise_exceptions
)
# new evaluation chain
row_run_managers = []
evaluation_rm, evaluation_group_cm = new_group(
name="ragas evaluation", inputs={}, callbacks=callbacks, is_async=is_async
)
for i, row in enumerate(dataset):
row = t.cast(t.Dict[str, t.Any], row)
row_rm, row_group_cm = new_group(
name=f"row {i}",
inputs=row,
callbacks=evaluation_group_cm,
is_async=is_async,
)
row_run_managers.append((row_rm, row_group_cm))

if is_async:
[executor.submit(metric.ascore, row, row_group_cm) for metric in metrics]
else:
[executor.submit(metric.score, row, row_group_cm) for metric in metrics]

scores = []
binary_metrics = []
for metric in metrics:
if isinstance(metric, AspectCritique):
binary_metrics.append(metric.name)
print(f"evaluating with [{metric.name}]")
scores.append(metric.score(dataset).select_columns(metric.name))
try:
# get the results
results = executor.results()
# convert results to dataset_like
for i, _ in enumerate(dataset):
s = {}
for j, m in enumerate(metrics):
s[m.name] = results[len(metrics) * i + j]
scores.append(s)
# close the row chain
row_rm, row_group_cm = row_run_managers[i]
if not row_group_cm.ended:
row_rm.on_chain_end(s)

# run evaluation task
except Exception as e:
if not evaluation_group_cm.ended:
evaluation_rm.on_chain_error(e)

raise e
finally:
result = Result(
scores=Dataset.from_list(scores),
dataset=dataset,
binary_columns=binary_metrics,
)
if not evaluation_group_cm.ended:
evaluation_rm.on_chain_end(result)

# log the evaluation event
metrics_names = [m.name for m in metrics]
@@ -110,23 +188,18 @@ def evaluate(
num_rows=dataset.shape[0],
)
)

return Result(
scores=concatenate_datasets(scores, axis=1),
dataset=dataset,
binary_columns=binary_metrics,
)
return result


@dataclass
class Result(dict):
scores: Dataset
dataset: Dataset | None = None
binary_columns: list[str] = field(default_factory=list)
dataset: t.Optional[Dataset] = None
binary_columns: t.List[str] = field(default_factory=list)

def __post_init__(self):
values = []
for cn in self.scores.column_names:
for cn in self.scores[0].keys():
value = np.nanmean(self.scores[cn])
self[cn] = value
if cn not in self.binary_columns:
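Reviewer note: to close the loop, a hedged sketch of calling the reworked `evaluate` (not in the diff). Column names follow the ragas conventions of this era, and the default `llm_factory`/`embedding_factory` path assumes an `OPENAI_API_KEY` is configured; the dataset values are illustrative.

from datasets import Dataset

from ragas import evaluate
from ragas.metrics import faithfulness

ds = Dataset.from_dict(
    {
        "question": ["What is the capital of France?"],
        "answer": ["Paris."],
        "contexts": [["Paris is the capital of France."]],
        "ground_truths": [["Paris"]],
    }
)

# is_async=False is the default per commit fa4b24a; errors propagate
# because raise_exceptions defaults to True.
result = evaluate(ds, metrics=[faithfulness])
print(result)  # Result is a dict of metric name -> mean score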