Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
cbc0d14
add optimizer class
shahules786 Nov 25, 2024
bba3004
add train method
shahules786 Nov 25, 2024
1ee5e06
train configs
shahules786 Nov 25, 2024
35d3d21
add core schema
shahules786 Nov 25, 2024
e4e362c
add embedding
shahules786 Nov 25, 2024
0e7ff21
add type annotation
shahules786 Nov 25, 2024
b39be13
add train
shahules786 Nov 26, 2024
6ccd177
add optimizer genetic based
shahules786 Nov 26, 2024
0444af8
added prompts
shahules786 Nov 26, 2024
cc60cd2
add loss and output type
shahules786 Nov 26, 2024
adfda67
add loss
shahules786 Nov 26, 2024
919e5d4
dataset and optimizer conf
shahules786 Nov 27, 2024
8f65e47
add filter and load
shahules786 Nov 27, 2024
681ea72
add batching
shahules786 Nov 27, 2024
3b613be
add batching and reverse engineering
shahules786 Nov 28, 2024
8c3e64a
add executors and population size
shahules786 Nov 28, 2024
c931601
remove cross over
shahules786 Nov 28, 2024
703dede
allow evaluating fitness
shahules786 Nov 28, 2024
6a0577a
make runconfig optional
shahules786 Nov 29, 2024
2fb0daf
fix fitness validation
shahules786 Nov 29, 2024
d10b445
implement feedback mutation
shahules786 Nov 30, 2024
a4771f8
add sample fun
shahules786 Nov 30, 2024
80c194d
make run_id optional
shahules786 Nov 30, 2024
2bc5834
organize evaluate
shahules786 Dec 2, 2024
bab448f
add cross over mutation
shahules786 Dec 2, 2024
1085b47
add hamming distance
shahules786 Dec 2, 2024
b63757d
change optimzer return type
shahules786 Dec 2, 2024
d7aaa0d
add notimplemented error for train/test split
shahules786 Dec 2, 2024
bc2803c
fix uuid
shahules786 Dec 2, 2024
4b2c6a9
fix parsing
shahules786 Dec 3, 2024
29fe31c
Merge branch 'main' into genetic-optmin-main
shahules786 Dec 3, 2024
3dd8ec7
Merge branch 'main' into genetic-optmin-main
shahules786 Dec 3, 2024
607a950
merge changes
shahules786 Dec 3, 2024
27767eb
add defaults
shahules786 Dec 3, 2024
75cc309
fix key mapping isssues
shahules786 Dec 3, 2024
e6374e6
fixed progress bar
shahules786 Dec 5, 2024
1801ea5
added suggested improvements
shahules786 Dec 7, 2024
fc2b40e
check for empty feedback
shahules786 Dec 7, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions src/ragas/callbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,12 +133,15 @@ def __str__(self):

def parse_run_traces(
traces: t.Dict[str, ChainRun],
parent_run_id: t.Optional[str] = None,
) -> t.List[t.Dict[str, t.Any]]:

root_traces = [
chain_trace
for chain_trace in traces.values()
if chain_trace.parent_run_id is None
if chain_trace.parent_run_id == parent_run_id
]

if len(root_traces) > 1:
raise ValueError(
"Multiple root traces found! This is a bug on our end, please file an issue and we will fix it ASAP :)"
Expand All @@ -159,7 +162,7 @@ def parse_run_traces(
prompt_traces = {}
for i, prompt_uuid in enumerate(metric_trace.children):
prompt_trace = traces[prompt_uuid]
prompt_traces[f"{i}_{prompt_trace.name}"] = {
prompt_traces[f"{prompt_trace.name}"] = {
"input": prompt_trace.inputs.get("data", {}),
"output": prompt_trace.outputs.get("output", {}),
}
Expand Down
4 changes: 2 additions & 2 deletions src/ragas/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from ragas.embeddings import BaseRagasEmbeddings
from ragas.llms import BaseRagasLLM
from ragas.losses import Loss
from ragas.optimizers import Optimizer
from ragas.optimizers import GeneticOptimizer, Optimizer

DEFAULT_OPTIMIZER_CONFIG = {"max_steps": 100}

Expand All @@ -20,7 +20,7 @@ class DemonstrationConfig(BaseModel):
class InstructionConfig(BaseModel):
enabled: bool = True
loss: t.Optional[Loss] = None
optimizer: Optimizer
optimizer: Optimizer = GeneticOptimizer()
optimizer_config: t.Dict[str, t.Any] = Field(
default_factory=lambda: DEFAULT_OPTIMIZER_CONFIG
)
Expand Down
13 changes: 11 additions & 2 deletions src/ragas/dataset_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from abc import ABC, abstractmethod
from collections import defaultdict
from dataclasses import dataclass, field
from uuid import UUID

import numpy as np
from datasets import Dataset as HFDataset
Expand Down Expand Up @@ -43,6 +44,13 @@ def get_features(self) -> t.List[str]:
"""
return list(self.to_dict().keys())

def to_string(self) -> str:
    """
    Render the sample as text, one labelled section per feature.

    Each entry of ``self.to_dict()`` becomes a section of the form
    ``"\\n<key>:\\n\\t<value>\\n"``; the sections are concatenated in the
    dictionary's iteration order. An empty dictionary yields ``""``.
    """
    sections = []
    for feature_name, feature_value in self.to_dict().items():
        sections.append(f"\n{feature_name}:\n\t{feature_value}\n")
    return "".join(sections)


class SingleTurnSample(BaseSample):
"""
Expand Down Expand Up @@ -378,6 +386,7 @@ class EvaluationResult:
cost_cb: t.Optional[CostCallbackHandler] = None
traces: t.List[t.Dict[str, t.Any]] = field(default_factory=list)
ragas_traces: t.Dict[str, ChainRun] = field(default_factory=dict, repr=False)
run_id: t.Optional[UUID] = None

def __post_init__(self):
# transform scores from list of dicts to dict of lists
Expand All @@ -395,7 +404,8 @@ def __post_init__(self):
values.append(value + 1e-10)

# parse the traces
self.traces = parse_run_traces(self.ragas_traces)
run_id = str(self.run_id) if self.run_id is not None else None
self.traces = parse_run_traces(self.ragas_traces, run_id)

def __repr__(self) -> str:
score_strs = [f"'{k}': {v:0.4f}" for k, v in self._repr_dict.items()]
Expand Down Expand Up @@ -531,7 +541,6 @@ def upload(self, base_url: str = RAGAS_API_URL, verbose: bool = True) -> str:
return evaluation_endpoint



class PromptAnnotation(BaseModel):
prompt_input: t.Dict[str, t.Any]
prompt_output: t.Dict[str, t.Any]
Expand Down
9 changes: 8 additions & 1 deletion src/ragas/evaluation.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from __future__ import annotations

import typing as t
from uuid import UUID

from datasets import Dataset
from langchain_core.callbacks import BaseCallbackHandler, BaseCallbackManager
from langchain_core.embeddings import Embeddings as LangchainEmbeddings
from langchain_core.language_models import BaseLanguageModel as LangchainLLM
from tqdm.auto import tqdm

from ragas._analytics import track_was_completed
from ragas.callbacks import ChainType, RagasTracer, new_group
Expand Down Expand Up @@ -59,12 +61,14 @@ def evaluate(
embeddings: t.Optional[BaseRagasEmbeddings | LangchainEmbeddings] = None,
callbacks: Callbacks = None,
in_ci: bool = False,
run_config: RunConfig = RunConfig(),
run_config: t.Optional[RunConfig] = None,
token_usage_parser: t.Optional[TokenUsageParser] = None,
raise_exceptions: bool = False,
column_map: t.Optional[t.Dict[str, str]] = None,
show_progress: bool = True,
batch_size: t.Optional[int] = None,
_run_id: t.Optional[UUID] = None,
_pbar: t.Optional[tqdm] = None,
) -> EvaluationResult:
"""
Run the evaluation on the dataset with different metrics
Expand Down Expand Up @@ -146,6 +150,7 @@ def evaluate(
"""
column_map = column_map or {}
callbacks = callbacks or []
run_config = run_config or RunConfig()

if helicone_config.is_enabled:
import uuid
Expand Down Expand Up @@ -226,6 +231,7 @@ def evaluate(
run_config=run_config,
show_progress=show_progress,
batch_size=batch_size,
pbar=_pbar,
)

# Ragas Callbacks
Expand Down Expand Up @@ -333,6 +339,7 @@ def evaluate(
cost_cb,
),
ragas_traces=tracer.traces,
run_id=_run_id,
)
if not evaluation_group_cm.ended:
evaluation_rm.on_chain_end({"scores": result.scores})
Expand Down
38 changes: 24 additions & 14 deletions src/ragas/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ class Executor:
batch_size: t.Optional[int] = None
run_config: t.Optional[RunConfig] = field(default=None, repr=False)
_nest_asyncio_applied: bool = field(default=False, repr=False)
pbar: t.Optional[tqdm] = None

def wrap_callable_with_index(
self, callable: t.Callable, counter: int
Expand Down Expand Up @@ -127,21 +128,22 @@ async def _process_jobs(self) -> t.List[t.Any]:
results = []

if not self.batch_size:
with tqdm(
total=len(self.jobs),
desc=self.desc,
disable=not self.show_progress,
) as pbar:
# Create coroutines
coroutines = [
afunc(*args, **kwargs) for afunc, args, kwargs, _ in self.jobs
]
for future in await as_completed(coroutines, max_workers):
result = await future
results.append(result)
pbar.update(1)
# Use external progress bar if provided, otherwise create one
if self.pbar is None:
with tqdm(
total=len(self.jobs),
desc=self.desc,
disable=not self.show_progress,
) as internal_pbar:
await self._process_coroutines(
self.jobs, internal_pbar, results, max_workers
)
else:
await self._process_coroutines(
self.jobs, self.pbar, results, max_workers
)

return results
return results

# With batching, show nested progress bars
batches = batched(self.jobs, self.batch_size) # generator of job tuples
Expand Down Expand Up @@ -179,6 +181,14 @@ async def _process_jobs(self) -> t.List[t.Any]:

return results

async def _process_coroutines(self, jobs, pbar, results, max_workers):
    """
    Run every job and record its result, ticking the progress bar per job.

    Parameters
    ----------
    jobs
        Iterable of 4-tuples ``(afunc, args, kwargs, _)``; the fourth item is
        ignored here.
    pbar
        Progress bar whose ``update(1)`` is called once per finished job.
    results
        List that is appended to in place, in the order the awaitables are
        handed back by ``as_completed``.
    max_workers
        Forwarded unchanged to ``as_completed``.
    """
    # Build all coroutine objects up front; they are scheduled by as_completed.
    pending = [
        job_fn(*job_args, **job_kwargs) for job_fn, job_args, job_kwargs, _ in jobs
    ]
    finished_iter = await as_completed(pending, max_workers)
    for finished in finished_iter:
        results.append(await finished)
        pbar.update(1)

def results(self) -> t.List[t.Any]:
"""
Execute all submitted jobs and return their results. The results are returned in the order of job submission.
Expand Down
14 changes: 14 additions & 0 deletions src/ragas/losses.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import typing as t
from abc import ABC, abstractmethod

from pydantic import GetCoreSchemaHandler
from pydantic_core import CoreSchema, core_schema


class Loss(ABC):
"""
Expand All @@ -11,6 +14,17 @@ class Loss(ABC):
def __call__(self, predicted: t.List, actual: t.List) -> float:
raise NotImplementedError

@classmethod
def __get_pydantic_core_schema__(
    cls, source_type: t.Any, handler: GetCoreSchemaHandler
) -> CoreSchema:
    """
    Define how Pydantic generates a core schema for ``Loss`` fields.

    A field annotated with this class accepts any object that is an instance
    of the class (or of a subclass) and stores it unchanged.

    Parameters
    ----------
    source_type
        The annotated type Pydantic is building a schema for (unused).
    handler
        Pydantic's schema handler (unused; no inner schema is delegated to).

    Returns
    -------
    CoreSchema
        An ``isinstance`` check against ``cls``.
    """
    # NOTE: the class itself must not be used as an after-validator function:
    # Pydantic would then call ``cls(validated_instance)``, which fails for an
    # abstract base class that takes no constructor arguments.
    return core_schema.is_instance_schema(cls)


class MSELoss(Loss):
"""
Expand Down
76 changes: 71 additions & 5 deletions src/ragas/metrics/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@

from ragas._analytics import EvaluationEvent, _analytics_batcher
from ragas.callbacks import ChainType, new_group
from ragas.dataset_schema import MultiTurnSample, SingleTurnSample
from ragas.dataset_schema import MetricAnnotation, MultiTurnSample, SingleTurnSample
from ragas.executor import is_event_loop_running
from ragas.losses import BinaryMetricLoss, MSELoss
from ragas.prompt import PromptMixin
from ragas.run_config import RunConfig
from ragas.utils import (
Expand Down Expand Up @@ -232,12 +233,77 @@ def init(self, run_config: RunConfig):
def train(
    self,
    path: str,
    demonstration_config: t.Optional[DemonstrationConfig] = None,
    instruction_config: t.Optional[InstructionConfig] = None,
    callbacks: t.Optional[Callbacks] = None,
    run_config: t.Optional[RunConfig] = None,
    batch_size: t.Optional[int] = None,
    with_debugging_logs: bool = False,
    raise_exceptions: bool = True,
) -> None:
    """
    Optimize this metric's prompt instructions against annotated data.

    The annotations are loaded from ``path`` with
    ``MetricAnnotation.from_json``, the optimizer from ``instruction_config``
    is run on the entries for this metric, and every instruction it returns
    is written back onto the matching prompt of the metric.

    Parameters
    ----------
    path
        Path to the annotation file; must end with ``.json``.
    demonstration_config
        Defaults to ``DemonstrationConfig()`` when omitted. It is not used
        further by this method.
    instruction_config
        Supplies the optimizer, its config, and optionally an LLM and a loss.
        Defaults to ``InstructionConfig()`` when omitted.
    callbacks, run_config, batch_size, with_debugging_logs, raise_exceptions
        Forwarded unchanged to ``optimizer.optimize``.

    Raises
    ------
    ValueError
        If ``path`` is not a ``.json`` file, if neither the instruction config
        nor the metric provides an LLM, or if no loss is configured and the
        metric has no ``output_type``.
    NotImplementedError
        If no loss is configured and the metric's output type is not binary,
        continuous, or discrete.

    Notes
    -----
    The optimizer object is mutated: its ``metric`` is set to this metric and
    its ``llm`` is filled in when it has none.
    """
    if not path.endswith(".json"):
        raise ValueError("Train data must be in json format")

    # Config classes are imported lazily, only when a default is needed.
    if instruction_config is None:
        from ragas.config import InstructionConfig

        instruction_config = InstructionConfig()

    if demonstration_config is None:
        from ragas.config import DemonstrationConfig

        demonstration_config = DemonstrationConfig()

    dataset = MetricAnnotation.from_json(path, metric_name=self.name)

    optimizer = instruction_config.optimizer
    llm = instruction_config.llm or self.llm
    if llm is None:
        raise ValueError(
            f"Metric '{self.name}' has no valid LLM provided (self.llm is None). Please instantiate the metric with an LLM to run."  # noqa
        )
    # An LLM already attached to the optimizer takes precedence.
    if optimizer.llm is None:
        optimizer.llm = llm

    if instruction_config.loss is not None:
        loss_fun = instruction_config.loss
    else:
        # No explicit loss: derive one from the metric's declared output type.
        if self.output_type is None:
            raise ValueError(
                f"Output type for metric '{self.name}' is not defined. Please set the output type in the metric or in the instruction config."
            )

        output_name = self.output_type.name
        if output_name == MetricOutputType.BINARY.name:
            loss_fun = BinaryMetricLoss()
        elif output_name in (
            MetricOutputType.CONTINUOUS.name,
            MetricOutputType.DISCRETE.name,
        ):
            loss_fun = MSELoss()
        else:
            raise NotImplementedError(
                f"Output type '{self.output_type.name}' not implemented"
            )

    optimizer.metric = self

    optimizer_config = instruction_config.optimizer_config or {}
    optimized_prompts = optimizer.optimize(
        dataset[self.name],
        loss_fun,
        optimizer_config,
        callbacks=callbacks,
        run_config=run_config,
        batch_size=batch_size,
        with_debugging_logs=with_debugging_logs,
        raise_exceptions=raise_exceptions,
    )

    # Write each optimized instruction onto the prompt with the same key.
    prompts = self.get_prompts()
    for key, val in optimized_prompts.items():
        prompts[key].instruction = val
    self.set_prompts(**prompts)


@dataclass
Expand Down
8 changes: 6 additions & 2 deletions src/ragas/optimizers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
from .base import Optimizer
from ragas.optimizers.base import Optimizer
from ragas.optimizers.genetic import GeneticOptimizer

__all__ = ["Optimizer"]
__all__ = [
"Optimizer",
"GeneticOptimizer",
]
2 changes: 1 addition & 1 deletion src/ragas/optimizers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,4 +49,4 @@ def optimize(
Dict[str, str]
The optimized prompts for given chain.
"""
pass
raise NotImplementedError("The method `optimize` must be implemented.")
Loading
Loading