In [None]:
#| default_exp metric.discrete

# DiscreteMetric
> Base class from which all discrete metrics should inherit.

In [None]:
#| export
import typing as t
from dataclasses import dataclass, field
from pydantic import BaseModel, create_model
from collections import Counter
from ragas_experimental.metric import Metric, MetricResult
from ragas_experimental.metric.decorator import create_metric_decorator


@dataclass
class DiscreteMetric(Metric):
    """Metric whose result is one label out of a fixed list of allowed values.

    Attributes:
        values: The allowed result labels. Defaults to ``["pass", "fail"]``.
    """

    # Allowed labels; they become the Literal type of the response model's
    # `result` field.
    values: t.List[str] = field(default_factory=lambda: ["pass", "fail"])

    def _get_response_model(self, with_reasoning: bool) -> t.Type[BaseModel]:
        """Return the pydantic model describing the expected response.

        The model always has a required ``result`` field restricted to
        ``self.values``; when ``with_reasoning`` is true it also has a
        required string ``reason`` field.

        Args:
            with_reasoning: Whether the model should include ``reason``.

        Returns:
            The (possibly cached) model class for the requested variant.
        """
        # NOTE(review): `_response_models` is not defined in this class --
        # presumably provided by the Metric base class; confirm.
        # The cache key is only `with_reasoning`, so a model built earlier is
        # reused even if `self.values` is changed afterwards.
        if with_reasoning in self._response_models:
            return self._response_models[with_reasoning]

        # Subscripting Literal with a tuple expands it:
        # Literal[("a", "b")] is the same type as Literal["a", "b"].
        allowed = tuple(self.values)
        fields = {"result": (t.Literal[allowed], ...)}

        if with_reasoning:
            fields["reason"] = (str, ...)  # type: ignore

        model = create_model("response_model", **fields)  # type: ignore
        self._response_models[with_reasoning] = model
        return model

    def _ensemble(self, results: t.List[MetricResult]) -> MetricResult:
        """Combine several results into one by majority vote on ``result``.

        A single result is returned unchanged. Otherwise the most frequent
        ``result`` value wins; on a tie, the value whose candidate appears
        first in ``results`` wins. The returned object is a new MetricResult
        built from the winning candidate's ``result`` and ``reason`` only.

        Args:
            results: Candidate results to combine; must not be empty.

        Returns:
            The combined MetricResult.

        Raises:
            ValueError: If ``results`` is empty.
        """
        if not results:
            # Previously this surfaced as "max() arg is an empty sequence";
            # same exception type, clearer message.
            raise ValueError("Cannot ensemble an empty list of results.")

        if len(results) == 1:
            return results[0]

        counts = Counter(candidate.result for candidate in results)
        top_count = max(counts.values())

        # `results` is non-empty, so at least one candidate reaches top_count
        # and `next` always finds a winner.
        winner = next(
            candidate for candidate in results if counts[candidate.result] == top_count
        )
        return MetricResult(result=winner.result, reason=winner.reason)


# Decorator built from DiscreteMetric by create_metric_decorator. It is applied
# as `@discrete_metric(llm=..., prompt=..., name=..., values=[...])` to a
# scoring function, and the decorated name is then used via `.score(...)`.
discrete_metric = create_metric_decorator(DiscreteMetric)

## Example usage

In [None]:

#| eval: false

from ragas_experimental.llm import ragas_llm
from openai import OpenAI

# LLM handle configured for the "openai" provider and the gpt-4o model.
llm = ragas_llm(provider="openai", model="gpt-4o", client=OpenAI())

# A three-level helpfulness metric. The `{response}` placeholder in the prompt
# matches the keyword argument passed to `score` below.
my_metric = DiscreteMetric(
    llm=llm,
    name="helpfulness",
    prompt="Evaluate if given answer is helpful\n\n{response}",
    values=["low", "med", "high"],
)

result = my_metric.score(response="this is my response")
print(result)  # the chosen label, e.g. "low"
print(result.reason)  # the reasoning returned by the LLM



low
The response is incomplete and lacks any specific information. It cannot be evaluated for helpfulness without further context or content.


### Write a custom discrete metric

In [None]:
#| eval: false
from ragas_experimental.metric.result import MetricResult

@discrete_metric(
    llm=llm,
    prompt="Evaluate if given answer is helpful\n\n{response}",
    name="new_metric",
    values=["low", "med", "high"],
)
def my_metric(llm, prompt, **kwargs):
    """Score a response as 'low' or 'high' from a list of boolean LLM verdicts.

    Args:
        llm: Object whose ``generate(text, response_model=...)`` returns a
            parsed response.
        prompt: Format string; it is filled with ``kwargs`` before being sent.
        **kwargs: Values substituted into ``prompt``; also stored as the
            trace input.

    Returns:
        MetricResult with ``result`` set to 'low' when no verdict in
        ``output`` is true and 'high' otherwise, the LLM's ``reason``, and
        the input/output traces.
    """

    # Shape of the structured response requested from the LLM.
    class response_model(BaseModel):
        output: t.List[bool]
        reason: str

    traces = {"input": kwargs}
    response = llm.generate(prompt.format(**kwargs), response_model=response_model)
    traces["output"] = response.model_dump()

    # Summing booleans counts the true verdicts; at least one means 'high'.
    score = "high" if sum(response.output) >= 1 else "low"

    return MetricResult(result=score, reason=response.reason, traces=traces)

# Run the custom metric on a sample response, then show the label and reason.
result = my_metric.score(response="my response")
print(result)
print(result.reason)

low
The prompt 'my response' does not provide sufficient information or context for me to evaluate its helpfulness. An answer needs to be specific and provide insight or information relative to a clear question or context.
