In [None]:
#| default_exp metric.base

In [None]:
#| hide
from dotenv import find_dotenv, load_dotenv

# Locate `.envrc` by walking up from the current working directory rather than
# hard-coding one developer's absolute path, so the notebook runs on any checkout.
# If no `.envrc` is found, find_dotenv returns "" and load_dotenv returns False.
load_dotenv(find_dotenv(".envrc", usecwd=True))

True

# BaseMetric
> Base class for all types of metrics in ragas

In [None]:
#| export

from abc import ABC, abstractmethod
import asyncio
from dataclasses import dataclass, field
from pydantic import BaseModel
import typing as t
from ragas_annotator.metric import MetricResult
from ragas_annotator.metric import LLM

@dataclass
class Metric(ABC):
    """Base class for all metrics in the LLM evaluation library.

    A metric renders ``prompt`` with the caller's keyword arguments, asks
    ``llm`` for a structured response ``n`` times, wraps every response in a
    ``MetricResult`` and passes the list to ``_ensemble`` to obtain the final
    result.

    Subclasses must implement ``_get_response_model`` and ``_ensemble``.
    """

    name: str
    # Template rendered with ``str.format(**kwargs)`` in ``score``/``ascore``.
    prompt: str
    # Must provide ``generate(prompt, response_model=...)`` and the awaitable
    # counterpart ``agenerate(prompt, response_model=...)``.
    llm: LLM
    # Not read or written by this base class.
    # NOTE(review): presumably a per-instance cache of response models keyed by
    # the ``with_reasoning`` flag, for subclasses to use -- confirm.
    _response_models: t.Dict[bool, t.Type[BaseModel]] = field(
        default_factory=dict, init=False, repr=False
    )

    @abstractmethod
    def _get_response_model(self, with_reasoning: bool) -> t.Type[BaseModel]:
        """Return the pydantic model the LLM response must conform to.

        Args:
            with_reasoning: The ``reasoning`` flag given to ``score``/``ascore``.
        """
        pass

    @abstractmethod
    def _ensemble(self, results: t.List[MetricResult]) -> MetricResult:
        """Combine the sampled results for one input into a single result.

        Args:
            results: One ``MetricResult`` per sample, in sampling order. The
                list is never empty when called from ``score``/``ascore``.
        """
        pass

    @staticmethod
    def _check_sample_count(n: int) -> None:
        """Reject sample counts that would leave ``_ensemble`` nothing to combine."""
        if n < 1:
            raise ValueError(f"n must be at least 1, got {n}")

    @staticmethod
    def _to_result(response: BaseModel) -> MetricResult:
        """Convert one structured LLM response into a ``MetricResult``."""
        return MetricResult(**response.model_dump())

    def score(self, reasoning: bool = True, n: int = 1, **kwargs) -> MetricResult:
        """Score one input synchronously.

        Args:
            reasoning: Forwarded to ``_get_response_model``.
            n: Number of LLM samples to draw; must be at least 1.
            **kwargs: Values for the placeholders in ``prompt``. The names
                ``reasoning`` and ``n`` are taken by this method and therefore
                cannot be used as placeholder names.

        Returns:
            The result of ``_ensemble`` over the ``n`` sampled results.

        Raises:
            ValueError: If ``n`` is less than 1.
            KeyError: If ``prompt`` has a named placeholder missing from ``kwargs``.
        """
        self._check_sample_count(n)
        prompt_input = self.prompt.format(**kwargs)
        # The response model does not change between samples, so resolve it once.
        response_model = self._get_response_model(reasoning)
        results = [
            self._to_result(
                self.llm.generate(prompt_input, response_model=response_model)
            )
            for _ in range(n)
        ]
        return self._ensemble(results)

    async def ascore(self, reasoning: bool = True, n: int = 1, **kwargs) -> MetricResult:
        """Score one input asynchronously.

        Takes the same arguments, returns the same result and raises the same
        errors as ``score``. The ``n`` LLM calls are issued concurrently;
        ``asyncio.gather`` returns the responses in submission order, so
        ``_ensemble`` sees them in the same order as in ``score``.
        """
        self._check_sample_count(n)
        prompt_input = self.prompt.format(**kwargs)
        response_model = self._get_response_model(reasoning)
        responses = await asyncio.gather(
            *(
                self.llm.agenerate(prompt_input, response_model=response_model)
                for _ in range(n)
            )
        )
        return self._ensemble([self._to_result(response) for response in responses])

    def batch_score(self, inputs: t.List[t.Dict[str, t.Any]], reasoning: bool = True, n: int = 1) -> t.List[MetricResult]:
        """Score several inputs one after another.

        Args:
            inputs: One dict of prompt placeholder values per item to score.
            reasoning: Forwarded to ``score`` for every item.
            n: Number of samples per item, forwarded to ``score``.

        Returns:
            One result per input, in input order.
        """
        return [
            self.score(reasoning=reasoning, n=n, **input_dict)
            for input_dict in inputs
        ]

    async def abatch_score(self, inputs: t.List[t.Dict[str, t.Any]], reasoning: bool = True, n: int = 1) -> t.List[MetricResult]:
        """Score several inputs concurrently.

        Takes the same arguments as ``batch_score``. All items are scheduled
        together with ``asyncio.gather``, which returns results in input order.
        """
        pending = [
            self.ascore(reasoning=reasoning, n=n, **input_dict)
            for input_dict in inputs
        ]
        return await asyncio.gather(*pending)

  from .autonotebook import tqdm as notebook_tqdm


### Example


In [None]:


@dataclass
class CustomMetric(Metric):
    """Minimal example metric: an integer result with an optional text reason."""

    # Not used by the methods below.
    values: t.List[str] = field(default_factory=lambda: ["pass", "fail"])

    def _get_response_model(self, with_reasoning: bool) -> t.Type[BaseModel]:
        """Get or create the response model for the given ``with_reasoning`` flag.

        The model is built on first request and stored in
        ``self._response_models``, so repeated calls on the same instance
        return the same class instead of defining a new one each time. The
        schema is identical for both flag values: ``reason`` is optional and
        defaults to ``None``.
        """
        cached = self._response_models.get(with_reasoning)
        if cached is not None:
            return cached

        class mymodel(BaseModel):
            result: int
            reason: t.Optional[str] = None

        self._response_models[with_reasoning] = mymodel
        return mymodel

    def _ensemble(self, results: t.List[MetricResult]) -> MetricResult:
        """Return the first sampled result (placeholder for real ensemble logic)."""
        return results[0]  # Placeholder for ensemble logic

    

In [None]:
# Build the example metric; `{input}` in the prompt is filled from the keyword
# arguments passed to score().
my_metric = CustomMetric(
    name="example",
    prompt="What is the result of {input}?",
    llm=LLM(),
)
# One sample with the default reasoning=True; the returned result is the cell output.
my_metric.score(input="test")

0