In [24]:
from deepeval.models.base_model import DeepEvalBaseLLM
from deepeval import evaluate
from deepeval.metrics import FaithfulnessMetric, AnswerRelevancyMetric
from deepeval.test_case import LLMTestCase
from deepeval.dataset import EvaluationDataset
from anthropic import Anthropic, AsyncAnthropic
from typing import Literal

import os
import sys
sys.path.append('../')

from src.llm.llm_interface import LLM
from dotenv import load_dotenv, find_dotenv
envs = load_dotenv(find_dotenv(), override=True)

class CustomAnthropic(DeepEvalBaseLLM):
    """DeepEval judge model backed by Anthropic's Claude 3 Messages API.

    The wrapper validates the requested model identifier, builds a sync or
    async Anthropic client on demand, and returns the text of the first
    content block of each response.

    Args:
        model: One of the Claude 3 snapshot identifiers in ``ACCEPTED_MODELS``.
        max_tokens: Upper bound on generated tokens per request (default 1024).

    Raises:
        ValueError: If ``model`` is not one of the accepted identifiers.
    """

    # Claude 3 snapshot identifiers accepted by this wrapper.
    ACCEPTED_MODELS = (
        'claude-3-haiku-20240307',
        'claude-3-sonnet-20240229',
        'claude-3-opus-20240229',
    )

    # Sentinel returned when the API response carries nothing to read.
    EMPTY_RESPONSE = "no message returned"

    def __init__(
        self,
        model: Literal['claude-3-haiku-20240307', 'claude-3-sonnet-20240229', 'claude-3-opus-20240229'],
        max_tokens: int = 1024,
    ):
        # Kept as an instance-level list so existing readers of
        # `accepted_model_types` continue to work.
        self.accepted_model_types = list(self.ACCEPTED_MODELS)
        if model not in self.accepted_model_types:
            raise ValueError(f'{model} is not found in {self.accepted_model_types}. Retry with acceptable model type')
        self.model = model
        self.max_tokens = max_tokens

    def load_model(self, async_mode: bool=False):
        """Return a new Anthropic client (async when ``async_mode`` is True).

        The API key is read from the ``ANTHROPIC_API_KEY`` environment
        variable; a missing variable raises ``KeyError``.
        """
        api_key = os.environ['ANTHROPIC_API_KEY']
        if async_mode:
            return AsyncAnthropic(api_key=api_key)
        return Anthropic(api_key=api_key)

    def _request_kwargs(self, prompt: str) -> dict:
        """Build the keyword arguments shared by the sync and async calls."""
        return {
            "max_tokens": self.max_tokens,
            "messages": [{"role": "user", "content": prompt}],
            "model": self.model,
        }

    @classmethod
    def _extract_text(cls, message) -> str:
        """Return the first content block's text, or the sentinel if absent."""
        # Guard the empty-content case too, so indexing [0] cannot raise.
        if not message or not message.content:
            return cls.EMPTY_RESPONSE
        return message.content[0].text

    def generate(self, prompt: str) -> str:
        """Send ``prompt`` as a single user message and return the reply text."""
        client = self.load_model()
        message = client.messages.create(**self._request_kwargs(prompt))
        return self._extract_text(message)

    async def a_generate(self, prompt: str) -> str:
        """Async counterpart of :meth:`generate`."""
        aclient = self.load_model(async_mode=True)
        message = await aclient.messages.create(**self._request_kwargs(prompt))
        return self._extract_text(message)

    def get_model_name(self):
        """Return the fixed display name used for this custom model."""
        return "Custom Anthropic Model"

# Judge model used by the metric cells below; Haiku is one of the three accepted IDs.
claude = CustomAnthropic(model='claude-3-haiku-20240307')
# Optional smoke test (sends a real API request): print(claude.generate("Write me a joke"))

In [25]:
# Faithfulness metric judged by the custom Claude wrapper, with a 0.7 threshold.
metric = FaithfulnessMetric(
    model=claude,
    threshold=0.7,
)

In [26]:
# Hand-built sample used to build the test case in the next cell.
# Layout: (question, [retrieved context passages], generated answer).
#   atuple[0]  -> the user question
#   atuple[1]  -> list of three transcript passages used as retrieval context
#   atuple[-1] -> the answer text whose faithfulness is being scored
atuple = ('Give a brief explanation of how brain neuroplasticity works',
 ['The noise goes up and the signal goes down. Again, incredibly important set of findings. I should also mention that hyperventilation is one way that in the laboratory anyway, or in neurosurgery units for some time, physicians would evoke seizure in seizure prone patients. The reason that works is exactly the explanation I just gave you. Seizure is a hyper excitability of the brain, not enough inhibition or suppression of the overall circuitry. So you get these waves or these storms of electrical activity.',
  'Neuroplasticity is our nervous system, which of course includes the brain, the spinal cord, and all the connections between the brain and spinal cord and the organs and tissues of the body, and then all the neural connections back from the organs and tissues of the body to the brain and spinal cord, so the whole thing in both directions, has the ability to change in response to experience in ways that are adaptive. That is, that allows us to do things that we could not do before.',
  "Neuroplasticity broadly defined is the nervous system's ability to change in response to experience. But at a cellular level, that occurs through a couple of different mechanisms. One of the main mechanisms is something called long-term potentiation. Long-term potentiation involves the strengthening of particular connections between neurons. The connection sites between neurons we call synapses. Actually, technically synapses are the gaps between those connections, but nonetheless, synapses are the point of communication between neurons and those can be strengthened so that certain neurons can talk to other neurons more robustly than they happened to before."],
 "Neuroplasticity is the nervous system's ability to change in response to experience. At a cellular level, this occurs through mechanisms such as long-term potentiation, which involves strengthening specific connections between neurons at synapses. This strengthening allows certain neurons to communicate more effectively with each other, enabling the nervous system to adapt and learn based on experiences.")

In [28]:
# Map the positional sample onto the named fields of a DeepEval test case.
test_case = LLMTestCase(
    input=atuple[0],              # the question
    retrieval_context=atuple[1],  # list of context passages
    actual_output=atuple[-1],     # the answer being evaluated
)

In [29]:
# Run the evaluation; the attributes inspected in the following cells are read after this call.
# NOTE(review): this presumably issues requests through `claude` and needs ANTHROPIC_API_KEY — confirm.
metric.measure(test_case)

Output()

In [30]:
# Score produced by the measure() call above; the recorded run shows 1.0 (threshold was 0.7).
metric.score

1.0

In [31]:
# Free-text justification for the score (presumably written by the judge model — confirm).
metric.reason

"The faithfulness score is 1.00 because there are no contradictions between the retrieval context and the actual output. The lack of any contradictions indicates that the actual output is fully aligned with the information presented in the retrieval context, demonstrating a high level of faithfulness. This is an excellent result, showcasing the system's ability to generate output that is consistent and true to the provided information."

In [32]:
# Statements the metric pulled out of the answer; the recorded run lists three, each paraphrasing actual_output.
metric.claims

["Neuroplasticity is the nervous system's ability to change in response to experience.",
 'At a cellular level, neuroplasticity occurs through mechanisms such as long-term potentiation, which involves strengthening specific connections between neurons at synapses.',
 'Strengthening of connections between neurons at synapses allows certain neurons to communicate more effectively with each other, enabling the nervous system to adapt and learn based on experiences.']

In [33]:
# No output was recorded for this cell in the saved run.
# NOTE(review): presumably None because cost is not tracked for a custom model — confirm.
metric.evaluation_cost

In [34]:
# The recorded value matches the string returned by CustomAnthropic.get_model_name().
metric.evaluation_model

'Custom Anthropic Model'

In [36]:
# Statements derived from the retrieval_context passages; the recorded output is cut off in this export.
metric.truths

['The noise goes up and the signal goes down.',
 'Hyperventilation is one way that in the laboratory anyway, or in neurosurgery units for some time, physicians would evoke seizure in seizure prone patients.',
 'The reason that hyperventilation works is that seizure is a hyper excitability of the brain, not enough inhibition or suppression of the overall circuitry.',
 "Neuroplasticity is the nervous system's ability to change in response to experience in ways that are adaptive.",
 'Neuroplasticity includes the brain, the spinal cord, and all the connections between the brain and spinal cord and the organs and tissues of the body, and then all the neural connections back from the organs and tissues of the body to the brain and spinal cord.',
 'One of the main mechanisms of neuroplasticity is long-term potentiation, which involves the strengthening of particular connections between neurons.',
 'Synapses are the point of communication between neurons and can be strengthened so that certain

Bad pipe message: %s [b"\xe7,\t5\xca\xe8\xf8\xfb\xc4\xe1\x11\x85\xe9\xca'v(\x9c \x9dc\x81"]
Bad pipe message: %s [b'\xe8\x97\\\xe2h\xff\x80\xb9\xf2\x87\xe4\xdf\xa0\xe2\x9eP\xae\x8f\x00\x00\xa6\xc0,\xc00\x00\xa3\x00\x9f\xcc\xa9\xcc\xa8\xcc\xaa\xc0\xaf', b"\xc0\xa3\xc0\x9f\xc0]\xc0a\xc0W\xc0S\xc0+\xc0/\x00\xa2\x00\x9e\xc0\xae\xc0\xac\xc0\xa2\xc0\x9e\xc0\\\xc0`\xc0V\xc0R\xc0$\xc0(\x00k\x00j\xc0s\xc0w\x00\xc4\x00\xc3\xc0#\xc0'\x00g\x00@\xc0r\xc0v\x00\xbe\x00\xbd\xc0\n\xc0\x14\x009\x008\x00\x88\x00\x87\xc0\t\xc0\x13\x003\x002\x00\x9a\x00\x99\x00E\x00D\xc0\x07\xc0\x11\xc0\x08\xc0\x12\x00\x16\x00\x13\x00\x9d\xc0\xa1\xc0\x9d\xc0Q\x00\x9c\xc0\xa0\xc0\x9c\xc0P\x00=\x00\xc0\x00<\x00\xba\x005\x00\x84\x00/\x00\x96\x00A\x00\x05\x00\n\x00\xff\x01\x00\x00j\x00\x00\x00\x0e\x00\x0c\x00\x00\t127.0.0.1\x00\x0b\x00\x04\x03\x00\x01\x02\x00\n\x00\x0c\x00\n\x00\x1d\x00\x17\x00\x1e\x00", b'\x18\x00#\x00\x00\x00\x16\x00\x00\x00\x17\x00\x00\x00\r\x000\x00.\x04\x03\x05\x03\x06']
Bad pipe message: %s [b'\x07\x08']