In [1]:
from langchain_ollama import ChatOllama

# Chat model obtained through the langchain_ollama integration.
# temperature=0 presumably aims at repeatable generations -- TODO confirm.
llm = ChatOllama(
    temperature=0,
    model="llama3.1",
)

In [None]:
import os
from langchain.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

from rag import get_rag_chain

# Directory (resolved against the current working directory) holding the PDFs.
pdf_directory = "papers"

# Accumulates the documents returned by the loader for every PDF found.
all_documents = []

# os.listdir() returns entries in arbitrary order; sorting the names keeps the
# document order (and therefore the chunk order handed to get_rag_chain)
# identical from one run to the next.
for filename in sorted(os.listdir(pdf_directory)):
    # Case-insensitive match so files named "*.PDF" are not silently skipped.
    if filename.lower().endswith(".pdf"):
        file_path = os.path.join(pdf_directory, filename)
        loader = PyPDFLoader(file_path)
        documents = loader.load()
        all_documents.extend(documents)

# Split the loaded documents; chunk_size / chunk_overlap are passed straight
# to the splitter.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
splits = text_splitter.split_documents(all_documents)

# get_rag_chain comes from the local `rag` module; its three return values are
# bound to the names used by the cells below.
rag_chain, retriever, embeddings = get_rag_chain(splits)

In [None]:
import json

from rag import create_ragas_dataset


# Benchmark file; the code below reads its "questions" and "ground_truths" entries.
# NOTE(review): absolute, machine-specific path -- consider making it configurable.
# The encoding is given explicitly so decoding does not depend on the locale default.
with open("/opt/cloudadm/llm_rag/benchmark.json", "r", encoding="utf-8") as json_file:
    data = json.load(json_file)

# Build the evaluation dataset only after the file handle has been released;
# nothing in this call needs the file to stay open.
dataset = create_ragas_dataset(rag_chain, retriever, data['questions'], data['ground_truths'])

In [None]:
from deepeval.models.base_model import DeepEvalBaseLLM

class GoogleVertexAI(DeepEvalBaseLLM):
    """Adapter exposing a chat model through the DeepEvalBaseLLM interface.

    Despite the class name, nothing in this class is Vertex-specific: the
    wrapped object only needs ``invoke`` / ``ainvoke`` methods whose result
    carries a ``content`` attribute.
    """

    def __init__(self, model):
        """Keep a reference to the chat model that serves every request."""
        self.model = model

    def load_model(self):
        """Return the chat model that was passed to the constructor."""
        return self.model

    def generate(self, prompt: str) -> str:
        """Send ``prompt`` to the model synchronously and return the reply's ``content``."""
        reply = self.load_model().invoke(prompt)
        return reply.content

    async def a_generate(self, prompt: str) -> str:
        """Asynchronous counterpart of ``generate``, built on ``ainvoke``."""
        reply = await self.load_model().ainvoke(prompt)
        return reply.content

    def get_model_name(self):
        """Return the fixed display name reported for this model."""
        return "Ollama AI Model"

# Initialize the wrapper class.
# The name `llm` is rebound from the raw chat model to its DeepEval wrapper, so
# re-running this cell must not wrap an already-wrapped model. The check uses
# the imported base class rather than GoogleVertexAI because re-running the
# cell redefines GoogleVertexAI, which would defeat an isinstance check on it.
if not isinstance(llm, DeepEvalBaseLLM):
    llm = GoogleVertexAI(model=llm)

In [None]:
from doctest import DocTestCase
from deepeval.metrics import HallucinationMetric
from deepeval.test_case import LLMTestCase, LLMTestCaseParams
from deepeval.metrics import GEval

# Build the test case from the entry at position 4 of each dataset column.
# LLMTestCase (deepeval) is the class that takes these fields;
# doctest.DocTestCase is an unrelated unittest wrapper that rejects these
# keyword arguments with a TypeError.
test_case = LLMTestCase(
    input=dataset['user_input'].values[4],
    actual_output=dataset['response'].values[4],
    expected_output=dataset['reference'].values[4],
    retrieval_context=dataset['retrieved_contexts'].values[4]
)

# GEval metric named "Correctness": it is given the actual and the expected
# output of a test case and uses `llm` as its model.
correctness_metric = GEval(
    model=llm,
    name="Correctness",
    strict_mode=True,
    evaluation_params=[
        LLMTestCaseParams.ACTUAL_OUTPUT,
        LLMTestCaseParams.EXPECTED_OUTPUT,
    ],
    criteria="Correctness - determine if the actual output is correct according to the expected output.",
)

In [None]:
# Run the metric on the test case, then show the resulting score followed by
# the metric's reason on a single line.
correctness_metric.measure(test_case)
print(f"{correctness_metric.score} {correctness_metric.reason}")