In [1]:
# Execute the shared setup script via IPython's %run magic.
# NOTE(review): `bedrock_client` is used by later cells but is not defined in any
# cell of this notebook, so it presumably comes from this script — confirm.
%run ../bedrock_setup.py

In [3]:
import chess
import chess.engine
import sys
import os
import json
from tqdm import tqdm

sys.path.append(os.path.abspath('../'))
from src.KB_agent import create_knowledgebase_agent

In [14]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chat_models import init_chat_model
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# Embedding model configuration: a MiniLM sentence-transformer run on CPU,
# with embedding normalization switched on.
model_name = "sentence-transformers/all-MiniLM-L6-v2"
model_kwargs = dict(device="cpu")
encode_kwargs = dict(normalize_embeddings=True)

# Embedder handed to the FAISS loader in this cell.
embeddings_model = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

# Load the prebuilt FAISS knowledge base from disk.
# NOTE(review): allow_dangerous_deserialization=True opts in to unpickling the
# stored index files; only load an index that comes from a trusted source.
kb = FAISS.load_local(
    "../database/knowledge_base",
    embeddings_model,
    allow_dangerous_deserialization=True,
)

# Retriever configured with k=4, i.e. four documents requested per query.
retriever = kb.as_retriever(search_kwargs={"k": 4})

# Chat model that generates the answers, reached through the Bedrock Converse
# provider using the already-created `bedrock_client`.
model = init_chat_model(
    "us.anthropic.claude-3-5-haiku-20241022-v1:0",
    model_provider="bedrock_converse",
    region_name="us-east-1",
    client=bedrock_client,
)
    

# System message for the question-answering prompt. The adjacent string
# literals are concatenated into one string; the "{context}" placeholder is the
# template variable that receives the retrieved documents.
system_prompt = (
    "You are an assistant for question-answering tasks on chess. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Be as descriptive as possible while still being "
    "factual and coherent."
    # Blank line so the retrieved context is not glued onto the final
    # instruction sentence ("...coherent.<first document text>").
    "\n\n"
    "{context}"
)

# Two-message chat template: the system instructions (with the context slot)
# followed by the user's question in the "{input}" variable.
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)

# Chain that feeds the documents plus the question to the chat model.
question_answer_chain = create_stuff_documents_chain(llm=model, prompt=prompt)

# Full RAG pipeline: retrieve documents, then run the answer chain on them.
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answer_chain,
)

In [15]:
# Load the evaluation questions, one per line.
# Each line is stripped so the trailing newline is not sent to the model or
# stored in the dataset, and blank lines are dropped so they do not become
# empty questions. The encoding is explicit so the read does not depend on the
# platform default.
with open("../data/chess_questions.txt", encoding="utf-8") as f:
    questions = [line.strip() for line in f if line.strip()]

In [28]:
# Run every question through the RAG chain and record, per question, the
# question text, the text of each retrieved document, and the generated answer.
# The list is later handed to EvaluationDataset.from_list.
dataset = []
for q in tqdm(questions, desc="Answering questions"):
    # Defensive clean-up: drop surrounding whitespace (e.g. a trailing newline
    # left over from reading the file) and skip lines that are empty.
    question = q.strip()
    if not question:
        continue

    results = rag_chain.invoke({"input": question})
    dataset.append(
        {
            "user_input": question,
            "retrieved_contexts": [doc.page_content for doc in results["context"]],
            "response": results["answer"],
        }
    )

In [31]:
from ragas import EvaluationDataset
# Wrap the list of per-question records (built in the loop above) in a ragas
# EvaluationDataset; this is the object later passed to evaluate().
evaluation_dataset = EvaluationDataset.from_list(dataset)

In [37]:
from ragas import evaluate
from ragas.llms import LangchainLLMWrapper
from ragas.metrics import Faithfulness

# Judge model used by ragas to score the answers.
# Built with init_chat_model (already imported in this notebook) and the
# "bedrock_converse" provider, the same way the answer model is created, so the
# cell does not depend on a `ChatBedrockConverse` name that no visible cell
# imports.
llm = init_chat_model(
    "us.amazon.nova-pro-v1:0",
    model_provider="bedrock_converse",
    client=bedrock_client,
)

# Adapter that lets ragas drive a LangChain chat model.
evaluator_llm = LangchainLLMWrapper(llm)

# Score the collected samples with the Faithfulness metric.
result = evaluate(
    dataset=evaluation_dataset,
    metrics=[Faithfulness()],
    llm=evaluator_llm,
)
result

Evaluating: 100%|██████████| 10/10 [00:14<00:00,  1.46s/it]


{'faithfulness': 0.9303}