In [18]:
import getpass
import os
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.llms import HuggingFaceHub

def _set_env(var: str):
    if not os.environ.get(var):
        os.environ[var] = getpass.getpass(f"{var}: ")

os.environ["LANGCHAIN_TRACING_V2"]="true"
os.environ["LANGCHAIN_ENDPOINT"]="https://api.smith.langchain.com"
os.environ["LANGCHAIN_API_KEY"]="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"
os.environ["LANGCHAIN_PROJECT"]="langchain-project"

EMBEDDING_MODEL_NAME = "BAAI/bge-small-en-v1.5"
EMBEDDING_PATH = "C:/code/qp-ai-assessment/api/misc/embeddings/"
MODEL_KWARGS = {"device": "cpu"}
ENCODE_KWARGS = {"normalize_embeddings": True}



huggingface_embeddings = HuggingFaceBgeEmbeddings(
    model_name=EMBEDDING_MODEL_NAME,
    model_kwargs=MODEL_KWARGS,
    encode_kwargs=ENCODE_KWARGS
)



In [19]:
### Dataset name
from langsmith import Client

# Clone dataset
client = Client()
dataset = client.clone_public_dataset(
    "https://smith.langchain.com/public/730d833b-74da-43e2-a614-4e2ca2502606/d"
)

dataset_name = "LCEL-QA"

In [20]:
### INDEX

from bs4 import BeautifulSoup as Soup
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_community.document_loaders.recursive_url_loader import RecursiveUrlLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS

# Load docs
url = "https://python.langchain.com/v0.1/docs/expression_language/"
loader = RecursiveUrlLoader(
    url=url, max_depth=20, extractor=lambda x: Soup(x, "html.parser").text
)
docs = loader.load()

# Split into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=4500, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

# Embed and store in Chroma
vectorstore = FAISS.from_documents(documents=splits, embedding=huggingface_embeddings)

# Index
retriever = vectorstore.as_retriever()

In [23]:
from typing import List
from langchain_community.llms import HuggingFaceHub
from langchain.schema import Document
from langsmith import traceable

class RagBot:
    def __init__(self, retriever, repo_id: str = "mistralai/Mistral-7B-v0.1"):
        self._retriever = retriever
        self._llm = HuggingFaceHub(
            repo_id=repo_id,
            model_kwargs={"temperature": 0.1, "max_length": 500}
        )

    @traceable()
    def retrieve_docs(self, question):
        return self._retriever.get_relevant_documents(question)

    @traceable()
    def invoke_llm(self, question: str, docs: List[Document]):
        docs_content = "\n\n".join([doc.page_content for doc in docs])
        system_prompt = (
            "You are a helpful AI code assistant with expertise in LCEL. "
            "Use the following docs to produce a concise code solution to the user question.\n\n"
            f"## Docs\n\n{docs_content}\n\n"
        )
        
        input_text = f"{system_prompt}Human: {question}\nAssistant:"
        
        response = self._llm(input_text)
        
        return {
            "answer": response,
            "contexts": [doc.page_content for doc in docs],
        }

    @traceable()
    def get_answer(self, question: str):
        docs = self.retrieve_docs(question)
        return self.invoke_llm(question, docs)

# Initialize the RagBot with a retriever
rag_bot = RagBot(retriever)


In [24]:
response = rag_bot.get_answer("How to build a RAG chain in LCEL?")
response["answer"][:150]

'You are a helpful AI code assistant with expertise in LCEL. Use the following docs to produce a concise code solution to the user question.\n\n## Docs\n\n'

In [25]:
def predict_rag_answer(example: dict):
    """Use this for answer evaluation"""
    response = rag_bot.get_answer(example["input_question"])
    return {"answer": response["answer"]}

def predict_rag_answer_with_context(example: dict):
    """Use this for evaluation of retrieved documents and hallucinations"""
    response = rag_bot.get_answer(example["input_question"])
    return {"answer": response["answer"], "contexts": response["contexts"]}

In [37]:
from langchain import hub
from langchain_community.llms import HuggingFaceEndpoint
from langchain.chains import LLMChain

# Grade prompt
grade_prompt_answer_accuracy = hub.pull("langchain-ai/rag-answer-vs-reference")

def answer_evaluator(run, example) -> dict:
    """
    A simple evaluator for RAG answer accuracy
    """

    # Get question, ground truth answer, RAG chain answer
    input_question = example.inputs["input_question"]
    correct_answer = example.outputs["output_answer"]
    student_answer = run.outputs["answer"]

    # LLM grader
    llm = HuggingFaceEndpoint(
        repo_id="mistralai/Mistral-7B-v0.1",
        temperature=0.1,
        model_kwargs={"max_length": 500}  # Use model_kwargs instead of max_length
    )

    # Use the StructuredPrompt directly
    grading_chain = LLMChain(llm=llm, prompt=grade_prompt_answer_accuracy)

    # Run evaluator
    result = grading_chain.run(
        question=input_question,
        correct_answer=correct_answer,
        student_answer=student_answer
    )

    # Parse the result to extract the score
    # This assumes the model outputs the score in a format like "Score: 8"
    try:
        score = float(result.split("Score:")[-1].strip().split()[0])
    except:
        score = 0  # Default score if parsing fails

    return {"key": "answer_v_reference_score", "score": score}


In [38]:
from langsmith.evaluation import evaluate

experiment_results = evaluate(
    predict_rag_answer,
    data=dataset_name,
    evaluators=[answer_evaluator],
    experiment_prefix="rag-answer-v-reference",
    metadata={"version": "LCEL context, mistralai/Mistral-7B-v0.1"},
)

View the evaluation results for experiment: 'rag-answer-v-reference-df3b5ffb' at:
https://smith.langchain.com/o/b59f3614-9d00-5c85-a713-5c5f2675f256/datasets/bd1b4c43-df52-4967-aa0a-2ff72b00592d/compare?selectedSessions=7801917f-6774-47db-b3a2-730b5751a26f




0it [00:00, ?it/s]

                    max_length was transferred to model_kwargs.
                    Please make sure that max_length is what you intended.
                    max_length was transferred to model_kwargs.
                    Please make sure that max_length is what you intended.
                    max_length was transferred to model_kwargs.
                    Please make sure that max_length is what you intended.
                    max_length was transferred to model_kwargs.
                    Please make sure that max_length is what you intended.
                    max_length was transferred to model_kwargs.
                    Please make sure that max_length is what you intended.


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True

                    max_length was transferred to model_kwargs.
                    Please make sure that max_length is what you intended.
                    max_length was transferred to model_kwargs.
                    Please make sure that max_length is what you intended.


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.


                    max_length was transferred to model_kwargs.
                    Please make sure that max_length is what you intended.
                    max_length was transferred to model_kwargs.
                    Please make sure that max_length is what you intended.
                    max_length was transferred to model_kwargs.
                    Please make sure that max_length is what you intended.
                    max_length was transferred to model_kwargs.
                    Please make sure that max_length is what you intended.
                    max_length was transferred to model_kwargs.
                    Please make sure that max_length is what you intended.
                    max_length was transferred to model_kwargs.
                    Please make sure that max_length is what you intended.
                    max_length was transferred to model_kwargs.
                    Please make sure that max_length is what you intended.
                    max_len

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to C:\Users\thora\.cache\huggingface\token
Login successful
The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set

                    max_length was transferred to model_kwargs.
                    Please make sure that max_length is what you intended.
                    max_length was transferred to model_kwargs.
                    Please make sure that max_length is what you intended.
                    max_length was transferred to model_kwargs.
                    Please make sure that max_length is what you intended.
                    max_length was transferred to model_kwargs.
                    Please make sure that max_length is what you intended.


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to C:\Users\thora\.cache\huggin