# BeyondLLM integration with LangChain

This notebook shows the integration of BeyondLLM with LangChain. By combining the strengths of these two tools, we demonstrate how to create and evaluate a simple document retrieval and question-answering system powered by Retrieval-Augmented Generation (RAG).

# Install necessary packages

In [3]:
!pip install langchain sentence-transformers chromadb llama-cpp-python langchain_community pypdf langchain-groq
!pip install beyondllm
!pip install faiss-gpu

Collecting langchain
  Downloading langchain-0.2.11-py3-none-any.whl.metadata (7.1 kB)
Collecting sentence-transformers
  Downloading sentence_transformers-3.0.1-py3-none-any.whl.metadata (10 kB)
Collecting chromadb
  Downloading chromadb-0.5.5-py3-none-any.whl.metadata (6.8 kB)
Collecting llama-cpp-python
  Downloading llama_cpp_python-0.2.85.tar.gz (49.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.3/49.3 MB[0m [31m19.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting langchain_community
  Downloading langchain_community-0.2.10-py3-none-any.whl.metadata (2.7 kB)
Collecting pypdf
  Downloading pypdf-4.3.1-py3-none-any.whl.metadata (7.4 kB)
Collecting langchain-groq
  Downloading langchain_groq-0.1.8-py3-none-any.whl.metadata (2.9 kB)
Col

# Import necessary libraries

In [50]:
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain_groq import ChatGroq
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from beyondllm.utils import CONTEXT_RELEVENCE, GROUNDEDNESS, ANSWER_RELEVENCE
import re
import numpy as np
import pysbd

# Set your API keys here

In [126]:
GROQ_API_KEY = "<your groq api key>"

# Load PDF documents from a directory

In [99]:
loader = PyPDFDirectoryLoader("/content/sample_data/Data")
docs = loader.load()

# Split documents into manageable chunks

In [100]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=756, chunk_overlap=50)
chunks = text_splitter.split_documents(docs)

# Create embeddings for the document chunks

In [101]:
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5")

In [102]:
print(embeddings)

client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': True}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
) model_name='BAAI/bge-base-en-v1.5' cache_folder=None model_kwargs={} encode_kwargs={} multi_process=False show_progress=False


# Create a vector store from the document chunks

In [103]:
vectorstore = FAISS.from_documents(chunks, embeddings)

In [104]:
vectorstore

<langchain_community.vectorstores.faiss.FAISS at 0x7926724f1f30>

In [105]:
query = "what causes heart diseases"
search = vectorstore.similarity_search(query)

# Set up the retriever for similarity search


In [106]:
retriever = vectorstore.as_retriever(
    search_kwargs={'k': 3}
)

In [107]:
retriever.invoke(query)

[Document(metadata={'source': '/content/sample_data/Data/healthyheart.pdf', 'page': 7}, page_content='What Is Heart Disease? \nCoronary heart disease—often simply called heart disease—occurs\nwhen the arteries that supply blood to the heart muscle becomehardened and narrowed due to a buildup of plaque on the arteries’inner walls. Plaque is the accumulation of fat, cholesterol, and othersubstances. As plaque continues to build up in the arteries, bloodflow to the heart is reduced.\nHeart disease can lead to a heart attack. A heart attack happens\nwhen an artery becomes totally blocked with plaque, preventingvital oxygen and nutrients from getting to the heart. A heart attackcan cause permanent damage to the heart muscle.\nHeart disease is one of several cardiovascular diseases, which are'),
 Document(metadata={'source': '/content/sample_data/Data/healthyheart.pdf', 'page': 40}, page_content='36\nA number of other factors affect heart disease, including certain\nhealth conditions, medici

# Initialize the language model


In [117]:
from langchain_groq import ChatGroq

llm = ChatGroq(
    model="llama3-8b-8192",
    groq_api_key=GROQ_API_KEY,
    temperature=0.1  # Set the temperature to 0.1
)

# Define the prompt template for the RAG chain

In [119]:
template = """
<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are an AI assistant that follows instruction extremely well. Please be truthful and give direct answers.<|eot_id|><|start_header_id|>user<|end_header_id|>
{query}<|eot_id|>
"""

In [120]:
prompt = ChatPromptTemplate.from_template(template)

# Define the RAG chain


In [121]:
rag_chain = (
    {"context": retriever,  "query": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Function to extract numbers from the response


In [127]:
def extract_number(response):
    match = re.search(r'\b(10|[0-9]+)\b', response)
    if match:
        return int(match.group(0))
    return np.nan



# Function to tokenize sentences


In [128]:
def sent_tokenize(text: str):
    seg = pysbd.Segmenter(language="en", clean=False)
    return seg.segment(text)

# Evaluate Langchain RAG using BeyondLLM evals

## Get Context Relevancy

In [129]:
def get_context_relevancy(llm, query, context):
    total_score = 0
    score_count = 0

    for content in context:
        score_response = llm.invoke(CONTEXT_RELEVENCE.format(question=query, context=content))

        # Access the content attribute directly
        score_str = score_response.content

        # Accumulate the score
        score = float(extract_number(score_str))
        total_score += score
        score_count += 1

    average_score = total_score / score_count if score_count > 0 else 0
    return f"Context Relevancy Score: {round(average_score, 1)}"

# Example query
query = "what causes heart diseases?"

# Retrieve relevant documents based on the user query using the updated method
retrieved_docs = retriever.invoke(query)

# Prepare the context from the retrieved documents
context = [doc.page_content for doc in retrieved_docs]

# Get context relevancy score
print(get_context_relevancy(llm, query, context))



Context Relevancy Score: 7.7


## Get answer relevancy score

In [130]:
response=rag_chain.invoke(query)
answer_relevancy_score = llm.invoke(ANSWER_RELEVENCE.format(question=query, context=response))
print(f"Answer Relevancy Score: {answer_relevancy_score}")

Answer Relevancy Score: content='9' response_metadata={'token_usage': {'completion_tokens': 2, 'prompt_tokens': 795, 'total_tokens': 797, 'completion_time': 0.000814979, 'prompt_time': 0.158120416, 'queue_time': None, 'total_time': 0.158935395}, 'model_name': 'llama3-8b-8192', 'system_fingerprint': 'fp_a97cfe35ae', 'finish_reason': 'stop', 'logprobs': None} id='run-03100f78-f53c-4d56-aba1-72afb2621385-0' usage_metadata={'input_tokens': 795, 'output_tokens': 2, 'total_tokens': 797}


## Calculate groundedness score

In [125]:
total_score = 0
score_count = 0

# Tokenize the response into sentences
statements = sent_tokenize(response)

for statement in statements:
    score_response = llm.invoke(GROUNDEDNESS.format(statement=statement, context=" ".join(context)))

    # Access the content attribute directly
    score_str = score_response.content

    # Accumulate the score
    score = float(extract_number(score_str))
    total_score += score
    score_count += 1

average_groundedness = total_score / score_count if score_count > 0 else 0
print(f"Groundedness Score: {round(average_groundedness, 1)}")



Groundedness Score: 7.9
