In [1]:
from langchain_unstructured import UnstructuredLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import re
from rank_bm25 import BM25Okapi
from langchain.schema import BaseRetriever  # or from langchain.retrievers.base import BaseRetriever
from langchain.schema import BaseRetriever
from typing import Any, List, Tuple
from langchain.docstore.document import Document
from langchain.vectorstores import FAISS
from langchain.docstore.document import Document
import os
from dotenv import load_dotenv
from langchain.llms import OpenAI, HuggingFaceHub
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA

import getpass
# Ask for the LangSmith key interactively (input is not echoed) instead of
# hard-coding it in the notebook; the value is only stored in this process's environment.
os.environ["LANGSMITH_API_KEY"] = getpass.getpass("Enter your LangSmith API key: ")
# Set the LangSmith tracing flag for everything that runs after this cell.
os.environ["LANGSMITH_TRACING"] = "true"

In [2]:
# Configure the loader for the insurance PDS PDF stored under ./docs.
# chunking_strategy="by_title" is forwarded to Unstructured — presumably it groups
# extracted elements under their section titles; confirm against the Unstructured docs.
loader = UnstructuredLoader(
    "./docs/nrma-car-pds-spds007-1023-nsw-act-qld-tas.pdf",
    chunking_strategy="by_title"
)

# Load the document. `elements` is reused by later cells (FAISS index, BM25 index, ragas testset).
elements = loader.load()

  from .autonotebook import tqdm as notebook_tqdm
INFO: pikepdf C++ to Python logger bridge initialized


In [3]:
import os
from dotenv import load_dotenv
from langchain.embeddings import OpenAIEmbeddings, CohereEmbeddings

load_dotenv()  # Load the environment variables from .env

def get_embedding_model(model_name: str):
    """
    Build the embedding client selected by ``model_name``.

    The name is matched case-insensitively:
      - "openai" -> OpenAIEmbeddings, keyed by the OPENAI_API_KEY environment variable.
      - "cohere" -> CohereEmbeddings, keyed by the COHERE_API_KEY environment variable.

    Raises:
        ValueError: if the name is not one of the supported providers, or if the
            provider's API key is missing or empty in the environment.
    """
    provider = model_name.lower()

    # Reject unknown providers before touching the environment.
    if provider not in ("openai", "cohere"):
        raise ValueError(f"Unsupported embedding model: {model_name}")

    env_var = "OPENAI_API_KEY" if provider == "openai" else "COHERE_API_KEY"
    api_key = os.getenv(env_var)
    if not api_key:
        raise ValueError(f"{env_var} is not set in the environment.")

    if provider == "openai":
        return OpenAIEmbeddings(openai_api_key=api_key)
    return CohereEmbeddings(cohere_api_key=api_key)

In [4]:
class BM25Retriever:
    """Keyword (BM25) retriever over an in-memory list of documents."""

    def __init__(self, documents):
        """
        Tokenize the corpus and build the BM25 index.

        Parameters:
            documents (list): Document-like objects exposing a ``page_content``
                string and a ``metadata`` dict.

        Raises:
            ValueError: if ``documents`` is empty; BM25 statistics cannot be
                computed for an empty corpus.
        """
        documents = list(documents)
        if not documents:
            raise ValueError("BM25Retriever requires at least one document.")
        self.documents = documents
        # Pre-tokenize each document's content for BM25 indexing.
        self.tokenized_corpus = [self._tokenize(doc.page_content) for doc in documents]
        self.bm25 = BM25Okapi(self.tokenized_corpus)

    def _tokenize(self, text):
        """
        A simple tokenizer that lowercases text and extracts word tokens.
        """
        return re.findall(r"\w+", text.lower())

    def get_relevant_documents(self, query: str, k: int = 5):
        """
        Return up to ``k`` documents ranked by BM25 score, best first.

        Each returned document is a shallow copy of the indexed one whose
        ``metadata`` additionally carries the BM25 score under ``"score"``.
        The indexed documents themselves are never modified, so a score from
        one query cannot leak into another query or into other code that
        holds the same document objects.

        Parameters:
            query (str): Free-text query; tokenized the same way as the corpus.
            k (int): Maximum number of documents to return. Values <= 0 yield [].
        """
        import copy  # local import keeps this cell runnable on its own

        if k <= 0:
            return []

        scores = self.bm25.get_scores(self._tokenize(query))
        # Indices of the k highest scores; the sort is stable, so ties keep corpus order.
        top_indices = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)[:k]

        hits = []
        for idx in top_indices:
            source = self.documents[idx]
            hit = copy.copy(source)
            # Fresh metadata dict: the shallow copy would otherwise share it with `source`.
            hit.metadata = {**source.metadata, "score": float(scores[idx])}
            hits.append(hit)
        return hits

class HybridRetriever(BaseRetriever):
    """
    Retriever that fuses a BM25 (keyword) ranking with a dense (vector) ranking.

    The two back-ends score on incompatible scales (BM25: higher is better;
    FAISS ``similarity_search_with_score``: a distance, where lower is better
    for the default index), so raw scores are never compared. Instead each
    list contributes by rank position (weighted reciprocal-rank fusion):

        fused(doc) = alpha * 1/(c + rank_dense) + (1 - alpha) * 1/(c + rank_bm25)

    A document missing from one list simply gets no contribution from it.
    """
    bm25_retriever: Any  # object exposing get_relevant_documents(query, k=...) -> docs, best first
    dense_retriever: Any  # object exposing similarity_search_with_score(query, k=...) -> [(doc, score)], best first
    alpha: float = 0.5  # weight of the dense ranking, expected in [0, 1]; BM25 gets 1 - alpha

    def _get_relevant_documents(self, query: str, k: int = 5):
        """
        Return the de-duplicated union of both back-ends' top ``k`` hits,
        ordered by fused score (best first).

        Documents are considered duplicates when their stripped, lower-cased
        ``page_content`` is equal. Returned documents are shallow copies whose
        ``metadata["score"]`` holds the fused score; the documents owned by the
        back-ends are not modified.

        Parameters:
            query (str): The user query.
            k (int): Number of hits requested from each back-end, so up to
                2 * k documents can be returned. Values <= 0 yield [].
        """
        import copy  # local import keeps this cell runnable on its own

        if k <= 0:
            return []

        bm25_docs = self.bm25_retriever.get_relevant_documents(query, k=k)
        # Only the order of the dense hits is used; the raw distance is dropped.
        dense_docs = [
            doc for doc, _ in self.dense_retriever.similarity_search_with_score(query, k=k)
        ]

        rank_offset = 60  # conventional RRF damping constant; flattens the gap between top ranks

        fused = {}  # normalized content -> [first document seen, accumulated score]
        for weight, ranked_docs in ((1.0 - self.alpha, bm25_docs), (self.alpha, dense_docs)):
            counted = set()
            for rank, doc in enumerate(ranked_docs, start=1):
                # Key on the normalized text itself rather than hash(text), so two
                # different chunks can never be merged by a hash collision.
                key = doc.page_content.strip().lower()
                if key in counted:
                    # Same text twice within one list: count only its best rank.
                    continue
                counted.add(key)
                entry = fused.setdefault(key, [doc, 0.0])
                entry[1] += weight / (rank_offset + rank)

        # Stable sort: ties keep insertion order (BM25 hits before dense-only hits).
        ordered = sorted(fused.values(), key=lambda entry: entry[1], reverse=True)

        results = []
        for doc, fused_score in ordered:
            hit = copy.copy(doc)
            hit.metadata = {**doc.metadata, "score": fused_score}
            results.append(hit)
        return results

In [5]:
# Embedding client for the dense index; get_embedding_model reads OPENAI_API_KEY from the environment.
embedding_model = get_embedding_model("openai")

# Build the dense vector store using FAISS.
# FAISS provides similarity_search_with_score, which HybridRetriever calls directly.
vector_store = FAISS.from_documents(elements, embedding_model)

# The vector store itself (not a retriever wrapper around it) is handed over,
# because HybridRetriever needs the scored-search method.
dense_retriever = vector_store

# Initialize the BM25 retriever over the same chunks.
bm25_retriever = BM25Retriever(elements)

# Create a HybridRetriever instance with a chosen weight.
# alpha is HybridRetriever's dense/BM25 weighting field; 0.5 is meant as an even balance.
hybrid_retriever = HybridRetriever(bm25_retriever=bm25_retriever, 
                                   dense_retriever=dense_retriever,
                                   alpha=0.5)

# Chat model used to write answers; temperature=0 keeps sampling as deterministic as the API allows.
llm = ChatOpenAI(temperature=0, openai_api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")

# NOTE: the query is only printed here; this cell does not run it through qa_pipeline.
query = "What are the key coverages in the insurance policy?"
print("Query:", query)

# RetrievalQA over the hybrid retriever; qa_pipeline is reused by the evaluation cell.
qa_pipeline = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=hybrid_retriever)

  return OpenAIEmbeddings(openai_api_key=openai_api_key)
INFO: HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO: Loading faiss.
INFO: Successfully loaded faiss.
INFO: Failed to load GPU Faiss: name 'GpuIndexIVFFlat' is not defined. Will not load constructor refs for GPU indexes.


Query: What are the key coverages in the insurance policy?


  llm = ChatOpenAI(temperature=0, openai_api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")


In [6]:
from ragas.testset.graph import KnowledgeGraph, Node
from ragas.testset.transforms import apply_transforms, Parallel
from ragas.testset.transforms.extractors import NERExtractor, KeyphrasesExtractor
from ragas.testset.transforms.relationship_builders.traditional import JaccardSimilarityBuilder
# from ragas.testset.synthesizers.base_query import QuerySynthesizer, SingleTurnSample
from ragas.testset.synthesizers.base import BaseSynthesizer
from ragas.dataset_schema import SingleTurnSample

# Step 1: Convert your pre-chunked documents into nodes.
# Each element is a LangChain Document; its text is read from `page_content`.
nodes = [Node(properties={"page_content": element.page_content}) for element in elements]

# Step 2: Build the Knowledge Graph from nodes.
kg = KnowledgeGraph(nodes=nodes)

# Step 3: Define and apply the transformation pipeline.
# The two extractors run in parallel; the relationship builder runs after them
# because it reads the "entities" property the NER extractor produces.
ner_extractor = NERExtractor()
key_extractor = KeyphrasesExtractor()
rel_builder = JaccardSimilarityBuilder(property_name="entities", key_name="PER", new_property_name="entity_jaccard_similarity")

transforms = [
    Parallel(ner_extractor, key_extractor),
    rel_builder
]
# Apply the transforms to enrich the knowledge graph in place.
apply_transforms(kg, transforms)


from ragas.testset import TestsetGenerator
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
# ragas expects its own LLM / embedding interfaces, hence the wrappers.
generator_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o", api_key=os.getenv("OPENAI_API_KEY")))
generator_embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings(api_key=os.getenv("OPENAI_API_KEY")))

# Pass the ragas-wrapped embeddings. Previously the raw LangChain `embedding_model`
# was passed here and `generator_embeddings` was built but never used.
generator = TestsetGenerator(llm=generator_llm,
                             embedding_model=generator_embeddings,
                             knowledge_graph=kg)

# Drop very short chunks (<= 200 characters) before generating questions from them.
elementsf = [el for el in elements if len(el.page_content) > 200]

# NOTE(review): generate_with_langchain_docs appears to build its own graph from the
# documents passed in — confirm whether the hand-built `kg` above is actually used.
dataset = generator.generate_with_langchain_docs(elementsf, testset_size=10)

In [None]:
import re
from typing import List, Dict, Any
from langchain.chat_models import ChatOpenAI

def _iter_samples(dataset: Any):
    """Yield one mapping-like sample per row from a testset, a DataFrame, or a plain iterable."""
    if hasattr(dataset, "to_pandas"):
        dataset = dataset.to_pandas()
    if hasattr(dataset, "iterrows"):
        for _, row in dataset.iterrows():
            yield row
    else:
        yield from dataset


def llm_judge_evaluation(
    dataset: Any,
    qa_pipeline: Any,
    judge_llm: Any
) -> Dict[str, Any]:
    """
    Evaluates your retriever+LLM pipeline on each sample in the dataset using an LLM judge.

    Parameters:
      - dataset: the evaluation samples. Accepted forms:
          • an object with ``to_pandas()`` (e.g. a generated testset),
          • a DataFrame-like object with ``iterrows()``,
          • any iterable of mappings.
        Every sample must provide:
          • 'user_input': the query text.
          • 'reference': the gold standard answer.
      - qa_pipeline: your combined QA retrieval pipeline; anything with a
        ``run(question) -> str`` method (e.g. an instance of RetrievalQA).
      - judge_llm: the evaluator. Its ``invoke(prompt)`` must return an object
        with a numeric ``score`` attribute (e.g. a chat model wrapped with
        ``with_structured_output``), not a plain chat message.

    Returns:
      A dictionary with:
          • 'scores': the judge's score for each sample, in dataset order.
          • 'average_score': mean of the scores, or 0 for an empty dataset.
          • 'detailed_results': one dict per sample with the question, candidate
            answer, reference answer, full judge response and score.
    """
    scores = []
    detailed_results = []

    for sample in _iter_samples(dataset):
        user_input = sample["user_input"]
        reference = sample["reference"]

        # Generate the candidate answer using your QA pipeline.
        candidate_answer = qa_pipeline.run(user_input)

        # Construct the judge prompt.
        judge_prompt = f"""
        You are an experienced evaluator. Please score the following candidate answer on a Likert scale from 1 to 4, where:
            1 = Poor: Incorrect, irrelevant, or incomplete.
            2 = Fair: Moderately accurate but missing some details.
            3 = Good: Mostly accurate and relevant.
            4 = Excellent: Completely accurate and comprehensive.

        Question: {user_input}

        Candidate Answer: {candidate_answer}

        Reference Answer: {reference}

        Provide feedback to the candidate answer given the reference answer. Then provide the score.
"""
        # Obtain the judge's response.
        judge_response = judge_llm.invoke(judge_prompt)
        score = judge_response.score

        scores.append(score)
        detailed_results.append({
            "question": user_input,
            "candidate_answer": candidate_answer,
            "reference_answer": reference,
            "judge_response": judge_response,
            "score": score
        })

    # Guard against division by zero when the dataset is empty.
    average_score = sum(scores) / len(scores) if scores else 0
    return {
        "scores": scores,
        "average_score": average_score,
        "detailed_results": detailed_results
    }

from pydantic import BaseModel, Field
from typing import Optional
# judge_llm = ChatOpenAI(temperature=0, openai_api_key=os.getenv("OPENAI_API_KEY"), model="gpt-3.5-turbo")
from langchain.chat_models import init_chat_model

# Structured verdict the judge model is asked to return: free-text feedback plus a 1-4 score.
# Documented with comments rather than a class docstring on purpose — a docstring
# would presumably become part of the schema sent to the model; confirm before adding one.
class JudgeResponse(BaseModel):
    feedback: str = Field(description="Feedback on the candidate answer")
    score: int = Field(description="Score on a scale of 1-4")

# Judge model, created through the provider-agnostic init_chat_model helper.
judge_llm = init_chat_model("gpt-4o-mini", openai_api_key=os.getenv("OPENAI_API_KEY"), model_provider="openai")
# Rebind judge_llm to the structured-output wrapper; llm_judge_evaluation reads `.score` from its responses.
judge_llm = judge_llm.with_structured_output(JudgeResponse)
# Runs the QA pipeline and the judge once per sample of the generated testset.
results = llm_judge_evaluation(dataset, qa_pipeline, judge_llm)

In [8]:
from typing import Optional, List, Sequence
from operator import itemgetter
from langchain_core.language_models import BaseLanguageModel
from langchain_core.prompts import PromptTemplate
from langchain_core.retrievers import BaseRetriever
from langchain_core.documents import Document
from langchain_core.runnables import RunnableSequence, RunnableParallel
from langchain_openai import ChatOpenAI

class LongQAChain:
    """
    A question-answering chain that uses map-reduce to handle long contexts.

    Map: every document is sent to the LLM on its own, asking only for the
    information relevant to the question. Reduce: the per-document extracts are
    joined with blank lines and a second LLM call writes the final answer.
    """

    def __init__(
        self,
        llm: Optional[BaseLanguageModel] = None,
        retriever: Optional[BaseRetriever] = None,
        map_prompt: Optional[PromptTemplate] = None,
        reduce_prompt: Optional[PromptTemplate] = None
    ):
        """
        Initialize the QA chain.

        Parameters:
            llm: Chat model used for both the map and the reduce step; its
                responses must expose ``.content``. Defaults to gpt-3.5-turbo.
            retriever: Used by ``run``/``arun`` when no documents are passed.
            map_prompt: Prompt with ``{context}`` and ``{question}`` applied to
                each document. Defaults to ``_default_map_prompt()``.
            reduce_prompt: Prompt with ``{context}`` and ``{question}`` applied
                to the joined extracts. Defaults to ``_default_reduce_prompt()``.
        """
        self.llm = llm or ChatOpenAI(model_name="gpt-3.5-turbo")
        self.retriever = retriever
        self.map_prompt = map_prompt or self._default_map_prompt()
        self.reduce_prompt = reduce_prompt or self._default_reduce_prompt()
        self.chain = self._create_chain()

    @staticmethod
    def _default_map_prompt() -> PromptTemplate:
        """Create default mapping prompt."""
        template = """The following is a chunk of an insurance document:
        {context}
        
        Based on this chunk, what information is relevant to answering: {question}
        
        Relevant information:"""

        return PromptTemplate(
            template=template,
            input_variables=["context", "question"]
        )

    @staticmethod
    def _default_reduce_prompt() -> PromptTemplate:
        """Create default reducing prompt."""
        template = """Given the following extracted information from different parts of an insurance document, 
        provide a comprehensive answer to the question: {question}

        Extracted information:
        {context}

        Final Answer:"""

        return PromptTemplate(
            template=template,
            input_variables=["context", "question"]
        )

    def _create_chain(self) -> RunnableSequence:
        """
        Create the map-reduce chain using the LangChain v0.3 syntax.

        The chain takes ``{"documents": [...], "question": str}`` and returns
        the final answer text.
        """

        # Map step: one LLM call per document.
        map_chain = (
            self.map_prompt
            | self.llm
            | (lambda x: x.content)  # Extract content from LLM response
        )

        # Join the per-document extracts into a single context string.
        def combine_docs(docs: Sequence[str]) -> str:
            return "\n\n".join(docs)

        # Reduce step: one LLM call over the joined extracts.
        reduce_chain = (
            self.reduce_prompt
            | self.llm
            | (lambda x: x.content)  # Extract content from LLM response
        )

        # The leading RunnableParallel is a pass-through, but it must stay: it makes
        # the left-hand side a Runnable, so the following `| {...}` composes steps
        # instead of performing a plain dict merge.
        chain = RunnableParallel({
            "documents": lambda x: x["documents"],
            "question": lambda x: x["question"],
        }) | {
            "mapped_results": (
                lambda x: [{"context": doc.page_content, "question": x["question"]}
                        for doc in x["documents"]]
            ) | map_chain.map(),
            "question": lambda x: x["question"],
        } | {
            "context": lambda x: combine_docs(x["mapped_results"]),
            "question": lambda x: x["question"],
        } | reduce_chain

        return chain

    def _require_retriever(self) -> None:
        """Raise ValueError when there is no retriever to fetch documents with."""
        if self.retriever is None:
            raise ValueError("Either documents or a retriever must be provided")

    async def arun(
        self,
        question: str,
        documents: Optional[List[Document]] = None
    ) -> str:
        """
        Async run the QA chain.

        Parameters:
            question: The user question.
            documents: Documents to answer from. When None, they are fetched
                from the configured retriever.

        Raises:
            ValueError: if neither ``documents`` nor a retriever is available.
        """
        if documents is None:
            self._require_retriever()
            # Prefer the Runnable API; aget_relevant_documents is deprecated in
            # LangChain and only kept as a fallback for legacy retriever objects.
            if hasattr(self.retriever, "ainvoke"):
                documents = await self.retriever.ainvoke(question)
            else:
                documents = await self.retriever.aget_relevant_documents(question)

        return await self.chain.ainvoke({
            "documents": documents,
            "question": question
        })

    def run(
        self,
        question: str,
        documents: Optional[List[Document]] = None
    ) -> str:
        """
        Run the QA chain on a question and documents.

        Parameters:
            question: The user question.
            documents: Documents to answer from. When None, they are fetched
                from the configured retriever.

        Raises:
            ValueError: if neither ``documents`` nor a retriever is available.
        """
        if documents is None:
            self._require_retriever()
            # Prefer the Runnable API; get_relevant_documents is deprecated in
            # LangChain and only kept as a fallback for legacy retriever objects.
            if hasattr(self.retriever, "invoke"):
                documents = self.retriever.invoke(question)
            else:
                documents = self.retriever.get_relevant_documents(question)

        return self.chain.invoke({
            "documents": documents,
            "question": question
        })

In [None]:
# Initialize the map-reduce chain on top of the hybrid retriever.
qa_chain = LongQAChain(
    retriever=hybrid_retriever,
    llm=ChatOpenAI(model_name="gpt-3.5-turbo")
)

# Ask a question. No documents are passed, so run() fetches them through the retriever.
answer = qa_chain.run("Summarise the policies of this document?")

  documents = self.retriever.get_relevant_documents(question)
INFO: HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK

In [11]:
# Display the current value of `answer`.
answer

"The document is a Supplementary Product Disclosure Statement (SPDS) that applies to policies under the NRMA Insurance Motor NSW, ACT, TAS, and QLD Insurance Product Disclosure Statement and Policy Booklet (PDS). The SPDS provides updates to the terms contained in the PDS and should be read together with the PDS and any other applicable SPDS. It includes information on various aspects of the insurance policies, such as coverage, limits, exclusions, conditions, and benefits. Specific details are provided on Third Party Property Damage Insurance, liability cover, temporary cover, and a comparison of the 4 types of insurance offered by the company. The policy covers the cost of damage to the insured vehicle, with no excess for claims made under specified benefits. There are provisions for recovery actions in case of loss or damage, and the insured party may receive payment equal to the market value of their vehicle in certain circumstances. The document also outlines how changes can be ma

In [17]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain_core.messages import HumanMessage, SystemMessage
from langgraph.graph import StateGraph, START, END
from typing import Literal, TypedDict, List

# --- Setup your two different QA pipelines ---
# Both pipelines share the same hybrid retriever; they differ only in how the
# retrieved chunks are turned into an answer.
# Short QA: RetrievalQA with chain_type="stuff".
short_qa_pipeline = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(model_name="gpt-3.5-turbo"),
    chain_type="stuff",
    retriever=hybrid_retriever  # your existing retriever
)

# Long QA: the map-reduce LongQAChain (per-chunk extraction, then one combined answer).
long_qa_pipeline = LongQAChain(
    retriever=hybrid_retriever,
    llm=ChatOpenAI(model_name="gpt-3.5-turbo")
)

class AgentState(TypedDict):
    """State dictionary passed between the nodes of the QA agent graph."""
    user_query: str  # question supplied by the caller
    partial_answer: str  # latest answer written by generate_answer
    final_answer: str  # copied from partial_answer by finalize_answer
    approach: Literal["short", "long"]  # chosen approach
    done: bool  # written by evaluate_answer; read by route_evaluation

# 1. Decide tool/approach
def decide_approach(state: "AgentState"):
    """
    Basic logic for deciding short vs. long QA.
    Could be replaced by a classification LLM call or more advanced router.

    The long (map-reduce) pipeline is chosen when the query is longer than
    200 characters or asks for a summary. Summary requests are detected by the
    stem "summar", case-insensitively, so "summarize", "summarise", "summary"
    and "summarization" all match — not only the US spelling.

    Parameters:
        state: Agent state; only ``state["user_query"]`` is read.

    Returns:
        A partial state update: ``{"approach": "long"}`` or ``{"approach": "short"}``.
    """
    query = state["user_query"]
    wants_summary = "summar" in query.lower()
    if len(query) > 200 or wants_summary:
        return {"approach": "long"}
    return {"approach": "short"}

# 2. Generate answer using chosen approach
def generate_answer(state: AgentState):
    """
    Answer the user's query with the pipeline named in ``state["approach"]``.

    "short" uses ``short_qa_pipeline``; any other value uses ``long_qa_pipeline``.
    Returns a partial state update holding the answer under ``"partial_answer"``.
    """
    use_short = state["approach"] == "short"
    question = state["user_query"]

    pipeline = short_qa_pipeline if use_short else long_qa_pipeline
    return {"partial_answer": pipeline.run(question)}

# 3. Evaluate if we should stop or gather more context
def evaluate_answer(state: AgentState):
    """
    Decide whether the current partial answer is good enough to stop.

    This is a placeholder heuristic meant to be swapped for a real evaluator
    (for example an LLM call): an answer longer than 400 characters counts as
    complete. As written, every branch reports ``{"done": True}``, so the graph
    never takes the repeat path; change the last return to ``False`` to allow
    additional generate/evaluate rounds for the short approach.
    """
    draft = state["partial_answer"]

    # Long enough: accept as complete.
    if len(draft) > 400:
        return {"done": True}

    # Short draft from the long pipeline: accepted as-is for this demonstration.
    if state["approach"] == "long":
        return {"done": True}

    return {"done": True}  # Or set to False for multiple loops

# 4. If done, finalize answer
def finalize_answer(state: AgentState):
    """Promote the current partial answer to the final answer."""
    accepted = state["partial_answer"]
    return {"final_answer": accepted}

# 5. Build the Graph
# State graph whose nodes read and update an AgentState.
graph = StateGraph(AgentState)
# Register each step under the name used by the edges below.
graph.add_node("decide_approach", decide_approach)
graph.add_node("generate_answer", generate_answer)
graph.add_node("evaluate_answer", evaluate_answer)
graph.add_node("finalize_answer", finalize_answer)

# Connect the nodes: START -> decide_approach -> generate_answer -> evaluate_answer.
graph.add_edge(START, "decide_approach")
graph.add_edge("decide_approach", "generate_answer")
graph.add_edge("generate_answer", "evaluate_answer")

# If done = True -> finalize_answer, else -> we could loop back to generate_answer
def route_evaluation(state: AgentState):
    """Map ``state["done"]`` to a routing label: "Done" when truthy, otherwise "Repeat"."""
    return "Done" if state["done"] else "Repeat"

# After evaluate_answer, route_evaluation's label picks the next node:
# "Done" finishes via finalize_answer, "Repeat" goes back to generate_answer.
graph.add_conditional_edges(
    "evaluate_answer",
    route_evaluation,
    {
        "Done": "finalize_answer",
        "Repeat": "generate_answer"  
    },
)

# finalize_answer is the last step before the graph ends.
graph.add_edge("finalize_answer", END)

# Compile the graph into the object that run_agent_query invokes.
agent_workflow = graph.compile()

# Example usage:
def run_agent_query(user_query: str):
    """
    Run one query through the compiled agent workflow.

    Builds a fresh state with empty answers and ``done=False``, invokes
    ``agent_workflow`` and returns the ``"final_answer"`` of the resulting state.
    """
    seed_state = dict(
        user_query=user_query,
        partial_answer="",
        final_answer="",
        approach="short",  # placeholder
        done=False,
    )
    return agent_workflow.invoke(seed_state)["final_answer"]

# Example: a short factual question. Note this rebinds the notebook-level `answer`.
answer = run_agent_query("What is the maximum cover for a single vehicle?")

  answer = short_qa_pipeline.run(query)
INFO: HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [18]:
from pprint import pprint
# pprint wraps the long answer string across lines so it is readable in the output.
pprint(answer)

('The maximum cover for a single vehicle is up to $25,000 for the agreed value '
 'in this policy.')
