RAG, RAG with Memory, Adaptive RAG, Corrective RAG, Self-RAG, Agentic RAG... are you lost? Let me help you with this guide.

1/ Simple RAG
Retrieves relevant documents based on the query and uses them to generate an answer.

2/ Simple RAG with Memory
Extends Simple RAG by maintaining context from previous interactions.

3/ Branched RAG
Performs multiple retrieval steps, refining the search based on intermediate results.

4/ HyDE (Hypothetical Document Embeddings)
Generates a hypothetical ideal document before retrieval to improve search relevance.

5/ Adaptive RAG
Dynamically adjusts retrieval and generation strategies based on the query type or difficulty.

6/ Corrective RAG (CRAG)
Iteratively refines generated responses by fact-checking against retrieved information.

7/ Self-RAG
The model critiques and improves its own responses using self-reflection and retrieval.

8/ Agentic RAG
Combines RAG with agentic behavior, allowing for more complex, multi-step problem-solving.


https://python.langchain.com/v0.1/docs/get_started/quickstart/

langchain quick start ^


https://python.langchain.com/docs/integrations/providers/ollama/

Ollama integrations ^

Tool calling:
https://ollama.com/blog/tool-support
https://python.langchain.com/docs/how_to/tool_calling/


- Easy example:
https://github.com/Shubhamsaboo/awesome-llm-apps/blob/main/llama3.1_local_rag/llama3.1_local_rag.py

In [74]:
import torch

# Report which PyTorch build is installed.
print(f"PyTorch version: {torch.__version__}")

# Ask for CUDA availability once and branch on the answer.
cuda_ready = torch.cuda.is_available()

if not cuda_ready:
    # Without CUDA there is nothing to enumerate and no device to move data to.
    print("CUDA is not available. GPU is not set up correctly.")
    print("GPU is not available, cannot move tensor to GPU.")
else:
    gpu_count = torch.cuda.device_count()

    # Summary of the CUDA setup and the currently selected device.
    print("CUDA is available! GPU is ready to be used.")
    print(f"Number of GPUs available: {gpu_count}")
    print(f"Current GPU: {torch.cuda.get_device_name(torch.cuda.current_device())}")

    # Per-device details: name, total memory (bytes / 1e9) and compute capability.
    for gpu_index in range(gpu_count):
        print(f"GPU {gpu_index}: {torch.cuda.get_device_name(gpu_index)}")
        print(f"  - Total Memory: {torch.cuda.get_device_properties(gpu_index).total_memory / 1e9} GB")
        print(f"  - Compute Capability: {torch.cuda.get_device_capability(gpu_index)}")

    # Smoke test: build a random 3x3 tensor on the CPU, then move it to the GPU.
    tensor = torch.rand(3, 3).to("cuda")
    print("Tensor on GPU:", tensor)


PyTorch version: 2.5.0+cu124
CUDA is available! GPU is ready to be used.
Number of GPUs available: 1
Current GPU: NVIDIA GeForce RTX 4090
GPU 0: NVIDIA GeForce RTX 4090
  - Total Memory: 25.756696576 GB
  - Compute Capability: (8, 9)
Tensor on GPU: tensor([[0.7445, 0.2800, 0.9652],
        [0.5206, 0.5027, 0.5118],
        [0.6726, 0.8185, 0.4314]], device='cuda:0')


In [75]:
import os
from dotenv import load_dotenv


# Print the current working directory (optional for debugging)
print(os.getcwd())

# Set the path to your .env file relative to the current working directory
# (two directory levels above it).
dotenv_path = os.path.join(os.getcwd(), '../../.env')
load_dotenv(dotenv_path)


# Set up API keys
# os.environ only accepts string values, so assigning the result of
# os.getenv() for a missing key (None) would raise an opaque TypeError.
# Fail with an actionable message instead.
tavily_api_key = os.getenv("TAVILY_API_KEY")
if tavily_api_key is None:
    raise RuntimeError(
        f"TAVILY_API_KEY is not set; define it in the environment or in {dotenv_path}"
    )
os.environ["TAVILY_API_KEY"] = tavily_api_key


/workspaces/custom_ollama_docker/notebooks/contextual_retreivel_rag/sports_news_rag


In [76]:
%%writefile ../../../src/sports_news_rag/modules/data_crawling.py
from langchain_ollama import ChatOllama, OllamaEmbeddings
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.schema import Document
import concurrent.futures  # For parallel processing
import os

# Define the home directory for the project
# NOTE(review): hard-coded absolute path; no function in this cell reads
# HOME_DIR — confirm whether anything else depends on it before removing.
HOME_DIR = "/workspaces/custom_ollama_docker"

def crawl_and_ingest(url, debug=False):
    """
    Crawls a URL and converts its content into quality-checked proposition documents.

    The page is loaded with WebBaseLoader, split into 500-character chunks with a
    50-character overlap, and each chunk is handed to process_chunk on a thread pool.

    Parameters:
    - url (str): Address of the page to load.
    - debug (bool): Flag for printing debug information.

    Returns:
    - List[Document]: Flat list of proposition documents, in the same order as the
      chunks they were derived from.
    """
    if debug:
        print(f"Crawling data from: {url}")

    # Load documents from the web URL
    loader = WebBaseLoader(url)
    docs = loader.load()

    if debug:
        print(f"Loaded {len(docs)} documents from {url}")
        print(f"Document types loaded: {[type(doc) for doc in docs]}")

    # Split the documents into smaller chunks
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    document_chunks = text_splitter.split_documents(docs)

    if debug:
        print(f"Number of document chunks generated: {len(document_chunks)}")

    # Process each chunk to generate high-quality propositions.
    # executor.map yields results in submission order, so the returned list is
    # deterministic no matter which chunk finishes first (collecting with
    # as_completed would order results by completion time instead).
    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        processed_documents = list(
            executor.map(lambda chunk: process_chunk(chunk, debug), document_chunks)
        )

    # Flatten the per-chunk results; a non-list result is kept as a single item.
    proposition_documents = []
    for chunk_result in processed_documents:
        if isinstance(chunk_result, list):
            proposition_documents.extend(chunk_result)
        else:
            proposition_documents.append(chunk_result)

    if debug:
        print(f"Total number of processed documents after flattening: {len(proposition_documents)}")
        print(f"Types of processed documents: {[type(doc) for doc in proposition_documents]}")

    return proposition_documents


def process_chunk(chunk, debug=False):
    """
    Turns one document chunk into Documents, one per proposition that
    survives the quality check.
    """
    candidate_props = generate_propositions(chunk.page_content, debug)
    accepted_props = quality_check_propositions(candidate_props, debug)

    wrapped = []
    for statement in accepted_props:
        wrapped.append(Document(page_content=statement))
    return wrapped

def generate_propositions(text, debug=False):
    """
    Asks the LLM to break the text into standalone factual statements.

    Only the first 2000 characters of the text are sent. The reply is split on
    newlines; each non-blank line, stripped of surrounding whitespace, becomes
    one proposition in the returned list.
    """
    model = ChatOllama(model="llama3.2", temperature=0)

    # Slicing leaves shorter inputs untouched, so no length check is needed.
    char_limit = 2000
    excerpt = text[:char_limit]

    proposition_prompt = (
        f"Break down the following text into concise, complete, and meaningful factual statements:\n\n{excerpt}\n\n"
        "Provide each proposition as a separate statement."
    )
    messages = [{"role": "user", "content": proposition_prompt}]
    reply = model.invoke(messages).content

    propositions = []
    for line in reply.split('\n'):
        cleaned = line.strip()
        if cleaned:
            propositions.append(cleaned)

    if debug:
        print(f"Generated propositions: {propositions}")

    return propositions

def quality_check_propositions(propositions, debug=False):
    """
    Checks the quality of the propositions for accuracy, clarity, completeness, and conciseness.

    Propositions are sent to the LLM in batches of five. Each batch is presented
    as a numbered list, and the model is asked to answer with one
    "<number>: pass" or "<number>: fail" line per proposition. Verdicts are
    matched to propositions by that number, so extra commentary, blank lines or
    per-aspect scores in the reply cannot shift the alignment.

    Parameters:
    - propositions (List[str]): Candidate statements to evaluate.
    - debug (bool): Flag for printing debug information.

    Returns:
    - List[str]: The propositions the model explicitly marked as "pass", in
      their original order. Propositions without a parsable verdict are dropped.
    """
    # Local import: `re` is not among this module's top-level imports.
    import re

    llm = ChatOllama(model="llama3.2", temperature=0)
    high_quality_propositions = []

    # Matches verdict lines such as "1: pass", "2) FAIL", "- 3. pass" or "**4: pass**".
    verdict_pattern = re.compile(
        r"^[^\w\n]*(\d+)[ \t]*[:.)\-][ \t]*\**[ \t]*(pass|fail)\b",
        re.IGNORECASE | re.MULTILINE,
    )

    batch_size = 5
    for i in range(0, len(propositions), batch_size):
        batch = propositions[i:i + batch_size]

        # One proposition per numbered line; joining with ", " would make
        # propositions that themselves contain commas indistinguishable.
        numbered = "\n".join(f"{n}. {prop}" for n, prop in enumerate(batch, start=1))
        quality_prompt = (
            "Evaluate the following numbered propositions for accuracy, clarity, completeness, and conciseness. "
            "Score each aspect from 1 to 10. Then, for every proposition, output exactly one line of the form "
            "'<number>: pass' if the proposition is acceptable or '<number>: fail' if it is not:\n\n"
            f"{numbered}"
        )
        response = llm.invoke([{"role": "user", "content": quality_prompt}]).content

        # Map proposition number -> verdict; a later line for the same number wins.
        verdicts = {
            int(number): verdict.lower()
            for number, verdict in verdict_pattern.findall(response)
        }

        if debug:
            print(f"Batch being processed: {batch}")
            print(f"LLM Response: {response}")
            print(f"Number of results received: {len(verdicts)}, Number of propositions in batch: {len(batch)}")

        for n, prop in enumerate(batch, start=1):
            if verdicts.get(n) == "pass":
                high_quality_propositions.append(prop)

    return high_quality_propositions



def main(debug=False):
    """Smoke-test the crawler against two sample sports sites."""
    test_urls = ("https://www.nba.com/", "https://www.espn.com/")

    # Sites are crawled one after another; results are pooled into one list.
    collected = [doc for url in test_urls for doc in crawl_and_ingest(url, debug)]

    if debug:
        print(f"Total documents ingested: {len(collected)}")

if __name__ == "__main__":
    main(debug=True)


Overwriting ../../../src/sports_news_rag/modules/data_crawling.py


In [77]:
%%writefile ../../../src/sports_news_rag/modules/vector_store.py
from langchain_community.vectorstores import Chroma
from langchain_ollama import OllamaEmbeddings, ChatOllama
from langchain.schema import Document  # Import the Document class
import os

# Define the home directory for the project
HOME_DIR = "/workspaces/custom_ollama_docker"
# Root folder for persisted vector stores; create_vectorstore joins the
# site name onto it to get the per-site persist directory.
DATA_DIR = os.path.join(HOME_DIR, "data", "vectorstores")

def create_vectorstore(documents, site_name="nba", debug=False):
    """
    Builds a Chroma vector store for the given documents.

    Parameters:
    - documents: Documents to embed and index.
    - site_name (str): Sub-folder of DATA_DIR in which the store is persisted.
    - debug (bool): Flag for printing debug information.

    Returns:
    - Chroma: The vector store created from the documents.
    """
    embedder = OllamaEmbeddings(model="llama3.2")

    # Make sure the per-site target folder exists before Chroma writes to it.
    target_dir = os.path.join(DATA_DIR, site_name)
    os.makedirs(target_dir, exist_ok=True)

    if debug:
        print(f"Creating vector store with {len(documents)} documents at {target_dir}...")

    return Chroma.from_documents(
        documents=documents,
        embedding=embedder,
        persist_directory=target_dir,
    )

def create_pre_ingested_vectorstore(site_name, documents):
    """
    Builds and persists a Chroma vector store for a site under DATA_DIR.

    The store is written to DATA_DIR/<site_name lower-cased>, the same root that
    create_vectorstore uses, so the location no longer depends on the current
    working directory.

    Parameters:
    - site_name (str): Site label; lower-cased to form the folder name.
    - documents: Documents to embed and index.

    Returns:
    - Chroma: The vector store that was created.
    """
    directory = os.path.join(DATA_DIR, site_name.lower())
    os.makedirs(directory, exist_ok=True)

    # Initialize the embedding function
    embeddings = OllamaEmbeddings(model="llama3.2")
    vectorstore = Chroma.from_documents(documents, embedding=embeddings, persist_directory=directory)
    print(f"Vector store for {site_name} created and saved at {directory}")

    # Hand the store back so callers can query it without reloading from disk.
    return vectorstore

def main(debug=False):
    """Exercise both vector-store builders with one-document samples."""
    # Default-site store built from a single Document object.
    create_vectorstore(
        [Document(page_content="This is a high-quality sample document for testing.")],
        debug=debug,
    )
    if debug:
        print("Vector store successfully created.")

    # Pre-ingested store for a named site.
    espn_docs = [Document(page_content="This is a sample document for NFL data.")]
    create_pre_ingested_vectorstore("ESPN", espn_docs)

if __name__ == "__main__":
    main(debug=True)


Overwriting ../../../src/sports_news_rag/modules/vector_store.py


In [78]:
%%writefile ../../../src/sports_news_rag/modules/contextual_retrieval.py

import copy
from langchain_community.vectorstores import Chroma
from langchain_ollama import OllamaEmbeddings, ChatOllama
from langchain.schema import Document  # Import the Document class

def create_contextual_nodes(documents, debug=False):
    """
    Produces enriched copies of the documents, each carrying LLM-generated context.

    For every input document the LLM is asked for contextual information about
    its content; the reply is stored under metadata["context"] on a deep copy,
    so the input documents are left unmodified.

    Parameters:
    - documents (List[Document]): List of LangChain Document objects.
    - debug (bool): Flag for printing debug information.

    Returns:
    - List[Document]: Enriched copies, in the same order as the input.
    """
    model = ChatOllama(model="llama3.2", temperature=0)

    def _enrich(source_doc):
        # Ask the model for background on this document's content.
        prompt = (
            f"Given the following document, generate contextual information that would help better understand its content:\n\n{source_doc.page_content}\n\n"
            "Contextual information:"
        )
        generated = model.invoke([{"role": "user", "content": prompt}]).content

        # Attach the context to a copy rather than to the caller's object.
        clone = copy.deepcopy(source_doc)
        clone.metadata["context"] = generated

        if debug:
            print(f"Generated context for document: {generated}")
        return clone

    return [_enrich(item) for item in documents]

def create_embedding_retriever(documents, persist_directory='../../data/chroma_dbs', debug=False):
    """
    Indexes the documents in a Chroma vector store using Ollama embeddings.

    Parameters:
    - documents (List[Document]): Documents to embed and index.
    - persist_directory (str): Directory handed to Chroma for persistence.
    - debug (bool): Flag for printing debug information.

    Returns:
    - Chroma: The vector store itself; call .as_retriever() on it to obtain a retriever.
    """
    store_options = {
        "documents": documents,
        "embedding": OllamaEmbeddings(model="llama3.2"),
        "persist_directory": persist_directory,
    }

    if debug:
        print(f"Creating vector store with {len(documents)} contextually enriched documents...")

    store = Chroma.from_documents(**store_options)

    if debug:
        print(f"Vector store created at {persist_directory}")

    return store

def main(debug=True):
    """
    Runs the contextual-retrieval pipeline end to end on a single sample document.
    """
    seed_docs = [Document(page_content="The Boston Celtics won the NBA Finals in 2023.")]

    # Enrich the sample, then index the enriched copy.
    enriched = create_contextual_nodes(seed_docs, debug=debug)
    create_embedding_retriever(enriched, debug=debug)

    if debug:
        print(f"Successfully created contextual retriever with {len(enriched)} contextually enriched documents.")

if __name__ == "__main__":
    main(debug=True)


Overwriting ../../../src/sports_news_rag/modules/contextual_retrieval.py


In [79]:
%%writefile ../../../src/sports_news_rag/modules/hyde_rag.py

from langchain_ollama import OllamaEmbeddings, ChatOllama
from modules.contextual_retrieval import create_contextual_nodes, create_embedding_retriever
from langchain.schema import Document

def contextual_retrieval(question, retriever, debug=False):
    """
    Retrieves documents by searching with an LLM-written hypothetical answer.

    The LLM first drafts a detailed answer to the question; that draft, not the
    question itself, is what gets passed to the retriever.

    Parameters:
    - question (str): The query or question to retrieve documents for.
    - retriever: Object whose invoke(text) returns the matching documents.
    - debug (bool): Flag for printing debug information.

    Returns:
    - The documents returned by retriever.invoke for the hypothetical answer.
    """
    model = ChatOllama(model="llama3.2", temperature=0)
    messages = [{
        "role": "user",
        "content": f"Generate a detailed answer to the following question:\n\n{question}\n\nAnswer:",
    }]
    draft_answer = model.invoke(messages).content

    if debug:
        print(f"Hypothetical answer generated: {draft_answer}")

    # Search with the draft answer as the query text.
    matches = retriever.invoke(draft_answer)

    if debug:
        print(f"Number of documents retrieved based on hypothetical answer: {len(matches)}")

    return matches

def main(debug=False):
    """
    Runs contextual retrieval for a sample question against a one-document store.
    """
    seed_docs = [Document(page_content="The Boston Celtics won the NBA Finals in 2023.")]

    # Enrich, index, and wrap the store in a retriever.
    enriched = create_contextual_nodes(seed_docs, debug=debug)
    retriever = create_embedding_retriever(enriched, debug=debug).as_retriever()

    contextual_retrieval("What are the recent updates in the NBA?", retriever, debug)

if __name__ == "__main__":
    main(debug=True)


Overwriting ../../../src/sports_news_rag/modules/hyde_rag.py


In [80]:
%%writefile ../../../src/sports_news_rag/modules/corrective_rag.py
from langchain_ollama import OllamaEmbeddings, ChatOllama
from langchain.schema import Document  # Import the Document class

def corrective_rag(question, retrieved_docs, debug=False):
    """
    Answers a question from retrieved documents, then verifies and corrects the answer.

    An initial answer is generated from the concatenated document contents. The
    LLM is then asked, up to two times, whether the answer is supported by the
    context. If its reply contains "no inaccuracies" (case-insensitive) the loop
    stops; otherwise a corrected answer is requested and checked again.

    Parameters:
    - question (str): The question to answer.
    - retrieved_docs: Iterable of Document objects and/or dicts that have a
      "page_content" key.
    - debug (bool): Flag for printing debug information.

    Returns:
    - str: The answer after the last verification/correction round.
    """
    # Normalise item by item. Converting the whole list whenever any item is not
    # a Document would subscript the Document items too and fail on mixed input.
    retrieved_docs = [
        doc if isinstance(doc, Document) else Document(page_content=doc["page_content"])
        for doc in retrieved_docs
    ]

    llm = ChatOllama(model="llama3.2", temperature=0)
    context = "\n\n".join(doc.page_content for doc in retrieved_docs)

    initial_prompt = f"Context: {context}\n\nQuestion: {question}\n\nAnswer:"
    initial_answer = llm.invoke([{"role": "user", "content": initial_prompt}]).content

    if debug:
        print(f"Initial answer generated: {initial_answer}")

    max_iterations = 2
    for i in range(max_iterations):
        # Ask the model to audit the current answer against the context.
        verify_prompt = f"Context: {context}\n\nAnswer: {initial_answer}\n\nIs the answer fully supported by the context? Identify any inaccuracies."
        verification = llm.invoke([{"role": "user", "content": verify_prompt}]).content

        if "no inaccuracies" in verification.lower():
            if debug:
                print(f"No inaccuracies found. Answer is verified on iteration {i + 1}.")
            break

        # Feed the audit back in and request a corrected answer.
        refine_prompt = f"Context: {context}\n\nThe initial answer may have inaccuracies: {verification}\n\nQuestion: {question}\n\nProvide a corrected answer:"
        initial_answer = llm.invoke([{"role": "user", "content": refine_prompt}]).content

    return initial_answer

def main(debug=False):
    """Run corrective_rag on one sample question with a single retrieved Document."""
    sample_context = [Document(page_content="The Boston Celtics won the NBA Finals in 2024.")]
    corrected = corrective_rag("Who won the NBA Finals in 2023?", sample_context, debug=debug)

    if debug:
        print(f"Final corrected answer: {corrected}")

if __name__ == "__main__":
    main(debug=True)


Overwriting ../../../src/sports_news_rag/modules/corrective_rag.py


In [81]:
%%writefile ../../../src/sports_news_rag/modules/self_rag.py
from langchain_ollama import OllamaEmbeddings, ChatOllama

def self_rag(question, initial_answer, debug=False):
    """
    Improve an answer through at most two reflect-then-rewrite rounds.

    Each round asks the LLM to reflect on the current answer. If the reflection
    contains "no improvements" (case-insensitive) the current answer is
    returned; otherwise the LLM writes an improved answer to the question based
    on that reflection, and the next round starts from it.
    """
    model = ChatOllama(model="llama3.2", temperature=0)
    if debug:
        print(f"Initial answer before self-refinement: {initial_answer}")

    current = initial_answer
    for round_no in (1, 2):
        # Reflection step on the answer as it currently stands.
        reflect_messages = [{
            "role": "user",
            "content": f"Answer: {current}\n\nReflect on the answer and identify any areas for improvement.",
        }]
        reflection = model.invoke(reflect_messages).content

        if debug:
            print(f"Reflection result for iteration {round_no}: {reflection}")

        # Stop as soon as the model signals there is nothing left to improve.
        if "no improvements" in reflection.lower():
            if debug:
                print(f"No further improvements suggested after {round_no} iterations.")
            break

        # Rewrite step driven by the reflection.
        improve_messages = [{
            "role": "user",
            "content": f"Based on the reflection: {reflection}\n\nProvide an improved answer to the question: {question}",
        }]
        current = model.invoke(improve_messages).content

        if debug:
            print(f"Improved answer after iteration {round_no}: {current}")

    return current

def main(debug=False):
    """Run self_rag on one sample question and draft answer."""
    polished = self_rag(
        "What pick of the Draft was Bronny James Jr?",
        "Bronny James Jr. was selected 55th",
        debug=debug,
    )
    if debug:
        print(f"Final refined answer: {polished}")

if __name__ == "__main__":
    main(debug=True)


Overwriting ../../../src/sports_news_rag/modules/self_rag.py


In [82]:
%%writefile ../../../src/sports_news_rag/modules/web_search.py
from langchain_community.retrievers import TavilySearchAPIRetriever

# Module-level retriever shared by tavily_search; it is constructed once, when
# this module is imported.
# NOTE(review): k=3 presumably caps the number of search results returned —
# confirm against the TavilySearchAPIRetriever documentation.
tavily_retriever = TavilySearchAPIRetriever(k=3)

def tavily_search(question, debug=False):
    """
    Run a Tavily web search and return the hits as one context string.

    Each hit becomes a "Source N (<source metadata>):" header followed by its
    page content; hits are separated by blank lines.
    """
    hits = tavily_retriever.invoke(question)

    sections = []
    for position, hit in enumerate(hits, start=1):
        sections.append(f"Source {position} ({hit.metadata.get('source')}):\n{hit.page_content}")
    context = "\n\n".join(sections)

    if debug:
        # Only a 500-character preview is printed.
        print(f"Web search context retrieved: {context[:500]}...")
    return context

def main(debug=False):
    """Run one sample Tavily query and, in debug mode, print the full context."""
    sample_question = "Who was the first pick in the 2024 NBA Draft?"
    retrieved = tavily_search(sample_question, debug)
    if debug:
        print(f"Retrieved context from Tavily search: {retrieved}")

if __name__ == "__main__":
    main(debug=True)


Overwriting ../../../src/sports_news_rag/modules/web_search.py


In [83]:
%%writefile ../../../src/sports_news_rag/modules/decision_mechanism.py
from modules.hyde_rag import contextual_retrieval
from modules.corrective_rag import corrective_rag
from modules.web_search import tavily_search
from modules.self_rag import self_rag  # Include the self_rag module for refinement
from langchain_ollama import OllamaEmbeddings, ChatOllama
from langchain.schema import Document

def evaluate_confidence(answer, debug=False):
    """
    Evaluate the confidence of an answer using a language model.

    The LLM is asked for a 1-10 confidence score. Its reply is free text, so the
    first standalone integer between 1 and 10 is extracted from it (for example
    "8", "8/10", "Confidence Score: 8", "I'd say 7 out of 10"). Numbers that are
    part of a "1-10" / "1 to 10" scale description are skipped.

    Parameters:
    - answer (str): The answer text to be rated.
    - debug (bool): If True, print the parsed score.

    Returns:
    - int: The parsed score in the range 1-10, or 5 (medium confidence) when no
      such number can be found in the reply.
    """
    # Local import: `re` is not among this module's top-level imports.
    import re

    llm = ChatOllama(model="llama3.2", temperature=0)
    eval_prompt = (
        f"Evaluate the confidence level (on a scale of 1-10) of the following answer being correct, "
        f"fully supported by reliable sources, and free from contradictions or inaccuracies:\n\n{answer}\n\n"
        "Confidence Score:"
    )
    confidence_score = llm.invoke([{"role": "user", "content": eval_prompt}]).content

    # A bare int() only succeeds when the reply is nothing but a number, which
    # made almost every reply fall back to the default. Search the text instead:
    #   (?<![\d.\-])            not preceded by a digit, '.' or '-' (skips "2023", ".5", "1-10")
    #   (10|[1-9])              an integer from 1 to 10
    #   (?!\d|\s*(?:-|to)\s*10) not followed by a digit or by "-10" / "to 10"
    match = re.search(r"(?<![\d.\-])(10|[1-9])(?!\d|\s*(?:-|to)\s*10)", confidence_score)
    if match:
        score = int(match.group(1))
    else:
        score = 5  # Default to medium confidence if the evaluation fails
    if debug:
        print(f"Confidence score evaluated: {score}")
    return score

def decide_and_answer(question, retriever, progress_bar=None, progress_status=None, debug=False):
    """
    Produce a RAG answer and a Tavily answer, refine both, and return the better one.

    Steps:
    1. HyDE retrieval of documents for the question.
    2. Corrective-RAG answer, self-refined and scored for confidence.
    3. Tavily web-search answer, self-refined and scored for confidence.
    4. Return the answer with the higher score; on a tie, ask the LLM to merge both.

    Parameters:
    - question (str): The user's question.
    - retriever: Retriever passed to contextual_retrieval.
    - progress_bar: Optional object with a progress(fraction) method.
    - progress_status: Optional object with a text(message) method.
    - debug (bool): If True, print debug information.

    Returns:
    - str: The selected or combined final answer.
    """

    def _announce(message):
        # Status messages are optional; skip them when no placeholder was given.
        if progress_status:
            progress_status.text(message)

    def _advance(fraction):
        # Likewise for the progress bar.
        if progress_bar:
            progress_bar.progress(fraction)

    def _refine_and_score(draft):
        # Self-refine a draft answer, then rate the refined version.
        refined = self_rag(question, draft, debug)
        return refined, evaluate_confidence(refined, debug)

    # Step 1: retrieve documents via the hypothetical-answer search.
    _announce("Step 1/4: Running HyDE retrieval...")
    retrieved_docs = contextual_retrieval(question, retriever, debug)
    _advance(0.25)

    # Step 2: answer from the retrieved documents, with correction and refinement.
    _announce("Step 2/4: Generating a corrective RAG answer...")
    rag_refined_answer, rag_confidence = _refine_and_score(
        corrective_rag(question, retrieved_docs, debug)
    )
    _advance(0.5)

    # Step 3: answer from web-search context, with refinement.
    _announce("Step 3/4: Running Tavily search for additional context...")
    tavily_context = tavily_search(question, debug)
    llm = ChatOllama(model="llama3.2", temperature=0)
    web_messages = [{
        "role": "user",
        "content": f"Context: {tavily_context}\n\nQuestion: {question}\n\nAnswer:",
    }]
    tavily_refined_answer, tavily_confidence = _refine_and_score(llm.invoke(web_messages).content)
    _advance(0.75)

    # Step 4: pick by confidence; equal scores trigger an LLM-written merge.
    _announce("Step 4/4: Making the final decision...")
    if rag_confidence == tavily_confidence:
        combined_prompt = (
            f"Here are two potential answers to the question:\n\n"
            f"Answer 1 (RAG-based):\n{rag_refined_answer}\n\n"
            f"Answer 2 (Tavily-based):\n{tavily_refined_answer}\n\n"
            f"Based on these, provide the best possible answer to the question: {question}"
        )
        final_answer = llm.invoke([{"role": "user", "content": combined_prompt}]).content
        source = "Combined response"
    elif rag_confidence > tavily_confidence:
        final_answer = rag_refined_answer
        source = "RAG-based response"
    else:
        final_answer = tavily_refined_answer
        source = "Tavily-based response"

    if debug:
        print(f"Selected final answer from: {source}")
    return final_answer



import streamlit as st

def main(debug=False):
    """
    Main function to test the decision mechanism.

    Builds a one-document vector store, runs decide_and_answer for a sample
    question with Streamlit progress widgets, and writes the result to the page.

    Parameters:
    - debug (bool): If True, enable debug output in the called functions.
    """
    # Imported locally because this module's top-level imports do not bring
    # create_vectorstore into scope; without it the call below raises NameError.
    from modules.vector_store import create_vectorstore

    question = "What pick of the draft was Bronny James?"

    # Convert sample_docs into Document objects
    sample_docs = [Document(page_content="This is a sample document for testing.")]
    vectorstore = create_vectorstore(sample_docs, debug=debug)
    retriever = vectorstore.as_retriever()

    # Create Streamlit progress bar and status
    progress_bar = st.progress(0)  # Creates a Streamlit progress bar
    progress_status = st.empty()  # Placeholder for status messages

    # Pass these objects when calling decide_and_answer
    final_answer = decide_and_answer(question, retriever, progress_bar, progress_status, debug)
    st.write(f"Final answer selected: {final_answer}")  # Display the final answer

if __name__ == "__main__":
    main(debug=True)



Overwriting ../../../src/sports_news_rag/modules/decision_mechanism.py


In [84]:
%%writefile ../../../src/sports_news_rag/modules/fact_checker.py
from langchain_ollama import OllamaEmbeddings, ChatOllama
from modules.hyde_rag import contextual_retrieval  # Import contextual_retrieval from the hyde_rag module
from modules.web_search import tavily_search  # Import tavily_search from the web_search module
from langchain.schema import Document

def final_fact_check(question, answer, retriever, debug=False):
    """
    Check an answer against vector-store and web-search context, correcting it if needed.

    Context is gathered from two places: documents retrieved through
    contextual_retrieval and the text returned by tavily_search. Both are
    concatenated and given to the LLM together with the answer to verify.

    Parameters:
    question (str): The question asked by the user.
    answer (str): The answer to be verified.
    retriever: The retriever object passed to contextual_retrieval.
    debug (bool): If True, print debug information.

    Returns:
    str: The LLM's verification reply, containing a corrected answer where it found one necessary.
    """
    model = ChatOllama(model="llama3.2", temperature=0)

    # Local context: contents of the retrieved documents, or empty when none came back.
    local_context = ""
    hits = contextual_retrieval(question, retriever, debug=debug)
    if hits:
        local_context = "\n\n".join(hit.page_content for hit in hits)

    # Web context from the Tavily search.
    web_context = tavily_search(question, debug=debug)

    combined_context = "\n\n".join((local_context, web_context))

    if debug:
        print(f"Combined context for fact-checking:\n{combined_context}")

    messages = [{
        "role": "user",
        "content": (
            f"Context: {combined_context}\n\nAnswer: {answer}\n\n"
            f"Verify the accuracy of the answer based on the context. Provide a corrected answer if necessary."
        ),
    }]
    verdict = model.invoke(messages).content

    if debug:
        print(f"Fact-checked answer: {verdict}")

    return verdict

def main(debug=False):
    """
    Test the final_fact_check function with sample input.

    Parameters:
    debug (bool): If True, print debug information.
    """
    # Imported locally because this module's top-level imports do not bring
    # create_vectorstore into scope; without it the call below raises NameError.
    from modules.vector_store import create_vectorstore

    # Sample question and answer for testing
    question = "What pick of the Draft was Bronny James Jr?"
    initial_answer = "Bronny James Jr. was selected by the Golden State Warriors with the 55th pick."  # Sample incorrect answer

    # Use pre-loaded documents with Bronny James information
    sample_docs = [Document(page_content="Bronny James was selected as the 55th pick in the 2024 NBA Draft.")]
    vectorstore = create_vectorstore(sample_docs, debug=debug)
    retriever = vectorstore.as_retriever()

    # Run the final_fact_check function
    corrected_answer = final_fact_check(question, initial_answer, retriever, debug=debug)
    if debug:
        print(f"Corrected answer after final fact-check: {corrected_answer}")

if __name__ == "__main__":
    main(debug=True)


Overwriting ../../../src/sports_news_rag/modules/fact_checker.py


In [85]:
%%writefile ../../../src/sports_news_rag/main.py

from modules.data_crawling import crawl_and_ingest
from modules.vector_store import create_vectorstore
from modules.decision_mechanism import decide_and_answer
from modules.fact_checker import final_fact_check
from modules.hyde_rag import contextual_retrieval  # Use the new contextual retrieval function

def main(debug=False):
    """
    Run the pipeline end to end: crawl, index, answer a sample question, fact-check.

    Parameters:
    - debug (bool): If True, print progress and sample content at each step.
    """
    # Imported locally because this module's top-level imports do not bring
    # Document into scope, and the isinstance filter below needs it.
    from langchain.schema import Document

    # Define test sites, including new ones
    sports_sites = ["https://www.nba.com/", "https://www.espn.com/", "https://www.nfl.com/"]

    all_documents = []

    # Step 1: Crawl and ingest data from test sites
    for site in sports_sites:
        documents = crawl_and_ingest(site, debug)

        # Confirm each document type and content after ingestion
        if debug:
            print(f"Documents from {site}: {[type(doc) for doc in documents]}")
            print(f"Number of documents ingested from {site}: {len(documents)}")
            for doc in documents[:3]:  # Print first few documents as a sample
                print(f"Sample content from {site}: {doc.page_content[:500]}...")  # Show the first 500 chars for brevity

        all_documents.extend(documents)

    # Keep only Document instances; anything else (e.g. a stray nested list) is dropped
    all_documents = [doc for doc in all_documents if isinstance(doc, Document)]

    # Step 2: Create vector store from ingested documents
    if debug:
        print(f"Total documents after flattening: {len(all_documents)}")

    if all_documents:
        vectorstore = create_vectorstore(all_documents, debug=debug)
        retriever = vectorstore.as_retriever()

        # Step 3: Ask a sample question and generate an answer.
        # contextual_retrieval only returns the retrieved documents, not an
        # answer string, so the answer is produced by decide_and_answer.
        question = "What pick of the Draft was Bronny James jr?"
        initial_answer = decide_and_answer(question, retriever, debug=debug)

        # Step 4: Fact-check and print the answer
        final_answer = final_fact_check(question, initial_answer, retriever, debug)
        print(f"Final answer for the question '{question}': {final_answer}")
    else:
        print("No documents were ingested, please check the crawl_and_ingest function for errors with the selected sites.")

if __name__ == "__main__":
    main(debug=True)



Overwriting ../../../src/sports_news_rag/main.py


In [86]:
%%writefile ../../../src/sports_news_rag/app.py
import streamlit as st
from modules.decision_mechanism import decide_and_answer, evaluate_confidence  # Import evaluate_confidence
from modules.vector_store import create_vectorstore
from modules.data_crawling import crawl_and_ingest
from modules.fact_checker import final_fact_check
from modules.hyde_rag import contextual_retrieval
from modules.corrective_rag import corrective_rag
from modules.self_rag import self_rag
from modules.web_search import tavily_search  # Import tavily_search
import os
from dotenv import load_dotenv
from langchain_community.vectorstores import Chroma
from langchain_ollama import OllamaEmbeddings, ChatOllama

# Set up the Streamlit app title and description (MUST be the first Streamlit command)
st.set_page_config(page_title="Advanced Sports News RAG Bot", layout="wide")

# Debug: Confirm the current working directory
# The value is shown in the sidebar and reused below to locate the .env file.
current_working_dir = os.getcwd()
st.sidebar.write(f"Current Working Directory: {current_working_dir}")

# Load environment variables
# The .env file is looked up in the current working directory, not next to
# this file, so the app has to be launched from the directory that holds it.
dotenv_path = os.path.join(current_working_dir, '.env')
load_dotenv(dotenv_path)

# Define the missing function to generate an answer from context
def generate_answer_from_context(context, question, debug=False):
    """
    Ask the llama3.2 chat model to answer a question from supplied context.

    The context and the question are folded into a single user message of the
    form "Context: ... Question: ... Answer:" and sent to the model in one call.

    Parameters:
    - context (str): Text the answer should be based on (the app passes the
      Tavily search result here).
    - question (str): The user's question.
    - debug (bool): When True, the generated answer is also printed to stdout.

    Returns:
    - str: The content of the model's reply.
    """
    # Same model name and temperature as the other ChatOllama call in this app.
    chat_model = ChatOllama(model="llama3.2", temperature=0)

    user_message = {
        "role": "user",
        "content": f"Context: {context}\n\nQuestion: {question}\n\nAnswer:",
    }
    answer_text = chat_model.invoke([user_message]).content

    if debug:
        print(f"Generated answer from context: {answer_text}")

    return answer_text


# Tell the user, in the sidebar, whether a .env file exists at the expected path
env_file_present = os.path.exists(dotenv_path)
env_file_message = (
    f".env file found at: {dotenv_path}"
    if env_file_present
    else f".env file not found at: {dotenv_path}"
)
st.sidebar.write(env_file_message)

# Page heading and one-line description of the bot
st.title("Advanced Sports News RAG Bot")
st.write("Get the most up-to-date sports news using advanced RAG techniques. This bot combines information from various sources and fact-checks responses for reliability.")

# Two tabs: tabs[0] holds the introduction, tabs[1] the question form
tab_labels = ["Introduction", "Ask a Question"]
tabs = st.tabs(tab_labels)

# Introduction tab content
with tabs[0]:
    # The introduction page is a fixed sequence of (heading, paragraph) pairs.
    # Each entry names the Streamlit call used for the heading (st.header for
    # top-level sections, st.subheader for individual approaches), the heading
    # text, and the body text. Entries are rendered strictly in list order.
    # The body strings keep their original leading whitespace on purpose so the
    # text handed to st.write is unchanged.
    intro_sections = [
        (st.header, "Approaches Used in Advanced Versatile RAG Bot", """
    This project leverages a variety of Retrieval-Augmented Generation (RAG) strategies to create an interactive assistant capable of providing reliable, up-to-date information for any type of website, though it has been initially applied to sports news. Below, we detail the approaches utilized, how they contribute to the quality of answers, and the innovative combination of different RAG methodologies.
    """),
        (st.subheader, "Simple RAG", """
    Simple RAG forms the foundation of our bot by retrieving documents based on a user query and generating grounded answers with a large language model (LLM). It minimizes hallucination issues by anchoring the generated responses to relevant sources.
    """),
        (st.subheader, "Branched RAG", """
    Our Branched RAG approach performs multiple retrieval layers, refining searches based on intermediate results. This iterative process enhances answer specificity and is especially valuable for complex or multi-layered queries.
    """),
        (st.subheader, "Contextual Retrieval", """
    Contextual Retrieval replaces hypothetical document generation (HyDE) by enriching each document with additional context, making retrieval results more aligned with nuanced queries. By adding contextual nodes to documents, the bot achieves higher precision and recall in retrieving relevant data.
    """),
        (st.subheader, "Corrective RAG (CRAG)", """
    Corrective RAG (CRAG) iteratively checks and refines responses by comparing them to the context from retrieved documents. This ensures answers are accurate and well-supported, enhancing reliability and factual correctness.
    """),
        (st.subheader, "Self-RAG", """
    Self-RAG adds a layer of self-reflection where the model re-evaluates its initial answer to make improvements in clarity, conciseness, and accuracy. This self-assessment strengthens response quality by ensuring coherence and completeness.
    """),
        (st.subheader, "Agentic RAG", """
    Agentic RAG orchestrates multi-step queries, allowing the bot to act as an autonomous agent. By combining retrieval, verification, and synthesis processes, Agentic RAG enables intelligent navigation of information sources, crafting answers that involve interconnected insights.
    """),
        (st.subheader, "Tavily Web Search", """
    Tavily Web Search complements RAG by dynamically searching the web for up-to-date information, especially when pre-ingested documents do not fully address a query. This integration ensures the bot’s responses reflect the latest information available.
    """),
        (st.subheader, "Final Fact-Check", """
    As a final step, the bot performs a comprehensive fact-check using combined contexts from both pre-ingested documents and Tavily search results. This step verifies that the answer provided aligns with reliable, current information, enhancing trustworthiness.
    """),
        (st.header, "How These Approaches Work Together", """
    By integrating these RAG methods, our bot achieves high accuracy, adaptability, and domain versatility. Here’s how they work in tandem:

    - **Initial Retrieval**: Simple RAG retrieves documents relevant to the query.
    - **Refinement**: Branched RAG and Contextual Retrieval enhance document selection, providing more precise data.
    - **Verification**: Corrective RAG verifies factual accuracy, and Self-RAG refines answer quality.
    - **Dynamic Updates**: Tavily Search supplements with current web-based information when needed.
    - **Multi-step Processing**: Agentic RAG manages complex queries requiring multiple information sources.
    - **Final Fact-Check**: Ensures responses are reliable and up-to-date, combining all contexts effectively.

    These methods together create a robust, adaptive assistant capable of providing clear, reliable answers to dynamic questions.
    """),
        (st.header, "The Value of Combined RAG Approaches", """
    By integrating these techniques, our system is capable of:

    - **High accuracy**: Corrective checks ensure factual answers.
    - **Adaptability**: Contextual enhancement bridges knowledge gaps.
    - **Depth in retrieval**: Branched and Agentic RAGs enable nuanced understanding.
    - **Domain versatility**: Capable of handling various domains beyond sports.
    - **Real-time information**: Tavily Search provides the latest web updates.
    - **Context retention**: Maintains relevant context across user interactions for a more interactive experience.

    These combined approaches make the Versatile RAG Bot capable of not only providing reliable answers but also refining and adapting outputs intelligently across a range of queries.
    """),
    ]

    for render_heading, heading_text, body_text in intro_sections:
        render_heading(heading_text)
        st.write(body_text)



# Ask a Question tab content
with tabs[1]:
    # --- Sidebar: run-time switches for the question pipeline ---
    st.sidebar.title("Configuration")
    enable_debug = st.sidebar.checkbox("Enable Debugging", value=False)
    include_fact_check = st.sidebar.checkbox("Include Final Fact-Check", value=True)
    use_pre_ingested_data = st.sidebar.checkbox("Use Pre-Ingested Data", value=True)

    # Custom fact-checking sources.
    # NOTE(review): the value is collected but not read anywhere in the visible
    # code; final_fact_check is called without a sources argument.
    fact_check_sources = st.sidebar.text_input(
        "Enter Custom URLs for Fact-Checking (comma-separated)",
        "https://www.nba.com, https://www.espn.com"
    )

    # --- Session state: create each key once so it survives script reruns ---
    # Only keys that are still missing are set; existing values are left alone.
    session_defaults = (
        ("vectorstore", None),
        ("retriever", None),
        ("all_documents", []),
    )
    for state_key, initial_value in session_defaults:
        if state_key not in st.session_state:
            st.session_state[state_key] = initial_value

    # Load or ingest data.
    # Two mutually exclusive modes, chosen by the "Use Pre-Ingested Data" checkbox:
    #   * pre-ingested: open a Chroma store already persisted on disk;
    #   * dynamic: crawl user-supplied URLs, then build a vector store from them.
    # Either way the result is stored in st.session_state.vectorstore / .retriever.
    if use_pre_ingested_data:
        st.sidebar.subheader("Pre-Ingested Data Loading")
        known_sites = ["NBA", "ESPN", "NFL"]
        selected_site = st.sidebar.selectbox("Select Pre-Ingested Site:", known_sites)

        if st.sidebar.button("Load Pre-Ingested Data"):
            with st.spinner(f"Loading pre-ingested data for {selected_site}..."):
                # Stores live under <cwd>/data/vectorstores/<site name in lower case>
                pre_ingested_vectorstore_path = os.path.join(current_working_dir, f"data/vectorstores/{selected_site.lower()}")

                if os.path.exists(pre_ingested_vectorstore_path):
                    try:
                        # The embedding function is built only once the store is
                        # known to exist.
                        # NOTE(review): assumes the persisted store was created
                        # with this same embedding model - confirm against
                        # create_vectorstore.
                        embeddings = OllamaEmbeddings(model="llama3.2")
                        loaded_store = Chroma(
                            persist_directory=pre_ingested_vectorstore_path,
                            embedding_function=embeddings
                        )
                        loaded_retriever = loaded_store.as_retriever()
                    except Exception as e:
                        # UI boundary: report the failure instead of crashing the app
                        st.sidebar.error(f"Error loading pre-ingested data for {selected_site}: {str(e)}")
                    else:
                        # Publish both objects together so the session never holds
                        # a new vector store paired with a stale retriever.
                        st.session_state.vectorstore = loaded_store
                        st.session_state.retriever = loaded_retriever
                        st.sidebar.success(f"Loaded pre-ingested data for {selected_site}.")
                else:
                    st.sidebar.error(f"Pre-ingested data for {selected_site} not found at path: {pre_ingested_vectorstore_path}")

    else:
        # Allow the user to input custom URLs. Blank entries (e.g. from a trailing
        # comma or an empty field) are dropped here so they are neither crawled
        # nor counted in the success message.
        raw_site_input = st.sidebar.text_input(
            "Enter custom URLs for crawling data (comma-separated)",
            "https://www.nba.com, https://www.espn.com"
        )
        custom_sports_sites = [entry.strip() for entry in raw_site_input.split(",") if entry.strip()]

        if st.sidebar.button("Ingest Data"):
            if not custom_sports_sites:
                st.sidebar.error("No valid URLs provided. Please enter at least one site to crawl.")
            else:
                with st.spinner("Crawling and ingesting data..."):
                    # Start from scratch so repeated ingests do not accumulate duplicates
                    st.session_state.all_documents = []
                    for site in custom_sports_sites:
                        documents = crawl_and_ingest(site, debug=enable_debug)
                        # NOTE(review): crawl_and_ingest is assumed to return a list;
                        # "or []" keeps a None result from raising in extend().
                        st.session_state.all_documents.extend(documents or [])
                    st.sidebar.success(f"Data ingested from {len(custom_sports_sites)} sites.")

        if st.sidebar.button("Create Vector Store"):
            with st.spinner("Creating vector store from dynamically ingested data..."):
                if st.session_state.all_documents:
                    # Build both objects first, then publish them together
                    created_store = create_vectorstore(st.session_state.all_documents, debug=enable_debug)
                    created_retriever = created_store.as_retriever()
                    st.session_state.vectorstore = created_store
                    st.session_state.retriever = created_retriever
                    st.sidebar.success("Vector store created and retriever set up.")
                else:
                    st.sidebar.error("No documents available. Please ingest data first.")

    # User question input.
    # Surrounding whitespace is stripped so a whitespace-only entry is treated
    # as "no question" instead of being sent through the whole pipeline.
    st.subheader("Ask a Sports-Related Question")
    user_question = st.text_input(
        "Enter your question about sports news or events:",
        "What pick of the Draft was Bronny James Jr?"
    ).strip()

    if st.button("Get Answer"):
        if st.session_state.retriever is None:
            # Nothing to retrieve from until a vector store has been loaded/created
            st.error("Please load or create the vector store first.")
        elif not user_question:
            st.error("Please enter a question.")
        else:
            # Initialize progress bar and a placeholder for the current step label
            progress_bar = st.progress(0)
            progress_status = st.empty()

            with st.spinner("Starting RAG process..."):
                # Step 1: Contextual retrieval against the session's retriever.
                # NOTE(review): main.py treats the contextual_retrieval result as
                # an initial answer, while here it is passed on as retrieved
                # documents - confirm the actual return type.
                progress_status.text("Step 1: Performing Initial Contextual Retrieval...")
                retrieved_docs = contextual_retrieval(user_question, st.session_state.retriever, debug=enable_debug)
                progress_bar.progress(0.25)

                if enable_debug:
                    st.write(f"Contextual Retrieval Output: {retrieved_docs[:2]}")  # Display first 2 for brevity

                # Step 2: Corrective RAG over the retrieval output
                progress_status.text("Step 2: Generating Corrective RAG Answer...")
                rag_answer = corrective_rag(user_question, retrieved_docs, debug=enable_debug)
                progress_bar.progress(0.5)

                # Step 3: Self-refinement of the RAG answer, then score it
                progress_status.text("Step 3: Refining RAG Answer with Self-RAG...")
                rag_refined_answer = self_rag(user_question, rag_answer, debug=enable_debug)
                rag_confidence = evaluate_confidence(rag_refined_answer, debug=enable_debug)
                progress_bar.progress(0.6)

                if enable_debug:
                    st.write(f"Refined RAG Answer: {rag_refined_answer}")
                    st.write(f"RAG Confidence Score: {rag_confidence}")

                # Step 4: External Tavily search, answered from the search context.
                # The debug flag is forwarded here like in every other step.
                progress_status.text("Step 4: Performing Tavily Web Search...")
                tavily_context = tavily_search(user_question, debug=enable_debug)
                tavily_answer = generate_answer_from_context(tavily_context, user_question, debug=enable_debug)
                progress_bar.progress(0.8)

                # Self-RAG on the Tavily answer, then score it the same way
                progress_status.text("Refining Tavily Answer with Self-RAG...")
                tavily_refined_answer = self_rag(user_question, tavily_answer, debug=enable_debug)
                tavily_confidence = evaluate_confidence(tavily_refined_answer, debug=enable_debug)
                progress_bar.progress(0.9)

                if enable_debug:
                    st.write(f"Tavily Answer: {tavily_refined_answer}")
                    st.write(f"Tavily Confidence Score: {tavily_confidence}")

                # Step 5: Decision mechanism - keep whichever answer scored higher
                progress_status.text("Step 5: Making Final Decision...")
                if rag_confidence > tavily_confidence:
                    final_answer = rag_refined_answer
                    source = "RAG-based response"
                elif tavily_confidence > rag_confidence:
                    final_answer = tavily_refined_answer
                    source = "Tavily-based response"
                else:
                    # Reached only when neither score is greater (an exact tie):
                    # ask the LLM to merge both candidates into one answer.
                    combined_prompt = (
                        f"Here are two potential answers to the question:\n\n"
                        f"Answer 1 (RAG-based):\n{rag_refined_answer}\n\n"
                        f"Answer 2 (Tavily-based):\n{tavily_refined_answer}\n\n"
                        f"Based on these, provide the best possible answer to the question: {user_question}"
                    )
                    llm = ChatOllama(model="llama3.2", temperature=0)
                    final_answer = llm.invoke([{"role": "user", "content": combined_prompt}]).content
                    source = "Combined response"
                progress_bar.progress(1.0)

                if enable_debug:
                    st.write(f"Final Answer Selected from {source}: {final_answer}")

                # Optional fact-check against the session retriever; the result
                # replaces the selected answer.
                if include_fact_check:
                    progress_status.text("Performing final fact-check...")
                    final_answer = final_fact_check(user_question, final_answer, st.session_state.retriever, debug=enable_debug)
                    progress_bar.progress(1.0)

                # Display final answer
                st.subheader("Answer")
                st.write(final_answer)


Overwriting ../../../src/sports_news_rag/app.py
