RAG, RAG with Memory, Adaptive RAG, Corrective RAG, Self-RAG, Agentic RAG... are you lost? Let me help you with this guide.

1/ Simple RAG
Retrieves relevant documents based on the query and uses them to generate an answer.

2/ Simple RAG with Memory
Extends Simple RAG by maintaining context from previous interactions.

3/ Branched RAG
Performs multiple retrieval steps, refining the search based on intermediate results.

4/ HyDE (Hypothetical Document Embedding)
Generates a hypothetical ideal document before retrieval to improve search relevance.

5/ Adaptive RAG
Dynamically adjusts retrieval and generation strategies based on the query type or difficulty.

6/ Corrective RAG (CRAG)
Iteratively refines generated responses by fact-checking against retrieved information.

7/ Self-RAG
The model critiques and improves its own responses using self-reflection and retrieval.

8/ Agentic RAG
Combines RAG with agentic behavior, allowing for more complex, multi-step problem-solving.


https://python.langchain.com/v0.1/docs/get_started/quickstart/

langchain quick start ^


https://python.langchain.com/docs/integrations/providers/ollama/

Ollama integrations ^

Tool calling:
https://ollama.com/blog/tool-support
https://python.langchain.com/docs/how_to/tool_calling/


- Easy example:
https://github.com/Shubhamsaboo/awesome-llm-apps/blob/main/llama3.1_local_rag/llama3.1_local_rag.py

In [1]:
import torch

# Report which PyTorch build is installed
print(f"PyTorch version: {torch.__version__}")

# Ask once whether CUDA is usable and branch on the answer
cuda_ready = torch.cuda.is_available()

if cuda_ready:
    gpu_count = torch.cuda.device_count()
    print("CUDA is available! GPU is ready to be used.")
    print(f"Number of GPUs available: {gpu_count}")
    print(f"Current GPU: {torch.cuda.get_device_name(torch.cuda.current_device())}")

    # Per-device details: name, total memory (bytes / 1e9) and compute capability
    for i in range(gpu_count):
        total_memory_gb = torch.cuda.get_device_properties(i).total_memory / 1e9
        print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
        print(f"  - Total Memory: {total_memory_gb} GB")
        print(f"  - Compute Capability: {torch.cuda.get_device_capability(i)}")

    # Smoke test: create a random 3x3 tensor and move it to the GPU
    tensor = torch.rand(3, 3).cuda()
    print("Tensor on GPU:", tensor)
else:
    print("CUDA is not available. GPU is not set up correctly.")
    print("GPU is not available, cannot move tensor to GPU.")


PyTorch version: 2.5.0+cu124
CUDA is available! GPU is ready to be used.
Number of GPUs available: 1
Current GPU: NVIDIA GeForce RTX 4090
GPU 0: NVIDIA GeForce RTX 4090
  - Total Memory: 25.756696576 GB
  - Compute Capability: (8, 9)
Tensor on GPU: tensor([[0.3241, 0.3342, 0.9298],
        [0.4177, 0.4081, 0.3960],
        [0.2993, 0.5588, 0.3605]], device='cuda:0')


In [2]:
import os
from dotenv import load_dotenv


# Print the current working directory so the relative .env path below can be checked
print(os.getcwd())

# Set the path to your .env file relative to the current working directory
dotenv_path = os.path.join(os.getcwd(), '../../.env')
load_dotenv(dotenv_path)


# Set up API keys
# os.environ only accepts strings: assigning the None returned by os.getenv for a
# missing key raises a TypeError. Warn instead so cells that do not need Tavily
# can still run.
tavily_api_key = os.getenv("TAVILY_API_KEY")
if tavily_api_key:
    os.environ["TAVILY_API_KEY"] = tavily_api_key
else:
    print(
        f"Warning: TAVILY_API_KEY was not found in the environment or in {dotenv_path}; "
        "Tavily web search will not work until it is set."
    )


/workspaces/custom_ollama_docker/notebooks/contextual_retreivel_rag/local_optimizer_rag


In [3]:
%%writefile ../../../src/git_repo_model/modules/data_crawling.py

from langchain_ollama import ChatOllama, OllamaEmbeddings
from langchain.schema import Document
import os
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import NotebookLoader
import concurrent.futures
import pandas as pd

def crawl_and_ingest(directory_path, file_types=None, debug=False):
    """
    Crawls the specified directory for files with given extensions,
    processes the contents, and returns documents ready for RAG ingestion.

    Files that cannot be read or parsed (bad encoding, I/O error, malformed
    CSV, broken notebook) are skipped instead of aborting the whole crawl;
    the reason is printed when debug is enabled.

    Parameters:
    - directory_path: Path to the directory to crawl.
    - file_types: List of file extensions to include (e.g., [".py", ".md", ".csv"]).
                  Default includes various code and data formats.
    - debug: Boolean flag to print debug information.

    Returns:
    - List of Document objects. CSV and text documents carry the file path
      under metadata["file_name"]; notebook documents are whatever
      NotebookLoader.load() returns.
    """
    # Expanded default file types to include more programming languages and data formats
    if file_types is None:
        file_types = [".py", ".ipynb", ".txt", ".md", ".csv", ".js", ".html",
                      ".css", ".json", ".yaml", ".yml", ".xml", ".r", ".cpp",
                      ".java", ".scala", ".sql"]

    # str.endswith accepts a tuple, so one call checks every extension
    extensions = tuple(file_types)

    if debug:
        print(f"Starting to load files from directory: {directory_path} with file types: {file_types}")

    docs = []

    # Traverse the directory for specified file types
    for root, _dirs, files in os.walk(directory_path):
        for file in files:
            if not file.endswith(extensions):
                continue

            file_path = os.path.join(root, file)
            if debug:
                print(f"Found file: {file_path}")

            if file.endswith(".csv"):  # CSV files
                try:
                    df = pd.read_csv(file_path, on_bad_lines='skip')
                except pd.errors.EmptyDataError:
                    if debug:
                        print(f"Empty CSV file skipped: {file_path}")
                    continue
                except (pd.errors.ParserError, UnicodeDecodeError, OSError) as e:
                    if debug:
                        print(f"Error reading {file_path}: {e}")
                    continue

                if df.empty:
                    if debug:
                        print(f"CSV file is empty: {file_path}")
                    continue
                content = df.to_string()  # Convert to string for ingestion
                docs.append(Document(page_content=content, metadata={"file_name": file_path}))

            elif file.endswith(".ipynb"):  # Jupyter Notebooks
                try:
                    loader = NotebookLoader(file_path, include_outputs=False, max_output_length=0)
                    docs.extend(loader.load())
                except Exception as e:
                    if debug:
                        print(f"Error reading {file_path}: {e}")

            else:  # Text and other code files
                try:
                    with open(file_path, 'r', encoding='utf-8') as f:
                        content = f.read()
                except (UnicodeDecodeError, OSError) as e:
                    # One undecodable or unreadable file must not abort the crawl
                    if debug:
                        print(f"Error reading {file_path}: {e}")
                    continue
                docs.append(Document(page_content=content, metadata={"file_name": file_path}))

    if debug:
        print(f"Total documents loaded: {len(docs)}")
    return docs


def process_chunk(chunk, debug=False):
    """
    Turns one chunk into Documents, one per proposition that passes the quality check.

    The chunk's page_content is sent to generate_propositions, the result is
    filtered by quality_check_propositions, and each surviving proposition is
    wrapped in its own Document.
    """
    raw_propositions = generate_propositions(chunk.page_content, debug)
    vetted_propositions = quality_check_propositions(raw_propositions, debug)

    proposition_docs = []
    for statement in vetted_propositions:
        proposition_docs.append(Document(page_content=statement))
    return proposition_docs

def generate_propositions(text, debug=False):
    """
    Asks the llama3.2 chat model to split the text into factual statements.

    Only the first 2000 characters of the text are sent. The reply is split on
    newlines; each non-blank line, stripped of surrounding whitespace, becomes
    one proposition in the returned list.
    """
    llm = ChatOllama(model="llama3.2", temperature=0)

    # Slicing is a no-op for shorter inputs, so no length check is needed
    max_length = 2000
    text = text[:max_length]

    instruction = "Provide each proposition as a separate statement."
    proposition_prompt = (
        f"Break down the following text into concise, complete, and meaningful factual statements:\n\n{text}\n\n"
        + instruction
    )
    reply = llm.invoke([{"role": "user", "content": proposition_prompt}])
    response = reply.content

    propositions = []
    for line in response.split('\n'):
        cleaned = line.strip()
        if cleaned:
            propositions.append(cleaned)

    if debug:
        print(f"Generated propositions: {propositions[:5]}...")  # Only the first 5, for brevity

    return propositions

def _parse_quality_verdicts(response, batch_len):
    """Returns the 0-based indices of the propositions the response marks as 'pass'."""
    import re

    # Accepts forms such as "1: pass", "2. Fail", "**3** - pass"
    verdict_pattern = re.compile(r"\b(\d+)\W+(pass|fail)\b", re.IGNORECASE)
    passed = set()
    for line in response.split('\n'):
        match = verdict_pattern.search(line)
        if match is None:
            continue
        number = int(match.group(1))
        # Ignore numbers that do not refer to a proposition in this batch
        if 1 <= number <= batch_len and match.group(2).lower() == "pass":
            passed.add(number - 1)
    return passed


def quality_check_propositions(propositions, debug=False):
    """
    Checks the quality of the propositions for accuracy, clarity, completeness, and conciseness.

    Propositions are sent to the LLM in batches of 5. Each batch is presented
    as a numbered list and the model is asked for one '<number>: pass' or
    '<number>: fail' line per proposition, so every verdict can be tied back
    to its proposition by number rather than by the position of a line in a
    free-form reply.

    Parameters:
    - propositions: List of proposition strings.
    - debug: Boolean flag to print debug information.

    Returns:
    - List of the propositions the model marked as 'pass', in input order.
      An empty input returns an empty list without contacting the model.
    """
    if not propositions:
        return []

    llm = ChatOllama(model="llama3.2", temperature=0)
    high_quality_propositions = []

    batch_size = 5
    for start in range(0, len(propositions), batch_size):
        batch = propositions[start:start + batch_size]
        numbered_batch = "\n".join(
            f"{number}. {proposition}" for number, proposition in enumerate(batch, start=1)
        )
        quality_prompt = (
            "Evaluate each of the following numbered propositions for accuracy, clarity, "
            "completeness, and conciseness. "
            "Reply with exactly one line per proposition in the form '<number>: pass' if the "
            "proposition is acceptable or '<number>: fail' if it is not. Do not add any other text.\n\n"
            f"{numbered_batch}"
        )
        response = llm.invoke([{"role": "user", "content": quality_prompt}]).content

        passed_indices = _parse_quality_verdicts(response, len(batch))

        if debug:
            print(f"Batch being processed: {batch}")
            print(f"LLM Response: {response}")
            print(f"Number of propositions passed: {len(passed_indices)}, Number of propositions in batch: {len(batch)}")

        high_quality_propositions.extend(
            proposition for index, proposition in enumerate(batch) if index in passed_indices
        )

    return high_quality_propositions


def main(debug=False):
    """Crawls the directory three levels above the working directory and, in debug mode, reports the document count."""
    # Code and documentation formats to pick up during the crawl
    wanted_extensions = [
        ".py", ".md", ".csv", ".ipynb", ".html", ".json",
        ".yaml", ".r", ".cpp", ".java", ".scala", ".sql",
    ]
    loaded = crawl_and_ingest("../../../", wanted_extensions, debug)
    if not debug:
        return
    print(f"Total documents processed for ingestion: {len(loaded)}")


if __name__ == "__main__":
    main(debug=True)


Writing ../../../src/git_repo_model/modules/data_crawling.py


In [4]:
%%writefile ../../../src/git_repo_model/modules/git_data_crawling.py

import requests
from langchain.schema import Document

def extract_repo_info(repo_url):
    """
    Extracts the username and repository name from a GitHub URL.

    The owner and repository are taken from the first two path segments, so
    URLs that point deeper into a repository (e.g. ".../tree/main/src") and
    clone URLs ending in ".git" resolve to the same (username, repo) pair as
    the plain repository URL.

    Parameters:
    - repo_url: URL such as https://github.com/username/repo

    Returns:
    - Tuple (username, repo_name).

    Raises:
    - ValueError: if the URL has no host or fewer than two path segments.
    """
    from urllib.parse import urlparse

    invalid_message = "Invalid GitHub repository URL. Must be in the format: https://github.com/username/repo"

    parsed = urlparse(repo_url.strip())
    segments = [segment for segment in parsed.path.split('/') if segment]
    if not parsed.netloc or len(segments) < 2:
        raise ValueError(invalid_message)

    username, repo_name = segments[0], segments[1]
    # Clone URLs carry a ".git" suffix that is not part of the repository name
    if repo_name.endswith(".git"):
        repo_name = repo_name[:-len(".git")]
    if not repo_name:
        raise ValueError(invalid_message)

    return username, repo_name

def fetch_repo_tree(username, repo_name, debug=False, branch="main", timeout=30):
    """
    Fetches the recursive file tree of a branch from a GitHub repository.

    Parameters:
    - username: Owner of the repository.
    - repo_name: Name of the repository.
    - debug: Boolean flag to print debug information.
    - branch: Branch (or other tree reference) to list. Defaults to "main".
    - timeout: Seconds to wait for the GitHub API before giving up, so a
               stalled connection cannot hang the caller indefinitely.

    Returns:
    - The list stored under "tree" in the API response (empty if absent).

    Raises:
    - requests.HTTPError: if the API answers with an error status.
    - requests.RequestException: on connection problems or timeout.
    """
    api_url = f"https://api.github.com/repos/{username}/{repo_name}/git/trees/{branch}?recursive=1"
    headers = {"Accept": "application/vnd.github.v3+json"}

    response = requests.get(api_url, headers=headers, timeout=timeout)
    response.raise_for_status()
    payload = response.json()
    tree = payload.get("tree", [])

    if debug:
        print(f"Fetched {len(tree)} items from GitHub API for {repo_name}")
        # The API sets "truncated" when the tree was too large to return in full
        if payload.get("truncated"):
            print(f"Warning: the file tree for {repo_name} was truncated by the GitHub API; some files are missing.")

    return tree

def load_file_content(username, repo_name, file_info, debug=False, branch="main", timeout=30):
    """
    Loads the content of a single file from a GitHub repository.

    Parameters:
    - username: Owner of the repository.
    - repo_name: Name of the repository.
    - file_info: Tree entry whose "path" key holds the file path inside the repo.
    - debug: Boolean flag to print debug information.
    - branch: Branch to download from. Defaults to "main".
    - timeout: Seconds to wait for the download before giving up.

    Returns:
    - Document with the file text as page_content and the path under
      metadata["file_name"].

    Raises:
    - requests.HTTPError: if the download returns an error status. Without this
      check the body of an error page would be ingested as file content.
    - requests.RequestException: on connection problems or timeout.
    """
    from urllib.parse import quote

    # Quote the path so characters such as '#', '?' or spaces are not read as URL syntax
    quoted_path = quote(file_info['path'])
    file_url = f"https://raw.githubusercontent.com/{username}/{repo_name}/{branch}/{quoted_path}"

    response = requests.get(file_url, timeout=timeout)
    response.raise_for_status()
    file_content = response.text

    if debug:
        print(f"Loaded file: {file_info['path']}")

    return Document(page_content=file_content, metadata={"file_name": file_info["path"]})

def load_github_repo(repo_url, file_types=None, debug=False):
    """
    Load the main branch files from a GitHub repository and return them as documents.

    A file whose download fails is skipped (and reported when debug is on)
    rather than aborting the whole load. Failures while fetching the file
    tree itself still propagate to the caller.

    Parameters:
    - repo_url: URL of the GitHub repository.
    - file_types: List of file extensions to include (e.g., [".py", ".md", ".csv"]).
                  Defaults to a broad selection of code and data formats.
    - debug: Boolean flag to print debug information.

    Returns:
    - List of Document objects, one per successfully downloaded file.
    """
    username, repo_name = extract_repo_info(repo_url)
    tree = fetch_repo_tree(username, repo_name, debug)

    # Default file types if none are provided
    if file_types is None:
        file_types = [
            ".py", ".ipynb", ".txt", ".md", ".csv", ".js", ".html",
            ".css", ".json", ".yaml", ".yml", ".xml", ".r", ".cpp",
            ".java", ".scala", ".sql"
        ]

    # str.endswith accepts a tuple, so one call checks every extension
    extensions = tuple(file_types)

    documents = []
    for file_info in tree:
        if file_info["type"] != "blob" or not file_info["path"].endswith(extensions):
            continue
        try:
            document = load_file_content(username, repo_name, file_info, debug)
        except requests.RequestException as e:
            # One unreachable file should not discard everything loaded so far
            if debug:
                print(f"Skipping {file_info['path']}: {e}")
            continue
        documents.append(document)

    if debug:
        print(f"Total files loaded from GitHub repository '{repo_name}': {len(documents)}")

    return documents

def main():
    """Loads a public test repository with debug output and previews each loaded file."""
    # Any public GitHub repository URL can be substituted here
    test_repo_url = "https://github.com/ghadfield32/coach_analysis"

    # Extensions to keep, and debug switched on to trace the process
    file_types = [
        ".py", ".md", ".csv", ".ipynb", ".html", ".json",
        ".yaml", ".r", ".cpp", ".java", ".scala", ".sql",
    ]
    debug = True

    try:
        documents = load_github_repo(test_repo_url, file_types, debug)
        print("\nLoaded documents:")
        for doc in documents:
            # Preview is limited to the first 100 characters of each document
            preview = doc.page_content[:100]
            print(f"File: {doc.metadata['file_name']} - Content preview: {preview}...")
    except Exception as e:
        print(f"An error occurred: {e}")

if __name__ == "__main__":
    main()


Writing ../../../src/git_repo_model/modules/git_data_crawling.py


In [5]:
%%writefile ../../../src/git_repo_model/modules/vector_store.py
from langchain_community.vectorstores import Chroma
from langchain_ollama import OllamaEmbeddings
from langchain.schema import Document
import os 

def create_vectorstore(documents, persist_directory='../../../data/chroma_dbs', debug=False):
    """
    Builds a Chroma vector store from the documents using llama3.2 Ollama embeddings.

    The persistence directory is created first when it does not exist yet.
    Returns the Chroma vector store.
    """
    directory_missing = not os.path.exists(persist_directory)
    if directory_missing:
        os.makedirs(persist_directory)
        if debug:
            print(f"Created new persistence directory at {persist_directory}")

    store = Chroma.from_documents(
        documents=documents,
        embedding=OllamaEmbeddings(model="llama3.2"),
        persist_directory=persist_directory,
    )

    if debug:
        print(f"Vector store created at {persist_directory} with {len(documents)} documents.")
    return store


def create_pre_ingested_vectorstore(site_name, documents):
    """
    Builds and persists a Chroma vector store in a directory named after the site.

    Parameters:
    - site_name: Name used (lower-cased) as the sub-directory under
                 ../../data/vectorstores/.
    - documents: List of Document objects to embed with llama3.2 Ollama embeddings.

    Returns:
    - The created Chroma vector store, so callers can query it right away
      (same as create_vectorstore).
    """
    # Create directory if it doesn't exist
    # NOTE(review): this path is relative to '../../' while create_vectorstore
    # defaults to '../../../data/chroma_dbs' - confirm both locations are intended.
    directory = f"../../data/vectorstores/{site_name.lower()}"
    os.makedirs(directory, exist_ok=True)

    # Create the vector store
    embeddings = OllamaEmbeddings(model="llama3.2")
    vectorstore = Chroma.from_documents(documents, embedding=embeddings, persist_directory=directory)
    print(f"Vector store for {site_name} created and saved at {directory}")
    return vectorstore

def main(debug=False):
    """Exercises both vector store builders with one-document samples."""
    # Document objects (not plain dictionaries) are what the builders expect
    testing_docs = [Document(page_content="This is a high-quality sample document for testing.")]
    create_vectorstore(testing_docs, debug=debug)
    if debug:
        print("Vector store successfully created.")

    # Example usage of the per-site builder
    create_pre_ingested_vectorstore(
        "local_repo_files",
        [Document(page_content="This is a sample document for NFL data.")],
    )

if __name__ == "__main__":
    main(debug=True)


Writing ../../../src/git_repo_model/modules/vector_store.py


In [6]:
%%writefile ../../../src/git_repo_model/modules/contextual_retrieval.py

import copy
from langchain_community.vectorstores import Chroma
from langchain_ollama import OllamaEmbeddings, ChatOllama
from langchain.schema import Document

def create_contextual_nodes(documents, debug=False):
    """
    Builds enriched copies of the documents, each carrying LLM-generated context.

    For every document the llama3.2 chat model is asked for contextual
    information about its page_content. The reply is stored under
    metadata["context"] on a deep copy, so the input documents are not
    modified and page_content itself stays unchanged.

    Returns the list of enriched copies, in input order.
    """
    llm = ChatOllama(model="llama3.2", temperature=0)

    def enrich(source_doc):
        # Ask the model for context, then attach it to a private copy
        context_prompt = (
            f"Generate contextual information for better understanding:\n\n{source_doc.page_content}\n\n"
            "Context:"
        )
        reply = llm.invoke([{"role": "user", "content": context_prompt}])
        enriched = copy.deepcopy(source_doc)
        enriched.metadata["context"] = reply.content
        return enriched

    contextual_documents = []
    for doc in documents:
        contextual_documents.append(enrich(doc))
        if debug:
            print(f"Generated context for document '{doc.metadata.get('file_name', 'unknown')}'")

    return contextual_documents

def create_embedding_retriever(documents, persist_directory='../../../data/chroma_dbs', debug=False):
    """
    Embeds the documents with llama3.2 Ollama embeddings into a Chroma vector store.

    Parameters:
    - documents (List[Document]): Documents to embed and store.
    - persist_directory (str): Directory to persist the Chroma database.
    - debug (bool): Flag for printing debug information.

    Returns:
    - Chroma: The vector store itself; call .as_retriever() on it to obtain a retriever.
    """
    ollama_embeddings = OllamaEmbeddings(model="llama3.2")

    if debug:
        print(f"Creating vector store with {len(documents)} contextually enriched documents...")

    store_kwargs = {
        "documents": documents,
        "embedding": ollama_embeddings,
        "persist_directory": persist_directory,
    }
    store = Chroma.from_documents(**store_kwargs)

    if debug:
        print(f"Vector store created at {persist_directory}")

    return store

def main(debug=True):
    """Runs the enrichment and vector store steps on two sample documents."""
    # (content, file name) pairs standing in for local files
    samples = [
        ("This document contains information about file structure optimization.", "file_structure_optimization.txt"),
        ("Guide on improving data storage efficiency.", "data_storage_efficiency.txt"),
    ]
    sample_docs = [
        Document(page_content=text, metadata={"file_name": name})
        for text, name in samples
    ]

    # Enrich the samples, then index the enriched copies
    contextual_docs = create_contextual_nodes(sample_docs, debug=debug)
    create_embedding_retriever(contextual_docs, debug=debug)

    if debug:
        print(f"Successfully created contextual retriever with {len(contextual_docs)} contextually enriched documents.")

if __name__ == "__main__":
    main(debug=True)


Writing ../../../src/git_repo_model/modules/contextual_retrieval.py


In [7]:
%%writefile ../../../src/git_repo_model/modules/hyde_rag.py

from langchain_ollama import OllamaEmbeddings, ChatOllama
from langchain.schema import Document

def contextual_retrieval(question, retriever, debug=False):
    """
    Retrieves documents using an LLM-written hypothetical answer as the search query.

    The llama3.2 chat model first answers the question from its own knowledge;
    that answer, not the question, is passed to retriever.invoke. Returns
    whatever the retriever returns.
    """
    llm = ChatOllama(model="llama3.2", temperature=0)

    messages = [{
        "role": "user",
        "content": f"Answer the question with background knowledge:\n\n{question}\n\nAnswer:",
    }]
    hypo_answer = llm.invoke(messages).content
    if debug:
        print(f"Hypothetical answer generated: {hypo_answer}")

    matches = retriever.invoke(hypo_answer)
    if debug:
        print(f"Number of documents retrieved: {len(matches)}")

    return matches

def main(debug=False):
    """Demo: index one sample document and run HyDE-style retrieval for a sample question."""
    # These helpers live in the contextual_retrieval module and were never
    # imported by this file, so calling main() raised a NameError. Imported
    # locally because only this demo needs them. The 'modules.' prefix follows
    # the import style the sibling modules use - assumes the package root is
    # on sys.path.
    from modules.contextual_retrieval import create_contextual_nodes, create_embedding_retriever

    question = "What are the best practices for optimizing local file storage?"

    # Example documents on file storage and optimization
    sample_docs = [
        Document(page_content="Methods to optimize file storage efficiency.", metadata={"file_name": "file_optimization_guide.txt"})
    ]

    # Create contextual nodes and retriever
    contextual_docs = create_contextual_nodes(sample_docs, debug=debug)
    vectorstore = create_embedding_retriever(contextual_docs, debug=debug)
    retriever = vectorstore.as_retriever()

    # Test the contextual retrieval
    contextual_retrieval(question, retriever, debug)

if __name__ == "__main__":
    main(debug=True)



Writing ../../../src/git_repo_model/modules/hyde_rag.py


In [8]:
%%writefile ../../../src/git_repo_model/modules/corrective_rag.py

from langchain_ollama import OllamaEmbeddings, ChatOllama
from langchain.schema import Document

def corrective_rag(retrieved_docs, debug=False):
    """
    Asks the LLM for improvement suggestions on each retrieved document.

    Returns one Document per input document: page_content holds the model's
    recommendation and metadata is the metadata of the source document.
    """
    llm = ChatOllama(model="llama3.2", temperature=0)
    closing_instruction = "Suggest improvements for file structure, storage efficiency, and best practices."

    recommendations = []
    for source in retrieved_docs:
        prompt = f"Based on the following content:\n\n{source.page_content}\n\n" + closing_instruction
        reply = llm.invoke([{"role": "user", "content": prompt}])
        recommendation = reply.content
        recommendations.append(Document(page_content=recommendation, metadata=source.metadata))

        if debug:
            print(f"Recommendation for {source.metadata.get('file_name', 'unknown')}: {recommendation}")

    return recommendations

def main(debug=False):
    """Runs corrective_rag on a single sample document and prints the result in debug mode."""
    sample = Document(
        page_content="This document covers tips on file organization.",
        metadata={"file_name": "file_organization_guide.txt"},
    )
    recommendations = corrective_rag([sample], debug=debug)

    if not debug:
        return
    for item in recommendations:
        print(f"Final recommendation for {item.metadata.get('file_name', 'unknown')}: {item.page_content}")

if __name__ == "__main__":
    main(debug=True)




Writing ../../../src/git_repo_model/modules/corrective_rag.py


In [9]:
%%writefile ../../../src/git_repo_model/modules/self_rag.py

from langchain_ollama import OllamaEmbeddings, ChatOllama

def self_rag(question, initial_answer, debug=False):
    """
    Refines an answer through up to two reflect-then-improve rounds with the LLM.

    Each round asks the model to reflect on the current answer to the
    question. If the reflection contains "no improvements" (the phrase the
    model is told to use when the answer is already good) the loop stops;
    otherwise the model is asked for an improved answer, given the question,
    the current answer and the reflection.

    Parameters:
    - question: The question being answered.
    - initial_answer: The answer to start refining from.
    - debug: Boolean flag to print debug information.

    Returns:
    - The answer after the last completed improvement round (the initial
      answer if no round produced an improvement).
    """
    llm = ChatOllama(model="llama3.2", temperature=0)
    if debug:
        print(f"Initial answer before self-refinement: {initial_answer}")

    answer = initial_answer
    max_reflections = 2
    for i in range(max_reflections):
        # Include the question so the reflection can judge relevance, and name
        # the stop phrase so the early exit below can actually trigger
        reflect_prompt = (
            f"Question: {question}\n\nAnswer: {answer}\n\n"
            "Reflect on the answer and identify areas for improvement. "
            "If the answer needs no changes, reply with exactly 'No improvements'."
        )
        reflection = llm.invoke([{"role": "user", "content": reflect_prompt}]).content

        if debug:
            print(f"Reflection result for iteration {i+1}: {reflection}")

        if "no improvements" in reflection.lower():
            if debug:
                print(f"No further improvements suggested after {i+1} iterations.")
            break

        # The model needs the current answer to revise it rather than start over
        improve_prompt = (
            f"Question: {question}\n\nCurrent answer: {answer}\n\n"
            f"Based on the reflection: {reflection}\n\n"
            "Provide an improved answer to the question."
        )
        answer = llm.invoke([{"role": "user", "content": improve_prompt}]).content

        if debug:
            print(f"Improved answer after iteration {i+1}: {answer}")

    return answer

def main(debug=False):
    """Refines a sample answer with self_rag and prints the result in debug mode."""
    refined_answer = self_rag(
        "What are effective techniques for optimizing local file storage?",
        "Local file storage optimization requires strategies such as compression and proper file structure.",
        debug=debug,
    )

    if not debug:
        return
    print(f"Final refined answer: {refined_answer}")

if __name__ == "__main__":
    main(debug=True)



Writing ../../../src/git_repo_model/modules/self_rag.py


In [10]:
%%writefile ../../../src/git_repo_model/modules/web_search.py
from langchain_community.retrievers import TavilySearchAPIRetriever

# Shared retriever, created once at import time with k=3
tavily_retriever = TavilySearchAPIRetriever(k=3)

def tavily_search(question, debug=False):
    """
    Runs the question through the Tavily retriever and formats the hits as one string.

    Each hit becomes a "Source N (<source metadata>):" header followed by its
    page_content; hits are separated by blank lines.
    """
    hits = tavily_retriever.invoke(question)

    sections = []
    for position, hit in enumerate(hits, start=1):
        sections.append(f"Source {position} ({hit.metadata.get('source')}):\n{hit.page_content}")
    context = "\n\n".join(sections)

    if debug:
        print(f"Web search context retrieved: {context[:500]}...")  # Only the first 500 chars
    return context

def main(debug=False):
    """Runs a sample web search and prints the full context in debug mode."""
    context = tavily_search("Tell me about file optimization?", debug)
    if not debug:
        return
    print(f"Retrieved context from Tavily search: {context}")

if __name__ == "__main__":
    main(debug=True)


Writing ../../../src/git_repo_model/modules/web_search.py


In [11]:
%%writefile ../../../src/git_repo_model/modules/decision_mechanism.py
from modules.hyde_rag import contextual_retrieval
from modules.corrective_rag import corrective_rag
from modules.web_search import tavily_search
from modules.self_rag import self_rag  # Include the self_rag module for refinement
from langchain_ollama import OllamaEmbeddings, ChatOllama
from langchain.schema import Document
import streamlit as st

def _parse_confidence_score(reply, default=5):
    """Extracts the first 1-10 integer from an LLM reply, or returns the default."""
    import re

    # Drop restatements of the scale ("1-10", "1 to 10") so they are not
    # mistaken for the score itself
    cleaned = re.sub(r"\b1\s*(?:-|\u2013|to)\s*10\b", " ", reply)
    match = re.search(r"\b(10|[1-9])\b", cleaned)
    if match is None:
        return default
    return int(match.group(1))


def evaluate_confidence(answer, debug=False):
    """
    Evaluate the confidence of an answer using a language model.

    The model is asked for a 1-10 confidence score. Chat models rarely reply
    with a bare number, so the score is extracted from replies such as
    "8/10" or "Confidence Score: 8" instead of requiring the whole reply to
    be an integer.

    Parameters:
    - answer: The answer text to evaluate.
    - debug: Boolean flag to print debug information.

    Returns:
    - Integer score from 1 to 10; 5 (medium confidence) when no score can be
      found in the reply.
    """
    llm = ChatOllama(model="llama3.2", temperature=0)
    eval_prompt = (
        f"Evaluate the confidence level (on a scale of 1-10) of the following answer being correct, "
        f"fully supported by reliable sources, and free from contradictions or inaccuracies:\n\n{answer}\n\n"
        "Confidence Score:"
    )
    confidence_score = llm.invoke([{"role": "user", "content": eval_prompt}]).content
    score = _parse_confidence_score(confidence_score, default=5)
    if debug:
        print(f"Confidence score evaluated: {score}")
    return score

def decide_and_answer(question, retriever, progress_bar=None, progress_status=None, debug=False):
    """
    Generate answers using RAG and Tavily, and decide the best answer with self-refinement.

    Steps:
    1. Retrieve documents with contextual_retrieval.
    2. Turn them into recommendations with corrective_rag, join the
       recommendation texts into one answer, refine it with self_rag and
       score it with evaluate_confidence.
    3. Build an answer from Tavily web search context, refine and score it
       the same way.
    4. Return the answer with the higher confidence; on a tie ask the LLM to
       merge both answers.

    Parameters:
    - question: The user's question.
    - retriever: Object whose invoke() is used by contextual_retrieval.
    - progress_bar: Optional object with a progress(fraction) method.
    - progress_status: Optional object with a text(message) method.
    - debug: Boolean flag to print debug information.

    Returns:
    - The selected (or combined) answer text.
    """
    llm = ChatOllama(model="llama3.2", temperature=0)

    # Step 1: Use contextual retrieval to get documents for the RAG-based answer
    if progress_status:
        progress_status.text("Step 1/4: Running HyDE retrieval...")
    retrieved_docs = contextual_retrieval(question, retriever, debug)
    if progress_bar:
        progress_bar.progress(0.25)

    # Step 2: Generate a corrective RAG-based answer
    if progress_status:
        progress_status.text("Step 2/4: Generating a corrective RAG answer...")
    recommendations = corrective_rag(retrieved_docs, debug)
    # corrective_rag returns Document objects; self_rag and evaluate_confidence
    # interpolate their input into prompts, so hand them the text, not the list
    rag_answer = "\n\n".join(doc.page_content for doc in recommendations)
    if rag_answer.strip():
        rag_refined_answer = self_rag(question, rag_answer, debug)  # Refine RAG answer with self-rag
        rag_confidence = evaluate_confidence(rag_refined_answer, debug)
    else:
        # Nothing was retrieved: there is no RAG answer to refine or score
        rag_refined_answer = ""
        rag_confidence = 0
    if progress_bar:
        progress_bar.progress(0.5)

    # Step 3: Use Tavily search to generate an answer
    if progress_status:
        progress_status.text("Step 3/4: Running Tavily search for additional context...")
    tavily_context = tavily_search(question, debug)
    tavily_prompt = f"Context: {tavily_context}\n\nQuestion: {question}\n\nAnswer:"
    tavily_initial_answer = llm.invoke([{"role": "user", "content": tavily_prompt}]).content
    tavily_refined_answer = self_rag(question, tavily_initial_answer, debug)  # Refine Tavily answer with self-rag
    tavily_confidence = evaluate_confidence(tavily_refined_answer, debug)
    if progress_bar:
        progress_bar.progress(0.75)

    # Step 4: Decision mechanism to choose the final answer based on confidence scores
    if progress_status:
        progress_status.text("Step 4/4: Making the final decision...")
    if not rag_refined_answer or tavily_confidence > rag_confidence:
        final_answer = tavily_refined_answer
        source = "Tavily-based response"
    elif rag_confidence > tavily_confidence:
        final_answer = rag_refined_answer
        source = "RAG-based response"
    else:
        # Combine answers if confidence scores are equal
        combined_prompt = (
            f"Here are two potential answers to the question:\n\n"
            f"Answer 1 (RAG-based):\n{rag_refined_answer}\n\n"
            f"Answer 2 (Tavily-based):\n{tavily_refined_answer}\n\n"
            f"Based on these, provide the best possible answer to the question: {question}"
        )
        final_answer = llm.invoke([{"role": "user", "content": combined_prompt}]).content
        source = "Combined response"

    # Mark the run as complete so the bar does not stay at 75%
    if progress_bar:
        progress_bar.progress(1.0)

    if debug:
        print(f"Selected final answer from: {source}")
    return final_answer

def main(debug=False):
    """Demo: index one sample document and run the full decision pipeline with Streamlit progress widgets."""
    # These helpers live in the contextual_retrieval module and were never
    # imported by this file, so calling main() raised a NameError. Imported
    # locally because only this demo needs them; the 'modules.' prefix matches
    # the imports at the top of this file.
    from modules.contextual_retrieval import create_contextual_nodes, create_embedding_retriever

    question = "What are the best methods to organize a large local file repository for efficiency?"
    sample_docs = [
        Document(page_content="This is a sample document on file organization best practices.", metadata={"file_name": "organization_best_practices.txt"})
    ]
    vectorstore = create_embedding_retriever(create_contextual_nodes(sample_docs, debug=debug), debug=debug)
    retriever = vectorstore.as_retriever()

    # Streamlit progress bar and status
    progress_bar = st.progress(0)
    progress_status = st.empty()

    final_answer = decide_and_answer(question, retriever, progress_bar, progress_status, debug)
    st.write(f"Final answer selected: {final_answer}")

if __name__ == "__main__":
    main(debug=True)



Writing ../../../src/git_repo_model/modules/decision_mechanism.py


In [12]:
%%writefile ../../../src/git_repo_model/fact_checker.py
from langchain_ollama import OllamaEmbeddings, ChatOllama
from langchain.schema import Document

from modules.hyde_rag import contextual_retrieval
from modules.web_search import tavily_search  
from modules.vector_store import create_vectorstore

def final_fact_check(question, answer, retriever, debug=False):
    """
    Fact-check an answer against local documents and web search results.

    Context is gathered from two places: documents returned by
    contextual_retrieval for the question, and the text returned by
    tavily_search. Both are concatenated and given to the llama3.2 chat model
    together with the answer, asking it to verify and, if needed, correct it.

    Parameters:
    question (str): The question asked by the user.
    answer (str): The answer to verify.
    retriever: The retriever object created from the vector store.
    debug (bool): If True, print debug information.

    Returns:
    str: The model's reply to the fact-checking prompt.
    """
    llm = ChatOllama(model="llama3.2", temperature=0)

    # Local context: page contents of the HyDE-retrieved documents
    retrieved_docs = contextual_retrieval(question, retriever, debug=debug)
    if retrieved_docs:
        context = "\n\n".join(doc.page_content for doc in retrieved_docs)
    else:
        context = ""

    # Web context from Tavily, appended after the local context
    tavily_context = tavily_search(question, debug=debug)
    combined_context = "\n\n".join([context, tavily_context])

    if debug:
        print(f"Combined context for fact-checking:\n{combined_context}")

    instruction = "Verify the accuracy of the answer based on the context. Provide a corrected answer if necessary."
    fact_check_prompt = f"Context: {combined_context}\n\nAnswer: {answer}\n\n" + instruction

    reply = llm.invoke([{"role": "user", "content": fact_check_prompt}])
    final_answer = reply.content

    if debug:
        print(f"Fact-checked answer: {final_answer}")

    return final_answer

def main(debug=False):
    """
    Run final_fact_check once on a sample repository-organization question.
    """
    # (content, file name) pairs providing the local context for the check
    samples = [
        (
            "Best practices for organizing a code repository include structuring folders by project modules, using clear naming conventions, and maintaining a README for documentation.",
            "repo_organization_guide.md",
        ),
        (
            "Consider creating separate folders for data, scripts, and tests. A well-documented repository is easier for collaboration and maintenance.",
            "repo_best_practices.md",
        ),
    ]
    sample_docs = [
        Document(page_content=text, metadata={"file_name": name})
        for text, name in samples
    ]
    retriever = create_vectorstore(sample_docs, debug=debug).as_retriever()

    # Question and the initial answer to be checked
    question = "What are the best practices for organizing a local code repository?"
    initial_answer = "Organize files by language, with folders for Python, JavaScript, and SQL scripts."

    corrected_answer = final_fact_check(question, initial_answer, retriever, debug=debug)
    if debug:
        print(f"Corrected answer after final fact-check: {corrected_answer}")

if __name__ == "__main__":
    main(debug=True)


Writing ../../../src/git_repo_model/fact_checker.py


In [13]:
%%writefile ../../../src/git_repo_model/main.py

# Import necessary modules for the main workflow
from modules.data_crawling import crawl_and_ingest
from modules.vector_store import create_vectorstore
from modules.decision_mechanism import decide_and_answer
from modules.fact_checker import final_fact_check
from modules.hyde_rag import contextual_retrieval  # Use the new contextual retrieval function
from modules.corrective_rag import corrective_rag  # Import corrective_rag function for code improvements
from modules.git_data_crawling import load_github_repo  # Import the GitHub repo loader
from modules.hyde_rag import contextual_retrieval

def run_rag_pipeline(data_source, repo_path=None, repo_url=None, question="What are the best practices for organizing a code repository?", debug=False):
    """
    Execute the full RAG workflow for one question and return the chosen answer.

    Documents are loaded from a local directory or a GitHub repository, indexed
    into a vector store, retrieved with contextual retrieval, turned into
    recommendations by corrective RAG, and fact-checked. The value returned is
    whatever decide_and_answer produces for the question and retriever; the
    fact-checked text is only printed when debug is enabled.

    Parameters:
    - data_source (str): "Local" or "Git"
    - repo_path (str, optional): Local directory path (required if data_source is "Local")
    - repo_url (str, optional): GitHub repository URL (required if data_source is "Git")
    - question (str): The question for which recommendations are generated
    - debug (bool): Enable debug outputs for step-by-step tracing

    Raises:
    - ValueError: if data_source is not "Local" or "Git", or the matching
      path/URL is missing or empty
    """
    # File extensions handed to the loader for filtering
    file_types = [".py", ".md", ".csv", ".ipynb", ".html", ".json", ".yaml", ".r", ".cpp", ".java", ".scala", ".sql"]

    # Step 1: Pick the loader, its location argument, and the error to raise
    # when that location was not supplied
    if data_source == "Local":
        location = repo_path
        loader = crawl_and_ingest
        missing_message = "Local directory path must be provided for Local source."
    elif data_source == "Git":
        location = repo_url
        loader = load_github_repo
        missing_message = "GitHub repository URL must be provided for Git source."
    else:
        raise ValueError("Invalid data source selected. Choose either 'Local' or 'Git'.")

    if not location:
        raise ValueError(missing_message)
    documents = loader(location, file_types, debug)

    if debug:
        print(f"Total documents loaded: {len(documents)}")

    # Step 2: Index the documents and obtain a retriever over them
    retriever = create_vectorstore(documents, debug=debug).as_retriever()

    # Step 3: Contextual retrieval of documents relevant to the question
    retrieved_docs = contextual_retrieval(question, retriever, debug)

    # Step 4: Corrective RAG turns the retrieved documents into recommendations
    recommendations = corrective_rag(retrieved_docs, debug=debug)

    if debug:
        for recommendation in recommendations:
            print(f"Initial recommendation for {recommendation.metadata.get('file_name', 'unknown')}:\n{recommendation.page_content}")

    # Step 5: Join the recommendation texts and fact-check the combined answer
    initial_answer = " ".join(recommendation.page_content for recommendation in recommendations)
    corrected_answer = final_fact_check(question, initial_answer, retriever, debug=debug)

    if debug:
        print(f"Corrected answer after final fact-check:\n{corrected_answer}")

    # Step 6: The decision mechanism produces the answer that is returned
    return decide_and_answer(question, retriever, debug=debug)

def main():
    """
    Run the pipeline once with hard-coded test settings and print the result.

    The data source and question are fixed in the body. Any exception raised
    while running the pipeline or printing its answer is caught and reported
    as a single message.
    """
    # Test configuration
    data_source = "Git"  # Set to "Local" or "Git" as needed
    question = "Can you help me with how to make a streamlit app out of the data in the data section?"

    # Location arguments for each supported data source
    source_targets = {
        "Local": {"repo_path": "../../", "repo_url": None},  # Example local directory path
        "Git": {"repo_path": None, "repo_url": "https://github.com/ghadfield32/coach_analysis"},  # Example GitHub URL
    }
    targets = source_targets.get(data_source)
    if targets is None:
        print("Invalid data source selected. Please choose 'Local' or 'Git'.")
        return

    # Run the RAG pipeline and print the final answer
    try:
        final_answer = run_rag_pipeline(data_source, question=question, debug=True, **targets)
        print("\nFinal Answer:")
        print(final_answer)
    except Exception as e:
        print(f"An error occurred: {e}")

# Script entry point: run the hard-coded test configuration defined in main()
if __name__ == "__main__":
    main()


Writing ../../../src/git_repo_model/main.py


In [15]:
%%writefile ../../../src/git_repo_model/app.py

import streamlit as st
from modules.decision_mechanism import decide_and_answer
from modules.vector_store import create_vectorstore
from modules.data_crawling import crawl_and_ingest
from modules.git_data_crawling import load_github_repo
from fact_checker import final_fact_check
from modules.hyde_rag import contextual_retrieval  # Ensure we're importing contextual retrieval
from modules.corrective_rag import corrective_rag

def display_rag_guide():
    """Render the RAG methods guide: a heading followed by the long-form Markdown overview of each pipeline stage."""
    heading = "## RAG Methods Explained"
    # Long-form guide text; passed to st.markdown exactly as written
    guide_body = """
    **RAG (Retrieval-Augmented Generation)** is a powerful method for answering complex questions by retrieving relevant documents and generating responses. This app uses multiple RAG approaches, each carefully selected to enhance specific stages of the answer generation pipeline. Below is an overview of each step and the methods used:

    ### 1. Data Crawling and Ingestion
    - **Module**: `data_crawling.py` and `git_data_crawling.py`
    - **Purpose**: This stage gathers documents from either a local directory or a GitHub repository. We support various file types (.py, .md, .csv, .ipynb, etc.) to capture a wide array of content. The documents are then processed and loaded into a format that’s ready for retrieval.
    - **Why**: Comprehensive document crawling ensures a robust information base, allowing us to answer diverse questions based on specific data within your repository.

    ### 2. Creating a Vector Store
    - **Module**: `vector_store.py`
    - **Purpose**: After gathering documents, they are embedded into a vector space using embeddings from Ollama’s `llama3.2` model. These embeddings are stored in a vector database (Chroma) for fast retrieval based on semantic similarity.
    - **Why**: Embedding documents in a vector space makes it easier to identify the most relevant documents for a given question, enabling faster and more accurate retrieval.

    ### 3. Contextual Retrieval
    - **Module**: `contextual_retrieval.py`
    - **Purpose**: Here, we enrich the content of each document by generating additional context, which provides a more nuanced basis for retrieval. Contextual Retrieval retrieves documents relevant to the question while enhancing them with extra contextual information to improve the accuracy of the generated answer.
    - **Why**: Enriching each document with context allows the model to better understand and extract specific details relevant to the query. This approach improves retrieval precision, especially for complex queries.

    ### 4. Corrective RAG (CRAG)
    - **Module**: `corrective_rag.py`
    - **Purpose**: This stage uses the retrieved documents to make specific recommendations or corrections based on their content. Corrective RAG reviews the initial answer for accuracy and coherence, then refines it by validating details against the retrieved documents.
    - **Why**: By iteratively refining the initial response, CRAG ensures that the answer aligns well with the available data, creating a more accurate and trustworthy output.

    ### 5. Self-RAG
    - **Module**: `self_rag.py`
    - **Purpose**: Self-RAG applies reflection on the initial response, critiquing and refining the answer. If the answer is found to need improvements, Self-RAG modifies the response accordingly, repeating this process for up to two reflections.
    - **Why**: Self-refinement makes the response more robust and precise. By allowing the model to evaluate and adjust its output, Self-RAG helps in creating answers that are both accurate and concise.

    ### 6. Web-Based Retrieval (Tavily Search)
    - **Module**: `web_search.py`
    - **Purpose**: When additional context is required, Tavily Search retrieves relevant information from the web. This step is integrated to provide external context that might complement the repository data, especially if the query requires a broader view.
    - **Why**: Incorporating web-based retrieval ensures that the app can supplement internal data with up-to-date information from the web, enhancing response quality for more general or complex questions.

    ### 7. Decision Mechanism
    - **Module**: `decision_mechanism.py`
    - **Purpose**: In this stage, confidence scores for each generated answer are evaluated, and the most reliable answer is selected. If the confidence scores are similar, the system combines elements from both answers for a balanced response.
    - **Why**: A final decision mechanism selects the most appropriate response, ensuring the chosen answer is both relevant and trustworthy.

    Each of these methods contributes uniquely to building answers with increased accuracy and adaptability, making this pipeline well-suited for navigating complex datasets and generating informed answers.
    """

    st.markdown(heading)
    st.markdown(guide_body)


def run_rag_pipeline(data_source, repo_path=None, repo_url=None, question="What are the best practices for organizing a code repository?", file_types=None, debug=False):
    """
    Run the RAG pipeline inside the Streamlit app, reporting each step and
    advancing a progress bar as it goes.

    Parameters:
    - data_source (str): "Local" or "Git"
    - repo_path (str, optional): Local directory path (required if data_source is "Local")
    - repo_url (str, optional): GitHub repository URL (required if data_source is "Git")
    - question (str): The question for which recommendations are generated
    - file_types (list, optional): File extensions to include; forwarded unchanged
      to the document loader (None is forwarded as-is)
    - debug (bool): Forwarded to every pipeline step; also shows the
      fact-checked answer in the page

    Returns:
    - The value produced by decide_and_answer for the question and retriever

    Raises:
    - ValueError: if data_source is not "Local" or "Git", or the matching
      path/URL is missing or empty
    """
    st.write("### Starting RAG pipeline...")
    st.write(f"Data Source: {data_source}")
    st.write(f"Question: {question}")
    st.write(f"File Types: {file_types}")

    progress_bar = st.progress(0)

    # Step 1: Load documents based on the selected data source
    st.write("#### Step 1: Loading Documents...")
    if data_source == "Local":
        if not repo_path:
            raise ValueError("Local directory path must be provided for Local source.")
        documents = crawl_and_ingest(repo_path, file_types, debug)
    elif data_source == "Git":
        if not repo_url:
            raise ValueError("GitHub repository URL must be provided for Git source.")
        documents = load_github_repo(repo_url, file_types, debug)
    else:
        raise ValueError("Invalid data source selected. Choose either 'Local' or 'Git'.")

    st.write(f"Loaded {len(documents)} documents.")
    progress_bar.progress(0.2)

    # Step 2: Create a vector store and a retriever
    st.write("#### Step 2: Creating Vector Store and Retriever...")
    vectorstore = create_vectorstore(documents, debug=debug)
    retriever = vectorstore.as_retriever()

    st.write("Vector store created successfully.")
    progress_bar.progress(0.4)

    # Step 3: Retrieve relevant documents using contextual retrieval
    st.write("#### Step 3: Retrieving Relevant Documents (Contextual Retrieval)...")
    retrieved_docs = contextual_retrieval(question, retriever, debug)
    st.write(f"Retrieved {len(retrieved_docs)} relevant documents.")
    progress_bar.progress(0.6)

    # Step 4: Generate recommendations using corrective RAG
    st.write("#### Step 4: Generating Recommendations (Corrective RAG)...")
    recommendations = corrective_rag(retrieved_docs, debug=debug)
    st.write(f"Generated {len(recommendations)} recommendations.")
    progress_bar.progress(0.8)

    # Step 5: Perform a final fact-check on the recommendations
    st.write("#### Step 5: Performing Final Fact-Check...")
    initial_answer = " ".join(rec.page_content for rec in recommendations)
    corrected_answer = final_fact_check(question, initial_answer, retriever, debug=debug)
    st.write("Fact-check completed.")
    # The fact-checked text is not part of the return value, so without this
    # it would be computed and then silently dropped. Show it in debug mode,
    # matching the debug print of the command-line pipeline.
    if debug:
        st.write("Corrected answer after final fact-check:")
        st.write(corrected_answer)
    progress_bar.progress(0.9)

    # Step 6: Decide and return the best answer
    st.write("#### Step 6: Finalizing the Best Answer...")
    final_answer = decide_and_answer(question, retriever, debug=debug)
    st.write("Pipeline completed.")
    progress_bar.progress(1.0)

    return final_answer

def main():
    """
    Build the Streamlit page: optional RAG guide, input widgets, and a button
    that runs the pipeline and shows either the final answer or the error.
    """
    st.title("GitHub and Local Repository RAG Explorer")

    # Optional in-page guide to the RAG methods
    show_guide = st.checkbox("Show RAG Methods Guide")
    if show_guide:
        display_rag_guide()

    # Core inputs
    data_source = st.radio("Select Data Source:", ["Local", "Git"])
    question = st.text_input("Enter your question:", "What are the best practices for organizing a code repository?")
    debug = st.checkbox("Enable Debug Mode", value=False)

    # File type filter; every available type is selected by default
    available_file_types = [".py", ".md", ".csv", ".ipynb", ".html", ".json", ".yaml", ".r", ".cpp", ".java", ".scala", ".sql"]
    file_types = st.multiselect("Select file types to include:", available_file_types, default=available_file_types)

    # Only the input matching the chosen source is rendered; the other stays None
    if data_source == "Local":
        repo_path, repo_url = st.text_input("Enter Local Directory Path:"), None
    elif data_source == "Git":
        repo_path, repo_url = None, st.text_input("Enter GitHub Repository URL:")

    # Nothing more to do until the user presses the button
    if not st.button("Run RAG Pipeline"):
        return

    try:
        final_answer = run_rag_pipeline(
            data_source,
            repo_path=repo_path,
            repo_url=repo_url,
            question=question,
            file_types=file_types,
            debug=debug,
        )
        st.write("### Final Answer:")
        st.write(final_answer)
    except Exception as e:
        st.error(f"An error occurred: {e}")

# Script entry point: build the Streamlit page when this file is run as the main module
if __name__ == "__main__":
    main()



Overwriting ../../../src/git_repo_model/app.py
