In [20]:
import sys
import os

# Resolve the project root by climbing two directory levels from the current
# working directory (this notebook lives at
# Notebooks/LLM_integration/Integrate.ipynb, so the kernel is expected to
# start in that folder).
project_root = os.path.abspath(
    os.path.join(os.getcwd(), os.pardir, os.pardir)
)
# The persistent ChromaDB store sits directly under the project root.
chroma_db_path = os.path.join(project_root, "Chroma_db_database")

# Import libraries
import ollama
import chromadb

# Confirm the imports succeeded and echo the resolved paths, one item per line.
print(
    "✓ Libraries imported successfully",
    f"Project root: {project_root}",
    f"ChromaDB path: {chroma_db_path}",
    sep="\n",
)


✓ Libraries imported successfully
Project root: c:\Users\patha\Desktop\Experiments\Policy-as-Code-Engine-for-Academic-Compliance
ChromaDB path: c:\Users\patha\Desktop\Experiments\Policy-as-Code-Engine-for-Academic-Compliance\Chroma_db_database


In [21]:
# Open the on-disk ChromaDB store located at chroma_db_path.
client = chromadb.PersistentClient(path=chroma_db_path)

# Fetch the "policy_docs" collection (get_or_create: it is created if absent).
collection = client.get_or_create_collection("policy_docs")

# Report how many documents are available before any querying is attempted.
count = collection.count()
print(f"\nCollection 'policy_docs' has {count} documents.")

if count:
    print("✓ Collection is ready for querying.\n")
else:
    # Nothing ingested yet: point the reader at the ingestion notebooks.
    print(
        "\n WARNING: Collection is empty!",
        "Please run your embedding ingestion notebook first:",
        "  - Notebooks/Data Ingestion/03_generate_embeddings_ollama.ipynb",
        "  - Notebooks/Data Ingestion/04_ingest_to_chromadb.ipynb",
        sep="\n",
    )



Collection 'policy_docs' has 34 documents.
✓ Collection is ready for querying.



In [22]:
def _source_label(metadata):
    """Return the 'source' value of one metadata entry, or "unknown".

    Guards against a metadata entry that is None/empty or that has no
    'source' key, so a single unlabelled document cannot crash the answer.
    """
    if not metadata:
        return "unknown"
    return metadata.get("source", "unknown")


def rag_answer(
    user_query,
    top_k=3,
    verbose=True,
    embed_model="nomic-embed-text:latest",
    llm_model="qwen2.5:7b",
    max_doc_chars=1000,
):
    """
    Retrieve relevant policy documents and generate an answer with an LLM.

    Reads the module-level ``collection`` (ChromaDB collection) and the
    ``ollama`` client module; both must already exist in the notebook.

    Args:
        user_query: The question to answer.
        top_k: Maximum number of documents to retrieve. The value actually
            requested is capped at the number of documents in the collection.
        verbose: Print progress messages.
        embed_model: Ollama model name used to embed the query.
        llm_model: Ollama model name used to generate the answer.
        max_doc_chars: Each retrieved document is truncated to this many
            characters before being placed in the prompt.

    Returns:
        A string: either the generated answer followed by a
        "--- Retrieved Sources ---" list (source label and distance per
        document), or a short message when the collection is empty, the
        query is blank, or nothing was retrieved.
    """
    doc_count = collection.count()
    if doc_count == 0:
        return "Error: No documents in database. Please populate ChromaDB first."

    # A blank question would otherwise be sent to the embedding model as-is.
    if not user_query or not user_query.strip():
        return "Error: Empty query. Please enter a question."

    # 1. Embed the user question
    if verbose:
        print(f"Embedding query: {user_query}")

    query_embedding = ollama.embeddings(
        model=embed_model,
        prompt=user_query,
    )["embedding"]

    # 2. Fetch the most relevant docs, never asking for more than are stored
    n_results = min(top_k, doc_count)
    if verbose:
        print(f"Querying ChromaDB for top {n_results} documents...")

    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=n_results,
        include=["documents", "metadatas", "distances"],
    )

    # One query embedding was sent, so index 0 holds its result lists.
    context_docs = results["documents"][0]
    sources = results["metadatas"][0]
    distances = results["distances"][0]

    if not context_docs:
        return "No relevant documents found."

    # 3. Build the LLM context with sources
    if verbose:
        print(f" Retrieved {len(context_docs)} documents.\n")

    labels = [_source_label(meta) for meta in sources]
    context = "\n\n".join(
        f"[Source: {label}]\n{doc[:max_doc_chars]}"
        for label, doc in zip(labels, context_docs)
    )

    # 4. Construct the prompt for the generation model
    prompt = (
        "You are an expert in academic compliance regulations (UGC, AICTE, institutional bylaws). "
        "Given the following regulatory sources and clauses, answer the question below accurately and comprehensively. "
        "If the question asks for multiple items (objectives, guidelines, requirements, etc.), "
        "list ALL items found in the context - do not summarize or condense. "
        "Always cite the document source for each claim.\n\n"
        "==== Context ====\n"
        f"{context}\n"
        "==== End Context ====\n\n"
        f"Question: {user_query}\n\n"
        "Answer (provide complete details with citations):"
    )

    # 5. Generate LLM answer
    if verbose:
        print("Generating answer \n")

    response = ollama.generate(
        model=llm_model,
        prompt=prompt,
    )["response"]

    # 6. Return answer with source metadata (numbered from 1)
    source_lines = "".join(
        f"{rank}. {label} (distance: {dist:.4f})\n"
        for rank, (label, dist) in enumerate(zip(labels, distances), start=1)
    )
    return f"{response}\n\n--- Retrieved Sources ---\n{source_lines}"

print("✓ RAG function defined successfully")


✓ RAG function defined successfully


In [None]:
# ============================================================
# Interactive mode: Keep asking questions until 'quit'
# ============================================================
def run_interactive_session(answer_fn=None, input_fn=input, top_k=3):
    """Prompt for questions in a loop and print an answer for each one.

    Args:
        answer_fn: Callable invoked as ``answer_fn(question, top_k=..., verbose=True)``
            and expected to return printable text. Defaults to the
            notebook's ``rag_answer``, looked up when the session starts.
        input_fn: Callable used to read one line from the user; it receives
            the prompt string. Defaults to the built-in ``input``.
        top_k: Forwarded to ``answer_fn`` as the number of documents to retrieve.

    The loop ends when the user types 'quit', 'exit' or 'q' (any case), or
    when reading input raises EOFError or KeyboardInterrupt. An exception
    raised while answering is reported and the loop continues with the
    next question.
    """
    if answer_fn is None:
        # Resolved here rather than in the signature so this cell does not
        # require rag_answer to exist at definition time.
        answer_fn = rag_answer

    print("Type your question and press Enter. Type 'quit' to exit.\n")

    while True:
        try:
            user_input = input_fn("Your question: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed input stream or an interrupt at the prompt:
            # leave quietly instead of surfacing a traceback.
            print("\nExiting...")
            break

        if user_input.lower() in ['quit', 'exit', 'q']:
            print("Exiting...")
            break

        if not user_input:
            print("Please enter a question.\n")
            continue

        print("\nProcessing...\n")
        try:
            answer = answer_fn(user_input, top_k=top_k, verbose=True)
        except Exception as exc:
            # Report the failure and keep the session alive; one failed
            # question should not discard the whole interactive loop.
            print(f"Error while answering: {exc}\n")
            continue

        print("\n" + "="*60)
        print("Answer:")
        print("="*60)
        print(answer)
        print("\n")


# In a notebook kernel __name__ is "__main__", so running this cell starts
# the session exactly as before.
if __name__ == "__main__":
    run_interactive_session()
