In [None]:
# RAG-based Gradio app that answers questions from a set of related documents, using Llama 3.2 and nomic-embed-text served by Ollama.
# Built with help from Claude and the course material.

In [None]:
import os, glob
import sqlite3
import json
import numpy as np
from typing import List, Dict, Tuple
import requests
import gradio as gr
from datetime import datetime

# Ollama model name used to embed documents and queries
# (default `model` for OllamaEmbeddings and RAGSystem).
embedding_model = 'nomic-embed-text'
# Ollama model name used to generate answers
# (default `model` for OllamaLLM and RAGSystem).
llm_model = 'llama3.2'
# Number of top-ranked documents retrieved per chat question; passed as `k`
# to RAGSystem.query by the Gradio chat handler.
RagDist_k = 6
# Every entry matched directly under the knowledge-base directory. The path is
# relative, so it depends on the working directory of the kernel.
# load_documents() iterates over this list.
folders = glob.glob("../../week5/knowledge-base/*")
# Bare expression: echoes the matched paths as the cell output.
folders

In [None]:

class OllamaEmbeddings:
    """Thin HTTP client that turns text into embedding vectors via Ollama.

    Attributes:
        model: Name of the Ollama embedding model sent with every request.
        base_url: Root URL of the Ollama server.
    """

    def __init__(self, model: str = embedding_model, base_url: str = "http://localhost:11434"):
        self.base_url = base_url
        self.model = model

    def embed_text(self, text: str) -> List[float]:
        """Return the embedding vector for one piece of text.

        Prints a one-line preview of the text (first 70 characters, newlines
        shown as ' | ') before calling the server.

        Raises:
            Exception: If the server answers with a status other than 200.
        """
        preview = text[:70].replace('\n', ' | ')
        print('Processing', preview)

        endpoint = f"{self.base_url}/api/embeddings"
        payload = {"model": self.model, "prompt": text}
        reply = requests.post(endpoint, json=payload)

        # Guard clause: anything but HTTP 200 is reported with the raw body.
        if reply.status_code != 200:
            raise Exception(f"Error generating embedding: {reply.text}")
        return reply.json()["embedding"]

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed each text in order, one request per text, and return the vectors."""
        vectors = []
        for item in texts:
            vectors.append(self.embed_text(item))
        return vectors


class SQLiteVectorStore:
    """Vector store using SQLite for storing and retrieving document embeddings.

    Each document is one row of the `documents` table; the embedding and the
    metadata are stored as JSON text. Similarity search loads every row and
    scores it in Python, so the cost of a query grows linearly with the number
    of stored documents.

    Attributes:
        db_path: Path handed to `sqlite3.connect` (":memory:" also works).
        conn: The open SQLite connection shared by all methods.
    """

    def __init__(self, db_path: str = "vector_store.db"):
        self.db_path = db_path
        # check_same_thread=False allows the connection to be used from threads
        # other than the one that created it (presumably needed because UI
        # callbacks may run on worker threads -- confirm against the caller).
        self.conn = sqlite3.connect(db_path, check_same_thread=False)
        self._create_table()

    def _create_table(self):
        """Create the documents table if it doesn't exist."""
        cursor = self.conn.cursor()
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS documents (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                content TEXT NOT NULL,
                embedding TEXT NOT NULL,
                metadata TEXT,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)
        self.conn.commit()

    def add_documents(self, texts: List[str], embeddings: List[List[float]],
                     metadatas: List[Dict] = None):
        """Add documents with their embeddings to the store.

        Args:
            texts: Document contents.
            embeddings: One embedding per text, in the same order.
            metadatas: Optional one metadata dict per text; defaults to an
                empty dict for every document.

        Raises:
            ValueError: If the three inputs do not have the same length.
                Previously the extra items were silently dropped.
        """
        texts = list(texts)
        embeddings = list(embeddings)
        metadatas = [{} for _ in texts] if metadatas is None else list(metadatas)

        if not (len(texts) == len(embeddings) == len(metadatas)):
            raise ValueError(
                "texts, embeddings and metadatas must have the same length "
                f"(got {len(texts)}, {len(embeddings)}, {len(metadatas)})"
            )

        # Serialise everything first so a bad item cannot leave a partial insert.
        rows = [
            (text, json.dumps(embedding), json.dumps(metadata))
            for text, embedding, metadata in zip(texts, embeddings, metadatas)
        ]

        cursor = self.conn.cursor()
        cursor.executemany("""
            INSERT INTO documents (content, embedding, metadata)
            VALUES (?, ?, ?)
        """, rows)
        self.conn.commit()

    def cosine_similarity(self, vec1: np.ndarray, vec2: np.ndarray) -> float:
        """Calculate cosine similarity between two vectors.

        Returns 0.0 when either vector has zero norm; the plain formula would
        divide by zero and yield NaN, which breaks ranking.
        """
        denominator = np.linalg.norm(vec1) * np.linalg.norm(vec2)
        if denominator == 0:
            return 0.0
        return float(np.dot(vec1, vec2) / denominator)

    def similarity_search(self, query_embedding: List[float], k: int = 3) -> List[Tuple[str, float, Dict]]:
        """Search for the k most similar documents.

        Args:
            query_embedding: Embedding of the query.
            k: Maximum number of results; values <= 0 give an empty list.

        Returns:
            Up to k `(content, similarity, metadata)` tuples, highest
            similarity first. Rows whose metadata column is NULL get `{}`.
        """
        if k <= 0:
            return []

        cursor = self.conn.cursor()
        cursor.execute("SELECT content, embedding, metadata FROM documents")
        results = cursor.fetchall()

        query_vec = np.array(query_embedding)
        similarities = []

        for content, embedding_json, metadata_json in results:
            doc_vec = np.array(json.loads(embedding_json))
            similarity = self.cosine_similarity(query_vec, doc_vec)
            # The metadata column is nullable, so tolerate rows without it.
            metadata = json.loads(metadata_json) if metadata_json else {}
            similarities.append((content, similarity, metadata))

        # Sort by similarity (highest first) and return top k
        similarities.sort(key=lambda x: x[1], reverse=True)
        return similarities[:k]

    def clear_all(self):
        """Clear all documents from the store."""
        cursor = self.conn.cursor()
        cursor.execute("DELETE FROM documents")
        self.conn.commit()

    def get_document_count(self) -> int:
        """Get the total number of documents in the store."""
        cursor = self.conn.cursor()
        cursor.execute("SELECT COUNT(*) FROM documents")
        return cursor.fetchone()[0]

    def close(self):
        """Close the underlying SQLite connection."""
        self.conn.close()


class OllamaLLM:
    """Interact with Ollama LLM for text generation.

    Attributes:
        model: Name of the Ollama model sent with every request.
        base_url: Root URL of the Ollama server.
    """

    def __init__(self, model: str = llm_model, base_url: str = "http://localhost:11434"):
        self.model = model
        self.base_url = base_url

    def generate(self, prompt: str, stream: bool = False) -> str:
        """Generate text from the LLM.

        Args:
            prompt: Full prompt text sent to the model.
            stream: When True, ask the server to stream the answer. The chunks
                are read as they arrive and joined, so the return value is
                still the complete text.

        Returns:
            The generated text.

        Raises:
            Exception: If the server answers with a status other than 200, or
                reports an error in the middle of a streamed answer.
        """
        # The context manager releases the connection even if parsing fails.
        with requests.post(
            f"{self.base_url}/api/generate",
            json={"model": self.model, "prompt": prompt, "stream": stream},
            stream=stream,
        ) as response:
            if response.status_code != 200:
                raise Exception(f"Error generating response: {response.text}")

            if not stream:
                return response.json()["response"]

            # A streamed answer is newline-delimited JSON: one object per line,
            # each carrying a fragment in "response". Calling response.json()
            # on that body fails, so decode line by line instead.
            pieces = []
            for line in response.iter_lines():
                if not line:
                    continue
                chunk = json.loads(line)
                if "error" in chunk:
                    raise Exception(f"Error generating response: {chunk['error']}")
                pieces.append(chunk.get("response", ""))
                if chunk.get("done"):
                    break
            return "".join(pieces)


class RAGSystem:
    """RAG system combining vector store, embeddings, and LLM.

    Attributes:
        embeddings: OllamaEmbeddings client used for documents and queries.
        vector_store: SQLiteVectorStore holding the embedded documents.
        llm: OllamaLLM client used to produce the final answer.
    """

    def __init__(self, embedding_model: str = embedding_model,
                 llm_model: str = llm_model,
                 db_path: str = "vector_store.db"):
        self.embeddings = OllamaEmbeddings(model=embedding_model)
        self.vector_store = SQLiteVectorStore(db_path=db_path)
        self.llm = OllamaLLM(model=llm_model)

    def add_documents(self, documents: List[Dict[str, str]]):
        """
        Add documents to the RAG system.

        Each document is embedded and then appended to the vector store;
        existing rows are left untouched.

        documents: List of dicts with 'content' and optional 'metadata'
        """
        texts = [doc['content'] for doc in documents]
        metadatas = [doc.get('metadata', {}) for doc in documents]

        print(f"Generating embeddings for {len(texts)} documents...")
        embeddings = self.embeddings.embed_documents(texts)

        print("Storing documents in vector store...")
        self.vector_store.add_documents(texts, embeddings, metadatas)
        print(f"Successfully added {len(texts)} documents!")

    def query(self, question: str, k: int = 3) -> str:
        """Query the RAG system with a question.

        Args:
            question: The user's question.
            k: Number of most similar documents to put into the prompt.

        Returns:
            The model's answer, or a fixed message when the question is blank
            or the store has no documents.
        """
        # A blank question has nothing to embed; answer directly instead of
        # sending it to the embedding endpoint and failing later.
        if not question or not question.strip():
            return "Please enter a question."

        # Generate embedding for the query
        query_embedding = self.embeddings.embed_text(question)

        # Retrieve relevant documents
        results = self.vector_store.similarity_search(query_embedding, k=k)

        if not results:
            return "I don't have any information to answer this question."

        # Build context from retrieved documents
        context = "\n\n".join([
            f"Document {i+1} (Relevance: {score:.2f}):\n{content}"
            for i, (content, score, _) in enumerate(results)
        ])

        # Create prompt for LLM
        prompt = f"""You are a helpful assistant answering questions based on the provided context.
            Use the following context to answer the question. If you cannot answer the question based on the context, say so.
            
            Context:
            {context}
            
            Question: {question}
            
            Answer:"""

        # Generate response
        return self.llm.generate(prompt)

    def get_stats(self) -> str:
        """Get statistics about the RAG system."""
        doc_count = self.vector_store.get_document_count()
        return f"Total documents in database: {doc_count}"



In [None]:
def load_documents(folder_paths: List[str] = None) -> List[Dict[str, str]]:
    """
    Read the Markdown files directly inside each folder and format them for the RAG system.

    Only files with a `.md` extension are loaded; subfolders are not descended
    into. Files are visited in sorted order so the result is reproducible.

    Args:
        folder_paths: Folder paths to read files from. When omitted, the
            module-level `folders` list is used.
    Returns:
        List of dictionaries with 'content' and 'metadata' keys. The metadata
        holds 'type' (folder name), 'name' (file name) and 'datalen'
        (number of characters read).
    """
    from pathlib import Path

    if folder_paths is None:
        folder_paths = folders

    documents = []
    supported_extensions = {'.md'}

    for folder in folder_paths:
        folder_path = Path(folder)

        if not folder_path.exists():
            print(f"Warning: Folder '{folder}' does not exist. Skipping...")
            continue

        if not folder_path.is_dir():
            print(f"Warning: '{folder}' is not a directory. Skipping...")
            continue

        folder_name = folder_path.name

        # iterdir() yields entries in arbitrary order; sort for a stable result.
        files = sorted(f for f in folder_path.iterdir() if f.is_file())

        for file_path in files:
            # Check if file extension is supported
            if file_path.suffix.lower() not in supported_extensions:
                print(f"Skipping unsupported file type: {file_path.name}")
                continue

            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()
            except (OSError, UnicodeError) as e:
                # Best effort: an unreadable file is reported and skipped.
                print(f"Error reading file {file_path.name}: {str(e)}")
                continue

            documents.append({
                'metadata': {
                    'type': folder_name,
                    'name': file_path.name,
                    'datalen': len(content)
                },
                'content': content,
            })
            print(f"✓ Loaded: {file_path.name} from folder '{folder_name}'")

    print(f"\nTotal documents loaded: {len(documents)}")
    return documents


In [None]:
def create_gradio_interface(rag_system: RAGSystem):
    """Create Gradio chat interface for the RAG system.

    The layout has a chat panel wired to `rag_system.query` and a side panel
    with buttons to (re)load the knowledge base and to show document counts.

    Args:
        rag_system: The RAG system that answers questions and stores documents.

    Returns:
        The `gr.Blocks` app, not yet launched.
    """

    def chat_fn(message, history):
        """Answer one chat message; `history` is accepted but not used."""
        try:
            return rag_system.query(message, k=RagDist_k)
        except Exception as e:
            return f"Error: {str(e)}\n\nMake sure Ollama is running with the required models installed."

    def load_data():
        """Replace the stored documents with a fresh load of the knowledge base."""
        try:
            documents = load_documents()
            if not documents:
                # Nothing found is almost certainly a wrong path, not a success.
                return "⚠️ No documents were found to load. Check the knowledge-base folder path."

            texts = [doc['content'] for doc in documents]
            metadatas = [doc.get('metadata', {}) for doc in documents]

            # Embed before clearing: if Ollama is unreachable the existing
            # store is left intact.
            embeddings = rag_system.embeddings.embed_documents(texts)

            # Replace rather than append, so clicking the button again (or
            # re-running the notebook against the same database file) does not
            # pile up duplicate copies that crowd the top-k results.
            rag_system.vector_store.clear_all()
            rag_system.vector_store.add_documents(texts, embeddings, metadatas)

            stats = rag_system.get_stats()
            return f"✅ Sample documents loaded successfully!\n{stats}"
        except Exception as e:
            return f"❌ Error loading documents: {str(e)}"

    def get_stats():
        """Get system statistics."""
        return rag_system.get_stats()

    with gr.Blocks(title="RAG System - Company Knowledge Base", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# 🤖 RAG System - Company Knowledge Base")
        gr.Markdown("Ask questions about company information, contracts, employees, and products.")

        with gr.Row():
            with gr.Column(scale=3):
                gr.ChatInterface(
                    fn=chat_fn,
                    examples=[
                        "Who is the CTO of the company?",
                        "Who is the CEO of the company?",
                        "What products does the company offer?",
                    ],
                    title="",
                    description="💬 Chat with the company knowledge base"
                )

            with gr.Column(scale=1):
                gr.Markdown("### 📊 System Controls")
                load_btn = gr.Button("📥 Load Documents", variant="primary")
                stats_btn = gr.Button("📈 Get Statistics")
                output_box = gr.Textbox(label="System Output", lines=5)

                load_btn.click(fn=load_data, outputs=output_box)
                stats_btn.click(fn=get_stats, outputs=output_box)

                # Step 2 names the button exactly as it is labelled above.
                gr.Markdown(f"""
                ### 📝 Instructions:
                1. Make sure Ollama is running
                2. Click "📥 Load Documents"
                3. Start asking questions!
                
                ### 🔧 Required Models:
                - `ollama pull {embedding_model}`
                - `ollama pull {llm_model}`
                """)

    return demo


def main():
    """Entry point: print a banner, build the RAG system and launch the Gradio UI."""
    banner = "=" * 60
    for heading in (banner, "RAG System with Ollama and SQLite", banner):
        print(heading)

    # Build the RAG system from the notebook-level configuration.
    print("\nInitializing RAG system...")
    system_config = {
        "embedding_model": embedding_model,
        "llm_model": llm_model,
        "db_path": "vector_store.db",
    }
    rag_system = RAGSystem(**system_config)

    # Remind the user which models must be available before the UI starts.
    reminders = (
        "\n⚠️  Make sure Ollama is running and you have the required models:",
        f"   - ollama pull {embedding_model}",
        f"   - ollama pull {llm_model}",
        "\nStarting Gradio interface...",
    )
    for reminder in reminders:
        print(reminder)

    # Build the interface and serve it locally (no public share link).
    create_gradio_interface(rag_system).launch(share=False)


# Run the app when this cell executes: builds the RAG system and launches the Gradio UI.
main()