In [None]:
# Phase 1: Basic Multimodal RAG System with Gemini
# Requirements: pip install gradio chromadb langchain langchain-community langchain-openai openai sentence-transformers google-generativeai PyPDF2 pillow python-docx python-pptx

import gradio as gr
import google.generativeai as genai
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Fixed import for HuggingFace embeddings
try:
    from langchain_community.embeddings import HuggingFaceEmbeddings
except ImportError:
    from langchain.embeddings import HuggingFaceEmbeddings
import chromadb
from chromadb.config import Settings
import os
import json
from datetime import datetime
from typing import List, Dict, Any
import base64
from PIL import Image
import io
from langchain_openai import OpenAIEmbeddings
import openai


# Document processing imports
import PyPDF2
from docx import Document
from pptx import Presentation

# Multimodal RAG system: document ingestion, ChromaDB vector storage, retrieval and Gemini answers
class MultimodalRAG:
    """
    Retrieval-augmented question answering over text documents and images.

    Text files (PDF, DOCX, PPTX, TXT) are split into chunks, embedded and
    stored in a persistent ChromaDB collection. Images are described by
    Gemini and the description is embedded and stored in a second
    collection. Questions are answered by Gemini from the entries retrieved
    out of both collections.
    """

    # File extensions (lower-case, without the dot) handled by add_document
    TEXT_EXTENSIONS = ('pdf', 'docx', 'pptx', 'txt')
    IMAGE_EXTENSIONS = ('jpg', 'jpeg', 'png', 'gif', 'bmp')

    # Maximum number of characters of each retrieved document put in the prompt
    CONTEXT_CHARS_PER_DOC = 500

    def __init__(self, gemini_api_key: str, openai_api_key: str):
        """
        Initialize the Multimodal RAG system

        Args:
            gemini_api_key: Your Gemini API key (image analysis and answers)
            openai_api_key: Your OpenAI API key (embeddings)

        Raises:
            Exception: Any error raised while creating the embeddings client,
                the ChromaDB client or the collections is printed and re-raised.
        """
        # Configure Gemini
        genai.configure(api_key=gemini_api_key)
        self.gemini_model = genai.GenerativeModel('gemini-1.5-flash')

        # Initialize embeddings. The key supplied by the caller is used as-is;
        # it must not be overwritten here, otherwise every embedding call fails.
        # (A free alternative is HuggingFaceEmbeddings with
        # "sentence-transformers/all-MiniLM-L6-v2" on CPU.)
        try:
            print("Initializing embeddings...")
            self.embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
            print("Embeddings initialized successfully")
        except Exception as e:
            print(f"Error initializing embeddings: {e}")
            raise

        # Initialize ChromaDB (persistent vector database on local disk)
        try:
            print("Initializing ChromaDB...")
            self.chroma_client = chromadb.PersistentClient(
                path="./knowledge_base",
                settings=Settings(anonymized_telemetry=False, allow_reset=True)
            )
            print("ChromaDB initialized successfully")
        except Exception as e:
            print(f"Error initializing ChromaDB: {e}")
            raise

        # Create collections for different content types
        try:
            self.text_collection = self.chroma_client.get_or_create_collection(
                name="text_documents",
                metadata={"description": "Text-based documents"}
            )
            self.image_collection = self.chroma_client.get_or_create_collection(
                name="image_documents",
                metadata={"description": "Image-based documents"}
            )
            print("Collections created successfully")
        except Exception as e:
            print(f"Error creating collections: {e}")
            raise

        # Text splitter for chunking documents
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len
        )

        # Session memory storage (in-memory for Phase 1): session_id -> list of upload records
        self.session_memory = {}

    def _read_text(self, file_path: str, file_type: str) -> str:
        """Return the text of a supported file, raising on any failure."""
        if file_type == 'pdf':
            with open(file_path, 'rb') as file:
                pdf_reader = PyPDF2.PdfReader(file)
                # "or ''" guards against a page for which no text comes back
                return "".join(
                    (page.extract_text() or "") + "\n" for page in pdf_reader.pages
                )

        if file_type == 'docx':
            doc = Document(file_path)
            return "".join(paragraph.text + "\n" for paragraph in doc.paragraphs)

        if file_type == 'pptx':
            prs = Presentation(file_path)
            return "".join(
                shape.text + "\n"
                for slide in prs.slides
                for shape in slide.shapes
                if hasattr(shape, "text")
            )

        if file_type == 'txt':
            with open(file_path, 'r', encoding='utf-8') as file:
                return file.read()

        raise ValueError(f"unsupported file type '{file_type}'")

    def extract_text_from_file(self, file_path: str, file_type: str) -> str:
        """
        Extract text from various file formats

        Args:
            file_path: Path to the file
            file_type: Type of file (pdf, docx, pptx, txt)

        Returns:
            Extracted text content, or a string starting with
            "Error extracting text:" when the file cannot be read or the
            type is not supported (this method never raises).
        """
        try:
            return self._read_text(file_path, file_type)
        except Exception as e:
            return f"Error extracting text: {str(e)}"

    def _describe_image(self, image_path: str) -> str:
        """Return Gemini's analysis of an image, raising on any failure."""
        prompt = """
            Analyze this image and provide:
            1. Any text visible in the image (OCR)
            2. A detailed description of the content
            3. Key information or concepts shown
            4. Context that might be useful for search

            Format your response clearly with these sections.
            """

        # The context manager closes the underlying file handle once the
        # request has been made.
        with Image.open(image_path) as image:
            response = self.gemini_model.generate_content([prompt, image])
        return response.text

    def process_image_with_gemini(self, image_path: str) -> str:
        """
        Extract text/information from images using Gemini Vision

        Args:
            image_path: Path to the image file

        Returns:
            Extracted text and description, or a string starting with
            "Error processing image:" on failure (this method never raises).
        """
        try:
            return self._describe_image(image_path)
        except Exception as e:
            return f"Error processing image: {str(e)}"

    def _remember_upload(self, session_id: str, record: Dict[str, Any]) -> None:
        """Append an upload record to the in-memory history of a session."""
        self.session_memory.setdefault(session_id, []).append(record)

    def _add_text_document(self, file_path: str, file_name: str,
                           file_extension: str, session_id: str) -> str:
        """Chunk, embed and store one text document; return a status message."""
        # Failure is detected through exceptions rather than by inspecting the
        # extracted text, so a document that happens to begin with the word
        # "Error" is still ingested.
        try:
            content = self._read_text(file_path, file_extension)
        except Exception as e:
            print(f"Error extracting text from {file_name}: {e}")
            return f"❌ Failed to extract content from {file_name}"

        chunks = self.text_splitter.split_text(content) if content else []
        if not chunks:
            # e.g. an empty file or a PDF without extractable text
            return f"❌ Failed to extract content from {file_name}"

        # One timestamp per upload keeps chunk ids and metadata consistent;
        # microsecond resolution avoids id clashes on a quick re-upload.
        uploaded_at = datetime.now()
        id_suffix = int(uploaded_at.timestamp() * 1_000_000)
        stored = 0

        for i, chunk in enumerate(chunks):
            try:
                embedding = self.embeddings.embed_query(chunk)
                self.text_collection.add(
                    embeddings=[embedding],
                    documents=[chunk],
                    metadatas=[{
                        "file_name": file_name,
                        "file_type": file_extension,
                        "chunk_index": i,
                        "session_id": session_id,
                        "timestamp": uploaded_at.isoformat()
                    }],
                    ids=[f"{file_name}_{i}_{id_suffix}"]
                )
                stored += 1
            except Exception as chunk_error:
                # Best effort: one bad chunk must not abort the whole document
                print(f"Error processing chunk {i}: {chunk_error}")

        if stored == 0:
            return f"❌ Failed to store any chunks from {file_name}"

        self._remember_upload(session_id, {
            "file_name": file_name,
            "file_type": file_extension,
            "chunks_count": stored,
            "timestamp": uploaded_at.isoformat()
        })

        if stored < len(chunks):
            return f"⚠️ Partially processed {file_name} ({stored}/{len(chunks)} chunks stored)"
        return f"✅ Successfully processed {file_name} ({len(chunks)} chunks)"

    def _add_image_document(self, file_path: str, file_name: str, session_id: str) -> str:
        """Describe, embed and store one image; return a status message."""
        try:
            content = self._describe_image(file_path)
        except Exception as e:
            print(f"Error processing image {file_name}: {e}")
            return f"❌ Failed to process image {file_name}"

        if not content:
            return f"❌ Failed to process image {file_name}"

        uploaded_at = datetime.now()
        embedding = self.embeddings.embed_query(content)

        self.image_collection.add(
            embeddings=[embedding],
            documents=[content],
            metadatas=[{
                "file_name": file_name,
                "file_type": "image",
                "session_id": session_id,
                "timestamp": uploaded_at.isoformat()
            }],
            ids=[f"{file_name}_{int(uploaded_at.timestamp() * 1_000_000)}"]
        )

        self._remember_upload(session_id, {
            "file_name": file_name,
            "file_type": "image",
            "timestamp": uploaded_at.isoformat()
        })

        return f"✅ Successfully processed image {file_name}"

    def add_document(self, file_path: str, file_name: str, session_id: str = "default") -> str:
        """
        Add a document to the knowledge base

        Args:
            file_path: Path to the document
            file_name: Original name of the file (its extension selects the handler)
            session_id: Session identifier for memory

        Returns:
            Status message (this method never raises; failures are reported
            in the returned text)
        """
        try:
            # Determine file type from the text after the last dot
            file_extension = file_name.lower().split('.')[-1]

            if file_extension in self.TEXT_EXTENSIONS:
                return self._add_text_document(file_path, file_name, file_extension, session_id)
            if file_extension in self.IMAGE_EXTENSIONS:
                return self._add_image_document(file_path, file_name, session_id)
            return f"❌ Unsupported file type: {file_extension}"

        except Exception as e:
            return f"❌ Error processing {file_name}: {str(e)}"

    def _query_collection(self, collection, query_embedding, n_results: int,
                          where, result_type: str) -> List[Dict]:
        """Query one collection and return its hits as result dictionaries."""
        available = collection.count()
        if available == 0:
            # Nothing stored yet: skip the query instead of asking an empty index
            return []

        results = collection.query(
            query_embeddings=[query_embedding],
            n_results=min(n_results, available),
            where=where
        )

        if not (results['documents'] and results['documents'][0]):
            return []

        return [
            {
                'content': doc,
                'metadata': metadata,
                'distance': distance,
                'type': result_type
            }
            for doc, metadata, distance in zip(
                results['documents'][0],
                results['metadatas'][0],
                results['distances'][0]
            )
        ]

    def search_knowledge_base(self, query: str, session_id: str = "default", top_k: int = 5) -> List[Dict]:
        """
        Search the knowledge base for relevant information

        Args:
            query: User's search query
            session_id: Session identifier. Results are restricted to this
                session only when it has uploads recorded in the in-memory
                session history; otherwise all stored documents are searched.
            top_k: Number of top results to return

        Returns:
            List of relevant documents with metadata, closest first. On
            failure a single entry of type 'error' is returned instead of
            raising.
        """
        if top_k <= 0:
            return []

        try:
            # Generate query embedding
            query_embedding = self.embeddings.embed_query(query)
            where = {"session_id": session_id} if session_id in self.session_memory else None

            # Ask each collection for up to top_k hits so that the merged,
            # distance-sorted list really contains the top_k closest entries.
            all_results = []
            for result_type, collection in (
                ('text', self.text_collection),
                ('image', self.image_collection),
            ):
                all_results.extend(
                    self._query_collection(collection, query_embedding, top_k, where, result_type)
                )

            # Sort by relevance (smaller distance is closer)
            all_results.sort(key=lambda x: x['distance'])
            return all_results[:top_k]

        except Exception as e:
            print(f"Search error: {e}")
            return [{"content": f"Search error: {str(e)}", "metadata": {}, "distance": 1.0, "type": "error"}]

    def generate_answer(self, query: str, session_id: str = "default") -> str:
        """
        Generate an answer using retrieved context and Gemini

        Args:
            query: User's question
            session_id: Session identifier

        Returns:
            Generated answer followed by the list of source files, or a
            message explaining why no answer could be produced
        """
        no_context_message = (
            "I couldn't find relevant information in your knowledge base. "
            "Please upload some documents first."
        )

        try:
            # Search for relevant documents
            relevant_docs = self.search_knowledge_base(query, session_id)

            # Prepare context from retrieved documents
            context = ""
            sources = []
            limit = self.CONTEXT_CHARS_PER_DOC

            for i, doc in enumerate(relevant_docs):
                if doc['type'] == 'error':
                    continue
                source_name = doc['metadata'].get('file_name', 'Unknown')
                context += f"\nDocument {i+1} ({doc['type']}) from {source_name}:\n"
                # Limit context length; mark the cut only when text was dropped
                snippet = doc['content'][:limit]
                if len(doc['content']) > limit:
                    snippet += "..."
                context += snippet + "\n"
                sources.append(source_name)

            if not context:
                return no_context_message

            # Create prompt for Gemini
            prompt = f"""
            Based on the following context from the user's knowledge base, please answer the question.

            Context:
            {context}

            Question: {query}

            Instructions:
            1. Provide a comprehensive answer based on the context
            2. If the context doesn't contain enough information, mention what's missing
            3. Cite the source documents when relevant
            4. Be conversational and helpful

            Answer:
            """

            # Generate response using Gemini
            response = self.gemini_model.generate_content(prompt)

            # De-duplicate sources while keeping retrieval order, so the
            # output is deterministic
            unique_sources = list(dict.fromkeys(sources))
            if unique_sources:
                return response.text + f"\n\n📚 **Sources:** {', '.join(unique_sources)}"
            return response.text

        except Exception as e:
            return f"Error generating answer: {str(e)}"

    def get_session_info(self, session_id: str = "default") -> str:
        """
        Get information about the current session

        Args:
            session_id: Session identifier

        Returns:
            Session information as formatted string
        """
        if session_id not in self.session_memory:
            return "No documents uploaded in this session."

        docs = self.session_memory[session_id]
        lines = [f"📁 **Session Documents ({len(docs)} files):**\n\n"]

        for doc in docs:
            # timestamp[:19] trims the ISO string to second precision
            lines.append(f"• {doc['file_name']} ({doc['file_type']}) - {doc['timestamp'][:19]}\n")
            if 'chunks_count' in doc:
                lines.append(f"  └── {doc['chunks_count']} text chunks\n")

        return "".join(lines)

# Module-level handle to the RAG system shared by the Gradio callbacks below.
# It stays None until initialize_system() succeeds; every callback checks for
# None before using it.
rag_system = None

def initialize_system(gemini_key: str, openai_key: str) -> str:
    """
    Initialize the global RAG system with the Gemini and OpenAI API keys

    Args:
        gemini_key: Gemini API key
        openai_key: OpenAI API key

    Returns:
        Status message for display in the UI (this function never raises)
    """
    global rag_system

    # Keys pasted into a text box often carry stray spaces or newlines, and a
    # whitespace-only value must count as missing.
    gemini_key = (gemini_key or "").strip()
    openai_key = (openai_key or "").strip()

    if not gemini_key:
        return "❌ Please provide a Gemini API key"
    if not openai_key:
        return "❌ Please provide a OpenAI API key"

    try:
        print("Starting system initialization...")
        rag_system = MultimodalRAG(gemini_key, openai_key)
        print("System initialization completed!")
        return "✅ System initialized successfully!"
    except Exception as e:
        error_msg = f"❌ Error initializing system: {str(e)}"
        print(error_msg)
        return error_msg

def upload_document(files, session_id: str = "default") -> str:
    """
    Gradio callback: add every uploaded file to the knowledge base.

    Args:
        files: Uploaded file objects; each must expose its path as ``.name``
        session_id: Session the documents are recorded under

    Returns:
        One status line per file, joined with newlines, or an error message
        when the system is not initialized or nothing was uploaded
    """
    if rag_system is None:
        return "❌ Please initialize the system with your API key first"

    if not files:
        return "❌ No files uploaded"

    # The base name of the stored path serves as the document's display name
    return "\n".join(
        rag_system.add_document(uploaded.name, os.path.basename(uploaded.name), session_id)
        for uploaded in files
    )

def ask_question(question: str, session_id: str = "default") -> str:
    """
    Gradio callback: answer a question from the knowledge base.

    Args:
        question: The user's question
        session_id: Session whose documents should be consulted

    Returns:
        The generated answer, or an error message when the system is not
        initialized or the question is empty
    """
    if rag_system is None:
        return "❌ Please initialize the system with your API key first"

    if question:
        return rag_system.generate_answer(question, session_id)

    return "❌ Please ask a question"

def get_session_status(session_id: str = "default") -> str:
    """
    Gradio callback: describe the documents uploaded in a session.

    Args:
        session_id: Session to report on

    Returns:
        Formatted session summary, or an error message when the system is
        not initialized
    """
    system = rag_system
    if system is not None:
        return system.get_session_info(session_id)

    return "❌ System not initialized"

# Create Gradio Interface
def create_gradio_interface():
    """
    Create the Gradio web interface

    Builds a Blocks app with four tabs (setup, upload, question answering,
    session info) and wires each tab's button to the matching module-level
    callback. Nothing is launched here.

    Returns:
        The assembled gr.Blocks application
    """
    
    with gr.Blocks(title="Enterprise Knowledge Assistant - Phase 1", theme=gr.themes.Soft()) as demo:
        gr.Markdown("""
        # 🧠 Enterprise Knowledge Assistant (Phase 1)
        
        Upload documents (PDF, DOCX, PPTX, TXT, Images) and ask questions about their content.
        The system uses multimodal RAG to understand both text and visual content.
        
        **Note:** Make sure you have installed: `pip install sentence-transformers`
        """)
        
        # Tab 1: collect both API keys and create the global RAG system
        with gr.Tab("🔧 Setup"):
            gr.Markdown("### Initialize the System")
            api_key_input = gr.Textbox(
                label="Gemini API Key", 
                placeholder="Enter your Gemini API key here...",
                type="password"
            )
            openai_key_input = gr.Textbox(
            label="OpenAI API Key", 
            placeholder="Enter your OpenAI API key here...",
             type="password"
            )

            init_btn = gr.Button("Initialize System", variant="primary")
            init_status = gr.Textbox(label="Status", interactive=False)
            
            # Input order must match initialize_system(gemini_key, openai_key)
            init_btn.click(
                fn=initialize_system,
                inputs=[api_key_input,openai_key_input],
                outputs=[init_status]
            )
        
        # Tab 2: upload files into a session
        with gr.Tab("📁 Upload Documents"):
            gr.Markdown("### Upload Your Documents")
            session_input = gr.Textbox(
                label="Session ID", 
                value="default",
                placeholder="Enter session ID (optional)"
            )
            
            file_upload = gr.Files(
                label="Upload Documents",
                file_count="multiple",
                file_types=[".pdf", ".docx", ".pptx", ".txt", ".jpg", ".jpeg", ".png", ".gif", ".bmp"]
            )
            
            upload_btn = gr.Button("Process Documents", variant="primary")
            upload_status = gr.Textbox(label="Upload Status", interactive=False, lines=5)
            
            upload_btn.click(
                fn=upload_document,
                inputs=[file_upload, session_input],
                outputs=[upload_status]
            )
        
        # Tab 3: ask questions; the session ID here is a separate text box
        # from the one on the upload tab, so the user has to keep them in sync
        with gr.Tab("🤖 Ask Questions"):
            gr.Markdown("### Query Your Knowledge Base")
            
            session_query = gr.Textbox(
                label="Session ID", 
                value="default",
                placeholder="Enter session ID"
            )
            
            question_input = gr.Textbox(
                label="Your Question",
                placeholder="Ask anything about your uploaded documents...",
                lines=3
            )
            
            ask_btn = gr.Button("Get Answer", variant="primary")
            answer_output = gr.Textbox(
                label="Answer", 
                interactive=False, 
                lines=10
            )
            
            ask_btn.click(
                fn=ask_question,
                inputs=[question_input, session_query],
                outputs=[answer_output]
            )
        
        # Tab 4: list the documents recorded for a session
        with gr.Tab("📊 Session Info"):
            gr.Markdown("### Session Status")
            
            session_status_input = gr.Textbox(
                label="Session ID", 
                value="default"
            )
            
            status_btn = gr.Button("Check Status")
            status_output = gr.Textbox(
                label="Session Information", 
                interactive=False, 
                lines=8
            )
            
            status_btn.click(
                fn=get_session_status,
                inputs=[session_status_input],
                outputs=[status_output]
            )
        
        # Footer shown below the tabs
        gr.Markdown("""
        ### 🔧 Troubleshooting
        If you encounter import errors:
        1. Make sure you're in your virtual environment
        2. Run: `pip install sentence-transformers torch transformers`
        3. For ChromaDB issues: `pip install chromadb --upgrade`
        """)
    
    return demo

# Launch the application
if __name__ == "__main__":
    # Build the UI, then serve it on every network interface at port 7860.
    # share=True additionally requests a public Gradio URL for sharing.
    demo = create_gradio_interface()
    demo.launch(server_port=7860, server_name="0.0.0.0", share=True)

  from .autonotebook import tqdm as notebook_tqdm


* Running on local URL:  http://0.0.0.0:7860
* Running on public URL: https://12faf331e5ae567f12.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Starting system initialization...
Initializing embeddings...
Embeddings initialized successfully
Initializing ChromaDB...
ChromaDB initialized successfully
Collections created successfully
System initialization completed!
