In [3]:
pip install customtkinter


Collecting customtkinter
Note: you may need to restart the kernel to use updated packages.

  Downloading customtkinter-5.2.2-py3-none-any.whl.metadata (677 bytes)
Collecting darkdetect (from customtkinter)
  Downloading darkdetect-0.8.0-py3-none-any.whl.metadata (3.6 kB)
Downloading customtkinter-5.2.2-py3-none-any.whl (296 kB)
   ---------------------------------------- 0.0/296.1 kB ? eta -:--:--
   - -------------------------------------- 10.2/296.1 kB ? eta -:--:--
   ----- --------------------------------- 41.0/296.1 kB 487.6 kB/s eta 0:00:01
   ---------------- ----------------------- 122.9/296.1 kB 1.0 MB/s eta 0:00:01
   ---------------------------------- ----- 256.0/296.1 kB 1.4 MB/s eta 0:00:01
   -------------------------------------- - 286.7/296.1 kB 1.4 MB/s eta 0:00:01
   ---------------------------------------- 296.1/296.1 kB 1.3 MB/s eta 0:00:00
Downloading darkdetect-0.8.0-py3-none-any.whl (9.0 kB)
Installing collected packages: darkdetect, customtkinter
Successfully installed customtkinter-5.2.2 darkdetect-0.8.0

In [None]:
# Cell 1: Imports and Setup (Interactive Chat Application)
import os
import json
import threading

# --- GUI Library ---
import customtkinter as ctk

# --- Core Logic Libraries ---
from tenacity import retry, stop_after_attempt, wait_exponential
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain.text_splitter import RecursiveCharacterTextSplitter
import uuid
import pyarrow as pa
from phi.vectordb.lancedb import LanceDb
from phi.document import Document as PhiDocument
import PyPDF2
import google.generativeai as genai

# --- API key configuration ---
# The key is expected in the GOOGLE_API_KEY environment variable. Nothing in
# this file sets it: export it in the shell that launches Jupyter (or load it
# from a secrets manager) so that no credential is ever stored in the notebook.
# SECURITY: an earlier revision hard-coded a live key on this line. Treat that
# key as compromised and revoke/rotate it.
if not os.environ.get("GOOGLE_API_KEY"):
    print(
        "WARNING: GOOGLE_API_KEY is not set. Set it in your environment "
        "before initializing the AI components."
    )


# --- Global Variables for Core Components ---
# Module-level handles shared by the helper functions and the GUI.
# llm / embedder / rag_prompt_template are filled by initialize_ai_components();
# vector_db is assigned by the startup code with the result of setup_vector_db().
# All four stay None until then.
llm = None
embedder = None
vector_db = None
rag_prompt_template = None

def initialize_ai_components():
    """Create the embedding model, the chat model and the RAG prompt template.

    The three objects are stored in the module-level ``embedder``, ``llm`` and
    ``rag_prompt_template`` variables so the query helper and the GUI can use
    them. Nothing is returned. Progress is reported on stdout.

    No API key is passed explicitly to either Google client here, so they are
    presumably configured through the environment (verify against the
    langchain-google-genai documentation).
    """
    global llm, embedder, rag_prompt_template

    print("Initializing AI components...")

    # Model identifiers and the human-readable label used in the system prompt
    # are kept side by side so the prompt cannot drift from the model in use.
    gemini_model = "models/gemini-2.5-pro"
    model_label = "Gemini 2.5 Pro"
    embedding_model = "models/embedding-001"

    embedder = GoogleGenerativeAIEmbeddings(model=embedding_model)

    llm = ChatGoogleGenerativeAI(
        model=gemini_model,
        temperature=0,
        max_retries=2,
        request_timeout=120,
        convert_system_message_to_human=False,
    )

    # Only the first literal is an f-string. The remaining literals are plain
    # strings, so "{context}" and "{query}" survive as template placeholders.
    rag_prompt_template = ChatPromptTemplate.from_messages([
        ("system", (
            f"INVOICE ANALYTICS ENGINE ({model_label})\n"
            "You are a helpful assistant for analyzing invoice documents.\n"
            "1. Extract exact values from the provided context.\n"
            "2. If asked for structured data, provide it in the requested format (JSON or Markdown tables).\n"
            "3. If you don't know the answer, say so clearly.\n\n"
            "CONTEXT:\n{context}"
        )),
        ("human", "Query: {query}"),
    ])

    print("AI components initialized successfully!")


def setup_vector_db(pdf_path: str, db_path: str) -> "lancedb.table.Table | None":
    """Extract text from a PDF, embed it and store it in a fresh LanceDB table.

    Args:
        pdf_path: Path of the PDF file to index.
        db_path: Directory for the LanceDB store. Any existing directory at
            this path is deleted and recreated.

    Returns:
        The LanceDB table named ``"invoice_data"`` with columns ``id``,
        ``content`` and ``vector``, or ``None`` when the PDF cannot be read or
        yields no usable text. A message is printed in the ``None`` cases.

    Relies on the module-level ``embedder`` having been initialized.
    """
    print(f"Processing PDF: {pdf_path}")

    # --- PDF Processing ---
    try:
        with open(pdf_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            # extract_text() may return None for a page; count that as empty.
            text = "".join(page.extract_text() or "" for page in reader.pages)
    except Exception as e:
        print(f"PDF extraction error: {e}")
        return None

    # Whitespace-only output (e.g. a scanned PDF) is as useless as no output,
    # so reject it before spending embedding calls on blank chunks.
    if not text.strip():
        print("No text extracted from PDF.")
        return None

    splitter = RecursiveCharacterTextSplitter(chunk_size=5000, chunk_overlap=500)
    chunk_texts = splitter.split_text(text)

    # Embed before touching the store on disk: if the embedding call fails,
    # the previous database directory is left intact.
    if chunk_texts:
        embeddings = embedder.embed_documents(chunk_texts)
        dim = len(embeddings[0])
    else:
        # No chunks to infer the vector size from; probe the model instead.
        embeddings = []
        dim = len(embedder.embed_query("test"))

    # --- Vector DB Setup ---
    # Imported here so that PDF validation above does not need these modules.
    import shutil
    import lancedb

    if os.path.exists(db_path):
        shutil.rmtree(db_path)
    os.makedirs(db_path, exist_ok=True)

    db = lancedb.connect(db_path)

    schema = pa.schema([
        pa.field("id", pa.string()),
        pa.field("content", pa.string()),
        # Fixed-size list: every vector must have exactly `dim` components.
        pa.field("vector", pa.list_(pa.float32(), dim)),
    ])

    table = db.create_table("invoice_data", schema=schema)

    if chunk_texts:
        rows = [
            {"id": str(uuid.uuid4()), "content": content, "vector": vector}
            for content, vector in zip(chunk_texts, embeddings)
        ]
        table.add(rows)

    print("Vector database created successfully.")
    return table

@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=10),
    reraise=True,
)
def _run_rag_query(question: str) -> str:
    """Run one retrieval-augmented query; exceptions propagate so retry can act.

    Embeds the question, fetches up to 8 nearest chunks from ``vector_db``,
    joins their ``content`` fields into the prompt context and returns the
    LLM's answer as a string. With ``reraise=True`` the last underlying
    exception is raised once all attempts are exhausted.
    """
    query_embedding = embedder.embed_query(question)
    results = vector_db.search(query_embedding).limit(8).to_list()

    context = "\n---\n".join(r['content'] for r in results)

    chain = rag_prompt_template | llm | StrOutputParser()
    return chain.invoke({
        "context": context,
        "query": question
    })


def get_ai_response(question: str) -> str:
    """Answer a question about the indexed invoice; never raises.

    Args:
        question: The user's question in natural language.

    Returns:
        The model's answer, or a human-readable error string when the
        components are not initialized or the query keeps failing.

    The retried work lives in ``_run_rag_query``. Previously the retry
    decorator wrapped this function, whose ``except`` swallowed every error,
    so no retry could ever be triggered.
    """
    # Compare against None explicitly: some of these objects define __len__,
    # so a truthiness test could misreport a valid-but-empty object as missing.
    components = (vector_db, llm, embedder, rag_prompt_template)
    if any(component is None for component in components):
        return "Error: AI components not initialized."

    try:
        return _run_rag_query(question)
    except Exception as e:
        # Log the details for the developer; keep the user-facing text generic.
        print(f"Query failed: {type(e).__name__}: {e}")
        return "An error occurred while processing your request. Please try again."

# --- Interactive Chat Application GUI ---
class ChatApplication(ctk.CTk):
    """Single-window chat UI for asking questions about the loaded invoice.

    The window holds a read-only transcript, an entry box and a submit button.
    Questions are answered by ``get_ai_response`` on a worker thread. The
    worker never touches widgets itself; it schedules the UI update on the
    Tk main loop via ``after``.
    """

    def __init__(self):
        """Build the window, lay out the widgets and show the welcome message."""
        super().__init__()

        self.title("Invoice Analysis Chatbot")
        self.geometry("800x600")

        ctk.set_appearance_mode("dark")
        ctk.set_default_color_theme("blue")

        # --- Configure Grid Layout ---
        self.grid_rowconfigure(0, weight=1)
        self.grid_columnconfigure(0, weight=1)

        # --- Main Frame ---
        self.main_frame = ctk.CTkFrame(self)
        self.main_frame.grid(row=0, column=0, padx=10, pady=10, sticky="nsew")
        self.main_frame.grid_rowconfigure(0, weight=1)
        self.main_frame.grid_columnconfigure(0, weight=1)

        # --- Output Textbox ---
        # Kept disabled so the user cannot edit the transcript; add_message()
        # enables it only for the duration of an insert.
        self.output_textbox = ctk.CTkTextbox(self.main_frame, state="disabled", wrap="word", font=("Arial", 14))
        self.output_textbox.grid(row=0, column=0, columnspan=2, padx=10, pady=10, sticky="nsew")

        # --- Input Frame ---
        self.input_frame = ctk.CTkFrame(self.main_frame, fg_color="transparent")
        self.input_frame.grid(row=1, column=0, columnspan=2, padx=10, pady=(0,10), sticky="ew")
        self.input_frame.grid_columnconfigure(0, weight=1)

        # --- Input Entry Box ---
        self.input_entry = ctk.CTkEntry(self.input_frame, placeholder_text="Ask a question about the invoice...", font=("Arial", 14))
        self.input_entry.grid(row=0, column=0, padx=(0,10), sticky="ew")
        self.input_entry.bind("<Return>", self.submit_question)

        # --- Submit Button ---
        self.submit_button = ctk.CTkButton(self.input_frame, text="Submit", command=self.submit_question)
        self.submit_button.grid(row=0, column=1)

        self.add_message("System", "Welcome! Please ask a question about the loaded invoice.")

    def add_message(self, sender: str, message: str):
        """Append ``"<sender>: <message>"`` to the transcript and scroll to it.

        A message that looks like JSON (starts with ``{`` or ``[``) and parses
        successfully is pretty-printed with a 4-space indent. Anything else is
        shown unchanged. Must be called from the Tk main-loop thread.
        """
        self.output_textbox.configure(state="normal")

        # Format JSON for better readability
        if message.strip().startswith(("{", "[")):
            try:
                parsed_json = json.loads(message)
                formatted_message = json.dumps(parsed_json, indent=4)
                message = f"\n{formatted_message}"
            except json.JSONDecodeError:
                pass  # Not valid JSON after all; keep the original text.

        self.output_textbox.insert("end", f"{sender}: {message}\n\n")
        self.output_textbox.configure(state="disabled")
        self.output_textbox.see("end")  # Auto-scroll to the bottom

    def submit_question(self, event=None):
        """Send the entry box's text to the AI; bound to the button and <Return>.

        ``event`` is supplied by the key binding and ignored. Empty input is a
        no-op. While the query runs the entry and button are disabled.
        """
        question = self.input_entry.get().strip()
        if not question:
            return

        self.add_message("You", question)
        self.input_entry.delete(0, "end")

        # Disable input while processing
        self.submit_button.configure(state="disabled", text="Thinking...")
        self.input_entry.configure(state="disabled")

        # Run the query off the main thread to keep the GUI responsive.
        # daemon=True so a pending request cannot keep the process alive
        # after the window has been closed.
        threading.Thread(
            target=self.process_in_background,
            args=(question,),
            daemon=True,
        ).start()

    def process_in_background(self, question: str):
        """Worker-thread entry point: fetch the answer, then hand it to the UI.

        Widgets are not modified here. The result is passed to
        ``_show_response`` through ``after(0, ...)`` so that every widget
        update runs on the Tk main loop.
        """
        try:
            response = get_ai_response(question)
        except Exception as e:
            # Make sure the input is re-enabled even if the helper raises.
            print(f"Background query failed: {type(e).__name__}: {e}")
            response = "An error occurred while processing your request. Please try again."

        self.after(0, self._show_response, response)

    def _show_response(self, response: str):
        """Main-thread callback: display the bot's answer and re-enable input."""
        self.add_message("Bot", response)

        # Re-enable input
        self.submit_button.configure(state="normal", text="Submit")
        self.input_entry.configure(state="normal")


if __name__ == "__main__":
    # --- Setup ---
    # The PDF is looked up in, and the LanceDB directory created under, the
    # current working directory. Place 'invoice.pdf' there before running.
    pdf_path = os.path.join(os.getcwd(), "invoice.pdf")
    lancedb_dir = os.path.join(os.getcwd(), "lancedb_invoices_interactive")

    if not os.path.isfile(pdf_path):
        print("Error: 'invoice.pdf' not found. Please place it in the same directory as the script.")
    else:
        # 1. Initialize AI components first (setup_vector_db needs the embedder)
        initialize_ai_components()

        # 2. Setup the vector database with the PDF
        vector_db = setup_vector_db(pdf_path, lancedb_dir)

        # Test for None explicitly: the table's truthiness may depend on its
        # row count, and an empty table is still a successfully built one.
        if vector_db is not None:
            # 3. Launch the GUI Application (blocks until the window closes)
            app = ChatApplication()
            app.mainloop()
        else:
            print("Error: the vector database could not be built; the chat window was not opened.")

Initializing AI components...
AI components initialized successfully!
Processing PDF: C:\Users\ALISH\Agent Practice\invoice.pdf
Vector database created successfully.
