In [None]:
import os
import gradio as gr
import fitz ## PyMuPDF
import sqlite3
from llama_index.core import VectorStoreIndex, Document
from datetime import datetime
import uuid
from tqdm.notebook import tqdm
import traceback
from dotenv import load_dotenv


In [None]:
## Load environment variables (API keys etc.) from two env files.
## The paths are relative, so they presumably resolve against the current
## working directory -- TODO confirm the notebook is always run from here.
## NOTE(review): python-dotenv's default is override=False, so a variable set by
## the first file (or already present in the environment) should not be replaced
## by the second file -- confirm this precedence is the intended one.
load_dotenv(dotenv_path="../../project_secrets.env")
load_dotenv(dotenv_path="../../../ai_sdlc_secrets.env")

In [None]:
DB_NAME = "pdf_qa_logs.db"

## Initialize the database
def init_db(db_name: str = DB_NAME) -> None:
    """Create the ``interactions`` table if it does not already exist.

    The statement uses ``CREATE TABLE IF NOT EXISTS``, so calling this
    function repeatedly on the same database is harmless.

    Args:
        db_name: Path of the SQLite database file; defaults to ``DB_NAME``.

    Raises:
        sqlite3.Error: If the database cannot be opened or written.
    """
    conn = sqlite3.connect(db_name)
    try:
        conn.execute(
            '''
            CREATE TABLE IF NOT EXISTS interactions (
                id TEXT PRIMARY KEY,
                timestamp TEXT,
                pdf_name TEXT,
                query TEXT,
                response TEXT)''')
        ## We might add prompts and other metadata
        conn.commit()
    finally:
        ## Close even when the statement fails so the handle is never leaked.
        conn.close()

## Extract text from PDF using fitz
def extract_text_from_pdf(pdf_bytes: bytes) -> str:
    """Return the concatenated text of every page of an in-memory PDF.

    Args:
        pdf_bytes: Raw PDF content, handed to ``fitz.open`` as a stream.

    Returns:
        The ``get_text()`` output of each page, joined in page order.
    """
    ## The context manager closes the document even if a page fails to parse;
    ## previously the document handle was never released.
    with fitz.open(stream=pdf_bytes, filetype="pdf") as pdf_doc:
        return "".join(page.get_text() for page in pdf_doc)

## Process PDF and create index:
def process_pdf(pdf_bytes: bytes) -> VectorStoreIndex:
    """Build a vector index over the text extracted from a PDF.

    Args:
        pdf_bytes: Raw PDF content, forwarded to ``extract_text_from_pdf``.

    Returns:
        A ``VectorStoreIndex`` built from a single ``Document`` that holds
        the whole extracted text.
    """
    pdf_document = Document(text=extract_text_from_pdf(pdf_bytes))
    return VectorStoreIndex.from_documents([pdf_document])

## Log to SQLite DB:
def log_interaction(pdf_name: str, query: str, response: str, db_name: str = DB_NAME) -> None:
    """Append one question/answer exchange to the ``interactions`` table.

    Each row gets a fresh UUID4 as its id and the current local time
    (ISO 8601) as its timestamp.

    Args:
        pdf_name: Name of the PDF the question was asked about.
        query: The user's question.
        response: The answer text that was produced.
        db_name: Path of the SQLite database file; defaults to ``DB_NAME``.

    Raises:
        sqlite3.Error: If the database cannot be opened or written.
    """
    ## Make sure the table exists before inserting; init_db is idempotent, so
    ## this is a cheap no-op once the table has been created.
    init_db(db_name)

    interaction_id = str(uuid.uuid4())
    timestamp = datetime.now().isoformat()

    conn = sqlite3.connect(database=db_name)
    try:
        ## Name the columns explicitly so the insert keeps working if further
        ## columns are added to the table later.
        conn.execute(
            "INSERT INTO interactions (id, timestamp, pdf_name, query, response) "
            "VALUES (?, ?, ?, ?, ?)",
            (interaction_id, timestamp, pdf_name, query, response))
        conn.commit()
    finally:
        ## Close even when the insert fails so the handle is never leaked.
        conn.close()

def query_pdf(pdf: bytes, query: str) -> str:
    """Answer a question about an uploaded PDF and log the exchange.

    Args:
        pdf: Uploaded PDF content, or ``None`` when nothing was uploaded.
        query: The user's question; ``None`` or blank is rejected.

    Returns:
        The answer text on success, a prompt asking for the missing input,
        or an error message (including the traceback) if processing fails.
    """
    if pdf is None:
        return "Please upload a PDF"
    ## Treat a missing query the same as a blank one instead of crashing on
    ## ``None.strip()``.
    if not query or not query.strip():
        return "Please enter a valid query"

    try:
        ## Raw bytes carry no ``name`` attribute, so the placeholder is used
        ## unless a file-like object is passed in.
        pdf_name = getattr(pdf, "name", "PDF-Unknown")
        index = process_pdf(pdf)  ## Passing bytes directly

        query_engine = index.as_query_engine()
        response = query_engine.query(query)

        log_interaction(pdf_name, query, response.response)
        ## Return plain text, as the annotation promises, rather than the
        ## query engine's response object.
        answer = str(response)
    except Exception as e:
        return f"An error occurred: {str(e)}-{traceback.format_exc()}"

    return answer

In [None]:
## Build the UI: a PDF upload, a question box, an answer box and a submit button.
## Components are created in display order inside the Blocks context.
with gr.Blocks() as app:
    ## type="binary": query_pdf is annotated to receive the upload as bytes
    ## (presumably the raw file content -- confirm against the Gradio docs).
    pdf_upload = gr.File(label="Upload PDF", type="binary")
    query_input = gr.Textbox(label="Ask a question about the PDF")
    output = gr.Textbox(label="Answer")

    query_button = gr.Button("Submit")
    ## On click, call query_pdf(pdf_upload, query_input) and show its return
    ## value in the "Answer" textbox.
    query_button.click(query_pdf, inputs=[pdf_upload, query_input], outputs=output)

## Runs as soon as this cell/module is executed.
app.launch()