In [1]:
import os
import tkinter as tk
from tkinter import filedialog, scrolledtext, messagebox
from PyPDF2 import PdfReader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI

In [2]:
# Function to read text from multiple PDFs
def read_pdfs_from_folder(folder_path):
    raw_text = ''
    for file_name in os.listdir(folder_path):
        if file_name.endswith('.pdf'):
            file_path = os.path.join(folder_path, file_name)
            pdfreader = PdfReader(file_path)
            for i, page in enumerate(pdfreader.pages):
                content = page.extract_text()
                if content:
                    raw_text += content
    return raw_text

In [3]:
# Function to process PDFs and answer questions
def process_pdfs_and_answer_questions(folder_path, questions):
    raw_text = read_pdfs_from_folder(folder_path)
    
    if not raw_text:
        return [("No text found in PDFs.", "Please check the PDF files in the selected folder.")]
    
    text_splitter = CharacterTextSplitter(
        separator = "\n",
        chunk_size = 800,
        chunk_overlap = 200,
        length_function = len,
    )
    texts = text_splitter.split_text(raw_text)

    if not texts:
        return [("Text splitting resulted in empty chunks.", "Please check the PDF files and ensure they contain readable text.")]
    
    embeddings = OpenAIEmbeddings()
    document_search = FAISS.from_texts(texts, embeddings)
    chain = load_qa_chain(OpenAI(), chain_type="stuff")
    
    results = []
    for question in questions:
        docs = document_search.similarity_search(question)
        if not docs:
            results.append((question, "No relevant documents found."))
            continue
        answer = chain.run(input_documents=docs, question=question)
        results.append((question, answer))
    
    return results

In [4]:
# Function to browse folder
def browse_folder():
    folder_selected = filedialog.askdirectory()
    folder_path_entry.delete(0, tk.END)
    folder_path_entry.insert(0, folder_selected)

In [5]:
# Function to run the processing and display the results
def run_processing():
    folder_path = folder_path_entry.get()
    if not folder_path:
        messagebox.showerror("Error", "Please select a folder containing PDF files.")
        return
    
    questions = questions_text.get("1.0", tk.END).strip().split('\n')
    if not questions:
        messagebox.showerror("Error", "Please enter at least one question.")
        return
    
    results = process_pdfs_and_answer_questions(folder_path, questions)
    
    for question, answer in results:
        result_text.insert(tk.END, f"Question: {question}\nAnswer: {answer}\n\n")

In [6]:
# Create the main window
window = tk.Tk()
window.title("PDF QA System")

''

In [7]:
# Folder selection UI
folder_frame = tk.Frame(window)
folder_frame.pack(pady=10)

folder_path_label = tk.Label(folder_frame, text="Folder Path:")
folder_path_label.pack(side=tk.LEFT, padx=5)

folder_path_entry = tk.Entry(folder_frame, width=50)
folder_path_entry.pack(side=tk.LEFT, padx=5)

browse_button = tk.Button(folder_frame, text="Browse", command=browse_folder)
browse_button.pack(side=tk.LEFT, padx=5)

In [8]:
# Questions input UI
questions_label = tk.Label(window, text="Enter your questions (one per line):")
questions_label.pack(pady=10)

questions_text = scrolledtext.ScrolledText(window, width=80, height=10)
questions_text.pack(pady=10)

In [9]:
# Run button
run_button = tk.Button(window, text="Run", command=run_processing)
run_button.pack(pady=10)

# Result display UI
result_label = tk.Label(window, text="Results:")
result_label.pack(pady=10)

result_text = scrolledtext.ScrolledText(window, width=80, height=20)
result_text.pack(pady=10)

# Start the GUI event loop
window.mainloop()

  warn_deprecated(
Exception in Tkinter callback
Traceback (most recent call last):
  File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.12_3.12.1520.0_x64__qbz5n2kfra8p0\Lib\tkinter\__init__.py", line 1968, in __call__
    return self.func(*args)
           ^^^^^^^^^^^^^^^^
  File "C:\Users\petha\AppData\Local\Temp\ipykernel_10364\3009294353.py", line 13, in run_processing
    results = process_pdfs_and_answer_questions(folder_path, questions)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\petha\AppData\Local\Temp\ipykernel_10364\194440483.py", line 19, in process_pdfs_and_answer_questions
    embeddings = OpenAIEmbeddings()
                 ^^^^^^^^^^^^^^^^^^
  File "C:\Users\petha\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\LocalCache\local-packages\Python312\site-packages\langchain_core\_api\deprecation.py", line 183, in warn_if_direct_instance
    return wrapped(self, *args, **kwargs)
       