In [None]:
!pip install -q transformers sentence-transformers torch langchain chromadb pypdf ipywidgets


In [None]:
# Install required packages (same quiet pip install as the previous cell)
!pip install -q transformers sentence-transformers torch langchain chromadb pypdf ipywidgets

import ipywidgets as widgets
from IPython.display import display
import tempfile
import os
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from transformers import pipeline
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA

class PDFChatbot:
    """Notebook widget UI for asking questions about an uploaded PDF.

    Constructing an instance loads a text2text-generation pipeline and a
    sentence-embedding model, then builds and displays the widgets.
    ``process_pdf`` indexes the uploaded file into a Chroma vector store and
    wires a ``RetrievalQA`` chain to it; ``ask_question`` runs that chain.
    """

    def __init__(self):
        """Load the language and embedding models, then display the UI."""
        # Initialize LLM
        print("Loading language model...")
        self.llm = HuggingFacePipeline(
            pipeline=pipeline(
                "text2text-generation",
                model="google/flan-t5-small",
                max_length=512
            )
        )

        # Initialize embeddings
        print("Loading embedding model...")
        self.embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )

        # Stays None until a PDF has been processed successfully.
        self.qa_chain = None
        self.setup_widgets()

    def setup_widgets(self):
        """Create the widgets, attach the click handlers and display them."""
        # Create widgets
        self.file_uploader = widgets.FileUpload(
            description='Upload PDF',
            accept='.pdf',
            multiple=False
        )

        self.process_button = widgets.Button(
            description='Process PDF',
            disabled=False,
            button_style='primary'
        )

        # The question controls start disabled; process_pdf enables them
        # once a QA chain exists.
        self.question_input = widgets.Text(
            description='Question:',
            placeholder='Ask a question about the PDF...',
            disabled=True
        )

        self.ask_button = widgets.Button(
            description='Ask',
            disabled=True,
            button_style='success'
        )

        self.output = widgets.Output()

        # Setup event handlers
        self.process_button.on_click(self.process_pdf)
        self.ask_button.on_click(self.ask_question)

        # Display widgets
        display(widgets.VBox([
            widgets.HTML("<h2>PDF Question Answering System</h2>"),
            self.file_uploader,
            self.process_button,
            self.output,
            widgets.HTML("<h3>Ask Questions</h3>"),
            self.question_input,
            self.ask_button
        ]))

    @staticmethod
    def _uploaded_pdf_bytes(upload_value):
        """Return the first uploaded file's content as bytes, or None.

        ``FileUpload.value`` is a dict keyed by filename in ipywidgets 7 and
        a tuple of per-file dicts in ipywidgets 8. In both, each entry holds
        the raw data under ``"content"`` (bytes or a memoryview).

        Args:
            upload_value: The ``value`` of a ``FileUpload`` widget.

        Returns:
            The content of the first entry as ``bytes``, or ``None`` when
            nothing has been uploaded.
        """
        if not upload_value:
            return None
        if isinstance(upload_value, dict):
            entries = upload_value.values()
        else:
            entries = upload_value
        first_entry = next(iter(entries))
        return bytes(first_entry['content'])

    def process_pdf(self, _):
        """Index the uploaded PDF and enable the question controls.

        Click handler for the "Process PDF" button. Progress and errors are
        printed into the output widget; nothing is raised to the caller.

        Args:
            _: The button instance passed by ``on_click`` (unused).
        """
        import uuid  # local import: only needed to name the collection

        with self.output:
            self.output.clear_output()
            pdf_bytes = self._uploaded_pdf_bytes(self.file_uploader.value)
            if pdf_bytes is None:
                print("Please upload a PDF first!")
                return

            print("Processing PDF...")

            # PyPDFLoader takes a path, so spill the upload to a temp file.
            with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_file:
                temp_file.write(pdf_bytes)
                pdf_path = temp_file.name

            try:
                # Load and split document
                loader = PyPDFLoader(pdf_path)
                pages = loader.load()

                text_splitter = RecursiveCharacterTextSplitter(
                    chunk_size=1000,
                    chunk_overlap=100
                )
                splits = text_splitter.split_documents(pages)

                if not splits:
                    # e.g. a scanned PDF with no text layer; report it
                    # plainly instead of failing inside the vector store.
                    print("Error processing PDF: no extractable text found")
                    return

                # Create vector store. A fresh collection name per upload
                # keeps chunks from a previously processed PDF out of the
                # retriever's results.
                vectorstore = Chroma.from_documents(
                    documents=splits,
                    embedding=self.embeddings,
                    collection_name=f"pdf_{uuid.uuid4().hex}"
                )

                # Create QA chain
                self.qa_chain = RetrievalQA.from_chain_type(
                    llm=self.llm,
                    chain_type="stuff",
                    retriever=vectorstore.as_retriever(),
                )

                print("PDF processed successfully!")
                self.question_input.disabled = False
                self.ask_button.disabled = False

            except Exception as e:
                # UI boundary: show the failure in the output widget.
                print(f"Error processing PDF: {e}")

            finally:
                # The temp file was created with delete=False; remove it on
                # every path, including the early return above.
                os.unlink(pdf_path)

    def ask_question(self, _):
        """Run the current question through the QA chain and print the answer.

        Click handler for the "Ask" button. Messages and errors are printed
        into the output widget; nothing is raised to the caller.

        Args:
            _: The button instance passed by ``on_click`` (unused).
        """
        with self.output:
            self.output.clear_output()
            question = self.question_input.value.strip()
            if not question:
                print("Please enter a question!")
                return

            if not self.qa_chain:
                print("Please process a PDF first!")
                return

            try:
                print("Thinking...")
                response = self.qa_chain.run(question)
                print("\nAnswer:", response)
            except Exception as e:
                # UI boundary: show the failure in the output widget.
                print(f"Error: {e}")

# Create the chatbot; constructing it loads both models and displays the widget UI
chatbot = PDFChatbot()

Loading language model...
Loading embedding model...


VBox(children=(HTML(value='<h2>PDF Question Answering System</h2>'), FileUpload(value={}, accept='.pdf', descr…