In [1]:
# Install required libraries (IPython shell escape; only valid inside a notebook cell).
# NOTE(review): the captured output below this cell shows the sentencepiece 0.2.0
# source build failing (no C/C++ compiler found by CMake), and the cell then ends
# with "ModuleNotFoundError: No module named 'PyPDF2'". Nothing in this cell
# imports sentencepiece directly -- confirm whether it can be dropped from the list.
!pip install transformers torch sentencepiece PyPDF2 sentence-transformers faiss-cpu gradio

# Import the required libraries
import PyPDF2
import faiss
import gradio as gr
import torch
from sentence_transformers import SentenceTransformer
from transformers import pipeline

#Extract Text from PDF
def extract_text_from_pdf(pdf_path):
    """Read a PDF file and return the extracted text of all its pages.

    Args:
        pdf_path: Filesystem path of the PDF to read.

    Returns:
        One string containing each page's extracted text, with a newline
        appended after every page.
    """
    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        # Treat a falsy extraction result (None or empty) as "no text" so a
        # page without extractable text cannot break the concatenation.
        page_texts = [page.extract_text() or "" for page in reader.pages]
    # Terminate each page with a real newline so the last word of one page
    # does not run into the first word of the next.
    return "".join(f"{page_text}\n" for page_text in page_texts)

#Split Text into Chunks (for the speed)
def split_text_into_chunks(text, chunk_size=300):
    """Break text into consecutive chunks of at most ``chunk_size`` words.

    The text is split on whitespace, so the words inside each chunk are
    re-joined with single spaces.

    Args:
        text: The text to split.
        chunk_size: Maximum number of words per chunk.

    Returns:
        A list of chunk strings; empty when the text contains no words.
    """
    tokens = text.split()
    pieces = []
    for start in range(0, len(tokens), chunk_size):
        window = tokens[start:start + chunk_size]
        pieces.append(" ".join(window))
    return pieces

# Load the sentence-embedding model once at module level; it is used both to
# encode the PDF chunks and to encode each question in get_relevant_chunks().
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

#Build FAISS Index
def build_faiss_index(chunk_embeddings):
    """Create a flat L2 FAISS index populated with the given embeddings.

    Args:
        chunk_embeddings: 2-D array of embeddings, one row per chunk; the
            second dimension is used as the index dimensionality.

    Returns:
        The ``faiss.IndexFlatL2`` instance with all rows added.
    """
    flat_index = faiss.IndexFlatL2(chunk_embeddings.shape[1])
    flat_index.add(chunk_embeddings)
    return flat_index

#Retrieve Relevant Chunks (Set top_k=1 for speed)
def get_relevant_chunks(question, index, chunks, top_k=1):
    """Return the text chunks whose embeddings are nearest to the question.

    Args:
        question: Question string; embedded with the module-level
            ``embedding_model``.
        index: Index object whose ``search(vectors, k)`` returns a
            ``(distances, indices)`` pair, such as the one produced by
            ``build_faiss_index``.
        chunks: Sequence of text chunks, positionally aligned with the
            vectors stored in ``index``.
        top_k: Maximum number of chunks to return.

    Returns:
        A list of at most ``top_k`` chunks, in the order the index returned
        them.
    """
    question_embedding = embedding_model.encode([question])
    _, indices = index.search(question_embedding, top_k)
    # FAISS pads the result with -1 when the index holds fewer than top_k
    # vectors; skip those slots, since chunks[-1] would silently return the
    # last chunk as if it were a match.
    return [chunks[idx] for idx in indices[0] if idx >= 0]

# Load the question-answering pipeline once at module level; generate_response()
# calls it for every question. The device argument is 0 when torch reports CUDA
# as available and -1 otherwise.
qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2", device=0 if torch.cuda.is_available() else -1)  # Uses GPU if available

#Generating Answer
def generate_response(question, relevant_chunks):
    """Answer a question using the retrieved chunks as context.

    Args:
        question: The question string passed to the QA pipeline.
        relevant_chunks: Iterable of chunk strings; they are joined with
            single spaces to form the context.

    Returns:
        The value stored under the ``"answer"`` key of the pipeline result.
    """
    result = qa_pipeline(question=question, context=" ".join(relevant_chunks))
    return result["answer"]

#Main Chatbot
def chatbot(question):
    """Answer a question from the indexed PDF content.

    Retrieves the most relevant chunk(s) via the module-level ``index`` and
    ``chunks``, then passes them to ``generate_response``.

    Args:
        question: The question string entered by the user.

    Returns:
        The answer produced by ``generate_response``.
    """
    context_chunks = get_relevant_chunks(question, index, chunks)
    return generate_response(question, context_chunks)

# Load the PDF and prepare the retrieval data.
# `chunks` and `index` are module-level names that chatbot() reads as globals,
# so they must be defined before the interface handles its first question.
# NOTE(review): the path is hard-coded; the captured install log below comes
# from a Windows environment (win_amd64 wheels) -- confirm this path exists there.
pdf_path = "/content/jess401.pdf"
# Full text of the PDF, all pages concatenated.
book_text = extract_text_from_pdf(pdf_path)
# Word-based chunks (default chunk_size of split_text_into_chunks).
chunks = split_text_into_chunks(book_text)
# One embedding per chunk, in the same order as `chunks`.
chunk_embeddings = embedding_model.encode(chunks)
# Similarity index over the chunk embeddings.
index = build_faiss_index(chunk_embeddings)

# Launch the Gradio chatbot: a text input wired to chatbot(), with a text output.
iface = gr.Interface(fn=chatbot, inputs="text", outputs="text", title="PDF-Based Question Answering Chatbot")
# NOTE(review): share=True requests a publicly shareable Gradio link -- confirm
# that exposing the app outside the local machine is intended.
iface.launch(share=True)


Defaulting to user installation because normal site-packages is not writeable
Collecting transformers
  Downloading transformers-4.48.3-py3-none-any.whl.metadata (44 kB)
Collecting torch
  Downloading torch-2.6.0-cp313-cp313-win_amd64.whl.metadata (28 kB)
Collecting sentencepiece
  Downloading sentencepiece-0.2.0.tar.gz (2.6 MB)
     ---------------------------------------- 0.0/2.6 MB ? eta -:--:--
     ---------------------------------------- 0.0/2.6 MB ? eta -:--:--
     ---------------------------------------- 0.0/2.6 MB ? eta -:--:--
     ---------------------------------------- 0.0/2.6 MB ? eta -:--:--
     ---------------------------------------- 0.0/2.6 MB ? eta -:--:--
     ---------------------------------------- 0.0/2.6 MB ? eta -:--:--
     ---------------------------------------- 0.0/2.6 MB ? eta -:--:--
     ---------------------------------------- 0.0/2.6 MB ? eta -:--:--
     ---------------------------------------- 0.0/2.6 MB ? eta -:--:--
     -------------------------

  error: subprocess-exited-with-error
  
  × python setup.py egg_info did not run successfully.
  │ exit code: 1
  ╰─> [50 lines of output]
      -- Building for: NMake Makefiles
        Compatibility with CMake < 3.5 will be removed from a future version of
        CMake.
      
        Update the VERSION argument <min> value or use a ...<max> suffix to tell
        CMake that the project does not need compatibility with older versions.
      
      
      -- VERSION: 0.2.0
      CMake Error at CMakeLists.txt:23 (project):
        Generator
      
          NMake Makefiles
      
        does not support platform specification, but platform
      
          x64
      
        was specified.
      
      
      CMake Error: CMAKE_C_COMPILER not set, after EnableLanguage
      CMake Error: CMAKE_CXX_COMPILER not set, after EnableLanguage
      -- Configuring incomplete, errors occurred!
      Traceback (most recent call last):
        File "<string>", line 2, in <module>
          exec(

ModuleNotFoundError: No module named 'PyPDF2'