# In [2]:
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import torch

# Device index in the convention the transformers `pipeline` factory uses:
# a CUDA ordinal (0 = first GPU) when CUDA is available, otherwise -1 for CPU.
device = 0 if torch.cuda.is_available() else -1

# Load model and tokenizer for question-answering
model_path = "distilbert-base-uncased-distilled-squad"
# The model is not moved manually: -1 is not a valid device index for
# `Module.to()`, so doing that would fail on CPU-only machines. The pipeline
# places the model on `device` itself.
model = AutoModelForQuestionAnswering.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Use the detected device instead of a hard-coded 0 so hosts without a GPU work.
qa_pipeline = pipeline(
    "question-answering", model=model, tokenizer=tokenizer, device=device
)

def get_pdf_text(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Location of the PDF; handed unchanged to ``PdfReader``.

    Returns:
        A single string with the extracted text of all pages. Nothing is
        inserted between pages. Pages that yield no text contribute an
        empty string.
    """
    pdf_reader = PdfReader(pdf_path)
    # `or ""` keeps the join from raising TypeError should a page's
    # extract_text() come back as None (NOTE(review): depends on the installed
    # PyPDF2 version -- confirm). A single join also avoids rebuilding the
    # string once per page.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)

def get_text_chunks(text):
    """Split ``text`` into overlapping chunks for embedding.

    Args:
        text: The full document text to split.

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.split_text`` produces for a
        splitter configured with ``chunk_size=1000`` and ``chunk_overlap=200``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
    )
    return splitter.split_text(text)

def create_faiss_vector_store(chunks):
    """Build a FAISS vector store from text chunks.

    Args:
        chunks: The texts to embed and index.

    Returns:
        The store returned by ``FAISS.from_texts`` when the chunks are embedded
        with the ``sentence-transformers/all-MiniLM-L12-v2`` model.
    """
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L12-v2"
    )
    return FAISS.from_texts(texts=chunks, embedding=embedder)

def answer_question(question, vectorstore):
    """Answer ``question`` using passages retrieved from ``vectorstore``.

    Args:
        question: The question text; used both as the retrieval query and as
            the question given to the QA pipeline.
        vectorstore: Object exposing ``similarity_search(query)`` whose results
            carry a ``page_content`` attribute.

    Returns:
        The ``"answer"`` entry of the module-level ``qa_pipeline`` response.
    """
    # Retrieval step: fetch the stored passages closest to the question.
    hits = vectorstore.similarity_search(question)
    passages = [hit.page_content for hit in hits]

    # Extraction step: the passages, joined by single spaces, form the context.
    result = qa_pipeline({"question": question, "context": " ".join(passages)})
    return result["answer"]

# Main logic
# Runs the whole flow once when executed as a script: extract the PDF text,
# split it into chunks, index the chunks, then answer one hard-coded question
# against that index and print the result.
if __name__ == "__main__":
    pdf_path = "transformer.pdf"  # Path to your PDF file
    question = "what is transformers wrt to llms"
    
    # Process PDF and create vector store
    text = get_pdf_text(pdf_path)
    text_chunks = get_text_chunks(text)
    vectorstore = create_faiss_vector_store(text_chunks)
    
    # Answer the question
    # The answer is taken from the chunks retrieved for `question`.
    answer = answer_question(question, vectorstore)
    print("Answer:", answer)


# Output: Answer: Core type transformer and (ii) Shell type transformer
