In [None]:
import os
import PyPDF2
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain_google_genai import ChatGoogleGenerativeAI


  embeddings = HuggingFaceEmbeddings()


Answer: The Constitution of the Islamic Republic of Pakistan, adopted on April 12, 1973, is the supreme law of the country.  It establishes Pakistan as a federal republic with Islam as the state religion.  The constitution outlines the structure of the government, including the roles of the President, Parliament (comprising the National Assembly and the Senate), the Prime Minister, the judiciary, and provincial governments.  It also guarantees fundamental rights and principles of policy, including those related to Islamic teachings and the protection of minorities.  The constitution has been amended several times since its adoption.



In [None]:
# Load the Google API key from the local `config` module so that the key itself
# is not written in this notebook, then export it through the process
# environment under the GOOGLE_API_KEY variable.
# NOTE(review): presumably the Gemini client created later reads this
# environment variable for authentication — confirm against the library docs.
from config import GOOGLE_API_KEY
os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY

In [None]:
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page in a PDF file.

    Args:
        pdf_path: Path of the PDF file to read; it is opened in binary mode.

    Returns:
        A single string holding each page's extracted text in page order,
        with no separator inserted between pages. A page for which the
        extractor yields nothing contributes an empty string.
    """
    with open(pdf_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        # `or ""` keeps a page with no extractable text (None / empty result)
        # from raising TypeError during concatenation; a single join avoids
        # rebuilding the accumulated string once per page.
        return "".join(page.extract_text() or "" for page in reader.pages)

In [None]:
def create_qa_system(pdf_paths):
    """Build a retrieval question-answering chain over the text of the given PDFs.

    The text of every PDF is extracted and concatenated, split into chunks,
    embedded with HuggingFaceEmbeddings, stored in a Chroma vector store, and
    wired to a Gemini chat model through a "stuff" RetrievalQA chain.

    Args:
        pdf_paths: Iterable of paths to the PDF files to index.

    Returns:
        The RetrievalQA chain; its retriever is configured to fetch one
        chunk (k=1) per query.
    """
    # Combine text from all PDFs, terminating each document with a newline so
    # the end of one file does not run into the start of the next.
    all_texts = "".join(
        extract_text_from_pdf(pdf_path) + "\n" for pdf_path in pdf_paths
    )

    # Split combined text into chunks. CharacterTextSplitter splits on "\n\n"
    # by default; PDF-extracted text is mostly single-newline delimited, so the
    # default can yield chunks far larger than chunk_size. Splitting on "\n"
    # lets the 1000-character limit actually take effect.
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=0,
    )
    texts = text_splitter.split_text(all_texts)

    # Create embeddings
    embeddings = HuggingFaceEmbeddings()

    # Create vector store
    db = Chroma.from_texts(texts, embeddings)

    # Initialize Gemini; temperature=0 is passed to the model constructor.
    llm = ChatGoogleGenerativeAI(
        model="gemini-1.5-flash",
        temperature=0,
    )

    # Create a retrieval chain that passes the single top-ranked chunk to the LLM.
    retriever = db.as_retriever(search_kwargs={"k": 1})
    qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)

    return qa

In [None]:
def main():
    """Run an interactive question-answering session over "Pakistan.pdf".

    Builds the QA system, then repeatedly prompts for a question, prints the
    answer, and records the exchange. Typing 'quit' (any letter case,
    surrounding whitespace ignored) ends the session; blank input is skipped
    without querying the model.

    Returns:
        A list of {'query': ..., 'answer': ...} dicts, one per answered
        question, in the order they were asked.
    """
    pdf_paths = ["Pakistan.pdf"]
    qa_system = create_qa_system(pdf_paths)

    responses = []

    while True:
        # Strip so that stray whitespace around 'quit' still exits the loop.
        query = input("Enter your question (or 'quit' to exit): ").strip()
        if query.lower() == 'quit':
            break
        if not query:
            # Nothing to ask; avoid a pointless model call.
            continue
        answer = qa_system.run(query)
        print("Answer:", answer)
        responses.append({'query': query, 'answer': answer})

    # Hand the transcript back instead of discarding it when the loop ends.
    return responses

# Start the interactive session only when this code is executed as the main
# module (not when it is imported from elsewhere).
if __name__ == "__main__":
    main()