In [None]:
from pdfminer.high_level import extract_text
from langchain_groq import ChatGroq
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from google.colab import files
import os
import pickle

# Path of the pickled FAISS vector store. main() builds the file when it is
# missing and reuses it on later runs.
FAISS_STORE_PATH = "faiss_store.pkl"

def initialize_llm():
    """Create the Groq-hosted chat model used to answer questions.

    The API key is read from the ``GROQ_API_KEY`` environment variable rather
    than being embedded in the source, so the secret never ends up in version
    control or in a shared notebook.

    Returns:
        A ``ChatGroq`` client configured with temperature 0 and the
        ``llama-3.1-70b-versatile`` model.

    Raises:
        RuntimeError: If ``GROQ_API_KEY`` is unset or blank.
    """
    # NOTE(security): a literal key used to be hard-coded here. Treat that key
    # as compromised and revoke it; never commit credentials to source.
    api_key = os.environ.get("GROQ_API_KEY", "").strip()
    if not api_key:
        raise RuntimeError(
            "GROQ_API_KEY is not set; define it in the environment before "
            "running (e.g. os.environ['GROQ_API_KEY'] = '<your key>')."
        )
    return ChatGroq(
        temperature=0,
        groq_api_key=api_key,
        model_name="llama-3.1-70b-versatile",
    )

def initialize_embeddings():
    """Return the embedding model used to vectorise text chunks.

    Prefers the local ``sentence-transformers/all-MiniLM-L6-v2`` model via
    ``HuggingFaceEmbeddings``. If that backend cannot be imported, falls back
    to ``OpenAIEmbeddings`` constructed with its defaults.

    Returns:
        A LangChain embeddings object.
    """
    try:
        from langchain.embeddings import HuggingFaceEmbeddings

        return HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
    except ImportError:
        # Catch ImportError, not only its subclass ModuleNotFoundError:
        # LangChain reports a missing sentence-transformers package by raising
        # a plain ImportError from the constructor, which the narrower handler
        # would let escape and so never reach this fallback.
        from langchain.embeddings import OpenAIEmbeddings

        return OpenAIEmbeddings()

def extract_text_from_pdfs(uploaded_files):
    """Concatenate the extracted text of every uploaded PDF.

    Args:
        uploaded_files: Mapping whose keys are handed to pdfminer's
            ``extract_text`` as file paths; the values are not used.

    Returns:
        One string holding each document's text followed by a newline, in
        the mapping's iteration order. Empty when the mapping is empty.
    """
    per_file_text = [extract_text(pdf_name) + "\n" for pdf_name in uploaded_files]
    return "".join(per_file_text)

def build_vector_store(text_data, store_path, embeddings):
    """Split text into chunks, index them with FAISS, and pickle the index.

    Args:
        text_data: The full text to index.
        store_path: Destination file for the pickled vector store.
        embeddings: LangChain embeddings object used to vectorise the chunks.

    Returns:
        The newly built FAISS vector store (also persisted at ``store_path``).

    Raises:
        ValueError: If ``text_data`` is empty/whitespace-only or produces no
            chunks, e.g. when the PDFs contained no extractable text.
    """
    # Fail early with a clear message: an empty chunk list would otherwise
    # surface as an opaque error from inside FAISS.from_texts.
    if not text_data or not text_data.strip():
        raise ValueError("No text to index: the input text is empty.")

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    text_chunks = text_splitter.split_text(text_data)
    if not text_chunks:
        raise ValueError("No text to index: splitting produced no chunks.")

    vector_store = FAISS.from_texts(text_chunks, embeddings)

    # Write to a temporary file and rename it into place so a failed or
    # interrupted dump never leaves a truncated file at store_path, which a
    # later run would mistake for a valid cached index.
    tmp_path = f"{store_path}.tmp"
    try:
        with open(tmp_path, "wb") as f:
            pickle.dump(vector_store, f)
        os.replace(tmp_path, store_path)
    finally:
        # Only present if the dump or rename failed.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
    return vector_store

def load_vector_store(store_path):
    """Load a previously pickled vector store from disk.

    Args:
        store_path: Path of the pickle file to read.

    Returns:
        The object unpickled from ``store_path``.

    NOTE(security): unpickling can execute arbitrary code, so only load
    files that this program wrote itself, never ones from untrusted sources.
    """
    with open(store_path, "rb") as store_file:
        restored = pickle.load(store_file)
    return restored

def run_qa(vector_store, query, llm):
    """Answer a query with a retrieval-augmented QA chain.

    Args:
        vector_store: Store whose ``as_retriever()`` supplies the retriever.
        query: The question to ask.
        llm: Language model passed to ``RetrievalQA.from_llm``.

    Returns:
        Whatever the chain's ``run`` call returns for ``query``.
    """
    qa_chain = RetrievalQA.from_llm(
        llm=llm,
        retriever=vector_store.as_retriever(),
    )
    response = qa_chain.run(query)
    return response

def main():
    """Upload PDFs, build (or reuse) the FAISS index, and answer one question.

    Flow:
        1. Prompt for file upload via ``google.colab.files``.
        2. If no saved index exists at ``FAISS_STORE_PATH``, extract the text
           of the uploaded PDFs and build the index.
        3. Read a question from stdin and print the model's answer.

    Returns early, after printing a message, when there is nothing to index.
    An empty question ends the run silently.
    """
    uploaded_files = files.upload()
    llm = initialize_llm()

    # NOTE: an existing index is reused as-is, so files uploaded on this run
    # are ignored when FAISS_STORE_PATH already exists. Delete that file to
    # force re-indexing.
    if not os.path.exists(FAISS_STORE_PATH):
        if not uploaded_files:
            # Without this guard, an empty upload would try to index empty
            # text and fail inside the vector-store build.
            print("No PDF files were uploaded and no saved index exists; nothing to search.")
            return
        # The embedding model is only needed to build the index; the pickled
        # store is loaded without it, so skip the load on cached runs.
        embeddings = initialize_embeddings()
        text_data = extract_text_from_pdfs(uploaded_files)
        build_vector_store(text_data, FAISS_STORE_PATH, embeddings)

    query = input("Ask a question: ").strip()
    if not query:
        return

    vector_store = load_vector_store(FAISS_STORE_PATH)
    answer = run_qa(vector_store, query, llm)
    print(f"Answer:\n{answer}")

# Run the interactive pipeline only when executed as a script or notebook
# cell, not when this module is imported.
if __name__ == "__main__":
    main()
