In [20]:
from pathlib import Path
import json
from langchain_community.llms import GPT4All
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.chains import RetrievalQA

# Filesystem locations, all expressed relative to the current working directory.
# The model file and the vector store directory are resolved to absolute paths;
# the answers file is deliberately left as a relative path.
MODEL_PATH = (Path("..") / "models" / "phi-2.Q4_K_M.gguf").resolve()
CHROMA_DIR = (Path("..") / "vectorstore").resolve()
QA_OUTPUT_PATH = Path("..") / "output" / "05_answers_phi2.json"

In [21]:
# Show which model file is about to be loaded, and stop immediately with a
# readable message if it is missing rather than failing inside the loader.
print("Model path:", MODEL_PATH)
assert MODEL_PATH.exists(), f"\u274c Model file not found: {MODEL_PATH}"

# Constructor arguments gathered in one place so they are easy to tweak.
gpt4all_options = {
    "model": str(MODEL_PATH),
    "backend": "llama",
    "verbose": True,
}
llm = GPT4All(**gpt4all_options)
print("\u2705 LLM loaded successfully")

Model path: C:\Users\Adi Awaskar\Documents\GitHub\Adobe-Hackathon\models\phi-2.Q4_K_M.gguf
✅ LLM loaded successfully


In [22]:
# Build the retriever on top of the persisted Chroma vector store.
#
# Fail fast when the store directory is missing: without this check a wrong
# path goes unnoticed until the chain starts answering without any context.
assert CHROMA_DIR.is_dir(), f"\u274c Vector store directory not found: {CHROMA_DIR}"

# NOTE(review): this must be the same embedding model that was used when the
# store was populated, otherwise similarity search is meaningless; confirm.
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

vectorstore = Chroma(
    persist_directory=str(CHROMA_DIR),
    embedding_function=embedding_model
)

# An empty collection means every query retrieves nothing and the LLM answers
# from its own knowledge only, so treat it as a setup error. Fetching a single
# id is enough to tell whether anything is stored.
assert vectorstore.get(limit=1)["ids"], (
    f"\u274c Vector store at {CHROMA_DIR} contains no documents "
    "(wrong directory or collection name?)"
)

retriever = vectorstore.as_retriever()
print("\u2705 Retriever ready")

✅ Retriever ready


In [23]:
# Wire the loaded LLM and the retriever into a RetrievalQA chain.
# The chain is asked to hand back the retrieved source documents with each result.
chain_options = {
    "chain_type": "stuff",
    "return_source_documents": True,
}
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, **chain_options)

print("\u2705 RetrievalQA chain ready")


✅ RetrievalQA chain ready


In [24]:
# Questions to put to the retrieval QA chain, in the order they are asked.
questions = [
    "What is the purpose of the PDF heading detection system?",
    "Explain the architecture used in the proposed solution.",
    "What are the unique features of the system?",
    "How does the system handle multilingual documents?",
    "What is the roadmap of the project?"
]

# Maximum number of answer characters echoed to the notebook output; the full
# answer is always kept in `answers`.
PREVIEW_CHARS = 500

# Rebuilt from scratch on every run so re-executing the cell never duplicates entries.
answers = []

for q in questions:
    # "\U0001f539" is the single code point for the small blue diamond. Writing it
    # as the surrogate pair "\ud83d\udd39" yields lone surrogates in a Python str,
    # which cannot be encoded when printed.
    print(f"\n\U0001f539 Q: {q}")
    result = qa_chain.invoke({"query": q})
    answer = result["result"]

    # Only mark the preview as truncated when text was actually cut off.
    ellipsis = "..." if len(answer) > PREVIEW_CHARS else ""
    print(f"\u27a1\ufe0f A: {answer[:PREVIEW_CHARS]}{ellipsis}\n")

    # When the chain returns its source documents, flag questions for which
    # nothing was retrieved: such answers are not grounded in the indexed content.
    if "source_documents" in result and not result["source_documents"]:
        print("\u26a0\ufe0f No source documents were retrieved for this question\n")

    answers.append({
        "question": q,
        "answer": answer
    })

➡️ A:  I'm sorry, as an AI language model, I don't have access to the specific details about the project. Can you please provide more information or context so that I can better answer your question?

...



In [25]:
# Persist the collected question/answer pairs as pretty-printed UTF-8 JSON,
# creating the output directory first when it does not exist yet.
output_dir = QA_OUTPUT_PATH.parent
output_dir.mkdir(parents=True, exist_ok=True)

# ensure_ascii=False keeps non-ASCII characters readable in the saved file.
with QA_OUTPUT_PATH.open("w", encoding="utf-8") as out_file:
    json.dump(answers, out_file, indent=2, ensure_ascii=False)

print(f"\u2705 Answers saved to {QA_OUTPUT_PATH}")

✅ Answers saved to ..\output\05_answers_phi2.json
