In [None]:
# ----------------- STEP 1: Install These First -----------------
# pip install langchain langchain-community sentence-transformers faiss-cpu transformers pypdf pymupdf requests

# ----------------- STEP 2: Load PDF and Split -----------------
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
import os

# Build the retrieval index at module level: locate the PDF, split it into
# overlapping chunks, embed the chunks and store them in a FAISS index.
# `faiss_index` is read later by ask_question().

# The relative path is resolved against the current working directory.
pdf_path = os.path.abspath("../Data/Medical_book.pdf")
# Fail fast with a clear message instead of letting the loader fail later.
if not os.path.isfile(pdf_path):
    raise ValueError(f"❌ PDF not found at: {pdf_path}")

# Load and split PDF
print("📄 Loading PDF...")
loader = PyPDFLoader(pdf_path)
documents = loader.load()

# chunk_overlap repeats the tail of each chunk at the start of the next one,
# so text cut at a chunk boundary still appears whole in one of the chunks.
# NOTE(review): sizes are presumably measured in characters (the splitter's
# default length function) — confirm against the installed LangChain version.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
text_chunks = text_splitter.split_documents(documents)
print(f"✅ Loaded and split into {len(text_chunks)} chunks.")

# ----------------- STEP 3: Create FAISS Embedding Index -----------------
print("🔍 Creating FAISS index...")
# The same embedding model is handed to FAISS, which uses it to embed the
# chunks here and the query strings passed to similarity_search() later.
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
faiss_index = FAISS.from_documents(text_chunks, embedding=embedding_model)
# Persist the index under the relative path "medical_index" (resolved against
# the current working directory). Nothing in this file loads it back; the
# in-memory `faiss_index` is what the rest of the script uses.
faiss_index.save_local("medical_index")
print("✅ FAISS index saved as 'medical_index'.")

# ----------------- STEP 4: Call HuggingFace API -----------------
import requests

def call_huggingface_api(prompt, model="google/flan-t5-large", hf_token=None, *, timeout=60):
    """Send *prompt* to the Hugging Face Inference API and return the reply.

    Args:
        prompt: Text sent as the ``inputs`` field of the request payload.
        model: Model id appended to the inference endpoint URL.
        hf_token: Optional API token. When truthy it is sent as a Bearer
            ``Authorization`` header; otherwise no auth header is sent.
        timeout: Seconds handed to ``requests.post`` as its ``timeout``
            argument, so a stalled connection cannot block forever.

    Returns:
        When the API answers with a list whose first item is a dict, the
        stripped ``generated_text`` of that item (empty string if the key is
        missing or null). An empty list yields an empty string. Any other
        JSON body is returned as ``str()`` of the decoded value.

    Raises:
        RuntimeError: If the API responds with a non-200 status code.
        requests.exceptions.RequestException: Propagated unchanged from
            ``requests`` on connection failure or timeout.
    """
    API_URL = f"https://api-inference.huggingface.co/models/{model}"
    headers = {"Authorization": f"Bearer {hf_token}"} if hf_token else {}

    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": 256,
            "temperature": 0.5,
            "return_full_text": False
        }
    }

    # Without an explicit timeout requests waits indefinitely for the server.
    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
    if response.status_code != 200:
        raise RuntimeError(f"❌ API Error {response.status_code}: {response.text}")

    result = response.json()
    if isinstance(result, list):
        # An empty list carries no generation; avoid IndexError on result[0].
        if not result:
            return ""
        first = result[0]
        if isinstance(first, dict):
            # `or ""` also covers an explicit null value for generated_text.
            return (first.get("generated_text") or "").strip()
    # Anything else (dict, scalar, list of non-dicts) is returned verbatim.
    return str(result)

# ----------------- STEP 5: Ask Question -----------------
def ask_question(query, model="google/flan-t5-large", hf_token=None, k=3):
    """Answer *query* using text retrieved from the module-level FAISS index.

    The ``k`` chunks most similar to the query are fetched from
    ``faiss_index``, joined into a context section, and sent together with
    the question to ``call_huggingface_api``.

    Args:
        query: Question text; used for retrieval and embedded in the prompt.
        model: Model id forwarded to ``call_huggingface_api``.
        hf_token: Optional API token forwarded to ``call_huggingface_api``.
        k: Number of chunks to retrieve as context; defaults to 3.

    Returns:
        Whatever ``call_huggingface_api`` returns for the assembled prompt.
    """
    # Retrieve relevant chunks
    docs = faiss_index.similarity_search(query, k=k)
    # Blank line between chunks keeps them visually separate in the prompt.
    context = "\n\n".join(doc.page_content for doc in docs)

    prompt = f"""You are a helpful medical assistant. Answer ONLY using the context below.

Context:
{context}

Question: {query}
Answer:"""

    return call_huggingface_api(prompt, model=model, hf_token=hf_token)

# ----------------- STEP 6: Try It -----------------
if __name__ == "__main__":
    # Demo run: ask one sample question and print the model's answer.
    question = "What is Acne?"
    print("🧠 Asking:", question)
    # Authenticate with the HF_TOKEN environment variable when it is set;
    # when it is unset this passes None and the call is anonymous.
    answer = ask_question(question, hf_token=os.environ.get("HF_TOKEN"))
    print("💬 Answer:", answer)
