In [1]:
from langchain_community.document_loaders import PDFPlumberLoader
from langchain_experimental.text_splitter import SemanticChunker
# from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

In [None]:
# Location of the source PDF.
# NOTE(review): machine-specific absolute path — point this at your own copy before running.
PDF_PATH = r"D:\Petramount\Courses\AI\LLM\Basic_Home_Remedies.pdf"

# Load the PDF; the resulting list is treated as one entry per page below
loader = PDFPlumberLoader(PDF_PATH)
docs = loader.load()

# Fail with a clear message instead of an opaque IndexError further down
if not docs:
    raise ValueError(f"No pages could be loaded from {PDF_PATH!r}")

# Check the number of pages
print("Number of pages in the PDF:", len(docs))

# Preview the second page when there is one, otherwise fall back to the only page
preview_index = min(1, len(docs) - 1)
docs[preview_index].page_content

In [None]:
# The splitter is handed an embedding model, which it uses to split the pages into chunks
chunk_embedder = HuggingFaceEmbeddings()
text_splitter = SemanticChunker(chunk_embedder)

# Split every loaded page into chunks
documents = text_splitter.split_documents(docs)

In [None]:
# Report how many chunks the splitter produced
chunk_count = len(documents)
print("Number of chunks created: ", chunk_count)

# Show the text of the first chunk as a sanity check
first_chunk = documents[0]
print(first_chunk.page_content)

In [5]:
# Embedding model used to index the chunks
embedder = HuggingFaceEmbeddings()

# Build a FAISS vector store over the chunks
vector = FAISS.from_documents(documents, embedder)

# Expose the store as a similarity retriever configured with k=1
retriever_options = {"k": 1}
retriever = vector.as_retriever(
    search_type="similarity",
    search_kwargs=retriever_options,
)

In [None]:
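# Alternative LLM backend (currently disabled): use an Ollama-served model
# instead of the ChatOpenAI client configured in the next cell.
# from langchain_community.llms import Ollama

# llm = Ollama(model="llama3.2:1b")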

In [None]:
# ChatOpenAI is imported from langchain_community, matching the other integrations
# used in this notebook; the legacy `langchain.chat_models` path is deprecated.
from langchain_community.chat_models import ChatOpenAI
from langchain.schema import AIMessage, HumanMessage, SystemMessage

# Chat client pointed at an OpenAI-compatible endpoint on localhost:1234.
# NOTE(review): presumably an LM Studio server; the API key looks like a
# placeholder rather than a real secret — confirm before sharing.
llm = ChatOpenAI(
    openai_api_base="http://localhost:1234/v1",
    openai_api_key="lm_studio",
    model="qwen3-0.6b",
)



In [None]:
# Optional prompt rule (currently disabled) telling the model not to invent answers:
# 2. If you don't know the answer, just say "I don't know"; don't make up an answer on your own.\n

In [7]:
from langchain.chains import RetrievalQA
from langchain.chains.llm import LLMChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.prompts import PromptTemplate

# Instruction text for the answering model; {context} receives the retrieved
# document text and {question} the user's query.
prompt = """
1. You are doctor
2. Use the following pieces of context to answer the question at the end.
3. Answer only by using the context and articulate it better, use bullet point and emoji if required
4. Keep the answer crisp and limited to 3,4 sentences.

Context: {context}

Question: {question}

Answer to the question:"""


# Wrap the raw text in a PromptTemplate with its two placeholders declared explicitly
QA_CHAIN_PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt,
)

# Chain that formats QA_CHAIN_PROMPT and sends it to the chat model;
# verbose=True is set for this step.
llm_chain = LLMChain(
    prompt=QA_CHAIN_PROMPT,
    llm=llm,
    verbose=True,
    callbacks=None,
)

# Per-document template: shows the chunk text together with its `source` metadata
document_template = "Context:\ncontent:{page_content}\nsource:{source}"
document_prompt = PromptTemplate(
    template=document_template,
    input_variables=["page_content", "source"],
)

# Formats each retrieved document with document_prompt and passes the result to
# llm_chain under the "context" variable.
combine_documents_chain = StuffDocumentsChain(
    document_prompt=document_prompt,
    document_variable_name="context",
    llm_chain=llm_chain,
    callbacks=None,
)

# End-to-end QA chain: combines the retriever with the document-combining chain.
# return_source_documents=True is requested alongside the answer.
qa = RetrievalQA(
    retriever=retriever,
    combine_documents_chain=combine_documents_chain,
    return_source_documents=True,
    verbose=True,
)

In [None]:
# Run the chain through invoke() rather than calling the chain object directly
# (direct calls are deprecated). RetrievalQA takes the question under the
# "query" key and returns the answer under "result".
response = qa.invoke({"query": "remedy for cough?"})
print(response["result"])