In [1]:
import os
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from langchain_core.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

In [18]:
model_path = r"C:\Users\chill\tinyllama"
tokenizer_path = model_path

In [19]:

def load_llm_local(model_path=model_path):
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_path)

    pipe = pipeline(
        "text2text-generation",
        model=model,
        tokenizer=tokenizer,
        max_length=512,
        torch_dtype=torch.float32,
        device=0 if torch.cuda.is_available() else -1
    )

    llm = HuggingFacePipeline(pipeline=pipe)
    return llm

In [24]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain.llms import HuggingFacePipeline
import torch

def load_llm_local(model_path=r"C:\Users\chill\tinyllama"):
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForCausalLM.from_pretrained(model_path)

    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_length=512,
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        device=0 if torch.cuda.is_available() else -1
    )

    return HuggingFacePipeline(pipeline=pipe)


In [25]:
# Update custom prompt function if not already done
def set_custom_prompt(custom_prompt_template):
    return PromptTemplate(template=custom_prompt_template, input_variables=["context", "question"])

custom_prompt_template = """
Use the context below to answer the question. If unsure, say "I don't know".

Context: {context}
Question: {question}

Answer:
"""

# Final chain
qa_chain = RetrievalQA.from_chain_type(
    llm=load_llm_local(),
    chain_type="stuff",
    retriever=db.as_retriever(search_kwargs={"k": 3}),
    return_source_documents=True,
    chain_type_kwargs={"prompt": set_custom_prompt(custom_prompt_template)}
)

# Run it
user_query = input("Enter your question: ")
response = qa_chain.invoke({"question": user_query})


RuntimeError: Error(s) in loading state_dict for Linear:
	size mismatch for weight: copying a param with shape torch.Size([32000, 2048]) from checkpoint, the shape in current model is torch.Size([32000, 4096]).

In [20]:
custom_prompt_template = """
Use the pieces of information provided in the context to answer user's question.
IF you don't know the answer, just say that you don't know, don't try to make up an answer.
Don't provide anything out of the given context.

Context: {context}
Question: {question}

Start the answer directly. No small talk please.
"""

In [21]:
def set_custom_prompt(custom_prompt_template):
    prompt = PromptTemplate(template=custom_prompt_template, input_variables=["context", "question"])
    return prompt

In [22]:
DB_FAISS_PATH = "vectorstore/db_faiss"
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
db = FAISS.load_local(DB_FAISS_PATH, embeddings=embedding_model, allow_dangerous_deserialization=True)


In [23]:
qa_chain = RetrievalQA.from_chain_type(
    llm=load_llm_local(),  # Local FLAN-T5 model
    chain_type="stuff",
    retriever=db.as_retriever(search_kwargs={'k': 3}),
    return_source_documents=True,
    chain_type_kwargs={'prompt': set_custom_prompt(custom_prompt_template)}
)

ValueError: Unrecognized configuration class <class 'transformers.models.llama.configuration_llama.LlamaConfig'> for this kind of AutoModel: AutoModelForSeq2SeqLM.
Model type should be one of BartConfig, BigBirdPegasusConfig, BlenderbotConfig, BlenderbotSmallConfig, EncoderDecoderConfig, FSMTConfig, GPTSanJapaneseConfig, LEDConfig, LongT5Config, M2M100Config, MarianConfig, MBartConfig, MT5Config, MvpConfig, NllbMoeConfig, PegasusConfig, PegasusXConfig, PLBartConfig, ProphetNetConfig, Qwen2AudioConfig, SeamlessM4TConfig, SeamlessM4Tv2Config, SwitchTransformersConfig, T5Config, UMT5Config, XLMProphetNetConfig.

In [26]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
import torch

# Define your model and database paths
model_path = r"C:\Users\chill\tinyllama"  # This directory must contain the model files and tokenizer.
DB_FAISS_PATH = "vectorstore/db_faiss"

# Step 1: Load a causal language model (TinyLlama in this example)
def load_llm_local(model_path=model_path):
    # Load the tokenizer and model using classes for causal language models.
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForCausalLM.from_pretrained(model_path)
    
    # Create a text-generation pipeline.
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_length=512,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
        torch_dtype=torch.float32,
        device=0 if torch.cuda.is_available() else -1
    )
    
    # Wrap the pipeline for LangChain compatibility.
    llm = HuggingFacePipeline(pipeline=pipe)
    return llm

# Step 2: Define your custom prompt template.
custom_prompt_template = """
Use the context below to answer the question. If unsure, just say "I don't know".

Context: {context}
Question: {query}

Answer:
"""

def set_custom_prompt(template):
    # The prompt expects two inputs: context and query.
    return PromptTemplate(template=template, input_variables=["context", "query"])

# Step 3: Load the FAISS vector database and embedding model.
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
db = FAISS.load_local(DB_FAISS_PATH, embeddings=embedding_model, allow_dangerous_deserialization=True)

# Step 4: Create the RetrievalQA chain.
qa_chain = RetrievalQA.from_chain_type(
    llm=load_llm_local(),
    chain_type="stuff",
    retriever=db.as_retriever(search_kwargs={'k': 3}),
    return_source_documents=True,
    chain_type_kwargs={'prompt': set_custom_prompt(custom_prompt_template)}
)

# Step 5: Ask the user for a question and get a response.
user_query = input("Enter your question: ")
response = qa_chain.invoke({"query": user_query})

# Step 6: Print the output.
print("\nRESULT:\n", response["result"])
print("\nSOURCE DOCUMENTS:")
for doc in response["source_documents"]:
    print("-", doc.metadata.get("source", "Unknown"))


RuntimeError: Error(s) in loading state_dict for Linear:
	size mismatch for weight: copying a param with shape torch.Size([32000, 2048]) from checkpoint, the shape in current model is torch.Size([32000, 4096]).

In [27]:
pip install ctransformers


Note: you may need to restart the kernel to use updated packages.


In [29]:
qa_chain = RetrievalQA.from_chain_type(
    llm=load_llm_local(),
    chain_type="stuff",
    retriever=db.as_retriever(search_kwargs={'k': 3}),
    return_source_documents=True,
    input_key="query",
    chain_type_kwargs={'prompt': set_custom_prompt(custom_prompt_template)}
)

In [30]:
from langchain.llms import CTransformers
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

# Paths
model_path = r"C:\Users\chill\llama\llama-2-7b-chat.Q4_K_M.gguf"
DB_FAISS_PATH = "vectorstore/db_faiss"

# Step 1: Load the LLM using CTransformers (no tokenizer needed)
def load_llm_local():
    return CTransformers(
        model=model_path,
        model_type="llama",  # You can also try "llama2" depending on support
        config={
            "max_new_tokens": 512,
            "temperature": 0.7,
            "top_p": 0.95
        }
    )

# Step 2: Custom prompt template
custom_prompt_template = """
Use the context below to answer the question. If unsure, just say "I don't know".

Context: {context}
Question: {query}

Answer:
"""

def set_custom_prompt(template):
    return PromptTemplate(template=template, input_variables=["context", "query"])

# Step 3: Load FAISS vector store
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
db = FAISS.load_local(DB_FAISS_PATH, embeddings=embedding_model, allow_dangerous_deserialization=True)

# Step 4: Create the RetrievalQA chain
qa_chain = RetrievalQA.from_chain_type(
    llm=load_llm_local(),
    chain_type="stuff",
    retriever=db.as_retriever(search_kwargs={'k': 3}),
    return_source_documents=True,
    input_key="query",
    chain_type_kwargs={'prompt': set_custom_prompt(custom_prompt_template)}
)

# Step 5: Ask user and invoke
user_query = input("Enter your question: ")
response = qa_chain.invoke({"query": user_query})

# Step 6: Print result
print("\nRESULT:\n", response["result"])
print("\nSOURCE DOCUMENTS:")
for doc in response["source_documents"]:
    print("-", doc.metadata.get("source", "Unknown"))


ValueError: Missing some input keys: {'query'}

In [31]:
from langchain.llms import CTransformers
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA, LLMChain
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

# 1. Load LLM via CTransformers (GGUF)
def load_llm_local():
    return CTransformers(
        model="C:/Users/chill/llama/llama-2-7b-chat.Q4_K_M.gguf",
        model_type="llama",
        config={
            "max_new_tokens": 512,
            "temperature": 0.7,
            "top_p": 0.95
        }
    )

# 2. Prompt template using "query"
prompt_template = PromptTemplate(
    template="""
Use the context below to answer the question. If unsure, just say "I don't know".

Context: {context}
Question: {query}

Answer:
""",
    input_variables=["context", "query"]
)

# 3. Load vectorstore
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
db = FAISS.load_local("vectorstore/db_faiss", embeddings=embedding_model, allow_dangerous_deserialization=True)

# 4. Manually create QA chain
llm = load_llm_local()
llm_chain = LLMChain(llm=llm, prompt=prompt_template)

qa_chain = RetrievalQA(
    retriever=db.as_retriever(search_kwargs={"k": 3}),
    combine_documents_chain=llm_chain,
    return_source_documents=True,
    input_key="query"  # Tell it to expect "query" in input
)

# 5. Run
user_query = input("Enter your question: ")
response = qa_chain.invoke({"query": user_query})

print("\nRESULT:\n", response["result"])
print("\nSOURCE DOCUMENTS:")
for doc in response["source_documents"]:
    print("-", doc.metadata.get("source", "Unknown"))


  llm_chain = LLMChain(llm=llm, prompt=prompt_template)
  qa_chain = RetrievalQA(


AttributeError: 'LLMChain' object has no attribute 'get'

In [32]:
from langchain.llms import CTransformers
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains.question_answering import load_qa_chain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain

# 1. Load LLM
def load_llm_local():
    return CTransformers(
        model="C:/Users/chill/llama/llama-2-7b-chat.Q4_K_M.gguf",
        model_type="llama",
        config={
            "max_new_tokens": 512,
            "temperature": 0.7,
            "top_p": 0.95
        }
    )

# 2. Custom prompt
custom_prompt_template = PromptTemplate(
    template="""
Use the context below to answer the question. If unsure, just say "I don't know".

Context: {context}
Question: {query}

Answer:
""",
    input_variables=["context", "query"]
)

# 3. Load FAISS DB
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
db = FAISS.load_local("vectorstore/db_faiss", embeddings=embedding_model, allow_dangerous_deserialization=True)

# 4. Wrap in StuffDocumentsChain
llm = load_llm_local()
stuff_chain = StuffDocumentsChain(
    llm_chain=load_qa_chain(llm=llm, chain_type="stuff", prompt=custom_prompt_template),
    document_variable_name="context"
)

# 5. Create RetrievalQA with custom input_key
qa_chain = RetrievalQA(
    retriever=db.as_retriever(search_kwargs={"k": 3}),
    combine_documents_chain=stuff_chain,
    return_source_documents=True,
    input_key="query"
)

# 6. Run the query
user_query = input("Enter your question: ")
response = qa_chain.invoke({"query": user_query})

# 7. Show results
print("\nRESULT:\n", response["result"])
print("\nSOURCE DOCUMENTS:")
for doc in response["source_documents"]:
    print("-", doc.metadata.get("source", "Unknown"))


stuff: https://python.langchain.com/docs/versions/migrating_chains/stuff_docs_chain
map_reduce: https://python.langchain.com/docs/versions/migrating_chains/map_reduce_chain
refine: https://python.langchain.com/docs/versions/migrating_chains/refine_chain
map_rerank: https://python.langchain.com/docs/versions/migrating_chains/map_rerank_docs_chain

See also guides on retrieval and question-answering here: https://python.langchain.com/docs/how_to/#qa-with-rag
  llm_chain=load_qa_chain(llm=llm, chain_type="stuff", prompt=custom_prompt_template),
  stuff_chain = StuffDocumentsChain(


AttributeError: 'StuffDocumentsChain' object has no attribute 'prompt'

In [34]:
from langchain.llms import CTransformers
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains.llm import LLMChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain

# Step 1: Load LLaMA model with CTransformers
def load_llm_local():
    return CTransformers(
        model="C:/Users/chill/llama/llama-2-7b-chat.Q4_K_M.gguf",
        model_type="llama",
        config={
            "max_new_tokens": 512,
            "temperature": 0.7,
            "top_p": 0.95
        }
    )

# Step 2: Custom prompt
custom_prompt_template = PromptTemplate(
    template="""
Use the context below to answer the question. If unsure, just say "I don't know".

Context: {context}
Question: {input}

Answer:
""",
    input_variables=["context", "input"]
)

# Step 3: Load FAISS vector store
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
db = FAISS.load_local("vectorstore/db_faiss", embeddings=embedding_model, allow_dangerous_deserialization=True)

# Step 4: Wrap LLM in a chain using the prompt
llm = load_llm_local()
llm_chain = LLMChain(llm=llm, prompt=custom_prompt_template)
stuff_chain = StuffDocumentsChain(llm_chain=llm_chain, document_variable_name="context")

# Step 5: Build RetrievalQA without chain_type_kwargs (since we define combine_documents_chain manually)
qa_chain = RetrievalQA(
    retriever=db.as_retriever(search_kwargs={"k": 3}),
    combine_documents_chain=stuff_chain,
    return_source_documents=True,
    input_key="input"
)

# Step 6: Run the query
user_query = input("Enter your question: ")
response = qa_chain.invoke({"input": user_query})

# Step 7: Display result
print("\nRESULT:\n", response["result"])
print("\nSOURCE DOCUMENTS:")
for doc in response["source_documents"]:
    print("-", doc.metadata.get("source", "Unknown"))


ValueError: Missing some input keys: {'input'}

In [36]:
from langchain.llms import CTransformers
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains.llm import LLMChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain

# Step 1: Load LLaMA model with CTransformers
def load_llm_local():
    return CTransformers(
        model="C:/Users/chill/llama/llama-2-7b-chat.Q4_K_M.gguf",
        model_type="llama",
        config={
            "max_new_tokens": 512,
            "temperature": 0.7,
            "top_p": 0.95
        }
    )

In [37]:
from langchain.prompts import PromptTemplate

custom_prompt_template = PromptTemplate(
    template="""
<s>[INST] <<SYS>>
You are an expert in medical knowledge. Use the following context to answer the question.
Context: {context}
<</SYS>>

{question} [/INST]
""",
    input_variables=["context", "question"]
)

In [38]:
# Step 3: Load FAISS vector store
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
db = FAISS.load_local("vectorstore/db_faiss", embeddings=embedding_model, allow_dangerous_deserialization=True)

# Step 4: Wrap LLM in a chain using the prompt
llm = load_llm_local()
llm_chain = LLMChain(llm=llm, prompt=custom_prompt_template)
stuff_chain = StuffDocumentsChain(llm_chain=llm_chain, document_variable_name="context")

# Step 5: Build RetrievalQA with input_key="question"
qa_chain = RetrievalQA(
    retriever=db.as_retriever(search_kwargs={"k": 3}),
    combine_documents_chain=stuff_chain,
    return_source_documents=True,
    input_key="question"
)


In [None]:
user_query = input("Enter your question: ")
response = qa_chain.invoke({"question": user_query})

# Step 7: Display result
print("\nRESULT:\n", response["result"])
print("\nSOURCE DOCUMENTS:")
for doc in response["source_documents"]:
    print("-", doc.metadata.get("source", "Unknown"))

In [41]:


from langchain.chains import RetrievalQA
from langchain.llms import CTransformers

# Load the model
llm = CTransformers(
    model="C:/Users/chill/llama/llama-2-7b-chat.Q4_K_M.gguf",
    model_type="llama",
    config={
        "max_new_tokens": 256,  # Reduced to leave room for input
        "temperature": 0.7,
        "top_p": 0.95
    }
)

# Set up retriever with k=1
retriever = db.as_retriever(search_kwargs={"k": 1})

# Create QA chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",  # Uses StuffDocumentsChain
    retriever=retriever,
    return_source_documents=True,
    input_key="question"
)

# Test it
response = qa_chain({"question": "Your question here"})
print(response["result"])

  response = qa_chain({"question": "Your question here"})


 


In [42]:
from langchain.llms import CTransformers
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains.llm import LLMChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain

# Step 1: Load LLaMA model with CTransformers
def load_llm_local():
    return CTransformers(
        model="C:/Users/chill/llama/llama-2-7b-chat.Q4_K_M.gguf",
        model_type="llama",
        config={
            "max_new_tokens": 256,  # Reduced to avoid context length issues
            "temperature": 0.7,
            "top_p": 0.95
        }
    )

# Step 2: Custom prompt with LLaMA-2 chat format
custom_prompt_template = PromptTemplate(
    template="""
<s>[INST] <<SYS>>
You are an expert in medical knowledge. Use the following context to answer the question.
Context: {context}
<</SYS>>

{question} [/INST]
""",
    input_variables=["context", "question"]
)

# Step 3: Load FAISS vector store
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
db = FAISS.load_local("vectorstore/db_faiss", embeddings=embedding_model, allow_dangerous_deserialization=True)

# Step 4: Wrap LLM in a chain
llm = load_llm_local()
llm_chain = LLMChain(llm=llm, prompt=custom_prompt_template)
stuff_chain = StuffDocumentsChain(llm_chain=llm_chain, document_variable_name="context")

# Step 5: Build RetrievalQA with k=1 to avoid token limit issues
retriever = db.as_retriever(search_kwargs={"k": 1})  # Fetch only 1 document
qa_chain = RetrievalQA(
    retriever=retriever,
    combine_documents_chain=stuff_chain,
    return_source_documents=True,
    input_key="question"
)

# Step 6: Run the query using invoke
user_query = input("Enter your question: ")
response = qa_chain.invoke({"question": user_query})

# Step 7: Print the answer and source documents
print("\nRESULT:\n", response["result"])
print("\nSOURCE DOCUMENTS:")
for doc in response["source_documents"]:
    print("-", doc.metadata.get("source", "Unknown"))


RESULT:
 A wart is a small, benign growth caused by a viral infection of the skin or mucous membrane. The virus infects the surface layer of the skin or mucous membrane and causes the growth of abnormal cells, resulting in a wart. Warts are not cancerous, but they can be unsightly and may cause discomfort or embarrassment. They are caused by members of the human papilloma virus (HPV) family of viruses.

SOURCE DOCUMENTS:
- data\MedicalBook.pdf


In [43]:
user_query = input("Enter your question: ")
response = qa_chain.invoke({"question": user_query})

# Step 7: Print the answer and source documents
print("\nRESULT:\n", response["result"])
print("\nSOURCE DOCUMENTS:")
for doc in response["source_documents"]:
    print("-", doc.metadata.get("source", "Unknown"))


RESULT:
 In the context of cellular development, cancer refers to a process where abnormal cells within the body begin to grow out of control, acquire the ability to invade nearby structures, and travel through the bloodstream to invade distant structures. This uncontrolled growth and invasion of surrounding tissues is characteristic of cancer.
Cancer can arise from any type of cell in the body, including epithelial cells (which form the lining of organs and glands), connective tissue cells (such as bone or muscle), and stem cells (which have the ability to differentiate into multiple cell types). When cancer cells acquire mutations that allow them to grow and divide uncontrollably, they can form a tumor.
The term "malignant" refers specifically to cancer or cancer cells. Malignant tumors are those that are invasive and can spread to other parts of the body, while benign tumors are non-invasive and do not spread.
Sarcoma is a type of cancer that originates from connective tissue such 

In [45]:
from langchain.llms import CTransformers
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains.llm import LLMChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain

# Step 1: Load LLaMA model with optimized settings
def load_llm_local():
    return CTransformers(
        model="C:/Users/chill/llama/llama-2-7b-chat.Q4_K_M.gguf",
        model_type="llama",
        config={
            "max_new_tokens": 100,  # Reduced for speed
            "temperature": 0.7,
            "top_p": 0.95
        }
    )

# Step 2: Simplified prompt
custom_prompt_template = PromptTemplate(
    template="""
<s>[INST] Use the context to answer: {context}
Question: {question} [/INST]
""",
    input_variables=["context", "question"]
)

# Step 3: Load FAISS vector store
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
db = FAISS.load_local("vectorstore/db_faiss", embeddings=embedding_model, allow_dangerous_deserialization=True)

# Step 4: Wrap LLM in a chain
llm = load_llm_local()
llm_chain = LLMChain(llm=llm, prompt=custom_prompt_template)
stuff_chain = StuffDocumentsChain(llm_chain=llm_chain, document_variable_name="context")

# Step 5: Build RetrievalQA with k=1
retriever = db.as_retriever(search_kwargs={"k": 1})
qa_chain = RetrievalQA(
    retriever=retriever,
    combine_documents_chain=stuff_chain,
    return_source_documents=True,
    input_key="question"
)

# Step 6: Run the query
user_query = input("Enter your question: ")
response = qa_chain.invoke({"question": user_query})

# Step 7: Print the answer and source documents
print("\nRESULT:\n", response["result"])
print("\nSOURCE DOCUMENTS:")
for doc in response["source_documents"]:
    print("-", doc.metadata.get("source", "Unknown"))


RESULT:
 There are several methods for removing warts, including:
1. Numbing the skin and then scraping off the wart: This is a common method for removing small to medium-sized warts. The area is numbed with a local anesthetic, and then the wart is scrapped off with a scalpel or cryotherapy tool.
2. Electrocautery (electric burning): This method involves using an electric needle to burn

SOURCE DOCUMENTS:
- data\MedicalBook.pdf
