In [2]:
import os

# Remember the directory the kernel started in so this cell is idempotent:
# a bare os.chdir("../") climbs one more level every time the cell is re-run.
if "NOTEBOOK_DIR" not in globals():
    NOTEBOOK_DIR = os.getcwd()

# Work from the parent of the notebook's directory (same target as the
# original "../" on the first run, and on every later run as well).
os.chdir(os.path.join(NOTEBOOK_DIR, os.pardir))

In [3]:
# Display the current working directory to confirm the chdir in the previous cell.
%pwd

'c:\\Users\\abhay\\Downloads\\Medical Chatbot'

In [4]:
# The PDF loaders are imported from langchain_community, matching the pipeline
# cell further down; the langchain.document_loaders path is a deprecated alias.
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
! pip install --upgrade sentence-transformers




In [6]:
from pinecone import Pinecone

In [7]:
! pip install langchain-groq




In [11]:
# -----------------------
# 0. Import necessary modules
# -----------------------
import os
from dotenv import load_dotenv

# Load variables from the project's .env file into the process environment.
load_dotenv()

GROQ_API_KEY = os.getenv("GROQ_API_KEY")
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")

# Fail fast with a clear message: os.getenv returns None for an unset variable,
# which would otherwise only surface later inside the Pinecone / Groq clients,
# after the PDFs have already been parsed and embedded.
_missing_keys = [
    key_name
    for key_name, key_value in (
        ("GROQ_API_KEY", GROQ_API_KEY),
        ("PINECONE_API_KEY", PINECONE_API_KEY),
    )
    if not key_value
]
if _missing_keys:
    raise EnvironmentError(
        "Missing required environment variable(s): " + ", ".join(_missing_keys)
    )

# -----------------------
# 1. Load and Split PDF Data
# -----------------------
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

def load_pdf_file(data_path):
    """Load the PDF files matched by ``*.pdf`` under ``data_path``.

    Args:
        data_path: Directory handed to ``DirectoryLoader`` as the search root.

    Returns:
        The documents produced by ``DirectoryLoader.load()``, with each file
        parsed by ``PyPDFLoader``.
    """
    return DirectoryLoader(data_path, glob="*.pdf", loader_cls=PyPDFLoader).load()

def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into overlapping text chunks.

    Args:
        extracted_data: Documents to split, e.g. the result of ``load_pdf_file``.
        chunk_size: ``chunk_size`` passed to ``RecursiveCharacterTextSplitter``.
            Defaults to 500, the value that was previously hard-coded.
        chunk_overlap: ``chunk_overlap`` passed to the splitter. Defaults to 20,
            the value that was previously hard-coded.

    Returns:
        The chunks returned by ``RecursiveCharacterTextSplitter.split_documents``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(extracted_data)

# Load all PDFs from current directory (Data.pdf and Data2.pdf)
print("Loading PDF files...")
extracted_data = load_pdf_file(data_path="./")
print(f"Loaded {len(extracted_data)} documents")

# Stop here if nothing was loaded: later steps in this cell delete and rebuild
# the Pinecone index, which would otherwise be wiped and left empty.
if not extracted_data:
    raise FileNotFoundError("No PDF documents found in './' - nothing to index")

# Chunk the loaded documents so each piece can be embedded separately.
text_chunks = text_split(extracted_data)
print("Length of Text Chunks:", len(text_chunks))

# -----------------------
# 2. Download HuggingFace Embeddings
# -----------------------
from langchain_huggingface import HuggingFaceEmbeddings

def download_hugging_face_embeddings():
    """Create the embedding model used by this notebook.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``sentence-transformers/all-MiniLM-L6-v2`` model name.
    """
    return HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')

# Instantiate the embedding model once; `embeddings` is reused for the upload
# to Pinecone later in this cell.
print("Loading embeddings model...")
embeddings = download_hugging_face_embeddings()

# Smoke test: embed a short string and report the vector length. The recorded
# run printed 384; the Pinecone index dimension has to match this length.
query_result = embeddings.embed_query("Hello world")
print("Embedding vector length:", len(query_result))

# -----------------------
# 3. Set up Pinecone
# -----------------------
from pinecone import Pinecone, ServerlessSpec
import time

pc = Pinecone(api_key=PINECONE_API_KEY)

index_name = "mkc"


def _index_exists(name):
    """Return True if an index called ``name`` is currently listed by Pinecone."""
    return name in [index.name for index in pc.list_indexes()]


def _wait_until(condition, description, timeout=300, interval=2):
    """Poll ``condition()`` every ``interval`` seconds until it returns a truthy value.

    Args:
        condition: Zero-argument callable that is re-evaluated on every poll.
        description: Short text used in the timeout error message.
        timeout: Maximum number of seconds to wait.
        interval: Seconds to sleep between polls.

    Raises:
        TimeoutError: If ``condition()`` is still falsy after ``timeout`` seconds.
    """
    deadline = time.monotonic() + timeout
    while not condition():
        if time.monotonic() >= deadline:
            raise TimeoutError(f"Timed out after {timeout}s waiting for {description}")
        time.sleep(interval)


# Delete existing index to start fresh with both books
print("Checking for existing index...")
if _index_exists(index_name):
    print(f"Deleting existing index: {index_name}")
    pc.delete_index(index_name)
    print("Waiting for deletion to complete...")
    # Poll instead of sleeping a fixed 5 seconds: if the deletion were still
    # pending, the existence check below would skip creation and the upload
    # would target an index that is about to disappear.
    _wait_until(
        lambda: not _index_exists(index_name),
        f"index '{index_name}' to be deleted",
    )

# Create new index
print("Creating new index...")
if not _index_exists(index_name):
    pc.create_index(
        name=index_name,
        # Take the dimension from the embedding produced earlier in this cell
        # (384 for the current model) so index and model cannot drift apart.
        dimension=len(query_result),
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1"
        )
    )
    print("Waiting for index to be ready...")
    # Poll the index status instead of assuming it is ready after 10 seconds.
    _wait_until(
        lambda: pc.describe_index(index_name).status["ready"],
        f"index '{index_name}' to become ready",
    )

# -----------------------
# 4. Upload Embeddings to Pinecone
# -----------------------
from langchain_pinecone import PineconeVectorStore

# Build the vector store from the chunked documents, using the embedding model
# and index name defined earlier in this cell.
# NOTE(review): the `pc` client created above is not passed in here; presumably
# the vector store reads PINECONE_API_KEY from the environment - confirm.
print("Uploading embeddings to Pinecone...")
docsearch = PineconeVectorStore.from_documents(
    documents=text_chunks,
    index_name=index_name,
    embedding=embeddings,
)

print("Upload complete!")

# -----------------------
# 5. Set up Retriever
# -----------------------
# Similarity search with k=3; `retriever` is reused by the chains built later
# in this notebook.
print("Setting up retriever...")
retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k": 3})

# Test retrieval
# Sanity check only: the recorded run reported 3 retrieved documents.
print("\nTesting retrieval...")
retrieved_docs = retriever.invoke("What is Acne?")
print(f"Retrieved {len(retrieved_docs)} documents")

# -----------------------
# 6. Set up Groq LLM
# -----------------------
from langchain_groq import ChatGroq

# Groq-hosted chat model used by the RetrievalQA chain below.
# NOTE(review): a later cell rebinds `llm` with different temperature and
# max_tokens values; chains built before that point keep this instance.
print("Setting up LLM...")
llm = ChatGroq(
    model="llama-3.3-70b-versatile",
    temperature=0.2,
    max_tokens=1024,
    api_key=GROQ_API_KEY
)

# -----------------------
# 7. Create RAG Chain
# -----------------------
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# Prompt for the QA chain: {context} and {question} are the two template
# variables declared in PROMPT below.
prompt_template = """Use the following pieces of context to answer the question at the end. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}

Question: {question}

Answer:"""

PROMPT = PromptTemplate(
    template=prompt_template, 
    input_variables=["context", "question"]
)

# Wire the retriever and the LLM together. return_source_documents=True is
# requested, although the code in this cell only reads the 'result' key.
# NOTE(review): RetrievalQA appears to be deprecated in recent LangChain
# releases; the next cell already builds an equivalent chain with
# create_retrieval_chain - consider keeping only one of the two.
print("Creating RAG chain...")
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": PROMPT}
)

# -----------------------
# 8. Query the System
# -----------------------
def query_rag_system(question):
    """Send ``question`` through the module-level ``qa_chain``.

    Args:
        question: Question text, passed to the chain under the ``"query"`` key.

    Returns:
        The value returned by ``qa_chain.invoke``; the sample usage in this
        cell reads the answer from its ``'result'`` key.
    """
    return qa_chain.invoke({"query": question})

# Example usage
# End-to-end check: run one sample question through the chain and print the
# answer between banner lines of 50 '=' characters.
print("\n" + "="*50)
print("RAG System Ready!")
print("="*50)
print("\nTesting with sample question...")
result = query_rag_system("What is Acne?")
# The question text is repeated literally here; keep it in sync with the call above.
print("\nQuestion: What is Acne?")
print("\nAnswer:", result['result'])
print("\n" + "="*50)
# NOTE(review): the file names in this message are hard-coded; the loader
# actually picks up every *.pdf in the working directory.
print("You can now query from both Data.pdf and Data2.pdf!")
print("="*50)

python-dotenv could not parse statement starting at line 4
python-dotenv could not parse statement starting at line 6
python-dotenv could not parse statement starting at line 7


Loading PDF files...
Loaded 4843 documents
Length of Text Chunks: 43035
Loading embeddings model...
Embedding vector length: 384
Checking for existing index...
Deleting existing index: mkc
Waiting for deletion to complete...
Creating new index...
Waiting for index to be ready...
Uploading embeddings to Pinecone...
Upload complete!
Setting up retriever...

Testing retrieval...
Retrieved 3 documents
Setting up LLM...
Creating RAG chain...

RAG System Ready!

Testing with sample question...

Question: What is Acne?

Answer: Acne is a skin disorder in which the sebaceous glands become inflamed, characterized by common blemishes such as blackheads, whiteheads, and yellowheads (pustules), often associated with high hormone levels, clogged pores, and increased production of sebum, the skin's oily secretion.

You can now query from both Data.pdf and Data2.pdf!


In [12]:
from langchain_groq import ChatGroq

# Rebinds `llm` for the chain built in this cell: same model as the previous
# cell, but temperature 0.4 (was 0.2) and max_tokens 500 (was 1024).
# Depends on GROQ_API_KEY defined in the previous cell.
llm = ChatGroq(
    temperature=0.4,
    max_tokens=500,
    model="llama-3.3-70b-versatile",
    api_key=GROQ_API_KEY
)

# -----------------------
# 7. Create RAG Chain
# -----------------------
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# System instructions for the assistant; the {context} placeholder at the end
# is part of the prompt passed to create_stuff_documents_chain below.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise.\n\n{context}"
)

# Chat prompt: the system message above plus the user's text under "input".
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{input}"),
])

# Combine the document-stuffing QA chain with `retriever`, which is defined in
# the previous cell (this cell does not run on a fresh kernel without it).
# Callers below invoke the chain with {"input": ...} and read response["answer"].
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

# -----------------------
# 8. Test the RAG System
# -----------------------
# Free-text symptom description sent as the chain's "input"; the generated
# text is read from the "answer" key of the response.
response = rag_chain.invoke({"input":"Lately, I've been feeling extremely tired, even after a full night's sleep. I also noticed that my heart races sometimes and I get short of breath just from climbing a few stairs. I've had some swelling in my ankles too. I thought it was just stress, but it’s been going on for weeks now."})
print("Response 1:", response["answer"])



Response 1: It's possible that you're experiencing symptoms related to sleep apnea, which can cause fatigue, heart problems, and shortness of breath. The swelling in your ankles could be a sign of cardiovascular issues, such as high blood pressure or left ventricular failure, which can be associated with sleep apnea. I recommend consulting a doctor to determine the underlying cause of your symptoms and receive proper diagnosis and treatment.


In [13]:
# Second sample query (asthma treatments and precautions).
# NOTE(review): same invoke/print pattern as the previous cell, still labelled
# "Response 1"; a small helper taking the question would remove the duplication.
response = rag_chain.invoke({"input": "I have asthma and sometimes struggle with shortness of breath. What treatments are recommended, and what precautions should I take to manage my condition"})
print("Response 1:", response["answer"])

Response 1: For asthma, treatments include bronchodilators to widen narrowed airways, and newer medications taken daily to prevent asthma attacks. To manage your condition, it's recommended to minimize exposure to allergens and avoid asthma and allergy triggers. Additionally, urgent measures to control asthma attacks and ongoing treatment to prevent attacks are equally important to prevent respiratory failure.


In [14]:
# Informal, misspelled query kept as typed to see how the chain copes with
# noisy user input. The printed label is still "Response 1".
response = rag_chain.invoke({"input": "i am suffering from coldaswell as high pain in body suggest me some   medicine "})
print("Response 1:", response["answer"])

Response 1: For a cold and body pain, you can try over-the-counter medications such as aspirin or nonsteroidal anti-inflammatory drugs (NSAIDs) like ibuprofen to relieve headache and muscle pain. Additionally, decongestants can help relieve stuffiness or a runny nose. However, it's always best to consult a doctor before taking any medication.


In [15]:
# NOTE(review): the recorded answer for this query was "I don't know ...";
# the misspellings ("sybptom", "maleria") may be hurting retrieval - try the
# corrected spelling before concluding the source PDFs lack the topic.
response = rag_chain.invoke({"input": " give me sybptom of maleria"})
print("Response 1:", response["answer"])

Response 1: I don't know the symptoms of malaria from the given context.
