In [63]:
import os
from dotenv import load_dotenv


# Run python-dotenv's loader, then read the two API keys this notebook needs.
# Either value is None when the corresponding variable is not defined.
load_dotenv()
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")

In [64]:
# Publish both API keys through os.environ so later cells can read them from
# the environment. Assigning None to os.environ raises an opaque TypeError,
# so check for a missing key first and report which one is absent.
for _key_name, _key_value in (
    ("PINECONE_API_KEY", PINECONE_API_KEY),
    ("GROQ_API_KEY", GROQ_API_KEY),
):
    if _key_value is None:
        raise EnvironmentError(
            f"{_key_name} is not set; define it in the environment or in a .env file."
        )
    os.environ[_key_name] = _key_value

In [65]:
from langchain.document_loaders import PyPDFLoader,DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [66]:
def load_pdf_files(data):
    """Load the PDF files matching ``*.pdf`` under a directory.

    Args:
        data: Path of the directory handed to ``DirectoryLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` returns when each matched file
        is read with ``PyPDFLoader``.
    """
    pdf_directory_loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    return pdf_directory_loader.load()



In [121]:
# Load the PDFs found under the relative "data/" directory.
extracted_data = load_pdf_files("data/")

In [None]:
# Preview only the first few entries; echoing the whole list would dump every
# loaded document into the notebook output.
extracted_data[:3]

In [69]:
# Number of items returned by the loader.
len(extracted_data)

759

In [70]:
from typing import List
from langchain.schema import Document

In [71]:
def fetch_page_content(docs: List[Document]) -> List[Document]:
    """Return slimmed-down copies of ``docs`` keeping only text and source.

    Args:
        docs: Documents whose ``page_content`` and ``metadata`` are read.

    Returns:
        A new list of ``Document`` objects, one per input and in the same
        order, whose metadata holds just the ``"source"`` key (``None`` when
        the input metadata has no such key).
    """
    return [
        Document(
            page_content=original.page_content,
            metadata={"source": original.metadata.get("source")},
        )
        for original in docs
    ]
    

In [72]:
# Reduce each loaded document to its text plus the "source" metadata entry.
minimal_docs = fetch_page_content(extracted_data)

In [None]:
# Preview only the first few entries; echoing the whole list would dump every
# document's full text into the notebook output.
minimal_docs[:3]

In [74]:
# Chunking: split the documents into smaller text chunks
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [75]:
def text_split(minimal_docs, chunk_size=500, chunk_overlap=20):
    """Split documents into chunks with ``RecursiveCharacterTextSplitter``.

    Args:
        minimal_docs: Documents to split.
        chunk_size: Value passed as the splitter's ``chunk_size``; lengths
            are measured with ``len``. Defaults to 500.
        chunk_overlap: Value passed as the splitter's ``chunk_overlap``.
            Defaults to 20.

    Returns:
        The chunks returned by the splitter's ``split_documents`` call.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    return splitter.split_documents(minimal_docs)

In [76]:
# Split the slimmed-down documents and report how many chunks were produced.
texts_chunk = text_split(minimal_docs)
print(f"Number of chunks : {len(texts_chunk)}")

Number of chunks : 6973


In [77]:
# Embedding model
from langchain_huggingface import HuggingFaceEmbeddings


In [78]:
def load_embedding_model():
    """Build the embedding wrapper for ``sentence-transformers/all-MiniLM-L6-v2``.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with that model name.
    """
    return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [79]:
# Instantiate the embedding model once; later cells reuse this object.
embedding = load_embedding_model()

In [80]:
# Sanity check: embed a short string and inspect the resulting vector length.
vector = embedding.embed_query("hello world")
len(vector) 

384

In [81]:
from pinecone import Pinecone
# Create the Pinecone client from the API key read earlier in the notebook.
pinecone_api_key = PINECONE_API_KEY
pc = Pinecone(api_key=pinecone_api_key)

In [82]:
from pinecone import ServerlessSpec

index_name = "medical-chatbot"

# Create the serverless index only when it does not exist yet. The dimension
# must equal the length of the embedding vectors stored in it (384 for the
# embedding model used in this notebook).
if not pc.has_index(index_name):
    pc.create_index(
        name=index_name,
        dimension=384,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

# pc.has_index() only answers whether the index exists (a bool), so use
# pc.Index() to obtain a client handle for the index itself.
index = pc.Index(index_name)



In [None]:
from langchain_pinecone import PineconeVectorStore

# Build the vector store from the text chunks, using the embedding model and
# the Pinecone index name defined earlier.
# NOTE(review): re-running this cell presumably uploads the chunks again —
# confirm whether that creates duplicates before executing it more than once.
docsearch = PineconeVectorStore.from_documents(
    documents=texts_chunk,
    embedding=embedding,
    index_name=index_name
)

In [None]:
# Attach to the existing index by name; no documents are passed here, so this
# rebinds `docsearch` without going through from_documents().
docsearch = PineconeVectorStore.from_existing_index(
    embedding=embedding,
    index_name=index_name
)

In [None]:
# Similarity-search retriever over the vector store, configured with k=3.
retriever =  docsearch.as_retriever(search_type="similarity",search_kwargs={"k":3})

In [None]:
# Smoke-test the retriever with a sample question and display what came back.
retrieved_docs =  retriever.invoke("What is acne?")
retrieved_docs

[Document(id='89d2f4cc-a4ab-4da1-8bfa-fd1c824fa689', metadata={'source': 'data/The_GALE_ENCYCLOPEDIA_of_MEDICINE_SECOND.pdf'}, page_content='Corticosteriod —A group of synthetic hormones\nthat are used to prevent or reduce inflammation.\nToxic effects may result from rapid withdrawal after\nprolonged use or from continued use of large doses.\nPatch test—A skin test that is done to identify aller-\ngens. A suspected substance is applied to the skin.\nAfter 24–48 hours, if the area is red and swollen,\nthe test is positive for that substance. If no reaction\noccurs, another substance is applied. This is con-'),
 Document(id='408f56b0-cd6b-4696-89d9-822dd9e3406a', metadata={'source': 'data/The_GALE_ENCYCLOPEDIA_of_MEDICINE_SECOND.pdf'}, page_content='Corticosteroids —A group of anti-inflammatory\nsubstances often used to treat skin conditions.\nImmune response—The protective reaction by the\nimmune system against foreign antigens (sub-\nstances that the body perceives as potentially dan-\

In [84]:
from langchain_groq import ChatGroq
# No API key is passed explicitly; presumably ChatGroq reads GROQ_API_KEY from
# the environment set earlier — TODO confirm.
llm = ChatGroq(
    model="llama-3.1-8b-instant", 
    temperature=0
   
)




In [98]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate
)

In [100]:
# System instructions for the model. The trailing "{context}" placeholder is
# presumably filled with the retrieved documents by the chain — TODO confirm.
system_prompt = (
    "You are a medical assistant for question-answering tasks. "
    "Use the following pieces of retrieved content to answer. "
    "If you don't know the answer, say that you don't know. "
    "Use a maximum of three sentences and keep the answer concise.\n\n"
    "{context}"
)

# Two-message chat prompt: the system text above, then the user's "{input}".
prompt = ChatPromptTemplate.from_messages(
    [
        SystemMessagePromptTemplate.from_template(system_prompt),
        HumanMessagePromptTemplate.from_template("{input}")
    ]
)



In [102]:
# Combine the LLM and prompt into a document-stuffing QA chain, then wrap it
# with the retriever to form the retrieval chain invoked below.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
reg_chain = create_retrieval_chain(retriever,question_answer_chain)

In [119]:
# Ask a sample question; the "input" key matches the prompt's "{input}" placeholder.
response = reg_chain.invoke({"input" : "what is the treatment of dizziness?"})

In [120]:
# Show only the generated answer text from the chain's response.
print(response['answer'])

The treatment of dizziness is determined by the underlying cause, and may include bed rest for colds or influenza, medication to control blood pressure or treat arteriosclerosis, or physical therapy for persistent dizziness. Homeopathic therapies, such as osteopathic adjustments and acupuncture, may also be effective. Additionally, nutritionists may provide guidance on dietary supplements and food choices.
