In [7]:
# Importing the required libraries
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings

In [31]:
from dotenv import load_dotenv
import os

# Load credentials from a .env file into the process environment.
# The location can be overridden with the DOTENV_PATH environment variable so
# the notebook is not tied to one machine; the original path stays the default.
load_dotenv(dotenv_path=os.getenv("DOTENV_PATH", r"C:\Users\ADMIN\Documents\venv\.env"))

# Fail fast with a readable message when a key is absent. The previous
# `os.environ[key] = os.getenv(key)` raised an opaque
# "TypeError: str expected, not NoneType" in that case and was a no-op otherwise.
_missing_keys = [
    key for key in ("PINECONE_API_KEY", "OPENAI_API_KEY") if os.getenv(key) is None
]
if _missing_keys:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_keys)
    )

In [9]:
# Extract Data from the PDF File

def load_pdf_file(data):
    """Load the PDF files found under the `data` directory.

    A DirectoryLoader is pointed at `data`, limited to names matching
    "*.pdf", with PyPDFLoader used for each matched file.

    Returns:
        Whatever the loader's load() call produces for those files.
    """
    pdf_loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    return pdf_loader.load()

In [10]:
# Load the PDFs from ../Data/; `extracted_data` is the input to split_text() below.
extracted_data = load_pdf_file(data="../Data/")

In [11]:
# extracted_data

In [12]:
# Split the Data into Text Chunks

def split_text(data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into overlapping text chunks.

    Args:
        data: Documents to split; handed to the splitter's split_documents().
        chunk_size: Chunk size given to RecursiveCharacterTextSplitter.
            Defaults to 500, the value that used to be hard-coded here.
        chunk_overlap: Overlap between consecutive chunks given to the
            splitter. Defaults to 20, the value that used to be hard-coded.

    Returns:
        The chunks returned by split_documents().
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(data)

In [13]:
# Chunk the loaded documents; `test_chunk` is what gets sent to Pinecone in a later cell.
test_chunk = split_text(extracted_data)
# Report how many chunks were produced.
print("Length of the text chunk: ", len(test_chunk))

Length of the text chunk:  5860


In [14]:
# Download the embeddings from the Hugging Face Model Hub

def download_hugging_face_embeddings(model_name="sentence-transformers/all-MiniLM-L6-v2"):
    """Create the Hugging Face embedding model used to vectorise text.

    Args:
        model_name: Model identifier passed to HuggingFaceEmbeddings.
            Defaults to "sentence-transformers/all-MiniLM-L6-v2", the value
            that used to be hard-coded here.

    Returns:
        The constructed HuggingFaceEmbeddings instance.

    Note:
        The Pinecone index in this notebook is created with dimension=384,
        which matches the query-embedding length printed for the default
        model. A different model may require a different index dimension.
    """
    return HuggingFaceEmbeddings(model_name=model_name)

In [15]:
# Instantiate the embedding model once; `embeddings` is reused by the vector-store cells below.
embeddings = download_hugging_face_embeddings()

  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


In [16]:
# Sanity check: embed a sample query and print the vector length.
# The printed length (384) is the same number used as `dimension` when the index is created.
query_result = embeddings.embed_query("Hello World")
print("Length", len(query_result))

Length 384


'<REDACTED: Pinecone API key was exposed in this cell output; rotate the key>'

In [None]:
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import ServerlessSpec
import os

# Read the key from the environment (populated by load_dotenv earlier in this
# notebook) instead of embedding the secret in the notebook source.
# NOTE(review): the key that was previously hard-coded here should be rotated.
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])

index_name = "medical-chatbot"

# Create the serverless index only when it does not exist yet, so this cell can
# be re-run without failing on an "already exists" error from create_index.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=384,  # must equal the embedding length (384 for the model used above)
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1",
        ),
    )

In [21]:
# Embed each chunk and upsert the embeddings into the Pinecone index.

from langchain_pinecone import PineconeVectorStore

# NOTE(review): this sends every chunk in `test_chunk` to the index each time the
# cell runs — confirm whether re-running creates duplicate vectors.
docsearch = PineconeVectorStore.from_documents(
    documents=test_chunk,
    index_name=index_name,
    embedding=embeddings,
)

In [22]:
# Load Existing Index

from langchain_pinecone import PineconeVectorStore

# Build the vector store from the existing Pinecone index; no documents are
# passed here, only the index name and the embedding model.
# This rebinds `docsearch`, replacing the object created in the previous cell.
docsearch = PineconeVectorStore.from_existing_index(
    index_name=index_name,
    embedding=embeddings,
)

In [23]:
# Display the vector-store object to confirm it was created.
docsearch

<langchain_pinecone.vectorstores.PineconeVectorStore at 0x1d5bbd41df0>

In [25]:
# Similarity-search retriever configured with k=3 (three documents per query).
retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k": 3})

In [27]:
# Smoke-test the retriever on a sample question before wiring it into the chain.
retrieved_docs = retriever.invoke("What is Acne?")

In [28]:
# Inspect the retrieved documents (page content and source metadata).
retrieved_docs

[Document(id='76da4a47-dc2a-4d13-99e6-35480337f920', metadata={'page': 39.0, 'source': '..\\Data\\Medical_book.pdf'}, page_content='GALE ENCYCLOPEDIA OF MEDICINE 226\nAcne\nGEM - 0001 to 0432 - A  10/22/03 1:41 PM  Page 26'),
 Document(id='407dfdd1-0aa7-4aea-ba1e-1d19b16884da', metadata={'page': 38.0, 'source': '..\\Data\\Medical_book.pdf'}, page_content='GALE ENCYCLOPEDIA OF MEDICINE 2 25\nAcne\nAcne vulgaris affecting a woman’s face. Acne is the general\nname given to a skin disorder in which the sebaceous\nglands become inflamed.(Photograph by Biophoto Associ-\nates, Photo Researchers, Inc. Reproduced by permission.)\nGEM - 0001 to 0432 - A  10/22/03 1:41 PM  Page 25'),
 Document(id='8f9d2c0c-ad32-469f-9870-bcaaf77fdce2', metadata={'page': 37.0, 'source': '..\\Data\\Medical_book.pdf'}, page_content='Acidosis see Respiratory acidosis; Renal\ntubular acidosis; Metabolic acidosis\nAcne\nDefinition\nAcne is a common skin disease characterized by\npimples on the face, chest, and back. It

In [32]:
from langchain_openai import OpenAI
# LLM used to generate answers: temperature 0.4, responses capped at 500 tokens.
# NOTE(review): presumably authenticates via the OPENAI_API_KEY environment
# variable set earlier in the notebook — confirm.
llm = OpenAI(temperature=0.4, max_tokens=500)

In [35]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# System message: the answering rules followed by a {context} placeholder.
# NOTE(review): {context} is presumably filled with the retrieved documents by
# the stuff-documents chain built in the next cell — confirm.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

# Two-message chat prompt; {input} matches the "input" key passed to
# rag_chain.invoke() in the cells below.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),  # lowercase role name (an earlier "System" spelling was corrected)
        ("user", "{input}"),        # the user's question
    ]
)


In [36]:
# Combine the LLM and the prompt into a chain that answers from supplied documents.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
# Put the retriever in front of that chain to form the full retrieval chain.
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [38]:
# Ask a sample question; the generated text is read from the 'answer' key of the response.
response = rag_chain.invoke({"input": "What is Acne?"})
print(response['answer'])



Acne is a common skin disease that causes pimples on the face, chest, and back. It occurs when the pores of the skin become clogged with oil, dead skin cells, and bacteria. Acne vulgaris is the most common form of acne and affects approximately 17 million people in the United States.


In [40]:
# Second sample question; rebinds `response` and prints the 'answer' key again.
response = rag_chain.invoke({"input": "What is Acromegaly and gigantism?"})
print(response['answer'])



Acromegaly and gigantism are disorders caused by the abnormal release of a chemical from the pituitary gland in the brain, leading to increased growth in bone and soft tissue. This can result in a variety of disturbances throughout the body, including unusual height. The disorder is relatively rare, affecting both men and women, and is often not diagnosed until middle age due to the gradual onset of symptoms.
