In [1]:
print("Hello World")

Hello World


In [2]:
%pwd

'd:\\SL\\Project Files\\Chatbot\\healthcare-chatbot\\research'

In [3]:
import os
os.chdir("../")

In [4]:
%pwd

'd:\\SL\\Project Files\\Chatbot\\healthcare-chatbot'

In [5]:
from langchain.document_loaders import PyPDFLoader,DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [6]:
# Extract data from PDF

def load_pdf_file(data_path):
    loader = DirectoryLoader(data_path,
                             glob="*.pdf",
                             loader_cls=PyPDFLoader)
    return loader.load()

In [7]:
extracted_data = load_pdf_file("Data/")

In [8]:
#extracted_data

In [9]:
# Split the data into text chunks

def text_split(data):
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
    return splitter.split_documents(data)

In [10]:
text_chunks = text_split(extracted_data)
print(len(text_chunks))

5860


In [11]:
from langchain.embeddings import HuggingFaceEmbeddings

#Download the embeddings from HuggingFace
def download_embeddings():
    embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
    return embeddings

In [12]:
embeddings = download_embeddings()

  embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
  from .autonotebook import tqdm as notebook_tqdm


In [13]:
query_result = embeddings.embed_query("Hello World")
print(len(query_result))

384


In [14]:
from dotenv import load_dotenv
load_dotenv()

PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

In [15]:
os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY
os.environ["GROQ_API_KEY"] = GROQ_API_KEY

In [16]:
from pinecone import Pinecone, ServerlessSpec

pc = Pinecone(api_key=PINECONE_API_KEY)

index_name = "healthcarebot"

pc.create_index(
    name=index_name,
    dimension=384,
    metric="cosine",
    spec=ServerlessSpec(
        cloud="aws",
        region="us-east-1"
    )

)

PineconeApiException: (409)
Reason: Conflict
HTTP response headers: HTTPHeaderDict({'content-type': 'text/plain; charset=utf-8', 'access-control-allow-origin': '*', 'vary': 'origin,access-control-request-method,access-control-request-headers', 'access-control-expose-headers': '*', 'x-pinecone-api-version': '2024-04', 'X-Cloud-Trace-Context': 'a1a60ed66c82e06e265ed117f0b3df37', 'Date': 'Sat, 04 Jan 2025 10:48:34 GMT', 'Server': 'Google Frontend', 'Content-Length': '85', 'Via': '1.1 google', 'Alt-Svc': 'h3=":443"; ma=2592000,h3-29=":443"; ma=2592000'})
HTTP response body: {"error":{"code":"ALREADY_EXISTS","message":"Resource  already exists"},"status":409}


In [17]:
# Embed each chunk and upsert the embeddings into your Pinecone
from langchain.vectorstores import Pinecone as PineconeVStore

docsearch = PineconeVStore.from_documents(
    documents = text_chunks,
    index_name = index_name,
    embedding = embeddings
)

In [18]:
# Load Existing Pinecone Index
docsearch = PineconeVStore.from_existing_index(
    index_name = index_name,
    embedding = embeddings)

In [19]:
docsearch

<langchain_community.vectorstores.pinecone.Pinecone at 0x24c195a7ed0>

In [20]:
retriever = docsearch.as_retriever(search_type = "similarity",
                                   search_kwargs = {"k": 3})

In [21]:
retrieved_docs = retriever.invoke("What is Dandruff?")
retrieved_docs

[Document(metadata={'page': 139.0, 'source': 'Data\\Gale Encyclopedia of Medicine.pdf'}, page_content='its own tissues.\nChemotherapy—The treatment of diseases, usual-\nly cancer, with drugs (chemicals).\nHair follicles—Tiny organs in the skin, each one of\nwhich grows a single hair.\nLupus erythematosus —An autoimmune disease\nthat can damage skin, joints, kidneys, and other\norgans.\nRingworm—A fungal infection of the skin, usually\nknown as tinea corporis.\nSystemic—Affecting all or most parts of the body.\ntime, minoxidil produces satisfactory results in about one'),
 Document(metadata={'page': 139.0, 'source': 'Data\\Gale Encyclopedia of Medicine.pdf'}, page_content='its own tissues.\nChemotherapy—The treatment of diseases, usual-\nly cancer, with drugs (chemicals).\nHair follicles—Tiny organs in the skin, each one of\nwhich grows a single hair.\nLupus erythematosus —An autoimmune disease\nthat can damage skin, joints, kidneys, and other\norgans.\nRingworm—A fungal infection of th

Groq API

In [22]:
from langchain_groq import ChatGroq

llm = ChatGroq(
    model="llama-3.3-70b-versatile",
    temperature=1,
    max_retries=2,
    
)

In [23]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

system_prompt = (
"You are an assistant for question-answering tasks. "
"Use the following pieces of retrieved context to answer "
"the question. If you don't know the answer, say that you "
"don't know. Use three sentences maximum and keep the "
"answer concise."
"\n\n"
"{context}")

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}")
    ]
)

In [24]:
question_answering_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answering_chain)

In [25]:
response = rag_chain.invoke({"input":"What is Diarrhea and remedies?"})
print(response["answer"])   

Diarrhea is not explicitly defined in the given context, but it is mentioned as a condition that can be treated with antidiarrheal drugs and by replacing lost fluids to prevent dehydration. The context also mentions that stopping the antibiotic that caused the disease can help treat antibiotic-associated colitis and subsequently diarrhea. Additionally, encouraging the individual to replace lost fluids is a method to treat diarrhea.


OpenAI API - Needs Credits

In [61]:
from langchain_openai import OpenAI
llm = OpenAI(temperature=0.5,max_tokens=500)

In [1]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

system_prompt = (
"You are an assistant for question-answering tasks. "
"Use the following pieces of retrieved context to answer "
"the question. If you don't know the answer, say that you "
"don't know. Use three sentences maximum and keep the "
"answer concise."
"\n\n"
"{context}")

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}")
    ]
)

In [73]:
question_answering_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answering_chain)

In [None]:
response = rag_chain.invoke({"input":"What is Dandruff?"})
response["answer"]