In [1]:
# Smoke-test cell: prints a fixed greeting to show the kernel executes code.
print("hello world")

hello world


In [2]:
# Show the kernel's current working directory (research/ in the recorded output).
%pwd

'/home/vicron/Desktop/Healthcare/Healthcare-chatbot.-1/research'

In [2]:
# Repeat of the working-directory check from the previous cell.
%pwd

'/home/vicron/Desktop/Healthcare/Healthcare-chatbot.-1/research'

In [3]:
import os

# The recorded start directory is research/, while later cells use paths
# relative to its parent (e.g. "Data/"). Step up only while still inside
# research/, so re-running this cell does not climb one directory higher
# on every execution.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")


In [4]:
# Verify the working directory after the chdir in the previous cell.
print("Current directory:", os.getcwd())


Current directory: /home/vicron/Desktop/Healthcare/Healthcare-chatbot.-1


In [5]:
# Install/upgrade the embedding packages through the %pip magic.
# NOTE(review): -U without version pins makes re-runs non-reproducible — consider pinning.
%pip install -U langchain-huggingface sentence-transformers

Note: you may need to restart the kernel to use updated packages.


In [6]:
#imports
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings

In [7]:
# Check data: fail fast when the PDF folder is missing, before any loader runs.

data_dir = "Data/"
# isdir (rather than exists) also rejects a regular file that happens to be named "Data".
if not os.path.isdir(data_dir):
    raise FileNotFoundError(f"Directory '{data_dir}' does not exist. Please create it and add PDF files.")

In [8]:
#Load Data

def load_pdf(data):
    """Load the PDF files matching ``*.pdf`` under ``data``.

    Args:
        data: Path of the directory handed to ``DirectoryLoader``.

    Returns:
        The result of ``DirectoryLoader.load()``, with each matched file
        read through ``PyPDFLoader``.
    """
    pdf_loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    return pdf_loader.load()

In [9]:
# Load the PDFs from the data folder once; later cells reuse `extracted_data`.
extracted_data = load_pdf(data=data_dir)

In [10]:
#extracted_data

In [19]:
def split_text(extracted_data, chunk_size=1000, chunk_overlap=30):
    """Split loaded documents into overlapping chunks ready for embedding.

    Args:
        extracted_data: Documents to split (e.g. the result of ``load_pdf``).
        chunk_size: ``chunk_size`` forwarded to ``RecursiveCharacterTextSplitter``.
            Defaults to 1000, the value previously hard-coded here.
        chunk_overlap: ``chunk_overlap`` forwarded to the splitter.
            Defaults to 30, the value previously hard-coded here.

    Returns:
        The chunks produced by ``split_documents`` for ``extracted_data``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)

In [20]:
# Chunk the loaded PDFs and report how many chunks were produced.
text_chunks =  split_text(extracted_data)
print(f"Length of text chunks : {len(text_chunks)}")

Length of text chunks : 3010


In [21]:
#downloading  Huggingface embeddings
def download_embeddings():
    """Create the embedding model shared by the indexing and query cells.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured for
        ``sentence-transformers/all-MiniLM-L6-v2``.
    """
    return HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')


In [22]:
# Instantiate the embedding model once; later cells reuse this object.
embeddings = download_embeddings()

  from .autonotebook import tqdm as notebook_tqdm


In [23]:
# Sanity check: embed a sample string and print the vector length. The recorded
# value (384) is the same number used as `dimension` when the Pinecone index is created.
query_result = embeddings.embed_query("Hello Victor")
print ("length", len(query_result))

length 384


In [24]:
#query_result

In [47]:
# Load variables from a .env file so the API keys can be read from os.environ below.
from dotenv import load_dotenv
load_dotenv()

True

In [48]:
# Read the API keys from the environment; .get() yields None when a variable is missing.
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')
GEMINI_API_KEY = os.environ.get('GEMINI_API_KEY')

In [27]:
from pinecone import Pinecone, ServerlessSpec

pc = Pinecone(api_key=PINECONE_API_KEY)

index_name = "healthcare-chatbot"

# Calling create_index for an index that already exists fails, which made this
# cell impossible to re-run. Create the index only when it is missing.
if not pc.has_index(index_name):
    pc.create_index(
        name=index_name,
        dimension=384,  # vector length printed by the embed_query sanity check
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1"
        )
    )

# Display the index description whether it was just created or already existed.
pc.describe_index(index_name)



{
    "name": "healthcare-chatbot",
    "metric": "cosine",
    "host": "healthcare-chatbot-ccj89ta.svc.aped-4627-b74a.pinecone.io",
    "spec": {
        "serverless": {
            "cloud": "aws",
            "region": "us-east-1"
        }
    },
    "status": {
        "ready": true,
        "state": "Ready"
    },
    "vector_type": "dense",
    "dimension": 384,
    "deletion_protection": "disabled",
    "tags": null
}

In [49]:
import os

# Re-export the keys into the process environment (presumably for libraries
# that read them from there — confirm). Assigning None to os.environ raises an
# opaque TypeError, so fail with a clear message when a key was not found by
# the earlier os.environ.get() calls.
for _key_name, _key_value in (
    ("PINECONE_API_KEY", PINECONE_API_KEY),
    ("GEMINI_API_KEY", GEMINI_API_KEY),
):
    if _key_value is None:
        raise EnvironmentError(
            f"{_key_name} is not set. Add it to your .env file or export it before running this notebook."
        )
    os.environ[_key_name] = _key_value


In [29]:
# Ingestion: embed every chunk and store it in the Pinecone index.
# NOTE(review): no ids are passed, so re-running this cell presumably inserts the
# same chunks again — confirm, and skip this cell once the index is populated.
from langchain_pinecone import PineconeVectorStore

docsearch = PineconeVectorStore.from_documents(
    documents=text_chunks,
    index_name=index_name,
    embedding=embeddings,  
)

In [30]:
#Load Existing Index
# Connect to the existing index by name, passing no documents. This rebinds
# `docsearch`, replacing the object created by the ingestion cell.
from langchain_pinecone import PineconeVectorStore

docsearch = PineconeVectorStore.from_existing_index(
    index_name = index_name,
    embedding = embeddings
)


In [32]:
# Display the vector store object to confirm it was created.
docsearch

<langchain_pinecone.vectorstores.PineconeVectorStore at 0x7f3778fe4590>

In [33]:
# Retriever over the vector store: similarity search configured with k=3.
retriever = docsearch.as_retriever(search_type="similarity",search_kwargs={"k":3})

In [34]:
# Smoke-test retrieval with a sample medical question.
retrieved_docs = retriever.invoke("What is acne")

In [35]:
# Inspect the retrieved documents (full repr, so the output can be long).
retrieved_docs

[Document(id='9216618c-0372-4686-9071-a32388054646', metadata={'creationdate': '2004-12-18T17:00:02-05:00', 'creator': 'PyPDF', 'moddate': '2004-12-18T16:15:31-06:00', 'page': 40.0, 'page_label': '41', 'producer': 'PDFlib+PDI 5.0.0 (SunOS)', 'source': 'Data/Gale Encyclopedia of Medicine Vol. 1 (A-B).pdf', 'total_pages': 637.0}, page_content='Cliffs, NJ: Prentice Hall, 1995.\nGoldstein, Sanford M., and Richard B. Odom. “Skin &\nAppendages: Pustular Disorders.” In Current Medical\nDiagnosis and Treatment, 1996.35th ed. Ed. Stephen\nMcPhee, et al. Stamford: Appleton & Lange, 1995.\nKaptchuk, Ted J., Z’ev Rosenberg, and K’an Herb Co., Inc.\nK’an Herbals: Formulas by Ted Kaptchuk, O.M.D.San\nFrancisco: Andrew Miller, 1996.\nPERIODICALS\n“Adult Acne.”Harvard Women’s Health Watch(Mar. 1995): 4-\n5.\nBergfeld, Wilma F. “The Evaluation and Management of Acne:\nEconomic Considerations.” Journal of the American\nAcademy of Dermatology 32 (1995): S52-6.\nBillings, Laura. “Getting Clear.”Health Mag

In [50]:
# NOTE(review): newer LangChain releases presumably expose ChatOpenAI from the
# separate langchain_openai package — confirm against the installed version.
from langchain.chat_models import ChatOpenAI


# Chat model reached through the OpenRouter base_url.
# NOTE(review): the key is read from GEMINI_API_KEY but sent to openrouter.ai,
# so the variable presumably holds an OpenRouter key — consider renaming it.
llm = ChatOpenAI(
    base_url="https://openrouter.ai/api/v1",
    model="deepseek/deepseek-r1-0528-qwen3-8b",
    temperature=0.3,
    max_tokens=512,
    openai_api_key=os.environ["GEMINI_API_KEY"]
)



In [109]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate


# System instructions for the medical QA assistant. "{context}" is a template
# placeholder; the prompt built from this string is later handed to
# create_stuff_documents_chain. The fallback reply is now a properly closed
# quotation ("I don't know.") — the original left the opening quote unbalanced.
system_prompt = (
    "You are a knowledgeable and safe AI assistant for medical question-answering.\n"
    "Use the provided context below as your primary source of truth.\n"
    "If the context answers the user's question directly, use it.\n"
    "If the context is missing or incomplete, you may use general, widely accepted medical knowledge to help answer simple or common questions.\n"
    "Be honest about uncertainty, and never invent treatments or diagnoses.\n"
    "Do not mention the context or say phrases like 'Based on the context' or 'According to the context'. Just answer plainly.\n"
    "If the question requires specific facts not found in the context or commonly known, reply: \"I don't know.\"\n\n"
    "{context}"
)


# Two-message chat template: the system instructions, then the user's question
# supplied through the "{input}" placeholder.
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)


In [110]:
# Build the answer chain from the LLM and prompt, then wrap it with the
# retriever to form the full RAG chain used by the query cells below.
question_answer_chain = create_stuff_documents_chain(llm,prompt)
rag_chain = create_retrieval_chain(retriever,question_answer_chain)

In [101]:
# In-domain question; only the "answer" field of the chain result is printed.
response = rag_chain.invoke({"input":"what is Acne"})
print(response["answer"])


Acne is a common skin disease characterized by pimples on the face, chest, and back. It occurs when the pores of the skin become clogged with oil, dead skin cells, and bacteria.


In [102]:
# Off-topic probe (not a medical question). The recorded answer shows the model
# answering from general knowledge rather than replying "I don't know".
response = rag_chain.invoke({"input":"what is stats"})
print(response["answer"])


Statistics is a branch of mathematics dealing with the collection, analysis, interpretation, presentation, and organization of data. In medicine, statistics is used to analyze clinical trial data, determine the effectiveness of treatments, and understand disease patterns in populations. It helps healthcare professionals make evidence-based decisions and assess the reliability of medical findings.


In [113]:
# Symptom-style question asking whether to see a doctor.
response = rag_chain.invoke({"input":"I have fever,pain in my chest and headache,what should i do?,should i visit a medical doctor?"})
print(response["answer"])

Yes, you should visit a medical doctor promptly. Experiencing fever, chest pain, and headache warrants a professional evaluation to determine the cause and receive appropriate treatment.


In [104]:
# Disease-symptoms question.
response = rag_chain.invoke({"input":"what are the symptoms of typhoid"})
print(response["answer"])

Typhoid fever typically presents with symptoms such as high fever, abdominal pain, headache, and sometimes a characteristic rash. Other symptoms can include weakness, loss of appetite, and in some cases, diarrhea or constipation. If you suspect typhoid fever, it's important to seek medical attention for proper diagnosis and treatment.
