In [48]:
print("ALLAH")

ALLAH


In [49]:
import os
print("Current directory:", os.getcwd())

Current directory: d:\Medical Chatbot\Medical-Chatbot-Generative-AI


In [50]:
print("Working directory:", os.getcwd())

Working directory: d:\Medical Chatbot\Medical-Chatbot-Generative-AI


In [51]:
import langchain
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_pinecone import PineconeVectorStore
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from dotenv import load_dotenv
from pinecone import Pinecone, ServerlessSpec
import requests
import json

In [53]:
def load_pdf_file(data):
    loader = DirectoryLoader(data,
                            glob="*.pdf",
                            loader_cls=PyPDFLoader)
    documents = loader.load()
    return documents

extracted_data = load_pdf_file(data='Data/')
print(f"Loaded {len(extracted_data)} documents")

Loaded 4505 documents


In [54]:
def text_split(extracted_data):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
    text_chunks = text_splitter.split_documents(extracted_data)
    return text_chunks

text_chunks = text_split(extracted_data)
print("Length of Text Chunks:", len(text_chunks))

Length of Text Chunks: 40000


In [55]:
def download_hugging_face_embeddings():
    embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
    return embeddings

embeddings = download_hugging_face_embeddings()
query_result = embeddings.embed_query("Hello world")
print("Embedding Length:", len(query_result))

Embedding Length: 384


In [71]:
load_dotenv()
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')
DEEPSEEK_API_KEY = os.environ.get('DEEPSEEK_API_KEY')

In [57]:
pc = Pinecone(api_key=PINECONE_API_KEY)
index_name = "medical-chatbot"

# Create index (run only once)
try:
    pc.create_index(
        name=index_name,
        dimension=384, 
        metric="cosine", 
        spec=ServerlessSpec(
            cloud="aws", 
            region="us-east-1"
        ) 
    )
    print(f"Created index: {index_name}")
except Exception as e:
    print(f"Index might already exist: {e}")

Index might already exist: (409)
Reason: Conflict
HTTP response headers: HTTPHeaderDict({'content-type': 'text/plain; charset=utf-8', 'access-control-allow-origin': '*', 'vary': 'origin,access-control-request-method,access-control-request-headers', 'access-control-expose-headers': '*', 'x-pinecone-api-version': '2025-01', 'x-cloud-trace-context': '6a48ceeb356dd04c31ac4e7dc6eee131', 'date': 'Sun, 25 May 2025 18:25:35 GMT', 'server': 'Google Frontend', 'Content-Length': '85', 'Via': '1.1 google', 'Alt-Svc': 'h3=":443"; ma=2592000,h3-29=":443"; ma=2592000'})
HTTP response body: {"error":{"code":"ALREADY_EXISTS","message":"Resource  already exists"},"status":409}



In [72]:
os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY
os.environ["DEEPSEEK_API_KEY"] = DEEPSEEK_API_KEY

In [73]:
class DeepSeekLLM:
    def __init__(self, api_key, model="deepseek-chat"):
        self.api_key = api_key
        self.model = model
        self.base_url = "https://api.deepseek.com/v1/chat/completions"
    
    def invoke(self, prompt):
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }
        
        # Handle different prompt formats
        if hasattr(prompt, 'format_messages'):
            # If it's a ChatPromptTemplate
            messages = prompt.format_messages()
            formatted_messages = []
            for msg in messages:
                if hasattr(msg, 'content') and hasattr(msg, 'type'):
                    role = "user" if msg.type == "human" else "system" if msg.type == "system" else "assistant"
                    formatted_messages.append({"role": role, "content": msg.content})
        elif isinstance(prompt, str):
            # If it's a simple string
            formatted_messages = [{"role": "user", "content": prompt}]
        else:
            # If it's already formatted messages
            formatted_messages = prompt
        
        data = {
            "model": self.model,
            "messages": formatted_messages,
            "temperature": 0.7,
            "max_tokens": 1000
        }
        
        try:
            response = requests.post(self.base_url, headers=headers, json=data)
            response.raise_for_status()
            result = response.json()
            return DeepSeekResponse(result['choices'][0]['message']['content'])
        except Exception as e:
            print(f"Error calling DeepSeek API: {e}")
            return DeepSeekResponse("I apologize, but I'm unable to process your request at the moment.")

class DeepSeekResponse:
    def __init__(self, content):
        self.content = content

In [60]:
llm = DeepSeekLLM(api_key=DEEPSEEK_API_KEY)

In [61]:
try:
    # Try to load existing index first
    docsearch = PineconeVectorStore.from_existing_index(
        index_name=index_name,
        embedding=embeddings
    )
    print("Loaded existing vector store")
except:
    # If no existing index, create new one
    docsearch = PineconeVectorStore.from_documents(
        documents=text_chunks,
        index_name=index_name,
        embedding=embeddings, 
    )
    print("Created new vector store")

print("Vector store ready:", docsearch)

Loaded existing vector store
Vector store ready: <langchain_pinecone.vectorstores.PineconeVectorStore object at 0x000002075C8EFB50>


In [62]:
retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k": 3})
retrieved_docs = retriever.invoke("What is Acne?")
print(f"Retrieved {len(retrieved_docs)} documents for test query")

Retrieved 3 documents for test query


In [63]:
system_prompt = (
    "You are a medical assistant chatbot trained on medical literature. "
    "Use the following pieces of retrieved context to answer medical questions accurately. "
    "If you don't know the answer based on the provided context, say that you "
    "don't have enough information in the medical literature provided. "
    "Always provide accurate, helpful medical information but remind users to "
    "consult healthcare professionals for personal medical advice. "
    "Keep answers clear and informative."
    "\n\n"
    "Context: {context}"
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

# Custom document chain for DeepSeek
def create_deepseek_stuff_documents_chain(llm, prompt):
    def format_docs(docs):
        return "\n\n".join([doc.page_content for doc in docs])
    
    def chain_func(inputs):
        context = format_docs(inputs["context"])
        formatted_prompt = prompt.format_messages(context=context, input=inputs["input"])
        
        # Convert to simple format for DeepSeek
        messages = []
        for msg in formatted_prompt:
            role = "user" if msg.type == "human" else "system"
            messages.append({"role": role, "content": msg.content})
        
        response = llm.invoke(messages)
        return response.content
    
    return chain_func

question_answer_chain = create_deepseek_stuff_documents_chain(llm, prompt)

# Custom retrieval chain
def create_deepseek_retrieval_chain(retriever, combine_docs_chain):
    def chain_func(inputs):
        # Retrieve relevant documents
        docs = retriever.invoke(inputs["input"])
        
        # Combine documents and generate answer
        answer = combine_docs_chain({"context": docs, "input": inputs["input"]})
        
        return {
            "input": inputs["input"],
            "context": docs,
            "answer": answer
        }
    
    return chain_func

rag_chain = create_deepseek_retrieval_chain(retriever, question_answer_chain)

In [70]:
def ask_medical_question(question):
    """Function to ask questions to the medical chatbot"""
    try:
        response = rag_chain({"input": question})
        return response["answer"]
    except Exception as e:
        return f"Error processing question: {e}"

# Test with sample questions
test_questions = [
    "What is Acromegaly and gigantism?",
    "What are the symptoms of diabetes?",
    "How is hypertension treated?",
    "What causes acne?"
]

print("=== Medical Chatbot Test ===")
for question in test_questions:
    print(f"\nQuestion: {question}")
    answer = ask_medical_question(question)
    print(f"Answer: {answer}")
    print("-" * 50)


=== Medical Chatbot Test ===

Question: What is Acromegaly and gigantism?
Error calling DeepSeek API: 402 Client Error: Payment Required for url: https://api.deepseek.com/v1/chat/completions
Answer: I apologize, but I'm unable to process your request at the moment.
--------------------------------------------------

Question: What are the symptoms of diabetes?
Error calling DeepSeek API: 402 Client Error: Payment Required for url: https://api.deepseek.com/v1/chat/completions
Answer: I apologize, but I'm unable to process your request at the moment.
--------------------------------------------------

Question: How is hypertension treated?
Error calling DeepSeek API: 402 Client Error: Payment Required for url: https://api.deepseek.com/v1/chat/completions
Answer: I apologize, but I'm unable to process your request at the moment.
--------------------------------------------------

Question: What causes acne?
Error calling DeepSeek API: 402 Client Error: Payment Required for url: https://ap

In [19]:
def medical_chatbot():
    """Interactive medical chatbot"""
    print("=== Medical Chatbot ===")
    print("Ask me any medical question based on the loaded medical literature.")
    print("Type 'quit' to exit.")
    
    while True:
        question = input("\nYour question: ")
        if question.lower() in ['quit', 'exit', 'bye']:
            print("Thank you for using the Medical Chatbot!")
            break
        
        if question.strip():
            answer = ask_medical_question(question)
            print(f"\nAnswer: {answer}")
            print("\nRemember: This information is for educational purposes only. Always consult with healthcare professionals for personal medical advice.")

# Uncomment the line below to start interactive mode
# medical_chatbot()

print("\n=== Setup Complete ===")
print("Your medical chatbot is ready!")
print("Use ask_medical_question('your question') to get answers")
print("Or run medical_chatbot() for interactive mode")