In [1]:
import os
from uuid import uuid4
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from pinecone import Pinecone, ServerlessSpec
from langchain.llms import HuggingFaceHub
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers

In [2]:
from dotenv import load_dotenv

load_dotenv() 

True

In [3]:
API_KEY=os.getenv('PINECONE_API_KEY')

In [4]:
def load_pdf(data):
    loader = DirectoryLoader(data,
                    glob="*.pdf",
                    loader_cls=PyPDFLoader)
    
    documents = loader.load()

    return documents

In [5]:
extracted_data = load_pdf("../data/")

In [6]:
def text_split(extracted_data):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size = 500, chunk_overlap = 20)
    text_chunks = text_splitter.split_documents(extracted_data)

    return text_chunks

In [7]:
text_chunks = text_split(extracted_data)
print("length of my chunk:", len(text_chunks))

length of my chunk: 5859


In [8]:
def download_hugging_face_embeddings():
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return embeddings

In [9]:
embeddings = download_hugging_face_embeddings()

  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
  from .autonotebook import tqdm as notebook_tqdm


In [10]:
embeddings

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False, 'architecture': 'BertModel'})
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [11]:
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone, ServerlessSpec
from uuid import uuid4


For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  from langchain_pinecone.vectorstores import Pinecone, PineconeVectorStore


In [12]:
pc = Pinecone(api_key=API_KEY)

In [13]:
index_name = "medical-chatbot-llama2"

if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=384,   
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1")
    )
    

In [14]:
uuids = [str(uuid4()) for _ in range(len(text_chunks))]

In [15]:
#vector_store = PineconeVectorStore.from_texts(
#    [t.page_content for t in text_chunks],
#    embedding=embeddings,
#    ids=uuids,
#    index_name=index_name
#)

In [16]:
# Connect to the existing vector store
vector_store = PineconeVectorStore(
    index=pc.Index(index_name),   
    embedding=embeddings,        
)

In [17]:
prompt_template = """
You are a helpful and knowledgeable **medical assistant chatbot**.  
Your job is to answer the user's questions based only on the provided context, unless the question is a general greeting or basic small talk.

### Instructions:
1. If the question is about medicine or health:
   - Use ONLY the information from the context below.
   - If the context does not provide enough information, say:  
     "I'm not sure about that based on my medical sources."
   - Do NOT invent or make up answers.

2. If the user greets you or asks something simple like "hi", "hello", "how are you", or "who are you":
   - Respond naturally as a friendly medical assistant.  
   Example: "Hello! I'm your medical assistant. How can I help you today?"

3. If the user asks something completely unrelated to medicine:
   - Politely say you can only help with medical or health-related questions.

---

Context: {context}
Question: {question}

Helpful answer:
"""


In [18]:
PROMPT=PromptTemplate(template=prompt_template, input_variables=["context", "question"])
chain_type_kwargs={"prompt": PROMPT}

In [19]:
llm=CTransformers(model="../model/llama-2-7b-chat.ggmlv3.q2_K.bin",
                  model_type="llama",
                  config={'max_new_tokens':512,
                          'temperature':0.8})

In [22]:
retriever = vector_store.as_retriever(search_kwargs={'k': 3})

In [23]:
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="map_reduce",
    retriever=retriever
)

In [21]:
query = "what causes a heart attack?"
print("Q:", query)
print("A:", qa.invoke(query))

Q: what causes a heart attack?


Number of tokens (600) exceeded maximum context length (512).
Number of tokens (601) exceeded maximum context length (512).
Number of tokens (602) exceeded maximum context length (512).
Number of tokens (603) exceeded maximum context length (512).
Number of tokens (604) exceeded maximum context length (512).
Number of tokens (605) exceeded maximum context length (512).
Number of tokens (606) exceeded maximum context length (512).
Number of tokens (607) exceeded maximum context length (512).
Number of tokens (608) exceeded maximum context length (512).
Number of tokens (609) exceeded maximum context length (512).
Number of tokens (610) exceeded maximum context length (512).
Number of tokens (611) exceeded maximum context length (512).
Number of tokens (612) exceeded maximum context length (512).
Number of tokens (613) exceeded maximum context length (512).
Number of tokens (614) exceeded maximum context length (512).
Number of tokens (615) exceeded maximum context length (512).
Number o

A: {'query': 'what causes a heart attack?', 'result': "\nA person's\nThe patient 1:\n\n\nAn actual heart attack has several ways, thank you have provided by:\nOf course of the question\nAtheros\nan \nAtheros. What is not Helpful\nWhile\nA heart attack and comments (usefuller:\nIt does not available on a fellow will come from Experts:\nAn actual heart muscle\nThe heart attack occurs when you can cause an houring Questions\nA person does not found in the question\n\n\nAtheros:\nAnother information is provided by a friend, Thank you want to this question. What do you are highly likely involves a question\nAtheros\nIf you need to the following pieces:\nan in question is below\nThe answer:\nA heart attack may include references and Additional Question\nA person does not Helpful Answered • Examples\nA heart attack?\nA person here, please. What do not possible or Comments\nAtheros\nTo\nAthermalory Scribe content\nAn actual heart attack?\nI don's\nA heart musclears\nYou have been studied the q