In [1]:
import os
from uuid import uuid4
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from pinecone import Pinecone, ServerlessSpec
from langchain.llms import HuggingFaceHub
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers

In [2]:
from dotenv import load_dotenv

# Pull variables from a local .env file into the process environment so the
# os.getenv lookup in the next cell can find PINECONE_API_KEY.
load_dotenv() 

True

In [3]:
# Pinecone credential taken from the environment; None when the variable is unset.
API_KEY = os.environ.get("PINECONE_API_KEY")

In [4]:
def load_pdf(data):
    """Load the PDF files found in a directory.

    Args:
        data: Path of the directory to scan; files matching ``*.pdf`` are
            read with ``PyPDFLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` returns for that directory.
    """
    pdf_loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    return pdf_loader.load()

In [5]:
# Read the PDFs that load_pdf finds in ../data/ (path is relative to the
# notebook's working directory).
extracted_data = load_pdf("../data/")

In [6]:
def text_split(extracted_data):
    """Split loaded documents into small, slightly overlapping chunks.

    Args:
        extracted_data: Documents to split (as produced by ``load_pdf``).

    Returns:
        The chunks produced by ``RecursiveCharacterTextSplitter.split_documents``
        configured with a chunk size of 300 and an overlap of 20.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=20)
    return splitter.split_documents(extracted_data)

In [7]:
# Chunk the loaded documents and report how many chunks were produced.
text_chunks = text_split(extracted_data)
print(f"length of my chunk: {len(text_chunks)}")

length of my chunk: 9826


In [8]:
def download_hugging_face_embeddings():
    """Build the sentence-transformers embedding model used by this notebook.

    Returns:
        A ``HuggingFaceEmbeddings`` instance for
        ``sentence-transformers/all-MiniLM-L6-v2``.
    """
    return HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )

In [9]:
# Instantiate the embedding model once; the same object is handed to the
# Pinecone vector store further down.
embeddings = download_hugging_face_embeddings()

  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
  from .autonotebook import tqdm as notebook_tqdm


In [10]:
# Display the embeddings object's repr to inspect its configuration
# (the output reports a word embedding dimension of 384).
embeddings

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False, 'architecture': 'BertModel'})
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [11]:
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone, ServerlessSpec
from uuid import uuid4


For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  from langchain_pinecone.vectorstores import Pinecone, PineconeVectorStore


In [12]:
# Pinecone client, authenticated with the key read from the environment.
pc = Pinecone(api_key=API_KEY)

In [13]:
index_name = "medical-chatbot-llama2"

# Create the serverless index only when it is missing, so re-running this
# cell does not attempt to create it a second time.
existing_indexes = pc.list_indexes().names()
if index_name not in existing_indexes:
    serverless_spec = ServerlessSpec(cloud="aws", region="us-east-1")
    pc.create_index(
        name=index_name,
        dimension=384,  # same as the word_embedding_dimension shown in the embeddings repr
        metric="cosine",
        spec=serverless_spec,
    )
    

In [14]:
# One random UUID string per chunk, for use as vector ids.
uuids = [str(uuid4()) for _chunk in text_chunks]

In [None]:
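# NOTE(review): one-time ingestion step, presumably left commented out so the
# chunks are not re-uploaded on every run — confirm. No other cell in this
# notebook uploads vectors, so on a freshly created index this has to be run
# once or the retriever will have nothing to search.
#vector_store = PineconeVectorStore.from_texts(
#    [t.page_content for t in text_chunks],
#    embedding=embeddings,
#    ids=uuids,
#    index_name=index_name
#)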

In [None]:
# Attach to the Pinecone index by name and wrap it in a LangChain vector
# store that uses the embedding model created earlier.
pinecone_index = pc.Index(index_name)
vector_store = PineconeVectorStore(index=pinecone_index, embedding=embeddings)

In [23]:
# Prompt text for the QA chain. It has two placeholders: {context} and
# {question}, matching the input_variables declared on the PromptTemplate
# built from this string.
prompt_template = """
You are a helpful and friendly medical assistant chatbot.

Instructions:
1. Use ONLY the context below to answer medical questions.
2. If the user greets you or asks small talk, respond naturally.
3. If the context does not have the answer, say: "I'm not sure based on my sources."
4. Do not include labels like "Helpful answer" or "Unhelpful answer" — just give the clean answer.

Context: {context}
Question: {question}

Answer:
"""


In [24]:
# Wrap the raw template text in a PromptTemplate with its two placeholders.
PROMPT = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"],
)
# Keyword-argument bundle carrying the custom prompt, in the shape that
# RetrievalQA.from_chain_type accepts through its chain_type_kwargs parameter.
chain_type_kwargs = dict(prompt=PROMPT)

In [25]:
# Local quantized Llama-2 chat model loaded through CTransformers.
llm = CTransformers(
    model="../model/llama-2-7b-chat.ggmlv3.q2_K.bin",
    model_type="llama",
    config={"max_new_tokens": 1024, "temperature": 0.8},
)

In [26]:
# Build the retrieval-augmented QA chain.
# chain_type_kwargs carries the custom PROMPT: without it the "stuff" chain
# silently falls back to LangChain's default prompt and the instructions in
# prompt_template are never applied.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vector_store.as_retriever(search_kwargs={"k": 4}),  # 4 chunks per query
    return_source_documents=False,
    chain_type_kwargs=chain_type_kwargs,
)

In [27]:
query = "what causes a heart attack?"
# Calling the chain object directly (qa({...})) is deprecated in LangChain;
# invoke() is the supported entry point and returns the same output dict.
result = qa.invoke({"query": query})
answer = result["result"]

print("Q:", query)
print("A:", answer)


Q: what causes a heart attack?
A:  A heart attack is caused by the complete blockage of the blood flow in the coronary arteries, which can be due to atherosclerosis or a blood clot.
