In [2]:
print("OK")

OK


In [3]:
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone
import pinecone
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.llms import ctransformers

In [4]:
from dotenv import load_dotenv
import os

load_dotenv()
PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
HF_TOKEN = os.getenv('HF_TOKEN')
HUGGINGFACEHUB_API_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')

In [5]:
def load_pdf(data):
    loader = DirectoryLoader(data, glob = "*.pdf", loader_cls = PyPDFLoader)

    document = loader.load()
    return document

In [7]:
extracted_data = load_pdf("../data/")

### Create text chunks

In [8]:
def text_split(extracted_data):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size = 500, chunk_overlap = 20)
    text_chunks = text_splitter.split_documents(extracted_data)
    return text_chunks

In [9]:
text_chunks = text_split(extracted_data)

### Embedding text chunks

In [10]:
def download_hf_embeddings():
    embeddings = HuggingFaceEmbeddings(model_name = "sentence-transformers/all-MiniLM-L6-v2")
    return embeddings

In [11]:
embedding = download_hf_embeddings()

  embeddings = HuggingFaceEmbeddings(model_name = "sentence-transformers/all-MiniLM-L6-v2")


In [12]:
embedding

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

### Initialize Pinecone

In [13]:
index_name = "medical-chatbot"

In [None]:
docsearch = Pinecone.from_texts([t.page_content for t in text_chunks], embedding, index_name = index_name)

In [14]:
docsearch = Pinecone.from_existing_index(index_name, embedding)

query = "What are allergies?"

docs = docsearch.similarity_search(query, k = 3)

print(docs)

[Document(page_content='ORGANIZATIONS\nAmerican Academy of Ophthalmology. 655 Beach Street, PO\nBox 7424, San Francisco, CA 94120-7424. <http://www.eyenet.org>.KEY TERMS\nAllergen —A substance capable of inducing an\nallergic response.\nAllergic reaction —An immune system reaction to\na substance in the environment; symptomsinclude rash, inflammation, sneezing, itchy wateryeyes, and runny nose.\nConjunctiva —The mucous membrane that covers\nthe white part of the eyes and lines the eyelids.'), Document(page_content='Although environmental medicine is gaining more\nrespect within conventional medicine, detoxificationKEY TERMS\nAllergen —A foreign substance, such as mites in\nhouse dust or animal dander, that wheninhaled,causes the airways to narrow and pro-duces symptoms of asthma.\nAntibody —A protein, also called immunoglobu-\nlin, produced by immune system cells to removeantigens (the foreign substances that trigger theimmune response).\nFibromyalgia —A condition of debilitating pain,

In [15]:
prompt_template = """
Use the following pieces of information to answer the user's question
If you don't know the answer just say you don't know. Do not try to make up answers.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else:
Helpful answer: 
"""

In [22]:
PROMPT = PromptTemplate(template=prompt_template, input_variables = ["context", "question"])

### Loading the llama 3.1 model

In [17]:
from langchain_ollama.llms import OllamaLLM


model = OllamaLLM(model='llama3.1')

In [19]:
model.invoke(input="Hello world")

"Hello! It's nice to meet you. Is there something I can help you with, or would you like to chat?"

In [20]:
qa=RetrievalQA.from_chain_type(
    llm=model,
    chain_type="stuff",
    retriever=docsearch.as_retriever(search_kwargs={'k':2}),
    return_source_documents=True,
    chain_type_kwargs={"prompt": PROMPT}
)

In [21]:
user_input = input(f"Input prompt:")
result = qa({"query": user_input})
print(f"Response: {result}")

  result = qa({"query": user_input})


Response: {'query': 'what is aneurysm', 'result': 'A sac or bulge that forms because of a weak spot in the wall of an artery or heart chamber.', 'source_documents': [Document(page_content='Laurie Barclay, MD\nCerebral aneurysm\nDefinition\nA cerebral aneurysm occurs at a weak point in the\nwall of a blood vessel (artery) that supplies blood to thebrain. Because of the flaw, the artery wall bulges outwardand fills with blood. This bulge is called an aneurysm. Ananeurysm can rupture, spilling blood into the surround-ing body tissue. A ruptured cerebral aneurysm can causepermanent brain damage, disability, or death .\nDescription\nA cerebral aneurysm can occur anywhere in the brain.'), Document(page_content='Aneurysm —A sac or bulge that forms because of a\nweak spot in the wall of an artery or heart chamber.\nCardiac shunt —A defect in the wall of the heart\nthat allows blood from different chambers to mix.\nCoronary occlusive artery disease —Blockage of\nthe arteries that supply blood t