In [13]:
import os

# Move up to the project root, but only when we are not already there.
# An unconditional chdir climbs one more level every time this cell is re-run,
# which breaks the relative 'Data/' path used when the PDFs are loaded later.
if not os.path.isdir("Data"):
    os.chdir("../")

In [14]:
# Show the current working directory to confirm where relative paths resolve.
%pwd


'D:\\medical_chatbot'

In [15]:
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [16]:
# Read the source PDFs from disk.
def load_file(data):
    """Load the PDF files found in a directory.

    Args:
        data: Directory path handed to ``DirectoryLoader``; files are selected
            with the ``*.pdf`` glob and parsed by ``PyPDFLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` returns for the matched files.
    """
    pdf_loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    return pdf_loader.load()

In [17]:
# Load the PDFs under Data/ (resolved against the current working directory).
extracted_data = load_file(data='Data/')

In [18]:
#extracted_data

In [19]:
# Split the loaded documents into overlapping chunks.
def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split documents into chunks with ``RecursiveCharacterTextSplitter``.

    Args:
        extracted_data: Documents to split, passed straight to
            ``split_documents``.
        chunk_size: ``chunk_size`` given to the splitter. Defaults to 500,
            the value this notebook originally hard-coded.
        chunk_overlap: ``chunk_overlap`` given to the splitter. Defaults to
            20, the value this notebook originally hard-coded.

    Returns:
        The chunks returned by ``split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)

In [20]:
# Chunk the loaded documents and report how many chunks were produced.
text_chunks = text_split(extracted_data)
print(f"Len {len(text_chunks)}")

Len 5860


In [21]:
#text_chunks

In [22]:
from langchain.embeddings import HuggingFaceEmbeddings

In [23]:
# Build the embedding model.
def get_hugging_face_embeddings(model_name='sentence-transformers/all-MiniLM-L6-v2'):
    """Create a ``HuggingFaceEmbeddings`` instance.

    Args:
        model_name: Model identifier forwarded to ``HuggingFaceEmbeddings``.
            Defaults to the model this notebook originally hard-coded, which
            produced 384-dimensional vectors in the recorded run.

    Returns:
        The constructed ``HuggingFaceEmbeddings`` object.
    """
    return HuggingFaceEmbeddings(model_name=model_name)

In [24]:
# Instantiate the embedding model once; it is reused for indexing and querying.
embeddings = get_hugging_face_embeddings()

In [25]:
# Sanity check: embed a short string and report the vector length.
query_result = embeddings.embed_query("Hello world")
print(f"Length {len(query_result)}")

Length 384


In [26]:
#query_result

In [27]:
# Load environment variables via python-dotenv; the API keys read afterwards
# are presumably supplied this way (the recorded run returned True).
from dotenv import load_dotenv
load_dotenv()

True

In [28]:
import os

# Pull the service credentials out of the environment (None when a key is unset).
PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

In [29]:
import os

# Re-export the keys into the process environment.
# Assigning None to os.environ raises an opaque "str expected, not NoneType"
# TypeError, so fail early with a message that names the missing variable.
for _key_name, _key_value in (
    ("PINECONE_API_KEY", PINECONE_API_KEY),
    ("OPENAI_API_KEY", OPENAI_API_KEY),
):
    if _key_value is None:
        raise RuntimeError(
            f"{_key_name} is not set; define it in the environment or in a .env file"
        )
    os.environ[_key_name] = _key_value

In [29]:
# Embed every chunk and add the vectors to the "medical" Pinecone index.
# NOTE(review): re-running this cell presumably uploads the chunks again --
# confirm before executing it against an index that is already populated.
from langchain_pinecone import Pinecone

docsearch = Pinecone.from_documents(
    text_chunks,
    embedding=embeddings,
    index_name="medical",
)

In [30]:
# Attach to the already-populated "medical" index without re-embedding anything.
from langchain_pinecone import Pinecone

docsearch = Pinecone.from_existing_index(embedding=embeddings, index_name="medical")

In [31]:
# Display the vector store object to confirm it was created.
docsearch

<langchain_pinecone.vectorstores.Pinecone at 0x1fea44379d0>

In [32]:
# Similarity retriever that returns the 3 closest chunks per query.
retriever = docsearch.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)

In [33]:
# Smoke-test the retriever on its own, before wiring it into the RAG chain.
retrieved_docs = retriever.invoke("What is Agency?")

In [34]:
# Inspect the documents returned for the test query.
retrieved_docs

[Document(page_content='Hormone —A chemical produced in one part of\nthe body, which travels to another part of the body\nin order to exert its effect.\n• the functioning of muscles\n• normal kidney function\n• production of blood cells\n• the normal processes involved in maintaining the skele-\ntal system\n• proper functioning of the brain and nerves\n• the normal responses of the immune system\nAldosterone, also produced by the adrenal cortex,\nplays a central role in maintaining the appropriate pro-', metadata={'page': 66, 'source': 'Data\\data.pdf'}),
 Document(page_content='KEY TERMS\nACTH—Adrenocorticotropic hormone, a hor-\nmone normally produced by the pituitary gland,\nsometimes taken as a treatment for arthritis and\nother disorders.\nAntibody —An immune system protein which\nbinds to a substance from the environment.\nNSAIDs—Non-steroidal antiinflammatory drugs,\nincluding aspirin and ibuprofen.\nTracheostomy tube—A tube which is inserted into\nan incision in the trachea (tr

In [35]:
from langchain_openai import OpenAI

# LLM that writes the final answers: max_tokens=500, temperature=0.4.
llm = OpenAI(max_tokens=500, temperature=0.4)

In [36]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate


# System instructions for the answering step; {context} is the placeholder
# where the retrieved chunks are inserted.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, say that you don't know. "
    "Use three sentences maximum and keep the answer concise."
    "\n\n{context}"
)

# Two-message chat prompt: the system instructions, then the user's {input}.
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{input}"),
])

In [37]:
# Build the answering chain from the LLM and prompt, then put the retriever
# in front of it to form the full RAG chain.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [41]:
# Ask a question end-to-end and print the "answer" entry of the chain's response.
response = rag_chain.invoke({"input": "what is break bone?"})
print(response["answer"])



Break bone, also known as a fracture, is a crack or break in a bone that can be caused by various factors such as wear and tear, age, or inflammation. Hormones, such as estrogen, play a role in the normal development of bones and can also affect bone health. Menopause, a stage in a woman's life when the ovaries stop producing eggs and menstruation stops, can increase the risk of osteoporosis, a disease where bones become weak and brittle. Other bone-related conditions include osteogenesis imperfecta, osteomalacia, and osteomyelitis.
