In [1]:
# Show the kernel's working directory before it is changed.
%pwd

'c:\\Users\\a.dusukov\\Desktop\\chatbot\\Medical-Chatbot-Generative-AI\\research'

In [2]:
import os

# Step up from research/ to the project root so that relative paths such as
# 'Data/' resolve. The check on the current directory name makes the cell
# idempotent: re-running it no longer climbs one more level each time.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [3]:
# Confirm the working directory after the chdir above.
%pwd

'c:\\Users\\a.dusukov\\Desktop\\chatbot\\Medical-Chatbot-Generative-AI'

In [15]:
pip install --upgrade sentence-transformers

Collecting sentence-transformers
  Using cached sentence_transformers-3.3.1-py3-none-any.whl.metadata (10 kB)
Using cached sentence_transformers-3.3.1-py3-none-any.whl (268 kB)
Installing collected packages: sentence-transformers
  Attempting uninstall: sentence-transformers
    Found existing installation: sentence-transformers 2.2.2
    Uninstalling sentence-transformers-2.2.2:
      Successfully uninstalled sentence-transformers-2.2.2
Successfully installed sentence-transformers-3.3.1
Note: you may need to restart the kernel to use updated packages.


In [5]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [6]:
def load_pdf_file(data):
    """Load the PDF files matched by ``*.pdf`` in the directory ``data``.

    Args:
        data: Directory path handed to ``DirectoryLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` produces, with each matched
        file read through ``PyPDFLoader``.
    """
    pdf_loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    return pdf_loader.load()

In [7]:
# Load the PDFs under Data/ (path is relative to the current working directory).
extracted_data = load_pdf_file(data = 'Data/')

In [8]:
# Peek at one loaded document (index 2) to check its text and metadata.
print(extracted_data[2])

page_content='The GALE
ENCYCLOPEDIA
of MEDICINE
SECOND EDITION
JACQUELINE L. LONGE, EDITOR
DEIRDRE S. BLANCHFIELD, ASSOCIATE EDITOR
VOLUME
A-B
1' metadata={'source': 'Data\\Medical_book.pdf', 'page': 2}


In [9]:
def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into overlapping text chunks.

    Args:
        extracted_data: Documents to split, e.g. the result of
            ``load_pdf_file``.
        chunk_size: Chunk size passed to ``RecursiveCharacterTextSplitter``.
            Defaults to 500, the value that was previously hard-coded.
        chunk_overlap: Overlap between consecutive chunks passed to the
            splitter. Defaults to 20, the value that was previously
            hard-coded.

    Returns:
        The chunks returned by ``split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)

In [10]:
# Chunk every loaded page and report how many chunks were produced.
text_chunks = text_split(extracted_data)
print(len(text_chunks))

5860


In [11]:
from langchain.embeddings import HuggingFaceEmbeddings

In [12]:
def download_hugging_face_embeddings(model_name="sentence-transformers/all-MiniLM-L6-v2"):
    """Create the HuggingFace embedding model used by this notebook.

    Args:
        model_name: Model identifier passed to ``HuggingFaceEmbeddings``.
            Defaults to the model that was previously hard-coded. This
            notebook measures 384-dimensional vectors for the default and
            creates the Pinecone index with dimension 384, so a different
            model may need a matching index dimension.

    Returns:
        The ``HuggingFaceEmbeddings`` instance.
    """
    return HuggingFaceEmbeddings(model_name=model_name)

In [16]:
# Build the embedding model; it is used both to index the chunks and to embed queries.
embeddings = download_hugging_face_embeddings()

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [19]:
# Sanity check: confirm that the sentence-transformers package can be imported.
from sentence_transformers import SentenceTransformer
print("Sentence Transformers package imported successfully!")

Sentence Transformers package imported successfully!


In [20]:
# Embed a sample string and print the vector length; this is the dimension
# the Pinecone index has to be created with.
query_result = embeddings.embed_query("Hi there!")
print(len(query_result))

384


In [21]:
# Show the first five components of the sample embedding.
query_result[:5]

[-0.09151123464107513,
 0.02514786832034588,
 0.0020471480675041676,
 0.00518106808885932,
 -0.015277120284736156]

In [46]:
from dotenv import load_dotenv
# Load variables from a .env file so the API keys can be read from the
# environment instead of being written into the notebook.
load_dotenv()

True

In [47]:
# Read the API keys from the environment. os.environ.get returns None when a
# variable is not set, so a missing key only surfaces later.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

In [26]:
from pinecone.grpc import PineconeGRPC
from pinecone import ServerlessSpec

# Keep the gRPC client class under its own name. Another cell imports
# langchain's `Pinecone` vector store, and aliasing the client to that same
# name would make `Pinecone` depend on which cell happened to run last.
pc = PineconeGRPC(api_key=PINECONE_API_KEY)

index_name = "medicalbot"

# Only create the index when it does not exist yet, so the cell can be
# re-executed without failing on an already existing index.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=384,  # length of the embedding vectors measured in this notebook
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1",
        ),
    )

In [48]:
import os

# Re-export the keys for libraries that read them from the environment.
# Assigning None to os.environ raises an opaque TypeError, so report a missing
# key explicitly instead.
for key_name, key_value in (
    ("PINECONE_API_KEY", PINECONE_API_KEY),
    ("OPENAI_API_KEY", OPENAI_API_KEY),
):
    if not key_value:
        raise ValueError(
            f"{key_name} is not set; define it in the .env file or the environment."
        )
    os.environ[key_name] = key_value

In [29]:
!pip install langchain[pinecone] pinecone-client





In [31]:
from langchain.vectorstores import Pinecone

In [35]:
# Embed every chunk and upload it to the Pinecone index.
# NOTE(review): re-running this cell presumably uploads all chunks again;
# confirm before re-executing against a populated index.
docsearch = Pinecone.from_documents(documents=text_chunks, index_name=index_name, embedding=embeddings)

In [36]:
# Attach to the index by name, using the same embedding model for queries.
docsearch = Pinecone.from_existing_index(index_name=index_name, embedding=embeddings)

In [37]:
# Display the vector store object to confirm it was created.
docsearch

<langchain_community.vectorstores.pinecone.Pinecone at 0x17f93b2bc10>

In [39]:
# Similarity-search retriever configured with k = 3 results per query.
retriever = docsearch.as_retriever(search_type = "similarity", search_kwargs = {"k": 3})

In [40]:
# Try the retriever on its own with a sample question, before adding the LLM.
retriever_docs = retriever.invoke("What is the reason of headache")

In [42]:
# Inspect the retrieved documents and their metadata.
retriever_docs

 Document(metadata={'page': 304.0, 'source': 'Data\\Medical_book.pdf'}, page_content='flashing lights, that some people have 10-30 min-\nutes before a migraine attack.\nInflammation —Pain, redness, swelling, and heat\nthat usually develop in response to injury or illness.\nTreatment You Need. New York, NY: The Guilford Press,\n1995.\nORGANIZATIONS\nAmerican Council for Headache Education (ACHE). 19 Man-\ntua Road, Mt. Royal, NJ 08061. (800) 255-2243. <http://\nwww.achenet.org>.\nNational Headache Foundation. 428 W. St. James Place, Chica-\ngo, IL 60614. (800) 843-2256. <http://www.head'),
 Document(metadata={'page': 584.0, 'source': 'Data\\Medical_book.pdf'}, page_content='no evidence that head injury causes brain tumors, but\nresearchers are trying to determine the relationship, if\nany, between brain tumors and viruses, family history,\nand long-term exposure to electromagnetic fields.\nSymptoms do not usually appear until the tumor\ngrows large enough to displace, damage, or destroy

In [49]:
from langchain_openai import OpenAI
# LLM that writes the final answer. With temperature = 0.7 the output is
# sampled, so answers can differ between runs; max_tokens = 500 is passed as
# the limit on the generated completion.
llm = OpenAI(temperature = 0.7, max_tokens = 500)

In [55]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate


# System message for the question-answering chain. "{context}" is the
# placeholder that receives the retrieved documents. The fallback wording
# previously contained the typo "pfd", which the model could repeat verbatim.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that "
    "the question is not connected to the PDF files uploaded. "
    "Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)


# Chat prompt: the system instructions followed by the user's question,
# which is filled in through the "{input}" placeholder.
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)

In [56]:
# Chain that combines the LLM with the prompt; the prompt's {context} slot is
# where the documents go.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
# Full RAG pipeline: the retriever feeds the question-answering chain.
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [59]:
# Run a sample question through the whole pipeline. The typo "apllied" was
# corrected because the question text is what gets embedded for retrieval.
response = rag_chain.invoke({"input": "Is there a limit of times X-rays can be applied on a human body?"})
print(response["answer"])



Yes, excessive or repeated doses of radiation can cause changes in other types of body tissue. However, for bone x-rays, the amount of radiation is small and the risk of harm is very low. The patient can immediately resume normal activities after the test and there is no pain or discomfort associated with it.
