In [2]:
# Scratch cell: prints a fixed greeting and nothing else.
print("Hi")

Hi


In [3]:
# IPython magic: show the kernel's current working directory.
%pwd

'/Users/sfayaz/Documents/Personal/FullStackMLProjects/Medical-Chatbot/research'

In [4]:
import os

# The kernel starts inside the notebook's own folder, while later cells load
# the relative path "Data". Step up one level only while "Data" is not visible
# from the current directory, so re-running this cell does not keep climbing
# one directory higher each time.
if not os.path.isdir("Data"):
    os.chdir("../")

In [5]:
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter


  from .autonotebook import tqdm as notebook_tqdm


In [6]:
def load_pdf__data(data):
    """Load every ``*.pdf`` file found directly under a directory.

    Args:
        data: Directory path handed to ``DirectoryLoader``.

    Returns:
        The documents produced by ``DirectoryLoader.load()``, with each PDF
        read through ``PyPDFLoader``.
    """
    pdf_loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    return pdf_loader.load()

In [7]:
# "Data" is a relative path, so this depends on the working directory set by
# the earlier os.chdir cell.
extracted_data = load_pdf__data("Data")

In [8]:
def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into smaller overlapping chunks.

    Args:
        extracted_data: Documents to split; passed unchanged to
            ``RecursiveCharacterTextSplitter.split_documents``.
        chunk_size: ``chunk_size`` forwarded to the splitter. Defaults to
            500, the value that used to be hard-coded here.
        chunk_overlap: ``chunk_overlap`` forwarded to the splitter. Defaults
            to 20, the value that used to be hard-coded here.

    Returns:
        Whatever ``split_documents`` returns for ``extracted_data``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)

In [11]:
# Split the loaded documents and report how many chunks came out.
text_chunks = text_split(extracted_data)
print(f"Number of text chunks: {len(text_chunks)}")

Number of text chunks: 5859


In [9]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from sentence_transformers import SentenceTransformer

In [10]:
def download_hugging_face_embeddings():
    """Build the embedding model used throughout this notebook.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``sentence-transformers/all-MiniLM-L6-v2`` model.
    """
    model_id = "sentence-transformers/all-MiniLM-L6-v2"
    return HuggingFaceEmbeddings(model_name=model_id)
    

In [12]:
# Instantiate the embedding model once; later cells reuse this object for the
# query check and for the vector store.
embeddings = download_hugging_face_embeddings()

  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


In [13]:
# Embed one sample query and print the vector length; the Pinecone index
# created later in this notebook is configured with the same dimension.
query_result = embeddings.embed_query("What is a clinical trial?")
print(f"Query result embedding vector length: {len(query_result)}")

Query result embedding vector length: 384


In [14]:
from dotenv import load_dotenv

# Load variables from a local .env file (if any) before reading the keys.
load_dotenv()

# os.environ.get yields None for any key that is not set.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
# Misspelled alias of PINECONE_API_KEY, kept because the Pinecone client cell
# in this notebook still reads this name.
PINCECONE_API_KEY = PINECONE_API_KEY
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
EURI_API_KEY = os.environ.get("EURI_API_KEY")


In [15]:
from pinecone import ServerlessSpec
# Only the gRPC client is used. Importing the REST `Pinecone` class as well
# would be silently shadowed by this alias.
from pinecone.grpc import PineconeGRPC as Pinecone

pc = Pinecone(api_key=PINCECONE_API_KEY)

index_name = "medical-chatbot"

# Must equal the embedding vector length; the query check earlier in this
# notebook reports 384 for the configured embedding model.
EMBEDDING_DIMENSION = 384

# Create the serverless index only when it does not exist yet, so re-running
# this cell reuses the existing index.
if not pc.has_index(index_name):
    pc.create_index(
        name=index_name,
        dimension=EMBEDDING_DIMENSION,
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1",
        ),
    )

index = pc.Index(index_name)

In [16]:
import hashlib
from collections import Counter

from langchain_pinecone import PineconeVectorStore

# Give every chunk a deterministic id. Without explicit ids each run of this
# cell uploads all chunks again under new random ids, which is how the index
# ended up returning identical chunks several times for a single query.
# With stable ids a re-run overwrites the same vectors instead.
# NOTE: duplicates already stored by earlier runs are not removed by this;
# clear or recreate the index once to get rid of them.
chunks_seen_per_page = Counter()
chunk_ids = []
for chunk in text_chunks:
    page_key = (
        str(chunk.metadata.get("source", "")),
        str(chunk.metadata.get("page", "")),
    )
    # Ordinal of this chunk within its (source, page) group: keeps ids unique
    # even when two chunks on the same page have identical text.
    ordinal = chunks_seen_per_page[page_key]
    chunks_seen_per_page[page_key] += 1
    fingerprint = "|".join([page_key[0], page_key[1], str(ordinal), chunk.page_content])
    chunk_ids.append(hashlib.sha256(fingerprint.encode("utf-8")).hexdigest())

docSearch = PineconeVectorStore.from_documents(
    documents=text_chunks,
    index_name=index_name,
    embedding=embeddings,
    ids=chunk_ids,
)

In [17]:
# Similarity-search retriever over the vector store; k=3 asks for three hits
# per query.
retriever = docSearch.as_retriever(search_type="similarity", search_kwargs={"k":3})

In [18]:
# Spot-check retrieval with a sample question; the bare last expression makes
# the notebook display the returned documents.
retrieved_docs = retriever.invoke("what is acne?")
retrieved_docs

[Document(id='8eaf4956-a7f9-4e01-8db4-42478ab1571a', metadata={'creationdate': '2004-12-18T17:00:02-05:00', 'creator': 'PyPDF', 'moddate': '2004-12-18T16:15:31-06:00', 'page': 39.0, 'page_label': '40', 'producer': 'PDFlib+PDI 5.0.0 (SunOS)', 'source': 'Data/Medical_book.pdf', 'total_pages': 637.0}, page_content='GALE ENCYCLOPEDIA OF MEDICINE 226\nAcne\nGEM - 0001 to 0432 - A  10/22/03 1:41 PM  Page 26'),
 Document(id='9e77ccd9-f3c0-4a31-99b9-ac15c864f8b6', metadata={'creationdate': '2004-12-18T17:00:02-05:00', 'creator': 'PyPDF', 'moddate': '2004-12-18T16:15:31-06:00', 'page': 39.0, 'page_label': '40', 'producer': 'PDFlib+PDI 5.0.0 (SunOS)', 'source': 'Data/Medical_book.pdf', 'total_pages': 637.0}, page_content='GALE ENCYCLOPEDIA OF MEDICINE 226\nAcne\nGEM - 0001 to 0432 - A  10/22/03 1:41 PM  Page 26'),
 Document(id='f562761e-814c-4a6d-be54-cbfae919eab7', metadata={'creationdate': '2004-12-18T17:00:02-05:00', 'creator': 'PyPDF', 'moddate': '2004-12-18T16:15:31-06:00', 'page': 39.0, 'p

In [19]:
from euriai.langchain import create_chat_model

# Chat model used by the retrieval chain built later in this notebook.
# NOTE(review): temperature=0.7 looks high for factual medical answers —
# confirm this is intended.
chatModel = create_chat_model(
    api_key=EURI_API_KEY,
    model="gpt-4.1-nano",
    temperature=0.7
)

# Quick check that the model responds, before wiring it into the chain.
response = chatModel.invoke("What is artificial intelligence?")
print(response.content)


Artificial intelligence (AI) refers to the development of computer systems or software that can perform tasks typically requiring human intelligence. These tasks include learning from experience, understanding natural language, recognizing patterns, solving problems, making decisions, and adapting to new information. AI can be categorized into narrow AI, which is designed for specific tasks (like virtual assistants or recommendation systems), and general AI, which would have the ability to perform any intellectual task a human can do (though this level of AI is still theoretical). Overall, AI aims to create machines that can simulate aspects of human cognition to improve efficiency, automate processes, and solve complex problems.


In [20]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

In [21]:
# System instructions for the answering model. "{context}" is a template
# placeholder, not literal text.
system_prompt = (
    "You are a medical assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)


# Two-message chat template: the system instructions above, followed by the
# user's question supplied through the "{input}" placeholder.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

In [22]:
# Combine the chat model and prompt into a document-answering chain, then put
# the retriever in front of it to form the full retrieval pipeline.
question_answer_chain = create_stuff_documents_chain(chatModel,prompt)
rag_chain = create_retrieval_chain(retriever,question_answer_chain)

In [23]:
# Run the pipeline on a sample question; the generated text is read from the
# "answer" key of the result.
# NOTE: this rebinds `response`, which an earlier cell used for the direct
# chat-model reply.
response = rag_chain.invoke({"input":"What is acne and how is it treated?"})
print(response["answer"])

Acne is a skin condition characterized by pimples, blackheads, and cysts, usually occurring on the face, chest, and back. Treatment options include topical medications, oral antibiotics, hormonal therapy, and sometimes isotretinoin, depending on severity. Proper skin care and avoiding irritants also help manage the condition.
