In [20]:
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.indexes import VectorstoreIndexCreator
from langchain.llms import OpenAI,Ollama
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

In [3]:
# web based loader
from langchain_community.document_loaders import WebBaseLoader
import bs4

## load,chunk and index the content of the html page

loader=WebBaseLoader(web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
                     bs_kwargs=dict(parse_only=bs4.SoupStrainer(
                         class_=("post-title","post-content","post-header")

                     )))

text_documents=loader.load()

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [4]:
text_documents

[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='\n\n      LLM Powered Autonomous Agents\n    \nDate: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng\n\n\nBuilding agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview#\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n\nPlanning\n\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistake

In [21]:
## Pdf reader this will load the data 
from langchain_community.document_loaders import PyPDFLoader
loader=PyPDFLoader('./resourcev1.pdf')
docs=loader.load()

In [4]:
docs

[Document(metadata={'producer': 'macOS Version 14.1 (Build 23B74) Quartz PDFContext', 'creator': 'Preview', 'creationdate': "D:20250607104340Z00'00'", 'title': 'resource', 'author': 'Phoenix OnWork', 'moddate': "D:20250607104340Z00'00'", 'source': './resourcev1.pdf', 'total_pages': 201, 'page': 0, 'page_label': '1'}, page_content="15 - Cardiovascular Disorders\nChapter 206. Approach to the Cardiac Patient\nIntroduction\nSymptoms or the physical examination may suggest a cardiovascular disorder. For confirmation, selectednoninvasive and invasive tests are usually done (see Ch. 207).\nHistory\nA thorough history is fundamental; it cannot be replaced by testing. The history must include a thoroughsystems review because many symptoms apparently occurring in other systems (eg, dyspnea,indigestion) are often caused by cardiac disease. A family history is taken because many cardiacdisorders (eg, coronary artery disease, systemic hypertension, bicuspid aortic valve, hypertrophiccardiomyopathy,

### now we need to create pdf into chunks and then store into DB

In [22]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200)
documents=text_splitter.split_documents(docs)

## visualising the data
documents[:5]

[Document(metadata={'producer': 'macOS Version 14.1 (Build 23B74) Quartz PDFContext', 'creator': 'Preview', 'creationdate': "D:20250607104340Z00'00'", 'title': 'resource', 'author': 'Phoenix OnWork', 'moddate': "D:20250607104340Z00'00'", 'source': './resourcev1.pdf', 'total_pages': 201, 'page': 0, 'page_label': '1'}, page_content='15 - Cardiovascular Disorders\nChapter 206. Approach to the Cardiac Patient\nIntroduction\nSymptoms or the physical examination may suggest a cardiovascular disorder. For confirmation, selectednoninvasive and invasive tests are usually done (see Ch. 207).\nHistory\nA thorough history is fundamental; it cannot be replaced by testing. The history must include a thoroughsystems review because many symptoms apparently occurring in other systems (eg, dyspnea,indigestion) are often caused by cardiac disease. A family history is taken because many cardiacdisorders (eg, coronary artery disease, systemic hypertension, bicuspid aortic valve, hypertrophiccardiomyopathy,

In [8]:
documents[0]

Document(metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-08-03T00:07:29+00:00', 'author': '', 'keywords': '', 'moddate': '2023-08-03T00:07:29+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': '../attention.pdf', 'total_pages': 15, 'page': 0, 'page_label': '1'}, page_content='Provided proper attribution is provided, Google hereby grants permission to\nreproduce the tables and figures in this paper solely for use in journalistic or\nscholarly works.\nAttention Is All You Need\nAshish Vaswani∗\nGoogle Brain\navaswani@google.com\nNoam Shazeer∗\nGoogle Brain\nnoam@google.com\nNiki Parmar∗\nGoogle Research\nnikip@google.com\nJakob Uszkoreit∗\nGoogle Research\nusz@google.com\nLlion Jones∗\nGoogle Research\nllion@google.com\nAidan N. Gomez∗ †\nUniversity of Toronto\naidan@cs.toronto.edu\nŁukasz Kaiser∗\nGoogle Brain\nlukas

In [23]:
## If we want to make use of openAI for embedding 

# ## Vector Embedding And Vector Store
# from langchain_openai import OpenAIEmbeddings
# from langchain_community.vectorstores import Chroma
# db = Chroma.from_documents(documents,OpenAIEmbeddings())



## Vector Embedding And Vector Store
from langchain_ollama import OllamaEmbeddings
from langchain_community.vectorstores import Chroma
db = Chroma.from_documents(documents[:],OllamaEmbeddings(
    model="nomic-embed-text",
    base_url="http://host.docker.internal:11434" 
))




In [27]:
## vector search

query="What is sun and what is best of ot"
docs=db.similarity_search(query,k=3)
print(docs[0].page_content)

3.2 Attention
An attention function can be described as mapping a query and a set of key-value pairs to an output,
where the query, keys, values, and output are all vectors. The output is computed as a weighted sum
3


In [None]:
# faiss vector search 
from langchain_community.vectorstores import FAISS

db = FAISS.from_documents(documents[:],OllamaEmbeddings(
    model="nomic-embed-text",
    base_url="http://host.docker.internal:11434" 
))

query="who is sun and what is best of ot"
docs=db.similarity_search(query,k=3)
print(docs[0].page_content)

best models from the literature. We show that the Transformer generalizes well to
other tasks by applying it successfully to English constituency parsing both with
large and limited training data.
∗Equal contribution. Listing order is random. Jakob proposed replacing RNNs with self-attention and started
the effort to evaluate this idea. Ashish, with Illia, designed and implemented the first Transformer models and
has been crucially involved in every aspect of this work. Noam proposed scaled dot-product attention, multi-head
attention and the parameter-free position representation and became the other person involved in nearly every
detail. Niki designed, implemented, tuned and evaluated countless model variants in our original codebase and
tensor2tensor. Llion also experimented with novel model variants, was responsible for our initial codebase, and
efficient inference and visualizations. Lukasz and Aidan spent countless long days designing various parts of and


# Retrevier and chain with langchain

In [30]:
from langchain.chains import RetrievalQA
from langchain_core.prompts import ChatPromptTemplate

prompt=ChatPromptTemplate.from_template(
    """

    answer the following question as best you can, stick to the, I will give the symptoms
    of patient and you have to suggest possible diseases and medication if possible
    <context>
    {context}
    </context>
    question: {input}

    """
)



In [31]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.llms import Ollama


llm=Ollama(model="Gemma3:1b",base_url="http://host.docker.internal:11434",temperature=0.9)

documents_chain=create_stuff_documents_chain(llm=llm,prompt=prompt)

#qa_chain=RetrievalQA.from_chain_type(llm=llm,retriever=db.as_retriever(),chain_type_kwargs={"prompt":prompt})




In [32]:
"""
Retrievers: A retriever is an interface that returns documents given
 an unstructured query. It is more general than a vector store.
 A retriever does not need to be able to store documents, only to 
 return (or retrieve) them. Vector stores can be used as the backbone
 of a retriever, but there are other types of retrievers as well. 
 https://python.langchain.com/docs/modules/data_connection/retrievers/   
"""

retriever=db.as_retriever()
retriever

VectorStoreRetriever(tags=['Chroma', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0xffff60845ab0>, search_kwargs={})

In [33]:
"""
Retrieval chain:This chain takes in a user inquiry, which is then
passed to the retriever to fetch relevant documents. Those documents 
(and original inputs) are then passed to an LLM to generate a response
https://python.langchain.com/docs/modules/chains/
"""
from langchain.chains import create_retrieval_chain
retrieval_chain=create_retrieval_chain(retriever,documents_chain)

In [34]:
response=retrieval_chain.invoke({"input": "heart, liver, and kidney disorders and cancer including any related surgery or radiation therapy)."})


In [35]:
response['answer']

"Okay, let's analyze the provided context and formulate possible diagnoses and medications based on the information.\n\n**Possible Diseases and Medications:**\n\nGiven the patient's history and presented symptoms (generalized, slowly developing edema), here’s a breakdown of potential diagnoses and potential medication suggestions:\n\n**1. Possible Diseases & Conditions:**\n\n*   **Heart Failure:** The history of edema, coupled with the gradual onset and potentially slow development, strongly points towards heart failure. The underlying heart condition could be causing impaired venous return, leading to edema.\n*   **Liver Disease:**  Edema can be a symptom of various liver conditions, including cirrhosis, hepatitis, or dysfunction. The history of chronic alcohol abuse or kidney disorders needs consideration.\n*   **Kidney Disease:** Similar to liver disease, kidney dysfunction can contribute to edema.  The history of kidney disorders, coupled with the gradual onset, warrants investigat

In [7]:
from langchain.agents import initialize_agent, AgentType
from langchain.agents import Tool
from langchain.tools.serpapi.tool import SerpAPIWrapper
from langchain_community.llms import OpenAI

# Set up your language model
# llm = OpenAI(temperature=0)

# Create a custom WebMD Search Tool using SerpAPI
search = SerpAPIWrapper()  # You need to set SERPAPI_API_KEY in your environment

webmd_tool = Tool(
    name="WebMD Search",
    func=lambda q: search.run(f"site:webmd.com {q}"),
    description="Search WebMD for medical information related to user query"
)

# Initialize the agent with the tool
agent = initialize_agent(
    tools=[webmd_tool],
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True
)

# Test the agent
response = agent.run("What are the symptoms of pneumonia?")
print(response)


ModuleNotFoundError: No module named 'langchain.tools.serpapi'

In [None]:
#os.environ['OPEN_API_KEY']=os.getenv('open_api_key')

import os 
# for tracing using langsmith

os.environ['LANGCHAIN_TRACING_V2']='true'
os.environ['LANGCHAIN_API_KEY']=os.getenv('langchain_api_key')

In [11]:
from langchain_community.utilities import SerpAPIWrapper

In [12]:
from langchain_community.agent_toolkits.load_tools import load_tools
import json
import os

with open("./key.json", "r") as f:
    secrets = json.load(f)

# Access individual secrets
langchain_api_key = secrets["serp_api"]

JSONDecodeError: Expecting property name enclosed in double quotes: line 24 column 142 (char 1293)

JSONDecodeError: Expecting value: line 1 column 1 (char 0)