### Midterm project for family caregivers

In [1]:
!pip install -qU langchain_openai langchain_core langchain langchain_community langchain-text-splitters

In [2]:
!pip install -qU langchain-qdrant==0.2.0

In [3]:
!pip install -qU faiss-cpu python-pptx==1.0.2 nltk==3.9.1 pymupdf beautifulsoup4 lxml 

In [4]:
!pip install -qU tiktoken pymupdf==1.25.2

In [5]:
!pip install -qU langgraph

In [6]:
!pip install xmltodict




In [11]:
import os
from getpass import getpass
os.environ["OPENAI_API_KEY"] = getpass("Please enter your OpenAI API key!")

In [22]:
os.environ["TAVILY_API_KEY"] = getpass("TAVILY_API_KEY")

In [10]:
from langchain_openai import ChatOpenAI

openai_chat_model = ChatOpenAI(model="gpt-4o-mini")

OpenAIError: The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable

In [8]:
!mkdir data


mkdir: cannot create directory ‘data’: File exists


In [9]:
!curl https://www.ninds.nih.gov/health-information/disorders/amyotrophic-lateral-sclerosis-als -o data/overview.html
!curl https://www.als.org/understanding-als/symptoms-diagnosis -o data/symotoms.html
!curl https://www.mayoclinic.org/diseases-conditions/amyotrophic-lateral-sclerosis/symptoms-causes/syc-20354022 -o data/mayo-als.html

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   118  100   118    0     0   1095      0 --:--:-- --:--:-- --:--:--  1102
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 52365    0 52365    0     0   173k      0 --:--:-- --:--:-- --:--:--  173k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  558k    0  558k    0     0   790k      0 --:--:-- --:--:-- --:--:--  790k


In [85]:
import os

print("Files in data/:", os.listdir("data/"))

Files in data/: ['symotoms.html', 'overview.html', 'mayo-als.html']


In [11]:
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.document_loaders import BSHTMLLoader

path = "data/"
text_loader = DirectoryLoader(path, glob="*.html", loader_cls=BSHTMLLoader)

docs = text_loader.load()

In [12]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 750,
    chunk_overlap  = 20,
    length_function = len
)
split_documents =  text_splitter.split_documents(docs)

In [86]:
print(len(split_documents))

48


In [14]:
from langchain_openai.embeddings import OpenAIEmbeddings

embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")

In [None]:
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

client = QdrantClient(":memory:")

client.create_collection(
    collection_name="ai_caregiver",
    vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
)

vector_store = QdrantVectorStore(
    client=client,
    collection_name="ai_caregiver",
    embedding=embedding_model,
)



In [75]:
_ = vector_store.add_documents(documents=split_documents)

In [76]:
retriever = vector_store.as_retriever(search_kwargs={"k": 5})

In [78]:
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever

def retrieve(state):
    retrieved_docs = retriever.invoke(state["question"])
    return {"context": retrieved_docs}

In [79]:
from langchain_core.prompts import PromptTemplate

RAG_template = """
Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say "I don't know, would you like to talk to a care coach?", don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.

{context}

Question: {question}

Helpful Answer:"""

rag_prompt = PromptTemplate.from_template(RAG_template) 

In [80]:
def generate(state):
    docs_content = "\n\n".join([doc.page_content for doc in state["context"]])
    messages = rag_prompt.format_prompt(context=docs_content, question=state["question"])
    respose = openai_chat_model.invoke(messages)
    return {"response": respose.content}

In [81]:
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict
from langchain_core.documents import Document

class State(TypedDict):
  question: str
  context: List[Document]
  response: str

In [82]:
graph_builder = StateGraph(State).add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve");
graph = graph_builder.compile();

In [91]:
response = graph.invoke({"question": "what are the symptoms of ALS?"})

In [92]:
response["response"];
print(response["response"])

Symptoms of ALS can vary but typically include gradual onset of muscle weakness, trouble walking, tripping, hand weakness, slurred speech, and muscle cramps or twitching. Other symptoms may involve uncontrollable emotional outbursts and difficulties with chewing or swallowing. Importantly, ALS does not affect the senses or bladder control.


In [None]:
from langchain_core.tools import tool
from langchain_core.messages import HumanMessage

@tool
def ai_rag_tool(question: str) -> str:
    """Answer questions about ALS based on the retrieved documents. Input should be a fully formed question."""
    response = graph.invoke({"question": question})
    return{
        "messages": [HumanMessage(content=response["response"])],
        "context": response["context"],
    }

In [54]:
from langchain_community.utilities.pubmed import PubMedAPIWrapper
from langchain_core.tools import tool
from langchain_core.messages import HumanMessage
from langchain_community.tools.tavily_search import TavilySearchResults


tavily_tool = TavilySearchResults(max_results=5)

# ✅ Initialize the PubMed API Wrapper
pubMed = PubMedAPIWrapper()  # Add API key if available

# ✅ Define the tool using @tool decorator
@tool
def pubmed_search_tool(query: str) -> dict:
    """Searches for academic and medical papers on PubMed based on a query string."""
    response = pubMed.run(query)  # Call PubMed API
    
    return {
        "messages": [HumanMessage(content=response)],
        "context": response,  # You may refine this further if needed
    }

In [55]:
##tool_belt = [ai_rag_tool, pubmed_tool]
tool_belt = [tavily_tool]

In [56]:
from langchain_openai.chat_models import ChatOpenAI

model = ChatOpenAI(model="gpt-4o", temperature=0.0)

model.bind(tools=tool_belt);

In [57]:
from typing import TypedDict, Annotated
from langgraph.graph.message import add_messages
import operator
from langchain_core.messages import BaseMessage
from langchain_core.documents import Document;


class AgentState(TypedDict):
  messages: Annotated[list, add_messages]
  context: list[Document]

In [58]:
from langgraph.prebuilt import ToolNode

def call_model(state):
  messages = state["messages"]
  response = model.invoke(messages)
  return {"messages" : [response]}

tool_node = ToolNode(tool_belt)

In [59]:
from langgraph.graph import StateGraph, END

uncompiled_graph = StateGraph(AgentState)

uncompiled_graph.add_node("agent", call_model)
uncompiled_graph.add_node("action", tool_node)

<langgraph.graph.state.StateGraph at 0x7f9f66ffd0d0>

In [60]:
uncompiled_graph.set_entry_point("agent")

<langgraph.graph.state.StateGraph at 0x7f9f66ffd0d0>

In [61]:
def should_continue(state):
  last_message = state["messages"][-1]

  if last_message.tool_calls:
    return "action"

  return END

uncompiled_graph.add_conditional_edges(
    "agent",
    should_continue
)

<langgraph.graph.state.StateGraph at 0x7f9f66ffd0d0>

In [62]:
uncompiled_graph.add_edge("action", "agent")
compiled_graph = uncompiled_graph.compile()

In [63]:
from langchain_core.messages import HumanMessage

inputs = {"messages" : [HumanMessage(content="Who is the current captain of the Winnipeg Jets?")]}

async for chunk in compiled_graph.astream(inputs, stream_mode="updates"):
    for node, values in chunk.items():
        print(f"Receiving update from node: '{node}'")
        print(values["messages"])
        print("\n\n")

print(values["messages"][-1].tool_calls) 

Receiving update from node: 'agent'
[AIMessage(content='As of my last update, the Winnipeg Jets named Adam Lowry as their captain in September 2023. Please verify with a current source to ensure this information is still accurate.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 37, 'prompt_tokens': 17, 'total_tokens': 54, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_523b9b6e5f', 'finish_reason': 'stop', 'logprobs': None}, id='run-585797c8-af52-43bb-898b-dc4ffa2de766-0', usage_metadata={'input_tokens': 17, 'output_tokens': 37, 'total_tokens': 54, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})]



[]


In [None]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

simple_rag = (
    {"context": retriever, "question": RunnablePassthrough()}  # Retrieve and pass query
    | rag_prompt  # Insert into prompt
    | openai_chat_model  # Pass through LLM
    | StrOutputParser()  # Parse output into readable format
)

In [33]:
result = simple_rag.invoke("What is the capital of France?")
print(result)

I don't know, would you like to talk to a care coach?
