Load API keys from environment variables (via a local `.env` file)

In [1]:
from dotenv import load_dotenv
import os

# Load the local .env file, then look up the two provider keys used further down.
# Each lookup yields None when the variable is not set.
load_dotenv(dotenv_path=".env")

OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY")

Load our document corpus from a file (`fetch_data.ipynb` can be used to generate it).

In [2]:
myfile = "source_documents.json"

import json
from langchain.schema import Document

# Load the JSON corpus. The encoding is pinned to UTF-8 because the text contains
# non-ASCII characters (e.g. curly apostrophes) and the platform default
# encoding used by open() is not UTF-8 on every system.
with open(myfile, "r", encoding="utf-8") as file:
    data = json.load(file)

# Every record provides "page_content" and "metadata"; wrap each one in a
# LangChain Document so the splitter and vector store can consume it.
docs = [
    Document(page_content=item["page_content"], metadata=item["metadata"])
    for item in data
]

print(f"loaded {len(docs)} docs")

loaded 489 docs


Split the documents into reasonably sized chunks that work for most embedding models

In [12]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Character-based splitter: chunks of up to 1500 characters with 150 characters of overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)

# One Document per chunk. Each chunk inherits its parent's metadata and gains a
# "chunk_id" holding its 0-based position within that parent document.
split_docs = [
    Document(
        page_content=chunk_text,
        metadata={**source_doc.metadata, "chunk_id": chunk_index},
    )
    for source_doc in docs
    for chunk_index, chunk_text in enumerate(
        text_splitter.split_text(source_doc.page_content)
    )
]

print(f"len(docs): {len(docs)}, len(split_docs):{len(split_docs)}")
print(split_docs[0])

len(docs): 489, len(split_docs):2756
page_content='alzheimer's disease and dementia | alzheimer's disease and dementia | cdc     alzheimer's disease and dementia alzheimer's basics learn about signs and symptoms of alzheimer's disease and who is affected. aug. 15, 2024 dementia basics learn about common types of dementia, signs and symptoms, and risk factors. aug. 17, 2024 signs and symptoms of alzheimer's learn how to recognize the early signs of alzheimer's disease. signs and symptoms of dementia learn what early signs and symptoms of dementia to look out for. tools and resources find a variety of resources about alzheimer’s disease and healthy aging. reducing risk learn what lifestyle behaviors can reduce the risk of developing dementia. additional topics healthy aging at any age information to help you stay healthy and strong throughout your life. sept. 3, 2024 alzheimer's disease program evidence-based, scientific information to educate, inform, and assist translating research int

Set up embeddings - we'll use OpenAI's text-embedding-3-large

In [5]:
from langchain_openai import OpenAIEmbeddings
# Name of the OpenAI embedding model used for both indexing and querying.
embedding_model = "text-embedding-3-large"

# Embeddings client, authenticated with the key read from the environment earlier.
openai_embeddings = OpenAIEmbeddings(
    openai_api_key=OPENAI_API_KEY,
    model=embedding_model,
)


Let's add the docs to a vector store. Make sure Qdrant is running first (see README.md for more details). The collection only needs to be created once; after that we can re-use it.

In [13]:
from langchain_qdrant import QdrantVectorStore
# Address of the local Qdrant instance and the collection that holds our chunks.
url = "http://localhost:6333"
collection_name = "AlzheimersCare"

# Embed every chunk and store it in the collection.
# NOTE(review): re-running this cell presumably embeds and inserts all chunks
# again -- confirm before re-executing against an existing collection.
qdrant_vector_store = QdrantVectorStore.from_documents(
    documents=split_docs,
    embedding=openai_embeddings,
    collection_name=collection_name,
    url=url,
    prefer_grpc=True,
)

Make sure we can load the vector store from the existing collection too

In [14]:
# Open the already-populated collection without re-adding any documents;
# `store` is the handle the retriever is built from.
store = QdrantVectorStore.from_existing_collection(
    collection_name=collection_name,
    embedding=openai_embeddings,
    url=url,
)

Test the retriever by itself

In [15]:
# Retriever over the store: "mmr" search type ('similarity' is another option),
# returning 5 documents per query.
retriever = store.as_retriever(search_kwargs={"k": 5}, search_type="mmr")

# Smoke test: the cell displays the documents retrieved for a sample question.
retriever.invoke("How does stress impact caregivers?")

[Document(metadata={'chunk_id': 2, 'url': 'https://www.webmd.com/alzheimers/signficant-other-has-alzheimers', 'title': 'When Your Partner Has Alzheimer’s ', '_id': 'daa5f3cb-2f44-4b3c-8fda-1465d2d1c80f', '_collection_name': 'AlzheimersCare'}, page_content='strain may develop from the amount of time your loved one needs, as that can impact your ability to work and earn a living. things like lost wages, or loss of social security or pension benefits, can leave caregivers struggling to make ends meet. your risk for mental health issues also goes up. research shows that alzheimer’s caregivers are more likely than other caregivers to have depression, anxiety, and a poorer quality of life. among the physical health problems you’re at greater risk of having are high blood pressure, high blood sugar, weight gain or loss, and sleep disorders. some research suggests you’re also at greater risk for developing cognitive decline and alzheimer’s yourself. caregivers at most risk for reaching stress 

Test it out in a simple RAG chain: Create a prompt, initialize an LLM, and then use the retriever in a chain

In [16]:
from langchain_core.prompts import PromptTemplate

# Prompt for the RAG chain. {context} receives the retrieved chunks and {query}
# the user's question. Each is wrapped in an XML-style tag pair; both pairs must
# be properly closed so the sections are unambiguous to the model.
RAG_PROMPT_TEMPLATE = """
You are an empathetic, kind assistant that specializes in helping informal caregivers of dementia and Alzheimer's patients
navigate the stresses and questions of everyday life. Answer the question based on the context. If the answer is not
in the context, say you don't know. Be concise and conversational, and answer in language that a high school 
graduate with no specialized training can understand.

<context>
{context}
</context>

<question>
{query}
</question>
"""

# Compile the template text into a prompt object with {context} and {query} inputs.
rag_prompt = PromptTemplate.from_template(template=RAG_PROMPT_TEMPLATE)

In [40]:
from langchain_anthropic import ChatAnthropic

# Model identifiers. Haiku is the cheaper option for prototyping; the app itself
# is meant to use the 3.5 Sonnet model.
haiku_model_id = "claude-3-haiku-20240307"
claude_3_5_sonnet_model_id = "claude-3-5-sonnet-20240620"

# Chat model used by the chain in this notebook (the prototyping model).
llm = ChatAnthropic(
    anthropic_api_key=ANTHROPIC_API_KEY,
    model=haiku_model_id,
)

In [64]:
from operator import itemgetter
from langchain_core.runnables import RunnablePassthrough

# prototype a simple function to tack on sources at the end
def add_sources(context:list[Document])->str:
    """Build an HTML "Sources: ..." line with one numbered link per document.

    Args:
        context: Retrieved documents. Each link target is read from
            ``doc.metadata["url"]``.

    Returns:
        ``"Sources: "`` followed by one ``[<a href="URL">N</a>] `` entry per
        document, where N is the document's 1-based position in ``context``.
        Returns ``""`` when ``context`` is empty or no document carries a URL.
    """
    import html  # function-scope import keeps this cell self-contained

    if not context:
        return ""

    links = []
    for position, doc in enumerate(context, start=1):
        source_url = doc.metadata.get("url")
        if not source_url:
            # A document without a URL cannot be linked; skip it rather than
            # failing the whole answer with a KeyError.
            continue
        # The URL is document metadata, not trusted markup: escape it so a quote
        # or ampersand cannot break out of the href attribute.
        safe_url = html.escape(str(source_url), quote=True)
        links.append(f'[<a href="{safe_url}">{position}</a>] ')

    if not links:
        return ""
    return "Sources: " + "".join(links)

# RAG pipeline in three stages:
#   1. retrieve documents for the "query" while carrying the query along,
#   2. pass the retrieved "context" through,
#   3. produce the LLM "response" and return it together with the "context"
#      so the caller can cite sources.
rag_chain = (
    dict(context=itemgetter("query") | retriever, query=itemgetter("query"))
    | RunnablePassthrough.assign(context=itemgetter("context"))
    | dict(response=rag_prompt | llm, context=itemgetter("context"))
)

In [67]:
answer = await rag_chain.ainvoke(input={'query':"How can caregivers manage everyday stressors?"})


In [69]:
# Show the model's reply, then the numbered source links for the retrieved documents.
answer["response"].pretty_print()
print(add_sources(context=answer["context"]))



Based on the context provided, here are some ways caregivers can manage everyday stressors:

1. Stay physically active and take care of your own health. The context mentions the importance of staying fit, taking walks, and doing at-home exercise programs to maintain physical and mental health.

2. Take breaks and get away for a while. The context suggests planning time to go out with friends, go shopping, or watch a movie to get a break from the demands of caregiving.

3. Create a sanctuary or private space in your home to take a few minutes each day to get away from the stress.

4. Seek emotional support from family, friends, support groups, or a therapist. The context emphasizes the importance of not ignoring your own feelings of grief and stress.

5. Delegate tasks and responsibilities to your support network. The context suggests assigning different roles to different people in your network, such as listeners, doers, or relaxers.

6. Don't be afraid to ask for and accept help with