In [1]:
## Load environment variables

import os
from dotenv import load_dotenv, find_dotenv, dotenv_values

# Find the .env file by searching upward from the working directory, then load
# it with override=True so its values replace variables already set.
dotenv_path = find_dotenv(usecwd=True)
print("dotenv_path:", dotenv_path if dotenv_path else "NOT FOUND")
load_dotenv(dotenv_path=dotenv_path, override=True)

# Parse the file separately so the key names it defines can be listed
# without printing any of the values.
if dotenv_path:
    parsed = dotenv_values(dotenv_path)
else:
    parsed = {}
print("Keys in .env:", sorted(parsed))
print("Has OPENAI_API_KEY in .env?:", "OPENAI_API_KEY" in parsed)

# Check that the key actually reached the process environment; only the
# first six characters are echoed.
val = os.environ.get("OPENAI_API_KEY")
print("Env OPENAI_API_KEY present?:", val is not None)
print("Value prefix (masked):", f"{val[:6]}…" if val else None)

# Working directory, printed to make a wrong search root easy to spot.
print("cwd:", os.getcwd())

dotenv_path: /Users/anupam/Documents/Programming/rag101/.env
Keys in .env: ['LANGSMITH_API_KEY', 'LANGSMITH_ENDPOINT', 'LANGSMITH_PROJECT', 'LANGSMITH_TRACING', 'OPENAI_API_KEY', 'POSTS_SOURCE']
Has OPENAI_API_KEY in .env?: True
Env OPENAI_API_KEY present?: True
Value prefix (masked): sk-pro…
cwd: /Users/anupam/Documents/Programming/rag101


In [2]:
# Define LLM model

import getpass, os
from langchain.chat_models import init_chat_model

# Prompt for the key only when the environment has none (unset or empty).
if os.environ.get("OPENAI_API_KEY", "") == "":
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")

# Chat model handle; `generate` calls llm.invoke(...) on it.
llm = init_chat_model(model="gpt-4o-mini", model_provider="openai")


In [3]:
# Choose embeddings

import getpass
import os

# Prompt for the key only when the environment has none (unset or empty).
if os.environ.get("OPENAI_API_KEY", "") == "":
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")

from langchain_openai import OpenAIEmbeddings

# Embedding model handed to the vector store.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

In [4]:
# Choose vector store

from langchain_core.vectorstores import InMemoryVectorStore

# In-memory store that embeds documents and queries with `embeddings`.
vector_store = InMemoryVectorStore(embedding=embeddings)

In [11]:
import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict

# Load and chunk contents of the blog.
# The SoupStrainer keeps only the elements carrying the listed CSS classes.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
docs = loader.load()

# Count across every loaded document so the line stays correct if more
# URLs are added to web_paths (and does not fail on an empty result).
print(f"Total characters: {sum(len(doc.page_content) for doc in docs)}")

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,  # chunk size (characters)
    chunk_overlap=200,  # chunk overlap (characters)
    add_start_index=True,  # track index in original document
)

all_splits = text_splitter.split_documents(docs)

# If the class filter matches nothing, the loader yields no text and the
# index would be silently empty. Fail here with an actionable message.
if not all_splits:
    raise ValueError(
        "No text was extracted from the page; check the URL and the "
        "SoupStrainer class names."
    )

print(f"Split blog post into {len(all_splits)} sub-documents.")

# Index chunks.
# Rebuild the store first: no ids are passed to add_documents, so each call
# stores the chunks under newly generated ids. Re-running this cell against
# the existing store would therefore index the same chunks again and
# similarity_search could return duplicates.
vector_store = InMemoryVectorStore(embeddings)
document_ids = vector_store.add_documents(documents=all_splits)

print("Document Ids:", document_ids[:5])

# Define prompt for question-answering.
# The template is pulled by name from the LangChain Hub (presumably over the
# network -- confirm before running offline). `generate` invokes it with the
# keys "question" and "context".
# N.B. for non-US LangSmith endpoints, you may need to specify
# api_url="https://api.smith.langchain.com" in hub.pull.
prompt = hub.pull("rlm/rag-prompt")


# Define state for application
class State(TypedDict):
    """State schema handed to `StateGraph` and read/updated by the graph steps."""

    # Question text supplied when the graph is invoked.
    question: str
    # Documents written by the `retrieve` step and read by `generate`.
    context: List[Document]
    # Model reply text written by the `generate` step.
    answer: str


# Define application steps
def retrieve(state: State):
    """Search the vector store for chunks similar to the question.

    Reads ``state["question"]`` and returns a partial state update whose
    ``"context"`` entry holds the documents the store returned.
    """
    matches = vector_store.similarity_search(state["question"])
    return {"context": matches}


def generate(state: State):
    """Answer the question from the retrieved context.

    The text of every document in ``state["context"]`` is joined with blank
    lines and inserted verbatim into the prompt together with the question;
    the chat model's reply text is returned under ``"answer"``.
    """
    context_text = "\n\n".join([chunk.page_content for chunk in state["context"]])
    prompt_value = prompt.invoke(
        {"question": state["question"], "context": context_text}
    )
    reply = llm.invoke(prompt_value)
    return {"answer": reply.content}


# Assemble the two-step pipeline (retrieve, then generate) and compile it.
graph_builder = StateGraph(State)
graph_builder.add_sequence([retrieve, generate])
# The sequence still needs an entry point: run "retrieve" first.
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

Total characters: 43047
Split blog post into 63 sub-documents.
Document Ids: ['87b9f82a-5873-451d-a494-fd40a9ae64fd', '06b271e2-c98d-46d2-b137-90b1521d2554', 'bf7773a3-614a-4b24-a9ea-a39c90a01fac', '340c8cfd-159f-4984-a19b-f895565d9d61', 'dad54fb9-b8ca-4ce8-8916-394dbc0fbeaf']


In [12]:
# Demonstration of the bug: the question is unrelated to the indexed post, so
# the expected answer is "I don't know". Instead the model replies with a JSON
# object containing "thoughts" and "command" fields.
response = graph.invoke(dict(question="What is the taste of an orange?"))
print(response["answer"])

{
    "thoughts": {
        "text": "The question about the taste of an orange is not addressed in the provided context.",
        "reasoning": "The retrieved context relates to memory types and APIs, not flavors or tastes.",
        "plan": "- Seek additional resources about orange flavor\n- Provide an answer based on typical knowledge\n- Note the unavailability of the specific context information",
        "criticism": "I should have acknowledged the missing context sooner.",
        "speak": "I don't have information about the taste of an orange based on the context provided."
    },
    "command": {
        "name": "none",
        "args": {}
    }
}


In [16]:
# Cause: one of the retrieved chunks itself contains prompt text, and the model
# follows those instructions instead of the ones in the RAG prompt.
# Look for "Content: You should only respond in JSON format as described below..."
# in the dump printed here.

response = graph.invoke({"question": "What is the taste of an orange?"})
for idx, chunk in enumerate(response["context"]):
    print(
        f"Context {idx}:",
        f"  Content: {chunk.page_content}",
        "-" * 50,
        sep="\n",
    )

print(response["answer"])

Context 0:
  Content: Short-Term Memory (STM) or Working Memory: It stores information that we are currently aware of and needed to carry out complex cognitive tasks such as learning and reasoning. Short-term memory is believed to have the capacity of about 7 items (Miller 1956) and lasts for 20-30 seconds.


Long-Term Memory (LTM): Long-term memory can store information for a remarkably long time, ranging from a few days to decades, with an essentially unlimited storage capacity. There are two subtypes of LTM:

Explicit / declarative memory: This is memory of facts and events, and refers to those memories that can be consciously recalled, including episodic memory (events and experiences) and semantic memory (facts and concepts).
Implicit / procedural memory: This type of memory is unconscious and involves skills and routines that are performed automatically, like riding a bike or typing on a keyboard.





Categorization of human memory.

We can roughly consider the following mapping