In [1]:
# Presentation-only tweak: inject a CSS rule that sets the notebook's
# --jp-notebook-max-width variable to 100%. It has no effect on the computation below.
from IPython.display import display, HTML
display(HTML("<style>:root { --jp-notebook-max-width: 100% !important; }</style>"))

In [2]:
# %%capture
# !pip install git+https://github.com/brandonstarxel/chunking_evaluation.git
# !pip install watermark
# !pip install langchain-community langchain-core
# !pip install -U langchain-openai
# !pip install -U weaviate-client
# !pip install pypdf

In [3]:
import os
import weaviate

from chunking_evaluation.chunking import RecursiveTokenChunker
from langchain_community.document_loaders import PyPDFLoader
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from weaviate.classes.config import Configure

from credentials import OPENAI_API_KEY

In [4]:
%reload_ext watermark
%watermark -a "Armindo Guerra"

Author: Armindo Guerra



In [5]:
%watermark --iversions

IPython            : 8.27.0
langchain_core     : 0.3.47
weaviate           : 4.11.1
langchain_community: 0.3.20
chunking_evaluation: 0.1.0
langchain_openai   : 0.3.7



In [None]:
# Export the key imported from the local `credentials` module into the process
# environment (presumably where the OpenAI-backed chat models look it up — confirm).
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY # load your OpenAI API key

In [None]:
# Shared chat model: used by both conversation loops and by the
# procedural-memory update further down in this notebook.
llm = ChatOpenAI(temperature=0.7, model="gpt-4o")

In [None]:
# Instantiate the Weaviate database client against a locally running instance.
# Every memory store in this notebook (episodic and semantic) goes through this client.
vdb_client = weaviate.connect_to_local()
# Report readiness so a dead or unreachable instance is noticed before any collection is touched.
print("Connected to Weaviate: ", vdb_client.is_ready())

# Simulando uma primeira interação com o Agente

In [None]:
# Baseline agent with working memory only: the whole message history is
# resent to the model on every turn, and nothing is persisted afterwards.
system_prompt = SystemMessage("You are a helpful AI Assistant. Answer the User's queries succinctly in one sentence.")

# The history always starts with the system instruction.
messages = [system_prompt]

while True:
    # Read the next user turn from the notebook's input box.
    user_message = HumanMessage(input("\nUser: "))

    # "exit" in any casing ends the session; the command itself is not recorded.
    if user_message.content.lower() == "exit":
        break

    # Record the user turn, then let the model answer with the full history as context.
    messages.append(user_message)
    response = llm.invoke(messages)

    print("\nAI Message: ", response.content)

    # Keep the model's reply so later turns can refer back to it.
    messages.append(response)

# Definindo o prompt para o processo de reflexão

In [None]:
# Prompt used to distil a finished conversation into an "episodic memory" record.
# The model is asked for a JSON object with four fields: context_tags,
# conversation_summary, what_worked and what_to_avoid.
# The doubled braces ({{ / }}) are written so the JSON skeleton survives template
# formatting as literal braces; {conversation} is the only placeholder filled at invoke time.
reflection_prompt_template = """
You are analyzing conversations about research papers to create memories that will help guide future interactions. 
Your task is to extract key elements that would be most helpful when encountering similar academic discussions in the future.

Review the conversation and create a memory reflection following these rules:

1. For any field where you don't have enough information or the field isn't relevant, use "N/A"
2. Be extremely concise - each string should be one clear, actionable sentence
3. Focus only on information that would be useful for handling similar future conversations
4. Context_tags should be specific enough to match similar situations but general enough to be reusable

Output valid JSON in exactly this format:
{{
    "context_tags": [              // 2-4 keywords that would help identify similar future conversations
        string,                    // Use field-specific terms like "deep_learning", "methodology_question", "results_interpretation"
        ...
    ],
    "conversation_summary": string, // One sentence describing what the conversation accomplished
    "what_worked": string,         // Most effective approach or strategy used in this conversation
    "what_to_avoid": string        // Most important pitfall or ineffective approach to avoid
}}

Examples:
- Good context_tags: ["transformer_architecture", "attention_mechanism", "methodology_comparison"]
- Bad context_tags: ["machine_learning", "paper_discussion", "questions"]

- Good conversation_summary: "Explained how the attention mechanism in the BERT paper differs from traditional transformer architectures"
- Bad conversation_summary: "Discussed a machine learning paper"

- Good what_worked: "Using analogies from matrix multiplication to explain attention score calculations"
- Bad what_worked: "Explained the technical concepts well"

- Good what_to_avoid: "Diving into mathematical formulas before establishing user's familiarity with linear algebra fundamentals"
- Bad what_to_avoid: "Used complicated language"

Additional examples for different research scenarios:

Context tags examples:
- ["experimental_design", "control_groups", "methodology_critique"]
- ["statistical_significance", "p_value_interpretation", "sample_size"]
- ["research_limitations", "future_work", "methodology_gaps"]

Conversation summary examples:
- "Clarified why the paper's cross-validation approach was more robust than traditional hold-out methods"
- "Helped identify potential confounding variables in the study's experimental design"

What worked examples:
- "Breaking down complex statistical concepts using visual analogies and real-world examples"
- "Connecting the paper's methodology to similar approaches in related seminal papers"

What to avoid examples:
- "Assuming familiarity with domain-specific jargon without first checking understanding"
- "Over-focusing on mathematical proofs when the user needed intuitive understanding"

Do not include any text outside the JSON object in your response.

Here is the prior conversation:

{conversation}
"""

# Wrap the raw text in a prompt template so it can be composed into a chain.
reflection_prompt = ChatPromptTemplate.from_template(reflection_prompt_template)

# Dedicated model instance for reflections, configured like the main chat model above.
llm_reflection = ChatOpenAI(temperature=0.7, model="gpt-4o")

# Reflection chain: fill the prompt -> call the model -> parse the reply as JSON.
# Invoked as reflect.invoke({"conversation": <transcript string>}).
reflect = reflection_prompt | llm_reflection | JsonOutputParser()

In [None]:
def format_conversation(messages):
    """Render a chat history as a plain-text transcript.

    Each message becomes one ``"<TYPE>: <content>"`` line, where ``<TYPE>`` is
    the upper-cased ``type`` attribute of the message (e.g. ``HUMAN``, ``AI``).
    System messages are left out because they hold instructions, not dialogue.

    System messages are recognised by their ``type`` rather than by position,
    so a history that does not start with a system prompt keeps its first turn,
    and an empty history yields an empty string.

    Args:
        messages: Iterable of message objects exposing ``type`` and ``content``.

    Returns:
        The transcript as a single newline-separated string.
    """
    return "\n".join(
        f"{message.type.upper()}: {message.content}"
        for message in messages
        if message.type != "system"
    )

# Flatten the history collected by the chat loop above into a transcript string;
# this is the text handed to the reflection chain in the next cell.
conversation = format_conversation(messages)

# Show the transcript so it can be checked by eye before reflecting on it.
print(conversation)

In [None]:
# Run the reflection chain on the transcript. The parsed result is expected to
# carry the four fields requested by the prompt (context_tags,
# conversation_summary, what_worked, what_to_avoid).
reflection = reflect.invoke({"conversation": conversation})
print(reflection)

In [None]:
# Create the collection that stores one reflected record per past conversation.
#
# The existence check keeps this cell re-runnable: episodic memory is meant to
# persist across sessions, and creating a collection that already exists fails.
if not vdb_client.collections.exists("episodic_memory"):
    vdb_client.collections.create(
        "episodic_memory",
        vectorizer_config=[
            Configure.NamedVectors.text2vec_ollama(
                # Name kept from the original cell; it is not referenced anywhere else in this notebook.
                name="title_vector",
                # Embed the fields that are actually written by add_episodic_memory.
                # The stored objects have no "title" property, so pointing the
                # vectorizer at "title" left the embedding without any of the memory's content.
                source_properties=[
                    "conversation",
                    "context_tags",
                    "conversation_summary",
                    "what_worked",
                    "what_to_avoid",
                ],
                api_endpoint="http://host.docker.internal:11434",  # If using Docker, use this to contact your local Ollama instance
                model="nomic-embed-text",  # The model to use, e.g. "nomic-embed-text"
            )
        ],
        # Additional parameters not shown
    )

In [None]:
def add_episodic_memory(messages, vdb_client):
    """Reflect on a finished conversation and store it in episodic memory.

    The history is flattened with ``format_conversation``, summarised by the
    ``reflect`` chain, and written as a single object to the
    ``episodic_memory`` collection. The outcome of the write is printed.

    Args:
        messages: Chat history to store.
        vdb_client: Connected Weaviate client.

    Returns:
        None.
    """
    # Format messages into a transcript and let the LLM reflect on it.
    conversation = format_conversation(messages)
    reflection = reflect.invoke({"conversation": conversation})

    collection = vdb_client.collections.get("episodic_memory")

    # The reflection is LLM output, so a field may be missing. Fall back to the
    # same "N/A" marker the reflection prompt asks the model to use, instead of
    # losing the whole conversation to a KeyError.
    with collection.batch.dynamic() as batch:
        batch.add_object(
            properties={
                "conversation": conversation,
                "context_tags": reflection.get("context_tags", ["N/A"]),
                "conversation_summary": reflection.get("conversation_summary", "N/A"),
                "what_worked": reflection.get("what_worked", "N/A"),
                "what_to_avoid": reflection.get("what_to_avoid", "N/A"),
            },
        )

    # Batch errors are collected rather than raised, so report them explicitly.
    failed_objects = collection.batch.failed_objects
    if failed_objects:
        print(f"Number of failed imports: {len(failed_objects)}")
        print(f"First failed object: {failed_objects[0]}")
    else:
        print("Objects inserted")

In [None]:
# Persist the conversation from the first chat loop as the initial episodic memory.
add_episodic_memory(messages, vdb_client)

In [None]:
def episodic_recall(query, vdb_client, alpha=0.5, limit=1):
    """Retrieve the stored conversation(s) most relevant to ``query``.

    Runs a hybrid (vector + keyword) search over the ``episodic_memory``
    collection.

    Args:
        query: Text to search for.
        vdb_client: Connected Weaviate client.
        alpha: Hybrid weighting passed straight to the search. Defaults to
            0.5, the value this function previously hard-coded.
        limit: Maximum number of memories to return. Defaults to 1, the value
            this function previously hard-coded.

    Returns:
        The raw query result; matching memories are in its ``objects`` list,
        which may be empty.
    """
    # Load the database collection.
    episodic_memory = vdb_client.collections.get("episodic_memory")

    # Hybrid semantic/BM25 retrieval.
    return episodic_memory.query.hybrid(
        query=query,
        alpha=alpha,
        limit=limit,
    )

# Smoke test of episodic recall with a sample query.
query = "Talking about my name"
memory = episodic_recall(query, vdb_client)
# Display the stored fields of the best match.
# NOTE(review): indexing [0] assumes at least one memory has already been stored.
memory.objects[0].properties

In [None]:
# Read the paper; the loader yields one document object per entry it returns.
loader = PyPDFLoader("./coala_paper.pdf")
pages = list(loader.load())

# Merge every page's text into one string so chunks are not bound to page breaks.
document = " ".join(page.page_content for page in pages)

# Chunker configuration. Because length_function is `len`, chunk_size is
# measured in characters here. Separators are listed from coarsest to finest.
recursive_character_chunker = RecursiveTokenChunker(
    separators=["\n\n", "\n", ".", "?", "!", " ", ""],
    length_function=len,
    chunk_overlap=0,
    chunk_size=800,
)

# Split the merged text into the chunks that will populate semantic memory.
recursive_character_chunks = recursive_character_chunker.split_text(document)

In [None]:
# Sanity check: number of chunks produced from the paper.
len(recursive_character_chunks)

In [None]:
# Create the collection that holds the paper's text chunks (semantic memory).
#
# The existence check keeps this cell re-runnable; creating a collection that
# already exists fails.
if not vdb_client.collections.exists("semantic_memory"):
    vdb_client.collections.create(
        "semantic_memory",
        vectorizer_config=[
            Configure.NamedVectors.text2vec_ollama(
                # Name kept from the original cell; it is not referenced anywhere else in this notebook.
                name="title_vector",
                # Objects in this collection only carry a "chunk" property, so
                # that is the text to embed (the original pointed at a non-existent "title").
                source_properties=["chunk"],
                api_endpoint="http://host.docker.internal:11434",  # If using Docker, use this to contact your local Ollama instance
                model="nomic-embed-text",  # The model to use, e.g. "nomic-embed-text"
            )
        ],
        # Additional parameters not shown
    )

In [None]:
# Load the semantic-memory collection.
coala_collection = vdb_client.collections.get("semantic_memory")

# Importing appends objects, so running this cell twice would store every chunk
# twice and skew retrieval. Only import into an empty collection.
existing_chunk_count = coala_collection.aggregate.over_all(total_count=True).total_count

if existing_chunk_count:
    print(f"semantic_memory already holds {existing_chunk_count} objects; skipping import.")
else:
    with coala_collection.batch.dynamic() as batch_coala:
        for chunk in recursive_character_chunks:
            # The model provider integration will automatically vectorize the object
            batch_coala.add_object(
                properties={
                    "chunk": chunk,
                },
                # vector=vector  # Optionally provide a pre-obtained vector
            )
            # Give up early if the import is clearly failing (e.g. vectorizer unreachable).
            if batch_coala.number_errors > 10:
                print("Batch import stopped due to excessive errors.")
                break

    # Batch errors are collected rather than raised, so surface them here.
    failed_objects = coala_collection.batch.failed_objects
    if failed_objects:
        print(f"Number of failed imports: {len(failed_objects)}")
        print(f"First failed object: {failed_objects[0]}")

In [None]:
def semantic_recall(query, vdb_client, alpha=0.5, limit=15):
    """Retrieve paper chunks relevant to ``query`` as one labelled text block.

    Runs a hybrid (vector + keyword) search over the ``semantic_memory``
    collection and concatenates the hits in result order.

    Args:
        query: Text to search for.
        vdb_client: Connected Weaviate client.
        alpha: Hybrid weighting passed straight to the search. Defaults to
            0.5, the value this function previously hard-coded.
        limit: Maximum number of chunks to retrieve. Defaults to 15, the value
            this function previously hard-coded.

    Returns:
        A string in which each hit appears as ``"\\nCHUNK <n>:\\n<text>"``
        (numbered from 1, text stripped of surrounding whitespace), or an
        empty string when nothing matches.
    """
    # Load the database collection.
    coala_collection = vdb_client.collections.get("semantic_memory")

    # Hybrid semantic/BM25 retrieval.
    memories = coala_collection.query.hybrid(
        query=query,
        alpha=alpha,
        limit=limit,
    )

    # Build the block in one pass instead of growing a string inside the loop.
    return "".join(
        f"\nCHUNK {position}:\n{memory.properties['chunk'].strip()}"
        for position, memory in enumerate(memories.objects, start=1)
    )

In [None]:
# memories = semantic_recall("What is retroformer?", vdb_client)
# print(memories)

In [None]:
def semantic_rag(query, vdb_client):
    """Build the grounding message for one user query.

    Looks up chunks related to ``query`` via ``semantic_recall`` and embeds
    them in an instruction telling the model to answer from that context and
    to say so when the context is insufficient.

    Returns:
        A ``HumanMessage`` carrying the instruction and the retrieved chunks.
    """
    retrieved_chunks = semantic_recall(query, vdb_client)

    # The retrieved text is interpolated verbatim beneath the instruction.
    return HumanMessage(f""" If needed, Use this grounded context to factually answer the next question.
    Let me know if you do not have enough information or context to answer a question.
    
    {retrieved_chunks}
    """)

In [None]:
def episodic_system_prompt(query, vdb_client):
    """Build the system prompt for the next turn from the agent's memories.

    Combines three sources:
      * the episodic memory that best matches ``query`` (via ``episodic_recall``),
      * the notebook-level accumulators ``conversations``, ``what_worked`` and
        ``what_to_avoid``, which this function updates in place,
      * the procedural guidelines stored in ``./procedural_memory.txt``.

    When episodic memory returns no match, the accumulators are left untouched
    and the prompt reports "N/A" as the current match instead of raising an
    IndexError.

    Args:
        query: The user's latest input, used as the recall query.
        vdb_client: Connected Weaviate client.

    Returns:
        A ``SystemMessage`` containing the assembled prompt.
    """
    # Get the memory most relevant to this turn.
    memory = episodic_recall(query, vdb_client)

    # Load existing procedural memory instructions.
    with open("./procedural_memory.txt", "r") as content:
        procedural_memory = content.read()

    if memory.objects:
        recalled = memory.objects[0].properties
        current_conversation = recalled['conversation']

        # Update the memory stores; each recalled conversation is kept only once.
        if current_conversation not in conversations:
            conversations.append(current_conversation)
        what_worked.update(recalled['what_worked'].split('. '))
        what_to_avoid.update(recalled['what_to_avoid'].split('. '))
    else:
        # Nothing recalled: keep the prompt well-formed without touching the stores.
        current_conversation = "N/A"

    # Up to three recent conversations other than the current match.
    previous_convos = [conv for conv in conversations[-4:] if conv != current_conversation][-3:]

    # Create the prompt with the accumulated history.
    episodic_prompt = f"""You are a helpful AI Assistant. Answer the user's questions to the best of your ability.
    You recall similar conversations with the user, here are the details:
    
    Current Conversation Match: {current_conversation}
    Previous Conversations: {' | '.join(previous_convos)}
    What has worked well: {' '.join(what_worked)}
    What to avoid: {' '.join(what_to_avoid)}
    
    Use these memories as context for your response to the user.
    
    Additionally, here are 10 guidelines for interactions with the current user: {procedural_memory}"""

    return SystemMessage(content=episodic_prompt)

In [None]:
def procedural_memory_update(what_worked, what_to_avoid):
    """Rewrite the procedural guidelines file from newly gathered feedback.

    Reads the current guidelines from ``./procedural_memory.txt``, asks the
    shared ``llm`` to merge them with the supplied feedback into a list of at
    most ten takeaways, and overwrites the file with the model's answer.

    Args:
        what_worked: Collected notes on approaches that worked; interpolated
            into the prompt as-is.
        what_to_avoid: Collected notes on approaches to avoid; interpolated
            into the prompt as-is.

    Returns:
        None.
    """
    memory_path = "./procedural_memory.txt"

    # Current guidelines, to be refined rather than replaced blindly.
    with open(memory_path, "r") as memory_file:
        existing_takeaways = memory_file.read()

    # Prompt combining the existing guidelines with the new feedback.
    procedural_prompt = f"""You are maintaining a continuously updated list of the most important procedural behavior instructions for an AI assistant. 
    Your task is to refine and improve a list of key takeaways based on new conversation feedback while maintaining the most valuable existing insights.

    CURRENT TAKEAWAYS:
    {existing_takeaways}

    NEW FEEDBACK:
    What Worked Well:
    {what_worked}

    What To Avoid:
    {what_to_avoid}

    Please generate an updated list of up to 10 key takeaways that combines:
    1. The most valuable insights from the current takeaways
    2. New learnings from the recent feedback
    3. Any synthesized insights combining multiple learnings

    Requirements for each takeaway:
    - Must be specific and actionable
    - Should address a distinct aspect of behavior
    - Include a clear rationale
    - Written in imperative form (e.g., "Maintain conversation context by...")

    Format each takeaway as:
    [#]. [Instruction] - [Brief rationale]

    The final list should:
    - Be ordered by importance/impact
    - Cover a diverse range of interaction aspects
    - Focus on concrete behaviors rather than abstract principles
    - Preserve particularly valuable existing takeaways
    - Incorporate new insights when they provide meaningful improvements

    Return up to but no more than 10 takeaways, replacing or combining existing ones as needed to maintain the most effective set of guidelines.
    Return only the list, no preamble or explanation.
    """

    # Ask the model for the refined list.
    updated_memory = llm.invoke(procedural_prompt)

    # Replace the file's contents with the new guidelines.
    with open(memory_path, "w") as memory_file:
        memory_file.write(updated_memory.content)

# prompt = procedural_memory_update(what_worked, what_to_avoid)

In [None]:
# Full agent loop: working memory (message history) + episodic memory (system
# prompt built from recalled conversations) + semantic memory (retrieved paper
# chunks) + procedural memory (guidelines file refreshed on exit).

# Simple storage for accumulated memories; updated in place by episodic_system_prompt.
conversations = []
what_worked = set()
what_to_avoid = set()

# Start storage for the message history.
messages = []

while True:
    # Get the user's message.
    user_input = input("\nUser: ")
    command = user_input.strip().lower()

    # Handle control commands BEFORE any memory lookup. Otherwise the command
    # word itself is used as a recall query, and whatever it happens to match
    # is folded into what_worked / what_to_avoid right before they are used to
    # rewrite the procedural memory.
    if command == "exit":
        add_episodic_memory(messages, vdb_client)
        print("\n == Conversation Stored in Episodic Memory ==")
        procedural_memory_update(what_worked, what_to_avoid)
        print("\n== Procedural Memory Updated ==")
        break
    if command == "exit_quiet":
        # Leave without persisting anything.
        print("\n == Conversation Exited ==")
        break

    user_message = HumanMessage(content=user_input)

    # Generate a fresh system prompt from the memories relevant to this turn.
    system_prompt = episodic_system_prompt(user_input, vdb_client)

    # Rebuild the history with the new system prompt first and any older one dropped.
    messages = [
        system_prompt,  # New system prompt always first
        *[msg for msg in messages if not isinstance(msg, SystemMessage)]  # Old messages except system
    ]

    # Retrieve grounding context; it is sent for this turn only.
    context_message = semantic_rag(user_input, vdb_client)

    # Pass messages + context + user input to the LLM.
    response = llm.invoke([*messages, context_message, user_message])
    print("\nAI Message: ", response.content)

    # Only the user message and the reply enter the permanent history; the
    # retrieved context is not stored, which keeps the history compact.
    messages.extend([user_message, response])

In [None]:
# Inspect the transcript of the session that just ended.
print(format_conversation(messages))

In [None]:
# Inspect the most recently built system prompt (episodic + procedural memory).
print(system_prompt.content)

In [None]:
# Inspect the semantic context retrieved for the last answered turn.
# NOTE(review): context_message only exists if at least one question was asked before exiting.
print(context_message.content)