In [9]:
from langchain_ollama import OllamaEmbeddings

# Embedding client for the "nomic-embed-text-v2-moe:latest" model tag. It is used
# below to embed a sample query and as the vector store's embedding function.
embeddings = OllamaEmbeddings(model="nomic-embed-text-v2-moe:latest")

In [56]:
# Embed a sample sentence to sanity-check the embedding client.
vector = embeddings.embed_query("Hello, world!")

# Only the final expression of a cell is echoed, so a bare `len(vector)` in the
# middle of the cell is silently discarded. Print the length explicitly instead.
print(len(vector))

vector

[0.035544503,
 0.022141146,
 -0.0031282792,
 -0.053541426,
 -0.031493615,
 -0.035906337,
 0.0048250337,
 0.03290511,
 -0.0074005914,
 0.018139986,
 -0.0033449966,
 -0.031072015,
 0.059311315,
 -0.009700786,
 0.011966846,
 -0.012304815,
 -0.02474464,
 0.020620773,
 0.020653537,
 0.0045389133,
 0.05094597,
 -0.053764988,
 0.0028969122,
 -0.081011645,
 -0.0017085393,
 0.012681573,
 0.04951937,
 -0.041026734,
 0.049410876,
 0.041351516,
 -0.037418414,
 -0.038341247,
 0.032251403,
 -0.011293073,
 -0.06353289,
 0.05361984,
 0.008099089,
 -0.014306246,
 0.042595692,
 0.06105853,
 0.06772258,
 -0.0020974549,
 -0.00940951,
 0.04918419,
 -0.011917762,
 0.040059727,
 -0.016860534,
 0.008396817,
 0.031306956,
 -0.016937345,
 -0.024058364,
 -0.0040462697,
 0.02981428,
 -0.026197536,
 0.012938638,
 -0.054652676,
 0.019943008,
 -0.040400233,
 0.020883348,
 -0.033205096,
 -0.059710212,
 0.0013596527,
 0.083772294,
 -0.048477676,
 -0.037903875,
 -0.047012184,
 0.016901985,
 0.049351044,
 0.012703092,
 

In [15]:
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS

# Size the index from the embedding client itself instead of a hard-coded 768,
# so the index width always matches whatever model `embeddings` is configured with.
dimensions = len(embeddings.embed_query("hello world"))
index = faiss.IndexFlatL2(dimensions)

# Empty FAISS-backed store: vectors go into `index`, documents into an in-memory
# docstore, and the index-position -> docstore-id mapping starts out empty.
vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)

In [16]:
import pprint
from langchain_community.document_loaders import PyPDFLoader

# Load the PDF that sits next to the notebook.
# NOTE(review): mode="page" presumably yields one document per PDF page - confirm.
file_path = "./Mem0.pdf"
loader = PyPDFLoader(file_path, mode="page")
docs = loader.load()

# Preview the first loaded document: its metadata, a blank separator line,
# then its text content.
pprint.pp(docs[0].metadata)
print("", docs[0].page_content, sep="\n")

{'producer': 'pdfTeX-1.40.25',
 'creator': 'LaTeX with hyperref',
 'creationdate': '2025-04-29T01:11:16+00:00',
 'author': '',
 'keywords': '',
 'moddate': '2025-04-29T01:11:16+00:00',
 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live '
                    '2023) kpathsea version 6.3.5',
 'subject': '',
 'title': '',
 'trapped': '/False',
 'source': './Mem0.pdf',
 'total_pages': 23,
 'page': 0,
 'page_label': '1'}

Mem0: Building Production-Ready AI Agents with
Scalable Long-Term Memory
Prateek Chhikara, Dev Khant, Saket Aryan, Taranjeet Singh,and Deshraj Yadav
research@mem0.ai
Large Language Models (LLMs) have demonstrated remarkable prowess in generating contextually coherent
responses, yet their fixed context windows pose fundamental challenges for maintaining consistency over
prolonged multi-session dialogues. We introduceMem0, a scalable memory-centric architecture that addresses
this issue by dynamically extracting, consolidating, and retrieving salie

In [17]:
# Number of documents returned by the loader.
len(docs)

23

In [18]:
# Index every loaded document under a 1-based id ("id1", "id2", ...).
vector_store.add_documents(
    documents=docs,
    ids=[f"id{position}" for position in range(1, len(docs) + 1)],
)

['id1',
 'id2',
 'id3',
 'id4',
 'id5',
 'id6',
 'id7',
 'id8',
 'id9',
 'id10',
 'id11',
 'id12',
 'id13',
 'id14',
 'id15',
 'id16',
 'id17',
 'id18',
 'id19',
 'id20',
 'id21',
 'id22',
 'id23']

In [19]:
# Smoke-test retrieval: fetch the documents closest to a sample query and
# print the text of each hit.
similar_docs = vector_store.similarity_search(query="LOCOMO")
for doc in similar_docs:
    print(doc.page_content)

Mem0: Building Production-Ready AI Agents with Scalable Long-Term Memory
Table 1:Performance comparison of memory-enabled systems across different question types in theLOCOMO dataset.
Evaluation metrics include F1 score (F1), BLEU-1 (B1), and LLM-as-a-Judge score (J), with higher values indicating
better performance.A-Mem∗ represents results from our re-run of A-Mem to generate LLM-as-a-Judge scores by setting
temperature as 0.Mem0g indicates our proposed architecture enhanced with graph memory.Bold denotes the best
performance for each metric across all methods. (↑) represents higher score is better.
Method Single Hop Multi-Hop Open Domain Temporal
F1 ↑ B1 ↑ J↑ F1 ↑ B1 ↑ J↑ F1 ↑ B1 ↑ J↑ F1 ↑ B1 ↑ J↑
LoCoMo 25.02 19.75 – 12.04 11.16 – 40.36 29.05 – 18.41 14.77 –
ReadAgent 9.15 6.48 – 5.31 5.12 – 9.67 7.66 – 12.60 8.87 –
MemoryBank5.00 4.77 – 5.56 5.94 – 6.61 5.16 – 9.68 6.99 –
MemGPT 26.65 17.72 – 9.15 7.44 – 41.04 34.34 – 25.52 19.44 –
A-Mem 27.02 20.09 – 12.14 12.00 – 44.65 37.06 – 4

In [20]:
def get_context(prompt):
    """Return the documents in ``vector_store`` most similar to ``prompt``.

    Thin wrapper around ``vector_store.similarity_search``; the result is
    passed back to the caller unchanged.
    """
    return vector_store.similarity_search(prompt)

In [21]:
# Run the retrieval helper on a sample query and echo the returned documents.
data = get_context("LOCOMO")
data

[Document(id='id9', metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2025-04-29T01:11:16+00:00', 'author': '', 'keywords': '', 'moddate': '2025-04-29T01:11:16+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': './Mem0.pdf', 'total_pages': 23, 'page': 8, 'page_label': '9'}, page_content='Mem0: Building Production-Ready AI Agents with Scalable Long-Term Memory\nTable 1:Performance comparison of memory-enabled systems across different question types in theLOCOMO dataset.\nEvaluation metrics include F1 score (F1), BLEU-1 (B1), and LLM-as-a-Judge score (J), with higher values indicating\nbetter performance.A-Mem∗ represents results from our re-run of A-Mem to generate LLM-as-a-Judge scores by setting\ntemperature as 0.Mem0g indicates our proposed architecture enhanced with graph memory.Bold denotes the best\nperformance for ea

In [22]:
def format_document_list(docs, separator=""):
    """Join the ``page_content`` of every document in ``docs`` into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.
        separator: Text inserted between consecutive documents. The default
            empty string places documents directly after one another; pass a
            newline-based separator to keep document boundaries visible.

    Returns:
        The concatenated text, or an empty string when ``docs`` is empty.
    """
    # str.join builds the result in one pass instead of repeatedly
    # re-allocating a growing string with ``+=``.
    return separator.join(doc.page_content for doc in docs)

In [23]:
# Concatenate the retrieved documents' text into one string and display it.
format_document_list(data)

'Mem0: Building Production-Ready AI Agents with Scalable Long-Term Memory\nTable 1:Performance comparison of memory-enabled systems across different question types in theLOCOMO dataset.\nEvaluation metrics include F1 score (F1), BLEU-1 (B1), and LLM-as-a-Judge score (J), with higher values indicating\nbetter performance.A-Mem∗ represents results from our re-run of A-Mem to generate LLM-as-a-Judge scores by setting\ntemperature as 0.Mem0g indicates our proposed architecture enhanced with graph memory.Bold denotes the best\nperformance for each metric across all methods. (↑) represents higher score is better.\nMethod Single Hop Multi-Hop Open Domain Temporal\nF1 ↑ B1 ↑ J↑ F1 ↑ B1 ↑ J↑ F1 ↑ B1 ↑ J↑ F1 ↑ B1 ↑ J↑\nLoCoMo 25.02 19.75 – 12.04 11.16 – 40.36 29.05 – 18.41 14.77 –\nReadAgent 9.15 6.48 – 5.31 5.12 – 9.67 7.66 – 12.60 8.87 –\nMemoryBank5.00 4.77 – 5.56 5.94 – 6.61 5.16 – 9.68 6.99 –\nMemGPT 26.65 17.72 – 9.15 7.44 – 41.04 34.34 – 25.52 19.44 –\nA-Mem 27.02 20.09 – 12.14 12.00 – 44

In [52]:
# Persona and formatting instructions sent to the model as the system message.
SYSTEM_PROMPT = """
        You are Mohit and you are AI expert,
        you have access to all the docs and research paper.
        You will get a prompt and some context, go through the context
        and explain that concept to the user.

        Your tone should be friendly. Do not use emojis.
        RESPOND IN PLAIN TEXT, do not use MarkDown syntax.
        """

# Conversation history as (role, text) pairs; it starts with only the system
# turn, and print_llm_response appends human and assistant turns to it.
messages = [("system", SYSTEM_PROMPT)]

In [55]:
from langchain_groq import ChatGroq


def print_llm_response(prompt):
    """Answer ``prompt`` with retrieved context, streaming the reply to stdout.

    Retrieves documents for the prompt via ``get_context``, formats the first
    three with ``format_document_list``, appends the resulting human turn to
    the module-level ``messages`` history, streams a ChatGroq completion while
    printing each chunk, and records the full reply as an assistant turn.

    Args:
        prompt: The user's question.

    Returns:
        The complete response text assembled from the streamed chunks.
    """
    similar_docs = get_context(f"PROMPT: {prompt}. Explain!")
    formatted_context = format_document_list(similar_docs[:3])
    human_turn = f"""PROMPT: {prompt} \nCONTEXT: {formatted_context}"""

    messages.append(("human", human_turn))

    # Keep the system message plus the six most recent turns. The trim has to
    # mutate the shared list in place: rebinding the name with
    # `messages = ...` would make `messages` local to this function, and the
    # `append` above would then raise UnboundLocalError on every call.
    if len(messages) > 7:
        messages[:] = [messages[0]] + messages[-6:]

    print(human_turn)

    llm = ChatGroq(
        model="groq/compound-mini",
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
        streaming=True
    )

    response = ""

    # Echo each chunk as it arrives while accumulating the full reply.
    for chunk in llm.stream(messages):
        response += chunk.content
        print(chunk.content, end="", flush=True)

    messages.append(("assistant", response))
    return response

In [47]:
def save_text(filename, content):
    """Write ``content`` to ``filename`` as UTF-8 text, replacing any existing file."""
    with open(filename, mode="w", encoding="utf-8") as handle:
        handle.write(content)

In [54]:
# Ask a question, then write the returned answer text to AI_Response.md.
response = print_llm_response("What is mem0?")
save_text("AI_Response.md", response)

PROMPT: What is mem0? 
CONTEXT: Mem0: Building Production-Ready AI Agents with Scalable Long-Term Memory
P rom p t Te m p l at e f or Re s u lt s G e n e r at ion (M e m 0g )
(same as previous )
# APPROACH (Think step by step):
1. First, examine all memories that contain information related to the question
2. Examine the timestamps and content of these memories carefully
3. Look for explicit mentions of dates, times, locations, or events that answer the
question
4. If the answer requires calculation (e.g., converting relative time references), show
your work
5. Analyze the knowledge graph relations to understand the user’s knowledge context
6. Formulate a precise, concise answer based solely on the evidence in the memories
7. Double-check that your answer directly addresses the question asked
8. Ensure your final answer is specific and avoids vague time references
Memories for user {speaker_1_user_id}:
{speaker_1_memories}
Relations for user {speaker_1_user_id}:
{speaker_1_graph_memori