##### Copyright 2025 Google LLC.

In [None]:
# @title Licensed under the Apache License, Version 2.0 (the "License");
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

## Long-Term Memory Layer using Mem0, Gemini, and Qdrant

<a target="_blank" href="https://colab.research.google.com/drive/13PYVsCenlKOI2iUnPE0A_LcfGDElOWTR?usp=sharing"><img src="https://colab.research.google.com/assets/colab-badge.svg" height=30/></a>

## Overview

Build a personalized travel agent with a long-term memory layer that stores and retrieves your preferences when recommending travel destinations and planning itineraries. The memory layer should be able to add, update, and search interactions based on your preferences. You will also see how to use this memory with the Gemini client by configuring a system prompt.

## Prerequisites

You can run this quickstart in Google Colab.

To complete this quickstart on your own development environment, ensure that your environment meets the following requirements:

-  Python 3.11+
-  An installation of `jupyter` to run the notebook.

## Setup

First, install the Gemini API Python library (`google-genai`) and the Mem0 package (`mem0ai`), along with the LangChain and FastEmbed packages used for embeddings.

In [1]:
!pip install agno mem0ai google-genai
!pip install langchain langchain-community fastembed

### Grab an API Key

Before you can use the Gemini API, you must first obtain an API key. If you don't already have one, create a key with one click in Google AI Studio.

<a class="button button-primary" href="https://aistudio.google.com/app/apikey" target="_blank" rel="noopener noreferrer">Get an API key</a>

In Colab, add the key to the secrets manager under the "🔑" in the left panel. Give it the name `GOOGLE_API_KEY`.

Once you have the API key, pass it to the SDK. You can do this in two ways:

* Put the key in the `GOOGLE_API_KEY` environment variable (the SDK will automatically pick it up from there).
* Pass the key to `genai.Client(api_key=...)`

In [6]:
import os
from google.colab import userdata

from google.genai import Client

In [7]:
# Copy the Colab secret named GOOGLE_API_KEY into the environment variable of
# the same name, which is where the SDK picks the key up from.
os.environ["GOOGLE_API_KEY"] = userdata.get('GOOGLE_API_KEY')

In [8]:
# Gemini client; no api_key argument is passed, so it relies on the
# GOOGLE_API_KEY environment variable set in the previous cell.
llm_client = Client()

### Define the Memory Configuration

To save and retrieve memories as context, we need an embedding model and a vector store to store the data, and an LLM to summarize and save the preferences.

We will use:

- LLM: Gemini 2.5 Flash Lite [this is for Memory preference only]
- Embeddings: FastEmbed, which runs on ONNX Runtime
- Vector Store: Qdrant

Why configure the vector store explicitly? Mem0's default `embedding_model_dims` is 1536, so when we use an open-source embedding model with a different output size (768 here), we need to set the embedding dimensions ourselves in a custom configuration.

In [None]:
from mem0 import Memory
from langchain_community.embeddings import FastEmbedEmbeddings

In [None]:
# Embedding model that is handed to Mem0 through the "langchain" embedder entry
# of the config.
# NOTE(review): max_length presumably limits the input token count rather than
# the vector size (the 768-dim vector size is set separately via
# embedding_model_dims) — confirm 768 is the intended value here.
embeddings = FastEmbedEmbeddings(model_name = "jinaai/jina-embeddings-v2-base-en", max_length = 768)

In [4]:
# Mem0 configuration, one section per component:
#   llm          - Gemini model used by the memory layer
#   vector_store - Qdrant collection "longterm" stored under /tmp/db, with
#                  768-dimensional vectors
#   embedder     - the LangChain embeddings object created in the cell above
config = dict(
    llm=dict(
        provider="gemini",
        config=dict(
            model="gemini-2.5-flash-lite",
            temperature=0.8,
        ),
    ),
    vector_store=dict(
        provider="qdrant",
        config=dict(
            collection_name="longterm",
            path="/tmp/db",
            embedding_model_dims=768,
        ),
    ),
    embedder=dict(
        provider="langchain",
        config=dict(model=embeddings),
    ),
)

In [None]:
# Build the Mem0 memory object from the config dict. Note that `client` is the
# memory layer, while `llm_client` is the Gemini client.
client = Memory.from_config(config)

In [11]:
# Sample food conversation, written as (role, content) turns and expanded into
# the role/content dicts that are later passed to client.add().
messages = [
    {"role": role, "content": content}
    for role, content in (
        ("user", "What is the must try food in Baroda"),
        ("assistant", "Sev Usal is must"),
        ("user", "I'm not into street food, I prefer Gujarati thalis."),
        ("assistant", "Head to Mandap in Baroda, it’s famous for authentic Gujarati thalis."),
    )
]

In [12]:
# Add the food conversation to memory for user "personal", tagged with the
# metadata category "food".
result1 = client.add(messages, user_id="personal", metadata={"category": "food"})

In [None]:
# Display the value returned by client.add() for the food conversation.
result1

In [14]:
# Sample travel conversation, written as (role, content) turns and expanded
# into the role/content dicts that are later passed to client.add().
messages2 = [
    {"role": role, "content": content}
    for role, content in (
        ("user", "I'm planning to travel to Hong Kong which Airlines to use from Bangalore"),
        ("assistant", "Cathay Pacific is the best option and have the direct flights. Any preferences?"),
        ("user", "Yes, I need Hindu Vegetarian meal and prefer window seat or person seat"),
        ("assistant", "Sure, I will got it. Do you like do add anything else?"),
    )
]

In [15]:
# Add the travel conversation to memory for the same user "personal", tagged
# with the metadata category "travel".
result2 = client.add(messages2, user_id="personal", metadata={"category": "travel"})

In [None]:
# Display the value returned by client.add() for the travel conversation.
result2

## Search - Inference on new suggestion

In [17]:
# New request used as the search text against the stored memories.
query = "I am travelling to New york, suggest food places to try"

In [18]:
# Search the memories of user "personal" with the query (limit=30).
memories = client.search(query,user_id="personal",limit=30)

In [19]:
# Render each search hit as a "- <memory>" bullet and join them with newlines.
context = "\n".join(
    [f"- {hit['memory']}" for hit in memories["results"]]
)

In [None]:
# Display the bullet list built from the search results.
context

## Generate LLM Response using Gemini 2.5 Pro

In [21]:
# System instruction passed to Gemini by get_llm_response(): sets the
# assistant's persona and asks it to adapt to the user's established preferences.
SYSTEM_PROMPT = """
You are an expert executive assistant who thinks carefully before responding,
adapting to the polite communication style based on the previous user's established PREFERENCES and the complexity of their query.

Maintain a polished, professional tone that is warm yet efficient—concise for
simple questions, moderate for complex topics, and comprehensive for open-ended discussions.

Act as a trusted advisor who doesn't just answer questions but adds value through insights, anticipates needs,
and prioritizes what matters most while respecting the user's time with clear, actionable responses.
"""

In [22]:
def get_llm_response(query: str, user_id: str, limit: int = 30) -> str:
    """Answer `query` with Gemini, using the user's stored Mem0 memories as context.

    Args:
        query: The user's question; also used as the Mem0 search text.
        user_id: Identifier whose memories are searched.
        limit: Value passed as `limit` to the Mem0 search. Defaults to 30,
            the value that was previously hard-coded.

    Returns:
        The text of the model's response, or an empty string when the
        response carries no text.
    """
    # Fetch the memories for this user and render one "- <memory>" bullet each.
    memories = client.search(query, user_id=user_id, limit=limit)
    context = "\n".join(f"- {m['memory']}" for m in memories['results'])

    user_prompt = f"""
      <question>
      QUESTION: {query}
      </question>

      <PREFERENCE>
      Preference: {context}
      </PREFERENCE>
    """

    # The persona goes in as the system instruction; the question and the
    # retrieved preferences travel in the user prompt.
    response = llm_client.models.generate_content(
        model="gemini-2.5-pro",
        contents=user_prompt,
        config={
            "system_instruction": SYSTEM_PROMPT
            }
    )
    # response.text can be None when no text part comes back; honour the
    # declared `-> str` contract instead of handing None to the caller.
    return response.text or ""

In [23]:
# Question passed to get_llm_response() in the next cell.
user_query = "i need food and place recommendation for the food in New York"

In [25]:
# Run the memory lookup plus Gemini call for user "personal".
response = get_llm_response(user_query, user_id="personal")

In [None]:
# print() rather than a bare expression so line breaks in the reply are
# rendered instead of shown as escaped "\n" in the string repr.
print(response)