In [None]:
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Get started with Vertex AI Memory Bank MCP

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/inardini/vertex-memory-bank-mcp/blob/main/get_started_with_memory_bank_mcp.ipynb">
      <img width="32px" src="https://www.gstatic.com/pantheon/images/bigquery/welcome_page/colab-logo.svg" alt="Google Colaboratory logo"><br> Open in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2Finardini%2Fvertex-memory-bank-mcp%2Fmain%2Fget_started_with_memory_bank_mcp.ipynb">
      <img width="32px" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" alt="Google Cloud Colab Enterprise logo"><br> Open in Colab Enterprise
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/inardini/vertex-memory-bank-mcp/main/get_started_with_memory_bank_mcp.ipynb">
      <img src="https://www.gstatic.com/images/branding/gcpiconscolors/vertexai/v1/32px.svg" alt="Vertex AI logo"><br> Open in Vertex AI Workbench
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/inardini/vertex-memory-bank-mcp/blob/main/get_started_with_memory_bank_mcp.ipynb">
      <img width="32px" src="https://storage.googleapis.com/github-repo/generative-ai/logos/GitHub_Invertocat_Dark.svg" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
</table>

<div style="clear: both;"></div>

<p>
<b>Share to:</b>

<a href="https://www.linkedin.com/sharing/share-offsite/?url=https%3A//github.com/inardini/vertex-memory-bank-mcp/blob/main/get_started_with_memory_bank_mcp.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/8/81/LinkedIn_icon.svg" alt="LinkedIn logo">
</a>

<a href="https://bsky.app/intent/compose?text=https%3A//github.com/inardini/vertex-memory-bank-mcp/blob/main/get_started_with_memory_bank_mcp.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/7/7a/Bluesky_Logo.svg" alt="Bluesky logo">
</a>

<a href="https://twitter.com/intent/tweet?url=https%3A//github.com/inardini/vertex-memory-bank-mcp/blob/main/get_started_with_memory_bank_mcp.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/5/5a/X_icon_2.svg" alt="X logo">
</a>

<a href="https://reddit.com/submit?url=https%3A//github.com/inardini/vertex-memory-bank-mcp/blob/main/get_started_with_memory_bank_mcp.ipynb" target="_blank">
  <img width="20px" src="https://redditinc.com/hubfs/Reddit%20Inc/Brand/Reddit_Logo.png" alt="Reddit logo">
</a>

<a href="https://www.facebook.com/sharer/sharer.php?u=https%3A//github.com/inardini/vertex-memory-bank-mcp/blob/main/get_started_with_memory_bank_mcp.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/5/51/Facebook_f_logo_%282019%29.svg" alt="Facebook logo">
</a>
</p>

| Author(s) |
| --- |
| [Ivan Nardini](https://github.com/inardini) |

## Overview

This tutorial shows how to use the Vertex AI Memory Bank MCP server and how to integrate it with Gemini.

You'll learn how to:

- **Use Vertex AI Memory Bank with Python MCP Client** - Direct integration using the MCP protocol
- **Use Vertex AI Memory Bank with Gemini** - Building memory-augmented AI applications

This guide provides **code** that:
- Connects to the actual Vertex AI Memory Bank MCP server
- Executes memory operations
- Demonstrates how to use Vertex AI Memory Bank MCP server to give memory to Gemini.

<div class="alert alert-block alert-warning">
    <b>Warning:</b> This example is for demonstration purposes only. The Vertex AI Memory Bank MCP server is not a Google product and is not officially supported.
</div>

## Get started

### Install Google Gen AI SDK and other required packages

Let's install all the necessary packages for the different integration methods we'll explore.


In [None]:
# Install or upgrade the packages this notebook uses: the Gen AI SDK, the MCP client
# (with its CLI extra), the Vertex AI SDK, pydantic and python-dotenv.
# %pip targets the kernel's own environment; --quiet suppresses most pip output.
%pip install --upgrade --quiet "google-genai>=1.40.0" "mcp[cli]>=1.0.0" "google-cloud-aiplatform>=1.118.0" "pydantic>=2.0.0" "python-dotenv>=1.0.0"

### Authenticate your notebook environment (Colab only)

If you're running this notebook on Google Colab, run the cell below to authenticate your environment.

In [None]:
import sys

# Only authenticate when the google.colab module is loaded, i.e. the notebook
# is running on Colab; everywhere else this cell does nothing.
if "google.colab" in sys.modules:
    from google.colab import auth

    # Sign the Colab user in for the rest of the notebook.
    auth.authenticate_user()

### Set Google Cloud project information

To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com).

Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment).

In [None]:
# Resolve the Google Cloud project and location, then create the Vertex AI client.
import os
import vertexai

# Load a local .env first so its values are visible to the lookups below.
# load_dotenv() leaves variables that are already set in the environment untouched.
if os.path.exists(".env"):
    from dotenv import load_dotenv

    load_dotenv()

PROJECT_ID = "[your-project-id]"  # @param {type: "string", placeholder: "[your-project-id]", isTemplate: true}
# Use the environment variable if the user doesn't provide Project ID.
if not PROJECT_ID or PROJECT_ID == "[your-project-id]":
    PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT", "")
# Fail early with a clear message instead of passing an empty/"None" project downstream.
if not PROJECT_ID:
    raise ValueError(
        "No Google Cloud project configured. Set PROJECT_ID in this cell or the "
        "GOOGLE_CLOUD_PROJECT environment variable."
    )

# GOOGLE_CLOUD_LOCATION is the name used by the .env template and by the MCP server
# environment in this notebook; GOOGLE_CLOUD_REGION is still honoured as a fallback.
LOCATION = (
    os.environ.get("GOOGLE_CLOUD_LOCATION")
    or os.environ.get("GOOGLE_CLOUD_REGION")
    or "us-central1"
)

# Initialize Vertex AI client
client = vertexai.Client(project=PROJECT_ID, location=LOCATION)

If you're running this notebook locally (not in Colab), you also need to set up your project information and your Google Cloud credentials in the `.env`.  

Create a `.env` file in the same directory as this notebook with the following content:

```
# Google Cloud Configuration
GOOGLE_CLOUD_PROJECT=your-project-id
GOOGLE_CLOUD_LOCATION=us-central1

# Authentication for Gemini (choose one)
# Option 1: Vertex AI with service Account Key
GOOGLE_APPLICATION_CREDENTIALS=/path/to/credentials.json

# Option 2: Google AI Studio with API Key 
# GOOGLE_API_KEY=your-api-key

# Optional: Existing Agent Engine name
# AGENT_ENGINE_NAME=projects/PROJECT_ID/locations/LOCATION/reasoningEngines/ENGINE_ID
```

Then run the following code to verify the configuration. 



In [None]:
# Optional local setup: read .env (when one exists) and report any missing settings.
if os.path.exists(".env"):
    from dotenv import load_dotenv
    load_dotenv()

    # Each check below only prints a warning; nothing is raised or defaulted here.
    if not os.getenv("GOOGLE_CLOUD_PROJECT"):
        print("Warning: GOOGLE_CLOUD_PROJECT not set. Please configure your .env file.")
    if not os.getenv("GOOGLE_CLOUD_LOCATION"):
        print("Warning: GOOGLE_CLOUD_LOCATION not set. Using default: us-central1")
    # Either credential style is enough, so warn only when both are absent.
    if not (os.getenv("GOOGLE_APPLICATION_CREDENTIALS") or os.getenv("GOOGLE_API_KEY")):
        print("Warning: GOOGLE_APPLICATION_CREDENTIALS or GOOGLE_API_KEY not set. Please configure your .env file.")

### Configure environment variables 
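If you're running this notebook locally (not in Colab), you'll need the `.env` file described above so that your project, location, and credentials are available as environment variables.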

       

### Import libraries

Import the necessary Python libraries for our tutorial.

In [None]:
# Standard library and IPython imports
import os
import sys
import json
import asyncio
import logging
from datetime import datetime
from typing import Dict, List, Any, Optional
from IPython.display import Markdown, display

# Google auth, Gen AI SDK and MCP client imports
import google.auth
import google.auth.transport.requests
from google import genai
from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client

# Configure logging: INFO level, records formatted as "time - level - message"
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Logger named after the current module
logger = logging.getLogger(__name__)

## Set Vertex AI Memory Bank MCP server and its path

Set the MCP server path. Our client will start this script as a subprocess and communicate with it via stdio.



In [None]:
# Absolute location of the Memory Bank MCP server script, resolved against the
# current working directory. Change the argument if memory_bank_server.py lives elsewhere.
MEMORY_BANK_SERVER_PATH = os.path.abspath("memory_bank_server.py")

# Report whether the script is really there; a missing file is only reported, not raised.
if os.path.exists(MEMORY_BANK_SERVER_PATH):
    print(f"Memory Bank server found at: {MEMORY_BANK_SERVER_PATH}")
else:
    print(f"Memory Bank server not found at: {MEMORY_BANK_SERVER_PATH}")
    print("Please ensure memory_bank_server.py is in the current directory")

## Understanding MCP Client Connection

To connect with MCP servers you need a client using:

- **stdio** - Communication through standard input/output (local servers)
- **SSE** - Server-Sent Events for remote servers

This Memory Bank server uses stdio, making it perfect for local development and containerized deployments.

For our client, the `__init__` method configures `StdioServerParameters`, which tells the MCP client how to start the server from the `memory_bank_server.py` script.

After that, the `connect` method starts the server process, establishes the stdio communication channels (read, write), and initializes the MCP session. Once the session is initialized, the tools the server has advertised can be called.

Finally, the `call_tool` method sends a request to execute a tool (like `create_memory`) with specific arguments and then parses the JSON response returned by the server.



In [None]:
class MemoryBankMCPClient:
    """Small async client for the Memory Bank MCP server, spoken to over stdio.

    The server script is started with the current Python interpreter when
    ``connect()`` is awaited. Typical use is ``connect()`` ->
    ``initialize_memory_bank()`` -> ``call_tool(...)`` -> ``disconnect()``.

    NOTE(review): the underlying context managers are entered and exited by
    hand; presumably ``connect()`` and ``disconnect()`` should be awaited from
    the same task - confirm against the MCP SDK.
    """

    def __init__(self, server_path: str, project_id: str, location: str):
        """Store the launch configuration; no process is started here.

        Args:
            server_path: Path of the MCP server script to run.
            project_id: Google Cloud project ID. Exported to the server as
                GOOGLE_CLOUD_PROJECT and sent by ``initialize_memory_bank()``.
            location: Google Cloud location. Exported to the server as
                GOOGLE_CLOUD_LOCATION and sent by ``initialize_memory_bank()``.
        """
        self.project_id = project_id
        self.location = location
        self.server_params = StdioServerParameters(
            command=sys.executable,
            args=[server_path],
            env={
                "GOOGLE_CLOUD_PROJECT": self.project_id,
                "GOOGLE_CLOUD_LOCATION": self.location
            }
        )
        self.session = None
        self.initialized = False
        self.stdio_cm = None
        self.session_cm = None

    async def connect(self):
        """Start the server, open the stdio channels and initialize the MCP session.

        Each context manager is recorded on ``self`` only after it has been
        entered successfully, so ``disconnect()`` can always clean up exactly
        what was opened, even when a later step here raises.

        Returns:
            This client, to allow ``client = await MemoryBankMCPClient(...).connect()``.
        """
        stdio_cm = stdio_client(self.server_params)
        self.read, self.write = await stdio_cm.__aenter__()
        self.stdio_cm = stdio_cm

        session_cm = ClientSession(self.read, self.write)
        self.session = await session_cm.__aenter__()
        self.session_cm = session_cm

        # Initialize the MCP session
        await self.session.initialize()
        print("Connected to Memory Bank MCP server")

        return self

    async def disconnect(self):
        """Close the MCP session and the stdio transport, in that order.

        Safe to call when nothing (or only part of the connection) was opened,
        and safe to call more than once.
        """
        # Detach the state first so a failure below cannot leave stale handles behind.
        session_cm, self.session_cm = self.session_cm, None
        stdio_cm, self.stdio_cm = self.stdio_cm, None
        self.session = None
        # A later connect() starts a new server process; presumably that process
        # needs its own initialize_memory_bank call, so do not carry the flag over.
        self.initialized = False

        try:
            if session_cm:
                await session_cm.__aexit__(None, None, None)
        finally:
            # Always close the transport, even if closing the session raised.
            if stdio_cm:
                await stdio_cm.__aexit__(None, None, None)
        print("\nDisconnected from Memory Bank MCP server")

    async def call_tool(self, tool_name: str, arguments: dict) -> Any:
        """Call a tool on the MCP server and decode its reply.

        Args:
            tool_name: Name of the tool to execute.
            arguments: Arguments passed to the tool.

        Returns:
            The parsed JSON value when the first content item is text holding
            valid JSON; the raw text when that text is not JSON; otherwise the
            unmodified result object.

        Raises:
            RuntimeError: If the client is not connected, or if the server
                flags the result as an error.
        """
        if not self.session:
            raise RuntimeError("Not connected to MCP server")

        result = await self.session.call_tool(tool_name, arguments)

        # Only the first content item is inspected, and only when it is text.
        content = getattr(result, "content", None)
        text = None
        if content and getattr(content[0], "type", None) == "text":
            text = content[0].text

        # Surface tool failures explicitly instead of trying to JSON-decode an error message.
        if getattr(result, "isError", False):
            detail = text if text is not None else result
            raise RuntimeError(f"Tool '{tool_name}' failed: {detail}")

        if text is None:
            return result

        try:
            return json.loads(text)
        except json.JSONDecodeError:
            # Not every tool is guaranteed to answer with JSON; hand back the text as-is.
            return text

    async def initialize_memory_bank(self, memory_topics: Optional[List[str]] = None):
        """Initialize the Memory Bank once per connection.

        Uses the project and location given to the constructor, so it works
        even when the corresponding environment variables are not set in the
        notebook process.

        Args:
            memory_topics: Topics to configure. Defaults to
                ``["USER_PREFERENCES", "USER_PERSONAL_INFO"]`` when omitted or empty.

        Returns:
            The decoded reply of the ``initialize_memory_bank`` tool, or
            ``{"status": "already_initialized"}`` on repeated calls.
        """
        if self.initialized:
            return {"status": "already_initialized"}

        result = await self.call_tool(
            "initialize_memory_bank",
            {
                "project_id": self.project_id,
                "location": self.location,
                "memory_topics": memory_topics or ["USER_PREFERENCES", "USER_PERSONAL_INFO"]
            }
        )
        # Only reached when the tool call did not raise.
        self.initialized = True
        return result

## Use Vertex AI Memory Bank MCP for basic operations

Now that we have defined our MCP client, we can test Vertex AI Memory Bank with a "Hello, World!" example. The example demonstrates the full lifecycle of interacting with the service:

- Create an instance of our client and connect it to the Vertex AI Memory Bank MCP server.
- Create a new Vertex AI Memory Bank instance on Agent Engine.
- Run through some core CRUD (Create, Read, Update, Delete) operations, showing how to generate (from a conversation) and retrieve (with a smart search).
- Clean up the server subprocess.


In [None]:
async def demo_basic_operations():
    """Run connect -> initialize -> generate -> retrieve against the MCP server.

    Progress messages and truncated JSON previews of each reply are printed.
    The client is always disconnected at the end, even when a step raises.
    """
    demo_user = "alice_demo_123"
    chat_turns = [
        {"role": "user", "content": "Hi, I'm Alice. I work as a data scientist in San Francisco."},
        {"role": "assistant", "content": "Nice to meet you, Alice! How can I help you today?"},
        {"role": "user", "content": "I'm interested in learning about machine learning with Python."}
    ]

    # Build the client up front; the server process is only started by connect().
    client = MemoryBankMCPClient(
        server_path=MEMORY_BANK_SERVER_PATH,
        project_id=PROJECT_ID,
        location=LOCATION
    )

    try:
        await client.connect()

        # Step 1: initialize the Memory Bank
        print("\n Initializing Memory Bank...")
        init_reply = await client.initialize_memory_bank()
        init_preview = json.dumps(init_reply, indent=2)[:200]
        print(f"Result: {init_preview}...")

        # Step 2: turn the sample conversation into memories for the demo user
        print("\n Generating memories from conversation...")
        generate_args = {
            "conversation": chat_turns,
            "scope": {"user_id": demo_user},
            "wait_for_completion": True
        }
        generated = await client.call_tool("generate_memories", generate_args)
        generated_preview = json.dumps(generated, indent=2)[:300]
        print(f"Generated memories: {generated_preview}...")

        # Step 3: search the stored memories for the same user
        print("\n Retrieving memories...")
        retrieve_args = {
            "scope": {"user_id": demo_user},
            "search_query": "programming interests",
            "top_k": 3
        }
        retrieved = await client.call_tool("retrieve_memories", retrieve_args)
        retrieved_preview = json.dumps(retrieved, indent=2)[:300]
        print(f"Retrieved: {retrieved_preview}...")

    finally:
        await client.disconnect()

In [None]:
# Run the basic-operations demo defined above (notebook cells allow top-level await).
await demo_basic_operations()

## Using Memory Bank with Gemini and MCP

Now that you are a bit familiar with the server, let's integrate Memory Bank with Google's Gemini API using MCP, enabling automatic function calling and memory-augmented AI responses.

We start by establishing an MCP session as before. Then we pass that session to Gemini as a tool and give it a multi-step request such as: "Create two memories for me, then retrieve them all to verify."

Instead of us manually calling create_memory twice and then retrieve_memories, the Gemini model intelligently parses the prompt, identifies the required tools from the MCP session, and executes them in the correct sequence to fulfill the request.

The Gemini SDK handles the entire back-and-forth tool calling loop for us. This is a massive simplification that lets you build complex, multi-tool agents with natural language.


In [None]:
async def demo_automatic_tool_calling_single_prompt():
    """Demonstrate Gemini's automatic tool calling with one multi-step prompt.

    This is the single-prompt variant: Gemini is asked, in one request, to
    create two memories and then retrieve them. It has its own name because a
    later cell defines ``demo_automatic_tool_calling`` (the two-turn variant
    that the notebook runs); sharing that name would leave this function
    overwritten before it could ever be called.

    Raises:
        RuntimeError: If the ``initialize_memory_bank`` tool reports an error.
    """

    # Create Gemini client
    client = genai.Client(vertexai=True, project=PROJECT_ID, location=LOCATION)

    # Create MCP server parameters
    server_params = StdioServerParameters(
        command=sys.executable,
        args=[MEMORY_BANK_SERVER_PATH],
        env={
            "GOOGLE_CLOUD_PROJECT": PROJECT_ID,
            "GOOGLE_CLOUD_LOCATION": LOCATION
        }
    )

    async with stdio_client(server_params) as (read, write):
        async with ClientSession(read, write) as session:
            # Initialize MCP session
            await session.initialize()

            print("Initializing Memory Bank...\n")

            # Initialize Memory Bank first
            init_result = await session.call_tool(
                "initialize_memory_bank",
                {
                    "project_id": PROJECT_ID,
                    "location": LOCATION,
                    "memory_topics": ["USER_PREFERENCES", "USER_PERSONAL_INFO"]
                }
            )

            # Stop early on a failed initialization rather than JSON-decoding an error message.
            if getattr(init_result, "isError", False):
                raise RuntimeError(f"initialize_memory_bank failed: {init_result}")

            if hasattr(init_result, 'content') and init_result.content:
                init_data = json.loads(init_result.content[0].text)
                engine_name = init_data.get('agent_engine_name')
                print(f"Memory Bank initialized with engine:\n   {engine_name}\n")

            # Prompt that will trigger automatic tool calling
            # (plain string: nothing is interpolated into it)
            prompt = """
            Please help me manage my memories. Here's what I need you to do:

            1. Create a memory that I (user_id: "gemini_demo_user") prefer dark mode in all applications
            2. Create another memory that I (user_id: "gemini_demo_user") love Python programming
            3. Then retrieve all memories for user_id: "gemini_demo_user" to verify they were saved

            Show me a summary of what you found.
            """

            print("Sending request to Gemini with automatic tool calling enabled...\n")

            # Gemini will automatically call MCP tools to fulfill the request
            response = await client.aio.models.generate_content(
                model="gemini-2.5-flash",
                contents=prompt,
                config=genai.types.GenerateContentConfig(
                    temperature=0,
                    tools=[session],  # Pass MCP session directly - SDK handles everything!
                    # Automatic function calling is enabled by default
                ),
            )

            print("=" * 100)
            print("Gemini Response:")
            print("=" * 100)
            print(response.text)
            print("=" * 100)

In [None]:
async def _ask_gemini_with_tools(client, session, prompt, model):
    """Send one prompt to Gemini with the MCP session exposed as a tool; return the response."""
    return await client.aio.models.generate_content(
        model=model,
        contents=prompt,
        config=genai.types.GenerateContentConfig(
            temperature=0,
            tools=[session],  # the SDK drives the tool-calling loop against this session
        ),
    )


async def demo_automatic_tool_calling(model: str = "gemini-2.5-flash"):
    """Demonstrate Gemini's automatic tool calling with Memory Bank in two turns.

    Turn 1 asks Gemini to store two memories for ``gemini_demo_user``; turn 2
    is a separate request asking it to retrieve them. Both turns use the same
    MCP session as the tool source, and each response text is printed.

    Args:
        model: Gemini model name used for both turns.

    Raises:
        RuntimeError: If the ``initialize_memory_bank`` tool reports an error.
    """

    # Create Gemini client
    client = genai.Client(vertexai=True, project=PROJECT_ID, location=LOCATION)

    # Create MCP server parameters
    server_params = StdioServerParameters(
        command=sys.executable,
        args=[MEMORY_BANK_SERVER_PATH],
        env={
            "GOOGLE_CLOUD_PROJECT": PROJECT_ID,
            "GOOGLE_CLOUD_LOCATION": LOCATION
        }
    )

    async with stdio_client(server_params) as (read, write):
        async with ClientSession(read, write) as session:
            # Initialize MCP session
            await session.initialize()

            print("Initializing Memory Bank...\n")

            # Initialize Memory Bank first
            init_result = await session.call_tool(
                "initialize_memory_bank",
                {
                    "project_id": PROJECT_ID,
                    "location": LOCATION,
                    "memory_topics": ["USER_PREFERENCES", "USER_PERSONAL_INFO"]
                }
            )

            # Stop early on a failed initialization rather than JSON-decoding an error message.
            if getattr(init_result, "isError", False):
                raise RuntimeError(f"initialize_memory_bank failed: {init_result}")

            if hasattr(init_result, 'content') and init_result.content:
                init_data = json.loads(init_result.content[0].text)
                engine_name = init_data.get('agent_engine_name')
                print(f"Memory Bank initialized with engine:\n   {engine_name}\n")

            # Turn 1: Store memories
            print("=" * 100)
            print("TURN 1: Storing memories")
            print("=" * 100)

            store_prompt = """
            Please create two memories for me:
            1. I (user_id: "gemini_demo_user") prefer dark mode in all applications
            2. I (user_id: "gemini_demo_user") love Python programming
            
            Confirm when the memories are stored.
            """

            print("Sending request to Gemini to store memories...\n")

            response1 = await _ask_gemini_with_tools(client, session, store_prompt, model)

            print("Gemini Response:")
            print(response1.text)
            print("\n")

            # Turn 2: Retrieve memories
            print("=" * 100)
            print("TURN 2: Retrieving memories")
            print("=" * 100)

            retrieve_prompt = """
            Now retrieve all memories for user_id: "gemini_demo_user" and show me what you found.
            """

            print("Sending request to Gemini to retrieve memories...\n")

            response2 = await _ask_gemini_with_tools(client, session, retrieve_prompt, model)

            print("Gemini Response:")
            print(response2.text)
            print("=" * 100)

Let's see Gemini with Vertex AI Memory Bank MCP in action.

In [None]:
# Run the Gemini tool-calling demo defined above (notebook cells allow top-level await).
await demo_automatic_tool_calling()

## Cleaning up

To avoid incurring charges to your Google Cloud account, delete the Agent Engine resources created in this tutorial.

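The following code lists the Agent Engines in the configured project and location and deletes them. Note that this covers **every** Agent Engine in that project and location, not only the ones created by this notebook, so review the list before running it in a shared project.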

In [None]:
# Delete agent engines with rate limiting to avoid quota exhaustion
import time

# Safety switch. client.agent_engines.list() returns every Agent Engine visible to the
# client, not just the ones this notebook created, and deletion below uses force=True.
# Leave this False to only list the engines; set it to True to actually delete them.
DELETE_ALL_AGENT_ENGINES = False  # @param {type: "boolean"}

# Pause between delete requests to stay under default quota limits.
DELETE_DELAY_SECONDS = 10

# List all agent engines
agent_engines = list(client.agent_engines.list())
total_engines = len(agent_engines)
print(f"Found {total_engines} Agent Engine(s) to delete.\n")

deleted_count = 0
if not DELETE_ALL_AGENT_ENGINES:
    # Dry run: show what would be removed so the user can decide.
    for engine in agent_engines:
        print(f"  - {engine.api_resource.name}")
    print("\nNothing was deleted. Set DELETE_ALL_AGENT_ENGINES = True to delete every Agent Engine listed above.")
else:
    # Delete each agent engine with delay between requests
    for i, engine in enumerate(agent_engines, 1):
        engine_name = engine.api_resource.name
        try:
            client.agent_engines.delete(name=engine_name, force=True)
            print(f"[{i}/{total_engines}] Deleted Agent Engine: {engine_name}")
            deleted_count += 1
        except Exception as e:
            # Best effort: report the failure and keep going with the remaining engines.
            print(f"[{i}/{total_engines}] Failed to delete {engine_name}: {e}")

        # Space out requests whether or not the last one succeeded.
        if i < total_engines:
            print(f"Waiting {DELETE_DELAY_SECONDS} seconds before next deletion to respect quota limits...")
            time.sleep(DELETE_DELAY_SECONDS)

    print(f"\nDeleted {deleted_count} of {total_engines} Agent Engine(s).")