Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 18 additions & 28 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,27 +70,22 @@ jupyter lab --port 8888 --IdentityProvider.token MY_TOKEN --ip 0.0.0.0
"jupyter": {
"command": "docker",
"args": [
"run",
"-i",
"--rm",
"-e",
"DOCUMENT_URL",
"-e",
"DOCUMENT_TOKEN",
"-e",
"DOCUMENT_ID",
"-e",
"RUNTIME_URL",
"-e",
"RUNTIME_TOKEN",
"run", "-i", "--rm",
"-e", "DOCUMENT_URL",
"-e", "DOCUMENT_TOKEN",
"-e", "DOCUMENT_ID",
"-e", "RUNTIME_URL",
"-e", "RUNTIME_TOKEN",
"-e", "ALLOW_IMG_OUTPUT",
"datalayer/jupyter-mcp-server:latest"
],
"env": {
"DOCUMENT_URL": "http://host.docker.internal:8888",
"DOCUMENT_TOKEN": "MY_TOKEN",
"DOCUMENT_ID": "notebook.ipynb",
"RUNTIME_URL": "http://host.docker.internal:8888",
"RUNTIME_TOKEN": "MY_TOKEN"
"RUNTIME_TOKEN": "MY_TOKEN",
"ALLOW_IMG_OUTPUT": "true"
}
}
}
Expand All @@ -105,19 +100,13 @@ jupyter lab --port 8888 --IdentityProvider.token MY_TOKEN --ip 0.0.0.0
"jupyter": {
"command": "docker",
"args": [
"run",
"-i",
"--rm",
"-e",
"DOCUMENT_URL",
"-e",
"DOCUMENT_TOKEN",
"-e",
"DOCUMENT_ID",
"-e",
"RUNTIME_URL",
"-e",
"RUNTIME_TOKEN",
"run", "-i", "--rm",
"-e", "DOCUMENT_URL",
"-e", "DOCUMENT_TOKEN",
"-e", "DOCUMENT_ID",
"-e", "RUNTIME_URL",
"-e", "RUNTIME_TOKEN",
"-e", "ALLOW_IMG_OUTPUT",
"--network=host",
"datalayer/jupyter-mcp-server:latest"
],
Expand All @@ -126,7 +115,8 @@ jupyter lab --port 8888 --IdentityProvider.token MY_TOKEN --ip 0.0.0.0
"DOCUMENT_TOKEN": "MY_TOKEN",
"DOCUMENT_ID": "notebook.ipynb",
"RUNTIME_URL": "http://localhost:8888",
"RUNTIME_TOKEN": "MY_TOKEN"
"RUNTIME_TOKEN": "MY_TOKEN",
"ALLOW_IMG_OUTPUT": "true"
}
}
}
Expand Down
97 changes: 93 additions & 4 deletions docs/docs/tools/index.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ The server currently offers 11 tools:
- Input:
- `cell_index`(int): Index of the cell to insert (0-based). Use -1 to append at end and execute.
- `cell_source`(string): Code source.
- Returns: List of outputs from the executed cell.
- Returns: List of outputs from the executed cell (supports multimodal output including images).

#### 3. `delete_cell`

Expand Down Expand Up @@ -67,7 +67,7 @@ The server currently offers 11 tools:
- `timeout_seconds`: Maximum time to wait for execution (default: 300s)
- `progress_interval`: Seconds between progress updates (default: 5s)
- Returns:
- `list[str]`: List of outputs including progress updates
- `list[Union[str, ImageContent]]`: List of outputs including progress updates (supports multimodal output including images)

#### 10. `execute_cell_simple_timeout`

Expand All @@ -76,7 +76,7 @@ The server currently offers 11 tools:
- `cell_index`: Index of the cell to execute (0-based)
- `timeout_seconds`: Maximum time to wait for execution (default: 300s)
- Returns:
- `list[str]`: List of outputs from the executed cell
- `list[Union[str, ImageContent]]`: List of outputs from the executed cell (supports multimodal output including images)

#### 11. `execute_cell_with_progress`

Expand All @@ -85,4 +85,93 @@ The server currently offers 11 tools:
- `cell_index`: Index of the cell to execute (0-based)
- `timeout_seconds`: Maximum time to wait for execution (default: 300s)
- Returns:
- `list[str]`: List of outputs from the executed cell
- `list[Union[str, ImageContent]]`: List of outputs from the executed cell (supports multimodal output including images)

## Multimodal Output Support

The server supports multimodal output, allowing AI agents to directly receive and analyze visual content such as images and charts generated by code execution.

### Supported Output Types

- **Text Output**: Standard text output from code execution
- **Image Output**: PNG images generated by matplotlib, seaborn, plotly, and other visualization libraries
- **Error Output**: Error messages and tracebacks

### Environment Variable Configuration

Control multimodal output behavior using environment variables:

#### `ALLOW_IMG_OUTPUT`

Controls whether to return actual image content or text placeholders.

- **Default**: `true`
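- **Values**: `true`, `false`, `1`, `0`, `yes`, `no`, `on`, `off`, `enable`, `disable`, `enabled`, `disabled` (case-insensitive; leading and trailing whitespace is ignored). Any other value falls back to the default.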

**Example Docker Configuration:**

```json
{
"mcpServers": {
"jupyter": {
"command": "docker",
"args": [
"run", "-i", "--rm",
"-e", "DOCUMENT_URL",
"-e", "DOCUMENT_TOKEN",
"-e", "DOCUMENT_ID",
"-e", "RUNTIME_URL",
"-e", "RUNTIME_TOKEN",
"-e", "ALLOW_IMG_OUTPUT",
"datalayer/jupyter-mcp-server:latest"
],
"env": {
"DOCUMENT_URL": "http://host.docker.internal:8888",
"DOCUMENT_TOKEN": "MY_TOKEN",
"DOCUMENT_ID": "notebook.ipynb",
"RUNTIME_URL": "http://host.docker.internal:8888",
"RUNTIME_TOKEN": "MY_TOKEN",
"ALLOW_IMG_OUTPUT": "true"
}
}
}
}
```

### Output Behavior

#### When `ALLOW_IMG_OUTPUT=true` (Default)
- Images are returned as `ImageContent` objects with actual PNG data
- AI agents can directly analyze visual content
- Supports advanced multimodal reasoning

#### When `ALLOW_IMG_OUTPUT=false`
- Images are returned as text placeholders: `"[Image Output (PNG) - Image display disabled]"`
- Maintains backward compatibility with text-only LLMs
- Reduces bandwidth and token usage

### Use Cases

**Data Visualization Analysis:**
```python
import matplotlib.pyplot as plt
import pandas as pd

df = pd.read_csv('sales_data.csv')
df.plot(kind='bar', x='month', y='revenue')
plt.title('Monthly Revenue')
plt.show()
# AI can now "see" and analyze the chart content
```

**Machine Learning Model Visualization:**
```python
import matplotlib.pyplot as plt

# Plot training curves
plt.plot(epochs, train_loss, label='Training Loss')
plt.plot(epochs, val_loss, label='Validation Loss')
plt.legend()
plt.show()
# AI can evaluate training effectiveness from the visual curves
```
47 changes: 47 additions & 0 deletions jupyter_mcp_server/config_env.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Copyright (c) 2023-2024 Datalayer, Inc.
#
# BSD 3-Clause License

"""
Environment Configuration Management Module

This module manages environment variables for multimodal output support,
following the same pattern as other environment variables in the project.
"""

import os


def _get_env_bool(env_name: str, default_value: bool = True) -> bool:
    """
    Read an environment variable and interpret it as a boolean flag.

    The raw value is lower-cased and stripped of surrounding whitespace
    before being matched against the recognised spellings:

    * truthy: ``true``, ``1``, ``yes``, ``on``, ``enable``, ``enabled``
    * falsy:  ``false``, ``0``, ``no``, ``off``, ``disable``, ``disabled``

    Args:
        env_name: Name of the environment variable to look up.
        default_value: Result to use when the variable is unset or its
            value is not one of the recognised spellings.

    Returns:
        bool: The parsed flag, or ``default_value`` as a fallback.
    """
    raw = os.environ.get(env_name)
    if raw is None:
        # Variable is not defined at all: nothing to parse.
        return default_value

    # One lookup table mapping every accepted spelling to its meaning.
    recognised = {
        **dict.fromkeys(('true', '1', 'yes', 'on', 'enable', 'enabled'), True),
        **dict.fromkeys(('false', '0', 'no', 'off', 'disable', 'disabled'), False),
    }

    token = raw.lower().strip()
    # Unknown spellings (including the empty string) fall back to the default.
    return recognised.get(token, default_value)


# Multimodal Output Configuration
# Evaluated once, when this module is imported, from the ALLOW_IMG_OUTPUT
# environment variable; defaults to True when the variable is unset or holds
# a value that is not a recognised boolean spelling.
# Environment variable controls whether to return actual image content or text placeholder
ALLOW_IMG_OUTPUT: bool = _get_env_bool("ALLOW_IMG_OUTPUT", True)
17 changes: 9 additions & 8 deletions jupyter_mcp_server/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@
from jupyter_mcp_server.models import DocumentRuntime, CellInfo
from jupyter_mcp_server.utils import extract_output, safe_extract_outputs, format_cell_list, get_surrounding_cells_info
from jupyter_mcp_server.config import get_config, set_config
from typing import Literal
from typing import Literal, Union
from mcp.types import ImageContent


###############################################################################
Expand Down Expand Up @@ -416,15 +417,15 @@ async def _insert_cell():


@mcp.tool()
async def insert_execute_code_cell(cell_index: int, cell_source: str) -> list[str]:
async def insert_execute_code_cell(cell_index: int, cell_source: str) -> list[Union[str, ImageContent]]:
"""Insert and execute a code cell in a Jupyter notebook.

Args:
cell_index: Index of the cell to insert (0-based). Use -1 to append at end and execute.
cell_source: Code source

Returns:
list[str]: List of outputs from the executed cell
list[Union[str, ImageContent]]: List of outputs from the executed cell
"""
async def _insert_execute():
__ensure_kernel_alive()
Expand Down Expand Up @@ -532,13 +533,13 @@ async def _overwrite_cell():
return await __safe_notebook_operation(_overwrite_cell)

@mcp.tool()
async def execute_cell_with_progress(cell_index: int, timeout_seconds: int = 300) -> list[str]:
async def execute_cell_with_progress(cell_index: int, timeout_seconds: int = 300) -> list[Union[str, ImageContent]]:
"""Execute a specific cell with timeout and progress monitoring.
Args:
cell_index: Index of the cell to execute (0-based)
timeout_seconds: Maximum time to wait for execution (default: 300s)
Returns:
list[str]: List of outputs from the executed cell
list[Union[str, ImageContent]]: List of outputs from the executed cell
"""
async def _execute():
__ensure_kernel_alive()
Expand Down Expand Up @@ -606,7 +607,7 @@ async def _execute():

# Simpler real-time monitoring without forced sync
@mcp.tool()
async def execute_cell_simple_timeout(cell_index: int, timeout_seconds: int = 300) -> list[str]:
async def execute_cell_simple_timeout(cell_index: int, timeout_seconds: int = 300) -> list[Union[str, ImageContent]]:
"""Execute a cell with simple timeout (no forced real-time sync). To be used for short-running cells.
This won't force real-time updates but will work reliably.
"""
Expand Down Expand Up @@ -656,14 +657,14 @@ async def _execute():


@mcp.tool()
async def execute_cell_streaming(cell_index: int, timeout_seconds: int = 300, progress_interval: int = 5) -> list[str]:
async def execute_cell_streaming(cell_index: int, timeout_seconds: int = 300, progress_interval: int = 5) -> list[Union[str, ImageContent]]:
"""Execute cell with streaming progress updates. To be used for long-running cells.
Args:
cell_index: Index of the cell to execute (0-based)
timeout_seconds: Maximum time to wait for execution (default: 300s)
progress_interval: Seconds between progress updates (default: 5s)
Returns:
list[str]: List of outputs including progress updates
list[Union[str, ImageContent]]: List of outputs including progress updates
"""
async def _execute_streaming():
__ensure_kernel_alive()
Expand Down
19 changes: 14 additions & 5 deletions jupyter_mcp_server/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@

import re
from typing import Any, Union
from mcp.types import ImageContent
from .config_env import ALLOW_IMG_OUTPUT


def extract_output(output: Union[dict, Any]) -> str:
def extract_output(output: Union[dict, Any]) -> Union[str, ImageContent]:
"""
Extracts readable output from a Jupyter cell output dictionary.
Handles both traditional and CRDT-based Jupyter formats.
Expand Down Expand Up @@ -46,15 +48,22 @@ def extract_output(output: Union[dict, Any]) -> str:

elif output_type in ["display_data", "execute_result"]:
data = output.get("data", {})
if "image/png" in data:
if ALLOW_IMG_OUTPUT:
try:
return ImageContent(type="image", data=data["image/png"], mimeType="image/png")
except Exception:
# Fallback to text placeholder on error
return "[Image Output (PNG) - Error processing image]"
else:
return "[Image Output (PNG) - Image display disabled]"
if "text/plain" in data:
plain_text = data["text/plain"]
if hasattr(plain_text, 'source'):
plain_text = str(plain_text.source)
return strip_ansi_codes(str(plain_text))
elif "text/html" in data:
return "[HTML Output]"
elif "image/png" in data:
return "[Image Output (PNG)]"
else:
return f"[{output_type} Data: keys={list(data.keys())}]"

Expand Down Expand Up @@ -82,15 +91,15 @@ def strip_ansi_codes(text: str) -> str:
return ansi_escape.sub('', text)


def safe_extract_outputs(outputs: Any) -> list[str]:
def safe_extract_outputs(outputs: Any) -> list[Union[str, ImageContent]]:
"""
Safely extract all outputs from a cell, handling CRDT structures.

Args:
outputs: Cell outputs (could be CRDT YArray or traditional list)

Returns:
list[str]: List of string representations of outputs
list[Union[str, ImageContent]]: List of outputs (strings or image content)
"""
if not outputs:
return []
Expand Down
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,11 @@ test = [
"jupyter_server>=1.6,<3",
"pytest>=7.0",
"pytest-asyncio",
"pytest-timeout>=2.1.0",
"jupyterlab==4.4.1",
"jupyter-collaboration==4.0.2",
"datalayer_pycrdt==0.12.17"
"datalayer_pycrdt==0.12.17",
"pillow>=10.0.0"
]
lint = ["mdformat>0.7", "mdformat-gfm>=0.3.5", "ruff"]
typing = ["mypy>=0.990"]
Expand Down
Loading
Loading