# Wetlands LLM + MCP Testing

Test the LLM's integration with the MCP server, using LangChain for tool calling.

In [1]:
import os
import json
import requests
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage, SystemMessage
from langchain.tools import tool

In [2]:
# Display the interpreter's module search path; the entries show which
# environment's site-packages this kernel imports from.
import sys
sys.path

['/usr/lib/python312.zip',
 '/usr/lib/python3.12',
 '/usr/lib/python3.12/lib-dynload',
 '',
 '/home/cboettig/Documents/github/boettiger-lab/wetlands/.venv/lib/python3.12/site-packages']

## Configuration

In [3]:
# Load config
import getpass
import json
import os

# Use hosted MCP server by default
HOSTED_MCP_URL = "https://biodiversity-mcp.nrp-nautilus.io/"
CONFIG_PATH = 'config.json'

# Optional local overrides: every setting falls back to a default when
# config.json is absent or does not define the key.
config = {}
if os.path.exists(CONFIG_PATH):
    # Decode explicitly as UTF-8 instead of relying on the platform's locale.
    with open(CONFIG_PATH, encoding="utf-8") as f:
        config = json.load(f)
mcp_url = config.get('mcp_server_url', HOSTED_MCP_URL)
llm_endpoint = config.get('llm_endpoint', None)
llm_model = config.get('llm_model', None)

# Get API key from environment
api_key = os.getenv('NRP_API_KEY')
if not api_key:
    print("WARNING: NRP_API_KEY not set!")
    # getpass hides the typed key, so the secret is not echoed into the cell
    # output (and therefore not stored in the saved notebook) as input() would.
    api_key = getpass.getpass("Enter your NRP API key: ")

# The key itself is deliberately never printed.
print(f"LLM Endpoint: {llm_endpoint}")
print(f"Model: {llm_model}")
print(f"MCP Server: {mcp_url}")

LLM Endpoint: https://ellm.nrp-nautilus.io/v1
Model: gpt-oss
MCP Server: https://biodiversity-mcp.nrp-nautilus.io/mcp


## Load System Prompt

In [4]:
# Read the Markdown system prompt. The encoding is given explicitly so the
# text is decoded the same way regardless of the platform's locale default
# (assumes the file is UTF-8 -- confirm if it was authored elsewhere).
with open('system-prompt.md', encoding='utf-8') as f:
    system_prompt = f.read()

print(f"System prompt loaded: {len(system_prompt)} characters")

System prompt loaded: 5131 characters


## Define MCP Query Tool

In [5]:
def query_mcp(sql_query: str, timeout: float = 300.0) -> str:
    """Execute a SQL query via the MCP server's ``query`` tool.

    Posts a JSON-RPC 2.0 ``tools/call`` request to the module-level
    ``mcp_url`` and returns the text of the first text item in the result.

    Args:
        sql_query: SQL text passed as the tool's ``query`` argument.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` would wait indefinitely on a stalled server.

    Returns:
        The ``text`` field of the first text content item in the response.

    Raises:
        requests.HTTPError: The server answered with a non-success status.
        requests.Timeout: The server did not answer within ``timeout``.
        RuntimeError: The response carried a JSON-RPC error, flagged a tool
            failure, or contained no text content.
    """
    payload = {
        "jsonrpc": "2.0",
        "id": 1,
        "method": "tools/call",
        "params": {
            "name": "query",
            "arguments": {
                "query": sql_query
            }
        }
    }

    headers = {
        "Content-Type": "application/json",
        "Accept": "application/json"
    }

    response = requests.post(mcp_url, json=payload, headers=headers, timeout=timeout)
    response.raise_for_status()

    data = response.json()
    if not isinstance(data, dict):
        raise RuntimeError(f"Unexpected MCP response: {data!r}")

    # Protocol-level failure: report the message when present, else the raw error.
    error = data.get('error')
    if error is not None:
        message = error.get('message', error) if isinstance(error, dict) else error
        raise RuntimeError(f"MCP error: {message}")

    result = data.get('result') or {}
    texts = [
        item['text']
        for item in result.get('content') or []
        if isinstance(item, dict) and 'text' in item
    ]

    # Tool-level failures arrive inside a normal result flagged with isError;
    # raise so callers do not mistake the error text for query output.
    if result.get('isError'):
        detail = texts[0] if texts else "tool call failed"
        raise RuntimeError(f"MCP tool error: {detail}")

    if not texts:
        raise RuntimeError("MCP response contained no text content")

    return texts[0]

# NOTE: per LangChain's documentation, the @tool decorator uses the docstring
# below as the tool description presented to the model, so that text is
# effectively runtime behavior -- edit it deliberately.
@tool
def query_wetlands_data(query: str) -> str:
    """Execute a SQL query on the wetlands database using DuckDB.
    
    Args:
        query: SQL query to execute. Must start with S3 secret setup.
    
    Returns:
        Query results as a string
    """
    # Thin wrapper: delegates to query_mcp so the LLM-facing tool and manual
    # calls go through the same request code.
    return query_mcp(query)

## Initialize LLM

In [6]:
# Build the chat client from the endpoint, key and model chosen in the
# configuration cell.
_llm_kwargs = {
    "base_url": llm_endpoint,
    "api_key": api_key,
    "model": llm_model,
    "temperature": 0.7,
}
llm = ChatOpenAI(**_llm_kwargs)

# Derive a second handle that advertises the SQL tool; the plain `llm` stays
# available for calls that should not offer tools.
llm_with_tools = llm.bind_tools([query_wetlands_data])

print("LLM initialized with tools")

LLM initialized with tools


## Test Direct MCP Query

In [None]:
# Test MCP server directly with plain SQL payload
# NOTE(review): the output recorded for this cell is a 404, whereas query_mcp()
# talks to the server with a JSON-RPC tools/call body -- confirm the server
# still accepts a plain-text SQL body before relying on this cell.
test_query = """
CREATE OR REPLACE SECRET s3secret (
    TYPE S3,
    PROVIDER CONFIG,
    ENDPOINT 'minio.carlboettiger.info',
    USE_SSL true,
    URL_STYLE 'path'
);

SELECT COUNT(*) as total_hexagons 
FROM read_parquet('s3://public-wetlands/hex/**')
WHERE Z > 0;
"""

# Send plain SQL directly to MCP server at root URL
headers = {
    "Content-Type": "text/plain",
    "Accept": "application/json"
}
# Remove any trailing /mcp from mcp_url if present
mcp_root_url = mcp_url.rstrip('/').removesuffix('/mcp')
# Encode the body explicitly (a str body cannot carry characters outside
# Latin-1) and bound the wait so a stalled server cannot hang the notebook.
response = requests.post(
    mcp_root_url,
    data=test_query.encode('utf-8'),
    headers=headers,
    timeout=300,
)
response.raise_for_status()
result = response.json()
print("MCP Test Result:")
print(result)

HTTPError: 404 Client Error: Not Found for url: https://biodiversity-mcp.nrp-nautilus.io/mcp

## Test LLM Without Tool Calling

In [None]:
# Smoke-test the bare model (no tools bound) with a trivial prompt, to check
# the endpoint and API key independently of tool calling.
messages = [SystemMessage(content="You are a helpful assistant.")]
messages.append(HumanMessage(content="What is 2+2?"))

response = llm.invoke(messages)
print("LLM Test (no tools):", response.content, sep="\n")

## Test LLM With Tool Calling

In [None]:
def chat_with_tools(user_message: str):
    """Send a message to the LLM, run the SQL tool calls it requests, and return the answer.

    The tool-enabled model is asked first. If it answers directly, that text
    is returned. If it requests tool calls, every requested query is executed
    and each outcome (result or error text) is sent back as a tool message
    tied to the originating call, after which the plain model writes the final
    answer. If any call lacks a ``query`` argument, nothing is executed and
    the plain model is asked to answer without tools.

    Progress (the question, each SQL query, its result or error, and the
    final answer) is printed along the way.

    Args:
        user_message: The user's question.

    Returns:
        The content of the final model response.
    """
    # Local import: ToolMessage is not among the names the notebook's import
    # cell brings in, and langchain_core is already a dependency of this file.
    from langchain_core.messages import ToolMessage

    messages = [
        SystemMessage(content=system_prompt),
        HumanMessage(content=user_message)
    ]

    # Get LLM response
    response = llm_with_tools.invoke(messages)

    print(f"\n{'='*60}")
    print(f"User: {user_message}")
    print(f"{'='*60}\n")

    # No tool requested: the first response already is the answer.
    if not response.tool_calls:
        print("ü§ñ Assistant:")
        print(response.content)
        return response.content

    # Validate every requested call before running any of them, so the
    # plain-text fallback always starts from the untouched conversation.
    for tool_call in response.tool_calls:
        print(f"üîß LLM is calling tool: {tool_call['name']}\n")

        args = tool_call.get('args', {})
        if not args or 'query' not in args:
            print(f"‚ö†Ô∏è Empty or invalid args from LLM: {args}")
            print("LLM failed to provide query. Asking for plain text response instead...\n")

            # Get a non-tool response instead
            plain_response = llm.invoke(messages)
            print("ü§ñ Assistant:")
            print(plain_response.content)
            return plain_response.content

    # The assistant turn carrying the tool calls must precede their results.
    messages.append(response)

    any_failed = False
    for tool_call in response.tool_calls:
        sql_query = tool_call['args']['query']

        print("SQL Query:")
        print("-" * 60)
        print(sql_query)
        print("-" * 60)

        try:
            tool_result = query_wetlands_data.invoke({'query': sql_query})
        except Exception as e:
            # Deliberately broad: any failure is reported back to the model
            # rather than aborting the conversation.
            any_failed = True
            print(f"\n‚ùå Error executing query: {e}")
            reply = f"Error executing query: {e}\n\nPlease provide a corrected SQL query."
        else:
            print("\nQuery Result:")
            print("-" * 60)
            print(tool_result)
            print("-" * 60)
            reply = str(tool_result)

        # Answer each call with a tool message bound to its id; a plain human
        # message here leaves the assistant's tool call unanswered.
        messages.append(
            ToolMessage(content=reply, tool_call_id=tool_call.get('id') or '')
        )

    if any_failed:
        print("Asking LLM to try again...\n")
        heading = "ü§ñ Assistant (retry):"
    else:
        heading = "\nü§ñ Assistant:"

    # The plain model (no tools bound) interprets the results as text.
    final_response = llm.invoke(messages)
    print(heading)
    print(final_response.content)
    return final_response.content

## Example Queries

In [None]:
# Query 1: Total wetlands area
# chat_with_tools prints the exchange and also returns the answer text; as the
# cell's last expression, that return value is displayed by the notebook too.
chat_with_tools("How many hectares of wetlands are there in total?")

In [None]:
# Query 2: Peatlands (asks for square kilometres rather than hectares)
chat_with_tools("What is the total area of peatlands in square kilometers?")

In [None]:
# Query 3: Freshwater vs saline (a comparison rather than a single total)
chat_with_tools("Compare the area of freshwater wetlands to saline wetlands")

In [None]:
# Your custom query: edit user_query and re-run this cell
user_query = "What are the top 5 most common wetland types by area?"
chat_with_tools(user_query)

## Troubleshooting

If the LLM returns a tool call with empty or missing arguments (`args`), the model may not support tool calling properly. You can:
1. Try a different model (e.g., `gpt-4` or `gpt-3.5-turbo`)
2. Manually construct queries and call `query_mcp()` directly
3. Use the direct MCP query test above ("Test Direct MCP Query") to verify that queries work