


# Elasticsearch MCP Server for ChatGPT
This notebook deploys an MCP (Model Context Protocol) server that connects ChatGPT to Elasticsearch, enabling natural language queries over internal GitHub issues and pull requests.


## Install Dependencies

In [None]:
# Install the third-party packages imported by the next cell; -q keeps pip's output quiet.
!pip install fastmcp elasticsearch pyngrok pandas -q
print("Dependencies installed")

## Import Libraries

In [None]:
import os
import json
import logging
import threading
import time
import pandas as pd
from typing import Dict, List, Any
from getpass import getpass
from fastmcp import FastMCP
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
from pyngrok import ngrok
from pyngrok.conf import PyngrokConfig

# Configure the root logger to emit INFO-level and higher records.
logging.basicConfig(level=logging.INFO)
# Module-level logger; the MCP tool functions defined later log through it.
logger = logging.getLogger(__name__)

print("Libraries imported successfully")

## Setup Configuration

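**Important**: Set these environment variables before running this notebook (you will be prompted for any that are missing):
- `ELASTICSEARCH_URL` - Your Elasticsearch cluster URL
- `ELASTICSEARCH_API_KEY` - API key that can create, write to, search, and delete the index used by this notebook
- `NGROK_TOKEN` - Your ngrok auth token from the [ngrok dashboard](https://dashboard.ngrok.com/)
- `ELASTICSEARCH_INDEX` - (Optional) Name of the index to use; defaults to `github_internal`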

**How to set environment variables:**
- In Jupyter/VS Code: Use a `.env` file or set them in your terminal before starting

In [None]:
def _read_setting(name, prompt, *, secret=False, default=None):
    """Return setting *name* from the environment, prompting when it is unset.

    Args:
        name: Environment variable to read and to store the final value in.
        prompt: Text shown when the variable is missing or blank.
        secret: When True the value is read with ``getpass`` (input hidden);
            otherwise with ``input`` so the user can see what they type.
        default: Value used when the user just presses Enter at the prompt.

    Returns:
        The stripped, non-empty value, which is also written back to
        ``os.environ[name]``.

    Raises:
        ValueError: If no value is available from the environment, the
            prompt, or *default*.
    """
    # Strip so that a stray space/newline from copy-paste does not end up in
    # a URL or credential.
    value = (os.environ.get(name) or "").strip()
    if not value:
        ask = getpass if secret else input
        value = ask(prompt).strip() or (default or "")
    if not value:
        # Fail here with a clear message rather than in a later cell.
        raise ValueError(f"{name} is required")
    os.environ[name] = value
    return value


# Only the credentials are read with hidden input; the URL and index name are
# not secrets, so they are prompted for visibly.
ELASTICSEARCH_URL = _read_setting(
    "ELASTICSEARCH_URL",
    "Enter your Elasticsearch URL: ",
)
ELASTICSEARCH_API_KEY = _read_setting(
    "ELASTICSEARCH_API_KEY",
    "Enter your Elasticsearch API key: ",
    secret=True,
)
NGROK_TOKEN = _read_setting(
    "NGROK_TOKEN",
    "Enter your Ngrok Token: ",
    secret=True,
)
INDEX_NAME = _read_setting(
    "ELASTICSEARCH_INDEX",
    "Enter your Elasticsearch Index name (default: github_internal): ",
    default="github_internal",
)

print("Configuration loaded successfully")
print(f"Index name: {INDEX_NAME}")
# Show only the first 30 characters of the URL.
print(f"Elasticsearch URL: {ELASTICSEARCH_URL[:30]}...")

## Initialize Elasticsearch Client

In [None]:
# Build the client from the URL and API key collected in the configuration step.
es_client = Elasticsearch(ELASTICSEARCH_URL, api_key=ELASTICSEARCH_API_KEY)

# ping() is the reachability check; cluster details are requested only when it succeeds.
if not es_client.ping():
    print("ERROR: Could not connect to Elasticsearch")
else:
    print("Elasticsearch connection successful")
    cluster_info = es_client.info()
    print(f"Cluster: {cluster_info['cluster_name']}")
    version_info = cluster_info['version']
    print(f"Version: {version_info['number']}")

## Create Index with Mappings

In [None]:
# Field definitions for the documents stored in the index. `text_semantic` is a
# semantic_text field bound to the `.elser-2-elasticsearch` inference endpoint;
# the two date fields are parsed as ISO 8601.
index_properties = {
    "id": {"type": "keyword"},
    "title": {"type": "text"},
    "text": {"type": "text"},
    "text_semantic": {
        "type": "semantic_text",
        "inference_id": ".elser-2-elasticsearch"
    },
    "url": {"type": "keyword"},
    "type": {"type": "keyword"},
    "status": {"type": "keyword"},
    "priority": {"type": "keyword"},
    "assignee": {"type": "keyword"},
    "created_date": {"type": "date", "format": "iso8601"},
    "resolved_date": {"type": "date", "format": "iso8601"},
    "labels": {"type": "keyword"},
    "related_pr": {"type": "keyword"}
}

try:
    es_client.indices.create(
        index=INDEX_NAME,
        body={"mappings": {"properties": index_properties}}
    )
except Exception as e:
    # An already-existing index is recognised by this marker in the error text
    # and reported as informational; anything else is printed as an error.
    index_exists = 'resource_already_exists_exception' in str(e)
    if index_exists:
        print(f"Index '{INDEX_NAME}' already exists")
    else:
        print(f"Error creating index: {e}")
else:
    print(f"Index '{INDEX_NAME}' created successfully")

## Load Sample Dataset

**Dataset**: 15 documents including issues, pull requests, and RFCs with realistic content, comments, and relationships.


In [None]:
# Relative path: the dataset file is looked up in the current working directory.
file_path = 'github_internal_dataset.json'
df = pd.read_json(file_path)

# One dict per DataFrame row; the ingestion step indexes these records.
documents = df.to_dict(orient='records')
print(f"Loaded {len(documents)} documents from dataset")

# Last expression of the cell, so the notebook renders the DataFrame.
df

## Ingest Documents to Elasticsearch

In [None]:
def generate_actions():
    """Yield one bulk-index action per entry in the module-level ``documents`` list.

    Each action targets ``INDEX_NAME`` and carries a copy of the document with
    its ``text`` duplicated into ``text_semantic``.

    The dicts in ``documents`` are not modified. When a document has an
    ``id``, it is also used as the Elasticsearch ``_id`` so that running the
    ingestion again overwrites the existing documents instead of adding
    duplicates.

    Raises:
        KeyError: If a document has no ``text`` key.
    """
    for doc in documents:
        action = {
            '_index': INDEX_NAME,
            # Copy rather than mutate, so the loaded dataset stays as it was read.
            '_source': {**doc, 'text_semantic': doc['text']},
        }
        doc_id = doc.get('id')
        if doc_id is not None:
            # Deterministic _id makes re-ingestion idempotent.
            action['_id'] = doc_id
        yield action

try:
    # bulk() consumes the action generator; its return value is unpacked into
    # the indexed count and the collected errors.
    bulk_result = bulk(es_client, generate_actions())
    success, errors = bulk_result
    print(f"Successfully indexed {success} documents")

    if errors:
        print(f"Errors during indexing: {errors}")

    # Fixed pause before counting, as announced in the message below.
    print("Waiting 15 seconds for ELSER to process documents...")
    time.sleep(15)

    count_response = es_client.count(index=INDEX_NAME)
    count = count_response['count']
    print(f"Total documents in index: {count}")

except Exception as e:
    # Any failure in the steps above is reported here rather than raised.
    print(f"Error during bulk indexing: {e!s}")
    print("If you see timeout errors, wait a few seconds and try again")

## Define MCP Server

In [None]:
# Text passed to FastMCP through its `instructions` argument in create_server().
server_instructions = """
This MCP server provides access to TechCorp's internal GitHub issues and pull requests.
Use search to find relevant issues/PRs, then fetch to get complete details.
"""

def create_server():
    """Build the FastMCP server and register its two tools, ``search`` and ``fetch``.

    Both tools query the module-level ``es_client`` against ``INDEX_NAME`` and
    log through the module-level ``logger``.

    Returns:
        The configured ``FastMCP`` instance; starting it is left to the caller.
    """
    mcp = FastMCP(
        name="Elasticsearch GitHub Issues MCP",
        instructions=server_instructions
    )

    # NOTE(review): the tool docstrings below are presumably surfaced to MCP
    # clients as tool descriptions, so their text is treated as runtime data.
    # NOTE(review): es_client is the synchronous client, so each call below
    # holds the event loop until Elasticsearch answers.

    @mcp.tool()
    async def search(query: str) -> Dict[str, List[Dict[str, Any]]]:
        """
        Search for internal issues and PRs using hybrid search.
        Returns list with id, title, and url.
        """
        # A blank query yields an empty result set without calling Elasticsearch.
        if not query or not query.strip():
            return {"results": []}

        logger.info(f"Searching for: '{query}'")

        try:
            # Reciprocal rank fusion over two retrievers: a semantic query on
            # `text_semantic` and a fuzzy multi_match over the lexical fields.
            response = es_client.search(
                index=INDEX_NAME,
                size=10,
                source=["id", "title", "url", "type", "priority"],
                retriever={
                    "rrf": {
                        "retrievers": [
                            {
                                "standard": {
                                    "query": {
                                        "semantic": {
                                            "field": "text_semantic",
                                            "query": query
                                        }
                                    }
                                }
                            },
                            {
                                "standard": {
                                    "query": {
                                        "multi_match": {
                                            "query": query,
                                            "fields": [
                                                "title^3",
                                                "text^2",
                                                "assignee^2",
                                                "type",
                                                "labels",
                                                "priority"
                                            ],
                                            "type": "best_fields",
                                            "fuzziness": "AUTO"
                                        }
                                    }
                                }
                            }
                        ],
                        "rank_window_size": 50,
                        "rank_constant": 60
                    }
                }
            )

            results = []
            if response and 'hits' in response:
                for hit in response['hits']['hits']:
                    source = hit['_source']
                    results.append({
                        # Fall back to the Elasticsearch _id when the document has no `id` field.
                        "id": source.get('id', hit['_id']),
                        "title": source.get('title', 'Unknown'),
                        "url": source.get('url', '')
                    })

            logger.info(f"Found {len(results)} results")
            return {"results": results}

        except Exception as e:
            logger.error(f"Search error: {e}")
            # Chain the cause so the original traceback stays attached.
            raise ValueError(f"Search failed: {str(e)}") from e

    @mcp.tool()
    async def fetch(id: str) -> Dict[str, Any]:
        """
        Retrieve complete issue/PR details by ID.
        Returns id, title, text, url, and metadata.
        """
        # `id` shadows the builtin, but it is the tool's argument name and so
        # part of its interface. Blank ids are rejected the same way `search`
        # rejects blank queries.
        if not id or not id.strip():
            raise ValueError("ID is required")

        logger.info(f"Fetching: {id}")

        try:
            # Exact match on the keyword `id` field; one hit is enough.
            response = es_client.search(
                index=INDEX_NAME,
                query={"term": {"id": id}},
                size=1
            )
            hits = response['hits']['hits'] if response else []
            source = hits[0]['_source'] if hits else None
        except Exception as e:
            logger.error(f"Fetch error: {e}")
            raise ValueError(f"Failed to fetch '{id}': {str(e)}") from e

        # Raised outside the try block so a missing document is reported with
        # its own message instead of being rewrapped as a fetch failure.
        if source is None:
            logger.warning(f"Document not found: {id}")
            raise ValueError(f"Document with id '{id}' not found")

        result = {
            "id": source.get('id', id),
            "title": source.get('title', 'Unknown'),
            "text": source.get('text', ''),
            "url": source.get('url', ''),
            "type": source.get('type', ''),
            "status": source.get('status', ''),
            "priority": source.get('priority', ''),
            "assignee": source.get('assignee', ''),
            "created_date": source.get('created_date', ''),
            "resolved_date": source.get('resolved_date', ''),
            "labels": source.get('labels', ''),
            "related_pr": source.get('related_pr', '')
        }

        logger.info(f"Fetched: {result['title']}")
        return result

    return mcp

print("MCP server defined successfully")

## Start Ngrok Tunnel

In [None]:
ngrok.set_auth_token(NGROK_TOKEN)

pyngrok_config = PyngrokConfig(region="us")
# Open a public tunnel to local port 8000, the port the MCP server is started on.
public_url = ngrok.connect(
    8000,
    "http",
    pyngrok_config=pyngrok_config,
    bind_tls=True
)

# connect() returns an NgrokTunnel object in current pyngrok releases, and
# formatting that object prints its description rather than a bare URL. Read
# its `public_url` attribute, falling back to the value itself in case it is
# already a plain string.
tunnel_url = getattr(public_url, "public_url", public_url)

print("="*70)
print("MCP SERVER IS READY!")
print("="*70)
print(f"\nPublic URL (use in ChatGPT): {tunnel_url}/sse")
print("\nIMPORTANT: Copy the URL above (including /sse at the end)")
print("\nTo connect in ChatGPT:")
print("1. Go to Settings > Connectors")
print("2. Click 'Create' or 'Add Custom Connector'")
print("3. Paste the URL above")
print("4. Save and start using!")
print("\nKeep this notebook running while using the connector")
print("="*70)

## Run MCP Server

In [None]:
server = create_server()

print("Starting MCP server...")
print("Server is running. To stop: Runtime > Interrupt execution")
print()

def run_server():
    """Run the MCP server with the SSE transport on 0.0.0.0:8000."""
    server.run(transport="sse", host="0.0.0.0", port=8000)

# Daemon thread, so the server does not keep the process alive on its own.
server_thread = threading.Thread(target=run_server, daemon=True)
server_thread.start()

print("Server started successfully!")
print("Your ngrok URL is ready to use in ChatGPT")
print("Keep this cell running...")
print()

try:
    # Keep the cell busy only while the server thread is alive, so a server
    # that failed to start or crashed does not leave the cell spinning forever.
    while server_thread.is_alive():
        time.sleep(1)
except KeyboardInterrupt:
    print("\nServer stopped")
else:
    # Reached only when the loop ended because the server thread exited.
    print("\nERROR: MCP server thread exited; check the output above for the cause")

## Cleanup (Optional)

In [None]:
try:
    # Ignored status codes are a transport option. The 8.x client deprecates
    # passing them per request (`ignore=`) in favour of `.options()`, so
    # 400/404 responses are returned instead of raised via ignore_status.
    result = es_client.options(ignore_status=[400, 404]).indices.delete(index=INDEX_NAME)
    if result.get('acknowledged', False):
        print(f"Index '{INDEX_NAME}' deleted successfully")
    else:
        # Covers the ignored 400/404 responses, which carry no acknowledgement.
        print(f"Error deleting index: {result}")
except Exception as e:
    print(f"Error: {e}")