From 5661478d98e65785e2b38b336aa2dc219ccb1aed Mon Sep 17 00:00:00 2001 From: Carly Richmond Date: Wed, 3 Dec 2025 16:09:50 +0000 Subject: [PATCH] Fixing formatting issue in notebook --- ...elasticsearch-mcp-server-for-chatgpt.ipynb | 141 +++++++++--------- 1 file changed, 70 insertions(+), 71 deletions(-) diff --git a/supporting-blog-content/elasticsearch-chatgpt-connector/elasticsearch-mcp-server-for-chatgpt.ipynb b/supporting-blog-content/elasticsearch-chatgpt-connector/elasticsearch-mcp-server-for-chatgpt.ipynb index ca5a08cc..6dc08e36 100644 --- a/supporting-blog-content/elasticsearch-chatgpt-connector/elasticsearch-mcp-server-for-chatgpt.ipynb +++ b/supporting-blog-content/elasticsearch-chatgpt-connector/elasticsearch-mcp-server-for-chatgpt.ipynb @@ -139,10 +139,20 @@ "metadata": {}, "outputs": [], "source": [ - "os.environ[\"ELASTICSEARCH_URL\"] = os.environ.get(\"ELASTICSEARCH_URL\") or getpass(\"Enter your Elasticsearch URL: \")\n", - "os.environ[\"ELASTICSEARCH_API_KEY\"] = os.environ.get(\"ELASTICSEARCH_API_KEY\") or getpass(\"Enter your Elasticsearch API key: \")\n", - "os.environ[\"NGROK_TOKEN\"] = os.environ.get(\"NGROK_TOKEN\") or getpass(\"Enter your Ngrok Token: \")\n", - "os.environ[\"ELASTICSEARCH_INDEX\"] = os.environ.get(\"ELASTICSEARCH_INDEX\") or getpass(\"Enter your Elasticsearch Index name (default: github_internal): \") or \"github_internal\"\n", + "os.environ[\"ELASTICSEARCH_URL\"] = os.environ.get(\"ELASTICSEARCH_URL\") or getpass(\n", + " \"Enter your Elasticsearch URL: \"\n", + ")\n", + "os.environ[\"ELASTICSEARCH_API_KEY\"] = os.environ.get(\n", + " \"ELASTICSEARCH_API_KEY\"\n", + ") or getpass(\"Enter your Elasticsearch API key: \")\n", + "os.environ[\"NGROK_TOKEN\"] = os.environ.get(\"NGROK_TOKEN\") or getpass(\n", + " \"Enter your Ngrok Token: \"\n", + ")\n", + "os.environ[\"ELASTICSEARCH_INDEX\"] = (\n", + " os.environ.get(\"ELASTICSEARCH_INDEX\")\n", + " or getpass(\"Enter your Elasticsearch Index name (default: github_internal): \")\n", + " or \"github_internal\"\n", + ")\n", "\n", "ELASTICSEARCH_URL = os.environ[\"ELASTICSEARCH_URL\"]\n", "ELASTICSEARCH_API_KEY = os.environ[\"ELASTICSEARCH_API_KEY\"]\n", @@ -177,10 +187,7 @@ }, "outputs": [], "source": [ - "es_client = Elasticsearch(\n", - " ELASTICSEARCH_URL,\n", - " api_key=ELASTICSEARCH_API_KEY\n", - ")\n", + "es_client = Elasticsearch(ELASTICSEARCH_URL, api_key=ELASTICSEARCH_API_KEY)\n", "\n", "if es_client.ping():\n", " print(\"Elasticsearch connection successful\")\n", @@ -225,7 +232,7 @@ " \"text\": {\"type\": \"text\"},\n", " \"text_semantic\": {\n", " \"type\": \"semantic_text\",\n", - " \"inference_id\": \".elser-2-elasticsearch\"\n", + " \"inference_id\": \".elser-2-elasticsearch\",\n", " },\n", " \"url\": {\"type\": \"keyword\"},\n", " \"type\": {\"type\": \"keyword\"},\n", @@ -235,14 +242,14 @@ " \"created_date\": {\"type\": \"date\", \"format\": \"iso8601\"},\n", " \"resolved_date\": {\"type\": \"date\", \"format\": \"iso8601\"},\n", " \"labels\": {\"type\": \"keyword\"},\n", - " \"related_pr\": {\"type\": \"keyword\"}\n", + " \"related_pr\": {\"type\": \"keyword\"},\n", " }\n", " }\n", - " }\n", + " },\n", " )\n", " print(f\"Index '{INDEX_NAME}' created successfully\")\n", "except Exception as e:\n", - " if 'resource_already_exists_exception' in str(e):\n", + " if \"resource_already_exists_exception\" in str(e):\n", " print(f\"Index '{INDEX_NAME}' already exists\")\n", " else:\n", " print(f\"Error creating index: {e}\")" @@ -629,10 +636,10 @@ } ], "source": [ - "file_path = 'github_internal_dataset.json'\n", + "file_path = \"github_internal_dataset.json\"\n", "df = pd.read_json(file_path)\n", "\n", - "documents = df.to_dict('records')\n", + "documents = df.to_dict(\"records\")\n", "print(f\"Loaded {len(documents)} documents from dataset\")\n", "\n", "df" @@ -663,11 +670,9 @@ "source": [ "def generate_actions():\n", " for doc in documents:\n", - " doc['text_semantic'] = doc['text']\n", - " yield {\n", - " '_index': INDEX_NAME,\n", - " '_source': doc\n", - " }\n", + " doc[\"text_semantic\"] = doc[\"text\"]\n", + " yield {\"_index\": INDEX_NAME, \"_source\": doc}\n", + "\n", "\n", "try:\n", " success, errors = bulk(es_client, generate_actions())\n", @@ -679,7 +684,7 @@ " print(\"Waiting 15 seconds for ELSER to process documents...\")\n", " time.sleep(15)\n", "\n", - " count = es_client.count(index=INDEX_NAME)['count']\n", + " count = es_client.count(index=INDEX_NAME)[\"count\"]\n", " print(f\"Total documents in index: {count}\")\n", "\n", "except Exception as e:\n", @@ -725,10 +730,10 @@ "Use search to find relevant issues/PRs, then fetch to get complete details.\n", "\"\"\"\n", "\n", + "\n", "def create_server():\n", " mcp = FastMCP(\n", - " name=\"Elasticsearch GitHub Issues MCP\",\n", - " instructions=server_instructions\n", + " name=\"Elasticsearch GitHub Issues MCP\", instructions=server_instructions\n", " )\n", "\n", " @mcp.tool()\n", @@ -757,7 +762,7 @@ " \"query\": {\n", " \"semantic\": {\n", " \"field\": \"text_semantic\",\n", - " \"query\": query\n", + " \"query\": query,\n", " }\n", " }\n", " }\n", @@ -774,31 +779,33 @@ " \"assignee^2\",\n", " \"type\",\n", " \"labels\",\n", - " \"priority\"\n", + " \"priority\",\n", " ],\n", " \"type\": \"best_fields\",\n", - " \"fuzziness\": \"AUTO\"\n", + " \"fuzziness\": \"AUTO\",\n", " }\n", " }\n", " }\n", - " }\n", + " },\n", " ],\n", " \"rank_window_size\": 50,\n", - " \"rank_constant\": 60\n", + " \"rank_constant\": 60,\n", " }\n", - " }\n", + " },\n", " )\n", "\n", " # Extract and format search results\n", " results = []\n", - " if response and 'hits' in response:\n", - " for hit in response['hits']['hits']:\n", - " source = hit['_source']\n", - " results.append({\n", - " \"id\": source.get('id', hit['_id']),\n", - " \"title\": source.get('title', 'Unknown'),\n", - " \"url\": source.get('url', '')\n", - " })\n", + " if response and \"hits\" in response:\n", + " for hit in response[\"hits\"][\"hits\"]:\n", + " source = hit[\"_source\"]\n", + " results.append(\n", + " {\n", + " \"id\": source.get(\"id\", hit[\"_id\"]),\n", + " \"title\": source.get(\"title\", \"Unknown\"),\n", + " \"url\": source.get(\"url\", \"\"),\n", + " }\n", + " )\n", "\n", " logger.info(f\"Found {len(results)} results\")\n", " return {\"results\": results}\n", @@ -821,37 +828,29 @@ " try:\n", " # Query by ID to get full document\n", " response = es_client.search(\n", - " index=INDEX_NAME,\n", - " body={\n", - " \"query\": {\n", - " \"term\": {\n", - " \"id\": id\n", - " }\n", - " },\n", - " \"size\": 1\n", - " }\n", + " index=INDEX_NAME, body={\"query\": {\"term\": {\"id\": id}}, \"size\": 1}\n", " )\n", "\n", - " if not response or not response['hits']['hits']:\n", + " if not response or not response[\"hits\"][\"hits\"]:\n", " raise ValueError(f\"Document with id '{id}' not found\")\n", "\n", - " hit = response['hits']['hits'][0]\n", - " source = hit['_source']\n", + " hit = response[\"hits\"][\"hits\"][0]\n", + " source = hit[\"_source\"]\n", "\n", " # Return all document fields\n", " result = {\n", - " \"id\": source.get('id', id),\n", - " \"title\": source.get('title', 'Unknown'),\n", - " \"text\": source.get('text', ''),\n", - " \"url\": source.get('url', ''),\n", - " \"type\": source.get('type', ''),\n", - " \"status\": source.get('status', ''),\n", - " \"priority\": source.get('priority', ''),\n", - " \"assignee\": source.get('assignee', ''),\n", - " \"created_date\": source.get('created_date', ''),\n", - " \"resolved_date\": source.get('resolved_date', ''),\n", - " \"labels\": source.get('labels', ''),\n", - " \"related_pr\": source.get('related_pr', '')\n", + " \"id\": source.get(\"id\", id),\n", + " \"title\": source.get(\"title\", \"Unknown\"),\n", + " \"text\": source.get(\"text\", \"\"),\n", + " \"url\": source.get(\"url\", \"\"),\n", + " \"type\": source.get(\"type\", \"\"),\n", + " \"status\": source.get(\"status\", \"\"),\n", + " \"priority\": source.get(\"priority\", \"\"),\n", + " \"assignee\": source.get(\"assignee\", \"\"),\n", + " \"created_date\": source.get(\"created_date\", \"\"),\n", + " \"resolved_date\": source.get(\"resolved_date\", \"\"),\n", + " \"labels\": source.get(\"labels\", \"\"),\n", + " \"related_pr\": source.get(\"related_pr\", \"\"),\n", " }\n", "\n", " logger.info(f\"Fetched: {result['title']}\")\n", @@ -863,6 +862,7 @@ "\n", " return mcp\n", "\n", + "\n", "print(\"MCP server defined successfully\")" ] }, @@ -892,16 +892,11 @@ "ngrok.set_auth_token(NGROK_TOKEN)\n", "\n", "pyngrok_config = PyngrokConfig(region=\"us\")\n", - "public_url = ngrok.connect(\n", - " 8000,\n", - " \"http\",\n", - " pyngrok_config=pyngrok_config,\n", - " bind_tls=True\n", - ")\n", + "public_url = ngrok.connect(8000, \"http\", pyngrok_config=pyngrok_config, bind_tls=True)\n", "\n", - "print(\"=\"*70)\n", + "print(\"=\" * 70)\n", "print(\"MCP SERVER IS READY!\")\n", - "print(\"=\"*70)\n", + "print(\"=\" * 70)\n", "print(f\"\\nPublic URL (use in ChatGPT): {public_url}/sse\")\n", "print(\"\\nIMPORTANT: Copy the URL above (including /sse at the end)\")\n", "print(\"\\nTo connect in ChatGPT:\")\n", @@ -910,7 +905,7 @@ "print(\"3. Paste the URL above\")\n", "print(\"4. Save and start using!\")\n", "print(\"\\nKeep this notebook running while using the connector\")\n", - "print(\"=\"*70)" + "print(\"=\" * 70)" ] }, { @@ -1089,9 +1084,11 @@ "print(\"Server is running. To stop: Runtime > Interrupt execution\")\n", "print()\n", "\n", + "\n", "def run_server():\n", " server.run(transport=\"sse\", host=\"0.0.0.0\", port=8000)\n", "\n", + "\n", "server_thread = threading.Thread(target=run_server, daemon=True)\n", "server_thread.start()\n", "\n", @@ -1143,8 +1140,10 @@ "outputs": [], "source": [ "try:\n", - " result = es_client.options(ignore_status=[400, 404]).indices.delete(index=INDEX_NAME)\n", - " if result.get('acknowledged', False):\n", + " result = es_client.options(ignore_status=[400, 404]).indices.delete(\n", + " index=INDEX_NAME\n", + " )\n", + " if result.get(\"acknowledged\", False):\n", " print(f\"Index '{INDEX_NAME}' deleted successfully\")\n", " else:\n", " print(f\"Error deleting index: {result}\")\n",