diff --git a/README.md b/README.md index 6d10013..65743ca 100644 --- a/README.md +++ b/README.md @@ -510,6 +510,71 @@ Captures a screenshot of the current page. } ``` +### take_grid_screenshot +Captures a screenshot with coordinate grid overlay for visual reference and element targeting. + +**Parameters:** +- `grid_spacing`: Pixels between grid lines + - Type: number + - Default: 50 +- `target_identification_mode`: Overlay mode: `coordinates` draws a labelled coordinate grid, `highlights` outlines clickable elements and labels their center coordinates + - Type: string (enum) + - Enum: ["coordinates", "highlights"] + - Default: "coordinates" +- `outputPath` (optional): Path where to save the screenshot. If not provided, returns base64 data. + - Type: string + +**Example:** +```json +{ + "tool": "take_grid_screenshot", + "parameters": { + "grid_spacing": 50, + "target_identification_mode": "coordinates", + "outputPath": "/path/to/grid_screenshot.png" + } +} +``` + +### click_at_coordinates +Clicks at specific x,y coordinates on the viewport, enabling precise coordinate-based interactions. + +**Parameters:** +- `x` (required): X coordinate (horizontal position in pixels) + - Type: number +- `y` (required): Y coordinate (vertical position in pixels) + - Type: number +- `relative_to`: Coordinate reference point + - Type: string + - Enum: ["viewport", "center"] + - Default: "viewport" +- `scroll_if_needed`: Auto-scroll if coordinates are outside viewport + - Type: boolean + - Default: true + +**Examples:** +```json +// Click at absolute viewport coordinates +{ + "tool": "click_at_coordinates", + "parameters": { + "x": 250, + "y": 150, + "relative_to": "viewport" + } +} + +// Click relative to viewport center +{ + "tool": "click_at_coordinates", + "parameters": { + "x": -50, + "y": 25, + "relative_to": "center" + } +} +``` + ### close_session Closes the current browser session and cleans up resources. 
// src/lib/server.js
// Tool registrations added or changed by this patch, shown in their post-patch form.
// The tail of the preceding registration and the head of the following
// "close_session" registration are cut off in this excerpt and are not reproduced here.

/**
 * Resolves after the given number of milliseconds.
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function pauseFor(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Throws unless `value` is a finite number greater than zero.
 * @param {string} name - Parameter name used in the error message.
 * @param {number} value - Value to validate.
 * @throws {Error} When the value is not a positive finite number.
 */
function requirePositiveNumber(name, value) {
  if (!Number.isFinite(value) || value <= 0) {
    throw new Error(`${name} must be a positive number (received ${value})`);
  }
}

/**
 * Reports whether a viewport-relative point lies outside the visible viewport.
 * @param {number} px - Horizontal position in CSS pixels.
 * @param {number} py - Vertical position in CSS pixels.
 * @param {{width: number, height: number}} viewport - Viewport dimensions.
 * @returns {boolean}
 */
function isOutsideViewport(px, py, viewport) {
  return px < 0 || py < 0 || px >= viewport.width || py >= viewport.height;
}

/**
 * Removes every click indicator / confirmation marker from the page.
 * @param {object} driver - WebDriver instance exposing `executeScript`.
 * @returns {Promise<void>}
 */
async function removeClickIndicators(driver) {
  await driver.executeScript(`
    document
      .querySelectorAll('.mcp-click-indicator, .mcp-click-confirmation')
      .forEach((node) => node.remove());
  `);
}

// Visual styles of the two markers drawn around a click.
const CLICK_MARKERS = Object.freeze({
  target: Object.freeze({
    className: 'mcp-click-indicator',
    radius: 10,
    css: 'border: 2px solid red; background: rgba(255, 0, 0, 0.2); box-shadow: 0 0 10px rgba(255, 0, 0, 0.5);',
  }),
  confirmation: Object.freeze({
    className: 'mcp-click-confirmation',
    radius: 15,
    css: 'border: 3px solid green; background: rgba(0, 255, 0, 0.3); box-shadow: 0 0 15px rgba(0, 255, 0, 0.7);',
  }),
});

/**
 * Draws a circular, non-interactive marker centred on a viewport point.
 * Values are handed to the page through `arguments` rather than being
 * interpolated into the script text.
 * @param {object} driver - WebDriver instance exposing `executeScript`.
 * @param {{className: string, radius: number, css: string}} marker - Marker style.
 * @param {number} x - Viewport X of the marker centre.
 * @param {number} y - Viewport Y of the marker centre.
 * @returns {Promise<void>}
 */
async function showClickMarker(driver, marker, x, y) {
  await driver.executeScript(
    `
    const [className, left, top, diameter, extraCss] = arguments;
    const marker = document.createElement('div');
    marker.className = className;
    marker.style.cssText =
      'position: fixed; left: ' + left + 'px; top: ' + top + 'px; ' +
      'width: ' + diameter + 'px; height: ' + diameter + 'px; ' +
      'border-radius: 50%; z-index: 2147483647; pointer-events: none; ' + extraCss;
    document.body.appendChild(marker);
    `,
    marker.className,
    x - marker.radius,
    y - marker.radius,
    marker.radius * 2,
    marker.css
  );
}

/**
 * Resizes a base64 PNG inside the browser using a canvas.
 * @param {object} driver - WebDriver instance exposing `executeScript`.
 * @param {string} pngBase64 - Screenshot as base64-encoded PNG.
 * @param {number} scale - Resize factor; 1.0 returns the input unchanged.
 * @returns {Promise<string>} The (possibly resized) base64 PNG.
 */
async function scaleScreenshot(driver, pngBase64, scale) {
  if (scale === 1.0) {
    return pngBase64;
  }
  return driver.executeScript(
    `
    const [source, factor] = arguments;
    return new Promise((resolve, reject) => {
      const img = new Image();
      img.onload = () => {
        const canvas = document.createElement('canvas');
        // Never let rounding collapse a dimension to zero (empty data URL).
        canvas.width = Math.max(1, Math.round(img.width * factor));
        canvas.height = Math.max(1, Math.round(img.height * factor));
        canvas.getContext('2d').drawImage(img, 0, 0, canvas.width, canvas.height);
        resolve(canvas.toDataURL('image/png').split(',')[1]);
      };
      // Without this the promise would hang until the driver's script timeout.
      img.onerror = () => reject(new Error('Could not decode screenshot for resizing'));
      img.src = 'data:image/png;base64,' + source;
    });
    `,
    pngBase64,
    scale
  );
}

/**
 * Writes the screenshot to disk when a path is given, otherwise returns it inline.
 * @param {object} options
 * @param {string} options.pngBase64 - Screenshot as base64-encoded PNG.
 * @param {string} [options.outputPath] - Destination file; omitted means inline result.
 * @param {string} options.savedMessage - Text reported after saving to disk.
 * @param {string} options.inlineMessage - Text preceding the inline base64 payload.
 * @returns {Promise<{content: Array<{type: string, text: string}>}>}
 */
async function deliverScreenshot({ pngBase64, outputPath, savedMessage, inlineMessage }) {
  if (outputPath) {
    const fs = await import('fs');
    await fs.promises.writeFile(outputPath, pngBase64, 'base64');
    return { content: [{ type: 'text', text: savedMessage }] };
  }
  return {
    content: [
      { type: 'text', text: inlineMessage },
      { type: 'text', text: pngBase64 },
    ],
  };
}

// Browser-side script: builds the grid overlay / clickable highlights.
// arguments[0] = grid spacing in px, arguments[1] = identification mode.
const GRID_OVERLAY_SCRIPT = `
  const [spacing, mode] = arguments;

  // Remove any overlay left behind by an earlier call.
  const stale = document.getElementById('mcp-grid-overlay');
  if (stale) {
    stale.remove();
  }

  const overlay = document.createElement('div');
  overlay.id = 'mcp-grid-overlay';
  overlay.style.cssText =
    'position: fixed; top: 0; left: 0; width: 100%; height: 100%; ' +
    'pointer-events: none; z-index: 2147483647; font-family: monospace; font-size: 10px;';

  const labelFont =
    'font-family: Monaco, "Lucida Console", "Courier New", monospace; ' +
    'font-size: 12px; font-weight: bold; color: black; padding: 2px 4px; ' +
    'border-radius: 2px; text-shadow: none; line-height: 1.2; white-space: nowrap; ';

  const addBox = (css, text) => {
    const box = document.createElement('div');
    box.style.cssText = css;
    if (text !== undefined) {
      box.textContent = text;
    }
    overlay.appendChild(box);
  };

  const gridLabelCss = (left, top) =>
    'position: absolute; left: ' + left + 'px; top: ' + top + 'px; ' + labelFont +
    'background: rgba(255, 255, 255, 0.95); border: 1px solid rgba(0, 0, 0, 0.5); ' +
    'box-shadow: 0 1px 3px rgba(0, 0, 0, 0.3); z-index: 2147483647;';

  if (mode === 'coordinates') {
    // Vertical lines, labelled on every second line.
    for (let x = 0; x <= window.innerWidth; x += spacing) {
      addBox(
        'position: absolute; left: ' + x + 'px; top: 0; width: 2px; height: 100vh; ' +
        'background: rgba(0, 0, 255, 0.6); ' +
        'border-left: 1px solid rgba(255, 255, 255, 0.8); ' +
        'border-right: 1px solid rgba(0, 0, 0, 0.6);'
      );
      if (x % (spacing * 2) === 0) {
        addBox(gridLabelCss(x + 4, 2), 'x:' + x);
      }
    }

    // Horizontal lines, labelled on every second line (origin gets both axes).
    for (let y = 0; y <= window.innerHeight; y += spacing) {
      addBox(
        'position: absolute; left: 0; top: ' + y + 'px; width: 100vw; height: 2px; ' +
        'background: rgba(0, 0, 255, 0.6); ' +
        'border-top: 1px solid rgba(255, 255, 255, 0.8); ' +
        'border-bottom: 1px solid rgba(0, 0, 0, 0.6);'
      );
      if (y % (spacing * 2) === 0) {
        addBox(gridLabelCss(2, y + 4), y === 0 ? 'x:0, y:0' : 'y:' + y);
      }
    }
  }

  if (mode === 'highlights') {
    const clickableSelector = 'a, button, input[type="button"], input[type="submit"], [onclick], [role="button"], [tabindex]:not([tabindex="-1"]), select, textarea, input:not([type="hidden"])';
    document.querySelectorAll(clickableSelector).forEach((el) => {
      const rect = el.getBoundingClientRect();
      // Only elements that occupy space are outlined.
      if (rect.width <= 0 || rect.height <= 0) {
        return;
      }

      // Remember the inline outline so cleanup can restore it.
      el.setAttribute('data-mcp-original-outline', el.style.outline);
      el.style.outline = '2px solid rgba(255, 0, 0, 0.7)';

      const centerX = Math.round(rect.left + rect.width / 2);
      const centerY = Math.round(rect.top + rect.height / 2);

      // Tab-style label at the element's upper-left corner, in page coordinates.
      const tab = document.createElement('div');
      tab.className = 'mcp-center-label';
      tab.style.cssText =
        'position: absolute; left: ' + (rect.left + window.scrollX) + 'px; ' +
        'top: ' + (rect.top + window.scrollY - 16) + 'px; ' + labelFont +
        'background: red; z-index: 2147483646; pointer-events: none; ' +
        'border: 2px solid rgba(255, 0, 0, 1); border-bottom: none; ' +
        'box-shadow: 0 -2px 4px rgba(0, 0, 0, 0.3); transform: translateX(-1px);';
      tab.textContent = 'x: ' + centerX + ', y: ' + centerY;
      document.body.appendChild(tab);
    });
  }

  document.body.appendChild(overlay);
  return true;
`;

// Browser-side script: removes everything GRID_OVERLAY_SCRIPT added.
const GRID_CLEANUP_SCRIPT = `
  const overlay = document.getElementById('mcp-grid-overlay');
  if (overlay) {
    overlay.remove();
  }

  document.querySelectorAll('[data-mcp-original-outline]').forEach((el) => {
    el.style.outline = el.getAttribute('data-mcp-original-outline');
    el.removeAttribute('data-mcp-original-outline');
  });

  document.querySelectorAll('.mcp-center-label').forEach((label) => label.remove());

  return true;
`;

/**
 * Tool: click_at_coordinates
 *
 * Clicks a point given either as viewport coordinates or as an offset from the
 * viewport centre, drawing a red target marker before and a green confirmation
 * marker after the click. When the resolved point is outside the viewport and
 * `scroll_if_needed` is true, the page is scrolled to bring it into view and
 * the click position is adjusted by the distance actually scrolled.
 * Failures are reported as a text result rather than thrown.
 */
server.tool(
  "click_at_coordinates",
  "clicks at specific x,y coordinates on the viewport with visual feedback",
  {
    x: z.number().describe("X coordinate (horizontal position in pixels)"),
    y: z.number().describe("Y coordinate (vertical position in pixels)"),
    relative_to: z.enum(["viewport", "center"]).optional().default("viewport").describe("Coordinate reference point"),
    scroll_if_needed: z.boolean().optional().default(true).describe("Auto-scroll if coordinates are outside viewport")
  },
  async ({ x, y, relative_to = "viewport", scroll_if_needed = true }) => {
    // Declared outside `try` so the catch block can still reach it for cleanup.
    let driver;
    try {
      driver = getDriver();

      // Offsets from the centre may be negative; plain viewport coordinates may not.
      if (relative_to === "viewport" && (x < 0 || y < 0)) {
        throw new Error("Viewport coordinates must be non-negative");
      }

      const viewportSize = await driver.executeScript(
        'return { width: window.innerWidth, height: window.innerHeight };'
      );

      // Resolve the request to a point measured from the viewport's top-left corner.
      let targetX = relative_to === "center" ? viewportSize.width / 2 + x : x;
      let targetY = relative_to === "center" ? viewportSize.height / 2 + y : y;

      if (isOutsideViewport(targetX, targetY, viewportSize)) {
        if (!scroll_if_needed) {
          throw new Error(`Coordinates (${x}, ${y}) are outside viewport bounds (${viewportSize.width}x${viewportSize.height})`);
        }

        // Try to centre the point, then measure how far the page really moved
        // (scrolling is clamped at the document edges).
        const scrolled = await driver.executeScript(
          `
          const [dx, dy] = arguments;
          const beforeX = window.scrollX;
          const beforeY = window.scrollY;
          window.scrollBy({ left: dx, top: dy, behavior: 'instant' });
          return { dx: window.scrollX - beforeX, dy: window.scrollY - beforeY };
          `,
          targetX - viewportSize.width / 2,
          targetY - viewportSize.height / 2
        );
        targetX -= scrolled.dx;
        targetY -= scrolled.dy;

        if (isOutsideViewport(targetX, targetY, viewportSize)) {
          throw new Error(`Coordinates (${x}, ${y}) could not be scrolled into the viewport (${viewportSize.width}x${viewportSize.height})`);
        }
      }

      // Pointer actions take integer pixel positions.
      targetX = Math.round(targetX);
      targetY = Math.round(targetY);

      // Show where the click is about to land.
      await removeClickIndicators(driver);
      await showClickMarker(driver, CLICK_MARKERS.target, targetX, targetY);

      // Human visibility delay (300ms) to see the click indicator
      await pauseFor(300);

      // Move first so hover states fire, pause briefly, then click — one sequence.
      await driver
        .actions({ bridge: true })
        .move({ x: targetX, y: targetY })
        .pause(100)
        .click()
        .perform();

      // Brief flash to confirm click (100ms)
      await showClickMarker(driver, CLICK_MARKERS.confirmation, targetX, targetY);
      await pauseFor(100);

      await removeClickIndicators(driver);

      return {
        content: [{ type: 'text', text: `Clicked at coordinates (${x}, ${y}) relative to ${relative_to} with visual feedback` }]
      };
    } catch (e) {
      // Best-effort cleanup; the original failure is what gets reported.
      if (driver) {
        try {
          await removeClickIndicators(driver);
        } catch (cleanupError) {
          // Deliberately ignored: the page or session may already be gone.
        }
      }

      return {
        content: [{ type: 'text', text: `Error clicking at coordinates: ${e.message}` }]
      };
    }
  }
);

/**
 * Tool: take_screenshot
 *
 * Captures the current page, optionally resizes it by `scale`, and either
 * writes it to `outputPath` or returns the base64 PNG inline.
 */
server.tool(
  "take_screenshot",
  "captures a screenshot of the current page",
  {
    outputPath: z.string().optional().describe("Optional path where to save the screenshot. If not provided, returns base64 data."),
    scale: z.number().optional().default(0.5).describe("Scale percentage for resizing the image (default 0.5 = 50%)")
  },
  async ({ outputPath, scale = 0.5 }) => {
    try {
      requirePositiveNumber('scale', scale);

      const driver = getDriver();
      const screenshot = await driver.takeScreenshot();
      const finalScreenshot = await scaleScreenshot(driver, screenshot, scale);
      const percent = Math.round(scale * 100);

      return await deliverScreenshot({
        pngBase64: finalScreenshot,
        outputPath,
        savedMessage: `Screenshot saved to ${outputPath} (scale: ${percent}%)`,
        inlineMessage: `Screenshot captured as base64 (scale: ${percent}%):`,
      });
    } catch (e) {
      // NOTE(review): the original catch clause sits between the two diff hunks
      // and is not visible in this excerpt; this mirrors the error shape used by
      // the sibling tools — confirm the exact message against the file.
      return {
        content: [{ type: 'text', text: `Error taking screenshot: ${e.message}` }]
      };
    }
  }
);

/**
 * Tool: take_grid_screenshot
 *
 * Injects either a labelled coordinate grid ("coordinates") or outlines with
 * centre-coordinate labels on clickable elements ("highlights"), captures a
 * screenshot, and always removes the injected elements again — even when the
 * capture fails. The image is then resized by `scale` and saved or returned.
 */
server.tool(
  "take_grid_screenshot",
  "captures a screenshot with coordinate grid overlay for visual reference",
  {
    grid_spacing: z.number().optional().default(50).describe("Pixels between grid lines"),
    target_identification_mode: z.enum(["coordinates", "highlights"]).optional().default("coordinates").describe("Mode for target identification: 'coordinates' shows grid with coordinate labels, 'highlights' shows red outlines around clickables"),
    outputPath: z.string().optional().describe("Optional path where to save the screenshot. If not provided, returns base64 data."),
    scale: z.number().optional().default(0.5).describe("Scale percentage for resizing the image (default 0.5 = 50%)")
  },
  async ({ grid_spacing = 50, target_identification_mode = "coordinates", outputPath, scale = 0.5 }) => {
    try {
      // A zero or negative spacing would make the in-page grid loops never end.
      requirePositiveNumber('grid_spacing', grid_spacing);
      requirePositiveNumber('scale', scale);

      const driver = getDriver();

      let screenshot;
      try {
        await driver.executeScript(GRID_OVERLAY_SCRIPT, grid_spacing, target_identification_mode);

        // Human visibility delay - let user see the grid
        await pauseFor(500);

        screenshot = await driver.takeScreenshot();
      } finally {
        // Restore the page whether or not the capture succeeded.
        await driver.executeScript(GRID_CLEANUP_SCRIPT);
      }

      const finalScreenshot = await scaleScreenshot(driver, screenshot, scale);
      const details = `grid_spacing: ${grid_spacing}px, mode: ${target_identification_mode}, scale: ${Math.round(scale * 100)}%`;

      return await deliverScreenshot({
        pngBase64: finalScreenshot,
        outputPath,
        savedMessage: `Grid screenshot saved to ${outputPath} (${details})`,
        inlineMessage: `Grid screenshot captured (${details}):`,
      });
    } catch (e) {
      return {
        content: [{ type: 'text', text: `Error taking grid screenshot: ${e.message}` }]
      };
    }
  }
);