diff --git a/README.md b/README.md index f11d947..45bd3d7 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ PageIndex is a revolutionary document processing system that uses **reasoning-ba - **Local PDF Processing**: Upload local PDF files directly without manual uploads - **URL Support**: Process documents from URLs - **Full PageIndex Integration**: Access all PageIndex capabilities (OCR, tree generation, reasoning-based retrieval) -- **Secure Authentication**: API key authentication with PageIndex platform +- **Secure OAuth Authentication**: OAuth 2.1 with PKCE and automatic token refresh - **TypeScript**: Full type safety with MCP SDK - **Desktop Extension (DXT)**: One-click installation for Claude Desktop with secure configuration @@ -28,11 +28,7 @@ PageIndex is a revolutionary document processing system that uses **reasoning-ba ### Getting Started -First, you'll need to create an API key: - -1. Visit https://dash.pageindex.ai/api-keys -2. Create a new API key for your application -3. Copy the API key for use in the configuration below +The PageIndex MCP server uses OAuth 2.1 authentication for secure access. When you first run the server, it will guide you through the authentication process by opening your browser to authorize the application. ### For Claude Desktop (Recommended) @@ -40,12 +36,12 @@ First, you'll need to create an API key: 1. Download the latest `.dxt` file from [Releases](https://github.com/VectifyAI/pageindex-mcp/releases) 2. Double-click the `.dxt` file to install automatically in Claude Desktop -3. Enter your PageIndex API key in the simple configuration interface +3. The OAuth authentication will be handled automatically when you first use the extension **Benefits of DXT Installation:** - **No technical setup** - just download and double-click -- **Secure configuration** - API keys stored securely by Claude Desktop +- **Secure OAuth authentication** - handled automatically through your browser - **Automatic updates** - extensions update seamlessly - **Full local PDF support** - upload and process PDFs directly from your computer @@ -64,34 +60,57 @@ Add to your MCP configuration: "mcpServers": { "pageindex": { "command": "npx", - "args": ["-y", "pageindex-mcp"], - "env": { - "PAGEINDEX_API_KEY": "" - } + "args": ["-y", "pageindex-mcp"] } } } ``` -#### Option 2: Remote MCP Server +**Authentication Process:** +1. When you first connect, the server will automatically open your browser for OAuth authentication +2. Log in to your PageIndex account and authorize the application +3. The authentication tokens are securely stored locally and automatically refreshed +4. Subsequent connections will use the stored credentials automatically + +> **Note**: This local server provides full PDF upload capabilities and handles all authentication automatically. -Alternatively, connect directly to PageIndex without this wrapper: +#### Option 2: Direct Connection to PageIndex + +Connect directly to the PageIndex OAuth-enabled MCP server: ```json { "mcpServers": { "pageindex": { "type": "http", - "url": "https://dash.pageindex.ai/api/mcp", - "headers": { - "Authorization": "Bearer " - } + "url": "https://mcp.pageindex.ai" + } + } +} +``` + +**Authentication Process:** +1. The MCP client will automatically handle the OAuth flow +2. You'll be redirected to authorize the application in your browser +3. Authentication tokens are managed by the MCP client +4. Automatic token refresh is handled by the server + +**For clients that don't support HTTP MCP servers:** + +If your MCP client doesn't support HTTP servers directly, you can use [mcp-remote](https://github.com/geelen/mcp-remote) as a bridge: + +```json +{ + "mcpServers": { + "pageindex": { + "command": "npx", + "args": ["-y", "mcp-remote", "https://mcp.pageindex.ai"] } } } ``` -> **Note**: Option 1 provides local PDF upload capabilities, while Option 2 connects directly to PageIndex but requires manual PDF uploads via the dashboard. +> **Note**: Option 1 provides local PDF upload capabilities, while Option 2 only supports PDF processing via URLs (no local file uploads). ## Available Tools diff --git a/manifest.json b/manifest.json index f81b5b3..a8ea0dc 100644 --- a/manifest.json +++ b/manifest.json @@ -4,7 +4,7 @@ "display_name": "PageIndex", "version": "1.1.2", "description": "MCP server for PageIndex", - "long_description": "This extension provides access to PageIndex's next-generation reasoning-based RAG system. Unlike traditional vector search, PageIndex uses multi-step reasoning to understand and retrieve information from documents.", + "long_description": "This extension provides access to PageIndex's next-generation reasoning-based RAG system. Unlike traditional vector search, PageIndex uses multi-step reasoning to understand and retrieve information from documents. Authentication is handled automatically via OAuth 2.1.", "author": { "name": "VectifyAI", "url": "https://github.com/VectifyAI" @@ -19,7 +19,8 @@ "document-processing", "pdf", "ocr", - "reasoning-rag" + "reasoning-rag", + "oauth" ], "license": "MIT", "compatibility": { @@ -34,26 +35,7 @@ "entry_point": "build/index.js", "mcp_config": { "command": "node", - "args": ["${__dirname}/build/index.js"], - "env": { - "PAGEINDEX_API_KEY": "${user_config.api_key}", - "PAGEINDEX_API_URL": "${user_config.api_url}" - } - } - }, - "user_config": { - "api_key": { - "type": "string", - "title": "PageIndex API Key", - "description": "Your PageIndex API key. Get one at https://dash.pageindex.ai/api-keys", - "required": true - }, - "api_url": { - "type": "string", - "title": "PageIndex API URL", - "description": "PageIndex API base URL (optional, defaults to https://dash.pageindex.ai)", - "required": false, - "default": "https://dash.pageindex.ai" + "args": ["${__dirname}/build/index.js"] } } } diff --git a/package.json b/package.json index 18cfe95..9b8c569 100644 --- a/package.json +++ b/package.json @@ -32,9 +32,12 @@ "license": "MIT", "dependencies": { "@modelcontextprotocol/sdk": "^1.17.3", + "@types/lodash": "^4.17.20", "date-fns": "^4.1.0", + "lodash": "^4.17.21", "mime-types": "^2.1.35", "p-retry": "^7.0.0", + "pkce-challenge": "^5.0.0", "zod": "^3.23.8", "zod-to-json-schema": "^3.24.6" }, diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 253d2a3..d8472e7 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -11,15 +11,24 @@ importers: '@modelcontextprotocol/sdk': specifier: ^1.17.3 version: 1.17.4 + '@types/lodash': + specifier: ^4.17.20 + version: 4.17.20 date-fns: specifier: ^4.1.0 version: 4.1.0 + lodash: + specifier: ^4.17.21 + version: 4.17.21 mime-types: specifier: ^2.1.35 version: 2.1.35 p-retry: specifier: ^7.0.0 version: 7.0.0 + pkce-challenge: + specifier: ^5.0.0 + version: 5.0.0 zod: specifier: ^3.23.8 version: 3.25.76 @@ -614,6 +623,9 @@ packages: '@types/estree@1.0.8': resolution: {integrity: sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w==} + '@types/lodash@4.17.20': + resolution: {integrity: sha512-H3MHACvFUEiujabxhaI/ImO6gUrd8oOurg7LQtS7mbwIXA/cUqWrvBsaeJ23aZEPk1TAYkurjfMbSELfoCXlGA==} + '@types/mime-types@2.1.4': resolution: {integrity: sha512-lfU4b34HOri+kAY5UheuFMWPDOI+OPceBSHZKp69gEyTL/mmJ4cnU6Y/rlme3UL3GyOn6Y42hyIEw0/q8sWx5w==} @@ -2848,6 +2860,8 @@ snapshots: '@types/estree@1.0.8': {} + '@types/lodash@4.17.20': {} + '@types/mime-types@2.1.4': {} '@types/mute-stream@0.0.4': diff --git a/src/client/auth.ts b/src/client/auth.ts index 125c292..7d0f63a 100644 --- a/src/client/auth.ts +++ b/src/client/auth.ts @@ -1,25 +1,37 @@ +import type { OAuthTokens } from '@modelcontextprotocol/sdk/shared/auth.js'; import type { FetchLike } from '@modelcontextprotocol/sdk/shared/transport.js'; /** - * Creates an authenticated fetch function that adds Bearer token authentication - * This is a simplified approach compared to implementing the complex OAuth interface + * Creates an OAuth-authenticated fetch function that adds Bearer token authentication + * using OAuth access tokens with automatic refresh capability */ -export function createAuthenticatedFetch( - apiKey: string, +export function createOAuthAuthenticatedFetch( + getTokens: () => Promise, additionalHeaders: Record = {}, ): FetchLike { return async (input: string | URL, init?: RequestInit): Promise => { + const tokens = await getTokens(); + if (!tokens?.access_token) { + throw new Error('No valid OAuth access token available'); + } + const headers = new Headers(init?.headers); - headers.set('Authorization', `Bearer ${apiKey}`); + headers.set('Authorization', `Bearer ${tokens.access_token}`); - // Add additional headers (like client identification) Object.entries(additionalHeaders).forEach(([key, value]) => { headers.set(key, value); }); - return fetch(input, { + const response = await fetch(input, { ...init, headers, }); + + // If we get 401 Unauthorized, the token has expired + if (response.status === 401) { + throw new Error('TOKEN_EXPIRED'); + } + + return response; }; } diff --git a/src/client/mcp-client.ts b/src/client/mcp-client.ts index 4157919..2d8ea3c 100644 --- a/src/client/mcp-client.ts +++ b/src/client/mcp-client.ts @@ -1,23 +1,59 @@ +import { + type OAuthClientProvider, + UnauthorizedError, +} from '@modelcontextprotocol/sdk/client/auth.js'; import { Client } from '@modelcontextprotocol/sdk/client/index.js'; import { SSEClientTransport } from '@modelcontextprotocol/sdk/client/sse.js'; import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js'; import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js'; -import pRetry, { AbortError } from 'p-retry'; -import { CONFIG as config } from '../config.js'; -import { createAuthenticatedFetch } from './auth.js'; +import pRetry from 'p-retry'; +import { CONFIG } from '../config.js'; +import { PageIndexOAuthProvider } from './oauth-provider.js'; /** - * Wrapper for MCP Client to connect to remote PageIndex MCP server + * Wrapper for MCP Client to connect to remote PageIndex MCP server with OAuth authentication */ export class PageIndexMcpClient { private client: Client | null = null; private transport: StreamableHTTPClientTransport | SSEClientTransport | null = null; - private apiKey: string; - private connected = false; + private oauthProvider: OAuthClientProvider; + + constructor(oauthProvider?: OAuthClientProvider) { + if (oauthProvider) { + this.oauthProvider = oauthProvider; + } else { + // Create default OAuth provider with built-in configuration + this.oauthProvider = new PageIndexOAuthProvider( + 'http://localhost:8090/callback', + { + client_name: __CLIENT_NAME__, + redirect_uris: ['http://localhost:8090/callback'], + token_endpoint_auth_method: 'none', // Public client by default + grant_types: ['authorization_code'], + response_types: ['code'], + scope: 'mcp:access', + }, + ); + } + } - constructor(apiKey: string) { - this.apiKey = apiKey; + /** + * Create a PageIndexMcpClient that prioritizes stored client information + */ + static async createWithStoredClientInfo(): Promise { + const storedClientInfo = await PageIndexOAuthProvider.getStoredClientInfo(); + if (storedClientInfo) { + const oauthProvider = new PageIndexOAuthProvider( + 'http://localhost:8090/callback', + storedClientInfo, + ); + // Load stored tokens and client info to sync internal state + await oauthProvider.loadFromStorage(); + return new PageIndexMcpClient(oauthProvider); + } else { + return new PageIndexMcpClient(); + } } /** @@ -31,90 +67,87 @@ export class PageIndexMcpClient { } /** - * Connect to the remote PageIndex MCP server + * Create transport instance with authProvider */ - async connect(): Promise { - if (this.connected) { - return; + private async createTransport(): Promise< + StreamableHTTPClientTransport | SSEClientTransport + > { + const streamableHttpUrl = new URL(`${CONFIG.apiUrl}/mcp`); + streamableHttpUrl.searchParams.set('local_upload', '1'); + + const sseUrl = new URL(`${CONFIG.apiUrl}/mcp/sse`); + sseUrl.searchParams.set('local_upload', '1'); + + // Try StreamableHTTP first, fallback to SSE for compatibility + try { + return new StreamableHTTPClientTransport(streamableHttpUrl, { + authProvider: this.oauthProvider, + requestInit: { + headers: { + ...this.getClientHeaders(), + 'Content-Type': 'application/json', + }, + }, + }); + } catch { + return new SSEClientTransport(sseUrl, { + authProvider: this.oauthProvider, + }); } + } - await pRetry( - async () => { - const streamableHttpUrl = new URL(`${config.apiUrl}/api/mcp`); - streamableHttpUrl.searchParams.set('local_upload', '1'); - - const sseUrl = new URL(`${config.apiUrl}/api/mcp/sse`); - sseUrl.searchParams.set('local_upload', '1'); - - this.client = new Client({ - name: 'pageindex-mcp', - version: __VERSION__, - }); + /** + * Attempt connection with OAuth authentication and recursive retry + */ + private async attemptConnection(): Promise { + try { + // Create transport with authProvider + const transport = await this.createTransport(); + this.transport = transport; + + // Create client + this.client = new Client({ + name: 'pageindex-mcp', + version: __VERSION__, + }); + + // Attempt connection + await this.client.connect(transport); + + console.error('Connected to PageIndex MCP server successfully.\n'); + } catch (error) { + if (error instanceof UnauthorizedError) { + console.error('Authentication required, starting OAuth flow...\n'); + + if (this.oauthProvider instanceof PageIndexOAuthProvider) { + // Wait for OAuth callback + const authCode = await this.oauthProvider.waitForOAuthCallback(); + + // Use finishAuth to complete authentication + if (this.transport) { + await this.transport.finishAuth(authCode); + } - // Try StreamableHTTP first, fallback to SSE for compatibility - try { - // Use simplified authenticated fetch with client headers - const clientHeaders = this.getClientHeaders(); - const authenticatedFetch = createAuthenticatedFetch( - this.apiKey, - clientHeaders, + console.error( + 'OAuth authentication completed, retrying connection...\n', ); - this.transport = new StreamableHTTPClientTransport( - streamableHttpUrl, - { - fetch: authenticatedFetch, - requestInit: { - headers: { - 'Content-Type': 'application/json', - }, - }, - }, - ); - await this.client.connect(this.transport); - } catch (_error) { - try { - // For SSE transport, we need to pass authenticated fetch as well - const clientHeaders = this.getClientHeaders(); - const authenticatedFetch = createAuthenticatedFetch( - this.apiKey, - clientHeaders, - ); - this.transport = new SSEClientTransport(sseUrl, { - fetch: authenticatedFetch, - }); - await this.client.connect(this.transport); - } catch (sseError) { - throw new AbortError( - `Failed to connect to PageIndex MCP server: ${sseError}`, - ); - } + // Recursive retry + await this.attemptConnection(); + } else { + throw new Error('OAuth provider does not support callback waiting'); } - - this.connected = true; - }, - { - retries: 3, - factor: 2, - minTimeout: 1000, - maxTimeout: 8000, - onFailedAttempt: (error) => { - console.warn( - `Connection attempt ${error.attemptNumber} failed. ${error.retriesLeft} retries left.`, - ); - }, - }, - ); + } else { + // Re-throw other errors + throw error; + } + } } /** * Call a tool on the remote MCP server */ async callTool(name: string, params: any): Promise { - if (!this.client || !this.connected) { - throw new Error('Client not connected. Call connect() first.'); - } - return pRetry( async () => { if (!this.client) { @@ -132,19 +165,26 @@ export class PageIndexMcpClient { minTimeout: 500, maxTimeout: 3000, onFailedAttempt: (error) => { - console.warn( - `Tool call "${name}" attempt ${error.attemptNumber} failed. ${error.retriesLeft} retries left.`, + console.error( + `Tool call "${name}" attempt ${error.attemptNumber} failed. ${error.retriesLeft} retries left.\n`, ); }, }, ); } + /** + * Connect using OAuth authentication with finishAuth pattern + */ + public async connect(): Promise { + await this.attemptConnection(); + } + /** * List available tools on the remote server */ async listTools() { - if (!this.client || !this.connected) { + if (!this.client) { throw new Error('Client not connected. Call connect() first.'); } @@ -152,6 +192,14 @@ export class PageIndexMcpClient { return tools; } + /** + * Reconnect to the server + */ + async reconnect(): Promise { + await this.close(); + await this.connect(); + } + /** * Close the connection */ @@ -164,14 +212,5 @@ export class PageIndexMcpClient { if (this.client) { this.client = null; } - - this.connected = false; - } - - /** - * Check if client is connected - */ - isConnected(): boolean { - return this.connected; } } diff --git a/src/client/oauth-provider.ts b/src/client/oauth-provider.ts new file mode 100644 index 0000000..16d5d1b --- /dev/null +++ b/src/client/oauth-provider.ts @@ -0,0 +1,291 @@ +import { exec } from 'node:child_process'; +import fs from 'node:fs/promises'; +import { createServer } from 'node:http'; +import os from 'node:os'; +import path from 'node:path'; +import { URL } from 'node:url'; +import { promisify } from 'node:util'; +import type { OAuthClientProvider } from '@modelcontextprotocol/sdk/client/auth.js'; +import type { + OAuthClientInformation, + OAuthClientInformationFull, + OAuthClientMetadata, + OAuthTokens, +} from '@modelcontextprotocol/sdk/shared/auth.js'; +import { throttle } from 'lodash'; + +const execAsync = promisify(exec); + +interface StoredTokens { + tokens?: OAuthTokens; + clientInfo?: OAuthClientInformationFull; + codeVerifier?: string; +} + +/** + * File-based OAuth client provider for PageIndex MCP client + * Stores tokens and client information securely in user's home directory + */ +export class PageIndexOAuthProvider implements OAuthClientProvider { + private _tokens?: OAuthTokens; + private _clientInfo?: OAuthClientInformationFull; + private _codeVerifier?: string; + private tokenFilePath: string; + private throttledOpenBrowser: (url: string) => Promise; + + /** + * Check for existing client information in storage + */ + static async getStoredClientInfo( + tokenStoragePath?: string, + ): Promise { + const filePath = + tokenStoragePath || + path.join(os.homedir(), '.pageindex-mcp', 'oauth-tokens.json'); + + try { + const data = await fs.readFile(filePath, 'utf-8'); + const stored: StoredTokens = JSON.parse(data); + return stored.clientInfo; + } catch { + return undefined; + } + } + + constructor( + private readonly _redirectUrl: string | URL, + private readonly _clientMetadata: OAuthClientMetadata, + tokenStoragePath?: string, + ) { + this.tokenFilePath = + tokenStoragePath || + path.join(os.homedir(), '.pageindex-mcp', 'oauth-tokens.json'); + + this.throttledOpenBrowser = throttle(this.openBrowser.bind(this), 1000); + } + + get redirectUrl(): string | URL { + return this._redirectUrl; + } + + get clientMetadata(): OAuthClientMetadata { + return this._clientMetadata; + } + + async state(): Promise { + return ( + Math.random().toString(36).substring(2, 15) + + Math.random().toString(36).substring(2, 15) + ); + } + + async clientInformation(): Promise { + if (!this._clientInfo) { + await this.loadFromStorage(); + } + return this._clientInfo; + } + + async saveClientInformation( + clientInformation: OAuthClientInformationFull, + ): Promise { + this._clientInfo = clientInformation; + await this.saveToStorage(); + } + + async tokens(): Promise { + if (!this._tokens) { + await this.loadFromStorage(); + } + return this._tokens; + } + + async saveTokens(tokens: OAuthTokens): Promise { + this._tokens = tokens; + await this.saveToStorage(); + } + + async redirectToAuthorization(authorizationUrl: URL): Promise { + try { + await this.throttledOpenBrowser(authorizationUrl.toString()); + } catch (error) { + console.error( + error instanceof Error + ? `Failed to open browser: ${error.message}\n` + : 'Failed to open browser\n', + ); + process.exit(1); + } + } + + async saveCodeVerifier(codeVerifier: string): Promise { + this._codeVerifier = codeVerifier; + await this.saveToStorage(); + } + + async codeVerifier(): Promise { + if (!this._codeVerifier) { + await this.loadFromStorage(); + if (!this._codeVerifier) { + throw new Error( + 'No code verifier found. Please restart the OAuth flow.', + ); + } + } + return this._codeVerifier; + } + + async invalidateCredentials( + scope: 'all' | 'client' | 'tokens' | 'verifier', + ): Promise { + switch (scope) { + case 'all': + this._tokens = undefined; + this._clientInfo = undefined; + this._codeVerifier = undefined; + break; + case 'client': + this._clientInfo = undefined; + break; + case 'tokens': + this._tokens = undefined; + break; + case 'verifier': + this._codeVerifier = undefined; + break; + } + await this.saveToStorage(); + } + + /** + * Waits for OAuth callback by starting a temporary HTTP server + */ + async waitForOAuthCallback(): Promise { + return new Promise((resolve, reject) => { + const redirectUrl = new URL(this._redirectUrl); + const port = parseInt(redirectUrl.port, 10) || 8090; + + const server = createServer((req, res) => { + const url = new URL(req.url || '', `http://localhost:${port}`); + + if (url.pathname === redirectUrl.pathname) { + const code = url.searchParams.get('code'); + const error = url.searchParams.get('error'); + + if (error) { + res.writeHead(400, { 'Content-Type': 'text/html' }); + res.end(` + + + Authorization Failed + + +

Authorization Failed

+

Error: ${error}

+

Please try the authorization process again.

+

You can close this tab and return to your terminal.

+ + + `); + server.close(); + reject(new Error(`OAuth error: ${error}`)); + return; + } + + if (code) { + server.close(); + resolve(code); + + // For DXT builds, redirect directly without showing content + if (__CLIENT_TYPE__ === 'dxt') { + res.writeHead(302, { Location: 'claude://claude.ai/new' }); + res.end(); + } else { + res.writeHead(200, { 'Content-Type': 'text/html' }); + res.end(` + + + Authorization Successful + + +

Authorization Successful

+

You can close this tab now.

+ + + `); + } + return; + } + } + res.writeHead(404); + res.end('Not found'); + }); + + server.listen(port, () => { + console.error(`Listening for OAuth callback on port ${port}...\n`); + }); + + server.on('error', (err) => { + reject(new Error(`Failed to start callback server: ${err.message}`)); + }); + + setTimeout( + () => { + server.close(); + reject(new Error('OAuth callback timeout after 5 minutes')); + }, + 5 * 60 * 1000, + ); + }); + } + + private async openBrowser(url: string): Promise { + const platform = process.platform; + let command: string; + + switch (platform) { + case 'darwin': + command = `open "${url}"`; + break; + case 'win32': + command = `start "${url}"`; + break; + default: + command = `xdg-open "${url}"`; + break; + } + await execAsync(command); + } + + public async loadFromStorage(): Promise { + try { + const data = await fs.readFile(this.tokenFilePath, 'utf-8'); + const stored: StoredTokens = JSON.parse(data); + + this._tokens = stored.tokens; + this._clientInfo = stored.clientInfo; + this._codeVerifier = stored.codeVerifier; + } catch (_error) { + console.error( + 'No existing OAuth tokens found, starting fresh authentication.\n', + ); + } + } + + private async saveToStorage(): Promise { + const stored: StoredTokens = { + tokens: this._tokens, + clientInfo: this._clientInfo, + codeVerifier: this._codeVerifier, + }; + + try { + await fs.mkdir(path.dirname(this.tokenFilePath), { recursive: true }); + await fs.writeFile(this.tokenFilePath, JSON.stringify(stored, null, 2), { + mode: 0o600, + }); + } catch (error) { + console.error(`Warning: Failed to save OAuth tokens: ${error}\n`); + } + } +} diff --git a/src/config.ts b/src/config.ts index a5f0098..f4ffb4a 100644 --- a/src/config.ts +++ b/src/config.ts @@ -1,18 +1,11 @@ interface Config { - apiKey: string; apiUrl: string; debug: boolean; connectionTimeout: number; } -const apiKey = process.env.PAGEINDEX_API_KEY; -if (!apiKey) { - throw new Error('PAGEINDEX_API_KEY environment variable is required'); -} - export const CONFIG: Config = { - apiKey, - apiUrl: process.env.PAGEINDEX_API_URL || 'https://dash.pageindex.ai', + apiUrl: process.env.PAGEINDEX_API_URL || 'https://mcp.pageindex.ai', debug: process.env.DEBUG === 'true', connectionTimeout: parseInt(process.env.CONNECTION_TIMEOUT || '30000', 10), }; diff --git a/src/globals.d.ts b/src/globals.d.ts index f954661..8e896f7 100644 --- a/src/globals.d.ts +++ b/src/globals.d.ts @@ -1,2 +1,3 @@ declare const __VERSION__: string; declare const __CLIENT_TYPE__: 'dxt' | 'npm'; +declare const __CLIENT_NAME__: string; diff --git a/src/index.ts b/src/index.ts index 4304e44..261da7a 100644 --- a/src/index.ts +++ b/src/index.ts @@ -5,19 +5,20 @@ import { startServer } from './server.js'; async function main() { try { await startServer(); - } catch (_error) { + } catch (error) { + console.error(`Failed to start server: ${error}\n`); process.exit(1); } } // Handle uncaught exceptions process.on('uncaughtException', (error) => { - console.error('Uncaught Exception:', error); + console.error(`Uncaught Exception: ${error}\n`); process.exit(1); }); -process.on('unhandledRejection', (reason, promise) => { - console.error('Unhandled Rejection at:', promise, 'reason:', reason); +process.on('unhandledRejection', (reason, _promise) => { + console.error(`Unhandled Rejection: ${reason}\n`); process.exit(1); }); diff --git a/src/server.ts b/src/server.ts index 372b3f6..fe74ec4 100644 --- a/src/server.ts +++ b/src/server.ts @@ -6,7 +6,6 @@ import { } from '@modelcontextprotocol/sdk/types.js'; import { zodToJsonSchema } from 'zod-to-json-schema'; import { PageIndexMcpClient } from './client/mcp-client.js'; -import { CONFIG as config } from './config.js'; import { executeTool, getTools, @@ -19,8 +18,8 @@ import { */ class PageIndexStdioServer { private server: Server; - private mcpClient: PageIndexMcpClient; - private remoteToolsProxy: RemoteToolsProxy; + private mcpClient: PageIndexMcpClient | null = null; + private connectPromise: Promise | null = null; constructor() { this.server = new Server( @@ -34,14 +33,21 @@ class PageIndexStdioServer { }, }, ); - - this.mcpClient = new PageIndexMcpClient(config.apiKey); - this.remoteToolsProxy = new RemoteToolsProxy(this.mcpClient); this.setupHandlers(); } private setupHandlers() { this.server.setRequestHandler(ListToolsRequestSchema, async () => { + // Initialize remote connection on first list tools request + if (!this.mcpClient) { + await this.connectToRemoteServer(); + } + + // biome-ignore lint/style/noNonNullAssertion: mcpClient is ensured to be non-null here + const remoteToolsProxy = new RemoteToolsProxy(this.mcpClient!); + const remoteTools = await remoteToolsProxy.fetchRemoteTools(); + updateToolsWithRemote(remoteTools); + const tools = getTools(); const toolsResponse = { tools: tools.map((tool) => ({ @@ -60,7 +66,11 @@ class PageIndexStdioServer { const { name, arguments: args } = request.params; try { - const result = await executeTool(name, args, this.mcpClient); + if (!this.mcpClient) { + await this.connectToRemoteServer(); + } + // biome-ignore lint/style/noNonNullAssertion: mcpClient is ensured to be non-null here + const result = await executeTool(name, args, this.mcpClient!); return result; } catch (error) { return { @@ -85,27 +95,52 @@ class PageIndexStdioServer { }); this.server.onerror = (error) => { - console.error('MCP Server error:', error); + console.error(`MCP Server error: ${error}\n`); }; } + /** + * Connect to remote PageIndex MCP server + */ + private async connectToRemoteServer() { + // If the client is already connected, return immediately. + if (this.mcpClient) { + return; + } + + // If a connection is already in progress, wait for it to complete. + if (this.connectPromise) { + return await this.connectPromise; + } + + // If no connection is in progress, start a new one and "lock" it. + this.connectPromise = (async () => { + try { + const mcpClient = await PageIndexMcpClient.createWithStoredClientInfo(); + await mcpClient.connect(); + this.mcpClient = mcpClient; + } catch (error) { + // If the connection fails, clear the "lock" to allow for a retry on the next call. + this.connectPromise = null; + console.error(`Failed to initialize remote connection: ${error}\n`); + // Re-throw the error so the caller knows the connection failed. + throw error; + } + })(); + + return await this.connectPromise; + } + /** * Start the stdio server */ async start() { try { - // Connect to remote MCP client - await this.mcpClient.connect(); - - // Fetch remote tools and update local tools registry - const remoteTools = await this.remoteToolsProxy.fetchRemoteTools(); - updateToolsWithRemote(remoteTools); - - // Start the stdio server const transport = new StdioServerTransport(); await this.server.connect(transport); + // this.connectToRemoteServer(); } catch (error) { - console.error('Failed to start server:', error); + console.error(`Failed to start server: ${error}\n`); process.exit(1); } } @@ -115,9 +150,11 @@ class PageIndexStdioServer { */ async stop() { try { - await this.mcpClient.close(); + if (this.mcpClient) { + await this.mcpClient.close(); + } } catch (error) { - console.error('Error during server shutdown:', error); + console.error(`Error during server shutdown: ${error}\n`); } } } diff --git a/src/tools/index.ts b/src/tools/index.ts index 9965259..26d84a3 100644 --- a/src/tools/index.ts +++ b/src/tools/index.ts @@ -5,14 +5,9 @@ import { processDocumentTool } from './process-document.js'; import type { ToolDefinition } from './types.js'; export { RemoteToolsProxy } from './remote-proxy.js'; -// Re-export ToolDefinition type export { ToolDefinition } from './types.js'; -// Local tools that require file system access or special local processing const localTools: ToolDefinition[] = [processDocumentTool]; - -// Combined tools (local + remote) -// This will be populated by the server after fetching remote tools let tools: ToolDefinition[] = [...localTools]; /** @@ -47,7 +42,6 @@ export async function executeTool( if (!tool) { throw new Error(`Tool not found: ${name}`); } - // Validate parameters try { const validatedParams = tool.inputSchema.parse(params); return await tool.handler(validatedParams, client); diff --git a/src/tools/process-document.ts b/src/tools/process-document.ts index cf4d5cd..5c382b7 100644 --- a/src/tools/process-document.ts +++ b/src/tools/process-document.ts @@ -202,24 +202,27 @@ async function downloadPdf(url: string): Promise { } catch (error: any) { // For arxiv.org URLs, try adding .pdf suffix if original request failed if (url.includes('arxiv.org') && !url.endsWith('.pdf')) { - console.log( - `Initial request failed for arxiv URL: ${url}, retrying with .pdf suffix`, + console.error( + `Initial request failed for arxiv URL: ${url}, retrying with .pdf suffix\n`, ); const retryUrl = url.endsWith('/') ? `${url}pdf` : `${url}.pdf`; try { response = await fetchWithRetry(retryUrl); - console.log( - `Successfully retrieved PDF from retry URL: ${retryUrl}`, + console.error( + `Successfully retrieved PDF from retry URL: ${retryUrl}\n`, ); } catch (retryError: any) { - console.log( - `Retry with .pdf suffix also failed: ${retryError.message}`, + console.error( + `Retry with .pdf suffix also failed: ${retryError.message}\n`, ); const enhancedError = new AbortError( `Failed to retrieve PDF from ${url}. Tried both original URL and ${retryUrl}`, ); - enhancedError.name = 'ArxivRetryFailed'; + Object.defineProperty(enhancedError, 'name', { + value: 'ArxivRetryFailed', + configurable: true, + }); throw enhancedError; } } else { @@ -269,8 +272,8 @@ async function downloadPdf(url: string): Promise { !contentType.includes('octet-stream') && !contentType.includes('application/pdf') ) { - console.warn( - `Unexpected content-type: ${contentType}, but PDF magic bytes validated`, + console.error( + `Unexpected content-type: ${contentType}, but PDF magic bytes validated\n`, ); } @@ -303,12 +306,12 @@ async function downloadPdf(url: string): Promise { } // Don't retry ArxivRetryFailed errors - if (error.name === 'ArxivRetryFailed') { + if ((error as any).name === 'ArxivRetryFailed') { throw error; } - console.warn( - `PDF download attempt ${error.attemptNumber} failed. ${error.retriesLeft} retries left. URL: ${url}`, + console.error( + `PDF download attempt ${error.attemptNumber} failed. ${error.retriesLeft} retries left. URL: ${url}\n`, ); }, }, diff --git a/src/tools/remote-proxy.ts b/src/tools/remote-proxy.ts index eeca9c6..dac1750 100644 --- a/src/tools/remote-proxy.ts +++ b/src/tools/remote-proxy.ts @@ -53,7 +53,7 @@ export class RemoteToolsProxy { return this.remoteTools; } catch (error) { - console.error('Failed to fetch remote tools:', error); + console.error(`Failed to fetch remote tools: ${error}\n`); return []; } } diff --git a/tsup.config.ts b/tsup.config.ts index fbf9ebd..57a29a6 100644 --- a/tsup.config.ts +++ b/tsup.config.ts @@ -15,6 +15,7 @@ export default defineConfig({ define: { __VERSION__: `"${packageJson.version}"`, __CLIENT_TYPE__: `"${process.env.CLIENT_TYPE || 'npm'}"`, + __CLIENT_NAME__: `"${process.env.CLIENT_TYPE === 'dxt' ? 'Claude Desktop' : 'PageIndex MCP'}"`, }, platform: 'node', onSuccess: async () => {