/**
 * Set local file paths on a file input element via CDP DOM.setFileInputFiles.
 * This bypasses the need to send large base64 payloads through the message channel —
 * Chrome reads the files directly from the local filesystem.
 *
 * @param tabId - Target tab ID
 * @param files - Array of absolute local file paths
 * @param selector - CSS selector to find the file input (optional, defaults to first file input)
 * @throws Error when the document root cannot be obtained or when no element
 *         matches the selector; errors from the debugger commands propagate unchanged.
 */
export async function setFileInputFiles(
  tabId: number,
  files: string[],
  selector?: string,
): Promise<void> {
  await ensureAttached(tabId);
  const target = { tabId };

  // Enable DOM domain (required for DOM.querySelector and DOM.setFileInputFiles)
  await chrome.debugger.sendCommand(target, 'DOM.enable');

  // Get the document root. The reply is untyped, so check its shape instead of
  // trusting a cast: a missing root should surface as a clear error.
  const doc = (await chrome.debugger.sendCommand(target, 'DOM.getDocument')) as
    | { root?: { nodeId?: number } }
    | undefined;
  const rootNodeId = doc?.root?.nodeId;
  if (!rootNodeId) {
    throw new Error('DOM.getDocument returned no document root');
  }

  // Find the file input element. `||` is deliberate: an empty selector string
  // falls back to the default just like an omitted one.
  const query = selector || 'input[type="file"]';
  const match = (await chrome.debugger.sendCommand(target, 'DOM.querySelector', {
    nodeId: rootNodeId,
    selector: query,
  })) as { nodeId?: number } | undefined;

  // A missing or zero nodeId is treated as "nothing matched".
  const nodeId = match?.nodeId;
  if (!nodeId) {
    throw new Error(`No element found matching selector: ${query}`);
  }

  // Set files directly via CDP — Chrome reads from local filesystem
  await chrome.debugger.sendCommand(target, 'DOM.setFileInputFiles', {
    files,
    nodeId,
  });
}
*/ -export type Action = 'exec' | 'navigate' | 'tabs' | 'cookies' | 'screenshot' | 'close-window' | 'sessions'; +export type Action = 'exec' | 'navigate' | 'tabs' | 'cookies' | 'screenshot' | 'close-window' | 'sessions' | 'set-file-input'; export interface Command { /** Unique request ID */ @@ -32,6 +32,10 @@ export interface Command { quality?: number; /** Whether to capture full page (not just viewport) */ fullPage?: boolean; + /** Local file paths for set-file-input action */ + files?: string[]; + /** CSS selector for file input element (set-file-input action) */ + selector?: string; } export interface Result { diff --git a/src/browser/daemon-client.ts b/src/browser/daemon-client.ts index 72dae06f..4798cb7e 100644 --- a/src/browser/daemon-client.ts +++ b/src/browser/daemon-client.ts @@ -19,7 +19,7 @@ function generateId(): string { export interface DaemonCommand { id: string; - action: 'exec' | 'navigate' | 'tabs' | 'cookies' | 'screenshot' | 'close-window' | 'sessions'; + action: 'exec' | 'navigate' | 'tabs' | 'cookies' | 'screenshot' | 'close-window' | 'sessions' | 'set-file-input'; tabId?: number; code?: string; workspace?: string; @@ -30,6 +30,10 @@ export interface DaemonCommand { format?: 'png' | 'jpeg'; quality?: number; fullPage?: boolean; + /** Local file paths for set-file-input action */ + files?: string[]; + /** CSS selector for file input element (set-file-input action) */ + selector?: string; } export interface DaemonResult { diff --git a/src/browser/page.ts b/src/browser/page.ts index a1bd2264..bfbd8aff 100644 --- a/src/browser/page.ts +++ b/src/browser/page.ts @@ -339,6 +339,22 @@ export class Page implements IPage { return Array.isArray(result) ? result : []; } + /** + * Set local file paths on a file input element via CDP DOM.setFileInputFiles. + * Chrome reads the files directly from the local filesystem, avoiding the + * payload size limits of base64-in-evaluate. 
+ */ + async setFileInput(files: string[], selector?: string): Promise { + const result = await sendCommand('set-file-input', { + files, + selector, + ...this._cmdOpts(), + }) as { count?: number }; + if (!result?.count) { + throw new Error('setFileInput returned no count — command may not be supported by the extension'); + } + } + async waitForCapture(timeout: number = 10): Promise { const maxMs = timeout * 1000; await sendCommand('exec', { diff --git a/src/clis/xiaohongshu/publish.test.ts b/src/clis/xiaohongshu/publish.test.ts index 959961fb..55ead889 100644 --- a/src/clis/xiaohongshu/publish.test.ts +++ b/src/clis/xiaohongshu/publish.test.ts @@ -8,7 +8,7 @@ import { getRegistry } from '../../registry.js'; import type { IPage } from '../../types.js'; import './publish.js'; -function createPageMock(evaluateResults: any[]): IPage { +function createPageMock(evaluateResults: any[], overrides: Partial = {}): IPage { const evaluate = vi.fn(); for (const result of evaluateResults) { evaluate.mockResolvedValueOnce(result); @@ -37,10 +37,88 @@ function createPageMock(evaluateResults: any[]): IPage { getCookies: vi.fn().mockResolvedValue([]), screenshot: vi.fn().mockResolvedValue(''), waitForCapture: vi.fn().mockResolvedValue(undefined), + ...overrides, }; } describe('xiaohongshu publish', () => { + it('prefers CDP setFileInput upload when the page supports it', async () => { + const cmd = getRegistry().get('xiaohongshu/publish'); + expect(cmd?.func).toBeTypeOf('function'); + + const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'opencli-xhs-publish-')); + const imagePath = path.join(tempDir, 'demo.jpg'); + fs.writeFileSync(imagePath, Buffer.from([0xff, 0xd8, 0xff, 0xd9])); + + const setFileInput = vi.fn().mockResolvedValue(undefined); + const page = createPageMock([ + 'https://creator.xiaohongshu.com/publish/publish?from=menu_left', + { ok: true, target: '上传图文', text: '上传图文' }, + { state: 'editor_ready', hasTitleInput: true, hasImageInput: true, hasVideoSurface: false 
}, + 'input[type="file"][accept*="image"],input[type="file"][accept*=".jpg"],input[type="file"][accept*=".jpeg"],input[type="file"][accept*=".png"],input[type="file"][accept*=".gif"],input[type="file"][accept*=".webp"]', + false, + true, + { ok: true, sel: 'input[maxlength="20"]' }, + { ok: true, sel: '[contenteditable="true"][class*="content"]' }, + true, + 'https://creator.xiaohongshu.com/publish/success', + '发布成功', + ], { + setFileInput, + }); + + const result = await cmd!.func!(page, { + title: 'CDP上传优先', + content: '优先走 setFileInput 主路径', + images: imagePath, + topics: '', + draft: false, + }); + + expect(setFileInput).toHaveBeenCalledWith( + [imagePath], + expect.stringContaining('input[type="file"][accept*="image"]'), + ); + const evaluateCalls = (page.evaluate as any).mock.calls.map((args: any[]) => String(args[0])); + expect(evaluateCalls.some((code: string) => code.includes('atob(img.base64)'))).toBe(false); + expect(result).toEqual([ + { + status: '✅ 发布成功', + detail: '"CDP上传优先" · 1张图片 · 发布成功', + }, + ]); + }); + + it('fails fast when only a generic file input exists on the page', async () => { + const cmd = getRegistry().get('xiaohongshu/publish'); + expect(cmd?.func).toBeTypeOf('function'); + + const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'opencli-xhs-publish-')); + const imagePath = path.join(tempDir, 'demo.jpg'); + fs.writeFileSync(imagePath, Buffer.from([0xff, 0xd8, 0xff, 0xd9])); + + const setFileInput = vi.fn().mockResolvedValue(undefined); + const page = createPageMock([ + 'https://creator.xiaohongshu.com/publish/publish?from=menu_left', + { ok: true, target: '上传图文', text: '上传图文' }, + { state: 'editor_ready', hasTitleInput: true, hasImageInput: true, hasVideoSurface: false }, + null, + ], { + setFileInput, + }); + + await expect(cmd!.func!(page, { + title: '不要走泛化上传', + content: 'generic file input 应该直接报错', + images: imagePath, + topics: '', + draft: false, + })).rejects.toThrow('Image injection failed: No file input found on page'); + + 
/** Image file extensions accepted for upload, mapped to their MIME types. */
const SUPPORTED_EXTENSIONS: Record<string, string> = {
  '.jpg': 'image/jpeg',
  '.jpeg': 'image/jpeg',
  '.png': 'image/png',
  '.gif': 'image/gif',
  '.webp': 'image/webp',
};

/**
 * Validate image paths: check that each one exists, is a regular file, and has
 * a supported extension (compared case-insensitively).
 *
 * @param filePaths - Paths as given by the user; relative paths are resolved
 *                    against the current working directory.
 * @returns The resolved absolute paths, in input order.
 * @throws Error on the first path that is missing, is not a file, or has an
 *         extension outside SUPPORTED_EXTENSIONS.
 */
function validateImagePaths(filePaths: string[]): string[] {
  return filePaths.map((filePath) => {
    const absPath = path.resolve(filePath);

    // stat instead of existsSync: a directory (e.g. "shots.png/") exists but
    // cannot be uploaded, and should fail here rather than mid-publish.
    let stat: fs.Stats | undefined;
    try {
      stat = fs.statSync(absPath);
    } catch {
      // Any stat failure is reported as "not found", matching existsSync semantics.
      stat = undefined;
    }
    if (!stat) throw new Error(`Image file not found: ${absPath}`);
    if (!stat.isFile()) throw new Error(`Image path is not a file: ${absPath}`);

    const ext = path.extname(absPath).toLowerCase();
    if (!SUPPORTED_EXTENSIONS[ext]) {
      throw new Error(`Unsupported image format "${ext}". Supported: jpg, png, gif, webp`);
    }
    return absPath;
  });
}
*/ -async function injectImages(page: IPage, images: ImagePayload[]): Promise<{ ok: boolean; count: number; error?: string }> { +async function uploadImages( + page: IPage, + absPaths: string[], +): Promise<{ ok: boolean; count: number; error?: string }> { + // ── Primary: CDP DOM.setFileInputFiles ────────────────────────────── + if (page.setFileInput) { + try { + // Find image-accepting file input on the page + const selector: string | null = await page.evaluate(` + (() => { + const sels = ${JSON.stringify(IMAGE_INPUT_SELECTOR)}; + const el = document.querySelector(sels); + return el ? sels : null; + })() + `); + if (!selector) { + return { ok: false, count: 0, error: 'No file input found on page' }; + } + await page.setFileInput(absPaths, selector); + return { ok: true, count: absPaths.length }; + } catch (err) { + // If set-file-input action is not supported by extension, fall through to legacy + const msg = err instanceof Error ? err.message : String(err); + if (msg.includes('Unknown action') || msg.includes('not supported')) { + // Extension too old — fall through to legacy base64 method + } else { + return { ok: false, count: 0, error: msg }; + } + } + } + + // ── Fallback: legacy base64 DataTransfer injection ───────────────── + const images = absPaths.map((absPath) => { + const base64 = fs.readFileSync(absPath).toString('base64'); + const ext = path.extname(absPath).toLowerCase(); + return { name: path.basename(absPath), mimeType: SUPPORTED_EXTENSIONS[ext], base64 }; + }); + + // Warn if total payload is large — this may fail with older extensions + const totalBytes = images.reduce((sum, img) => sum + img.base64.length, 0); + if (totalBytes > 500_000) { + console.warn( + `[warn] Total image payload is ${(totalBytes / 1024 / 1024).toFixed(1)}MB (base64). ` + + 'This may fail with the browser bridge. Update the extension to v1.6+ for CDP-based upload, ' + + 'or compress images before publishing.' 
+ ); + } + const payload = JSON.stringify(images); return page.evaluate(` (async () => { const images = ${payload}; - // Only use image-capable file inputs. Do not fall back to a generic uploader, - // otherwise we can accidentally feed images into the video upload flow. const inputs = Array.from(document.querySelectorAll('input[type="file"]')); const input = inputs.find(el => { const accept = el.getAttribute('accept') || ''; @@ -346,8 +400,8 @@ cli({ if (imagePaths.length > MAX_IMAGES) throw new Error(`Too many images: ${imagePaths.length} (max ${MAX_IMAGES})`); - // Read images in Node.js context before navigating (fast-fail on bad paths) - const imageData: ImagePayload[] = imagePaths.map(readImageFile); + // Validate image paths before navigating (fast-fail on bad paths / unsupported formats) + const absImagePaths = validateImagePaths(imagePaths); // ── Step 1: Navigate to publish page ────────────────────────────────────── await page.goto(PUBLISH_URL); @@ -377,7 +431,7 @@ cli({ } // ── Step 3: Upload images ────────────────────────────────────────────────── - const upload = await injectImages(page, imageData); + const upload = await uploadImages(page, absImagePaths); if (!upload.ok) { await page.screenshot({ path: '/tmp/xhs_publish_upload_debug.png' }); throw new Error( @@ -532,7 +586,7 @@ cli({ status: isSuccess ? `✅ ${verb}` : '⚠️ 操作完成,请在浏览器中确认', detail: [ `"${title}"`, - `${imageData.length}张图片`, + `${absImagePaths.length}张图片`, topics.length ? `话题: ${topics.join(' ')}` : '', successMsg || finalUrl || '', ] diff --git a/src/types.ts b/src/types.ts index 43ceb983..f1647e6e 100644 --- a/src/types.ts +++ b/src/types.ts @@ -67,6 +67,11 @@ export interface IPage { getInterceptedRequests(): Promise; waitForCapture(timeout?: number): Promise; screenshot(options?: ScreenshotOptions): Promise; + /** + * Set local file paths on a file input element via CDP DOM.setFileInputFiles. + * Chrome reads the files directly — no base64 encoding or payload size limits. 
+ */ + setFileInput?(files: string[], selector?: string): Promise; closeWindow?(): Promise; /** Returns the current page URL, or null if unavailable. */ getCurrentUrl?(): Promise;