jackwener · jackwener · May 31, 2026 · May 31, 2026 · May 31, 2026 · May 31, 2026
diff --git a/clis/chatgpt/utils.js b/clis/chatgpt/utils.js
@@ -609,7 +609,18 @@ async function waitForChatGPTUploadPreview(page, fileNames) {
                 const scope = root || document.body;
                 if (!scope) return false;
 
-                const previewNodes = scope.querySelectorAll('img[src], canvas, video, [style*="background-image"], [data-testid*="attachment"], [data-testid*="upload"], [class*="attachment"], [class*="upload"]');
+                const isVisibleMedia = (node) => { // Predicate: does this node look like a rendered media preview rather than a hidden or tiny element?
+                    if (!(node instanceof HTMLElement)) return false;
+                    const style = window.getComputedStyle(node);
+                    if (style.display === 'none' || style.visibility === 'hidden') return false; // reject nodes whose computed style reports them as not displayed or hidden
+                    const rect = node.getBoundingClientRect();
+                    const width = node.naturalWidth || node.videoWidth || rect.width || 0; // intrinsic image size first, then intrinsic video size, then laid-out size
+                    const height = node.naturalHeight || node.videoHeight || rect.height || 0;
+                    if (width > 32 && height > 32) return true; // must be strictly larger than 32 in both dimensions
+                    const backgroundImage = style.backgroundImage || '';
+                    return /url\\(/.test(backgroundImage) && rect.width > 32 && rect.height > 32; // otherwise accept only a url(...) background on a box laid out larger than 32x32
+                };
+                const previewNodes = Array.from(scope.querySelectorAll('img[src], canvas, video, [style*="background-image"]')).filter(isVisibleMedia);
                 return previewNodes.length >= names.length;
             })()
         `)), 'chatgpt upload preview detection');
@@ -746,6 +757,17 @@ export async function getChatGPTVisibleImageUrls(page) {
                 const text = [alt, cls, testId, label, src.toLowerCase()].join(' ');
                 return /avatar|profile|logo|icon/.test(text);
             };
+            const isUserUploadPreview = (img) => { // Predicate: true when img appears to be a preview of a user-uploaded image rather than generated output
+                const alt = (img.getAttribute('alt') || '').toLowerCase();
+                const turn = img.closest('section[data-testid^="conversation-turn"]');
+                const heading = (turn?.querySelector('h4')?.innerText || '').toLowerCase(); // first h4 of the enclosing turn; empty string when there is no turn, no h4, or no innerText
+                if (/you said|你说/.test(heading)) return true; // heading identifies a user turn
+                if (/chatgpt|assistant|助手/.test(heading)) return false; // heading identifies an assistant turn, so the text heuristics below are skipped
+                const openButtonLabel = (img.closest('button[aria-label^="Open image:"]')?.getAttribute('aria-label') || '').toLowerCase();
+                const previewText = [alt, openButtonLabel].join(' ');
+                return /\.(png|jpe?g|webp|gif|heic|heif)(?:\b|$)/i.test(previewText) // no verdict from the heading: a filename-like alt or button label counts as an upload
+                    || /ref-|reference|参考|upload|uploaded|attachment/.test(previewText); // NOTE(review): if this code sits inside a template-literal script, the single-backslash escapes in the regex above are consumed by the template - confirm against the enclosing function
+            };
 
             const imgs = Array.from(document.querySelectorAll('img')).filter(img =>
                 img instanceof HTMLImageElement && isVisible(img)
@@ -758,6 +780,7 @@ export async function getChatGPTVisibleImageUrls(page) {
 
                 if (!src) continue;
                 if (isDecorative(img, src)) continue;
+                if (isUserUploadPreview(img)) continue;
                 if (width < 128 && height < 128) continue;
                 addUrl(src);
             }
@@ -777,16 +800,41 @@ export async function getChatGPTVisibleImageUrls(page) {
                 }
             }
 
-            // Some image experiences render to a canvas. Returning the data URL
-            // lets the downstream asset exporter save it without needing a DOM
-            // selector to rediscover the canvas.
+            // Some ChatGPT image surfaces mount large transparent canvases as
+            // placeholders/overlays before the real backend image is ready. If
+            // those data URLs are accepted as generated assets, the adapter can
+            // save a blank transparent PNG while reporting success. Prefer real
+            // <img>/background URLs; only keep a canvas if at least one sampled
+            // pixel is neither fully transparent nor near-white.
             for (const canvas of Array.from(document.querySelectorAll('canvas'))) {
                 if (!(canvas instanceof HTMLCanvasElement) || !isVisible(canvas) || isDecorative(canvas)) continue;
                 const width = canvas.width || canvas.getBoundingClientRect().width || 0;
                 const height = canvas.height || canvas.getBoundingClientRect().height || 0;
                 if (width < 128 && height < 128) continue;
                 try {
-                    addUrl(canvas.toDataURL('image/png'));
+                    const ctx = canvas.getContext('2d', { willReadFrequently: true }); // 2D context used for the pixel reads below; NOTE(review): a canvas that already holds a non-2d context presumably yields null here and is skipped - confirm that is intended
+                    if (!ctx) continue;
+                    const sourceWidth = Math.max(1, Math.floor(canvas.width || width)); // integer sampling extent, never below 1
+                    const sourceHeight = Math.max(1, Math.floor(canvas.height || height));
+                    const xCount = Math.min(sourceWidth, 16); // sample at most a 16x16 grid of single pixels
+                    const yCount = Math.min(sourceHeight, 16);
+                    let hasContent = false;
+                    for (let yi = 0; yi < yCount && !hasContent; yi += 1) {
+                        const y = Math.min(sourceHeight - 1, Math.floor((yi + 0.5) * sourceHeight / yCount)); // midpoint of grid row yi, clamped to the last pixel row
+                        for (let xi = 0; xi < xCount && !hasContent; xi += 1) {
+                            const x = Math.min(sourceWidth - 1, Math.floor((xi + 0.5) * sourceWidth / xCount)); // midpoint of grid column xi, clamped to the last pixel column
+                            const pixel = ctx.getImageData(x, y, 1, 1).data; // one pixel, read as four channel values
+                            const r = pixel[0];
+                            const g = pixel[1];
+                            const b = pixel[2];
+                            const a = pixel[3];
+                            if (a > 0 && !(r > 248 && g > 248 && b > 248)) { // content means alpha above zero and not near-white (r, g and b all above 248)
+                                hasContent = true;
+                                break; // leaves the inner loop; the outer loop stops through its !hasContent condition
+                            }
+                        }
+                    }
+                    if (hasContent) addUrl(canvas.toDataURL('image/png')); // export the canvas only when a sampled pixel showed content
                 } catch { }
             }
             return urls;

diff --git a/clis/chatgpt/utils.test.js b/clis/chatgpt/utils.test.js
@@ -193,10 +193,13 @@ describe('chatgpt generated image detection', () => {
         ]);
     });
 
-    it('detects visible generated canvases as data URLs', async () => {
+    it('detects visible generated canvases as data URLs when they contain pixels', async () => {
         const page = createDomPage('<!doctype html><canvas width="512" height="512"></canvas>', (window) => {
             const canvas = window.document.querySelector('canvas');
             canvas.getBoundingClientRect = () => ({ width: 512, height: 512 });
+            canvas.getContext = () => ({
+                getImageData: () => ({ data: new Uint8ClampedArray([255, 0, 0, 255]) }),
+            });
             canvas.toDataURL = () => 'data:image/png;base64,ZmFrZQ==';
         });
 
@@ -205,6 +208,103 @@ describe('chatgpt generated image detection', () => {
         ]);
     });
 
+    it('samples generated canvas content outside the top-left corner', async () => { // the stubbed canvas has content only toward its lower-right corner
+        const page = createDomPage('<!doctype html><canvas width="512" height="512"></canvas>', (window) => {
+            const canvas = window.document.querySelector('canvas');
+            canvas.getBoundingClientRect = () => ({ width: 512, height: 512 });
+            canvas.getContext = () => ({ // stub context exposing only getImageData
+                getImageData: (x, y) => ({
+                    data: x > 480 && y > 480 // opaque red where both coordinates exceed 480, fully transparent elsewhere
+                        ? new Uint8ClampedArray([255, 0, 0, 255])
+                        : new Uint8ClampedArray([0, 0, 0, 0]),
+                }),
+            });
+            canvas.toDataURL = () => 'data:image/png;base64,lower-right';
+        });
+
+        await expect(getChatGPTVisibleImageUrls(page)).resolves.toEqual([ // the canvas data URL must be reported
+            'data:image/png;base64,lower-right',
+        ]);
+    });
+
+    it('samples generated canvas content near the center', async () => { // the stubbed canvas is opaque white except for a small coloured square in the middle
+        const page = createDomPage('<!doctype html><canvas width="512" height="512"></canvas>', (window) => {
+            const canvas = window.document.querySelector('canvas');
+            canvas.getBoundingClientRect = () => ({ width: 512, height: 512 });
+            canvas.getContext = () => ({ // stub context exposing only getImageData
+                getImageData: (x, y) => {
+                    const inCenter = x >= 240 && x <= 272 && y >= 240 && y <= 272; // coloured region spans 240..272 on both axes
+                    return { data: new Uint8ClampedArray(inCenter ? [0, 80, 200, 255] : [255, 255, 255, 255]) };
+                },
+            });
+            canvas.toDataURL = () => 'data:image/png;base64,center';
+        });
+
+        await expect(getChatGPTVisibleImageUrls(page)).resolves.toEqual([ // the canvas data URL must be reported
+            'data:image/png;base64,center',
+        ]);
+    });
+
+    it('ignores transparent placeholder canvases', async () => { // a canvas whose every sampled pixel is fully transparent must not be exported
+        const page = createDomPage('<!doctype html><canvas width="512" height="512"></canvas>', (window) => {
+            const canvas = window.document.querySelector('canvas');
+            canvas.getBoundingClientRect = () => ({ width: 512, height: 512 });
+            canvas.getContext = () => ({ // stub context: every read returns a pixel with zero alpha
+                getImageData: () => ({ data: new Uint8ClampedArray([0, 0, 0, 0]) }),
+            });
+            canvas.toDataURL = () => 'data:image/png;base64,blank';
+        });
+
+        await expect(getChatGPTVisibleImageUrls(page)).resolves.toEqual([]); // no URLs expected, so the blank data URL was not collected
+    });
+
+    it('ignores user-uploaded reference image previews', async () => { // fixture: one user turn holding an uploaded image and one assistant turn holding a generated image
+        const page = createDomPage(`
+            <!doctype html>
+            <section data-testid="conversation-turn-1">
+              <h4>You said:</h4>
+              <button aria-label="Open image: reference.png">
+                <img alt="reference.png" src="https://chatgpt.com/backend-api/uploaded/reference.png">
+              </button>
+            </section>
+            <section data-testid="conversation-turn-2">
+              <h4>ChatGPT said:</h4>
+              <img alt="generated image" src="https://chatgpt.com/backend-api/generated/foo.webp">
+            </section>
+        `, (window) => {
+            for (const img of window.document.querySelectorAll('img')) { // give both images a 512x512 intrinsic and laid-out size
+                Object.defineProperty(img, 'naturalWidth', { configurable: true, value: 512 });
+                Object.defineProperty(img, 'naturalHeight', { configurable: true, value: 512 });
+                img.getBoundingClientRect = () => ({ width: 512, height: 512 });
+            }
+        });
+
+        await expect(getChatGPTVisibleImageUrls(page)).resolves.toEqual([ // only the assistant-turn image URL is expected
+            'https://chatgpt.com/backend-api/generated/foo.webp',
+        ]);
+    });
+
+    it('keeps assistant generated images even when they are inside an open-image button', async () => { // an Open image button wrapper alone must not cause the image to be dropped
+        const page = createDomPage(`
+            <!doctype html>
+            <section data-testid="conversation-turn-2">
+              <h4>ChatGPT said:</h4>
+              <button aria-label="Open image: generated image">
+                <img alt="generated image" src="https://chatgpt.com/backend-api/generated/foo.webp">
+              </button>
+            </section>
+        `, (window) => {
+            const img = window.document.querySelector('img');
+            Object.defineProperty(img, 'naturalWidth', { configurable: true, value: 512 }); // give the image a 512x512 intrinsic and laid-out size
+            Object.defineProperty(img, 'naturalHeight', { configurable: true, value: 512 });
+            img.getBoundingClientRect = () => ({ width: 512, height: 512 });
+        });
+
+        await expect(getChatGPTVisibleImageUrls(page)).resolves.toEqual([ // the generated image URL must still be reported
+            'https://chatgpt.com/backend-api/generated/foo.webp',
+        ]);
+    });
+
     it('exports assets for generated CSS background images', async () => {
         const imageUrl = 'https://chatgpt.com/backend-api/generated/foo.webp';
         const page = createDomPage(`
@@ -322,6 +422,59 @@ describe('chatgpt image upload helper', () => {
         expect(fallbackScript).toContain('stopPropagation()');
     });
 
+    it('does not treat generic upload controls as uploaded image previews', async () => { // the page holds an upload-named button but no media element
+        const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'opencli-chatgpt-'));
+        tempDirs.push(dir); // presumably cleaned up by a hook outside this view - confirm
+        const filePath = path.join(dir, 'cat.png');
+        fs.writeFileSync(filePath, 'fake-png'); // placeholder bytes, not a real PNG
+
+        const dom = new JSDOM(`
+            <!doctype html>
+            <main>
+              <div aria-label="Chat with ChatGPT">
+                <button class="upload-button" data-testid="upload-button">Attach</button>
+              </div>
+            </main>
+        `, { url: 'https://chatgpt.com/new', runScripts: 'outside-only' });
+        const page = { // minimal page double: file input and wait are no-op mocks
+            setFileInput: vi.fn().mockResolvedValue(undefined),
+            wait: vi.fn().mockResolvedValue(undefined),
+            evaluate: vi.fn((script) => Promise.resolve(dom.window.eval(String(script)))), // runs each script string inside the JSDOM window
+        };
+
+        const result = await uploadChatGPTImages(page, [filePath]);
+
+        expect(result.ok).toBe(false); // the upload must be reported as failed
+        expect(result.reason).toContain('image upload preview did not appear');
+    });
+
+    it('accepts a real uploaded media preview even when the filename text is absent', async () => { // the page holds a blob-sourced image and no filename text
+        const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'opencli-chatgpt-'));
+        tempDirs.push(dir); // presumably cleaned up by a hook outside this view - confirm
+        const filePath = path.join(dir, 'cat.png');
+        fs.writeFileSync(filePath, 'fake-png'); // placeholder bytes, not a real PNG
+
+        const dom = new JSDOM(`
+            <!doctype html>
+            <main>
+              <div aria-label="Chat with ChatGPT">
+                <img src="blob:https://chatgpt.com/upload-preview">
+              </div>
+            </main>
+        `, { url: 'https://chatgpt.com/new', runScripts: 'outside-only' });
+        const img = dom.window.document.querySelector('img');
+        Object.defineProperty(img, 'naturalWidth', { configurable: true, value: 512 }); // give the image a 512x512 intrinsic and laid-out size
+        Object.defineProperty(img, 'naturalHeight', { configurable: true, value: 512 });
+        img.getBoundingClientRect = () => ({ width: 512, height: 512 });
+        const page = { // minimal page double: file input and wait are no-op mocks
+            setFileInput: vi.fn().mockResolvedValue(undefined),
+            wait: vi.fn().mockResolvedValue(undefined),
+            evaluate: vi.fn((script) => Promise.resolve(dom.window.eval(String(script)))), // runs each script string inside the JSDOM window
+        };
+
+        await expect(uploadChatGPTImages(page, [filePath])).resolves.toEqual({ ok: true, files: [filePath] }); // success result echoing the input paths
+    });
+
     it('exposes image MIME inference for fallback upload', () => {
         expect(__test__.imageMimeFromPath('/tmp/a.png')).toBe('image/png');
         expect(__test__.imageMimeFromPath('/tmp/a.webp')).toBe('image/webp');