Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 53 additions & 5 deletions clis/chatgpt/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -609,7 +609,18 @@ async function waitForChatGPTUploadPreview(page, fileNames) {
const scope = root || document.body;
if (!scope) return false;

const previewNodes = scope.querySelectorAll('img[src], canvas, video, [style*="background-image"], [data-testid*="attachment"], [data-testid*="upload"], [class*="attachment"], [class*="upload"]');
// Decide whether a candidate node counts as a real, visible media preview.
// Returns true only for an HTMLElement that is not hidden via display/visibility
// and that is larger than 32x32, measured either by its intrinsic media size
// (naturalWidth/videoWidth and the matching heights) or by its layout box.
// As a second chance, an element whose computed background-image contains a
// url(...) value and whose layout box is larger than 32x32 also qualifies.
const isVisibleMedia = (node) => {
if (!(node instanceof HTMLElement)) return false;
const style = window.getComputedStyle(node);
if (style.display === 'none' || style.visibility === 'hidden') return false;
const rect = node.getBoundingClientRect();
// Intrinsic dimensions win when present; a zero value falls through to the
// rendered size, and finally to 0.
const width = node.naturalWidth || node.videoWidth || rect.width || 0;
const height = node.naturalHeight || node.videoHeight || rect.height || 0;
if (width > 32 && height > 32) return true;
// Reached when the intrinsic size is small (or absent) - the layout box alone
// is then required to exceed 32x32 and the element must paint a CSS image.
const backgroundImage = style.backgroundImage || '';
// NOTE(review): the doubled backslash in the pattern below looks like
// string-level escaping, i.e. this text is presumably embedded in a string
// that is evaluated in the page - confirm before changing the escaping.
return /url\\(/.test(backgroundImage) && rect.width > 32 && rect.height > 32;
};
const previewNodes = Array.from(scope.querySelectorAll('img[src], canvas, video, [style*="background-image"]')).filter(isVisibleMedia);
return previewNodes.length >= names.length;
})()
`)), 'chatgpt upload preview detection');
Expand Down Expand Up @@ -746,6 +757,17 @@ export async function getChatGPTVisibleImageUrls(page) {
const text = [alt, cls, testId, label, src.toLowerCase()].join(' ');
return /avatar|profile|logo|icon/.test(text);
};
// Decide whether an <img> is a preview of something the user uploaded (a
// reference image) rather than an image produced by the assistant.
//
// Resolution order:
//   1. The h4 heading of the enclosing conversation turn: a user heading means
//      "upload" (true), an assistant heading means "generated" (false).
//   2. Otherwise the alt text and the label of an enclosing "Open image:"
//      button: a file name with an image extension, or an upload/reference
//      keyword, marks the image as a user upload.
//
// Both patterns are written without backslash escapes on purpose, so they mean
// the same thing whether this code runs directly or is first embedded in a
// template-literal script string (where single-backslash escapes are consumed
// by the string before the regex is ever parsed).
const isUserUploadPreview = (img) => {
  const turn = img.closest('section[data-testid^="conversation-turn"]');
  const headingNode = turn?.querySelector('h4');
  // innerText is not provided by every DOM implementation (layout-less ones
  // such as jsdom leave it undefined); fall back to textContent only then.
  const heading = (headingNode?.innerText ?? headingNode?.textContent ?? '').toLowerCase();
  if (/you said|你说/.test(heading)) return true;
  if (/chatgpt|assistant|助手/.test(heading)) return false;

  // No usable heading: fall back to textual hints on the image itself.
  const alt = (img.getAttribute('alt') || '').toLowerCase();
  const openButton = img.closest('button[aria-label^="Open image:"]');
  const openButtonLabel = (openButton?.getAttribute('aria-label') || '').toLowerCase();
  const previewText = [alt, openButtonLabel].join(' ');

  // A literal dot, a known image extension, then anything that is not a word
  // character (or the end of the text).
  const looksLikeImageFileName = /[.](png|jpe?g|webp|gif|heic|heif)(?![a-z0-9_])/i.test(previewText);
  // "upload" also covers "uploaded", so no separate alternative is needed.
  const hasUploadKeyword = /ref-|reference|参考|upload|attachment/.test(previewText);
  return looksLikeImageFileName || hasUploadKeyword;
};

const imgs = Array.from(document.querySelectorAll('img')).filter(img =>
img instanceof HTMLImageElement && isVisible(img)
Expand All @@ -758,6 +780,7 @@ export async function getChatGPTVisibleImageUrls(page) {

if (!src) continue;
if (isDecorative(img, src)) continue;
if (isUserUploadPreview(img)) continue;
if (width < 128 && height < 128) continue;
addUrl(src);
}
Expand All @@ -777,16 +800,41 @@ export async function getChatGPTVisibleImageUrls(page) {
}
}

// Some image experiences render to a canvas. Returning the data URL lets the
// downstream asset exporter save it without needing a DOM selector to
// rediscover the canvas.
//
// Some ChatGPT image surfaces also mount large transparent canvases as
// placeholders/overlays before the real backend image is ready. If those data
// URLs are accepted as generated assets, the adapter can save a blank
// transparent PNG while reporting success. Prefer real <img>/background URLs;
// only keep a canvas if it contains at least one non-transparent/non-white
// sampled pixel. The URL must therefore be added only AFTER sampling.

// Sample an evenly spaced grid (at most 16 x 16 points, each taken at the
// centre of its grid cell) and report whether any sampled pixel is neither
// fully transparent nor near-white (all channels above 248).
const canvasHasContent = (ctx, sourceWidth, sourceHeight) => {
  const xCount = Math.min(sourceWidth, 16);
  const yCount = Math.min(sourceHeight, 16);
  for (let yi = 0; yi < yCount; yi += 1) {
    const y = Math.min(sourceHeight - 1, Math.floor((yi + 0.5) * sourceHeight / yCount));
    for (let xi = 0; xi < xCount; xi += 1) {
      const x = Math.min(sourceWidth - 1, Math.floor((xi + 0.5) * sourceWidth / xCount));
      const pixel = ctx.getImageData(x, y, 1, 1).data;
      const r = pixel[0];
      const g = pixel[1];
      const b = pixel[2];
      const a = pixel[3];
      if (a > 0 && !(r > 248 && g > 248 && b > 248)) return true;
    }
  }
  return false;
};

for (const canvas of Array.from(document.querySelectorAll('canvas'))) {
  if (!(canvas instanceof HTMLCanvasElement) || !isVisible(canvas) || isDecorative(canvas)) continue;
  const width = canvas.width || canvas.getBoundingClientRect().width || 0;
  const height = canvas.height || canvas.getBoundingClientRect().height || 0;
  // Skipped only when BOTH dimensions are small.
  if (width < 128 && height < 128) continue;
  try {
    const ctx = canvas.getContext('2d', { willReadFrequently: true });
    // No 2D context available for this canvas: nothing can be sampled.
    if (!ctx) continue;
    const sourceWidth = Math.max(1, Math.floor(canvas.width || width));
    const sourceHeight = Math.max(1, Math.floor(canvas.height || height));
    if (canvasHasContent(ctx, sourceWidth, sourceHeight)) addUrl(canvas.toDataURL('image/png'));
  } catch {
    // Best effort: a canvas whose pixels or data URL cannot be read is skipped
    // rather than failing the whole scan.
  }
}
return urls;
Expand Down
155 changes: 154 additions & 1 deletion clis/chatgpt/utils.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -193,10 +193,13 @@ describe('chatgpt generated image detection', () => {
]);
});

it('detects visible generated canvases as data URLs', async () => {
it('detects visible generated canvases as data URLs when they contain pixels', async () => {
const page = createDomPage('<!doctype html><canvas width="512" height="512"></canvas>', (window) => {
const canvas = window.document.querySelector('canvas');
canvas.getBoundingClientRect = () => ({ width: 512, height: 512 });
canvas.getContext = () => ({
getImageData: () => ({ data: new Uint8ClampedArray([255, 0, 0, 255]) }),
});
canvas.toDataURL = () => 'data:image/png;base64,ZmFrZQ==';
});

Expand All @@ -205,6 +208,103 @@ describe('chatgpt generated image detection', () => {
]);
});

// Only the lower-right corner of the stubbed canvas holds an opaque pixel, so
// the canvas is reported only if sampling reaches beyond the top-left area.
it('samples generated canvas content outside the top-left corner', async () => {
  const opaqueRed = [255, 0, 0, 255];
  const transparent = [0, 0, 0, 0];
  const html = '<!doctype html><canvas width="512" height="512"></canvas>';

  const page = createDomPage(html, (win) => {
    const canvasEl = win.document.querySelector('canvas');
    canvasEl.getBoundingClientRect = () => ({ width: 512, height: 512 });
    canvasEl.getContext = () => ({
      getImageData(x, y) {
        const isLowerRight = x > 480 && y > 480;
        return { data: new Uint8ClampedArray(isLowerRight ? opaqueRed : transparent) };
      },
    });
    canvasEl.toDataURL = () => 'data:image/png;base64,lower-right';
  });

  const urls = await getChatGPTVisibleImageUrls(page);
  expect(urls).toEqual(['data:image/png;base64,lower-right']);
});

// The stubbed canvas is white everywhere except a small coloured square in the
// middle; the canvas must still be detected as having content.
it('samples generated canvas content near the center', async () => {
  const insideBand = (value) => value >= 240 && value <= 272;
  const html = '<!doctype html><canvas width="512" height="512"></canvas>';

  const page = createDomPage(html, (win) => {
    const canvasEl = win.document.querySelector('canvas');
    canvasEl.getBoundingClientRect = () => ({ width: 512, height: 512 });
    canvasEl.getContext = () => ({
      getImageData(x, y) {
        const rgba = insideBand(x) && insideBand(y) ? [0, 80, 200, 255] : [255, 255, 255, 255];
        return { data: new Uint8ClampedArray(rgba) };
      },
    });
    canvasEl.toDataURL = () => 'data:image/png;base64,center';
  });

  const urls = await getChatGPTVisibleImageUrls(page);
  expect(urls).toEqual(['data:image/png;base64,center']);
});

// Every sampled pixel of the stubbed canvas is fully transparent, so no URL
// may be returned even though toDataURL would produce one.
it('ignores transparent placeholder canvases', async () => {
  const html = '<!doctype html><canvas width="512" height="512"></canvas>';
  const transparentPixel = () => ({ data: new Uint8ClampedArray([0, 0, 0, 0]) });

  const page = createDomPage(html, (win) => {
    const canvasEl = win.document.querySelector('canvas');
    canvasEl.getBoundingClientRect = () => ({ width: 512, height: 512 });
    canvasEl.getContext = () => ({ getImageData: () => transparentPixel() });
    canvasEl.toDataURL = () => 'data:image/png;base64,blank';
  });

  const urls = await getChatGPTVisibleImageUrls(page);
  expect(urls).toEqual([]);
});

// A user turn with an uploaded reference image and an assistant turn with a
// generated image: only the generated image URL may be returned.
it('ignores user-uploaded reference image previews', async () => {
  const markup = `
<!doctype html>
<section data-testid="conversation-turn-1">
<h4>You said:</h4>
<button aria-label="Open image: reference.png">
<img alt="reference.png" src="https://chatgpt.com/backend-api/uploaded/reference.png">
</button>
</section>
<section data-testid="conversation-turn-2">
<h4>ChatGPT said:</h4>
<img alt="generated image" src="https://chatgpt.com/backend-api/generated/foo.webp">
</section>
`;
  // Make an <img> look fully loaded and laid out at 512x512.
  const stubLoadedImage = (img) => {
    Object.defineProperty(img, 'naturalWidth', { configurable: true, value: 512 });
    Object.defineProperty(img, 'naturalHeight', { configurable: true, value: 512 });
    img.getBoundingClientRect = () => ({ width: 512, height: 512 });
  };

  const page = createDomPage(markup, (win) => {
    Array.from(win.document.querySelectorAll('img')).forEach((img) => stubLoadedImage(img));
  });

  const urls = await getChatGPTVisibleImageUrls(page);
  expect(urls).toEqual(['https://chatgpt.com/backend-api/generated/foo.webp']);
});

// An assistant image wrapped in an "Open image:" button must not be mistaken
// for a user upload preview.
it('keeps assistant generated images even when they are inside an open-image button', async () => {
  const markup = `
<!doctype html>
<section data-testid="conversation-turn-2">
<h4>ChatGPT said:</h4>
<button aria-label="Open image: generated image">
<img alt="generated image" src="https://chatgpt.com/backend-api/generated/foo.webp">
</button>
</section>
`;

  const page = createDomPage(markup, (win) => {
    const imageEl = win.document.querySelector('img');
    // Make the image look fully loaded and laid out at 512x512.
    Object.defineProperty(imageEl, 'naturalWidth', { configurable: true, value: 512 });
    Object.defineProperty(imageEl, 'naturalHeight', { configurable: true, value: 512 });
    imageEl.getBoundingClientRect = () => ({ width: 512, height: 512 });
  });

  const urls = await getChatGPTVisibleImageUrls(page);
  expect(urls).toEqual(['https://chatgpt.com/backend-api/generated/foo.webp']);
});

it('exports assets for generated CSS background images', async () => {
const imageUrl = 'https://chatgpt.com/backend-api/generated/foo.webp';
const page = createDomPage(`
Expand Down Expand Up @@ -322,6 +422,59 @@ describe('chatgpt image upload helper', () => {
expect(fallbackScript).toContain('stopPropagation()');
});

// The page only contains an "Attach" button whose class/test id mention
// "upload"; that alone must not count as an uploaded image preview.
it('does not treat generic upload controls as uploaded image previews', async () => {
  const workDir = fs.mkdtempSync(path.join(os.tmpdir(), 'opencli-chatgpt-'));
  tempDirs.push(workDir);
  const filePath = path.join(workDir, 'cat.png');
  fs.writeFileSync(filePath, 'fake-png');

  const markup = `
<!doctype html>
<main>
<div aria-label="Chat with ChatGPT">
<button class="upload-button" data-testid="upload-button">Attach</button>
</div>
</main>
`;
  const dom = new JSDOM(markup, { url: 'https://chatgpt.com/new', runScripts: 'outside-only' });
  // Run page scripts inside the jsdom window, mirroring page.evaluate.
  const evaluateInDom = (script) => Promise.resolve(dom.window.eval(String(script)));
  const page = {
    setFileInput: vi.fn().mockResolvedValue(undefined),
    wait: vi.fn().mockResolvedValue(undefined),
    evaluate: vi.fn(evaluateInDom),
  };

  const outcome = await uploadChatGPTImages(page, [filePath]);

  expect(outcome.ok).toBe(false);
  expect(outcome.reason).toContain('image upload preview did not appear');
});

// A visible 512x512 blob-backed <img> is enough to confirm the upload, even
// though the file name never appears as text on the page.
it('accepts a real uploaded media preview even when the filename text is absent', async () => {
  const workDir = fs.mkdtempSync(path.join(os.tmpdir(), 'opencli-chatgpt-'));
  tempDirs.push(workDir);
  const filePath = path.join(workDir, 'cat.png');
  fs.writeFileSync(filePath, 'fake-png');

  const markup = `
<!doctype html>
<main>
<div aria-label="Chat with ChatGPT">
<img src="blob:https://chatgpt.com/upload-preview">
</div>
</main>
`;
  const dom = new JSDOM(markup, { url: 'https://chatgpt.com/new', runScripts: 'outside-only' });

  // Make the preview image look fully loaded and laid out at 512x512.
  const previewImg = dom.window.document.querySelector('img');
  Object.defineProperty(previewImg, 'naturalWidth', { configurable: true, value: 512 });
  Object.defineProperty(previewImg, 'naturalHeight', { configurable: true, value: 512 });
  previewImg.getBoundingClientRect = () => ({ width: 512, height: 512 });

  // Run page scripts inside the jsdom window, mirroring page.evaluate.
  const evaluateInDom = (script) => Promise.resolve(dom.window.eval(String(script)));
  const page = {
    setFileInput: vi.fn().mockResolvedValue(undefined),
    wait: vi.fn().mockResolvedValue(undefined),
    evaluate: vi.fn(evaluateInDom),
  };

  const outcome = await uploadChatGPTImages(page, [filePath]);
  expect(outcome).toEqual({ ok: true, files: [filePath] });
});

it('exposes image MIME inference for fallback upload', () => {
expect(__test__.imageMimeFromPath('/tmp/a.png')).toBe('image/png');
expect(__test__.imageMimeFromPath('/tmp/a.webp')).toBe('image/webp');
Expand Down