From 6ddabaea1decc03a1b442dc9f9c3dcfa152233ee Mon Sep 17 00:00:00 2001 From: codedogQBY <1369175442@qq.com> Date: Sun, 31 May 2026 01:18:54 +0800 Subject: [PATCH] fix(tts): skip footnote markers during narration --- packages/app-expo/assets/reader/reader.html | 110 +++++++++++++----- .../assets/reader/reader.template.html | 110 +++++++++++++----- .../src/components/reader/FoliateViewer.tsx | 77 ++++-------- packages/core/src/tts/index.ts | 8 +- packages/core/src/tts/text-utils.test.ts | 23 ++++ packages/core/src/tts/text-utils.ts | 48 +++++++- 6 files changed, 267 insertions(+), 109 deletions(-) create mode 100644 packages/core/src/tts/text-utils.test.ts diff --git a/packages/app-expo/assets/reader/reader.html b/packages/app-expo/assets/reader/reader.html index b752caad..12a5af2e 100644 --- a/packages/app-expo/assets/reader/reader.html +++ b/packages/app-expo/assets/reader/reader.html @@ -2231,11 +2231,8 @@ var walker = doc.createTreeWalker(doc.body, NodeFilter.SHOW_TEXT, { acceptNode: function(node) { if (!node.nodeValue || !node.nodeValue.trim()) return NodeFilter.FILTER_SKIP; - var parent = node.parentElement; - var tag = parent && parent.tagName && parent.tagName.toLowerCase(); - if (tag === 'script' || tag === 'style') return NodeFilter.FILTER_REJECT; - if (tag === 'rt' || tag === 'rp') return NodeFilter.FILTER_REJECT; - if (parent && parent.closest && parent.closest('.readany-translation')) return NodeFilter.FILTER_REJECT; + if (isTTSFootnoteMarker(node.nodeValue)) return NodeFilter.FILTER_REJECT; + if (shouldSkipTTSNode(node.parentElement)) return NodeFilter.FILTER_REJECT; return NodeFilter.FILTER_ACCEPT; } }); @@ -2263,12 +2260,12 @@ } if (isVisible) { - var t = textNode.nodeValue.trim(); + var t = normalizeTTSText(textNode.nodeValue); if (t) { texts.push(t); totalLen += t.length; } } textNode = walker.nextNode(); } - var result = texts.join(' ').trim(); + var result = normalizeTTSText(texts.join(' ')); return result.length > maxLen ? result.slice(0, maxLen) : result; } catch (e) { return ''; } } @@ -2300,11 +2297,8 @@ var walker = doc.createTreeWalker(doc.body, NodeFilter.SHOW_TEXT, { acceptNode: function(node) { if (!node.nodeValue || !node.nodeValue.trim()) return NodeFilter.FILTER_SKIP; - var parent = node.parentElement; - var tag = parent && parent.tagName && parent.tagName.toLowerCase(); - if (tag === 'script' || tag === 'style') return NodeFilter.FILTER_REJECT; - if (tag === 'rt' || tag === 'rp') return NodeFilter.FILTER_REJECT; - if (parent && parent.closest && parent.closest('.readany-translation')) return NodeFilter.FILTER_REJECT; + if (isTTSFootnoteMarker(node.nodeValue)) return NodeFilter.FILTER_REJECT; + if (shouldSkipTTSNode(node.parentElement)) return NodeFilter.FILTER_REJECT; return NodeFilter.FILTER_ACCEPT; } }); @@ -2343,13 +2337,13 @@ if (isVisible) { visibleCount++; - var t = textNode.nodeValue.trim(); + var t = normalizeTTSText(textNode.nodeValue); if (t) { texts.push(t); } } textNode = walker.nextNode(); } - var result = texts.join(' ').trim(); + var result = normalizeTTSText(texts.join(' ')); return { text: result, debug: { @@ -2470,7 +2464,7 @@ if (!win) return []; return Array.from(blocks).filter((block) => { if (!block || !block.textContent || !block.textContent.trim()) return false; - if (block.closest && block.closest('.readany-translation')) return false; + if (shouldSkipTTSNode(block)) return false; try { const range = doc.createRange(); range.selectNodeContents(block); @@ -2485,7 +2479,7 @@ const blockSelector = "p, h1, h2, h3, h4, h5, h6, li, blockquote, dd, dt, figcaption, pre, td, th"; return Array.from(doc.querySelectorAll(blockSelector)).filter((block) => { if (!block || !block.textContent || !block.textContent.trim()) return false; - if (block.closest && block.closest('.readany-translation')) return false; + if (shouldSkipTTSNode(block)) return false; return true; }); } @@ -2506,12 +2500,53 @@ } function fallbackSentenceSegments(text) { - const normalized = (text || '').replace(/\s+/g, ' ').trim(); + const normalized = normalizeTTSText(text); if (!normalized) return []; const matches = normalized.match(/[^。!?!?;;\n]+[。!?!?;;…]?/gu) || []; return (matches.length ? matches : [normalized]).map((segment) => segment.trim()).filter(Boolean); } + const TTS_FOOTNOTE_MARKER_PATTERN = /(?:\s*(?:\[(?:\d{1,4}|[一二三四五六七八九十百千万零〇两]{1,8}|[ivxlcdmIVXLCDM]{1,10})\]|[(?:\d{1,4}|[一二三四五六七八九十百千万零〇两]{1,8}|[ivxlcdmIVXLCDM]{1,10})]|【(?:\d{1,4}|[一二三四五六七八九十百千万零〇两]{1,8}|[ivxlcdmIVXLCDM]{1,10})】|〔(?:\d{1,4}|[一二三四五六七八九十百千万零〇两]{1,8}|[ivxlcdmIVXLCDM]{1,10})〕|[?((?:\d{1,4}|[一二三四五六七八九十百千万零〇两]{1,8}|[ivxlcdmIVXLCDM]{1,10}))]?|\((?:\d{1,4}|[一二三四五六七八九十百千万零〇两]{1,8}|[ivxlcdmIVXLCDM]{1,10})\)))+/gu; + const TTS_FOOTNOTE_MARKER_ONLY_PATTERN = /^(?:\s*(?:\[(?:\d{1,4}|[一二三四五六七八九十百千万零〇两]{1,8}|[ivxlcdmIVXLCDM]{1,10})\]|[(?:\d{1,4}|[一二三四五六七八九十百千万零〇两]{1,8}|[ivxlcdmIVXLCDM]{1,10})]|【(?:\d{1,4}|[一二三四五六七八九十百千万零〇两]{1,8}|[ivxlcdmIVXLCDM]{1,10})】|〔(?:\d{1,4}|[一二三四五六七八九十百千万零〇两]{1,8}|[ivxlcdmIVXLCDM]{1,10})〕|[?((?:\d{1,4}|[一二三四五六七八九十百千万零〇两]{1,8}|[ivxlcdmIVXLCDM]{1,10}))]?|\((?:\d{1,4}|[一二三四五六七八九十百千万零〇两]{1,8}|[ivxlcdmIVXLCDM]{1,10})\)))+\s*$/u; + const TTS_SKIPPED_ELEMENT_SELECTOR = [ + 'script', + 'style', + 'rt', + 'rp', + 'sup', + '.readany-translation', + '[role="doc-noteref"]', + '[role="doc-footnote"]', + '[epub\\:type~="noteref"]', + '[epub\\:type~="footnote"]', + '[type~="noteref"]', + '[type~="footnote"]', + 'a[href^="#fn"]', + 'a[href^="#footnote"]', + 'a[href*="footnote"]', + 'a[href*="note"]', + 'a.noteref', + 'a.footnote', + '.noteref', + '.footnote', + '.footnote-ref', + '.endnote', + '.duokan-footnote', + '.calibre-footnote', + ].join(','); + + function normalizeTTSText(text) { + return String(text || '').replace(TTS_FOOTNOTE_MARKER_PATTERN, '').replace(/\s+/g, ' ').trim(); + } + + function isTTSFootnoteMarker(text) { + return TTS_FOOTNOTE_MARKER_ONLY_PATTERN.test(String(text || '')); + } + + function shouldSkipTTSNode(element) { + return !!(element && element.closest && element.closest(TTS_SKIPPED_ELEMENT_SELECTOR)); + } + function stripCfiAssertions(value) { return String(value || '').replace(/\[[^\]]*\]/g, ''); } @@ -2538,12 +2573,12 @@ } function getTTSSegmentIdentity(cfi, text) { - return `${getTTSCfiStartIdentity(cfi)}::${String(text || '').replace(/\s+/g, ' ').trim()}`; + return `${getTTSCfiStartIdentity(cfi)}::${normalizeTTSText(text)}`; } function summarizeTTSSegmentsForLog(segments, limit) { return (segments || []).slice(0, limit || 8).map(function(segment, index) { - var text = String(segment && segment.text || '').replace(/\s+/g, ' ').trim(); + var text = normalizeTTSText(segment && segment.text); return { index: index, cfi: segment && segment.cfi || null, @@ -2604,7 +2639,7 @@ const seen = new Set(); for (const detail of [current, ...(following || [])]) { if (!detail || !detail.text || !detail.cfi) continue; - const text = String(detail.text).replace(/\s+/g, ' ').trim(); + const text = normalizeTTSText(detail.text); const identity = getTTSSegmentIdentity(detail.cfi, text); if (!text || seen.has(identity)) continue; seen.add(identity); @@ -2640,7 +2675,7 @@ : null; const currentText = current && current.text - ? String(current.text).replace(/\s+/g, ' ').trim() + ? normalizeTTSText(current.text) : ''; const currentIdentity = currentText ? getTTSSegmentIdentity(current && current.cfi, currentText) : null; @@ -2649,7 +2684,7 @@ const seen = new Set(); for (const detail of details || []) { if (!detail || !detail.text || !detail.cfi) continue; - const text = String(detail.text).replace(/\s+/g, ' ').trim(); + const text = normalizeTTSText(detail.text); const identity = getTTSSegmentIdentity(detail.cfi, text); if (!text || (currentIdentity && identity === currentIdentity) || seen.has(identity)) continue; seen.add(identity); @@ -2722,7 +2757,7 @@ } if (!started) continue; - const textNodes = getTextNodes(block).map((node) => { + const textNodes = getTTSTextNodes(block).map((node) => { const text = node.nodeValue || ''; return { node, text }; }).filter((item) => item.text.trim().length > 0); @@ -2771,7 +2806,7 @@ range.setStart(startPos.node, startPos.offset); range.setEnd(endPos.node, endPos.offset); - const text = absoluteText.slice(start, end).replace(/\s+/g, ' ').trim(); + const text = normalizeTTSText(absoluteText.slice(start, end)); if (!text) continue; try { @@ -2902,7 +2937,7 @@ for (var blockIndex = 0; blockIndex < visibleBlocks.length; blockIndex++) { const block = visibleBlocks[blockIndex]; - const textNodes = getTextNodes(block).map((node) => { + const textNodes = getTTSTextNodes(block).map((node) => { const text = node.nodeValue || ''; return { node, text }; }).filter((item) => item.text.trim().length > 0); @@ -2965,7 +3000,7 @@ continue; } - const text = absoluteText.slice(start, end).replace(/\s+/g, ' ').trim(); + const text = normalizeTTSText(absoluteText.slice(start, end)); if (!text) { stats.skippedEmptyText++; continue; @@ -3635,6 +3670,29 @@ return nodes; } + function getTTSTextNodes(element) { + const walker = element.ownerDocument.createTreeWalker( + element, + NodeFilter.SHOW_TEXT, + { + acceptNode: function (node) { + if (isInsideRubyAnnotation(node)) return NodeFilter.FILTER_REJECT; + if (isTTSFootnoteMarker(node.nodeValue || '')) return NodeFilter.FILTER_REJECT; + if (shouldSkipTTSNode(node.parentElement)) return NodeFilter.FILTER_REJECT; + return NodeFilter.FILTER_ACCEPT; + }, + } + ); + const nodes = []; + let node; + while ((node = walker.nextNode())) { + if (node.textContent && node.textContent.trim()) { + nodes.push(node); + } + } + return nodes; + } + function extractBlockText(block) { const walker = block.ownerDocument.createTreeWalker( block, diff --git a/packages/app-expo/assets/reader/reader.template.html b/packages/app-expo/assets/reader/reader.template.html index 1dbb97dd..6aa4de63 100644 --- a/packages/app-expo/assets/reader/reader.template.html +++ b/packages/app-expo/assets/reader/reader.template.html @@ -2231,11 +2231,8 @@ var walker = doc.createTreeWalker(doc.body, NodeFilter.SHOW_TEXT, { acceptNode: function(node) { if (!node.nodeValue || !node.nodeValue.trim()) return NodeFilter.FILTER_SKIP; - var parent = node.parentElement; - var tag = parent && parent.tagName && parent.tagName.toLowerCase(); - if (tag === 'script' || tag === 'style') return NodeFilter.FILTER_REJECT; - if (tag === 'rt' || tag === 'rp') return NodeFilter.FILTER_REJECT; - if (parent && parent.closest && parent.closest('.readany-translation')) return NodeFilter.FILTER_REJECT; + if (isTTSFootnoteMarker(node.nodeValue)) return NodeFilter.FILTER_REJECT; + if (shouldSkipTTSNode(node.parentElement)) return NodeFilter.FILTER_REJECT; return NodeFilter.FILTER_ACCEPT; } }); @@ -2263,12 +2260,12 @@ } if (isVisible) { - var t = textNode.nodeValue.trim(); + var t = normalizeTTSText(textNode.nodeValue); if (t) { texts.push(t); totalLen += t.length; } } textNode = walker.nextNode(); } - var result = texts.join(' ').trim(); + var result = normalizeTTSText(texts.join(' ')); return result.length > maxLen ? result.slice(0, maxLen) : result; } catch (e) { return ''; } } @@ -2300,11 +2297,8 @@ var walker = doc.createTreeWalker(doc.body, NodeFilter.SHOW_TEXT, { acceptNode: function(node) { if (!node.nodeValue || !node.nodeValue.trim()) return NodeFilter.FILTER_SKIP; - var parent = node.parentElement; - var tag = parent && parent.tagName && parent.tagName.toLowerCase(); - if (tag === 'script' || tag === 'style') return NodeFilter.FILTER_REJECT; - if (tag === 'rt' || tag === 'rp') return NodeFilter.FILTER_REJECT; - if (parent && parent.closest && parent.closest('.readany-translation')) return NodeFilter.FILTER_REJECT; + if (isTTSFootnoteMarker(node.nodeValue)) return NodeFilter.FILTER_REJECT; + if (shouldSkipTTSNode(node.parentElement)) return NodeFilter.FILTER_REJECT; return NodeFilter.FILTER_ACCEPT; } }); @@ -2343,13 +2337,13 @@ if (isVisible) { visibleCount++; - var t = textNode.nodeValue.trim(); + var t = normalizeTTSText(textNode.nodeValue); if (t) { texts.push(t); } } textNode = walker.nextNode(); } - var result = texts.join(' ').trim(); + var result = normalizeTTSText(texts.join(' ')); return { text: result, debug: { @@ -2470,7 +2464,7 @@ if (!win) return []; return Array.from(blocks).filter((block) => { if (!block || !block.textContent || !block.textContent.trim()) return false; - if (block.closest && block.closest('.readany-translation')) return false; + if (shouldSkipTTSNode(block)) return false; try { const range = doc.createRange(); range.selectNodeContents(block); @@ -2485,7 +2479,7 @@ const blockSelector = "p, h1, h2, h3, h4, h5, h6, li, blockquote, dd, dt, figcaption, pre, td, th"; return Array.from(doc.querySelectorAll(blockSelector)).filter((block) => { if (!block || !block.textContent || !block.textContent.trim()) return false; - if (block.closest && block.closest('.readany-translation')) return false; + if (shouldSkipTTSNode(block)) return false; return true; }); } @@ -2506,12 +2500,53 @@ } function fallbackSentenceSegments(text) { - const normalized = (text || '').replace(/\s+/g, ' ').trim(); + const normalized = normalizeTTSText(text); if (!normalized) return []; const matches = normalized.match(/[^。!?!?;;\n]+[。!?!?;;…]?/gu) || []; return (matches.length ? matches : [normalized]).map((segment) => segment.trim()).filter(Boolean); } + const TTS_FOOTNOTE_MARKER_PATTERN = /(?:\s*(?:\[(?:\d{1,4}|[一二三四五六七八九十百千万零〇两]{1,8}|[ivxlcdmIVXLCDM]{1,10})\]|[(?:\d{1,4}|[一二三四五六七八九十百千万零〇两]{1,8}|[ivxlcdmIVXLCDM]{1,10})]|【(?:\d{1,4}|[一二三四五六七八九十百千万零〇两]{1,8}|[ivxlcdmIVXLCDM]{1,10})】|〔(?:\d{1,4}|[一二三四五六七八九十百千万零〇两]{1,8}|[ivxlcdmIVXLCDM]{1,10})〕|[?((?:\d{1,4}|[一二三四五六七八九十百千万零〇两]{1,8}|[ivxlcdmIVXLCDM]{1,10}))]?|\((?:\d{1,4}|[一二三四五六七八九十百千万零〇两]{1,8}|[ivxlcdmIVXLCDM]{1,10})\)))+/gu; + const TTS_FOOTNOTE_MARKER_ONLY_PATTERN = /^(?:\s*(?:\[(?:\d{1,4}|[一二三四五六七八九十百千万零〇两]{1,8}|[ivxlcdmIVXLCDM]{1,10})\]|[(?:\d{1,4}|[一二三四五六七八九十百千万零〇两]{1,8}|[ivxlcdmIVXLCDM]{1,10})]|【(?:\d{1,4}|[一二三四五六七八九十百千万零〇两]{1,8}|[ivxlcdmIVXLCDM]{1,10})】|〔(?:\d{1,4}|[一二三四五六七八九十百千万零〇两]{1,8}|[ivxlcdmIVXLCDM]{1,10})〕|[?((?:\d{1,4}|[一二三四五六七八九十百千万零〇两]{1,8}|[ivxlcdmIVXLCDM]{1,10}))]?|\((?:\d{1,4}|[一二三四五六七八九十百千万零〇两]{1,8}|[ivxlcdmIVXLCDM]{1,10})\)))+\s*$/u; + const TTS_SKIPPED_ELEMENT_SELECTOR = [ + 'script', + 'style', + 'rt', + 'rp', + 'sup', + '.readany-translation', + '[role="doc-noteref"]', + '[role="doc-footnote"]', + '[epub\\:type~="noteref"]', + '[epub\\:type~="footnote"]', + '[type~="noteref"]', + '[type~="footnote"]', + 'a[href^="#fn"]', + 'a[href^="#footnote"]', + 'a[href*="footnote"]', + 'a[href*="note"]', + 'a.noteref', + 'a.footnote', + '.noteref', + '.footnote', + '.footnote-ref', + '.endnote', + '.duokan-footnote', + '.calibre-footnote', + ].join(','); + + function normalizeTTSText(text) { + return String(text || '').replace(TTS_FOOTNOTE_MARKER_PATTERN, '').replace(/\s+/g, ' ').trim(); + } + + function isTTSFootnoteMarker(text) { + return TTS_FOOTNOTE_MARKER_ONLY_PATTERN.test(String(text || '')); + } + + function shouldSkipTTSNode(element) { + return !!(element && element.closest && element.closest(TTS_SKIPPED_ELEMENT_SELECTOR)); + } + function stripCfiAssertions(value) { return String(value || '').replace(/\[[^\]]*\]/g, ''); } @@ -2538,12 +2573,12 @@ } function getTTSSegmentIdentity(cfi, text) { - return `${getTTSCfiStartIdentity(cfi)}::${String(text || '').replace(/\s+/g, ' ').trim()}`; + return `${getTTSCfiStartIdentity(cfi)}::${normalizeTTSText(text)}`; } function summarizeTTSSegmentsForLog(segments, limit) { return (segments || []).slice(0, limit || 8).map(function(segment, index) { - var text = String(segment && segment.text || '').replace(/\s+/g, ' ').trim(); + var text = normalizeTTSText(segment && segment.text); return { index: index, cfi: segment && segment.cfi || null, @@ -2604,7 +2639,7 @@ const seen = new Set(); for (const detail of [current, ...(following || [])]) { if (!detail || !detail.text || !detail.cfi) continue; - const text = String(detail.text).replace(/\s+/g, ' ').trim(); + const text = normalizeTTSText(detail.text); const identity = getTTSSegmentIdentity(detail.cfi, text); if (!text || seen.has(identity)) continue; seen.add(identity); @@ -2640,7 +2675,7 @@ : null; const currentText = current && current.text - ? String(current.text).replace(/\s+/g, ' ').trim() + ? normalizeTTSText(current.text) : ''; const currentIdentity = currentText ? getTTSSegmentIdentity(current && current.cfi, currentText) : null; @@ -2649,7 +2684,7 @@ const seen = new Set(); for (const detail of details || []) { if (!detail || !detail.text || !detail.cfi) continue; - const text = String(detail.text).replace(/\s+/g, ' ').trim(); + const text = normalizeTTSText(detail.text); const identity = getTTSSegmentIdentity(detail.cfi, text); if (!text || (currentIdentity && identity === currentIdentity) || seen.has(identity)) continue; seen.add(identity); @@ -2722,7 +2757,7 @@ } if (!started) continue; - const textNodes = getTextNodes(block).map((node) => { + const textNodes = getTTSTextNodes(block).map((node) => { const text = node.nodeValue || ''; return { node, text }; }).filter((item) => item.text.trim().length > 0); @@ -2771,7 +2806,7 @@ range.setStart(startPos.node, startPos.offset); range.setEnd(endPos.node, endPos.offset); - const text = absoluteText.slice(start, end).replace(/\s+/g, ' ').trim(); + const text = normalizeTTSText(absoluteText.slice(start, end)); if (!text) continue; try { @@ -2902,7 +2937,7 @@ for (var blockIndex = 0; blockIndex < visibleBlocks.length; blockIndex++) { const block = visibleBlocks[blockIndex]; - const textNodes = getTextNodes(block).map((node) => { + const textNodes = getTTSTextNodes(block).map((node) => { const text = node.nodeValue || ''; return { node, text }; }).filter((item) => item.text.trim().length > 0); @@ -2965,7 +3000,7 @@ continue; } - const text = absoluteText.slice(start, end).replace(/\s+/g, ' ').trim(); + const text = normalizeTTSText(absoluteText.slice(start, end)); if (!text) { stats.skippedEmptyText++; continue; @@ -3635,6 +3670,29 @@ return nodes; } + function getTTSTextNodes(element) { + const walker = element.ownerDocument.createTreeWalker( + element, + NodeFilter.SHOW_TEXT, + { + acceptNode: function (node) { + if (isInsideRubyAnnotation(node)) return NodeFilter.FILTER_REJECT; + if (isTTSFootnoteMarker(node.nodeValue || '')) return NodeFilter.FILTER_REJECT; + if (shouldSkipTTSNode(node.parentElement)) return NodeFilter.FILTER_REJECT; + return NodeFilter.FILTER_ACCEPT; + }, + } + ); + const nodes = []; + let node; + while ((node = walker.nextNode())) { + if (node.textContent && node.textContent.trim()) { + nodes.push(node); + } + } + return nodes; + } + function extractBlockText(block) { const walker = block.ownerDocument.createTreeWalker( block, diff --git a/packages/app/src/components/reader/FoliateViewer.tsx b/packages/app/src/components/reader/FoliateViewer.tsx index ab3eb701..a327d7d5 100644 --- a/packages/app/src/components/reader/FoliateViewer.tsx +++ b/packages/app/src/components/reader/FoliateViewer.tsx @@ -13,6 +13,7 @@ import type { ChapterTranslationResult, } from "@readany/core/translation/chapter-translator"; import type { ViewSettings } from "@readany/core/types"; +import { cleanText, isTTSFootnoteMarker, shouldSkipTTSNode } from "@readany/core/tts"; import { Overlayer } from "foliate-js/overlayer.js"; import { marked } from "marked"; /** @@ -201,9 +202,14 @@ function getSelectionAdvanceIntent( const REMOTE_FONT_LINK_ATTR = "data-readany-remote-font-link"; function normalizeTTSSegmentText(text?: string | null) { - return String(text || "") - .replace(/\s+/g, " ") - .trim(); + return cleanText(String(text || "")); +} + +function acceptTTSNode(node: Node) { + if (!node.nodeValue?.trim()) return NodeFilter.FILTER_SKIP; + if (isTTSFootnoteMarker(node.nodeValue)) return NodeFilter.FILTER_REJECT; + const parent = (node as Text).parentElement; + return shouldSkipTTSNode(parent) ? NodeFilter.FILTER_REJECT : NodeFilter.FILTER_ACCEPT; } function getTTSSegmentIdentity(cfi?: string | null, text?: string | null) { @@ -556,7 +562,7 @@ export const FoliateViewer = forwardRef let visibleBlocks = Array.from(doc.querySelectorAll(blockSelector)).filter((block) => { if (!block.textContent?.trim()) return false; - if (block.closest(".readany-translation")) return false; + if (shouldSkipTTSNode(block)) return false; return isRectVisibleInReader(block.getBoundingClientRect()); }); @@ -566,7 +572,7 @@ export const FoliateViewer = forwardRef visibleBlocks = Array.from(doc.querySelectorAll("div, section, article, span")).filter( (el) => { if (!el.textContent?.trim()) return false; - if (el.closest(".readany-translation")) return false; + if (shouldSkipTTSNode(el)) return false; // Only leaf-level elements with direct text content if (el.querySelector("div, section, article, p")) return false; return isRectVisibleInReader(el.getBoundingClientRect()); @@ -581,16 +587,7 @@ export const FoliateViewer = forwardRef for (const block of visibleBlocks) { const walker = doc.createTreeWalker(block, NodeFilter.SHOW_TEXT, { - acceptNode: (node) => { - if (!node.nodeValue?.trim()) return NodeFilter.FILTER_SKIP; - const parent = (node as Text).parentElement; - if (!parent) return NodeFilter.FILTER_ACCEPT; - const tag = parent.tagName.toLowerCase(); - if (tag === "script" || tag === "style") return NodeFilter.FILTER_REJECT; - if (tag === "rt" || tag === "rp") return NodeFilter.FILTER_REJECT; - if (parent.closest(".readany-translation")) return NodeFilter.FILTER_REJECT; - return NodeFilter.FILTER_ACCEPT; - }, + acceptNode: acceptTTSNode, }); const positionedNodes: Array<{ node: Text; start: number; end: number }> = []; @@ -657,7 +654,7 @@ export const FoliateViewer = forwardRef range.setEnd(endPos.node, endPos.offset); if (!isRangeStartVisibleInReader(range)) continue; - const text = absoluteText.slice(start, end).replace(/\s+/g, " ").trim(); + const text = normalizeTTSSegmentText(absoluteText.slice(start, end)); if (!text) continue; try { @@ -991,15 +988,7 @@ export const FoliateViewer = forwardRef const visibleRight = pStart; // end - size = (start + size) - size = start const walker = doc.createTreeWalker(doc.body, NodeFilter.SHOW_TEXT, { - acceptNode: (node: Node) => { - if (!node.nodeValue?.trim()) return NodeFilter.FILTER_SKIP; - const parent = (node as Text).parentElement; - const tag = parent?.tagName?.toLowerCase(); - if (tag === "script" || tag === "style") return NodeFilter.FILTER_REJECT; - if (tag === "rt" || tag === "rp") return NodeFilter.FILTER_REJECT; - if (parent?.closest?.(".readany-translation")) return NodeFilter.FILTER_REJECT; - return NodeFilter.FILTER_ACCEPT; - }, + acceptNode: acceptTTSNode, }); const visibleTexts: string[] = []; @@ -1009,7 +998,7 @@ export const FoliateViewer = forwardRef range.selectNodeContents(textNode); const rect = range.getBoundingClientRect(); if (rect.right > visibleLeft && rect.left < visibleRight && rect.width > 0) { - const text = textNode.nodeValue?.trim(); + const text = normalizeTTSSegmentText(textNode.nodeValue); if (text) visibleTexts.push(text); } textNode = walker.nextNode(); @@ -1024,14 +1013,7 @@ export const FoliateViewer = forwardRef const vh = win.innerHeight; const walker = doc.createTreeWalker(doc.body, NodeFilter.SHOW_TEXT, { - acceptNode: (node: Node) => { - if (!node.nodeValue?.trim()) return NodeFilter.FILTER_SKIP; - const parent = (node as Text).parentElement; - const tag = parent?.tagName?.toLowerCase(); - if (tag === "script" || tag === "style") return NodeFilter.FILTER_REJECT; - if (tag === "rt" || tag === "rp") return NodeFilter.FILTER_REJECT; - return NodeFilter.FILTER_ACCEPT; - }, + acceptNode: acceptTTSNode, }); const visibleTexts: string[] = []; @@ -1047,7 +1029,7 @@ export const FoliateViewer = forwardRef rect.top < vh && rect.width > 0 ) { - const text = textNode.nodeValue?.trim(); + const text = normalizeTTSSegmentText(textNode.nodeValue); if (text) visibleTexts.push(text); } textNode = walker.nextNode(); @@ -1058,7 +1040,7 @@ export const FoliateViewer = forwardRef } // Fallback: return full section text - return doc.body?.innerText?.trim() || ""; + return normalizeTTSSegmentText(doc.body?.innerText); } catch { return ""; } @@ -1436,14 +1418,7 @@ export const FoliateViewer = forwardRef const visibleLeft = pStart - pSize; const visibleRight = pStart; const walker = doc.createTreeWalker(doc.body, NodeFilter.SHOW_TEXT, { - acceptNode: (node: Node) => { - if (!node.nodeValue?.trim()) return NodeFilter.FILTER_SKIP; - const parent = (node as Text).parentElement; - const tag = parent?.tagName?.toLowerCase(); - if (tag === "script" || tag === "style" || tag === "rt" || tag === "rp") return NodeFilter.FILTER_REJECT; - if (parent?.closest?.(".readany-translation")) return NodeFilter.FILTER_REJECT; - return NodeFilter.FILTER_ACCEPT; - }, + acceptNode: acceptTTSNode, }); const visibleTexts: string[] = []; let textNode = walker.nextNode(); @@ -1452,7 +1427,7 @@ export const FoliateViewer = forwardRef range.selectNodeContents(textNode); const rect = range.getBoundingClientRect(); if (rect.right > visibleLeft && rect.left < visibleRight && rect.width > 0) { - const text = textNode.nodeValue?.trim(); + const text = normalizeTTSSegmentText(textNode.nodeValue); if (text) visibleTexts.push(text); } textNode = walker.nextNode(); @@ -1464,13 +1439,7 @@ export const FoliateViewer = forwardRef const vw = win.innerWidth; const vh = win.innerHeight; const walker = doc.createTreeWalker(doc.body, NodeFilter.SHOW_TEXT, { - acceptNode: (node: Node) => { - if (!node.nodeValue?.trim()) return NodeFilter.FILTER_SKIP; - const parent = (node as Text).parentElement; - const tag = parent?.tagName?.toLowerCase(); - if (tag === "script" || tag === "style" || tag === "rt" || tag === "rp") return NodeFilter.FILTER_REJECT; - return NodeFilter.FILTER_ACCEPT; - }, + acceptNode: acceptTTSNode, }); const visibleTexts: string[] = []; let textNode = walker.nextNode(); @@ -1479,7 +1448,7 @@ export const FoliateViewer = forwardRef range.selectNodeContents(textNode); const rect = range.getBoundingClientRect(); if (rect.right > 0 && rect.left < vw && rect.bottom > 0 && rect.top < vh && rect.width > 0) { - const text = textNode.nodeValue?.trim(); + const text = normalizeTTSSegmentText(textNode.nodeValue); if (text) visibleTexts.push(text); } textNode = walker.nextNode(); @@ -1491,7 +1460,7 @@ export const FoliateViewer = forwardRef // Fallback: if no visible text detected, use section text if (!surroundingText) { const rawText = doc.body?.textContent || ""; - surroundingText = rawText.replace(/\s+/g, " ").trim().slice(0, 2000); + surroundingText = normalizeTTSSegmentText(rawText).slice(0, 2000); } } } catch { diff --git a/packages/core/src/tts/index.ts b/packages/core/src/tts/index.ts index bfe48963..6e3c49ef 100644 --- a/packages/core/src/tts/index.ts +++ b/packages/core/src/tts/index.ts @@ -10,7 +10,13 @@ export type { export { DEFAULT_TTS_CONFIG, DASHSCOPE_VOICES, normalizeTTSConfig, normalizeTTSEngine } from "./types"; // Text utilities -export { cleanText, countChars, splitIntoChunks } from "./text-utils"; +export { + cleanText, + countChars, + isTTSFootnoteMarker, + shouldSkipTTSNode, + splitIntoChunks, +} from "./text-utils"; export { buildNarrationPreview, getTTSVoiceLabel, splitNarrationText } from "./display"; export { compareVoiceLanguage, getLocaleDisplayLabel, groupEdgeTTSVoices } from "./voice-groups"; diff --git a/packages/core/src/tts/text-utils.test.ts b/packages/core/src/tts/text-utils.test.ts new file mode 100644 index 00000000..8ef1a490 --- /dev/null +++ b/packages/core/src/tts/text-utils.test.ts @@ -0,0 +1,23 @@ +import { describe, expect, it } from "vitest"; +import { cleanText, isTTSFootnoteMarker } from "./text-utils"; + +describe("TTS text utils", () => { + it("removes numeric footnote markers from narration text", () => { + expect(cleanText("她听了这话[12],便不再言语。")).toBe("她听了这话,便不再言语。"); + expect(cleanText("This sentence[3] keeps flowing.")).toBe("This sentence keeps flowing."); + expect(cleanText("这一段(45)仍然应当连续朗读。")).toBe("这一段仍然应当连续朗读。"); + }); + + it("removes Chinese numeral footnote markers from narration text", () => { + expect(cleanText("宝玉听了[十二],忙回头看。")).toBe("宝玉听了,忙回头看。"); + expect(cleanText("此处另有注释[二十三],不应读出。")).toBe("此处另有注释,不应读出。"); + expect(cleanText("他又看了一眼【一】才明白。")).toBe("他又看了一眼才明白。"); + }); + + it("detects standalone footnote marker text nodes", () => { + expect(isTTSFootnoteMarker("[十二]")).toBe(true); + expect(isTTSFootnoteMarker("[23]")).toBe(true); + expect(isTTSFootnoteMarker("(四)")).toBe(true); + expect(isTTSFootnoteMarker("正文[十二]")).toBe(false); + }); +}); diff --git a/packages/core/src/tts/text-utils.ts b/packages/core/src/tts/text-utils.ts index 82e70b17..bee5804d 100644 --- a/packages/core/src/tts/text-utils.ts +++ b/packages/core/src/tts/text-utils.ts @@ -2,10 +2,54 @@ * TTS text processing utilities — platform agnostic. */ -/** Clean text for TTS: remove references like [1], extra whitespace */ +const FOOTNOTE_MARKER_PATTERN = + /(?:\s*(?:\[(?:\d{1,4}|[一二三四五六七八九十百千万零〇两]{1,8}|[ivxlcdmIVXLCDM]{1,10})\]|[(?:\d{1,4}|[一二三四五六七八九十百千万零〇两]{1,8}|[ivxlcdmIVXLCDM]{1,10})]|【(?:\d{1,4}|[一二三四五六七八九十百千万零〇两]{1,8}|[ivxlcdmIVXLCDM]{1,10})】|〔(?:\d{1,4}|[一二三四五六七八九十百千万零〇两]{1,8}|[ivxlcdmIVXLCDM]{1,10})〕|[?((?:\d{1,4}|[一二三四五六七八九十百千万零〇两]{1,8}|[ivxlcdmIVXLCDM]{1,10}))]?|\((?:\d{1,4}|[一二三四五六七八九十百千万零〇两]{1,8}|[ivxlcdmIVXLCDM]{1,10})\)))+/gu; + +const FOOTNOTE_MARKER_ONLY_PATTERN = + /^(?:\s*(?:\[(?:\d{1,4}|[一二三四五六七八九十百千万零〇两]{1,8}|[ivxlcdmIVXLCDM]{1,10})\]|[(?:\d{1,4}|[一二三四五六七八九十百千万零〇两]{1,8}|[ivxlcdmIVXLCDM]{1,10})]|【(?:\d{1,4}|[一二三四五六七八九十百千万零〇两]{1,8}|[ivxlcdmIVXLCDM]{1,10})】|〔(?:\d{1,4}|[一二三四五六七八九十百千万零〇两]{1,8}|[ivxlcdmIVXLCDM]{1,10})〕|[?((?:\d{1,4}|[一二三四五六七八九十百千万零〇两]{1,8}|[ivxlcdmIVXLCDM]{1,10}))]?|\((?:\d{1,4}|[一二三四五六七八九十百千万零〇两]{1,8}|[ivxlcdmIVXLCDM]{1,10})\)))+\s*$/u; + +const TTS_SKIPPED_ELEMENT_SELECTOR = [ + "script", + "style", + "rt", + "rp", + "sup", + ".readany-translation", + '[role="doc-noteref"]', + '[role="doc-footnote"]', + '[epub\\:type~="noteref"]', + '[epub\\:type~="footnote"]', + '[type~="noteref"]', + '[type~="footnote"]', + 'a[href^="#fn"]', + 'a[href^="#footnote"]', + 'a[href*="footnote"]', + 'a[href*="note"]', + 'a.noteref', + 'a.footnote', + ".noteref", + ".footnote", + ".footnote-ref", + ".endnote", + ".duokan-footnote", + ".calibre-footnote", +].join(","); + +/** Return true when a text node only contains a footnote marker such as [12] or [十二]. */ +export function isTTSFootnoteMarker(text: string): boolean { + return FOOTNOTE_MARKER_ONLY_PATTERN.test(text); +} + +/** Return true when an element should not contribute text to TTS. */ +export function shouldSkipTTSNode(element: Element | null | undefined): boolean { + if (!element) return false; + return Boolean(element.closest(TTS_SKIPPED_ELEMENT_SELECTOR)); +} + +/** Clean text for TTS: remove footnote references and extra whitespace. */ export function cleanText(text: string): string { return text - .replace(/\[\d+\]/g, "") + .replace(FOOTNOTE_MARKER_PATTERN, "") .replace(/\s+/g, " ") .trim(); }