/
remark-smart-word-wrap.js
227 lines (198 loc) · 7.52 KB
/
remark-smart-word-wrap.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
'use babel';
// Some portions drawn from https://github.com/ben-eb/remark-word-wrap/blob/master/src/index.js
import is from 'unist-util-is';
import visitParents from 'unist-util-visit-parents';
import { splitWords, joinWords, normalizeTextNodes } from './text-util';
// TODO: Make configurable.
// When true, addWord() may start a new line right after a word that ends a sentence.
const SENTENCE_BREAKS = true;
// Do a sentence wrap only after this column, i.e. only once the current
// column position has reached at least this value.
const SENTENCE_MIN_MARGIN = 15;
// Default wrap width used when the plugin options do not supply `width`.
// A good compromise between old 80 char and being too wide to read comfortably.
const WRAP_WIDTH = 92;
/**
 * Heuristic pattern for the tail of a word that closes a sentence.
 *
 * The word has to end with at least two characters, the last of which is a lowercase
 * ASCII letter (the one before it may be a letter, a digit, or an apostrophe), followed by
 * one of:
 *   - a period, question mark, or exclamation point, optionally followed by a closing
 *     quote or parenthesis;
 *   - a closing quote or parenthesis followed by a period, question mark, or
 *     exclamation point;
 *   - a colon or semicolon (no quote or parenthesis allowed here).
 * Any number of trailing spaces may follow.
 *
 * TODO: This should be /\p{L}/u (with a toLower() call below), but we need Unicode regex support.
 * TODO: Could also handle rare cases with quotes and parentheses at sentence end.
 * TODO: Should be OK for most Latin languages but may need to rethink 2-letter restriction.
 */
const SENTENCE_RE = /([A-Z0-9a-z'’][a-z])([.?!]['"’”)]?|['"’”)][.?!]|[:;]) *$/;

/**
 * Decide whether `word` finishes a sentence according to SENTENCE_RE.
 * The aim is to be conservative rather than perfect: a missed sentence end is
 * preferable to a false positive.
 *
 * @param {string} word Word to test; may carry trailing spaces.
 * @returns {boolean} True when the word matches the end-of-sentence heuristic.
 */
function isEndOfSentenceWord(word) {
  const hit = word.match(SENTENCE_RE);
  return hit !== null;
}
/**
 * Approximate text length of a node, including all of its descendants.
 * Delimiter characters such as [](), **..**, etc. are not counted, for simplicity.
 *
 * @param {Object} node AST node to measure.
 * @returns {number} Sum of the node's own length and the lengths of its children.
 */
function nodeLength(node) {
  // Length contributed by this node itself; breaks and any other node type count as zero.
  let ownLength = 0;
  if (is('text', node)) {
    ownLength = node.value.length;
  } else if (is('image', node)) {
    ownLength = node.url.length + (node.alt || '').length;
  } else if (is('link', node)) {
    ownLength = node.url.length;
  } else if (is('linkReference', node)) {
    ownLength = node.identifier.length;
  }

  if (!node.children) {
    return ownLength;
  }
  // Add the recursive length of every child on top of the node's own length.
  return node.children.reduce((total, child) => total + nodeLength(child), ownLength);
}
// Optional leading whitespace followed by the first run of non-whitespace characters.
const FIRST_WORD_RE = /^\s*(\S+)/;

/**
 * Extract the first whitespace-delimited word of `text`.
 *
 * @param {string} text Text to scan.
 * @returns {string} The first word, or an empty string when the text has none.
 */
function firstWord(text) {
  const found = text.match(FIRST_WORD_RE);
  if (found === null) {
    return '';
  }
  // The capture group is \S+, so it is never empty once the pattern has matched.
  return found[1];
}
/**
 * Text length of the first wrappable portion of a node:
 *   - text node: length of its first word;
 *   - strong or emphasis node: the minimum length of its first child;
 *   - anything else: the full nodeLength() of the node.
 *
 * @param {Object} node AST node to measure.
 * @returns {number} Length of the smallest leading piece that has to stay on one line.
 */
function nodeMinLength(node) {
  if (is('text', node)) {
    return firstWord(node.value).length;
  }
  // Only word wrap strong or emphasis text, not links.
  const isFormatting = is('strong', node) || is('emphasis', node);
  if (isFormatting) {
    return nodeMinLength(node.children[0]);
  }
  return nodeLength(node);
}
/**
 * Plugin factory: builds a transformer that re-wraps the text of every paragraph so
 * lines stay under a column limit, optionally also breaking after sentence ends.
 *
 * @param {Object} [opts] Plugin options.
 * @param {number} [opts.width] Column limit used when deciding where to break lines.
 *   Defaults to WRAP_WIDTH when the options object or the property is missing/undefined.
 * @returns {Function} Transformer taking the AST. It first normalizes all text nodes and
 *   then rewrites the `value` of text nodes inside paragraphs in place.
 */
export default function attacher(opts) {
  // Use a destructuring default rather than an object spread so that an explicit
  // `{ width: undefined }` still falls back to WRAP_WIDTH instead of disabling wrapping.
  const { width = WRAP_WIDTH } = opts || {};

  /**
   * Visit one node; only paragraphs are processed.
   *
   * @param {Object} node Node being visited.
   * @param {Array<Object>} ancestors Ancestor chain of `node`; the last entry is its parent.
   */
  function visitor(node, ancestors) {
    if (!is('paragraph', node)) {
      return;
    }
    // TODO: For now, we do not wrap or normalize footnote definitions at all since they
    // need to be indented. Should add this.
    if (ancestors && ancestors.length > 0 && is('footnoteDefinition', ancestors[ancestors.length - 1])) {
      return;
    }

    // Paragraph reflow state: the full set of lines and the column position.
    // Lines are maintained only for the piece of text currently being wrapped, which may
    // begin at any column.
    // Rules: we just flow across all pieces and ignore delimiter text when counting widths,
    // for simplicity. However, there are a few tricky cases around mixing text and sibling
    // nodes (strong, emphasis, and link) where breaking is forbidden:
    // Case 1: You can't break after a node if the text follows it with no whitespace.
    // Case 2: You can't break before a node if the text precedes it with no whitespace.
    // Case 3: You can't break immediately at the start of a formatted node
    //         (strong, emphasis, or link).
    let position = 0; // Current column estimate on the current output line.
    let breakAllowed = false; // Whether a line break may be inserted before the next piece.
    let sentenceEnded = false; // Whether the previously added word ended a sentence.
    let currentLine; // Words accumulated for the line being built.
    let lines = []; // All lines accumulated for the current text node.

    // Reset column tracking, as at the start of a fresh line.
    function resetColumn() {
      position = 0;
      sentenceEnded = false;
      breakAllowed = false;
    }

    // Start accumulating lines afresh, either from the current position or
    // (when `withBreak` is true) as if on a new line.
    function newText(withBreak) {
      currentLine = [];
      lines = [currentLine];
      if (withBreak) {
        resetColumn();
      }
    }

    // Strip trailing whitespace from the last word of the current line, if any.
    function trimTrailingWhitespace() {
      if (currentLine.length > 0) {
        currentLine[currentLine.length - 1] = currentLine[currentLine.length - 1].trimRight();
      }
    }

    // Add a line break to the current text, if allowed. Returns whether a break was made.
    function breakLine(isPlain) {
      // If a node isPlain, i.e. not inside a strong/emphasis/link, it's fine to break.
      // If a node is strong/emphasis/link formatted, we can't break it on the
      // first character, so we have to wait until the next opportunity.
      // Requiring position > 0 also avoids double breaks.
      // Handles Case 3.
      const breakOk = position > 0 && (isPlain || currentLine.length > 0);
      if (breakOk) {
        trimTrailingWhitespace();
        currentLine = [];
        lines.push(currentLine);
        resetColumn();
      }
      return breakOk;
    }

    // Append one word to the current line, breaking before it when a break is allowed
    // and either a sentence just ended past the minimum margin or the word would reach
    // the wrap width.
    function addWord(word, followsSpace, isPlain) {
      // Wrap if possible. Handles Case 1.
      breakAllowed = breakAllowed || followsSpace || word.startsWith(' ');
      const doSentenceBreak = SENTENCE_BREAKS && sentenceEnded && position >= SENTENCE_MIN_MARGIN;
      if (breakAllowed && (doSentenceBreak || position + word.trimRight().length + 1 >= width)) {
        const didBreak = breakLine(isPlain);
        // A lone space at a break point is dropped rather than emitted.
        if (word === ' ') {
          return;
        }
        // After an actual break the new line must not start with whitespace.
        currentLine.push(didBreak ? word.trimLeft() : word);
      } else {
        currentLine.push(word);
      }
      position += word.length + 1;
      sentenceEnded = isEndOfSentenceWord(word);
      breakAllowed = word.endsWith(' ');
    }

    // Account for a node that is never wrapped internally (used for links).
    // NOTE(review): when the node does not fit, only the tracking state is reset here;
    // no text node is modified by this function — confirm that is the intended behavior.
    function addUnbreakableNode(unbreakable) {
      if (breakAllowed && position + nodeLength(unbreakable) + 1 >= width) {
        newText(true);
      }
      position += nodeLength(unbreakable);
      sentenceEnded = false;
      breakAllowed = false;
    }

    // Join the accumulated lines into the final text for the current text node.
    function getLineBrokenText() {
      return joinWords(lines);
    }

    // Walk the children of `parent`, wrapping each text node in place.
    // `formatChain` lists the types of the formatting ancestors below the paragraph.
    // (Named processNode to avoid shadowing the Node.js `process` global.)
    function processNode(parent, formatChain) {
      // Don't bother wrapping text inside links at all.
      // You can't wrap URLs. And wrapping anchor text is usually less clear than
      // splitting out the whole anchor and URL on one line.
      // We don't do this for emphasis or strong text, as they most often do flow well
      // in the source.
      if (is('link', parent)) {
        addUnbreakableNode(parent);
        return;
      }
      for (let i = 0; i < parent.children.length; ++i) {
        const current = parent.children[i];
        const next = i + 1 < parent.children.length && parent.children[i + 1];
        if (current.children) {
          processNode(current, formatChain.concat(current.type));
        } else if (is('text', current)) {
          newText(false);
          const isPlain = formatChain.length === 0;
          // Assumes splitWords returns an array of strings — TODO confirm against text-util.
          const words = splitWords(current.value);
          for (let j = 0; j < words.length; ++j) {
            addWord(words[j], j > 0, isPlain);
          }
          // Add a break at the end of this text node if the next word/link isn't going
          // to fit, unless there is no whitespace at the end of this text node.
          // The words.length guard avoids a TypeError when there are no words at all.
          // Handles Case 2.
          if (
            next &&
            position + nodeMinLength(next) + 1 >= width &&
            words.length > 0 &&
            words[words.length - 1].endsWith(' ')
          ) {
            breakLine(isPlain);
          }
          current.value = getLineBrokenText();
        }
      }
    }

    processNode(node, []);
  }

  return ast => {
    // Mutate all pure text nodes to have normalized whitespace, so we don't
    // confuse ' ' and '\n' etc.
    normalizeTextNodes(ast);
    // Then walk all nodes.
    visitParents(ast, visitor);
  };
}