Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for surrogate pairs and full width characters #20

Merged
merged 7 commits into from
Jul 22, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 38 additions & 24 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
const stringWidth = require('string-width');
const stripAnsi = require('strip-ansi');

const ESCAPES = [
const ESCAPES = new Set([
'\u001B',
'\u009B'
];
]);

const END_CODE = 39;

Expand Down Expand Up @@ -37,26 +37,35 @@ const ESCAPE_CODES = new Map([
[47, 49]
]);

const wrapAnsi = code => `${ESCAPES[0]}[${code}m`;
const wrapAnsi = code => `${ESCAPES.values().next().value}[${code}m`;

// Calculate the visible width of each word split on ' ', ignoring
// the extra characters added by ANSI escape codes
const wordLengths = str => str.split(' ').map(s => stringWidth(s));

// Wrap a long word across multiple rows
// Ansi escape codes do not count towards length
function wrapWord(rows, word, cols) {
const wrapWord = (rows, word, cols) => {
const arr = Array.from(word);

let insideEscape = false;
let visible = stripAnsi(rows[rows.length - 1]).length;
let visible = stringWidth(stripAnsi(rows[rows.length - 1]));

for (let i = 0; i < word.length; i++) {
const x = word[i];
for (const item of arr.entries()) {
const i = item[0];
const char = item[1];
const charLength = stringWidth(char);

rows[rows.length - 1] += x;
if (visible + charLength <= cols) {
rows[rows.length - 1] += char;
} else {
rows.push(char);
visible = 0;
}

if (ESCAPES.indexOf(x) !== -1) {
if (ESCAPES.has(char)) {
insideEscape = true;
} else if (insideEscape && x === 'm') {
} else if (insideEscape && char === 'm') {
insideEscape = false;
continue;
}
Expand All @@ -65,9 +74,9 @@ function wrapWord(rows, word, cols) {
continue;
}

visible++;
visible += charLength;

if (visible >= cols && i < word.length - 1) {
if (visible === cols && i < arr.length - 1) {
rows.push('');
visible = 0;
}
Expand All @@ -78,7 +87,7 @@ function wrapWord(rows, word, cols) {
if (!visible && rows[rows.length - 1].length > 0 && rows.length > 1) {
rows[rows.length - 2] += rows.pop();
}
}
};

// The wrap-ansi module can be invoked
// in either 'hard' or 'soft' wrap mode
Expand All @@ -87,7 +96,7 @@ function wrapWord(rows, word, cols) {
// than cols characters
//
// 'soft' allows long words to expand past the column length
function exec(str, cols, opts) {
const exec = (str, cols, opts) => {
const options = opts || {};

let pre = '';
Expand All @@ -98,7 +107,10 @@ function exec(str, cols, opts) {
const words = str.split(' ');
const rows = [''];

for (let i = 0, word; (word = words[i]) !== undefined; i++) {
for (const item of Array.from(words).entries()) {
const i = item[0];
const word = item[1];

let rowLength = stringWidth(rows[rows.length - 1]);

if (rowLength) {
Expand Down Expand Up @@ -135,33 +147,35 @@ function exec(str, cols, opts) {

pre = rows.map(x => x.trim()).join('\n');

for (let j = 0; j < pre.length; j++) {
const y = pre[j];
for (const item of Array.from(pre).entries()) {
const i = item[0];
const char = item[1];

ret += y;
ret += char;

if (ESCAPES.indexOf(y) !== -1) {
const code = parseFloat(/\d[^m]*/.exec(pre.slice(j, j + 4)));
if (ESCAPES.has(char)) {
const code = parseFloat(/\d[^m]*/.exec(pre.slice(i, i + 4)));
escapeCode = code === END_CODE ? null : code;
}

const code = ESCAPE_CODES.get(parseInt(escapeCode, 10));
const code = ESCAPE_CODES.get(Number(escapeCode));

if (escapeCode && code) {
if (pre[j + 1] === '\n') {
if (pre[i + 1] === '\n') {
ret += wrapAnsi(code);
} else if (y === '\n') {
} else if (char === '\n') {
ret += wrapAnsi(escapeCode);
}
}
}

return ret;
}
};

// For each newline, invoke the method separately
module.exports = (str, cols, opts) => {
return String(str)
.normalize()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why normalize here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To normalize Unicode strings: the same visible character can be represented by different sequences of code points, possibly with different lengths.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense

Copy link
Contributor Author

@kevva kevva Jul 21, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For example, these two strings both produce ñ but are not equal:

'\xF1' === 'n\u0303'
//=> false

.split('\n')
.map(line => exec(line, cols, opts))
.join('\n');
Expand Down
9 changes: 4 additions & 5 deletions test.js
Original file line number Diff line number Diff line change
Expand Up @@ -86,12 +86,11 @@ test('no word-wrapping', t => {
t.is(res3, 'The q\nuick\nbrown\n\u001B[31mfox j\u001B[39m\n\u001B[31mumped\u001B[39m\n\u001B[31mover\u001B[39m\n\u001B[31m\u001B[39mthe l\nazy \u001B[32md\u001B[39m\n\u001B[32mog an\u001B[39m\n\u001B[32md the\u001B[39m\n\u001B[32mn ran\u001B[39m\n\u001B[32maway\u001B[39m\n\u001B[32mwith\u001B[39m\n\u001B[32mthe u\u001B[39m\n\u001B[32mnicor\u001B[39m\n\u001B[32mn.\u001B[39m');
});

// https://github.com/chalk/wrap-ansi/issues/10
test.failing('supports fullwidth characters', t => {
test('supports fullwidth characters', t => {
t.is(m('안녕하세', 4, {hard: true}), '안녕\n하세');
});

// https://github.com/chalk/wrap-ansi/issues/11
test.failing('supports unicode surrogate pairs', t => {
t.is(m('a\ud83c\ude00bc', 2, {hard: true}), 'a\n\ud83c\ude00\nbc');
test('supports unicode surrogate pairs', t => {
t.is(m('a\uD83C\uDE00bc', 2, {hard: true}), 'a\n\uD83C\uDE00\nbc');
t.is(m('a\uD83C\uDE00bc\uD83C\uDE00d\uD83C\uDE00', 2, {hard: true}), 'a\n\uD83C\uDE00\nbc\n\uD83C\uDE00\nd\n\uD83C\uDE00');
});