Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(tocObj): skip permalink symbol #175

Merged
merged 3 commits into from Feb 23, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
15 changes: 13 additions & 2 deletions lib/toc_obj.js
@@ -1,6 +1,7 @@
'use strict';
const { DomHandler, DomUtils, Parser } = require('htmlparser2');
const escapeHTML = require('./escape_html');
const nonWord = /^\s*[^a-zA-Z0-9]\s*$/;

const parseHtml = html => {
const handler = new DomHandler(null, {});
Expand All @@ -9,7 +10,7 @@ const parseHtml = html => {
};

/**
 * Resolve the id that applies to a node: its own `id` attribute when that is
 * non-empty, otherwise the id resolved from its parent, recursively.
 *
 * @param {{attribs: Object, parent: ?Object}} node - Node exposing `attribs` and `parent`.
 * @returns {string} The first non-empty id found walking up the parent chain,
 *   or an empty string when no ancestor has one.
 */
const getId = ({ attribs, parent }) => {
  // An empty id attribute is falsy, so it falls through to the parent lookup.
  return attribs.id || (!parent ? '' : getId(parent));
};

function tocObj(str, options = {}) {
Expand All @@ -30,7 +31,17 @@ function tocObj(str, options = {}) {
const el = headings[i];
const level = +el.name[1];
const id = getId(el);
const text = escapeHTML(DomUtils.getText(el));
let text = '';
for (const element of el.children) {
const elText = DomUtils.getText(element);
// Skip permalink symbol wrapped in <a>
// permalink is a single non-word character, word = [a-zA-Z0-9]
// permalink may be wrapped in whitespace(s)
if (element.name !== 'a' || !nonWord.test(elText)) {
text += escapeHTML(elText);
}
}
if (!text) text = escapeHTML(DomUtils.getText(el));

result.push({ text, id, level });
}
Expand Down
91 changes: 85 additions & 6 deletions test/toc_obj.spec.js
Expand Up @@ -66,19 +66,15 @@ describe('tocObj', () => {
// A heading without an id attribute is reported with an empty-string id.
it('no id attribute', () => {
  const noid = '<h1>Title 1</h1>';
  const result = tocObj(noid);

  result.length.should.eql(1);
  result[0].id.should.eql('');
});

// A heading whose id attribute is present but empty is reported with an empty-string id.
it('empty value in id attribute', () => {
  const noid = '<h1 id="">Title 1</h1>';
  const result = tocObj(noid);

  result.length.should.eql(1);
  result[0].id.should.eql('');
});

it('invalid input', () => {
Expand All @@ -87,4 +83,87 @@ describe('tocObj', () => {

result.length.should.eql(0);
});

// A heading with no content is expected to produce an empty text value.
it('empty text', () => {
  const [heading] = tocObj('<h1></h1>');

  heading.text.should.eql('');
});

describe('children element', () => {
// Anchors holding only a single non-word symbol (optionally padded with
// whitespace) are expected to be left out of the heading text.
it('<a> element with permalink + text', () => {
  const headings = [
    '<h1><a>#</a>foo</h1>',
    '<h1>foo<a>#</a></h1>',
    '<h1><a>#</a>foo<a>#</a></h1>',
    '<h1><a># </a>foo</h1>',
    '<h1><a># </a>foo<a> #</a></h1>',
    '<h1><a>号</a>foo</h1>'
  ];
  const parsed = headings.map(html => tocObj(html));

  for (const [entry] of parsed) {
    entry.text.should.eql('foo');
  }
});

// An anchor wrapping ordinary text is expected to contribute that text.
it('<a> element - no text', () => {
  const [entry] = tocObj('<h1><a>foo</a></h1>');

  entry.text.should.eql('foo');
});

// When the permalink symbol is the heading's only content, the symbol itself
// is expected as the text rather than an empty string.
it('<a> element - single permalink', () => {
  const [entry] = tocObj('<h1><a>#</a></h1>');

  entry.text.should.eql('#');
});

// A single alphanumeric character inside an anchor counts as a word, so it is
// expected to be kept.
it('<a> element - non-permalink', () => {
  const [entry] = tocObj('<h1><a>a</a> one</h1>');

  entry.text.should.eql('a one');
});

// Anchor text and sibling text are expected to be concatenated in document order.
it('non-permalink <a> element + text', () => {
  const headings = [
    '<h1><a>foo</a>bar</h1>',
    '<h1>foo<a>bar</a></h1>'
  ];
  const parsed = headings.map(html => tocObj(html));

  for (const [entry] of parsed) {
    entry.text.should.eql('foobar');
  }
});

// Multi-character non-ASCII anchor text is expected to be kept, not treated as
// a permalink symbol.
it('non-permalink <a> element + unicode text', () => {
  const headings = [
    '<h1><a>这是</a>测试</h1>',
    '<h1>这是<a>测试</a></h1>'
  ];
  const parsed = headings.map(html => tocObj(html));

  for (const [entry] of parsed) {
    entry.text.should.eql('这是测试');
  }
});

// Text from several adjacent anchors is expected to be joined in order.
it('multiple <a> elements', () => {
  const [entry] = tocObj('<h1><a>foo</a><a>bar</a></h1>');

  entry.text.should.eql('foobar');
});

// Text inside non-anchor child elements is expected to be merged with
// sibling text nodes.
it('element + text', () => {
  // NOTE(review): the second fixture contains an unmatched </i>; it may have
  // been meant as <i>bar</i> — confirm before changing the fixture.
  const headings = [
    '<h1><i>foo</i>barbaz</h1>',
    '<h1><i>foo</i>bar</i>baz</h1>',
    '<h1>foo<i>bar</i>baz</h1>',
    '<h1>foobarba<i>z</i></h1>'
  ];
  const parsed = headings.map(html => tocObj(html));

  for (const [entry] of parsed) {
    entry.text.should.eql('foobarbaz');
  }
});
});
});