Skip to content

Commit

Permalink
Do not add spaces in front of certain punctuation marks like '.', '?'…
Browse files Browse the repository at this point in the history
… or ')'.

But '(' would still get its space.
  • Loading branch information
Necoro committed May 10, 2020
1 parent 7d64848 commit a6c1c02
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 5 deletions.
29 changes: 24 additions & 5 deletions html2text.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ func FromHTMLNode(doc *html.Node, o ...Options) (string, error) {
}

ctx := textifyTraverseContext{
buf: bytes.Buffer{},
options: options,
buf: bytes.Buffer{},
options: options,
citationMap: map[string]int{},
}
if err := ctx.traverse(doc); err != nil {
Expand Down Expand Up @@ -431,6 +431,25 @@ func (ctx *textifyTraverseContext) traverseChildren(node *html.Node) error {
return nil
}

// punctNoSpaceBefore reports whether r is a punctuation mark that should
// not have a space inserted in front of it: sentence punctuation
// ('.', ',', ';', '!', '?') and closing brackets (')', ']', '>').
func punctNoSpaceBefore(r rune) bool {
	// Every member of the set is a single ASCII character, so ranging over
	// the literal yields exactly one rune per punctuation mark.
	const noSpaceBefore = ".,;!?)]>"
	for _, mark := range noSpaceBefore {
		if mark == r {
			return true
		}
	}
	return false
}

// punctNoSpaceAfter reports whether r is an opening bracket ('(', '[' or
// '<') after which no space should be inserted.
func punctNoSpaceAfter(r rune) bool {
	return r == '(' || r == '[' || r == '<'
}
func (ctx *textifyTraverseContext) emit(data string) error {
if data == "" {
return nil
Expand All @@ -441,14 +460,14 @@ func (ctx *textifyTraverseContext) emit(data string) error {
)
for _, line := range lines {
runes := []rune(line)
startsWithSpace := unicode.IsSpace(runes[0])
if !startsWithSpace && !ctx.endsWithSpace && !strings.HasPrefix(data, ".") {
startsWithSpace := unicode.IsSpace(runes[0]) || punctNoSpaceBefore(runes[0])
if !startsWithSpace && !ctx.endsWithSpace {
if err = ctx.buf.WriteByte(' '); err != nil {
return err
}
ctx.lineLength++
}
ctx.endsWithSpace = unicode.IsSpace(runes[len(runes)-1])
ctx.endsWithSpace = unicode.IsSpace(runes[len(runes)-1]) || punctNoSpaceAfter(runes[len(runes)-1])
for _, c := range line {
if _, err = ctx.buf.WriteString(string(c)); err != nil {
return err
Expand Down
8 changes: 8 additions & 0 deletions html2text_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -542,6 +542,14 @@ func TestCitationStyleLinks(t *testing.T) {
`<a href="">Link</a>`,
"Link",
},
{
`<a href="http://example1.com/">Link1</a><a href="http://example2.com/">Link2</a>`,
"Link1 [1] Link2 [2]\n\n[1] http://example1.com/\n[2] http://example2.com/",
},
{
`<a href="http://example1.com/">Link1</a> (<a href="http://example2.com/">Link2</a>)`,
"Link1 [1] (Link2 [2])\n\n[1] http://example1.com/\n[2] http://example2.com/",
},
{
`<a href="http://example1.com/">Link1</a>? <a href="http://example2.com/">Link2</a>!`,
"Link1 [1]? Link2 [2]!\n\n[1] http://example1.com/\n[2] http://example2.com/",
Expand Down

0 comments on commit a6c1c02

Please sign in to comment.