From a6c1c021f31ea688a7f8e8d904f9db3a18f11876 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20=27Necoro=27=20Neumann?= Date: Sun, 10 May 2020 22:49:18 +0200 Subject: [PATCH] Do not add spaces in front of certain punctuation marks like '.', '?' or ')'. But '(' would still get its space. --- html2text.go | 29 ++++++++++++++++++++++++----- html2text_test.go | 8 ++++++++ 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/html2text.go b/html2text.go index 12da0d4..1fe39fb 100644 --- a/html2text.go +++ b/html2text.go @@ -72,8 +72,8 @@ func FromHTMLNode(doc *html.Node, o ...Options) (string, error) { } ctx := textifyTraverseContext{ - buf: bytes.Buffer{}, - options: options, + buf: bytes.Buffer{}, + options: options, citationMap: map[string]int{}, } if err := ctx.traverse(doc); err != nil { @@ -431,6 +431,25 @@ func (ctx *textifyTraverseContext) traverseChildren(node *html.Node) error { return nil } +// punctNoSpaceBefore reports whether r is a character that must not have a space inserted in front of it. +func punctNoSpaceBefore(r rune) bool { + switch r { + case '.', ',', ';', '!', '?', ')', ']', '>': + return true + default: + return false + } +} + +// punctNoSpaceAfter reports whether r is an opening bracket that must not have a space inserted after it.
+func punctNoSpaceAfter(r rune) bool { + switch r { + case '(', '[', '<': + return true + default: + return false + } +} func (ctx *textifyTraverseContext) emit(data string) error { if data == "" { return nil @@ -441,14 +460,14 @@ func (ctx *textifyTraverseContext) emit(data string) error { ) for _, line := range lines { runes := []rune(line) - startsWithSpace := unicode.IsSpace(runes[0]) - if !startsWithSpace && !ctx.endsWithSpace && !strings.HasPrefix(data, ".") { + startsWithSpace := unicode.IsSpace(runes[0]) || punctNoSpaceBefore(runes[0]) + if !startsWithSpace && !ctx.endsWithSpace { if err = ctx.buf.WriteByte(' '); err != nil { return err } ctx.lineLength++ } - ctx.endsWithSpace = unicode.IsSpace(runes[len(runes)-1]) + ctx.endsWithSpace = unicode.IsSpace(runes[len(runes)-1]) || punctNoSpaceAfter(runes[len(runes)-1]) for _, c := range line { if _, err = ctx.buf.WriteString(string(c)); err != nil { return err diff --git a/html2text_test.go b/html2text_test.go index 71558a5..92f2575 100644 --- a/html2text_test.go +++ b/html2text_test.go @@ -542,6 +542,14 @@ func TestCitationStyleLinks(t *testing.T) { `Link`, "Link", }, + { + `Link1Link2`, + "Link1 [1] Link2 [2]\n\n[1] http://example1.com/\n[2] http://example2.com/", + }, + { + `Link1 (Link2)`, + "Link1 [1] (Link2 [2])\n\n[1] http://example1.com/\n[2] http://example2.com/", + }, { `Link1? Link2!`, "Link1 [1]? Link2 [2]!\n\n[1] http://example1.com/\n[2] http://example2.com/",