From a6caf55ec8abf60d181267102aca5ed3933e142c Mon Sep 17 00:00:00 2001 From: Mathias Biilmann Christensen Date: Mon, 2 Jan 2017 21:54:52 -0800 Subject: [PATCH 1/7] Add truncate template function This commit adds a truncate template function for safely truncating text without breaking words. The truncate function is HTML aware, so if the input text is a template.HTML it will be truncated without leaving broken or unclosed HTML tags. {{ "this is a very long text" | truncate 10 " ..." }} {{ "With [Markdown](/markdown) inside." | markdownify | truncate 10 }} --- docs/content/templates/functions.md | 9 ++ tpl/template_funcs.go | 135 +++++++++++++++++++++++++++- tpl/template_funcs_test.go | 55 ++++++++++++ 3 files changed, 198 insertions(+), 1 deletion(-) diff --git a/docs/content/templates/functions.md b/docs/content/templates/functions.md index 10be0f7de38..7ed4c4c3b45 100644 --- a/docs/content/templates/functions.md +++ b/docs/content/templates/functions.md @@ -662,6 +662,15 @@ e.g. * `{{slicestr "BatMan" 3}}` → "Man" * `{{slicestr "BatMan" 0 3}}` → "Bat" +### truncate + +Truncate a text to a max length without cutting words or HTML tags in half. Since go templates are HTML aware, truncate will handle normal strings vs HTML strings intelligently. + +e.q. + +* `{{ "this is a text" | truncate 10 " ..." }}` → this is a ... +* `{{ "With [Markdown](#markdown) inside." | markdownify | truncate 10 }}` → With <a href='#markdown'>Markdown …</a> + ### split Split a string into substrings separated by a delimiter. 
diff --git a/tpl/template_funcs.go b/tpl/template_funcs.go index fd333745411..2a79a2937c4 100644 --- a/tpl/template_funcs.go +++ b/tpl/template_funcs.go @@ -38,6 +38,7 @@ import ( "strings" "sync" "time" + "unicode" "unicode/utf8" "github.com/bep/inflect" @@ -55,7 +56,14 @@ import ( ) var ( - funcMap template.FuncMap + funcMap template.FuncMap + tagRE = regexp.MustCompile(`(?s)<(/)?([^ ]+?)(?:(\s*/)| .*?)?>`) + htmlRE = regexp.MustCompile(`(?s)<.*?>|((?:\w[-\w]*|&.*?;)+)`) + htmlSinglets = map[string]bool{ + "br": true, "col": true, "link": true, + "base": true, "img": true, "param": true, + "area": true, "hr": true, "input": true, + } ) // eq returns the boolean truth of arg1 == arg2. @@ -239,6 +247,130 @@ func slicestr(a interface{}, startEnd ...interface{}) (string, error) { } +func truncate(a interface{}, options ...interface{}) (template.HTML, error) { + length, err := cast.ToIntE(a) + if err != nil { + return "", err + } + var textParam interface{} + var ellipsis template.HTML + + switch len(options) { + case 0: + return "", errors.New("truncate requires a length and a string") + case 1: + textParam = options[0] + ellipsis = " …" + case 2: + textParam = options[1] + var ok bool + if ellipsis, ok = options[0].(template.HTML); !ok { + s, e := cast.ToStringE(options[0]) + if e != nil { + return "", errors.New("ellipsis must be a string") + } + ellipsis = template.HTML(html.EscapeString(s)) + } + default: + return "", errors.New("too many arguments passed to truncate") + } + if err != nil { + return "", errors.New("text to truncate must be a string") + } + text, err := cast.ToStringE(textParam) + if err != nil { + return "", errors.New("text must be a string") + } + + if html, ok := textParam.(template.HTML); ok { + return truncateHTML(length, ellipsis, html) + } + + if len(text) <= length { + return template.HTML(html.EscapeString(text)), nil + } + + var lastWordIndex int + var lastNonSpace int + for i, r := range text { + if unicode.IsSpace(r) { + lastWordIndex 
= lastNonSpace + } else { + lastNonSpace = i + } + if i >= length { + return template.HTML(html.EscapeString(text[0:lastWordIndex+1])) + ellipsis, nil + } + } + + return template.HTML(html.EscapeString(text)), nil +} + +func truncateHTML(length int, ellipsis, text template.HTML) (template.HTML, error) { + if len(text) <= length { + return text, nil + } + + var pos, endTextPos, currentLen int + openTags := []string{} + + for currentLen < length { + slice := string(text[pos:]) + m := htmlRE.FindStringSubmatchIndex(slice) + if len(m) == 0 { + // Checked through whole string + break + } + + pos += m[1] + if len(m) == 4 && m[3]-m[2] > 0 { + // It's an actual non-HTML word or char + currentLen += (m[3] - m[2]) + 1 // 1 space between each word + if currentLen >= length { + endTextPos = pos + } + continue + } + + tag := tagRE.FindStringSubmatch(slice[m[0]:m[1]]) + if len(tag) == 0 || currentLen >= length { + // Don't worry about non tags or tags after our truncate point + continue + } + closingTag := tag[1] + tagname := strings.ToLower(tag[2]) + selfClosing := tag[3] + + _, singlet := htmlSinglets[tagname] + if !singlet && selfClosing == "" { + if closingTag == "" { + // Add it to the start of the open tags list + openTags = append([]string{tagname}, openTags...) + } else { + for i, tag := range openTags { + if tag == tagname { + // SGML: An end tag closes, back to the matching start tag, + // all unclosed intervening start tags with omitted end tags + openTags = openTags[i+1:] + break + } + } + } + } + } + + if currentLen < length { + return text, nil + } + + out := text[0:endTextPos] + out += ellipsis + for _, tag := range openTags { + out += ("") + } + return out, nil +} + // hasPrefix tests whether the input s begins with prefix. 
func hasPrefix(s, prefix interface{}) (bool, error) { ss, err := cast.ToStringE(s) @@ -2188,6 +2320,7 @@ func initFuncMap() { "title": title, "time": asTime, "trim": trim, + "truncate": truncate, "upper": upper, "urlize": helpers.CurrentPathSpec().URLize, "where": where, diff --git a/tpl/template_funcs_test.go b/tpl/template_funcs_test.go index 37f075a9932..76c445d70be 100644 --- a/tpl/template_funcs_test.go +++ b/tpl/template_funcs_test.go @@ -157,6 +157,8 @@ substr: {{substr "BatMan" 3 3}} title: {{title "Bat man"}} time: {{ (time "2015-01-21").Year }} trim: {{ trim "++Batman--" "+-" }} +truncate: {{ "this is a very long text" | truncate 10 " ..." }} +truncate: {{ "With [Markdown](/markdown) inside." | markdownify | truncate 10 }} upper: {{upper "BatMan"}} urlize: {{ "Bat Man" | urlize }} ` @@ -228,6 +230,8 @@ substr: Man title: Bat Man time: 2015 trim: Batman +truncate: this is a ... +truncate: With Markdown … upper: BATMAN urlize: bat-man ` @@ -815,6 +819,57 @@ func TestSlicestr(t *testing.T) { } } +func TestTruncate(t *testing.T) { + var err error + cases := []struct { + v1 interface{} + v2 interface{} + v3 interface{} + want interface{} + isErr bool + }{ + {10, "I am a test sentence", nil, template.HTML("I am a …"), false}, + {10, "", "I am a test sentence", template.HTML("I am a"), false}, + {10, "", "a b c d e f g h i j k", template.HTML("a b c d e"), false}, + {12, "", "Should be escaped", template.HTML("<b>Should be"), false}, + {10, template.HTML(" Read more"), "I am a test sentence", template.HTML("I am a Read more"), false}, + {10, template.HTML("I have a Markdown link inside."), nil, template.HTML("I have a Markdown …"), false}, + {10, nil, nil, template.HTML(""), true}, + {nil, nil, nil, template.HTML(""), true}, + } + for i, c := range cases { + var result template.HTML + if c.v2 == nil { + result, err = truncate(c.v1) + } else if c.v3 == nil { + result, err = truncate(c.v1, c.v2) + } else { + result, err = truncate(c.v1, c.v2, c.v3) + } + + if 
c.isErr { + if err == nil { + t.Errorf("[%d] Slice didn't return an expected error", i) + } + } else { + if err != nil { + t.Errorf("[%d] failed: %s", i, err) + continue + } + if !reflect.DeepEqual(result, c.want) { + t.Errorf("[%d] got '%s' but expected '%s'", i, result, c.want) + } + } + } + + // Too many arguments + _, err = truncate(10, " ...", "I am a test sentence", "wrong") + if err == nil { + t.Errorf("Should have errored") + } + +} + func TestHasPrefix(t *testing.T) { cases := []struct { s interface{} From ee61aab7f06ed749c40c74dc879e4d72e361f8dc Mon Sep 17 00:00:00 2001 From: Mathias Biilmann Christensen Date: Wed, 4 Jan 2017 23:34:29 -0800 Subject: [PATCH 2/7] Make truncate work with Unicode Add test cases for some edge cases and Japanese characters --- tpl/template_func_truncate.go | 164 +++++++++++++++++++++++++++++ tpl/template_func_truncate_test.go | 76 +++++++++++++ tpl/template_funcs.go | 134 +---------------------- tpl/template_funcs_test.go | 53 +--------- 4 files changed, 242 insertions(+), 185 deletions(-) create mode 100644 tpl/template_func_truncate.go create mode 100644 tpl/template_func_truncate_test.go diff --git a/tpl/template_func_truncate.go b/tpl/template_func_truncate.go new file mode 100644 index 00000000000..f0f3ba9ef63 --- /dev/null +++ b/tpl/template_func_truncate.go @@ -0,0 +1,164 @@ +// Copyright 2016 The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package tpl + +import ( + "errors" + "html" + "html/template" + "regexp" + "unicode" + "unicode/utf8" + + "github.com/spf13/cast" +) + +var ( + tagRE = regexp.MustCompile(`^<(/)?([^ ]+?)(?:(\s*/)| .*?)?>`) + htmlSinglets = map[string]bool{ + "br": true, "col": true, "link": true, + "base": true, "img": true, "param": true, + "area": true, "hr": true, "input": true, + } +) + +type openTag struct { + name string + pos int +} + +func truncate(a interface{}, options ...interface{}) (template.HTML, error) { + length, err := cast.ToIntE(a) + if err != nil { + return "", err + } + var textParam interface{} + var ellipsis template.HTML + + switch len(options) { + case 0: + return "", errors.New("truncate requires a length and a string") + case 1: + textParam = options[0] + ellipsis = " …" + case 2: + textParam = options[1] + var ok bool + if ellipsis, ok = options[0].(template.HTML); !ok { + s, e := cast.ToStringE(options[0]) + if e != nil { + return "", errors.New("ellipsis must be a string") + } + ellipsis = template.HTML(html.EscapeString(s)) + } + default: + return "", errors.New("too many arguments passed to truncate") + } + if err != nil { + return "", errors.New("text to truncate must be a string") + } + text, err := cast.ToStringE(textParam) + if err != nil { + return "", errors.New("text must be a string") + } + + if html, ok := textParam.(template.HTML); ok { + return truncateHTML(length, ellipsis, html) + } + + if utf8.RuneCountInString(text) <= length { + return template.HTML(html.EscapeString(text)), nil + } + + var lastWordIndex, lastNonSpace, currentLen int + for i, r := range text { + currentLen++ + if unicode.IsSpace(r) { + lastWordIndex = lastNonSpace + } else if unicode.In(r, unicode.Han, unicode.Hangul, unicode.Hiragana, unicode.Katakana) { + lastWordIndex = i + } else { + lastNonSpace = i + utf8.RuneLen(r) + } + if currentLen > length { + if lastWordIndex == 0 { + return template.HTML(html.EscapeString(text[0:i])) + ellipsis, nil + } + return 
template.HTML(html.EscapeString(text[0:lastWordIndex])) + ellipsis, nil + } + } + + return template.HTML(html.EscapeString(text)), nil +} + +func truncateHTML(length int, ellipsis, text template.HTML) (template.HTML, error) { + if utf8.RuneCountInString(string(text)) <= length { + return text, nil + } + + openTags := []openTag{} + + var lastWordIndex, lastNonSpace, currentLen, endTextPos, nextTag int + for i, r := range text { + if i < nextTag { + continue + } + slice := string(text[i:]) + m := tagRE.FindStringSubmatchIndex(slice) + if len(m) > 0 && m[0] == 0 { + tagname := slice[m[4]:m[5]] + if m[2] == -1 { + openTags = append(openTags, openTag{name: tagname, pos: i}) + } else { + // SGML: An end tag closes, back to the matching start tag, + // all unclosed intervening start tags with omitted end tags + for i, tag := range openTags { + if tag.name == tagname { + openTags = openTags[i:] + break + } + } + } + nextTag = i + m[1] + continue + } + + currentLen++ + if unicode.IsSpace(r) { + lastWordIndex = lastNonSpace + } else if unicode.In(r, unicode.Han, unicode.Hangul, unicode.Hiragana, unicode.Katakana) { + lastWordIndex = i + } else { + lastNonSpace = i + utf8.RuneLen(r) + } + if currentLen > length { + if lastWordIndex == 0 { + endTextPos = i + } else { + endTextPos = lastWordIndex + } + out := text[0:endTextPos] + ellipsis + for _, tag := range openTags { + if tag.pos > endTextPos { + break + } + out += ("") + } + + return out, nil + } + } + + return text, nil +} diff --git a/tpl/template_func_truncate_test.go b/tpl/template_func_truncate_test.go new file mode 100644 index 00000000000..20eabb9ab4d --- /dev/null +++ b/tpl/template_func_truncate_test.go @@ -0,0 +1,76 @@ +// Copyright 2016 The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tpl + +import ( + "html/template" + "reflect" + "testing" +) + +func TestTruncate(t *testing.T) { + var err error + cases := []struct { + v1 interface{} + v2 interface{} + v3 interface{} + want interface{} + isErr bool + }{ + {10, "I am a test sentence", nil, template.HTML("I am a …"), false}, + {10, "", "I am a test sentence", template.HTML("I am a"), false}, + {10, "", "a b c d e f g h i j k", template.HTML("a b c d e"), false}, + {12, "", "Should be escaped", template.HTML("<b>Should be"), false}, + {10, template.HTML(" Read more"), "I am a test sentence", template.HTML("I am a Read more"), false}, + {20, template.HTML("I have a Markdown link inside."), nil, template.HTML("I have a Markdown …"), false}, + {10, "IamanextremelylongwordthatjustgoesonandonandonjusttoannoyyoualmostasifIwaswritteninGermanActuallyIbettheresagermanwordforthis", nil, template.HTML("Iamanextre …"), false}, + {10, template.HTML("

IamanextremelylongwordthatjustgoesonandonandonjusttoannoyyoualmostasifIwaswritteninGermanActuallyIbettheresagermanwordforthis

"), nil, template.HTML("

Iamanextre …

"), false}, + {13, template.HTML("With Markdown inside."), nil, template.HTML("With Markdown …"), false}, + {14, "Hello中国 Good 好的", nil, template.HTML("Hello中国 Good 好 …"), false}, + {14, template.HTML("

Hello中国 Good 好的

"), nil, template.HTML("

Hello中国 Good 好 …

"), false}, + {10, nil, nil, template.HTML(""), true}, + {nil, nil, nil, template.HTML(""), true}, + } + for i, c := range cases { + var result template.HTML + if c.v2 == nil { + result, err = truncate(c.v1) + } else if c.v3 == nil { + result, err = truncate(c.v1, c.v2) + } else { + result, err = truncate(c.v1, c.v2, c.v3) + } + + if c.isErr { + if err == nil { + t.Errorf("[%d] Slice didn't return an expected error", i) + } + } else { + if err != nil { + t.Errorf("[%d] failed: %s", i, err) + continue + } + if !reflect.DeepEqual(result, c.want) { + t.Errorf("[%d] got '%s' but expected '%s'", i, result, c.want) + } + } + } + + // Too many arguments + _, err = truncate(10, " ...", "I am a test sentence", "wrong") + if err == nil { + t.Errorf("Should have errored") + } + +} diff --git a/tpl/template_funcs.go b/tpl/template_funcs.go index 2a79a2937c4..596902fcdda 100644 --- a/tpl/template_funcs.go +++ b/tpl/template_funcs.go @@ -38,7 +38,6 @@ import ( "strings" "sync" "time" - "unicode" "unicode/utf8" "github.com/bep/inflect" @@ -56,14 +55,7 @@ import ( ) var ( - funcMap template.FuncMap - tagRE = regexp.MustCompile(`(?s)<(/)?([^ ]+?)(?:(\s*/)| .*?)?>`) - htmlRE = regexp.MustCompile(`(?s)<.*?>|((?:\w[-\w]*|&.*?;)+)`) - htmlSinglets = map[string]bool{ - "br": true, "col": true, "link": true, - "base": true, "img": true, "param": true, - "area": true, "hr": true, "input": true, - } + funcMap template.FuncMap ) // eq returns the boolean truth of arg1 == arg2. 
@@ -247,130 +239,6 @@ func slicestr(a interface{}, startEnd ...interface{}) (string, error) { } -func truncate(a interface{}, options ...interface{}) (template.HTML, error) { - length, err := cast.ToIntE(a) - if err != nil { - return "", err - } - var textParam interface{} - var ellipsis template.HTML - - switch len(options) { - case 0: - return "", errors.New("truncate requires a length and a string") - case 1: - textParam = options[0] - ellipsis = " …" - case 2: - textParam = options[1] - var ok bool - if ellipsis, ok = options[0].(template.HTML); !ok { - s, e := cast.ToStringE(options[0]) - if e != nil { - return "", errors.New("ellipsis must be a string") - } - ellipsis = template.HTML(html.EscapeString(s)) - } - default: - return "", errors.New("too many arguments passed to truncate") - } - if err != nil { - return "", errors.New("text to truncate must be a string") - } - text, err := cast.ToStringE(textParam) - if err != nil { - return "", errors.New("text must be a string") - } - - if html, ok := textParam.(template.HTML); ok { - return truncateHTML(length, ellipsis, html) - } - - if len(text) <= length { - return template.HTML(html.EscapeString(text)), nil - } - - var lastWordIndex int - var lastNonSpace int - for i, r := range text { - if unicode.IsSpace(r) { - lastWordIndex = lastNonSpace - } else { - lastNonSpace = i - } - if i >= length { - return template.HTML(html.EscapeString(text[0:lastWordIndex+1])) + ellipsis, nil - } - } - - return template.HTML(html.EscapeString(text)), nil -} - -func truncateHTML(length int, ellipsis, text template.HTML) (template.HTML, error) { - if len(text) <= length { - return text, nil - } - - var pos, endTextPos, currentLen int - openTags := []string{} - - for currentLen < length { - slice := string(text[pos:]) - m := htmlRE.FindStringSubmatchIndex(slice) - if len(m) == 0 { - // Checked through whole string - break - } - - pos += m[1] - if len(m) == 4 && m[3]-m[2] > 0 { - // It's an actual non-HTML word or char - 
currentLen += (m[3] - m[2]) + 1 // 1 space between each word - if currentLen >= length { - endTextPos = pos - } - continue - } - - tag := tagRE.FindStringSubmatch(slice[m[0]:m[1]]) - if len(tag) == 0 || currentLen >= length { - // Don't worry about non tags or tags after our truncate point - continue - } - closingTag := tag[1] - tagname := strings.ToLower(tag[2]) - selfClosing := tag[3] - - _, singlet := htmlSinglets[tagname] - if !singlet && selfClosing == "" { - if closingTag == "" { - // Add it to the start of the open tags list - openTags = append([]string{tagname}, openTags...) - } else { - for i, tag := range openTags { - if tag == tagname { - // SGML: An end tag closes, back to the matching start tag, - // all unclosed intervening start tags with omitted end tags - openTags = openTags[i+1:] - break - } - } - } - } - } - - if currentLen < length { - return text, nil - } - - out := text[0:endTextPos] - out += ellipsis - for _, tag := range openTags { - out += ("") - } - return out, nil -} - // hasPrefix tests whether the input s begins with prefix. func hasPrefix(s, prefix interface{}) (bool, error) { ss, err := cast.ToStringE(s) diff --git a/tpl/template_funcs_test.go b/tpl/template_funcs_test.go index 76c445d70be..fd51e3a1a1f 100644 --- a/tpl/template_funcs_test.go +++ b/tpl/template_funcs_test.go @@ -158,7 +158,7 @@ title: {{title "Bat man"}} time: {{ (time "2015-01-21").Year }} trim: {{ trim "++Batman--" "+-" }} truncate: {{ "this is a very long text" | truncate 10 " ..." }} -truncate: {{ "With [Markdown](/markdown) inside." | markdownify | truncate 10 }} +truncate: {{ "With [Markdown](/markdown) inside." 
| markdownify | truncate 14 }} upper: {{upper "BatMan"}} urlize: {{ "Bat Man" | urlize }} ` @@ -819,57 +819,6 @@ func TestSlicestr(t *testing.T) { } } -func TestTruncate(t *testing.T) { - var err error - cases := []struct { - v1 interface{} - v2 interface{} - v3 interface{} - want interface{} - isErr bool - }{ - {10, "I am a test sentence", nil, template.HTML("I am a …"), false}, - {10, "", "I am a test sentence", template.HTML("I am a"), false}, - {10, "", "a b c d e f g h i j k", template.HTML("a b c d e"), false}, - {12, "", "Should be escaped", template.HTML("<b>Should be"), false}, - {10, template.HTML(" Read more"), "I am a test sentence", template.HTML("I am a Read more"), false}, - {10, template.HTML("I have a Markdown link inside."), nil, template.HTML("I have a Markdown …"), false}, - {10, nil, nil, template.HTML(""), true}, - {nil, nil, nil, template.HTML(""), true}, - } - for i, c := range cases { - var result template.HTML - if c.v2 == nil { - result, err = truncate(c.v1) - } else if c.v3 == nil { - result, err = truncate(c.v1, c.v2) - } else { - result, err = truncate(c.v1, c.v2, c.v3) - } - - if c.isErr { - if err == nil { - t.Errorf("[%d] Slice didn't return an expected error", i) - } - } else { - if err != nil { - t.Errorf("[%d] failed: %s", i, err) - continue - } - if !reflect.DeepEqual(result, c.want) { - t.Errorf("[%d] got '%s' but expected '%s'", i, result, c.want) - } - } - } - - // Too many arguments - _, err = truncate(10, " ...", "I am a test sentence", "wrong") - if err == nil { - t.Errorf("Should have errored") - } - -} - func TestHasPrefix(t *testing.T) { cases := []struct { s interface{} From 0585b616623cbfd902a3a1b66a14d821d52b97cd Mon Sep 17 00:00:00 2001 From: Mathias Biilmann Christensen Date: Thu, 5 Jan 2017 00:58:50 -0800 Subject: [PATCH 3/7] Handle self closing tags --- tpl/template_func_truncate.go | 6 +++++- tpl/template_func_truncate_test.go | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git 
a/tpl/template_func_truncate.go b/tpl/template_func_truncate.go index f0f3ba9ef63..21f409d5fb4 100644 --- a/tpl/template_func_truncate.go +++ b/tpl/template_func_truncate.go @@ -117,7 +117,12 @@ func truncateHTML(length int, ellipsis, text template.HTML) (template.HTML, erro slice := string(text[i:]) m := tagRE.FindStringSubmatchIndex(slice) if len(m) > 0 && m[0] == 0 { + nextTag = i + m[1] tagname := slice[m[4]:m[5]] + _, singlet := htmlSinglets[tagname] + if singlet || m[6] != -1 { + continue + } if m[2] == -1 { openTags = append(openTags, openTag{name: tagname, pos: i}) } else { @@ -130,7 +135,6 @@ func truncateHTML(length int, ellipsis, text template.HTML) (template.HTML, erro } } } - nextTag = i + m[1] continue } diff --git a/tpl/template_func_truncate_test.go b/tpl/template_func_truncate_test.go index 20eabb9ab4d..d2f84f92504 100644 --- a/tpl/template_func_truncate_test.go +++ b/tpl/template_func_truncate_test.go @@ -38,6 +38,7 @@ func TestTruncate(t *testing.T) { {10, template.HTML("

IamanextremelylongwordthatjustgoesonandonandonjusttoannoyyoualmostasifIwaswritteninGermanActuallyIbettheresagermanwordforthis

"), nil, template.HTML("

Iamanextre …

"), false}, {13, template.HTML("With Markdown inside."), nil, template.HTML("With Markdown …"), false}, {14, "Hello中国 Good 好的", nil, template.HTML("Hello中国 Good 好 …"), false}, + {15, "", template.HTML("A
tag that's not closed"), template.HTML("A
tag that's"), false}, {14, template.HTML("

Hello中国 Good 好的

"), nil, template.HTML("

Hello中国 Good 好 …

"), false}, {10, nil, nil, template.HTML(""), true}, {nil, nil, nil, template.HTML(""), true}, From a7ec4f3afcb80dd278b86f2a11e9c9eb11fc60e9 Mon Sep 17 00:00:00 2001 From: Mathias Biilmann Christensen Date: Thu, 5 Jan 2017 10:35:51 -0800 Subject: [PATCH 4/7] Fix truncation edge case --- tpl/template_func_truncate.go | 3 ++- tpl/template_func_truncate_test.go | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tpl/template_func_truncate.go b/tpl/template_func_truncate.go index 21f409d5fb4..d2e47a2601e 100644 --- a/tpl/template_func_truncate.go +++ b/tpl/template_func_truncate.go @@ -108,8 +108,8 @@ func truncateHTML(length int, ellipsis, text template.HTML) (template.HTML, erro } openTags := []openTag{} - var lastWordIndex, lastNonSpace, currentLen, endTextPos, nextTag int + for i, r := range text { if i < nextTag { continue @@ -119,6 +119,7 @@ func truncateHTML(length int, ellipsis, text template.HTML) (template.HTML, erro if len(m) > 0 && m[0] == 0 { nextTag = i + m[1] tagname := slice[m[4]:m[5]] + lastWordIndex = lastNonSpace _, singlet := htmlSinglets[tagname] if singlet || m[6] != -1 { continue diff --git a/tpl/template_func_truncate_test.go b/tpl/template_func_truncate_test.go index d2f84f92504..544b44ef4b5 100644 --- a/tpl/template_func_truncate_test.go +++ b/tpl/template_func_truncate_test.go @@ -40,6 +40,7 @@ func TestTruncate(t *testing.T) { {14, "Hello中国 Good 好的", nil, template.HTML("Hello中国 Good 好 …"), false}, {15, "", template.HTML("A
tag that's not closed"), template.HTML("A
tag that's"), false}, {14, template.HTML("

Hello中国 Good 好的

"), nil, template.HTML("

Hello中国 Good 好 …

"), false}, + {2, template.HTML("

P1

P2

"), nil, template.HTML("

P1 …

"), false}, {10, nil, nil, template.HTML(""), true}, {nil, nil, nil, template.HTML(""), true}, } From 9496e952d78dbfb47d1d53a3b02b777c2051937e Mon Sep 17 00:00:00 2001 From: Mathias Biilmann Christensen Date: Thu, 5 Jan 2017 11:09:59 -0800 Subject: [PATCH 5/7] Just 1 code branch for handling truncation Avoid having two separate code branches for truncating text and HTML --- tpl/template_func_truncate.go | 101 +++++++++++++++------------------- 1 file changed, 44 insertions(+), 57 deletions(-) diff --git a/tpl/template_func_truncate.go b/tpl/template_func_truncate.go index d2e47a2601e..42700eb1031 100644 --- a/tpl/template_func_truncate.go +++ b/tpl/template_func_truncate.go @@ -73,38 +73,13 @@ func truncate(a interface{}, options ...interface{}) (template.HTML, error) { return "", errors.New("text must be a string") } - if html, ok := textParam.(template.HTML); ok { - return truncateHTML(length, ellipsis, html) - } + _, isHTML := textParam.(template.HTML) if utf8.RuneCountInString(text) <= length { - return template.HTML(html.EscapeString(text)), nil - } - - var lastWordIndex, lastNonSpace, currentLen int - for i, r := range text { - currentLen++ - if unicode.IsSpace(r) { - lastWordIndex = lastNonSpace - } else if unicode.In(r, unicode.Han, unicode.Hangul, unicode.Hiragana, unicode.Katakana) { - lastWordIndex = i - } else { - lastNonSpace = i + utf8.RuneLen(r) + if isHTML { + return template.HTML(text), nil } - if currentLen > length { - if lastWordIndex == 0 { - return template.HTML(html.EscapeString(text[0:i])) + ellipsis, nil - } - return template.HTML(html.EscapeString(text[0:lastWordIndex])) + ellipsis, nil - } - } - - return template.HTML(html.EscapeString(text)), nil -} - -func truncateHTML(length int, ellipsis, text template.HTML) (template.HTML, error) { - if utf8.RuneCountInString(string(text)) <= length { - return text, nil + return template.HTML(html.EscapeString(text)), nil } openTags := []openTag{} @@ -114,29 +89,33 @@ func truncateHTML(length int, 
ellipsis, text template.HTML) (template.HTML, erro if i < nextTag { continue } - slice := string(text[i:]) - m := tagRE.FindStringSubmatchIndex(slice) - if len(m) > 0 && m[0] == 0 { - nextTag = i + m[1] - tagname := slice[m[4]:m[5]] - lastWordIndex = lastNonSpace - _, singlet := htmlSinglets[tagname] - if singlet || m[6] != -1 { - continue - } - if m[2] == -1 { - openTags = append(openTags, openTag{name: tagname, pos: i}) - } else { - // SGML: An end tag closes, back to the matching start tag, - // all unclosed intervening start tags with omitted end tags - for i, tag := range openTags { - if tag.name == tagname { - openTags = openTags[i:] - break + + if isHTML { + // Make sure we keep tag of HTML tags + slice := string(text[i:]) + m := tagRE.FindStringSubmatchIndex(slice) + if len(m) > 0 && m[0] == 0 { + nextTag = i + m[1] + tagname := slice[m[4]:m[5]] + lastWordIndex = lastNonSpace + _, singlet := htmlSinglets[tagname] + if singlet || m[6] != -1 { + continue + } + if m[2] == -1 { + openTags = append(openTags, openTag{name: tagname, pos: i}) + } else { + // SGML: An end tag closes, back to the matching start tag, + // all unclosed intervening start tags with omitted end tags + for i, tag := range openTags { + if tag.name == tagname { + openTags = openTags[i:] + break + } } } + continue } - continue } currentLen++ @@ -147,23 +126,31 @@ func truncateHTML(length int, ellipsis, text template.HTML) (template.HTML, erro } else { lastNonSpace = i + utf8.RuneLen(r) } + if currentLen > length { if lastWordIndex == 0 { endTextPos = i } else { endTextPos = lastWordIndex } - out := text[0:endTextPos] + ellipsis - for _, tag := range openTags { - if tag.pos > endTextPos { - break + out := text[0:endTextPos] + if isHTML { + // Close out any open HTML tags + out += string(ellipsis) + for _, tag := range openTags { + if tag.pos > endTextPos { + break + } + out += ("") } - out += ("") + return template.HTML(out), nil } - - return out, nil + return 
template.HTML(html.EscapeString(out)) + ellipsis, nil } } - return text, nil + if isHTML { + return template.HTML(text), nil + } + return template.HTML(html.EscapeString(text)), nil } From 5f30c36c708adbb966659d5e42a7c79ba0c2a4e6 Mon Sep 17 00:00:00 2001 From: Mathias Biilmann Christensen Date: Thu, 5 Jan 2017 11:49:03 -0800 Subject: [PATCH 6/7] Get rid of unnecessary string cast --- tpl/template_func_truncate.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpl/template_func_truncate.go b/tpl/template_func_truncate.go index 42700eb1031..c3e7a34c989 100644 --- a/tpl/template_func_truncate.go +++ b/tpl/template_func_truncate.go @@ -92,7 +92,7 @@ func truncate(a interface{}, options ...interface{}) (template.HTML, error) { if isHTML { // Make sure we keep tag of HTML tags - slice := string(text[i:]) + slice := text[i:] m := tagRE.FindStringSubmatchIndex(slice) if len(m) > 0 && m[0] == 0 { nextTag = i + m[1] From fdb12e6a56213ed1e06b703d6ece94a95f82ed9a Mon Sep 17 00:00:00 2001 From: Mathias Biilmann Christensen Date: Thu, 5 Jan 2017 18:38:36 -0800 Subject: [PATCH 7/7] Rewrite tag closing code for truncate --- docs/content/templates/functions.md | 9 ++-- tpl/template_func_truncate.go | 64 ++++++++++++++--------------- tpl/template_func_truncate_test.go | 4 ++ 3 files changed, 41 insertions(+), 36 deletions(-) diff --git a/docs/content/templates/functions.md b/docs/content/templates/functions.md index 7ed4c4c3b45..7dc5f32feeb 100644 --- a/docs/content/templates/functions.md +++ b/docs/content/templates/functions.md @@ -664,12 +664,13 @@ e.g. ### truncate -Truncate a text to a max length without cutting words or HTML tags in half. Since go templates are HTML aware, truncate will handle normal strings vs HTML strings intelligently. +Truncate a text to a max length without cutting words or leaving unclosed HTML tags. Since Go templates are HTML-aware, truncate will handle normal strings vs HTML strings intelligently. 
It's important to note that if you have a raw string that contains HTML tags that you want treated as HTML, you will need to convert the string to HTML using the safeHTML template function before sending the value to truncate; otherwise, the HTML tags will be escaped by truncate. -e.q. +e.g. -* `{{ "this is a text" | truncate 10 " ..." }}` → this is a ... -* `{{ "With [Markdown](#markdown) inside." | markdownify | truncate 10 }}` → With <a href='#markdown'>Markdown …</a> +* `{{ "this is a text" | truncate 10 " ..." }}` → `this is a ...` +* `{{ "Keep my HTML" | safeHTML | truncate 10 }}` → `Keep my …` +* `{{ "With [Markdown](#markdown) inside." | markdownify | truncate 10 }}` → `With Markdown …` ### split diff --git a/tpl/template_func_truncate.go b/tpl/template_func_truncate.go index c3e7a34c989..b5886edaea7 100644 --- a/tpl/template_func_truncate.go +++ b/tpl/template_func_truncate.go @@ -33,9 +33,10 @@ var ( } ) -type openTag struct { - name string - pos int +type htmlTag struct { + name string + pos int + openTag bool } func truncate(a interface{}, options ...interface{}) (template.HTML, error) { @@ -44,7 +45,7 @@ func truncate(a interface{}, options ...interface{}) (template.HTML, error) { return "", err } var textParam interface{} - var ellipsis template.HTML + var ellipsis string switch len(options) { case 0: @@ -54,13 +55,12 @@ func truncate(a interface{}, options ...interface{}) (template.HTML, error) { ellipsis = " …" case 2: textParam = options[1] - var ok bool - if ellipsis, ok = options[0].(template.HTML); !ok { - s, e := cast.ToStringE(options[0]) - if e != nil { - return "", errors.New("ellipsis must be a string") - } - ellipsis = template.HTML(html.EscapeString(s)) + ellipsis, err = cast.ToStringE(options[0]) + if err != nil { + return "", errors.New("ellipsis must be a string") + } + if _, ok := options[0].(template.HTML); !ok { + ellipsis = html.EscapeString(ellipsis) } default: return "", errors.New("too many arguments passed to truncate") @@ 
-82,7 +82,7 @@ func truncate(a interface{}, options ...interface{}) (template.HTML, error) { return template.HTML(html.EscapeString(text)), nil } - openTags := []openTag{} + tags := []htmlTag{} var lastWordIndex, lastNonSpace, currentLen, endTextPos, nextTag int for i, r := range text { @@ -99,21 +99,10 @@ func truncate(a interface{}, options ...interface{}) (template.HTML, error) { tagname := slice[m[4]:m[5]] lastWordIndex = lastNonSpace _, singlet := htmlSinglets[tagname] - if singlet || m[6] != -1 { - continue - } - if m[2] == -1 { - openTags = append(openTags, openTag{name: tagname, pos: i}) - } else { - // SGML: An end tag closes, back to the matching start tag, - // all unclosed intervening start tags with omitted end tags - for i, tag := range openTags { - if tag.name == tagname { - openTags = openTags[i:] - break - } - } + if !singlet && m[6] == -1 { + tags = append(tags, htmlTag{name: tagname, pos: i, openTag: m[2] == -1}) } + continue } } @@ -135,17 +124,28 @@ func truncate(a interface{}, options ...interface{}) (template.HTML, error) { } out := text[0:endTextPos] if isHTML { + out += ellipsis // Close out any open HTML tags - out += string(ellipsis) - for _, tag := range openTags { - if tag.pos > endTextPos { - break + var currentTag *htmlTag + for i := len(tags) - 1; i >= 0; i-- { + tag := tags[i] + if tag.pos >= endTextPos || currentTag != nil { + if currentTag != nil && currentTag.name == tag.name { + currentTag = nil + } + continue + } + + if tag.openTag { + out += ("") + } else { + currentTag = &tag } - out += ("") } + return template.HTML(out), nil } - return template.HTML(html.EscapeString(out)) + ellipsis, nil + return template.HTML(html.EscapeString(out) + ellipsis), nil } } diff --git a/tpl/template_func_truncate_test.go b/tpl/template_func_truncate_test.go index 544b44ef4b5..95368f83ef1 100644 --- a/tpl/template_func_truncate_test.go +++ b/tpl/template_func_truncate_test.go @@ -16,6 +16,7 @@ package tpl import ( "html/template" "reflect" + 
"strings" "testing" ) @@ -41,6 +42,9 @@ func TestTruncate(t *testing.T) { {15, "", template.HTML("A
tag that's not closed"), template.HTML("A
tag that's"), false}, {14, template.HTML("

Hello中国 Good 好的

"), nil, template.HTML("

Hello中国 Good 好 …

"), false}, {2, template.HTML("

P1

P2

"), nil, template.HTML("

P1 …

"), false}, + {3, template.HTML(strings.Repeat("

P

", 20)), nil, template.HTML("

P

P

P …

"), false}, + {18, template.HTML("

test hello test something

"), nil, template.HTML("

test hello test …

"), false}, + {4, template.HTML("

abc d e

"), nil, template.HTML("

abc …

"), false}, {10, nil, nil, template.HTML(""), true}, {nil, nil, nil, template.HTML(""), true}, }