Skip to content

Commit

Permalink
feat(term): ansi: implement ANSI aware truncation
Browse files Browse the repository at this point in the history
This implements an ANSI- and wide-character-aware truncation algorithm
that uses the newly merged [ANSI parser state machine][statemachine] and
the fantastic uniseg library.

Since this uses the ANSI state machine, it's compatible with `CSI m`
(SGR) style sequences, `OSC 8` (hyperlinks), and basically any other
escape sequence supported by the state machine (DCS, ESC, SOS, PM, APC).

Related: muesli/reflow#71

[statemachine]: https://github.com/charmbracelet/x/blob/main/exp/term/ansi/parser/transition_table.go
  • Loading branch information
aymanbagabas committed Mar 13, 2024
1 parent 8cc69f8 commit 14e2626
Show file tree
Hide file tree
Showing 3 changed files with 206 additions and 0 deletions.
115 changes: 115 additions & 0 deletions exp/term/ansi/truncate.go
@@ -0,0 +1,115 @@
package ansi

import (
"bytes"

. "github.com/charmbracelet/x/exp/term/ansi/parser"
"github.com/rivo/uniseg"
)

// Truncate truncates a string to a given length, adding a tail to the
// end if the string is longer than the given length.
// This function is aware of ANSI escape codes and will not break them, and
// accounts for wide characters (such as East Asian characters and emojis).
//
// length is measured in terminal cells and includes the width of tail: the
// tail's width is subtracted from length up front, and if nothing is left
// (length becomes negative) the empty string is returned. Escape sequences
// found after the cut point are still copied to the output; only printable
// content is dropped.
func Truncate(s string, length int, tail string) string {
	// Reserve room for the tail; the remainder is the budget for s itself.
	length -= StringWidth(tail)
	if length < 0 {
		return ""
	}

	var buf bytes.Buffer
	// curWidth counts the cells of printable content written so far.
	curWidth := 0
	// ignoring becomes true once the tail has been written; from then on
	// printable content is skipped and only escape sequences are copied.
	ignoring := false
	pstate := GroundState // initial state
	b := []byte(s)
	i := 0

	for i < len(b) {
		state, action := Table.Transition(pstate, b[i])

		// A Collect action on a multi-byte UTF-8 lead byte marks the start of
		// a grapheme cluster; hand the rest of the input to uniseg.
		if action == CollectAction && utf8ByteLen(b[i]) > 1 {
			// Always start from a fresh state (-1): the bytes handed to
			// uniseg are not guaranteed to be contiguous with the previous
			// cluster (ASCII and escape bytes are consumed in between), so a
			// state carried over from an earlier call may be stale.
			cluster, _, width, _ := uniseg.FirstGraphemeCluster(b[i:], -1)

			// Increment the index by the length of the cluster.
			i += len(cluster)

			if ignoring {
				continue
			}

			// Too wide to fit: write the tail and stop collecting. A cluster
			// that fits exactly is kept, and the tail is only emitted when
			// further printable content shows up.
			if curWidth+width > length {
				ignoring = true
				buf.WriteString(tail)
				continue
			}

			curWidth += width
			buf.Write(cluster)

			// Done collecting, now we're back in the ground state.
			pstate = GroundState
			continue
		}

		switch action {
		case PrintAction:
			// Is this gonna be too wide?
			// If so write the tail and stop collecting.
			if curWidth >= length && !ignoring {
				ignoring = true
				buf.WriteString(tail)
			}

			// Skip to the next byte if we're ignoring.
			if ignoring {
				i++
				continue
			}

			// Collect printable ASCII, which occupies a single cell.
			curWidth++
			fallthrough
		default:
			// Every other byte (including collected intermediate bytes of an
			// escape sequence) is copied verbatim, and the index must advance
			// exactly once per byte.
			buf.WriteByte(b[i])
			i++
		}

		// Transition to the next state.
		pstate = state
	}

	return buf.String()
}
59 changes: 59 additions & 0 deletions exp/term/ansi/truncate_test.go
@@ -0,0 +1,59 @@
package ansi

import (
"testing"
)

// tcases is the table of inputs and expected outputs for TestTruncate.
// Each entry gives the input string, the tail to append on truncation, the
// maximum width passed to Truncate, and the exact expected result.
var tcases = []struct {
	name   string
	input  string
	tail   string
	width  int
	expect string
}{
	{"empty", "", "", 0, ""},
	{"simple", "foobar", "", 3, "foo"},
	{"passthrough", "foobar", "", 10, "foobar"},
	{"ascii", "hello", "", 3, "hel"},
	{"emoji", "👋", "", 2, "👋"},
	{"wideemoji", "🫧", "", 2, "🫧"},
	{"controlemoji", "\x1b[31mhello 👋abc\x1b[0m", "", 8, "\x1b[31mhello 👋\x1b[0m"},
	{"osc8", "\x1b]8;;https://charm.sh\x1b\\Charmbracelet 🫧\x1b]8;;\x1b\\", "", 5, "\x1b]8;;https://charm.sh\x1b\\Charm\x1b]8;;\x1b\\"},
	{"osc8_8bit", "\x9d8;;https://charm.sh\x9cCharmbracelet 🫧\x9d8;;\x9c", "", 5, "\x9d8;;https://charm.sh\x9cCharm\x9d8;;\x9c"},
	{"style_tail", "\x1B[38;5;219mHiya!", "…", 3, "\x1B[38;5;219mHi…"},
	{"double_style_tail", "\x1B[38;5;219mHiya!\x1B[38;5;219mHello", "…", 7, "\x1B[38;5;219mHiya!\x1B[38;5;219mH…"},
	{"noop", "\x1B[7m--", "", 2, "\x1B[7m--"},
	{"double_width", "\x1B[38;2;249;38;114m你好\x1B[0m", "", 3, "\x1B[38;2;249;38;114m你\x1B[0m"},
	{"double_width_rune", "你", "", 1, ""},
	{"double_width_runes", "你好", "", 2, "你"},
	// The input must be wider than width, otherwise no truncation happens
	// and the tail would never be appended.
	{"spaces_only", "    ", "…", 2, " …"},
	{"longer_tail", "foo", "...", 2, ""},
	{"same_tail_width", "foo", "...", 3, "..."},
	{"same_tail_width_control", "\x1b[31mfoo\x1b[0m", "...", 3, "\x1b[31m...\x1b[0m"},
	{"same_width", "foo", "", 3, "foo"},
	{"truncate_with_tail", "foobar", ".", 4, "foo."},
	{"style", "I really \x1B[38;2;249;38;114mlove\x1B[0m Go!", "", 8, "I really\x1B[38;2;249;38;114m\x1B[0m"},
	{"dcs", "\x1BPq#0;2;0;0;0#1;2;100;100;0#2;2;0;100;0#1~~@@vv@@~~@@~~$#2??}}GG}}??}}??-#1!14@\x1B\\foobar", "…", 4, "\x1BPq#0;2;0;0;0#1;2;100;100;0#2;2;0;100;0#1~~@@vv@@~~@@~~$#2??}}GG}}??}}??-#1!14@\x1B\\foo…"},
	{"emoji_tail", "\x1b[36mHello there!\x1b[m", "😃", 8, "\x1b[36mHello 😃\x1b[m"},
	{"unicode", "\x1b[35mClaire‘s Boutique\x1b[0m", "", 8, "\x1b[35mClaire‘s\x1b[0m"},
}

// TestTruncate feeds every entry of tcases through Truncate as a named
// subtest and reports any result that differs from the expected string.
func TestTruncate(t *testing.T) {
	for idx := range tcases {
		tc := tcases[idx]
		t.Run(tc.name, func(t *testing.T) {
			got := Truncate(tc.input, tc.width, tc.tail)
			if got == tc.expect {
				return
			}
			// Case numbers in the message are 1-based.
			t.Errorf("test case %d failed: expected %q, got %q", idx+1, tc.expect, got)
		})
	}
}

// BenchmarkTruncateString measures Truncate on a short ASCII input, with the
// calls issued from the goroutines started by b.RunParallel.
func BenchmarkTruncateString(b *testing.B) {
	// Allocation reporting is a benchmark-wide setting, so it is enabled once
	// here instead of inside the per-goroutine body. The body must not touch
	// the timer either (its functions have global effect), and since there is
	// no setup work before the loop, no timer reset is needed at all.
	b.ReportAllocs()
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			Truncate("foo", 2, "")
		}
	})
}
32 changes: 32 additions & 0 deletions exp/term/examples/truncate/main.go
@@ -0,0 +1,32 @@
package main

import (
"flag"
"fmt"
"log"
"strconv"
"strings"

"github.com/charmbracelet/x/exp/term/ansi"
)

// length is the -l flag: the length handed to ansi.Truncate (default 80).
var length = flag.Int("l", 80, "length of the output")

// tail is the -tail flag: the tail handed to ansi.Truncate (default empty).
var tail = flag.String("tail", "", "tail of the output")

// main joins the positional arguments with spaces, interprets Go escape
// sequences in them (so "\x1b[31m" can be typed on a command line),
// truncates the result with ansi.Truncate using the -l and -tail flags, and
// prints the output with non-printable bytes escaped again.
func main() {
	flag.Parse()

	// Wrapping the raw text in double quotes lets strconv.Unquote decode any
	// escape sequences it contains.
	raw := strings.Join(flag.Args(), " ")
	input, err := strconv.Unquote(`"` + raw + `"`)
	if err != nil {
		log.Fatalf("could not unquote input: %v", err)
	}

	// Quote the truncated string so escape bytes are visible, then strip the
	// surrounding quotes that strconv.Quote adds.
	quoted := strconv.Quote(ansi.Truncate(input, *length, *tail))
	fmt.Print(quoted[1 : len(quoted)-1])
}

0 comments on commit 14e2626

Please sign in to comment.