Skip to content

Commit

Permalink
Custom word separator
Browse files Browse the repository at this point in the history
  • Loading branch information
c-bata committed Jun 22, 2018
1 parent c704dcd commit 75aacfa
Show file tree
Hide file tree
Showing 2 changed files with 253 additions and 10 deletions.
118 changes: 109 additions & 9 deletions document.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ type Document struct {
// So if Document is "日本(cursor)語", cursorPosition is 2.
// But DisplayedCursorPosition returns 4 because '日' and '本' are double width characters.
cursorPosition int
// Separator to determine the beginning of a word. Space is applied if empty.
Separator string
}

// NewDocument return the new empty document.
Expand Down Expand Up @@ -97,7 +99,13 @@ func (d *Document) GetWordAfterCursorWithSpace() string {
func (d *Document) FindStartOfPreviousWord() int {
// Search backwards through the text before the cursor for the nearest separator.
x := d.TextBeforeCursor()
if i := strings.LastIndexByte(x, ' '); i != -1 {
var i int
if d.Separator == "" {
i = strings.LastIndexByte(x, ' ')
} else {
i = strings.LastIndexAny(x, d.Separator)
}
if i != -1 {
return i + 1
} else {
return 0
Expand All @@ -108,42 +116,66 @@ func (d *Document) FindStartOfPreviousWord() int {
// pointing to the end of the current word. Returns the length of the text after the cursor if no separator is found.
func (d *Document) FindEndOfCurrentWord() int {
x := d.TextAfterCursor()
if i := strings.IndexByte(x, ' '); i != -1 {
var i int
if d.Separator == "" {
i = strings.IndexByte(x, ' ')
} else {
i = strings.IndexAny(x, d.Separator)
}
if i != -1 {
return i
} else {
return len([]rune(x))
return len(x)
}
}

// FindStartOfPreviousWordWithSpace is almost the same as FindStartOfPreviousWord.
// The only difference is to ignore contiguous spaces.
// The only difference is to ignore contiguous spaces or separators.
func (d *Document) FindStartOfPreviousWordWithSpace() int {
// Scan backwards through the text before the cursor: skip trailing separators, then find the separator that precedes the word.
x := d.TextBeforeCursor()
var start, end int

end := lastIndexByteNot(x, ' ')
if d.Separator == "" {
end = lastIndexByteNot(x, ' ')
} else {
end = lastIndexAnyNot(x, d.Separator)
}
if end == -1 {
return 0
}

start := strings.LastIndexByte(x[:end], ' ')
if d.Separator == "" {
start = strings.LastIndexByte(x[:end], ' ')
} else {
start = strings.LastIndexAny(x[:end], d.Separator)
}
if start == -1 {
return 0
}
return start + 1
}

// FindEndOfCurrentWordWithSpace is almost the same as FindEndOfCurrentWord.
// The only difference is to ignore contiguous spaces.
// The only difference is to ignore contiguous spaces or separators.
func (d *Document) FindEndOfCurrentWordWithSpace() int {
x := d.TextAfterCursor()
var start, end int

start := indexByteNot(x, ' ')
if d.Separator == "" {
start = indexByteNot(x, ' ')
} else {
start = indexAnyNot(x, d.Separator)
}
if start == -1 {
return len(x)
}

end := strings.IndexByte(x[start:], ' ')
if d.Separator == "" {
end = strings.IndexByte(x[start:], ' ')
} else {
end = strings.IndexAny(x[start:], d.Separator)
}
if end == -1 {
return len(x)
}
Expand Down Expand Up @@ -369,3 +401,71 @@ func lastIndexByteNot(s string, c byte) int {
}
return -1
}

// asciiSet is a 256-bit bitmap with one bit per possible byte value:
// byte c is represented by bit c%32 of word c/32.
type asciiSet [8]uint32

// notContains reports whether the bit for c is clear, i.e. c is absent
// from the set.
func (as *asciiSet) notContains(c byte) bool {
	word, bit := c/32, c%32
	return as[word]&(1<<bit) == 0
}

// makeASCIISet builds the set of bytes that occur in chars.
//
// ok is true only when every byte of chars is below utf8.RuneSelf. As soon
// as a byte at or above utf8.RuneSelf is seen, the scan stops and the set
// built so far is returned together with ok == false.
func makeASCIISet(chars string) (as asciiSet, ok bool) {
	for _, b := range []byte(chars) {
		if b >= utf8.RuneSelf {
			return as, false
		}
		as[b/32] |= 1 << (b % 32)
	}
	return as, true
}

// indexAnyNot is the complement of strings.IndexAny: it returns the byte
// index of the first Unicode code point in s that is NOT one of the code
// points in chars.
//
// It returns -1 if every code point of s occurs in chars, if s is empty, or
// if chars is empty.
func indexAnyNot(s, chars string) int {
	if chars == "" {
		return -1
	}
	// A rune counts as a hit only when it matches none of the runes in
	// chars. Checking per rune keeps the result on a rune boundary and
	// independent of the length of s.
	return strings.IndexFunc(s, func(r rune) bool {
		return !strings.ContainsRune(chars, r)
	})
}

// lastIndexAnyNot is the complement of strings.LastIndexAny: it returns the
// byte index of the start of the last Unicode code point in s that is NOT
// one of the code points in chars.
//
// It returns -1 if every code point of s occurs in chars, if s is empty, or
// if chars is empty.
func lastIndexAnyNot(s, chars string) int {
	if chars == "" {
		return -1
	}
	// A rune counts as a hit only when it matches none of the runes in
	// chars. The index always points at the first byte of that rune, so
	// callers can slice s at the result without splitting a multi-byte
	// character.
	return strings.LastIndexFunc(s, func(r rune) bool {
		return !strings.ContainsRune(chars, r)
	})
}
Loading

0 comments on commit 75aacfa

Please sign in to comment.